Coverage for src/local_deep_research/journal_quality/data_sources/__init__.py: 89%

14 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-03 23:15 +0000

1"""Registry of academic data sources used by the journal-quality system. 

2 

3Each source is a `DataSource` subclass that declares its metadata and 

4implements `fetch()`. To add a new dataset (institutions, conferences, 

5…), drop in a new module containing one subclass and append an instance 

6to `ALL_SOURCES` below. The bulk downloader, status endpoint, dashboard 

7banner, and lazy-load auto-download path will all pick it up 

8automatically. 

9""" 

10 

11from __future__ import annotations 

12 

13from .base import DataSource 

14from .doaj import DOAJSource 

15from .institutions import InstitutionSource 

16from .jabref import JabRefSource 

17from .openalex import OpenAlexSource 

18from .predatory import PredatorySource 

19 

# Registration order drives the bulk download flow:
#   1. OpenAlex comes first (required=True; a failure aborts the batch).
#   2. DOAJ, predatory, jabref and institutions follow; they are
#      best-effort and can each fail independently.
ALL_SOURCES: list[DataSource] = [
    source_cls()
    for source_cls in (
        OpenAlexSource,
        DOAJSource,
        PredatorySource,
        JabRefSource,
        InstitutionSource,
    )
]

31 

32 

def get_source(key: str) -> DataSource:
    """Return the registered data source whose `key` attribute equals *key*.

    `ALL_SOURCES` is scanned in registration order and the first entry
    with a matching `key` is returned.

    Raises:
        KeyError: if no source with that key is registered.
    """
    found = next(
        (candidate for candidate in ALL_SOURCES if candidate.key == key),
        None,
    )
    if found is None:
        raise KeyError(f"Unknown data source: {key!r}")
    return found

43 

44 

# Public API of the data-sources package.
__all__ = [
    "DataSource",
    "ALL_SOURCES",
    "get_source",
]