Coverage for src/local_deep_research/journal_quality/data_sources/__init__.py: 89%
14 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-03 23:15 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-03 23:15 +0000
1"""Registry of academic data sources used by the journal-quality system.
3Each source is a `DataSource` subclass that declares its metadata and
4implements `fetch()`. To add a new dataset (institutions, conferences,
5…), drop in a new module containing one subclass and append an instance
6to `ALL_SOURCES` below. The bulk downloader, status endpoint, dashboard
7banner, and lazy-load auto-download path will all pick it up
8automatically.
9"""
11from __future__ import annotations
13from .base import DataSource
14from .doaj import DOAJSource
15from .institutions import InstitutionSource
16from .jabref import JabRefSource
17from .openalex import OpenAlexSource
18from .predatory import PredatorySource
# Registration order drives the bulk download flow:
#   1. OpenAlex comes first (required=True; a failure aborts the batch).
#   2. DOAJ, predatory, JabRef and institutions are best-effort and can
#      each fail independently.
ALL_SOURCES: list[DataSource] = [
    source_cls()
    for source_cls in (
        OpenAlexSource,
        DOAJSource,
        PredatorySource,
        JabRefSource,
        InstitutionSource,
    )
]
def get_source(key: str) -> DataSource:
    """Return the registered data source whose `key` attribute equals `key`.

    `ALL_SOURCES` is scanned in registration order and the first match
    is returned.

    Raises:
        KeyError: if no source with that key is registered.
    """
    match = next(
        (source for source in ALL_SOURCES if source.key == key),
        None,
    )
    if match is None:
        raise KeyError(f"Unknown data source: {key!r}")
    return match
# Public API of the data-source registry package.
__all__ = [
    "DataSource",
    "ALL_SOURCES",
    "get_source",
]