Coverage for src/local_deep_research/database/models/journal.py: 93%

1"""

2Journal model — per-user cache for Tier 4 LLM-scored journals.

4Tiers 1–3 read directly from the bundled read-only reference DB

5(``journal_quality.db``) on every scoring pass — lookups are

6100–300µs and don't need a second-level cache. Only Tier 4 (LLM

7analysis via SearXNG) is expensive enough to warrant caching, so

8this table holds only LLM-sourced rows (``score_source == "llm"``)

9and the filter predicate matches on ``quality_model`` so scores

10from a superseded LLM version miss the cache and get re-scored.

12Historical note: earlier PR iterations included h_index, sjr_quartile,

13publisher, is_predatory, etc. as a Tier 2/3 cache, but the read path

14never consulted them — the final schema deliberately omits them.

16This module used to live inside ``logs.py``. Moved to its own file

17for discoverability — ``Journal`` is unrelated to the ``ResearchLog``

18table that ``logs.py`` otherwise owns.

19"""

21from sqlalchemy import Column, Integer, Sequence, String, UniqueConstraint

23from .base import Base

class Journal(Base):
    """ORM row for one cached Tier 4 (LLM-scored) journal.

    One row per journal, stored per user. The module docstring explains
    why only LLM-sourced scores are cached here and how ``quality_model``
    takes part in cache invalidation.
    """

    __tablename__ = "journals"

    # Named UNIQUE constraint on ``name_lower``. It doubles as the backing
    # index for case-insensitive lookups, which is why the column itself
    # does not also set ``index=True`` (that would be a second, redundant
    # B-tree on the same column).
    __table_args__ = (
        UniqueConstraint("name_lower", name="uq_journals_name_lower"),
    )

    # Surrogate primary key, drawn from the ``journal_id_seq`` sequence.
    id = Column(Integer, Sequence("journal_id_seq"), primary_key=True)

    # ------------------------------------------------------------------
    # Identity
    # ------------------------------------------------------------------

    # Cleaned / normalized journal name. ``unique=True`` already gives the
    # exact-name lookup path the index it needs on SQLite, so no separate
    # non-unique index is declared.
    name = Column(String(255), unique=True, nullable=False)

    # Lower-cased, NFKC-normalized copy of ``name``, used for indexed
    # case-insensitive lookups on the citation-save hot path. Uniqueness
    # (and the index that comes with it) is declared in ``__table_args__``.
    name_lower = Column(String(255), nullable=True)

    # ------------------------------------------------------------------
    # LLM cache payload
    # ------------------------------------------------------------------

    # Composite quality score on a 1-10 scale; see
    # ``constants.VALID_QUALITY_SCORES``.
    quality = Column(Integer, nullable=True)

    # Origin of the score. KNOWN-DEFERRED: every row currently holds
    # ``"llm"`` because this table is a Tier 4 LLM cache only. It is kept
    # as a free-form string so historical rows still parse, and so that a
    # future "downgrade" of the cache to also hold non-LLM rows (e.g.
    # manual admin overrides) needs no schema change. Post-merge candidate
    # for removal if no multi-source plan materializes.
    score_source = Column(String(50), nullable=True)

    # Identifier of the LLM that produced the score (obtained from
    # ``utilities.llm_utils.get_model_identifier``). Indexed because the
    # cache predicate filters on it: switching the configured model makes
    # scores from the previous model miss the cache.
    quality_model = Column(String(255), index=True, nullable=True)

    # Unix timestamp of the last analysis, in seconds (not milliseconds),
    # read and written via ``int(time.time())``. Drives the TTL /
    # reanalysis_period freshness check; a plain Integer is cheaper on
    # that hot path than ``UtcDateTime`` would be.
    quality_analysis_time = Column(Integer, nullable=False)

    def __repr__(self):
        """Return a short debug string showing the name and quality score."""
        return f"<Journal(name='{self.name}', quality={self.quality})>"