Coverage for src/local_deep_research/database/models/journal.py: 93%
14 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-03 23:15 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-03 23:15 +0000
1"""
2Journal model — per-user cache for Tier 4 LLM-scored journals.
4Tiers 1–3 read directly from the bundled read-only reference DB
5(``journal_quality.db``) on every scoring pass — lookups are
6100–300µs and don't need a second-level cache. Only Tier 4 (LLM
7analysis via SearXNG) is expensive enough to warrant caching, so
8this table holds only LLM-sourced rows (``score_source == "llm"``)
9and the filter predicate matches on ``quality_model`` so scores
10from a superseded LLM version miss the cache and get re-scored.
12Historical note: earlier PR iterations included h_index, sjr_quartile,
13publisher, is_predatory, etc. as a Tier 2/3 cache, but the read path
14never consulted them — the final schema deliberately omits them.
16This module used to live inside ``logs.py``. Moved to its own file
17for discoverability — ``Journal`` is unrelated to the ``ResearchLog``
18table that ``logs.py`` otherwise owns.
19"""
21from sqlalchemy import Column, Integer, Sequence, String, UniqueConstraint
23from .base import Base
class Journal(Base):
    """ORM model for the per-user cache of Tier 4 (LLM-scored) journals.

    The rationale for caching only LLM-sourced rows is given in the
    module docstring.
    """

    __tablename__ = "journals"

    # Table-level constraints are declared up front. The UNIQUE constraint
    # on ``name_lower`` doubles as that column's lookup index.
    __table_args__ = (
        UniqueConstraint("name_lower", name="uq_journals_name_lower"),
    )

    id = Column(
        Integer,
        Sequence("journal_id_seq"),
        primary_key=True,
    )

    # ------------------------------------------------------------------
    # Identity
    # ------------------------------------------------------------------

    # Cleaned/normalized journal name. ``unique=True`` already gives
    # SQLite the index it needs for exact-name lookups, so no additional
    # non-unique index is declared (it would only duplicate this one).
    name = Column(String(255), unique=True, nullable=False)

    # Lower-cased, NFKC-normalized copy of ``name`` used for indexed
    # case-insensitive lookups on the citation-save hot path. Its index
    # comes from the UNIQUE constraint in ``__table_args__``; adding
    # ``index=True`` here would create a second, redundant B-tree on the
    # same column.
    name_lower = Column(String(255), nullable=True)

    # ------------------------------------------------------------------
    # LLM cache payload
    # ------------------------------------------------------------------

    # Composite quality score (1-10). See constants.VALID_QUALITY_SCORES.
    quality = Column(Integer, nullable=True)

    # Always ``"llm"`` for rows in this table, because the table is a
    # Tier 4 LLM cache only. Stored as a string so downgrade + historical
    # rows still parse.
    # KNOWN-DEFERRED: the column is retained so that a future "downgrade"
    # of this cache to include non-LLM rows (e.g., manual admin overrides)
    # does not require a schema change. Post-merge candidate for removal
    # if no multi-source plan materializes.
    score_source = Column(String(50), nullable=True)

    # LLM model identifier (from utilities.llm_utils.get_model_identifier).
    # Indexed because the cache predicate filters on it, so that a change
    # in the configured model invalidates stale scores.
    quality_model = Column(String(255), index=True, nullable=True)

    # Unix timestamp of the last analysis, in seconds (not milliseconds),
    # used for the TTL / reanalysis_period freshness check. Read and
    # written via ``int(time.time())``; an Integer column (matching
    # SearchCache) is cheaper on the cache hot path than UtcDateTime
    # would be.
    quality_analysis_time = Column(Integer, nullable=False)

    def __repr__(self):
        """Return a short debug string showing the journal name and score."""
        return f"<Journal(name='{self.name}', quality={self.quality})>"