Coverage for src/local_deep_research/database/models/journal.py: 93%

14 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-03 23:15 +0000

1""" 

2Journal model — per-user cache for Tier 4 LLM-scored journals. 

3 

4Tiers 1–3 read directly from the bundled read-only reference DB 

5(``journal_quality.db``) on every scoring pass — lookups are 

6100–300µs and don't need a second-level cache. Only Tier 4 (LLM 

7analysis via SearXNG) is expensive enough to warrant caching, so 

8this table holds only LLM-sourced rows (``score_source == "llm"``) 

9and the filter predicate matches on ``quality_model`` so scores 

10from a superseded LLM version miss the cache and get re-scored. 

11 

12Historical note: earlier PR iterations included h_index, sjr_quartile, 

13publisher, is_predatory, etc. as a Tier 2/3 cache, but the read path 

14never consulted them — the final schema deliberately omits them. 

15 

16This module used to live inside ``logs.py``. Moved to its own file 

17for discoverability — ``Journal`` is unrelated to the ``ResearchLog`` 

18table that ``logs.py`` otherwise owns. 

19""" 

20 

21from sqlalchemy import Column, Integer, Sequence, String, UniqueConstraint 

22 

23from .base import Base 

24 

25 

class Journal(Base):
    """ORM row for the per-user cache of Tier 4 (LLM-scored) journals.

    The reasoning behind the schema is given in the module docstring.
    """

    __tablename__ = "journals"

    # The UNIQUE constraint on ``name_lower`` also serves as the index for
    # case-insensitive lookups, so that column carries no ``index=True``
    # (which would only add a second, redundant B-tree).
    __table_args__ = (
        UniqueConstraint("name_lower", name="uq_journals_name_lower"),
    )

    id = Column(Integer, Sequence("journal_id_seq"), primary_key=True)

    # ------------------------------------------------------------------
    # Identity
    # ------------------------------------------------------------------
    # Cleaned / normalized journal name. ``unique=True`` supplies the
    # index that the exact-name lookup path relies on in SQLite, so no
    # additional non-unique index is declared.
    name = Column(String(255), unique=True, nullable=False)
    # Lower-cased, NFKC-normalized copy of ``name``, used for indexed
    # case-insensitive lookups on the citation-save hot path. Its
    # uniqueness (and index) comes from ``__table_args__``.
    name_lower = Column(String(255), nullable=True)

    # ------------------------------------------------------------------
    # LLM cache payload
    # ------------------------------------------------------------------
    # Composite quality score in the range 1-10; the permitted values are
    # listed in constants.VALID_QUALITY_SCORES.
    quality = Column(Integer, nullable=True)
    # Origin of the score. Every row in this table currently stores
    # ``"llm"`` because the table caches Tier 4 LLM results only; it is a
    # string so that downgrade and historical rows still parse.
    # KNOWN-DEFERRED: the column is retained so that widening the cache
    # to non-LLM rows later (e.g., manual admin overrides) would not need
    # a schema change. Candidate for removal after merge if no
    # multi-source plan materializes.
    score_source = Column(String(50), nullable=True)
    # Identifier of the LLM that produced the score (obtained from
    # utilities.llm_utils.get_model_identifier). Indexed because the
    # cache predicate filters on it, which is how switching the
    # configured model invalidates stale scores.
    quality_model = Column(String(255), index=True, nullable=True)
    # Unix timestamp of the most recent analysis, in seconds (not
    # milliseconds), consulted by the TTL / reanalysis_period freshness
    # check. Read and written as ``int(time.time())``; a plain Integer
    # (matching SearchCache) is cheaper on the cache hot path than
    # UtcDateTime would be.
    quality_analysis_time = Column(Integer, nullable=False)

    def __repr__(self):
        """Return a short debug representation with the name and quality."""
        return f"<Journal(name='{self.name}', quality={self.quality})>"