Coverage for src / local_deep_research / database / models / file_integrity.py: 95%
39 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
1"""
2Database models for file integrity tracking.
4Provides efficient storage of file checksums and verification statistics
5with sparse logging of failures for audit trail.
6"""
8from sqlalchemy import (
9 Column,
10 Integer,
11 String,
12 Text,
13 Boolean,
14 Float,
15 ForeignKey,
16)
17from sqlalchemy.orm import relationship
18from sqlalchemy_utc import UtcDateTime, utcnow
20from .base import Base
class FileIntegrityRecord(Base):
    """
    Per-file integrity record with running verification statistics.

    Each row holds the current checksum for one file path together with
    counters summarising past verifications. Individual failed checks are
    kept in a separate table and reachable via ``verification_failures``.
    """

    __tablename__ = "file_integrity_records"

    # --- Identity -------------------------------------------------------
    id = Column(Integer, primary_key=True, autoincrement=True)
    file_path = Column(Text, nullable=False, unique=True, index=True)
    # Category of the file, e.g. 'faiss_index', 'pdf', 'export'.
    file_type = Column(String(50), nullable=False, index=True)

    # --- Current state --------------------------------------------------
    # SHA256 hash of the file contents.
    checksum = Column(String(64), nullable=False)
    algorithm = Column(String(20), default="sha256")
    file_size = Column(Integer, nullable=True)
    # OS modification time, kept for smart verification.
    file_mtime = Column(Float, nullable=True)

    # --- Policy ---------------------------------------------------------
    # Whether the file should be verified before it is used.
    verify_on_load = Column(Boolean, default=True)
    # Whether the file may legitimately change (PDFs=True, FAISS=False).
    allow_modifications = Column(Boolean, default=False)

    # --- Embedded statistics (stored inline for efficiency) -------------
    total_verifications = Column(Integer, default=0)
    last_verified_at = Column(UtcDateTime, nullable=True)
    last_verification_passed = Column(Boolean, default=True)
    consecutive_successes = Column(Integer, default=0)
    consecutive_failures = Column(Integer, default=0)

    # --- Timestamps -----------------------------------------------------
    # created_at has an insert default; updated_at only has an onupdate
    # value, so no insert-time default is configured for it here.
    created_at = Column(UtcDateTime, default=utcnow())
    updated_at = Column(UtcDateTime, onupdate=utcnow())

    # --- Polymorphic link to an arbitrary owning entity -----------------
    # Kind of entity referenced, e.g. 'rag_index', 'library_document'.
    related_entity_type = Column(String(50), nullable=True)
    related_entity_id = Column(Integer, nullable=True)

    # --- Sparse history: only failed verifications are recorded ---------
    verification_failures = relationship(
        "FileVerificationFailure",
        back_populates="file_record",
        cascade="all, delete-orphan",
    )

    def __repr__(self):
        """Return a short debug string: id, path, type, verification count."""
        pieces = [
            f"<FileIntegrityRecord(id={self.id}, ",
            f"path={self.file_path}, ",
            f"type={self.file_type}, ",
            f"verifications={self.total_verifications})>",
        ]
        return "".join(pieces)
class FileVerificationFailure(Base):
    """
    One logged failure of a file integrity verification.

    Rows are written only for failed checks, which keeps storage small
    while leaving a debugging trail for corruption/tampering incidents.
    """

    __tablename__ = "file_verification_failures"

    id = Column(Integer, primary_key=True, autoincrement=True)
    # Owning row in ``file_integrity_records``; required and indexed.
    file_record_id = Column(
        Integer,
        ForeignKey("file_integrity_records.id"),
        index=True,
        nullable=False,
    )

    verified_at = Column(UtcDateTime, default=utcnow())
    expected_checksum = Column(String(64), nullable=False)
    # Null if the file was missing or unreadable.
    actual_checksum = Column(String(64), nullable=True)
    file_size = Column(Integer, nullable=True)
    # Short reason code such as "checksum_mismatch" or "file_missing".
    failure_reason = Column(Text, nullable=False)

    # Inverse side of ``FileIntegrityRecord.verification_failures``.
    file_record = relationship(
        "FileIntegrityRecord",
        back_populates="verification_failures",
    )

    def __repr__(self):
        """Return a short debug string: id, parent record id, reason, time."""
        pieces = [
            f"<FileVerificationFailure(id={self.id}, ",
            f"file_record_id={self.file_record_id}, ",
            f"reason={self.failure_reason}, ",
            f"verified_at={self.verified_at})>",
        ]
        return "".join(pieces)