Coverage for src / local_deep_research / database / models / file_integrity.py: 95%

39 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-01-11 00:51 +0000

1""" 

2Database models for file integrity tracking. 

3 

4Provides efficient storage of file checksums and verification statistics 

5with sparse logging of failures for audit trail. 

6""" 

7 

8from sqlalchemy import ( 

9 Column, 

10 Integer, 

11 String, 

12 Text, 

13 Boolean, 

14 Float, 

15 ForeignKey, 

16) 

17from sqlalchemy.orm import relationship 

18from sqlalchemy_utc import UtcDateTime, utcnow 

19 

20from .base import Base 

21 

22 

class FileIntegrityRecord(Base):
    """
    One row per tracked file: its current checksum plus running statistics.

    Verification counters are kept on the row itself. Individual failed
    verifications are stored separately and reached through the
    ``verification_failures`` relationship.
    """

    __tablename__ = "file_integrity_records"

    # --- Identity ---
    id = Column(Integer, primary_key=True, autoincrement=True)
    file_path = Column(Text, nullable=False, unique=True, index=True)
    # Kind of file, e.g. 'faiss_index', 'pdf', 'export'
    file_type = Column(String(50), nullable=False, index=True)

    # --- Current state ---
    # SHA256 hash
    checksum = Column(String(64), nullable=False)
    algorithm = Column(String(20), default="sha256")
    file_size = Column(Integer, nullable=True)
    # OS modification time for smart verification
    file_mtime = Column(Float, nullable=True)

    # --- Policy ---
    # Should this file be verified before use?
    verify_on_load = Column(Boolean, default=True)
    # Can file be legitimately modified? (PDFs=True, FAISS=False)
    allow_modifications = Column(Boolean, default=False)

    # --- Embedded statistics (kept on the row for efficiency) ---
    total_verifications = Column(Integer, default=0)
    last_verified_at = Column(UtcDateTime, nullable=True)
    last_verification_passed = Column(Boolean, default=True)
    consecutive_successes = Column(Integer, default=0)
    consecutive_failures = Column(Integer, default=0)

    # --- Timestamps ---
    # created_at carries an insert default; updated_at only has onupdate.
    created_at = Column(UtcDateTime, default=utcnow())
    updated_at = Column(UtcDateTime, onupdate=utcnow())

    # --- Polymorphic link: may point at any kind of entity ---
    # Entity kind, e.g. 'rag_index', 'library_document', etc.
    related_entity_type = Column(String(50), nullable=True)
    related_entity_id = Column(Integer, nullable=True)

    # --- Sparse history: only failures get their own rows ---
    verification_failures = relationship(
        "FileVerificationFailure",
        back_populates="file_record",
        cascade="all, delete-orphan",
    )

    def __repr__(self):
        """Return a debug string with id, path, type and verification count."""
        text = (
            f"<FileIntegrityRecord(id={self.id}, "
            f"path={self.file_path}, "
            f"type={self.file_type}, "
            f"verifications={self.total_verifications})>"
        )
        return text

87 

88 

class FileVerificationFailure(Base):
    """
    A single failed integrity verification, kept as an audit entry.

    Successful verifications are not recorded here; only failures are,
    which keeps storage small while leaving a trail for investigating
    corruption/tampering incidents.
    """

    __tablename__ = "file_verification_failures"

    id = Column(Integer, primary_key=True, autoincrement=True)
    # Owning row in file_integrity_records (required, indexed)
    file_record_id = Column(
        Integer,
        ForeignKey("file_integrity_records.id"),
        index=True,
        nullable=False,
    )

    verified_at = Column(UtcDateTime, default=utcnow())
    expected_checksum = Column(String(64), nullable=False)
    # Null if file missing/unreadable
    actual_checksum = Column(String(64), nullable=True)
    file_size = Column(Integer, nullable=True)
    # Short reason code: "checksum_mismatch", "file_missing", etc.
    failure_reason = Column(Text, nullable=False)

    # Reverse side of FileIntegrityRecord.verification_failures
    file_record = relationship(
        "FileIntegrityRecord", back_populates="verification_failures"
    )

    def __repr__(self):
        """Return a debug string with id, owning record id, reason and time."""
        text = (
            f"<FileVerificationFailure(id={self.id}, "
            f"file_record_id={self.file_record_id}, "
            f"reason={self.failure_reason}, "
            f"verified_at={self.verified_at})>"
        )
        return text