Coverage for src/local_deep_research/security/file_integrity/base_verifier.py: 61%

26 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-01-11 00:51 +0000

1""" 

2Abstract base class for file integrity verifiers. 

3 

4Defines the interface for file-type-specific integrity verification. 

5Concrete implementations specify which files to verify and policies. 

6""" 

7 

8from abc import ABC, abstractmethod 

9from enum import Enum 

10from pathlib import Path 

11import hashlib 

12 

13 

class FileType(str, Enum):
    """
    Identifiers for the kinds of files handled by integrity verifiers.

    Each member is also a ``str``, so it compares equal to its plain
    string value (e.g. ``FileType.PDF == "pdf"``).
    """

    FAISS_INDEX = "faiss_index"
    PDF = "pdf"
    EXPORT = "export"

20 

21 

class BaseFileVerifier(ABC):
    """
    Common interface for file-type-specific integrity verifiers.

    A concrete verifier must implement:
    - should_verify(): whether a given path belongs to it
    - get_file_type(): the FileType it is responsible for
    - allows_modifications(): whether user edits are legitimate

    calculate_checksum() and get_algorithm() come with SHA256 defaults
    and may be overridden to use a different algorithm.
    """

    @abstractmethod
    def should_verify(self, file_path: Path) -> bool:
        """
        Report whether this verifier is responsible for the given file.

        Args:
            file_path: Path of the file being considered

        Returns:
            True when this verifier should handle the file
        """

    @abstractmethod
    def get_file_type(self) -> FileType:
        """
        Identify the file type this verifier covers.

        Returns:
            The matching FileType member
        """

    @abstractmethod
    def allows_modifications(self) -> bool:
        """
        Tell whether users may legitimately change files of this type.

        Returns:
            True when user edits are expected (e.g., annotated PDFs)
            False when files must never be edited by hand (e.g., FAISS indexes)
        """

    def calculate_checksum(self, file_path: Path) -> str:
        """
        Compute the SHA256 checksum of a file's contents.

        Subclasses may override this to use another algorithm.

        Args:
            file_path: Path of the file to hash

        Returns:
            Checksum as a hexadecimal string

        Raises:
            FileNotFoundError: If the file does not exist
            IOError: If the file cannot be read
        """
        digest = hashlib.sha256()
        with open(file_path, "rb") as stream:
            # Hash in 4096-byte chunks so large files are never loaded whole;
            # an empty read marks end of file and ends the loop.
            while chunk := stream.read(4096):
                digest.update(chunk)
        return digest.hexdigest()

    def get_algorithm(self) -> str:
        """
        Name the checksum algorithm used by this verifier.

        Returns:
            Algorithm identifier; the base implementation returns 'sha256'
        """
        return "sha256"

96 return "sha256"