Coverage for src / local_deep_research / security / file_integrity / base_verifier.py: 61%
26 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
1"""
2Abstract base class for file integrity verifiers.
4Defines the interface for file-type-specific integrity verification.
5Concrete implementations specify which files to verify and policies.
6"""
8from abc import ABC, abstractmethod
9from enum import Enum
10from pathlib import Path
11import hashlib
class FileType(str, Enum):
    """Closed set of file categories known to the integrity verifiers.

    Members also inherit from ``str``, so each one compares equal to its
    plain string value (for example ``FileType.PDF == "pdf"``).
    """

    # Identifier for FAISS index files.
    FAISS_INDEX = "faiss_index"
    # Identifier for PDF files.
    PDF = "pdf"
    # Identifier for export files.
    EXPORT = "export"
class BaseFileVerifier(ABC):
    """
    Base class for file integrity verification.

    Subclasses implement file-type-specific logic for:
    - Identifying which files they handle
    - Defining verification policies
    - Optionally customizing checksum algorithms

    Attributes:
        CHUNK_SIZE: Number of bytes read per iteration while hashing a
            file. Subclasses may override it; it does not affect the
            resulting digest, only how the file is read.
    """

    # 64 KiB keeps memory use bounded while needing far fewer read()
    # calls on large files than a 4 KiB buffer would.
    CHUNK_SIZE = 64 * 1024

    @abstractmethod
    def should_verify(self, file_path: Path) -> bool:
        """
        Determine if this verifier handles the given file.

        Args:
            file_path: Path to file to check

        Returns:
            True if this verifier should handle this file type
        """

    @abstractmethod
    def get_file_type(self) -> "FileType":
        """
        Get the file type identifier for this verifier.

        Returns:
            FileType enum value
        """

    @abstractmethod
    def allows_modifications(self) -> bool:
        """
        Whether this file type can be legitimately modified by users.

        Returns:
            True if users can modify files (e.g., PDFs with annotations)
            False if files should never be manually modified (e.g., FAISS indexes)
        """

    def calculate_checksum(self, file_path: Path) -> str:
        """
        Calculate SHA256 checksum of file.

        The file is read in ``CHUNK_SIZE`` blocks so that memory use stays
        constant regardless of file size.

        Can be overridden by subclasses for different algorithms. This
        default always hashes with SHA256 and does not consult
        ``get_algorithm()``, so a subclass that changes the algorithm
        should override both methods together.

        Args:
            file_path: Path to file to checksum

        Returns:
            Hex string of checksum

        Raises:
            FileNotFoundError: If file doesn't exist
            IOError: If file can't be read
        """
        digest = hashlib.sha256()
        chunk_size = self.CHUNK_SIZE
        with open(file_path, "rb") as stream:
            # iter() with a sentinel stops at the first empty read (EOF).
            for chunk in iter(lambda: stream.read(chunk_size), b""):
                digest.update(chunk)
        return digest.hexdigest()

    def get_algorithm(self) -> str:
        """
        Get the checksum algorithm name.

        The returned name describes what ``calculate_checksum`` produces;
        keep the two in sync when overriding either one.

        Returns:
            Algorithm identifier (default: 'sha256')
        """
        return "sha256"