Coverage for src / local_deep_research / security / data_sanitizer.py: 100%
27 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
1"""Security module for sanitizing sensitive data from data structures.
3This module ensures that sensitive information like API keys, passwords, and tokens
4are not accidentally leaked in logs, files, or API responses.
5"""
7from typing import Any, Set
10class DataSanitizer:
11 """Utility class for removing sensitive information from data structures."""
13 # Default set of sensitive key names to redact
14 DEFAULT_SENSITIVE_KEYS: Set[str] = {
15 "api_key",
16 "apikey",
17 "password",
18 "secret",
19 "access_token",
20 "refresh_token",
21 "private_key",
22 "auth_token",
23 "session_token",
24 "csrf_token",
25 }
27 @staticmethod
28 def sanitize(data: Any, sensitive_keys: Set[str] | None = None) -> Any:
29 """
30 Recursively remove sensitive keys from data structures.
32 This method traverses dictionaries and lists, removing any keys that match
33 the sensitive keys list (case-insensitive). This prevents accidental
34 credential leakage in optimization results, logs, or API responses.
36 Args:
37 data: The data structure to sanitize (dict, list, or primitive)
38 sensitive_keys: Set of key names to remove (case-insensitive).
39 If None, uses DEFAULT_SENSITIVE_KEYS.
41 Returns:
42 Sanitized copy of the data with sensitive keys removed
44 Example:
45 >>> sanitizer = DataSanitizer()
46 >>> data = {"username": "user", "api_key": "secret123"}
47 >>> sanitizer.sanitize(data)
48 {"username": "user"}
49 """
50 if sensitive_keys is None:
51 sensitive_keys = DataSanitizer.DEFAULT_SENSITIVE_KEYS
53 # Convert to lowercase for case-insensitive comparison
54 sensitive_keys_lower = {key.lower() for key in sensitive_keys}
56 if isinstance(data, dict):
57 return {
58 k: DataSanitizer.sanitize(v, sensitive_keys)
59 for k, v in data.items()
60 if k.lower() not in sensitive_keys_lower
61 }
62 elif isinstance(data, list):
63 return [
64 DataSanitizer.sanitize(item, sensitive_keys) for item in data
65 ]
66 else:
67 # Return primitives unchanged
68 return data
70 @staticmethod
71 def redact(
72 data: Any,
73 sensitive_keys: Set[str] | None = None,
74 redaction_text: str = "[REDACTED]",
75 ) -> Any:
76 """
77 Recursively redact (replace with placeholder) sensitive values in data structures.
79 Unlike sanitize() which removes keys entirely, this method replaces their
80 values with a redaction placeholder, preserving the structure.
82 Args:
83 data: The data structure to redact (dict, list, or primitive)
84 sensitive_keys: Set of key names to redact (case-insensitive).
85 If None, uses DEFAULT_SENSITIVE_KEYS.
86 redaction_text: Text to replace sensitive values with
88 Returns:
89 Copy of the data with sensitive values redacted
91 Example:
92 >>> sanitizer = DataSanitizer()
93 >>> data = {"username": "user", "api_key": "secret123"}
94 >>> sanitizer.redact(data)
95 {"username": "user", "api_key": "[REDACTED]"}
96 """
97 if sensitive_keys is None:
98 sensitive_keys = DataSanitizer.DEFAULT_SENSITIVE_KEYS
100 # Convert to lowercase for case-insensitive comparison
101 sensitive_keys_lower = {key.lower() for key in sensitive_keys}
103 if isinstance(data, dict):
104 return {
105 k: (
106 redaction_text
107 if k.lower() in sensitive_keys_lower
108 else DataSanitizer.redact(v, sensitive_keys, redaction_text)
109 )
110 for k, v in data.items()
111 }
112 elif isinstance(data, list):
113 return [
114 DataSanitizer.redact(item, sensitive_keys, redaction_text)
115 for item in data
116 ]
117 else:
118 # Return primitives unchanged
119 return data
122# Convenience functions for direct use
def sanitize_data(data: Any, sensitive_keys: Set[str] | None = None) -> Any:
    """
    Strip sensitive keys out of a data structure.

    Module-level shortcut that forwards both arguments to
    DataSanitizer.sanitize() and returns its result unchanged.

    Args:
        data: The data structure to sanitize
        sensitive_keys: Optional set of sensitive key names; passed through
            as-is (None included)

    Returns:
        Whatever DataSanitizer.sanitize() returns for these arguments
    """
    sanitized = DataSanitizer.sanitize(data, sensitive_keys=sensitive_keys)
    return sanitized
def redact_data(
    data: Any,
    sensitive_keys: Set[str] | None = None,
    redaction_text: str = "[REDACTED]",
) -> Any:
    """
    Replace sensitive values in a data structure with a placeholder.

    Module-level shortcut that forwards all three arguments to
    DataSanitizer.redact() and returns its result unchanged.

    Args:
        data: The data structure to redact
        sensitive_keys: Optional set of sensitive key names; passed through
            as-is (None included)
        redaction_text: Text to replace sensitive values with

    Returns:
        Whatever DataSanitizer.redact() returns for these arguments
    """
    redacted = DataSanitizer.redact(
        data,
        sensitive_keys=sensitive_keys,
        redaction_text=redaction_text,
    )
    return redacted