Coverage for src / local_deep_research / security / data_sanitizer.py: 100%

27 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-01-11 00:51 +0000

1"""Security module for sanitizing sensitive data from data structures. 

2 

3This module ensures that sensitive information like API keys, passwords, and tokens 

4are not accidentally leaked in logs, files, or API responses. 

5""" 

6 

7from typing import Any, Set 

8 

9 

10class DataSanitizer: 

11 """Utility class for removing sensitive information from data structures.""" 

12 

13 # Default set of sensitive key names to redact 

14 DEFAULT_SENSITIVE_KEYS: Set[str] = { 

15 "api_key", 

16 "apikey", 

17 "password", 

18 "secret", 

19 "access_token", 

20 "refresh_token", 

21 "private_key", 

22 "auth_token", 

23 "session_token", 

24 "csrf_token", 

25 } 

26 

27 @staticmethod 

28 def sanitize(data: Any, sensitive_keys: Set[str] | None = None) -> Any: 

29 """ 

30 Recursively remove sensitive keys from data structures. 

31 

32 This method traverses dictionaries and lists, removing any keys that match 

33 the sensitive keys list (case-insensitive). This prevents accidental 

34 credential leakage in optimization results, logs, or API responses. 

35 

36 Args: 

37 data: The data structure to sanitize (dict, list, or primitive) 

38 sensitive_keys: Set of key names to remove (case-insensitive). 

39 If None, uses DEFAULT_SENSITIVE_KEYS. 

40 

41 Returns: 

42 Sanitized copy of the data with sensitive keys removed 

43 

44 Example: 

45 >>> sanitizer = DataSanitizer() 

46 >>> data = {"username": "user", "api_key": "secret123"} 

47 >>> sanitizer.sanitize(data) 

48 {"username": "user"} 

49 """ 

50 if sensitive_keys is None: 

51 sensitive_keys = DataSanitizer.DEFAULT_SENSITIVE_KEYS 

52 

53 # Convert to lowercase for case-insensitive comparison 

54 sensitive_keys_lower = {key.lower() for key in sensitive_keys} 

55 

56 if isinstance(data, dict): 

57 return { 

58 k: DataSanitizer.sanitize(v, sensitive_keys) 

59 for k, v in data.items() 

60 if k.lower() not in sensitive_keys_lower 

61 } 

62 elif isinstance(data, list): 

63 return [ 

64 DataSanitizer.sanitize(item, sensitive_keys) for item in data 

65 ] 

66 else: 

67 # Return primitives unchanged 

68 return data 

69 

70 @staticmethod 

71 def redact( 

72 data: Any, 

73 sensitive_keys: Set[str] | None = None, 

74 redaction_text: str = "[REDACTED]", 

75 ) -> Any: 

76 """ 

77 Recursively redact (replace with placeholder) sensitive values in data structures. 

78 

79 Unlike sanitize() which removes keys entirely, this method replaces their 

80 values with a redaction placeholder, preserving the structure. 

81 

82 Args: 

83 data: The data structure to redact (dict, list, or primitive) 

84 sensitive_keys: Set of key names to redact (case-insensitive). 

85 If None, uses DEFAULT_SENSITIVE_KEYS. 

86 redaction_text: Text to replace sensitive values with 

87 

88 Returns: 

89 Copy of the data with sensitive values redacted 

90 

91 Example: 

92 >>> sanitizer = DataSanitizer() 

93 >>> data = {"username": "user", "api_key": "secret123"} 

94 >>> sanitizer.redact(data) 

95 {"username": "user", "api_key": "[REDACTED]"} 

96 """ 

97 if sensitive_keys is None: 

98 sensitive_keys = DataSanitizer.DEFAULT_SENSITIVE_KEYS 

99 

100 # Convert to lowercase for case-insensitive comparison 

101 sensitive_keys_lower = {key.lower() for key in sensitive_keys} 

102 

103 if isinstance(data, dict): 

104 return { 

105 k: ( 

106 redaction_text 

107 if k.lower() in sensitive_keys_lower 

108 else DataSanitizer.redact(v, sensitive_keys, redaction_text) 

109 ) 

110 for k, v in data.items() 

111 } 

112 elif isinstance(data, list): 

113 return [ 

114 DataSanitizer.redact(item, sensitive_keys, redaction_text) 

115 for item in data 

116 ] 

117 else: 

118 # Return primitives unchanged 

119 return data 

120 

121 

122# Convenience functions for direct use 

def sanitize_data(data: Any, sensitive_keys: Set[str] | None = None) -> Any:
    """
    Strip sensitive keys out of a data structure.

    Module-level shortcut that forwards both arguments to
    DataSanitizer.sanitize() and returns its result.

    Args:
        data: The data structure to sanitize
        sensitive_keys: Optional set of sensitive key names, passed through
            as given

    Returns:
        Whatever DataSanitizer.sanitize() returns for these arguments
    """
    sanitized = DataSanitizer.sanitize(data, sensitive_keys)
    return sanitized

137 

138 

def redact_data(
    data: Any,
    sensitive_keys: Set[str] | None = None,
    redaction_text: str = "[REDACTED]",
) -> Any:
    """
    Replace sensitive values in a data structure with a placeholder.

    Module-level shortcut that forwards all three arguments to
    DataSanitizer.redact() and returns its result.

    Args:
        data: The data structure to redact
        sensitive_keys: Optional set of sensitive key names, passed through
            as given
        redaction_text: Text to replace sensitive values with

    Returns:
        Whatever DataSanitizer.redact() returns for these arguments
    """
    redacted = DataSanitizer.redact(data, sensitive_keys, redaction_text)
    return redacted