Coverage for src / local_deep_research / security / module_whitelist.py: 100%
41 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-25 01:07 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-25 01:07 +0000
1"""
Module whitelist for safe dynamic imports.

This module provides secure dynamic import functionality with a strict whitelist
of allowed modules and class names. This prevents arbitrary code execution through
user-controlled configuration values.

Security Features:
- Validates module paths against a whitelist of trusted modules
- Validates class names against a whitelist of legitimate search engine classes
- Prevents loading of arbitrary code through malicious configuration
- Logs security-relevant events for auditing
13"""
15import importlib
16from typing import Optional, Type
18from loguru import logger
# Module paths that may be imported dynamically. A path that is not a member
# of this set is refused.
# SECURITY: every entry is a relative path (leading "."), meant to be resolved
# against local_deep_research.web_search_engines. Absolute paths are
# deliberately absent so that modules from other packages cannot be named.
ALLOWED_MODULE_PATHS: frozenset[str] = frozenset(
    {
        # Search engine implementations
        ".engines.full_search",
        ".engines.full_serp_search_results_old",
        ".engines.meta_search_engine",
        ".engines.parallel_search_engine",
        ".engines.search_engine_arxiv",
        ".engines.search_engine_brave",
        ".engines.search_engine_collection",
        ".engines.search_engine_ddg",
        ".engines.search_engine_elasticsearch",
        ".engines.search_engine_github",
        ".engines.search_engine_google_pse",
        ".engines.search_engine_guardian",
        ".engines.search_engine_library",
        ".engines.search_engine_local",
        ".engines.search_engine_local_all",
        ".engines.search_engine_mojeek",
        ".engines.search_engine_nasa_ads",
        ".engines.search_engine_openalex",
        ".engines.search_engine_paperless",
        ".engines.search_engine_pubmed",
        ".engines.search_engine_retriever",
        ".engines.search_engine_scaleserp",
        ".engines.search_engine_searxng",
        ".engines.search_engine_semantic_scholar",
        ".engines.search_engine_serper",
        ".engines.search_engine_serpapi",
        ".engines.search_engine_tavily",
        ".engines.search_engine_wayback",
        ".engines.search_engine_wikipedia",
        ".engines.search_engine_wikinews",
        # Shared base module
        ".search_engine_base",
    }
)

# Older name for the same whitelist object, kept for backward compatibility.
ALLOWED_MODULES = ALLOWED_MODULE_PATHS
# Class names that may be fetched from a dynamically imported module.
# A name that is not a member of this set is refused.
ALLOWED_CLASS_NAMES: frozenset[str] = frozenset(
    {
        "ArXivSearchEngine",
        "BaseSearchEngine",
        "BraveSearchEngine",
        "CollectionSearchEngine",
        "DuckDuckGoSearchEngine",
        "ElasticsearchSearchEngine",
        "FullSearchResults",
        "GitHubSearchEngine",
        "GooglePSESearchEngine",
        "GuardianSearchEngine",
        "LibraryRAGSearchEngine",
        "LocalAllSearchEngine",
        "LocalSearchEngine",
        "MetaSearchEngine",
        "MojeekSearchEngine",
        "NasaAdsSearchEngine",
        "OpenAlexSearchEngine",
        "PaperlessSearchEngine",
        "ParallelSearchEngine",
        "PubMedSearchEngine",
        "RetrieverSearchEngine",
        "ScaleSerpSearchEngine",
        "SearXNGSearchEngine",
        "SemanticScholarSearchEngine",
        "SerpAPISearchEngine",
        "SerperSearchEngine",
        "TavilySearchEngine",
        "WaybackSearchEngine",
        "WikinewsSearchEngine",
        "WikipediaSearchEngine",
    }
)
class SecurityError(Exception):
    """Signals that a security validation failed during a module import."""


# Backward-compatible alias: the same exception class under its older name.
ModuleNotAllowedError = SecurityError
def validate_module_import(module_path: str, class_name: str) -> bool:
    """
    Check a module path / class name pair against the import whitelists.

    The checks run in this order, and the first failing one makes the
    function return False after logging a warning:
    1. Both values are non-empty
    2. Both values are strings
    3. Module path starts with "." (relative import only)
    4. Module path is in ALLOWED_MODULE_PATHS and class name is in
       ALLOWED_CLASS_NAMES (both are evaluated so both failures get logged)

    The function never raises for badly typed input; it reports failure
    through its return value only.

    Args:
        module_path: The Python module path (MUST be relative, starting with ".")
        class_name: The class name to import from the module

    Returns:
        True if all validations pass, False otherwise

    Example:
        >>> validate_module_import(".engines.search_engine_brave", "BraveSearchEngine")
        True
        >>> validate_module_import("os", "system")
        False
    """
    if not module_path or not class_name:
        logger.warning(
            "Module validation failed: empty module_path or class_name"
        )
        return False

    # The values originate from configuration and may not be strings at all.
    # Without this guard a truthy non-string would raise AttributeError on
    # .startswith() (or TypeError on the set lookup if unhashable) instead of
    # being rejected.
    if not isinstance(module_path, str) or not isinstance(class_name, str):
        logger.warning(
            "Module validation failed: module_path and class_name must be strings"
        )
        return False

    # SECURITY: Only allow relative imports (starting with ".")
    # This rejects absolute module names such as "os" or "subprocess"
    # before the whitelist is even consulted.
    if not module_path.startswith("."):
        logger.warning(
            f"Security: Rejected non-relative module path: {module_path}. "
            "Only relative imports (starting with '.') are allowed."
        )
        return False

    module_valid = module_path in ALLOWED_MODULE_PATHS
    class_valid = class_name in ALLOWED_CLASS_NAMES

    # Log each failure separately so the audit trail shows which part
    # of the pair was rejected.
    if not module_valid:
        logger.warning(f"Module path not in whitelist: {module_path}")

    if not class_valid:
        logger.warning(f"Class name not in whitelist: {class_name}")

    return module_valid and class_valid
def get_safe_module_class(
    module_path: str,
    class_name: str,
    package: Optional[str] = None,
) -> Type:
    """
    Import a whitelisted class from a whitelisted module.

    The request is first checked with validate_module_import(); nothing is
    imported unless that check passes. The module is then imported relative
    to ``package`` and the named attribute is fetched from it.

    Args:
        module_path: Relative module path to import from, e.g.
            ".engines.search_engine_brave". Absolute module paths are
            rejected by validation and raise SecurityError.
        class_name: The class name to retrieve from the module
        package: Optional anchor package for resolving the relative path.
            If not provided, defaults to
            "local_deep_research.web_search_engines"

    Returns:
        The requested class from the module

    Raises:
        SecurityError: If the module path or class name is not in the whitelist
        ImportError: If the module cannot be imported (this includes
            ModuleNotFoundError); the original exception is re-raised
        AttributeError: If the class does not exist in the module

    Example:
        >>> cls = get_safe_module_class(".engines.search_engine_brave", "BraveSearchEngine")
        >>> engine = cls(api_key="...")
    """
    # Validate both module path and class name against whitelists
    if not validate_module_import(module_path, class_name):
        logger.error(
            f"Security: Blocked attempt to import non-whitelisted module/class: "
            f"module_path={module_path!r}, class_name={class_name!r}"
        )
        raise SecurityError(
            f"Import blocked: module_path={module_path!r} or class_name={class_name!r} "
            f"is not in the security whitelist. Only trusted local_deep_research "
            f"modules and classes can be dynamically imported."
        )

    # Validation only passes for relative paths, so an anchor package is
    # always needed; fall back to the search engines package.
    # NOTE(review): a caller-supplied `package` is used as-is and is not
    # validated, so the whitelist constrains only the relative part of the
    # path. Confirm that callers never pass an untrusted value here.
    if package is None:
        package = "local_deep_research.web_search_engines"

    # Import the module. ImportError covers ModuleNotFoundError as well as
    # failures raised while the module's own imports are resolved, so every
    # import failure is logged before being re-raised unchanged.
    try:
        # bearer:disable python_lang_code_injection
        module = importlib.import_module(module_path, package=package)
    except ImportError:
        logger.exception(f"Failed to import whitelisted module {module_path}")
        raise

    # Get the class from the module
    try:
        engine_class = getattr(module, class_name)
    except AttributeError:
        logger.exception(
            f"Class '{class_name}' not found in module '{module_path}'"
        )
        raise

    logger.debug(f"Successfully loaded {class_name} from {module_path}")
    return engine_class