Coverage for src / local_deep_research / web_search_engines / engines / search_engine_brave.py: 66%
92 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
1from typing import Any, Dict, List, Optional
3from langchain_community.tools import BraveSearch
4from langchain_core.language_models import BaseLLM
5from loguru import logger
7from ...config import search_config
8from ..rate_limiting import RateLimitError
9from ..search_engine_base import BaseSearchEngine
class BraveSearchEngine(BaseSearchEngine):
    """Brave search engine implementation with two-phase approach.

    ``_get_previews`` fetches lightweight result previews from Brave Search,
    and ``_get_full_content`` turns the previews it is given into final
    results, optionally enriched through ``FullSearchResults``.
    """

    # Mark as public search engine
    is_public = True
    # Mark as generic search engine (general web search)
    is_generic = True

    def __init__(
        self,
        max_results: int = 10,
        region: str = "US",
        time_period: str = "y",
        safe_search: bool = True,
        search_language: str = "English",
        api_key: Optional[str] = None,
        language_code_mapping: Optional[Dict[str, str]] = None,
        llm: Optional[BaseLLM] = None,
        include_full_content: bool = True,
        max_filtered_results: Optional[int] = None,
        settings_snapshot: Optional[Dict[str, Any]] = None,
        **kwargs,
    ):
        """
        Initialize the Brave search engine.

        Args:
            max_results: Maximum number of search results
            region: Region code for search results
            time_period: Time period for search results
            safe_search: Whether to enable safe search
            search_language: Language for search results
            api_key: Brave Search API key (can also be set in BRAVE_API_KEY env)
            language_code_mapping: Mapping from language names to codes
            llm: Language model for relevance filtering
            include_full_content: Whether to include full webpage content in results
            max_filtered_results: Maximum number of results to keep after filtering
            settings_snapshot: Settings snapshot for thread context
            **kwargs: Additional parameters (ignored but accepted for compatibility)

        Raises:
            ValueError: If no API key is passed in and none is returned by the
                settings lookup.
        """
        # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
        super().__init__(
            llm=llm,
            max_filtered_results=max_filtered_results,
            max_results=max_results,
        )
        self.include_full_content = include_full_content

        # Fall back to the built-in language-name -> code table
        if language_code_mapping is None:
            language_code_mapping = {
                "english": "en",
                "spanish": "es",
                "chinese": "zh",
                "hindi": "hi",
                "french": "fr",
                "arabic": "ar",
                "bengali": "bn",
                "portuguese": "pt",
                "russian": "ru",
            }

        # Get API key - explicit parameter first, then the settings lookup
        from ...config.search_config import get_setting_from_snapshot

        brave_api_key = api_key
        if not brave_api_key:
            brave_api_key = get_setting_from_snapshot(
                "search.engine.web.brave.api_key",
                settings_snapshot=settings_snapshot,
            )

        if not brave_api_key:
            raise ValueError(
                "Brave API key not found. Please provide api_key parameter, "
                "set the BRAVE_API_KEY environment variable, or set it in "
                "the UI settings."
            )

        # Unknown language names fall back to English
        language_code = language_code_mapping.get(search_language.lower(), "en")

        # Convert time period format to Brave's format (e.g. "y" -> "py")
        brave_time_period = f"p{time_period}"

        # Convert safe search to Brave's format
        brave_safe_search = "moderate" if safe_search else "off"

        # Initialize Brave Search; the requested count is capped at 20
        self.engine = BraveSearch.from_api_key(
            api_key=brave_api_key,
            search_kwargs={
                "count": min(20, max_results),
                "country": region.upper(),
                "search_lang": language_code,
                "safesearch": brave_safe_search,
                "freshness": brave_time_period,
            },
        )

        # User agent is not needed for Brave Search API

        # If full content is requested, initialize FullSearchResults
        if include_full_content:
            # Import FullSearchResults only if needed
            try:
                from .full_search import FullSearchResults

                self.full_search = FullSearchResults(
                    llm=llm,
                    web_search=self.engine,
                    language=search_language,
                    max_results=max_results,
                    region=region,
                    time=time_period,
                    safesearch=brave_safe_search,
                )
            except ImportError:
                logger.warning(
                    "Warning: FullSearchResults not available. Full content retrieval disabled."
                )
                self.include_full_content = False

    def _get_previews(self, query: str) -> List[Dict[str, Any]]:
        """
        Get preview information from Brave Search.

        Only the first 400 characters of the query are sent to the engine.
        The previews are also stored on ``self._search_results``.

        Args:
            query: The search query

        Returns:
            List of preview dictionaries; an empty list if the response
            cannot be parsed or any non-rate-limit error occurs.

        Raises:
            RateLimitError: If the error message looks like a rate-limit or
                quota failure.
        """
        import json

        logger.info("Getting search results from Brave Search")

        try:
            # Get search results from Brave Search
            raw_results = self.engine.run(query[:400])

            # Parse results if they're in string format
            if isinstance(raw_results, str):
                try:
                    raw_results = json.loads(raw_results)
                except json.JSONDecodeError:
                    logger.exception(
                        "Error: Unable to parse BraveSearch response as JSON."
                    )
                    return []

            # Format results as previews; the raw Brave result is kept under
            # "_full_result" so it can be returned later.
            previews = [
                {
                    "id": position,  # Use index as ID
                    "title": result.get("title", ""),
                    "link": result.get("link", ""),
                    "snippet": result.get("snippet", ""),
                    "displayed_link": result.get("link", ""),
                    "position": position,
                    "_full_result": result,
                }
                for position, result in enumerate(raw_results)
            ]

            # Store the previews for potential full content retrieval
            self._search_results = previews

            return previews

        except Exception as e:
            error_msg = str(e)
            logger.exception("Error getting Brave Search results")

            # Check for rate limit patterns
            lowered_msg = error_msg.lower()
            rate_limit_markers = ("too many requests", "rate limit", "quota")
            if "429" in error_msg or any(
                marker in lowered_msg for marker in rate_limit_markers
            ):
                # Chain the original error so its traceback is preserved
                raise RateLimitError(
                    f"Brave Search rate limit hit: {error_msg}"
                ) from e

            return []

    @staticmethod
    def _strip_full_results(
        items: List[Dict[str, Any]],
    ) -> List[Dict[str, Any]]:
        """Return a copy of each item's raw Brave result (or of the item itself).

        The temporary ``_full_result`` key is removed from every returned
        dictionary. Copies are returned so the input items are not mutated.
        """
        results = []
        for item in items:
            # Use the full result if available, otherwise use the preview
            source = item["_full_result"] if "_full_result" in item else item
            result = source.copy()
            # Remove temporary field
            result.pop("_full_result", None)
            results.append(result)
        return results

    def _get_full_content(
        self, relevant_items: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """
        Get full content for the relevant search results.
        If include_full_content is True and FullSearchResults is available,
        retrieves full webpage content for the results.

        Args:
            relevant_items: List of relevant preview dictionaries

        Returns:
            List of result dictionaries with full content if requested.
            In snippet-only mode, or when full content retrieval is disabled
            or fails, the stored Brave results are returned instead.
        """
        # Check if we should get full content
        if getattr(search_config, "SEARCH_SNIPPETS_ONLY", False):
            logger.info("Snippet-only mode, skipping full content retrieval")
            return self._strip_full_results(relevant_items)

        # If full content retrieval is enabled
        if self.include_full_content and hasattr(self, "full_search"):
            logger.info("Retrieving full webpage content")

            try:
                # Use FullSearchResults to get full content
                return self.full_search._get_full_content(relevant_items)
            except Exception:
                logger.exception("Error retrieving full content")
                # Fall back to returning the items without full content

        # Return items with their full Brave information
        return self._strip_full_results(relevant_items)

    def run(
        self, query: str, research_context: Dict[str, Any] | None = None
    ) -> List[Dict[str, Any]]:
        """
        Execute a search using Brave Search with the two-phase approach.

        Args:
            query: The search query
            research_context: Context from previous research to use.

        Returns:
            List of search results
        """
        logger.info("---Execute a search using Brave Search---")

        try:
            # Use the implementation from the parent class which handles all phases
            return super().run(query, research_context=research_context)
        finally:
            # Clean up even when the parent run raises, so stale previews
            # never stay on the instance.
            if hasattr(self, "_search_results"):
                del self._search_results