Coverage for src / local_deep_research / web_search_engines / engines / search_engine_brave.py: 98%
43 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
1from typing import Any, Dict, List, Optional
3from langchain_community.tools import BraveSearch
4from langchain_core.language_models import BaseLLM
5from loguru import logger
7from ..rate_limiting import RateLimitError
8from ..search_engine_base import BaseSearchEngine
class BraveSearchEngine(BaseSearchEngine):
    """Brave search engine implementation with two-phase approach.

    This class wraps LangChain's ``BraveSearch`` tool.  ``_get_previews``
    returns lightweight preview dictionaries (title / link / snippet) built
    from the tool's response.

    NOTE(review): ``_resolve_api_key``, ``_init_full_search`` and
    ``_raise_if_rate_limit`` are not defined in this class; they are
    presumably inherited from ``BaseSearchEngine`` -- confirm there.
    """

    # Mark as public search engine
    is_public = True
    # Mark as generic search engine (general web search)
    is_generic = True

    def __init__(
        self,
        max_results: int = 10,
        region: str = "US",
        time_period: str = "y",
        safe_search: bool = True,
        search_language: str = "English",
        api_key: Optional[str] = None,
        language_code_mapping: Optional[Dict[str, str]] = None,
        llm: Optional[BaseLLM] = None,
        include_full_content: bool = True,
        max_filtered_results: Optional[int] = None,
        settings_snapshot: Optional[Dict[str, Any]] = None,
        **kwargs,
    ):
        """
        Initialize the Brave search engine.

        Args:
            max_results: Maximum number of search results. The value sent to
                Brave as ``count`` is capped at 20.
            region: Region code for search results (upper-cased before use).
            time_period: Time period for search results. It is prefixed with
                ``"p"`` to form Brave's ``freshness`` value (``"y"`` ->
                ``"py"``). An empty value omits the ``freshness`` filter
                instead of sending an invalid ``"p"``.
            safe_search: Whether to enable safe search (``"moderate"`` when
                true, ``"off"`` otherwise).
            search_language: Language for search results, looked up
                case-insensitively in ``language_code_mapping``; unknown
                names fall back to ``"en"``.
            api_key: Brave Search API key (can also be set via
                LDR_SEARCH_ENGINE_WEB_BRAVE_API_KEY env var or in UI settings)
            language_code_mapping: Mapping from lower-case language names to
                language codes. A built-in table is used when omitted.
            llm: Language model for relevance filtering
            include_full_content: Whether to include full webpage content in
                results
            max_filtered_results: Maximum number of results to keep after
                filtering
            settings_snapshot: Settings snapshot for thread context
            **kwargs: Additional parameters (ignored but accepted for
                compatibility)
        """
        # Initialize the BaseSearchEngine with LLM, max_filtered_results,
        # and max_results
        super().__init__(
            llm=llm,
            max_filtered_results=max_filtered_results,
            max_results=max_results,
            include_full_content=include_full_content,
            settings_snapshot=settings_snapshot,
        )

        # Set up language code mapping
        if language_code_mapping is None:
            language_code_mapping = {
                "english": "en",
                "spanish": "es",
                "chinese": "zh",
                "hindi": "hi",
                "french": "fr",
                "arabic": "ar",
                "bengali": "bn",
                "portuguese": "pt",
                "russian": "ru",
            }

        # Get API key - check params, settings, or env vars
        brave_api_key = self._resolve_api_key(
            api_key,
            "search.engine.web.brave.api_key",
            engine_name="Brave Search",
            settings_snapshot=settings_snapshot,
        )

        # Get language code
        language_code = language_code_mapping.get(search_language.lower(), "en")

        # Convert safe search to Brave's format
        brave_safe_search = "moderate" if safe_search else "off"

        search_kwargs = {
            "count": min(20, max_results),
            "country": region.upper(),
            "search_lang": language_code,
            "safesearch": brave_safe_search,
        }
        # Convert time period format to Brave's format. ``freshness`` is
        # optional, so an empty/None period drops the filter rather than
        # producing the malformed values "p" / "pNone".
        if time_period:
            search_kwargs["freshness"] = f"p{time_period}"

        # Initialize Brave Search
        self.engine = BraveSearch.from_api_key(
            api_key=brave_api_key,
            search_kwargs=search_kwargs,
        )

        # User agent is not needed for Brave Search API

        # Set up full-content retrieval. The helper is called unconditionally
        # here; presumably it checks include_full_content itself -- confirm.
        self._init_full_search(
            web_search=self.engine,
            language=search_language,
            max_results=max_results,
            region=region,
            time_period=time_period,
            safe_search=brave_safe_search,
        )

    def _get_previews(self, query: str) -> List[Dict[str, Any]]:
        """
        Get preview information from Brave Search.

        Args:
            query: The search query. Only the first 400 characters are sent.

        Returns:
            List of preview dictionaries with the keys ``id``, ``title``,
            ``link``, ``snippet``, ``displayed_link``, ``position`` and
            ``_full_result``. An empty list is returned when the response
            cannot be parsed, has an unexpected shape, or a non-rate-limit
            error occurs.

        Raises:
            RateLimitError: Re-raised unchanged when raised while fetching;
                ``_raise_if_rate_limit`` may also raise for other errors
                (presumably this same type -- confirm in the base class).
        """
        # Imported before any ``try`` so that the ``except`` clause below
        # never refers to a name that was bound inside the guarded block.
        import json

        logger.info("Getting search results from Brave Search")

        try:
            # Get search results from Brave Search
            raw_results = self.engine.run(query[:400])

            # Parse results if they're in string format
            if isinstance(raw_results, str):
                try:
                    raw_results = json.loads(raw_results)
                except json.JSONDecodeError:
                    logger.exception(
                        "Error: Unable to parse BraveSearch response as JSON."
                    )
                    return []

            # Anything other than a sequence of results cannot be turned into
            # previews; report it explicitly instead of failing on ``.get``.
            if not isinstance(raw_results, (list, tuple)):
                logger.warning(
                    "Unexpected BraveSearch response type: {}",
                    type(raw_results).__name__,
                )
                return []

            # Format results as previews
            previews: List[Dict[str, Any]] = []
            for result in raw_results:
                # A single malformed entry should not discard the valid ones.
                if not isinstance(result, dict):
                    logger.warning(
                        "Skipping malformed BraveSearch result of type {}",
                        type(result).__name__,
                    )
                    continue

                # Index among the kept results; identical to the raw index
                # whenever every entry is well-formed.
                index = len(previews)
                link = result.get("link", "")
                previews.append(
                    {
                        "id": index,  # Use index as ID
                        "title": result.get("title", ""),
                        "link": link,
                        "snippet": result.get("snippet", ""),
                        "displayed_link": link,
                        "position": index,
                        # Store full Brave result for later
                        "_full_result": result,
                    }
                )

            # Store the previews for potential full content retrieval
            self._search_results = previews

            return previews

        except RateLimitError:
            raise
        except Exception as e:
            logger.exception("Error getting Brave Search results")
            # May raise if the error is recognized as a rate limit.
            self._raise_if_rate_limit(e)
            return []