Coverage for src / local_deep_research / web_search_engines / engines / search_engine_brave.py: 66%

92 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-01-11 00:51 +0000

1from typing import Any, Dict, List, Optional 

2 

3from langchain_community.tools import BraveSearch 

4from langchain_core.language_models import BaseLLM 

5from loguru import logger 

6 

7from ...config import search_config 

8from ..rate_limiting import RateLimitError 

9from ..search_engine_base import BaseSearchEngine 

10 

11 

class BraveSearchEngine(BaseSearchEngine):
    """Brave search engine implementation with two-phase approach.

    Phase one (``_get_previews``) fetches lightweight previews (title, link,
    snippet) from Brave Search. Phase two (``_get_full_content``) optionally
    enriches the items that were kept with full webpage content through
    ``FullSearchResults``.
    """

    # Mark as public search engine
    is_public = True
    # Mark as generic search engine (general web search)
    is_generic = True

    # Language names (lower-case) mapped to the codes sent as ``search_lang``
    # when the caller does not supply its own mapping. Read-only.
    _DEFAULT_LANGUAGE_CODES = {
        "english": "en",
        "spanish": "es",
        "chinese": "zh",
        "hindi": "hi",
        "french": "fr",
        "arabic": "ar",
        "bengali": "bn",
        "portuguese": "pt",
        "russian": "ru",
    }

    def __init__(
        self,
        max_results: int = 10,
        region: str = "US",
        time_period: str = "y",
        safe_search: bool = True,
        search_language: str = "English",
        api_key: Optional[str] = None,
        language_code_mapping: Optional[Dict[str, str]] = None,
        llm: Optional[BaseLLM] = None,
        include_full_content: bool = True,
        max_filtered_results: Optional[int] = None,
        settings_snapshot: Optional[Dict[str, Any]] = None,
        **kwargs,
    ):
        """
        Initialize the Brave search engine.

        Args:
            max_results: Maximum number of search results
            region: Region code for search results (upper-cased before use)
            time_period: Time period for search results; prefixed with "p"
                to build the ``freshness`` search parameter
            safe_search: Whether to enable safe search ("moderate" when True,
                "off" when False)
            search_language: Language name for search results; looked up
                case-insensitively in the language mapping, falling back to "en"
            api_key: Brave Search API key (can also be set in BRAVE_API_KEY env)
            language_code_mapping: Mapping from lower-case language names to codes
            llm: Language model for relevance filtering
            include_full_content: Whether to include full webpage content in results
            max_filtered_results: Maximum number of results to keep after filtering
            settings_snapshot: Settings snapshot for thread context
            **kwargs: Additional parameters (ignored but accepted for compatibility)

        Raises:
            ValueError: If no API key is passed in and none is found through
                the settings lookup.
        """
        # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
        super().__init__(
            llm=llm,
            max_filtered_results=max_filtered_results,
            max_results=max_results,
        )
        self.include_full_content = include_full_content

        # Fall back to the built-in mapping only when the caller gave none.
        if language_code_mapping is None:
            language_code_mapping = self._DEFAULT_LANGUAGE_CODES

        # Imported here rather than at module level, as in the original
        # (NOTE(review): presumably to avoid an import cycle - confirm).
        from ...config.search_config import get_setting_from_snapshot

        # An explicit api_key argument wins; otherwise consult the settings.
        brave_api_key = api_key
        if not brave_api_key:
            brave_api_key = get_setting_from_snapshot(
                "search.engine.web.brave.api_key",
                settings_snapshot=settings_snapshot,
            )

        if not brave_api_key:
            raise ValueError(
                "Brave API key not found. Please provide api_key parameter, set the BRAVE_API_KEY environment variable, or set it in the UI settings."
            )

        # Unknown language names silently fall back to English.
        language_code = language_code_mapping.get(search_language.lower(), "en")

        # Convert time period format to Brave's format (e.g. "y" -> "py")
        brave_time_period = f"p{time_period}"

        # Convert safe search to Brave's format
        brave_safe_search = "moderate" if safe_search else "off"

        # Initialize Brave Search
        self.engine = BraveSearch.from_api_key(
            api_key=brave_api_key,
            search_kwargs={
                # Never request more than 20 results per call
                # (presumably Brave's per-request maximum - confirm).
                "count": min(20, max_results),
                "country": region.upper(),
                "search_lang": language_code,
                "safesearch": brave_safe_search,
                "freshness": brave_time_period,
            },
        )

        # User agent is not needed for Brave Search API

        # If full content is requested, initialize FullSearchResults
        if include_full_content:
            # Import FullSearchResults only if needed; when it is missing,
            # degrade to preview-only results instead of failing.
            try:
                from .full_search import FullSearchResults

                self.full_search = FullSearchResults(
                    llm=llm,
                    web_search=self.engine,
                    language=search_language,
                    max_results=max_results,
                    region=region,
                    time=time_period,
                    safesearch=brave_safe_search,
                )
            except ImportError:
                logger.warning(
                    "Warning: FullSearchResults not available. Full content retrieval disabled."
                )
                self.include_full_content = False

    def _get_previews(self, query: str) -> List[Dict[str, Any]]:
        """
        Get preview information from Brave Search.

        The query is truncated to its first 400 characters before being sent.
        On success the previews are also stored on ``self._search_results``.

        Args:
            query: The search query

        Returns:
            List of preview dictionaries with the keys "id", "title", "link",
            "snippet", "displayed_link", "position" and "_full_result" (the
            raw Brave result). An empty list is returned when the response
            cannot be parsed as JSON or any non-rate-limit error occurs.

        Raises:
            RateLimitError: If the error text looks like a rate-limit or
                quota failure ("429", "too many requests", "rate limit",
                "quota").
        """
        import json

        logger.info("Getting search results from Brave Search")

        try:
            # Only the first 400 characters of the query are sent
            # (presumably Brave's query length limit - confirm).
            raw_results = self.engine.run(query[:400])

            # Parse results if they're in string format
            if isinstance(raw_results, str):
                try:
                    raw_results = json.loads(raw_results)
                except json.JSONDecodeError:
                    logger.exception(
                        "Error: Unable to parse BraveSearch response as JSON."
                    )
                    return []

            # Format results as previews
            previews = []
            for i, result in enumerate(raw_results):
                link = result.get("link", "")
                previews.append(
                    {
                        "id": i,  # Use index as ID
                        "title": result.get("title", ""),
                        "link": link,
                        "snippet": result.get("snippet", ""),
                        "displayed_link": link,
                        "position": i,
                        # Store full Brave result for later
                        "_full_result": result,
                    }
                )

            # Store the previews for potential full content retrieval
            self._search_results = previews

            return previews

        except Exception as e:
            error_msg = str(e)
            logger.exception("Error getting Brave Search results")

            # Check for rate limit patterns; these must propagate so the
            # caller sees a RateLimitError instead of an empty result list.
            lowered = error_msg.lower()
            if (
                "429" in error_msg
                or "too many requests" in lowered
                or "rate limit" in lowered
                or "quota" in lowered
            ):
                raise RateLimitError(
                    f"Brave Search rate limit hit: {error_msg}"
                ) from e

            return []

    @staticmethod
    def _strip_internal_fields(
        relevant_items: List[Dict[str, Any]],
    ) -> List[Dict[str, Any]]:
        """Return plain result dicts without the temporary "_full_result" key.

        For each item the stored raw Brave result is used when present,
        otherwise the preview itself. Every result is a shallow copy, so
        neither the previews nor the raw results are mutated.
        """
        results = []
        for item in relevant_items:
            source = item["_full_result"] if "_full_result" in item else item
            result = source.copy()
            # Remove temporary field
            result.pop("_full_result", None)
            results.append(result)
        return results

    def _get_full_content(
        self, relevant_items: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """
        Get full content for the relevant search results.
        If include_full_content is True and FullSearchResults is available,
        retrieves full webpage content for the results.

        In snippet-only mode (``search_config.SEARCH_SNIPPETS_ONLY`` is set
        and truthy), or when full content retrieval is disabled or fails,
        the raw Brave results are returned without the temporary
        "_full_result" field.

        Args:
            relevant_items: List of relevant preview dictionaries

        Returns:
            List of result dictionaries with full content if requested
        """
        # Check if we should get full content
        if getattr(search_config, "SEARCH_SNIPPETS_ONLY", False):
            logger.info("Snippet-only mode, skipping full content retrieval")
            # Return the relevant items with their full Brave information
            return self._strip_internal_fields(relevant_items)

        # If full content retrieval is enabled
        if self.include_full_content and hasattr(self, "full_search"):
            logger.info("Retrieving full webpage content")

            try:
                # Use FullSearchResults to get full content
                return self.full_search._get_full_content(relevant_items)
            except Exception:
                logger.exception("Error retrieving full content")
                # Fall back to returning the items without full content

        # Return items with their full Brave information
        return self._strip_internal_fields(relevant_items)

    def run(
        self, query: str, research_context: Dict[str, Any] | None = None
    ) -> List[Dict[str, Any]]:
        """
        Execute a search using Brave Search with the two-phase approach.

        Args:
            query: The search query
            research_context: Context from previous research to use.

        Returns:
            List of search results
        """
        logger.info("---Execute a search using Brave Search---")

        try:
            # Use the implementation from the parent class which handles all phases
            return super().run(query, research_context=research_context)
        finally:
            # Clean up even when the search raised, so stale previews never
            # linger on the instance between runs.
            if hasattr(self, "_search_results"):
                del self._search_results

291 del self._search_results 

292 

293 return results