Coverage for src / local_deep_research / web_search_engines / engines / search_engine_brave.py: 98%

43 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-14 23:55 +0000

1from typing import Any, Dict, List, Optional 

2 

3from langchain_community.tools import BraveSearch 

4from langchain_core.language_models import BaseLLM 

5from loguru import logger 

6 

7from ..rate_limiting import RateLimitError 

8from ..search_engine_base import BaseSearchEngine 

9 

10 

class BraveSearchEngine(BaseSearchEngine):
    """Search engine backed by the Brave Search API.

    ``_get_previews`` returns lightweight preview dictionaries built from the
    Brave response; full-content retrieval is configured in ``__init__``
    through ``_init_full_search``.
    """

    # Public, general-purpose web search engine flags.
    is_public = True
    is_generic = True

    def __init__(
        self,
        max_results: int = 10,
        region: str = "US",
        time_period: str = "y",
        safe_search: bool = True,
        search_language: str = "English",
        api_key: Optional[str] = None,
        language_code_mapping: Optional[Dict[str, str]] = None,
        llm: Optional[BaseLLM] = None,
        include_full_content: bool = True,
        max_filtered_results: Optional[int] = None,
        settings_snapshot: Optional[Dict[str, Any]] = None,
        **kwargs,
    ):
        """
        Configure the Brave Search client.

        Args:
            max_results: Maximum number of search results; the count sent to
                Brave is capped at 20.
            region: Region code, upper-cased and sent as ``country``.
            time_period: Suffix for Brave's ``freshness`` value; it is
                prefixed with ``"p"`` (e.g. ``"y"`` becomes ``"py"``).
            safe_search: ``True`` maps to ``"moderate"``, ``False`` to ``"off"``.
            search_language: Language name, looked up case-insensitively in
                the language mapping; unknown names fall back to ``"en"``.
            api_key: Brave Search API key (can also be set via
                LDR_SEARCH_ENGINE_WEB_BRAVE_API_KEY env var or in UI settings).
            language_code_mapping: Mapping from lower-case language names to
                codes; a built-in default is used when ``None``.
            llm: Language model for relevance filtering.
            include_full_content: Whether to include full webpage content in
                results.
            max_filtered_results: Maximum number of results to keep after
                filtering.
            settings_snapshot: Settings snapshot for thread context.
            **kwargs: Additional parameters (ignored but accepted for
                compatibility).
        """
        super().__init__(
            llm=llm,
            max_filtered_results=max_filtered_results,
            max_results=max_results,
            include_full_content=include_full_content,
            settings_snapshot=settings_snapshot,
        )

        # Use the caller's mapping when given, otherwise the built-in table.
        if language_code_mapping is not None:
            code_by_language = language_code_mapping
        else:
            code_by_language = {
                "english": "en",
                "spanish": "es",
                "chinese": "zh",
                "hindi": "hi",
                "french": "fr",
                "arabic": "ar",
                "bengali": "bn",
                "portuguese": "pt",
                "russian": "ru",
            }

        # Resolve the key from the explicit argument, settings, or env vars.
        resolved_key = self._resolve_api_key(
            api_key,
            "search.engine.web.brave.api_key",
            engine_name="Brave Search",
            settings_snapshot=settings_snapshot,
        )

        # Translate the generic options into Brave's parameter vocabulary.
        lang_code = code_by_language.get(search_language.lower(), "en")
        freshness = f"p{time_period}"
        if safe_search:
            safesearch_level = "moderate"
        else:
            safesearch_level = "off"

        search_kwargs = {
            "count": min(20, max_results),
            "country": region.upper(),
            "search_lang": lang_code,
            "safesearch": safesearch_level,
            "freshness": freshness,
        }
        self.engine = BraveSearch.from_api_key(
            api_key=resolved_key,
            search_kwargs=search_kwargs,
        )

        # No user agent is needed for the Brave Search API.

        # Hand the configured engine to the full-content setup
        # (presumably a no-op when full content is disabled; see base class).
        self._init_full_search(
            web_search=self.engine,
            language=search_language,
            max_results=max_results,
            region=region,
            time_period=time_period,
            safe_search=safesearch_level,
        )

    def _get_previews(self, query: str) -> List[Dict[str, Any]]:
        """
        Fetch results from Brave Search and shape them into previews.

        Args:
            query: The search query; only its first 400 characters are sent.

        Returns:
            List of preview dictionaries (also stored on
            ``self._search_results``), or an empty list when the response
            cannot be parsed or another non-rate-limit error occurs.

        Raises:
            RateLimitError: Propagated unchanged when raised by the engine;
                other exceptions are passed to ``_raise_if_rate_limit``.
        """
        logger.info("Getting search results from Brave Search")

        try:
            response = self.engine.run(query[:400])

            # The tool may hand back a JSON string rather than parsed data.
            if isinstance(response, str):
                import json

                try:
                    response = json.loads(response)
                except json.JSONDecodeError:
                    logger.exception(
                        "Error: Unable to parse BraveSearch response as JSON."
                    )
                    return []

            # The index doubles as id and position; the raw Brave entry is
            # kept under "_full_result" for later use.
            previews = [
                {
                    "id": position,
                    "title": hit.get("title", ""),
                    "link": hit.get("link", ""),
                    "snippet": hit.get("snippet", ""),
                    "displayed_link": hit.get("link", ""),
                    "position": position,
                    "_full_result": hit,
                }
                for position, hit in enumerate(response)
            ]

            # Remember the previews for potential full content retrieval.
            self._search_results = previews
            return previews

        except RateLimitError:
            raise
        except Exception as exc:
            logger.exception("Error getting Brave Search results")
            self._raise_if_rate_limit(exc)
            return []