Coverage for src / local_deep_research / web_search_engines / engines / search_engine_serpapi.py: 71%

79 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-01-11 00:51 +0000

from loguru import logger

from typing import Any, Dict, List, Optional


from langchain_community.utilities import SerpAPIWrapper

from langchain_core.language_models import BaseLLM


from ...config import search_config

from ..search_engine_base import BaseSearchEngine

9 

10 

class SerpAPISearchEngine(BaseSearchEngine):
    """Google search engine implementation using SerpAPI with two-phase approach."""

    # Mark as public search engine
    is_public = True
    # Mark as generic search engine (general web search via Google)
    is_generic = True

    def __init__(
        self,
        max_results: int = 10,
        region: str = "us",
        time_period: str = "y",
        safe_search: bool = True,
        search_language: str = "English",
        api_key: Optional[str] = None,
        language_code_mapping: Optional[Dict[str, str]] = None,
        llm: Optional[BaseLLM] = None,
        include_full_content: bool = False,
        max_filtered_results: Optional[int] = None,
        settings_snapshot: Optional[Dict[str, Any]] = None,
        **kwargs,
    ):
        """
        Initialize the SerpAPI search engine.

        Args:
            max_results: Maximum number of search results
            region: Region code for search results (SerpAPI "gl" parameter)
            time_period: Time period for search results (SerpAPI "tbs=qdr:..." suffix)
            safe_search: Whether to enable safe search
            search_language: Language name for search results (mapped to an "hl" code)
            api_key: SerpAPI API key (can also be set in SERP_API_KEY env)
            language_code_mapping: Mapping from lowercase language names to codes
            llm: Language model for relevance filtering
            include_full_content: Whether to include full webpage content in results
            max_filtered_results: Maximum number of results to keep after filtering
            settings_snapshot: Settings snapshot for thread context
            **kwargs: Additional parameters (ignored but accepted for compatibility)

        Raises:
            ValueError: If no API key can be resolved from any source.
        """
        # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
        super().__init__(
            llm=llm,
            max_filtered_results=max_filtered_results,
            max_results=max_results,
        )
        self.include_full_content = include_full_content

        # Default mapping from human-readable language names to Google "hl" codes
        if language_code_mapping is None:
            language_code_mapping = {
                "english": "en",
                "spanish": "es",
                "chinese": "zh",
                "hindi": "hi",
                "french": "fr",
                "arabic": "ar",
                "bengali": "bn",
                "portuguese": "pt",
                "russian": "ru",
            }

        # Get API key - explicit parameter wins, then the settings snapshot/database.
        from ...config.search_config import get_setting_from_snapshot

        serpapi_api_key = api_key
        if not serpapi_api_key:
            # NOTE(review): the error message below also mentions SERP_API_KEY;
            # presumably get_setting_from_snapshot consults the environment — confirm.
            serpapi_api_key = get_setting_from_snapshot(
                "search.engine.web.serpapi.api_key",
                settings_snapshot=settings_snapshot,
            )

        if not serpapi_api_key:
            raise ValueError(
                "SerpAPI key not found. Please provide api_key parameter, set the SERP_API_KEY environment variable, or set it in the UI settings."
            )

        # Resolve the "hl" language code, defaulting to English
        language_code = language_code_mapping.get(search_language.lower(), "en")

        # Initialize SerpAPI wrapper with Google engine parameters
        self.engine = SerpAPIWrapper(
            serpapi_api_key=serpapi_api_key,
            params={
                "engine": "google",
                "hl": language_code,
                "gl": region,
                "safe": "active" if safe_search else "off",
                "tbs": f"qdr:{time_period}",
                "num": max_results,
            },
        )

        # If full content is requested, initialize FullSearchResults
        if include_full_content:
            # Import FullSearchResults only if needed
            try:
                from .full_search import FullSearchResults

                self.full_search = FullSearchResults(
                    llm=llm,
                    web_search=self.engine,
                    language=search_language,
                    max_results=max_results,
                    region=region,
                    time=time_period,
                    safesearch="Moderate" if safe_search else "Off",
                )
            except ImportError:
                logger.warning(
                    "Warning: FullSearchResults not available. Full content retrieval disabled."
                )
                self.include_full_content = False

    @staticmethod
    def _strip_full_result(item: Dict[str, Any]) -> Dict[str, Any]:
        """
        Return a copy of the full SerpAPI result stored on a preview item.

        Falls back to a copy of the preview itself when no full result was
        stored. Copying prevents callers from mutating the cached preview
        dictionaries (the original snippet-only path mutated them in place);
        the temporary "_full_result" bookkeeping key is removed if present.

        Args:
            item: A preview dictionary, possibly carrying a "_full_result" key.

        Returns:
            A new dictionary safe for the caller to modify.
        """
        if "_full_result" in item:
            result = item["_full_result"].copy()
        else:
            result = item.copy()
        # Drop the temporary bookkeeping key; no-op when absent.
        result.pop("_full_result", None)
        return result

    def _get_previews(self, query: str) -> List[Dict[str, Any]]:
        """
        Get preview information from SerpAPI.

        Args:
            query: The search query

        Returns:
            List of preview dictionaries (empty list on any SerpAPI error)
        """
        logger.info("Getting search results from SerpAPI")

        try:
            # Get organic search results from SerpAPI
            organic_results = self.engine.results(query).get(
                "organic_results", []
            )

            # Format results as previews
            previews = []
            for result in organic_results:
                preview = {
                    "id": result.get(
                        "position", len(previews)
                    ),  # Use position as ID, index as fallback
                    "title": result.get("title", ""),
                    "link": result.get("link", ""),
                    "snippet": result.get("snippet", ""),
                    "displayed_link": result.get("displayed_link", ""),
                    "position": result.get("position"),
                }

                # Store full SerpAPI result for later
                preview["_full_result"] = result

                previews.append(preview)

            # Store the previews for potential full content retrieval
            self._search_results = previews

            return previews

        except Exception:
            logger.exception("Error getting SerpAPI results")
            return []

    def _get_full_content(
        self, relevant_items: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """
        Get full content for the relevant search results.
        If include_full_content is True and FullSearchResults is available,
        retrieves full webpage content for the results.

        Args:
            relevant_items: List of relevant preview dictionaries

        Returns:
            List of result dictionaries with full content if requested
        """
        # Snippet-only mode: return the stored SerpAPI results without fetching pages
        if (
            hasattr(search_config, "SEARCH_SNIPPETS_ONLY")
            and search_config.SEARCH_SNIPPETS_ONLY
        ):
            logger.info("Snippet-only mode, skipping full content retrieval")
            return [
                self._strip_full_result(item) for item in relevant_items
            ]

        # If full content retrieval is enabled
        if self.include_full_content and hasattr(self, "full_search"):
            logger.info("Retrieving full webpage content")

            try:
                # Use FullSearchResults to get full content
                # This is a simplified approach - in a real implementation,
                # you would need to fetch and process the URLs
                results_with_content = self.full_search._get_full_content(
                    relevant_items
                )

                return results_with_content

            except Exception as e:
                logger.info(f"Error retrieving full content: {e}")
                # Fall back to returning the items without full content

        # Return items with their full SerpAPI information
        return [self._strip_full_result(item) for item in relevant_items]

    def run(
        self, query: str, research_context: Dict[str, Any] | None = None
    ) -> List[Dict[str, Any]]:
        """
        Execute a search using SerpAPI with the two-phase approach.

        Args:
            query: The search query
            research_context: Context from previous research to use.

        Returns:
            List of search results
        """
        logger.info("---Execute a search using SerpAPI (Google)---")

        # Use the implementation from the parent class which handles all phases
        results = super().run(query, research_context=research_context)

        # Clean up the per-call preview cache
        if hasattr(self, "_search_results"):
            del self._search_results

        return results