Coverage for src/local_deep_research/web_search_engines/engines/search_engine_serpapi.py: 71%
79 statements
from loguru import logger
from typing import Any, Dict, List, Optional

from langchain_community.utilities import SerpAPIWrapper
from langchain_core.language_models import BaseLLM

from ...config import search_config
from ..search_engine_base import BaseSearchEngine


class SerpAPISearchEngine(BaseSearchEngine):
    """Google search engine implementation using SerpAPI with a two-phase approach."""

    # Mark as public search engine
    is_public = True
    # Mark as generic search engine (general web search via Google)
    is_generic = True

    def __init__(
        self,
        max_results: int = 10,
        region: str = "us",
        time_period: str = "y",
        safe_search: bool = True,
        search_language: str = "English",
        api_key: Optional[str] = None,
        language_code_mapping: Optional[Dict[str, str]] = None,
        llm: Optional[BaseLLM] = None,
        include_full_content: bool = False,
        max_filtered_results: Optional[int] = None,
        settings_snapshot: Optional[Dict[str, Any]] = None,
        **kwargs,
    ):
        """
        Initialize the SerpAPI search engine.

        Args:
            max_results: Maximum number of search results
            region: Region code for search results
            time_period: Time period for search results
            safe_search: Whether to enable safe search
            search_language: Language for search results
            api_key: SerpAPI API key (can also be set via the SERP_API_KEY
                environment variable)
            language_code_mapping: Mapping from language names to codes
            llm: Language model for relevance filtering
            include_full_content: Whether to include full webpage content in results
            max_filtered_results: Maximum number of results to keep after filtering
            settings_snapshot: Settings snapshot for thread context
            **kwargs: Additional parameters (ignored but accepted for compatibility)
        """
        # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
        super().__init__(
            llm=llm,
            max_filtered_results=max_filtered_results,
            max_results=max_results,
        )
        self.include_full_content = include_full_content

        # Set up language code mapping
        if language_code_mapping is None:
            language_code_mapping = {
                "english": "en",
                "spanish": "es",
                "chinese": "zh",
                "hindi": "hi",
                "french": "fr",
                "arabic": "ar",
                "bengali": "bn",
                "portuguese": "pt",
                "russian": "ru",
            }

        # Get API key - check params, env vars, or database
        from ...config.search_config import get_setting_from_snapshot

        serpapi_api_key = api_key
        if not serpapi_api_key:
            serpapi_api_key = get_setting_from_snapshot(
                "search.engine.web.serpapi.api_key",
                settings_snapshot=settings_snapshot,
            )

        if not serpapi_api_key:
            raise ValueError(
                "SerpAPI key not found. Please provide the api_key parameter, "
                "set the SERP_API_KEY environment variable, or set it in the UI settings."
            )

        # Get language code
        language_code = language_code_mapping.get(search_language.lower(), "en")
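
        # The params below are standard Google URL parameters passed through
        # by SerpAPI: "hl" is the interface language, "gl" the country for
        # results, "safe" toggles SafeSearch ("active"/"off"), "tbs" with
        # "qdr:<x>" restricts results to a recent window (h=hour, d=day,
        # w=week, m=month, y=year), and "num" caps results per page.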
        # Initialize SerpAPI wrapper
        self.engine = SerpAPIWrapper(
            serpapi_api_key=serpapi_api_key,
            params={
                "engine": "google",
                "hl": language_code,
                "gl": region,
                "safe": "active" if safe_search else "off",
                "tbs": f"qdr:{time_period}",
                "num": max_results,
            },
        )

        # If full content is requested, initialize FullSearchResults
        if include_full_content:
            # Import FullSearchResults only if needed
            try:
                from .full_search import FullSearchResults

                self.full_search = FullSearchResults(
                    llm=llm,
                    web_search=self.engine,
                    language=search_language,
                    max_results=max_results,
                    region=region,
                    time=time_period,
                    safesearch="Moderate" if safe_search else "Off",
                )
            except ImportError:
                logger.warning(
                    "FullSearchResults not available. Full content retrieval disabled."
                )
                self.include_full_content = False
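
    # Two-phase flow, orchestrated by BaseSearchEngine.run() below:
    # phase one (_get_previews) fetches lightweight snippet previews from
    # SerpAPI; the base class then filters them for relevance (using the
    # LLM when one is configured) before phase two (_get_full_content)
    # optionally upgrades the surviving items to full results.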
    def _get_previews(self, query: str) -> List[Dict[str, Any]]:
        """
        Get preview information from SerpAPI.

        Args:
            query: The search query

        Returns:
            List of preview dictionaries
        """
        logger.info("Getting search results from SerpAPI")

        try:
            # Get search results from SerpAPI
            organic_results = self.engine.results(query).get(
                "organic_results", []
            )

            # Format results as previews
            previews = []
            for result in organic_results:
                preview = {
                    "id": result.get(
                        "position", len(previews)
                    ),  # Use position as ID
                    "title": result.get("title", ""),
                    "link": result.get("link", ""),
                    "snippet": result.get("snippet", ""),
                    "displayed_link": result.get("displayed_link", ""),
                    "position": result.get("position"),
                }

                # Store full SerpAPI result for later
                preview["_full_result"] = result

                previews.append(preview)

            # Store the previews for potential full content retrieval
            self._search_results = previews

            return previews

        except Exception:
            logger.exception("Error getting SerpAPI results")
            return []
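
    # Shape of a single preview dict built in _get_previews (illustrative
    # values only):
    #   {"id": 1, "title": "…", "link": "https://…", "snippet": "…",
    #    "displayed_link": "…", "position": 1, "_full_result": {…}}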
    def _get_full_content(
        self, relevant_items: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """
        Get full content for the relevant search results.
        If include_full_content is True and FullSearchResults is available,
        retrieves full webpage content for the results.

        Args:
            relevant_items: List of relevant preview dictionaries

        Returns:
            List of result dictionaries with full content if requested
        """
        # Check if we should get full content
        if (
            hasattr(search_config, "SEARCH_SNIPPETS_ONLY")
            and search_config.SEARCH_SNIPPETS_ONLY
        ):
            logger.info("Snippet-only mode, skipping full content retrieval")

            # Return the relevant items with their full SerpAPI information
            results = []
            for item in relevant_items:
                # Use the full result if available, otherwise use the preview
                if "_full_result" in item:
                    result = item["_full_result"]
                    # Remove temporary field
                    if "_full_result" in result:
                        del result["_full_result"]
                else:
                    result = item

                results.append(result)

            return results

        # If full content retrieval is enabled
        if self.include_full_content and hasattr(self, "full_search"):
            logger.info("Retrieving full webpage content")

            try:
                # Use FullSearchResults to get full content
                # This is a simplified approach - in a real implementation,
                # you would need to fetch and process the URLs
                results_with_content = self.full_search._get_full_content(
                    relevant_items
                )

                return results_with_content

            except Exception:
                logger.exception("Error retrieving full content")
                # Fall back to returning the items without full content

        # Return items with their full SerpAPI information
        results = []
        for item in relevant_items:
            # Use the full result if available, otherwise use the preview
            if "_full_result" in item:
                result = item["_full_result"].copy()
                # Remove temporary field
                if "_full_result" in result:
                    del result["_full_result"]
            else:
                result = item.copy()
                if "_full_result" in result:
                    del result["_full_result"]

            results.append(result)

        return results
    def run(
        self, query: str, research_context: Dict[str, Any] | None = None
    ) -> List[Dict[str, Any]]:
        """
        Execute a search using SerpAPI with the two-phase approach.

        Args:
            query: The search query
            research_context: Context from previous research to use.

        Returns:
            List of search results
        """
        logger.info("---Executing a search using SerpAPI (Google)---")

        # Use the implementation from the parent class, which handles all phases
        results = super().run(query, research_context=research_context)

        # Clean up
        if hasattr(self, "_search_results"):
            del self._search_results

        return results
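

# Minimal usage sketch (illustrative, not part of the module; the API key
# and query are placeholders, and the relative imports mean this class is
# normally used from within the package rather than run as a script):
#
#     engine = SerpAPISearchEngine(
#         max_results=5,
#         api_key="YOUR_SERPAPI_KEY",  # or rely on SERP_API_KEY / UI settings
#     )
#     for item in engine.run("open problems in graph theory"):
#         print(item.get("title"), "->", item.get("link"))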