Coverage for src / local_deep_research / web_search_engines / engines / search_engine_serpapi.py: 100%

34 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-14 23:55 +0000

1from loguru import logger 

2from typing import Any, Dict, List, Optional 

3 

4from langchain_community.utilities import SerpAPIWrapper 

5from langchain_core.language_models import BaseLLM 

6 

7from ..rate_limiting import RateLimitError 

8from ..search_engine_base import BaseSearchEngine 

9 

10 

class SerpAPISearchEngine(BaseSearchEngine):
    """Google web search backed by SerpAPI.

    Queries are sent through LangChain's ``SerpAPIWrapper``. The organic
    results are turned into lightweight preview dictionaries, which are also
    kept on the instance for potential full-content retrieval later on.
    """

    # Flag this engine as a public search engine.
    is_public = True
    # Flag this engine as a generic one (general web search via Google).
    is_generic = True

    def __init__(
        self,
        max_results: int = 10,
        region: str = "us",
        time_period: str = "y",
        safe_search: bool = True,
        search_language: str = "English",
        api_key: Optional[str] = None,
        language_code_mapping: Optional[Dict[str, str]] = None,
        llm: Optional[BaseLLM] = None,
        include_full_content: bool = False,
        max_filtered_results: Optional[int] = None,
        settings_snapshot: Optional[Dict[str, Any]] = None,
        **kwargs,
    ):
        """
        Set up the SerpAPI wrapper and the optional full-content search.

        Args:
            max_results: Upper bound on the number of results requested
                (sent to SerpAPI as ``num``)
            region: Region code for the search (sent as ``gl``)
            time_period: Time window suffix; it is formatted into the ``tbs``
                parameter as ``qdr:<time_period>``
            safe_search: ``True`` sends ``safe=active``, ``False`` sends
                ``safe=off``
            search_language: Language name; lower-cased and looked up in
                ``language_code_mapping`` (falls back to ``"en"``)
            api_key: SerpAPI API key (can also be set via
                LDR_SEARCH_ENGINE_WEB_SERPAPI_API_KEY env var or in UI settings)
            language_code_mapping: Mapping from lower-case language names to
                language codes; a built-in table is used when ``None``
            llm: Language model handed to the base class (relevance filtering)
            include_full_content: Whether full webpage content is wanted
            max_filtered_results: Cap on results kept after filtering
            settings_snapshot: Settings snapshot for thread context
            **kwargs: Extra parameters, accepted for compatibility and ignored
        """
        # The base class owns the LLM, the result limits and the snapshot.
        super().__init__(
            llm=llm,
            max_filtered_results=max_filtered_results,
            max_results=max_results,
            include_full_content=include_full_content,
            settings_snapshot=settings_snapshot,
        )

        # Use the caller's table when one is given (even an empty one),
        # otherwise fall back to the built-in language-name -> code table.
        name_to_code = (
            language_code_mapping
            if language_code_mapping is not None
            else {
                "english": "en",
                "spanish": "es",
                "chinese": "zh",
                "hindi": "hi",
                "french": "fr",
                "arabic": "ar",
                "bengali": "bn",
                "portuguese": "pt",
                "russian": "ru",
            }
        )

        # API key resolution is delegated to the base-class helper, which is
        # given the explicit argument, the settings key and the snapshot.
        resolved_key = self._resolve_api_key(
            api_key,
            "search.engine.web.serpapi.api_key",
            engine_name="SerpAPI",
            settings_snapshot=settings_snapshot,
        )

        # Unknown language names silently map to English.
        hl_code = name_to_code.get(search_language.lower(), "en")

        # SerpAPI and the full-search helper use different vocabularies for
        # the same safe-search switch.
        if safe_search:
            serpapi_safe, full_search_safe = "active", "Moderate"
        else:
            serpapi_safe, full_search_safe = "off", "Off"

        request_params = {
            "engine": "google",
            "hl": hl_code,
            "gl": region,
            "safe": serpapi_safe,
            "tbs": f"qdr:{time_period}",
            "num": max_results,
        }
        self.engine = SerpAPIWrapper(
            serpapi_api_key=resolved_key,
            params=request_params,
        )

        # Called unconditionally; presumably the base-class helper decides
        # whether full-content retrieval is actually set up - TODO confirm.
        self._init_full_search(
            web_search=self.engine,
            language=search_language,
            max_results=max_results,
            region=region,
            time_period=time_period,
            safe_search=full_search_safe,
        )

    def _get_previews(self, query: str) -> List[Dict[str, Any]]:
        """
        Fetch organic results from SerpAPI and shape them into previews.

        Args:
            query: The search query

        Returns:
            One preview dictionary per organic result, or an empty list when
            the lookup fails for a reason other than rate limiting

        Raises:
            RateLimitError: Re-raised untouched when raised inside the lookup;
                other errors are passed to ``_raise_if_rate_limit``, which may
                raise as well
        """
        logger.info("Getting search results from SerpAPI")

        try:
            response = self.engine.results(query)
            hits = response.get("organic_results", [])

            # The running index is the fallback ID for results that carry no
            # "position"; the raw SerpAPI entry is kept under "_full_result".
            previews: list[dict[str, Any]] = [
                {
                    "id": hit.get("position", index),
                    "title": hit.get("title", ""),
                    "link": hit.get("link", ""),
                    "snippet": hit.get("snippet", ""),
                    "displayed_link": hit.get("displayed_link", ""),
                    "position": hit.get("position"),
                    "_full_result": hit,
                }
                for index, hit in enumerate(hits)
            ]

            # Remember the previews for potential full content retrieval.
            self._search_results = previews

            return previews

        except RateLimitError:
            # Rate-limit signals must reach the caller unchanged.
            raise
        except Exception as e:
            logger.exception("Error getting SerpAPI results")
            # Gives the base class a chance to raise for rate-limit errors;
            # anything else degrades to "no results".
            self._raise_if_rate_limit(e)
            return []

76 api_key, 

77 "search.engine.web.serpapi.api_key", 

78 engine_name="SerpAPI", 

79 settings_snapshot=settings_snapshot, 

80 ) 

81 

82 # Get language code 

83 language_code = language_code_mapping.get(search_language.lower(), "en") 

84 

85 # Initialize SerpAPI wrapper 

86 self.engine = SerpAPIWrapper( 

87 serpapi_api_key=serpapi_api_key, 

88 params={ 

89 "engine": "google", 

90 "hl": language_code, 

91 "gl": region, 

92 "safe": "active" if safe_search else "off", 

93 "tbs": f"qdr:{time_period}", 

94 "num": max_results, 

95 }, 

96 ) 

97 

98 # If full content is requested, initialize FullSearchResults 

99 self._init_full_search( 

100 web_search=self.engine, 

101 language=search_language, 

102 max_results=max_results, 

103 region=region, 

104 time_period=time_period, 

105 safe_search="Moderate" if safe_search else "Off", 

106 ) 

107 

108 def _get_previews(self, query: str) -> List[Dict[str, Any]]: 

109 """ 

110 Get preview information from SerpAPI. 

111 

112 Args: 

113 query: The search query 

114 

115 Returns: 

116 List of preview dictionaries 

117 """ 

118 logger.info("Getting search results from SerpAPI") 

119 

120 try: 

121 # Get search results from SerpAPI 

122 organic_results = self.engine.results(query).get( 

123 "organic_results", [] 

124 ) 

125 

126 # Format results as previews 

127 previews: list[dict[str, Any]] = [] 

128 for result in organic_results: 

129 preview = { 

130 "id": result.get( 

131 "position", len(previews) 

132 ), # Use position as ID 

133 "title": result.get("title", ""), 

134 "link": result.get("link", ""), 

135 "snippet": result.get("snippet", ""), 

136 "displayed_link": result.get("displayed_link", ""), 

137 "position": result.get("position"), 

138 } 

139 

140 # Store full SerpAPI result for later 

141 preview["_full_result"] = result 

142 

143 previews.append(preview) 

144 

145 # Store the previews for potential full content retrieval 

146 self._search_results = previews 

147 

148 return previews 

149 

150 except RateLimitError: 

151 raise 

152 except Exception as e: 

153 logger.exception("Error getting SerpAPI results") 

154 self._raise_if_rate_limit(e) 

155 return []