Coverage for src / local_deep_research / web_search_engines / engines / search_engine_tavily.py: 97%

62 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-14 23:55 +0000

1from typing import Any, Dict, List, Optional 

2 

3import requests 

4from langchain_core.language_models import BaseLLM 

5from loguru import logger 

6 

7from ...security.safe_requests import safe_post 

8from ..rate_limiting import RateLimitError 

9from ..search_engine_base import BaseSearchEngine 

10 

11 

class TavilySearchEngine(BaseSearchEngine):
    """Tavily search engine implementation with two-phase approach.

    Phase one (`_get_previews`) queries the Tavily ``/search`` endpoint and
    returns lightweight preview dictionaries. Phase two
    (`_get_full_content`) delegates to the base class and then promotes
    Tavily's ``raw_content`` field to ``content`` when it is usable.
    """

    # Mark as public search engine
    is_public = True
    # Mark as generic search engine (general web search)
    is_generic = True

    # Limits applied to every outgoing request.
    _MAX_QUERY_LENGTH = 400  # queries are truncated to this many characters
    _MAX_API_RESULTS = 20  # upper bound sent as "max_results"
    _REQUEST_TIMEOUT = 30  # seconds, passed to safe_post

    def __init__(
        self,
        max_results: int = 10,
        region: str = "US",
        time_period: str = "y",
        safe_search: bool = True,
        search_language: str = "English",
        api_key: Optional[str] = None,
        llm: Optional[BaseLLM] = None,
        include_full_content: bool = True,
        max_filtered_results: Optional[int] = None,
        search_depth: str = "basic",
        include_domains: Optional[List[str]] = None,
        exclude_domains: Optional[List[str]] = None,
        settings_snapshot: Optional[Dict[str, Any]] = None,
        **kwargs,
    ):
        """
        Initialize the Tavily search engine.

        Args:
            max_results: Maximum number of search results
            region: Region code; not sent to Tavily, only forwarded to the
                full-search initialisation
            time_period: Time period; not sent to Tavily, only forwarded to
                the full-search initialisation
            safe_search: Safe-search flag; not sent to Tavily, forwarded to
                the full-search initialisation as "moderate" or "off"
            search_language: Language; not sent to Tavily, only forwarded to
                the full-search initialisation
            api_key: Tavily API key (can also be set via
                LDR_SEARCH_ENGINE_WEB_TAVILY_API_KEY env var or in UI settings)
            llm: Language model for relevance filtering
            include_full_content: Whether to include full webpage content in results
            max_filtered_results: Maximum number of results to keep after filtering
            search_depth: "basic" or "advanced" - controls search quality vs speed
            include_domains: List of domains to include in search
            exclude_domains: List of domains to exclude from search
            settings_snapshot: Settings snapshot for thread context
            **kwargs: Additional parameters (ignored but accepted for compatibility)
        """
        # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
        super().__init__(
            llm=llm,
            max_filtered_results=max_filtered_results,
            max_results=max_results,
            include_full_content=include_full_content,
            settings_snapshot=settings_snapshot,
        )
        self.search_depth = search_depth
        # Normalise None to an empty list so later truthiness checks are simple.
        self.include_domains = include_domains or []
        self.exclude_domains = exclude_domains or []

        # Resolve the API key through the base-class helper, using the
        # explicit argument and the "search.engine.web.tavily.api_key" setting.
        tavily_api_key = self._resolve_api_key(
            api_key,
            "search.engine.web.tavily.api_key",
            engine_name="Tavily",
            settings_snapshot=settings_snapshot,
        )

        self.api_key = tavily_api_key
        self.base_url = "https://api.tavily.com"

        # If full content is requested, initialize FullSearchResults
        if include_full_content:
            # Minimal adapter exposing a ``run(query)`` method that forwards
            # to this engine's preview search.
            class TavilyWrapper:
                def __init__(self, parent):
                    self.parent = parent

                def run(self, query):
                    return self.parent._get_previews(query)

            self._init_full_search(
                web_search=TavilyWrapper(self),
                language=search_language,
                max_results=max_results,
                region=region,
                time_period=time_period,
                safe_search="moderate" if safe_search else "off",
            )

    def _get_previews(self, query: str) -> List[Dict[str, Any]]:
        """
        Get preview information from Tavily Search.

        Args:
            query: The search query (truncated to 400 characters before sending)

        Returns:
            List of preview dictionaries; an empty list when the request
            fails for a reason other than rate limiting.

        Raises:
            RateLimitError: Re-raised unchanged when raised inside the request
                block (e.g. by the rate-limit check on the response status).
        """
        logger.info("Getting search results from Tavily")

        try:
            # Prepare the request payload
            payload = {
                "api_key": self.api_key,
                "query": query[: self._MAX_QUERY_LENGTH],  # Limit query length
                "search_depth": self.search_depth,
                "max_results": min(
                    self._MAX_API_RESULTS, self.max_results
                ),  # Tavily has a max limit
                "include_answer": False,  # We don't need the AI answer
                "include_images": False,  # We don't need images
                "include_raw_content": self.include_full_content,  # Get content if requested
            }

            # Add domain filters if specified
            if self.include_domains:
                payload["include_domains"] = self.include_domains
            if self.exclude_domains:
                payload["exclude_domains"] = self.exclude_domains

            # Apply rate limiting before request and keep the returned value
            self._last_wait_time = self.rate_tracker.apply_rate_limit(
                self.engine_type
            )

            # Make the API request
            response = safe_post(
                f"{self.base_url}/search",
                json=payload,
                headers={"Content-Type": "application/json"},
                timeout=self._REQUEST_TIMEOUT,
            )

            # Check for rate limits before the generic HTTP error check
            self._raise_if_rate_limit(response.status_code)

            response.raise_for_status()

            # Parse the response
            data = response.json()
            results = data.get("results", [])

            # Format results as previews
            previews = []
            for i, result in enumerate(results):
                preview = {
                    "id": result.get("url", str(i)),  # Use URL as ID
                    "title": result.get("title", ""),
                    "link": result.get("url", ""),
                    "snippet": result.get(
                        "content", ""
                    ),  # Tavily calls it "content"
                    "displayed_link": result.get("url", ""),
                    "position": i,
                }

                # Store full Tavily result for later
                preview["_full_result"] = result

                previews.append(preview)

            # Store the previews for potential full content retrieval
            self._search_results = previews

            return previews

        except RateLimitError:
            raise  # Re-raise rate limit errors
        except requests.exceptions.RequestException as e:
            logger.exception("Error getting Tavily results")
            # Give the base-class check a chance to raise for this error
            # before falling back to an empty result.
            self._raise_if_rate_limit(e)
            return []
        except Exception:
            logger.exception("Unexpected error getting Tavily results")
            return []

    def _get_full_content(
        self, relevant_items: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """
        Get full content for the relevant search results.
        Extends base implementation to include Tavily's raw_content.

        Args:
            relevant_items: List of relevant preview dictionaries

        Returns:
            List of result dictionaries with full content if available
        """
        results = super()._get_full_content(relevant_items)

        # If Tavily provided raw_content and full content is requested, use it
        if self.include_full_content:
            for result in results:
                raw_content = result.get("raw_content")
                # Only override when raw_content is non-empty: a present but
                # None/"" value must not wipe out the existing content.
                if raw_content:
                    result["content"] = raw_content

        return results