Coverage for src / local_deep_research / web_search_engines / engines / search_engine_tavily.py: 97%
62 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
1from typing import Any, Dict, List, Optional
3import requests
4from langchain_core.language_models import BaseLLM
5from loguru import logger
7from ...security.safe_requests import safe_post
8from ..rate_limiting import RateLimitError
9from ..search_engine_base import BaseSearchEngine
class TavilySearchEngine(BaseSearchEngine):
    """Tavily search engine implementation with two-phase approach.

    Phase one (`_get_previews`) queries the Tavily ``/search`` endpoint and
    turns each hit into a lightweight preview dictionary. Phase two
    (`_get_full_content`) delegates to the base class and then promotes
    Tavily's ``raw_content`` into the ``content`` field when it is present.
    """

    # Mark as public search engine
    is_public = True
    # Mark as generic search engine (general web search)
    is_generic = True

    def __init__(
        self,
        max_results: int = 10,
        region: str = "US",
        time_period: str = "y",
        safe_search: bool = True,
        search_language: str = "English",
        api_key: Optional[str] = None,
        llm: Optional[BaseLLM] = None,
        include_full_content: bool = True,
        max_filtered_results: Optional[int] = None,
        search_depth: str = "basic",
        include_domains: Optional[List[str]] = None,
        exclude_domains: Optional[List[str]] = None,
        settings_snapshot: Optional[Dict[str, Any]] = None,
        **kwargs,
    ):
        """
        Initialize the Tavily search engine.

        Args:
            max_results: Maximum number of search results
            region: Region code for search results (not sent to Tavily;
                only forwarded to the full-search helper)
            time_period: Time period for search results (not sent to Tavily;
                only forwarded to the full-search helper)
            safe_search: Whether to enable safe search (not sent to Tavily;
                forwarded to the full-search helper as "moderate"/"off")
            search_language: Language for search results (not sent to Tavily;
                only forwarded to the full-search helper)
            api_key: Tavily API key (can also be set via LDR_SEARCH_ENGINE_WEB_TAVILY_API_KEY env var or in UI settings)
            llm: Language model for relevance filtering
            include_full_content: Whether to include full webpage content in results
            max_filtered_results: Maximum number of results to keep after filtering
            search_depth: "basic" or "advanced" - controls search quality vs speed
            include_domains: List of domains to include in search
            exclude_domains: List of domains to exclude from search
            settings_snapshot: Settings snapshot for thread context
            **kwargs: Additional parameters (ignored but accepted for compatibility)
        """
        # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
        super().__init__(
            llm=llm,
            max_filtered_results=max_filtered_results,
            max_results=max_results,
            include_full_content=include_full_content,
            settings_snapshot=settings_snapshot,
        )
        self.search_depth = search_depth
        # Normalise None to an empty list so later truthiness checks are safe.
        self.include_domains = include_domains or []
        self.exclude_domains = exclude_domains or []

        # Get API key - check params, settings, or env vars
        tavily_api_key = self._resolve_api_key(
            api_key,
            "search.engine.web.tavily.api_key",
            engine_name="Tavily",
            settings_snapshot=settings_snapshot,
        )

        self.api_key = tavily_api_key
        self.base_url = "https://api.tavily.com"

        # If full content is requested, initialize FullSearchResults
        if include_full_content:
            # Create a simple wrapper for Tavily API calls: the full-search
            # helper receives an object with a ``run(query)`` method, which
            # here just forwards to this engine's preview search.
            class TavilyWrapper:
                def __init__(self, parent):
                    self.parent = parent

                def run(self, query):
                    return self.parent._get_previews(query)

            self._init_full_search(
                web_search=TavilyWrapper(self),
                language=search_language,
                max_results=max_results,
                region=region,
                time_period=time_period,
                safe_search="moderate" if safe_search else "off",
            )

    def _get_previews(self, query: str) -> List[Dict[str, Any]]:
        """
        Get preview information from Tavily Search.

        Args:
            query: The search query

        Returns:
            List of preview dictionaries. An empty list is returned when the
            request fails or an unexpected error occurs.

        Raises:
            RateLimitError: Propagated unchanged when a rate limit is detected.
        """
        logger.info("Getting search results from Tavily")

        try:
            # Prepare the request payload
            payload = {
                "api_key": self.api_key,
                "query": query[:400],  # Limit query length
                "search_depth": self.search_depth,
                "max_results": min(
                    20, self.max_results
                ),  # Tavily has a max limit
                "include_answer": False,  # We don't need the AI answer
                "include_images": False,  # We don't need images
                "include_raw_content": self.include_full_content,  # Get content if requested
            }

            # Add domain filters if specified
            if self.include_domains:
                payload["include_domains"] = self.include_domains
            if self.exclude_domains:
                payload["exclude_domains"] = self.exclude_domains

            # Apply rate limiting before request
            self._last_wait_time = self.rate_tracker.apply_rate_limit(
                self.engine_type
            )

            # Make the API request
            response = safe_post(
                f"{self.base_url}/search",
                json=payload,
                headers={"Content-Type": "application/json"},
                timeout=30,
            )

            # Check for rate limits
            self._raise_if_rate_limit(response.status_code)

            response.raise_for_status()

            # Parse the response
            data = response.json()
            results = data.get("results", [])

            # Format results as previews
            previews = []
            for i, result in enumerate(results):
                preview = {
                    "id": result.get("url", str(i)),  # Use URL as ID
                    "title": result.get("title", ""),
                    "link": result.get("url", ""),
                    "snippet": result.get(
                        "content", ""
                    ),  # Tavily calls it "content"
                    "displayed_link": result.get("url", ""),
                    "position": i,
                }

                # Store full Tavily result for later
                preview["_full_result"] = result

                previews.append(preview)

            # Store the previews for potential full content retrieval
            self._search_results = previews

            return previews

        except RateLimitError:
            raise  # Re-raise rate limit errors
        except requests.exceptions.RequestException as e:
            logger.exception("Error getting Tavily results")
            # Give the rate-limit check a chance to raise based on the
            # exception before falling back to an empty result.
            self._raise_if_rate_limit(e)
            return []
        except Exception:
            logger.exception("Unexpected error getting Tavily results")
            return []

    def _get_full_content(
        self, relevant_items: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """
        Get full content for the relevant search results.
        Extends base implementation to include Tavily's raw_content.

        Args:
            relevant_items: List of relevant preview dictionaries

        Returns:
            List of result dictionaries with full content if available
        """
        results = super()._get_full_content(relevant_items)

        # If Tavily provided raw_content and full content is requested, use it
        if self.include_full_content:
            for result in results:
                raw_content = result.get("raw_content")
                # Only overwrite when raw_content actually carries text: a
                # present-but-None (or empty) value must not wipe out the
                # content the result already has.
                if raw_content:
                    result["content"] = raw_content

        return results