Coverage for src / local_deep_research / web_search_engines / engines / search_engine_ddg.py: 100%
41 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
1from typing import Any, Dict, List, Optional
3from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
4from langchain_core.language_models import BaseLLM
5from loguru import logger
7from ..rate_limiting import RateLimitError
8from ..search_engine_base import BaseSearchEngine
class DuckDuckGoSearchEngine(BaseSearchEngine):
    """DuckDuckGo search engine implementation with two-phase retrieval"""

    # Mark as public search engine
    is_public = True
    # Mark as generic search engine (general web search)
    is_generic = True

    def __init__(
        self,
        max_results: int = 10,
        region: str = "us",
        safe_search: bool = True,
        llm: Optional[BaseLLM] = None,
        language: str = "English",
        include_full_content: bool = False,
        max_filtered_results: int = 5,
        settings_snapshot: Optional[Dict[str, Any]] = None,
    ):
        """
        Initialize the DuckDuckGo search engine.

        Args:
            max_results: Maximum number of search results
            region: Region code for search results
            safe_search: Whether to enable safe search
            llm: Language model for relevance filtering
            language: Language for content processing
            include_full_content: Whether to include full webpage content in results
            max_filtered_results: Forwarded unchanged to BaseSearchEngine
            settings_snapshot: Optional settings mapping, forwarded unchanged
                to BaseSearchEngine
        """
        # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
        super().__init__(
            llm=llm,
            max_filtered_results=max_filtered_results,
            max_results=max_results,
            include_full_content=include_full_content,
            settings_snapshot=settings_snapshot,
        )
        self.region = region
        self.safe_search = safe_search
        self.language = language

        # Initialize the DuckDuckGo wrapper
        self.engine = DuckDuckGoSearchAPIWrapper(
            region=region,
            max_results=max_results,
            safesearch="moderate" if safe_search else "off",
        )

        # Initialize FullSearchResults if full content is requested.
        # NOTE(review): time_period is hard-coded to "y" (presumably "past
        # year" -- confirm), and this call uses capitalised safe-search
        # values while the wrapper above takes lowercase ones.
        self._init_full_search(
            web_search=self.engine,
            language=language,
            max_results=max_results,
            region=region,
            time_period="y",
            safe_search="Moderate" if safe_search else "Off",
        )

    def _get_previews(self, query: str) -> List[Dict[str, Any]]:
        """
        Get preview information (titles and snippets) for initial search results.

        Args:
            query: The search query

        Returns:
            List of preview dictionaries with 'id', 'title', 'snippet' and
            'link' keys. An empty list is returned when the wrapper does not
            return a list, or when an error occurs that is not classified as
            a rate limit.

        Raises:
            RateLimitError: When the error text mentions a rate limit, a
                403/forbidden response, or a timeout.
        """
        try:
            # Get search results from DuckDuckGo
            results = self.engine.results(query, max_results=self.max_results)

            if not isinstance(results, list):
                return []

            # Process results to get previews
            return [
                {
                    "id": result.get("link"),  # Use URL as ID for DDG
                    "title": result.get("title", ""),
                    "snippet": result.get("snippet", ""),
                    "link": result.get("link", ""),
                }
                for result in results
            ]

        except Exception as e:
            error_msg = str(e)
            # Lower-case once; every text check below is case-insensitive.
            lowered = error_msg.lower()
            sanitized = self._sanitize_error_message(error_msg)
            logger.exception("Error getting DuckDuckGo previews: {}", sanitized)

            # Check for known rate limit patterns. The case-insensitive
            # "ratelimit" test also covers the literal "202 Ratelimit" text.
            if "ratelimit" in lowered:
                raise RateLimitError(
                    f"DuckDuckGo rate limit hit: {sanitized}"
                ) from e
            if "403" in error_msg or "forbidden" in lowered:
                raise RateLimitError(
                    f"DuckDuckGo access forbidden (possible rate limit): {sanitized}"
                ) from e
            if "timeout" in lowered or "timed out" in lowered:
                # Timeouts can sometimes indicate rate limiting
                raise RateLimitError(
                    f"DuckDuckGo timeout (possible rate limit): {sanitized}"
                ) from e

            # Any other failure is deliberately best-effort: already logged
            # above, reported to the caller as "no results".
            return []

    def _get_full_content(
        self, relevant_items: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """
        Get full content for the relevant items by using FullSearchResults.

        Args:
            relevant_items: List of relevant preview dictionaries

        Returns:
            List of result dictionaries with full content, or the input list
            unchanged when no full-search helper is available
        """
        # If we have FullSearchResults, use it to get full content. getattr
        # with a None default also covers an attribute that exists but is None.
        full_search = getattr(self, "full_search", None)
        if full_search is not None:
            return full_search._get_full_content(relevant_items)

        # Otherwise, just return the relevant items without full content
        return relevant_items