Coverage for src / local_deep_research / web_search_engines / engines / search_engine_ddg.py: 0%
46 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
1from typing import Any, Dict, List, Optional
3from langchain_community.utilities import DuckDuckGoSearchAPIWrapper
4from langchain_core.language_models import BaseLLM
5from loguru import logger
7from ..rate_limiting import RateLimitError
8from ..search_engine_base import BaseSearchEngine
9from .full_search import FullSearchResults # Import the FullSearchResults class
class DuckDuckGoSearchEngine(BaseSearchEngine):
    """DuckDuckGo search engine implementation with two-phase retrieval.

    Phase one (`_get_previews`) fetches titles/snippets/links through
    LangChain's ``DuckDuckGoSearchAPIWrapper``. Phase two
    (`_get_full_content`) optionally expands the selected previews via
    ``FullSearchResults``. Orchestration of the two phases is delegated to
    ``BaseSearchEngine.run``.
    """

    # Mark as public search engine
    is_public = True
    # Mark as generic search engine (general web search)
    is_generic = True

    def __init__(
        self,
        max_results: int = 10,
        region: str = "us",
        safe_search: bool = True,
        llm: Optional[BaseLLM] = None,
        language: str = "English",
        include_full_content: bool = False,
        max_filtered_results=5,
    ):
        """
        Initialize the DuckDuckGo search engine.

        Args:
            max_results: Maximum number of search results
            region: Region code for search results
            safe_search: Whether to enable safe search
            llm: Language model for relevance filtering
            language: Language for content processing
            include_full_content: Whether to include full webpage content in results
            max_filtered_results: Forwarded unchanged to
                ``BaseSearchEngine.__init__`` (defaults to 5)
        """
        # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
        super().__init__(
            llm=llm,
            max_filtered_results=max_filtered_results,
            max_results=max_results,
        )
        self.region = region
        self.safe_search = safe_search
        self.language = language
        self.include_full_content = include_full_content

        # Initialize the DuckDuckGo wrapper
        self.engine = DuckDuckGoSearchAPIWrapper(
            region=region,
            max_results=max_results,
            safesearch="moderate" if safe_search else "off",
        )

        # FullSearchResults is only created when full content was requested
        # AND an LLM is available; otherwise the ``full_search`` attribute is
        # deliberately left unset and `_get_full_content` passes items through.
        if include_full_content and llm:
            self.full_search = FullSearchResults(
                llm=llm,
                web_search=self.engine,
                language=language,
                max_results=max_results,
                region=region,
                time="y",
                # NOTE(review): capitalised here, lower-case for the wrapper
                # above -- presumably each API expects its own spelling; confirm.
                safesearch="Moderate" if safe_search else "Off",
            )

    def run(
        self, query: str, research_context: Dict[str, Any] | None = None
    ) -> List[Dict[str, Any]]:
        """
        Execute a search using DuckDuckGo with the two-phase approach.

        This method only logs and delegates to ``BaseSearchEngine.run``. The
        parent implementation (not visible in this file) is described as
        respecting these config parameters:
            - SEARCH_SNIPPETS_ONLY: If True, only returns snippets without full content
            - SKIP_RELEVANCE_FILTER: If True, returns all results without filtering

        Args:
            query: The search query
            research_context: Context from previous research to use.

        Returns:
            List of search results
        """
        logger.info("---Execute a search using DuckDuckGo---")

        # Implementation of the two-phase approach (from parent class)
        return super().run(query, research_context=research_context)

    def _get_previews(self, query: str) -> List[Dict[str, Any]]:
        """
        Get preview information (titles and snippets) for initial search results.

        Args:
            query: The search query

        Returns:
            List of preview dictionaries with 'id', 'title', 'snippet' and
            'link' keys. An empty list is returned when the wrapper yields a
            non-list value or when an error that does not look like rate
            limiting occurs.

        Raises:
            RateLimitError: When the error text mentions a rate limit, a
                403/forbidden response, or a timeout. The original exception
                is attached as ``__cause__``.
        """
        try:
            # Get search results from DuckDuckGo
            results = self.engine.results(query, max_results=self.max_results)

            if not isinstance(results, list):
                return []

            # Process results to get previews
            return [
                {
                    "id": result.get("link"),  # Use URL as ID for DDG
                    "title": result.get("title", ""),
                    "snippet": result.get("snippet", ""),
                    "link": result.get("link", ""),
                }
                for result in results
            ]

        except Exception as e:
            error_msg = str(e)
            logger.exception(f"Error getting DuckDuckGo previews: {error_msg}")

            # Lower-case once; all textual pattern checks are case-insensitive.
            lowered = error_msg.lower()

            # Check for known rate limit patterns. The literal "202 Ratelimit"
            # message is covered by this case-insensitive test as well.
            if "ratelimit" in lowered:
                raise RateLimitError(
                    f"DuckDuckGo rate limit hit: {error_msg}"
                ) from e
            if "403" in error_msg or "forbidden" in lowered:
                raise RateLimitError(
                    f"DuckDuckGo access forbidden (possible rate limit): {error_msg}"
                ) from e
            if "timeout" in lowered or "timed out" in lowered:
                # Timeouts can sometimes indicate rate limiting
                raise RateLimitError(
                    f"DuckDuckGo timeout (possible rate limit): {error_msg}"
                ) from e

            # Any other failure is treated as "no results" (best effort).
            return []

    def _get_full_content(
        self, relevant_items: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """
        Get full content for the relevant items by using FullSearchResults.

        Args:
            relevant_items: List of relevant preview dictionaries

        Returns:
            The output of ``FullSearchResults._get_full_content`` when a
            ``full_search`` helper was created in ``__init__``; otherwise the
            input list itself, unchanged.
        """
        # ``full_search`` only exists when __init__ was given both
        # include_full_content=True and an LLM.
        full_search = getattr(self, "full_search", None)
        if full_search is None:
            # Otherwise, just return the relevant items without full content
            return relevant_items

        return full_search._get_full_content(relevant_items)