Coverage for src / local_deep_research / web_search_engines / engines / search_engine_ddg.py: 0%

46 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-01-11 00:51 +0000

1from typing import Any, Dict, List, Optional 

2 

3from langchain_community.utilities import DuckDuckGoSearchAPIWrapper 

4from langchain_core.language_models import BaseLLM 

5from loguru import logger 

6 

7from ..rate_limiting import RateLimitError 

8from ..search_engine_base import BaseSearchEngine 

9from .full_search import FullSearchResults # Import the FullSearchResults class 

10 

11 

class DuckDuckGoSearchEngine(BaseSearchEngine):
    """DuckDuckGo search engine implementation with two-phase retrieval.

    Previews (title, snippet, link) are fetched through
    ``DuckDuckGoSearchAPIWrapper``. Full page content is delegated to
    ``FullSearchResults`` when that helper was configured in ``__init__``.
    """

    # Mark as public search engine
    is_public = True
    # Mark as generic search engine (general web search)
    is_generic = True

    def __init__(
        self,
        max_results: int = 10,
        region: str = "us",
        safe_search: bool = True,
        llm: Optional[BaseLLM] = None,
        language: str = "English",
        include_full_content: bool = False,
        max_filtered_results: int = 5,
    ):
        """
        Initialize the DuckDuckGo search engine.

        Args:
            max_results: Maximum number of search results
            region: Region code for search results
            safe_search: Whether to enable safe search
            llm: Language model for relevance filtering
            language: Language for content processing
            include_full_content: Whether to include full webpage content in
                results. Only takes effect when ``llm`` is also provided.
            max_filtered_results: Forwarded to ``BaseSearchEngine.__init__``
                together with ``llm`` and ``max_results``.
        """
        # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
        super().__init__(
            llm=llm,
            max_filtered_results=max_filtered_results,
            max_results=max_results,
        )
        self.region = region
        self.safe_search = safe_search
        self.language = language
        self.include_full_content = include_full_content

        # Initialize the DuckDuckGo wrapper
        self.engine = DuckDuckGoSearchAPIWrapper(
            region=region,
            max_results=max_results,
            safesearch="moderate" if safe_search else "off",
        )

        # Initialize FullSearchResults if full content is requested.
        # NOTE: ``full_search`` is deliberately left unset otherwise, because
        # ``_get_full_content`` detects it with ``hasattr``.
        if include_full_content:
            if llm:
                self.full_search = FullSearchResults(
                    llm=llm,
                    web_search=self.engine,
                    language=language,
                    max_results=max_results,
                    region=region,
                    time="y",
                    safesearch="Moderate" if safe_search else "Off",
                )
            else:
                # Previously this configuration was dropped silently; make
                # the fallback visible to whoever reads the logs.
                logger.warning(
                    "DuckDuckGo: include_full_content was requested but no "
                    "LLM was provided; full content retrieval is disabled"
                )

    def run(
        self, query: str, research_context: Optional[Dict[str, Any]] = None
    ) -> List[Dict[str, Any]]:
        """
        Execute a search using DuckDuckGo with the two-phase approach.
        Respects config parameters:
        - SEARCH_SNIPPETS_ONLY: If True, only returns snippets without full content
        - SKIP_RELEVANCE_FILTER: If True, returns all results without filtering

        Args:
            query: The search query
            research_context: Context from previous research to use.

        Returns:
            List of search results
        """
        logger.info("---Execute a search using DuckDuckGo---")

        # Implementation of the two-phase approach (from parent class)
        return super().run(query, research_context=research_context)

    def _get_previews(self, query: str) -> List[Dict[str, Any]]:
        """
        Get preview information (titles and snippets) for initial search results.

        Args:
            query: The search query

        Returns:
            List of preview dictionaries with 'id', 'title', 'snippet' and
            'link' keys. An empty list is returned when the wrapper does not
            return a list or when a non-rate-limit error occurs.

        Raises:
            RateLimitError: If the underlying error message looks like a rate
                limit, a 403/forbidden response, or a timeout.
        """
        try:
            # Get search results from DuckDuckGo
            results = self.engine.results(query, max_results=self.max_results)

            if not isinstance(results, list):
                return []

            # Process results to get previews; the URL doubles as the ID.
            return [
                {
                    "id": result.get("link"),  # Use URL as ID for DDG
                    "title": result.get("title", ""),
                    "snippet": result.get("snippet", ""),
                    "link": result.get("link", ""),
                }
                for result in results
            ]

        except Exception as e:
            error_msg = str(e)
            logger.exception(f"Error getting DuckDuckGo previews: {error_msg}")

            # Lower-case once; every case-insensitive check below reuses it.
            lowered = error_msg.lower()

            # Check for known rate limit patterns. The case-insensitive
            # "ratelimit" test also covers the literal "202 Ratelimit".
            if "ratelimit" in lowered:
                raise RateLimitError(
                    f"DuckDuckGo rate limit hit: {error_msg}"
                ) from e
            if "403" in error_msg or "forbidden" in lowered:
                raise RateLimitError(
                    f"DuckDuckGo access forbidden (possible rate limit): {error_msg}"
                ) from e
            if "timeout" in lowered or "timed out" in lowered:
                # Timeouts can sometimes indicate rate limiting
                raise RateLimitError(
                    f"DuckDuckGo timeout (possible rate limit): {error_msg}"
                ) from e

            # Any other failure is treated as "no results" (best effort).
            return []

    def _get_full_content(
        self, relevant_items: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """
        Get full content for the relevant items by using FullSearchResults.

        Args:
            relevant_items: List of relevant preview dictionaries

        Returns:
            List of result dictionaries with full content, or
            ``relevant_items`` unchanged when no FullSearchResults helper
            was configured.
        """
        # If we have FullSearchResults, use it to get full content
        if hasattr(self, "full_search"):
            return self.full_search._get_full_content(relevant_items)

        # Otherwise, just return the relevant items without full content
        return relevant_items