Coverage for src / local_deep_research / web_search_engines / engines / search_engine_ddg.py: 100%

41 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-14 23:55 +0000

1from typing import Any, Dict, List, Optional 

2 

3from langchain_community.utilities import DuckDuckGoSearchAPIWrapper 

4from langchain_core.language_models import BaseLLM 

5from loguru import logger 

6 

7from ..rate_limiting import RateLimitError 

8from ..search_engine_base import BaseSearchEngine 

9 

10 

class DuckDuckGoSearchEngine(BaseSearchEngine):
    """DuckDuckGo search engine implementation with two-phase retrieval.

    Phase one (``_get_previews``) asks the LangChain DuckDuckGo wrapper for
    titles, snippets and links. Phase two (``_get_full_content``) delegates
    to ``self.full_search`` when that attribute exists, otherwise the
    previews are returned unchanged.
    """

    # Mark as public search engine
    is_public = True
    # Mark as generic search engine (general web search)
    is_generic = True

    def __init__(
        self,
        max_results: int = 10,
        region: str = "us",
        safe_search: bool = True,
        llm: Optional[BaseLLM] = None,
        language: str = "English",
        include_full_content: bool = False,
        max_filtered_results: int = 5,
        settings_snapshot: Optional[Dict[str, Any]] = None,
    ):
        """
        Initialize the DuckDuckGo search engine.

        Args:
            max_results: Maximum number of search results
            region: Region code for search results
            safe_search: Whether to enable safe search
            llm: Language model for relevance filtering
            language: Language for content processing
            include_full_content: Whether to include full webpage content in results
            max_filtered_results: Forwarded unchanged to ``BaseSearchEngine``
            settings_snapshot: Forwarded unchanged to ``BaseSearchEngine``
        """
        # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
        super().__init__(
            llm=llm,
            max_filtered_results=max_filtered_results,
            max_results=max_results,
            include_full_content=include_full_content,
            settings_snapshot=settings_snapshot,
        )
        self.region = region
        self.safe_search = safe_search
        self.language = language

        # Initialize the DuckDuckGo wrapper (lower-case safesearch values here)
        self.engine = DuckDuckGoSearchAPIWrapper(
            region=region,
            max_results=max_results,
            safesearch="moderate" if safe_search else "off",
        )

        # Initialize FullSearchResults if full content is requested.
        # NOTE(review): _init_full_search is defined outside this class;
        # presumably it sets ``self.full_search`` -- confirm in the base class.
        # It receives capitalised safe-search values, unlike the wrapper above.
        self._init_full_search(
            web_search=self.engine,
            language=language,
            max_results=max_results,
            region=region,
            time_period="y",
            safe_search="Moderate" if safe_search else "Off",
        )

    def _get_previews(self, query: str) -> List[Dict[str, Any]]:
        """
        Get preview information (titles and snippets) for initial search results.

        Args:
            query: The search query

        Returns:
            List of preview dictionaries with 'id', 'title', 'snippet' and
            'link' keys. An empty list is returned when the wrapper does not
            return a list, or when an error occurs that does not look like
            rate limiting.

        Raises:
            RateLimitError: If the error text mentions a rate limit, a 403 /
                "forbidden" response, or a timeout. The original exception
                is chained as the cause.
        """
        try:
            # Get search results from DuckDuckGo
            results = self.engine.results(query, max_results=self.max_results)

            if not isinstance(results, list):
                return []

            # Process results to get previews
            return [
                {
                    "id": result.get("link"),  # Use URL as ID for DDG
                    "title": result.get("title", ""),
                    "snippet": result.get("snippet", ""),
                    "link": result.get("link", ""),
                }
                for result in results
            ]

        except Exception as e:
            error_msg = str(e)
            # Lower-case once; every pattern check below is case-insensitive
            # except the literal "403" status code.
            lowered = error_msg.lower()
            sanitized = self._sanitize_error_message(error_msg)
            logger.exception("Error getting DuckDuckGo previews: {}", sanitized)

            # Check for known rate limit patterns ("202 Ratelimit" is covered
            # by the case-insensitive "ratelimit" match)
            if "ratelimit" in lowered:
                raise RateLimitError(
                    f"DuckDuckGo rate limit hit: {sanitized}"
                ) from e
            if "403" in error_msg or "forbidden" in lowered:
                raise RateLimitError(
                    f"DuckDuckGo access forbidden (possible rate limit): {sanitized}"
                ) from e
            if "timeout" in lowered or "timed out" in lowered:
                # Timeouts can sometimes indicate rate limiting
                raise RateLimitError(
                    f"DuckDuckGo timeout (possible rate limit): {sanitized}"
                ) from e

            # Any other failure is treated as "no results" (already logged)
            return []

    def _get_full_content(
        self, relevant_items: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """
        Get full content for the relevant items by using FullSearchResults.

        Args:
            relevant_items: List of relevant preview dictionaries

        Returns:
            List of result dictionaries with full content, or the input list
            unchanged when no ``full_search`` attribute is present
        """
        # If we have FullSearchResults, use it to get full content
        if hasattr(self, "full_search"):
            return self.full_search._get_full_content(relevant_items)

        # Otherwise, just return the relevant items without full content
        return relevant_items