Coverage for src/local_deep_research/web_search_engines/engines/search_engine_ddg.py

1from typing import Any, Dict, List, Optional

3from langchain_community.utilities import DuckDuckGoSearchAPIWrapper

4from langchain_core.language_models import BaseLLM

5from loguru import logger

7from ..rate_limiting import RateLimitError

8from ..search_engine_base import BaseSearchEngine

9from .full_search import FullSearchResults # Import the FullSearchResults class

class DuckDuckGoSearchEngine(BaseSearchEngine):
    """DuckDuckGo search engine implementation with two-phase retrieval."""

    # Mark as public search engine
    is_public = True

    # Mark as generic search engine (general web search)
    is_generic = True

    def __init__(
        self,
        max_results: int = 10,
        region: str = "us",
        safe_search: bool = True,
        llm: Optional[BaseLLM] = None,
        language: str = "English",
        include_full_content: bool = False,
        max_filtered_results: int = 5,
    ):
        """
        Initialize the DuckDuckGo search engine.

        Args:
            max_results: Maximum number of search results
            region: Region code for search results
            safe_search: Whether to enable safe search
            llm: Language model for relevance filtering
            language: Language for content processing
            include_full_content: Whether to include full webpage content in results
            max_filtered_results: Forwarded unchanged to BaseSearchEngine
        """
        # Initialize the BaseSearchEngine with LLM, max_filtered_results, and max_results
        super().__init__(
            llm=llm,
            max_filtered_results=max_filtered_results,
            max_results=max_results,
        )
        self.region = region
        self.safe_search = safe_search
        self.language = language
        self.include_full_content = include_full_content

        # Initialize the DuckDuckGo wrapper
        self.engine = DuckDuckGoSearchAPIWrapper(
            region=region,
            max_results=max_results,
            safesearch="moderate" if safe_search else "off",
        )

        # Initialize FullSearchResults only if full content is requested AND an
        # LLM is available. The attribute is deliberately left undefined
        # otherwise: _get_full_content() detects it with hasattr().
        if include_full_content and llm:
            self.full_search = FullSearchResults(
                llm=llm,
                web_search=self.engine,
                language=language,
                max_results=max_results,
                region=region,
                time="y",
                safesearch="Moderate" if safe_search else "Off",
            )

    def run(
        self, query: str, research_context: Dict[str, Any] | None = None
    ) -> List[Dict[str, Any]]:
        """
        Execute a search using DuckDuckGo with the two-phase approach.
        Respects config parameters:
        - SEARCH_SNIPPETS_ONLY: If True, only returns snippets without full content
        - SKIP_RELEVANCE_FILTER: If True, returns all results without filtering

        Args:
            query: The search query
            research_context: Context from previous research to use.

        Returns:
            List of search results
        """
        logger.info("---Execute a search using DuckDuckGo---")

        # Implementation of the two-phase approach (from parent class)
        return super().run(query, research_context=research_context)

    def _get_previews(self, query: str) -> List[Dict[str, Any]]:
        """
        Get preview information (titles and snippets) for initial search results.

        Args:
            query: The search query

        Returns:
            List of preview dictionaries with 'id', 'title', 'snippet' and
            'link' keys. An empty list is returned when the wrapper does not
            return a list, or when an error occurs that does not look like
            rate limiting.

        Raises:
            RateLimitError: If the error text mentions a rate limit, a
                403/forbidden response, or a timeout.
        """
        try:
            # Get search results from DuckDuckGo
            results = self.engine.results(query, max_results=self.max_results)

            if not isinstance(results, list):
                return []

            # Process results to get previews
            return [
                {
                    "id": result.get("link"),  # Use URL as ID for DDG
                    "title": result.get("title", ""),
                    "snippet": result.get("snippet", ""),
                    "link": result.get("link", ""),
                }
                for result in results
            ]

        except Exception as e:
            error_msg = str(e)
            logger.exception(f"Error getting DuckDuckGo previews: {error_msg}")

            # Lower-case once; every pattern check below is case-insensitive
            # except the literal "403" status code.
            lowered = error_msg.lower()

            # Check for known rate limit patterns. The case-insensitive
            # "ratelimit" test also matches the "202 Ratelimit" message.
            if "ratelimit" in lowered:
                raise RateLimitError(
                    f"DuckDuckGo rate limit hit: {error_msg}"
                ) from e
            if "403" in error_msg or "forbidden" in lowered:
                raise RateLimitError(
                    f"DuckDuckGo access forbidden (possible rate limit): {error_msg}"
                ) from e
            if "timeout" in lowered or "timed out" in lowered:
                # Timeouts can sometimes indicate rate limiting
                raise RateLimitError(
                    f"DuckDuckGo timeout (possible rate limit): {error_msg}"
                ) from e

            # Any other failure is treated as "no results" (best effort).
            return []

    def _get_full_content(
        self, relevant_items: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """
        Get full content for the relevant items by using FullSearchResults.

        Args:
            relevant_items: List of relevant preview dictionaries

        Returns:
            List of result dictionaries with full content, or the unchanged
            relevant_items when no FullSearchResults helper was created.
        """
        # If we have FullSearchResults, use it to get full content
        if hasattr(self, "full_search"):
            return self.full_search._get_full_content(relevant_items)

        # Otherwise, just return the relevant items without full content
        return relevant_items

Coverage for src / local_deep_research / web_search_engines / engines / search_engine_ddg.py: 100%

46 statements