Coverage for src / local_deep_research / web_search_engines / engines / search_engine_mojeek.py: 91%

96 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-02-25 01:07 +0000

1from typing import Any, Dict, List, Optional 

2 

3from langchain_core.language_models import BaseLLM 

4from loguru import logger 

5 

6from ...config import search_config 

7from ...security.safe_requests import safe_get 

8from ..rate_limiting import RateLimitError 

9from ..search_engine_base import BaseSearchEngine 

10 

11 

class MojeekSearchEngine(BaseSearchEngine):
    """
    Mojeek search engine implementation.

    Mojeek is a privacy-focused search engine with its own independent
    web crawler and index. Requires a paid API key from mojeek.com.
    """

    # Mark as public search engine
    is_public = True
    # Mark as generic search engine (general web search)
    is_generic = True

    def _is_valid_search_result(self, url: str) -> bool:
        """
        Check if a URL is a valid absolute HTTP(S) URL.

        Args:
            url: Candidate URL taken from a raw API result.

        Returns:
            True only for strings starting with ``http://`` or ``https://``
            (case-insensitive). Relative URLs, empty strings, non-HTTP
            schemes and non-string values (e.g. ``None`` or a number coming
            from malformed JSON) all yield False.
        """
        # Guard against non-string payload values: calling .lower() on them
        # would raise and, via the caller's catch-all handler, throw away
        # every result of the request instead of just this entry.
        if not isinstance(url, str):
            return False
        return url.lower().startswith(("http://", "https://"))

    def __init__(
        self,
        max_results: int = 10,
        language: str = "en",
        region: str = "",
        safe_search: bool = False,
        api_key: Optional[str] = None,
        llm: Optional[BaseLLM] = None,
        max_filtered_results: Optional[int] = None,
        settings_snapshot: Optional[Dict[str, Any]] = None,
        include_full_content: bool = True,
        **kwargs,
    ):
        """
        Initialize the Mojeek search engine.

        Args:
            max_results: Maximum number of search results
            language: Language code in ISO 639-1 format (e.g. 'en', 'fr')
            region: Country code in ISO 3166-1 alpha-2 format (e.g. 'GB', 'FR')
            safe_search: Whether to enable safe search filtering
            api_key: Mojeek API key. When empty, the key is read from the
                ``search.engine.web.mojeek.api_key`` setting instead.
            llm: Language model for relevance filtering
            max_filtered_results: Maximum number of results to keep after filtering
            settings_snapshot: Settings snapshot for thread context
            include_full_content: Whether to include full webpage content
            **kwargs: Forwarded unchanged to ``BaseSearchEngine.__init__``.

        Raises:
            ValueError: If no API key is passed in and none is found in the
                settings.
        """
        super().__init__(
            llm=llm,
            max_filtered_results=max_filtered_results,
            max_results=max_results,
            **kwargs,
        )

        # Kept as a function-level import, exactly as the module had it.
        # NOTE(review): presumably this avoids an import cycle - confirm
        # before hoisting it to module level.
        from ...config.search_config import get_setting_from_snapshot

        mojeek_api_key = api_key
        if not mojeek_api_key:
            mojeek_api_key = get_setting_from_snapshot(
                "search.engine.web.mojeek.api_key",
                settings_snapshot=settings_snapshot,
            )

        if not mojeek_api_key:
            raise ValueError(
                "Mojeek API key not found. Please provide api_key parameter "
                "or set it in the UI settings."
            )

        self.search_url = "https://api.mojeek.com/search"
        self.max_results = max_results
        self.language = language
        self.region = region
        self.safe_search = safe_search
        self.api_key = mojeek_api_key
        self.include_full_content = include_full_content

        if include_full_content:
            try:
                from .full_search import FullSearchResults

                self.full_search = FullSearchResults(
                    llm=llm,
                    web_search=self,
                    language=language,
                    max_results=max_results,
                    region=region,
                    safesearch=safe_search,
                )
            except ImportError:
                logger.warning(
                    "FullSearchResults not available. "
                    "Full content retrieval disabled."
                )
                # Fall back to snippet-only results when the optional
                # full-content helper cannot be imported.
                self.include_full_content = False

    def _get_search_results(self, query: str) -> List[Dict[str, Any]]:
        """
        Get search results from the Mojeek API.

        Args:
            query: The search query

        Returns:
            List of search result dicts with the keys ``title``, ``url``,
            ``content``, ``engine`` and ``category``. Entries without a valid
            absolute HTTP(S) URL are dropped. An empty list is returned on
            any non-rate-limit failure (unexpected HTTP status, API status
            other than ``"OK"``, or an exception while requesting/parsing).

        Raises:
            RateLimitError: If the API answers with HTTP 403 or HTTP 429.
        """
        logger.info(f"Mojeek running search for query: {query}")

        try:
            params: Dict[str, Any] = {
                "q": query,
                "api_key": self.api_key,
                "fmt": "json",
                "t": self.max_results,
                "safe": 1 if self.safe_search else 0,
            }

            # NOTE(review): lb/lbb and rb/rbb look like Mojeek's language and
            # region boost code + strength pairs - confirm against API docs.
            if self.language:
                params["lb"] = self.language
                params["lbb"] = 100

            if self.region:
                params["rb"] = self.region
                params["rbb"] = 10

            logger.info(f"Sending request to Mojeek API at {self.search_url}")

            response = safe_get(
                self.search_url,
                params=params,
                timeout=15,
            )

            if response.status_code == 403:
                raise RateLimitError(
                    "Mojeek API rate limit hit (403 Forbidden)"
                )

            # 429 is the standard "Too Many Requests" status; surface it as a
            # rate limit too instead of silently returning no results.
            if response.status_code == 429:
                raise RateLimitError(
                    "Mojeek API rate limit hit (429 Too Many Requests)"
                )

            if response.status_code != 200:
                logger.warning(
                    f"Mojeek API returned status {response.status_code}"
                )
                return []

            data = response.json()

            # ``or {}`` / ``or []`` also cover an explicit JSON null, which
            # would otherwise fail on the attribute access / iteration below.
            response_data = data.get("response") or {}
            if response_data.get("status") != "OK":
                logger.warning(
                    f"Mojeek API response status: "
                    f"{response_data.get('status', 'missing')}"
                )
                return []

            raw_results = response_data.get("results") or []
            results = []
            for result in raw_results:
                # Skip malformed entries rather than losing the whole batch
                # to the catch-all handler.
                if not isinstance(result, dict):
                    continue
                url = result.get("url", "")
                if not self._is_valid_search_result(url):
                    continue
                results.append(
                    {
                        "title": result.get("title", ""),
                        "url": url,
                        "content": result.get("desc", ""),
                        "engine": "mojeek",
                        "category": result.get("cats", ""),
                    }
                )

            if results:
                logger.info(f"Mojeek returned {len(results)} valid results")
            else:
                logger.warning(
                    f"Mojeek returned no valid results for query: {query}"
                )

            return results

        except RateLimitError:
            # Re-raised unchanged so it is not swallowed by the catch-all
            # handler below.
            raise
        except Exception:
            logger.exception("Error when searching using Mojeek")
            return []

    def _get_previews(self, query: str) -> List[Dict[str, Any]]:
        """
        Get preview information for Mojeek search results.

        Args:
            query: The search query

        Returns:
            List of preview dictionaries with the keys ``id``, ``title``,
            ``link``, ``snippet``, ``engine`` and ``category``; empty when
            the search produced no results.
        """
        logger.info(f"Getting Mojeek previews for query: {query}")

        results = self._get_search_results(query)

        if not results:
            logger.warning(f"No Mojeek results found for query: {query}")
            return []

        return [
            {
                # Use the URL as the identifier; fall back to a positional
                # id when the URL is empty.
                "id": result.get("url", "") or f"mojeek-result-{i}",
                "title": result.get("title", ""),
                "link": result.get("url", ""),
                "snippet": result.get("content", ""),
                "engine": result.get("engine", "mojeek"),
                "category": result.get("category", ""),
            }
            for i, result in enumerate(results)
        ]

    def _get_full_content(
        self, relevant_items: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """
        Get full content for the relevant search results.

        Args:
            relevant_items: List of relevant preview dictionaries

        Returns:
            The result of ``self.full_search._get_full_content`` when full
            content retrieval is enabled and succeeds; otherwise
            ``relevant_items`` unchanged (snippet-only mode, retrieval
            disabled, or retrieval raised).
        """
        if getattr(search_config, "SEARCH_SNIPPETS_ONLY", False):
            logger.info("Snippet-only mode, skipping full content retrieval")
            return relevant_items

        # ``full_search`` only exists when __init__ managed to construct it.
        if self.include_full_content and hasattr(self, "full_search"):
            logger.info("Retrieving full webpage content")
            try:
                return self.full_search._get_full_content(relevant_items)
            except Exception:
                # Best effort: log and fall through to the previews.
                logger.exception("Error retrieving full content")

        return relevant_items