Coverage for src / local_deep_research / web_search_engines / engines / search_engine_mojeek.py: 96%
86 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
1from typing import Any, Dict, List, Optional
3from langchain_core.language_models import BaseLLM
4from loguru import logger
6from ...config import search_config
7from ...security.safe_requests import safe_get
8from ..rate_limiting import RateLimitError
9from ..search_engine_base import BaseSearchEngine
class MojeekSearchEngine(BaseSearchEngine):
    """
    Mojeek search engine implementation.

    Mojeek is a privacy-focused search engine with its own independent
    web crawler and index. Requires a paid API key from mojeek.com.
    """

    # Mark as public search engine
    is_public = True
    # Mark as generic search engine (general web search)
    is_generic = True
    # NOTE(review): the two flags below are consumed outside this class;
    # confirm their exact effect against the base class / search factory.
    is_lexical = True
    needs_llm_relevance_filter = True

    def _is_valid_search_result(self, url: str) -> bool:
        """
        Check if a URL is a valid absolute HTTP(S) URL.

        Args:
            url: Candidate URL taken from an API result entry

        Returns:
            True only for strings starting with ``http://`` or ``https://``
            (case-insensitive). Relative URLs, empty strings, non-HTTP
            schemes and non-string values all yield False.
        """
        # The value comes straight from decoded JSON, so it is not guaranteed
        # to be a string. Rejecting other types here means one malformed
        # entry is skipped instead of raising and discarding the whole batch.
        if not isinstance(url, str):
            return False
        return url.lower().startswith(("http://", "https://"))

    def __init__(
        self,
        max_results: int = 10,
        language: str = "en",
        region: str = "",
        safe_search: bool = False,
        api_key: Optional[str] = None,
        llm: Optional[BaseLLM] = None,
        max_filtered_results: Optional[int] = None,
        settings_snapshot: Optional[Dict[str, Any]] = None,
        include_full_content: bool = True,
        **kwargs,
    ):
        """
        Initialize the Mojeek search engine.

        Args:
            max_results: Maximum number of search results
            language: Language code in ISO 639-1 format (e.g. 'en', 'fr')
            region: Country code in ISO 3166-1 alpha-2 format (e.g. 'GB', 'FR')
            safe_search: Whether to enable safe search filtering
            api_key: Mojeek API key
            llm: Language model for relevance filtering
            max_filtered_results: Maximum number of results to keep after filtering
            settings_snapshot: Settings snapshot for thread context
            include_full_content: Whether to include full webpage content
            **kwargs: Extra keyword arguments forwarded to the base class
        """
        super().__init__(
            llm=llm,
            max_filtered_results=max_filtered_results,
            max_results=max_results,
            include_full_content=include_full_content,
            settings_snapshot=settings_snapshot,
            **kwargs,
        )

        # API key resolution is delegated to the base-class helper, which is
        # given the explicit argument, the settings key and the snapshot.
        mojeek_api_key = self._resolve_api_key(
            api_key,
            "search.engine.web.mojeek.api_key",
            engine_name="Mojeek",
            settings_snapshot=settings_snapshot,
        )

        self.search_url = "https://api.mojeek.com/search"
        self.max_results = max_results
        self.language = language
        self.region = region
        self.safe_search = safe_search
        self.api_key = mojeek_api_key

        # If full content is requested, initialize FullSearchResults
        self._init_full_search(
            web_search=self,
            language=language,
            max_results=max_results,
            region=region,
            safe_search=safe_search,
            time_period="y",
        )

    def _get_search_results(self, query: str) -> List[Dict[str, Any]]:
        """
        Get search results from the Mojeek API.

        Args:
            query: The search query

        Returns:
            List of search result dicts with the keys ``title``, ``url``,
            ``content``, ``engine`` and ``category``. An empty list is
            returned on a non-200 status, a response status other than
            "OK", or any unexpected error.

        Raises:
            RateLimitError: If the API answers with HTTP 403.
        """
        logger.info(f"Mojeek running search for query: {query}")

        try:
            params = {
                "q": query,
                "api_key": self.api_key,
                "fmt": "json",
                "t": self.max_results,
                "safe": 1 if self.safe_search else 0,
            }

            # Language / region parameters are only sent when configured.
            if self.language:
                params["lb"] = self.language
                params["lbb"] = 100

            if self.region:
                params["rb"] = self.region
                params["rbb"] = 10

            logger.info(f"Sending request to Mojeek API at {self.search_url}")

            response = safe_get(
                self.search_url,
                params=params,
                timeout=15,
            )

            if response.status_code == 403:
                raise RateLimitError(  # noqa: TRY301 — re-raised by except RateLimitError for base class retry
                    "Mojeek API rate limit hit (403 Forbidden)"
                )

            if response.status_code != 200:
                logger.warning(
                    f"Mojeek API returned status {response.status_code}"
                )
                return []

            data = response.json()

            response_data = data.get("response", {})
            if response_data.get("status") != "OK":
                logger.warning(
                    f"Mojeek API response status: "
                    f"{response_data.get('status', 'missing')}"
                )
                return []

            # ``or []`` also covers a payload where "results" is present but
            # null, which would otherwise fail on iteration.
            raw_results = response_data.get("results") or []
            results = []
            for result in raw_results:
                url = result.get("url", "")
                if not self._is_valid_search_result(url):
                    continue
                results.append(
                    {
                        "title": result.get("title", ""),
                        "url": url,
                        "content": result.get("desc", ""),
                        "engine": "mojeek",
                        "category": result.get("cats", ""),
                    }
                )

            if results:
                logger.info(f"Mojeek returned {len(results)} valid results")
            else:
                logger.warning(
                    f"Mojeek returned no valid results for query: {query}"
                )

            return results

        except RateLimitError:
            # Must propagate untouched; the broad handler below would
            # otherwise swallow it and return an empty list.
            raise
        except Exception:
            logger.exception("Error when searching using Mojeek")
            return []

    def _get_previews(self, query: str) -> List[Dict[str, Any]]:
        """
        Get preview information for Mojeek search results.

        Args:
            query: The search query

        Returns:
            List of preview dictionaries with the keys ``id``, ``title``,
            ``link``, ``snippet``, ``engine`` and ``category``; empty when
            the search produced no results.
        """
        logger.info(f"Getting Mojeek previews for query: {query}")

        results = self._get_search_results(query)

        if not results:
            logger.warning(f"No Mojeek results found for query: {query}")
            return []

        previews = []
        for i, result in enumerate(results):
            preview = {
                # The URL doubles as the id; a positional id is the fallback.
                "id": result.get("url", "") or f"mojeek-result-{i}",
                "title": result.get("title", ""),
                "link": result.get("url", ""),
                "snippet": result.get("content", ""),
                "engine": result.get("engine", "mojeek"),
                "category": result.get("category", ""),
            }
            previews.append(preview)

        return previews

    def _get_full_content(
        self, relevant_items: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """
        Get full content for the relevant search results.

        Args:
            relevant_items: List of relevant preview dictionaries

        Returns:
            List of result dictionaries with full content. The input list
            is returned unchanged in snippet-only mode, when full content
            is disabled or unavailable, or when retrieval raises.
        """
        if getattr(search_config, "SEARCH_SNIPPETS_ONLY", False):
            logger.info("Snippet-only mode, skipping full content retrieval")
            return relevant_items

        if self.include_full_content and hasattr(self, "full_search"):
            logger.info("Retrieving full webpage content")
            try:
                return self.full_search._get_full_content(relevant_items)
            except Exception:
                # Best effort: fall through and return the previews as-is.
                logger.exception("Error retrieving full content")

        return relevant_items