Coverage for src / local_deep_research / web_search_engines / engines / search_engine_mojeek.py: 91%
96 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-25 01:07 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-25 01:07 +0000
1from typing import Any, Dict, List, Optional
3from langchain_core.language_models import BaseLLM
4from loguru import logger
6from ...config import search_config
7from ...security.safe_requests import safe_get
8from ..rate_limiting import RateLimitError
9from ..search_engine_base import BaseSearchEngine
class MojeekSearchEngine(BaseSearchEngine):
    """
    Mojeek search engine implementation.

    Mojeek is a privacy-focused search engine with its own independent
    web crawler and index. Requires a paid API key from mojeek.com.

    The engine queries the Mojeek JSON API, normalizes each hit into a
    plain dict, and optionally delegates to ``FullSearchResults`` to fetch
    full page content for the hits that survive relevance filtering.
    """

    # Mark as public search engine
    is_public = True
    # Mark as generic search engine (general web search)
    is_generic = True

    def _is_valid_search_result(self, url: str) -> bool:
        """
        Check if a URL is a valid absolute HTTP(S) URL.

        Returns False for relative URLs, empty strings, non-HTTP schemes,
        and values that are not strings at all (for example ``None`` or a
        number coming from a malformed API payload), instead of raising.
        """
        if not isinstance(url, str):
            return False
        # The scheme comparison is case-insensitive ("HTTP://..." is fine).
        return url.lower().startswith(("http://", "https://"))

    def __init__(
        self,
        max_results: int = 10,
        language: str = "en",
        region: str = "",
        safe_search: bool = False,
        api_key: Optional[str] = None,
        llm: Optional[BaseLLM] = None,
        max_filtered_results: Optional[int] = None,
        settings_snapshot: Optional[Dict[str, Any]] = None,
        include_full_content: bool = True,
        **kwargs,
    ):
        """
        Initialize the Mojeek search engine.

        Args:
            max_results: Maximum number of search results
            language: Language code in ISO 639-1 format (e.g. 'en', 'fr')
            region: Country code in ISO 3166-1 alpha-2 format (e.g. 'GB', 'FR')
            safe_search: Whether to enable safe search filtering
            api_key: Mojeek API key; when omitted it is looked up in the
                settings snapshot under "search.engine.web.mojeek.api_key"
            llm: Language model for relevance filtering
            max_filtered_results: Maximum number of results to keep after filtering
            settings_snapshot: Settings snapshot for thread context
            include_full_content: Whether to include full webpage content
            **kwargs: Forwarded unchanged to ``BaseSearchEngine.__init__``

        Raises:
            ValueError: If no API key is passed and none is found in the
                settings snapshot.
        """
        super().__init__(
            llm=llm,
            max_filtered_results=max_filtered_results,
            max_results=max_results,
            **kwargs,
        )

        from ...config.search_config import get_setting_from_snapshot

        # An explicitly passed key wins; the settings snapshot is the fallback.
        mojeek_api_key = api_key
        if not mojeek_api_key:
            mojeek_api_key = get_setting_from_snapshot(
                "search.engine.web.mojeek.api_key",
                settings_snapshot=settings_snapshot,
            )

        if not mojeek_api_key:
            raise ValueError(
                "Mojeek API key not found. Please provide api_key parameter "
                "or set it in the UI settings."
            )

        self.search_url = "https://api.mojeek.com/search"
        self.max_results = max_results
        self.language = language
        self.region = region
        self.safe_search = safe_search
        self.api_key = mojeek_api_key
        self.include_full_content = include_full_content

        if include_full_content:
            try:
                from .full_search import FullSearchResults

                self.full_search = FullSearchResults(
                    llm=llm,
                    web_search=self,
                    language=language,
                    max_results=max_results,
                    region=region,
                    safesearch=safe_search,
                )
            except ImportError:
                # Degrade to snippet-only results rather than failing setup.
                logger.warning(
                    "FullSearchResults not available. "
                    "Full content retrieval disabled."
                )
                self.include_full_content = False

    def _parse_results(self, raw_results: Any) -> List[Dict[str, Any]]:
        """
        Convert the raw ``results`` payload into normalized result dicts.

        Entries that are not dicts, or whose URL is not an absolute HTTP(S)
        URL, are skipped individually so that one malformed entry does not
        discard the remaining valid results.

        Args:
            raw_results: Value of the ``results`` key from the API response

        Returns:
            List of dicts with keys title, url, content, engine, category;
            empty when ``raw_results`` is not a list.
        """
        if not isinstance(raw_results, list):
            return []

        parsed: List[Dict[str, Any]] = []
        for result in raw_results:
            if not isinstance(result, dict):
                continue
            url = result.get("url", "")
            if not self._is_valid_search_result(url):
                continue
            parsed.append(
                {
                    "title": result.get("title", ""),
                    "url": url,
                    "content": result.get("desc", ""),
                    "engine": "mojeek",
                    "category": result.get("cats", ""),
                }
            )
        return parsed

    def _get_search_results(self, query: str) -> List[Dict[str, Any]]:
        """
        Get search results from the Mojeek API.

        Args:
            query: The search query

        Returns:
            List of search result dicts. Empty on a non-200 status, on a
            response whose status field is not "OK", or on any unexpected
            error (which is logged).

        Raises:
            RateLimitError: If the API answers with HTTP 403.
        """
        logger.info(f"Mojeek running search for query: {query}")

        try:
            params: Dict[str, Any] = {
                "q": query,
                "api_key": self.api_key,
                "fmt": "json",
                "t": self.max_results,
                "safe": 1 if self.safe_search else 0,
            }

            # NOTE(review): "lb"/"lbb" and "rb"/"rbb" look like the language
            # and region codes with their boost weights; confirm against the
            # Mojeek API documentation before changing the constants.
            if self.language:
                params["lb"] = self.language
                params["lbb"] = 100

            if self.region:
                params["rb"] = self.region
                params["rbb"] = 10

            logger.info(f"Sending request to Mojeek API at {self.search_url}")

            response = safe_get(
                self.search_url,
                params=params,
                timeout=15,
            )

            if response.status_code == 403:
                raise RateLimitError(
                    "Mojeek API rate limit hit (403 Forbidden)"
                )

            if response.status_code != 200:
                logger.warning(
                    f"Mojeek API returned status {response.status_code}"
                )
                return []

            data = response.json()

            # Tolerate a non-object body or a null/non-object "response"
            # field; both are reported below as a "missing" status.
            response_data = (
                data.get("response") if isinstance(data, dict) else None
            )
            if not isinstance(response_data, dict):
                response_data = {}

            if response_data.get("status") != "OK":
                logger.warning(
                    f"Mojeek API response status: "
                    f"{response_data.get('status', 'missing')}"
                )
                return []

            results = self._parse_results(response_data.get("results"))

            if results:
                logger.info(f"Mojeek returned {len(results)} valid results")
            else:
                logger.warning(
                    f"Mojeek returned no valid results for query: {query}"
                )

            return results

        except RateLimitError:
            # Let the caller see rate limiting instead of an empty result.
            raise
        except Exception:
            logger.exception("Error when searching using Mojeek")
            return []

    def _get_previews(self, query: str) -> List[Dict[str, Any]]:
        """
        Get preview information for Mojeek search results.

        Args:
            query: The search query

        Returns:
            List of preview dictionaries with keys id, title, link,
            snippet, engine and category
        """
        logger.info(f"Getting Mojeek previews for query: {query}")

        results = self._get_search_results(query)

        if not results:
            logger.warning(f"No Mojeek results found for query: {query}")
            return []

        # The URL doubles as the preview id; a positional id is the fallback
        # for a result without one.
        return [
            {
                "id": result.get("url", "") or f"mojeek-result-{i}",
                "title": result.get("title", ""),
                "link": result.get("url", ""),
                "snippet": result.get("content", ""),
                "engine": result.get("engine", "mojeek"),
                "category": result.get("category", ""),
            }
            for i, result in enumerate(results)
        ]

    def _get_full_content(
        self, relevant_items: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """
        Get full content for the relevant search results.

        Args:
            relevant_items: List of relevant preview dictionaries

        Returns:
            List of result dictionaries with full content, or
            ``relevant_items`` unchanged when snippet-only mode is on, full
            content retrieval is unavailable, or retrieval fails.
        """
        if getattr(search_config, "SEARCH_SNIPPETS_ONLY", False):
            logger.info("Snippet-only mode, skipping full content retrieval")
            return relevant_items

        if self.include_full_content and hasattr(self, "full_search"):
            logger.info("Retrieving full webpage content")
            try:
                return self.full_search._get_full_content(relevant_items)
            except Exception:
                # Best effort: fall through and return the previews as-is.
                logger.exception("Error retrieving full content")

        return relevant_items