Coverage for src / local_deep_research / api / client.py: 96%
131 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
1"""
2HTTP Client for Local Deep Research API.
3Simplifies authentication and API access by handling CSRF tokens automatically.
5This client allows you to programmatically interact with the Local Deep Research (LDR)
6application, enabling seamless integration with Python scripts and applications.
7It handles all the complexity of authentication, session management, and request formatting.
9Why CSRF with login?
10--------------------
11CSRF tokens prevent cross-site request forgery attacks. Even though you're logged in,
12CSRF ensures requests come from YOUR code, not from malicious websites that might
13try to use your browser's active session cookies to make unauthorized requests.
15Features:
16---------
17- Automatic login and session management
18- CSRF token handling
19- Research query submission and result retrieval
20- User settings management
21- Research history access
23Example usage:
24-------------
25 from local_deep_research.api.client import LDRClient
27 # Simple usage
28 client = LDRClient()
29 client.login("username", "password")
30 result = client.quick_research("What is quantum computing?")
31 print(result["summary"])
33 # With context manager (auto-logout)
34 with LDRClient() as client:
35 client.login("username", "password")
36 result = client.quick_research("What is quantum computing?")
37 print(result["summary"])
39 # Get research history
40 with LDRClient() as client:
41 client.login("username", "password")
42 history = client.get_history()
43 for item in history:
44 print(f"Research: {item['query']}")
46 # One-liner for quick queries
47 from local_deep_research.api.client import quick_query
48 summary = quick_query("username", "password", "What is DNA?")
50 # Update user settings
51 with LDRClient() as client:
52 client.login("username", "password")
53 client.update_setting("llm.model", "gemma:7b")
54 settings = client.get_settings()
55 print(f"Current model: {settings['llm']['model']}")
56"""
58import time
59from typing import Any
61from ..constants import ResearchStatus
62from loguru import logger
63from local_deep_research.benchmarks.comparison.results import Benchmark_results
64from local_deep_research.security import SafeSession
67class LDRClient:
68 """
69 HTTP client for LDR API access with automatic CSRF handling.
71 This client abstracts away the complexity of:
72 - Extracting CSRF tokens from HTML forms
73 - Managing session cookies
74 - Handling authentication flow
75 - Polling for research results
76 """
78 def __init__(self, base_url: str = "http://localhost:5000"):
79 """
80 Initialize the client.
82 Args:
83 base_url: URL of the LDR server (default: http://localhost:5000)
84 """
85 self.base_url = base_url
86 # Use SafeSession with allow_localhost since client connects to local LDR server
87 self.session = SafeSession(allow_localhost=True)
88 self.csrf_token = None
89 self.logged_in = False
90 self.username = None
92 def login(self, username: str, password: str) -> bool:
93 """
94 Login to LDR server. Handles all CSRF complexity internally.
96 This method:
97 1. Gets the login page to extract CSRF token from HTML form
98 2. Submits login with form data (not JSON)
99 3. Retrieves CSRF token for subsequent API calls
101 Args:
102 username: Your LDR username
103 password: Your LDR password
105 Returns:
106 True if login successful, False otherwise
107 """
108 try:
109 # Step 1: Get login page to extract CSRF token
110 # We need to parse HTML because Flask-WTF embeds CSRF in forms
111 login_page = self.session.get(f"{self.base_url}/auth/login")
113 # Simple CSRF extraction without BeautifulSoup dependency
114 # Look for: <input type="hidden" name="csrf_token" value="..."/>
115 import re
117 csrf_match = re.search(
118 r'<input[^>]*name="csrf_token"[^>]*value="([^"]*)"',
119 login_page.text,
120 )
122 if not csrf_match:
123 logger.error("Could not find CSRF token in login page")
124 return False
126 login_csrf = csrf_match.group(1)
128 # Step 2: Login with form data (NOT JSON!)
129 # Flask-WTF expects form-encoded data for login
130 response = self.session.post(
131 f"{self.base_url}/auth/login",
132 data={
133 "username": username,
134 "password": password,
135 "csrf_token": login_csrf,
136 },
137 allow_redirects=True,
138 )
140 if response.status_code not in [200, 302]:
141 logger.error(
142 f"Login failed with status: {response.status_code}"
143 )
144 return False
146 # Step 3: Get CSRF token for API requests
147 # This uses our new endpoint that returns JSON
148 csrf_response = self.session.get(f"{self.base_url}/auth/csrf-token")
149 if csrf_response.status_code == 200:
150 self.csrf_token = csrf_response.json()["csrf_token"]
151 self.logged_in = True
152 self.username = username
153 logger.info(f"Successfully logged in as {username}")
154 return True
155 logger.warning("Logged in but could not get API CSRF token")
156 # Still logged in, just no CSRF for API calls
157 self.logged_in = True
158 self.username = username
159 return True
161 except Exception:
162 logger.exception("Login error")
163 return False
165 def _api_headers(self) -> dict[str, str]:
166 """Get headers with CSRF token for API requests."""
167 if self.csrf_token:
168 return {"X-CSRF-Token": self.csrf_token}
169 return {}
171 def quick_research(
172 self,
173 query: str,
174 model: str | None = None,
175 search_engines: list[str] | None = None,
176 iterations: int = 2,
177 wait_for_result: bool = True,
178 timeout: int = 300,
179 ) -> dict[str, Any]:
180 """
181 Research a topic using LLMs and search engines.
183 This method runs a research process on your query using search engines
184 and large language models. It might take a few minutes to complete.
186 Args:
187 query: Your research question
188 model: LLM model to use (e.g., "gemma:7b", "llama2:7b")
189 search_engines: Search engines to use (default: ["searxng"])
190 iterations: How many research cycles to run (default: 2)
191 wait_for_result: If True, wait until done. If False, return immediately
192 timeout: Maximum seconds to wait (default: 300)
194 Returns:
195 If waiting for result: Dict with summary, sources, and findings
196 If not waiting: Dict with research_id to check status later
198 Raises:
199 RuntimeError: If not logged in or request fails
201 Example:
202 result = client.quick_research("Latest developments in fusion energy")
203 print(result["summary"])
204 """
205 if not self.logged_in:
206 raise RuntimeError("Not logged in. Call login() first.")
208 # Default search engines
209 if search_engines is None: 209 ↛ 213line 209 didn't jump to line 213 because the condition on line 209 was always true
210 search_engines = ["searxng"]
212 # Start research
213 response = self.session.post(
214 f"{self.base_url}/research/api/start",
215 json={
216 "query": query,
217 "model": model,
218 "search_engines": search_engines,
219 "iterations": iterations,
220 "questions_per_iteration": 3,
221 },
222 headers=self._api_headers(),
223 )
225 # Handle response
226 if response.status_code != 200:
227 # Try to extract error message
228 try:
229 error_data = response.json()
230 if isinstance(error_data, list) and len(error_data) > 0: 230 ↛ 233line 230 didn't jump to line 233 because the condition on line 230 was always true
231 error_msg = error_data[0].get("message", "Unknown error")
232 else:
233 error_msg = str(error_data)
234 except (ValueError, KeyError, AttributeError):
235 error_msg = response.text[:200]
236 raise RuntimeError(f"Failed to start research: {error_msg}")
238 result = response.json()
239 research_id = result.get("research_id")
241 if not research_id:
242 raise RuntimeError("No research ID returned")
244 if not wait_for_result:
245 return {"research_id": research_id}
247 # Poll for results
248 return self.wait_for_research(research_id, timeout)
250 def wait_for_research(
251 self, research_id: str, timeout: int = 300
252 ) -> dict[str, Any]:
253 """
254 Wait for research to complete and get results.
256 Use this after starting research with quick_research(wait_for_result=False).
257 Checks status every 5 seconds until complete or timeout.
259 Args:
260 research_id: ID of the research to wait for
261 timeout: Maximum seconds to wait (default: 300)
263 Returns:
264 Dict with research results (summary, sources, findings)
266 Raises:
267 RuntimeError: If research fails or times out
269 Example:
270 # Start research without waiting
271 resp = client.quick_research("Climate change impacts", wait_for_result=False)
272 # Get results when ready
273 results = client.wait_for_research(resp["research_id"])
274 """
275 start_time = time.time()
277 while time.time() - start_time < timeout:
278 status_response = self.session.get(
279 f"{self.base_url}/research/api/status/{research_id}"
280 )
282 if status_response.status_code == 200: 282 ↛ 298line 282 didn't jump to line 298 because the condition on line 282 was always true
283 status = status_response.json()
285 if status.get("status") == ResearchStatus.COMPLETED:
286 # Get final results
287 results_response = self.session.get(
288 f"{self.base_url}/api/report/{research_id}"
289 )
290 if results_response.status_code == 200:
291 return results_response.json()
292 raise RuntimeError("Failed to get results")
294 if status.get("status") == ResearchStatus.FAILED:
295 error_msg = status.get("error", "Unknown error")
296 raise RuntimeError(f"Research failed: {error_msg}")
298 time.sleep(5)
300 raise RuntimeError(f"Research timed out after {timeout} seconds")
302 def get_settings(self) -> dict[str, Any]:
303 """Get current user settings."""
304 if not self.logged_in:
305 raise RuntimeError("Not logged in. Call login() first.")
307 response = self.session.get(f"{self.base_url}/settings/api")
308 if response.status_code == 200:
309 return response.json()
310 raise RuntimeError(f"Failed to get settings: {response.status_code}")
312 def update_setting(self, key: str, value: Any) -> bool:
313 """
314 Update a setting.
316 Args:
317 key: Setting key (e.g., "llm.model")
318 value: New value for the setting
320 Returns:
321 True if successful
322 """
323 if not self.logged_in:
324 raise RuntimeError("Not logged in. Call login() first.")
326 response = self.session.put(
327 f"{self.base_url}/settings/api/{key}",
328 json={"value": value},
329 headers=self._api_headers(),
330 )
331 return response.status_code == 200
333 def get_history(self) -> list[dict[str, Any]]:
334 """
335 Get your past research queries.
337 Returns a list of previous research sessions with their details.
339 Returns:
340 List of research items with query, timestamp, and status info
342 Raises:
343 RuntimeError: If not logged in
345 Example:
346 history = client.get_history()
347 for item in history[:5]:
348 print(f"{item['timestamp']}: {item['query']}")
349 """
350 if not self.logged_in:
351 raise RuntimeError("Not logged in. Call login() first.")
353 response = self.session.get(f"{self.base_url}/history/api")
354 if response.status_code == 200:
355 data = response.json()
356 # Handle different response formats
357 if isinstance(data, dict):
358 return data.get("history", data.get("items", []))
359 if isinstance(data, list): 359 ↛ 361line 359 didn't jump to line 361 because the condition on line 359 was always true
360 return data
361 return []
362 raise RuntimeError(f"Failed to get history: {response.status_code}")
364 def logout(self):
365 """Logout and clear session."""
366 if self.logged_in:
367 self.session.post(
368 f"{self.base_url}/auth/logout", headers=self._api_headers()
369 )
370 self.session.close()
371 self.csrf_token = None
372 self.logged_in = False
373 self.username = None
375 def submit_benchmark(
376 self,
377 model,
378 hardware,
379 accuracy_focused,
380 accuracy_source,
381 avg_time_per_question,
382 context_window,
383 temperature,
384 ldr_version,
385 date_tested,
386 notes="",
387 ):
388 """
389 Submit your benchmark results to help the community.
391 Args:
392 model: Model name (e.g., "Llama-3.3-70B-Q4_K_M")
393 hardware: Hardware specs (e.g., "RTX 4090 24GB")
394 accuracy_focused: Accuracy percentage for focused strategy
395 accuracy_source: Accuracy percentage for source-based strategy
396 avg_time_per_question: Average time per question in seconds
397 context_window: Context window size used
398 temperature: Temperature setting used
399 ldr_version: Version of LDR used (e.g., "0.6.0")
400 date_tested: Date tested (YYYY-MM-DD format)
401 notes: Optional notes about the test
403 Returns:
404 True if submission was successful
406 Example:
407 client.submit_benchmark(
408 "Llama-3.3-70B-Q4_K_M", "RTX 4090 24GB",
409 87.0, 82.0, 45.2, 32000, 0.1, "0.6.0", "2024-01-15"
410 )
411 """
412 benchmarks = Benchmark_results()
413 return benchmarks.add_result(
414 model,
415 hardware,
416 accuracy_focused,
417 accuracy_source,
418 avg_time_per_question,
419 context_window,
420 temperature,
421 ldr_version,
422 date_tested,
423 notes,
424 )
426 def get_benchmarks(self, best_only=False):
427 """
428 Get community benchmark results.
430 Args:
431 best_only: If True, only return top performers
433 Returns:
434 List of benchmark results
436 Example:
437 all_results = client.get_benchmarks()
438 top_results = client.get_benchmarks(best_only=True)
439 """
440 benchmarks = Benchmark_results()
441 if best_only:
442 return benchmarks.get_best()
443 return benchmarks.get_all()
445 def __enter__(self):
446 """Support context manager for auto-cleanup."""
447 return self
449 def __exit__(self, exc_type, exc_val, exc_tb):
450 """Auto logout when used as context manager."""
451 self.logout()
454# Convenience functions for simple use cases
def quick_query(
    username: str,
    password: str,
    query: str,
    base_url: str = "http://localhost:5000",
) -> str:
    """
    Log in, run a single research query, and return its summary.

    A temporary LDRClient is used as a context manager, so the client is
    logged out again when the call finishes or fails.

    Example:
        summary = quick_query("user", "pass", "What is DNA?")
        print(summary)

    Args:
        username: LDR username
        password: LDR password
        query: Research question
        base_url: Server URL

    Returns:
        The "summary" entry of the research result, or the placeholder
        text "No summary available" when the result has no such entry

    Raises:
        RuntimeError: If the login is rejected
    """
    with LDRClient(base_url) as ldr:
        authenticated = ldr.login(username, password)
        if not authenticated:
            raise RuntimeError("Login failed")

        findings = ldr.quick_research(query)
        return findings.get("summary", "No summary available")