Coverage for src / local_deep_research / api / client.py: 96%

131 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-02-25 01:07 +0000

1""" 

2HTTP Client for Local Deep Research API. 

3Simplifies authentication and API access by handling CSRF tokens automatically. 

4 

5This client allows you to programmatically interact with the Local Deep Research (LDR) 

6application, enabling seamless integration with Python scripts and applications. 

7It handles all the complexity of authentication, session management, and request formatting. 

8 

9Why CSRF with login? 

10-------------------- 

11CSRF tokens prevent cross-site request forgery attacks. Even though you're logged in, 

12CSRF ensures requests come from YOUR code, not from malicious websites that might 

13try to use your browser's active session cookies to make unauthorized requests. 

14 

15Features: 

16--------- 

17- Automatic login and session management 

18- CSRF token handling 

19- Research query submission and result retrieval 

20- User settings management 

21- Research history access 

22 

23Example usage: 

24--------------

25 from local_deep_research.api.client import LDRClient 

26 

27 # Simple usage 

28 client = LDRClient() 

29 client.login("username", "password") 

30 result = client.quick_research("What is quantum computing?") 

31 print(result["summary"]) 

32 

33 # With context manager (auto-logout) 

34 with LDRClient() as client: 

35 client.login("username", "password") 

36 result = client.quick_research("What is quantum computing?") 

37 print(result["summary"]) 

38 

39 # Get research history 

40 with LDRClient() as client: 

41 client.login("username", "password") 

42 history = client.get_history() 

43 for item in history: 

44 print(f"Research: {item['query']}") 

45 

46 # One-liner for quick queries 

47 from local_deep_research.api.client import quick_query 

48 summary = quick_query("username", "password", "What is DNA?") 

49 

50 # Update user settings 

51 with LDRClient() as client: 

52 client.login("username", "password") 

53 client.update_setting("llm.model", "gemma:7b") 

54 settings = client.get_settings() 

55 print(f"Current model: {settings['llm']['model']}") 

56""" 

57 

58import time 

59from typing import Any 

60 

61from ..constants import ResearchStatus 

62from loguru import logger 

63from local_deep_research.benchmarks.comparison.results import Benchmark_results 

64from local_deep_research.security import SafeSession 

65 

66 

67class LDRClient: 

68 """ 

69 HTTP client for LDR API access with automatic CSRF handling. 

70 

71 This client abstracts away the complexity of: 

72 - Extracting CSRF tokens from HTML forms 

73 - Managing session cookies 

74 - Handling authentication flow 

75 - Polling for research results 

76 """ 

77 

78 def __init__(self, base_url: str = "http://localhost:5000"): 

79 """ 

80 Initialize the client. 

81 

82 Args: 

83 base_url: URL of the LDR server (default: http://localhost:5000) 

84 """ 

85 self.base_url = base_url 

86 # Use SafeSession with allow_localhost since client connects to local LDR server 

87 self.session = SafeSession(allow_localhost=True) 

88 self.csrf_token = None 

89 self.logged_in = False 

90 self.username = None 

91 

92 def login(self, username: str, password: str) -> bool: 

93 """ 

94 Login to LDR server. Handles all CSRF complexity internally. 

95 

96 This method: 

97 1. Gets the login page to extract CSRF token from HTML form 

98 2. Submits login with form data (not JSON) 

99 3. Retrieves CSRF token for subsequent API calls 

100 

101 Args: 

102 username: Your LDR username 

103 password: Your LDR password 

104 

105 Returns: 

106 True if login successful, False otherwise 

107 """ 

108 try: 

109 # Step 1: Get login page to extract CSRF token 

110 # We need to parse HTML because Flask-WTF embeds CSRF in forms 

111 login_page = self.session.get(f"{self.base_url}/auth/login") 

112 

113 # Simple CSRF extraction without BeautifulSoup dependency 

114 # Look for: <input type="hidden" name="csrf_token" value="..."/> 

115 import re 

116 

117 csrf_match = re.search( 

118 r'<input[^>]*name="csrf_token"[^>]*value="([^"]*)"', 

119 login_page.text, 

120 ) 

121 

122 if not csrf_match: 

123 logger.error("Could not find CSRF token in login page") 

124 return False 

125 

126 login_csrf = csrf_match.group(1) 

127 

128 # Step 2: Login with form data (NOT JSON!) 

129 # Flask-WTF expects form-encoded data for login 

130 response = self.session.post( 

131 f"{self.base_url}/auth/login", 

132 data={ 

133 "username": username, 

134 "password": password, 

135 "csrf_token": login_csrf, 

136 }, 

137 allow_redirects=True, 

138 ) 

139 

140 if response.status_code not in [200, 302]: 

141 logger.error( 

142 f"Login failed with status: {response.status_code}" 

143 ) 

144 return False 

145 

146 # Step 3: Get CSRF token for API requests 

147 # This uses our new endpoint that returns JSON 

148 csrf_response = self.session.get(f"{self.base_url}/auth/csrf-token") 

149 if csrf_response.status_code == 200: 

150 self.csrf_token = csrf_response.json()["csrf_token"] 

151 self.logged_in = True 

152 self.username = username 

153 logger.info(f"Successfully logged in as {username}") 

154 return True 

155 else: 

156 logger.warning("Logged in but could not get API CSRF token") 

157 # Still logged in, just no CSRF for API calls 

158 self.logged_in = True 

159 self.username = username 

160 return True 

161 

162 except Exception: 

163 logger.exception("Login error") 

164 return False 

165 

166 def _api_headers(self) -> dict[str, str]: 

167 """Get headers with CSRF token for API requests.""" 

168 if self.csrf_token: 

169 return {"X-CSRF-Token": self.csrf_token} 

170 return {} 

171 

172 def quick_research( 

173 self, 

174 query: str, 

175 model: str | None = None, 

176 search_engines: list[str] | None = None, 

177 iterations: int = 2, 

178 wait_for_result: bool = True, 

179 timeout: int = 300, 

180 ) -> dict[str, Any]: 

181 """ 

182 Research a topic using LLMs and search engines. 

183 

184 This method runs a research process on your query using search engines 

185 and large language models. It might take a few minutes to complete. 

186 

187 Args: 

188 query: Your research question 

189 model: LLM model to use (e.g., "gemma:7b", "llama2:7b") 

190 search_engines: Search engines to use (default: ["searxng"]) 

191 iterations: How many research cycles to run (default: 2) 

192 wait_for_result: If True, wait until done. If False, return immediately 

193 timeout: Maximum seconds to wait (default: 300) 

194 

195 Returns: 

196 If waiting for result: Dict with summary, sources, and findings 

197 If not waiting: Dict with research_id to check status later 

198 

199 Raises: 

200 RuntimeError: If not logged in or request fails 

201 

202 Example: 

203 result = client.quick_research("Latest developments in fusion energy") 

204 print(result["summary"]) 

205 """ 

206 if not self.logged_in: 

207 raise RuntimeError("Not logged in. Call login() first.") 

208 

209 # Default search engines 

210 if search_engines is None: 210 ↛ 214line 210 didn't jump to line 214 because the condition on line 210 was always true

211 search_engines = ["searxng"] 

212 

213 # Start research 

214 response = self.session.post( 

215 f"{self.base_url}/research/api/start", 

216 json={ 

217 "query": query, 

218 "model": model, 

219 "search_engines": search_engines, 

220 "iterations": iterations, 

221 "questions_per_iteration": 3, 

222 }, 

223 headers=self._api_headers(), 

224 ) 

225 

226 # Handle response 

227 if response.status_code != 200: 

228 # Try to extract error message 

229 try: 

230 error_data = response.json() 

231 if isinstance(error_data, list) and len(error_data) > 0: 231 ↛ 234line 231 didn't jump to line 234 because the condition on line 231 was always true

232 error_msg = error_data[0].get("message", "Unknown error") 

233 else: 

234 error_msg = str(error_data) 

235 except (ValueError, KeyError, AttributeError): 

236 error_msg = response.text[:200] 

237 raise RuntimeError(f"Failed to start research: {error_msg}") 

238 

239 result = response.json() 

240 research_id = result.get("research_id") 

241 

242 if not research_id: 

243 raise RuntimeError("No research ID returned") 

244 

245 if not wait_for_result: 

246 return {"research_id": research_id} 

247 

248 # Poll for results 

249 return self.wait_for_research(research_id, timeout) 

250 

251 def wait_for_research( 

252 self, research_id: str, timeout: int = 300 

253 ) -> dict[str, Any]: 

254 """ 

255 Wait for research to complete and get results. 

256 

257 Use this after starting research with quick_research(wait_for_result=False). 

258 Checks status every 5 seconds until complete or timeout. 

259 

260 Args: 

261 research_id: ID of the research to wait for 

262 timeout: Maximum seconds to wait (default: 300) 

263 

264 Returns: 

265 Dict with research results (summary, sources, findings) 

266 

267 Raises: 

268 RuntimeError: If research fails or times out 

269 

270 Example: 

271 # Start research without waiting 

272 resp = client.quick_research("Climate change impacts", wait_for_result=False) 

273 # Get results when ready 

274 results = client.wait_for_research(resp["research_id"]) 

275 """ 

276 start_time = time.time() 

277 

278 while time.time() - start_time < timeout: 

279 status_response = self.session.get( 

280 f"{self.base_url}/research/api/status/{research_id}" 

281 ) 

282 

283 if status_response.status_code == 200: 283 ↛ 300line 283 didn't jump to line 300 because the condition on line 283 was always true

284 status = status_response.json() 

285 

286 if status.get("status") == ResearchStatus.COMPLETED: 

287 # Get final results 

288 results_response = self.session.get( 

289 f"{self.base_url}/api/report/{research_id}" 

290 ) 

291 if results_response.status_code == 200: 

292 return results_response.json() 

293 else: 

294 raise RuntimeError("Failed to get results") 

295 

296 elif status.get("status") == ResearchStatus.FAILED: 

297 error_msg = status.get("error", "Unknown error") 

298 raise RuntimeError(f"Research failed: {error_msg}") 

299 

300 time.sleep(5) 

301 

302 raise RuntimeError(f"Research timed out after {timeout} seconds") 

303 

304 def get_settings(self) -> dict[str, Any]: 

305 """Get current user settings.""" 

306 if not self.logged_in: 

307 raise RuntimeError("Not logged in. Call login() first.") 

308 

309 response = self.session.get(f"{self.base_url}/settings/api") 

310 if response.status_code == 200: 

311 return response.json() 

312 else: 

313 raise RuntimeError( 

314 f"Failed to get settings: {response.status_code}" 

315 ) 

316 

317 def update_setting(self, key: str, value: Any) -> bool: 

318 """ 

319 Update a setting. 

320 

321 Args: 

322 key: Setting key (e.g., "llm.model") 

323 value: New value for the setting 

324 

325 Returns: 

326 True if successful 

327 """ 

328 if not self.logged_in: 

329 raise RuntimeError("Not logged in. Call login() first.") 

330 

331 response = self.session.put( 

332 f"{self.base_url}/settings/api/{key}", 

333 json={"value": value}, 

334 headers=self._api_headers(), 

335 ) 

336 return response.status_code == 200 

337 

338 def get_history(self) -> list[dict[str, Any]]: 

339 """ 

340 Get your past research queries. 

341 

342 Returns a list of previous research sessions with their details. 

343 

344 Returns: 

345 List of research items with query, timestamp, and status info 

346 

347 Raises: 

348 RuntimeError: If not logged in 

349 

350 Example: 

351 history = client.get_history() 

352 for item in history[:5]: 

353 print(f"{item['timestamp']}: {item['query']}") 

354 """ 

355 if not self.logged_in: 

356 raise RuntimeError("Not logged in. Call login() first.") 

357 

358 response = self.session.get(f"{self.base_url}/history/api") 

359 if response.status_code == 200: 

360 data = response.json() 

361 # Handle different response formats 

362 if isinstance(data, dict): 

363 return data.get("history", data.get("items", [])) 

364 elif isinstance(data, list): 364 ↛ 366line 364 didn't jump to line 366 because the condition on line 364 was always true

365 return data 

366 return [] 

367 else: 

368 raise RuntimeError(f"Failed to get history: {response.status_code}") 

369 

370 def logout(self): 

371 """Logout and clear session.""" 

372 if self.logged_in: 

373 self.session.post( 

374 f"{self.base_url}/auth/logout", headers=self._api_headers() 

375 ) 

376 self.session.close() 

377 self.csrf_token = None 

378 self.logged_in = False 

379 self.username = None 

380 

381 def submit_benchmark( 

382 self, 

383 model, 

384 hardware, 

385 accuracy_focused, 

386 accuracy_source, 

387 avg_time_per_question, 

388 context_window, 

389 temperature, 

390 ldr_version, 

391 date_tested, 

392 notes="", 

393 ): 

394 """ 

395 Submit your benchmark results to help the community. 

396 

397 Args: 

398 model: Model name (e.g., "Llama-3.3-70B-Q4_K_M") 

399 hardware: Hardware specs (e.g., "RTX 4090 24GB") 

400 accuracy_focused: Accuracy percentage for focused strategy 

401 accuracy_source: Accuracy percentage for source-based strategy 

402 avg_time_per_question: Average time per question in seconds 

403 context_window: Context window size used 

404 temperature: Temperature setting used 

405 ldr_version: Version of LDR used (e.g., "0.6.0") 

406 date_tested: Date tested (YYYY-MM-DD format) 

407 notes: Optional notes about the test 

408 

409 Returns: 

410 True if submission was successful 

411 

412 Example: 

413 client.submit_benchmark( 

414 "Llama-3.3-70B-Q4_K_M", "RTX 4090 24GB", 

415 87.0, 82.0, 45.2, 32000, 0.1, "0.6.0", "2024-01-15" 

416 ) 

417 """ 

418 benchmarks = Benchmark_results() 

419 return benchmarks.add_result( 

420 model, 

421 hardware, 

422 accuracy_focused, 

423 accuracy_source, 

424 avg_time_per_question, 

425 context_window, 

426 temperature, 

427 ldr_version, 

428 date_tested, 

429 notes, 

430 ) 

431 

432 def get_benchmarks(self, best_only=False): 

433 """ 

434 Get community benchmark results. 

435 

436 Args: 

437 best_only: If True, only return top performers 

438 

439 Returns: 

440 List of benchmark results 

441 

442 Example: 

443 all_results = client.get_benchmarks() 

444 top_results = client.get_benchmarks(best_only=True) 

445 """ 

446 benchmarks = Benchmark_results() 

447 if best_only: 

448 return benchmarks.get_best() 

449 return benchmarks.get_all() 

450 

451 def __enter__(self): 

452 """Support context manager for auto-cleanup.""" 

453 return self 

454 

455 def __exit__(self, exc_type, exc_val, exc_tb): 

456 """Auto logout when used as context manager.""" 

457 self.logout() 

458 

459 

460# Convenience functions for simple use cases 

461 

462 

463def quick_query( 

464 username: str, 

465 password: str, 

466 query: str, 

467 base_url: str = "http://localhost:5000", 

468) -> str: 

469 """ 

470 One-liner for quick research queries. 

471 

472 Example: 

473 summary = quick_query("user", "pass", "What is DNA?") 

474 print(summary) 

475 

476 Args: 

477 username: LDR username 

478 password: LDR password 

479 query: Research question 

480 base_url: Server URL 

481 

482 Returns: 

483 Research summary as string 

484 """ 

485 with LDRClient(base_url) as client: 

486 if not client.login(username, password): 

487 raise RuntimeError("Login failed") 

488 

489 result = client.quick_research(query) 

490 return result.get("summary", "No summary available")