Coverage for src / local_deep_research / api / client.py: 96%

131 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-14 23:55 +0000

1""" 

2HTTP Client for Local Deep Research API. 

3Simplifies authentication and API access by handling CSRF tokens automatically. 

4 

5This client allows you to programmatically interact with the Local Deep Research (LDR) 

6application, enabling seamless integration with Python scripts and applications. 

7It handles all the complexity of authentication, session management, and request formatting. 

8 

9Why CSRF with login? 

10-------------------- 

11CSRF tokens prevent cross-site request forgery attacks. Even though you're logged in, 

12CSRF ensures requests come from YOUR code, not from malicious websites that might 

13try to use your browser's active session cookies to make unauthorized requests. 

14 

15Features: 

16--------- 

17- Automatic login and session management 

18- CSRF token handling 

19- Research query submission and result retrieval 

20- User settings management 

21- Research history access 

22 

23Example usage: 

24------------- 

25 from local_deep_research.api.client import LDRClient 

26 

27 # Simple usage 

28 client = LDRClient() 

29 client.login("username", "password") 

30 result = client.quick_research("What is quantum computing?") 

31 print(result["summary"]) 

32 

33 # With context manager (auto-logout) 

34 with LDRClient() as client: 

35 client.login("username", "password") 

36 result = client.quick_research("What is quantum computing?") 

37 print(result["summary"]) 

38 

39 # Get research history 

40 with LDRClient() as client: 

41 client.login("username", "password") 

42 history = client.get_history() 

43 for item in history: 

44 print(f"Research: {item['query']}") 

45 

46 # One-liner for quick queries 

47 from local_deep_research.api.client import quick_query 

48 summary = quick_query("username", "password", "What is DNA?") 

49 

50 # Update user settings 

51 with LDRClient() as client: 

52 client.login("username", "password") 

53 client.update_setting("llm.model", "gemma:7b") 

54 settings = client.get_settings() 

55 print(f"Current model: {settings['llm']['model']}") 

56""" 

57 

58import time 

59from typing import Any 

60 

61from ..constants import ResearchStatus 

62from loguru import logger 

63from local_deep_research.benchmarks.comparison.results import Benchmark_results 

64from local_deep_research.security import SafeSession 

65 

66 

67class LDRClient: 

68 """ 

69 HTTP client for LDR API access with automatic CSRF handling. 

70 

71 This client abstracts away the complexity of: 

72 - Extracting CSRF tokens from HTML forms 

73 - Managing session cookies 

74 - Handling authentication flow 

75 - Polling for research results 

76 """ 

77 

78 def __init__(self, base_url: str = "http://localhost:5000"): 

79 """ 

80 Initialize the client. 

81 

82 Args: 

83 base_url: URL of the LDR server (default: http://localhost:5000) 

84 """ 

85 self.base_url = base_url 

86 # Use SafeSession with allow_localhost since client connects to local LDR server 

87 self.session = SafeSession(allow_localhost=True) 

88 self.csrf_token = None 

89 self.logged_in = False 

90 self.username = None 

91 

92 def login(self, username: str, password: str) -> bool: 

93 """ 

94 Login to LDR server. Handles all CSRF complexity internally. 

95 

96 This method: 

97 1. Gets the login page to extract CSRF token from HTML form 

98 2. Submits login with form data (not JSON) 

99 3. Retrieves CSRF token for subsequent API calls 

100 

101 Args: 

102 username: Your LDR username 

103 password: Your LDR password 

104 

105 Returns: 

106 True if login successful, False otherwise 

107 """ 

108 try: 

109 # Step 1: Get login page to extract CSRF token 

110 # We need to parse HTML because Flask-WTF embeds CSRF in forms 

111 login_page = self.session.get(f"{self.base_url}/auth/login") 

112 

113 # Simple CSRF extraction without BeautifulSoup dependency 

114 # Look for: <input type="hidden" name="csrf_token" value="..."/> 

115 import re 

116 

117 csrf_match = re.search( 

118 r'<input[^>]*name="csrf_token"[^>]*value="([^"]*)"', 

119 login_page.text, 

120 ) 

121 

122 if not csrf_match: 

123 logger.error("Could not find CSRF token in login page") 

124 return False 

125 

126 login_csrf = csrf_match.group(1) 

127 

128 # Step 2: Login with form data (NOT JSON!) 

129 # Flask-WTF expects form-encoded data for login 

130 response = self.session.post( 

131 f"{self.base_url}/auth/login", 

132 data={ 

133 "username": username, 

134 "password": password, 

135 "csrf_token": login_csrf, 

136 }, 

137 allow_redirects=True, 

138 ) 

139 

140 if response.status_code not in [200, 302]: 

141 logger.error( 

142 f"Login failed with status: {response.status_code}" 

143 ) 

144 return False 

145 

146 # Step 3: Get CSRF token for API requests 

147 # This uses our new endpoint that returns JSON 

148 csrf_response = self.session.get(f"{self.base_url}/auth/csrf-token") 

149 if csrf_response.status_code == 200: 

150 self.csrf_token = csrf_response.json()["csrf_token"] 

151 self.logged_in = True 

152 self.username = username 

153 logger.info(f"Successfully logged in as {username}") 

154 return True 

155 logger.warning("Logged in but could not get API CSRF token") 

156 # Still logged in, just no CSRF for API calls 

157 self.logged_in = True 

158 self.username = username 

159 return True 

160 

161 except Exception: 

162 logger.exception("Login error") 

163 return False 

164 

165 def _api_headers(self) -> dict[str, str]: 

166 """Get headers with CSRF token for API requests.""" 

167 if self.csrf_token: 

168 return {"X-CSRF-Token": self.csrf_token} 

169 return {} 

170 

171 def quick_research( 

172 self, 

173 query: str, 

174 model: str | None = None, 

175 search_engines: list[str] | None = None, 

176 iterations: int = 2, 

177 wait_for_result: bool = True, 

178 timeout: int = 300, 

179 ) -> dict[str, Any]: 

180 """ 

181 Research a topic using LLMs and search engines. 

182 

183 This method runs a research process on your query using search engines 

184 and large language models. It might take a few minutes to complete. 

185 

186 Args: 

187 query: Your research question 

188 model: LLM model to use (e.g., "gemma:7b", "llama2:7b") 

189 search_engines: Search engines to use (default: ["searxng"]) 

190 iterations: How many research cycles to run (default: 2) 

191 wait_for_result: If True, wait until done. If False, return immediately 

192 timeout: Maximum seconds to wait (default: 300) 

193 

194 Returns: 

195 If waiting for result: Dict with summary, sources, and findings 

196 If not waiting: Dict with research_id to check status later 

197 

198 Raises: 

199 RuntimeError: If not logged in or request fails 

200 

201 Example: 

202 result = client.quick_research("Latest developments in fusion energy") 

203 print(result["summary"]) 

204 """ 

205 if not self.logged_in: 

206 raise RuntimeError("Not logged in. Call login() first.") 

207 

208 # Default search engines 

209 if search_engines is None: 209 ↛ 213line 209 didn't jump to line 213 because the condition on line 209 was always true

210 search_engines = ["searxng"] 

211 

212 # Start research 

213 response = self.session.post( 

214 f"{self.base_url}/research/api/start", 

215 json={ 

216 "query": query, 

217 "model": model, 

218 "search_engines": search_engines, 

219 "iterations": iterations, 

220 "questions_per_iteration": 3, 

221 }, 

222 headers=self._api_headers(), 

223 ) 

224 

225 # Handle response 

226 if response.status_code != 200: 

227 # Try to extract error message 

228 try: 

229 error_data = response.json() 

230 if isinstance(error_data, list) and len(error_data) > 0: 230 ↛ 233line 230 didn't jump to line 233 because the condition on line 230 was always true

231 error_msg = error_data[0].get("message", "Unknown error") 

232 else: 

233 error_msg = str(error_data) 

234 except (ValueError, KeyError, AttributeError): 

235 error_msg = response.text[:200] 

236 raise RuntimeError(f"Failed to start research: {error_msg}") 

237 

238 result = response.json() 

239 research_id = result.get("research_id") 

240 

241 if not research_id: 

242 raise RuntimeError("No research ID returned") 

243 

244 if not wait_for_result: 

245 return {"research_id": research_id} 

246 

247 # Poll for results 

248 return self.wait_for_research(research_id, timeout) 

249 

250 def wait_for_research( 

251 self, research_id: str, timeout: int = 300 

252 ) -> dict[str, Any]: 

253 """ 

254 Wait for research to complete and get results. 

255 

256 Use this after starting research with quick_research(wait_for_result=False). 

257 Checks status every 5 seconds until complete or timeout. 

258 

259 Args: 

260 research_id: ID of the research to wait for 

261 timeout: Maximum seconds to wait (default: 300) 

262 

263 Returns: 

264 Dict with research results (summary, sources, findings) 

265 

266 Raises: 

267 RuntimeError: If research fails or times out 

268 

269 Example: 

270 # Start research without waiting 

271 resp = client.quick_research("Climate change impacts", wait_for_result=False) 

272 # Get results when ready 

273 results = client.wait_for_research(resp["research_id"]) 

274 """ 

275 start_time = time.time() 

276 

277 while time.time() - start_time < timeout: 

278 status_response = self.session.get( 

279 f"{self.base_url}/research/api/status/{research_id}" 

280 ) 

281 

282 if status_response.status_code == 200: 282 ↛ 298line 282 didn't jump to line 298 because the condition on line 282 was always true

283 status = status_response.json() 

284 

285 if status.get("status") == ResearchStatus.COMPLETED: 

286 # Get final results 

287 results_response = self.session.get( 

288 f"{self.base_url}/api/report/{research_id}" 

289 ) 

290 if results_response.status_code == 200: 

291 return results_response.json() 

292 raise RuntimeError("Failed to get results") 

293 

294 if status.get("status") == ResearchStatus.FAILED: 

295 error_msg = status.get("error", "Unknown error") 

296 raise RuntimeError(f"Research failed: {error_msg}") 

297 

298 time.sleep(5) 

299 

300 raise RuntimeError(f"Research timed out after {timeout} seconds") 

301 

302 def get_settings(self) -> dict[str, Any]: 

303 """Get current user settings.""" 

304 if not self.logged_in: 

305 raise RuntimeError("Not logged in. Call login() first.") 

306 

307 response = self.session.get(f"{self.base_url}/settings/api") 

308 if response.status_code == 200: 

309 return response.json() 

310 raise RuntimeError(f"Failed to get settings: {response.status_code}") 

311 

312 def update_setting(self, key: str, value: Any) -> bool: 

313 """ 

314 Update a setting. 

315 

316 Args: 

317 key: Setting key (e.g., "llm.model") 

318 value: New value for the setting 

319 

320 Returns: 

321 True if successful 

322 """ 

323 if not self.logged_in: 

324 raise RuntimeError("Not logged in. Call login() first.") 

325 

326 response = self.session.put( 

327 f"{self.base_url}/settings/api/{key}", 

328 json={"value": value}, 

329 headers=self._api_headers(), 

330 ) 

331 return response.status_code == 200 

332 

333 def get_history(self) -> list[dict[str, Any]]: 

334 """ 

335 Get your past research queries. 

336 

337 Returns a list of previous research sessions with their details. 

338 

339 Returns: 

340 List of research items with query, timestamp, and status info 

341 

342 Raises: 

343 RuntimeError: If not logged in 

344 

345 Example: 

346 history = client.get_history() 

347 for item in history[:5]: 

348 print(f"{item['timestamp']}: {item['query']}") 

349 """ 

350 if not self.logged_in: 

351 raise RuntimeError("Not logged in. Call login() first.") 

352 

353 response = self.session.get(f"{self.base_url}/history/api") 

354 if response.status_code == 200: 

355 data = response.json() 

356 # Handle different response formats 

357 if isinstance(data, dict): 

358 return data.get("history", data.get("items", [])) 

359 if isinstance(data, list): 359 ↛ 361line 359 didn't jump to line 361 because the condition on line 359 was always true

360 return data 

361 return [] 

362 raise RuntimeError(f"Failed to get history: {response.status_code}") 

363 

364 def logout(self): 

365 """Logout and clear session.""" 

366 if self.logged_in: 

367 self.session.post( 

368 f"{self.base_url}/auth/logout", headers=self._api_headers() 

369 ) 

370 self.session.close() 

371 self.csrf_token = None 

372 self.logged_in = False 

373 self.username = None 

374 

375 def submit_benchmark( 

376 self, 

377 model, 

378 hardware, 

379 accuracy_focused, 

380 accuracy_source, 

381 avg_time_per_question, 

382 context_window, 

383 temperature, 

384 ldr_version, 

385 date_tested, 

386 notes="", 

387 ): 

388 """ 

389 Submit your benchmark results to help the community. 

390 

391 Args: 

392 model: Model name (e.g., "Llama-3.3-70B-Q4_K_M") 

393 hardware: Hardware specs (e.g., "RTX 4090 24GB") 

394 accuracy_focused: Accuracy percentage for focused strategy 

395 accuracy_source: Accuracy percentage for source-based strategy 

396 avg_time_per_question: Average time per question in seconds 

397 context_window: Context window size used 

398 temperature: Temperature setting used 

399 ldr_version: Version of LDR used (e.g., "0.6.0") 

400 date_tested: Date tested (YYYY-MM-DD format) 

401 notes: Optional notes about the test 

402 

403 Returns: 

404 True if submission was successful 

405 

406 Example: 

407 client.submit_benchmark( 

408 "Llama-3.3-70B-Q4_K_M", "RTX 4090 24GB", 

409 87.0, 82.0, 45.2, 32000, 0.1, "0.6.0", "2024-01-15" 

410 ) 

411 """ 

412 benchmarks = Benchmark_results() 

413 return benchmarks.add_result( 

414 model, 

415 hardware, 

416 accuracy_focused, 

417 accuracy_source, 

418 avg_time_per_question, 

419 context_window, 

420 temperature, 

421 ldr_version, 

422 date_tested, 

423 notes, 

424 ) 

425 

426 def get_benchmarks(self, best_only=False): 

427 """ 

428 Get community benchmark results. 

429 

430 Args: 

431 best_only: If True, only return top performers 

432 

433 Returns: 

434 List of benchmark results 

435 

436 Example: 

437 all_results = client.get_benchmarks() 

438 top_results = client.get_benchmarks(best_only=True) 

439 """ 

440 benchmarks = Benchmark_results() 

441 if best_only: 

442 return benchmarks.get_best() 

443 return benchmarks.get_all() 

444 

445 def __enter__(self): 

446 """Support context manager for auto-cleanup.""" 

447 return self 

448 

449 def __exit__(self, exc_type, exc_val, exc_tb): 

450 """Auto logout when used as context manager.""" 

451 self.logout() 

452 

453 

454# Convenience functions for simple use cases 

455 

456 

def quick_query(
    username: str,
    password: str,
    query: str,
    base_url: str = "http://localhost:5000",
) -> str:
    """
    Log in, run a single research query, and return its summary.

    The client is used as a context manager, so it is logged out when the
    function returns or raises.

    Example:
        summary = quick_query("user", "pass", "What is DNA?")
        print(summary)

    Args:
        username: LDR username
        password: LDR password
        query: Research question
        base_url: Server URL

    Returns:
        The "summary" entry of the research result, or
        "No summary available" when the result has no such entry

    Raises:
        RuntimeError: If the login is rejected
    """
    with LDRClient(base_url) as ldr:
        authenticated = ldr.login(username, password)
        if not authenticated:
            raise RuntimeError("Login failed")

        return ldr.quick_research(query).get("summary", "No summary available")