Coverage for src / local_deep_research / api / client.py: 11%

130 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-01-11 00:51 +0000

"""
HTTP Client for Local Deep Research API.
Simplifies authentication and API access by handling CSRF tokens automatically.

This client allows you to programmatically interact with the Local Deep Research (LDR)
application, enabling seamless integration with Python scripts and applications.
It handles all the complexity of authentication, session management, and request formatting.

Why CSRF with login?
--------------------
CSRF tokens prevent cross-site request forgery attacks. Even though you're logged in,
CSRF ensures requests come from YOUR code, not from malicious websites that might
try to use your browser's active session cookies to make unauthorized requests.

Features:
---------
- Automatic login and session management
- CSRF token handling
- Research query submission and result retrieval
- User settings management
- Research history access

Example usage:
--------------
    from local_deep_research.api.client import LDRClient

    # Simple usage
    client = LDRClient()
    client.login("username", "password")
    result = client.quick_research("What is quantum computing?")
    print(result["summary"])

    # With context manager (auto-logout)
    with LDRClient() as client:
        client.login("username", "password")
        result = client.quick_research("What is quantum computing?")
        print(result["summary"])

    # Get research history
    with LDRClient() as client:
        client.login("username", "password")
        history = client.get_history()
        for item in history:
            print(f"Research: {item['query']}")

    # One-liner for quick queries
    from local_deep_research.api.client import quick_query
    summary = quick_query("username", "password", "What is DNA?")

    # Update user settings
    with LDRClient() as client:
        client.login("username", "password")
        client.update_setting("llm.model", "gemma:7b")
        settings = client.get_settings()
        print(f"Current model: {settings['llm']['model']}")
"""

57 

import re
import time
from typing import Optional, Dict, Any, List

from loguru import logger

from local_deep_research.benchmarks.comparison.results import Benchmark_results
from local_deep_research.security import SafeSession

63 

64 

class LDRClient:
    """
    HTTP client for LDR API access with automatic CSRF handling.

    This client abstracts away the complexity of:
    - Extracting CSRF tokens from HTML forms
    - Managing session cookies
    - Handling authentication flow
    - Polling for research results

    Instances can be used as context managers; leaving the ``with`` block
    calls :meth:`logout`.
    """

    # Matches a complete <input ...> tag whose name attribute is "csrf_token",
    # regardless of attribute order or quote style.
    _CSRF_INPUT_RE = re.compile(
        r"""<input\b[^>]*(?<![\w-])name=["']csrf_token["'][^>]*>""",
        re.IGNORECASE,
    )
    # Extracts the value attribute from a single tag. The look-behind keeps
    # attributes such as ``data-value`` from being mistaken for ``value``.
    _VALUE_ATTR_RE = re.compile(
        r"""(?<![\w-])value=(["'])(.*?)\1""",
        re.IGNORECASE | re.DOTALL,
    )

    def __init__(self, base_url: str = "http://localhost:5000"):
        """
        Initialize the client.

        Args:
            base_url: URL of the LDR server (default: http://localhost:5000).
                Trailing slashes are dropped so that endpoint paths can be
                appended without producing ``//``.
        """
        self.base_url = base_url.rstrip("/")
        # Use SafeSession with allow_localhost since client connects to local LDR server
        self.session = SafeSession(allow_localhost=True)
        self.csrf_token: Optional[str] = None
        self.logged_in = False
        self.username: Optional[str] = None

    @staticmethod
    def _extract_csrf_token(html: str) -> Optional[str]:
        """
        Return the ``csrf_token`` hidden-input value found in *html*.

        Done with regular expressions to avoid a BeautifulSoup dependency.

        Returns:
            The token string (possibly empty), or None when no
            ``csrf_token`` input with a ``value`` attribute is present.
        """
        tag_match = LDRClient._CSRF_INPUT_RE.search(html)
        if tag_match is None:
            return None
        value_match = LDRClient._VALUE_ATTR_RE.search(tag_match.group(0))
        if value_match is None:
            return None
        return value_match.group(2)

    @staticmethod
    def _describe_error(response) -> str:
        """
        Build a short human-readable message from a failed API response.

        Prefers the ``message`` of the first item when the JSON body is a
        non-empty list, otherwise the stringified JSON body; falls back to
        the first 200 characters of the raw text when the body cannot be
        interpreted.
        """
        try:
            error_data = response.json()
            if isinstance(error_data, list) and error_data:
                return error_data[0].get("message", "Unknown error")
            return str(error_data)
        except Exception:
            # Non-JSON body or unexpected shape. Deliberately not a bare
            # ``except`` so KeyboardInterrupt/SystemExit still propagate.
            return response.text[:200]

    def _fetch_api_csrf_token(self) -> Optional[str]:
        """
        Ask the server for the CSRF token used on JSON API calls.

        Returns:
            The token, or None when the endpoint does not answer 200 or its
            body is not a JSON object carrying a non-empty ``csrf_token``.
        """
        csrf_response = self.session.get(f"{self.base_url}/auth/csrf-token")
        if csrf_response.status_code != 200:
            return None
        try:
            payload = csrf_response.json()
        except ValueError:
            # presumably the session returns requests-style responses whose
            # JSON decode error derives from ValueError - TODO confirm
            return None
        if not isinstance(payload, dict):
            return None
        return payload.get("csrf_token") or None

    def login(self, username: str, password: str) -> bool:
        """
        Login to LDR server. Handles all CSRF complexity internally.

        This method:
        1. Gets the login page to extract CSRF token from HTML form
        2. Submits login with form data (not JSON)
        3. Retrieves CSRF token for subsequent API calls

        Args:
            username: Your LDR username
            password: Your LDR password

        Returns:
            True if login successful, False otherwise. Any exception raised
            along the way is logged and reported as False.
        """
        try:
            login_url = f"{self.base_url}/auth/login"

            # Step 1: the login form embeds the CSRF token as a hidden input.
            login_page = self.session.get(login_url)
            login_csrf = self._extract_csrf_token(login_page.text)
            if login_csrf is None:
                logger.error("Could not find CSRF token in login page")
                return False

            # Step 2: Login with form data (NOT JSON!)
            response = self.session.post(
                login_url,
                data={
                    "username": username,
                    "password": password,
                    "csrf_token": login_csrf,
                },
                allow_redirects=True,
            )
            # NOTE(review): only the status code is checked here; whether the
            # server answers 200 for rejected credentials is not visible
            # from this file - confirm against the auth route.
            if response.status_code not in (200, 302):
                logger.error(
                    f"Login failed with status: {response.status_code}"
                )
                return False

            # Step 3: Get CSRF token for API requests. A missing token does
            # not undo the login; API calls just go out without the header.
            self.csrf_token = self._fetch_api_csrf_token()
            self.logged_in = True
            self.username = username
            if self.csrf_token is None:
                logger.warning("Logged in but could not get API CSRF token")
            else:
                logger.info(f"Successfully logged in as {username}")
            return True

        except Exception:
            logger.exception("Login error")
            return False

    def _api_headers(self) -> Dict[str, str]:
        """Get headers with CSRF token for API requests (empty if no token)."""
        if self.csrf_token:
            return {"X-CSRF-Token": self.csrf_token}
        return {}

    def quick_research(
        self,
        query: str,
        model: Optional[str] = None,
        search_engines: Optional[List[str]] = None,
        iterations: int = 2,
        wait_for_result: bool = True,
        timeout: int = 300,
    ) -> Dict[str, Any]:
        """
        Research a topic using LLMs and search engines.

        This method runs a research process on your query using search engines
        and large language models. It might take a few minutes to complete.

        Args:
            query: Your research question
            model: LLM model to use (e.g., "gemma:7b", "llama2:7b")
            search_engines: Search engines to use (default: ["searxng"])
            iterations: How many research cycles to run (default: 2)
            wait_for_result: If True, wait until done. If False, return immediately
            timeout: Maximum seconds to wait (default: 300)

        Returns:
            If waiting for result: Dict with summary, sources, and findings
            If not waiting: Dict with research_id to check status later

        Raises:
            RuntimeError: If not logged in or request fails

        Example:
            result = client.quick_research("Latest developments in fusion energy")
            print(result["summary"])
        """
        if not self.logged_in:
            raise RuntimeError("Not logged in. Call login() first.")

        if search_engines is None:
            search_engines = ["searxng"]

        response = self.session.post(
            f"{self.base_url}/research/api/start",
            json={
                "query": query,
                "model": model,
                "search_engines": search_engines,
                "iterations": iterations,
                "questions_per_iteration": 3,
            },
            headers=self._api_headers(),
        )

        if response.status_code != 200:
            error_msg = self._describe_error(response)
            raise RuntimeError(f"Failed to start research: {error_msg}")

        research_id = response.json().get("research_id")
        if not research_id:
            raise RuntimeError("No research ID returned")

        if not wait_for_result:
            return {"research_id": research_id}

        return self.wait_for_research(research_id, timeout)

    def wait_for_research(
        self,
        research_id: str,
        timeout: int = 300,
        poll_interval: float = 5.0,
    ) -> Dict[str, Any]:
        """
        Wait for research to complete and get results.

        Use this after starting research with quick_research(wait_for_result=False).
        Checks status every ``poll_interval`` seconds until complete or timeout.

        Args:
            research_id: ID of the research to wait for
            timeout: Maximum seconds to wait (default: 300)
            poll_interval: Seconds to sleep between status checks
                (default: 5). The last sleep is shortened so the call does
                not run past ``timeout``.

        Returns:
            Dict with research results (summary, sources, findings)

        Raises:
            RuntimeError: If not logged in, or research fails or times out

        Example:
            # Start research without waiting
            resp = client.quick_research("Climate change impacts", wait_for_result=False)
            # Get results when ready
            results = client.wait_for_research(resp["research_id"])
        """
        if not self.logged_in:
            raise RuntimeError("Not logged in. Call login() first.")

        research_url = f"{self.base_url}/research/api/research/{research_id}"
        # Monotonic clock: immune to wall-clock adjustments while waiting.
        deadline = time.monotonic() + timeout

        while time.monotonic() < deadline:
            status_response = self.session.get(f"{research_url}/status")

            # Non-200 status responses are ignored and retried on the next poll.
            if status_response.status_code == 200:
                status = status_response.json()
                state = status.get("status")

                if state == "completed":
                    results_response = self.session.get(
                        f"{research_url}/result"
                    )
                    if results_response.status_code != 200:
                        raise RuntimeError("Failed to get results")
                    return results_response.json()

                if state == "failed":
                    error_msg = status.get("error", "Unknown error")
                    raise RuntimeError(f"Research failed: {error_msg}")

            remaining = deadline - time.monotonic()
            if remaining <= 0:
                break
            # Clamp at 0 so a negative poll_interval cannot make sleep() raise.
            time.sleep(max(0.0, min(poll_interval, remaining)))

        raise RuntimeError(f"Research timed out after {timeout} seconds")

    def get_settings(self) -> Dict[str, Any]:
        """
        Get current user settings.

        Returns:
            The decoded JSON body of the settings endpoint.

        Raises:
            RuntimeError: If not logged in or the server does not answer 200
        """
        if not self.logged_in:
            raise RuntimeError("Not logged in. Call login() first.")

        response = self.session.get(f"{self.base_url}/settings/api")
        if response.status_code != 200:
            raise RuntimeError(
                f"Failed to get settings: {response.status_code}"
            )
        return response.json()

    def update_setting(self, key: str, value: Any) -> bool:
        """
        Update a setting.

        Args:
            key: Setting key (e.g., "llm.model")
            value: New value for the setting

        Returns:
            True if the server answered 200, False otherwise

        Raises:
            RuntimeError: If not logged in
        """
        if not self.logged_in:
            raise RuntimeError("Not logged in. Call login() first.")

        response = self.session.put(
            f"{self.base_url}/settings/api/{key}",
            json={"value": value},
            headers=self._api_headers(),
        )
        return response.status_code == 200

    def get_history(self) -> List[Dict[str, Any]]:
        """
        Get your past research queries.

        Returns a list of previous research sessions with their details.

        Returns:
            List of research items with query, timestamp, and status info.
            A dict body is unwrapped via its "history" key (falling back to
            "items"); a list body is returned as is; anything else yields [].

        Raises:
            RuntimeError: If not logged in or the server does not answer 200

        Example:
            history = client.get_history()
            for item in history[:5]:
                print(f"{item['timestamp']}: {item['query']}")
        """
        if not self.logged_in:
            raise RuntimeError("Not logged in. Call login() first.")

        response = self.session.get(f"{self.base_url}/history/api")
        if response.status_code != 200:
            raise RuntimeError(f"Failed to get history: {response.status_code}")

        data = response.json()
        # Handle different response formats
        if isinstance(data, dict):
            return data.get("history", data.get("items", []))
        if isinstance(data, list):
            return data
        return []

    def logout(self) -> None:
        """
        Logout and clear session.

        The local state is reset and the session closed even when the logout
        request itself raises; the exception still propagates to the caller.
        """
        try:
            if self.logged_in:
                self.session.post(
                    f"{self.base_url}/auth/logout", headers=self._api_headers()
                )
        finally:
            # Reset state first so a failing close() cannot leave the client
            # claiming to be logged in.
            self.csrf_token = None
            self.logged_in = False
            self.username = None
            self.session.close()

    def submit_benchmark(
        self,
        model,
        hardware,
        accuracy_focused,
        accuracy_source,
        avg_time_per_question,
        context_window,
        temperature,
        ldr_version,
        date_tested,
        notes="",
    ):
        """
        Submit your benchmark results to help the community.

        Args:
            model: Model name (e.g., "Llama-3.3-70B-Q4_K_M")
            hardware: Hardware specs (e.g., "RTX 4090 24GB")
            accuracy_focused: Accuracy percentage for focused strategy
            accuracy_source: Accuracy percentage for source-based strategy
            avg_time_per_question: Average time per question in seconds
            context_window: Context window size used
            temperature: Temperature setting used
            ldr_version: Version of LDR used (e.g., "0.6.0")
            date_tested: Date tested (YYYY-MM-DD format)
            notes: Optional notes about the test

        Returns:
            Whatever ``Benchmark_results.add_result`` returns
            (presumably True on success - verify against that class).

        Example:
            client.submit_benchmark(
                "Llama-3.3-70B-Q4_K_M", "RTX 4090 24GB",
                87.0, 82.0, 45.2, 32000, 0.1, "0.6.0", "2024-01-15"
            )
        """
        benchmarks = Benchmark_results()
        return benchmarks.add_result(
            model,
            hardware,
            accuracy_focused,
            accuracy_source,
            avg_time_per_question,
            context_window,
            temperature,
            ldr_version,
            date_tested,
            notes,
        )

    def get_benchmarks(self, best_only=False):
        """
        Get community benchmark results.

        Args:
            best_only: If True, only return top performers

        Returns:
            Result of ``Benchmark_results.get_best()`` when ``best_only`` is
            true, otherwise of ``Benchmark_results.get_all()``.

        Example:
            all_results = client.get_benchmarks()
            top_results = client.get_benchmarks(best_only=True)
        """
        benchmarks = Benchmark_results()
        if best_only:
            return benchmarks.get_best()
        return benchmarks.get_all()

    def __enter__(self):
        """Support context manager for auto-cleanup."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Auto logout when used as context manager."""
        self.logout()

456 

457 

# Convenience functions for simple use cases

459 

460 

461def quick_query( 

462 username: str, 

463 password: str, 

464 query: str, 

465 base_url: str = "http://localhost:5000", 

466) -> str: 

467 """ 

468 One-liner for quick research queries. 

469 

470 Example: 

471 summary = quick_query("user", "pass", "What is DNA?") 

472 print(summary) 

473 

474 Args: 

475 username: LDR username 

476 password: LDR password 

477 query: Research question 

478 base_url: Server URL 

479 

480 Returns: 

481 Research summary as string 

482 """ 

483 with LDRClient(base_url) as client: 

484 if not client.login(username, password): 

485 raise RuntimeError("Login failed") 

486 

487 result = client.quick_research(query) 

488 return result.get("summary", "No summary available")