Coverage for src/local_deep_research/api/client.py: 96%

"""
HTTP Client for Local Deep Research API.
Simplifies authentication and API access by handling CSRF tokens automatically.

This client allows you to programmatically interact with the Local Deep Research (LDR)
application, enabling seamless integration with Python scripts and applications.
It handles all the complexity of authentication, session management, and request formatting.

Why CSRF with login?
--------------------
CSRF tokens prevent cross-site request forgery attacks. Even though you're logged in,
CSRF ensures requests come from YOUR code, not from malicious websites that might
try to use your browser's active session cookies to make unauthorized requests.

Features:
---------
- Automatic login and session management
- CSRF token handling
- Research query submission and result retrieval
- User settings management
- Research history access

Example usage:
--------------
    from local_deep_research.api.client import LDRClient

    # Simple usage
    client = LDRClient()
    client.login("username", "password")
    result = client.quick_research("What is quantum computing?")
    print(result["summary"])

    # With context manager (auto-logout)
    with LDRClient() as client:
        client.login("username", "password")
        result = client.quick_research("What is quantum computing?")
        print(result["summary"])

    # Get research history
    with LDRClient() as client:
        client.login("username", "password")
        history = client.get_history()
        for item in history:
            print(f"Research: {item['query']}")

    # One-liner for quick queries
    from local_deep_research.api.client import quick_query
    summary = quick_query("username", "password", "What is DNA?")

    # Update user settings
    with LDRClient() as client:
        client.login("username", "password")
        client.update_setting("llm.model", "gemma:7b")
        settings = client.get_settings()
        print(f"Current model: {settings['llm']['model']}")
"""

import re
import time
from typing import Any

from loguru import logger

from ..constants import ResearchStatus
from local_deep_research.benchmarks.comparison.results import Benchmark_results
from local_deep_research.security import SafeSession

67class LDRClient:

68 """

69 HTTP client for LDR API access with automatic CSRF handling.

71 This client abstracts away the complexity of:

72 - Extracting CSRF tokens from HTML forms

73 - Managing session cookies

74 - Handling authentication flow

75 - Polling for research results

76 """

78 def __init__(self, base_url: str = "http://localhost:5000"):

79 """

80 Initialize the client.

82 Args:

83 base_url: URL of the LDR server (default: http://localhost:5000)

84 """

85 self.base_url = base_url

86 # Use SafeSession with allow_localhost since client connects to local LDR server

87 self.session = SafeSession(allow_localhost=True)

88 self.csrf_token = None

89 self.logged_in = False

90 self.username = None

92 def login(self, username: str, password: str) -> bool:

93 """

94 Login to LDR server. Handles all CSRF complexity internally.

96 This method:

97 1. Gets the login page to extract CSRF token from HTML form

98 2. Submits login with form data (not JSON)

99 3. Retrieves CSRF token for subsequent API calls

100

101 Args:

102 username: Your LDR username

103 password: Your LDR password

104

105 Returns:

106 True if login successful, False otherwise

107 """

108 try:

109 # Step 1: Get login page to extract CSRF token

110 # We need to parse HTML because Flask-WTF embeds CSRF in forms

111 login_page = self.session.get(f"{self.base_url}/auth/login")

112

113 # Simple CSRF extraction without BeautifulSoup dependency

114 # Look for: <input type="hidden" name="csrf_token" value="..."/>

115 import re

116

117 csrf_match = re.search(

118 r'<input[^>]*name="csrf_token"[^>]*value="([^"]*)"',

119 login_page.text,

120 )

121

122 if not csrf_match:

123 logger.error("Could not find CSRF token in login page")

124 return False

125

126 login_csrf = csrf_match.group(1)

127

128 # Step 2: Login with form data (NOT JSON!)

129 # Flask-WTF expects form-encoded data for login

130 response = self.session.post(

131 f"{self.base_url}/auth/login",

132 data={

133 "username": username,

134 "password": password,

135 "csrf_token": login_csrf,

136 },

137 allow_redirects=True,

138 )

139

140 if response.status_code not in [200, 302]:

141 logger.error(

142 f"Login failed with status: {response.status_code}"

143 )

144 return False

145

146 # Step 3: Get CSRF token for API requests

147 # This uses our new endpoint that returns JSON

148 csrf_response = self.session.get(f"{self.base_url}/auth/csrf-token")

149 if csrf_response.status_code == 200:

150 self.csrf_token = csrf_response.json()["csrf_token"]

151 self.logged_in = True

152 self.username = username

153 logger.info(f"Successfully logged in as {username}")

154 return True

155 else:

156 logger.warning("Logged in but could not get API CSRF token")

157 # Still logged in, just no CSRF for API calls

158 self.logged_in = True

159 self.username = username

160 return True

161

162 except Exception:

163 logger.exception("Login error")

164 return False

165

166 def _api_headers(self) -> dict[str, str]:

167 """Get headers with CSRF token for API requests."""

168 if self.csrf_token:

169 return {"X-CSRF-Token": self.csrf_token}

170 return {}

171

172 def quick_research(

173 self,

174 query: str,

175 model: str | None = None,

176 search_engines: list[str] | None = None,

177 iterations: int = 2,

178 wait_for_result: bool = True,

179 timeout: int = 300,

180 ) -> dict[str, Any]:

181 """

182 Research a topic using LLMs and search engines.

183

184 This method runs a research process on your query using search engines

185 and large language models. It might take a few minutes to complete.

186

187 Args:

188 query: Your research question

189 model: LLM model to use (e.g., "gemma:7b", "llama2:7b")

190 search_engines: Search engines to use (default: ["searxng"])

191 iterations: How many research cycles to run (default: 2)

192 wait_for_result: If True, wait until done. If False, return immediately

193 timeout: Maximum seconds to wait (default: 300)

194

195 Returns:

196 If waiting for result: Dict with summary, sources, and findings

197 If not waiting: Dict with research_id to check status later

198

199 Raises:

200 RuntimeError: If not logged in or request fails

201

202 Example:

203 result = client.quick_research("Latest developments in fusion energy")

204 print(result["summary"])

205 """

206 if not self.logged_in:

207 raise RuntimeError("Not logged in. Call login() first.")

208

209 # Default search engines

210 if search_engines is None: 210 ↛ 214line 210 didn't jump to line 214 because the condition on line 210 was always true

211 search_engines = ["searxng"]

212

213 # Start research

214 response = self.session.post(

215 f"{self.base_url}/research/api/start",

216 json={

217 "query": query,

218 "model": model,

219 "search_engines": search_engines,

220 "iterations": iterations,

221 "questions_per_iteration": 3,

222 },

223 headers=self._api_headers(),

224 )

225

226 # Handle response

227 if response.status_code != 200:

228 # Try to extract error message

229 try:

230 error_data = response.json()

231 if isinstance(error_data, list) and len(error_data) > 0: 231 ↛ 234line 231 didn't jump to line 234 because the condition on line 231 was always true

232 error_msg = error_data[0].get("message", "Unknown error")

233 else:

234 error_msg = str(error_data)

235 except (ValueError, KeyError, AttributeError):

236 error_msg = response.text[:200]

237 raise RuntimeError(f"Failed to start research: {error_msg}")

238

239 result = response.json()

240 research_id = result.get("research_id")

241

242 if not research_id:

243 raise RuntimeError("No research ID returned")

244

245 if not wait_for_result:

246 return {"research_id": research_id}

247

248 # Poll for results

249 return self.wait_for_research(research_id, timeout)

250

251 def wait_for_research(

252 self, research_id: str, timeout: int = 300

253 ) -> dict[str, Any]:

254 """

255 Wait for research to complete and get results.

256

257 Use this after starting research with quick_research(wait_for_result=False).

258 Checks status every 5 seconds until complete or timeout.

259

260 Args:

261 research_id: ID of the research to wait for

262 timeout: Maximum seconds to wait (default: 300)

263

264 Returns:

265 Dict with research results (summary, sources, findings)

266

267 Raises:

268 RuntimeError: If research fails or times out

269

270 Example:

271 # Start research without waiting

272 resp = client.quick_research("Climate change impacts", wait_for_result=False)

273 # Get results when ready

274 results = client.wait_for_research(resp["research_id"])

275 """

276 start_time = time.time()

277

278 while time.time() - start_time < timeout:

279 status_response = self.session.get(

280 f"{self.base_url}/research/api/status/{research_id}"

281 )

282

283 if status_response.status_code == 200: 283 ↛ 300line 283 didn't jump to line 300 because the condition on line 283 was always true

284 status = status_response.json()

285

286 if status.get("status") == ResearchStatus.COMPLETED:

287 # Get final results

288 results_response = self.session.get(

289 f"{self.base_url}/api/report/{research_id}"

290 )

291 if results_response.status_code == 200:

292 return results_response.json()

293 else:

294 raise RuntimeError("Failed to get results")

295

296 elif status.get("status") == ResearchStatus.FAILED:

297 error_msg = status.get("error", "Unknown error")

298 raise RuntimeError(f"Research failed: {error_msg}")

299

300 time.sleep(5)

301

302 raise RuntimeError(f"Research timed out after {timeout} seconds")

303

304 def get_settings(self) -> dict[str, Any]:

305 """Get current user settings."""

306 if not self.logged_in:

307 raise RuntimeError("Not logged in. Call login() first.")

308

309 response = self.session.get(f"{self.base_url}/settings/api")

310 if response.status_code == 200:

311 return response.json()

312 else:

313 raise RuntimeError(

314 f"Failed to get settings: {response.status_code}"

315 )

316

317 def update_setting(self, key: str, value: Any) -> bool:

318 """

319 Update a setting.

320

321 Args:

322 key: Setting key (e.g., "llm.model")

323 value: New value for the setting

324

325 Returns:

326 True if successful

327 """

328 if not self.logged_in:

329 raise RuntimeError("Not logged in. Call login() first.")

330

331 response = self.session.put(

332 f"{self.base_url}/settings/api/{key}",

333 json={"value": value},

334 headers=self._api_headers(),

335 )

336 return response.status_code == 200

337

338 def get_history(self) -> list[dict[str, Any]]:

339 """

340 Get your past research queries.

341

342 Returns a list of previous research sessions with their details.

343

344 Returns:

345 List of research items with query, timestamp, and status info

346

347 Raises:

348 RuntimeError: If not logged in

349

350 Example:

351 history = client.get_history()

352 for item in history[:5]:

353 print(f"{item['timestamp']}: {item['query']}")

354 """

355 if not self.logged_in:

356 raise RuntimeError("Not logged in. Call login() first.")

357

358 response = self.session.get(f"{self.base_url}/history/api")

359 if response.status_code == 200:

360 data = response.json()

361 # Handle different response formats

362 if isinstance(data, dict):

363 return data.get("history", data.get("items", []))

364 elif isinstance(data, list): 364 ↛ 366line 364 didn't jump to line 366 because the condition on line 364 was always true

365 return data

366 return []

367 else:

368 raise RuntimeError(f"Failed to get history: {response.status_code}")

369

370 def logout(self):

371 """Logout and clear session."""

372 if self.logged_in:

373 self.session.post(

374 f"{self.base_url}/auth/logout", headers=self._api_headers()

375 )

376 self.session.close()

377 self.csrf_token = None

378 self.logged_in = False

379 self.username = None

380

381 def submit_benchmark(

382 self,

383 model,

384 hardware,

385 accuracy_focused,

386 accuracy_source,

387 avg_time_per_question,

388 context_window,

389 temperature,

390 ldr_version,

391 date_tested,

392 notes="",

393 ):

394 """

395 Submit your benchmark results to help the community.

396

397 Args:

398 model: Model name (e.g., "Llama-3.3-70B-Q4_K_M")

399 hardware: Hardware specs (e.g., "RTX 4090 24GB")

400 accuracy_focused: Accuracy percentage for focused strategy

401 accuracy_source: Accuracy percentage for source-based strategy

402 avg_time_per_question: Average time per question in seconds

403 context_window: Context window size used

404 temperature: Temperature setting used

405 ldr_version: Version of LDR used (e.g., "0.6.0")

406 date_tested: Date tested (YYYY-MM-DD format)

407 notes: Optional notes about the test

408

409 Returns:

410 True if submission was successful

411

412 Example:

413 client.submit_benchmark(

414 "Llama-3.3-70B-Q4_K_M", "RTX 4090 24GB",

415 87.0, 82.0, 45.2, 32000, 0.1, "0.6.0", "2024-01-15"

416 )

417 """

418 benchmarks = Benchmark_results()

419 return benchmarks.add_result(

420 model,

421 hardware,

422 accuracy_focused,

423 accuracy_source,

424 avg_time_per_question,

425 context_window,

426 temperature,

427 ldr_version,

428 date_tested,

429 notes,

430 )

431

432 def get_benchmarks(self, best_only=False):

433 """

434 Get community benchmark results.

435

436 Args:

437 best_only: If True, only return top performers

438

439 Returns:

440 List of benchmark results

441

442 Example:

443 all_results = client.get_benchmarks()

444 top_results = client.get_benchmarks(best_only=True)

445 """

446 benchmarks = Benchmark_results()

447 if best_only:

448 return benchmarks.get_best()

449 return benchmarks.get_all()

450

451 def __enter__(self):

452 """Support context manager for auto-cleanup."""

453 return self

454

455 def __exit__(self, exc_type, exc_val, exc_tb):

456 """Auto logout when used as context manager."""

457 self.logout()

458

459

# Convenience functions for simple use cases

461

462

def quick_query(
    username: str,
    password: str,
    query: str,
    base_url: str = "http://localhost:5000",
) -> str:
    """
    Log in, run a single research query, and return its summary.

    A temporary client is opened as a context manager, so it is logged out
    again on the way out, whether the call succeeds or raises.

    Example:
        summary = quick_query("user", "pass", "What is DNA?")
        print(summary)

    Args:
        username: LDR username
        password: LDR password
        query: Research question
        base_url: Server URL

    Returns:
        The "summary" field of the research result, or
        "No summary available" when the result has no such field

    Raises:
        RuntimeError: If the login is rejected
    """
    with LDRClient(base_url) as session_client:
        authenticated = session_client.login(username, password)
        if not authenticated:
            raise RuntimeError("Login failed")

        report = session_client.quick_research(query)
        summary = report.get("summary", "No summary available")
        return summary

Coverage for src / local_deep_research / api / client.py: 96%

131 statements