Coverage for src/local_deep_research/web/routes/research_routes.py: 45%

677 statements  

coverage.py v7.12.0, created at 2026-01-11 00:51 +0000

1import io 

2import json 

3import platform 

4import subprocess 

5from datetime import datetime, UTC 

6from pathlib import Path 

7 

8from flask import ( 

9 Blueprint, 

10 g, 

11 jsonify, 

12 redirect, 

13 request, 

14 send_file, 

15 session, 

16 url_for, 

17) 

18from loguru import logger 

19from ...settings.logger import log_settings 

20from sqlalchemy import func 

21 

22# Security imports 

23from ...security import FileUploadValidator, upload_rate_limit 

24from ...config.paths import get_config_directory 

25 

26# Services imports 

27from ..services.pdf_extraction_service import get_pdf_extraction_service 

28 

29from ...database.models import ( 

30 QueuedResearch, 

31 ResearchHistory, 

32 ResearchLog, 

33 UserActiveResearch, 

34) 

35from ...database.models.library import Document as Document 

36from ...database.session_context import get_user_db_session 

37from ..auth.decorators import login_required 

38from ..models.database import calculate_duration 

39from ..services.research_service import ( 

40 export_report_to_memory, 

41 run_research_process, 

42 start_research_process, 

43) 

44from ..utils.rate_limiter import limiter 

45from ..utils.templates import render_template_with_defaults 

46from .globals import active_research, termination_flags 

47 

48# Create a Blueprint for the research application 

49research_bp = Blueprint("research", __name__) 

50 

51 
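For context, a blueprint only serves the routes defined below once it is registered on an application. A minimal wiring sketch (the app setup here is assumed, not taken from this file):

# Hypothetical wiring sketch: registering the blueprint on a Flask app.
from flask import Flask

app = Flask(__name__)
app.register_blueprint(research_bp)  # exposes /progress/..., /api/..., etc.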

52# Add static route at the root level 

53@research_bp.route("/redirect-static/<path:path>") 

54def redirect_static(path): 

55 """Redirect old static URLs to new static URLs""" 

56 return redirect(url_for("static", filename=path)) 

57 

58 

59@research_bp.route("/progress/<string:research_id>") 

60@login_required 

61def progress_page(research_id): 

62 """Render the research progress page""" 

63 return render_template_with_defaults("pages/progress.html") 

64 

65 

66@research_bp.route("/details/<string:research_id>") 

67@login_required 

68def research_details_page(research_id): 

69 """Render the research details page""" 

70 return render_template_with_defaults("pages/details.html") 

71 

72 

73@research_bp.route("/results/<string:research_id>") 

74@login_required 

75def results_page(research_id): 

76 """Render the research results page""" 

77 return render_template_with_defaults("pages/results.html") 

78 

79 

80@research_bp.route("/history") 

81@login_required 

82def history_page(): 

83 """Render the history page""" 

84 return render_template_with_defaults("pages/history.html") 

85 

86 

87# Add missing settings routes 

88@research_bp.route("/settings", methods=["GET"]) 

89@login_required 

90def settings_page(): 

91 """Render the settings page""" 

92 return render_template_with_defaults("settings_dashboard.html") 

93 

94 

95@research_bp.route("/settings/main", methods=["GET"]) 

96@login_required 

97def main_config_page(): 

98 """Render the main settings config page""" 

99 return render_template_with_defaults("main_config.html") 

100 

101 

102@research_bp.route("/settings/collections", methods=["GET"]) 

103@login_required 

104def collections_config_page(): 

105 """Render the collections config page""" 

106 return render_template_with_defaults("collections_config.html") 

107 

108 

109@research_bp.route("/settings/api_keys", methods=["GET"]) 

110@login_required 

111def api_keys_config_page(): 

112 """Render the API keys config page""" 

113 return render_template_with_defaults("api_keys_config.html") 

114 

115 

116@research_bp.route("/settings/search_engines", methods=["GET"]) 

117@login_required 

118def search_engines_config_page(): 

119 """Render the search engines config page""" 

120 return render_template_with_defaults("search_engines_config.html") 

121 

122 

123@research_bp.route("/settings/llm", methods=["GET"]) 

124@login_required 

125def llm_config_page(): 

126 """Render the LLM config page""" 

127 return render_template_with_defaults("llm_config.html") 

128 

129 

130@research_bp.route("/api/start_research", methods=["POST"]) 

131@login_required 

132def start_research(): 

133 data = request.json 

134 # Debug logging to trace model parameter 

135 logger.debug(f"Received request data: {data}") 

136 logger.debug(f"Request data keys: {list(data.keys()) if data else 'None'}") 

137 

138 # Check if this is a news search 

139 metadata = data.get("metadata", {}) 

140 if metadata.get("is_news_search"):  # branch 140 ↛ 141: condition was never true

141 logger.info( 

142 f"News search request received: triggered_by={metadata.get('triggered_by', 'unknown')}" 

143 ) 

144 

145 query = data.get("query") 

146 mode = data.get("mode", "quick") 

147 

148 # Replace date placeholders if they exist 

149 if query and "YYYY-MM-DD" in query:  # branch 149 ↛ 151: condition was never true

150 # Use current UTC time

151 current_date = datetime.now(UTC).strftime("%Y-%m-%d") 

152 

153 original_query = query 

154 query = query.replace("YYYY-MM-DD", current_date) 

155 logger.info( 

156 f"Replaced date placeholder in query: {original_query[:100]}... -> {query[:100]}..." 

157 ) 

158 logger.info(f"Using date: {current_date}") 

159 

160 # Update metadata to track the replacement 

161 if not metadata: 

162 metadata = {} 

163 metadata["original_query"] = original_query 

164 metadata["processed_query"] = query 

165 metadata["date_replaced"] = current_date 

166 data["metadata"] = metadata 

167 

168 # Get parameters from request or use database settings 

169 from ..services.settings_manager import SettingsManager 

170 

171 username = session.get("username") 

172 if not username:  # branch 172 ↛ 173: condition was never true

173 return jsonify({"error": "Not authenticated"}), 401 

174 

175 with get_user_db_session(username) as db_session: 

176 settings_manager = SettingsManager(db_session=db_session) 

177 

178 # Get model provider and model selections - use database settings if not provided 

179 model_provider = data.get("model_provider") 

180 if not model_provider: 

181 model_provider = settings_manager.get_setting("llm.provider", "OLLAMA") 

182 logger.debug( 

183 f"No model_provider in request, using database setting: {model_provider}" 

184 ) 

185 else: 

186 logger.debug(f"Using model_provider from request: {model_provider}") 

187 

188 model = data.get("model") 

189 if not model: 

190 model = settings_manager.get_setting("llm.model", None) 

191 logger.debug(f"No model in request, using database setting: {model}") 

192 else: 

193 logger.debug(f"Using model from request: {model}") 

194 

195 custom_endpoint = data.get("custom_endpoint") 

196 if not custom_endpoint and model_provider == "OPENAI_ENDPOINT":  # branch 196 ↛ 197: condition was never true

197 custom_endpoint = settings_manager.get_setting( 

198 "llm.openai_endpoint.url", None 

199 ) 

200 logger.debug( 

201 f"No custom_endpoint in request, using database setting: {custom_endpoint}" 

202 ) 

203 

204 # Get Ollama URL from request or settings 

205 ollama_url = data.get("ollama_url") 

206 if not ollama_url and model_provider == "OLLAMA":  # branch 206 ↛ 214: condition was always true

207 ollama_url = settings_manager.get_setting( 

208 "llm.ollama.url", "http://localhost:11434" 

209 ) 

210 logger.debug( 

211 f"No ollama_url in request, using database setting: {ollama_url}" 

212 ) 

213 

214 search_engine = data.get("search_engine") or data.get("search_tool") 

215 if not search_engine: 

216 search_engine = settings_manager.get_setting("search.tool", "searxng") 

217 

218 max_results = data.get("max_results") 

219 time_period = data.get("time_period") 

220 

221 iterations = data.get("iterations") 

222 if iterations is None: 

223 iterations = settings_manager.get_setting("search.iterations", 5) 

224 

225 questions_per_iteration = data.get("questions_per_iteration") 

226 if questions_per_iteration is None: 

227 questions_per_iteration = settings_manager.get_setting( 

228 "search.questions_per_iteration", 5 

229 ) 

230 

231 # Get strategy from request or database 

232 strategy = data.get("strategy") 

233 if not strategy: 

234 strategy = settings_manager.get_setting( 

235 "search.search_strategy", "source-based" 

236 ) 

237 

238 # Note: db_session already closed by context manager above 

239 

240 # Debug logging for model parameter specifically 

241 logger.debug( 

242 f"Extracted model value: '{model}' (type: {type(model).__name__})" 

243 ) 

244 

245 # Log the selections for troubleshooting 

246 logger.info( 

247 f"Starting research with provider: {model_provider}, model: {model}, search engine: {search_engine}" 

248 ) 

249 logger.info( 

250 f"Additional parameters: max_results={max_results}, time_period={time_period}, iterations={iterations}, questions={questions_per_iteration}, strategy={strategy}" 

251 ) 

252 

253 if not query: 

254 return jsonify({"status": "error", "message": "Query is required"}), 400 

255 

256 # Validate required parameters based on provider 

257 if model_provider == "OPENAI_ENDPOINT" and not custom_endpoint:  # branch 257 ↛ 258: condition was never true

258 return ( 

259 jsonify( 

260 { 

261 "status": "error", 

262 "message": "Custom endpoint URL is required for OpenAI endpoint provider", 

263 } 

264 ), 

265 400, 

266 ) 

267 

268 if not model:  # branch 268 ↛ 269: condition was never true

269 logger.error( 

270 f"No model specified or configured. Provider: {model_provider}" 

271 ) 

272 return jsonify( 

273 { 

274 "status": "error", 

275 "message": "Model is required. Please configure a model in the settings.", 

276 } 

277 ), 400 

278 

279 # Check if the user has too many active researches 

280 username = session.get("username") 

281 

282 # Get max concurrent researches from settings 

283 from ...settings import SettingsManager 

284 

285 with get_user_db_session() as db_session: 

286 settings_manager = SettingsManager(db_session) 

287 max_concurrent_researches = settings_manager.get_setting( 

288 "app.max_concurrent_researches", 3 

289 ) 

290 

291 # Use existing session from g to check active researches 

292 try: 

293 if hasattr(g, "db_session") and g.db_session:  # branch 293 ↛ 309: condition was always true

294 # Count active researches for this user 

295 active_count = ( 

296 g.db_session.query(UserActiveResearch) 

297 .filter_by(username=username, status="in_progress") 

298 .count() 

299 ) 

300 

301 # Debug logging 

302 logger.info( 

303 f"Active research count for {username}: {active_count}/{max_concurrent_researches}" 

304 ) 

305 

306 should_queue = active_count >= max_concurrent_researches 

307 logger.info(f"Should queue new research: {should_queue}") 

308 else: 

309 logger.warning( 

310 "No database session available to check active researches" 

311 ) 

312 should_queue = False 

313 except Exception: 

314 logger.exception("Failed to check active researches") 

315 # Default to not queueing if we can't check 

316 should_queue = False 

317 

318 # Create a record in the database with explicit UTC timestamp 

319 import uuid 

320 import threading 

321 

322 created_at = datetime.now(UTC).isoformat() 

323 research_id = str(uuid.uuid4()) 

324 

325 # Create organized research metadata with settings snapshot 

326 research_settings = { 

327 # Direct submission parameters 

328 "submission": { 

329 "model_provider": model_provider, 

330 "model": model, 

331 "custom_endpoint": custom_endpoint, 

332 "search_engine": search_engine, 

333 "max_results": max_results, 

334 "time_period": time_period, 

335 "iterations": iterations, 

336 "questions_per_iteration": questions_per_iteration, 

337 "strategy": strategy, 

338 }, 

339 # System information 

340 "system": { 

341 "timestamp": created_at, 

342 "user": username, 

343 "version": "1.0", # Track metadata version for future migrations 

344 "server_url": request.host_url, # Add server URL for link generation 

345 }, 

346 } 

347 

348 # Add any additional metadata from request 

349 additional_metadata = data.get("metadata", {}) 

350 if additional_metadata:  # branch 350 ↛ 351: condition was never true

351 research_settings.update(additional_metadata) 

352 # Get complete settings snapshot for this research 

353 try: 

354 from local_deep_research.settings import SettingsManager 

355 

356 # Use the existing session from g (set by middleware) 

357 if hasattr(g, "db_session") and g.db_session:  # branch 357 ↛ 376: condition was always true

358 # Create SettingsManager with the existing session 

359 username = session.get("username") 

360 # Ensure any pending changes are committed 

361 try: 

362 g.db_session.commit() 

363 except Exception: 

364 g.db_session.rollback() 

365 settings_manager = SettingsManager(g.db_session) 

366 # Get all current settings as a snapshot (bypass cache to ensure fresh data) 

367 all_settings = settings_manager.get_all_settings(bypass_cache=True) 

368 

369 # Add settings snapshot to metadata 

370 research_settings["settings_snapshot"] = all_settings 

371 logger.info( 

372 f"Captured {len(all_settings)} settings for research {research_id}" 

373 ) 

374 else: 

375 # If no session in g, create a new one temporarily to get settings 

376 logger.warning( 

377 "No database session in g, creating temporary session for settings snapshot" 

378 ) 

379 from ...database.thread_local_session import get_metrics_session 

380 

381 # Get password from session or g 

382 password = getattr(g, "user_password", None) 

383 if not password: 

384 # Try to get from session password store 

385 from ...database.session_passwords import session_password_store 

386 

387 session_id = session.get("session_id") 

388 if session_id: 

389 password = session_password_store.get_session_password( 

390 username, session_id 

391 ) 

392 

393 if password: 

394 temp_session = get_metrics_session(username, password) 

395 if temp_session: 

396 username = session.get("username") 

397 settings_manager = SettingsManager(temp_session) 

398 all_settings = settings_manager.get_all_settings( 

399 bypass_cache=True 

400 ) 

401 research_settings["settings_snapshot"] = all_settings 

402 logger.info( 

403 f"Captured {len(all_settings)} settings using temporary session for research {research_id}" 

404 ) 

405 else: 

406 logger.error( 

407 "Failed to create temporary session for settings snapshot" 

408 ) 

409 raise Exception( 

410 "Cannot create research without settings snapshot" 

411 ) 

412 else: 

413 logger.error( 

414 "No password available to create session for settings snapshot" 

415 ) 

416 raise Exception( 

417 "Cannot create research without settings snapshot" 

418 ) 

419 except Exception: 

420 logger.exception("Failed to capture settings snapshot") 

421 # Cannot continue without settings snapshot for thread-based research 

422 return jsonify( 

423 { 

424 "status": "error", 

425 "message": "Failed to capture settings for research. Please try again.", 

426 } 

427 ), 500 

428 

429 # Use existing session from g 

430 username = session.get("username") 

431 if not username:  # branch 431 ↛ 432: condition was never true

432 return jsonify({"status": "error", "message": "Not authenticated"}), 401 

433 

434 try: 

435 # Use existing session from g 

436 if hasattr(g, "db_session") and g.db_session:  # branch 436 ↛ 620: condition was always true

437 db_session = g.db_session 

438 # Determine initial status based on whether we need to queue 

439 initial_status = "queued" if should_queue else "in_progress" 

440 

441 research = ResearchHistory( 

442 id=research_id, # Set UUID as primary key 

443 query=query, 

444 mode=mode, 

445 status=initial_status, 

446 created_at=created_at, 

447 progress_log=[{"time": created_at, "progress": 0}], 

448 research_meta=research_settings, 

449 ) 

450 db_session.add(research) 

451 db_session.commit() 

452 logger.info( 

453 f"Created research entry with UUID: {research_id}, status: {initial_status}" 

454 ) 

455 

456 if should_queue:  # branch 456 ↛ 459: condition was never true

457 # Add to queue instead of starting immediately 

458 # Get the next position in queue for this user 

459 max_position = ( 

460 db_session.query(func.max(QueuedResearch.position)) 

461 .filter_by(username=username) 

462 .scalar() 

463 or 0 

464 ) 

465 

466 queued_record = QueuedResearch( 

467 username=username, 

468 research_id=research_id, 

469 query=query, 

470 mode=mode, 

471 settings_snapshot=research_settings, 

472 position=max_position + 1, 

473 ) 

474 db_session.add(queued_record) 

475 db_session.commit() 

476 logger.info( 

477 f"Queued research {research_id} at position {max_position + 1} for user {username}" 

478 ) 

479 

480 # Notify queue processor with all parameters for potential direct execution 

481 from ..queue.processor_v2 import queue_processor 

482 

483 # Get session ID for password access 

484 session_id = session.get("session_id") 

485 

486 # Pass all parameters needed for direct execution 

487 queue_processor.notify_research_queued( 

488 username, 

489 research_id, 

490 session_id=session_id, 

491 query=query, 

492 mode=mode, 

493 settings_snapshot=research_settings, 

494 model_provider=model_provider, 

495 model=model, 

496 custom_endpoint=custom_endpoint, 

497 search_engine=search_engine, 

498 max_results=max_results, 

499 time_period=time_period, 

500 iterations=iterations, 

501 questions_per_iteration=questions_per_iteration, 

502 strategy=strategy, 

503 ) 

504 

505 # Return queued status 

506 return jsonify( 

507 { 

508 "status": "queued", 

509 "research_id": research_id, 

510 "queue_position": max_position + 1, 

511 "message": f"Your research has been queued. Position in queue: {max_position + 1}", 

512 } 

513 ) 

514 else: 

515 # Start immediately 

516 # Create active research tracking record 

517 import threading 

518 

519 active_record = UserActiveResearch( 

520 username=username, 

521 research_id=research_id, 

522 status="in_progress", 

523 thread_id=str(threading.current_thread().ident), 

524 settings_snapshot=research_settings, 

525 ) 

526 db_session.add(active_record) 

527 db_session.commit() 

528 logger.info( 

529 f"Created active research record for user {username}" 

530 ) 

531 

532 # Double-check the count after committing to handle race conditions 

533 # Use the existing session for the recheck 

534 try: 

535 # Use the same session we already have 

536 recheck_session = db_session 

537 final_count = ( 

538 recheck_session.query(UserActiveResearch) 

539 .filter_by(username=username, status="in_progress") 

540 .count() 

541 ) 

542 logger.info( 

543 f"Final active count after commit: {final_count}/{max_concurrent_researches}" 

544 ) 

545 

546 if final_count > max_concurrent_researches:  # branch 546 ↛ 549: condition was never true

547 # We exceeded the limit due to a race condition 

548 # Remove this record and queue instead 

549 logger.warning( 

550 f"Race condition detected: {final_count} > {max_concurrent_researches}, moving to queue" 

551 ) 

552 db_session.delete(active_record) 

553 db_session.commit() 

554 

555 # Add to queue 

556 max_position = ( 

557 db_session.query(func.max(QueuedResearch.position)) 

558 .filter_by(username=username) 

559 .scalar() 

560 or 0 

561 ) 

562 

563 queued_record = QueuedResearch( 

564 username=username, 

565 research_id=research_id, 

566 query=query, 

567 mode=mode, 

568 settings_snapshot=research_settings, 

569 position=max_position + 1, 

570 ) 

571 db_session.add(queued_record) 

572 

573 # Update research status to queued 

574 research.status = "queued" 

575 db_session.commit() 

576 

577 # Notify queue processor for potential direct execution 

578 from ..queue.processor_v2 import queue_processor 

579 

580 # Get session ID for password access 

581 session_id = session.get("session_id") 

582 

583 # Pass all parameters needed for direct execution 

584 queue_processor.notify_research_queued( 

585 username, 

586 research_id, 

587 session_id=session_id, 

588 query=query, 

589 mode=mode, 

590 settings_snapshot=research_settings, 

591 model_provider=model_provider, 

592 model=model, 

593 custom_endpoint=custom_endpoint, 

594 search_engine=search_engine, 

595 max_results=max_results, 

596 time_period=time_period, 

597 iterations=iterations, 

598 questions_per_iteration=questions_per_iteration, 

599 strategy=strategy, 

600 ) 

601 

602 return jsonify( 

603 { 

604 "status": "queued", 

605 "research_id": research_id, 

606 "queue_position": max_position + 1, 

607 "message": f"Your research has been queued due to concurrent limit. Position in queue: {max_position + 1}", 

608 } 

609 ) 

610 except Exception as e: 

611 logger.warning(f"Could not recheck active count: {e}") 

612 

613 except Exception: 

614 logger.exception("Failed to create research entry") 

615 return jsonify( 

616 {"status": "error", "message": "Failed to create research entry"} 

617 ), 500 

618 

619 # Only start the research if not queued 

620 if not should_queue:  # branch 620 ↛ 707: condition was always true

621 # Save the research strategy to the database before starting the thread 

622 try: 

623 from ..services.research_service import save_research_strategy 

624 

625 save_research_strategy(research_id, strategy, username=username) 

626 except Exception as e: 

627 logger.warning(f"Could not save research strategy: {e}") 

628 

629 # Debug logging for settings snapshot 

630 snapshot_data = research_settings.get("settings_snapshot", {}) 

631 log_settings(snapshot_data, "Settings snapshot being passed to thread") 

632 if "search.tool" in snapshot_data: 632 ↛ 637line 632 didn't jump to line 637 because the condition on line 632 was always true

633 logger.debug( 

634 f"search.tool in snapshot: {snapshot_data['search.tool']}" 

635 ) 

636 else: 

637 logger.debug("search.tool NOT in snapshot") 

638 

639 # Get the user's password for metrics access in background thread 

640 # Try session password store first 

641 from ...database.session_passwords import session_password_store 

642 

643 session_id = session.get("session_id") 

644 user_password = None 

645 

646 if session_id:  # branch 646 ↛ 652: condition was always true

647 user_password = session_password_store.get_session_password( 

648 username, session_id 

649 ) 

650 

651 # Fallback to g.user_password (set by middleware if temp_auth was used) 

652 if not user_password:  # branch 652 ↛ 653: condition was never true

653 user_password = getattr(g, "user_password", None) 

654 

655 # Last resort: try temp_auth_store 

656 if not user_password:  # branch 656 ↛ 657: condition was never true

657 from ...database.temp_auth import temp_auth_store 

658 

659 auth_token = session.get("temp_auth_token") 

660 if auth_token: 

661 # Use peek_auth to avoid consuming the token 

662 auth_data = temp_auth_store.peek_auth(auth_token) 

663 if auth_data and auth_data[0] == username: 

664 user_password = auth_data[1] 

665 

666 if not user_password:  # branch 666 ↛ 667: condition was never true

667 logger.warning( 

668 f"No password available for metrics access for user {username}" 

669 ) 

670 

671 # Start the research process with the selected parameters 

672 research_thread = start_research_process( 

673 research_id, 

674 query, 

675 mode, 

676 active_research, 

677 termination_flags, 

678 run_research_process, 

679 username=username, # Pass username to the thread 

680 user_password=user_password, # Pass password for database access 

681 model_provider=model_provider, 

682 model=model, 

683 custom_endpoint=custom_endpoint, 

684 search_engine=search_engine, 

685 max_results=max_results, 

686 time_period=time_period, 

687 iterations=iterations, 

688 questions_per_iteration=questions_per_iteration, 

689 strategy=strategy, 

690 settings_snapshot=snapshot_data, # Pass complete settings 

691 ) 

692 

693 # Update the active research record with the actual thread ID 

694 try: 

695 with get_user_db_session(username) as thread_session: 

696 active_record = ( 

697 thread_session.query(UserActiveResearch) 

698 .filter_by(username=username, research_id=research_id) 

699 .first() 

700 ) 

701 if active_record:  # branch 701 ↛ 707

702 active_record.thread_id = str(research_thread.ident) 

703 thread_session.commit() 

704 except Exception as e: 

705 logger.warning(f"Could not update thread ID: {e}") 

706 

707 return jsonify({"status": "success", "research_id": research_id}) 

708 

709 
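As a usage sketch for the handler above: the JSON body fields mirror its data.get(...) lookups, and everything except "query" falls back to database settings when omitted. The server URL and authenticated session below are assumptions, not part of this file.

# Hypothetical client call to POST /api/start_research.
import requests

session = requests.Session()  # assumed to already carry a logged-in cookie
payload = {
    "query": "history of solar panel efficiency",  # required
    "mode": "quick",                # default used by the handler
    "model_provider": "OLLAMA",     # optional; falls back to llm.provider
    "model": "llama3",              # placeholder model name
    "iterations": 2,                # optional; falls back to search.iterations
    "questions_per_iteration": 3,   # optional
    "strategy": "source-based",     # optional; falls back to search.search_strategy
}
resp = session.post("http://localhost:5000/api/start_research", json=payload)
print(resp.json())  # {"status": "success", "research_id": ...} or a "queued" payload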

710@research_bp.route("/api/terminate/<string:research_id>", methods=["POST"]) 

711@login_required 

712def terminate_research(research_id): 

713 """Terminate an in-progress research process""" 

714 username = session.get("username") 

715 if not username:  # branch 715 ↛ 716: condition was never true

716 return jsonify({"error": "Not authenticated"}), 401 

717 

718 # Check if the research exists and is in progress 

719 try: 

720 with get_user_db_session(username) as db_session: 

721 research = ( 

722 db_session.query(ResearchHistory) 

723 .filter_by(id=research_id) 

724 .first() 

725 ) 

726 

727 if not research:  # branch 727 ↛ 728: condition was never true

728 return jsonify( 

729 {"status": "error", "message": "Research not found"} 

730 ), 404 

731 

732 status = research.status 

733 

734 # If it's already completed or suspended, return success 

735 if status in ["completed", "suspended", "error"]:  # branch 735 ↛ 736: condition was never true

736 return jsonify( 

737 { 

738 "status": "success", 

739 "message": f"Research already {status}", 

740 } 

741 ) 

742 

743 # Check if it's in the active_research dict 

744 if research_id not in active_research:  # branch 744 ↛ 746: condition was never true

745 # Update the status in the database 

746 research.status = "suspended" 

747 db_session.commit() 

748 return jsonify( 

749 {"status": "success", "message": "Research terminated"} 

750 ) 

751 

752 # Set the termination flag 

753 termination_flags[research_id] = True 

754 

755 # Log the termination request - using UTC timestamp 

756 timestamp = datetime.now(UTC).isoformat() 

757 termination_message = "Research termination requested by user" 

758 current_progress = active_research[research_id]["progress"] 

759 

760 # Create log entry 

761 log_entry = { 

762 "time": timestamp, 

763 "message": termination_message, 

764 "progress": current_progress, 

765 "metadata": {"phase": "termination"}, 

766 } 

767 

768 # Add to in-memory log 

769 active_research[research_id]["log"].append(log_entry) 

770 

771 # Add to database log 

772 logger.log("MILESTONE", f"Research ended: {termination_message}") 

773 

774 # Update the log in the database 

775 if research.progress_log:  # branch 775 ↛ 784: condition was always true

776 try: 

777 if isinstance(research.progress_log, str):  # branch 777 ↛ 778: condition was never true

778 current_log = json.loads(research.progress_log) 

779 else: 

780 current_log = research.progress_log 

781 except Exception: 

782 current_log = [] 

783 else: 

784 current_log = [] 

785 

786 current_log.append(log_entry) 

787 research.progress_log = current_log 

788 research.status = "suspended" 

789 db_session.commit() 

790 

791 # Emit a socket event for the termination request 

792 try: 

793 event_data = { 

794 "status": "suspended", # Changed from 'terminating' to 'suspended' 

795 "message": "Research was suspended by user request", 

796 } 

797 

798 from ..services.socket_service import SocketIOService 

799 

800 SocketIOService().emit_socket_event( 

801 f"research_progress_{research_id}", event_data 

802 ) 

803 

804 except Exception: 

805 logger.exception("Socket emit error (non-critical)") 

806 

807 return jsonify( 

808 { 

809 "status": "success", 

810 "message": "Research termination requested", 

811 } 

812 ) 

813 except Exception: 

814 logger.exception("Error terminating research") 

815 return jsonify( 

816 {"status": "error", "message": "Failed to terminate research"} 

817 ), 500 

818 

819 
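The route above only flips termination_flags[research_id]; the research thread is expected to notice the flag and stop. A minimal illustration of that cooperative-cancellation contract (the worker internals here are assumed, not taken from this file):

# Illustrative only: a worker honoring the shared termination_flags dict.
def worker_loop(research_id, termination_flags, do_step):
    while not termination_flags.get(research_id):  # set True by /api/terminate
        do_step()
    # exit cleanly; the route has already marked the research "suspended"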

820@research_bp.route("/api/delete/<string:research_id>", methods=["DELETE"]) 

821@login_required 

822def delete_research(research_id): 

823 """Delete a research record""" 

824 username = session.get("username") 

825 if not username: 

826 return jsonify({"error": "Not authenticated"}), 401 

827 

828 try: 

829 with get_user_db_session(username) as db_session: 

830 research = ( 

831 db_session.query(ResearchHistory) 

832 .filter_by(id=research_id) 

833 .first() 

834 ) 

835 

836 if not research: 

837 return jsonify( 

838 {"status": "error", "message": "Research not found"} 

839 ), 404 

840 

841 status = research.status 

842 report_path = research.report_path 

843 

844 # Don't allow deleting research in progress 

845 if status == "in_progress" and research_id in active_research: 

846 return ( 

847 jsonify( 

848 { 

849 "status": "error", 

850 "message": "Cannot delete research that is in progress", 

851 } 

852 ), 

853 400, 

854 ) 

855 

856 # Delete report file if it exists 

857 if report_path and Path(report_path).exists(): 

858 try: 

859 Path(report_path).unlink() 

860 except Exception: 

861 logger.exception("Error removing report file") 

862 

863 # Delete the database record 

864 db_session.delete(research) 

865 db_session.commit() 

866 

867 return jsonify({"status": "success"}) 

868 except Exception: 

869 logger.exception("Error deleting research") 

870 return jsonify( 

871 {"status": "error", "message": "Failed to delete research"} 

872 ), 500 

873 

874 

875@research_bp.route("/api/clear_history", methods=["POST"]) 

876@login_required 

877def clear_history(): 

878 """Clear all research history""" 

879 username = session.get("username") 

880 if not username: 

881 return jsonify({"error": "Not authenticated"}), 401 

882 

883 try: 

884 with get_user_db_session(username) as db_session: 

885 # Get all research records first to clean up files 

886 research_records = db_session.query(ResearchHistory).all() 

887 

888 # Clean up report files 

889 for research in research_records: 

890 # Skip active research 

891 if research.id in active_research: 

892 continue 

893 

894 # Delete report file if it exists 

895 if research.report_path and Path(research.report_path).exists(): 

896 try: 

897 Path(research.report_path).unlink() 

898 except Exception: 

899 logger.exception("Error removing report file") 

900 

901 # Delete records from the database, except active research 

902 if active_research: 

903 db_session.query(ResearchHistory).filter( 

904 ~ResearchHistory.id.in_(list(active_research.keys())) 

905 ).delete(synchronize_session=False) 

906 else: 

907 db_session.query(ResearchHistory).delete( 

908 synchronize_session=False 

909 ) 

910 

911 db_session.commit() 

912 

913 return jsonify({"status": "success"}) 

914 except Exception: 

915 logger.exception("Error clearing history") 

916 return jsonify( 

917 {"status": "error", "message": "Failed to process request"} 

918 ), 500 

919 

920 

921@research_bp.route("/open_file_location", methods=["POST"]) 

922@login_required 

923def open_file_location(): 

924 """Open a file location in the system file explorer""" 

925 data = request.json 

926 file_path = data.get("path") 

927 

928 if not file_path: 

929 return jsonify({"status": "error", "message": "Path is required"}), 400 

930 

931 # Get the user's data directory as the safe root 

932 from ...config.paths import get_data_directory 

933 

934 safe_root = Path(get_data_directory()).resolve() 

935 

936 # Use centralized path validator for security 

937 try: 

938 from ...security.path_validator import PathValidator 

939 

940 file_path = PathValidator.validate_data_path(file_path, str(safe_root)) 

941 

942 except Exception: 

943 logger.exception("Path validation error") 

944 return jsonify({"status": "error", "message": "Invalid path"}), 400 

945 

946 # Check if path exists 

947 if not file_path.exists(): 

948 return jsonify( 

949 {"status": "error", "message": "Path does not exist"} 

950 ), 404 

951 

952 try: 

953 if platform.system() == "Windows": 

954 # On Windows, open the folder and select the file 

955 if file_path.is_file(): 

956 subprocess.run( 

957 ["explorer", "/select,", str(file_path)], check=True 

958 ) 

959 else: 

960 # If it's a directory, just open it 

961 subprocess.run(["explorer", str(file_path)], check=True) 

962 elif platform.system() == "Darwin": # macOS 

963 subprocess.run(["open", str(file_path)], check=True) 

964 else: # Linux and others 

965 subprocess.run(["xdg-open", str(file_path.parent)], check=True) 

966 

967 return jsonify({"status": "success"}) 

968 except Exception: 

969 logger.exception("Error opening a file") 

970 return jsonify( 

971 {"status": "error", "message": "Failed to process request"} 

972 ), 500 

973 

974 

975@research_bp.route("/api/save_raw_config", methods=["POST"]) 

976@login_required 

977def save_raw_config(): 

978 """Save raw configuration""" 

979 data = request.json 

980 raw_config = data.get("raw_config") 

981 

982 if not raw_config: 

983 return ( 

984 jsonify( 

985 {"success": False, "error": "Raw configuration is required"} 

986 ), 

987 400, 

988 ) 

989 

990 try: 

991 from ...security.file_write_verifier import write_file_verified 

992 

993 # Get the config file path (uses centralized path config, respects LDR_DATA_DIR) 

994 config_dir = get_config_directory() 

995 config_path = config_dir / "config.toml" 

996 

997 # Write the configuration to file 

998 write_file_verified( 

999 config_path, 

1000 raw_config, 

1001 "system.allow_config_write", 

1002 context="system configuration file", 

1003 ) 

1004 

1005 return jsonify({"success": True}) 

1006 except Exception: 

1007 logger.exception("Error saving configuration file") 

1008 return jsonify( 

1009 {"success": False, "error": "Failed to process request"} 

1010 ), 500 

1011 

1012 

1013@research_bp.route("/api/history", methods=["GET"]) 

1014@login_required 

1015def get_history(): 

1016 """Get research history""" 

1017 username = session.get("username") 

1018 if not username:  # branch 1018 ↛ 1019: condition was never true

1019 return jsonify({"error": "Not authenticated"}), 401 

1020 

1021 try: 

1022 with get_user_db_session(username) as db_session: 

1023 # Query all research history ordered by created_at 

1024 research_records = ( 

1025 db_session.query(ResearchHistory) 

1026 .order_by(ResearchHistory.created_at.desc()) 

1027 .all() 

1028 ) 

1029 

1030 history_items = [] 

1031 for research in research_records:  # branch 1031 ↛ 1033: loop never started

1032 # Calculate duration if completed 

1033 duration_seconds = None 

1034 if research.completed_at and research.created_at: 

1035 try: 

1036 duration_seconds = calculate_duration( 

1037 research.created_at, research.completed_at 

1038 ) 

1039 except Exception: 

1040 logger.exception("Error calculating duration") 

1041 

1042 # Count documents in the library for this research 

1043 doc_count = ( 

1044 db_session.query(Document) 

1045 .filter_by(research_id=research.id) 

1046 .count() 

1047 ) 

1048 

1049 # Create a history item 

1050 item = { 

1051 "id": research.id, 

1052 "query": research.query, 

1053 "mode": research.mode, 

1054 "status": research.status, 

1055 "created_at": research.created_at, 

1056 "completed_at": research.completed_at, 

1057 "duration_seconds": duration_seconds, 

1058 "report_path": research.report_path, 

1059 "metadata": research.research_meta, # Include metadata for news 

1060 "document_count": doc_count, # Add document count 

1061 } 

1062 

1063 # Add title if it exists 

1064 if hasattr(research, "title") and research.title is not None: 

1065 item["title"] = research.title 

1066 

1067 history_items.append(item) 

1068 

1069 return jsonify({"status": "success", "items": history_items}) 

1070 except Exception: 

1071 logger.exception("Error getting history") 

1072 return jsonify( 

1073 {"status": "error", "message": "Failed to process request"} 

1074 ), 500 

1075 

1076 
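A quick consumer sketch for the history endpoint above (server URL and authenticated session are assumed):

# Hypothetical client call to GET /api/history.
import requests

session = requests.Session()  # assumed to already carry a logged-in cookie
for item in session.get("http://localhost:5000/api/history").json()["items"]:
    print(item["id"], item["status"], item["document_count"], item["query"])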

1077@research_bp.route("/api/research/<string:research_id>") 

1078@login_required 

1079def get_research_details(research_id): 

1080 """Get full details of a research using ORM""" 

1081 username = session.get("username") 

1082 if not username:  # branch 1082 ↛ 1083: condition was never true

1083 return jsonify({"error": "Not authenticated"}), 401 

1084 

1085 try: 

1086 with get_user_db_session(username) as db_session: 

1087 research = ( 

1088 db_session.query(ResearchHistory) 

1089 .filter(ResearchHistory.id == research_id) 

1090 .first() 

1091 ) 

1092 

1093 if not research:  # branch 1093 ↛ 1094: condition was never true

1094 return jsonify({"error": "Research not found"}), 404 

1095 

1096 return jsonify( 

1097 { 

1098 "id": research.id, 

1099 "query": research.query, 

1100 "status": research.status, 

1101 "progress": research.progress, 

1102 "progress_percentage": research.progress or 0, 

1103 "mode": research.mode, 

1104 "created_at": research.created_at, 

1105 "completed_at": research.completed_at, 

1106 "report_path": research.report_path, 

1107 "metadata": research.research_meta, 

1108 } 

1109 ) 

1110 except Exception as e: 

1111 logger.exception(f"Error getting research details: {e!s}") 

1112 return jsonify({"error": "An internal error has occurred"}), 500 

1113 

1114 

1115@research_bp.route("/api/research/<string:research_id>/logs") 

1116@login_required 

1117def get_research_logs(research_id): 

1118 """Get logs for a specific research""" 

1119 username = session.get("username") 

1120 if not username: 

1121 return jsonify({"error": "Not authenticated"}), 401 

1122 

1123 try: 

1124 # First check if the research exists 

1125 with get_user_db_session(username) as db_session: 

1126 research = ( 

1127 db_session.query(ResearchHistory) 

1128 .filter_by(id=research_id) 

1129 .first() 

1130 ) 

1131 if not research: 

1132 return jsonify({"error": "Research not found"}), 404 

1133 

1134 # Get logs from research_logs table 

1135 log_results = ( 

1136 db_session.query(ResearchLog) 

1137 .filter_by(research_id=research_id) 

1138 .order_by(ResearchLog.timestamp) 

1139 .all() 

1140 ) 

1141 

1142 logs = [] 

1143 for row in log_results: 

1144 logs.append( 

1145 { 

1146 "id": row.id, 

1147 "message": row.message, 

1148 "timestamp": row.timestamp, 

1149 "log_type": row.level, 

1150 } 

1151 ) 

1152 

1153 return jsonify(logs) 

1154 

1155 except Exception as e: 

1156 logger.exception(f"Error getting research logs: {e!s}") 

1157 return jsonify({"error": "An internal error has occurred"}), 500 

1158 

1159 

1160@research_bp.route("/api/report/<string:research_id>") 

1161@login_required 

1162def get_research_report(research_id): 

1163 """Get the research report content""" 

1164 username = session.get("username") 

1165 if not username: 

1166 return jsonify({"error": "Not authenticated"}), 401 

1167 

1168 try: 

1169 with get_user_db_session(username) as db_session: 

1170 # Query using ORM 

1171 research = ( 

1172 db_session.query(ResearchHistory) 

1173 .filter_by(id=research_id) 

1174 .first() 

1175 ) 

1176 

1177 if research is None: 

1178 return jsonify({"error": "Research not found"}), 404 

1179 

1180 # Parse metadata if it exists 

1181 metadata = research.research_meta 

1182 

1183 # Get report content using storage abstraction 

1184 from ...storage import get_report_storage 

1185 

1186 # Get settings snapshot for this thread 

1187 settings_snapshot = ( 

1188 metadata.get("settings_snapshot") if metadata else None 

1189 ) 

1190 

1191 # Pass settings_snapshot to avoid thread context issues 

1192 storage = get_report_storage( 

1193 session=db_session, settings_snapshot=settings_snapshot 

1194 ) 

1195 content = storage.get_report(research_id, username) 

1196 

1197 if content is None: 

1198 return jsonify({"error": "Report not found"}), 404 

1199 

1200 # Return the report data with backwards-compatible fields 

1201 # Examples expect 'summary', 'sources', 'findings' at top level 

1202 return jsonify( 

1203 { 

1204 "content": content, 

1205 # Backwards-compatible fields for examples 

1206 "summary": content, # The markdown report is the summary 

1207 "sources": metadata.get("all_links_of_system", []), 

1208 "findings": metadata.get("findings", []), 

1209 "metadata": { 

1210 "title": research.title if research.title else None, 

1211 "query": research.query, 

1212 "mode": research.mode if research.mode else None, 

1213 "created_at": research.created_at 

1214 if research.created_at 

1215 else None, 

1216 "completed_at": research.completed_at 

1217 if research.completed_at 

1218 else None, 

1219 "report_path": research.report_path, 

1220 **metadata, 

1221 }, 

1222 } 

1223 ) 

1224 

1225 except Exception as e: 

1226 logger.exception(f"Error getting research report: {e!s}") 

1227 return jsonify({"error": "An internal error has occurred"}), 500 

1228 

1229 
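A consumer sketch for the report endpoint above; the research id, server URL, and session are placeholders:

# Hypothetical client call to GET /api/report/<research_id>.
import requests
from pathlib import Path

session = requests.Session()  # assumed to already carry a logged-in cookie
research_id = "<uuid-from-start-research>"  # placeholder
data = session.get(f"http://localhost:5000/api/report/{research_id}").json()
Path("report.md").write_text(data["content"])  # "summary", "sources", "findings" are also top-level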

1230@research_bp.route( 

1231 "/api/v1/research/<research_id>/export/<format>", methods=["POST"] 

1232) 

1233@login_required 

1234def export_research_report(research_id, format): 

1235 """Export research report to different formats (LaTeX, Quarto, RIS, or PDF)""" 

1236 try: 

1237 if format not in ["latex", "quarto", "ris", "pdf"]: 

1238 return jsonify( 

1239 { 

1240 "error": "Invalid format. Use 'latex', 'quarto', 'ris', or 'pdf'" 

1241 } 

1242 ), 400 

1243 

1244 # Get research from database 

1245 username = session.get("username") 

1246 if not username: 

1247 return jsonify({"error": "Not authenticated"}), 401 

1248 

1249 try: 

1250 with get_user_db_session(username) as db_session: 

1251 research = ( 

1252 db_session.query(ResearchHistory) 

1253 .filter_by(id=research_id) 

1254 .first() 

1255 ) 

1256 if not research: 

1257 return jsonify({"error": "Research not found"}), 404 

1258 

1259 # Get report using storage abstraction 

1260 from ...storage import get_report_storage 

1261 

1262 # Get metadata for settings snapshot 

1263 metadata = ( 

1264 research.research_meta if research.research_meta else {} 

1265 ) 

1266 settings_snapshot = ( 

1267 metadata.get("settings_snapshot") if metadata else None 

1268 ) 

1269 

1270 storage = get_report_storage( 

1271 session=db_session, settings_snapshot=settings_snapshot 

1272 ) 

1273 

1274 # Get report content directly (in memory) 

1275 report_content = storage.get_report(research_id, username) 

1276 if not report_content: 

1277 return jsonify({"error": "Report content not found"}), 404 

1278 

1279 # Export to requested format (all in memory) 

1280 try: 

1281 # Use title or query for the PDF title 

1282 pdf_title = research.title or research.query 

1283 

1284 # Generate export content in memory 

1285 export_content, filename, mimetype = ( 

1286 export_report_to_memory( 

1287 report_content, format, title=pdf_title 

1288 ) 

1289 ) 

1290 

1291 # Send the file directly from memory 

1292 return send_file( 

1293 io.BytesIO(export_content), 

1294 as_attachment=True, 

1295 download_name=filename, 

1296 mimetype=mimetype, 

1297 ) 

1298 except Exception as e: 

1299 logger.exception(f"Error exporting report: {e!s}") 

1300 return jsonify( 

1301 { 

1302 "error": f"Failed to export to {format}. Please try again later." 

1303 } 

1304 ), 500 

1305 

1306 except Exception as e: 

1307 logger.exception(f"Error in export endpoint: {e!s}") 

1308 return jsonify({"error": "An internal error has occurred"}), 500 

1309 

1310 except Exception as e: 

1311 logger.exception(f"Unexpected error in export endpoint: {e!s}") 

1312 return jsonify({"error": "An internal error has occurred"}), 500 

1313 

1314 
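Because the export endpoint streams bytes via send_file, a client simply writes the response body to disk. A sketch under the same placeholder assumptions as above:

# Hypothetical client call to POST /api/v1/research/<id>/export/pdf.
import requests

session = requests.Session()  # assumed to already carry a logged-in cookie
research_id = "<uuid-from-start-research>"  # placeholder
resp = session.post(f"http://localhost:5000/api/v1/research/{research_id}/export/pdf")
with open("report.pdf", "wb") as f:
    f.write(resp.content)  # exported bytes; filename and mimetype are set by send_file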

1315@research_bp.route("/api/research/<string:research_id>/status") 

1316@limiter.exempt 

1317@login_required 

1318def get_research_status(research_id): 

1319 """Get the status of a research process""" 

1320 username = session.get("username") 

1321 if not username:  # branch 1321 ↛ 1322: condition was never true

1322 return jsonify({"error": "Not authenticated"}), 401 

1323 

1324 try: 

1325 with get_user_db_session(username) as db_session: 

1326 research = ( 

1327 db_session.query(ResearchHistory) 

1328 .filter_by(id=research_id) 

1329 .first() 

1330 ) 

1331 

1332 if research is None:  # branch 1332 ↛ 1333: condition was never true

1333 return jsonify({"error": "Research not found"}), 404 

1334 

1335 status = research.status 

1336 progress = research.progress 

1337 completed_at = research.completed_at 

1338 report_path = research.report_path 

1339 metadata = research.research_meta or {} 

1340 

1341 # Extract and format error information for better UI display 

1342 error_info = {} 

1343 if metadata and "error" in metadata:  # branch 1343 ↛ 1344: condition was never true

1344 error_msg = metadata["error"] 

1345 error_type = "unknown" 

1346 

1347 # Detect specific error types 

1348 if "timeout" in error_msg.lower(): 

1349 error_type = "timeout" 

1350 error_info = { 

1351 "type": "timeout", 

1352 "message": "LLM service timed out during synthesis. This may be due to high server load or connectivity issues.", 

1353 "suggestion": "Try again later or use a smaller query scope.", 

1354 } 

1355 elif ( 

1356 "token limit" in error_msg.lower() 

1357 or "context length" in error_msg.lower() 

1358 ): 

1359 error_type = "token_limit" 

1360 error_info = { 

1361 "type": "token_limit", 

1362 "message": "The research query exceeded the AI model's token limit during synthesis.", 

1363 "suggestion": "Try using a more specific query or reduce the research scope.", 

1364 } 

1365 elif ( 

1366 "final answer synthesis fail" in error_msg.lower() 

1367 or "llm error" in error_msg.lower() 

1368 ): 

1369 error_type = "llm_error" 

1370 error_info = { 

1371 "type": "llm_error", 

1372 "message": "The AI model encountered an error during final answer synthesis.", 

1373 "suggestion": "Check that your LLM service is running correctly or try a different model.", 

1374 } 

1375 elif "ollama" in error_msg.lower(): 

1376 error_type = "ollama_error" 

1377 error_info = { 

1378 "type": "ollama_error", 

1379 "message": "The Ollama service is not responding properly.", 

1380 "suggestion": "Make sure Ollama is running with 'ollama serve' and the model is downloaded.", 

1381 } 

1382 elif "connection" in error_msg.lower(): 

1383 error_type = "connection" 

1384 error_info = { 

1385 "type": "connection", 

1386 "message": "Connection error with the AI service.", 

1387 "suggestion": "Check your internet connection and AI service status.", 

1388 } 

1389 elif metadata.get("solution"): 

1390 # Use the solution provided in metadata if available 

1391 error_info = { 

1392 "type": error_type, 

1393 "message": error_msg, 

1394 "suggestion": metadata.get("solution"), 

1395 } 

1396 else: 

1397 # Generic error with the original message 

1398 error_info = { 

1399 "type": error_type, 

1400 "message": error_msg, 

1401 "suggestion": "Try again with a different query or check the application logs.", 

1402 } 

1403 

1404 # Add error_info to the response if it exists 

1405 if error_info:  # branch 1405 ↛ 1406: condition was never true

1406 metadata["error_info"] = error_info 

1407 

1408 # Get the latest milestone log for this research 

1409 latest_milestone = None 

1410 try: 

1411 milestone_log = ( 

1412 db_session.query(ResearchLog) 

1413 .filter_by(research_id=research_id, level="MILESTONE") 

1414 .order_by(ResearchLog.timestamp.desc()) 

1415 .first() 

1416 ) 

1417 if milestone_log:  # branch 1417 ↛ 1418: condition was never true

1418 latest_milestone = { 

1419 "message": milestone_log.message, 

1420 "time": milestone_log.timestamp.isoformat() 

1421 if milestone_log.timestamp 

1422 else None, 

1423 "type": "MILESTONE", 

1424 } 

1425 logger.debug( 

1426 f"Found latest milestone for research {research_id}: {milestone_log.message}" 

1427 ) 

1428 else: 

1429 logger.debug( 

1430 f"No milestone logs found for research {research_id}" 

1431 ) 

1432 except Exception as e: 

1433 logger.warning(f"Error fetching latest milestone: {e!s}") 

1434 

1435 response_data = { 

1436 "status": status, 

1437 "progress": progress, 

1438 "completed_at": completed_at, 

1439 "report_path": report_path, 

1440 "metadata": metadata, 

1441 } 

1442 

1443 # Include latest milestone as a log_entry for frontend compatibility 

1444 if latest_milestone:  # branch 1444 ↛ 1445: condition was never true

1445 response_data["log_entry"] = latest_milestone 

1446 

1447 return jsonify(response_data) 

1448 except Exception: 

1449 logger.exception("Error getting research status") 

1450 return jsonify({"error": "Error checking research status"}), 500 

1451 

1452 
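Since the status route above is exempt from the rate limiter, a client can poll it while a research runs. A polling sketch (placeholders as above):

# Hypothetical polling loop against GET /api/research/<research_id>/status.
import time
import requests

session = requests.Session()  # assumed to already carry a logged-in cookie
research_id = "<uuid-from-start-research>"  # placeholder
while True:
    info = session.get(
        f"http://localhost:5000/api/research/{research_id}/status"
    ).json()
    print(info["status"], info.get("progress"))
    if info["status"] in ("completed", "suspended", "error"):
        break
    time.sleep(5)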

1453@research_bp.route("/api/queue/status", methods=["GET"]) 

1454@login_required 

1455def get_queue_status(): 

1456 """Get the current queue status for the user""" 

1457 username = session.get("username") 

1458 

1459 from ..queue import QueueManager 

1460 

1461 try: 

1462 queue_items = QueueManager.get_user_queue(username) 

1463 

1464 return jsonify( 

1465 { 

1466 "status": "success", 

1467 "queue": queue_items, 

1468 "total": len(queue_items), 

1469 } 

1470 ) 

1471 except Exception: 

1472 logger.exception("Error getting queue status") 

1473 return jsonify( 

1474 {"status": "error", "message": "Failed to process request"} 

1475 ), 500 

1476 

1477 

1478@research_bp.route("/api/queue/<string:research_id>/position", methods=["GET"]) 

1479@login_required 

1480def get_queue_position(research_id): 

1481 """Get the queue position for a specific research""" 

1482 username = session.get("username") 

1483 

1484 from ..queue import QueueManager 

1485 

1486 try: 

1487 position = QueueManager.get_queue_position(username, research_id) 

1488 

1489 if position is None: 

1490 return jsonify( 

1491 {"status": "error", "message": "Research not found in queue"} 

1492 ), 404 

1493 

1494 return jsonify({"status": "success", "position": position}) 

1495 except Exception: 

1496 logger.exception("Error getting queue position") 

1497 return jsonify( 

1498 {"status": "error", "message": "Failed to process request"} 

1499 ), 500 

1500 

1501 

1502@research_bp.route("/api/config/limits", methods=["GET"]) 

1503def get_upload_limits(): 

1504 """ 

1505 Get file upload configuration limits. 

1506 

1507 Returns the backend's authoritative limits for file uploads, 

1508 allowing the frontend to stay in sync without hardcoding values. 

1509 """ 

1510 return jsonify( 

1511 { 

1512 "max_file_size": FileUploadValidator.MAX_FILE_SIZE, 

1513 "max_files": FileUploadValidator.MAX_FILES_PER_REQUEST, 

1514 "allowed_mime_types": list(FileUploadValidator.ALLOWED_MIME_TYPES), 

1515 } 

1516 ) 

1517 

1518 
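The limits endpoint lets a client validate files before uploading. A pre-flight sketch (file path and server URL are placeholders; this endpoint requires no login):

# Hypothetical pre-flight check against GET /api/config/limits.
import requests
from pathlib import Path

limits = requests.get("http://localhost:5000/api/config/limits").json()
pdf = Path("paper.pdf")  # placeholder file
if pdf.stat().st_size > limits["max_file_size"]:
    raise ValueError("file exceeds the server's max_file_size limit")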

1519@research_bp.route("/api/upload/pdf", methods=["POST"]) 

1520@login_required 

1521@upload_rate_limit 

1522def upload_pdf(): 

1523 """ 

1524 Upload and extract text from PDF files with comprehensive security validation. 

1525 

1526 Security features: 

1527 - Rate limiting (10 uploads/min, 100/hour per user) 

1528 - File size validation (50MB max per file) 

1529 - File count validation (100 files max) 

1530 - PDF structure validation 

1531 - MIME type validation 

1532 

1533 Performance improvements: 

1534 - Single-pass PDF processing (text + metadata) 

1535 - Optimized extraction service 

1536 """ 

1537 username = session.get("username") 

1538 if not username:  # branch 1538 ↛ 1539: condition was never true

1539 return jsonify({"error": "Not authenticated"}), 401 

1540 

1541 try: 

1542 # Early request size validation (before reading any files) 

1543 # This prevents memory exhaustion from chunked encoding attacks 

1544 max_request_size = ( 

1545 FileUploadValidator.MAX_FILES_PER_REQUEST 

1546 * FileUploadValidator.MAX_FILE_SIZE 

1547 ) 

1548 if request.content_length and request.content_length > max_request_size:  # branch 1548 ↛ 1549: condition was never true

1549 return jsonify( 

1550 { 

1551 "error": f"Request too large. Maximum size is {max_request_size // (1024 * 1024)}MB" 

1552 } 

1553 ), 413 

1554 

1555 # Check if files are present in the request 

1556 if "files" not in request.files: 

1557 return jsonify({"error": "No files provided"}), 400 

1558 

1559 files = request.files.getlist("files") 

1560 if not files or files[0].filename == "": 

1561 return jsonify({"error": "No files selected"}), 400 

1562 

1563 # Validate file count 

1564 is_valid, error_msg = FileUploadValidator.validate_file_count( 

1565 len(files) 

1566 ) 

1567 if not is_valid:  # branch 1567 ↛ 1568: condition was never true

1568 return jsonify({"error": error_msg}), 400 

1569 

1570 # Get PDF extraction service 

1571 pdf_service = get_pdf_extraction_service() 

1572 

1573 extracted_texts = [] 

1574 total_files = len(files) 

1575 processed_files = 0 

1576 errors = [] 

1577 

1578 for file in files: 

1579 if not file or not file.filename:  # branch 1579 ↛ 1580: condition was never true

1580 errors.append("Unnamed file: Skipped") 

1581 continue 

1582 

1583 try: 

1584 # Read file content (with disk spooling, large files are read from temp file) 

1585 pdf_content = file.read() 

1586 

1587 # Comprehensive validation 

1588 is_valid, error_msg = FileUploadValidator.validate_upload( 

1589 filename=file.filename, 

1590 file_content=pdf_content, 

1591 content_length=file.content_length, 

1592 ) 

1593 

1594 if not is_valid:  # branch 1594 ↛ 1599: condition was always true

1595 errors.append(f"{file.filename}: {error_msg}") 

1596 continue 

1597 

1598 # Extract text and metadata in single pass (performance fix) 

1599 result = pdf_service.extract_text_and_metadata( 

1600 pdf_content, file.filename 

1601 ) 

1602 

1603 if result["success"]: 

1604 extracted_texts.append( 

1605 { 

1606 "filename": result["filename"], 

1607 "text": result["text"], 

1608 "size": result["size"], 

1609 "pages": result["pages"], 

1610 } 

1611 ) 

1612 processed_files += 1 

1613 else: 

1614 errors.append(f"{file.filename}: {result['error']}") 

1615 

1616 except Exception: 

1617 logger.exception(f"Error processing {file.filename}") 

1618 errors.append(f"{file.filename}: Error processing file") 

1619 finally: 

1620 # Close the file stream to release resources 

1621 try: 

1622 file.close() 

1623 except Exception: 

1624 pass 

1625 

1626 # Prepare response 

1627 response_data = { 

1628 "status": "success", 

1629 "processed_files": processed_files, 

1630 "total_files": total_files, 

1631 "extracted_texts": extracted_texts, 

1632 "combined_text": "\n\n".join( 

1633 [ 

1634 f"--- From {item['filename']} ---\n{item['text']}" 

1635 for item in extracted_texts 

1636 ] 

1637 ), 

1638 "errors": errors, 

1639 } 

1640 

1641 if processed_files == 0:  # branch 1641 ↛ 1650: condition was always true

1642 return jsonify( 

1643 { 

1644 "status": "error", 

1645 "message": "No files were processed successfully", 

1646 "errors": errors, 

1647 } 

1648 ), 400 

1649 

1650 return jsonify(response_data) 

1651 

1652 except Exception: 

1653 logger.exception("Error processing PDF upload") 

1654 return jsonify({"error": "Failed to process PDF files"}), 500
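Finally, a client sketch for the upload endpoint above; the multipart field must be named "files", matching request.files.getlist("files"). Server URL, session, and file path are placeholders.

# Hypothetical multipart upload to POST /api/upload/pdf.
import requests

session = requests.Session()  # assumed to already carry a logged-in cookie
with open("paper.pdf", "rb") as f:  # placeholder path
    resp = session.post(
        "http://localhost:5000/api/upload/pdf",
        files=[("files", ("paper.pdf", f, "application/pdf"))],
    )
result = resp.json()
print(result["processed_files"], "of", result["total_files"], "files extracted")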