# Coverage report: src/local_deep_research/web/routes/research_routes.py (45% of 677 statements covered)
import io
import json
import platform
import subprocess
from datetime import datetime, UTC
from pathlib import Path

from flask import (
    Blueprint,
    g,
    jsonify,
    redirect,
    request,
    send_file,
    session,
    url_for,
)
from loguru import logger
from sqlalchemy import func

from ...settings.logger import log_settings

# Security imports
from ...security import FileUploadValidator, upload_rate_limit
from ...config.paths import get_config_directory

# Services imports
from ..services.pdf_extraction_service import get_pdf_extraction_service

from ...database.models import (
    QueuedResearch,
    ResearchHistory,
    ResearchLog,
    UserActiveResearch,
)
from ...database.models.library import Document
from ...database.session_context import get_user_db_session
from ..auth.decorators import login_required
from ..models.database import calculate_duration
from ..services.research_service import (
    export_report_to_memory,
    run_research_process,
    start_research_process,
)
from ..utils.rate_limiter import limiter
from ..utils.templates import render_template_with_defaults
from .globals import active_research, termination_flags

# Create a Blueprint for the research application
research_bp = Blueprint("research", __name__)
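

# A minimal sketch of how this blueprint is typically wired into an app
# (hypothetical app-factory code; the real registration lives elsewhere in
# the package):
#
#     from flask import Flask
#
#     app = Flask(__name__)
#     app.register_blueprint(research_bp)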


# Add static route at the root level
@research_bp.route("/redirect-static/<path:path>")
def redirect_static(path):
    """Redirect old static URLs to new static URLs"""
    return redirect(url_for("static", filename=path))


@research_bp.route("/progress/<string:research_id>")
@login_required
def progress_page(research_id):
    """Render the research progress page"""
    return render_template_with_defaults("pages/progress.html")


@research_bp.route("/details/<string:research_id>")
@login_required
def research_details_page(research_id):
    """Render the research details page"""
    return render_template_with_defaults("pages/details.html")


@research_bp.route("/results/<string:research_id>")
@login_required
def results_page(research_id):
    """Render the research results page"""
    return render_template_with_defaults("pages/results.html")


@research_bp.route("/history")
@login_required
def history_page():
    """Render the history page"""
    return render_template_with_defaults("pages/history.html")


# Add missing settings routes
@research_bp.route("/settings", methods=["GET"])
@login_required
def settings_page():
    """Render the settings page"""
    return render_template_with_defaults("settings_dashboard.html")


@research_bp.route("/settings/main", methods=["GET"])
@login_required
def main_config_page():
    """Render the main settings config page"""
    return render_template_with_defaults("main_config.html")


@research_bp.route("/settings/collections", methods=["GET"])
@login_required
def collections_config_page():
    """Render the collections config page"""
    return render_template_with_defaults("collections_config.html")


@research_bp.route("/settings/api_keys", methods=["GET"])
@login_required
def api_keys_config_page():
    """Render the API keys config page"""
    return render_template_with_defaults("api_keys_config.html")


@research_bp.route("/settings/search_engines", methods=["GET"])
@login_required
def search_engines_config_page():
    """Render the search engines config page"""
    return render_template_with_defaults("search_engines_config.html")


@research_bp.route("/settings/llm", methods=["GET"])
@login_required
def llm_config_page():
    """Render the LLM config page"""
    return render_template_with_defaults("llm_config.html")


@research_bp.route("/api/start_research", methods=["POST"])
@login_required
def start_research():
    data = request.json
    # Debug logging to trace model parameter
    logger.debug(f"Received request data: {data}")
    logger.debug(f"Request data keys: {list(data.keys()) if data else 'None'}")

    # Check if this is a news search
    metadata = data.get("metadata", {})
    if metadata.get("is_news_search"):
        logger.info(
            f"News search request received: triggered_by={metadata.get('triggered_by', 'unknown')}"
        )

    query = data.get("query")
    mode = data.get("mode", "quick")

    # Replace date placeholders if they exist
    if query and "YYYY-MM-DD" in query:
        # Use the current UTC date for the placeholder
        current_date = datetime.now(UTC).strftime("%Y-%m-%d")

        original_query = query
        query = query.replace("YYYY-MM-DD", current_date)
        logger.info(
            f"Replaced date placeholder in query: {original_query[:100]}... -> {query[:100]}..."
        )
        logger.info(f"Using date: {current_date}")

        # Update metadata to track the replacement
        if not metadata:
            metadata = {}
        metadata["original_query"] = original_query
        metadata["processed_query"] = query
        metadata["date_replaced"] = current_date
        data["metadata"] = metadata
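
        # For example (hypothetical query): "papers published after YYYY-MM-DD"
        # submitted on 2026-01-11 would become "papers published after 2026-01-11".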

    # Get parameters from request or use database settings
    from ..services.settings_manager import SettingsManager

    username = session.get("username")
    if not username:
        return jsonify({"error": "Not authenticated"}), 401

    with get_user_db_session(username) as db_session:
        settings_manager = SettingsManager(db_session=db_session)

        # Get model provider and model selections; use database settings if not provided
        model_provider = data.get("model_provider")
        if not model_provider:
            model_provider = settings_manager.get_setting("llm.provider", "OLLAMA")
            logger.debug(
                f"No model_provider in request, using database setting: {model_provider}"
            )
        else:
            logger.debug(f"Using model_provider from request: {model_provider}")

        model = data.get("model")
        if not model:
            model = settings_manager.get_setting("llm.model", None)
            logger.debug(f"No model in request, using database setting: {model}")
        else:
            logger.debug(f"Using model from request: {model}")

        custom_endpoint = data.get("custom_endpoint")
        if not custom_endpoint and model_provider == "OPENAI_ENDPOINT":
            custom_endpoint = settings_manager.get_setting(
                "llm.openai_endpoint.url", None
            )
            logger.debug(
                f"No custom_endpoint in request, using database setting: {custom_endpoint}"
            )

        # Get Ollama URL from request or settings
        ollama_url = data.get("ollama_url")
        if not ollama_url and model_provider == "OLLAMA":
            ollama_url = settings_manager.get_setting(
                "llm.ollama.url", "http://localhost:11434"
            )
            logger.debug(
                f"No ollama_url in request, using database setting: {ollama_url}"
            )

        search_engine = data.get("search_engine") or data.get("search_tool")
        if not search_engine:
            search_engine = settings_manager.get_setting("search.tool", "searxng")

        max_results = data.get("max_results")
        time_period = data.get("time_period")

        iterations = data.get("iterations")
        if iterations is None:
            iterations = settings_manager.get_setting("search.iterations", 5)

        questions_per_iteration = data.get("questions_per_iteration")
        if questions_per_iteration is None:
            questions_per_iteration = settings_manager.get_setting(
                "search.questions_per_iteration", 5
            )

        # Get strategy from request or database
        strategy = data.get("strategy")
        if not strategy:
            strategy = settings_manager.get_setting(
                "search.search_strategy", "source-based"
            )

    # Note: db_session already closed by context manager above

    # Debug logging for model parameter specifically
    logger.debug(
        f"Extracted model value: '{model}' (type: {type(model).__name__})"
    )

    # Log the selections for troubleshooting
    logger.info(
        f"Starting research with provider: {model_provider}, model: {model}, search engine: {search_engine}"
    )
    logger.info(
        f"Additional parameters: max_results={max_results}, time_period={time_period}, iterations={iterations}, questions={questions_per_iteration}, strategy={strategy}"
    )

    if not query:
        return jsonify({"status": "error", "message": "Query is required"}), 400

    # Validate required parameters based on provider
    if model_provider == "OPENAI_ENDPOINT" and not custom_endpoint:
        return (
            jsonify(
                {
                    "status": "error",
                    "message": "Custom endpoint URL is required for OpenAI endpoint provider",
                }
            ),
            400,
        )

    if not model:
        logger.error(
            f"No model specified or configured. Provider: {model_provider}"
        )
        return jsonify(
            {
                "status": "error",
                "message": "Model is required. Please configure a model in the settings.",
            }
        ), 400

    # Check if the user has too many active researches
    username = session.get("username")

    # Get max concurrent researches from settings
    from ...settings import SettingsManager

    with get_user_db_session() as db_session:
        settings_manager = SettingsManager(db_session)
        max_concurrent_researches = settings_manager.get_setting(
            "app.max_concurrent_researches", 3
        )

    # Use existing session from g to check active researches
    try:
        if hasattr(g, "db_session") and g.db_session:
            # Count active researches for this user
            active_count = (
                g.db_session.query(UserActiveResearch)
                .filter_by(username=username, status="in_progress")
                .count()
            )

            # Debug logging
            logger.info(
                f"Active research count for {username}: {active_count}/{max_concurrent_researches}"
            )

            should_queue = active_count >= max_concurrent_researches
            logger.info(f"Should queue new research: {should_queue}")
        else:
            logger.warning(
                "No database session available to check active researches"
            )
            should_queue = False
    except Exception:
        logger.exception("Failed to check active researches")
        # Default to not queueing if we can't check
        should_queue = False

    # Create a record in the database with explicit UTC timestamp
    import threading
    import uuid

    created_at = datetime.now(UTC).isoformat()
    research_id = str(uuid.uuid4())

    # Create organized research metadata with settings snapshot
    research_settings = {
        # Direct submission parameters
        "submission": {
            "model_provider": model_provider,
            "model": model,
            "custom_endpoint": custom_endpoint,
            "search_engine": search_engine,
            "max_results": max_results,
            "time_period": time_period,
            "iterations": iterations,
            "questions_per_iteration": questions_per_iteration,
            "strategy": strategy,
        },
        # System information
        "system": {
            "timestamp": created_at,
            "user": username,
            "version": "1.0",  # Track metadata version for future migrations
            "server_url": request.host_url,  # Add server URL for link generation
        },
    }

    # Add any additional metadata from request
    additional_metadata = data.get("metadata", {})
    if additional_metadata:
        research_settings.update(additional_metadata)

    # Get complete settings snapshot for this research
    try:
        from local_deep_research.settings import SettingsManager

        # Use the existing session from g (set by middleware)
        if hasattr(g, "db_session") and g.db_session:
            # Create SettingsManager with the existing session
            username = session.get("username")
            # Ensure any pending changes are committed
            try:
                g.db_session.commit()
            except Exception:
                g.db_session.rollback()
            settings_manager = SettingsManager(g.db_session)
            # Get all current settings as a snapshot (bypass cache to ensure fresh data)
            all_settings = settings_manager.get_all_settings(bypass_cache=True)

            # Add settings snapshot to metadata
            research_settings["settings_snapshot"] = all_settings
            logger.info(
                f"Captured {len(all_settings)} settings for research {research_id}"
            )
        else:
            # If no session in g, create a new one temporarily to get settings
            logger.warning(
                "No database session in g, creating temporary session for settings snapshot"
            )
            from ...database.thread_local_session import get_metrics_session

            # Get password from session or g
            password = getattr(g, "user_password", None)
            if not password:
                # Try to get from session password store
                from ...database.session_passwords import session_password_store

                session_id = session.get("session_id")
                if session_id:
                    password = session_password_store.get_session_password(
                        username, session_id
                    )

            if password:
                temp_session = get_metrics_session(username, password)
                if temp_session:
                    username = session.get("username")
                    settings_manager = SettingsManager(temp_session)
                    all_settings = settings_manager.get_all_settings(
                        bypass_cache=True
                    )
                    research_settings["settings_snapshot"] = all_settings
                    logger.info(
                        f"Captured {len(all_settings)} settings using temporary session for research {research_id}"
                    )
                else:
                    logger.error(
                        "Failed to create temporary session for settings snapshot"
                    )
                    raise Exception(
                        "Cannot create research without settings snapshot"
                    )
            else:
                logger.error(
                    "No password available to create session for settings snapshot"
                )
                raise Exception(
                    "Cannot create research without settings snapshot"
                )
    except Exception:
        logger.exception("Failed to capture settings snapshot")
        # Cannot continue without settings snapshot for thread-based research
        return jsonify(
            {
                "status": "error",
                "message": "Failed to capture settings for research. Please try again.",
            }
        ), 500

    # Use existing session from g
    username = session.get("username")
    if not username:
        return jsonify({"status": "error", "message": "Not authenticated"}), 401

    try:
        # Use existing session from g
        if hasattr(g, "db_session") and g.db_session:
            db_session = g.db_session
            # Determine initial status based on whether we need to queue
            initial_status = "queued" if should_queue else "in_progress"

            research = ResearchHistory(
                id=research_id,  # Set UUID as primary key
                query=query,
                mode=mode,
                status=initial_status,
                created_at=created_at,
                progress_log=[{"time": created_at, "progress": 0}],
                research_meta=research_settings,
            )
            db_session.add(research)
            db_session.commit()
            logger.info(
                f"Created research entry with UUID: {research_id}, status: {initial_status}"
            )

            if should_queue:
                # Add to queue instead of starting immediately.
                # Get the next position in queue for this user.
                max_position = (
                    db_session.query(func.max(QueuedResearch.position))
                    .filter_by(username=username)
                    .scalar()
                    or 0
                )

                queued_record = QueuedResearch(
                    username=username,
                    research_id=research_id,
                    query=query,
                    mode=mode,
                    settings_snapshot=research_settings,
                    position=max_position + 1,
                )
                db_session.add(queued_record)
                db_session.commit()
                logger.info(
                    f"Queued research {research_id} at position {max_position + 1} for user {username}"
                )

                # Notify queue processor with all parameters for potential direct execution
                from ..queue.processor_v2 import queue_processor

                # Get session ID for password access
                session_id = session.get("session_id")

                # Pass all parameters needed for direct execution
                queue_processor.notify_research_queued(
                    username,
                    research_id,
                    session_id=session_id,
                    query=query,
                    mode=mode,
                    settings_snapshot=research_settings,
                    model_provider=model_provider,
                    model=model,
                    custom_endpoint=custom_endpoint,
                    search_engine=search_engine,
                    max_results=max_results,
                    time_period=time_period,
                    iterations=iterations,
                    questions_per_iteration=questions_per_iteration,
                    strategy=strategy,
                )

                # Return queued status
                return jsonify(
                    {
                        "status": "queued",
                        "research_id": research_id,
                        "queue_position": max_position + 1,
                        "message": f"Your research has been queued. Position in queue: {max_position + 1}",
                    }
                )
            else:
                # Start immediately: create active research tracking record
                active_record = UserActiveResearch(
                    username=username,
                    research_id=research_id,
                    status="in_progress",
                    thread_id=str(threading.current_thread().ident),
                    settings_snapshot=research_settings,
                )
                db_session.add(active_record)
                db_session.commit()
                logger.info(
                    f"Created active research record for user {username}"
                )

                # Double-check the count after committing to handle race
                # conditions, using the session we already have
                try:
                    final_count = (
                        db_session.query(UserActiveResearch)
                        .filter_by(username=username, status="in_progress")
                        .count()
                    )
                    logger.info(
                        f"Final active count after commit: {final_count}/{max_concurrent_researches}"
                    )

                    if final_count > max_concurrent_researches:
                        # We exceeded the limit due to a race condition:
                        # remove this record and queue instead
                        logger.warning(
                            f"Race condition detected: {final_count} > {max_concurrent_researches}, moving to queue"
                        )
                        db_session.delete(active_record)
                        db_session.commit()

                        # Add to queue
                        max_position = (
                            db_session.query(func.max(QueuedResearch.position))
                            .filter_by(username=username)
                            .scalar()
                            or 0
                        )

                        queued_record = QueuedResearch(
                            username=username,
                            research_id=research_id,
                            query=query,
                            mode=mode,
                            settings_snapshot=research_settings,
                            position=max_position + 1,
                        )
                        db_session.add(queued_record)

                        # Update research status to queued
                        research.status = "queued"
                        db_session.commit()

                        # Notify queue processor for potential direct execution
                        from ..queue.processor_v2 import queue_processor

                        # Get session ID for password access
                        session_id = session.get("session_id")

                        # Pass all parameters needed for direct execution
                        queue_processor.notify_research_queued(
                            username,
                            research_id,
                            session_id=session_id,
                            query=query,
                            mode=mode,
                            settings_snapshot=research_settings,
                            model_provider=model_provider,
                            model=model,
                            custom_endpoint=custom_endpoint,
                            search_engine=search_engine,
                            max_results=max_results,
                            time_period=time_period,
                            iterations=iterations,
                            questions_per_iteration=questions_per_iteration,
                            strategy=strategy,
                        )

                        return jsonify(
                            {
                                "status": "queued",
                                "research_id": research_id,
                                "queue_position": max_position + 1,
                                "message": f"Your research has been queued due to concurrent limit. Position in queue: {max_position + 1}",
                            }
                        )
                except Exception as e:
                    logger.warning(f"Could not recheck active count: {e}")

    except Exception:
        logger.exception("Failed to create research entry")
        return jsonify(
            {"status": "error", "message": "Failed to create research entry"}
        ), 500

    # Only start the research if not queued
    if not should_queue:
        # Save the research strategy to the database before starting the thread
        try:
            from ..services.research_service import save_research_strategy

            save_research_strategy(research_id, strategy, username=username)
        except Exception as e:
            logger.warning(f"Could not save research strategy: {e}")

        # Debug logging for settings snapshot
        snapshot_data = research_settings.get("settings_snapshot", {})
        log_settings(snapshot_data, "Settings snapshot being passed to thread")
        if "search.tool" in snapshot_data:
            logger.debug(
                f"search.tool in snapshot: {snapshot_data['search.tool']}"
            )
        else:
            logger.debug("search.tool NOT in snapshot")

        # Get the user's password for metrics access in the background thread;
        # try the session password store first
        from ...database.session_passwords import session_password_store

        session_id = session.get("session_id")
        user_password = None

        if session_id:
            user_password = session_password_store.get_session_password(
                username, session_id
            )

        # Fallback to g.user_password (set by middleware if temp_auth was used)
        if not user_password:
            user_password = getattr(g, "user_password", None)

        # Last resort: try temp_auth_store
        if not user_password:
            from ...database.temp_auth import temp_auth_store

            auth_token = session.get("temp_auth_token")
            if auth_token:
                # Use peek_auth to avoid consuming the token
                auth_data = temp_auth_store.peek_auth(auth_token)
                if auth_data and auth_data[0] == username:
                    user_password = auth_data[1]

        if not user_password:
            logger.warning(
                f"No password available for metrics access for user {username}"
            )

        # Start the research process with the selected parameters
        research_thread = start_research_process(
            research_id,
            query,
            mode,
            active_research,
            termination_flags,
            run_research_process,
            username=username,  # Pass username to the thread
            user_password=user_password,  # Pass password for database access
            model_provider=model_provider,
            model=model,
            custom_endpoint=custom_endpoint,
            search_engine=search_engine,
            max_results=max_results,
            time_period=time_period,
            iterations=iterations,
            questions_per_iteration=questions_per_iteration,
            strategy=strategy,
            settings_snapshot=snapshot_data,  # Pass complete settings
        )

        # Update the active research record with the actual thread ID
        try:
            with get_user_db_session(username) as thread_session:
                active_record = (
                    thread_session.query(UserActiveResearch)
                    .filter_by(username=username, research_id=research_id)
                    .first()
                )
                if active_record:
                    active_record.thread_id = str(research_thread.ident)
                    thread_session.commit()
        except Exception as e:
            logger.warning(f"Could not update thread ID: {e}")

    return jsonify({"status": "success", "research_id": research_id})
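

# A minimal sketch of submitting a research job to the endpoint above
# (hypothetical client code; host, port, and cookie value are assumptions,
# while the path and payload keys come from the handler itself):
#
#     import requests
#
#     resp = requests.post(
#         "http://localhost:5000/api/start_research",
#         json={"query": "history of solar power", "mode": "quick"},
#         cookies={"session": "<authenticated-session-cookie>"},
#     )
#     print(resp.json())  # {"status": "success", "research_id": "..."}
#                         # or {"status": "queued", "queue_position": ...}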


@research_bp.route("/api/terminate/<string:research_id>", methods=["POST"])
@login_required
def terminate_research(research_id):
    """Terminate an in-progress research process"""
    username = session.get("username")
    if not username:
        return jsonify({"error": "Not authenticated"}), 401

    # Check if the research exists and is in progress
    try:
        with get_user_db_session(username) as db_session:
            research = (
                db_session.query(ResearchHistory)
                .filter_by(id=research_id)
                .first()
            )

            if not research:
                return jsonify(
                    {"status": "error", "message": "Research not found"}
                ), 404

            status = research.status

            # If it's already completed or suspended, return success
            if status in ["completed", "suspended", "error"]:
                return jsonify(
                    {
                        "status": "success",
                        "message": f"Research already {status}",
                    }
                )

            # Check if it's in the active_research dict
            if research_id not in active_research:
                # Update the status in the database
                research.status = "suspended"
                db_session.commit()
                return jsonify(
                    {"status": "success", "message": "Research terminated"}
                )

            # Set the termination flag
            termination_flags[research_id] = True

            # Log the termination request - using UTC timestamp
            timestamp = datetime.now(UTC).isoformat()
            termination_message = "Research termination requested by user"
            current_progress = active_research[research_id]["progress"]

            # Create log entry
            log_entry = {
                "time": timestamp,
                "message": termination_message,
                "progress": current_progress,
                "metadata": {"phase": "termination"},
            }

            # Add to in-memory log
            active_research[research_id]["log"].append(log_entry)

            # Add to database log
            logger.log("MILESTONE", f"Research ended: {termination_message}")

            # Update the log in the database
            if research.progress_log:
                try:
                    if isinstance(research.progress_log, str):
                        current_log = json.loads(research.progress_log)
                    else:
                        current_log = research.progress_log
                except Exception:
                    current_log = []
            else:
                current_log = []

            current_log.append(log_entry)
            research.progress_log = current_log
            research.status = "suspended"
            db_session.commit()

            # Emit a socket event for the termination request
            try:
                event_data = {
                    "status": "suspended",  # Changed from 'terminating' to 'suspended'
                    "message": "Research was suspended by user request",
                }

                from ..services.socket_service import SocketIOService

                SocketIOService().emit_socket_event(
                    f"research_progress_{research_id}", event_data
                )
            except Exception:
                logger.exception("Socket emit error (non-critical)")

            return jsonify(
                {
                    "status": "success",
                    "message": "Research termination requested",
                }
            )
    except Exception:
        logger.exception("Error terminating research")
        return jsonify(
            {"status": "error", "message": "Failed to terminate research"}
        ), 500
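

# Termination is cooperative: the handler above only sets
# termination_flags[research_id] = True, and the background research thread
# is expected to check that flag between steps. A minimal sketch of the
# polling side (hypothetical worker loop; the real check lives in the
# research service):
#
#     def run_steps(research_id, steps):
#         for step in steps:
#             if termination_flags.get(research_id):
#                 break  # exit gracefully; the status becomes "suspended"
#             step()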


@research_bp.route("/api/delete/<string:research_id>", methods=["DELETE"])
@login_required
def delete_research(research_id):
    """Delete a research record"""
    username = session.get("username")
    if not username:
        return jsonify({"error": "Not authenticated"}), 401

    try:
        with get_user_db_session(username) as db_session:
            research = (
                db_session.query(ResearchHistory)
                .filter_by(id=research_id)
                .first()
            )

            if not research:
                return jsonify(
                    {"status": "error", "message": "Research not found"}
                ), 404

            status = research.status
            report_path = research.report_path

            # Don't allow deleting research in progress
            if status == "in_progress" and research_id in active_research:
                return (
                    jsonify(
                        {
                            "status": "error",
                            "message": "Cannot delete research that is in progress",
                        }
                    ),
                    400,
                )

            # Delete report file if it exists
            if report_path and Path(report_path).exists():
                try:
                    Path(report_path).unlink()
                except Exception:
                    logger.exception("Error removing report file")

            # Delete the database record
            db_session.delete(research)
            db_session.commit()

            return jsonify({"status": "success"})
    except Exception:
        logger.exception("Error deleting research")
        return jsonify(
            {"status": "error", "message": "Failed to delete research"}
        ), 500


@research_bp.route("/api/clear_history", methods=["POST"])
@login_required
def clear_history():
    """Clear all research history"""
    username = session.get("username")
    if not username:
        return jsonify({"error": "Not authenticated"}), 401

    try:
        with get_user_db_session(username) as db_session:
            # Get all research records first to clean up files
            research_records = db_session.query(ResearchHistory).all()

            # Clean up report files
            for research in research_records:
                # Skip active research
                if research.id in active_research:
                    continue

                # Delete report file if it exists
                if research.report_path and Path(research.report_path).exists():
                    try:
                        Path(research.report_path).unlink()
                    except Exception:
                        logger.exception("Error removing report file")

            # Delete records from the database, except active research
            if active_research:
                db_session.query(ResearchHistory).filter(
                    ~ResearchHistory.id.in_(list(active_research.keys()))
                ).delete(synchronize_session=False)
            else:
                db_session.query(ResearchHistory).delete(
                    synchronize_session=False
                )

            db_session.commit()

            return jsonify({"status": "success"})
    except Exception:
        logger.exception("Error clearing history")
        return jsonify(
            {"status": "error", "message": "Failed to process request"}
        ), 500


@research_bp.route("/open_file_location", methods=["POST"])
@login_required
def open_file_location():
    """Open a file location in the system file explorer"""
    data = request.json
    file_path = data.get("path")

    if not file_path:
        return jsonify({"status": "error", "message": "Path is required"}), 400

    # Get the user's data directory as the safe root
    from ...config.paths import get_data_directory

    safe_root = Path(get_data_directory()).resolve()

    # Use centralized path validator for security
    try:
        from ...security.path_validator import PathValidator

        file_path = PathValidator.validate_data_path(file_path, str(safe_root))
    except Exception:
        logger.exception("Path validation error")
        return jsonify({"status": "error", "message": "Invalid path"}), 400

    # Check if path exists
    if not file_path.exists():
        return jsonify(
            {"status": "error", "message": "Path does not exist"}
        ), 404

    try:
        if platform.system() == "Windows":
            # On Windows, open the folder and select the file
            if file_path.is_file():
                subprocess.run(
                    ["explorer", "/select,", str(file_path)], check=True
                )
            else:
                # If it's a directory, just open it
                subprocess.run(["explorer", str(file_path)], check=True)
        elif platform.system() == "Darwin":  # macOS
            subprocess.run(["open", str(file_path)], check=True)
        else:  # Linux and others
            subprocess.run(["xdg-open", str(file_path.parent)], check=True)

        return jsonify({"status": "success"})
    except Exception:
        logger.exception("Error opening file location")
        return jsonify(
            {"status": "error", "message": "Failed to process request"}
        ), 500


@research_bp.route("/api/save_raw_config", methods=["POST"])
@login_required
def save_raw_config():
    """Save raw configuration"""
    data = request.json
    raw_config = data.get("raw_config")

    if not raw_config:
        return (
            jsonify(
                {"success": False, "error": "Raw configuration is required"}
            ),
            400,
        )

    try:
        from ...security.file_write_verifier import write_file_verified

        # Get the config file path (uses centralized path config, respects LDR_DATA_DIR)
        config_dir = get_config_directory()
        config_path = config_dir / "config.toml"

        # Write the configuration to file
        write_file_verified(
            config_path,
            raw_config,
            "system.allow_config_write",
            context="system configuration file",
        )

        return jsonify({"success": True})
    except Exception:
        logger.exception("Error saving configuration file")
        return jsonify(
            {"success": False, "error": "Failed to process request"}
        ), 500


@research_bp.route("/api/history", methods=["GET"])
@login_required
def get_history():
    """Get research history"""
    username = session.get("username")
    if not username:
        return jsonify({"error": "Not authenticated"}), 401

    try:
        with get_user_db_session(username) as db_session:
            # Query all research history ordered by created_at
            research_records = (
                db_session.query(ResearchHistory)
                .order_by(ResearchHistory.created_at.desc())
                .all()
            )

            history_items = []
            for research in research_records:
                # Calculate duration if completed
                duration_seconds = None
                if research.completed_at and research.created_at:
                    try:
                        duration_seconds = calculate_duration(
                            research.created_at, research.completed_at
                        )
                    except Exception:
                        logger.exception("Error calculating duration")

                # Count documents in the library for this research
                doc_count = (
                    db_session.query(Document)
                    .filter_by(research_id=research.id)
                    .count()
                )

                # Create a history item
                item = {
                    "id": research.id,
                    "query": research.query,
                    "mode": research.mode,
                    "status": research.status,
                    "created_at": research.created_at,
                    "completed_at": research.completed_at,
                    "duration_seconds": duration_seconds,
                    "report_path": research.report_path,
                    "metadata": research.research_meta,  # Include metadata for news
                    "document_count": doc_count,  # Add document count
                }

                # Add title if it exists
                if hasattr(research, "title") and research.title is not None:
                    item["title"] = research.title

                history_items.append(item)

            return jsonify({"status": "success", "items": history_items})
    except Exception:
        logger.exception("Error getting history")
        return jsonify(
            {"status": "error", "message": "Failed to process request"}
        ), 500


@research_bp.route("/api/research/<string:research_id>")
@login_required
def get_research_details(research_id):
    """Get full details of a research using ORM"""
    username = session.get("username")
    if not username:
        return jsonify({"error": "Not authenticated"}), 401

    try:
        with get_user_db_session(username) as db_session:
            research = (
                db_session.query(ResearchHistory)
                .filter(ResearchHistory.id == research_id)
                .first()
            )

            if not research:
                return jsonify({"error": "Research not found"}), 404

            return jsonify(
                {
                    "id": research.id,
                    "query": research.query,
                    "status": research.status,
                    "progress": research.progress,
                    "progress_percentage": research.progress or 0,
                    "mode": research.mode,
                    "created_at": research.created_at,
                    "completed_at": research.completed_at,
                    "report_path": research.report_path,
                    "metadata": research.research_meta,
                }
            )
    except Exception as e:
        logger.exception(f"Error getting research details: {e!s}")
        return jsonify({"error": "An internal error has occurred"}), 500


@research_bp.route("/api/research/<string:research_id>/logs")
@login_required
def get_research_logs(research_id):
    """Get logs for a specific research"""
    username = session.get("username")
    if not username:
        return jsonify({"error": "Not authenticated"}), 401

    try:
        # First check if the research exists
        with get_user_db_session(username) as db_session:
            research = (
                db_session.query(ResearchHistory)
                .filter_by(id=research_id)
                .first()
            )
            if not research:
                return jsonify({"error": "Research not found"}), 404

            # Get logs from the research_logs table
            log_results = (
                db_session.query(ResearchLog)
                .filter_by(research_id=research_id)
                .order_by(ResearchLog.timestamp)
                .all()
            )

            logs = []
            for row in log_results:
                logs.append(
                    {
                        "id": row.id,
                        "message": row.message,
                        "timestamp": row.timestamp,
                        "log_type": row.level,
                    }
                )

            return jsonify(logs)

    except Exception as e:
        logger.exception(f"Error getting research logs: {e!s}")
        return jsonify({"error": "An internal error has occurred"}), 500


@research_bp.route("/api/report/<string:research_id>")
@login_required
def get_research_report(research_id):
    """Get the research report content"""
    username = session.get("username")
    if not username:
        return jsonify({"error": "Not authenticated"}), 401

    try:
        with get_user_db_session(username) as db_session:
            # Query using ORM
            research = (
                db_session.query(ResearchHistory)
                .filter_by(id=research_id)
                .first()
            )

            if research is None:
                return jsonify({"error": "Research not found"}), 404

            # Parse metadata if it exists
            metadata = research.research_meta

            # Get report content using storage abstraction
            from ...storage import get_report_storage

            # Get settings snapshot for this thread
            settings_snapshot = (
                metadata.get("settings_snapshot") if metadata else None
            )

            # Pass settings_snapshot to avoid thread context issues
            storage = get_report_storage(
                session=db_session, settings_snapshot=settings_snapshot
            )
            content = storage.get_report(research_id, username)

            if content is None:
                return jsonify({"error": "Report not found"}), 404

            # Return the report data with backwards-compatible fields;
            # examples expect 'summary', 'sources', and 'findings' at the top level
            return jsonify(
                {
                    "content": content,
                    # Backwards-compatible fields for examples
                    "summary": content,  # The markdown report is the summary
                    "sources": metadata.get("all_links_of_system", []),
                    "findings": metadata.get("findings", []),
                    "metadata": {
                        "title": research.title if research.title else None,
                        "query": research.query,
                        "mode": research.mode if research.mode else None,
                        "created_at": research.created_at
                        if research.created_at
                        else None,
                        "completed_at": research.completed_at
                        if research.completed_at
                        else None,
                        "report_path": research.report_path,
                        **metadata,
                    },
                }
            )

    except Exception as e:
        logger.exception(f"Error getting research report: {e!s}")
        return jsonify({"error": "An internal error has occurred"}), 500


@research_bp.route(
    "/api/v1/research/<research_id>/export/<format>", methods=["POST"]
)
@login_required
def export_research_report(research_id, format):
    """Export research report to different formats (LaTeX, Quarto, RIS, or PDF)"""
    try:
        if format not in ["latex", "quarto", "ris", "pdf"]:
            return jsonify(
                {
                    "error": "Invalid format. Use 'latex', 'quarto', 'ris', or 'pdf'"
                }
            ), 400

        # Get research from database
        username = session.get("username")
        if not username:
            return jsonify({"error": "Not authenticated"}), 401

        try:
            with get_user_db_session(username) as db_session:
                research = (
                    db_session.query(ResearchHistory)
                    .filter_by(id=research_id)
                    .first()
                )
                if not research:
                    return jsonify({"error": "Research not found"}), 404

                # Get report using storage abstraction
                from ...storage import get_report_storage

                # Get metadata for settings snapshot
                metadata = (
                    research.research_meta if research.research_meta else {}
                )
                settings_snapshot = (
                    metadata.get("settings_snapshot") if metadata else None
                )

                storage = get_report_storage(
                    session=db_session, settings_snapshot=settings_snapshot
                )

                # Get report content directly (in memory)
                report_content = storage.get_report(research_id, username)
                if not report_content:
                    return jsonify({"error": "Report content not found"}), 404

                # Export to requested format (all in memory)
                try:
                    # Use title or query for the PDF title
                    pdf_title = research.title or research.query

                    # Generate export content in memory
                    export_content, filename, mimetype = (
                        export_report_to_memory(
                            report_content, format, title=pdf_title
                        )
                    )

                    # Send the file directly from memory
                    return send_file(
                        io.BytesIO(export_content),
                        as_attachment=True,
                        download_name=filename,
                        mimetype=mimetype,
                    )
                except Exception as e:
                    logger.exception(f"Error exporting report: {e!s}")
                    return jsonify(
                        {
                            "error": f"Failed to export to {format}. Please try again later."
                        }
                    ), 500

        except Exception as e:
            logger.exception(f"Error in export endpoint: {e!s}")
            return jsonify({"error": "An internal error has occurred"}), 500

    except Exception as e:
        logger.exception(f"Unexpected error in export endpoint: {e!s}")
        return jsonify({"error": "An internal error has occurred"}), 500
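

# A minimal sketch of exporting a finished report as PDF via the endpoint
# above (hypothetical client code; the research ID placeholder stays a
# placeholder, and the export is streamed from memory via send_file):
#
#     import requests
#
#     resp = requests.post(
#         "http://localhost:5000/api/v1/research/<research-id>/export/pdf",
#         cookies={"session": "<authenticated-session-cookie>"},
#     )
#     with open("report.pdf", "wb") as f:
#         f.write(resp.content)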


@research_bp.route("/api/research/<string:research_id>/status")
@limiter.exempt
@login_required
def get_research_status(research_id):
    """Get the status of a research process"""
    username = session.get("username")
    if not username:
        return jsonify({"error": "Not authenticated"}), 401

    try:
        with get_user_db_session(username) as db_session:
            research = (
                db_session.query(ResearchHistory)
                .filter_by(id=research_id)
                .first()
            )

            if research is None:
                return jsonify({"error": "Research not found"}), 404

            status = research.status
            progress = research.progress
            completed_at = research.completed_at
            report_path = research.report_path
            metadata = research.research_meta or {}

            # Extract and format error information for better UI display
            error_info = {}
            if metadata and "error" in metadata:
                error_msg = metadata["error"]
                error_type = "unknown"

                # Detect specific error types
                if "timeout" in error_msg.lower():
                    error_type = "timeout"
                    error_info = {
                        "type": "timeout",
                        "message": "LLM service timed out during synthesis. This may be due to high server load or connectivity issues.",
                        "suggestion": "Try again later or use a smaller query scope.",
                    }
                elif (
                    "token limit" in error_msg.lower()
                    or "context length" in error_msg.lower()
                ):
                    error_type = "token_limit"
                    error_info = {
                        "type": "token_limit",
                        "message": "The research query exceeded the AI model's token limit during synthesis.",
                        "suggestion": "Try using a more specific query or reduce the research scope.",
                    }
                elif (
                    "final answer synthesis fail" in error_msg.lower()
                    or "llm error" in error_msg.lower()
                ):
                    error_type = "llm_error"
                    error_info = {
                        "type": "llm_error",
                        "message": "The AI model encountered an error during final answer synthesis.",
                        "suggestion": "Check that your LLM service is running correctly or try a different model.",
                    }
                elif "ollama" in error_msg.lower():
                    error_type = "ollama_error"
                    error_info = {
                        "type": "ollama_error",
                        "message": "The Ollama service is not responding properly.",
                        "suggestion": "Make sure Ollama is running with 'ollama serve' and the model is downloaded.",
                    }
                elif "connection" in error_msg.lower():
                    error_type = "connection"
                    error_info = {
                        "type": "connection",
                        "message": "Connection error with the AI service.",
                        "suggestion": "Check your internet connection and AI service status.",
                    }
                elif metadata.get("solution"):
                    # Use the solution provided in metadata if available
                    error_info = {
                        "type": error_type,
                        "message": error_msg,
                        "suggestion": metadata.get("solution"),
                    }
                else:
                    # Generic error with the original message
                    error_info = {
                        "type": error_type,
                        "message": error_msg,
                        "suggestion": "Try again with a different query or check the application logs.",
                    }

            # Add error_info to the response if it exists
            if error_info:
                metadata["error_info"] = error_info

            # Get the latest milestone log for this research
            latest_milestone = None
            try:
                milestone_log = (
                    db_session.query(ResearchLog)
                    .filter_by(research_id=research_id, level="MILESTONE")
                    .order_by(ResearchLog.timestamp.desc())
                    .first()
                )
                if milestone_log:
                    latest_milestone = {
                        "message": milestone_log.message,
                        "time": milestone_log.timestamp.isoformat()
                        if milestone_log.timestamp
                        else None,
                        "type": "MILESTONE",
                    }
                    logger.debug(
                        f"Found latest milestone for research {research_id}: {milestone_log.message}"
                    )
                else:
                    logger.debug(
                        f"No milestone logs found for research {research_id}"
                    )
            except Exception as e:
                logger.warning(f"Error fetching latest milestone: {e!s}")

            response_data = {
                "status": status,
                "progress": progress,
                "completed_at": completed_at,
                "report_path": report_path,
                "metadata": metadata,
            }

            # Include latest milestone as a log_entry for frontend compatibility
            if latest_milestone:
                response_data["log_entry"] = latest_milestone

            return jsonify(response_data)
    except Exception:
        logger.exception("Error getting research status")
        return jsonify({"error": "Error checking research status"}), 500
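

# Because this status endpoint is exempt from rate limiting, a client can
# poll it until the research settles. A minimal sketch (hypothetical client
# code; the terminal states are taken from the statuses used in this module):
#
#     import time
#     import requests
#
#     def wait_for_research(research_id, base="http://localhost:5000"):
#         while True:
#             data = requests.get(
#                 f"{base}/api/research/{research_id}/status",
#                 cookies={"session": "<authenticated-session-cookie>"},
#             ).json()
#             if data["status"] in ("completed", "suspended", "error"):
#                 return data
#             time.sleep(2)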


@research_bp.route("/api/queue/status", methods=["GET"])
@login_required
def get_queue_status():
    """Get the current queue status for the user"""
    username = session.get("username")

    from ..queue import QueueManager

    try:
        queue_items = QueueManager.get_user_queue(username)

        return jsonify(
            {
                "status": "success",
                "queue": queue_items,
                "total": len(queue_items),
            }
        )
    except Exception:
        logger.exception("Error getting queue status")
        return jsonify(
            {"status": "error", "message": "Failed to process request"}
        ), 500


@research_bp.route("/api/queue/<string:research_id>/position", methods=["GET"])
@login_required
def get_queue_position(research_id):
    """Get the queue position for a specific research"""
    username = session.get("username")

    from ..queue import QueueManager

    try:
        position = QueueManager.get_queue_position(username, research_id)

        if position is None:
            return jsonify(
                {"status": "error", "message": "Research not found in queue"}
            ), 404

        return jsonify({"status": "success", "position": position})
    except Exception:
        logger.exception("Error getting queue position")
        return jsonify(
            {"status": "error", "message": "Failed to process request"}
        ), 500


@research_bp.route("/api/config/limits", methods=["GET"])
def get_upload_limits():
    """
    Get file upload configuration limits.

    Returns the backend's authoritative limits for file uploads,
    allowing the frontend to stay in sync without hardcoding values.
    """
    return jsonify(
        {
            "max_file_size": FileUploadValidator.MAX_FILE_SIZE,
            "max_files": FileUploadValidator.MAX_FILES_PER_REQUEST,
            "allowed_mime_types": list(FileUploadValidator.ALLOWED_MIME_TYPES),
        }
    )
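

# Because the endpoint mirrors FileUploadValidator, clients can fetch these
# limits instead of hardcoding them. A minimal sketch (hypothetical client
# code; my_pdf_size is a stand-in for the client's own file size):
#
#     import requests
#
#     limits = requests.get("http://localhost:5000/api/config/limits").json()
#     if my_pdf_size > limits["max_file_size"]:
#         raise ValueError("File exceeds the server's upload limit")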


@research_bp.route("/api/upload/pdf", methods=["POST"])
@login_required
@upload_rate_limit
def upload_pdf():
    """
    Upload and extract text from PDF files with comprehensive security validation.

    Security features:
    - Rate limiting (10 uploads/min, 100/hour per user)
    - File size validation (50MB max per file)
    - File count validation (100 files max)
    - PDF structure validation
    - MIME type validation

    Performance improvements:
    - Single-pass PDF processing (text + metadata)
    - Optimized extraction service
    """
    username = session.get("username")
    if not username:
        return jsonify({"error": "Not authenticated"}), 401

    try:
        # Early request size validation (before reading any files);
        # this prevents memory exhaustion from chunked encoding attacks
        max_request_size = (
            FileUploadValidator.MAX_FILES_PER_REQUEST
            * FileUploadValidator.MAX_FILE_SIZE
        )
        if request.content_length and request.content_length > max_request_size:
            return jsonify(
                {
                    "error": f"Request too large. Maximum size is {max_request_size // (1024 * 1024)}MB"
                }
            ), 413

        # Check if files are present in the request
        if "files" not in request.files:
            return jsonify({"error": "No files provided"}), 400

        files = request.files.getlist("files")
        if not files or files[0].filename == "":
            return jsonify({"error": "No files selected"}), 400

        # Validate file count
        is_valid, error_msg = FileUploadValidator.validate_file_count(
            len(files)
        )
        if not is_valid:
            return jsonify({"error": error_msg}), 400

        # Get PDF extraction service
        pdf_service = get_pdf_extraction_service()

        extracted_texts = []
        total_files = len(files)
        processed_files = 0
        errors = []

        for file in files:
            if not file or not file.filename:
                errors.append("Unnamed file: Skipped")
                continue

            try:
                # Read file content (with disk spooling, large files are read from a temp file)
                pdf_content = file.read()

                # Comprehensive validation
                is_valid, error_msg = FileUploadValidator.validate_upload(
                    filename=file.filename,
                    file_content=pdf_content,
                    content_length=file.content_length,
                )

                if not is_valid:
                    errors.append(f"{file.filename}: {error_msg}")
                    continue

                # Extract text and metadata in a single pass (performance fix)
                result = pdf_service.extract_text_and_metadata(
                    pdf_content, file.filename
                )

                if result["success"]:
                    extracted_texts.append(
                        {
                            "filename": result["filename"],
                            "text": result["text"],
                            "size": result["size"],
                            "pages": result["pages"],
                        }
                    )
                    processed_files += 1
                else:
                    errors.append(f"{file.filename}: {result['error']}")

            except Exception:
                logger.exception(f"Error processing {file.filename}")
                errors.append(f"{file.filename}: Error processing file")
            finally:
                # Close the file stream to release resources
                try:
                    file.close()
                except Exception:
                    pass

        # Prepare response
        response_data = {
            "status": "success",
            "processed_files": processed_files,
            "total_files": total_files,
            "extracted_texts": extracted_texts,
            "combined_text": "\n\n".join(
                [
                    f"--- From {item['filename']} ---\n{item['text']}"
                    for item in extracted_texts
                ]
            ),
            "errors": errors,
        }

        if processed_files == 0:
            return jsonify(
                {
                    "status": "error",
                    "message": "No files were processed successfully",
                    "errors": errors,
                }
            ), 400

        return jsonify(response_data)

    except Exception:
        logger.exception("Error processing PDF upload")
        return jsonify({"error": "Failed to process PDF files"}), 500
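

# A minimal sketch of uploading PDFs to the endpoint above (hypothetical
# client code and file name; note the multipart field must be named "files"):
#
#     import requests
#
#     with open("paper.pdf", "rb") as f:
#         resp = requests.post(
#             "http://localhost:5000/api/upload/pdf",
#             files=[("files", ("paper.pdf", f, "application/pdf"))],
#             cookies={"session": "<authenticated-session-cookie>"},
#         )
#     print(resp.json()["processed_files"], resp.json()["errors"])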