Coverage for src/local_deep_research/web/routes/history_routes.py: 99%
168 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-03 23:15 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-03 23:15 +0000
1import json
3from flask import Blueprint, jsonify, request, session
4from loguru import logger
5from sqlalchemy import func
7from ...constants import ResearchStatus
8from ...database.models import ResearchHistory
9from ...database.models.library import Document as Document
10from ...database.session_context import get_user_db_session
11from ..auth.decorators import login_required
12from ..models.database import (
13 get_logs_for_research,
14 get_total_logs_for_research,
15)
16from ..routes.globals import get_active_research_snapshot
17from ..services.research_service import get_research_strategy
18from ...security.rate_limiter import limiter
19from ...security import filter_research_metadata
20from ..utils.templates import render_template_with_defaults
22# Create a Blueprint for the history routes
23history_bp = Blueprint("history", __name__, url_prefix="/history")
25# NOTE: Routes use session["username"] (not .get()) intentionally.
26# @login_required guarantees the key exists; direct access fails fast
27# if the decorator is ever removed.
30# resolve_report_path removed - reports are now stored in database
@history_bp.route("/")
@login_required
def history_page():
    """Serve the HTML page that shows the research history."""
    template_name = "pages/history.html"
    return render_template_with_defaults(template_name)
@history_bp.route("/api", methods=["GET"])
@login_required
def get_history():
    """Return the logged-in user's research history as JSON.

    Query parameters:
        limit: Maximum number of rows to return. Defaults to 200 and is
            clamped to the range [1, 500].
        offset: Number of rows to skip. Defaults to 0; negative values
            are treated as 0.

    Returns:
        ``{"status": "success", "items": [...]}`` ordered by ``created_at``
        descending, or ``{"status": "error", "items": [], "message": ...}``
        with HTTP 500 if anything raises while building the response.
    """
    username = session["username"]

    try:
        limit = request.args.get("limit", 200, type=int)
        limit = max(1, min(limit, 500))
        offset = request.args.get("offset", 0, type=int)
        offset = max(0, offset)

        with get_user_db_session(username) as db_session:
            # Single query with JOIN to get history + document counts
            results = (
                db_session.query(
                    ResearchHistory,
                    func.count(Document.id).label("document_count"),
                )
                .outerjoin(Document, Document.research_id == ResearchHistory.id)
                .group_by(ResearchHistory.id)
                .order_by(ResearchHistory.created_at.desc())
                .limit(limit)
                .offset(offset)
                .all()
            )

            logger.debug(f"All research count: {len(results)}")

            # Convert to list of dicts
            history = []
            for research, doc_count in results:
                item = {
                    "id": research.id,
                    "title": research.title,
                    "query": research.query,
                    "mode": research.mode,
                    "status": research.status,
                    "created_at": research.created_at,
                    "completed_at": research.completed_at,
                    "duration_seconds": research.duration_seconds,
                    "document_count": doc_count,
                }

                item["metadata"] = filter_research_metadata(
                    research.research_meta
                )
                if research.chat_session_id is not None:
                    item["metadata"]["chat_session_id"] = (
                        research.chat_session_id
                    )

                # Recalculate duration if null but both timestamps exist
                if (
                    item["duration_seconds"] is None
                    and item["created_at"]
                    and item["completed_at"]
                ):
                    try:
                        # Imported lazily inside the try so that a missing
                        # dateutil degrades to "no duration" rather than
                        # failing the whole request.
                        from dateutil import parser  # type: ignore[import-untyped]

                        start_time = parser.parse(item["created_at"])
                        end_time = parser.parse(item["completed_at"])
                        item["duration_seconds"] = int(
                            (end_time - start_time).total_seconds()
                        )
                    except Exception:
                        logger.warning("Error recalculating duration")
                        # loguru does not honour the stdlib ``exc_info``
                        # keyword; ``opt(exception=True)`` is what attaches
                        # the active traceback to the record.
                        logger.opt(exception=True).debug(
                            "Duration error details"
                        )

                history.append(item)

        # Format response to match what client expects
        response_data = {
            "status": "success",
            "items": history,  # Use 'items' key as expected by client
        }

        # CORS headers are handled by SecurityHeaders middleware
        return jsonify(response_data)
    except Exception:
        logger.exception("Error getting history")
        return jsonify(
            {
                "status": "error",
                "items": [],
                "message": "Failed to retrieve history",
            }
        ), 500
@history_bp.route("/status/<string:research_id>")
@limiter.exempt
@login_required
def get_research_status(research_id):
    """Return the stored fields and current progress of one research as JSON.

    Args:
        research_id: Identifier taken from the URL path.

    Returns:
        A JSON object with the stored research fields plus ``progress`` and
        ``log``. While an active-research snapshot exists, both come from
        that snapshot. Otherwise ``progress`` is 100 for a completed
        research and 0 for any other status, and ``log`` is decoded from the
        stored ``progress_log`` (empty list when it is missing or cannot be
        parsed). Responds with HTTP 404 when no matching row is found.
    """
    username = session["username"]

    with get_user_db_session(username) as db_session:
        research = (
            db_session.query(ResearchHistory).filter_by(id=research_id).first()
        )

        if not research:
            return jsonify(
                {"status": "error", "message": "Research not found"}
            ), 404

        # Extract attributes while session is active
        # to avoid DetachedInstanceError after the with block exits
        result = {
            "id": research.id,
            "query": research.query,
            "mode": research.mode,
            "status": research.status,
            "created_at": research.created_at,
            "completed_at": research.completed_at,
            "progress_log": research.progress_log,
            "report_path": research.report_path,
        }

    # Add progress information from active research (atomic snapshot)
    snapshot = get_active_research_snapshot(research_id)
    if snapshot is not None:
        result["progress"] = snapshot["progress"]
        result["log"] = snapshot["log"]
        return jsonify(result)

    result["progress"] = (
        100 if result["status"] == ResearchStatus.COMPLETED else 0
    )
    try:
        # "progress_log" is always present in ``result`` but may be
        # None/empty; fall back to an empty JSON array so a missing log is
        # not reported as a parse failure on every status poll.
        result["log"] = json.loads(result["progress_log"] or "[]")
    except Exception:
        logger.warning(
            "Error parsing progress_log for research {}", research_id
        )
        result["log"] = []

    return jsonify(result)
@history_bp.route("/details/<string:research_id>")
@login_required
def get_research_details(research_id):
    """Return summary fields, strategy, progress and log entries for a research.

    Log entries come from ``get_logs_for_research``; when an active-research
    snapshot exists, snapshot entries whose ``time`` is not already present
    are merged in and the combined list is sorted by ``time``.
    """
    logger.debug(f"Details route accessed for research_id: {research_id}")

    username = session["username"]
    wanted_fields = ("query", "mode", "status", "created_at", "completed_at")

    try:
        with get_user_db_session(username) as db_session:
            record = (
                db_session.query(ResearchHistory)
                .filter_by(id=research_id)
                .first()
            )
            logger.debug(f"Research found: {record.id if record else None}")

            if not record:
                logger.error(f"Research not found for id: {research_id}")
                not_found = {"status": "error", "message": "Research not found"}
                return jsonify(not_found), 404

            # Copy the values out while the session is still open so the
            # ORM object is never touched after the with block exits.
            research_data = {
                field: getattr(record, field) for field in wanted_fields
            }
    except Exception:
        logger.exception("Database error")
        failure = {
            "status": "error",
            "message": "An internal database error occurred.",
        }
        return jsonify(failure), 500

    # Persisted logs, strategy name and the in-memory snapshot (if any)
    logs = get_logs_for_research(research_id)
    strategy_name = get_research_strategy(research_id)
    snapshot = get_active_research_snapshot(research_id)

    if snapshot is None:
        if research_data["status"] == ResearchStatus.COMPLETED:
            progress = 100
        else:
            progress = 0
    else:
        # In-memory entries may not have been persisted yet; add the ones
        # whose timestamp is not already in the persisted set.
        known_times = {entry["time"] for entry in logs}
        pending = [
            entry for entry in snapshot["log"] if entry["time"] not in known_times
        ]
        logs.extend(pending)
        logs.sort(key=lambda entry: entry["time"])
        progress = snapshot["progress"]

    payload = {
        "research_id": research_id,
        "query": research_data["query"],
        "mode": research_data["mode"],
        "status": research_data["status"],
        "strategy": strategy_name,
        "progress": progress,
        "created_at": research_data["created_at"],
        "completed_at": research_data["completed_at"],
        "log": logs,
    }
    return jsonify(payload)
@history_bp.route("/report/<string:research_id>")
@login_required
def get_report(research_id):
    """Return the assembled report text and metadata for one research.

    Responds with 404 when the research row or its assembled content is
    missing, and with 500 when assembling or serialising the report raises.
    """
    from ..auth.decorators import current_user

    username = current_user()

    with get_user_db_session(username) as db_session:
        lookup = db_session.query(ResearchHistory).filter_by(id=research_id)
        research = lookup.first()

        if not research:
            missing = {"status": "error", "message": "Report not found"}
            return jsonify(missing), 404

        try:
            # The stored report holds only the answer text; the full
            # display form is rebuilt on demand by the assembly service.
            from ..services.report_assembly_service import (
                assemble_full_report,
            )

            content = assemble_full_report(research, db_session)
            # None is the only "nothing to show" signal; an empty string
            # is a valid (if empty) report and is returned as-is.
            if content is None:
                no_content = {
                    "status": "error",
                    "message": "Report content not found",
                }
                return jsonify(no_content), 404

            stored_metadata = research.research_meta or {}

            # Start from the database columns, then let stored metadata
            # overlay them.
            enhanced_metadata = dict(
                query=research.query,
                mode=research.mode,
                created_at=research.created_at,
                completed_at=research.completed_at,
                duration=research.duration_seconds,
            )
            enhanced_metadata.update(stored_metadata)

            payload = dict(
                status="success",
                content=content,
                query=research.query,
                mode=research.mode,
                created_at=research.created_at,
                completed_at=research.completed_at,
                metadata=enhanced_metadata,
            )
            return jsonify(payload)
        except Exception:
            logger.exception(
                "Failed to retrieve report for research {}", research_id
            )
            failure = {"status": "error", "message": "Failed to retrieve report"}
            return jsonify(failure), 500
@history_bp.route("/markdown/<string:research_id>")
@login_required
def get_markdown(research_id):
    """Return the assembled report content for one research as JSON.

    Responds with 404 when the research row or its assembled content is
    missing, and with 500 when assembling the report raises.
    """
    from ..auth.decorators import current_user

    username = current_user()

    with get_user_db_session(username) as db_session:
        lookup = db_session.query(ResearchHistory).filter_by(id=research_id)
        research = lookup.first()

        if not research:
            missing = {"status": "error", "message": "Report not found"}
            return jsonify(missing), 404

        try:
            from ..services.report_assembly_service import (
                assemble_full_report,
            )

            content = assemble_full_report(research, db_session)
            if content is not None:
                return jsonify({"status": "success", "content": content})

            no_content = {
                "status": "error",
                "message": "Report content not found",
            }
            return jsonify(no_content), 404
        except Exception:
            logger.exception(
                "Failed to retrieve markdown report for research {}",
                research_id,
            )
            failure = {"status": "error", "message": "Failed to retrieve report"}
            return jsonify(failure), 500
@history_bp.route("/logs/<string:research_id>")
@login_required
def get_research_logs(research_id):
    """Return log entries for one research ID, bounded by ``?limit=N``.

    ``limit`` defaults to 500 (the frontend's ``MAX_LOG_ENTRIES`` DOM cap)
    and is clamped to ``[1, 5000]`` so that a client cannot trigger an
    unbounded load; the upper bound still leaves room for explicit
    log-download flows that want more rows than the panel displays.
    Responds with 404 when the research does not exist.
    """
    username = session["username"]

    # Clamp the requested page size into the allowed window.
    requested = request.args.get("limit", default=500, type=int)
    limit = min(max(requested, 1), 5000)

    # The research must exist before any logs are fetched.
    with get_user_db_session(username) as db_session:
        lookup = db_session.query(ResearchHistory).filter_by(id=research_id)
        research = lookup.first()

        if not research:
            missing = {"status": "error", "message": "Research not found"}
            return jsonify(missing), 404

    logs = get_logs_for_research(research_id, limit=limit)

    # Defensive backfill of the three fields the frontend requires:
    # existing keys (including extras) are left untouched, absent ones
    # receive a default. Entries are mutated in place.
    fallbacks = (
        ("time", ""),
        ("message", "No message"),
        ("type", "info"),
    )
    for entry in logs:
        for field, default in fallbacks:
            entry.setdefault(field, default)

    return jsonify({"status": "success", "logs": logs})
@history_bp.route("/log_count/<string:research_id>")
@login_required
def get_log_count(research_id):
    """Report the total number of log entries for the given research ID."""
    payload = {
        "status": "success",
        "total_logs": get_total_logs_for_research(research_id),
    }
    return jsonify(payload)