Coverage for src/local_deep_research/web/routes/history_routes.py: 99%

168 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-03 23:15 +0000

1import json 

2 

3from flask import Blueprint, jsonify, request, session 

4from loguru import logger 

5from sqlalchemy import func 

6 

7from ...constants import ResearchStatus 

8from ...database.models import ResearchHistory 

9from ...database.models.library import Document as Document 

10from ...database.session_context import get_user_db_session 

11from ..auth.decorators import login_required 

12from ..models.database import ( 

13 get_logs_for_research, 

14 get_total_logs_for_research, 

15) 

16from ..routes.globals import get_active_research_snapshot 

17from ..services.research_service import get_research_strategy 

18from ...security.rate_limiter import limiter 

19from ...security import filter_research_metadata 

20from ..utils.templates import render_template_with_defaults 

21 

22# Create a Blueprint for the history routes 

23history_bp = Blueprint("history", __name__, url_prefix="/history") 

24 

25# NOTE: Routes use session["username"] (not .get()) intentionally. 

26# @login_required guarantees the key exists; direct access fails fast 

27# if the decorator is ever removed. 

28 

29 

30# resolve_report_path removed - reports are now stored in database 

31 

32 

@history_bp.route("/")
@login_required
def history_page():
    """Serve the HTML page that lists past research runs."""
    template_name = "pages/history.html"
    return render_template_with_defaults(template_name)

38 

39 

@history_bp.route("/api", methods=["GET"])
@login_required
def get_history():
    """Return one page of the user's research history as JSON.

    Query parameters:
        limit: Page size. Defaults to 200 and is clamped to ``[1, 500]``.
        offset: Number of rows to skip. Defaults to 0; negative values
            are raised to 0.

    Returns:
        ``{"status": "success", "items": [...]}`` where each item carries
        the research columns, a ``document_count`` and a ``metadata``
        entry. If anything raises, the error is logged and a 500 response
        with ``status`` "error", an empty ``items`` list and a generic
        message is returned instead.
    """
    username = session["username"]

    try:
        limit = request.args.get("limit", 200, type=int)
        limit = max(1, min(limit, 500))
        offset = request.args.get("offset", 0, type=int)
        offset = max(0, offset)

        with get_user_db_session(username) as db_session:
            # Single query with JOIN to get history + document counts
            results = (
                db_session.query(
                    ResearchHistory,
                    func.count(Document.id).label("document_count"),
                )
                .outerjoin(Document, Document.research_id == ResearchHistory.id)
                .group_by(ResearchHistory.id)
                .order_by(ResearchHistory.created_at.desc())
                .limit(limit)
                .offset(offset)
                .all()
            )

            logger.debug(f"All research count: {len(results)}")

            # Convert to list of dicts
            history = []
            for research, doc_count in results:
                item = {
                    "id": research.id,
                    "title": research.title,
                    "query": research.query,
                    "mode": research.mode,
                    "status": research.status,
                    "created_at": research.created_at,
                    "completed_at": research.completed_at,
                    "duration_seconds": research.duration_seconds,
                    "document_count": doc_count,
                }

                item["metadata"] = filter_research_metadata(
                    research.research_meta
                )
                if research.chat_session_id is not None:
                    item["metadata"]["chat_session_id"] = (
                        research.chat_session_id
                    )

                # Recalculate duration if null but both timestamps exist
                if (
                    item["duration_seconds"] is None
                    and item["created_at"]
                    and item["completed_at"]
                ):
                    try:
                        # Imported lazily inside the try so that a missing
                        # dateutil degrades to a warning, not a 500.
                        from dateutil import parser  # type: ignore[import-untyped]

                        start_time = parser.parse(item["created_at"])
                        end_time = parser.parse(item["completed_at"])
                        item["duration_seconds"] = int(
                            (end_time - start_time).total_seconds()
                        )
                    except Exception:
                        logger.warning("Error recalculating duration")
                        # loguru does not understand the stdlib
                        # ``exc_info=True`` keyword; the traceback has to
                        # be requested through ``opt(exception=True)``.
                        logger.opt(exception=True).debug(
                            "Duration error details"
                        )

                history.append(item)

        # Format response to match what client expects
        response_data = {
            "status": "success",
            "items": history,  # Use 'items' key as expected by client
        }

        # CORS headers are handled by SecurityHeaders middleware
        return jsonify(response_data)
    except Exception:
        logger.exception("Error getting history")
        return jsonify(
            {
                "status": "error",
                "items": [],
                "message": "Failed to retrieve history",
            }
        ), 500

129 

130 

@history_bp.route("/status/<string:research_id>")
@limiter.exempt
@login_required
def get_research_status(research_id):
    """Return the stored state and current progress of one research run.

    Args:
        research_id: ID of the research row, taken from the URL.

    Returns:
        A JSON object with the row's columns plus ``progress`` and
        ``log``. While an active-research snapshot exists both come from
        it. Otherwise ``progress`` is 100 for a completed run and 0 for
        anything else, and ``log`` is the decoded ``progress_log`` column
        (an empty list when the column is empty or cannot be decoded).
        A 404 JSON error is returned when the row does not exist.
    """
    username = session["username"]

    with get_user_db_session(username) as db_session:
        research = (
            db_session.query(ResearchHistory).filter_by(id=research_id).first()
        )

        if not research:
            return jsonify(
                {"status": "error", "message": "Research not found"}
            ), 404

        # Extract attributes while session is active
        # to avoid DetachedInstanceError after the with block exits
        result = {
            "id": research.id,
            "query": research.query,
            "mode": research.mode,
            "status": research.status,
            "created_at": research.created_at,
            "completed_at": research.completed_at,
            "progress_log": research.progress_log,
            "report_path": research.report_path,
        }

    # Add progress information from active research (atomic snapshot)
    snapshot = get_active_research_snapshot(research_id)
    if snapshot is not None:
        result["progress"] = snapshot["progress"]
        result["log"] = snapshot["log"]
        return jsonify(result)

    result["progress"] = (
        100 if result["status"] == ResearchStatus.COMPLETED else 0
    )

    # "progress_log" is always present in ``result``, so a ``.get()``
    # default would never apply. An empty value (e.g. a NULL column) is
    # an expected state: answer with an empty log instead of letting
    # json.loads raise and emitting a warning on every poll.
    raw_log = result["progress_log"]
    if not raw_log:
        result["log"] = []
    else:
        try:
            result["log"] = json.loads(raw_log)
        except Exception:
            logger.warning(
                "Error parsing progress_log for research {}", research_id
            )
            result["log"] = []

    return jsonify(result)

185 

186 

@history_bp.route("/details/<string:research_id>")
@login_required
def get_research_details(research_id):
    """Return the research row summary, strategy, progress and merged log.

    Responds with a 404 JSON error when the row does not exist and a 500
    JSON error when the database lookup raises.
    """
    logger.debug(f"Details route accessed for research_id: {research_id}")

    username = session["username"]

    try:
        with get_user_db_session(username) as db_session:
            record = (
                db_session.query(ResearchHistory)
                .filter_by(id=research_id)
                .first()
            )
            logger.debug(f"Research found: {record.id if record else None}")

            if not record:
                logger.error(f"Research not found for id: {research_id}")
                return jsonify(
                    {"status": "error", "message": "Research not found"}
                ), 404

            # Copy the columns out now; the ORM object must not be
            # touched once the session context has exited.
            query_text = record.query
            mode = record.mode
            status = record.status
            created_at = record.created_at
            completed_at = record.completed_at
    except Exception:
        logger.exception("Database error")
        return jsonify(
            {
                "status": "error",
                "message": "An internal database error occurred.",
            }
        ), 500

    # Persisted log entries, strategy name, and the in-memory snapshot
    logs = get_logs_for_research(research_id)
    strategy_name = get_research_strategy(research_id)
    snapshot = get_active_research_snapshot(research_id)

    if snapshot is None:
        # Not running: progress is derived from the stored status alone
        progress = 100 if status == ResearchStatus.COMPLETED else 0
    else:
        # Running: append in-memory entries whose timestamp is not
        # already persisted, then restore chronological order.
        pending = snapshot["log"]
        seen_times = {entry["time"] for entry in logs}
        fresh = [entry for entry in pending if entry["time"] not in seen_times]
        logs.extend(fresh)
        logs.sort(key=lambda entry: entry["time"])
        progress = snapshot["progress"]

    payload = {
        "research_id": research_id,
        "query": query_text,
        "mode": mode,
        "status": status,
        "strategy": strategy_name,
        "progress": progress,
        "created_at": created_at,
        "completed_at": completed_at,
        "log": logs,
    }
    return jsonify(payload)

274 

275 

@history_bp.route("/report/<string:research_id>")
@login_required
def get_report(research_id):
    """Return the assembled report text and metadata for one research run.

    Responds with a 404 JSON error when the row or its content is
    missing, and a 500 JSON error when assembling the report raises.
    """
    from ..auth.decorators import current_user

    username = current_user()

    with get_user_db_session(username) as db_session:
        record = (
            db_session.query(ResearchHistory).filter_by(id=research_id).first()
        )
        if not record:
            return jsonify(
                {"status": "error", "message": "Report not found"}
            ), 404

        try:
            # The stored report_content is the answer-only string; the
            # legacy display shape is rebuilt on demand by appending
            # Sources (research_resources) and Metrics (research_meta).
            from ..services.report_assembly_service import (
                assemble_full_report,
            )

            content = assemble_full_report(record, db_session)
            # None is the only "not found" signal. An empty string is a
            # valid result for a row with no body, sources or metrics.
            if content is None:
                return jsonify(
                    {"status": "error", "message": "Report content not found"}
                ), 404

            # NOTE(review): get_history passes research_meta through
            # filter_research_metadata; here it is returned unfiltered.
            # Confirm that difference is intentional.
            stored_metadata = record.research_meta or {}

            # Values taken from the database columns
            column_fields = {
                "query": record.query,
                "mode": record.mode,
                "created_at": record.created_at,
                "completed_at": record.completed_at,
            }

            # Column values first, then stored metadata layered on top
            # (stored keys win on collision).
            enhanced_metadata = dict(column_fields)
            enhanced_metadata["duration"] = record.duration_seconds
            enhanced_metadata.update(stored_metadata)

            body = {"status": "success", "content": content}
            body.update(column_fields)
            body["metadata"] = enhanced_metadata
            return jsonify(body)
        except Exception:
            logger.exception(
                "Failed to retrieve report for research {}", research_id
            )
            return jsonify(
                {"status": "error", "message": "Failed to retrieve report"}
            ), 500

343 

344 

@history_bp.route("/markdown/<string:research_id>")
@login_required
def get_markdown(research_id):
    """Return the assembled report text for one research run.

    Responds with a 404 JSON error when the row or its content is
    missing, and a 500 JSON error when assembling the report raises.
    """
    from ..auth.decorators import current_user

    username = current_user()

    with get_user_db_session(username) as db_session:
        record = (
            db_session.query(ResearchHistory).filter_by(id=research_id).first()
        )
        if not record:
            missing_row = {"status": "error", "message": "Report not found"}
            return jsonify(missing_row), 404

        try:
            from ..services.report_assembly_service import (
                assemble_full_report,
            )

            content = assemble_full_report(record, db_session)
            if content is not None:
                return jsonify({"status": "success", "content": content})

            # The assembler signalled that there is nothing to export
            missing_content = {
                "status": "error",
                "message": "Report content not found",
            }
            return jsonify(missing_content), 404
        except Exception:
            logger.exception(
                "Failed to retrieve markdown report for research {}",
                research_id,
            )
            failure = {"status": "error", "message": "Failed to retrieve report"}
            return jsonify(failure), 500

383 

384 

@history_bp.route("/logs/<string:research_id>")
@login_required
def get_research_logs(research_id):
    """Return log entries for one research run.

    The optional ``?limit=N`` query parameter bounds the response size.
    It defaults to 500 (the frontend's ``MAX_LOG_ENTRIES`` DOM cap) and
    is clamped to ``[1, 5000]`` so that a client cannot trigger an
    unbounded load: a long langgraph run can persist thousands of 10 KB
    rows, and before the cap this route allocated ~150 MB transiently
    on the server while Firefox parsed a ~50 MB JSON response.
    """
    username = session["username"]

    # 500 mirrors MAX_LOG_ENTRIES in logpanel.js. The 5000 ceiling still
    # lets explicit log-download flows fetch more rows while preventing
    # accidental unbounded loads.
    requested = request.args.get("limit", default=500, type=int)
    limit = min(requested, 5000)
    if limit < 1:
        limit = 1

    # The research row must exist before any logs are fetched
    with get_user_db_session(username) as db_session:
        record = (
            db_session.query(ResearchHistory).filter_by(id=research_id).first()
        )
        if not record:
            return jsonify(
                {"status": "error", "message": "Research not found"}
            ), 404

    logs = get_logs_for_research(research_id, limit=limit)

    # Defensive backfill of the three fields the frontend requires.
    # `get_logs_for_research` always populates them from ResearchLog
    # columns, but test_logs_with_missing_fields_get_defaults covers
    # this layer: extra keys must survive and missing keys must receive
    # a default. Mutating in place is safe because the formatter
    # returned a fresh list of fresh dicts.
    fallbacks = (("time", ""), ("message", "No message"), ("type", "info"))
    for entry in logs:
        for field, fallback in fallbacks:
            entry.setdefault(field, fallback)

    return jsonify({"status": "success", "logs": logs})

429 

430 

@history_bp.route("/log_count/<string:research_id>")
@login_required
def get_log_count(research_id):
    """Report how many log entries exist for the given research ID."""
    payload = {
        "status": "success",
        "total_logs": get_total_logs_for_research(research_id),
    }
    return jsonify(payload)