Coverage for src/local_deep_research/web/routes/context_overflow_api.py: 88%

118 statements  

coverage.py v7.13.4, created at 2026-02-25 01:07 +0000

1"""API endpoints for context overflow analytics.""" 

2 

3from flask import Blueprint, jsonify, request, session as flask_session 

4from datetime import datetime, timedelta, timezone 

5from sqlalchemy import func, desc, case 

6from loguru import logger 

7 

8from ...database.session_context import get_user_db_session 

9from ...database.models import TokenUsage 

10from ..auth.decorators import login_required 

11 

12context_overflow_bp = Blueprint("context_overflow_api", __name__) 

13 

14 

15@context_overflow_bp.route("/api/context-overflow", methods=["GET"]) 

16@login_required 

17def get_context_overflow_metrics(): 

18 """Get context overflow metrics for the current user.""" 

19 try: 

20 # Get username from session 

21 username = flask_session.get("username") 

22 if not username: 22 ↛ 23line 22 didn't jump to line 23 because the condition on line 22 was never true

23 return jsonify( 

24 {"status": "error", "message": "User not authenticated"} 

25 ), 401 

26 

27 # Get time period from query params (whitelist valid values) 

28 VALID_PERIODS = {"7d", "30d", "3m", "1y", "all"} 

29 period = request.args.get("period", "30d") 

30 if period not in VALID_PERIODS: 

31 period = "30d" 
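        # e.g. an unrecognized value such as ?period=90d falls back to "30d"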


        # Pagination params for all_requests
        page = max(1, request.args.get("page", 1, type=int))
        per_page = request.args.get("per_page", 50, type=int)
        per_page = max(1, min(per_page, 500))
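        # e.g. per_page=10000 is clamped down to 500; per_page=0 is raised to 1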


        # Calculate date filter (use timezone-aware datetime)
        start_date = None
        if period != "all":
            now = datetime.now(timezone.utc)
            if period == "7d":
                start_date = now - timedelta(days=7)
            elif period == "30d":  # coverage: always true in the test run; later branches unexercised
                start_date = now - timedelta(days=30)
            elif period == "3m":
                start_date = now - timedelta(days=90)
            elif period == "1y":
                start_date = now - timedelta(days=365)

        with get_user_db_session(username) as session:
            # Base query
            query = session.query(TokenUsage)

            if start_date:
                query = query.filter(TokenUsage.timestamp >= start_date)

            # Get overview statistics - merge count queries using CASE
            overview_counts = query.with_entities(
                func.count(TokenUsage.id).label("total_requests"),
                func.sum(
                    case(
                        (TokenUsage.context_limit.isnot(None), 1),
                        else_=0,
                    )
                ).label("requests_with_context"),
                func.sum(
                    case(
                        (TokenUsage.context_truncated.is_(True), 1),
                        else_=0,
                    )
                ).label("truncated_requests"),
            ).first()
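            # The merged aggregate compiles to roughly this SQL (a sketch, not
            # exact dialect output, assuming the model maps to a token_usage table):
            #   SELECT COUNT(id),
            #          SUM(CASE WHEN context_limit IS NOT NULL THEN 1 ELSE 0 END),
            #          SUM(CASE WHEN context_truncated IS TRUE THEN 1 ELSE 0 END)
            #   FROM token_usage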


            total_requests = overview_counts.total_requests or 0
            requests_with_context = int(
                overview_counts.requests_with_context or 0
            )
            truncated_requests = int(overview_counts.truncated_requests or 0)

            # Calculate truncation rate
            truncation_rate = 0
            if requests_with_context > 0:
                truncation_rate = (
                    truncated_requests / requests_with_context
                ) * 100
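            # Worked example (made-up numbers): 3 truncated out of 120 requests
            # with context data -> (3 / 120) * 100 = 2.5 percent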


            # Get average tokens truncated
            avg_tokens_truncated = session.query(
                func.avg(TokenUsage.tokens_truncated)
            ).filter(TokenUsage.context_truncated.is_(True))

            if start_date:
                avg_tokens_truncated = avg_tokens_truncated.filter(
                    TokenUsage.timestamp >= start_date
                )

            avg_tokens_truncated = avg_tokens_truncated.scalar() or 0

            # --- Token summary (always populated, no context_limit filter) ---
            token_summary_row = query.with_entities(
                func.count(TokenUsage.id).label("total_requests"),
                func.coalesce(func.sum(TokenUsage.total_tokens), 0).label(
                    "total_tokens"
                ),
                func.coalesce(func.sum(TokenUsage.prompt_tokens), 0).label(
                    "total_prompt_tokens"
                ),
                func.coalesce(func.sum(TokenUsage.completion_tokens), 0).label(
                    "total_completion_tokens"
                ),
                func.avg(TokenUsage.prompt_tokens).label("avg_prompt_tokens"),
                func.avg(TokenUsage.completion_tokens).label(
                    "avg_completion_tokens"
                ),
                func.max(TokenUsage.prompt_tokens).label("max_prompt_tokens"),
            ).first()

            token_summary = {
                "total_requests": token_summary_row.total_requests or 0,
                "total_tokens": int(token_summary_row.total_tokens or 0),
                "total_prompt_tokens": int(
                    token_summary_row.total_prompt_tokens or 0
                ),
                "total_completion_tokens": int(
                    token_summary_row.total_completion_tokens or 0
                ),
                "avg_prompt_tokens": round(
                    token_summary_row.avg_prompt_tokens or 0, 0
                ),
                "avg_completion_tokens": round(
                    token_summary_row.avg_completion_tokens or 0, 0
                ),
                "max_prompt_tokens": int(
                    token_summary_row.max_prompt_tokens or 0
                ),
            }

            # --- Model token stats (always populated, no context_limit filter) ---
            model_token_rows = (
                query.with_entities(
                    TokenUsage.model_name,
                    TokenUsage.model_provider,
                    func.count(TokenUsage.id).label("total_requests"),
                    func.coalesce(func.sum(TokenUsage.total_tokens), 0).label(
                        "total_tokens"
                    ),
                    func.avg(TokenUsage.prompt_tokens).label("avg_prompt"),
                    func.max(TokenUsage.prompt_tokens).label("max_prompt"),
                    func.avg(TokenUsage.response_time_ms).label(
                        "avg_response_time_ms"
                    ),
                )
                .group_by(TokenUsage.model_name, TokenUsage.model_provider)
                .all()
            )

            model_token_stats = [
                {
                    "model": row.model_name,
                    "provider": row.model_provider,
                    "total_requests": row.total_requests,
                    "total_tokens": int(row.total_tokens or 0),
                    "avg_prompt": round(row.avg_prompt or 0, 0),
                    "max_prompt": int(row.max_prompt or 0),
                    "avg_response_time_ms": round(
                        row.avg_response_time_ms or 0, 0
                    ),
                }
                for row in model_token_rows
            ]

            # --- Phase breakdown (always populated, no context_limit filter) ---
            phase_rows = (
                query.with_entities(
                    TokenUsage.research_phase,
                    func.count(TokenUsage.id).label("count"),
                    func.coalesce(func.sum(TokenUsage.total_tokens), 0).label(
                        "total_tokens"
                    ),
                    func.avg(TokenUsage.total_tokens).label("avg_tokens"),
                )
                .group_by(TokenUsage.research_phase)
                .all()
            )

            phase_breakdown = [
                {
                    "phase": row.research_phase or "unknown",
                    "count": row.count,
                    "total_tokens": int(row.total_tokens or 0),
                    "avg_tokens": round(row.avg_tokens or 0, 0),
                }
                for row in phase_rows
            ]

            # Get context limit distribution by model
            context_limits = session.query(
                TokenUsage.model_name,
                TokenUsage.context_limit,
                func.count(TokenUsage.id).label("count"),
            ).filter(TokenUsage.context_limit.isnot(None))

            if start_date:
                context_limits = context_limits.filter(
                    TokenUsage.timestamp >= start_date
                )

            context_limits = context_limits.group_by(
                TokenUsage.model_name, TokenUsage.context_limit
            ).all()

            # Get recent truncated requests
            recent_truncated = (
                query.filter(TokenUsage.context_truncated.is_(True))
                .order_by(desc(TokenUsage.timestamp))
                .limit(20)
                .all()
            )

            # Get time series data for chart - include all records
            # (even those without context_limit for OpenRouter models)
            time_series_query = query.order_by(TokenUsage.timestamp)

            if start_date:
                # For shorter periods, get all data points (capped at 1000)
                if period in ["7d", "30d"]:  # coverage: always true in the test run
                    time_series_data = time_series_query.limit(1000).all()
                else:
                    # For longer periods, sample data
                    time_series_data = time_series_query.limit(500).all()
            else:
                time_series_data = time_series_query.limit(1000).all()

            # Format time series for chart
            chart_data = []
            for usage in time_series_data:  # coverage: loop body never ran in the test run
                # Calculate original tokens (before truncation)
                ollama_used = usage.ollama_prompt_eval_count  # What Ollama actually processed
                actual_prompt = ollama_used or usage.prompt_tokens
                tokens_truncated = usage.tokens_truncated or 0
                original_tokens = (
                    actual_prompt + tokens_truncated
                    if usage.context_truncated
                    else actual_prompt
                )
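                # Worked example (made-up numbers): a 9000-token prompt against
                # an 8192-token context is processed as 8192 tokens with
                # tokens_truncated = 808, so original_tokens = 8192 + 808 = 9000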


                chart_data.append(
                    {
                        "timestamp": usage.timestamp.isoformat(),
                        "research_id": usage.research_id,
                        "prompt_tokens": usage.prompt_tokens,  # From our standard token counting
                        "completion_tokens": usage.completion_tokens,
                        "ollama_prompt_tokens": ollama_used,  # What Ollama actually used (may be capped)
                        "original_prompt_tokens": original_tokens,  # What was originally requested (before truncation)
                        "context_limit": usage.context_limit,
                        "truncated": bool(usage.context_truncated),
                        "tokens_truncated": tokens_truncated,
                        "model": usage.model_name,
                    }
                )

            # Get model-specific truncation stats
            model_stats = session.query(
                TokenUsage.model_name,
                TokenUsage.model_provider,
                func.count(TokenUsage.id).label("total_requests"),
                func.sum(TokenUsage.context_truncated).label("truncated_count"),
                func.avg(TokenUsage.context_limit).label("avg_context_limit"),
            ).filter(TokenUsage.context_limit.isnot(None))
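            # Note: summing the boolean context_truncated column relies on the
            # backend coercing True to 1 (SQLite does); the CASE pattern used
            # for the overview counts is the portable alternative.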


            if start_date:
                model_stats = model_stats.filter(
                    TokenUsage.timestamp >= start_date
                )

            model_stats = model_stats.group_by(
                TokenUsage.model_name, TokenUsage.model_provider
            ).all()

            # --- Paginated all_requests ---
            all_requests_query = query.order_by(desc(TokenUsage.timestamp))
            all_requests_total = all_requests_query.count()
            all_requests_pages = (
                (all_requests_total + per_page - 1) // per_page
                if all_requests_total > 0
                else 1
            )
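            # Ceiling division, e.g. 101 rows at 50 per page:
            # (101 + 50 - 1) // 50 = 3 pages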

            all_requests_data = (
                all_requests_query.offset((page - 1) * per_page)
                .limit(per_page)
                .all()
            )

            # Format response
            response = {
                "status": "success",
                "overview": {
                    "total_requests": total_requests,
                    "requests_with_context_data": requests_with_context,
                    "truncated_requests": truncated_requests,
                    "truncation_rate": round(truncation_rate, 2),
                    "avg_tokens_truncated": round(avg_tokens_truncated, 0)
                    if avg_tokens_truncated
                    else 0,
                },
                "token_summary": token_summary,
                "model_token_stats": model_token_stats,
                "phase_breakdown": phase_breakdown,
                "context_limits": [
                    {"model": model, "limit": limit, "count": count}
                    for model, limit, count in context_limits
                ],
                "model_stats": [
                    {
                        "model": stat.model_name,
                        "provider": stat.model_provider,
                        "total_requests": stat.total_requests,
                        "truncated_count": int(stat.truncated_count or 0),
                        "truncation_rate": round(
                            (stat.truncated_count or 0)
                            / stat.total_requests
                            * 100,
                            2,
                        )
                        if stat.total_requests > 0
                        else 0,
                        "avg_context_limit": round(stat.avg_context_limit, 0)
                        if stat.avg_context_limit
                        else None,
                    }
                    for stat in model_stats
                ],
                "recent_truncated": [
                    {
                        "timestamp": req.timestamp.isoformat(),
                        "research_id": req.research_id,
                        "model": req.model_name,
                        "prompt_tokens": req.prompt_tokens,  # Standard token count
                        "ollama_tokens": req.ollama_prompt_eval_count,  # What Ollama actually used
                        "original_tokens": (
                            req.ollama_prompt_eval_count or req.prompt_tokens
                        )
                        + (req.tokens_truncated or 0),  # What was requested
                        "context_limit": req.context_limit,
                        "tokens_truncated": req.tokens_truncated,
                        "truncation_ratio": req.truncation_ratio,
                        "research_query": req.research_query,
                    }
                    for req in recent_truncated
                ],
                "chart_data": chart_data,
                "all_requests": [
                    {
                        "timestamp": req.timestamp.isoformat(),
                        "research_id": req.research_id,
                        "model": req.model_name,
                        "provider": req.model_provider,
                        "prompt_tokens": req.prompt_tokens,
                        "completion_tokens": req.completion_tokens,
                        "total_tokens": req.total_tokens,
                        "context_limit": req.context_limit,
                        "context_truncated": bool(req.context_truncated),
                        "tokens_truncated": req.tokens_truncated or 0,
                        "truncation_ratio": round(req.truncation_ratio * 100, 2)
                        if req.truncation_ratio
                        else 0,
                        "ollama_prompt_eval_count": req.ollama_prompt_eval_count,
                        "research_query": req.research_query,
                        "research_phase": req.research_phase,
                    }
                    for req in all_requests_data
                ],
                "pagination": {
                    "page": page,
                    "per_page": per_page,
                    "total_count": all_requests_total,
                    "total_pages": all_requests_pages,
                },
            }

        return jsonify(response)

    except Exception:
        logger.exception("Error getting context overflow metrics")
        return jsonify(
            {
                "status": "error",
                "message": "Failed to load context overflow metrics",
            }
        ), 500
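
# Illustrative request/response for the endpoint above (values are made up):
#   GET /api/context-overflow?period=7d&page=1&per_page=50
#   -> {"status": "success",
#       "overview": {"total_requests": 42, "truncation_rate": 2.5, ...},
#       "token_summary": {...},
#       "chart_data": [...],
#       "pagination": {"page": 1, "per_page": 50, "total_count": 42, "total_pages": 1}}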



@context_overflow_bp.route(
    "/api/research/<string:research_id>/context-overflow", methods=["GET"]
)
@login_required
def get_research_context_overflow(research_id):
    """Get context overflow metrics for a specific research run."""
    try:
        with get_user_db_session() as session:
            # Get all token usage for this research run
            token_usage = (
                session.query(TokenUsage)
                .filter(TokenUsage.research_id == research_id)
                .order_by(TokenUsage.timestamp)
                .all()
            )

            if not token_usage:
                return jsonify(
                    {
                        "status": "success",
                        "data": {
                            "overview": {
                                "total_requests": 0,
                                "total_tokens": 0,
                                "context_limit": None,
                                "max_tokens_used": 0,
                                "truncation_occurred": False,
                            },
                            "requests": [],
                        },
                    }
                )

            # Calculate overview metrics
            total_tokens = sum(req.total_tokens or 0 for req in token_usage)
            total_prompt = sum(req.prompt_tokens or 0 for req in token_usage)
            total_completion = sum(
                req.completion_tokens or 0 for req in token_usage
            )

            # Get context limit (should be the same for every request in a run)
            context_limit = next(
                (req.context_limit for req in token_usage if req.context_limit),
                None,
            )

            # Check for truncation
            truncated_requests = [
                req for req in token_usage if req.context_truncated
            ]
            max_tokens_used = max(
                (req.prompt_tokens or 0) for req in token_usage
            )

            # Get token usage by phase
            phase_stats = {}
            for req in token_usage:
                phase = req.research_phase or "unknown"
                if phase not in phase_stats:
                    phase_stats[phase] = {
                        "count": 0,
                        "prompt_tokens": 0,
                        "completion_tokens": 0,
                        "total_tokens": 0,
                        "truncated_count": 0,
                    }
                phase_stats[phase]["count"] += 1
                phase_stats[phase]["prompt_tokens"] += req.prompt_tokens or 0
                phase_stats[phase]["completion_tokens"] += (
                    req.completion_tokens or 0
                )
                phase_stats[phase]["total_tokens"] += req.total_tokens or 0
                if req.context_truncated:
                    phase_stats[phase]["truncated_count"] += 1
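            # Illustrative shape after the loop (phase names are made up):
            #   {"search": {"count": 5, "prompt_tokens": 12000, ...},
            #    "synthesis": {"count": 2, "prompt_tokens": 6400, ...}}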


            # Format requests for response
            requests_data = []
            for req in token_usage:
                requests_data.append(
                    {
                        "timestamp": req.timestamp.isoformat(),
                        "phase": req.research_phase,
                        "prompt_tokens": req.prompt_tokens,
                        "completion_tokens": req.completion_tokens,
                        "total_tokens": req.total_tokens,
                        "context_limit": req.context_limit,
                        "context_truncated": bool(req.context_truncated),
                        "tokens_truncated": req.tokens_truncated or 0,
                        "ollama_prompt_eval_count": req.ollama_prompt_eval_count,
                        "calling_function": req.calling_function,
                        "response_time_ms": req.response_time_ms,
                    }
                )

            response = {
                "status": "success",
                "data": {
                    "overview": {
                        "total_requests": len(token_usage),
                        "total_tokens": total_tokens,
                        "total_prompt_tokens": total_prompt,
                        "total_completion_tokens": total_completion,
                        "context_limit": context_limit,
                        "max_tokens_used": max_tokens_used,
                        "truncation_occurred": len(truncated_requests) > 0,
                        "truncated_count": len(truncated_requests),
                        "tokens_lost": sum(
                            req.tokens_truncated or 0
                            for req in truncated_requests
                        ),
                    },
                    "phase_stats": phase_stats,
                    "requests": requests_data,
                    "model": token_usage[0].model_name if token_usage else None,
                    "provider": token_usage[0].model_provider
                    if token_usage
                    else None,
                },
            }

        return jsonify(response)

    except Exception:
        logger.exception("Error getting research context overflow")
        return jsonify(
            {
                "status": "error",
                "message": "Failed to load context overflow data",
            }
        ), 500
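
# Illustrative request/response for the per-research endpoint (the id and
# values are made up):
#   GET /api/research/abc123/context-overflow
#   -> {"status": "success",
#       "data": {"overview": {"total_requests": 7, "truncation_occurred": false, ...},
#                "phase_stats": {...}, "requests": [...]}}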