Coverage for src/local_deep_research/web/routes/context_overflow_api.py: 88%
118 statements
coverage.py v7.13.4, created at 2026-02-25 01:07 +0000
1"""API endpoints for context overflow analytics."""
3from flask import Blueprint, jsonify, request, session as flask_session
4from datetime import datetime, timedelta, timezone
5from sqlalchemy import func, desc, case
6from loguru import logger
8from ...database.session_context import get_user_db_session
9from ...database.models import TokenUsage
10from ..auth.decorators import login_required
12context_overflow_bp = Blueprint("context_overflow_api", __name__)
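
# Illustrative only, not part of this module: the blueprint is assumed to be
# registered on the Flask app elsewhere, roughly like:
#
#     app.register_blueprint(context_overflow_bp)
#
# (`app` is a hypothetical Flask application object.)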


@context_overflow_bp.route("/api/context-overflow", methods=["GET"])
@login_required
def get_context_overflow_metrics():
    """Get context overflow metrics for the current user."""
    try:
        # Get username from session
        username = flask_session.get("username")
        if not username:  # coverage: condition was never true during tests
            return jsonify(
                {"status": "error", "message": "User not authenticated"}
            ), 401

        # Get time period from query params (whitelist valid values)
        VALID_PERIODS = {"7d", "30d", "3m", "1y", "all"}
        period = request.args.get("period", "30d")
        if period not in VALID_PERIODS:
            period = "30d"

        # Pagination params for all_requests
        page = max(1, request.args.get("page", 1, type=int))
        per_page = request.args.get("per_page", 50, type=int)
        per_page = max(1, min(per_page, 500))

        # Calculate date filter (use timezone-aware datetime)
        start_date = None
        if period != "all":
            now = datetime.now(timezone.utc)
            if period == "7d":
                start_date = now - timedelta(days=7)
            elif period == "30d":  # coverage: always true during tests
                start_date = now - timedelta(days=30)
            elif period == "3m":
                start_date = now - timedelta(days=90)
            elif period == "1y":
                start_date = now - timedelta(days=365)

        with get_user_db_session(username) as session:
            # Base query
            query = session.query(TokenUsage)

            if start_date:
                query = query.filter(TokenUsage.timestamp >= start_date)

            # Get overview statistics - merge count queries using CASE
            overview_counts = query.with_entities(
                func.count(TokenUsage.id).label("total_requests"),
                func.sum(
                    case(
                        (TokenUsage.context_limit.isnot(None), 1),
                        else_=0,
                    )
                ).label("requests_with_context"),
                func.sum(
                    case(
                        (TokenUsage.context_truncated.is_(True), 1),
                        else_=0,
                    )
                ).label("truncated_requests"),
            ).first()
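
            # A single pass over the table replaces three separate COUNT
            # queries. Roughly the SQL it compiles to (illustrative; the
            # dialect and the table name "token_usage" are assumptions):
            #
            #     SELECT COUNT(id),
            #            SUM(CASE WHEN context_limit IS NOT NULL THEN 1 ELSE 0 END),
            #            SUM(CASE WHEN context_truncated IS TRUE THEN 1 ELSE 0 END)
            #     FROM token_usage
            #     WHERE timestamp >= :start_date  -- only for bounded periods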

            total_requests = overview_counts.total_requests or 0
            requests_with_context = int(
                overview_counts.requests_with_context or 0
            )
            truncated_requests = int(overview_counts.truncated_requests or 0)

            # Calculate truncation rate
            truncation_rate = 0
            if requests_with_context > 0:
                truncation_rate = (
                    truncated_requests / requests_with_context
                ) * 100

            # Get average tokens truncated
            avg_tokens_truncated = session.query(
                func.avg(TokenUsage.tokens_truncated)
            ).filter(TokenUsage.context_truncated.is_(True))

            if start_date:
                avg_tokens_truncated = avg_tokens_truncated.filter(
                    TokenUsage.timestamp >= start_date
                )

            avg_tokens_truncated = avg_tokens_truncated.scalar() or 0

            # --- Token summary (always populated, no context_limit filter) ---
            token_summary_row = query.with_entities(
                func.count(TokenUsage.id).label("total_requests"),
                func.coalesce(func.sum(TokenUsage.total_tokens), 0).label(
                    "total_tokens"
                ),
                func.coalesce(func.sum(TokenUsage.prompt_tokens), 0).label(
                    "total_prompt_tokens"
                ),
                func.coalesce(func.sum(TokenUsage.completion_tokens), 0).label(
                    "total_completion_tokens"
                ),
                func.avg(TokenUsage.prompt_tokens).label("avg_prompt_tokens"),
                func.avg(TokenUsage.completion_tokens).label(
                    "avg_completion_tokens"
                ),
                func.max(TokenUsage.prompt_tokens).label("max_prompt_tokens"),
            ).first()

            token_summary = {
                "total_requests": token_summary_row.total_requests or 0,
                "total_tokens": int(token_summary_row.total_tokens or 0),
                "total_prompt_tokens": int(
                    token_summary_row.total_prompt_tokens or 0
                ),
                "total_completion_tokens": int(
                    token_summary_row.total_completion_tokens or 0
                ),
                "avg_prompt_tokens": round(
                    token_summary_row.avg_prompt_tokens or 0, 0
                ),
                "avg_completion_tokens": round(
                    token_summary_row.avg_completion_tokens or 0, 0
                ),
                "max_prompt_tokens": int(
                    token_summary_row.max_prompt_tokens or 0
                ),
            }

            # --- Model token stats (always populated, no context_limit filter) ---
            model_token_query = (
                query.with_entities(
                    TokenUsage.model_name,
                    TokenUsage.model_provider,
                    func.count(TokenUsage.id).label("total_requests"),
                    func.coalesce(func.sum(TokenUsage.total_tokens), 0).label(
                        "total_tokens"
                    ),
                    func.avg(TokenUsage.prompt_tokens).label("avg_prompt"),
                    func.max(TokenUsage.prompt_tokens).label("max_prompt"),
                    func.avg(TokenUsage.response_time_ms).label(
                        "avg_response_time_ms"
                    ),
                )
                .group_by(TokenUsage.model_name, TokenUsage.model_provider)
                .all()
            )

            model_token_stats = [
                {
                    "model": row.model_name,
                    "provider": row.model_provider,
                    "total_requests": row.total_requests,
                    "total_tokens": int(row.total_tokens or 0),
                    "avg_prompt": round(row.avg_prompt or 0, 0),
                    "max_prompt": int(row.max_prompt or 0),
                    "avg_response_time_ms": round(
                        row.avg_response_time_ms or 0, 0
                    ),
                }
                for row in model_token_query
            ]

            # --- Phase breakdown (always populated, no context_limit filter) ---
            phase_query = (
                query.with_entities(
                    TokenUsage.research_phase,
                    func.count(TokenUsage.id).label("count"),
                    func.coalesce(func.sum(TokenUsage.total_tokens), 0).label(
                        "total_tokens"
                    ),
                    func.avg(TokenUsage.total_tokens).label("avg_tokens"),
                )
                .group_by(TokenUsage.research_phase)
                .all()
            )

            phase_breakdown = [
                {
                    "phase": row.research_phase or "unknown",
                    "count": row.count,
                    "total_tokens": int(row.total_tokens or 0),
                    "avg_tokens": round(row.avg_tokens or 0, 0),
                }
                for row in phase_query
            ]

            # Get context limit distribution by model
            context_limits = session.query(
                TokenUsage.model_name,
                TokenUsage.context_limit,
                func.count(TokenUsage.id).label("count"),
            ).filter(TokenUsage.context_limit.isnot(None))

            if start_date:
                context_limits = context_limits.filter(
                    TokenUsage.timestamp >= start_date
                )

            context_limits = context_limits.group_by(
                TokenUsage.model_name, TokenUsage.context_limit
            ).all()

            # Get recent truncated requests
            recent_truncated = (
                query.filter(TokenUsage.context_truncated.is_(True))
                .order_by(desc(TokenUsage.timestamp))
                .limit(20)
                .all()
            )

            # Get time series data for chart - include all records
            # (even those without context_limit for OpenRouter models)
            time_series_query = query.order_by(TokenUsage.timestamp)

            if start_date:
                # For shorter periods, get all data points (capped at 1000)
                if period in ["7d", "30d"]:  # coverage: always true during tests
                    time_series_data = time_series_query.limit(1000).all()
                else:
                    # For longer periods, sample data
                    time_series_data = time_series_query.limit(500).all()
            else:
                time_series_data = time_series_query.limit(1000).all()

            # Format time series for chart
            chart_data = []
            for usage in time_series_data:  # coverage: loop never entered during tests
                # Calculate original tokens (before truncation)
                ollama_used = (
                    usage.ollama_prompt_eval_count
                )  # What Ollama actually processed
                actual_prompt = ollama_used or usage.prompt_tokens
                tokens_truncated = usage.tokens_truncated or 0
                original_tokens = (
                    actual_prompt + tokens_truncated
                    if usage.context_truncated
                    else actual_prompt
                )
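
                # Worked example with hypothetical numbers: if Ollama reports
                # prompt_eval_count=3900 and tokens_truncated=2100 on a
                # truncated request, the original prompt was roughly
                # 3900 + 2100 = 6000 tokens before the context window cut it.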

                chart_data.append(
                    {
                        "timestamp": usage.timestamp.isoformat(),
                        "research_id": usage.research_id,
                        "prompt_tokens": usage.prompt_tokens,  # From our standard token counting
                        "completion_tokens": usage.completion_tokens,
                        "ollama_prompt_tokens": ollama_used,  # What Ollama actually used (may be capped)
                        "original_prompt_tokens": original_tokens,  # What was originally requested (before truncation)
                        "context_limit": usage.context_limit,
                        "truncated": bool(usage.context_truncated),
                        "tokens_truncated": tokens_truncated,
                        "model": usage.model_name,
                    }
                )

            # Get model-specific truncation stats
            model_stats = session.query(
                TokenUsage.model_name,
                TokenUsage.model_provider,
                func.count(TokenUsage.id).label("total_requests"),
                func.sum(TokenUsage.context_truncated).label("truncated_count"),
                func.avg(TokenUsage.context_limit).label("avg_context_limit"),
            ).filter(TokenUsage.context_limit.isnot(None))

            if start_date:
                model_stats = model_stats.filter(
                    TokenUsage.timestamp >= start_date
                )

            model_stats = model_stats.group_by(
                TokenUsage.model_name, TokenUsage.model_provider
            ).all()

            # --- Paginated all_requests ---
            all_requests_query = query.order_by(desc(TokenUsage.timestamp))
            all_requests_total = all_requests_query.count()
            all_requests_pages = (
                (all_requests_total + per_page - 1) // per_page
                if all_requests_total > 0
                else 1
            )
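
            # Ceiling division: e.g. 101 matching rows at per_page=50 yield
            # (101 + 50 - 1) // 50 = 3 pages.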

            all_requests_data = (
                all_requests_query.offset((page - 1) * per_page)
                .limit(per_page)
                .all()
            )

            # Format response
            response = {
                "status": "success",
                "overview": {
                    "total_requests": total_requests,
                    "requests_with_context_data": requests_with_context,
                    "truncated_requests": truncated_requests,
                    "truncation_rate": round(truncation_rate, 2),
                    "avg_tokens_truncated": round(avg_tokens_truncated, 0)
                    if avg_tokens_truncated
                    else 0,
                },
                "token_summary": token_summary,
                "model_token_stats": model_token_stats,
                "phase_breakdown": phase_breakdown,
                "context_limits": [
                    {"model": model, "limit": limit, "count": count}
                    for model, limit, count in context_limits
                ],
                "model_stats": [
                    {
                        "model": stat.model_name,
                        "provider": stat.model_provider,
                        "total_requests": stat.total_requests,
                        "truncated_count": int(stat.truncated_count or 0),
                        "truncation_rate": round(
                            (stat.truncated_count or 0)
                            / stat.total_requests
                            * 100,
                            2,
                        )
                        if stat.total_requests > 0
                        else 0,
                        "avg_context_limit": round(stat.avg_context_limit, 0)
                        if stat.avg_context_limit
                        else None,
                    }
                    for stat in model_stats
                ],
                "recent_truncated": [
                    {
                        "timestamp": req.timestamp.isoformat(),
                        "research_id": req.research_id,
                        "model": req.model_name,
                        "prompt_tokens": req.prompt_tokens,  # Standard token count
                        "ollama_tokens": req.ollama_prompt_eval_count,  # What Ollama actually used
                        "original_tokens": (
                            req.ollama_prompt_eval_count or req.prompt_tokens
                        )
                        + (req.tokens_truncated or 0),  # What was requested
                        "context_limit": req.context_limit,
                        "tokens_truncated": req.tokens_truncated,
                        "truncation_ratio": req.truncation_ratio,
                        "research_query": req.research_query,
                    }
                    for req in recent_truncated
                ],
                "chart_data": chart_data,
                "all_requests": [
                    {
                        "timestamp": req.timestamp.isoformat(),
                        "research_id": req.research_id,
                        "model": req.model_name,
                        "provider": req.model_provider,
                        "prompt_tokens": req.prompt_tokens,
                        "completion_tokens": req.completion_tokens,
                        "total_tokens": req.total_tokens,
                        "context_limit": req.context_limit,
                        "context_truncated": bool(req.context_truncated),
                        "tokens_truncated": req.tokens_truncated or 0,
                        "truncation_ratio": round(req.truncation_ratio * 100, 2)
                        if req.truncation_ratio
                        else 0,
                        "ollama_prompt_eval_count": req.ollama_prompt_eval_count,
                        "research_query": req.research_query,
                        "research_phase": req.research_phase,
                    }
                    for req in all_requests_data
                ],
                "pagination": {
                    "page": page,
                    "per_page": per_page,
                    "total_count": all_requests_total,
                    "total_pages": all_requests_pages,
                },
            }

        return jsonify(response)

    except Exception:
        logger.exception("Error getting context overflow metrics")
        return jsonify(
            {
                "status": "error",
                "message": "Failed to load context overflow metrics",
            }
        ), 500
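
# Illustrative only: a minimal client call against this endpoint, assuming a
# local dev server and an authenticated session cookie (both assumptions):
#
#     import requests
#
#     resp = requests.get(
#         "http://localhost:5000/api/context-overflow",
#         params={"period": "7d", "page": 2, "per_page": 100},
#         cookies={"session": "<session-cookie>"},
#     )
#     resp.json()["overview"]["truncation_rate"]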


@context_overflow_bp.route(
    "/api/research/<string:research_id>/context-overflow", methods=["GET"]
)
@login_required
def get_research_context_overflow(research_id):
    """Get context overflow metrics for a specific research."""
    try:
        with get_user_db_session() as session:
            # Get all token usage for this research
            token_usage = (
                session.query(TokenUsage)
                .filter(TokenUsage.research_id == research_id)
                .order_by(TokenUsage.timestamp)
                .all()
            )

            if not token_usage:
                return jsonify(
                    {
                        "status": "success",
                        "data": {
                            "overview": {
                                "total_requests": 0,
                                "total_tokens": 0,
                                "context_limit": None,
                                "max_tokens_used": 0,
                                "truncation_occurred": False,
                            },
                            "requests": [],
                        },
                    }
                )

            # Calculate overview metrics
            total_tokens = sum(req.total_tokens or 0 for req in token_usage)
            total_prompt = sum(req.prompt_tokens or 0 for req in token_usage)
            total_completion = sum(
                req.completion_tokens or 0 for req in token_usage
            )

            # Get context limit (should be same for all requests in a research)
            context_limit = next(
                (req.context_limit for req in token_usage if req.context_limit),
                None,
            )

            # Check for truncation
            truncated_requests = [
                req for req in token_usage if req.context_truncated
            ]
            max_tokens_used = max(
                (req.prompt_tokens or 0) for req in token_usage
            )

            # Get token usage by phase
            phase_stats = {}
            for req in token_usage:
                phase = req.research_phase or "unknown"
                if phase not in phase_stats:
                    phase_stats[phase] = {
                        "count": 0,
                        "prompt_tokens": 0,
                        "completion_tokens": 0,
                        "total_tokens": 0,
                        "truncated_count": 0,
                    }
                phase_stats[phase]["count"] += 1
                phase_stats[phase]["prompt_tokens"] += req.prompt_tokens or 0
                phase_stats[phase]["completion_tokens"] += (
                    req.completion_tokens or 0
                )
                phase_stats[phase]["total_tokens"] += req.total_tokens or 0
                if req.context_truncated:
                    phase_stats[phase]["truncated_count"] += 1

            # Format requests for response
            requests_data = []
            for req in token_usage:
                requests_data.append(
                    {
                        "timestamp": req.timestamp.isoformat(),
                        "phase": req.research_phase,
                        "prompt_tokens": req.prompt_tokens,
                        "completion_tokens": req.completion_tokens,
                        "total_tokens": req.total_tokens,
                        "context_limit": req.context_limit,
                        "context_truncated": bool(req.context_truncated),
                        "tokens_truncated": req.tokens_truncated or 0,
                        "ollama_prompt_eval_count": req.ollama_prompt_eval_count,
                        "calling_function": req.calling_function,
                        "response_time_ms": req.response_time_ms,
                    }
                )

            response = {
                "status": "success",
                "data": {
                    "overview": {
                        "total_requests": len(token_usage),
                        "total_tokens": total_tokens,
                        "total_prompt_tokens": total_prompt,
                        "total_completion_tokens": total_completion,
                        "context_limit": context_limit,
                        "max_tokens_used": max_tokens_used,
                        "truncation_occurred": len(truncated_requests) > 0,
                        "truncated_count": len(truncated_requests),
                        "tokens_lost": sum(
                            req.tokens_truncated or 0
                            for req in truncated_requests
                        ),
                    },
                    "phase_stats": phase_stats,
                    "requests": requests_data,
                    "model": token_usage[0].model_name if token_usage else None,
                    "provider": token_usage[0].model_provider
                    if token_usage
                    else None,
                },
            }

        return jsonify(response)

    except Exception:
        logger.exception("Error getting research context overflow")
        return jsonify(
            {
                "status": "error",
                "message": "Failed to load context overflow data",
            }
        ), 500
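
# Illustrative only: fetching the per-research breakdown, with a placeholder
# research id and the same assumed host and session cookie as above:
#
#     resp = requests.get(
#         "http://localhost:5000/api/research/<research-id>/context-overflow",
#         cookies={"session": "<session-cookie>"},
#     )
#     resp.json()["data"]["overview"]["truncation_occurred"]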