Coverage for src/local_deep_research/web/routes/context_overflow_api.py: 99%
112 statements
« prev ^ index » next
coverage.py v7.14.1, created at 2026-06-03 23:15 +0000
1"""API endpoints for context overflow analytics."""
3from flask import Blueprint, jsonify, request, session as flask_session
4from datetime import datetime, timedelta, timezone
5from sqlalchemy import func, desc
6from loguru import logger
8from ...database.session_context import get_user_db_session
9from ...database.models import TokenUsage
10from ...metrics.query_utils import get_context_overflow_truncation_summary
11from ...settings import SettingsManager
12from ..auth.decorators import login_required
# Blueprint that carries the context-overflow analytics routes defined below.
context_overflow_bp = Blueprint("context_overflow_api", __name__)

# NOTE: Routes use flask_session["username"] (not .get()) intentionally.
# @login_required guarantees the key exists; direct access fails fast
# if the decorator is ever removed.
# Look-back window in days for each bounded period. "all" has no lower bound
# and is therefore absent from this table.
_PERIOD_DAYS = {"7d": 7, "30d": 30, "3m": 90, "1y": 365}
_VALID_PERIODS = frozenset(_PERIOD_DAYS) | {"all"}
_DEFAULT_PERIOD = "30d"


def _period_start_date(period):
    """Return the timezone-aware UTC lower bound for *period*, or None for "all"."""
    days = _PERIOD_DAYS.get(period)
    if days is None:
        return None
    return datetime.now(timezone.utc) - timedelta(days=days)


def _chart_point(usage):
    """Serialize one TokenUsage row into a time-series chart point."""
    ollama_used = usage.ollama_prompt_eval_count  # What Ollama actually processed
    actual_prompt = ollama_used or usage.prompt_tokens
    tokens_truncated = usage.tokens_truncated or 0
    if usage.context_truncated:
        # Reconstruct the pre-truncation size. `or 0` keeps a row that has
        # neither token count from raising TypeError on None + int.
        original_tokens = (actual_prompt or 0) + tokens_truncated
    else:
        original_tokens = actual_prompt

    return {
        "timestamp": usage.timestamp.isoformat(),
        "research_id": usage.research_id,
        "prompt_tokens": usage.prompt_tokens,  # From our standard token counting
        "completion_tokens": usage.completion_tokens,
        "ollama_prompt_tokens": ollama_used,  # What Ollama actually used (may be capped)
        "original_prompt_tokens": original_tokens,  # Requested size before truncation
        "context_limit": usage.context_limit,
        "truncated": bool(usage.context_truncated),
        "tokens_truncated": tokens_truncated,
        "model": usage.model_name,
        "provider": usage.model_provider,
        "research_phase": usage.research_phase,
        "response_time_ms": usage.response_time_ms,
    }


def _truncated_request_entry(req):
    """Serialize one truncated TokenUsage row for the "recent_truncated" list."""
    return {
        "timestamp": req.timestamp.isoformat(),
        "research_id": req.research_id,
        "model": req.model_name,
        "prompt_tokens": req.prompt_tokens,  # Standard token count
        "ollama_tokens": req.ollama_prompt_eval_count,  # What Ollama actually used
        # What was requested; the trailing `or 0` avoids None + int when a
        # row carries neither token count.
        "original_tokens": (
            req.ollama_prompt_eval_count or req.prompt_tokens or 0
        )
        + (req.tokens_truncated or 0),
        "context_limit": req.context_limit,
        "tokens_truncated": req.tokens_truncated,
        "truncation_ratio": req.truncation_ratio,
        "research_query": req.research_query,
    }


def _request_entry(req):
    """Serialize one TokenUsage row for the paginated "all_requests" list."""
    return {
        "timestamp": req.timestamp.isoformat(),
        "research_id": req.research_id,
        "model": req.model_name,
        "provider": req.model_provider,
        "prompt_tokens": req.prompt_tokens,
        "completion_tokens": req.completion_tokens,
        "total_tokens": req.total_tokens,
        "context_limit": req.context_limit,
        "context_truncated": bool(req.context_truncated),
        "tokens_truncated": req.tokens_truncated or 0,
        # Stored ratio is scaled by 100 for the response.
        "truncation_ratio": round(req.truncation_ratio * 100, 2)
        if req.truncation_ratio
        else 0,
        "ollama_prompt_eval_count": req.ollama_prompt_eval_count,
        "research_query": req.research_query,
        "research_phase": req.research_phase,
    }


def _model_truncation_entry(stat):
    """Serialize one per-model aggregate row for the "model_stats" list."""
    truncated = stat.truncated_count or 0
    total = stat.total_requests
    return {
        "model": stat.model_name,
        "provider": stat.model_provider,
        "total_requests": total,
        "truncated_count": int(truncated),
        "truncation_rate": round(truncated / total * 100, 2)
        if total > 0
        else 0,
        "avg_context_limit": round(stat.avg_context_limit, 0)
        if stat.avg_context_limit
        else None,
    }


@context_overflow_bp.route("/api/context-overflow", methods=["GET"])
@login_required
def get_context_overflow_metrics():
    """Get context overflow metrics for the current user.

    Query parameters:
        period: one of "7d", "30d", "3m", "1y", "all"; any other value
            falls back to "30d".
        page: 1-based page number for "all_requests" (values below 1
            become 1).
        per_page: page size for "all_requests", clamped to 1..500
            (default 50).

    Returns:
        A JSON response with "status": "success" and the sections
        "overview", "token_summary", "model_token_stats",
        "phase_breakdown", "context_limits", "model_stats",
        "recent_truncated", "chart_data", "all_requests", "pagination"
        and "current_context_window". On any exception the error is
        logged and a JSON error body is returned with HTTP 500.
    """
    try:
        # Direct key access on purpose: fail fast if the login decorator
        # is ever removed.
        username = flask_session["username"]

        # Whitelist the period so only known values reach the queries.
        period = request.args.get("period", _DEFAULT_PERIOD)
        if period not in _VALID_PERIODS:
            period = _DEFAULT_PERIOD

        # Pagination params for all_requests
        page = max(1, request.args.get("page", 1, type=int))
        per_page = request.args.get("per_page", 50, type=int)
        per_page = max(1, min(per_page, 500))

        # None means "all": no lower bound on the timestamp.
        start_date = _period_start_date(period)

        with get_user_db_session(username) as session:
            # Truncation summary — shared with /metrics/api/metrics so the
            # main dashboard's at-a-glance numbers cannot disagree with this
            # endpoint's deep-dive. Helper internally uses
            # get_time_filter_condition, equivalent to the start_date above.
            summary = get_context_overflow_truncation_summary(session, period)
            total_requests = summary["total_requests"]
            avg_tokens_truncated = summary["avg_tokens_truncated"]

            # Base query — kept for downstream phase / chart_data /
            # all_requests aggregations that share the same time window.
            query = session.query(TokenUsage)
            if start_date:
                query = query.filter(TokenUsage.timestamp >= start_date)

            token_summary = {
                "total_requests": total_requests,
                "total_tokens": summary["total_tokens"],
                "total_prompt_tokens": summary["total_prompt_tokens"],
                "total_completion_tokens": summary["total_completion_tokens"],
                "avg_prompt_tokens": round(summary["avg_prompt_tokens"], 0),
                "avg_completion_tokens": round(
                    summary["avg_completion_tokens"], 0
                ),
                "max_prompt_tokens": summary["max_prompt_tokens"],
            }

            # --- Model token stats (always populated, no context_limit filter) ---
            model_token_rows = (
                query.with_entities(
                    TokenUsage.model_name,
                    TokenUsage.model_provider,
                    func.count(TokenUsage.id).label("total_requests"),
                    func.coalesce(func.sum(TokenUsage.total_tokens), 0).label(
                        "total_tokens"
                    ),
                    func.min(TokenUsage.prompt_tokens).label("min_prompt"),
                    func.avg(TokenUsage.prompt_tokens).label("avg_prompt"),
                    func.max(TokenUsage.prompt_tokens).label("max_prompt"),
                    func.avg(TokenUsage.response_time_ms).label(
                        "avg_response_time_ms"
                    ),
                )
                .group_by(TokenUsage.model_name, TokenUsage.model_provider)
                .all()
            )

            model_token_stats = [
                {
                    "model": row.model_name,
                    "provider": row.model_provider,
                    "total_requests": row.total_requests,
                    "total_tokens": int(row.total_tokens or 0),
                    "min_prompt": int(row.min_prompt or 0),
                    "avg_prompt": round(row.avg_prompt or 0, 0),
                    "max_prompt": int(row.max_prompt or 0),
                    "avg_response_time_ms": round(
                        row.avg_response_time_ms or 0, 0
                    ),
                }
                for row in model_token_rows
            ]

            # --- Phase breakdown (always populated, no context_limit filter) ---
            phase_rows = (
                query.with_entities(
                    TokenUsage.research_phase,
                    func.count(TokenUsage.id).label("count"),
                    func.coalesce(func.sum(TokenUsage.total_tokens), 0).label(
                        "total_tokens"
                    ),
                    func.avg(TokenUsage.total_tokens).label("avg_tokens"),
                )
                .group_by(TokenUsage.research_phase)
                .all()
            )

            phase_breakdown = [
                {
                    "phase": row.research_phase or "unknown",
                    "count": row.count,
                    "total_tokens": int(row.total_tokens or 0),
                    "avg_tokens": round(row.avg_tokens or 0, 0),
                }
                for row in phase_rows
            ]

            # Context limit distribution by model (rows with a known limit only)
            context_limits = session.query(
                TokenUsage.model_name,
                TokenUsage.context_limit,
                func.count(TokenUsage.id).label("count"),
            ).filter(TokenUsage.context_limit.isnot(None))
            if start_date:
                context_limits = context_limits.filter(
                    TokenUsage.timestamp >= start_date
                )
            context_limits = context_limits.group_by(
                TokenUsage.model_name, TokenUsage.context_limit
            ).all()

            # Most recent truncated requests
            recent_truncated = (
                query.filter(TokenUsage.context_truncated.is_(True))
                .order_by(desc(TokenUsage.timestamp))
                .limit(20)
                .all()
            )

            # Time series for the chart - include all records (even those
            # without context_limit for OpenRouter models). Rows are ordered
            # oldest-first and simply capped, not sampled: 500 rows for the
            # long windows ("3m", "1y"), 1000 rows otherwise.
            series_cap = 500 if period in ("3m", "1y") else 1000
            time_series_data = (
                query.order_by(TokenUsage.timestamp).limit(series_cap).all()
            )
            chart_data = [_chart_point(usage) for usage in time_series_data]

            # Model-specific truncation stats (rows with a known limit only)
            model_stats = session.query(
                TokenUsage.model_name,
                TokenUsage.model_provider,
                func.count(TokenUsage.id).label("total_requests"),
                func.sum(TokenUsage.context_truncated).label("truncated_count"),
                func.avg(TokenUsage.context_limit).label("avg_context_limit"),
            ).filter(TokenUsage.context_limit.isnot(None))
            if start_date:
                model_stats = model_stats.filter(
                    TokenUsage.timestamp >= start_date
                )
            model_stats = model_stats.group_by(
                TokenUsage.model_name, TokenUsage.model_provider
            ).all()

            # --- Paginated all_requests ---
            all_requests_query = query.order_by(desc(TokenUsage.timestamp))
            all_requests_total = all_requests_query.count()
            # Ceiling division; an empty result still reports one page.
            all_requests_pages = (
                (all_requests_total + per_page - 1) // per_page
                if all_requests_total > 0
                else 1
            )
            all_requests_data = (
                all_requests_query.offset((page - 1) * per_page)
                .limit(per_page)
                .all()
            )

            # Format response
            response = {
                "status": "success",
                "overview": {
                    "total_requests": total_requests,
                    "requests_with_context_data": summary[
                        "requests_with_context"
                    ],
                    "truncated_requests": summary["truncated_requests"],
                    "truncation_rate": round(summary["truncation_rate"], 2),
                    "avg_tokens_truncated": round(avg_tokens_truncated, 0)
                    if avg_tokens_truncated
                    else 0,
                },
                "token_summary": token_summary,
                "model_token_stats": model_token_stats,
                "phase_breakdown": phase_breakdown,
                "context_limits": [
                    {"model": model, "limit": limit, "count": count}
                    for model, limit, count in context_limits
                ],
                "model_stats": [
                    _model_truncation_entry(stat) for stat in model_stats
                ],
                "recent_truncated": [
                    _truncated_request_entry(req) for req in recent_truncated
                ],
                "chart_data": chart_data,
                "all_requests": [
                    _request_entry(req) for req in all_requests_data
                ],
                "pagination": {
                    "page": page,
                    "per_page": per_page,
                    "total_count": all_requests_total,
                    "total_pages": all_requests_pages,
                },
                "current_context_window": SettingsManager(session).get_setting(
                    "llm.local_context_window_size"
                ),
            }

            return jsonify(response)

    except Exception:
        # Route boundary: log the traceback, return a generic message.
        logger.exception("Error getting context overflow metrics")
        return jsonify(
            {
                "status": "error",
                "message": "Failed to load context overflow metrics",
            }
        ), 500
@context_overflow_bp.route(
    "/api/research/<string:research_id>/context-overflow", methods=["GET"]
)
@login_required
def get_research_context_overflow(research_id):
    """Get context overflow metrics for a specific research.

    Args:
        research_id: Identifier taken from the URL; only TokenUsage rows
            with this research_id are considered.

    Returns:
        A JSON response with "status": "success" and a "data" object
        holding "overview", "phase_stats", "requests", "model" and
        "provider". When the research has no token usage rows the same
        keys are returned with zero/empty values. On any exception the
        error is logged and a JSON error body is returned with HTTP 500.
    """
    try:
        # Direct key access on purpose, matching the other route in this
        # module: fail fast if the login decorator is ever removed.
        username = flask_session["username"]

        with get_user_db_session(username) as session:
            # All token usage for this research, oldest first
            token_usage = (
                session.query(TokenUsage)
                .filter(TokenUsage.research_id == research_id)
                .order_by(TokenUsage.timestamp)
                .all()
            )

            if not token_usage:
                # Same shape as the populated payload below, so clients
                # do not have to special-case a research with no usage.
                return jsonify(
                    {
                        "status": "success",
                        "data": {
                            "overview": {
                                "total_requests": 0,
                                "total_tokens": 0,
                                "total_prompt_tokens": 0,
                                "total_completion_tokens": 0,
                                "context_limit": None,
                                "max_tokens_used": 0,
                                "truncation_occurred": False,
                                "truncated_count": 0,
                                "tokens_lost": 0,
                            },
                            "phase_stats": {},
                            "requests": [],
                            "model": None,
                            "provider": None,
                        },
                    }
                )

            # Overview totals; missing counts are treated as 0.
            total_tokens = sum(req.total_tokens or 0 for req in token_usage)
            total_prompt = sum(req.prompt_tokens or 0 for req in token_usage)
            total_completion = sum(
                req.completion_tokens or 0 for req in token_usage
            )

            # First non-empty context limit (should be the same for all
            # requests in a research).
            context_limit = next(
                (req.context_limit for req in token_usage if req.context_limit),
                None,
            )

            truncated_requests = [
                req for req in token_usage if req.context_truncated
            ]
            # token_usage is non-empty here, so max() always has an element.
            max_tokens_used = max(req.prompt_tokens or 0 for req in token_usage)

            # Token usage grouped by research phase
            phase_stats = {}
            for req in token_usage:
                bucket = phase_stats.setdefault(
                    req.research_phase or "unknown",
                    {
                        "count": 0,
                        "prompt_tokens": 0,
                        "completion_tokens": 0,
                        "total_tokens": 0,
                        "truncated_count": 0,
                    },
                )
                bucket["count"] += 1
                bucket["prompt_tokens"] += req.prompt_tokens or 0
                bucket["completion_tokens"] += req.completion_tokens or 0
                bucket["total_tokens"] += req.total_tokens or 0
                if req.context_truncated:
                    bucket["truncated_count"] += 1

            # Per-request rows for the response
            requests_data = [
                {
                    "timestamp": req.timestamp.isoformat(),
                    "phase": req.research_phase,
                    "prompt_tokens": req.prompt_tokens,
                    "completion_tokens": req.completion_tokens,
                    "total_tokens": req.total_tokens,
                    "context_limit": req.context_limit,
                    "context_truncated": bool(req.context_truncated),
                    "tokens_truncated": req.tokens_truncated or 0,
                    "ollama_prompt_eval_count": req.ollama_prompt_eval_count,
                    "calling_function": req.calling_function,
                    "response_time_ms": req.response_time_ms,
                }
                for req in token_usage
            ]

            # Model and provider are reported from the earliest request.
            first_request = token_usage[0]
            response = {
                "status": "success",
                "data": {
                    "overview": {
                        "total_requests": len(token_usage),
                        "total_tokens": total_tokens,
                        "total_prompt_tokens": total_prompt,
                        "total_completion_tokens": total_completion,
                        "context_limit": context_limit,
                        "max_tokens_used": max_tokens_used,
                        "truncation_occurred": len(truncated_requests) > 0,
                        "truncated_count": len(truncated_requests),
                        "tokens_lost": sum(
                            req.tokens_truncated or 0
                            for req in truncated_requests
                        ),
                    },
                    "phase_stats": phase_stats,
                    "requests": requests_data,
                    "model": first_request.model_name,
                    "provider": first_request.model_provider,
                },
            }

            return jsonify(response)

    except Exception:
        # Route boundary: log the traceback, return a generic message.
        logger.exception("Error getting research context overflow")
        return jsonify(
            {
                "status": "error",
                "message": "Failed to load context overflow data",
            }
        ), 500