Coverage for src/local_deep_research/web/routes/context_overflow_api.py: 10% (101 statements)
coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
1"""API endpoints for context overflow analytics."""
3from flask import Blueprint, jsonify, request, session as flask_session
4from datetime import datetime, timedelta, timezone
5from sqlalchemy import func, desc
6from loguru import logger
8from ...database.session_context import get_user_db_session
9from ...database.models import TokenUsage
10from ..auth.decorators import login_required
12context_overflow_bp = Blueprint("context_overflow_api", __name__)
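
# Registration sketch (assumption: an app-factory setup; the import path is
# illustrative, adapt it to wherever the Flask app is actually created):
#
#     from .context_overflow_api import context_overflow_bp
#     app.register_blueprint(context_overflow_bp)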


@context_overflow_bp.route("/api/context-overflow", methods=["GET"])
@login_required
def get_context_overflow_metrics():
    """Get context overflow metrics for the current user."""
    try:
        # Get username from session
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "User not authenticated"}
            ), 401

        # Get time period from query params
        period = request.args.get("period", "30d")

        # Calculate date filter (use timezone-aware datetime)
        start_date = None
        if period != "all":
            now = datetime.now(timezone.utc)
            if period == "7d":
                start_date = now - timedelta(days=7)
            elif period == "30d":
                start_date = now - timedelta(days=30)
            elif period == "3m":
                start_date = now - timedelta(days=90)
            elif period == "1y":
                start_date = now - timedelta(days=365)

        with get_user_db_session(username) as session:
            # Base query
            query = session.query(TokenUsage)

            if start_date:
                query = query.filter(TokenUsage.timestamp >= start_date)

            # Get overview statistics
            total_requests = query.count()

            # Requests with context data
            requests_with_context = query.filter(
                TokenUsage.context_limit.isnot(None)
            ).count()

            # Truncated requests
            truncated_requests = query.filter(
                TokenUsage.context_truncated.is_(True)
            ).count()

            # Calculate truncation rate
            truncation_rate = 0
            if requests_with_context > 0:
                truncation_rate = (
                    truncated_requests / requests_with_context
                ) * 100

            # Get average tokens truncated
            avg_tokens_truncated = session.query(
                func.avg(TokenUsage.tokens_truncated)
            ).filter(TokenUsage.context_truncated.is_(True))

            if start_date:
                avg_tokens_truncated = avg_tokens_truncated.filter(
                    TokenUsage.timestamp >= start_date
                )

            avg_tokens_truncated = avg_tokens_truncated.scalar() or 0

            # Get context limit distribution by model
            context_limits = session.query(
                TokenUsage.model_name,
                TokenUsage.context_limit,
                func.count(TokenUsage.id).label("count"),
            ).filter(TokenUsage.context_limit.isnot(None))

            if start_date:
                context_limits = context_limits.filter(
                    TokenUsage.timestamp >= start_date
                )

            context_limits = context_limits.group_by(
                TokenUsage.model_name, TokenUsage.context_limit
            ).all()

            # Get recent truncated requests
            recent_truncated = (
                query.filter(TokenUsage.context_truncated.is_(True))
                .order_by(desc(TokenUsage.timestamp))
                .limit(20)
                .all()
            )

            # Get time series data for chart - include all records
            # (even those without context_limit for OpenRouter models)
            time_series_query = query.order_by(TokenUsage.timestamp)

            if start_date:
                # For shorter periods, return every data point
                if period in ["7d", "30d"]:
                    time_series_data = time_series_query.all()
                else:
                    # For longer periods, cap the result at 500 points
                    # (the first 500 by timestamp, not a random sample)
                    time_series_data = time_series_query.limit(500).all()
            else:
                time_series_data = time_series_query.limit(1000).all()

            # Format time series for chart
            chart_data = []
            for usage in time_series_data:
                # Calculate original tokens (before truncation)
                ollama_used = (
                    usage.ollama_prompt_eval_count
                )  # What Ollama actually processed
                actual_prompt = ollama_used or usage.prompt_tokens
                tokens_truncated = usage.tokens_truncated or 0
                original_tokens = (
                    actual_prompt + tokens_truncated
                    if usage.context_truncated
                    else actual_prompt
                )
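
                # Worked example (hypothetical numbers): with
                # ollama_prompt_eval_count=8000, tokens_truncated=4000 and
                # context_truncated=True, original_tokens = 8000 + 4000
                # = 12000, i.e. the prompt size as originally submitted
                # before the context window forced a truncation.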

                chart_data.append(
                    {
                        "timestamp": usage.timestamp.isoformat(),
                        "research_id": usage.research_id,
                        "prompt_tokens": usage.prompt_tokens,  # From our standard token counting
                        "ollama_prompt_tokens": ollama_used,  # What Ollama actually used (may be capped)
                        "original_prompt_tokens": original_tokens,  # What was originally requested (before truncation)
                        "context_limit": usage.context_limit,
                        "truncated": bool(usage.context_truncated),
                        "tokens_truncated": tokens_truncated,
                        "model": usage.model_name,
                    }
                )

            # Get model-specific truncation stats
            model_stats = session.query(
                TokenUsage.model_name,
                TokenUsage.model_provider,
                func.count(TokenUsage.id).label("total_requests"),
                func.sum(TokenUsage.context_truncated).label("truncated_count"),
                func.avg(TokenUsage.context_limit).label("avg_context_limit"),
            ).filter(TokenUsage.context_limit.isnot(None))

            if start_date:
                model_stats = model_stats.filter(
                    TokenUsage.timestamp >= start_date
                )

            model_stats = model_stats.group_by(
                TokenUsage.model_name, TokenUsage.model_provider
            ).all()

            # Format response
            response = {
                "status": "success",
                "overview": {
                    "total_requests": total_requests,
                    "requests_with_context_data": requests_with_context,
                    "truncated_requests": truncated_requests,
                    "truncation_rate": round(truncation_rate, 2),
                    "avg_tokens_truncated": round(avg_tokens_truncated, 0)
                    if avg_tokens_truncated
                    else 0,
                },
                "context_limits": [
                    {"model": model, "limit": limit, "count": count}
                    for model, limit, count in context_limits
                ],
                "model_stats": [
                    {
                        "model": stat.model_name,
                        "provider": stat.model_provider,
                        "total_requests": stat.total_requests,
                        "truncated_count": int(stat.truncated_count or 0),
                        "truncation_rate": round(
                            (stat.truncated_count or 0)
                            / stat.total_requests
                            * 100,
                            2,
                        )
                        if stat.total_requests > 0
                        else 0,
                        "avg_context_limit": round(stat.avg_context_limit, 0)
                        if stat.avg_context_limit
                        else None,
                    }
                    for stat in model_stats
                ],
                "recent_truncated": [
                    {
                        "timestamp": req.timestamp.isoformat(),
                        "research_id": req.research_id,
                        "model": req.model_name,
                        "prompt_tokens": req.prompt_tokens,  # Standard token count
                        "ollama_tokens": req.ollama_prompt_eval_count,  # What Ollama actually used
                        "original_tokens": (
                            req.ollama_prompt_eval_count or req.prompt_tokens
                        )
                        + (req.tokens_truncated or 0),  # What was requested
                        "context_limit": req.context_limit,
                        "tokens_truncated": req.tokens_truncated,
                        "truncation_ratio": req.truncation_ratio,
                        "research_query": req.research_query,
                    }
                    for req in recent_truncated
                ],
                "chart_data": chart_data,
                # Add detailed table data for all requests
                "all_requests": [
                    {
                        "timestamp": req.timestamp.isoformat(),
                        "research_id": req.research_id,
                        "model": req.model_name,
                        "provider": req.model_provider,
                        "prompt_tokens": req.prompt_tokens,
                        "completion_tokens": req.completion_tokens,
                        "total_tokens": req.total_tokens,
                        "context_limit": req.context_limit,
                        "context_truncated": bool(req.context_truncated),
                        "tokens_truncated": req.tokens_truncated or 0,
                        "truncation_ratio": round(req.truncation_ratio * 100, 2)
                        if req.truncation_ratio
                        else 0,
                        "ollama_prompt_eval_count": req.ollama_prompt_eval_count,
                        "research_query": req.research_query,
                        "research_phase": req.research_phase,
                    }
                    for req in query.order_by(desc(TokenUsage.timestamp))
                    .limit(100)
                    .all()
                ],
            }

        return jsonify(response)

    except Exception:
        logger.exception("Error getting context overflow metrics")
        return jsonify(
            {
                "status": "error",
                "message": "Failed to load context overflow metrics",
            }
        ), 500
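

# Usage sketch (hypothetical, assuming `app` is the Flask app with this
# blueprint registered and the test client has logged in first, so that
# flask_session["username"] is populated):
#
#     client = app.test_client()
#     resp = client.get("/api/context-overflow?period=7d")
#     payload = resp.get_json()
#     print(payload["overview"]["truncation_rate"])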


@context_overflow_bp.route(
    "/api/research/<string:research_id>/context-overflow", methods=["GET"]
)
@login_required
def get_research_context_overflow(research_id):
    """Get context overflow metrics for a specific research."""
    try:
        with get_user_db_session() as session:
            # Get all token usage for this research
            token_usage = (
                session.query(TokenUsage)
                .filter(TokenUsage.research_id == research_id)
                .order_by(TokenUsage.timestamp)
                .all()
            )

            if not token_usage:
                return jsonify(
                    {
                        "status": "success",
                        "data": {
                            "overview": {
                                "total_requests": 0,
                                "total_tokens": 0,
                                "context_limit": None,
                                "max_tokens_used": 0,
                                "truncation_occurred": False,
                            },
                            "requests": [],
                        },
                    }
                )

            # Calculate overview metrics
            total_tokens = sum(req.total_tokens or 0 for req in token_usage)
            total_prompt = sum(req.prompt_tokens or 0 for req in token_usage)
            total_completion = sum(
                req.completion_tokens or 0 for req in token_usage
            )

            # Get context limit (should be same for all requests in a research)
            context_limit = next(
                (req.context_limit for req in token_usage if req.context_limit),
                None,
            )

            # Check for truncation
            truncated_requests = [
                req for req in token_usage if req.context_truncated
            ]
            max_tokens_used = max(
                (req.prompt_tokens or 0) for req in token_usage
            )

            # Get token usage by phase
            phase_stats = {}
            for req in token_usage:
                phase = req.research_phase or "unknown"
                if phase not in phase_stats:
                    phase_stats[phase] = {
                        "count": 0,
                        "prompt_tokens": 0,
                        "completion_tokens": 0,
                        "total_tokens": 0,
                        "truncated_count": 0,
                    }
                phase_stats[phase]["count"] += 1
                phase_stats[phase]["prompt_tokens"] += req.prompt_tokens or 0
                phase_stats[phase]["completion_tokens"] += (
                    req.completion_tokens or 0
                )
                phase_stats[phase]["total_tokens"] += req.total_tokens or 0
                if req.context_truncated:
                    phase_stats[phase]["truncated_count"] += 1

            # Format requests for response
            requests_data = []
            for req in token_usage:
                requests_data.append(
                    {
                        "timestamp": req.timestamp.isoformat(),
                        "phase": req.research_phase,
                        "prompt_tokens": req.prompt_tokens,
                        "completion_tokens": req.completion_tokens,
                        "total_tokens": req.total_tokens,
                        "context_limit": req.context_limit,
                        "context_truncated": bool(req.context_truncated),
                        "tokens_truncated": req.tokens_truncated or 0,
                        "ollama_prompt_eval_count": req.ollama_prompt_eval_count,
                        "calling_function": req.calling_function,
                        "response_time_ms": req.response_time_ms,
                    }
                )

            response = {
                "status": "success",
                "data": {
                    "overview": {
                        "total_requests": len(token_usage),
                        "total_tokens": total_tokens,
                        "total_prompt_tokens": total_prompt,
                        "total_completion_tokens": total_completion,
                        "context_limit": context_limit,
                        "max_tokens_used": max_tokens_used,
                        "truncation_occurred": len(truncated_requests) > 0,
                        "truncated_count": len(truncated_requests),
                        "tokens_lost": sum(
                            req.tokens_truncated or 0
                            for req in truncated_requests
                        ),
                    },
                    "phase_stats": phase_stats,
                    "requests": requests_data,
                    "model": token_usage[0].model_name if token_usage else None,
                    "provider": token_usage[0].model_provider
                    if token_usage
                    else None,
                },
            }

        return jsonify(response)

    except Exception:
        logger.exception("Error getting research context overflow")
        return jsonify(
            {
                "status": "error",
                "message": "Failed to load context overflow data",
            }
        ), 500
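

# Usage sketch for the per-research endpoint (the research_id "abc123" is
# hypothetical; `client` is the same logged-in test client as above):
#
#     resp = client.get("/api/research/abc123/context-overflow")
#     overview = resp.get_json()["data"]["overview"]
#     print(overview["truncated_count"], overview["tokens_lost"])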