Coverage for src/local_deep_research/web/routes/context_overflow_api.py: 10%

101 statements  

coverage.py v7.12.0, created at 2026-01-11 00:51 +0000

1"""API endpoints for context overflow analytics.""" 

2 

3from flask import Blueprint, jsonify, request, session as flask_session 

4from datetime import datetime, timedelta, timezone 

5from sqlalchemy import func, desc 

6from loguru import logger 

7 

8from ...database.session_context import get_user_db_session 

9from ...database.models import TokenUsage 

10from ..auth.decorators import login_required 

11 

12context_overflow_bp = Blueprint("context_overflow_api", __name__) 
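
# NOTE: serving these routes assumes the blueprint is registered on the Flask
# app elsewhere in the package; a typical (hypothetical) call would be
# app.register_blueprint(context_overflow_bp).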


@context_overflow_bp.route("/api/context-overflow", methods=["GET"])
@login_required
def get_context_overflow_metrics():
    """Get context overflow metrics for the current user."""
    try:
        # Get username from session
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "User not authenticated"}
            ), 401

        # Get time period from query params
        period = request.args.get("period", "30d")

        # Calculate date filter (use timezone-aware datetime)
        start_date = None
        if period != "all":
            now = datetime.now(timezone.utc)
            if period == "7d":
                start_date = now - timedelta(days=7)
            elif period == "30d":
                start_date = now - timedelta(days=30)
            elif period == "3m":
                start_date = now - timedelta(days=90)
            elif period == "1y":
                start_date = now - timedelta(days=365)

        with get_user_db_session(username) as session:
            # Base query
            query = session.query(TokenUsage)

            if start_date:
                query = query.filter(TokenUsage.timestamp >= start_date)

            # Get overview statistics
            total_requests = query.count()

            # Requests with context data
            requests_with_context = query.filter(
                TokenUsage.context_limit.isnot(None)
            ).count()

            # Truncated requests
            truncated_requests = query.filter(
                TokenUsage.context_truncated.is_(True)
            ).count()

            # Calculate truncation rate
            truncation_rate = 0
            if requests_with_context > 0:
                truncation_rate = (
                    truncated_requests / requests_with_context
                ) * 100

            # Get average tokens truncated
            avg_tokens_truncated = session.query(
                func.avg(TokenUsage.tokens_truncated)
            ).filter(TokenUsage.context_truncated.is_(True))

            if start_date:
                avg_tokens_truncated = avg_tokens_truncated.filter(
                    TokenUsage.timestamp >= start_date
                )

            avg_tokens_truncated = avg_tokens_truncated.scalar() or 0

            # Get context limit distribution by model
            context_limits = session.query(
                TokenUsage.model_name,
                TokenUsage.context_limit,
                func.count(TokenUsage.id).label("count"),
            ).filter(TokenUsage.context_limit.isnot(None))

            if start_date:
                context_limits = context_limits.filter(
                    TokenUsage.timestamp >= start_date
                )

            context_limits = context_limits.group_by(
                TokenUsage.model_name, TokenUsage.context_limit
            ).all()

            # Get recent truncated requests
            recent_truncated = (
                query.filter(TokenUsage.context_truncated.is_(True))
                .order_by(desc(TokenUsage.timestamp))
                .limit(20)
                .all()
            )

            # Get time series data for chart - include all records
            # (even those without context_limit for OpenRouter models)
            time_series_query = query.order_by(TokenUsage.timestamp)

            if start_date:
                # For shorter periods, get all data points
                if period in ["7d", "30d"]:
                    time_series_data = time_series_query.all()
                else:
                    # For longer periods, cap the result set (this keeps the
                    # first 500 rows in timestamp order, not a uniform sample)
                    time_series_data = time_series_query.limit(500).all()
            else:
                time_series_data = time_series_query.limit(1000).all()

            # Format time series for chart
            chart_data = []
            for usage in time_series_data:
                # Calculate original tokens (before truncation)
                ollama_used = (
                    usage.ollama_prompt_eval_count
                )  # What Ollama actually processed
                actual_prompt = ollama_used or usage.prompt_tokens
                tokens_truncated = usage.tokens_truncated or 0
                original_tokens = (
                    actual_prompt + tokens_truncated
                    if usage.context_truncated
                    else actual_prompt
                )

                chart_data.append(
                    {
                        "timestamp": usage.timestamp.isoformat(),
                        "research_id": usage.research_id,
                        "prompt_tokens": usage.prompt_tokens,  # From our standard token counting
                        "ollama_prompt_tokens": ollama_used,  # What Ollama actually used (may be capped)
                        "original_prompt_tokens": original_tokens,  # What was originally requested (before truncation)
                        "context_limit": usage.context_limit,
                        "truncated": bool(usage.context_truncated),
                        "tokens_truncated": tokens_truncated,
                        "model": usage.model_name,
                    }
                )

            # Get model-specific truncation stats
            model_stats = session.query(
                TokenUsage.model_name,
                TokenUsage.model_provider,
                func.count(TokenUsage.id).label("total_requests"),
                func.sum(TokenUsage.context_truncated).label("truncated_count"),
                func.avg(TokenUsage.context_limit).label("avg_context_limit"),
            ).filter(TokenUsage.context_limit.isnot(None))

            if start_date:
                model_stats = model_stats.filter(
                    TokenUsage.timestamp >= start_date
                )

            model_stats = model_stats.group_by(
                TokenUsage.model_name, TokenUsage.model_provider
            ).all()

            # Format response
            response = {
                "status": "success",
                "overview": {
                    "total_requests": total_requests,
                    "requests_with_context_data": requests_with_context,
                    "truncated_requests": truncated_requests,
                    "truncation_rate": round(truncation_rate, 2),
                    "avg_tokens_truncated": round(avg_tokens_truncated, 0)
                    if avg_tokens_truncated
                    else 0,
                },
                "context_limits": [
                    {"model": model, "limit": limit, "count": count}
                    for model, limit, count in context_limits
                ],
                "model_stats": [
                    {
                        "model": stat.model_name,
                        "provider": stat.model_provider,
                        "total_requests": stat.total_requests,
                        "truncated_count": int(stat.truncated_count or 0),
                        "truncation_rate": round(
                            (stat.truncated_count or 0)
                            / stat.total_requests
                            * 100,
                            2,
                        )
                        if stat.total_requests > 0
                        else 0,
                        "avg_context_limit": round(stat.avg_context_limit, 0)
                        if stat.avg_context_limit
                        else None,
                    }
                    for stat in model_stats
                ],
                "recent_truncated": [
                    {
                        "timestamp": req.timestamp.isoformat(),
                        "research_id": req.research_id,
                        "model": req.model_name,
                        "prompt_tokens": req.prompt_tokens,  # Standard token count
                        "ollama_tokens": req.ollama_prompt_eval_count,  # What Ollama actually used
                        "original_tokens": (
                            req.ollama_prompt_eval_count or req.prompt_tokens
                        )
                        + (req.tokens_truncated or 0),  # What was requested
                        "context_limit": req.context_limit,
                        "tokens_truncated": req.tokens_truncated,
                        "truncation_ratio": req.truncation_ratio,
                        "research_query": req.research_query,
                    }
                    for req in recent_truncated
                ],
                "chart_data": chart_data,
                # Add detailed table data for all requests
                "all_requests": [
                    {
                        "timestamp": req.timestamp.isoformat(),
                        "research_id": req.research_id,
                        "model": req.model_name,
                        "provider": req.model_provider,
                        "prompt_tokens": req.prompt_tokens,
                        "completion_tokens": req.completion_tokens,
                        "total_tokens": req.total_tokens,
                        "context_limit": req.context_limit,
                        "context_truncated": bool(req.context_truncated),
                        "tokens_truncated": req.tokens_truncated or 0,
                        "truncation_ratio": round(req.truncation_ratio * 100, 2)
                        if req.truncation_ratio
                        else 0,
                        "ollama_prompt_eval_count": req.ollama_prompt_eval_count,
                        "research_query": req.research_query,
                        "research_phase": req.research_phase,
                    }
                    for req in query.order_by(desc(TokenUsage.timestamp))
                    .limit(100)
                    .all()
                ],
            }

        return jsonify(response)

    except Exception:
        logger.exception("Error getting context overflow metrics")
        return jsonify(
            {
                "status": "error",
                "message": "Failed to load context overflow metrics",
            }
        ), 500
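
# Illustrative response shape for GET /api/context-overflow?period=7d
# (made-up numbers, not a real capture):
#   {"status": "success",
#    "overview": {"total_requests": 42, "requests_with_context_data": 40,
#                 "truncated_requests": 2, "truncation_rate": 5.0,
#                 "avg_tokens_truncated": 312},
#    "context_limits": [...], "model_stats": [...],
#    "recent_truncated": [...], "chart_data": [...], "all_requests": [...]}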


@context_overflow_bp.route(
    "/api/research/<string:research_id>/context-overflow", methods=["GET"]
)
@login_required
def get_research_context_overflow(research_id):
    """Get context overflow metrics for a specific research."""
    try:
        with get_user_db_session() as session:
            # Get all token usage for this research
            token_usage = (
                session.query(TokenUsage)
                .filter(TokenUsage.research_id == research_id)
                .order_by(TokenUsage.timestamp)
                .all()
            )

            if not token_usage:
                return jsonify(
                    {
                        "status": "success",
                        "data": {
                            "overview": {
                                "total_requests": 0,
                                "total_tokens": 0,
                                "context_limit": None,
                                "max_tokens_used": 0,
                                "truncation_occurred": False,
                            },
                            "requests": [],
                        },
                    }
                )

            # Calculate overview metrics
            total_tokens = sum(req.total_tokens or 0 for req in token_usage)
            total_prompt = sum(req.prompt_tokens or 0 for req in token_usage)
            total_completion = sum(
                req.completion_tokens or 0 for req in token_usage
            )

            # Get context limit (should be same for all requests in a research)
            context_limit = next(
                (req.context_limit for req in token_usage if req.context_limit),
                None,
            )

            # Check for truncation
            truncated_requests = [
                req for req in token_usage if req.context_truncated
            ]
            max_tokens_used = max(
                (req.prompt_tokens or 0) for req in token_usage
            )

            # Get token usage by phase
            phase_stats = {}
            for req in token_usage:
                phase = req.research_phase or "unknown"
                if phase not in phase_stats:
                    phase_stats[phase] = {
                        "count": 0,
                        "prompt_tokens": 0,
                        "completion_tokens": 0,
                        "total_tokens": 0,
                        "truncated_count": 0,
                    }
                phase_stats[phase]["count"] += 1
                phase_stats[phase]["prompt_tokens"] += req.prompt_tokens or 0
                phase_stats[phase]["completion_tokens"] += (
                    req.completion_tokens or 0
                )
                phase_stats[phase]["total_tokens"] += req.total_tokens or 0
                if req.context_truncated:
                    phase_stats[phase]["truncated_count"] += 1

            # Format requests for response
            requests_data = []
            for req in token_usage:
                requests_data.append(
                    {
                        "timestamp": req.timestamp.isoformat(),
                        "phase": req.research_phase,
                        "prompt_tokens": req.prompt_tokens,
                        "completion_tokens": req.completion_tokens,
                        "total_tokens": req.total_tokens,
                        "context_limit": req.context_limit,
                        "context_truncated": bool(req.context_truncated),
                        "tokens_truncated": req.tokens_truncated or 0,
                        "ollama_prompt_eval_count": req.ollama_prompt_eval_count,
                        "calling_function": req.calling_function,
                        "response_time_ms": req.response_time_ms,
                    }
                )

            response = {
                "status": "success",
                "data": {
                    "overview": {
                        "total_requests": len(token_usage),
                        "total_tokens": total_tokens,
                        "total_prompt_tokens": total_prompt,
                        "total_completion_tokens": total_completion,
                        "context_limit": context_limit,
                        "max_tokens_used": max_tokens_used,
                        "truncation_occurred": len(truncated_requests) > 0,
                        "truncated_count": len(truncated_requests),
                        "tokens_lost": sum(
                            req.tokens_truncated or 0
                            for req in truncated_requests
                        ),
                    },
                    "phase_stats": phase_stats,
                    "requests": requests_data,
                    "model": token_usage[0].model_name if token_usage else None,
                    "provider": token_usage[0].model_provider
                    if token_usage
                    else None,
                },
            }

        return jsonify(response)

    except Exception:
        logger.exception("Error getting research context overflow")
        return jsonify(
            {
                "status": "error",
                "message": "Failed to load context overflow data",
            }
        ), 500
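
A minimal client sketch for exercising these endpoints (assumptions: the app is
served at http://localhost:5000 and the requests session already carries an
authenticated cookie from the app's login flow, which is not part of this
module; "some-research-id" is a placeholder):

import requests

session = requests.Session()
# ... authenticate first via the app's login flow (not shown here) ...
resp = session.get(
    "http://localhost:5000/api/context-overflow",
    params={"period": "30d"},  # one of: 7d, 30d, 3m, 1y, all
)
data = resp.json()
print(data["overview"]["truncation_rate"])

resp = session.get(
    "http://localhost:5000/api/research/some-research-id/context-overflow"
)
print(resp.json()["data"]["overview"]["truncation_occurred"])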