Coverage for src/local_deep_research/web/routes/metrics_routes.py: 47%
687 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
1"""Routes for metrics dashboard."""
3from datetime import datetime, timedelta, UTC
5from flask import Blueprint, jsonify, request, session as flask_session
6from loguru import logger
7from sqlalchemy import case, func
9from ...database.models import (
10 RateLimitAttempt,
11 RateLimitEstimate,
12 Research,
13 ResearchRating,
14 ResearchResource,
15 ResearchStrategy,
16 TokenUsage,
17)
18from ...domain_classifier import DomainClassifier, DomainClassification
19from ...database.session_context import get_user_db_session
20from ...metrics import TokenCounter
21from ...metrics.query_utils import get_time_filter_condition
22from ...metrics.search_tracker import get_search_tracker
23from ...web_search_engines.rate_limiting import get_tracker
24from ..auth.decorators import login_required
25from ..utils.templates import render_template_with_defaults
# Blueprint for the metrics dashboard; every route below is served under /metrics.
metrics_bp = Blueprint("metrics", __name__, url_prefix="/metrics")
def _empty_rating_analytics(error=None):
    """Build the zeroed rating-analytics payload; attach *error* if given."""
    payload = {
        "avg_rating": None,
        "total_ratings": 0,
        "rating_distribution": {},
        "satisfaction_stats": {
            "very_satisfied": 0,
            "satisfied": 0,
            "neutral": 0,
            "dissatisfied": 0,
            "very_dissatisfied": 0,
        },
    }
    if error:
        payload["error"] = error
    return {"rating_analytics": payload}


def get_rating_analytics(period="30d", research_mode="all", username=None):
    """Get rating analytics for the specified period and research mode.

    Args:
        period: One of "7d", "30d", "90d", "365d" or "all"; unknown values
            fall back to 30 days.
        research_mode: Currently unused here; kept for signature symmetry
            with the other analytics helpers.
        username: User whose database is queried; falls back to the Flask
            session when omitted.

    Returns:
        Dict with a single "rating_analytics" key containing the average
        rating, total count, per-star distribution and satisfaction buckets.
        On any failure a zeroed payload is returned instead of raising.
    """
    try:
        if not username:
            username = flask_session.get("username")

        if not username:
            return _empty_rating_analytics(error="No user session")

        # Translate the period string into a day count (None = no filter).
        days_map = {"7d": 7, "30d": 30, "90d": 90, "365d": 365, "all": None}
        days = days_map.get(period, 30)

        with get_user_db_session(username) as session:
            query = session.query(ResearchRating)

            # Apply time filter
            if days:
                cutoff_date = datetime.now(UTC) - timedelta(days=days)
                query = query.filter(ResearchRating.created_at >= cutoff_date)

            # Get all ratings
            ratings = query.all()

            if not ratings:
                return _empty_rating_analytics()

            # Calculate statistics
            rating_values = [r.rating for r in ratings]
            avg_rating = sum(rating_values) / len(rating_values)

            # Per-star (1-5) distribution, keyed by string for JSON clients.
            rating_counts = {
                str(i): rating_values.count(i) for i in range(1, 6)
            }

            # Satisfaction buckets: 5 = very satisfied ... 1 = very dissatisfied.
            satisfaction_stats = {
                "very_satisfied": rating_values.count(5),
                "satisfied": rating_values.count(4),
                "neutral": rating_values.count(3),
                "dissatisfied": rating_values.count(2),
                "very_dissatisfied": rating_values.count(1),
            }

            return {
                "rating_analytics": {
                    "avg_rating": round(avg_rating, 1),
                    "total_ratings": len(ratings),
                    "rating_distribution": rating_counts,
                    "satisfaction_stats": satisfaction_stats,
                }
            }

    except Exception:
        logger.exception("Error getting rating analytics")
        return _empty_rating_analytics()
def _empty_link_analytics(error=None):
    """Build the zeroed link-analytics payload; attach *error* if given."""
    payload = {
        "top_domains": [],
        "total_unique_domains": 0,
        "avg_links_per_research": 0,
        "domain_distribution": {},
        "source_type_analysis": {},
        "academic_vs_general": {},
        "total_links": 0,
    }
    if error:
        payload["error"] = error
    return {"link_analytics": payload}


def get_link_analytics(period="30d", username=None):
    """Get link analytics from research resources.

    Aggregates every ResearchResource URL in the period into per-domain
    counts, temporal trends, LLM-based category buckets, and per-domain
    metrics for the top-10 domains.

    Args:
        period: One of "7d", "30d", "90d", "365d" or "all"; unknown values
            fall back to 30 days.
        username: User whose database is queried; falls back to the Flask
            session when omitted.

    Returns:
        Dict with a single "link_analytics" key. On any failure a zeroed
        payload (with an "error" message) is returned instead of raising.
    """
    try:
        if not username:
            username = flask_session.get("username")

        if not username:
            return _empty_link_analytics(error="No user session")

        # Translate the period string into a day count (None = no filter).
        days_map = {"7d": 7, "30d": 30, "90d": 90, "365d": 365, "all": None}
        days = days_map.get(period, 30)

        with get_user_db_session(username) as session:
            query = session.query(ResearchResource)

            # ResearchResource.created_at is compared against the cutoff's
            # isoformat() string here, so the column is presumably stored as
            # an ISO-8601 string — confirm against the model definition.
            if days:
                cutoff_date = datetime.now(UTC) - timedelta(days=days)
                query = query.filter(
                    ResearchResource.created_at >= cutoff_date.isoformat()
                )

            # Get all resources
            resources = query.all()

            if not resources:
                return _empty_link_analytics()

            # Extract domains from URLs
            from urllib.parse import urlparse

            domain_counts = {}
            domain_researches = {}  # domain -> set of research ids using it
            source_types = {}
            temporal_data = {}  # date string (YYYY-MM-DD) -> link count

            # Generic category counting from LLM classifications
            category_counts = {}

            # Initialize domain classifier for LLM-based categorization.
            # (Uses the module-level DomainClassifier import; the previous
            # in-function re-import was redundant.)
            domain_classifier = DomainClassifier(username=username)
            quality_metrics = {
                "with_title": 0,
                "with_preview": 0,
                "with_both": 0,
                "total": 0,
            }

            for resource in resources:
                if not resource.url:
                    continue
                try:
                    parsed = urlparse(resource.url)
                    domain = parsed.netloc.lower()
                    # Normalize by dropping a leading "www."
                    if domain.startswith("www."):
                        domain = domain[4:]

                    # Count domains
                    domain_counts[domain] = domain_counts.get(domain, 0) + 1

                    # Track research IDs for each domain
                    domain_researches.setdefault(domain, set()).add(
                        resource.research_id
                    )

                    # Track temporal data (daily counts)
                    if resource.created_at:
                        date_str = resource.created_at[
                            :10
                        ]  # Extract YYYY-MM-DD
                        temporal_data[date_str] = (
                            temporal_data.get(date_str, 0) + 1
                        )

                    # Count categories from LLM classification
                    classification = domain_classifier.get_classification(
                        domain
                    )
                    if classification:
                        category = classification.category
                        category_counts[category] = (
                            category_counts.get(category, 0) + 1
                        )
                    else:
                        category_counts["Unclassified"] = (
                            category_counts.get("Unclassified", 0) + 1
                        )

                    # Track source type from metadata if available
                    if resource.source_type:
                        source_types[resource.source_type] = (
                            source_types.get(resource.source_type, 0) + 1
                        )

                    # Track metadata-completeness quality metrics
                    quality_metrics["total"] += 1
                    if resource.title:
                        quality_metrics["with_title"] += 1
                    if resource.content_preview:
                        quality_metrics["with_preview"] += 1
                    if resource.title and resource.content_preview:
                        quality_metrics["with_both"] += 1

                except Exception as e:
                    logger.warning(f"Error parsing URL {resource.url}: {e}")

            # Sort domains by count and get top 10
            sorted_domains = sorted(
                domain_counts.items(), key=lambda x: x[1], reverse=True
            )
            top_10_domains = sorted_domains[:10]

            # Calculate domain distribution (top domains vs others)
            top_10_count = sum(count for _, count in top_10_domains)
            others_count = len(resources) - top_10_count

            # Get unique research IDs to calculate average
            unique_research_ids = set(r.research_id for r in resources)
            avg_links = (
                len(resources) / len(unique_research_ids)
                if unique_research_ids
                else 0
            )

            # Prepare temporal trend data (sorted by date)
            temporal_trend = sorted(
                [
                    {"date": date, "count": count}
                    for date, count in temporal_data.items()
                ],
                key=lambda x: x["date"],
            )

            # Classifications and recent researches for the top domains,
            # fetched on the same session as the resource query (the
            # previous version opened a second nested session that
            # shadowed this one).
            domain_recent_research = {}
            domain_classifications = {}

            all_classifications = session.query(DomainClassification).all()
            for classification in all_classifications:
                domain_classifications[classification.domain] = {
                    "category": classification.category,
                    "subcategory": classification.subcategory,
                    "confidence": classification.confidence,
                }

            for domain, _ in top_10_domains:
                if domain in domain_researches:
                    research_ids = list(domain_researches[domain])[
                        :3
                    ]  # Up to 3 recent researches
                    researches = (
                        session.query(Research)
                        .filter(Research.id.in_(research_ids))
                        .all()
                    )
                    domain_recent_research[domain] = [
                        {
                            "id": r.id,
                            "query": r.query[:50]
                            if r.query
                            else "Research",
                        }
                        for r in researches
                    ]

            return {
                "link_analytics": {
                    "top_domains": [
                        {
                            "domain": domain,
                            "count": count,
                            "percentage": round(
                                count / len(resources) * 100, 1
                            ),
                            "research_count": len(
                                domain_researches.get(domain, set())
                            ),
                            "recent_researches": domain_recent_research.get(
                                domain, []
                            ),
                            "classification": domain_classifications.get(
                                domain, None
                            ),
                        }
                        for domain, count in top_10_domains
                    ],
                    "total_unique_domains": len(domain_counts),
                    "avg_links_per_research": round(avg_links, 1),
                    "domain_distribution": {
                        "top_10": top_10_count,
                        "others": others_count,
                    },
                    "source_type_analysis": source_types,
                    "category_distribution": category_counts,
                    # Generic pie chart data - use whatever LLM classifier outputs
                    "domain_categories": category_counts,
                    "total_links": len(resources),
                    "total_researches": len(unique_research_ids),
                    "temporal_trend": temporal_trend,
                    "domain_metrics": {
                        domain: {
                            "usage_count": count,
                            "usage_percentage": round(
                                count / len(resources) * 100, 1
                            ),
                            "research_diversity": len(
                                domain_researches.get(domain, set())
                            ),
                            "frequency_rank": rank + 1,
                        }
                        for rank, (domain, count) in enumerate(top_10_domains)
                    },
                }
            }

    except Exception:
        logger.exception("Error getting link analytics")
        return _empty_link_analytics(
            error="Failed to retrieve link analytics"
        )
def get_available_strategies():
    """Get list of all available search strategies from the search system."""
    # Static catalog mirroring the strategies accepted by
    # AdvancedSearchSystem.__init__ (name, human-readable description).
    catalog = (
        ("standard", "Basic iterative search strategy"),
        ("iterdrag", "Iterative Dense Retrieval Augmented Generation"),
        ("source-based", "Focuses on finding and extracting from sources"),
        ("parallel", "Runs multiple search queries in parallel"),
        ("rapid", "Quick single-pass search"),
        ("recursive", "Recursive decomposition of complex queries"),
        ("iterative", "Loop-based reasoning with persistent knowledge"),
        ("adaptive", "Adaptive step-by-step reasoning"),
        ("smart", "Automatically chooses best strategy based on query"),
        ("browsecomp", "Optimized for BrowseComp-style puzzle queries"),
        (
            "evidence",
            "Enhanced evidence-based verification with improved candidate discovery",
        ),
        (
            "constrained",
            "Progressive constraint-based search that narrows candidates step by step",
        ),
        (
            "parallel-constrained",
            "Parallel constraint-based search with combined constraint execution",
        ),
        (
            "early-stop-constrained",
            "Parallel constraint search with immediate evaluation and early stopping at 99% confidence",
        ),
        ("smart-query", "Smart query generation strategy"),
        (
            "dual-confidence",
            "Dual confidence scoring with positive/negative/uncertainty",
        ),
        (
            "dual-confidence-with-rejection",
            "Dual confidence with early rejection of poor candidates",
        ),
        (
            "concurrent-dual-confidence",
            "Concurrent search & evaluation with progressive constraint relaxation",
        ),
        (
            "modular",
            "Modular architecture using constraint checking and candidate exploration modules",
        ),
        ("modular-parallel", "Modular strategy with parallel exploration"),
        (
            "focused-iteration",
            "Focused iteration strategy optimized for accuracy",
        ),
        (
            "browsecomp-entity",
            "Entity-focused search for BrowseComp questions with knowledge graph building",
        ),
    )
    return [
        {"name": name, "description": description}
        for name, description in catalog
    ]
def _empty_strategy_analytics(**extra):
    """Build the zeroed strategy-analytics payload, merged with *extra*
    keys (e.g. ``error=...`` or ``message=...``)."""
    payload = {
        "total_research_with_strategy": 0,
        "total_research": 0,
        "most_popular_strategy": None,
        "strategy_usage": [],
        "strategy_distribution": {},
        "available_strategies": get_available_strategies(),
    }
    payload.update(extra)
    return {"strategy_analytics": payload}


def get_strategy_analytics(period="30d", username=None):
    """Get strategy usage analytics for the specified period.

    Args:
        period: One of "7d", "30d", "90d", "365d" or "all"; unknown values
            fall back to 30 days.
        username: User whose database is queried; falls back to the Flask
            session when omitted.

    Returns:
        Dict with a single "strategy_analytics" key: per-strategy usage
        counts/percentages, the most popular strategy, and the static list
        of available strategies. On any failure a zeroed payload is
        returned instead of raising.
    """
    try:
        if not username:
            username = flask_session.get("username")

        if not username:
            return _empty_strategy_analytics(error="No user session")

        # Translate the period string into a day count (None = no filter).
        days_map = {"7d": 7, "30d": 30, "90d": 90, "365d": 365, "all": None}
        days = days_map.get(period, 30)

        with get_user_db_session(username) as session:
            # Bail out early if strategy tracking has never recorded anything.
            strategy_count = session.query(ResearchStrategy).count()

            if strategy_count == 0:
                logger.warning("No research strategies found in database")
                return _empty_strategy_analytics(
                    message="Strategy tracking not yet available - run a research to start tracking"
                )

            # Base query for strategy usage (no JOIN needed since we just
            # want strategy counts).
            query = session.query(
                ResearchStrategy.strategy_name,
                func.count(ResearchStrategy.id).label("usage_count"),
            )

            # Apply time filter if specified
            if days:
                cutoff_date = datetime.now(UTC) - timedelta(days=days)
                query = query.filter(ResearchStrategy.created_at >= cutoff_date)

            # Group by strategy and order by usage
            strategy_results = (
                query.group_by(ResearchStrategy.strategy_name)
                .order_by(func.count(ResearchStrategy.id).desc())
                .all()
            )

            # Get total strategy count for percentage calculation
            total_query = session.query(ResearchStrategy)
            if days:
                total_query = total_query.filter(
                    ResearchStrategy.created_at >= cutoff_date
                )
            total_research = total_query.count()

            # Format strategy data
            strategy_usage = []
            strategy_distribution = {}

            for strategy_name, usage_count in strategy_results:
                percentage = (
                    (usage_count / total_research * 100)
                    if total_research > 0
                    else 0
                )
                strategy_usage.append(
                    {
                        "strategy": strategy_name,
                        "count": usage_count,
                        "percentage": round(percentage, 1),
                    }
                )
                strategy_distribution[strategy_name] = usage_count

            # Results are ordered by usage desc, so the first is the most popular.
            most_popular = (
                strategy_usage[0]["strategy"] if strategy_usage else None
            )

            return {
                "strategy_analytics": {
                    "total_research_with_strategy": sum(
                        item["count"] for item in strategy_usage
                    ),
                    "total_research": total_research,
                    "most_popular_strategy": most_popular,
                    "strategy_usage": strategy_usage,
                    "strategy_distribution": strategy_distribution,
                    "available_strategies": get_available_strategies(),
                }
            }

    except Exception:
        logger.exception("Error getting strategy analytics")
        return _empty_strategy_analytics(
            error="Failed to retrieve strategy data"
        )
def _empty_rate_limiting_analytics(error=None):
    """Build the zeroed rate-limiting payload; attach *error* if given."""
    payload = {
        "total_attempts": 0,
        "successful_attempts": 0,
        "failed_attempts": 0,
        "success_rate": 0,
        "rate_limit_events": 0,
        "avg_wait_time": 0,
        "avg_successful_wait": 0,
        "tracked_engines": 0,
        "engine_stats": [],
        "total_engines_tracked": 0,
        "healthy_engines": 0,
        "degraded_engines": 0,
        "poor_engines": 0,
    }
    if error:
        payload["error"] = error
    return {"rate_limiting": payload}


def get_rate_limiting_analytics(period="30d", username=None):
    """Get rate limiting analytics for the specified period.

    NOTE(review): this function understands "7d", "30d", "3m" and "1y"
    (anything else means "all time"), unlike the other analytics helpers
    which use "90d"/"365d" — confirm which tokens callers actually send.

    Args:
        period: Time window token (see note above).
        username: User whose database is queried; falls back to the Flask
            session when omitted.

    Returns:
        Dict with a single "rate_limiting" key: aggregate attempt/success
        counts, wait-time averages, and per-engine health stats. On any
        failure a zeroed payload is returned instead of raising.
    """
    try:
        if not username:
            username = flask_session.get("username")

        if not username:
            return _empty_rate_limiting_analytics(error="No user session")

        # RateLimitAttempt.timestamp is compared against epoch seconds, so
        # the cutoff is computed with time.time() (0 = no filter).
        import time

        if period == "7d":
            cutoff_time = time.time() - (7 * 24 * 3600)
        elif period == "30d":
            cutoff_time = time.time() - (30 * 24 * 3600)
        elif period == "3m":
            cutoff_time = time.time() - (90 * 24 * 3600)
        elif period == "1y":
            cutoff_time = time.time() - (365 * 24 * 3600)
        else:  # all
            cutoff_time = 0

        with get_user_db_session(username) as session:
            # Get rate limit attempts
            rate_limit_query = session.query(RateLimitAttempt)

            # Apply time filter
            if cutoff_time > 0:
                rate_limit_query = rate_limit_query.filter(
                    RateLimitAttempt.timestamp >= cutoff_time
                )

            # Get rate limit statistics
            total_attempts = rate_limit_query.count()
            successful_attempts = rate_limit_query.filter(
                RateLimitAttempt.success
            ).count()
            failed_attempts = total_attempts - successful_attempts

            # Count rate limiting events (failures with RateLimitError)
            rate_limit_events = rate_limit_query.filter(
                ~RateLimitAttempt.success,
                RateLimitAttempt.error_type == "RateLimitError",
            ).count()

            logger.info(
                f"Rate limit attempts in database: total={total_attempts}, successful={successful_attempts}"
            )

            # Get all attempts for detailed calculations
            attempts = rate_limit_query.all()

            # Calculate average wait times
            if attempts:
                avg_wait_time = sum(a.wait_time for a in attempts) / len(
                    attempts
                )
                successful_wait_times = [
                    a.wait_time for a in attempts if a.success
                ]
                avg_successful_wait = (
                    sum(successful_wait_times) / len(successful_wait_times)
                    if successful_wait_times
                    else 0
                )
            else:
                avg_wait_time = 0
                avg_successful_wait = 0

            # Count distinct engine types seen in the (filtered) attempts.
            tracked_engines_query = session.query(
                func.count(func.distinct(RateLimitAttempt.engine_type))
            )
            if cutoff_time > 0:
                tracked_engines_query = tracked_engines_query.filter(
                    RateLimitAttempt.timestamp >= cutoff_time
                )
            tracked_engines = tracked_engines_query.scalar() or 0

            # Get engine-specific stats from attempts
            engine_stats = []

            # Get distinct engine types from attempts
            engine_types_query = session.query(
                RateLimitAttempt.engine_type
            ).distinct()
            if cutoff_time > 0:
                engine_types_query = engine_types_query.filter(
                    RateLimitAttempt.timestamp >= cutoff_time
                )
            engine_types = [row.engine_type for row in engine_types_query.all()]

            for engine_type in engine_types:
                engine_attempts_list = [
                    a for a in attempts if a.engine_type == engine_type
                ]
                engine_attempts = len(engine_attempts_list)
                engine_success = len(
                    [a for a in engine_attempts_list if a.success]
                )

                # Persisted wait-time estimate for this engine, if any.
                estimate = (
                    session.query(RateLimitEstimate)
                    .filter(RateLimitEstimate.engine_type == engine_type)
                    .first()
                )

                # Calculate recent success rate (percentage)
                recent_success_rate = (
                    (engine_success / engine_attempts * 100)
                    if engine_attempts > 0
                    else 0
                )

                # Health status prefers the long-term estimate (fractional
                # success_rate) and falls back to the recent percentage.
                if estimate:
                    status = (
                        "healthy"
                        if estimate.success_rate > 0.8
                        else "degraded"
                        if estimate.success_rate > 0.5
                        else "poor"
                    )
                else:
                    status = (
                        "healthy"
                        if recent_success_rate > 80
                        else "degraded"
                        if recent_success_rate > 50
                        else "poor"
                    )

                engine_stat = {
                    "engine": engine_type,
                    "base_wait": estimate.base_wait_seconds
                    if estimate
                    else 0.0,
                    "base_wait_seconds": round(
                        estimate.base_wait_seconds if estimate else 0.0, 2
                    ),
                    "min_wait_seconds": round(
                        estimate.min_wait_seconds if estimate else 0.0, 2
                    ),
                    "max_wait_seconds": round(
                        estimate.max_wait_seconds if estimate else 0.0, 2
                    ),
                    "success_rate": round(estimate.success_rate * 100, 1)
                    if estimate
                    else recent_success_rate,
                    "total_attempts": estimate.total_attempts
                    if estimate
                    else engine_attempts,
                    "recent_attempts": engine_attempts,
                    "recent_success_rate": round(recent_success_rate, 1),
                    "attempts": engine_attempts,
                    "status": status,
                }

                # Uses the module-level datetime import; the previous
                # in-loop re-import was redundant.
                if estimate:
                    engine_stat["last_updated"] = datetime.fromtimestamp(
                        estimate.last_updated, UTC
                    ).isoformat()  # ISO format already includes timezone
                else:
                    engine_stat["last_updated"] = "Never"

                engine_stats.append(engine_stat)

            logger.info(
                f"Tracked engines: {tracked_engines}, engine_stats: {engine_stats}"
            )

            result = {
                "rate_limiting": {
                    "total_attempts": total_attempts,
                    "successful_attempts": successful_attempts,
                    "failed_attempts": failed_attempts,
                    "success_rate": (successful_attempts / total_attempts * 100)
                    if total_attempts > 0
                    else 0,
                    "rate_limit_events": rate_limit_events,
                    "avg_wait_time": round(float(avg_wait_time), 2),
                    "avg_successful_wait": round(float(avg_successful_wait), 2),
                    "tracked_engines": tracked_engines,
                    "engine_stats": engine_stats,
                    "total_engines_tracked": tracked_engines,
                    "healthy_engines": len(
                        [s for s in engine_stats if s["status"] == "healthy"]
                    ),
                    "degraded_engines": len(
                        [s for s in engine_stats if s["status"] == "degraded"]
                    ),
                    "poor_engines": len(
                        [s for s in engine_stats if s["status"] == "poor"]
                    ),
                }
            }

            logger.info(
                f"DEBUG: Returning rate_limiting_analytics result: {result}"
            )
            return result

    except Exception:
        logger.exception("Error getting rate limiting analytics")
        return _empty_rate_limiting_analytics(
            error="An internal error occurred while processing the request."
        )
@metrics_bp.route("/")
@login_required
def metrics_dashboard():
    """Render the metrics dashboard page (served at /metrics/)."""
    return render_template_with_defaults("pages/metrics.html")
@metrics_bp.route("/context-overflow")
@login_required
def context_overflow_page():
    """Render the context overflow analytics page (/metrics/context-overflow)."""
    return render_template_with_defaults("pages/context_overflow.html")
@metrics_bp.route("/api/metrics")
@login_required
def api_metrics():
    """Get overall metrics data.

    Combines token usage, search, strategy, rate-limiting and user
    satisfaction metrics into one JSON payload. Query parameters:
    ``period`` (default "30d") and ``mode`` (default "all").

    Returns:
        200 with {"status": "success", "metrics": ..., "period": ...,
        "research_mode": ...}; 401 when no user session exists; 500 on
        unexpected errors.
    """
    logger.debug("api_metrics endpoint called")
    try:
        # Get username from session
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        # Get time period and research mode from query parameters
        period = request.args.get("period", "30d")
        research_mode = request.args.get("mode", "all")

        token_counter = TokenCounter()
        search_tracker = get_search_tracker()

        # Get both token and search metrics
        token_metrics = token_counter.get_overall_metrics(
            period=period, research_mode=research_mode
        )
        search_metrics = search_tracker.get_search_metrics(
            period=period, research_mode=research_mode
        )

        # Get user satisfaction rating data. This is best-effort: a failure
        # here degrades to an empty satisfaction payload rather than
        # failing the whole endpoint.
        try:
            with get_user_db_session(username) as session:
                # Build base query with time filter
                ratings_query = session.query(ResearchRating)
                time_condition = get_time_filter_condition(
                    period, ResearchRating.created_at
                )
                if time_condition is not None:
                    ratings_query = ratings_query.filter(time_condition)

                # Get average rating
                avg_rating = ratings_query.with_entities(
                    func.avg(ResearchRating.rating).label("avg_rating")
                ).scalar()

                # Get total rating count
                total_ratings = ratings_query.count()

                user_satisfaction = {
                    "avg_rating": round(avg_rating, 1) if avg_rating else None,
                    "total_ratings": total_ratings,
                }
        except Exception as e:
            logger.warning(f"Error getting user satisfaction data: {e}")
            user_satisfaction = {"avg_rating": None, "total_ratings": 0}

        # Get strategy analytics
        strategy_data = get_strategy_analytics(period, username)
        logger.debug(f"strategy_data keys: {list(strategy_data.keys())}")

        # Get rate limiting analytics
        rate_limiting_data = get_rate_limiting_analytics(period, username)
        logger.debug(f"rate_limiting_data: {rate_limiting_data}")
        logger.debug(
            f"rate_limiting_data keys: {list(rate_limiting_data.keys())}"
        )

        # Combine metrics. Each sub-dict contributes its own top-level key
        # ("rate_limiting", "strategy_analytics", ...); later entries would
        # overwrite earlier ones on a key collision, so merge order matters.
        combined_metrics = {
            **token_metrics,
            **search_metrics,
            **strategy_data,
            **rate_limiting_data,
            "user_satisfaction": user_satisfaction,
        }

        logger.debug(f"combined_metrics keys: {list(combined_metrics.keys())}")
        logger.debug(
            f"combined_metrics['rate_limiting']: {combined_metrics.get('rate_limiting', 'NOT FOUND')}"
        )

        return jsonify(
            {
                "status": "success",
                "metrics": combined_metrics,
                "period": period,
                "research_mode": research_mode,
            }
        )
    except Exception:
        logger.exception("Error getting metrics")
        return (
            jsonify(
                {
                    "status": "error",
                    "message": "An internal error occurred. Please try again later.",
                }
            ),
            500,
        )
@metrics_bp.route("/api/rate-limiting")
@login_required
def api_rate_limiting_metrics():
    """Get detailed rate limiting metrics."""
    logger.info("DEBUG: api_rate_limiting_metrics endpoint called")
    try:
        selected_period = request.args.get("period", "30d")
        current_user = flask_session.get("username")

        # Delegate the heavy lifting to the analytics helper.
        analytics = get_rate_limiting_analytics(selected_period, current_user)

        payload = {
            "status": "success",
            "data": analytics,
            "period": selected_period,
        }
        return jsonify(payload)
    except Exception:
        logger.exception("Error getting rate limiting metrics")
        error_payload = {
            "status": "error",
            "message": "Failed to retrieve rate limiting metrics",
        }
        return jsonify(error_payload), 500
@metrics_bp.route("/api/rate-limiting/current")
@login_required
def api_current_rate_limits():
    """Get current rate limit estimates for all engines.

    Returns JSON with one entry per tracked engine: wait-time bounds,
    success rate (as a percentage), attempt count, last-update time and
    a coarse health status derived from the success rate.
    """
    try:
        tracker = get_tracker()
        # Each stat row is a 7-tuple in the order unpacked below —
        # presumably the schema produced by tracker.get_stats(); confirm
        # against the rate_limiting module if this ever changes.
        stats = tracker.get_stats()

        current_limits = []
        for stat in stats:
            (
                engine_type,
                base_wait,
                min_wait,
                max_wait,
                last_updated,
                total_attempts,
                success_rate,
            ) = stat
            current_limits.append(
                {
                    "engine_type": engine_type,
                    "base_wait_seconds": round(base_wait, 2),
                    "min_wait_seconds": round(min_wait, 2),
                    "max_wait_seconds": round(max_wait, 2),
                    # success_rate arrives as a fraction; expose a percentage.
                    "success_rate": round(success_rate * 100, 1),
                    "total_attempts": total_attempts,
                    # last_updated is an epoch timestamp — assumes UTC.
                    "last_updated": datetime.fromtimestamp(
                        last_updated, UTC
                    ).isoformat(),  # ISO format already includes timezone
                    # >80% healthy, >50% degraded, otherwise poor.
                    "status": "healthy"
                    if success_rate > 0.8
                    else "degraded"
                    if success_rate > 0.5
                    else "poor",
                }
            )

        return jsonify(
            {
                "status": "success",
                "current_limits": current_limits,
                "timestamp": datetime.now(UTC).isoformat(),
            }
        )
    except Exception:
        logger.exception("Error getting current rate limits")
        return jsonify(
            {
                "status": "error",
                "message": "Failed to retrieve current rate limits",
            }
        ), 500
@metrics_bp.route("/api/metrics/research/<string:research_id>/links")
@login_required
def api_research_link_metrics(research_id):
    """Get link analytics for a specific research.

    Returns domain counts, an LLM-derived category distribution, and a
    preview of the first resources for the given research session.
    """
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        with get_user_db_session(username) as session:
            # Get all resources for this specific research
            resources = (
                session.query(ResearchResource)
                .filter(ResearchResource.research_id == research_id)
                .all()
            )

            if not resources:
                # Empty-but-valid payload so the frontend renders without
                # having to special-case a "no data" error.
                return jsonify(
                    {
                        "status": "success",
                        "data": {
                            "total_links": 0,
                            "unique_domains": 0,
                            "domains": [],
                            "category_distribution": {},
                            "domain_categories": {},
                            "resources": [],
                        },
                    }
                )

            from urllib.parse import urlparse

            domain_counts = {}
            # Generic category counting from LLM classifications
            category_counts = {}

            # Initialize domain classifier for LLM-based categorization.
            # (DomainClassifier is already imported at module level.)
            domain_classifier = DomainClassifier(username=username)

            for resource in resources:
                if not resource.url:
                    continue
                try:
                    parsed = urlparse(resource.url)
                    domain = parsed.netloc.lower()
                    if domain.startswith("www."):
                        domain = domain[4:]

                    domain_counts[domain] = domain_counts.get(domain, 0) + 1

                    # Count categories from LLM classification
                    classification = domain_classifier.get_classification(
                        domain
                    )
                    if classification:
                        category = classification.category
                        category_counts[category] = (
                            category_counts.get(category, 0) + 1
                        )
                    else:
                        category_counts["Unclassified"] = (
                            category_counts.get("Unclassified", 0) + 1
                        )
                except Exception:
                    # Skip a single malformed URL / classification failure
                    # instead of failing the whole request. (Was a bare
                    # `except:` which would also swallow SystemExit and
                    # KeyboardInterrupt.)
                    logger.debug(
                        "Skipping unparsable resource URL: {}", resource.url
                    )

            # Sort domains by count
            sorted_domains = sorted(
                domain_counts.items(), key=lambda x: x[1], reverse=True
            )

            return jsonify(
                {
                    "status": "success",
                    "data": {
                        "total_links": len(resources),
                        "unique_domains": len(domain_counts),
                        "domains": [
                            {
                                "domain": domain,
                                "count": count,
                                "percentage": round(
                                    count / len(resources) * 100, 1
                                ),
                            }
                            for domain, count in sorted_domains[
                                :20
                            ]  # Top 20 domains
                        ],
                        "category_distribution": category_counts,
                        "domain_categories": category_counts,  # Generic categories from LLM
                        "resources": [
                            {
                                "title": r.title or "Untitled",
                                "url": r.url,
                                "preview": r.content_preview[:200]
                                if r.content_preview
                                else None,
                            }
                            for r in resources[:10]  # First 10 resources
                        ],
                    },
                }
            )

    except Exception:
        logger.exception("Error getting research link metrics")
        return jsonify(
            {"status": "error", "message": "Failed to retrieve link metrics"}
        ), 500
@metrics_bp.route("/api/metrics/research/<string:research_id>")
@login_required
def api_research_metrics(research_id):
    """Get metrics for a specific research."""
    try:
        counter = TokenCounter()
        metrics = counter.get_research_metrics(research_id)
        return jsonify({"status": "success", "metrics": metrics})
    except Exception:
        logger.exception("Error getting research metrics")
        error_body = {
            "status": "error",
            "message": "An internal error occurred. Please try again later.",
        }
        return jsonify(error_body), 500
@metrics_bp.route("/api/metrics/research/<string:research_id>/timeline")
@login_required
def api_research_timeline_metrics(research_id):
    """Get timeline metrics for a specific research."""
    try:
        counter = TokenCounter()
        metrics = counter.get_research_timeline_metrics(research_id)
        return jsonify({"status": "success", "metrics": metrics})
    except Exception:
        logger.exception("Error getting research timeline metrics")
        error_body = {
            "status": "error",
            "message": "An internal error occurred. Please try again later.",
        }
        return jsonify(error_body), 500
@metrics_bp.route("/api/metrics/research/<string:research_id>/search")
@login_required
def api_research_search_metrics(research_id):
    """Get search metrics for a specific research."""
    try:
        tracker = get_search_tracker()
        metrics = tracker.get_research_search_metrics(research_id)
        return jsonify({"status": "success", "metrics": metrics})
    except Exception:
        logger.exception("Error getting research search metrics")
        error_body = {
            "status": "error",
            "message": "An internal error occurred. Please try again later.",
        }
        return jsonify(error_body), 500
@metrics_bp.route("/api/metrics/enhanced")
@login_required
def api_enhanced_metrics():
    """Get enhanced Phase 1 tracking metrics."""
    try:
        # Query parameters controlling the aggregation window and mode.
        period = request.args.get("period", "30d")
        research_mode = request.args.get("mode", "all")
        username = flask_session.get("username")

        metrics = TokenCounter().get_enhanced_metrics(
            period=period, research_mode=research_mode
        )

        # Chart data: searches over time for the selected window.
        metrics["search_time_series"] = (
            get_search_tracker().get_search_time_series(
                period=period, research_mode=research_mode
            )
        )

        # Fold the user-rating analytics into the same payload.
        metrics.update(get_rating_analytics(period, research_mode, username))

        return jsonify(
            {
                "status": "success",
                "metrics": metrics,
                "period": period,
                "research_mode": research_mode,
            }
        )
    except Exception:
        logger.exception("Error getting enhanced metrics")
        error_body = {
            "status": "error",
            "message": "An internal error occurred. Please try again later.",
        }
        return jsonify(error_body), 500
@metrics_bp.route("/api/ratings/<string:research_id>", methods=["GET"])
@login_required
def api_get_research_rating(research_id):
    """Get rating for a specific research session."""
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        with get_user_db_session(username) as session:
            record = (
                session.query(ResearchRating)
                .filter_by(research_id=research_id)
                .first()
            )

            # No stored rating is a success with a null rating, not an error.
            if record is None:
                return jsonify({"status": "success", "rating": None})

            return jsonify(
                {
                    "status": "success",
                    "rating": record.rating,
                    "created_at": record.created_at.isoformat(),
                    "updated_at": record.updated_at.isoformat(),
                }
            )

    except Exception:
        logger.exception("Error getting research rating")
        error_body = {
            "status": "error",
            "message": "An internal error occurred. Please try again later.",
        }
        return jsonify(error_body), 500
@metrics_bp.route("/api/ratings/<string:research_id>", methods=["POST"])
@login_required
def api_save_research_rating(research_id):
    """Save or update rating for a specific research session.

    Expects a JSON body ``{"rating": <int 1..5>}``. An existing rating
    row is updated in place; otherwise a new row is created.
    """
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        # silent=True: a missing or malformed JSON body yields None and
        # falls through to the 400 validation below instead of raising
        # and becoming a 500.
        data = request.get_json(silent=True) or {}
        rating_value = data.get("rating")

        # bool is a subclass of int, so exclude it explicitly — without
        # this, `true` in the JSON body would be accepted as rating 1.
        if (
            isinstance(rating_value, bool)
            or not isinstance(rating_value, int)
            or rating_value < 1
            or rating_value > 5
        ):
            return (
                jsonify(
                    {
                        "status": "error",
                        "message": "Rating must be an integer between 1 and 5",
                    }
                ),
                400,
            )

        with get_user_db_session(username) as session:
            # Check if rating already exists
            existing_rating = (
                session.query(ResearchRating)
                .filter_by(research_id=research_id)
                .first()
            )

            if existing_rating:
                # Update existing rating
                existing_rating.rating = rating_value
                existing_rating.updated_at = func.now()
            else:
                # Create new rating
                new_rating = ResearchRating(
                    research_id=research_id, rating=rating_value
                )
                session.add(new_rating)

            session.commit()

            return jsonify(
                {
                    "status": "success",
                    "message": "Rating saved successfully",
                    "rating": rating_value,
                }
            )

    except Exception:
        logger.exception("Error saving research rating")
        return (
            jsonify(
                {
                    "status": "error",
                    "message": "An internal error occurred. Please try again later.",
                }
            ),
            500,
        )
@metrics_bp.route("/star-reviews")
@login_required
def star_reviews():
    """Render the star reviews metrics dashboard page."""
    template_name = "pages/star_reviews.html"
    return render_template_with_defaults(template_name)
@metrics_bp.route("/costs")
@login_required
def cost_analytics():
    """Render the cost analytics dashboard page."""
    template_name = "pages/cost_analytics.html"
    return render_template_with_defaults(template_name)
@metrics_bp.route("/api/star-reviews")
@login_required
def api_star_reviews():
    """Get star reviews analytics data.

    Returns a JSON payload with overall rating statistics (average plus a
    per-star histogram), per-LLM-model and per-search-engine breakdowns,
    daily rating trends, and the 20 most recent ratings — all restricted
    to the requested time window (query param ``period``, default "30d").
    """
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        period = request.args.get("period", "30d")

        with get_user_db_session(username) as session:
            # Build base query with time filter
            # NOTE(review): base_query is built but never executed or
            # reused below — looks like dead code; only time_condition is
            # actually used by the queries that follow.
            base_query = session.query(ResearchRating)
            time_condition = get_time_filter_condition(
                period, ResearchRating.created_at
            )
            if time_condition is not None:
                base_query = base_query.filter(time_condition)

            # Overall rating statistics: average, total count, and a
            # per-star histogram via conditional SUM(CASE ...) columns.
            overall_stats = session.query(
                func.avg(ResearchRating.rating).label("avg_rating"),
                func.count(ResearchRating.rating).label("total_ratings"),
                func.sum(case((ResearchRating.rating == 5, 1), else_=0)).label(
                    "five_star"
                ),
                func.sum(case((ResearchRating.rating == 4, 1), else_=0)).label(
                    "four_star"
                ),
                func.sum(case((ResearchRating.rating == 3, 1), else_=0)).label(
                    "three_star"
                ),
                func.sum(case((ResearchRating.rating == 2, 1), else_=0)).label(
                    "two_star"
                ),
                func.sum(case((ResearchRating.rating == 1, 1), else_=0)).label(
                    "one_star"
                ),
            )

            if time_condition is not None:
                overall_stats = overall_stats.filter(time_condition)

            # Single aggregate row (may contain NULLs when no ratings
            # exist in the window; the serialization guards with `or 0`).
            overall_stats = overall_stats.first()

            # Ratings by LLM model (get from token_usage since Research doesn't have model field)
            # NOTE(review): the outer join can yield multiple rows per
            # rating when a research session has several token-usage
            # records, which would inflate rating_count — confirm against
            # the data model.
            llm_ratings_query = session.query(
                func.coalesce(TokenUsage.model_name, "Unknown").label("model"),
                func.avg(ResearchRating.rating).label("avg_rating"),
                func.count(ResearchRating.rating).label("rating_count"),
                func.sum(case((ResearchRating.rating >= 4, 1), else_=0)).label(
                    "positive_ratings"
                ),
            ).outerjoin(
                TokenUsage, ResearchRating.research_id == TokenUsage.research_id
            )

            if time_condition is not None:
                llm_ratings_query = llm_ratings_query.filter(time_condition)

            llm_ratings = (
                llm_ratings_query.group_by(TokenUsage.model_name)
                .order_by(func.avg(ResearchRating.rating).desc())
                .all()
            )

            # Ratings by search engine (join with token_usage to get search engine info)
            search_engine_ratings_query = session.query(
                func.coalesce(
                    TokenUsage.search_engine_selected, "Unknown"
                ).label("search_engine"),
                func.avg(ResearchRating.rating).label("avg_rating"),
                func.count(ResearchRating.rating).label("rating_count"),
                func.sum(case((ResearchRating.rating >= 4, 1), else_=0)).label(
                    "positive_ratings"
                ),
            ).outerjoin(
                TokenUsage, ResearchRating.research_id == TokenUsage.research_id
            )

            if time_condition is not None:
                search_engine_ratings_query = (
                    search_engine_ratings_query.filter(time_condition)
                )

            search_engine_ratings = (
                search_engine_ratings_query.group_by(
                    TokenUsage.search_engine_selected
                )
                .having(func.count(ResearchRating.rating) > 0)
                .order_by(func.avg(ResearchRating.rating).desc())
                .all()
            )

            # Rating trends over time: per-day average rating and volume.
            rating_trends_query = session.query(
                func.date(ResearchRating.created_at).label("date"),
                func.avg(ResearchRating.rating).label("avg_rating"),
                func.count(ResearchRating.rating).label("daily_count"),
            )

            if time_condition is not None:
                rating_trends_query = rating_trends_query.filter(time_condition)

            rating_trends = (
                rating_trends_query.group_by(
                    func.date(ResearchRating.created_at)
                )
                .order_by("date")
                .all()
            )

            # Recent ratings with research details
            # NOTE(review): both ResearchRating.created_at and
            # Research.created_at are selected without distinct labels, so
            # `rating.created_at` on the result rows is ambiguous — verify
            # which column the serialization below actually reads.
            recent_ratings_query = (
                session.query(
                    ResearchRating.rating,
                    ResearchRating.created_at,
                    ResearchRating.research_id,
                    Research.query,
                    Research.mode,
                    TokenUsage.model_name,
                    Research.created_at,
                )
                .outerjoin(Research, ResearchRating.research_id == Research.id)
                .outerjoin(
                    TokenUsage,
                    ResearchRating.research_id == TokenUsage.research_id,
                )
            )

            if time_condition is not None:
                recent_ratings_query = recent_ratings_query.filter(
                    time_condition
                )

            recent_ratings = (
                recent_ratings_query.order_by(ResearchRating.created_at.desc())
                .limit(20)
                .all()
            )

            # Serialize everything; `or 0` guards cover NULL aggregates and
            # max(..., 1) prevents division by zero in satisfaction rates.
            return jsonify(
                {
                    "overall_stats": {
                        "avg_rating": round(overall_stats.avg_rating or 0, 2),
                        "total_ratings": overall_stats.total_ratings or 0,
                        "rating_distribution": {
                            "5": overall_stats.five_star or 0,
                            "4": overall_stats.four_star or 0,
                            "3": overall_stats.three_star or 0,
                            "2": overall_stats.two_star or 0,
                            "1": overall_stats.one_star or 0,
                        },
                    },
                    "llm_ratings": [
                        {
                            "model": rating.model,
                            "avg_rating": round(rating.avg_rating or 0, 2),
                            "rating_count": rating.rating_count or 0,
                            "positive_ratings": rating.positive_ratings or 0,
                            "satisfaction_rate": round(
                                (rating.positive_ratings or 0)
                                / max(rating.rating_count or 1, 1)
                                * 100,
                                1,
                            ),
                        }
                        for rating in llm_ratings
                    ],
                    "search_engine_ratings": [
                        {
                            "search_engine": rating.search_engine,
                            "avg_rating": round(rating.avg_rating or 0, 2),
                            "rating_count": rating.rating_count or 0,
                            "positive_ratings": rating.positive_ratings or 0,
                            "satisfaction_rate": round(
                                (rating.positive_ratings or 0)
                                / max(rating.rating_count or 1, 1)
                                * 100,
                                1,
                            ),
                        }
                        for rating in search_engine_ratings
                    ],
                    "rating_trends": [
                        {
                            "date": str(trend.date),
                            "avg_rating": round(trend.avg_rating or 0, 2),
                            "count": trend.daily_count or 0,
                        }
                        for trend in rating_trends
                    ],
                    "recent_ratings": [
                        {
                            "rating": rating.rating,
                            "created_at": str(rating.created_at),
                            "research_id": rating.research_id,
                            "query": (
                                rating.query
                                if rating.query
                                else f"Research Session #{rating.research_id}"
                            ),
                            "mode": rating.mode
                            if rating.mode
                            else "Standard Research",
                            "llm_model": (
                                rating.model_name
                                if rating.model_name
                                else "LLM Model"
                            ),
                        }
                        for rating in recent_ratings
                    ],
                }
            )

    except Exception:
        logger.exception("Error getting star reviews data")
        return (
            jsonify(
                {"error": "An internal error occurred. Please try again later."}
            ),
            500,
        )
@metrics_bp.route("/api/pricing")
@login_required
def api_pricing():
    """Get current LLM pricing data."""
    try:
        from ...metrics.pricing.pricing_fetcher import PricingFetcher

        # Serve the bundled static pricing table; no async fetch needed.
        pricing_data = PricingFetcher().static_pricing

        payload = {
            "status": "success",
            "pricing": pricing_data,
            "last_updated": datetime.now(UTC).isoformat(),
            "note": "Pricing data is from static configuration. Real-time APIs not available for most providers.",
        }
        return jsonify(payload)

    except Exception:
        logger.exception("Error fetching pricing data")
        return jsonify({"error": "Internal Server Error"}), 500
@metrics_bp.route("/api/pricing/<model_name>")
@login_required
def api_model_pricing(model_name):
    """Get pricing for a specific model."""
    try:
        # Optional provider parameter
        provider = request.args.get("provider")

        from ...metrics.pricing.cost_calculator import CostCalculator

        # Prefer the cached pricing entry; otherwise derive it from a
        # synchronous 1000/1000-token sample calculation.
        calculator = CostCalculator()
        pricing = calculator.cache.get_model_pricing(model_name)
        if not pricing:
            sample = calculator.calculate_cost_sync(model_name, 1000, 1000)
            pricing = sample.get("pricing_used", {})

        return jsonify(
            {
                "status": "success",
                "model": model_name,
                "provider": provider,
                "pricing": pricing,
                "last_updated": datetime.now(UTC).isoformat(),
            }
        )

    except Exception:
        logger.exception(f"Error getting pricing for model: {model_name}")
        return jsonify({"error": "An internal error occurred"}), 500
@metrics_bp.route("/api/cost-calculation", methods=["POST"])
@login_required
def api_cost_calculation():
    """Calculate cost for token usage.

    Expects a JSON body with ``model_name`` (required), optional
    ``provider``, and ``prompt_tokens`` / ``completion_tokens``
    (non-negative integers, default 0). Returns the token totals merged
    with the calculator's cost breakdown.
    """
    try:
        # silent=True: a malformed or non-JSON body yields None and takes
        # the 400 path below instead of raising into a 500.
        data = request.get_json(silent=True)

        if not data:
            return jsonify({"error": "No data provided"}), 400

        model_name = data.get("model_name")
        provider = data.get("provider")  # Optional provider parameter
        prompt_tokens = data.get("prompt_tokens", 0)
        completion_tokens = data.get("completion_tokens", 0)

        if not model_name:
            return jsonify({"error": "model_name is required"}), 400

        # Validate token counts up front (bool is an int subclass, so it
        # must be excluded explicitly) rather than letting bad input fail
        # deep inside the calculator.
        for value in (prompt_tokens, completion_tokens):
            if (
                isinstance(value, bool)
                or not isinstance(value, int)
                or value < 0
            ):
                return jsonify(
                    {
                        "error": "prompt_tokens and completion_tokens must be non-negative integers"
                    }
                ), 400

        from ...metrics.pricing.cost_calculator import CostCalculator

        # Use synchronous cost calculation
        calculator = CostCalculator()
        cost_data = calculator.calculate_cost_sync(
            model_name, prompt_tokens, completion_tokens
        )

        return jsonify(
            {
                "status": "success",
                "model_name": model_name,
                "provider": provider,
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens,
                **cost_data,
            }
        )

    except Exception:
        logger.exception("Error calculating cost")
        return jsonify({"error": "An internal error occurred"}), 500
@metrics_bp.route("/api/research-costs/<string:research_id>")
@login_required
def api_research_costs(research_id):
    """Get cost analysis for a specific research session."""
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        with get_user_db_session(username) as session:
            # Fetch every token-usage row recorded for this research.
            usage_records = (
                session.query(TokenUsage)
                .filter(TokenUsage.research_id == research_id)
                .all()
            )

            if not usage_records:
                return jsonify(
                    {
                        "status": "success",
                        "research_id": research_id,
                        "total_cost": 0.0,
                        "message": "No token usage data found for this research session",
                    }
                )

            # Normalize ORM rows into plain dicts; getattr covers legacy
            # rows persisted before the provider column existed.
            usage_data = [
                {
                    "model_name": record.model_name,
                    "provider": getattr(record, "provider", None),
                    "prompt_tokens": record.prompt_tokens,
                    "completion_tokens": record.completion_tokens,
                    "timestamp": record.timestamp,
                }
                for record in usage_records
            ]

            from ...metrics.pricing.cost_calculator import CostCalculator

            # Synchronous per-record cost calculation merged onto each row.
            calculator = CostCalculator()
            costs = [
                {
                    **entry,
                    **calculator.calculate_cost_sync(
                        entry["model_name"],
                        entry["prompt_tokens"],
                        entry["completion_tokens"],
                    ),
                }
                for entry in usage_data
            ]

            total_cost = sum(c["total_cost"] for c in costs)
            total_prompt_tokens = sum(r["prompt_tokens"] for r in usage_data)
            total_completion_tokens = sum(
                r["completion_tokens"] for r in usage_data
            )

            return jsonify(
                {
                    "status": "success",
                    "research_id": research_id,
                    "total_cost": round(total_cost, 6),
                    "total_tokens": total_prompt_tokens
                    + total_completion_tokens,
                    "prompt_tokens": total_prompt_tokens,
                    "completion_tokens": total_completion_tokens,
                }
            )

    except Exception:
        logger.exception(
            f"Error getting research costs for research: {research_id}"
        )
        return jsonify({"error": "An internal error occurred"}), 500
@metrics_bp.route("/api/cost-analytics")
@login_required
def api_cost_analytics():
    """Get cost analytics across all research sessions.

    Aggregates token usage for the requested period into an overall cost
    overview plus the top-10 most expensive research sessions. On any
    failure this endpoint degrades to an empty-but-valid payload with
    HTTP 200 so the dashboard UI keeps working.
    """
    # Default up front so the except-handler below can always reference
    # it (previously a failure before the assignment inside try raised
    # NameError in the handler itself).
    period = "30d"
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        period = request.args.get("period", "30d")

        with get_user_db_session(username) as session:
            # Get token usage for the period
            query = session.query(TokenUsage)
            time_condition = get_time_filter_condition(
                period, TokenUsage.timestamp
            )
            if time_condition is not None:
                query = query.filter(time_condition)

            # First check if we have any records to avoid expensive queries
            record_count = query.count()

            if record_count == 0:
                return jsonify(
                    {
                        "status": "success",
                        "period": period,
                        "overview": {
                            "total_cost": 0.0,
                            "total_tokens": 0,
                            "prompt_tokens": 0,
                            "completion_tokens": 0,
                        },
                        "top_expensive_research": [],
                        "research_count": 0,
                        "message": "No token usage data found for this period",
                    }
                )

            # If we have too many records, limit to recent ones to avoid timeout
            if record_count > 1000:
                logger.warning(
                    f"Large dataset detected ({record_count} records), limiting to recent 1000 for performance"
                )
                usage_records = (
                    query.order_by(TokenUsage.timestamp.desc())
                    .limit(1000)
                    .all()
                )
            else:
                usage_records = query.all()

            from ...metrics.pricing.cost_calculator import CostCalculator

            calculator = CostCalculator()

            # One synchronous cost calculation per record; getattr covers
            # legacy rows persisted before the provider column existed.
            costs = []
            for record in usage_records:
                cost_data = calculator.calculate_cost_sync(
                    record.model_name,
                    record.prompt_tokens,
                    record.completion_tokens,
                )
                costs.append(
                    {
                        "model_name": record.model_name,
                        "provider": getattr(record, "provider", None),
                        "prompt_tokens": record.prompt_tokens,
                        "completion_tokens": record.completion_tokens,
                        "research_id": record.research_id,
                        "timestamp": record.timestamp,
                        **cost_data,
                    }
                )

            total_cost = sum(c["total_cost"] for c in costs)
            total_prompt_tokens = sum(c["prompt_tokens"] for c in costs)
            total_completion_tokens = sum(
                c["completion_tokens"] for c in costs
            )

            cost_summary = {
                "total_cost": round(total_cost, 6),
                "total_tokens": total_prompt_tokens + total_completion_tokens,
                "prompt_tokens": total_prompt_tokens,
                "completion_tokens": total_completion_tokens,
            }

            # Per-research totals, reusing the costs computed above
            # (previously every record's cost was calculated a second
            # time just for this grouping).
            research_totals = {}
            for entry in costs:
                rid = entry["research_id"]
                research_totals[rid] = (
                    research_totals.get(rid, 0.0) + entry["total_cost"]
                )

            # Top expensive research sessions (rounded as before).
            top_expensive = sorted(
                (
                    (rid, round(total, 6))
                    for rid, total in research_totals.items()
                ),
                key=lambda x: x[1],
                reverse=True,
            )[:10]

            return jsonify(
                {
                    "status": "success",
                    "period": period,
                    "overview": cost_summary,
                    "top_expensive_research": [
                        {"research_id": rid, "total_cost": cost}
                        for rid, cost in top_expensive
                    ],
                    "research_count": len(research_totals),
                }
            )

    except Exception:
        logger.exception("Error getting cost analytics")
        # Return a more graceful error response
        return (
            jsonify(
                {
                    "status": "success",
                    "period": period,
                    "overview": {
                        "total_cost": 0.0,
                        "total_tokens": 0,
                        "prompt_tokens": 0,
                        "completion_tokens": 0,
                    },
                    "top_expensive_research": [],
                    "research_count": 0,
                    "error": "Cost analytics temporarily unavailable",
                }
            ),
            200,
        )  # Return 200 to avoid breaking the UI
@metrics_bp.route("/links")
@login_required
def link_analytics():
    """Render the link analytics dashboard page."""
    template_name = "pages/link_analytics.html"
    return render_template_with_defaults(template_name)
@metrics_bp.route("/api/link-analytics")
@login_required
def api_link_analytics():
    """Get link analytics data."""
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        period = request.args.get("period", "30d")

        # Delegate to the shared aggregation helper and unwrap its payload.
        analytics = get_link_analytics(period, username)

        return jsonify(
            {
                "status": "success",
                "data": analytics["link_analytics"],
                "period": period,
            }
        )

    except Exception:
        logger.exception("Error getting link analytics")
        error_body = {
            "status": "error",
            "message": "An internal error occurred. Please try again later.",
        }
        return jsonify(error_body), 500
@metrics_bp.route("/api/domain-classifications", methods=["GET"])
@login_required
def api_get_domain_classifications():
    """Get all domain classifications."""
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        # Fetch every stored classification and serialize for the client.
        classifications = DomainClassifier(username).get_all_classifications()
        serialized = [c.to_dict() for c in classifications]

        return jsonify(
            {
                "status": "success",
                "classifications": serialized,
                "total": len(serialized),
            }
        )

    except Exception:
        logger.exception("Error getting domain classifications")
        return jsonify(
            {"status": "error", "message": "Failed to retrieve classifications"}
        ), 500
@metrics_bp.route("/api/domain-classifications/summary", methods=["GET"])
@login_required
def api_get_classifications_summary():
    """Get summary of domain classifications by category."""
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        summary = DomainClassifier(username).get_categories_summary()
        return jsonify({"status": "success", "summary": summary})

    except Exception:
        logger.exception("Error getting classifications summary")
        return jsonify(
            {"status": "error", "message": "Failed to retrieve summary"}
        ), 500
@metrics_bp.route("/api/domain-classifications/classify", methods=["POST"])
@login_required
def api_classify_domains():
    """Trigger classification of a specific domain or batch classification.

    JSON body:
        domain (str, optional): single domain to classify.
        force_update (bool): re-classify even when already classified.
        batch (bool): classify every known domain instead of one.

    Exactly one of ``domain`` / ``batch`` must be supplied.
    """
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        data = request.get_json() or {}
        domain = data.get("domain")
        force_update = data.get("force_update", False)
        batch_mode = data.get("batch", False)

        # Get settings snapshot for LLM configuration.
        # (get_user_db_session is already imported at module level; the
        # redundant function-local re-import was removed.)
        from ..services.settings_manager import SettingsManager

        with get_user_db_session(username) as db_session:
            settings_manager = SettingsManager(db_session=db_session)
            settings_snapshot = settings_manager.get_all_settings()

        classifier = DomainClassifier(
            username, settings_snapshot=settings_snapshot
        )

        if domain and not batch_mode:
            # Classify single domain
            logger.info(f"Classifying single domain: {domain}")
            classification = classifier.classify_domain(domain, force_update)
            if classification:
                return jsonify(
                    {
                        "status": "success",
                        "classification": classification.to_dict(),
                    }
                )
            return jsonify(
                {
                    "status": "error",
                    "message": f"Failed to classify domain: {domain}",
                }
            ), 400

        if batch_mode:
            # Batch classification - this should really be a background task
            # For now, we'll just return immediately and let the frontend poll
            logger.info("Starting batch classification of all domains")
            results = classifier.classify_all_domains(force_update)
            return jsonify({"status": "success", "results": results})

        return jsonify(
            {
                "status": "error",
                "message": "Must provide either 'domain' or set 'batch': true",
            }
        ), 400

    except Exception:
        logger.exception("Error classifying domains")
        return jsonify(
            {"status": "error", "message": "Failed to classify domains"}
        ), 500
@metrics_bp.route("/api/domain-classifications/progress", methods=["GET"])
@login_required
def api_classification_progress():
    """Get progress of domain classification task.

    Compares the number of unique domains found across all stored
    resource URLs with the number of persisted classifications.
    """
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        # Get counts of classified vs unclassified domains
        with get_user_db_session(username) as session:
            from urllib.parse import urlparse

            # Collect the set of unique, normalized (lowercase, www.
            # stripped) domains across all stored resource URLs.
            resources = session.query(ResearchResource.url).distinct().all()
            domains = set()

            for (url,) in resources:
                if not url:
                    continue
                try:
                    parsed = urlparse(url)
                    domain = parsed.netloc.lower()
                    if domain.startswith("www."):
                        domain = domain[4:]
                    if domain:
                        domains.add(domain)
                except Exception:
                    # Skip unparsable URLs (was a bare `except:`, which
                    # would also swallow SystemExit/KeyboardInterrupt).
                    continue

            all_domains = sorted(domains)
            total_domains = len(domains)

            # Count classified domains
            classified_count = session.query(DomainClassification).count()

            return jsonify(
                {
                    "status": "success",
                    "progress": {
                        "total_domains": total_domains,
                        "classified": classified_count,
                        "unclassified": total_domains - classified_count,
                        "percentage": round(
                            (classified_count / total_domains * 100)
                            if total_domains > 0
                            else 0,
                            1,
                        ),
                        "all_domains": all_domains,  # Return all domains for classification
                    },
                }
            )

    except Exception:
        logger.exception("Error getting classification progress")
        return jsonify(
            {"status": "error", "message": "Failed to retrieve progress"}
        ), 500