Coverage for src / local_deep_research / web / routes / metrics_routes.py: 61%
691 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-25 01:07 +0000
1"""Routes for metrics dashboard."""
3from datetime import datetime, timedelta, UTC
5from flask import Blueprint, jsonify, request, session as flask_session
6from loguru import logger
7from sqlalchemy import case, func
9from ...database.models import (
10 RateLimitAttempt,
11 RateLimitEstimate,
12 Research,
13 ResearchRating,
14 ResearchResource,
15 ResearchStrategy,
16 TokenUsage,
17)
18from ...domain_classifier import DomainClassifier, DomainClassification
19from ...database.session_context import get_user_db_session
20from ...metrics import TokenCounter
21from ...metrics.query_utils import get_time_filter_condition
22from ...metrics.search_tracker import get_search_tracker
23from ...web_search_engines.rate_limiting import get_tracker
24from ..auth.decorators import login_required
25from ..utils.templates import render_template_with_defaults
# Create a Blueprint for metrics.
# All routes defined below are registered under the /metrics URL prefix.
metrics_bp = Blueprint("metrics", __name__, url_prefix="/metrics")
def _empty_rating_analytics(**extra):
    """Build the zeroed-out rating analytics payload.

    Any keyword arguments (e.g. ``error="..."``) are merged into the inner
    dict so callers can annotate the empty result.

    Returns:
        dict: ``{"rating_analytics": {...}}`` with all counters at zero.
    """
    payload = {
        "avg_rating": None,
        "total_ratings": 0,
        "rating_distribution": {},
        "satisfaction_stats": {
            "very_satisfied": 0,
            "satisfied": 0,
            "neutral": 0,
            "dissatisfied": 0,
            "very_dissatisfied": 0,
        },
    }
    payload.update(extra)
    return {"rating_analytics": payload}


def get_rating_analytics(period="30d", research_mode="all", username=None):
    """Get rating analytics for the specified period and research mode.

    Args:
        period: Time window key ("7d", "30d", "90d", "365d", "all").
            Unknown values fall back to 30 days.
        research_mode: Accepted for interface symmetry with the other
            analytics helpers; currently unused by the query.
        username: Explicit user; falls back to the Flask session user.

    Returns:
        dict: ``{"rating_analytics": {...}}`` with average rating, total
        count, per-star distribution, and satisfaction buckets. On any
        failure an all-zero payload is returned instead of raising.
    """
    try:
        if not username:
            username = flask_session.get("username")

        if not username:
            return _empty_rating_analytics(error="No user session")

        # Map the period key to a day count; None means "no time filter".
        days_map = {"7d": 7, "30d": 30, "90d": 90, "365d": 365, "all": None}
        days = days_map.get(period, 30)

        with get_user_db_session(username) as session:
            query = session.query(ResearchRating)

            # Apply time filter when a bounded period was requested.
            if days:
                cutoff_date = datetime.now(UTC) - timedelta(days=days)
                query = query.filter(ResearchRating.created_at >= cutoff_date)

            ratings = query.all()

            if not ratings:
                return _empty_rating_analytics()

            # Aggregate in Python; rating sets are small enough that the
            # repeated .count() calls are not a concern.
            rating_values = [r.rating for r in ratings]
            avg_rating = sum(rating_values) / len(rating_values)

            # Per-star distribution, keyed by the star value as a string.
            rating_counts = {
                str(star): rating_values.count(star) for star in range(1, 6)
            }

            # Satisfaction buckets map 5..1 stars to named categories.
            satisfaction_stats = {
                "very_satisfied": rating_values.count(5),
                "satisfied": rating_values.count(4),
                "neutral": rating_values.count(3),
                "dissatisfied": rating_values.count(2),
                "very_dissatisfied": rating_values.count(1),
            }

            return {
                "rating_analytics": {
                    "avg_rating": round(avg_rating, 1),
                    "total_ratings": len(ratings),
                    "rating_distribution": rating_counts,
                    "satisfaction_stats": satisfaction_stats,
                }
            }

    except Exception:
        logger.exception("Error getting rating analytics")
        return _empty_rating_analytics()
def get_link_analytics(period="30d", username=None):
    """Get link analytics from research resources.

    Aggregates the URLs stored in ResearchResource rows into per-domain
    usage counts, LLM-based category counts, temporal trends, and
    per-domain metrics for the dashboard.

    Args:
        period: Time window key ("7d", "30d", "90d", "365d", "all").
            Unknown values fall back to 30 days.
        username: Explicit user; falls back to the Flask session user.

    Returns:
        dict: ``{"link_analytics": {...}}``; an empty/zeroed payload (with
        an "error" key where applicable) on missing session or failure.
    """
    try:
        if not username:
            username = flask_session.get("username")

        if not username:
            return {
                "link_analytics": {
                    "top_domains": [],
                    "total_unique_domains": 0,
                    "avg_links_per_research": 0,
                    "domain_distribution": {},
                    "source_type_analysis": {},
                    "academic_vs_general": {},
                    "total_links": 0,
                    "error": "No user session",
                }
            }

        # Map the period key to a day count; None means "no time filter".
        days_map = {"7d": 7, "30d": 30, "90d": 90, "365d": 365, "all": None}
        days = days_map.get(period, 30)

        with get_user_db_session(username) as session:
            query = session.query(ResearchResource)

            # Apply time filter. NOTE(review): this compares created_at
            # against an ISO-8601 *string*, which implies created_at is
            # stored as text (the per-resource code below also slices it
            # as a string) — confirm against the model definition.
            if days:
                cutoff_date = datetime.now(UTC) - timedelta(days=days)
                query = query.filter(
                    ResearchResource.created_at >= cutoff_date.isoformat()
                )

            resources = query.all()

            if not resources:
                return {
                    "link_analytics": {
                        "top_domains": [],
                        "total_unique_domains": 0,
                        "avg_links_per_research": 0,
                        "domain_distribution": {},
                        "source_type_analysis": {},
                        "academic_vs_general": {},
                        "total_links": 0,
                    }
                }

            # Local imports keep the module import-time light.
            from urllib.parse import urlparse
            from ...domain_classifier.classifier import DomainClassifier

            domain_counts = {}
            domain_researches = {}  # Track which researches used each domain
            source_types = {}
            temporal_data = {}  # Track links over time (per-day counts)
            domain_connections = {}  # Track domain co-occurrences

            # Generic category counting from LLM classifications
            category_counts = {}

            # Initialize domain classifier for LLM-based categorization
            domain_classifier = DomainClassifier(username=username)
            # NOTE(review): quality_metrics and domain_connections are
            # accumulated below but never included in the returned payload.
            quality_metrics = {
                "with_title": 0,
                "with_preview": 0,
                "with_both": 0,
                "total": 0,
            }

            for resource in resources:
                if resource.url:
                    try:
                        parsed = urlparse(resource.url)
                        domain = parsed.netloc.lower()
                        # Normalize away the leading "www." prefix.
                        if domain.startswith("www."):
                            domain = domain[4:]

                        # Count total links per domain.
                        domain_counts[domain] = domain_counts.get(domain, 0) + 1

                        # Track the distinct research IDs for each domain.
                        if domain not in domain_researches:
                            domain_researches[domain] = set()
                        domain_researches[domain].add(resource.research_id)

                        # Track temporal data (daily counts); created_at is
                        # treated as an ISO string here.
                        if resource.created_at:
                            date_str = resource.created_at[
                                :10
                            ]  # Extract YYYY-MM-DD
                            temporal_data[date_str] = (
                                temporal_data.get(date_str, 0) + 1
                            )

                        # Count categories from LLM classification; domains
                        # without a cached classification are bucketed as
                        # "Unclassified".
                        classification = domain_classifier.get_classification(
                            domain
                        )
                        if classification:
                            category = classification.category
                            category_counts[category] = (
                                category_counts.get(category, 0) + 1
                            )
                        else:
                            category_counts["Unclassified"] = (
                                category_counts.get("Unclassified", 0) + 1
                            )

                        # Track source type from metadata if available.
                        if resource.source_type:
                            source_types[resource.source_type] = (
                                source_types.get(resource.source_type, 0) + 1
                            )

                        # Track metadata-completeness quality metrics.
                        quality_metrics["total"] += 1
                        if resource.title:
                            quality_metrics["with_title"] += 1
                        if resource.content_preview:
                            quality_metrics["with_preview"] += 1
                        if resource.title and resource.content_preview:
                            quality_metrics["with_both"] += 1

                        # Track domain co-occurrences for network visualization.
                        research_id = resource.research_id
                        if research_id not in domain_connections:
                            domain_connections[research_id] = []
                        domain_connections[research_id].append(domain)

                    except Exception as e:
                        # Best-effort: a single bad URL must not sink the
                        # whole aggregation.
                        logger.warning(f"Error parsing URL {resource.url}: {e}")

            # Sort domains by usage count, descending, and keep the top 10.
            sorted_domains = sorted(
                domain_counts.items(), key=lambda x: x[1], reverse=True
            )
            top_10_domains = sorted_domains[:10]

            # Split the distribution into top-10 vs everything else.
            # NOTE(review): others_count is derived from len(resources), so
            # resources whose URL failed to parse are counted as "others".
            top_10_count = sum(count for _, count in top_10_domains)
            others_count = len(resources) - top_10_count

            # Average number of links per research run.
            unique_research_ids = set(r.research_id for r in resources)
            avg_links = (
                len(resources) / len(unique_research_ids)
                if unique_research_ids
                else 0
            )

            # Temporal trend as a date-sorted list of {date, count} rows.
            temporal_trend = sorted(
                [
                    {"date": date, "count": count}
                    for date, count in temporal_data.items()
                ],
                key=lambda x: x["date"],
            )

            # Look up recent researches and cached classifications for the
            # top domains. NOTE(review): this opens a second session whose
            # name shadows the outer `session` — confirm that is intended.
            domain_recent_research = {}
            domain_classifications = {}
            with get_user_db_session(username) as session:
                from ...database.models import Research

                # Cache all stored domain classifications by domain name.
                all_classifications = session.query(DomainClassification).all()
                for classification in all_classifications:
                    domain_classifications[classification.domain] = {
                        "category": classification.category,
                        "subcategory": classification.subcategory,
                        "confidence": classification.confidence,
                    }

                for domain, _ in top_10_domains:
                    if domain in domain_researches:
                        # Up to 3 researches per domain; set iteration order
                        # is arbitrary, so "recent" is approximate here.
                        research_ids = list(domain_researches[domain])[
                            :3
                        ]  # Get up to 3 recent researches
                        researches = (
                            session.query(Research)
                            .filter(Research.id.in_(research_ids))
                            .all()
                        )
                        domain_recent_research[domain] = [
                            {
                                "id": r.id,
                                "query": r.query[:50]
                                if r.query
                                else "Research",
                            }
                            for r in researches
                        ]

            return {
                "link_analytics": {
                    "top_domains": [
                        {
                            "domain": domain,
                            "count": count,
                            "percentage": round(
                                count / len(resources) * 100, 1
                            ),
                            "research_count": len(
                                domain_researches.get(domain, set())
                            ),
                            "recent_researches": domain_recent_research.get(
                                domain, []
                            ),
                            "classification": domain_classifications.get(
                                domain, None
                            ),
                        }
                        for domain, count in top_10_domains
                    ],
                    "total_unique_domains": len(domain_counts),
                    "avg_links_per_research": round(avg_links, 1),
                    "domain_distribution": {
                        "top_10": top_10_count,
                        "others": others_count,
                    },
                    "source_type_analysis": source_types,
                    "category_distribution": category_counts,
                    # Generic pie chart data - use whatever LLM classifier outputs
                    "domain_categories": category_counts,
                    "total_links": len(resources),
                    "total_researches": len(unique_research_ids),
                    "temporal_trend": temporal_trend,
                    "domain_metrics": {
                        domain: {
                            "usage_count": count,
                            "usage_percentage": round(
                                count / len(resources) * 100, 1
                            ),
                            "research_diversity": len(
                                domain_researches.get(domain, set())
                            ),
                            "frequency_rank": rank + 1,
                        }
                        for rank, (domain, count) in enumerate(top_10_domains)
                    },
                }
            }

    except Exception:
        logger.exception("Error getting link analytics")
        return {
            "link_analytics": {
                "top_domains": [],
                "total_unique_domains": 0,
                "avg_links_per_research": 0,
                "domain_distribution": {},
                "source_type_analysis": {},
                "academic_vs_general": {},
                "total_links": 0,
                "error": "Failed to retrieve link analytics",
            }
        }
def get_available_strategies():
    """Get list of all available search strategies from the search system.

    Returns:
        list[dict]: one ``{"name": ..., "description": ...}`` entry per
        strategy. A fresh list (with fresh dicts) is built on each call.
    """
    # Mirrors the strategies registered in AdvancedSearchSystem.__init__.
    catalog = [
        ("standard", "Basic iterative search strategy"),
        ("iterdrag", "Iterative Dense Retrieval Augmented Generation"),
        ("source-based", "Focuses on finding and extracting from sources"),
        ("parallel", "Runs multiple search queries in parallel"),
        ("rapid", "Quick single-pass search"),
        ("recursive", "Recursive decomposition of complex queries"),
        ("iterative", "Loop-based reasoning with persistent knowledge"),
        ("adaptive", "Adaptive step-by-step reasoning"),
        ("smart", "Automatically chooses best strategy based on query"),
        ("browsecomp", "Optimized for BrowseComp-style puzzle queries"),
        (
            "evidence",
            "Enhanced evidence-based verification with improved candidate discovery",
        ),
        (
            "constrained",
            "Progressive constraint-based search that narrows candidates step by step",
        ),
        (
            "parallel-constrained",
            "Parallel constraint-based search with combined constraint execution",
        ),
        (
            "early-stop-constrained",
            "Parallel constraint search with immediate evaluation and early stopping at 99% confidence",
        ),
        ("smart-query", "Smart query generation strategy"),
        (
            "dual-confidence",
            "Dual confidence scoring with positive/negative/uncertainty",
        ),
        (
            "dual-confidence-with-rejection",
            "Dual confidence with early rejection of poor candidates",
        ),
        (
            "concurrent-dual-confidence",
            "Concurrent search & evaluation with progressive constraint relaxation",
        ),
        (
            "modular",
            "Modular architecture using constraint checking and candidate exploration modules",
        ),
        ("modular-parallel", "Modular strategy with parallel exploration"),
        (
            "focused-iteration",
            "Focused iteration strategy optimized for accuracy",
        ),
        (
            "browsecomp-entity",
            "Entity-focused search for BrowseComp questions with knowledge graph building",
        ),
    ]
    return [
        {"name": name, "description": description}
        for name, description in catalog
    ]
def _empty_strategy_analytics(**extra):
    """Build the zeroed-out strategy analytics payload.

    Any keyword arguments (e.g. ``error=...`` or ``message=...``) are
    merged into the inner dict so callers can annotate the empty result.

    Returns:
        dict: ``{"strategy_analytics": {...}}`` with all counters at zero
        and the full catalog of available strategies attached.
    """
    payload = {
        "total_research_with_strategy": 0,
        "total_research": 0,
        "most_popular_strategy": None,
        "strategy_usage": [],
        "strategy_distribution": {},
        "available_strategies": get_available_strategies(),
    }
    payload.update(extra)
    return {"strategy_analytics": payload}


def get_strategy_analytics(period="30d", username=None):
    """Get strategy usage analytics for the specified period.

    Args:
        period: Time window key ("7d", "30d", "90d", "365d", "all").
            Unknown values fall back to 30 days.
        username: Explicit user; falls back to the Flask session user.

    Returns:
        dict: ``{"strategy_analytics": {...}}`` with per-strategy usage
        counts and percentages. On any failure an all-zero payload is
        returned instead of raising.
    """
    try:
        if not username:
            username = flask_session.get("username")

        if not username:
            return _empty_strategy_analytics(error="No user session")

        # Map the period key to a day count; None means "no time filter".
        days_map = {"7d": 7, "30d": 30, "90d": 90, "365d": 365, "all": None}
        days = days_map.get(period, 30)

        with get_user_db_session(username) as session:
            # Bail out early when strategy tracking has never recorded
            # anything for this user.
            strategy_count = session.query(ResearchStrategy).count()

            if strategy_count == 0:
                logger.warning("No research strategies found in database")
                return _empty_strategy_analytics(
                    message="Strategy tracking not yet available - run a research to start tracking",
                )

            # Base query for strategy usage (no JOIN needed since we just
            # want strategy counts).
            query = session.query(
                ResearchStrategy.strategy_name,
                func.count(ResearchStrategy.id).label("usage_count"),
            )

            # Compute the cutoff once and reuse it for both queries below.
            # (Previously the second query silently relied on cutoff_date
            # having been bound by an earlier identical `if days:` branch.)
            cutoff_date = None
            if days:
                cutoff_date = datetime.now(UTC) - timedelta(days=days)
                query = query.filter(ResearchStrategy.created_at >= cutoff_date)

            # Group by strategy and order by usage, most-used first.
            strategy_results = (
                query.group_by(ResearchStrategy.strategy_name)
                .order_by(func.count(ResearchStrategy.id).desc())
                .all()
            )

            # Total strategy-tagged researches, for percentage calculation.
            total_query = session.query(ResearchStrategy)
            if cutoff_date is not None:
                total_query = total_query.filter(
                    ResearchStrategy.created_at >= cutoff_date
                )
            total_research = total_query.count()

            strategy_usage = []
            strategy_distribution = {}

            for strategy_name, usage_count in strategy_results:
                percentage = (
                    (usage_count / total_research * 100)
                    if total_research > 0
                    else 0
                )
                strategy_usage.append(
                    {
                        "strategy": strategy_name,
                        "count": usage_count,
                        "percentage": round(percentage, 1),
                    }
                )
                strategy_distribution[strategy_name] = usage_count

            # Results are ordered by usage desc, so the first is the most
            # popular strategy.
            most_popular = (
                strategy_usage[0]["strategy"] if strategy_usage else None
            )

            return {
                "strategy_analytics": {
                    "total_research_with_strategy": sum(
                        item["count"] for item in strategy_usage
                    ),
                    "total_research": total_research,
                    "most_popular_strategy": most_popular,
                    "strategy_usage": strategy_usage,
                    "strategy_distribution": strategy_distribution,
                    "available_strategies": get_available_strategies(),
                }
            }

    except Exception:
        logger.exception("Error getting strategy analytics")
        return _empty_strategy_analytics(
            error="Failed to retrieve strategy data"
        )
def get_rate_limiting_analytics(period="30d", username=None):
    """Get rate limiting analytics for the specified period.

    Summarizes RateLimitAttempt rows (per-call successes/failures and wait
    times) and merges in per-engine RateLimitEstimate rows to produce
    overall and per-engine health statistics.

    Args:
        period: One of "7d", "30d", "3m", "1y", or anything else for "all".
            NOTE(review): this period vocabulary differs from the
            "7d/30d/90d/365d/all" keys used by the other analytics helpers.
        username: Explicit user; falls back to the Flask session user.

    Returns:
        dict: ``{"rate_limiting": {...}}``; an all-zero payload (with an
        "error" key where applicable) on missing session or failure.
    """
    try:
        if not username:
            username = flask_session.get("username")

        if not username:
            return {
                "rate_limiting": {
                    "total_attempts": 0,
                    "successful_attempts": 0,
                    "failed_attempts": 0,
                    "success_rate": 0,
                    "rate_limit_events": 0,
                    "avg_wait_time": 0,
                    "avg_successful_wait": 0,
                    "tracked_engines": 0,
                    "engine_stats": [],
                    "total_engines_tracked": 0,
                    "healthy_engines": 0,
                    "degraded_engines": 0,
                    "poor_engines": 0,
                    "error": "No user session",
                }
            }

        # Calculate the cutoff as a Unix timestamp; attempts are filtered
        # on a numeric RateLimitAttempt.timestamp column.
        import time

        if period == "7d":
            cutoff_time = time.time() - (7 * 24 * 3600)
        elif period == "30d":
            cutoff_time = time.time() - (30 * 24 * 3600)
        elif period == "3m":
            cutoff_time = time.time() - (90 * 24 * 3600)
        elif period == "1y":
            cutoff_time = time.time() - (365 * 24 * 3600)
        else:  # all
            cutoff_time = 0

        with get_user_db_session(username) as session:
            # Base query over raw per-call attempts.
            rate_limit_query = session.query(RateLimitAttempt)

            # Apply time filter (cutoff_time == 0 means "all time").
            if cutoff_time > 0:
                rate_limit_query = rate_limit_query.filter(
                    RateLimitAttempt.timestamp >= cutoff_time
                )

            # Headline attempt counts.
            total_attempts = rate_limit_query.count()
            successful_attempts = rate_limit_query.filter(
                RateLimitAttempt.success
            ).count()
            failed_attempts = total_attempts - successful_attempts

            # Count rate limiting events: failures whose recorded error
            # type is specifically "RateLimitError".
            rate_limit_events = rate_limit_query.filter(
                ~RateLimitAttempt.success,
                RateLimitAttempt.error_type == "RateLimitError",
            ).count()

            logger.info(
                f"Rate limit attempts in database: total={total_attempts}, successful={successful_attempts}"
            )

            # Materialize all attempts for the Python-side calculations.
            attempts = rate_limit_query.all()

            # Average wait times, overall and over successful attempts only.
            if attempts:
                avg_wait_time = sum(a.wait_time for a in attempts) / len(
                    attempts
                )
                successful_wait_times = [
                    a.wait_time for a in attempts if a.success
                ]
                avg_successful_wait = (
                    sum(successful_wait_times) / len(successful_wait_times)
                    if successful_wait_times
                    else 0
                )
            else:
                avg_wait_time = 0
                avg_successful_wait = 0

            # Count distinct engine types seen among the (filtered) attempts.
            tracked_engines_query = session.query(
                func.count(func.distinct(RateLimitAttempt.engine_type))
            )
            if cutoff_time > 0:
                tracked_engines_query = tracked_engines_query.filter(
                    RateLimitAttempt.timestamp >= cutoff_time
                )
            tracked_engines = tracked_engines_query.scalar() or 0

            # Build per-engine statistics.
            engine_stats = []

            # Distinct engine types within the same time window.
            engine_types_query = session.query(
                RateLimitAttempt.engine_type
            ).distinct()
            if cutoff_time > 0:
                engine_types_query = engine_types_query.filter(
                    RateLimitAttempt.timestamp >= cutoff_time
                )
            engine_types = [row.engine_type for row in engine_types_query.all()]

            # Preload estimates for relevant engines to avoid N+1 queries.
            estimates_by_engine = {}
            if engine_types:
                all_estimates = (
                    session.query(RateLimitEstimate)
                    .filter(RateLimitEstimate.engine_type.in_(engine_types))
                    .all()
                )
                estimates_by_engine = {e.engine_type: e for e in all_estimates}

            for engine_type in engine_types:
                # Per-engine slice of the already-loaded attempts.
                engine_attempts_list = [
                    a for a in attempts if a.engine_type == engine_type
                ]
                engine_attempts = len(engine_attempts_list)
                engine_success = len(
                    [a for a in engine_attempts_list if a.success]
                )

                # Long-run estimate (may be None for engines with attempts
                # but no stored estimate row).
                estimate = estimates_by_engine.get(engine_type)

                # Success rate over the attempts in this window (percent).
                recent_success_rate = (
                    (engine_success / engine_attempts * 100)
                    if engine_attempts > 0
                    else 0
                )

                # Health status: prefer the long-run estimate's success
                # rate (a 0..1 fraction); otherwise fall back to the
                # recent rate (a 0..100 percentage) with matching bounds.
                if estimate:
                    status = (
                        "healthy"
                        if estimate.success_rate > 0.8
                        else "degraded"
                        if estimate.success_rate > 0.5
                        else "poor"
                    )
                else:
                    status = (
                        "healthy"
                        if recent_success_rate > 80
                        else "degraded"
                        if recent_success_rate > 50
                        else "poor"
                    )

                # Estimate-derived fields default to 0.0 when no estimate
                # row exists for the engine.
                engine_stat = {
                    "engine": engine_type,
                    "base_wait": estimate.base_wait_seconds
                    if estimate
                    else 0.0,
                    "base_wait_seconds": round(
                        estimate.base_wait_seconds if estimate else 0.0, 2
                    ),
                    "min_wait_seconds": round(
                        estimate.min_wait_seconds if estimate else 0.0, 2
                    ),
                    "max_wait_seconds": round(
                        estimate.max_wait_seconds if estimate else 0.0, 2
                    ),
                    "success_rate": round(estimate.success_rate * 100, 1)
                    if estimate
                    else recent_success_rate,
                    "total_attempts": estimate.total_attempts
                    if estimate
                    else engine_attempts,
                    "recent_attempts": engine_attempts,
                    "recent_success_rate": round(recent_success_rate, 1),
                    "attempts": engine_attempts,
                    "status": status,
                }

                if estimate:
                    # NOTE(review): redundant re-import; `datetime` is
                    # already imported at module level.
                    from datetime import datetime

                    engine_stat["last_updated"] = datetime.fromtimestamp(
                        estimate.last_updated, UTC
                    ).isoformat()  # ISO format already includes timezone
                else:
                    engine_stat["last_updated"] = "Never"

                engine_stats.append(engine_stat)

            logger.info(
                f"Tracked engines: {tracked_engines}, engine_stats: {engine_stats}"
            )

            result = {
                "rate_limiting": {
                    "total_attempts": total_attempts,
                    "successful_attempts": successful_attempts,
                    "failed_attempts": failed_attempts,
                    "success_rate": (successful_attempts / total_attempts * 100)
                    if total_attempts > 0
                    else 0,
                    "rate_limit_events": rate_limit_events,
                    "avg_wait_time": round(float(avg_wait_time), 2),
                    "avg_successful_wait": round(float(avg_successful_wait), 2),
                    "tracked_engines": tracked_engines,
                    "engine_stats": engine_stats,
                    "total_engines_tracked": tracked_engines,
                    "healthy_engines": len(
                        [s for s in engine_stats if s["status"] == "healthy"]
                    ),
                    "degraded_engines": len(
                        [s for s in engine_stats if s["status"] == "degraded"]
                    ),
                    "poor_engines": len(
                        [s for s in engine_stats if s["status"] == "poor"]
                    ),
                }
            }

            logger.info(
                f"DEBUG: Returning rate_limiting_analytics result: {result}"
            )
            return result

    except Exception:
        logger.exception("Error getting rate limiting analytics")
        return {
            "rate_limiting": {
                "total_attempts": 0,
                "successful_attempts": 0,
                "failed_attempts": 0,
                "success_rate": 0,
                "rate_limit_events": 0,
                "avg_wait_time": 0,
                "avg_successful_wait": 0,
                "tracked_engines": 0,
                "engine_stats": [],
                "total_engines_tracked": 0,
                "healthy_engines": 0,
                "degraded_engines": 0,
                "poor_engines": 0,
                "error": "An internal error occurred while processing the request.",
            }
        }
@metrics_bp.route("/")
@login_required
def metrics_dashboard():
    """Serve the main metrics dashboard page."""
    template_name = "pages/metrics.html"
    return render_template_with_defaults(template_name)
@metrics_bp.route("/context-overflow")
@login_required
def context_overflow_page():
    """Serve the context overflow analytics page."""
    template_name = "pages/context_overflow.html"
    return render_template_with_defaults(template_name)
@metrics_bp.route("/api/metrics")
@login_required
def api_metrics():
    """Get overall metrics data.

    Aggregates token usage, search, strategy, rate-limiting, and user
    satisfaction metrics into a single JSON payload for the dashboard.

    Query parameters:
        period: time window key, default "30d".
        mode: research mode filter, default "all".

    Returns:
        JSON ``{"status": "success", "metrics": {...}, "period",
        "research_mode"}``; 401 without a session, 500 on failure.
    """
    logger.debug("api_metrics endpoint called")
    try:
        # Get username from session
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        # Get time period and research mode from query parameters
        period = request.args.get("period", "30d")
        research_mode = request.args.get("mode", "all")

        token_counter = TokenCounter()
        search_tracker = get_search_tracker()

        # Get both token and search metrics
        token_metrics = token_counter.get_overall_metrics(
            period=period, research_mode=research_mode
        )
        search_metrics = search_tracker.get_search_metrics(
            period=period, research_mode=research_mode
        )

        # Get user satisfaction rating data. Deliberately best-effort:
        # a failure here degrades to an empty satisfaction payload
        # instead of failing the whole endpoint.
        try:
            with get_user_db_session(username) as session:
                # Build base query with time filter
                ratings_query = session.query(ResearchRating)
                time_condition = get_time_filter_condition(
                    period, ResearchRating.created_at
                )
                if time_condition is not None:
                    ratings_query = ratings_query.filter(time_condition)

                # Average rating across the window (None if no ratings).
                avg_rating = ratings_query.with_entities(
                    func.avg(ResearchRating.rating).label("avg_rating")
                ).scalar()

                # Total rating count in the same window.
                total_ratings = ratings_query.count()

                user_satisfaction = {
                    "avg_rating": round(avg_rating, 1) if avg_rating else None,
                    "total_ratings": total_ratings,
                }
        except Exception as e:
            logger.warning(f"Error getting user satisfaction data: {e}")
            user_satisfaction = {"avg_rating": None, "total_ratings": 0}

        # Get strategy analytics
        strategy_data = get_strategy_analytics(period, username)
        logger.debug(f"strategy_data keys: {list(strategy_data.keys())}")

        # Get rate limiting analytics
        rate_limiting_data = get_rate_limiting_analytics(period, username)
        logger.debug(f"rate_limiting_data: {rate_limiting_data}")
        logger.debug(
            f"rate_limiting_data keys: {list(rate_limiting_data.keys())}"
        )

        # Combine metrics into one flat dict; each source contributes its
        # own top-level key, so later dicts win only on key collisions.
        combined_metrics = {
            **token_metrics,
            **search_metrics,
            **strategy_data,
            **rate_limiting_data,
            "user_satisfaction": user_satisfaction,
        }

        logger.debug(f"combined_metrics keys: {list(combined_metrics.keys())}")
        logger.debug(
            f"combined_metrics['rate_limiting']: {combined_metrics.get('rate_limiting', 'NOT FOUND')}"
        )

        return jsonify(
            {
                "status": "success",
                "metrics": combined_metrics,
                "period": period,
                "research_mode": research_mode,
            }
        )
    except Exception:
        logger.exception("Error getting metrics")
        return (
            jsonify(
                {
                    "status": "error",
                    "message": "An internal error occurred. Please try again later.",
                }
            ),
            500,
        )
@metrics_bp.route("/api/rate-limiting")
@login_required
def api_rate_limiting_metrics():
    """Return detailed rate limiting metrics as JSON.

    Reads the optional ``period`` query parameter (default "30d") and
    delegates the aggregation to get_rate_limiting_analytics.
    """
    logger.info("DEBUG: api_rate_limiting_metrics endpoint called")
    try:
        username = flask_session.get("username")
        period = request.args.get("period", "30d")
        analytics = get_rate_limiting_analytics(period, username)

        payload = {
            "status": "success",
            "data": analytics,
            "period": period,
        }
        return jsonify(payload)
    except Exception:
        logger.exception("Error getting rate limiting metrics")
        error_payload = {
            "status": "error",
            "message": "Failed to retrieve rate limiting metrics",
        }
        return jsonify(error_payload), 500
@metrics_bp.route("/api/rate-limiting/current")
@login_required
def api_current_rate_limits():
    """Return current rate limit estimates for all engines as JSON.

    Each tracker stat row is a 7-tuple; it is unpacked into a dict with
    rounded wait times, a percentage success rate, and a derived health
    status ("healthy" / "degraded" / "poor").
    """
    try:
        tracker = get_tracker()

        current_limits = []
        for (
            engine_type,
            base_wait,
            min_wait,
            max_wait,
            last_updated,
            total_attempts,
            success_rate,
        ) in tracker.get_stats():
            # Health thresholds on the 0..1 success-rate fraction.
            if success_rate > 0.8:
                status = "healthy"
            elif success_rate > 0.5:
                status = "degraded"
            else:
                status = "poor"

            entry = {
                "engine_type": engine_type,
                "base_wait_seconds": round(base_wait, 2),
                "min_wait_seconds": round(min_wait, 2),
                "max_wait_seconds": round(max_wait, 2),
                "success_rate": round(success_rate * 100, 1),
                "total_attempts": total_attempts,
                "last_updated": datetime.fromtimestamp(
                    last_updated, UTC
                ).isoformat(),  # ISO format already includes timezone
                "status": status,
            }
            current_limits.append(entry)

        response = {
            "status": "success",
            "current_limits": current_limits,
            "timestamp": datetime.now(UTC).isoformat(),
        }
        return jsonify(response)
    except Exception:
        logger.exception("Error getting current rate limits")
        error_payload = {
            "status": "error",
            "message": "Failed to retrieve current rate limits",
        }
        return jsonify(error_payload), 500
@metrics_bp.route("/api/metrics/research/<string:research_id>/links")
@login_required
def api_research_link_metrics(research_id):
    """Get link analytics for a specific research.

    Aggregates the research's resources into per-domain counts and
    LLM-classified category counts.

    Args:
        research_id: Identifier of the research session to analyze.

    Returns:
        JSON payload with domain/category breakdowns, 401 without a user
        session, or 500 on unexpected errors.
    """
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        with get_user_db_session(username) as session:
            # Get all resources for this specific research
            resources = (
                session.query(ResearchResource)
                .filter(ResearchResource.research_id == research_id)
                .all()
            )

            if not resources:
                return jsonify(
                    {
                        "status": "success",
                        "data": {
                            "total_links": 0,
                            "unique_domains": 0,
                            "domains": [],
                            "category_distribution": {},
                            "domain_categories": {},
                            "resources": [],
                        },
                    }
                )

            from urllib.parse import urlparse

            domain_counts = {}

            # Generic category counting from LLM classifications
            category_counts = {}

            # Initialize domain classifier for LLM-based categorization.
            # Uses the module-level DomainClassifier import (the previous
            # local re-import shadowed it).
            domain_classifier = DomainClassifier(username=username)

            for resource in resources:
                if resource.url:
                    try:
                        parsed = urlparse(resource.url)
                        domain = parsed.netloc.lower()
                        if domain.startswith("www."):
                            domain = domain[4:]

                        domain_counts[domain] = domain_counts.get(domain, 0) + 1

                        # Count categories from LLM classification
                        classification = domain_classifier.get_classification(
                            domain
                        )
                        if classification:
                            category = classification.category
                            category_counts[category] = (
                                category_counts.get(category, 0) + 1
                            )
                        else:
                            category_counts["Unclassified"] = (
                                category_counts.get("Unclassified", 0) + 1
                            )
                    except (AttributeError, KeyError, ValueError) as e:
                        # ValueError: urlparse() raises it for malformed URLs,
                        # which previously escaped this handler and turned the
                        # whole request into a 500. Log resource.url rather
                        # than `domain`, which is unbound when urlparse raised.
                        logger.debug(
                            f"Error classifying domain for URL {resource.url}: {e}"
                        )

            # Sort domains by count
            sorted_domains = sorted(
                domain_counts.items(), key=lambda x: x[1], reverse=True
            )

            return jsonify(
                {
                    "status": "success",
                    "data": {
                        "total_links": len(resources),
                        "unique_domains": len(domain_counts),
                        "domains": [
                            {
                                "domain": domain,
                                "count": count,
                                "percentage": round(
                                    count / len(resources) * 100, 1
                                ),
                            }
                            for domain, count in sorted_domains[
                                :20
                            ]  # Top 20 domains
                        ],
                        "category_distribution": category_counts,
                        "domain_categories": category_counts,  # Generic categories from LLM
                        "resources": [
                            {
                                "title": r.title or "Untitled",
                                "url": r.url,
                                "preview": r.content_preview[:200]
                                if r.content_preview
                                else None,
                            }
                            for r in resources[:10]  # First 10 resources
                        ],
                    },
                }
            )

    except Exception:
        logger.exception("Error getting research link metrics")
        return jsonify(
            {"status": "error", "message": "Failed to retrieve link metrics"}
        ), 500
@metrics_bp.route("/api/metrics/research/<string:research_id>")
@login_required
def api_research_metrics(research_id):
    """Get metrics for a specific research."""
    try:
        usage = TokenCounter().get_research_metrics(research_id)
        return jsonify({"status": "success", "metrics": usage})
    except Exception:
        logger.exception("Error getting research metrics")
        error_body = {
            "status": "error",
            "message": "An internal error occurred. Please try again later.",
        }
        return jsonify(error_body), 500
@metrics_bp.route("/api/metrics/research/<string:research_id>/timeline")
@login_required
def api_research_timeline_metrics(research_id):
    """Get timeline metrics for a specific research."""
    try:
        timeline = TokenCounter().get_research_timeline_metrics(research_id)
        return jsonify({"status": "success", "metrics": timeline})
    except Exception:
        logger.exception("Error getting research timeline metrics")
        error_body = {
            "status": "error",
            "message": "An internal error occurred. Please try again later.",
        }
        return jsonify(error_body), 500
@metrics_bp.route("/api/metrics/research/<string:research_id>/search")
@login_required
def api_research_search_metrics(research_id):
    """Get search metrics for a specific research."""
    try:
        tracker = get_search_tracker()
        search_stats = tracker.get_research_search_metrics(research_id)
        return jsonify({"status": "success", "metrics": search_stats})
    except Exception:
        logger.exception("Error getting research search metrics")
        error_body = {
            "status": "error",
            "message": "An internal error occurred. Please try again later.",
        }
        return jsonify(error_body), 500
@metrics_bp.route("/api/metrics/enhanced")
@login_required
def api_enhanced_metrics():
    """Get enhanced Phase 1 tracking metrics."""
    try:
        # Query parameters controlling the analytics window and mode.
        period = request.args.get("period", "30d")
        research_mode = request.args.get("mode", "all")
        username = flask_session.get("username")

        counter = TokenCounter()
        tracker = get_search_tracker()

        metrics_payload = counter.get_enhanced_metrics(
            period=period, research_mode=research_mode
        )

        # Add search time series data for the chart.
        metrics_payload["search_time_series"] = tracker.get_search_time_series(
            period=period, research_mode=research_mode
        )

        # Merge in rating analytics.
        metrics_payload.update(
            get_rating_analytics(period, research_mode, username)
        )

        return jsonify(
            {
                "status": "success",
                "metrics": metrics_payload,
                "period": period,
                "research_mode": research_mode,
            }
        )
    except Exception:
        logger.exception("Error getting enhanced metrics")
        error_body = {
            "status": "error",
            "message": "An internal error occurred. Please try again later.",
        }
        return jsonify(error_body), 500
@metrics_bp.route("/api/ratings/<string:research_id>", methods=["GET"])
@login_required
def api_get_research_rating(research_id):
    """Get rating for a specific research session."""
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        with get_user_db_session(username) as session:
            stored = (
                session.query(ResearchRating)
                .filter_by(research_id=research_id)
                .first()
            )

            # No rating recorded yet for this research session.
            if stored is None:
                return jsonify({"status": "success", "rating": None})

            return jsonify(
                {
                    "status": "success",
                    "rating": stored.rating,
                    "created_at": stored.created_at.isoformat(),
                    "updated_at": stored.updated_at.isoformat(),
                }
            )

    except Exception:
        logger.exception("Error getting research rating")
        error_body = {
            "status": "error",
            "message": "An internal error occurred. Please try again later.",
        }
        return jsonify(error_body), 500
@metrics_bp.route("/api/ratings/<string:research_id>", methods=["POST"])
@login_required
def api_save_research_rating(research_id):
    """Save or update rating for a specific research session.

    Expects a JSON body of the form {"rating": <int 1-5>}.

    Returns:
        JSON success payload, 400 for a missing/invalid body or rating,
        401 without a user session, 500 on unexpected errors.
    """
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        # silent=True: a missing or malformed JSON body yields None instead
        # of raising, so the client gets a clean 400 rather than a 500.
        data = request.get_json(silent=True)
        rating_value = data.get("rating") if data else None

        # Reject bools explicitly: bool is a subclass of int, so True would
        # otherwise slip through as a rating of 1.
        if (
            not isinstance(rating_value, int)
            or isinstance(rating_value, bool)
            or rating_value < 1
            or rating_value > 5
        ):
            return (
                jsonify(
                    {
                        "status": "error",
                        "message": "Rating must be an integer between 1 and 5",
                    }
                ),
                400,
            )

        with get_user_db_session(username) as session:
            # Check if rating already exists
            existing_rating = (
                session.query(ResearchRating)
                .filter_by(research_id=research_id)
                .first()
            )

            if existing_rating:
                # Update existing rating
                existing_rating.rating = rating_value
                existing_rating.updated_at = func.now()
            else:
                # Create new rating
                new_rating = ResearchRating(
                    research_id=research_id, rating=rating_value
                )
                session.add(new_rating)

            session.commit()

            return jsonify(
                {
                    "status": "success",
                    "message": "Rating saved successfully",
                    "rating": rating_value,
                }
            )

    except Exception:
        logger.exception("Error saving research rating")
        return (
            jsonify(
                {
                    "status": "error",
                    "message": "An internal error occurred. Please try again later.",
                }
            ),
            500,
        )
@metrics_bp.route("/star-reviews")
@login_required
def star_reviews():
    """Render the star reviews metrics page."""
    template_name = "pages/star_reviews.html"
    return render_template_with_defaults(template_name)
@metrics_bp.route("/costs")
@login_required
def cost_analytics():
    """Render the cost analytics page."""
    template_name = "pages/cost_analytics.html"
    return render_template_with_defaults(template_name)
@metrics_bp.route("/api/star-reviews")
@login_required
def api_star_reviews():
    """Get star reviews analytics data.

    Aggregates ResearchRating rows over the requested period into:
    overall stats with a 1-5 star distribution, per-LLM-model and
    per-search-engine averages (joined via TokenUsage), daily rating
    trends, and the 20 most recent ratings with research details.

    Query params:
        period: Time window for the analytics (default "30d").

    Returns:
        JSON analytics payload, 401 without a user session, or 500 on
        unexpected errors.
    """
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        period = request.args.get("period", "30d")

        with get_user_db_session(username) as session:
            # Build base query with time filter
            # NOTE(review): base_query is constructed but never used below;
            # each sub-query applies time_condition itself. Confirm whether
            # it can be removed.
            base_query = session.query(ResearchRating)
            time_condition = get_time_filter_condition(
                period, ResearchRating.created_at
            )
            if time_condition is not None:
                base_query = base_query.filter(time_condition)

            # Overall rating statistics: average, count, and one SUM(CASE...)
            # per star value so the whole distribution comes from one query.
            overall_stats = session.query(
                func.avg(ResearchRating.rating).label("avg_rating"),
                func.count(ResearchRating.rating).label("total_ratings"),
                func.sum(case((ResearchRating.rating == 5, 1), else_=0)).label(
                    "five_star"
                ),
                func.sum(case((ResearchRating.rating == 4, 1), else_=0)).label(
                    "four_star"
                ),
                func.sum(case((ResearchRating.rating == 3, 1), else_=0)).label(
                    "three_star"
                ),
                func.sum(case((ResearchRating.rating == 2, 1), else_=0)).label(
                    "two_star"
                ),
                func.sum(case((ResearchRating.rating == 1, 1), else_=0)).label(
                    "one_star"
                ),
            )

            if time_condition is not None:
                overall_stats = overall_stats.filter(time_condition)

            overall_stats = overall_stats.first()

            # Ratings by LLM model (get from token_usage since Research doesn't have model field)
            llm_ratings_query = session.query(
                func.coalesce(TokenUsage.model_name, "Unknown").label("model"),
                func.avg(ResearchRating.rating).label("avg_rating"),
                func.count(ResearchRating.rating).label("rating_count"),
                func.sum(case((ResearchRating.rating >= 4, 1), else_=0)).label(
                    "positive_ratings"
                ),
            ).outerjoin(
                TokenUsage, ResearchRating.research_id == TokenUsage.research_id
            )

            if time_condition is not None:
                llm_ratings_query = llm_ratings_query.filter(time_condition)

            llm_ratings = (
                llm_ratings_query.group_by(TokenUsage.model_name)
                .order_by(func.avg(ResearchRating.rating).desc())
                .all()
            )

            # Ratings by search engine (join with token_usage to get search engine info)
            search_engine_ratings_query = session.query(
                func.coalesce(
                    TokenUsage.search_engine_selected, "Unknown"
                ).label("search_engine"),
                func.avg(ResearchRating.rating).label("avg_rating"),
                func.count(ResearchRating.rating).label("rating_count"),
                func.sum(case((ResearchRating.rating >= 4, 1), else_=0)).label(
                    "positive_ratings"
                ),
            ).outerjoin(
                TokenUsage, ResearchRating.research_id == TokenUsage.research_id
            )

            if time_condition is not None:
                search_engine_ratings_query = (
                    search_engine_ratings_query.filter(time_condition)
                )

            search_engine_ratings = (
                search_engine_ratings_query.group_by(
                    TokenUsage.search_engine_selected
                )
                .having(func.count(ResearchRating.rating) > 0)
                .order_by(func.avg(ResearchRating.rating).desc())
                .all()
            )

            # Rating trends over time: daily average and count.
            rating_trends_query = session.query(
                func.date(ResearchRating.created_at).label("date"),
                func.avg(ResearchRating.rating).label("avg_rating"),
                func.count(ResearchRating.rating).label("daily_count"),
            )

            if time_condition is not None:
                rating_trends_query = rating_trends_query.filter(time_condition)

            rating_trends = (
                rating_trends_query.group_by(
                    func.date(ResearchRating.created_at)
                )
                .order_by("date")
                .all()
            )

            # Recent ratings with research details
            # NOTE(review): both ResearchRating.created_at and
            # Research.created_at are selected; the `rating.created_at`
            # attribute access below is ambiguous between the two columns —
            # confirm which one SQLAlchemy resolves it to.
            recent_ratings_query = (
                session.query(
                    ResearchRating.rating,
                    ResearchRating.created_at,
                    ResearchRating.research_id,
                    Research.query,
                    Research.mode,
                    TokenUsage.model_name,
                    Research.created_at,
                )
                .outerjoin(Research, ResearchRating.research_id == Research.id)
                .outerjoin(
                    TokenUsage,
                    ResearchRating.research_id == TokenUsage.research_id,
                )
            )

            if time_condition is not None:
                recent_ratings_query = recent_ratings_query.filter(
                    time_condition
                )

            recent_ratings = (
                recent_ratings_query.order_by(ResearchRating.created_at.desc())
                .limit(20)
                .all()
            )

            # Assemble the JSON payload; `or 0` guards against NULL
            # aggregates when no rows matched the period filter.
            return jsonify(
                {
                    "overall_stats": {
                        "avg_rating": round(overall_stats.avg_rating or 0, 2),
                        "total_ratings": overall_stats.total_ratings or 0,
                        "rating_distribution": {
                            "5": overall_stats.five_star or 0,
                            "4": overall_stats.four_star or 0,
                            "3": overall_stats.three_star or 0,
                            "2": overall_stats.two_star or 0,
                            "1": overall_stats.one_star or 0,
                        },
                    },
                    "llm_ratings": [
                        {
                            "model": rating.model,
                            "avg_rating": round(rating.avg_rating or 0, 2),
                            "rating_count": rating.rating_count or 0,
                            "positive_ratings": rating.positive_ratings or 0,
                            # Share of ratings >= 4; max(..., 1) avoids
                            # division by zero for empty groups.
                            "satisfaction_rate": round(
                                (rating.positive_ratings or 0)
                                / max(rating.rating_count or 1, 1)
                                * 100,
                                1,
                            ),
                        }
                        for rating in llm_ratings
                    ],
                    "search_engine_ratings": [
                        {
                            "search_engine": rating.search_engine,
                            "avg_rating": round(rating.avg_rating or 0, 2),
                            "rating_count": rating.rating_count or 0,
                            "positive_ratings": rating.positive_ratings or 0,
                            "satisfaction_rate": round(
                                (rating.positive_ratings or 0)
                                / max(rating.rating_count or 1, 1)
                                * 100,
                                1,
                            ),
                        }
                        for rating in search_engine_ratings
                    ],
                    "rating_trends": [
                        {
                            "date": str(trend.date),
                            "avg_rating": round(trend.avg_rating or 0, 2),
                            "count": trend.daily_count or 0,
                        }
                        for trend in rating_trends
                    ],
                    "recent_ratings": [
                        {
                            "rating": rating.rating,
                            "created_at": str(rating.created_at),
                            "research_id": rating.research_id,
                            "query": (
                                rating.query
                                if rating.query
                                else f"Research Session #{rating.research_id}"
                            ),
                            "mode": rating.mode
                            if rating.mode
                            else "Standard Research",
                            "llm_model": (
                                rating.model_name
                                if rating.model_name
                                else "LLM Model"
                            ),
                        }
                        for rating in recent_ratings
                    ],
                }
            )

    except Exception:
        logger.exception("Error getting star reviews data")
        return (
            jsonify(
                {"error": "An internal error occurred. Please try again later."}
            ),
            500,
        )
@metrics_bp.route("/api/pricing")
@login_required
def api_pricing():
    """Get current LLM pricing data."""
    try:
        from ...metrics.pricing.pricing_fetcher import PricingFetcher

        # Static pricing data is used instead of async real-time lookups.
        static_pricing = PricingFetcher().static_pricing

        return jsonify(
            {
                "status": "success",
                "pricing": static_pricing,
                "last_updated": datetime.now(UTC).isoformat(),
                "note": "Pricing data is from static configuration. Real-time APIs not available for most providers.",
            }
        )

    except Exception:
        logger.exception("Error fetching pricing data")
        return jsonify({"error": "Internal Server Error"}), 500
@metrics_bp.route("/api/pricing/<model_name>")
@login_required
def api_model_pricing(model_name):
    """Get pricing for a specific model."""
    try:
        # Optional provider parameter
        provider = request.args.get("provider")

        from ...metrics.pricing.cost_calculator import CostCalculator

        calculator = CostCalculator()

        # Prefer cached pricing; otherwise fall back to the pricing used by
        # a synchronous sample cost calculation (1000/1000 tokens).
        pricing = calculator.cache.get_model_pricing(model_name)
        if not pricing:
            sample = calculator.calculate_cost_sync(model_name, 1000, 1000)
            pricing = sample.get("pricing_used", {})

        return jsonify(
            {
                "status": "success",
                "model": model_name,
                "provider": provider,
                "pricing": pricing,
                "last_updated": datetime.now(UTC).isoformat(),
            }
        )

    except Exception:
        logger.exception(f"Error getting pricing for model: {model_name}")
        return jsonify({"error": "An internal error occurred"}), 500
@metrics_bp.route("/api/cost-calculation", methods=["POST"])
@login_required
def api_cost_calculation():
    """Calculate cost for token usage.

    JSON body:
        model_name: Required model identifier.
        provider: Optional provider hint (echoed back).
        prompt_tokens / completion_tokens: Non-negative integer counts
            (default 0).

    Returns:
        JSON cost breakdown, 400 for missing/invalid input, or 500 on
        unexpected errors.
    """
    try:
        data = request.get_json()

        if not data:
            return jsonify({"error": "No data provided"}), 400

        model_name = data.get("model_name")
        provider = data.get("provider")  # Optional provider parameter
        prompt_tokens = data.get("prompt_tokens", 0)
        completion_tokens = data.get("completion_tokens", 0)

        if not model_name:
            return jsonify({"error": "model_name is required"}), 400

        # Validate token counts up front so malformed input (strings,
        # negatives, bools) yields a clean 400 instead of a 500 from the
        # arithmetic/cost calculation below.
        for label, value in (
            ("prompt_tokens", prompt_tokens),
            ("completion_tokens", completion_tokens),
        ):
            if (
                not isinstance(value, int)
                or isinstance(value, bool)
                or value < 0
            ):
                return jsonify(
                    {"error": f"{label} must be a non-negative integer"}
                ), 400

        from ...metrics.pricing.cost_calculator import CostCalculator

        # Use synchronous cost calculation
        calculator = CostCalculator()
        cost_data = calculator.calculate_cost_sync(
            model_name, prompt_tokens, completion_tokens
        )

        return jsonify(
            {
                "status": "success",
                "model_name": model_name,
                "provider": provider,
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens,
                **cost_data,
            }
        )

    except Exception:
        logger.exception("Error calculating cost")
        return jsonify({"error": "An internal error occurred"}), 500
@metrics_bp.route("/api/research-costs/<string:research_id>")
@login_required
def api_research_costs(research_id):
    """Get cost analysis for a specific research session."""
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        with get_user_db_session(username) as session:
            # Get token usage records for this research
            usage_records = (
                session.query(TokenUsage)
                .filter(TokenUsage.research_id == research_id)
                .all()
            )

            if not usage_records:
                return jsonify(
                    {
                        "status": "success",
                        "research_id": research_id,
                        "total_cost": 0.0,
                        "message": "No token usage data found for this research session",
                    }
                )

            # Convert ORM rows to plain dicts for the cost calculation;
            # getattr() tolerates older records without a provider column.
            usage_data = [
                {
                    "model_name": record.model_name,
                    "provider": getattr(record, "provider", None),
                    "prompt_tokens": record.prompt_tokens,
                    "completion_tokens": record.completion_tokens,
                    "timestamp": record.timestamp,
                }
                for record in usage_records
            ]

            from ...metrics.pricing.cost_calculator import CostCalculator

            # Use synchronous calculation for research costs.
            calculator = CostCalculator()
            costs = [
                {
                    **entry,
                    **calculator.calculate_cost_sync(
                        entry["model_name"],
                        entry["prompt_tokens"],
                        entry["completion_tokens"],
                    ),
                }
                for entry in usage_data
            ]

            total_cost = sum(c["total_cost"] for c in costs)
            prompt_total = sum(e["prompt_tokens"] for e in usage_data)
            completion_total = sum(
                e["completion_tokens"] for e in usage_data
            )

            return jsonify(
                {
                    "status": "success",
                    "research_id": research_id,
                    "total_cost": round(total_cost, 6),
                    "total_tokens": prompt_total + completion_total,
                    "prompt_tokens": prompt_total,
                    "completion_tokens": completion_total,
                }
            )

    except Exception:
        logger.exception(
            f"Error getting research costs for research: {research_id}"
        )
        return jsonify({"error": "An internal error occurred"}), 500
@metrics_bp.route("/api/cost-analytics")
@login_required
def api_cost_analytics():
    """Get cost analytics across all research sessions.

    Query params:
        period: Time window for the analytics (default "30d").

    Returns:
        JSON payload with an overall cost summary, the ten most expensive
        research sessions, and the research count. On unexpected errors a
        zeroed payload is returned with HTTP 200 so the UI keeps working.
    """
    # Bind a default before the try block: the except handler below
    # references `period`, which previously could be unbound (NameError)
    # if an error occurred before it was parsed from the request.
    period = "30d"
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        period = request.args.get("period", "30d")

        with get_user_db_session(username) as session:
            # Get token usage for the period
            query = session.query(TokenUsage)
            time_condition = get_time_filter_condition(
                period, TokenUsage.timestamp
            )
            if time_condition is not None:
                query = query.filter(time_condition)

            # First check if we have any records to avoid expensive queries
            record_count = query.count()

            if record_count == 0:
                return jsonify(
                    {
                        "status": "success",
                        "period": period,
                        "overview": {
                            "total_cost": 0.0,
                            "total_tokens": 0,
                            "prompt_tokens": 0,
                            "completion_tokens": 0,
                        },
                        "top_expensive_research": [],
                        "research_count": 0,
                        "message": "No token usage data found for this period",
                    }
                )

            # If we have too many records, limit to recent ones to avoid timeout
            if record_count > 1000:
                logger.warning(
                    f"Large dataset detected ({record_count} records), limiting to recent 1000 for performance"
                )
                usage_records = (
                    query.order_by(TokenUsage.timestamp.desc())
                    .limit(1000)
                    .all()
                )
            else:
                usage_records = query.all()

            # Convert ORM rows to plain dicts; getattr() tolerates older
            # records that predate the provider column.
            usage_data = [
                {
                    "model_name": record.model_name,
                    "provider": getattr(record, "provider", None),
                    "prompt_tokens": record.prompt_tokens,
                    "completion_tokens": record.completion_tokens,
                    "research_id": record.research_id,
                    "timestamp": record.timestamp,
                }
                for record in usage_records
            ]

            from ...metrics.pricing.cost_calculator import CostCalculator

            # Use synchronous calculation
            calculator = CostCalculator()

            # Calculate per-record costs once; the per-research grouping
            # below reuses these results instead of recomputing each cost.
            costs = []
            for record in usage_data:
                cost_data = calculator.calculate_cost_sync(
                    record["model_name"],
                    record["prompt_tokens"],
                    record["completion_tokens"],
                )
                costs.append({**record, **cost_data})

            total_cost = sum(c["total_cost"] for c in costs)
            total_prompt_tokens = sum(r["prompt_tokens"] for r in usage_data)
            total_completion_tokens = sum(
                r["completion_tokens"] for r in usage_data
            )

            cost_summary = {
                "total_cost": round(total_cost, 6),
                "total_tokens": total_prompt_tokens + total_completion_tokens,
                "prompt_tokens": total_prompt_tokens,
                "completion_tokens": total_completion_tokens,
            }

            # Cost per research: aggregate the already-computed record costs
            # by research_id (previously every record's cost was calculated
            # a second time here).
            research_totals = {}
            for entry in costs:
                rid = entry["research_id"]
                research_totals[rid] = (
                    research_totals.get(rid, 0.0) + entry["total_cost"]
                )
            research_summaries = {
                rid: {"total_cost": round(total, 6)}
                for rid, total in research_totals.items()
            }

            # Top expensive research sessions
            top_expensive = sorted(
                [
                    (rid, data["total_cost"])
                    for rid, data in research_summaries.items()
                ],
                key=lambda x: x[1],
                reverse=True,
            )[:10]

            return jsonify(
                {
                    "status": "success",
                    "period": period,
                    "overview": cost_summary,
                    "top_expensive_research": [
                        {"research_id": rid, "total_cost": cost}
                        for rid, cost in top_expensive
                    ],
                    "research_count": len(research_summaries),
                }
            )

    except Exception:
        logger.exception("Error getting cost analytics")
        # Return a more graceful error response
        return (
            jsonify(
                {
                    "status": "success",
                    "period": period,
                    "overview": {
                        "total_cost": 0.0,
                        "total_tokens": 0,
                        "prompt_tokens": 0,
                        "completion_tokens": 0,
                    },
                    "top_expensive_research": [],
                    "research_count": 0,
                    "error": "Cost analytics temporarily unavailable",
                }
            ),
            200,
        )  # Return 200 to avoid breaking the UI
@metrics_bp.route("/links")
@login_required
def link_analytics():
    """Render the link analytics page."""
    template_name = "pages/link_analytics.html"
    return render_template_with_defaults(template_name)
@metrics_bp.route("/api/link-analytics")
@login_required
def api_link_analytics():
    """Get link analytics data."""
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        period = request.args.get("period", "30d")

        # Fetch the aggregated link analytics for this user and window.
        analytics = get_link_analytics(period, username)

        return jsonify(
            {
                "status": "success",
                "data": analytics["link_analytics"],
                "period": period,
            }
        )

    except Exception:
        logger.exception("Error getting link analytics")
        error_body = {
            "status": "error",
            "message": "An internal error occurred. Please try again later.",
        }
        return jsonify(error_body), 500
@metrics_bp.route("/api/domain-classifications", methods=["GET"])
@login_required
def api_get_domain_classifications():
    """Get all domain classifications."""
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        all_items = DomainClassifier(username).get_all_classifications()

        return jsonify(
            {
                "status": "success",
                "classifications": [item.to_dict() for item in all_items],
                "total": len(all_items),
            }
        )

    except Exception:
        logger.exception("Error getting domain classifications")
        return jsonify(
            {"status": "error", "message": "Failed to retrieve classifications"}
        ), 500
@metrics_bp.route("/api/domain-classifications/summary", methods=["GET"])
@login_required
def api_get_classifications_summary():
    """Get summary of domain classifications by category."""
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        category_summary = DomainClassifier(username).get_categories_summary()

        return jsonify({"status": "success", "summary": category_summary})

    except Exception:
        logger.exception("Error getting classifications summary")
        return jsonify(
            {"status": "error", "message": "Failed to retrieve summary"}
        ), 500
@metrics_bp.route("/api/domain-classifications/classify", methods=["POST"])
@login_required
def api_classify_domains():
    """Trigger classification of a specific domain or batch classification.

    JSON body:
        domain: Single domain to classify (used when batch is false).
        force_update: Re-classify even if a classification already exists.
        batch: If true, classify all known domains.

    Returns:
        JSON classification result(s), 400 for invalid input or a failed
        single-domain classification, 401 without a session, 500 on
        unexpected errors.
    """
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        data = request.get_json() or {}
        domain = data.get("domain")
        force_update = data.get("force_update", False)
        batch_mode = data.get("batch", False)

        # Get settings snapshot for LLM configuration.
        # get_user_db_session is already imported at module level; the
        # previous local re-import redundantly shadowed it.
        from ...settings.manager import SettingsManager

        with get_user_db_session(username) as db_session:
            settings_manager = SettingsManager(db_session=db_session)
            settings_snapshot = settings_manager.get_all_settings()

        classifier = DomainClassifier(
            username, settings_snapshot=settings_snapshot
        )

        if domain and not batch_mode:
            # Classify single domain
            logger.info(f"Classifying single domain: {domain}")
            classification = classifier.classify_domain(domain, force_update)
            if classification:
                return jsonify(
                    {
                        "status": "success",
                        "classification": classification.to_dict(),
                    }
                )
            else:
                return jsonify(
                    {
                        "status": "error",
                        "message": f"Failed to classify domain: {domain}",
                    }
                ), 400
        elif batch_mode:
            # Batch classification - this should really be a background task
            # For now, we'll just return immediately and let the frontend poll
            logger.info("Starting batch classification of all domains")
            results = classifier.classify_all_domains(force_update)

            return jsonify({"status": "success", "results": results})
        else:
            return jsonify(
                {
                    "status": "error",
                    "message": "Must provide either 'domain' or set 'batch': true",
                }
            ), 400

    except Exception:
        logger.exception("Error classifying domains")
        return jsonify(
            {"status": "error", "message": "Failed to classify domains"}
        ), 500
@metrics_bp.route("/api/domain-classifications/progress", methods=["GET"])
@login_required
def api_classification_progress():
    """Get progress of domain classification task.

    Returns:
        JSON containing total/classified/unclassified domain counts, a
        completion percentage (one decimal place), and the sorted list of
        all known domains. HTTP 401 when no user session exists; HTTP 500
        on unexpected errors.
    """
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        # Get counts of classified vs unclassified domains
        with get_user_db_session(username) as session:
            # Count total unique domains across all resource URLs.
            from urllib.parse import urlparse

            resources = session.query(ResearchResource.url).distinct().all()
            domains = set()

            for (url,) in resources:
                if url:
                    try:
                        parsed = urlparse(url)
                        domain = parsed.netloc.lower()
                        # Strip a leading "www." so duplicates collapse to
                        # one canonical domain.
                        if domain.startswith("www."):
                            domain = domain[4:]
                        if domain:
                            domains.add(domain)
                    except (ValueError, AttributeError):
                        # urlparse can raise ValueError for malformed URLs
                        continue

            # sorted() accepts any iterable; no intermediate list() needed.
            all_domains = sorted(domains)
            total_domains = len(domains)

            # Count classified domains
            classified_count = session.query(DomainClassification).count()

            return jsonify(
                {
                    "status": "success",
                    "progress": {
                        "total_domains": total_domains,
                        "classified": classified_count,
                        "unclassified": total_domains - classified_count,
                        "percentage": round(
                            (classified_count / total_domains * 100)
                            if total_domains > 0
                            else 0,
                            1,
                        ),
                        "all_domains": all_domains,  # Return all domains for classification
                    },
                }
            )

    except Exception:
        logger.exception("Error getting classification progress")
        return jsonify(
            {"status": "error", "message": "Failed to retrieve progress"}
        ), 500