Coverage for src / local_deep_research / web / routes / metrics_routes.py: 98%
696 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
1"""Routes for metrics dashboard."""
3from datetime import datetime, timedelta, UTC
4from typing import Any
5from urllib.parse import urlparse
7from flask import Blueprint, jsonify, request, session as flask_session
8from loguru import logger
9from sqlalchemy import case, func
11from ...database.models import (
12 RateLimitAttempt,
13 RateLimitEstimate,
14 Research,
15 ResearchHistory,
16 ResearchRating,
17 ResearchResource,
18 ResearchStrategy,
19 TokenUsage,
20)
21from ...constants import get_available_strategies
22from ...domain_classifier import DomainClassifier, DomainClassification
23from ...database.session_context import get_user_db_session
24from ...metrics import TokenCounter
25from ...metrics.query_utils import get_period_days, get_time_filter_condition
26from ...metrics.search_tracker import get_search_tracker
27from ...web_search_engines.rate_limiting import get_tracker
28from ...security.decorators import require_json_body
29from ..auth.decorators import login_required
30from ..utils.templates import render_template_with_defaults
32# Create a Blueprint for metrics
33metrics_bp = Blueprint("metrics", __name__, url_prefix="/metrics")
35# NOTE: Routes use flask_session["username"] (not .get()) intentionally.
36# @login_required guarantees the key exists; direct access fails fast
37# if the decorator is ever removed.
40def _extract_domain(url):
41 """Extract normalized domain from URL, stripping www. prefix."""
42 try:
43 parsed = urlparse(url)
44 domain = parsed.netloc.lower()
45 if domain.startswith("www."):
46 domain = domain[4:]
47 return domain if domain else None
48 except (ValueError, AttributeError, TypeError):
49 return None
def _empty_rating_analytics(**extra):
    """Build the zeroed rating-analytics payload, optionally with extra keys.

    Shared by the no-session, no-data, and error responses so the payload
    shape stays consistent in one place.
    """
    payload = {
        "avg_rating": None,
        "total_ratings": 0,
        "rating_distribution": {},
        "satisfaction_stats": {
            "very_satisfied": 0,
            "satisfied": 0,
            "neutral": 0,
            "dissatisfied": 0,
            "very_dissatisfied": 0,
        },
    }
    payload.update(extra)
    return {"rating_analytics": payload}


def get_rating_analytics(period="30d", research_mode="all", username=None):
    """Get rating analytics for the specified period and research mode.

    Args:
        period: Time window key understood by ``get_period_days`` (e.g. "30d").
        research_mode: Accepted for interface parity with sibling analytics
            helpers; not used for filtering here.
        username: Explicit user; falls back to the Flask session when omitted.

    Returns:
        dict with a single "rating_analytics" key containing the average
        rating, total count, per-star distribution and satisfaction buckets.
        Never raises; failures return a zeroed payload instead.
    """
    try:
        if not username:
            username = flask_session.get("username")
        if not username:
            return _empty_rating_analytics(error="No user session")

        days = get_period_days(period)

        with get_user_db_session(username) as session:
            query = session.query(ResearchRating)
            if days:
                cutoff_date = datetime.now(UTC) - timedelta(days=days)
                query = query.filter(ResearchRating.created_at >= cutoff_date)

            ratings = query.all()
            if not ratings:
                return _empty_rating_analytics()

            rating_values = [r.rating for r in ratings]
            avg_rating = sum(rating_values) / len(rating_values)

            # Per-star distribution keyed by "1".."5".
            rating_counts = {
                str(i): rating_values.count(i) for i in range(1, 6)
            }

            satisfaction_stats = {
                "very_satisfied": rating_values.count(5),
                "satisfied": rating_values.count(4),
                "neutral": rating_values.count(3),
                "dissatisfied": rating_values.count(2),
                "very_dissatisfied": rating_values.count(1),
            }

            return {
                "rating_analytics": {
                    "avg_rating": round(avg_rating, 1),
                    "total_ratings": len(ratings),
                    "rating_distribution": rating_counts,
                    "satisfaction_stats": satisfaction_stats,
                }
            }
    except Exception:
        logger.exception("Error getting rating analytics")
        return _empty_rating_analytics()
def _empty_link_analytics(**extra):
    """Build the zeroed link-analytics payload, optionally with extra keys."""
    payload = {
        "top_domains": [],
        "total_unique_domains": 0,
        "avg_links_per_research": 0,
        "domain_distribution": {},
        "source_type_analysis": {},
        "academic_vs_general": {},
        "total_links": 0,
    }
    payload.update(extra)
    return {"link_analytics": payload}


def get_link_analytics(period="30d", username=None):
    """Get link analytics from research resources.

    Aggregates the URLs saved with each research into per-domain usage
    counts, LLM-based category distributions, daily temporal trends and
    per-domain metrics for the dashboard.

    Args:
        period: Time window key understood by ``get_period_days``.
        username: Explicit user; falls back to the Flask session when omitted.

    Returns:
        dict with a single "link_analytics" key. Never raises; failures
        return a zeroed payload with an "error" key.
    """
    try:
        if not username:
            username = flask_session.get("username")
        if not username:
            return _empty_link_analytics(error="No user session")

        days = get_period_days(period)

        with get_user_db_session(username) as session:
            query = session.query(ResearchResource)
            if days:
                cutoff_date = datetime.now(UTC) - timedelta(days=days)
                # created_at is compared as an ISO string here — presumably
                # the column stores ISO text; verify against the model.
                query = query.filter(
                    ResearchResource.created_at >= cutoff_date.isoformat()
                )

            resources = query.all()
            if not resources:
                return _empty_link_analytics()

            domain_counts: dict[str, Any] = {}
            # Which research IDs used each domain.
            domain_researches: dict[str, Any] = {}
            source_types: dict[str, Any] = {}
            # Daily link counts for the trend chart.
            temporal_data: dict[str, Any] = {}
            # Generic category counting from LLM classifications.
            category_counts: dict[str, Any] = {}

            # First pass: collect all domains from resources.
            all_domains = set()
            for resource in resources:
                if resource.url:
                    domain = _extract_domain(resource.url)
                    if domain:
                        all_domains.add(domain)

            # Batch load all domain classifications in one query (fix N+1).
            domain_classifications_map = {}
            if all_domains:
                all_classifications = (
                    session.query(DomainClassification)
                    .filter(DomainClassification.domain.in_(all_domains))
                    .all()
                )
                for classification in all_classifications:
                    domain_classifications_map[classification.domain] = (
                        classification
                    )

            # Second pass: aggregate per-resource stats using the
            # pre-loaded classifications.
            for resource in resources:
                if not resource.url:
                    continue
                try:
                    domain = _extract_domain(resource.url)
                    if not domain:
                        continue

                    domain_counts[domain] = domain_counts.get(domain, 0) + 1
                    domain_researches.setdefault(domain, set()).add(
                        resource.research_id
                    )

                    if resource.created_at:
                        date_str = resource.created_at[:10]  # YYYY-MM-DD
                        temporal_data[date_str] = (
                            temporal_data.get(date_str, 0) + 1
                        )

                    classification = domain_classifications_map.get(domain)
                    category = (
                        classification.category
                        if classification
                        else "Unclassified"
                    )
                    category_counts[category] = (
                        category_counts.get(category, 0) + 1
                    )

                    if resource.source_type:
                        source_types[resource.source_type] = (
                            source_types.get(resource.source_type, 0) + 1
                        )
                except Exception:
                    logger.warning(f"Error parsing URL {resource.url}")

            # Sort domains by count and take the top 10.
            sorted_domains = sorted(
                domain_counts.items(), key=lambda x: x[1], reverse=True
            )
            top_10_domains = sorted_domains[:10]
            top_10_count = sum(count for _, count in top_10_domains)
            others_count = len(resources) - top_10_count

            unique_research_ids = {r.research_id for r in resources}
            avg_links = (
                len(resources) / len(unique_research_ids)
                if unique_research_ids
                else 0
            )

            # Temporal trend sorted chronologically.
            temporal_trend = sorted(
                (
                    {"date": date, "count": count}
                    for date, count in temporal_data.items()
                ),
                key=lambda x: x["date"],
            )

            # Classification summaries from the pre-loaded data.
            domain_classifications = {
                domain: {
                    "category": classification.category,
                    "subcategory": classification.subcategory,
                    "confidence": classification.confidence,
                }
                for domain, classification in domain_classifications_map.items()
            }

            # Batch-load up to 3 recent researches per top domain (fix N+1).
            domain_recent_research = {}
            all_research_ids = []
            domain_research_id_lists = {}
            for domain, _ in top_10_domains:
                if domain in domain_researches:
                    ids = list(domain_researches[domain])[:3]
                    domain_research_id_lists[domain] = ids
                    all_research_ids.extend(ids)

            research_by_id = {}
            if all_research_ids:
                researches = (
                    session.query(ResearchHistory)
                    .filter(ResearchHistory.id.in_(all_research_ids))
                    .all()
                )
                research_by_id = {r.id: r for r in researches}

            for domain, ids in domain_research_id_lists.items():
                domain_recent_research[domain] = [
                    {
                        "id": r_id,
                        "query": research_by_id[r_id].query[:50]
                        if research_by_id.get(r_id)
                        and research_by_id[r_id].query
                        else "Research",
                    }
                    for r_id in ids
                    if r_id in research_by_id
                ]

            return {
                "link_analytics": {
                    "top_domains": [
                        {
                            "domain": domain,
                            "count": count,
                            "percentage": round(
                                count / len(resources) * 100, 1
                            ),
                            "research_count": len(
                                domain_researches.get(domain, set())
                            ),
                            "recent_researches": domain_recent_research.get(
                                domain, []
                            ),
                            "classification": domain_classifications.get(
                                domain, None
                            ),
                        }
                        for domain, count in top_10_domains
                    ],
                    "total_unique_domains": len(domain_counts),
                    "avg_links_per_research": round(avg_links, 1),
                    "domain_distribution": {
                        "top_10": top_10_count,
                        "others": others_count,
                    },
                    "source_type_analysis": source_types,
                    "category_distribution": category_counts,
                    # Generic pie chart data - use whatever LLM classifier outputs
                    "domain_categories": category_counts,
                    "total_links": len(resources),
                    "total_researches": len(unique_research_ids),
                    "temporal_trend": temporal_trend,
                    "domain_metrics": {
                        domain: {
                            "usage_count": count,
                            "usage_percentage": round(
                                count / len(resources) * 100, 1
                            ),
                            "research_diversity": len(
                                domain_researches.get(domain, set())
                            ),
                            "frequency_rank": rank + 1,
                        }
                        for rank, (domain, count) in enumerate(top_10_domains)
                    },
                }
            }
    except Exception:
        logger.exception("Error getting link analytics")
        return _empty_link_analytics(
            error="Failed to retrieve link analytics"
        )
def _empty_strategy_analytics(**extra):
    """Build the zeroed strategy-analytics payload, optionally with extra keys."""
    payload = {
        "total_research_with_strategy": 0,
        "total_research": 0,
        "most_popular_strategy": None,
        "strategy_usage": [],
        "strategy_distribution": {},
        "available_strategies": get_available_strategies(),
    }
    payload.update(extra)
    return {"strategy_analytics": payload}


def get_strategy_analytics(period="30d", username=None):
    """Get strategy usage analytics for the specified period.

    Args:
        period: Time window key understood by ``get_period_days``.
        username: Explicit user; falls back to the Flask session when omitted.

    Returns:
        dict with a single "strategy_analytics" key: per-strategy usage
        counts and percentages, the most popular strategy, and the list of
        available strategies. Never raises; failures return a zeroed payload.
    """
    try:
        if not username:
            username = flask_session.get("username")
        if not username:
            return _empty_strategy_analytics(error="No user session")

        days = get_period_days(period)

        with get_user_db_session(username) as session:
            # Bail out early if strategy tracking has never recorded a row.
            if session.query(ResearchStrategy).count() == 0:
                logger.warning("No research strategies found in database")
                return _empty_strategy_analytics(
                    message="Strategy tracking not yet available - run a research to start tracking"
                )

            # Per-strategy usage counts (no JOIN needed; we only count rows).
            query = session.query(
                ResearchStrategy.strategy_name,
                func.count(ResearchStrategy.id).label("usage_count"),
            )

            cutoff_date = None
            if days:
                cutoff_date = datetime.now(UTC) - timedelta(days=days)
                query = query.filter(
                    ResearchStrategy.created_at >= cutoff_date
                )

            strategy_results = (
                query.group_by(ResearchStrategy.strategy_name)
                .order_by(func.count(ResearchStrategy.id).desc())
                .all()
            )

            # Total rows in the same window, for percentage calculation.
            total_query = session.query(ResearchStrategy)
            if cutoff_date is not None:
                total_query = total_query.filter(
                    ResearchStrategy.created_at >= cutoff_date
                )
            total_research = total_query.count()

            strategy_usage = []
            strategy_distribution = {}
            for strategy_name, usage_count in strategy_results:
                percentage = (
                    (usage_count / total_research * 100)
                    if total_research > 0
                    else 0
                )
                strategy_usage.append(
                    {
                        "strategy": strategy_name,
                        "count": usage_count,
                        "percentage": round(percentage, 1),
                    }
                )
                strategy_distribution[strategy_name] = usage_count

            # Results are ordered by usage desc, so the first is most popular.
            most_popular = (
                strategy_usage[0]["strategy"] if strategy_usage else None
            )

            return {
                "strategy_analytics": {
                    "total_research_with_strategy": sum(
                        item["count"] for item in strategy_usage
                    ),
                    "total_research": total_research,
                    "most_popular_strategy": most_popular,
                    "strategy_usage": strategy_usage,
                    "strategy_distribution": strategy_distribution,
                    "available_strategies": get_available_strategies(),
                }
            }
    except Exception:
        logger.exception("Error getting strategy analytics")
        return _empty_strategy_analytics(
            error="Failed to retrieve strategy data"
        )
def _empty_rate_limiting_analytics(**extra):
    """Build the zeroed rate-limiting payload, optionally with extra keys."""
    payload = {
        "total_attempts": 0,
        "successful_attempts": 0,
        "failed_attempts": 0,
        "success_rate": 0,
        "rate_limit_events": 0,
        "avg_wait_time": 0,
        "avg_successful_wait": 0,
        "tracked_engines": 0,
        "engine_stats": [],
        "total_engines_tracked": 0,
        "healthy_engines": 0,
        "degraded_engines": 0,
        "poor_engines": 0,
    }
    payload.update(extra)
    return {"rate_limiting": payload}


# Period key -> seconds of history to include; unknown keys ("all") disable
# the cutoff entirely.
_RATE_LIMIT_PERIOD_SECONDS = {
    "7d": 7 * 24 * 3600,
    "30d": 30 * 24 * 3600,
    "3m": 90 * 24 * 3600,
    "1y": 365 * 24 * 3600,
}


def get_rate_limiting_analytics(period="30d", username=None):
    """Get rate limiting analytics for the specified period.

    Args:
        period: One of "7d", "30d", "3m", "1y"; anything else means "all".
        username: Explicit user; falls back to the Flask session when omitted.

    Returns:
        dict with a single "rate_limiting" key: attempt totals, success
        rates, wait-time averages and per-engine health stats. Never raises;
        failures return a zeroed payload.
    """
    try:
        if not username:
            username = flask_session.get("username")
        if not username:
            return _empty_rate_limiting_analytics(error="No user session")

        import time

        # RateLimitAttempt.timestamp is compared against a Unix epoch float;
        # cutoff_time == 0 disables filtering.
        window = _RATE_LIMIT_PERIOD_SECONDS.get(period)
        cutoff_time = time.time() - window if window else 0

        with get_user_db_session(username) as session:
            rate_limit_query = session.query(RateLimitAttempt)
            if cutoff_time > 0:
                rate_limit_query = rate_limit_query.filter(
                    RateLimitAttempt.timestamp >= cutoff_time
                )

            total_attempts = rate_limit_query.count()
            successful_attempts = rate_limit_query.filter(
                RateLimitAttempt.success
            ).count()
            failed_attempts = total_attempts - successful_attempts

            # Failures explicitly attributed to rate limiting.
            rate_limit_events = rate_limit_query.filter(
                ~RateLimitAttempt.success,
                RateLimitAttempt.error_type == "RateLimitError",
            ).count()

            logger.info(
                f"Rate limit attempts in database: total={total_attempts}, successful={successful_attempts}"
            )

            # Materialize attempts once for the wait-time and per-engine math.
            attempts = rate_limit_query.all()

            if attempts:
                avg_wait_time = sum(a.wait_time for a in attempts) / len(
                    attempts
                )
                successful_wait_times = [
                    a.wait_time for a in attempts if a.success
                ]
                avg_successful_wait = (
                    sum(successful_wait_times) / len(successful_wait_times)
                    if successful_wait_times
                    else 0
                )
            else:
                avg_wait_time = 0
                avg_successful_wait = 0

            # Distinct engine types seen in the window.
            tracked_engines_query = session.query(
                func.count(func.distinct(RateLimitAttempt.engine_type))
            )
            if cutoff_time > 0:
                tracked_engines_query = tracked_engines_query.filter(
                    RateLimitAttempt.timestamp >= cutoff_time
                )
            tracked_engines = tracked_engines_query.scalar() or 0

            engine_stats = []

            engine_types_query = session.query(
                RateLimitAttempt.engine_type
            ).distinct()
            if cutoff_time > 0:
                engine_types_query = engine_types_query.filter(
                    RateLimitAttempt.timestamp >= cutoff_time
                )
            engine_types = [
                row.engine_type for row in engine_types_query.all()
            ]

            # Preload estimates for relevant engines to avoid N+1 queries.
            estimates_by_engine = {}
            if engine_types:
                all_estimates = (
                    session.query(RateLimitEstimate)
                    .filter(RateLimitEstimate.engine_type.in_(engine_types))
                    .all()
                )
                estimates_by_engine = {
                    e.engine_type: e for e in all_estimates
                }

            for engine_type in engine_types:
                engine_attempts_list = [
                    a for a in attempts if a.engine_type == engine_type
                ]
                engine_attempts = len(engine_attempts_list)
                engine_success = len(
                    [a for a in engine_attempts_list if a.success]
                )

                estimate = estimates_by_engine.get(engine_type)

                recent_success_rate = (
                    (engine_success / engine_attempts * 100)
                    if engine_attempts > 0
                    else 0
                )

                # Health status: prefer the stored estimate's success rate
                # (0-1 scale) over the recent window's rate (percent scale).
                if estimate:
                    if estimate.success_rate > 0.8:
                        status = "healthy"
                    elif estimate.success_rate > 0.5:
                        status = "degraded"
                    else:
                        status = "poor"
                elif recent_success_rate > 80:
                    status = "healthy"
                elif recent_success_rate > 50:
                    status = "degraded"
                else:
                    status = "poor"

                engine_stat = {
                    "engine": engine_type,
                    "base_wait": estimate.base_wait_seconds
                    if estimate
                    else 0.0,
                    "base_wait_seconds": round(
                        estimate.base_wait_seconds if estimate else 0.0, 2
                    ),
                    "min_wait_seconds": round(
                        estimate.min_wait_seconds if estimate else 0.0, 2
                    ),
                    "max_wait_seconds": round(
                        estimate.max_wait_seconds if estimate else 0.0, 2
                    ),
                    "success_rate": round(estimate.success_rate * 100, 1)
                    if estimate
                    else recent_success_rate,
                    "total_attempts": estimate.total_attempts
                    if estimate
                    else engine_attempts,
                    "recent_attempts": engine_attempts,
                    "recent_success_rate": round(recent_success_rate, 1),
                    "attempts": engine_attempts,
                    "status": status,
                }

                if estimate:
                    # Uses the module-level datetime import; the previous
                    # per-iteration re-import was redundant. ISO format
                    # already includes the timezone offset.
                    engine_stat["last_updated"] = datetime.fromtimestamp(
                        estimate.last_updated, UTC
                    ).isoformat()
                else:
                    engine_stat["last_updated"] = "Never"

                engine_stats.append(engine_stat)

            logger.info(
                f"Tracked engines: {tracked_engines}, engine_stats: {engine_stats}"
            )

            result = {
                "rate_limiting": {
                    "total_attempts": total_attempts,
                    "successful_attempts": successful_attempts,
                    "failed_attempts": failed_attempts,
                    "success_rate": (
                        successful_attempts / total_attempts * 100
                    )
                    if total_attempts > 0
                    else 0,
                    "rate_limit_events": rate_limit_events,
                    "avg_wait_time": round(float(avg_wait_time), 2),
                    "avg_successful_wait": round(
                        float(avg_successful_wait), 2
                    ),
                    "tracked_engines": tracked_engines,
                    "engine_stats": engine_stats,
                    "total_engines_tracked": tracked_engines,
                    "healthy_engines": len(
                        [s for s in engine_stats if s["status"] == "healthy"]
                    ),
                    "degraded_engines": len(
                        [s for s in engine_stats if s["status"] == "degraded"]
                    ),
                    "poor_engines": len(
                        [s for s in engine_stats if s["status"] == "poor"]
                    ),
                }
            }

            logger.debug(
                f"Returning rate_limiting_analytics result: {result}"
            )
            return result
    except Exception:
        logger.exception("Error getting rate limiting analytics")
        return _empty_rate_limiting_analytics(
            error="An internal error occurred while processing the request."
        )
@metrics_bp.route("/")
@login_required
def metrics_dashboard():
    """Render the metrics dashboard page.

    No server-side data is passed to the template; presumably the page
    fetches its data from the /metrics/api/* endpoints in this blueprint.
    """
    return render_template_with_defaults("pages/metrics.html")
@metrics_bp.route("/context-overflow")
@login_required
def context_overflow_page():
    """Context overflow analytics page.

    Renders the template only; no server-side data is passed in.
    """
    return render_template_with_defaults("pages/context_overflow.html")
@metrics_bp.route("/api/metrics")
@login_required
def api_metrics():
    """Get overall metrics data."""
    logger.debug("api_metrics endpoint called")
    try:
        username = flask_session["username"]
        period = request.args.get("period", "30d")
        research_mode = request.args.get("mode", "all")

        counter = TokenCounter()
        tracker = get_search_tracker()

        # Token and search metrics for the requested window/mode.
        token_metrics = counter.get_overall_metrics(
            period=period, research_mode=research_mode
        )
        search_metrics = tracker.get_search_metrics(
            period=period,
            research_mode=research_mode,
            username=username,
        )

        # User satisfaction: average rating and count within the period.
        try:
            with get_user_db_session(username) as session:
                ratings_query = session.query(ResearchRating)
                time_condition = get_time_filter_condition(
                    period, ResearchRating.created_at
                )
                if time_condition is not None:
                    ratings_query = ratings_query.filter(time_condition)

                avg_rating = ratings_query.with_entities(
                    func.avg(ResearchRating.rating).label("avg_rating")
                ).scalar()
                total_ratings = ratings_query.count()

                user_satisfaction = {
                    "avg_rating": round(avg_rating, 1)
                    if avg_rating
                    else None,
                    "total_ratings": total_ratings,
                }
        except Exception:
            logger.warning("Error getting user satisfaction data")
            user_satisfaction = {"avg_rating": None, "total_ratings": 0}

        strategy_data = get_strategy_analytics(period, username)
        logger.debug(f"strategy_data keys: {list(strategy_data.keys())}")

        rate_limiting_data = get_rate_limiting_analytics(period, username)
        logger.debug(f"rate_limiting_data: {rate_limiting_data}")
        logger.debug(
            f"rate_limiting_data keys: {list(rate_limiting_data.keys())}"
        )

        # Merge all sections into one response payload.
        combined_metrics = {
            **token_metrics,
            **search_metrics,
            **strategy_data,
            **rate_limiting_data,
            "user_satisfaction": user_satisfaction,
        }

        logger.debug(f"combined_metrics keys: {list(combined_metrics.keys())}")
        logger.debug(
            f"combined_metrics['rate_limiting']: {combined_metrics.get('rate_limiting', 'NOT FOUND')}"
        )

        return jsonify(
            {
                "status": "success",
                "metrics": combined_metrics,
                "period": period,
                "research_mode": research_mode,
            }
        )
    except Exception:
        logger.exception("Error getting metrics")
        error_payload = {
            "status": "error",
            "message": "An internal error occurred. Please try again later.",
        }
        return jsonify(error_payload), 500
@metrics_bp.route("/api/rate-limiting")
@login_required
def api_rate_limiting_metrics():
    """Get detailed rate limiting metrics.

    Query params:
        period: Time window key (default "30d").
    """
    # Debug-level trace, consistent with api_metrics (was a "DEBUG:"-prefixed
    # logger.info call).
    logger.debug("api_rate_limiting_metrics endpoint called")
    try:
        username = flask_session["username"]
        period = request.args.get("period", "30d")
        rate_limiting_data = get_rate_limiting_analytics(period, username)

        return jsonify(
            {"status": "success", "data": rate_limiting_data, "period": period}
        )
    except Exception:
        logger.exception("Error getting rate limiting metrics")
        return jsonify(
            {
                "status": "error",
                "message": "Failed to retrieve rate limiting metrics",
            }
        ), 500
@metrics_bp.route("/api/rate-limiting/current")
@login_required
def api_current_rate_limits():
    """Get current rate limit estimates for all engines."""
    try:
        stats = get_tracker().get_stats()

        current_limits = []
        for (
            engine_type,
            base_wait,
            min_wait,
            max_wait,
            last_updated,
            total_attempts,
            success_rate,
        ) in stats:
            # Health bucket from the engine's long-running success rate.
            if success_rate > 0.8:
                status = "healthy"
            elif success_rate > 0.5:
                status = "degraded"
            else:
                status = "poor"

            # ISO format already includes the timezone offset.
            updated_iso = datetime.fromtimestamp(
                last_updated, UTC
            ).isoformat()

            current_limits.append(
                {
                    "engine_type": engine_type,
                    "base_wait_seconds": round(base_wait, 2),
                    "min_wait_seconds": round(min_wait, 2),
                    "max_wait_seconds": round(max_wait, 2),
                    "success_rate": round(success_rate * 100, 1),
                    "total_attempts": total_attempts,
                    "last_updated": updated_iso,
                    "status": status,
                }
            )

        return jsonify(
            {
                "status": "success",
                "current_limits": current_limits,
                "timestamp": datetime.now(UTC).isoformat(),
            }
        )
    except Exception:
        logger.exception("Error getting current rate limits")
        return jsonify(
            {
                "status": "error",
                "message": "Failed to retrieve current rate limits",
            }
        ), 500
@metrics_bp.route("/api/metrics/research/<string:research_id>/links")
@login_required
def api_research_link_metrics(research_id):
    """Get link analytics for a specific research."""
    try:
        username = flask_session["username"]

        with get_user_db_session(username) as session:
            # All resources saved for this one research.
            resources = (
                session.query(ResearchResource)
                .filter(ResearchResource.research_id == research_id)
                .all()
            )

            if not resources:
                # No links recorded: return an empty analytics payload.
                return jsonify(
                    {
                        "status": "success",
                        "data": {
                            "total_links": 0,
                            "unique_domains": 0,
                            "domains": [],
                            "category_distribution": {},
                            "domain_categories": {},
                            "resources": [],
                        },
                    }
                )

            domain_counts: dict[str, Any] = {}
            # Generic category counting from LLM classifications.
            category_counts: dict[str, Any] = {}

            # Pass 1: gather every distinct domain.
            all_domains = set()
            for resource in resources:
                if resource.url:
                    domain = _extract_domain(resource.url)
                    if domain:
                        all_domains.add(domain)

            # Batch load classifications in a single query (no N+1).
            domain_classifications_map = {}
            if all_domains:
                classification_rows = (
                    session.query(DomainClassification)
                    .filter(DomainClassification.domain.in_(all_domains))
                    .all()
                )
                domain_classifications_map = {
                    row.domain: row for row in classification_rows
                }

            # Pass 2: count per-domain usage and categories.
            for resource in resources:
                if not resource.url:
                    continue
                try:
                    domain = _extract_domain(resource.url)
                    if not domain:
                        continue

                    domain_counts[domain] = domain_counts.get(domain, 0) + 1

                    classification = domain_classifications_map.get(domain)
                    if classification:
                        category = classification.category
                        category_counts[category] = (
                            category_counts.get(category, 0) + 1
                        )
                    else:
                        category_counts["Unclassified"] = (
                            category_counts.get("Unclassified", 0) + 1
                        )
                except (AttributeError, KeyError) as e:
                    logger.debug(f"Error classifying domain {domain}: {e}")

            sorted_domains = sorted(
                domain_counts.items(), key=lambda x: x[1], reverse=True
            )

            return jsonify(
                {
                    "status": "success",
                    "data": {
                        "total_links": len(resources),
                        "unique_domains": len(domain_counts),
                        "domains": [
                            {
                                "domain": domain,
                                "count": count,
                                "percentage": round(
                                    count / len(resources) * 100, 1
                                ),
                            }
                            # Top 20 domains only.
                            for domain, count in sorted_domains[:20]
                        ],
                        "category_distribution": category_counts,
                        # Generic categories from LLM.
                        "domain_categories": category_counts,
                        "resources": [
                            {
                                "title": r.title or "Untitled",
                                "url": r.url,
                                "preview": r.content_preview[:200]
                                if r.content_preview
                                else None,
                            }
                            # First 10 resources only.
                            for r in resources[:10]
                        ],
                    },
                }
            )
    except Exception:
        logger.exception("Error getting research link metrics")
        return jsonify(
            {"status": "error", "message": "Failed to retrieve link metrics"}
        ), 500
@metrics_bp.route("/api/metrics/research/<string:research_id>")
@login_required
def api_research_metrics(research_id):
    """Get metrics for a specific research."""
    try:
        research_metrics = TokenCounter().get_research_metrics(research_id)
        return jsonify({"status": "success", "metrics": research_metrics})
    except Exception:
        logger.exception("Error getting research metrics")
        error_payload = {
            "status": "error",
            "message": "An internal error occurred. Please try again later.",
        }
        return jsonify(error_payload), 500
@metrics_bp.route("/api/metrics/research/<string:research_id>/timeline")
@login_required
def api_research_timeline_metrics(research_id):
    """Return timeline metrics for a single research session."""
    try:
        counter = TokenCounter()
        timeline = counter.get_research_timeline_metrics(research_id)
        return jsonify({"status": "success", "metrics": timeline})
    except Exception:
        logger.exception("Error getting research timeline metrics")
        error_payload = {
            "status": "error",
            "message": "An internal error occurred. Please try again later.",
        }
        return jsonify(error_payload), 500
@metrics_bp.route("/api/metrics/research/<string:research_id>/search")
@login_required
def api_research_search_metrics(research_id):
    """Return search metrics for a single research session."""
    try:
        user = flask_session["username"]
        tracker = get_search_tracker()
        metrics = tracker.get_research_search_metrics(
            research_id, username=user
        )
        return jsonify({"status": "success", "metrics": metrics})
    except Exception:
        logger.exception("Error getting research search metrics")
        error_payload = {
            "status": "error",
            "message": "An internal error occurred. Please try again later.",
        }
        return jsonify(error_payload), 500
@metrics_bp.route("/api/metrics/enhanced")
@login_required
def api_enhanced_metrics():
    """Return enhanced Phase 1 tracking metrics.

    Combines token metrics, the search time series for the chart, and
    rating analytics into a single payload, filtered by the requested
    period and research mode.
    """
    try:
        period = request.args.get("period", "30d")
        mode = request.args.get("mode", "all")
        user = flask_session["username"]

        metrics = TokenCounter().get_enhanced_metrics(
            period=period, research_mode=mode
        )

        # Search time series feeds the dashboard chart.
        metrics["search_time_series"] = (
            get_search_tracker().get_search_time_series(
                period=period,
                research_mode=mode,
                username=user,
            )
        )

        # Merge rating analytics into the same payload.
        metrics.update(get_rating_analytics(period, mode, user))

        return jsonify(
            {
                "status": "success",
                "metrics": metrics,
                "period": period,
                "research_mode": mode,
            }
        )
    except Exception:
        logger.exception("Error getting enhanced metrics")
        error_payload = {
            "status": "error",
            "message": "An internal error occurred. Please try again later.",
        }
        return jsonify(error_payload), 500
@metrics_bp.route("/api/ratings/<string:research_id>", methods=["GET"])
@login_required
def api_get_research_rating(research_id):
    """Fetch the stored rating for one research session, if any."""
    try:
        user = flask_session["username"]

        with get_user_db_session(user) as db:
            row = (
                db.query(ResearchRating)
                .filter_by(research_id=research_id)
                .first()
            )

            if row is None:
                # No rating recorded for this research session yet.
                return jsonify({"status": "success", "rating": None})

            return jsonify(
                {
                    "status": "success",
                    "rating": row.rating,
                    "created_at": row.created_at.isoformat(),
                    "updated_at": row.updated_at.isoformat(),
                }
            )

    except Exception:
        logger.exception("Error getting research rating")
        error_payload = {
            "status": "error",
            "message": "An internal error occurred. Please try again later.",
        }
        return jsonify(error_payload), 500
@metrics_bp.route("/api/ratings/<string:research_id>", methods=["POST"])
@login_required
@require_json_body(error_format="status")
def api_save_research_rating(research_id):
    """Save or update the rating for a specific research session.

    Expects JSON body ``{"rating": <int 1-5>}``. Creates a new
    ResearchRating row or updates the existing one for ``research_id``.
    Returns 400 on invalid ratings, 500 on internal errors.
    """
    try:
        username = flask_session["username"]

        data = request.get_json()
        rating_value = data.get("rating")

        # Reject bools explicitly: isinstance(True, int) is True in Python,
        # so without this check True/False would slip through as 1/0.
        if (
            isinstance(rating_value, bool)
            or not isinstance(rating_value, int)
            or not 1 <= rating_value <= 5
        ):
            return (
                jsonify(
                    {
                        "status": "error",
                        "message": "Rating must be an integer between 1 and 5",
                    }
                ),
                400,
            )

        with get_user_db_session(username) as session:
            # Upsert: update the existing rating row if one exists.
            existing_rating = (
                session.query(ResearchRating)
                .filter_by(research_id=research_id)
                .first()
            )

            if existing_rating:
                existing_rating.rating = rating_value
                # Let the database set the update timestamp.
                existing_rating.updated_at = func.now()
            else:
                new_rating = ResearchRating(
                    research_id=research_id, rating=rating_value
                )
                session.add(new_rating)

            session.commit()

        return jsonify(
            {
                "status": "success",
                "message": "Rating saved successfully",
                "rating": rating_value,
            }
        )

    except Exception:
        logger.exception("Error saving research rating")
        return (
            jsonify(
                {
                    "status": "error",
                    "message": "An internal error occurred. Please try again later.",
                }
            ),
            500,
        )
@metrics_bp.route("/star-reviews")
@login_required
def star_reviews():
    """Render the star-reviews metrics page."""
    template_name = "pages/star_reviews.html"
    return render_template_with_defaults(template_name)
@metrics_bp.route("/costs")
@login_required
def cost_analytics():
    """Render the cost-analytics page."""
    template_name = "pages/cost_analytics.html"
    return render_template_with_defaults(template_name)
@metrics_bp.route("/api/star-reviews")
@login_required
def api_star_reviews():
    """Get star reviews analytics data.

    Aggregates ResearchRating rows for the requested period into:
    overall stats with a 1-5 distribution, per-LLM-model and
    per-search-engine breakdowns (both joined via TokenUsage), daily
    rating trends, and the 20 most recent ratings with research details.
    """
    try:
        username = flask_session["username"]

        # Period filter (e.g. "7d", "30d") from the query string.
        period = request.args.get("period", "30d")

        with get_user_db_session(username) as session:
            # Build base query with time filter
            # NOTE(review): base_query is constructed but never executed;
            # every aggregate below re-applies time_condition itself.
            base_query = session.query(ResearchRating)
            time_condition = get_time_filter_condition(
                period, ResearchRating.created_at
            )
            if time_condition is not None:
                base_query = base_query.filter(time_condition)

            # Overall rating statistics: average, total, and one summed
            # CASE expression per star value for the distribution.
            overall_stats = session.query(
                func.avg(ResearchRating.rating).label("avg_rating"),
                func.count(ResearchRating.rating).label("total_ratings"),
                func.sum(case((ResearchRating.rating == 5, 1), else_=0)).label(
                    "five_star"
                ),
                func.sum(case((ResearchRating.rating == 4, 1), else_=0)).label(
                    "four_star"
                ),
                func.sum(case((ResearchRating.rating == 3, 1), else_=0)).label(
                    "three_star"
                ),
                func.sum(case((ResearchRating.rating == 2, 1), else_=0)).label(
                    "two_star"
                ),
                func.sum(case((ResearchRating.rating == 1, 1), else_=0)).label(
                    "one_star"
                ),
            )

            if time_condition is not None:
                overall_stats = overall_stats.filter(time_condition)

            overall_stats = overall_stats.first()

            # Ratings by LLM model (get from token_usage since Research doesn't have model field)
            # NOTE(review): the outer join can count a rating once per
            # TokenUsage row for the same research — confirm whether this
            # rating_count inflation is acceptable.
            llm_ratings_query = session.query(
                func.coalesce(TokenUsage.model_name, "Unknown").label("model"),
                func.avg(ResearchRating.rating).label("avg_rating"),
                func.count(ResearchRating.rating).label("rating_count"),
                func.sum(case((ResearchRating.rating >= 4, 1), else_=0)).label(
                    "positive_ratings"
                ),
            ).outerjoin(
                TokenUsage, ResearchRating.research_id == TokenUsage.research_id
            )

            if time_condition is not None:
                llm_ratings_query = llm_ratings_query.filter(time_condition)

            llm_ratings = (
                llm_ratings_query.group_by(TokenUsage.model_name)
                .order_by(func.avg(ResearchRating.rating).desc())
                .all()
            )

            # Ratings by search engine (join with token_usage to get search engine info)
            search_engine_ratings_query = session.query(
                func.coalesce(
                    TokenUsage.search_engine_selected, "Unknown"
                ).label("search_engine"),
                func.avg(ResearchRating.rating).label("avg_rating"),
                func.count(ResearchRating.rating).label("rating_count"),
                func.sum(case((ResearchRating.rating >= 4, 1), else_=0)).label(
                    "positive_ratings"
                ),
            ).outerjoin(
                TokenUsage, ResearchRating.research_id == TokenUsage.research_id
            )

            if time_condition is not None:
                search_engine_ratings_query = (
                    search_engine_ratings_query.filter(time_condition)
                )

            search_engine_ratings = (
                search_engine_ratings_query.group_by(
                    TokenUsage.search_engine_selected
                )
                .having(func.count(ResearchRating.rating) > 0)
                .order_by(func.avg(ResearchRating.rating).desc())
                .all()
            )

            # Rating trends over time: one row per calendar day.
            rating_trends_query = session.query(
                func.date(ResearchRating.created_at).label("date"),
                func.avg(ResearchRating.rating).label("avg_rating"),
                func.count(ResearchRating.rating).label("daily_count"),
            )

            if time_condition is not None:
                rating_trends_query = rating_trends_query.filter(time_condition)

            rating_trends = (
                rating_trends_query.group_by(
                    func.date(ResearchRating.created_at)
                )
                .order_by("date")
                .all()
            )

            # Recent ratings with research details
            # NOTE(review): both ResearchRating.created_at and
            # Research.created_at are selected; the attribute access
            # below resolves to one of the duplicate labels — confirm
            # which timestamp is intended.
            recent_ratings_query = (
                session.query(
                    ResearchRating.rating,
                    ResearchRating.created_at,
                    ResearchRating.research_id,
                    Research.query,
                    Research.mode,
                    TokenUsage.model_name,
                    Research.created_at,
                )
                .outerjoin(Research, ResearchRating.research_id == Research.id)
                .outerjoin(
                    TokenUsage,
                    ResearchRating.research_id == TokenUsage.research_id,
                )
            )

            if time_condition is not None:
                recent_ratings_query = recent_ratings_query.filter(
                    time_condition
                )

            recent_ratings = (
                recent_ratings_query.order_by(ResearchRating.created_at.desc())
                .limit(20)
                .all()
            )

            return jsonify(
                {
                    "overall_stats": {
                        "avg_rating": round(overall_stats.avg_rating or 0, 2),
                        "total_ratings": overall_stats.total_ratings or 0,
                        "rating_distribution": {
                            "5": overall_stats.five_star or 0,
                            "4": overall_stats.four_star or 0,
                            "3": overall_stats.three_star or 0,
                            "2": overall_stats.two_star or 0,
                            "1": overall_stats.one_star or 0,
                        },
                    },
                    "llm_ratings": [
                        {
                            "model": rating.model,
                            "avg_rating": round(rating.avg_rating or 0, 2),
                            "rating_count": rating.rating_count or 0,
                            "positive_ratings": rating.positive_ratings or 0,
                            # max(..., 1) guards against division by zero.
                            "satisfaction_rate": round(
                                (rating.positive_ratings or 0)
                                / max(rating.rating_count or 1, 1)
                                * 100,
                                1,
                            ),
                        }
                        for rating in llm_ratings
                    ],
                    "search_engine_ratings": [
                        {
                            "search_engine": rating.search_engine,
                            "avg_rating": round(rating.avg_rating or 0, 2),
                            "rating_count": rating.rating_count or 0,
                            "positive_ratings": rating.positive_ratings or 0,
                            "satisfaction_rate": round(
                                (rating.positive_ratings or 0)
                                / max(rating.rating_count or 1, 1)
                                * 100,
                                1,
                            ),
                        }
                        for rating in search_engine_ratings
                    ],
                    "rating_trends": [
                        {
                            "date": str(trend.date),
                            "avg_rating": round(trend.avg_rating or 0, 2),
                            "count": trend.daily_count or 0,
                        }
                        for trend in rating_trends
                    ],
                    "recent_ratings": [
                        {
                            "rating": rating.rating,
                            "created_at": str(rating.created_at),
                            "research_id": rating.research_id,
                            "query": (
                                rating.query
                                if rating.query
                                else f"Research Session #{rating.research_id}"
                            ),
                            "mode": rating.mode
                            if rating.mode
                            else "Standard Research",
                            "llm_model": (
                                rating.model_name
                                if rating.model_name
                                else "LLM Model"
                            ),
                        }
                        for rating in recent_ratings
                    ],
                }
            )

    except Exception:
        logger.exception("Error getting star reviews data")
        return (
            jsonify(
                {"error": "An internal error occurred. Please try again later."}
            ),
            500,
        )
@metrics_bp.route("/api/pricing")
@login_required
def api_pricing():
    """Return the current (static) LLM pricing table."""
    try:
        from ...metrics.pricing.pricing_fetcher import PricingFetcher

        # Static pricing is used instead of an async real-time fetch.
        static_pricing = PricingFetcher().static_pricing

        payload = {
            "status": "success",
            "pricing": static_pricing,
            "last_updated": datetime.now(UTC).isoformat(),
            "note": "Pricing data is from static configuration. Real-time APIs not available for most providers.",
        }
        return jsonify(payload)

    except Exception:
        logger.exception("Error fetching pricing data")
        return jsonify({"error": "Internal Server Error"}), 500
@metrics_bp.route("/api/pricing/<model_name>")
@login_required
def api_model_pricing(model_name):
    """Return pricing for one model, from cache or a 1k/1k sample calc."""
    try:
        provider = request.args.get("provider")  # optional query parameter

        from ...metrics.pricing.cost_calculator import CostCalculator

        calculator = CostCalculator()

        # Prefer the cached pricing entry; otherwise derive it from a
        # sample synchronous cost calculation (1000 in / 1000 out tokens).
        pricing = calculator.cache.get_model_pricing(model_name)
        if not pricing:
            sample = calculator.calculate_cost_sync(model_name, 1000, 1000)
            pricing = sample.get("pricing_used", {})

        return jsonify(
            {
                "status": "success",
                "model": model_name,
                "provider": provider,
                "pricing": pricing,
                "last_updated": datetime.now(UTC).isoformat(),
            }
        )

    except Exception:
        logger.exception(f"Error getting pricing for model: {model_name}")
        return jsonify({"error": "An internal error occurred"}), 500
@metrics_bp.route("/api/cost-calculation", methods=["POST"])
@login_required
@require_json_body(error_message="No data provided")
def api_cost_calculation():
    """Calculate cost for token usage.

    Expects JSON with ``model_name`` (required), ``provider`` (optional),
    and ``prompt_tokens`` / ``completion_tokens`` (optional non-negative
    integers, default 0). Returns 400 on invalid input.
    """
    try:
        data = request.get_json()
        model_name = data.get("model_name")
        provider = data.get("provider")  # Optional provider parameter
        prompt_tokens = data.get("prompt_tokens", 0)
        completion_tokens = data.get("completion_tokens", 0)

        if not model_name:
            return jsonify({"error": "model_name is required"}), 400

        # Validate token counts up front so malformed input produces a
        # clear 400 instead of an opaque 500 from the calculator.
        # bool is excluded explicitly since it is a subclass of int.
        for label, value in (
            ("prompt_tokens", prompt_tokens),
            ("completion_tokens", completion_tokens),
        ):
            if (
                isinstance(value, bool)
                or not isinstance(value, int)
                or value < 0
            ):
                return jsonify(
                    {"error": f"{label} must be a non-negative integer"}
                ), 400

        from ...metrics.pricing.cost_calculator import CostCalculator

        # Use synchronous cost calculation
        calculator = CostCalculator()
        cost_data = calculator.calculate_cost_sync(
            model_name, prompt_tokens, completion_tokens
        )

        return jsonify(
            {
                "status": "success",
                "model_name": model_name,
                "provider": provider,
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens,
                **cost_data,
            }
        )

    except Exception:
        logger.exception("Error calculating cost")
        return jsonify({"error": "An internal error occurred"}), 500
@metrics_bp.route("/api/research-costs/<string:research_id>")
@login_required
def api_research_costs(research_id):
    """Return the aggregated token cost for one research session."""
    try:
        user = flask_session["username"]

        with get_user_db_session(user) as db:
            records = (
                db.query(TokenUsage)
                .filter(TokenUsage.research_id == research_id)
                .all()
            )

            if not records:
                # Nothing tracked for this session — report zero cost.
                return jsonify(
                    {
                        "status": "success",
                        "research_id": research_id,
                        "total_cost": 0.0,
                        "message": "No token usage data found for this research session",
                    }
                )

            # Flatten ORM rows to dicts; older rows may lack "provider".
            usage_data = [
                {
                    "model_name": rec.model_name,
                    "provider": getattr(rec, "provider", None),
                    "prompt_tokens": rec.prompt_tokens,
                    "completion_tokens": rec.completion_tokens,
                    "timestamp": rec.timestamp,
                }
                for rec in records
            ]

            from ...metrics.pricing.cost_calculator import CostCalculator

            # Price each usage record synchronously.
            calculator = CostCalculator()
            priced = [
                {
                    **entry,
                    **calculator.calculate_cost_sync(
                        entry["model_name"],
                        entry["prompt_tokens"],
                        entry["completion_tokens"],
                    ),
                }
                for entry in usage_data
            ]

            prompt_total = sum(e["prompt_tokens"] for e in usage_data)
            completion_total = sum(e["completion_tokens"] for e in usage_data)
            grand_total = sum(p["total_cost"] for p in priced)

            return jsonify(
                {
                    "status": "success",
                    "research_id": research_id,
                    "total_cost": round(grand_total, 6),
                    "total_tokens": prompt_total + completion_total,
                    "prompt_tokens": prompt_total,
                    "completion_tokens": completion_total,
                }
            )

    except Exception:
        logger.exception(
            f"Error getting research costs for research: {research_id}"
        )
        return jsonify({"error": "An internal error occurred"}), 500
@metrics_bp.route("/api/cost-analytics")
@login_required
def api_cost_analytics():
    """Get cost analytics across all research sessions.

    Returns an overview (total cost and token counts) plus the ten most
    expensive research sessions for the requested period. On failure it
    degrades to an empty-but-valid payload with HTTP 200 so the UI keeps
    rendering.
    """
    # Read the period before the try block so the error handler can always
    # reference it (previously a failure before this assignment raised
    # NameError inside the except clause).
    period = request.args.get("period", "30d")
    try:
        username = flask_session["username"]

        with get_user_db_session(username) as session:
            # Get token usage for the period
            query = session.query(TokenUsage)
            time_condition = get_time_filter_condition(
                period, TokenUsage.timestamp
            )
            if time_condition is not None:
                query = query.filter(time_condition)

            # First check if we have any records to avoid expensive queries
            record_count = query.count()

            if record_count == 0:
                return jsonify(
                    {
                        "status": "success",
                        "period": period,
                        "overview": {
                            "total_cost": 0.0,
                            "total_tokens": 0,
                            "prompt_tokens": 0,
                            "completion_tokens": 0,
                        },
                        "top_expensive_research": [],
                        "research_count": 0,
                        "message": "No token usage data found for this period",
                    }
                )

            # If we have too many records, limit to recent ones to avoid timeout
            if record_count > 1000:
                logger.warning(
                    f"Large dataset detected ({record_count} records), limiting to recent 1000 for performance"
                )
                usage_records = (
                    query.order_by(TokenUsage.timestamp.desc())
                    .limit(1000)
                    .all()
                )
            else:
                usage_records = query.all()

            # Flatten ORM rows to dicts; older rows may lack "provider".
            usage_data = [
                {
                    "model_name": record.model_name,
                    "provider": getattr(record, "provider", None),
                    "prompt_tokens": record.prompt_tokens,
                    "completion_tokens": record.completion_tokens,
                    "research_id": record.research_id,
                    "timestamp": record.timestamp,
                }
                for record in usage_records
            ]

            from ...metrics.pricing.cost_calculator import CostCalculator

            calculator = CostCalculator()

            # Price each record exactly once; the per-research totals below
            # reuse these results instead of re-pricing every record (the
            # previous implementation called calculate_cost_sync twice per
            # record).
            costs = [
                {
                    **record,
                    **calculator.calculate_cost_sync(
                        record["model_name"],
                        record["prompt_tokens"],
                        record["completion_tokens"],
                    ),
                }
                for record in usage_data
            ]

            total_cost = sum(c["total_cost"] for c in costs)
            total_prompt_tokens = sum(r["prompt_tokens"] for r in usage_data)
            total_completion_tokens = sum(
                r["completion_tokens"] for r in usage_data
            )

            cost_summary = {
                "total_cost": round(total_cost, 6),
                "total_tokens": total_prompt_tokens + total_completion_tokens,
                "prompt_tokens": total_prompt_tokens,
                "completion_tokens": total_completion_tokens,
            }

            # Aggregate cost per research session from the priced records.
            per_research: dict[str, float] = {}
            for entry in costs:
                rid = entry["research_id"]
                per_research[rid] = (
                    per_research.get(rid, 0.0) + entry["total_cost"]
                )

            # Top expensive research sessions (rounded like the overview).
            top_expensive = sorted(
                (
                    (rid, round(total, 6))
                    for rid, total in per_research.items()
                ),
                key=lambda item: item[1],
                reverse=True,
            )[:10]

            return jsonify(
                {
                    "status": "success",
                    "period": period,
                    "overview": cost_summary,
                    "top_expensive_research": [
                        {"research_id": rid, "total_cost": cost}
                        for rid, cost in top_expensive
                    ],
                    "research_count": len(per_research),
                }
            )

    except Exception:
        logger.exception("Error getting cost analytics")
        # Return a more graceful error response
        return (
            jsonify(
                {
                    "status": "success",
                    "period": period,
                    "overview": {
                        "total_cost": 0.0,
                        "total_tokens": 0,
                        "prompt_tokens": 0,
                        "completion_tokens": 0,
                    },
                    "top_expensive_research": [],
                    "research_count": 0,
                    "error": "Cost analytics temporarily unavailable",
                }
            ),
            200,
        )  # Return 200 to avoid breaking the UI
@metrics_bp.route("/links")
@login_required
def link_analytics():
    """Render the link-analytics page."""
    template_name = "pages/link_analytics.html"
    return render_template_with_defaults(template_name)
@metrics_bp.route("/api/link-analytics")
@login_required
def api_link_analytics():
    """Return link analytics data for the requested period."""
    try:
        user = flask_session["username"]
        period = request.args.get("period", "30d")

        analytics = get_link_analytics(period, user)

        return jsonify(
            {
                "status": "success",
                "data": analytics["link_analytics"],
                "period": period,
            }
        )

    except Exception:
        logger.exception("Error getting link analytics")
        error_payload = {
            "status": "error",
            "message": "An internal error occurred. Please try again later.",
        }
        return jsonify(error_payload), 500
@metrics_bp.route("/api/domain-classifications", methods=["GET"])
@login_required
def api_get_domain_classifications():
    """Return every stored domain classification for the current user."""
    classifier = None
    try:
        user = flask_session["username"]

        classifier = DomainClassifier(user)
        rows = classifier.get_all_classifications()

        return jsonify(
            {
                "status": "success",
                "classifications": [row.to_dict() for row in rows],
                "total": len(rows),
            }
        )

    except Exception:
        logger.exception("Error getting domain classifications")
        return jsonify(
            {"status": "error", "message": "Failed to retrieve classifications"}
        ), 500
    finally:
        # Always release the classifier's resources.
        if classifier is not None:
            from ...utilities.resource_utils import safe_close

            safe_close(classifier, "domain classifier")
@metrics_bp.route("/api/domain-classifications/summary", methods=["GET"])
@login_required
def api_get_classifications_summary():
    """Return a per-category summary of domain classifications."""
    classifier = None
    try:
        user = flask_session["username"]

        classifier = DomainClassifier(user)
        category_summary = classifier.get_categories_summary()

        return jsonify({"status": "success", "summary": category_summary})

    except Exception:
        logger.exception("Error getting classifications summary")
        return jsonify(
            {"status": "error", "message": "Failed to retrieve summary"}
        ), 500
    finally:
        # Always release the classifier's resources.
        if classifier is not None:
            from ...utilities.resource_utils import safe_close

            safe_close(classifier, "domain classifier")
@metrics_bp.route("/api/domain-classifications/classify", methods=["POST"])
@login_required
def api_classify_domains():
    """Trigger classification of a specific domain or batch classification.

    JSON body: either ``"domain"`` (classify one domain) or
    ``"batch": true`` (classify all); ``"force_update"`` re-classifies
    already-classified domains.
    """
    classifier = None
    try:
        username = flask_session["username"]

        data = request.get_json() or {}
        domain = data.get("domain")
        force_update = data.get("force_update", False)
        batch_mode = data.get("batch", False)

        # Get settings snapshot for LLM configuration.
        # get_user_db_session is already imported at module level; the
        # redundant local import that shadowed it has been removed.
        from ...settings.manager import SettingsManager

        with get_user_db_session(username) as db_session:
            settings_manager = SettingsManager(db_session=db_session)
            settings_snapshot = settings_manager.get_all_settings()

        classifier = DomainClassifier(
            username, settings_snapshot=settings_snapshot
        )

        if domain and not batch_mode:
            # Classify single domain
            logger.info(f"Classifying single domain: {domain}")
            classification = classifier.classify_domain(domain, force_update)
            if classification:
                return jsonify(
                    {
                        "status": "success",
                        "classification": classification.to_dict(),
                    }
                )
            return jsonify(
                {
                    "status": "error",
                    "message": f"Failed to classify domain: {domain}",
                }
            ), 400
        if batch_mode:
            # Batch classification - this should really be a background task
            # For now, we'll just return immediately and let the frontend poll
            logger.info("Starting batch classification of all domains")
            results = classifier.classify_all_domains(force_update)

            return jsonify({"status": "success", "results": results})
        return jsonify(
            {
                "status": "error",
                "message": "Must provide either 'domain' or set 'batch': true",
            }
        ), 400

    except Exception:
        logger.exception("Error classifying domains")
        return jsonify(
            {"status": "error", "message": "Failed to classify domains"}
        ), 500
    finally:
        # Always release the classifier's resources.
        if classifier is not None:
            from ...utilities.resource_utils import safe_close

            safe_close(classifier, "domain classifier")
@metrics_bp.route("/api/domain-classifications/progress", methods=["GET"])
@login_required
def api_classification_progress():
    """Report how many distinct resource domains have been classified."""
    try:
        user = flask_session["username"]

        with get_user_db_session(user) as db:
            # Collect the distinct, normalized domains across all resources.
            domains = set()
            for (url,) in db.query(ResearchResource.url).distinct().all():
                if not url:
                    continue
                extracted = _extract_domain(url)
                if extracted:
                    domains.add(extracted)

            total = len(domains)
            classified = db.query(DomainClassification).count()

            pct = round(classified / total * 100, 1) if total > 0 else 0

            return jsonify(
                {
                    "status": "success",
                    "progress": {
                        "total_domains": total,
                        "classified": classified,
                        "unclassified": total - classified,
                        "percentage": pct,
                        # Full domain list so the client can drive
                        # classification requests.
                        "all_domains": sorted(domains),
                    },
                }
            )

    except Exception:
        logger.exception("Error getting classification progress")
        return jsonify(
            {"status": "error", "message": "Failed to retrieve progress"}
        ), 500