Coverage for src / local_deep_research / web / routes / metrics_routes.py: 98%

696 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-14 23:55 +0000

1"""Routes for metrics dashboard.""" 

2 

3from datetime import datetime, timedelta, UTC 

4from typing import Any 

5from urllib.parse import urlparse 

6 

7from flask import Blueprint, jsonify, request, session as flask_session 

8from loguru import logger 

9from sqlalchemy import case, func 

10 

11from ...database.models import ( 

12 RateLimitAttempt, 

13 RateLimitEstimate, 

14 Research, 

15 ResearchHistory, 

16 ResearchRating, 

17 ResearchResource, 

18 ResearchStrategy, 

19 TokenUsage, 

20) 

21from ...constants import get_available_strategies 

22from ...domain_classifier import DomainClassifier, DomainClassification 

23from ...database.session_context import get_user_db_session 

24from ...metrics import TokenCounter 

25from ...metrics.query_utils import get_period_days, get_time_filter_condition 

26from ...metrics.search_tracker import get_search_tracker 

27from ...web_search_engines.rate_limiting import get_tracker 

28from ...security.decorators import require_json_body 

29from ..auth.decorators import login_required 

30from ..utils.templates import render_template_with_defaults 

31 

# Create a Blueprint for metrics; every route defined below is served
# under the /metrics URL prefix.
metrics_bp = Blueprint("metrics", __name__, url_prefix="/metrics")

# NOTE: Routes use flask_session["username"] (not .get()) intentionally.
# @login_required guarantees the key exists; direct access fails fast
# if the decorator is ever removed.

38 

39 

40def _extract_domain(url): 

41 """Extract normalized domain from URL, stripping www. prefix.""" 

42 try: 

43 parsed = urlparse(url) 

44 domain = parsed.netloc.lower() 

45 if domain.startswith("www."): 

46 domain = domain[4:] 

47 return domain if domain else None 

48 except (ValueError, AttributeError, TypeError): 

49 return None 

50 

51 

def get_rating_analytics(period="30d", research_mode="all", username=None):
    """Get rating analytics for the specified period and research mode.

    Note: ``research_mode`` is accepted for interface symmetry with the
    other analytics helpers but is not used in any filter here.
    """

    def _empty_payload(error=None):
        # Zeroed-out analytics structure shared by every fallback path.
        # Only the no-session path attaches an "error" entry.
        payload = {
            "avg_rating": None,
            "total_ratings": 0,
            "rating_distribution": {},
            "satisfaction_stats": {
                "very_satisfied": 0,
                "satisfied": 0,
                "neutral": 0,
                "dissatisfied": 0,
                "very_dissatisfied": 0,
            },
        }
        if error is not None:
            payload["error"] = error
        return {"rating_analytics": payload}

    try:
        user = username or flask_session.get("username")
        if not user:
            return _empty_payload("No user session")

        # Translate the period key ("7d", "30d", ...) into a day count;
        # a falsy result means "all time" (no cutoff filter).
        days = get_period_days(period)

        with get_user_db_session(user) as session:
            ratings_query = session.query(ResearchRating)
            if days:
                since = datetime.now(UTC) - timedelta(days=days)
                ratings_query = ratings_query.filter(
                    ResearchRating.created_at >= since
                )

            ratings = ratings_query.all()
            if not ratings:
                return _empty_payload()

            values = [r.rating for r in ratings]

            # Histogram over the 1-5 rating scale, keyed by string digits.
            distribution = {
                str(score): values.count(score) for score in range(1, 6)
            }

            # Bucket counts mapping each score to a satisfaction label.
            satisfaction = {
                "very_satisfied": values.count(5),
                "satisfied": values.count(4),
                "neutral": values.count(3),
                "dissatisfied": values.count(2),
                "very_dissatisfied": values.count(1),
            }

            return {
                "rating_analytics": {
                    "avg_rating": round(sum(values) / len(values), 1),
                    "total_ratings": len(ratings),
                    "rating_distribution": distribution,
                    "satisfaction_stats": satisfaction,
                }
            }

    except Exception:
        logger.exception("Error getting rating analytics")
        return _empty_payload()

148 

149 

def get_link_analytics(period="30d", username=None):
    """Get link analytics from research resources.

    Aggregates every ``ResearchResource`` in the period into domain
    counts, per-domain research usage, LLM category counts, daily
    temporal trends, and a top-10 domain summary.

    Args:
        period: Time-window key understood by ``get_period_days``
            (e.g. "7d", "30d"); a falsy day count means "all time".
        username: Account whose database is queried; falls back to the
            Flask session's ``username`` when not given.

    Returns:
        Dict with a single ``link_analytics`` key.  The fallback
        payloads (no session / no data / error) use a reduced key set
        that includes ``academic_vs_general``, which the success
        payload does not.
    """
    try:
        if not username:
            username = flask_session.get("username")

        if not username:
            return {
                "link_analytics": {
                    "top_domains": [],
                    "total_unique_domains": 0,
                    "avg_links_per_research": 0,
                    "domain_distribution": {},
                    "source_type_analysis": {},
                    "academic_vs_general": {},
                    "total_links": 0,
                    "error": "No user session",
                }
            }

        # Calculate date range
        days = get_period_days(period)

        with get_user_db_session(username) as session:
            # Base query
            query = session.query(ResearchResource)

            # Apply time filter
            if days:
                cutoff_date = datetime.now(UTC) - timedelta(days=days)
                # NOTE(review): created_at is compared as an ISO string
                # here (unlike the datetime comparison used elsewhere in
                # this module) — presumably the column stores ISO text;
                # confirm against the model definition.
                query = query.filter(
                    ResearchResource.created_at >= cutoff_date.isoformat()
                )

            # Get all resources
            resources = query.all()

            if not resources:
                return {
                    "link_analytics": {
                        "top_domains": [],
                        "total_unique_domains": 0,
                        "avg_links_per_research": 0,
                        "domain_distribution": {},
                        "source_type_analysis": {},
                        "academic_vs_general": {},
                        "total_links": 0,
                    }
                }

            # Extract domains from URLs
            domain_counts: dict[str, Any] = {}
            domain_researches: dict[
                str, Any
            ] = {}  # Track which researches used each domain
            source_types: dict[str, Any] = {}
            temporal_data: dict[str, Any] = {}  # Track links over time
            domain_connections: dict[
                str, Any
            ] = {}  # Track domain co-occurrences
            # NOTE(review): domain_connections is populated below but never
            # included in the returned payload.

            # Generic category counting from LLM classifications
            category_counts: dict[str, Any] = {}

            # NOTE(review): quality_metrics is accumulated but never
            # returned either — dead output or work-in-progress.
            quality_metrics = {
                "with_title": 0,
                "with_preview": 0,
                "with_both": 0,
                "total": 0,
            }

            # First pass: collect all domains from resources
            all_domains = set()
            for resource in resources:
                if resource.url:
                    domain = _extract_domain(resource.url)
                    if domain:
                        all_domains.add(domain)

            # Batch load all domain classifications in one query (fix N+1)
            domain_classifications_map = {}
            if all_domains:
                all_classifications = (
                    session.query(DomainClassification)
                    .filter(DomainClassification.domain.in_(all_domains))
                    .all()
                )
                for classification in all_classifications:
                    domain_classifications_map[classification.domain] = (
                        classification
                    )

            # Second pass: process resources with pre-loaded classifications
            for resource in resources:
                if resource.url:
                    try:
                        domain = _extract_domain(resource.url)
                        if not domain:
                            continue

                        # Count domains
                        domain_counts[domain] = domain_counts.get(domain, 0) + 1

                        # Track research IDs for each domain
                        if domain not in domain_researches:
                            domain_researches[domain] = set()
                        domain_researches[domain].add(resource.research_id)

                        # Track temporal data (daily counts)
                        if resource.created_at:
                            date_str = resource.created_at[
                                :10
                            ]  # Extract YYYY-MM-DD
                            temporal_data[date_str] = (
                                temporal_data.get(date_str, 0) + 1
                            )

                        # Count categories from pre-loaded classifications (no N+1)
                        classification = domain_classifications_map.get(domain)
                        if classification:
                            category = classification.category
                            category_counts[category] = (
                                category_counts.get(category, 0) + 1
                            )
                        else:
                            category_counts["Unclassified"] = (
                                category_counts.get("Unclassified", 0) + 1
                            )

                        # Track source type from metadata if available
                        if resource.source_type:
                            source_types[resource.source_type] = (
                                source_types.get(resource.source_type, 0) + 1
                            )

                        # Track quality metrics
                        quality_metrics["total"] += 1
                        if resource.title:
                            quality_metrics["with_title"] += 1
                        if resource.content_preview:
                            quality_metrics["with_preview"] += 1
                        if resource.title and resource.content_preview:
                            quality_metrics["with_both"] += 1

                        # Track domain co-occurrences for network visualization
                        research_id = resource.research_id
                        if research_id not in domain_connections:
                            domain_connections[research_id] = []
                        domain_connections[research_id].append(domain)

                    except Exception:
                        # Best-effort per-resource processing: one bad row
                        # must not abort the whole aggregation.
                        logger.warning(f"Error parsing URL {resource.url}")

            # Sort domains by count and get top 10
            sorted_domains = sorted(
                domain_counts.items(), key=lambda x: x[1], reverse=True
            )
            top_10_domains = sorted_domains[:10]

            # Calculate domain distribution (top domains vs others)
            top_10_count = sum(count for _, count in top_10_domains)
            others_count = len(resources) - top_10_count

            # Get unique research IDs to calculate average
            unique_research_ids = {r.research_id for r in resources}
            avg_links = (
                len(resources) / len(unique_research_ids)
                if unique_research_ids
                else 0
            )

            # Prepare temporal trend data (sorted by date)
            temporal_trend = sorted(
                [
                    {"date": date, "count": count}
                    for date, count in temporal_data.items()
                ],
                key=lambda x: x["date"],
            )

            # Get most recent research for each top domain and classifications
            domain_recent_research = {}
            # Build domain_classifications dict from pre-loaded data
            domain_classifications = {
                domain: {
                    "category": classification.category,
                    "subcategory": classification.subcategory,
                    "confidence": classification.confidence,
                }
                for domain, classification in domain_classifications_map.items()
            }

            # Batch-load research details for top domains (fix N+1 query)
            all_research_ids = []
            domain_research_id_lists = {}
            for domain, _ in top_10_domains:
                if domain in domain_researches:
                    # NOTE(review): set order is arbitrary, so the "3 most
                    # recent" researches here are actually 3 arbitrary ones.
                    ids = list(domain_researches[domain])[:3]
                    domain_research_id_lists[domain] = ids
                    all_research_ids.extend(ids)

            research_by_id = {}
            if all_research_ids:
                researches = (
                    session.query(ResearchHistory)
                    .filter(ResearchHistory.id.in_(all_research_ids))
                    .all()
                )
                research_by_id = {r.id: r for r in researches}

            for domain, ids in domain_research_id_lists.items():
                domain_recent_research[domain] = [
                    {
                        "id": r_id,
                        # Truncate the query text for display; fall back to
                        # a generic label when the row or its query is gone.
                        "query": research_by_id[r_id].query[:50]
                        if research_by_id.get(r_id)
                        and research_by_id[r_id].query
                        else "Research",
                    }
                    for r_id in ids
                    if r_id in research_by_id
                ]

            return {
                "link_analytics": {
                    "top_domains": [
                        {
                            "domain": domain,
                            "count": count,
                            "percentage": round(
                                count / len(resources) * 100, 1
                            ),
                            "research_count": len(
                                domain_researches.get(domain, set())
                            ),
                            "recent_researches": domain_recent_research.get(
                                domain, []
                            ),
                            "classification": domain_classifications.get(
                                domain, None
                            ),
                        }
                        for domain, count in top_10_domains
                    ],
                    "total_unique_domains": len(domain_counts),
                    "avg_links_per_research": round(avg_links, 1),
                    "domain_distribution": {
                        "top_10": top_10_count,
                        "others": others_count,
                    },
                    "source_type_analysis": source_types,
                    "category_distribution": category_counts,
                    # Generic pie chart data - use whatever LLM classifier outputs
                    "domain_categories": category_counts,
                    "total_links": len(resources),
                    "total_researches": len(unique_research_ids),
                    "temporal_trend": temporal_trend,
                    "domain_metrics": {
                        domain: {
                            "usage_count": count,
                            "usage_percentage": round(
                                count / len(resources) * 100, 1
                            ),
                            "research_diversity": len(
                                domain_researches.get(domain, set())
                            ),
                            "frequency_rank": rank + 1,
                        }
                        for rank, (domain, count) in enumerate(top_10_domains)
                    },
                }
            }

    except Exception:
        logger.exception("Error getting link analytics")
        return {
            "link_analytics": {
                "top_domains": [],
                "total_unique_domains": 0,
                "avg_links_per_research": 0,
                "domain_distribution": {},
                "source_type_analysis": {},
                "academic_vs_general": {},
                "total_links": 0,
                "error": "Failed to retrieve link analytics",
            }
        }

437 

438 

def get_strategy_analytics(period="30d", username=None):
    """Get strategy usage analytics for the specified period."""

    def _base_payload(**extra):
        # Empty analytics skeleton; extra entries (error/message) merge in.
        payload = {
            "total_research_with_strategy": 0,
            "total_research": 0,
            "most_popular_strategy": None,
            "strategy_usage": [],
            "strategy_distribution": {},
            "available_strategies": get_available_strategies(),
        }
        payload.update(extra)
        return {"strategy_analytics": payload}

    try:
        user = username or flask_session.get("username")
        if not user:
            return _base_payload(error="No user session")

        # Translate the period key into a day count; falsy -> all time.
        days = get_period_days(period)

        with get_user_db_session(user) as session:
            # Bail out early when strategy tracking has never recorded anything.
            if session.query(ResearchStrategy).count() == 0:
                logger.warning("No research strategies found in database")
                return _base_payload(
                    message="Strategy tracking not yet available - run a research to start tracking"
                )

            cutoff = (
                datetime.now(UTC) - timedelta(days=days) if days else None
            )

            # Per-strategy usage counts plus a total count for percentages;
            # both respect the same cutoff.
            usage_query = session.query(
                ResearchStrategy.strategy_name,
                func.count(ResearchStrategy.id).label("usage_count"),
            )
            total_query = session.query(ResearchStrategy)
            if cutoff is not None:
                usage_query = usage_query.filter(
                    ResearchStrategy.created_at >= cutoff
                )
                total_query = total_query.filter(
                    ResearchStrategy.created_at >= cutoff
                )

            rows = (
                usage_query.group_by(ResearchStrategy.strategy_name)
                .order_by(func.count(ResearchStrategy.id).desc())
                .all()
            )
            total_research = total_query.count()

            strategy_usage = [
                {
                    "strategy": name,
                    "count": count,
                    "percentage": round(
                        (count / total_research * 100)
                        if total_research > 0
                        else 0,
                        1,
                    ),
                }
                for name, count in rows
            ]
            strategy_distribution = {name: count for name, count in rows}

            return {
                "strategy_analytics": {
                    "total_research_with_strategy": sum(
                        item["count"] for item in strategy_usage
                    ),
                    "total_research": total_research,
                    # rows are sorted by usage, so the first entry wins.
                    "most_popular_strategy": strategy_usage[0]["strategy"]
                    if strategy_usage
                    else None,
                    "strategy_usage": strategy_usage,
                    "strategy_distribution": strategy_distribution,
                    "available_strategies": get_available_strategies(),
                }
            }

    except Exception:
        logger.exception("Error getting strategy analytics")
        return _base_payload(error="Failed to retrieve strategy data")

555 

556 

def get_rate_limiting_analytics(period="30d", username=None):
    """Get rate limiting analytics for the specified period.

    Summarizes ``RateLimitAttempt`` rows (per-call success/failure and
    wait times) plus ``RateLimitEstimate`` rows (per-engine adaptive
    wait state) into a single "rate_limiting" payload.

    Args:
        period: One of "7d", "30d", "3m", "1y"; anything else is
            treated as "all time".
        username: Account whose database is queried; falls back to the
            Flask session's ``username`` when not given.

    Returns:
        Dict with a single ``rate_limiting`` key; a zeroed-out payload
        with an ``error`` entry is returned when there is no session or
        when an internal error occurs.
    """
    try:
        if not username:
            username = flask_session.get("username")

        if not username:
            return {
                "rate_limiting": {
                    "total_attempts": 0,
                    "successful_attempts": 0,
                    "failed_attempts": 0,
                    "success_rate": 0,
                    "rate_limit_events": 0,
                    "avg_wait_time": 0,
                    "avg_successful_wait": 0,
                    "tracked_engines": 0,
                    "engine_stats": [],
                    "total_engines_tracked": 0,
                    "healthy_engines": 0,
                    "degraded_engines": 0,
                    "poor_engines": 0,
                    "error": "No user session",
                }
            }

        # Calculate date range for timestamp filtering.
        # Attempt timestamps are compared as Unix epoch seconds, hence
        # time.time() arithmetic instead of the datetime filtering used
        # by the other analytics helpers; cutoff 0 means "all time".
        import time

        if period == "7d":
            cutoff_time = time.time() - (7 * 24 * 3600)
        elif period == "30d":
            cutoff_time = time.time() - (30 * 24 * 3600)
        elif period == "3m":
            cutoff_time = time.time() - (90 * 24 * 3600)
        elif period == "1y":
            cutoff_time = time.time() - (365 * 24 * 3600)
        else:  # all
            cutoff_time = 0

        with get_user_db_session(username) as session:
            # Get rate limit attempts
            rate_limit_query = session.query(RateLimitAttempt)

            # Apply time filter
            if cutoff_time > 0:
                rate_limit_query = rate_limit_query.filter(
                    RateLimitAttempt.timestamp >= cutoff_time
                )

            # Get rate limit statistics
            total_attempts = rate_limit_query.count()
            successful_attempts = rate_limit_query.filter(
                RateLimitAttempt.success
            ).count()
            failed_attempts = total_attempts - successful_attempts

            # Count rate limiting events (failures with RateLimitError)
            rate_limit_events = rate_limit_query.filter(
                ~RateLimitAttempt.success,
                RateLimitAttempt.error_type == "RateLimitError",
            ).count()

            logger.info(
                f"Rate limit attempts in database: total={total_attempts}, successful={successful_attempts}"
            )

            # Get all attempts for detailed calculations
            attempts = rate_limit_query.all()

            # Calculate average wait times
            if attempts:
                avg_wait_time = sum(a.wait_time for a in attempts) / len(
                    attempts
                )
                successful_wait_times = [
                    a.wait_time for a in attempts if a.success
                ]
                avg_successful_wait = (
                    sum(successful_wait_times) / len(successful_wait_times)
                    if successful_wait_times
                    else 0
                )
            else:
                avg_wait_time = 0
                avg_successful_wait = 0

            # Get tracked engines - count distinct engine types from attempts
            tracked_engines_query = session.query(
                func.count(func.distinct(RateLimitAttempt.engine_type))
            )
            if cutoff_time > 0:
                tracked_engines_query = tracked_engines_query.filter(
                    RateLimitAttempt.timestamp >= cutoff_time
                )
            tracked_engines = tracked_engines_query.scalar() or 0

            # Get engine-specific stats from attempts
            engine_stats = []

            # Get distinct engine types from attempts
            engine_types_query = session.query(
                RateLimitAttempt.engine_type
            ).distinct()
            if cutoff_time > 0:
                engine_types_query = engine_types_query.filter(
                    RateLimitAttempt.timestamp >= cutoff_time
                )
            engine_types = [row.engine_type for row in engine_types_query.all()]

            # Preload estimates for relevant engines to avoid N+1 queries
            estimates_by_engine = {}
            if engine_types:
                all_estimates = (
                    session.query(RateLimitEstimate)
                    .filter(RateLimitEstimate.engine_type.in_(engine_types))
                    .all()
                )
                estimates_by_engine = {e.engine_type: e for e in all_estimates}

            for engine_type in engine_types:
                engine_attempts_list = [
                    a for a in attempts if a.engine_type == engine_type
                ]
                engine_attempts = len(engine_attempts_list)
                engine_success = len(
                    [a for a in engine_attempts_list if a.success]
                )

                # Get estimate from preloaded dict
                estimate = estimates_by_engine.get(engine_type)

                # Calculate recent success rate
                recent_success_rate = (
                    (engine_success / engine_attempts * 100)
                    if engine_attempts > 0
                    else 0
                )

                # Determine status based on success rate.
                # Estimate rates are 0..1 fractions; recent rates are 0..100
                # percentages, hence the different thresholds below.
                if estimate:
                    status = (
                        "healthy"
                        if estimate.success_rate > 0.8
                        else "degraded"
                        if estimate.success_rate > 0.5
                        else "poor"
                    )
                else:
                    status = (
                        "healthy"
                        if recent_success_rate > 80
                        else "degraded"
                        if recent_success_rate > 50
                        else "poor"
                    )

                # Estimate-backed fields fall back to the recent-attempt
                # numbers when no estimate row exists for this engine.
                engine_stat = {
                    "engine": engine_type,
                    "base_wait": estimate.base_wait_seconds
                    if estimate
                    else 0.0,
                    "base_wait_seconds": round(
                        estimate.base_wait_seconds if estimate else 0.0, 2
                    ),
                    "min_wait_seconds": round(
                        estimate.min_wait_seconds if estimate else 0.0, 2
                    ),
                    "max_wait_seconds": round(
                        estimate.max_wait_seconds if estimate else 0.0, 2
                    ),
                    "success_rate": round(estimate.success_rate * 100, 1)
                    if estimate
                    else recent_success_rate,
                    "total_attempts": estimate.total_attempts
                    if estimate
                    else engine_attempts,
                    "recent_attempts": engine_attempts,
                    "recent_success_rate": round(recent_success_rate, 1),
                    "attempts": engine_attempts,
                    "status": status,
                }

                if estimate:
                    # NOTE(review): redundant re-import — `datetime` is
                    # already imported at module scope.
                    from datetime import datetime

                    engine_stat["last_updated"] = datetime.fromtimestamp(
                        estimate.last_updated, UTC
                    ).isoformat()  # ISO format already includes timezone
                else:
                    engine_stat["last_updated"] = "Never"

                engine_stats.append(engine_stat)

            logger.info(
                f"Tracked engines: {tracked_engines}, engine_stats: {engine_stats}"
            )

            result = {
                "rate_limiting": {
                    "total_attempts": total_attempts,
                    "successful_attempts": successful_attempts,
                    "failed_attempts": failed_attempts,
                    "success_rate": (successful_attempts / total_attempts * 100)
                    if total_attempts > 0
                    else 0,
                    "rate_limit_events": rate_limit_events,
                    "avg_wait_time": round(float(avg_wait_time), 2),
                    "avg_successful_wait": round(float(avg_successful_wait), 2),
                    "tracked_engines": tracked_engines,
                    "engine_stats": engine_stats,
                    "total_engines_tracked": tracked_engines,
                    "healthy_engines": len(
                        [s for s in engine_stats if s["status"] == "healthy"]
                    ),
                    "degraded_engines": len(
                        [s for s in engine_stats if s["status"] == "degraded"]
                    ),
                    "poor_engines": len(
                        [s for s in engine_stats if s["status"] == "poor"]
                    ),
                }
            }

            # NOTE(review): logs the full analytics payload at INFO level;
            # consider demoting to DEBUG.
            logger.info(
                f"DEBUG: Returning rate_limiting_analytics result: {result}"
            )
            return result

    except Exception:
        logger.exception("Error getting rate limiting analytics")
        return {
            "rate_limiting": {
                "total_attempts": 0,
                "successful_attempts": 0,
                "failed_attempts": 0,
                "success_rate": 0,
                "rate_limit_events": 0,
                "avg_wait_time": 0,
                "avg_successful_wait": 0,
                "tracked_engines": 0,
                "engine_stats": [],
                "total_engines_tracked": 0,
                "healthy_engines": 0,
                "degraded_engines": 0,
                "poor_engines": 0,
                "error": "An internal error occurred while processing the request.",
            }
        }

806 

807 

@metrics_bp.route("/")
@login_required
def metrics_dashboard():
    """Serve the main metrics dashboard HTML page."""
    template_name = "pages/metrics.html"
    return render_template_with_defaults(template_name)

813 

814 

@metrics_bp.route("/context-overflow")
@login_required
def context_overflow_page():
    """Serve the context-overflow analytics HTML page."""
    template_name = "pages/context_overflow.html"
    return render_template_with_defaults(template_name)

820 

821 

@metrics_bp.route("/api/metrics")
@login_required
def api_metrics():
    """Get overall metrics data.

    Merges token usage, search, strategy, rate-limiting, and user
    satisfaction metrics into one JSON payload for the dashboard.

    Query params:
        period: time-window key (default "30d").
        mode: research-mode filter (default "all").
    """
    logger.debug("api_metrics endpoint called")
    try:
        # Get username from session
        username = flask_session["username"]

        # Get time period and research mode from query parameters
        period = request.args.get("period", "30d")
        research_mode = request.args.get("mode", "all")

        token_counter = TokenCounter()
        search_tracker = get_search_tracker()

        # Get both token and search metrics.
        # NOTE(review): get_overall_metrics is not passed `username`;
        # presumably TokenCounter resolves the current user internally —
        # confirm against the metrics package.
        token_metrics = token_counter.get_overall_metrics(
            period=period, research_mode=research_mode
        )
        search_metrics = search_tracker.get_search_metrics(
            period=period,
            research_mode=research_mode,
            username=username,
        )

        # Get user satisfaction rating data
        try:
            with get_user_db_session(username) as session:
                # Build base query with time filter
                ratings_query = session.query(ResearchRating)
                time_condition = get_time_filter_condition(
                    period, ResearchRating.created_at
                )
                if time_condition is not None:
                    ratings_query = ratings_query.filter(time_condition)

                # Get average rating
                avg_rating = ratings_query.with_entities(
                    func.avg(ResearchRating.rating).label("avg_rating")
                ).scalar()

                # Get total rating count
                total_ratings = ratings_query.count()

                user_satisfaction = {
                    # Truthiness check: a None average (no ratings) maps
                    # to None; ratings are 1-5 so a 0 average cannot occur.
                    "avg_rating": round(avg_rating, 1) if avg_rating else None,
                    "total_ratings": total_ratings,
                }
        except Exception:
            # Satisfaction data is optional; degrade to an empty summary
            # rather than failing the whole endpoint.
            logger.warning("Error getting user satisfaction data")
            user_satisfaction = {"avg_rating": None, "total_ratings": 0}

        # Get strategy analytics
        strategy_data = get_strategy_analytics(period, username)
        logger.debug(f"strategy_data keys: {list(strategy_data.keys())}")

        # Get rate limiting analytics
        rate_limiting_data = get_rate_limiting_analytics(period, username)
        logger.debug(f"rate_limiting_data: {rate_limiting_data}")
        logger.debug(
            f"rate_limiting_data keys: {list(rate_limiting_data.keys())}"
        )

        # Combine metrics. Later dicts win on key collisions;
        # user_satisfaction is always set last.
        combined_metrics = {
            **token_metrics,
            **search_metrics,
            **strategy_data,
            **rate_limiting_data,
            "user_satisfaction": user_satisfaction,
        }

        logger.debug(f"combined_metrics keys: {list(combined_metrics.keys())}")
        logger.debug(
            f"combined_metrics['rate_limiting']: {combined_metrics.get('rate_limiting', 'NOT FOUND')}"
        )

        return jsonify(
            {
                "status": "success",
                "metrics": combined_metrics,
                "period": period,
                "research_mode": research_mode,
            }
        )
    except Exception:
        logger.exception("Error getting metrics")
        return (
            jsonify(
                {
                    "status": "error",
                    "message": "An internal error occurred. Please try again later.",
                }
            ),
            500,
        )

919 

920 

@metrics_bp.route("/api/rate-limiting")
@login_required
def api_rate_limiting_metrics():
    """Return detailed rate-limiting analytics for the requested period."""
    logger.info("DEBUG: api_rate_limiting_metrics endpoint called")
    try:
        period = request.args.get("period", "30d")
        analytics = get_rate_limiting_analytics(
            period, flask_session["username"]
        )
        payload = {
            "status": "success",
            "data": analytics,
            "period": period,
        }
        return jsonify(payload)
    except Exception:
        logger.exception("Error getting rate limiting metrics")
        error_payload = {
            "status": "error",
            "message": "Failed to retrieve rate limiting metrics",
        }
        return jsonify(error_payload), 500

942 

943 

@metrics_bp.route("/api/rate-limiting/current")
@login_required
def api_current_rate_limits():
    """Return the tracker's current per-engine rate-limit estimates."""
    try:

        def _health(rate):
            # Health buckets: >80% healthy, >50% degraded, else poor
            # (rate is a 0..1 fraction here).
            if rate > 0.8:
                return "healthy"
            if rate > 0.5:
                return "degraded"
            return "poor"

        current_limits = []
        for (
            engine_type,
            base_wait,
            min_wait,
            max_wait,
            last_updated,
            total_attempts,
            success_rate,
        ) in get_tracker().get_stats():
            current_limits.append(
                {
                    "engine_type": engine_type,
                    "base_wait_seconds": round(base_wait, 2),
                    "min_wait_seconds": round(min_wait, 2),
                    "max_wait_seconds": round(max_wait, 2),
                    "success_rate": round(success_rate * 100, 1),
                    "total_attempts": total_attempts,
                    # ISO format already includes the timezone offset.
                    "last_updated": datetime.fromtimestamp(
                        last_updated, UTC
                    ).isoformat(),
                    "status": _health(success_rate),
                }
            )

        return jsonify(
            {
                "status": "success",
                "current_limits": current_limits,
                "timestamp": datetime.now(UTC).isoformat(),
            }
        )
    except Exception:
        logger.exception("Error getting current rate limits")
        return jsonify(
            {
                "status": "error",
                "message": "Failed to retrieve current rate limits",
            }
        ), 500

997 

998 

@metrics_bp.route("/api/metrics/research/<string:research_id>/links")
@login_required
def api_research_link_metrics(research_id):
    """Get link analytics for a specific research.

    Aggregates the research's stored ``ResearchResource`` rows into:
    total link count, per-domain counts (top 20), a category
    distribution based on stored ``DomainClassification`` rows, and a
    preview of the first 10 resources. Returns a zeroed payload when the
    research has no resources, and HTTP 500 on unexpected failure.
    """
    try:
        username = flask_session["username"]

        with get_user_db_session(username) as session:
            # Get all resources for this specific research
            resources = (
                session.query(ResearchResource)
                .filter(ResearchResource.research_id == research_id)
                .all()
            )

            if not resources:
                # No links recorded: return an empty-but-well-formed payload
                # so the frontend can render without special-casing.
                return jsonify(
                    {
                        "status": "success",
                        "data": {
                            "total_links": 0,
                            "unique_domains": 0,
                            "domains": [],
                            "category_distribution": {},
                            "domain_categories": {},
                            "resources": [],
                        },
                    }
                )

            # Extract domain information
            domain_counts: dict[str, Any] = {}

            # Generic category counting from LLM classifications
            category_counts: dict[str, Any] = {}

            # First pass: collect all domains
            all_domains = set()
            for resource in resources:
                if resource.url:
                    domain = _extract_domain(resource.url)
                    if domain:
                        all_domains.add(domain)

            # Batch load all domain classifications in one query (fix N+1)
            domain_classifications_map: dict[str, Any] = {}
            if all_domains:
                all_classifications = (
                    session.query(DomainClassification)
                    .filter(DomainClassification.domain.in_(all_domains))
                    .all()
                )
                for classification in all_classifications:
                    domain_classifications_map[classification.domain] = (
                        classification
                    )

            # Second pass: process resources with pre-loaded classifications
            for resource in resources:
                if resource.url:
                    try:
                        domain = _extract_domain(resource.url)
                        if not domain:
                            continue

                        domain_counts[domain] = domain_counts.get(domain, 0) + 1

                        # Count categories from pre-loaded classifications (no N+1)
                        classification = domain_classifications_map.get(domain)
                        if classification:
                            category = classification.category
                            category_counts[category] = (
                                category_counts.get(category, 0) + 1
                            )
                        else:
                            # Domain seen but never classified by the LLM.
                            category_counts["Unclassified"] = (
                                category_counts.get("Unclassified", 0) + 1
                            )
                    except (AttributeError, KeyError) as e:
                        # Best-effort: a malformed classification row must not
                        # abort the whole aggregation.
                        logger.debug(f"Error classifying domain {domain}: {e}")

            # Sort domains by count
            sorted_domains = sorted(
                domain_counts.items(), key=lambda x: x[1], reverse=True
            )

            return jsonify(
                {
                    "status": "success",
                    "data": {
                        "total_links": len(resources),
                        "unique_domains": len(domain_counts),
                        "domains": [
                            {
                                "domain": domain,
                                "count": count,
                                # Percentage of all links (not unique domains).
                                "percentage": round(
                                    count / len(resources) * 100, 1
                                ),
                            }
                            for domain, count in sorted_domains[
                                :20
                            ]  # Top 20 domains
                        ],
                        "category_distribution": category_counts,
                        "domain_categories": category_counts,  # Generic categories from LLM
                        "resources": [
                            {
                                "title": r.title or "Untitled",
                                "url": r.url,
                                "preview": r.content_preview[:200]
                                if r.content_preview
                                else None,
                            }
                            for r in resources[:10]  # First 10 resources
                        ],
                    },
                }
            )

    except Exception:
        logger.exception("Error getting research link metrics")
        return jsonify(
            {"status": "error", "message": "Failed to retrieve link metrics"}
        ), 500

1124 

1125 

@metrics_bp.route("/api/metrics/research/<string:research_id>")
@login_required
def api_research_metrics(research_id):
    """Return token-usage metrics for a single research session."""
    try:
        metrics = TokenCounter().get_research_metrics(research_id)
        return jsonify({"status": "success", "metrics": metrics})
    except Exception:
        logger.exception("Error getting research metrics")
        error_body = {
            "status": "error",
            "message": "An internal error occurred. Please try again later.",
        }
        return jsonify(error_body), 500

1145 

1146 

@metrics_bp.route("/api/metrics/research/<string:research_id>/timeline")
@login_required
def api_research_timeline_metrics(research_id):
    """Return timeline metrics for a single research session."""
    try:
        counter = TokenCounter()
        return jsonify(
            {
                "status": "success",
                "metrics": counter.get_research_timeline_metrics(research_id),
            }
        )
    except Exception:
        logger.exception("Error getting research timeline metrics")
        error_body = {
            "status": "error",
            "message": "An internal error occurred. Please try again later.",
        }
        return jsonify(error_body), 500

1168 

1169 

@metrics_bp.route("/api/metrics/research/<string:research_id>/search")
@login_required
def api_research_search_metrics(research_id):
    """Return search metrics for a single research session."""
    try:
        tracker = get_search_tracker()
        metrics = tracker.get_research_search_metrics(
            research_id, username=flask_session["username"]
        )
        return jsonify({"status": "success", "metrics": metrics})
    except Exception:
        logger.exception("Error getting research search metrics")
        error_body = {
            "status": "error",
            "message": "An internal error occurred. Please try again later.",
        }
        return jsonify(error_body), 500

1192 

1193 

@metrics_bp.route("/api/metrics/enhanced")
@login_required
def api_enhanced_metrics():
    """Return enhanced Phase 1 tracking metrics for the dashboard."""
    try:
        # Query-string controls: time window and research-mode filter.
        period = request.args.get("period", "30d")
        research_mode = request.args.get("mode", "all")
        username = flask_session["username"]

        metrics = TokenCounter().get_enhanced_metrics(
            period=period, research_mode=research_mode
        )

        # Attach the search-volume time series consumed by the chart widget.
        metrics["search_time_series"] = (
            get_search_tracker().get_search_time_series(
                period=period,
                research_mode=research_mode,
                username=username,
            )
        )

        # Merge in user-rating analytics for the same window and mode.
        metrics.update(get_rating_analytics(period, research_mode, username))

        return jsonify(
            {
                "status": "success",
                "metrics": metrics,
                "period": period,
                "research_mode": research_mode,
            }
        )
    except Exception:
        logger.exception("Error getting enhanced metrics")
        error_body = {
            "status": "error",
            "message": "An internal error occurred. Please try again later.",
        }
        return jsonify(error_body), 500

1242 

1243 

@metrics_bp.route("/api/ratings/<string:research_id>", methods=["GET"])
@login_required
def api_get_research_rating(research_id):
    """Return the stored star rating for one research session (or null)."""
    try:
        username = flask_session["username"]

        with get_user_db_session(username) as session:
            record = (
                session.query(ResearchRating)
                .filter_by(research_id=research_id)
                .first()
            )

            if record is None:
                # No rating recorded yet for this session.
                return jsonify({"status": "success", "rating": None})

            return jsonify(
                {
                    "status": "success",
                    "rating": record.rating,
                    "created_at": record.created_at.isoformat(),
                    "updated_at": record.updated_at.isoformat(),
                }
            )

    except Exception:
        logger.exception("Error getting research rating")
        error_body = {
            "status": "error",
            "message": "An internal error occurred. Please try again later.",
        }
        return jsonify(error_body), 500

1280 

1281 

@metrics_bp.route("/api/ratings/<string:research_id>", methods=["POST"])
@login_required
@require_json_body(error_format="status")
def api_save_research_rating(research_id):
    """Save or update rating for a specific research session.

    JSON body:
        rating: integer star rating, 1-5 inclusive.

    Returns 400 for an invalid rating, 500 on unexpected failure.
    """
    try:
        username = flask_session["username"]

        data = request.get_json()
        rating_value = data.get("rating")

        # Validate strictly: bool is an int subclass in Python, so without
        # the explicit exclusion `true` in the JSON body would pass
        # isinstance(..., int) and be stored as True. Reject it, along with
        # non-ints and out-of-range values.
        if (
            isinstance(rating_value, bool)
            or not isinstance(rating_value, int)
            or not 1 <= rating_value <= 5
        ):
            return (
                jsonify(
                    {
                        "status": "error",
                        "message": "Rating must be an integer between 1 and 5",
                    }
                ),
                400,
            )

        with get_user_db_session(username) as session:
            # Check if rating already exists
            existing_rating = (
                session.query(ResearchRating)
                .filter_by(research_id=research_id)
                .first()
            )

            if existing_rating:
                # Update existing rating in place; updated_at is set
                # server-side via func.now().
                existing_rating.rating = rating_value
                existing_rating.updated_at = func.now()
            else:
                # Create new rating
                new_rating = ResearchRating(
                    research_id=research_id, rating=rating_value
                )
                session.add(new_rating)

            session.commit()

            return jsonify(
                {
                    "status": "success",
                    "message": "Rating saved successfully",
                    "rating": rating_value,
                }
            )

    except Exception:
        logger.exception("Error saving research rating")
        return (
            jsonify(
                {
                    "status": "error",
                    "message": "An internal error occurred. Please try again later.",
                }
            ),
            500,
        )

1349 

1350 

@metrics_bp.route("/star-reviews")
@login_required
def star_reviews():
    """Render the star reviews metrics dashboard page."""
    template = "pages/star_reviews.html"
    return render_template_with_defaults(template)

1356 

1357 

@metrics_bp.route("/costs")
@login_required
def cost_analytics():
    """Render the cost analytics dashboard page."""
    template = "pages/cost_analytics.html"
    return render_template_with_defaults(template)

1363 

1364 

@metrics_bp.route("/api/star-reviews")
@login_required
def api_star_reviews():
    """Get star reviews analytics data.

    Aggregates ``ResearchRating`` rows for the requested ``period`` into:
    overall stats (average + 1..5 distribution), per-LLM-model and
    per-search-engine ratings (joined via ``TokenUsage``), daily rating
    trends, and the 20 most recent ratings with research details.
    Returns HTTP 500 with a generic message on unexpected failure.
    """
    try:
        username = flask_session["username"]

        period = request.args.get("period", "30d")

        with get_user_db_session(username) as session:
            # Build base query with time filter
            # NOTE(review): base_query is constructed but never used below —
            # each section builds its own query and re-applies time_condition.
            base_query = session.query(ResearchRating)
            time_condition = get_time_filter_condition(
                period, ResearchRating.created_at
            )
            if time_condition is not None:
                base_query = base_query.filter(time_condition)

            # Overall rating statistics: average, total, and a per-star
            # breakdown computed with SUM(CASE ...) in a single query.
            overall_stats = session.query(
                func.avg(ResearchRating.rating).label("avg_rating"),
                func.count(ResearchRating.rating).label("total_ratings"),
                func.sum(case((ResearchRating.rating == 5, 1), else_=0)).label(
                    "five_star"
                ),
                func.sum(case((ResearchRating.rating == 4, 1), else_=0)).label(
                    "four_star"
                ),
                func.sum(case((ResearchRating.rating == 3, 1), else_=0)).label(
                    "three_star"
                ),
                func.sum(case((ResearchRating.rating == 2, 1), else_=0)).label(
                    "two_star"
                ),
                func.sum(case((ResearchRating.rating == 1, 1), else_=0)).label(
                    "one_star"
                ),
            )

            if time_condition is not None:
                overall_stats = overall_stats.filter(time_condition)

            overall_stats = overall_stats.first()

            # Ratings by LLM model (get from token_usage since Research doesn't have model field)
            # NOTE(review): the outer join can duplicate a rating row when a
            # research has multiple TokenUsage records, which skews the
            # averages/counts — confirm whether deduplication is intended.
            llm_ratings_query = session.query(
                func.coalesce(TokenUsage.model_name, "Unknown").label("model"),
                func.avg(ResearchRating.rating).label("avg_rating"),
                func.count(ResearchRating.rating).label("rating_count"),
                func.sum(case((ResearchRating.rating >= 4, 1), else_=0)).label(
                    "positive_ratings"
                ),
            ).outerjoin(
                TokenUsage, ResearchRating.research_id == TokenUsage.research_id
            )

            if time_condition is not None:
                llm_ratings_query = llm_ratings_query.filter(time_condition)

            llm_ratings = (
                llm_ratings_query.group_by(TokenUsage.model_name)
                .order_by(func.avg(ResearchRating.rating).desc())
                .all()
            )

            # Ratings by search engine (join with token_usage to get search engine info)
            search_engine_ratings_query = session.query(
                func.coalesce(
                    TokenUsage.search_engine_selected, "Unknown"
                ).label("search_engine"),
                func.avg(ResearchRating.rating).label("avg_rating"),
                func.count(ResearchRating.rating).label("rating_count"),
                func.sum(case((ResearchRating.rating >= 4, 1), else_=0)).label(
                    "positive_ratings"
                ),
            ).outerjoin(
                TokenUsage, ResearchRating.research_id == TokenUsage.research_id
            )

            if time_condition is not None:
                search_engine_ratings_query = (
                    search_engine_ratings_query.filter(time_condition)
                )

            search_engine_ratings = (
                search_engine_ratings_query.group_by(
                    TokenUsage.search_engine_selected
                )
                .having(func.count(ResearchRating.rating) > 0)
                .order_by(func.avg(ResearchRating.rating).desc())
                .all()
            )

            # Rating trends over time (one row per calendar day)
            rating_trends_query = session.query(
                func.date(ResearchRating.created_at).label("date"),
                func.avg(ResearchRating.rating).label("avg_rating"),
                func.count(ResearchRating.rating).label("daily_count"),
            )

            if time_condition is not None:
                rating_trends_query = rating_trends_query.filter(time_condition)

            rating_trends = (
                rating_trends_query.group_by(
                    func.date(ResearchRating.created_at)
                )
                .order_by("date")
                .all()
            )

            # Recent ratings with research details
            # NOTE(review): both ResearchRating.created_at and
            # Research.created_at are selected, so the row exposes two
            # columns with the same name; `rating.created_at` below may
            # resolve to either — confirm which timestamp is intended.
            recent_ratings_query = (
                session.query(
                    ResearchRating.rating,
                    ResearchRating.created_at,
                    ResearchRating.research_id,
                    Research.query,
                    Research.mode,
                    TokenUsage.model_name,
                    Research.created_at,
                )
                .outerjoin(Research, ResearchRating.research_id == Research.id)
                .outerjoin(
                    TokenUsage,
                    ResearchRating.research_id == TokenUsage.research_id,
                )
            )

            if time_condition is not None:
                recent_ratings_query = recent_ratings_query.filter(
                    time_condition
                )

            recent_ratings = (
                recent_ratings_query.order_by(ResearchRating.created_at.desc())
                .limit(20)
                .all()
            )

            return jsonify(
                {
                    "overall_stats": {
                        "avg_rating": round(overall_stats.avg_rating or 0, 2),
                        "total_ratings": overall_stats.total_ratings or 0,
                        "rating_distribution": {
                            "5": overall_stats.five_star or 0,
                            "4": overall_stats.four_star or 0,
                            "3": overall_stats.three_star or 0,
                            "2": overall_stats.two_star or 0,
                            "1": overall_stats.one_star or 0,
                        },
                    },
                    "llm_ratings": [
                        {
                            "model": rating.model,
                            "avg_rating": round(rating.avg_rating or 0, 2),
                            "rating_count": rating.rating_count or 0,
                            "positive_ratings": rating.positive_ratings or 0,
                            # Share of ratings >= 4; max(.., 1) guards
                            # against division by zero.
                            "satisfaction_rate": round(
                                (rating.positive_ratings or 0)
                                / max(rating.rating_count or 1, 1)
                                * 100,
                                1,
                            ),
                        }
                        for rating in llm_ratings
                    ],
                    "search_engine_ratings": [
                        {
                            "search_engine": rating.search_engine,
                            "avg_rating": round(rating.avg_rating or 0, 2),
                            "rating_count": rating.rating_count or 0,
                            "positive_ratings": rating.positive_ratings or 0,
                            "satisfaction_rate": round(
                                (rating.positive_ratings or 0)
                                / max(rating.rating_count or 1, 1)
                                * 100,
                                1,
                            ),
                        }
                        for rating in search_engine_ratings
                    ],
                    "rating_trends": [
                        {
                            "date": str(trend.date),
                            "avg_rating": round(trend.avg_rating or 0, 2),
                            "count": trend.daily_count or 0,
                        }
                        for trend in rating_trends
                    ],
                    "recent_ratings": [
                        {
                            "rating": rating.rating,
                            "created_at": str(rating.created_at),
                            "research_id": rating.research_id,
                            # Fall back to a generic label when the outer
                            # join found no matching Research row.
                            "query": (
                                rating.query
                                if rating.query
                                else f"Research Session #{rating.research_id}"
                            ),
                            "mode": rating.mode
                            if rating.mode
                            else "Standard Research",
                            "llm_model": (
                                rating.model_name
                                if rating.model_name
                                else "LLM Model"
                            ),
                        }
                        for rating in recent_ratings
                    ],
                }
            )

    except Exception:
        logger.exception("Error getting star reviews data")
        return (
            jsonify(
                {"error": "An internal error occurred. Please try again later."}
            ),
            500,
        )

1588 

1589 

@metrics_bp.route("/api/pricing")
@login_required
def api_pricing():
    """Return the static LLM pricing table as JSON."""
    try:
        from ...metrics.pricing.pricing_fetcher import PricingFetcher

        # Static pricing only; no async fetch against provider APIs.
        pricing = PricingFetcher().static_pricing

        return jsonify(
            {
                "status": "success",
                "pricing": pricing,
                "last_updated": datetime.now(UTC).isoformat(),
                "note": "Pricing data is from static configuration. Real-time APIs not available for most providers.",
            }
        )

    except Exception:
        logger.exception("Error fetching pricing data")
        return jsonify({"error": "Internal Server Error"}), 500

1613 

1614 

@metrics_bp.route("/api/pricing/<model_name>")
@login_required
def api_model_pricing(model_name):
    """Return pricing for one model, optionally scoped by ?provider=."""
    try:
        provider = request.args.get("provider")  # optional query param

        from ...metrics.pricing.cost_calculator import CostCalculator

        calculator = CostCalculator()
        # Prefer the cached pricing entry; if absent, derive it from a
        # sample synchronous cost calculation (1000/1000 tokens).
        pricing = calculator.cache.get_model_pricing(model_name)
        if not pricing:
            sample = calculator.calculate_cost_sync(model_name, 1000, 1000)
            pricing = sample.get("pricing_used", {})

        return jsonify(
            {
                "status": "success",
                "model": model_name,
                "provider": provider,
                "pricing": pricing,
                "last_updated": datetime.now(UTC).isoformat(),
            }
        )

    except Exception:
        logger.exception(f"Error getting pricing for model: {model_name}")
        return jsonify({"error": "An internal error occurred"}), 500

1646 

1647 

@metrics_bp.route("/api/cost-calculation", methods=["POST"])
@login_required
@require_json_body(error_message="No data provided")
def api_cost_calculation():
    """Calculate cost for token usage.

    JSON body:
        model_name: required model identifier.
        provider: optional provider name (echoed back).
        prompt_tokens / completion_tokens: non-negative integers
            (default 0).

    Returns 400 for missing/invalid input, 500 on unexpected failure.
    """
    try:
        data = request.get_json()
        model_name = data.get("model_name")
        provider = data.get("provider")  # Optional provider parameter
        prompt_tokens = data.get("prompt_tokens", 0)
        completion_tokens = data.get("completion_tokens", 0)

        if not model_name:
            return jsonify({"error": "model_name is required"}), 400

        # Validate token counts up front: previously a string or negative
        # value reached the calculator / arithmetic below and surfaced as
        # a 500. bool is excluded explicitly since it is an int subclass.
        for value in (prompt_tokens, completion_tokens):
            if (
                isinstance(value, bool)
                or not isinstance(value, int)
                or value < 0
            ):
                return (
                    jsonify(
                        {
                            "error": "prompt_tokens and completion_tokens must be non-negative integers"
                        }
                    ),
                    400,
                )

        from ...metrics.pricing.cost_calculator import CostCalculator

        # Use synchronous cost calculation
        calculator = CostCalculator()
        cost_data = calculator.calculate_cost_sync(
            model_name, prompt_tokens, completion_tokens
        )

        return jsonify(
            {
                "status": "success",
                "model_name": model_name,
                "provider": provider,
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens,
                **cost_data,
            }
        )

    except Exception:
        logger.exception("Error calculating cost")
        return jsonify({"error": "An internal error occurred"}), 500

1686 

1687 

@metrics_bp.route("/api/research-costs/<string:research_id>")
@login_required
def api_research_costs(research_id):
    """Return the aggregate token cost for one research session."""
    try:
        username = flask_session["username"]

        with get_user_db_session(username) as session:
            # All token-usage rows recorded for this research session.
            usage_records = (
                session.query(TokenUsage)
                .filter(TokenUsage.research_id == research_id)
                .all()
            )

            if not usage_records:
                # Nothing tracked: report a zero cost with an explanation.
                return jsonify(
                    {
                        "status": "success",
                        "research_id": research_id,
                        "total_cost": 0.0,
                        "message": "No token usage data found for this research session",
                    }
                )

            # Normalize ORM rows into plain dicts for the calculator.
            usage_data = [
                {
                    "model_name": record.model_name,
                    # Older rows may predate the provider column.
                    "provider": getattr(record, "provider", None),
                    "prompt_tokens": record.prompt_tokens,
                    "completion_tokens": record.completion_tokens,
                    "timestamp": record.timestamp,
                }
                for record in usage_records
            ]

            from ...metrics.pricing.cost_calculator import CostCalculator

            # Price each record synchronously.
            calculator = CostCalculator()
            costs = [
                {
                    **entry,
                    **calculator.calculate_cost_sync(
                        entry["model_name"],
                        entry["prompt_tokens"],
                        entry["completion_tokens"],
                    ),
                }
                for entry in usage_data
            ]

            prompt_total = sum(e["prompt_tokens"] for e in usage_data)
            completion_total = sum(
                e["completion_tokens"] for e in usage_data
            )
            grand_total = sum(c["total_cost"] for c in costs)

            return jsonify(
                {
                    "status": "success",
                    "research_id": research_id,
                    "total_cost": round(grand_total, 6),
                    "total_tokens": prompt_total + completion_total,
                    "prompt_tokens": prompt_total,
                    "completion_tokens": completion_total,
                }
            )

    except Exception:
        logger.exception(
            f"Error getting research costs for research: {research_id}"
        )
        return jsonify({"error": "An internal error occurred"}), 500

1767 

1768 

@metrics_bp.route("/api/cost-analytics")
@login_required
def api_cost_analytics():
    """Get cost analytics across all research sessions.

    Query params:
        period: time-window key (defaults to "30d").

    Returns an overall cost/token overview, the top-10 most expensive
    research sessions, and the session count. On failure this route
    deliberately returns HTTP 200 with a zeroed payload so the dashboard
    UI keeps rendering.
    """
    # Resolve the period before entering the try block: the fallback
    # response in the exception handler references it, and previously a
    # failure occurring before this assignment raised NameError inside
    # the handler.
    period = request.args.get("period", "30d")
    try:
        username = flask_session["username"]

        with get_user_db_session(username) as session:
            # Get token usage for the period
            query = session.query(TokenUsage)
            time_condition = get_time_filter_condition(
                period, TokenUsage.timestamp
            )
            if time_condition is not None:
                query = query.filter(time_condition)

            # Cheap existence check before running expensive queries.
            record_count = query.count()

            if record_count == 0:
                return jsonify(
                    {
                        "status": "success",
                        "period": period,
                        "overview": {
                            "total_cost": 0.0,
                            "total_tokens": 0,
                            "prompt_tokens": 0,
                            "completion_tokens": 0,
                        },
                        "top_expensive_research": [],
                        "research_count": 0,
                        "message": "No token usage data found for this period",
                    }
                )

            # Cap very large datasets to the most recent 1000 records to
            # avoid request timeouts.
            if record_count > 1000:
                logger.warning(
                    f"Large dataset detected ({record_count} records), limiting to recent 1000 for performance"
                )
                usage_records = (
                    query.order_by(TokenUsage.timestamp.desc())
                    .limit(1000)
                    .all()
                )
            else:
                usage_records = query.all()

            # Normalize ORM rows into plain dicts for the calculator.
            usage_data = [
                {
                    "model_name": record.model_name,
                    # Older rows may predate the provider column.
                    "provider": getattr(record, "provider", None),
                    "prompt_tokens": record.prompt_tokens,
                    "completion_tokens": record.completion_tokens,
                    "research_id": record.research_id,
                    "timestamp": record.timestamp,
                }
                for record in usage_records
            ]

            from ...metrics.pricing.cost_calculator import CostCalculator

            # Use synchronous calculation
            calculator = CostCalculator()

            # Price each record exactly once; both the overall totals and
            # the per-research grouping below reuse these results
            # (previously every record was priced a second time when
            # aggregating per research).
            costs = []
            for record in usage_data:
                cost_data = calculator.calculate_cost_sync(
                    record["model_name"],
                    record["prompt_tokens"],
                    record["completion_tokens"],
                )
                costs.append({**record, **cost_data})

            total_cost = sum(c["total_cost"] for c in costs)
            total_prompt_tokens = sum(r["prompt_tokens"] for r in usage_data)
            total_completion_tokens = sum(
                r["completion_tokens"] for r in usage_data
            )

            cost_summary = {
                "total_cost": round(total_cost, 6),
                "total_tokens": total_prompt_tokens + total_completion_tokens,
                "prompt_tokens": total_prompt_tokens,
                "completion_tokens": total_completion_tokens,
            }

            # Aggregate cost per research session from the priced records.
            research_totals: dict[str, float] = {}
            for cost in costs:
                rid = cost["research_id"]
                research_totals[rid] = (
                    research_totals.get(rid, 0.0) + cost["total_cost"]
                )
            research_summaries = {
                rid: {"total_cost": round(total, 6)}
                for rid, total in research_totals.items()
            }

            # Top 10 most expensive research sessions.
            top_expensive = sorted(
                (
                    (rid, data["total_cost"])
                    for rid, data in research_summaries.items()
                ),
                key=lambda item: item[1],
                reverse=True,
            )[:10]

            return jsonify(
                {
                    "status": "success",
                    "period": period,
                    "overview": cost_summary,
                    "top_expensive_research": [
                        {"research_id": rid, "total_cost": cost}
                        for rid, cost in top_expensive
                    ],
                    "research_count": len(research_summaries),
                }
            )

    except Exception:
        logger.exception("Error getting cost analytics")
        # Degrade gracefully: HTTP 200 with a zeroed payload so the UI
        # does not break.
        return (
            jsonify(
                {
                    "status": "success",
                    "period": period,
                    "overview": {
                        "total_cost": 0.0,
                        "total_tokens": 0,
                        "prompt_tokens": 0,
                        "completion_tokens": 0,
                    },
                    "top_expensive_research": [],
                    "research_count": 0,
                    "error": "Cost analytics temporarily unavailable",
                }
            ),
            200,
        )  # Return 200 to avoid breaking the UI

1931 

1932 

@metrics_bp.route("/links")
@login_required
def link_analytics():
    """Render the link analytics dashboard page."""
    template = "pages/link_analytics.html"
    return render_template_with_defaults(template)

1938 

1939 

@metrics_bp.route("/api/link-analytics")
@login_required
def api_link_analytics():
    """Return aggregated link analytics for the requested period."""
    try:
        username = flask_session["username"]
        period = request.args.get("period", "30d")

        # Delegate the aggregation to the shared analytics helper.
        link_data = get_link_analytics(period, username)

        return jsonify(
            {
                "status": "success",
                "data": link_data["link_analytics"],
                "period": period,
            }
        )

    except Exception:
        logger.exception("Error getting link analytics")
        error_body = {
            "status": "error",
            "message": "An internal error occurred. Please try again later.",
        }
        return jsonify(error_body), 500

1971 

1972 

@metrics_bp.route("/api/domain-classifications", methods=["GET"])
@login_required
def api_get_domain_classifications():
    """Return every stored domain classification for the current user."""
    classifier = None
    try:
        classifier = DomainClassifier(flask_session["username"])
        payload = [
            item.to_dict() for item in classifier.get_all_classifications()
        ]

        return jsonify(
            {
                "status": "success",
                "classifications": payload,
                "total": len(payload),
            }
        )

    except Exception:
        logger.exception("Error getting domain classifications")
        return jsonify(
            {"status": "error", "message": "Failed to retrieve classifications"}
        ), 500
    finally:
        # Release any resources (LLM/database handles) the classifier holds.
        if classifier is not None:
            from ...utilities.resource_utils import safe_close

            safe_close(classifier, "domain classifier")

2002 

2003 

@metrics_bp.route("/api/domain-classifications/summary", methods=["GET"])
@login_required
def api_get_classifications_summary():
    """Return a per-category summary of domain classifications."""
    classifier = None
    try:
        classifier = DomainClassifier(flask_session["username"])
        return jsonify(
            {
                "status": "success",
                "summary": classifier.get_categories_summary(),
            }
        )

    except Exception:
        logger.exception("Error getting classifications summary")
        return jsonify(
            {"status": "error", "message": "Failed to retrieve summary"}
        ), 500
    finally:
        # Release any resources (LLM/database handles) the classifier holds.
        if classifier is not None:
            from ...utilities.resource_utils import safe_close

            safe_close(classifier, "domain classifier")

2027 

2028 

@metrics_bp.route("/api/domain-classifications/classify", methods=["POST"])
@login_required
def api_classify_domains():
    """Trigger classification of a specific domain or batch classification.

    JSON body (optional):
        domain: a single domain to classify (used when batch is false).
        force_update: re-classify even if a classification already exists.
        batch: when true, classify all known domains in this request.

    Returns 400 when neither a domain nor batch mode is supplied, or when
    single-domain classification fails; 500 on unexpected errors.
    """
    classifier = None
    try:
        username = flask_session["username"]

        # Body is optional; missing fields fall back to safe defaults.
        data = request.get_json() or {}
        domain = data.get("domain")
        force_update = data.get("force_update", False)
        batch_mode = data.get("batch", False)

        # Get settings snapshot for LLM configuration
        from ...settings.manager import SettingsManager
        from ...database.session_context import get_user_db_session

        with get_user_db_session(username) as db_session:
            settings_manager = SettingsManager(db_session=db_session)
            settings_snapshot = settings_manager.get_all_settings()

        classifier = DomainClassifier(
            username, settings_snapshot=settings_snapshot
        )

        if domain and not batch_mode:
            # Classify single domain
            logger.info(f"Classifying single domain: {domain}")
            classification = classifier.classify_domain(domain, force_update)
            if classification:
                return jsonify(
                    {
                        "status": "success",
                        "classification": classification.to_dict(),
                    }
                )
            return jsonify(
                {
                    "status": "error",
                    "message": f"Failed to classify domain: {domain}",
                }
            ), 400
        if batch_mode:
            # Batch classification - this should really be a background task
            # For now, we'll just return immediately and let the frontend poll
            logger.info("Starting batch classification of all domains")
            results = classifier.classify_all_domains(force_update)

            return jsonify({"status": "success", "results": results})
        return jsonify(
            {
                "status": "error",
                "message": "Must provide either 'domain' or set 'batch': true",
            }
        ), 400

    except Exception:
        logger.exception("Error classifying domains")
        return jsonify(
            {"status": "error", "message": "Failed to classify domains"}
        ), 500
    finally:
        # Always release classifier resources, even on the error paths.
        if classifier is not None:
            from ...utilities.resource_utils import safe_close

            safe_close(classifier, "domain classifier")

2095 

2096 

@metrics_bp.route("/api/domain-classifications/progress", methods=["GET"])
@login_required
def api_classification_progress():
    """Report how many known domains have been classified so far."""
    try:
        username = flask_session["username"]

        with get_user_db_session(username) as session:
            # Collect every unique, normalized domain across all resources.
            rows = session.query(ResearchResource.url).distinct().all()
            domains = set()
            for (url,) in rows:
                if url:
                    normalized = _extract_domain(url)
                    if normalized:
                        domains.add(normalized)

            all_domains = sorted(domains)
            total_domains = len(all_domains)

            # Number of domains that already have a classification row.
            classified_count = session.query(DomainClassification).count()

            percentage = (
                round(classified_count / total_domains * 100, 1)
                if total_domains > 0
                else 0
            )

            return jsonify(
                {
                    "status": "success",
                    "progress": {
                        "total_domains": total_domains,
                        "classified": classified_count,
                        "unclassified": total_domains - classified_count,
                        "percentage": percentage,
                        "all_domains": all_domains,  # Return all domains for classification
                    },
                }
            )

    except Exception:
        logger.exception("Error getting classification progress")
        return jsonify(
            {"status": "error", "message": "Failed to retrieve progress"}
        ), 500