Coverage for src / local_deep_research / web / routes / metrics_routes.py: 61%

691 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-02-25 01:07 +0000

1"""Routes for metrics dashboard.""" 

2 

3from datetime import datetime, timedelta, UTC 

4 

5from flask import Blueprint, jsonify, request, session as flask_session 

6from loguru import logger 

7from sqlalchemy import case, func 

8 

9from ...database.models import ( 

10 RateLimitAttempt, 

11 RateLimitEstimate, 

12 Research, 

13 ResearchRating, 

14 ResearchResource, 

15 ResearchStrategy, 

16 TokenUsage, 

17) 

18from ...domain_classifier import DomainClassifier, DomainClassification 

19from ...database.session_context import get_user_db_session 

20from ...metrics import TokenCounter 

21from ...metrics.query_utils import get_time_filter_condition 

22from ...metrics.search_tracker import get_search_tracker 

23from ...web_search_engines.rate_limiting import get_tracker 

24from ..auth.decorators import login_required 

25from ..utils.templates import render_template_with_defaults 

26 

27# Create a Blueprint for metrics 

28metrics_bp = Blueprint("metrics", __name__, url_prefix="/metrics") 

29 

30 

def get_rating_analytics(period="30d", research_mode="all", username=None):
    """Get rating analytics for the specified period and research mode.

    Args:
        period: Time window key ("7d", "30d", "90d", "365d", "all").
        research_mode: Accepted for interface parity; not used for filtering here.
        username: Account whose ratings are queried; falls back to the Flask session.

    Returns:
        Dict with a single "rating_analytics" key containing the average rating,
        total count, per-star distribution, and satisfaction buckets. On missing
        session or internal error, a zeroed payload is returned instead of raising.
    """

    def _empty_payload(error=None):
        # Shared zeroed result used for the no-session, no-data, and error paths.
        payload = {
            "avg_rating": None,
            "total_ratings": 0,
            "rating_distribution": {},
            "satisfaction_stats": {
                "very_satisfied": 0,
                "satisfied": 0,
                "neutral": 0,
                "dissatisfied": 0,
                "very_dissatisfied": 0,
            },
        }
        if error:
            payload["error"] = error
        return {"rating_analytics": payload}

    try:
        user = username or flask_session.get("username")
        if not user:
            return _empty_payload("No user session")

        # Translate the period key into a day count; None means "all time".
        window_days = {"7d": 7, "30d": 30, "90d": 90, "365d": 365, "all": None}.get(
            period, 30
        )

        with get_user_db_session(user) as session:
            rating_query = session.query(ResearchRating)

            if window_days:
                since = datetime.now(UTC) - timedelta(days=window_days)
                rating_query = rating_query.filter(
                    ResearchRating.created_at >= since
                )

            ratings = rating_query.all()
            if not ratings:
                return _empty_payload()

            values = [r.rating for r in ratings]

            # Per-star counts keyed by the star value as a string ("1".."5").
            distribution = {str(star): values.count(star) for star in range(1, 6)}

            satisfaction = {
                "very_satisfied": values.count(5),
                "satisfied": values.count(4),
                "neutral": values.count(3),
                "dissatisfied": values.count(2),
                "very_dissatisfied": values.count(1),
            }

            return {
                "rating_analytics": {
                    "avg_rating": round(sum(values) / len(values), 1),
                    "total_ratings": len(ratings),
                    "rating_distribution": distribution,
                    "satisfaction_stats": satisfaction,
                }
            }

    except Exception:
        logger.exception("Error getting rating analytics")
        return _empty_payload()

128 

129 

def get_link_analytics(period="30d", username=None):
    """Get link analytics from research resources.

    Aggregates every ResearchResource URL for the user in the given period
    into domain counts, per-domain research usage, temporal trends, source
    types, LLM-based category counts, and quality metrics.

    Args:
        period: Time window key ("7d", "30d", "90d", "365d", "all").
        username: Account to query; falls back to the Flask session username.

    Returns:
        Dict with a single "link_analytics" key. Empty/zeroed payloads are
        returned when there is no session, no data, or an internal error.
    """
    try:
        if not username:
            username = flask_session.get("username")

        if not username:
            return {
                "link_analytics": {
                    "top_domains": [],
                    "total_unique_domains": 0,
                    "avg_links_per_research": 0,
                    "domain_distribution": {},
                    "source_type_analysis": {},
                    "academic_vs_general": {},
                    "total_links": 0,
                    "error": "No user session",
                }
            }

        # Calculate date range
        days_map = {"7d": 7, "30d": 30, "90d": 90, "365d": 365, "all": None}
        days = days_map.get(period, 30)

        with get_user_db_session(username) as session:
            # Base query
            query = session.query(ResearchResource)

            # Apply time filter
            if days:
                cutoff_date = datetime.now(UTC) - timedelta(days=days)
                # NOTE: created_at is compared as an ISO string here —
                # presumably the column stores ISO-formatted text.
                query = query.filter(
                    ResearchResource.created_at >= cutoff_date.isoformat()
                )

            # Get all resources
            resources = query.all()

            if not resources:
                return {
                    "link_analytics": {
                        "top_domains": [],
                        "total_unique_domains": 0,
                        "avg_links_per_research": 0,
                        "domain_distribution": {},
                        "source_type_analysis": {},
                        "academic_vs_general": {},
                        "total_links": 0,
                    }
                }

            # Extract domains from URLs
            from urllib.parse import urlparse
            from ...domain_classifier.classifier import DomainClassifier

            domain_counts = {}
            domain_researches = {}  # Track which researches used each domain
            source_types = {}
            temporal_data = {}  # Track links over time
            domain_connections = {}  # Track domain co-occurrences

            # Generic category counting from LLM classifications
            category_counts = {}

            # Initialize domain classifier for LLM-based categorization
            domain_classifier = DomainClassifier(username=username)
            quality_metrics = {
                "with_title": 0,
                "with_preview": 0,
                "with_both": 0,
                "total": 0,
            }

            for resource in resources:
                if resource.url:
                    try:
                        parsed = urlparse(resource.url)
                        domain = parsed.netloc.lower()
                        # Remove www. prefix
                        if domain.startswith("www."):
                            domain = domain[4:]

                        # Count domains
                        domain_counts[domain] = domain_counts.get(domain, 0) + 1

                        # Track research IDs for each domain
                        if domain not in domain_researches:
                            domain_researches[domain] = set()
                        domain_researches[domain].add(resource.research_id)

                        # Track temporal data (daily counts)
                        if resource.created_at:
                            date_str = resource.created_at[
                                :10
                            ]  # Extract YYYY-MM-DD
                            temporal_data[date_str] = (
                                temporal_data.get(date_str, 0) + 1
                            )

                        # Count categories from LLM classification
                        classification = domain_classifier.get_classification(
                            domain
                        )
                        if classification:
                            category = classification.category
                            category_counts[category] = (
                                category_counts.get(category, 0) + 1
                            )
                        else:
                            category_counts["Unclassified"] = (
                                category_counts.get("Unclassified", 0) + 1
                            )

                        # Track source type from metadata if available
                        if resource.source_type:
                            source_types[resource.source_type] = (
                                source_types.get(resource.source_type, 0) + 1
                            )

                        # Track quality metrics
                        quality_metrics["total"] += 1
                        if resource.title:
                            quality_metrics["with_title"] += 1
                        if resource.content_preview:
                            quality_metrics["with_preview"] += 1
                        if resource.title and resource.content_preview:
                            quality_metrics["with_both"] += 1

                        # Track domain co-occurrences for network visualization
                        research_id = resource.research_id
                        if research_id not in domain_connections:
                            domain_connections[research_id] = []
                        domain_connections[research_id].append(domain)

                    except Exception as e:
                        logger.warning(f"Error parsing URL {resource.url}: {e}")

            # Sort domains by count and get top 10
            sorted_domains = sorted(
                domain_counts.items(), key=lambda x: x[1], reverse=True
            )
            top_10_domains = sorted_domains[:10]

            # Calculate domain distribution (top domains vs others)
            top_10_count = sum(count for _, count in top_10_domains)
            others_count = len(resources) - top_10_count

            # Get unique research IDs to calculate average
            unique_research_ids = set(r.research_id for r in resources)
            avg_links = (
                len(resources) / len(unique_research_ids)
                if unique_research_ids
                else 0
            )

            # Prepare temporal trend data (sorted by date)
            temporal_trend = sorted(
                [
                    {"date": date, "count": count}
                    for date, count in temporal_data.items()
                ],
                key=lambda x: x["date"],
            )

            # Get most recent research for each top domain and classifications
            # (opens a second session; the name shadows the outer one)
            domain_recent_research = {}
            domain_classifications = {}
            with get_user_db_session(username) as session:
                from ...database.models import Research

                # Get classifications for all domains
                all_classifications = session.query(DomainClassification).all()
                for classification in all_classifications:
                    domain_classifications[classification.domain] = {
                        "category": classification.category,
                        "subcategory": classification.subcategory,
                        "confidence": classification.confidence,
                    }

                for domain, _ in top_10_domains:
                    if domain in domain_researches:
                        research_ids = list(domain_researches[domain])[
                            :3
                        ]  # Get up to 3 recent researches
                        researches = (
                            session.query(Research)
                            .filter(Research.id.in_(research_ids))
                            .all()
                        )
                        domain_recent_research[domain] = [
                            {
                                "id": r.id,
                                "query": r.query[:50]
                                if r.query
                                else "Research",
                            }
                            for r in researches
                        ]

            return {
                "link_analytics": {
                    "top_domains": [
                        {
                            "domain": domain,
                            "count": count,
                            "percentage": round(
                                count / len(resources) * 100, 1
                            ),
                            "research_count": len(
                                domain_researches.get(domain, set())
                            ),
                            "recent_researches": domain_recent_research.get(
                                domain, []
                            ),
                            "classification": domain_classifications.get(
                                domain, None
                            ),
                        }
                        for domain, count in top_10_domains
                    ],
                    "total_unique_domains": len(domain_counts),
                    "avg_links_per_research": round(avg_links, 1),
                    "domain_distribution": {
                        "top_10": top_10_count,
                        "others": others_count,
                    },
                    "source_type_analysis": source_types,
                    "category_distribution": category_counts,
                    # Generic pie chart data - use whatever LLM classifier outputs
                    "domain_categories": category_counts,
                    "total_links": len(resources),
                    "total_researches": len(unique_research_ids),
                    "temporal_trend": temporal_trend,
                    "domain_metrics": {
                        domain: {
                            "usage_count": count,
                            "usage_percentage": round(
                                count / len(resources) * 100, 1
                            ),
                            "research_diversity": len(
                                domain_researches.get(domain, set())
                            ),
                            "frequency_rank": rank + 1,
                        }
                        for rank, (domain, count) in enumerate(top_10_domains)
                    },
                }
            }

    except Exception:
        logger.exception("Error getting link analytics")
        return {
            "link_analytics": {
                "top_domains": [],
                "total_unique_domains": 0,
                "avg_links_per_research": 0,
                "domain_distribution": {},
                "source_type_analysis": {},
                "academic_vs_general": {},
                "total_links": 0,
                "error": "Failed to retrieve link analytics",
            }
        }

393 

394 

def get_available_strategies():
    """Get list of all available search strategies from the search system.

    Returns:
        A list of dicts, each with "name" and "description" keys, mirroring
        the strategies registered in AdvancedSearchSystem.__init__.
    """
    # Keep the catalog as (name, description) pairs and expand into dicts;
    # order is significant and matches the search system's registration order.
    catalog = (
        ("standard", "Basic iterative search strategy"),
        ("iterdrag", "Iterative Dense Retrieval Augmented Generation"),
        ("source-based", "Focuses on finding and extracting from sources"),
        ("parallel", "Runs multiple search queries in parallel"),
        ("rapid", "Quick single-pass search"),
        ("recursive", "Recursive decomposition of complex queries"),
        ("iterative", "Loop-based reasoning with persistent knowledge"),
        ("adaptive", "Adaptive step-by-step reasoning"),
        ("smart", "Automatically chooses best strategy based on query"),
        ("browsecomp", "Optimized for BrowseComp-style puzzle queries"),
        (
            "evidence",
            "Enhanced evidence-based verification with improved candidate discovery",
        ),
        (
            "constrained",
            "Progressive constraint-based search that narrows candidates step by step",
        ),
        (
            "parallel-constrained",
            "Parallel constraint-based search with combined constraint execution",
        ),
        (
            "early-stop-constrained",
            "Parallel constraint search with immediate evaluation and early stopping at 99% confidence",
        ),
        ("smart-query", "Smart query generation strategy"),
        (
            "dual-confidence",
            "Dual confidence scoring with positive/negative/uncertainty",
        ),
        (
            "dual-confidence-with-rejection",
            "Dual confidence with early rejection of poor candidates",
        ),
        (
            "concurrent-dual-confidence",
            "Concurrent search & evaluation with progressive constraint relaxation",
        ),
        (
            "modular",
            "Modular architecture using constraint checking and candidate exploration modules",
        ),
        ("modular-parallel", "Modular strategy with parallel exploration"),
        (
            "focused-iteration",
            "Focused iteration strategy optimized for accuracy",
        ),
        (
            "browsecomp-entity",
            "Entity-focused search for BrowseComp questions with knowledge graph building",
        ),
    )
    return [{"name": name, "description": desc} for name, desc in catalog]

480 

481 

def get_strategy_analytics(period="30d", username=None):
    """Get strategy usage analytics for the specified period.

    Args:
        period: Time window key ("7d", "30d", "90d", "365d", "all").
        username: Account to query; falls back to the Flask session username.

    Returns:
        Dict with a single "strategy_analytics" key holding usage counts,
        percentages, the most popular strategy, and the static catalog of
        available strategies. Errors and missing sessions yield zeroed
        payloads rather than raising.
    """
    try:
        if not username:
            username = flask_session.get("username")

        if not username:
            return {
                "strategy_analytics": {
                    "total_research_with_strategy": 0,
                    "total_research": 0,
                    "most_popular_strategy": None,
                    "strategy_usage": [],
                    "strategy_distribution": {},
                    "available_strategies": get_available_strategies(),
                    "error": "No user session",
                }
            }

        # Calculate date range
        days_map = {"7d": 7, "30d": 30, "90d": 90, "365d": 365, "all": None}
        days = days_map.get(period, 30)

        with get_user_db_session(username) as session:
            # Check if we have any ResearchStrategy records
            strategy_count = session.query(ResearchStrategy).count()

            if strategy_count == 0:
                logger.warning("No research strategies found in database")
                return {
                    "strategy_analytics": {
                        "total_research_with_strategy": 0,
                        "total_research": 0,
                        "most_popular_strategy": None,
                        "strategy_usage": [],
                        "strategy_distribution": {},
                        "available_strategies": get_available_strategies(),
                        "message": "Strategy tracking not yet available - run a research to start tracking",
                    }
                }

            # Base query for strategy usage (no JOIN needed since we just want strategy counts)
            query = session.query(
                ResearchStrategy.strategy_name,
                func.count(ResearchStrategy.id).label("usage_count"),
            )

            # Apply time filter if specified
            if days:
                cutoff_date = datetime.now(UTC) - timedelta(days=days)
                query = query.filter(ResearchStrategy.created_at >= cutoff_date)

            # Group by strategy and order by usage
            strategy_results = (
                query.group_by(ResearchStrategy.strategy_name)
                .order_by(func.count(ResearchStrategy.id).desc())
                .all()
            )

            # Get total strategy count for percentage calculation
            # (reuses cutoff_date computed above; both filters share the
            # same `days` guard so it is always defined here)
            total_query = session.query(ResearchStrategy)
            if days:
                total_query = total_query.filter(
                    ResearchStrategy.created_at >= cutoff_date
                )
            total_research = total_query.count()

            # Format strategy data
            strategy_usage = []
            strategy_distribution = {}

            for strategy_name, usage_count in strategy_results:
                percentage = (
                    (usage_count / total_research * 100)
                    if total_research > 0
                    else 0
                )
                strategy_usage.append(
                    {
                        "strategy": strategy_name,
                        "count": usage_count,
                        "percentage": round(percentage, 1),
                    }
                )
                strategy_distribution[strategy_name] = usage_count

            # Find most popular strategy (results are ordered by usage desc)
            most_popular = (
                strategy_usage[0]["strategy"] if strategy_usage else None
            )

            return {
                "strategy_analytics": {
                    "total_research_with_strategy": sum(
                        item["count"] for item in strategy_usage
                    ),
                    "total_research": total_research,
                    "most_popular_strategy": most_popular,
                    "strategy_usage": strategy_usage,
                    "strategy_distribution": strategy_distribution,
                    "available_strategies": get_available_strategies(),
                }
            }

    except Exception:
        logger.exception("Error getting strategy analytics")
        return {
            "strategy_analytics": {
                "total_research_with_strategy": 0,
                "total_research": 0,
                "most_popular_strategy": None,
                "strategy_usage": [],
                "strategy_distribution": {},
                "available_strategies": get_available_strategies(),
                "error": "Failed to retrieve strategy data",
            }
        }

599 

600 

def get_rate_limiting_analytics(period="30d", username=None):
    """Get rate limiting analytics for the specified period.

    Aggregates RateLimitAttempt rows (per-engine success rates, wait times,
    rate-limit events) and joins in per-engine RateLimitEstimate records
    where available.

    Args:
        period: Time window key; note this function accepts "7d", "30d",
            "3m", "1y", with anything else treated as "all".
        username: Account to query; falls back to the Flask session username.

    Returns:
        Dict with a single "rate_limiting" key. Errors and missing sessions
        yield zeroed payloads rather than raising.
    """
    try:
        if not username:
            username = flask_session.get("username")

        if not username:
            return {
                "rate_limiting": {
                    "total_attempts": 0,
                    "successful_attempts": 0,
                    "failed_attempts": 0,
                    "success_rate": 0,
                    "rate_limit_events": 0,
                    "avg_wait_time": 0,
                    "avg_successful_wait": 0,
                    "tracked_engines": 0,
                    "engine_stats": [],
                    "total_engines_tracked": 0,
                    "healthy_engines": 0,
                    "degraded_engines": 0,
                    "poor_engines": 0,
                    "error": "No user session",
                }
            }

        # Calculate date range for timestamp filtering
        # (attempt timestamps are stored as epoch seconds, so the cutoff is
        # computed with time.time() rather than datetime)
        import time

        if period == "7d":
            cutoff_time = time.time() - (7 * 24 * 3600)
        elif period == "30d":
            cutoff_time = time.time() - (30 * 24 * 3600)
        elif period == "3m":
            cutoff_time = time.time() - (90 * 24 * 3600)
        elif period == "1y":
            cutoff_time = time.time() - (365 * 24 * 3600)
        else:  # all
            cutoff_time = 0

        with get_user_db_session(username) as session:
            # Get rate limit attempts
            rate_limit_query = session.query(RateLimitAttempt)

            # Apply time filter
            if cutoff_time > 0:
                rate_limit_query = rate_limit_query.filter(
                    RateLimitAttempt.timestamp >= cutoff_time
                )

            # Get rate limit statistics
            total_attempts = rate_limit_query.count()
            successful_attempts = rate_limit_query.filter(
                RateLimitAttempt.success
            ).count()
            failed_attempts = total_attempts - successful_attempts

            # Count rate limiting events (failures with RateLimitError)
            rate_limit_events = rate_limit_query.filter(
                ~RateLimitAttempt.success,
                RateLimitAttempt.error_type == "RateLimitError",
            ).count()

            logger.info(
                f"Rate limit attempts in database: total={total_attempts}, successful={successful_attempts}"
            )

            # Get all attempts for detailed calculations
            attempts = rate_limit_query.all()

            # Calculate average wait times
            if attempts:
                avg_wait_time = sum(a.wait_time for a in attempts) / len(
                    attempts
                )
                successful_wait_times = [
                    a.wait_time for a in attempts if a.success
                ]
                avg_successful_wait = (
                    sum(successful_wait_times) / len(successful_wait_times)
                    if successful_wait_times
                    else 0
                )
            else:
                avg_wait_time = 0
                avg_successful_wait = 0

            # Get tracked engines - count distinct engine types from attempts
            tracked_engines_query = session.query(
                func.count(func.distinct(RateLimitAttempt.engine_type))
            )
            if cutoff_time > 0:
                tracked_engines_query = tracked_engines_query.filter(
                    RateLimitAttempt.timestamp >= cutoff_time
                )
            tracked_engines = tracked_engines_query.scalar() or 0

            # Get engine-specific stats from attempts
            engine_stats = []

            # Get distinct engine types from attempts
            engine_types_query = session.query(
                RateLimitAttempt.engine_type
            ).distinct()
            if cutoff_time > 0:
                engine_types_query = engine_types_query.filter(
                    RateLimitAttempt.timestamp >= cutoff_time
                )
            engine_types = [row.engine_type for row in engine_types_query.all()]

            # Preload estimates for relevant engines to avoid N+1 queries
            estimates_by_engine = {}
            if engine_types:
                all_estimates = (
                    session.query(RateLimitEstimate)
                    .filter(RateLimitEstimate.engine_type.in_(engine_types))
                    .all()
                )
                estimates_by_engine = {e.engine_type: e for e in all_estimates}

            for engine_type in engine_types:
                engine_attempts_list = [
                    a for a in attempts if a.engine_type == engine_type
                ]
                engine_attempts = len(engine_attempts_list)
                engine_success = len(
                    [a for a in engine_attempts_list if a.success]
                )

                # Get estimate from preloaded dict
                estimate = estimates_by_engine.get(engine_type)

                # Calculate recent success rate (percentage, 0-100)
                recent_success_rate = (
                    (engine_success / engine_attempts * 100)
                    if engine_attempts > 0
                    else 0
                )

                # Determine status based on success rate; the estimate's
                # success_rate is a 0-1 fraction, recent_success_rate is 0-100
                if estimate:
                    status = (
                        "healthy"
                        if estimate.success_rate > 0.8
                        else "degraded"
                        if estimate.success_rate > 0.5
                        else "poor"
                    )
                else:
                    status = (
                        "healthy"
                        if recent_success_rate > 80
                        else "degraded"
                        if recent_success_rate > 50
                        else "poor"
                    )

                engine_stat = {
                    "engine": engine_type,
                    "base_wait": estimate.base_wait_seconds
                    if estimate
                    else 0.0,
                    "base_wait_seconds": round(
                        estimate.base_wait_seconds if estimate else 0.0, 2
                    ),
                    "min_wait_seconds": round(
                        estimate.min_wait_seconds if estimate else 0.0, 2
                    ),
                    "max_wait_seconds": round(
                        estimate.max_wait_seconds if estimate else 0.0, 2
                    ),
                    "success_rate": round(estimate.success_rate * 100, 1)
                    if estimate
                    else recent_success_rate,
                    "total_attempts": estimate.total_attempts
                    if estimate
                    else engine_attempts,
                    "recent_attempts": engine_attempts,
                    "recent_success_rate": round(recent_success_rate, 1),
                    "attempts": engine_attempts,
                    "status": status,
                }

                if estimate:
                    from datetime import datetime

                    engine_stat["last_updated"] = datetime.fromtimestamp(
                        estimate.last_updated, UTC
                    ).isoformat()  # ISO format already includes timezone
                else:
                    engine_stat["last_updated"] = "Never"

                engine_stats.append(engine_stat)

            logger.info(
                f"Tracked engines: {tracked_engines}, engine_stats: {engine_stats}"
            )

            result = {
                "rate_limiting": {
                    "total_attempts": total_attempts,
                    "successful_attempts": successful_attempts,
                    "failed_attempts": failed_attempts,
                    "success_rate": (successful_attempts / total_attempts * 100)
                    if total_attempts > 0
                    else 0,
                    "rate_limit_events": rate_limit_events,
                    "avg_wait_time": round(float(avg_wait_time), 2),
                    "avg_successful_wait": round(float(avg_successful_wait), 2),
                    "tracked_engines": tracked_engines,
                    "engine_stats": engine_stats,
                    "total_engines_tracked": tracked_engines,
                    "healthy_engines": len(
                        [s for s in engine_stats if s["status"] == "healthy"]
                    ),
                    "degraded_engines": len(
                        [s for s in engine_stats if s["status"] == "degraded"]
                    ),
                    "poor_engines": len(
                        [s for s in engine_stats if s["status"] == "poor"]
                    ),
                }
            }

            logger.info(
                f"DEBUG: Returning rate_limiting_analytics result: {result}"
            )
            return result

    except Exception:
        logger.exception("Error getting rate limiting analytics")
        return {
            "rate_limiting": {
                "total_attempts": 0,
                "successful_attempts": 0,
                "failed_attempts": 0,
                "success_rate": 0,
                "rate_limit_events": 0,
                "avg_wait_time": 0,
                "avg_successful_wait": 0,
                "tracked_engines": 0,
                "engine_stats": [],
                "total_engines_tracked": 0,
                "healthy_engines": 0,
                "degraded_engines": 0,
                "poor_engines": 0,
                "error": "An internal error occurred while processing the request.",
            }
        }

850 

851 

@metrics_bp.route("/")
@login_required
def metrics_dashboard():
    """Serve the main metrics dashboard HTML page."""
    template_name = "pages/metrics.html"
    return render_template_with_defaults(template_name)

857 

858 

@metrics_bp.route("/context-overflow")
@login_required
def context_overflow_page():
    """Serve the context overflow analytics HTML page."""
    template_name = "pages/context_overflow.html"
    return render_template_with_defaults(template_name)

864 

865 

@metrics_bp.route("/api/metrics")
@login_required
def api_metrics():
    """Get overall metrics data.

    Combines token usage, search metrics, strategy analytics, rate limiting
    analytics, and user satisfaction ratings into one JSON payload.

    Query params:
        period: Time window key (default "30d").
        mode: Research mode filter (default "all").

    Returns:
        JSON with status/metrics/period/research_mode; 401 when there is no
        user session, 500 on internal error.
    """
    logger.debug("api_metrics endpoint called")
    try:
        # Get username from session
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        # Get time period and research mode from query parameters
        period = request.args.get("period", "30d")
        research_mode = request.args.get("mode", "all")

        token_counter = TokenCounter()
        search_tracker = get_search_tracker()

        # Get both token and search metrics
        token_metrics = token_counter.get_overall_metrics(
            period=period, research_mode=research_mode
        )
        search_metrics = search_tracker.get_search_metrics(
            period=period, research_mode=research_mode
        )

        # Get user satisfaction rating data; failures here degrade to an
        # empty satisfaction payload instead of failing the whole endpoint
        try:
            with get_user_db_session(username) as session:
                # Build base query with time filter
                ratings_query = session.query(ResearchRating)
                time_condition = get_time_filter_condition(
                    period, ResearchRating.created_at
                )
                if time_condition is not None:
                    ratings_query = ratings_query.filter(time_condition)

                # Get average rating
                avg_rating = ratings_query.with_entities(
                    func.avg(ResearchRating.rating).label("avg_rating")
                ).scalar()

                # Get total rating count
                total_ratings = ratings_query.count()

                user_satisfaction = {
                    "avg_rating": round(avg_rating, 1) if avg_rating else None,
                    "total_ratings": total_ratings,
                }
        except Exception as e:
            logger.warning(f"Error getting user satisfaction data: {e}")
            user_satisfaction = {"avg_rating": None, "total_ratings": 0}

        # Get strategy analytics
        strategy_data = get_strategy_analytics(period, username)
        logger.debug(f"strategy_data keys: {list(strategy_data.keys())}")

        # Get rate limiting analytics
        rate_limiting_data = get_rate_limiting_analytics(period, username)
        logger.debug(f"rate_limiting_data: {rate_limiting_data}")
        logger.debug(
            f"rate_limiting_data keys: {list(rate_limiting_data.keys())}"
        )

        # Combine metrics; later dicts win on key collisions
        combined_metrics = {
            **token_metrics,
            **search_metrics,
            **strategy_data,
            **rate_limiting_data,
            "user_satisfaction": user_satisfaction,
        }

        logger.debug(f"combined_metrics keys: {list(combined_metrics.keys())}")
        logger.debug(
            f"combined_metrics['rate_limiting']: {combined_metrics.get('rate_limiting', 'NOT FOUND')}"
        )

        return jsonify(
            {
                "status": "success",
                "metrics": combined_metrics,
                "period": period,
                "research_mode": research_mode,
            }
        )
    except Exception:
        logger.exception("Error getting metrics")
        return (
            jsonify(
                {
                    "status": "error",
                    "message": "An internal error occurred. Please try again later.",
                }
            ),
            500,
        )

965 

966 

@metrics_bp.route("/api/rate-limiting")
@login_required
def api_rate_limiting_metrics():
    """Get detailed rate limiting metrics.

    Query params:
        period: time window string (default "30d"), forwarded to the
            analytics helper.

    Returns:
        JSON with the analytics payload on success, or a 500 error body.
    """
    # Fix: this diagnostic line was left at INFO level ("DEBUG: ..." prefix
    # shows it was a temporary trace); demoted to DEBUG so it no longer
    # spams production logs on every request.
    logger.debug("DEBUG: api_rate_limiting_metrics endpoint called")
    try:
        username = flask_session.get("username")
        period = request.args.get("period", "30d")
        rate_limiting_data = get_rate_limiting_analytics(period, username)

        return jsonify(
            {"status": "success", "data": rate_limiting_data, "period": period}
        )
    except Exception:
        logger.exception("Error getting rate limiting metrics")
        return jsonify(
            {
                "status": "error",
                "message": "Failed to retrieve rate limiting metrics",
            }
        ), 500

988 

989 

@metrics_bp.route("/api/rate-limiting/current")
@login_required
def api_current_rate_limits():
    """Get current rate limit estimates for all engines."""
    try:
        stats = get_tracker().get_stats()

        current_limits = []
        # Each stat row is a 7-tuple; unpack it directly in the loop header.
        for (
            engine_type,
            base_wait,
            min_wait,
            max_wait,
            last_updated,
            total_attempts,
            success_rate,
        ) in stats:
            # Bucket the success rate into a coarse health label.
            if success_rate > 0.8:
                health = "healthy"
            elif success_rate > 0.5:
                health = "degraded"
            else:
                health = "poor"

            current_limits.append(
                {
                    "engine_type": engine_type,
                    "base_wait_seconds": round(base_wait, 2),
                    "min_wait_seconds": round(min_wait, 2),
                    "max_wait_seconds": round(max_wait, 2),
                    "success_rate": round(success_rate * 100, 1),
                    "total_attempts": total_attempts,
                    # ISO format already includes timezone information.
                    "last_updated": datetime.fromtimestamp(
                        last_updated, UTC
                    ).isoformat(),
                    "status": health,
                }
            )

        return jsonify(
            {
                "status": "success",
                "current_limits": current_limits,
                "timestamp": datetime.now(UTC).isoformat(),
            }
        )
    except Exception:
        logger.exception("Error getting current rate limits")
        return jsonify(
            {
                "status": "error",
                "message": "Failed to retrieve current rate limits",
            }
        ), 500

1043 

1044 

@metrics_bp.route("/api/metrics/research/<string:research_id>/links")
@login_required
def api_research_link_metrics(research_id):
    """Get link analytics for a specific research.

    Aggregates the research's stored resources into per-domain counts and
    LLM-derived category counts, and returns a preview of the resources.

    Returns:
        JSON payload with totals, top-20 domains, category distribution and
        the first 10 resources; 401 without a user session; 500 on error.
    """
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        with get_user_db_session(username) as session:
            # Get all resources for this specific research
            resources = (
                session.query(ResearchResource)
                .filter(ResearchResource.research_id == research_id)
                .all()
            )

            if not resources:
                # Empty-but-successful payload keeps the UI code simple.
                return jsonify(
                    {
                        "status": "success",
                        "data": {
                            "total_links": 0,
                            "unique_domains": 0,
                            "domains": [],
                            "category_distribution": {},
                            "domain_categories": {},
                            "resources": [],
                        },
                    }
                )

            from urllib.parse import urlparse

            # Per-domain link counts and generic category counts from the
            # LLM classifier.
            domain_counts = {}
            category_counts = {}

            # Fix: DomainClassifier is already imported at module level; the
            # original re-imported it locally for no benefit.
            domain_classifier = DomainClassifier(username=username)

            for resource in resources:
                if not resource.url:
                    continue
                # Fix: pre-seed `domain` with the raw URL so the except
                # block below can never hit an unbound name (the original
                # raised NameError if urlparse/netloc failed before
                # `domain` was assigned).
                domain = resource.url
                try:
                    parsed = urlparse(resource.url)
                    domain = parsed.netloc.lower()
                    if domain.startswith("www."):
                        domain = domain[4:]

                    domain_counts[domain] = domain_counts.get(domain, 0) + 1

                    # Count categories from LLM classification
                    classification = domain_classifier.get_classification(
                        domain
                    )
                    if classification:
                        category = classification.category
                        category_counts[category] = (
                            category_counts.get(category, 0) + 1
                        )
                    else:
                        category_counts["Unclassified"] = (
                            category_counts.get("Unclassified", 0) + 1
                        )
                except (AttributeError, KeyError) as e:
                    logger.debug(f"Error classifying domain {domain}: {e}")

            # Sort domains by count
            sorted_domains = sorted(
                domain_counts.items(), key=lambda x: x[1], reverse=True
            )

            return jsonify(
                {
                    "status": "success",
                    "data": {
                        "total_links": len(resources),
                        "unique_domains": len(domain_counts),
                        "domains": [
                            {
                                "domain": domain,
                                "count": count,
                                "percentage": round(
                                    count / len(resources) * 100, 1
                                ),
                            }
                            for domain, count in sorted_domains[
                                :20
                            ]  # Top 20 domains
                        ],
                        "category_distribution": category_counts,
                        "domain_categories": category_counts,  # Generic categories from LLM
                        "resources": [
                            {
                                "title": r.title or "Untitled",
                                "url": r.url,
                                "preview": r.content_preview[:200]
                                if r.content_preview
                                else None,
                            }
                            for r in resources[:10]  # First 10 resources
                        ],
                    },
                }
            )

    except Exception:
        logger.exception("Error getting research link metrics")
        return jsonify(
            {"status": "error", "message": "Failed to retrieve link metrics"}
        ), 500

1161 

1162 

@metrics_bp.route("/api/metrics/research/<string:research_id>")
@login_required
def api_research_metrics(research_id):
    """Get metrics for a specific research."""
    try:
        # Delegate the aggregation to the token counter helper.
        counter = TokenCounter()
        return jsonify(
            {
                "status": "success",
                "metrics": counter.get_research_metrics(research_id),
            }
        )
    except Exception:
        logger.exception("Error getting research metrics")
        return (
            jsonify(
                {
                    "status": "error",
                    "message": "An internal error occurred. Please try again later.",
                }
            ),
            500,
        )

1182 

1183 

@metrics_bp.route("/api/metrics/research/<string:research_id>/timeline")
@login_required
def api_research_timeline_metrics(research_id):
    """Get timeline metrics for a specific research."""
    try:
        counter = TokenCounter()
        timeline = counter.get_research_timeline_metrics(research_id)
        return jsonify({"status": "success", "metrics": timeline})
    except Exception:
        logger.exception("Error getting research timeline metrics")
        return (
            jsonify(
                {
                    "status": "error",
                    "message": "An internal error occurred. Please try again later.",
                }
            ),
            500,
        )

1205 

1206 

@metrics_bp.route("/api/metrics/research/<string:research_id>/search")
@login_required
def api_research_search_metrics(research_id):
    """Get search metrics for a specific research."""
    try:
        # The shared search tracker owns per-research search statistics.
        tracker = get_search_tracker()
        return jsonify(
            {
                "status": "success",
                "metrics": tracker.get_research_search_metrics(research_id),
            }
        )
    except Exception:
        logger.exception("Error getting research search metrics")
        return (
            jsonify(
                {
                    "status": "error",
                    "message": "An internal error occurred. Please try again later.",
                }
            ),
            500,
        )

1226 

1227 

@metrics_bp.route("/api/metrics/enhanced")
@login_required
def api_enhanced_metrics():
    """Get enhanced Phase 1 tracking metrics."""
    try:
        # Get time period and research mode from query parameters
        period = request.args.get("period", "30d")
        research_mode = request.args.get("mode", "all")
        username = flask_session.get("username")

        metrics = TokenCounter().get_enhanced_metrics(
            period=period, research_mode=research_mode
        )

        # Add search time series data for the chart
        metrics["search_time_series"] = (
            get_search_tracker().get_search_time_series(
                period=period, research_mode=research_mode
            )
        )

        # Add rating analytics
        metrics.update(get_rating_analytics(period, research_mode, username))

        return jsonify(
            {
                "status": "success",
                "metrics": metrics,
                "period": period,
                "research_mode": research_mode,
            }
        )
    except Exception:
        logger.exception("Error getting enhanced metrics")
        return (
            jsonify(
                {
                    "status": "error",
                    "message": "An internal error occurred. Please try again later.",
                }
            ),
            500,
        )

1274 

1275 

@metrics_bp.route("/api/ratings/<string:research_id>", methods=["GET"])
@login_required
def api_get_research_rating(research_id):
    """Get rating for a specific research session."""
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        with get_user_db_session(username) as session:
            record = (
                session.query(ResearchRating)
                .filter_by(research_id=research_id)
                .first()
            )

            # No rating stored yet: still a successful response.
            if record is None:
                return jsonify({"status": "success", "rating": None})

            return jsonify(
                {
                    "status": "success",
                    "rating": record.rating,
                    "created_at": record.created_at.isoformat(),
                    "updated_at": record.updated_at.isoformat(),
                }
            )

    except Exception:
        logger.exception("Error getting research rating")
        return (
            jsonify(
                {
                    "status": "error",
                    "message": "An internal error occurred. Please try again later.",
                }
            ),
            500,
        )

1317 

1318 

@metrics_bp.route("/api/ratings/<string:research_id>", methods=["POST"])
@login_required
def api_save_research_rating(research_id):
    """Save or update rating for a specific research session.

    Expects a JSON body of the form ``{"rating": <int 1-5>}``.

    Returns:
        200 with the saved rating, 400 for a missing/invalid rating value,
        401 when no user session exists, 500 on unexpected errors.
    """
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        # Fix: silent=True makes a missing/malformed JSON body yield None
        # instead of raising, so it is reported as a 400 below rather than
        # surfacing as a generic 500 (or crashing on None.get()).
        data = request.get_json(silent=True) or {}
        rating_value = data.get("rating")

        # Fix: exclude bools explicitly — isinstance(True, int) is True in
        # Python, so {"rating": true} would otherwise be stored as 1.
        if (
            isinstance(rating_value, bool)
            or not isinstance(rating_value, int)
            or rating_value < 1
            or rating_value > 5
        ):
            return (
                jsonify(
                    {
                        "status": "error",
                        "message": "Rating must be an integer between 1 and 5",
                    }
                ),
                400,
            )

        with get_user_db_session(username) as session:
            # Check if rating already exists
            existing_rating = (
                session.query(ResearchRating)
                .filter_by(research_id=research_id)
                .first()
            )

            if existing_rating:
                # Update existing rating
                existing_rating.rating = rating_value
                existing_rating.updated_at = func.now()
            else:
                # Create new rating
                new_rating = ResearchRating(
                    research_id=research_id, rating=rating_value
                )
                session.add(new_rating)

            session.commit()

            return jsonify(
                {
                    "status": "success",
                    "message": "Rating saved successfully",
                    "rating": rating_value,
                }
            )

    except Exception:
        logger.exception("Error saving research rating")
        return (
            jsonify(
                {
                    "status": "error",
                    "message": "An internal error occurred. Please try again later.",
                }
            ),
            500,
        )

1389 

1390 

@metrics_bp.route("/star-reviews")
@login_required
def star_reviews():
    """Display star reviews metrics page."""
    template = "pages/star_reviews.html"
    return render_template_with_defaults(template)

1396 

1397 

@metrics_bp.route("/costs")
@login_required
def cost_analytics():
    """Display cost analytics page."""
    template = "pages/cost_analytics.html"
    return render_template_with_defaults(template)

1403 

1404 

@metrics_bp.route("/api/star-reviews")
@login_required
def api_star_reviews():
    """Get star reviews analytics data.

    Aggregates ResearchRating rows for the requested period into:
    overall stats (average + per-star distribution), per-LLM-model and
    per-search-engine averages (via outer joins to TokenUsage), daily
    rating trends, and the 20 most recent ratings with research details.

    Query params:
        period: time window string (default "30d") passed to
            get_time_filter_condition.

    Returns:
        JSON object on success; 401 without a user session; 500 on error.
    """
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        period = request.args.get("period", "30d")

        with get_user_db_session(username) as session:
            # Build base query with time filter
            base_query = session.query(ResearchRating)
            time_condition = get_time_filter_condition(
                period, ResearchRating.created_at
            )
            if time_condition is not None:
                base_query = base_query.filter(time_condition)
            # NOTE(review): base_query is never used after this point — every
            # aggregate below builds its own query and re-applies
            # time_condition itself. Looks like dead code; confirm and remove.

            # Overall rating statistics: average, total count, and a
            # per-star histogram computed with conditional sums.
            overall_stats = session.query(
                func.avg(ResearchRating.rating).label("avg_rating"),
                func.count(ResearchRating.rating).label("total_ratings"),
                func.sum(case((ResearchRating.rating == 5, 1), else_=0)).label(
                    "five_star"
                ),
                func.sum(case((ResearchRating.rating == 4, 1), else_=0)).label(
                    "four_star"
                ),
                func.sum(case((ResearchRating.rating == 3, 1), else_=0)).label(
                    "three_star"
                ),
                func.sum(case((ResearchRating.rating == 2, 1), else_=0)).label(
                    "two_star"
                ),
                func.sum(case((ResearchRating.rating == 1, 1), else_=0)).label(
                    "one_star"
                ),
            )

            if time_condition is not None:
                overall_stats = overall_stats.filter(time_condition)

            overall_stats = overall_stats.first()

            # Ratings by LLM model (get from token_usage since Research doesn't have model field)
            # "positive" means a rating of 4 or 5.
            llm_ratings_query = session.query(
                func.coalesce(TokenUsage.model_name, "Unknown").label("model"),
                func.avg(ResearchRating.rating).label("avg_rating"),
                func.count(ResearchRating.rating).label("rating_count"),
                func.sum(case((ResearchRating.rating >= 4, 1), else_=0)).label(
                    "positive_ratings"
                ),
            ).outerjoin(
                TokenUsage, ResearchRating.research_id == TokenUsage.research_id
            )

            if time_condition is not None:
                llm_ratings_query = llm_ratings_query.filter(time_condition)

            llm_ratings = (
                llm_ratings_query.group_by(TokenUsage.model_name)
                .order_by(func.avg(ResearchRating.rating).desc())
                .all()
            )

            # Ratings by search engine (join with token_usage to get search engine info)
            search_engine_ratings_query = session.query(
                func.coalesce(
                    TokenUsage.search_engine_selected, "Unknown"
                ).label("search_engine"),
                func.avg(ResearchRating.rating).label("avg_rating"),
                func.count(ResearchRating.rating).label("rating_count"),
                func.sum(case((ResearchRating.rating >= 4, 1), else_=0)).label(
                    "positive_ratings"
                ),
            ).outerjoin(
                TokenUsage, ResearchRating.research_id == TokenUsage.research_id
            )

            if time_condition is not None:
                search_engine_ratings_query = (
                    search_engine_ratings_query.filter(time_condition)
                )

            # HAVING clause drops engines that have no ratings at all.
            search_engine_ratings = (
                search_engine_ratings_query.group_by(
                    TokenUsage.search_engine_selected
                )
                .having(func.count(ResearchRating.rating) > 0)
                .order_by(func.avg(ResearchRating.rating).desc())
                .all()
            )

            # Rating trends over time: one row per calendar day.
            rating_trends_query = session.query(
                func.date(ResearchRating.created_at).label("date"),
                func.avg(ResearchRating.rating).label("avg_rating"),
                func.count(ResearchRating.rating).label("daily_count"),
            )

            if time_condition is not None:
                rating_trends_query = rating_trends_query.filter(time_condition)

            rating_trends = (
                rating_trends_query.group_by(
                    func.date(ResearchRating.created_at)
                )
                .order_by("date")
                .all()
            )

            # Recent ratings with research details
            # NOTE(review): both ResearchRating.created_at and
            # Research.created_at are selected under the same attribute name;
            # `rating.created_at` below may resolve to either column —
            # confirm which one the UI expects.
            recent_ratings_query = (
                session.query(
                    ResearchRating.rating,
                    ResearchRating.created_at,
                    ResearchRating.research_id,
                    Research.query,
                    Research.mode,
                    TokenUsage.model_name,
                    Research.created_at,
                )
                .outerjoin(Research, ResearchRating.research_id == Research.id)
                .outerjoin(
                    TokenUsage,
                    ResearchRating.research_id == TokenUsage.research_id,
                )
            )

            if time_condition is not None:
                recent_ratings_query = recent_ratings_query.filter(
                    time_condition
                )

            recent_ratings = (
                recent_ratings_query.order_by(ResearchRating.created_at.desc())
                .limit(20)
                .all()
            )

            # Serialize all aggregates; `or 0` / max(..., 1) guards cover
            # NULL aggregates and division by zero on empty groups.
            return jsonify(
                {
                    "overall_stats": {
                        "avg_rating": round(overall_stats.avg_rating or 0, 2),
                        "total_ratings": overall_stats.total_ratings or 0,
                        "rating_distribution": {
                            "5": overall_stats.five_star or 0,
                            "4": overall_stats.four_star or 0,
                            "3": overall_stats.three_star or 0,
                            "2": overall_stats.two_star or 0,
                            "1": overall_stats.one_star or 0,
                        },
                    },
                    "llm_ratings": [
                        {
                            "model": rating.model,
                            "avg_rating": round(rating.avg_rating or 0, 2),
                            "rating_count": rating.rating_count or 0,
                            "positive_ratings": rating.positive_ratings or 0,
                            "satisfaction_rate": round(
                                (rating.positive_ratings or 0)
                                / max(rating.rating_count or 1, 1)
                                * 100,
                                1,
                            ),
                        }
                        for rating in llm_ratings
                    ],
                    "search_engine_ratings": [
                        {
                            "search_engine": rating.search_engine,
                            "avg_rating": round(rating.avg_rating or 0, 2),
                            "rating_count": rating.rating_count or 0,
                            "positive_ratings": rating.positive_ratings or 0,
                            "satisfaction_rate": round(
                                (rating.positive_ratings or 0)
                                / max(rating.rating_count or 1, 1)
                                * 100,
                                1,
                            ),
                        }
                        for rating in search_engine_ratings
                    ],
                    "rating_trends": [
                        {
                            "date": str(trend.date),
                            "avg_rating": round(trend.avg_rating or 0, 2),
                            "count": trend.daily_count or 0,
                        }
                        for trend in rating_trends
                    ],
                    "recent_ratings": [
                        {
                            "rating": rating.rating,
                            "created_at": str(rating.created_at),
                            "research_id": rating.research_id,
                            "query": (
                                rating.query
                                if rating.query
                                else f"Research Session #{rating.research_id}"
                            ),
                            "mode": rating.mode
                            if rating.mode
                            else "Standard Research",
                            "llm_model": (
                                rating.model_name
                                if rating.model_name
                                else "LLM Model"
                            ),
                        }
                        for rating in recent_ratings
                    ],
                }
            )

    except Exception:
        logger.exception("Error getting star reviews data")
        return (
            jsonify(
                {"error": "An internal error occurred. Please try again later."}
            ),
            500,
        )

1632 

1633 

@metrics_bp.route("/api/pricing")
@login_required
def api_pricing():
    """Get current LLM pricing data."""
    try:
        from ...metrics.pricing.pricing_fetcher import PricingFetcher

        # Serve the bundled static pricing table; no async fetch needed.
        pricing_data = PricingFetcher().static_pricing

        return jsonify(
            {
                "status": "success",
                "pricing": pricing_data,
                "last_updated": datetime.now(UTC).isoformat(),
                "note": "Pricing data is from static configuration. Real-time APIs not available for most providers.",
            }
        )

    except Exception:
        logger.exception("Error fetching pricing data")
        return jsonify({"error": "Internal Server Error"}), 500

1657 

1658 

@metrics_bp.route("/api/pricing/<model_name>")
@login_required
def api_model_pricing(model_name):
    """Get pricing for a specific model."""
    try:
        # Optional provider parameter
        provider = request.args.get("provider")

        from ...metrics.pricing.cost_calculator import CostCalculator

        calculator = CostCalculator()
        # Prefer the cached pricing entry; when absent, derive it from a
        # sample synchronous cost calculation (1000/1000 tokens).
        pricing = calculator.cache.get_model_pricing(model_name)
        if not pricing:
            pricing = calculator.calculate_cost_sync(
                model_name, 1000, 1000
            ).get("pricing_used", {})

        return jsonify(
            {
                "status": "success",
                "model": model_name,
                "provider": provider,
                "pricing": pricing,
                "last_updated": datetime.now(UTC).isoformat(),
            }
        )

    except Exception:
        logger.exception(f"Error getting pricing for model: {model_name}")
        return jsonify({"error": "An internal error occurred"}), 500

1690 

1691 

@metrics_bp.route("/api/cost-calculation", methods=["POST"])
@login_required
def api_cost_calculation():
    """Calculate cost for token usage."""
    try:
        payload = request.get_json()

        # Guard clauses for the two validation failures.
        if not payload:
            return jsonify({"error": "No data provided"}), 400

        model_name = payload.get("model_name")
        provider = payload.get("provider")  # Optional provider parameter
        prompt_tokens = payload.get("prompt_tokens", 0)
        completion_tokens = payload.get("completion_tokens", 0)

        if not model_name:
            return jsonify({"error": "model_name is required"}), 400

        from ...metrics.pricing.cost_calculator import CostCalculator

        # Use synchronous cost calculation
        cost_data = CostCalculator().calculate_cost_sync(
            model_name, prompt_tokens, completion_tokens
        )

        response = {
            "status": "success",
            "model_name": model_name,
            "provider": provider,
            "prompt_tokens": prompt_tokens,
            "completion_tokens": completion_tokens,
            "total_tokens": prompt_tokens + completion_tokens,
        }
        response.update(cost_data)
        return jsonify(response)

    except Exception:
        logger.exception("Error calculating cost")
        return jsonify({"error": "An internal error occurred"}), 500

1733 

1734 

@metrics_bp.route("/api/research-costs/<string:research_id>")
@login_required
def api_research_costs(research_id):
    """Get cost analysis for a specific research session."""
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        with get_user_db_session(username) as session:
            # Get token usage records for this research
            usage_records = (
                session.query(TokenUsage)
                .filter(TokenUsage.research_id == research_id)
                .all()
            )

            if not usage_records:
                return jsonify(
                    {
                        "status": "success",
                        "research_id": research_id,
                        "total_cost": 0.0,
                        "message": "No token usage data found for this research session",
                    }
                )

            # Snapshot the fields needed for cost calculation.
            usage_data = [
                {
                    "model_name": record.model_name,
                    # getattr: handle both old and new records (older rows
                    # may predate the provider column).
                    "provider": getattr(record, "provider", None),
                    "prompt_tokens": record.prompt_tokens,
                    "completion_tokens": record.completion_tokens,
                    "timestamp": record.timestamp,
                }
                for record in usage_records
            ]

            from ...metrics.pricing.cost_calculator import CostCalculator

            # Synchronous per-record cost calculation, accumulated directly.
            calculator = CostCalculator()
            total_cost = 0.0
            for entry in usage_data:
                cost_data = calculator.calculate_cost_sync(
                    entry["model_name"],
                    entry["prompt_tokens"],
                    entry["completion_tokens"],
                )
                total_cost += cost_data["total_cost"]

            prompt_total = sum(e["prompt_tokens"] for e in usage_data)
            completion_total = sum(e["completion_tokens"] for e in usage_data)

            return jsonify(
                {
                    "status": "success",
                    "research_id": research_id,
                    "total_cost": round(total_cost, 6),
                    "total_tokens": prompt_total + completion_total,
                    "prompt_tokens": prompt_total,
                    "completion_tokens": completion_total,
                }
            )

    except Exception:
        logger.exception(
            f"Error getting research costs for research: {research_id}"
        )
        return jsonify({"error": "An internal error occurred"}), 500

1818 

1819 

@metrics_bp.route("/api/cost-analytics")
@login_required
def api_cost_analytics():
    """Get cost analytics across all research sessions.

    Query params:
        period: time window string (default "30d").

    Returns:
        JSON with an overview (totals) and the ten most expensive research
        sessions. On unexpected errors a zeroed payload is returned with
        HTTP 200 so the UI keeps working.
    """
    # Fix: default `period` before the try block — the except handler below
    # references it, and the original raised NameError if the exception
    # fired before request.args was parsed.
    period = "30d"
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        period = request.args.get("period", "30d")

        with get_user_db_session(username) as session:
            # Get token usage for the period
            query = session.query(TokenUsage)
            time_condition = get_time_filter_condition(
                period, TokenUsage.timestamp
            )
            if time_condition is not None:
                query = query.filter(time_condition)

            # First check if we have any records to avoid expensive queries
            record_count = query.count()

            if record_count == 0:
                return jsonify(
                    {
                        "status": "success",
                        "period": period,
                        "overview": {
                            "total_cost": 0.0,
                            "total_tokens": 0,
                            "prompt_tokens": 0,
                            "completion_tokens": 0,
                        },
                        "top_expensive_research": [],
                        "research_count": 0,
                        "message": "No token usage data found for this period",
                    }
                )

            # If we have too many records, limit to recent ones to avoid timeout
            if record_count > 1000:
                logger.warning(
                    f"Large dataset detected ({record_count} records), limiting to recent 1000 for performance"
                )
                usage_records = (
                    query.order_by(TokenUsage.timestamp.desc())
                    .limit(1000)
                    .all()
                )
            else:
                usage_records = query.all()

            # Snapshot the fields needed for cost calculation.
            usage_data = [
                {
                    "model_name": record.model_name,
                    # Handle both old and new records (provider column may
                    # be missing on older rows).
                    "provider": getattr(record, "provider", None),
                    "prompt_tokens": record.prompt_tokens,
                    "completion_tokens": record.completion_tokens,
                    "research_id": record.research_id,
                    "timestamp": record.timestamp,
                }
                for record in usage_records
            ]

            from ...metrics.pricing.cost_calculator import CostCalculator

            calculator = CostCalculator()

            # Fix: compute each record's cost exactly once and reuse it for
            # both the overall summary and the per-research grouping (the
            # original recomputed every record's cost a second time while
            # grouping by research_id).
            total_cost = 0.0
            total_prompt_tokens = 0
            total_completion_tokens = 0
            research_totals = {}
            for record in usage_data:
                cost_data = calculator.calculate_cost_sync(
                    record["model_name"],
                    record["prompt_tokens"],
                    record["completion_tokens"],
                )
                total_cost += cost_data["total_cost"]
                total_prompt_tokens += record["prompt_tokens"]
                total_completion_tokens += record["completion_tokens"]
                rid = record["research_id"]
                research_totals[rid] = (
                    research_totals.get(rid, 0) + cost_data["total_cost"]
                )

            cost_summary = {
                "total_cost": round(total_cost, 6),
                "total_tokens": total_prompt_tokens + total_completion_tokens,
                "prompt_tokens": total_prompt_tokens,
                "completion_tokens": total_completion_tokens,
            }

            # Top expensive research sessions (by rounded per-research cost).
            top_expensive = sorted(
                (
                    (rid, round(total, 6))
                    for rid, total in research_totals.items()
                ),
                key=lambda x: x[1],
                reverse=True,
            )[:10]

            return jsonify(
                {
                    "status": "success",
                    "period": period,
                    "overview": cost_summary,
                    "top_expensive_research": [
                        {"research_id": rid, "total_cost": cost}
                        for rid, cost in top_expensive
                    ],
                    "research_count": len(research_totals),
                }
            )

    except Exception:
        logger.exception("Error getting cost analytics")
        # Return a more graceful error response
        return (
            jsonify(
                {
                    "status": "success",
                    "period": period,
                    "overview": {
                        "total_cost": 0.0,
                        "total_tokens": 0,
                        "prompt_tokens": 0,
                        "completion_tokens": 0,
                    },
                    "top_expensive_research": [],
                    "research_count": 0,
                    "error": "Cost analytics temporarily unavailable",
                }
            ),
            200,
        )  # Return 200 to avoid breaking the UI

1986 

1987 

@metrics_bp.route("/links")
@login_required
def link_analytics():
    """Render the link analytics dashboard page for the logged-in user."""
    return render_template_with_defaults("pages/link_analytics.html")

1993 

1994 

@metrics_bp.route("/api/link-analytics")
@login_required
def api_link_analytics():
    """Return link analytics for the requested period as JSON.

    Reads the ``period`` query parameter (default ``"30d"``) and delegates
    the aggregation to ``get_link_analytics``. Responds 401 when no user
    session exists and 500 on any internal failure.
    """
    try:
        user = flask_session.get("username")
        if not user:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        # Period defaults to the last 30 days.
        window = request.args.get("period", "30d")

        # Delegate the heavy lifting to the analytics helper.
        analytics = get_link_analytics(window, user)

        response = {
            "status": "success",
            "data": analytics["link_analytics"],
            "period": window,
        }
        return jsonify(response)

    except Exception:
        logger.exception("Error getting link analytics")
        error_body = {
            "status": "error",
            "message": "An internal error occurred. Please try again later.",
        }
        return jsonify(error_body), 500

2030 

2031 

@metrics_bp.route("/api/domain-classifications", methods=["GET"])
@login_required
def api_get_domain_classifications():
    """Return every stored domain classification for the current user.

    Responds with the serialized classification records and their count;
    401 when no user session exists, 500 on retrieval failure.
    """
    try:
        user = flask_session.get("username")
        if not user:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        records = DomainClassifier(user).get_all_classifications()
        serialized = [record.to_dict() for record in records]

        return jsonify(
            {
                "status": "success",
                "classifications": serialized,
                "total": len(serialized),
            }
        )

    except Exception:
        logger.exception("Error getting domain classifications")
        return jsonify(
            {"status": "error", "message": "Failed to retrieve classifications"}
        ), 500

2059 

2060 

@metrics_bp.route("/api/domain-classifications/summary", methods=["GET"])
@login_required
def api_get_classifications_summary():
    """Return a per-category summary of the user's domain classifications.

    Responds 401 when no user session exists and 500 on failure.
    """
    try:
        user = flask_session.get("username")
        if not user:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        category_summary = DomainClassifier(user).get_categories_summary()
        return jsonify({"status": "success", "summary": category_summary})

    except Exception:
        logger.exception("Error getting classifications summary")
        return jsonify(
            {"status": "error", "message": "Failed to retrieve summary"}
        ), 500

2082 

2083 

@metrics_bp.route("/api/domain-classifications/classify", methods=["POST"])
@login_required
def api_classify_domains():
    """Trigger classification of a specific domain or batch classification.

    JSON body fields: ``domain`` (single-domain mode), ``force_update``
    (re-classify even if cached), and ``batch`` (classify all domains).
    Responds 401 without a session, 400 on bad input or a failed single
    classification, 500 on unexpected errors.
    """
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        payload = request.get_json() or {}
        domain = payload.get("domain")
        force_update = payload.get("force_update", False)
        batch_mode = payload.get("batch", False)

        # Snapshot the user's settings so the classifier has its LLM
        # configuration available.
        from ...settings.manager import SettingsManager
        from ...database.session_context import get_user_db_session

        with get_user_db_session(username) as db_session:
            settings_manager = SettingsManager(db_session=db_session)
            settings_snapshot = settings_manager.get_all_settings()

            classifier = DomainClassifier(
                username, settings_snapshot=settings_snapshot
            )

            if domain and not batch_mode:
                # Classify single domain
                logger.info(f"Classifying single domain: {domain}")
                result = classifier.classify_domain(domain, force_update)
                if not result:
                    return jsonify(
                        {
                            "status": "error",
                            "message": f"Failed to classify domain: {domain}",
                        }
                    ), 400
                return jsonify(
                    {
                        "status": "success",
                        "classification": result.to_dict(),
                    }
                )

            if batch_mode:
                # Batch classification - this should really be a background task
                # For now, we'll just return immediately and let the frontend poll
                logger.info("Starting batch classification of all domains")
                results = classifier.classify_all_domains(force_update)
                return jsonify({"status": "success", "results": results})

            # Neither a domain nor batch mode was requested.
            return jsonify(
                {
                    "status": "error",
                    "message": "Must provide either 'domain' or set 'batch': true",
                }
            ), 400

    except Exception:
        logger.exception("Error classifying domains")
        return jsonify(
            {"status": "error", "message": "Failed to classify domains"}
        ), 500

2150 

2151 

@metrics_bp.route("/api/domain-classifications/progress", methods=["GET"])
@login_required
def api_classification_progress():
    """Get progress of domain classification task.

    Extracts the unique set of normalized domains from all research
    resource URLs, compares it to the count of stored classifications,
    and returns both the percentage complete and the full sorted domain
    list so the frontend can drive batch classification.

    Responds 401 when no user session exists and 500 on failure.
    """
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        # Get counts of classified vs unclassified domains
        with get_user_db_session(username) as session:
            from urllib.parse import urlparse

            # Count total unique domains referenced by resource URLs.
            resources = session.query(ResearchResource.url).distinct().all()
            domains = set()

            for (url,) in resources:
                if not url:
                    continue
                try:
                    # Only the parse itself can raise for malformed URLs;
                    # keep the try body minimal.
                    netloc = urlparse(url).netloc.lower()
                except (ValueError, AttributeError):
                    continue
                # Normalize by stripping a leading "www." prefix.
                domain = netloc[4:] if netloc.startswith("www.") else netloc
                if domain:
                    domains.add(domain)

            # Fixed: removed dead `all_domains = []` initialization and the
            # redundant list() inside sorted().
            all_domains = sorted(domains)
            total_domains = len(domains)

            # Count classified domains
            classified_count = session.query(DomainClassification).count()

            return jsonify(
                {
                    "status": "success",
                    "progress": {
                        "total_domains": total_domains,
                        "classified": classified_count,
                        "unclassified": total_domains - classified_count,
                        "percentage": round(
                            (classified_count / total_domains * 100)
                            if total_domains > 0
                            else 0,
                            1,
                        ),
                        "all_domains": all_domains,  # Return all domains for classification
                    },
                }
            )

    except Exception:
        logger.exception("Error getting classification progress")
        return jsonify(
            {"status": "error", "message": "Failed to retrieve progress"}
        ), 500