Coverage for src/local_deep_research/web/routes/metrics

1"""Routes for metrics dashboard."""

3from datetime import datetime, timedelta, UTC

5from flask import Blueprint, jsonify, request, session as flask_session

6from loguru import logger

7from sqlalchemy import case, func

9from ...database.models import (

10 RateLimitAttempt,

11 RateLimitEstimate,

12 Research,

13 ResearchRating,

14 ResearchResource,

15 ResearchStrategy,

16 TokenUsage,

17)

18from ...domain_classifier import DomainClassifier, DomainClassification

19from ...database.session_context import get_user_db_session

20from ...metrics import TokenCounter

21from ...metrics.query_utils import get_time_filter_condition

22from ...metrics.search_tracker import get_search_tracker

23from ...web_search_engines.rate_limiting import get_tracker

24from ..auth.decorators import login_required

25from ..utils.templates import render_template_with_defaults

27# Create a Blueprint for metrics

28metrics_bp = Blueprint("metrics", __name__, url_prefix="/metrics")

def get_rating_analytics(period="30d", research_mode="all", username=None):
    """Get rating analytics for the specified period and research mode.

    Args:
        period: Time-window key. "7d", "30d", "90d"/"3m" and "365d"/"1y" select
            that many days back from now; "all" disables the time filter. Any
            other value falls back to 30 days.
        research_mode: Accepted for interface compatibility. This function does
            not currently use it to filter ratings.
        username: User whose database is queried. When falsy, the "username"
            entry of the Flask session is used instead.

    Returns:
        A dict with the single key "rating_analytics" holding "avg_rating"
        (rounded to one decimal, or None), "total_ratings",
        "rating_distribution" (keys "1".."5") and "satisfaction_stats".
        When no user can be determined an "error" entry is added. Any exception
        is logged and produces the same empty payload without an "error" entry.
    """

    def _empty_payload(error=None):
        # Single source of truth for the "nothing to report" response shape.
        analytics = {
            "avg_rating": None,
            "total_ratings": 0,
            "rating_distribution": {},
            "satisfaction_stats": {
                "very_satisfied": 0,
                "satisfied": 0,
                "neutral": 0,
                "dissatisfied": 0,
                "very_dissatisfied": 0,
            },
        }
        if error is not None:
            analytics["error"] = error
        return {"rating_analytics": analytics}

    try:
        if not username:
            username = flask_session.get("username")

        if not username:
            return _empty_payload("No user session")

        # Calculate date range. Both spellings of the 90-day and 365-day
        # windows are accepted so that this function agrees with
        # get_rate_limiting_analytics, which is driven by the same `period`.
        days_map = {
            "7d": 7,
            "30d": 30,
            "90d": 90,
            "3m": 90,
            "365d": 365,
            "1y": 365,
            "all": None,
        }
        days = days_map.get(period, 30)

        with get_user_db_session(username) as session:
            query = session.query(ResearchRating)

            # Apply time filter
            if days:
                cutoff_date = datetime.now(UTC) - timedelta(days=days)
                query = query.filter(ResearchRating.created_at >= cutoff_date)

            # Get all ratings
            ratings = query.all()

            if not ratings:
                return _empty_payload()

            # Calculate statistics
            rating_values = [r.rating for r in ratings]
            avg_rating = sum(rating_values) / len(rating_values)

            # Rating distribution: one pass per star value, reused below.
            rating_counts = {
                str(stars): rating_values.count(stars) for stars in range(1, 6)
            }

            # Satisfaction categories are the same counts under friendlier names.
            satisfaction_stats = {
                "very_satisfied": rating_counts["5"],
                "satisfied": rating_counts["4"],
                "neutral": rating_counts["3"],
                "dissatisfied": rating_counts["2"],
                "very_dissatisfied": rating_counts["1"],
            }

            return {
                "rating_analytics": {
                    "avg_rating": round(avg_rating, 1),
                    "total_ratings": len(ratings),
                    "rating_distribution": rating_counts,
                    "satisfaction_stats": satisfaction_stats,
                }
            }

    except Exception:
        logger.exception("Error getting rating analytics")
        return _empty_payload()

128

129

def get_link_analytics(period="30d", username=None):
    """Get link analytics from research resources.

    Args:
        period: Time-window key. "7d", "30d", "90d"/"3m" and "365d"/"1y" select
            that many days back from now; "all" disables the time filter. Any
            other value falls back to 30 days.
        username: User whose database is queried. When falsy, the "username"
            entry of the Flask session is used instead.

    Returns:
        A dict with the single key "link_analytics". On success it contains
        the top ten domains (with usage share, research count, up to three
        associated researches and stored classification), domain totals,
        source-type and category counts, a per-day trend and per-domain
        metrics. When there is no user, no resource, or an exception occurs,
        an empty payload is returned (with an "error" entry for the no-user
        and exception cases).
    """

    def _empty_payload(error=None):
        # Single source of truth for the "nothing to report" response shape.
        analytics = {
            "top_domains": [],
            "total_unique_domains": 0,
            "avg_links_per_research": 0,
            "domain_distribution": {},
            "source_type_analysis": {},
            "academic_vs_general": {},
            "total_links": 0,
        }
        if error is not None:
            analytics["error"] = error
        return {"link_analytics": analytics}

    try:
        if not username:
            username = flask_session.get("username")

        if not username:
            return _empty_payload("No user session")

        # Calculate date range. Both spellings of the 90-day and 365-day
        # windows are accepted so that this function agrees with
        # get_rate_limiting_analytics, which uses "3m" and "1y".
        days_map = {
            "7d": 7,
            "30d": 30,
            "90d": 90,
            "3m": 90,
            "365d": 365,
            "1y": 365,
            "all": None,
        }
        days = days_map.get(period, 30)

        with get_user_db_session(username) as session:
            # Base query
            query = session.query(ResearchResource)

            # Apply time filter. created_at is compared against an ISO string
            # here and sliced as text below.
            if days:
                cutoff_date = datetime.now(UTC) - timedelta(days=days)
                query = query.filter(
                    ResearchResource.created_at >= cutoff_date.isoformat()
                )

            # Get all resources
            resources = query.all()

            if not resources:
                return _empty_payload()

            # Extract domains from URLs
            from urllib.parse import urlparse
            from ...domain_classifier.classifier import DomainClassifier

            domain_counts = {}
            domain_researches = {}  # Track which researches used each domain
            source_types = {}
            temporal_data = {}  # Track links over time

            # Generic category counting from LLM classifications
            category_counts = {}

            # Initialize domain classifier for LLM-based categorization
            domain_classifier = DomainClassifier(username=username)
            # One classifier lookup per distinct domain instead of per resource.
            classification_cache = {}

            for resource in resources:
                if not resource.url:
                    continue
                try:
                    domain = urlparse(resource.url).netloc.lower()
                    # Remove www. prefix
                    if domain.startswith("www."):
                        domain = domain[4:]

                    # Count domains
                    domain_counts[domain] = domain_counts.get(domain, 0) + 1

                    # Track research IDs for each domain
                    domain_researches.setdefault(domain, set()).add(
                        resource.research_id
                    )

                    # Track temporal data (daily counts)
                    if resource.created_at:
                        date_str = resource.created_at[:10]  # Extract YYYY-MM-DD
                        temporal_data[date_str] = (
                            temporal_data.get(date_str, 0) + 1
                        )

                    # Count categories from LLM classification
                    if domain not in classification_cache:
                        classification_cache[domain] = (
                            domain_classifier.get_classification(domain)
                        )
                    classification = classification_cache[domain]
                    category = (
                        classification.category
                        if classification
                        else "Unclassified"
                    )
                    category_counts[category] = (
                        category_counts.get(category, 0) + 1
                    )

                    # Track source type from metadata if available
                    if resource.source_type:
                        source_types[resource.source_type] = (
                            source_types.get(resource.source_type, 0) + 1
                        )

                except Exception as e:
                    # Best effort: one bad resource must not sink the report.
                    logger.warning(f"Error parsing URL {resource.url}: {e}")

            # Sort domains by count and get top 10
            sorted_domains = sorted(
                domain_counts.items(), key=lambda x: x[1], reverse=True
            )
            top_10_domains = sorted_domains[:10]

            # Calculate domain distribution (top domains vs others)
            top_10_count = sum(count for _, count in top_10_domains)
            others_count = len(resources) - top_10_count

            # Get unique research IDs to calculate average
            unique_research_ids = set(r.research_id for r in resources)
            avg_links = (
                len(resources) / len(unique_research_ids)
                if unique_research_ids
                else 0
            )

            # Prepare temporal trend data (sorted by date)
            temporal_trend = sorted(
                [
                    {"date": date, "count": count}
                    for date, count in temporal_data.items()
                ],
                key=lambda x: x["date"],
            )

            # Stored classifications for all domains, keyed by domain.
            # The session opened above is reused for these read-only queries.
            domain_classifications = {
                classification.domain: {
                    "category": classification.category,
                    "subcategory": classification.subcategory,
                    "confidence": classification.confidence,
                }
                for classification in session.query(
                    DomainClassification
                ).all()
            }

            # Up to three researches per top domain. The ids come from a set,
            # so which three are picked is not ordered by recency.
            domain_recent_research = {}
            for domain, _ in top_10_domains:
                if domain not in domain_researches:
                    continue
                research_ids = list(domain_researches[domain])[:3]
                researches = (
                    session.query(Research)
                    .filter(Research.id.in_(research_ids))
                    .all()
                )
                domain_recent_research[domain] = [
                    {
                        "id": r.id,
                        "query": r.query[:50] if r.query else "Research",
                    }
                    for r in researches
                ]

            return {
                "link_analytics": {
                    "top_domains": [
                        {
                            "domain": domain,
                            "count": count,
                            "percentage": round(
                                count / len(resources) * 100, 1
                            ),
                            "research_count": len(
                                domain_researches.get(domain, set())
                            ),
                            "recent_researches": domain_recent_research.get(
                                domain, []
                            ),
                            "classification": domain_classifications.get(
                                domain, None
                            ),
                        }
                        for domain, count in top_10_domains
                    ],
                    "total_unique_domains": len(domain_counts),
                    "avg_links_per_research": round(avg_links, 1),
                    "domain_distribution": {
                        "top_10": top_10_count,
                        "others": others_count,
                    },
                    "source_type_analysis": source_types,
                    "category_distribution": category_counts,
                    # Generic pie chart data - use whatever LLM classifier outputs
                    "domain_categories": category_counts,
                    "total_links": len(resources),
                    "total_researches": len(unique_research_ids),
                    "temporal_trend": temporal_trend,
                    "domain_metrics": {
                        domain: {
                            "usage_count": count,
                            "usage_percentage": round(
                                count / len(resources) * 100, 1
                            ),
                            "research_diversity": len(
                                domain_researches.get(domain, set())
                            ),
                            "frequency_rank": rank + 1,
                        }
                        for rank, (domain, count) in enumerate(top_10_domains)
                    },
                }
            }

    except Exception:
        logger.exception("Error getting link analytics")
        return _empty_payload("Failed to retrieve link analytics")

393

394

def get_available_strategies():
    """Return the catalogue of search strategies known to the search system.

    Returns:
        A newly built list of dicts, each with a "name" and a "description"
        key, in a fixed order.
    """
    # This list comes from the AdvancedSearchSystem.__init__ method
    catalogue = (
        ("standard", "Basic iterative search strategy"),
        ("iterdrag", "Iterative Dense Retrieval Augmented Generation"),
        ("source-based", "Focuses on finding and extracting from sources"),
        ("parallel", "Runs multiple search queries in parallel"),
        ("rapid", "Quick single-pass search"),
        ("recursive", "Recursive decomposition of complex queries"),
        ("iterative", "Loop-based reasoning with persistent knowledge"),
        ("adaptive", "Adaptive step-by-step reasoning"),
        ("smart", "Automatically chooses best strategy based on query"),
        ("browsecomp", "Optimized for BrowseComp-style puzzle queries"),
        (
            "evidence",
            "Enhanced evidence-based verification with improved candidate discovery",
        ),
        (
            "constrained",
            "Progressive constraint-based search that narrows candidates step by step",
        ),
        (
            "parallel-constrained",
            "Parallel constraint-based search with combined constraint execution",
        ),
        (
            "early-stop-constrained",
            "Parallel constraint search with immediate evaluation and early stopping at 99% confidence",
        ),
        ("smart-query", "Smart query generation strategy"),
        (
            "dual-confidence",
            "Dual confidence scoring with positive/negative/uncertainty",
        ),
        (
            "dual-confidence-with-rejection",
            "Dual confidence with early rejection of poor candidates",
        ),
        (
            "concurrent-dual-confidence",
            "Concurrent search & evaluation with progressive constraint relaxation",
        ),
        (
            "modular",
            "Modular architecture using constraint checking and candidate exploration modules",
        ),
        ("modular-parallel", "Modular strategy with parallel exploration"),
        (
            "focused-iteration",
            "Focused iteration strategy optimized for accuracy",
        ),
        (
            "browsecomp-entity",
            "Entity-focused search for BrowseComp questions with knowledge graph building",
        ),
    )
    return [
        {"name": label, "description": summary} for label, summary in catalogue
    ]

480

481

def get_strategy_analytics(period="30d", username=None):
    """Get strategy usage analytics for the specified period.

    Args:
        period: Time-window key. "7d", "30d", "90d"/"3m" and "365d"/"1y" select
            that many days back from now; "all" disables the time filter. Any
            other value falls back to 30 days.
        username: User whose database is queried. When falsy, the "username"
            entry of the Flask session is used instead.

    Returns:
        A dict with the single key "strategy_analytics" holding usage counts
        and percentages per strategy, the most used strategy, the totals and
        the list from get_available_strategies(). An empty payload carries an
        "error" entry (no user / exception) or a "message" entry (no strategy
        rows recorded at all).
    """

    def _empty_payload(**extra):
        # Single source of truth for the "nothing to report" response shape;
        # `extra` carries the trailing "error" or "message" entry.
        analytics = {
            "total_research_with_strategy": 0,
            "total_research": 0,
            "most_popular_strategy": None,
            "strategy_usage": [],
            "strategy_distribution": {},
            "available_strategies": get_available_strategies(),
        }
        analytics.update(extra)
        return {"strategy_analytics": analytics}

    try:
        if not username:
            username = flask_session.get("username")

        if not username:
            return _empty_payload(error="No user session")

        # Calculate date range. api_metrics hands the same `period` to this
        # function and to get_rate_limiting_analytics, which uses "3m"/"1y",
        # so both spellings of the 90-day and 365-day windows are accepted.
        days_map = {
            "7d": 7,
            "30d": 30,
            "90d": 90,
            "3m": 90,
            "365d": 365,
            "1y": 365,
            "all": None,
        }
        days = days_map.get(period, 30)

        with get_user_db_session(username) as session:
            # Check if we have any ResearchStrategy records (regardless of
            # the requested period).
            strategy_count = session.query(ResearchStrategy).count()

            if strategy_count == 0:
                logger.warning("No research strategies found in database")
                return _empty_payload(
                    message="Strategy tracking not yet available - run a research to start tracking"
                )

            # Base query for strategy usage (no JOIN needed since we just want strategy counts)
            query = session.query(
                ResearchStrategy.strategy_name,
                func.count(ResearchStrategy.id).label("usage_count"),
            )

            # Get total strategy count for percentage calculation
            total_query = session.query(ResearchStrategy)

            # Apply the same time filter to both queries if specified
            if days:
                cutoff_date = datetime.now(UTC) - timedelta(days=days)
                query = query.filter(ResearchStrategy.created_at >= cutoff_date)
                total_query = total_query.filter(
                    ResearchStrategy.created_at >= cutoff_date
                )

            # Group by strategy and order by usage
            strategy_results = (
                query.group_by(ResearchStrategy.strategy_name)
                .order_by(func.count(ResearchStrategy.id).desc())
                .all()
            )
            total_research = total_query.count()

            # Format strategy data
            strategy_usage = []
            strategy_distribution = {}

            for strategy_name, usage_count in strategy_results:
                percentage = (
                    (usage_count / total_research * 100)
                    if total_research > 0
                    else 0
                )
                strategy_usage.append(
                    {
                        "strategy": strategy_name,
                        "count": usage_count,
                        "percentage": round(percentage, 1),
                    }
                )
                strategy_distribution[strategy_name] = usage_count

            # Results are ordered by usage, so the first entry is the most popular.
            most_popular = (
                strategy_usage[0]["strategy"] if strategy_usage else None
            )

            return {
                "strategy_analytics": {
                    "total_research_with_strategy": sum(
                        item["count"] for item in strategy_usage
                    ),
                    "total_research": total_research,
                    "most_popular_strategy": most_popular,
                    "strategy_usage": strategy_usage,
                    "strategy_distribution": strategy_distribution,
                    "available_strategies": get_available_strategies(),
                }
            }

    except Exception:
        logger.exception("Error getting strategy analytics")
        return _empty_payload(error="Failed to retrieve strategy data")

599

600

def get_rate_limiting_analytics(period="30d", username=None):
    """Get rate limiting analytics for the specified period.

    Args:
        period: Time-window key. "7d", "30d", "3m"/"90d" and "1y"/"365d" select
            that many days back from now; any other value (including "all")
            applies no time filter.
        username: User whose database is queried. When falsy, the "username"
            entry of the Flask session is used instead.

    Returns:
        A dict with the single key "rate_limiting" holding overall attempt
        counts, success rate, average wait times, the number of tracked
        engines, a per-engine "engine_stats" list and counts of engines per
        status ("healthy", "degraded", "poor"). When no user can be determined
        or an exception occurs, a zeroed payload with an "error" entry is
        returned.
    """

    def _empty_payload(error):
        # Single source of truth for the zeroed response shape.
        return {
            "rate_limiting": {
                "total_attempts": 0,
                "successful_attempts": 0,
                "failed_attempts": 0,
                "success_rate": 0,
                "rate_limit_events": 0,
                "avg_wait_time": 0,
                "avg_successful_wait": 0,
                "tracked_engines": 0,
                "engine_stats": [],
                "total_engines_tracked": 0,
                "healthy_engines": 0,
                "degraded_engines": 0,
                "poor_engines": 0,
                "error": error,
            }
        }

    def _status(rate, healthy_above, degraded_above):
        # Map a success rate onto the three dashboard status labels.
        if rate > healthy_above:
            return "healthy"
        if rate > degraded_above:
            return "degraded"
        return "poor"

    try:
        if not username:
            username = flask_session.get("username")

        if not username:
            return _empty_payload("No user session")

        # Calculate date range for timestamp filtering
        import time

        # The sibling analytics functions spell the longer windows "90d" and
        # "365d"; accept both spellings so one `period` means the same window
        # everywhere.
        period_days = {
            "7d": 7,
            "30d": 30,
            "3m": 90,
            "90d": 90,
            "1y": 365,
            "365d": 365,
        }
        days = period_days.get(period)
        # 0 means "no time filter" (the "all" case and any unknown key).
        cutoff_time = time.time() - (days * 24 * 3600) if days else 0

        with get_user_db_session(username) as session:
            # Get rate limit attempts
            rate_limit_query = session.query(RateLimitAttempt)

            # Apply time filter
            if cutoff_time > 0:
                rate_limit_query = rate_limit_query.filter(
                    RateLimitAttempt.timestamp >= cutoff_time
                )

            # Get rate limit statistics
            total_attempts = rate_limit_query.count()
            successful_attempts = rate_limit_query.filter(
                RateLimitAttempt.success
            ).count()
            failed_attempts = total_attempts - successful_attempts

            # Count rate limiting events (failures with RateLimitError)
            rate_limit_events = rate_limit_query.filter(
                ~RateLimitAttempt.success,
                RateLimitAttempt.error_type == "RateLimitError",
            ).count()

            logger.info(
                f"Rate limit attempts in database: total={total_attempts}, successful={successful_attempts}"
            )

            # Get all attempts for detailed calculations
            attempts = rate_limit_query.all()

            # Calculate average wait times
            if attempts:
                avg_wait_time = sum(a.wait_time for a in attempts) / len(
                    attempts
                )
                successful_wait_times = [
                    a.wait_time for a in attempts if a.success
                ]
                avg_successful_wait = (
                    sum(successful_wait_times) / len(successful_wait_times)
                    if successful_wait_times
                    else 0
                )
            else:
                avg_wait_time = 0
                avg_successful_wait = 0

            # Get tracked engines - count distinct engine types from attempts
            tracked_engines_query = session.query(
                func.count(func.distinct(RateLimitAttempt.engine_type))
            )
            if cutoff_time > 0:
                tracked_engines_query = tracked_engines_query.filter(
                    RateLimitAttempt.timestamp >= cutoff_time
                )
            tracked_engines = tracked_engines_query.scalar() or 0

            # Get distinct engine types from attempts
            engine_types_query = session.query(
                RateLimitAttempt.engine_type
            ).distinct()
            if cutoff_time > 0:
                engine_types_query = engine_types_query.filter(
                    RateLimitAttempt.timestamp >= cutoff_time
                )
            engine_types = [row.engine_type for row in engine_types_query.all()]

            # Preload estimates for relevant engines to avoid N+1 queries
            estimates_by_engine = {}
            if engine_types:
                all_estimates = (
                    session.query(RateLimitEstimate)
                    .filter(RateLimitEstimate.engine_type.in_(engine_types))
                    .all()
                )
                estimates_by_engine = {e.engine_type: e for e in all_estimates}

            # Group the loaded attempts once instead of rescanning the whole
            # list for every engine.
            attempts_by_engine = {}
            for attempt in attempts:
                attempts_by_engine.setdefault(attempt.engine_type, []).append(
                    attempt
                )

            # Get engine-specific stats from attempts
            engine_stats = []
            for engine_type in engine_types:
                engine_attempts_list = attempts_by_engine.get(engine_type, [])
                engine_attempts = len(engine_attempts_list)
                engine_success = len(
                    [a for a in engine_attempts_list if a.success]
                )

                # Get estimate from preloaded dict
                estimate = estimates_by_engine.get(engine_type)

                # Calculate recent success rate (a percentage, 0-100)
                recent_success_rate = (
                    (engine_success / engine_attempts * 100)
                    if engine_attempts > 0
                    else 0
                )

                # Determine status based on success rate. The stored estimate
                # is compared on a 0-1 scale, the recent rate on 0-100.
                if estimate:
                    status = _status(estimate.success_rate, 0.8, 0.5)
                else:
                    status = _status(recent_success_rate, 80, 50)

                engine_stat = {
                    "engine": engine_type,
                    "base_wait": estimate.base_wait_seconds
                    if estimate
                    else 0.0,
                    "base_wait_seconds": round(
                        estimate.base_wait_seconds if estimate else 0.0, 2
                    ),
                    "min_wait_seconds": round(
                        estimate.min_wait_seconds if estimate else 0.0, 2
                    ),
                    "max_wait_seconds": round(
                        estimate.max_wait_seconds if estimate else 0.0, 2
                    ),
                    "success_rate": round(estimate.success_rate * 100, 1)
                    if estimate
                    else recent_success_rate,
                    "total_attempts": estimate.total_attempts
                    if estimate
                    else engine_attempts,
                    "recent_attempts": engine_attempts,
                    "recent_success_rate": round(recent_success_rate, 1),
                    "attempts": engine_attempts,
                    "status": status,
                }

                if estimate:
                    # Uses the module-level datetime import; a function-scope
                    # re-import here would make `datetime` local to the whole
                    # function.
                    engine_stat["last_updated"] = datetime.fromtimestamp(
                        estimate.last_updated, UTC
                    ).isoformat()  # ISO format already includes timezone
                else:
                    engine_stat["last_updated"] = "Never"

                engine_stats.append(engine_stat)

            logger.info(
                f"Tracked engines: {tracked_engines}, engine_stats: {engine_stats}"
            )

            result = {
                "rate_limiting": {
                    "total_attempts": total_attempts,
                    "successful_attempts": successful_attempts,
                    "failed_attempts": failed_attempts,
                    "success_rate": (successful_attempts / total_attempts * 100)
                    if total_attempts > 0
                    else 0,
                    "rate_limit_events": rate_limit_events,
                    "avg_wait_time": round(float(avg_wait_time), 2),
                    "avg_successful_wait": round(float(avg_successful_wait), 2),
                    "tracked_engines": tracked_engines,
                    "engine_stats": engine_stats,
                    "total_engines_tracked": tracked_engines,
                    "healthy_engines": len(
                        [s for s in engine_stats if s["status"] == "healthy"]
                    ),
                    "degraded_engines": len(
                        [s for s in engine_stats if s["status"] == "degraded"]
                    ),
                    "poor_engines": len(
                        [s for s in engine_stats if s["status"] == "poor"]
                    ),
                }
            }

            logger.info(
                f"DEBUG: Returning rate_limiting_analytics result: {result}"
            )
            return result

    except Exception:
        logger.exception("Error getting rate limiting analytics")
        return _empty_payload(
            "An internal error occurred while processing the request."
        )

850

851

@metrics_bp.route("/")
@login_required
def metrics_dashboard():
    """Serve the metrics dashboard page (login required)."""
    template_name = "pages/metrics.html"
    return render_template_with_defaults(template_name)

857

858

@metrics_bp.route("/context-overflow")
@login_required
def context_overflow_page():
    """Serve the context overflow analytics page (login required)."""
    template_name = "pages/context_overflow.html"
    return render_template_with_defaults(template_name)

864

865

@metrics_bp.route("/api/metrics")
@login_required
def api_metrics():
    """Return the combined dashboard metrics as JSON.

    Reads "period" (default "30d") and "mode" (default "all") from the query
    string, then merges token, search, strategy and rate-limiting metrics with
    a small user-satisfaction summary. Responds 401 when the session holds no
    username and 500 when anything unexpected fails.
    """
    logger.debug("api_metrics endpoint called")
    try:
        # The per-user database is selected by the session's username.
        current_user = flask_session.get("username")
        if not current_user:
            unauthorized = {"status": "error", "message": "No user session found"}
            return jsonify(unauthorized), 401

        # Time period and research mode come from the query parameters.
        period = request.args.get("period", "30d")
        research_mode = request.args.get("mode", "all")

        token_counter = TokenCounter()
        search_tracker = get_search_tracker()

        # Token and search metrics share the same filters.
        shared_filters = {"period": period, "research_mode": research_mode}
        token_metrics = token_counter.get_overall_metrics(**shared_filters)
        search_metrics = search_tracker.get_search_metrics(**shared_filters)

        # User satisfaction is best effort: a failure here degrades to an
        # empty summary instead of failing the whole request.
        try:
            with get_user_db_session(current_user) as db:
                rated = db.query(ResearchRating)
                window = get_time_filter_condition(
                    period, ResearchRating.created_at
                )
                if window is not None:
                    rated = rated.filter(window)

                mean_expr = func.avg(ResearchRating.rating).label("avg_rating")
                mean_rating = rated.with_entities(mean_expr).scalar()
                rating_total = rated.count()

                if mean_rating:
                    shown_rating = round(mean_rating, 1)
                else:
                    shown_rating = None
                user_satisfaction = {
                    "avg_rating": shown_rating,
                    "total_ratings": rating_total,
                }
        except Exception as e:
            logger.warning(f"Error getting user satisfaction data: {e}")
            user_satisfaction = {"avg_rating": None, "total_ratings": 0}

        # Strategy analytics
        strategy_data = get_strategy_analytics(period, current_user)
        logger.debug(f"strategy_data keys: {list(strategy_data.keys())}")

        # Rate limiting analytics
        rate_limiting_data = get_rate_limiting_analytics(period, current_user)
        logger.debug(f"rate_limiting_data: {rate_limiting_data}")
        logger.debug(
            f"rate_limiting_data keys: {list(rate_limiting_data.keys())}"
        )

        # Later sources win on key clashes, in this order.
        combined_metrics = {}
        for part in (
            token_metrics,
            search_metrics,
            strategy_data,
            rate_limiting_data,
        ):
            combined_metrics.update(part)
        combined_metrics["user_satisfaction"] = user_satisfaction

        logger.debug(f"combined_metrics keys: {list(combined_metrics.keys())}")
        logger.debug(
            f"combined_metrics['rate_limiting']: {combined_metrics.get('rate_limiting', 'NOT FOUND')}"
        )

        response_body = {
            "status": "success",
            "metrics": combined_metrics,
            "period": period,
            "research_mode": research_mode,
        }
        return jsonify(response_body)
    except Exception:
        logger.exception("Error getting metrics")
        failure = {
            "status": "error",
            "message": "An internal error occurred. Please try again later.",
        }
        return jsonify(failure), 500

965

966

@metrics_bp.route("/api/rate-limiting")
@login_required
def api_rate_limiting_metrics():
    """Return detailed rate limiting metrics as JSON.

    Reads "period" (default "30d") from the query string and delegates to
    get_rate_limiting_analytics. Responds 500 with an error body when an
    exception escapes.
    """
    logger.info("DEBUG: api_rate_limiting_metrics endpoint called")
    try:
        current_user = flask_session.get("username")
        period = request.args.get("period", "30d")

        response_body = {
            "status": "success",
            "data": get_rate_limiting_analytics(period, current_user),
            "period": period,
        }
        return jsonify(response_body)
    except Exception:
        logger.exception("Error getting rate limiting metrics")
        failure = {
            "status": "error",
            "message": "Failed to retrieve rate limiting metrics",
        }
        return jsonify(failure), 500

988

989

@metrics_bp.route("/api/rate-limiting/current")
@login_required
def api_current_rate_limits():
    """Return the tracker's current rate limit estimates for all engines.

    Each row from the tracker's get_stats() is unpacked into seven fields and
    reshaped into a JSON-friendly dict with rounded waits, a percentage
    success rate, an ISO timestamp and a status label. Responds 500 with an
    error body when an exception escapes.
    """
    try:
        rows = get_tracker().get_stats()

        current_limits = []
        for (
            engine_type,
            base_wait,
            min_wait,
            max_wait,
            last_updated,
            total_attempts,
            success_rate,
        ) in rows:
            entry = {
                "engine_type": engine_type,
                "base_wait_seconds": round(base_wait, 2),
                "min_wait_seconds": round(min_wait, 2),
                "max_wait_seconds": round(max_wait, 2),
                "success_rate": round(success_rate * 100, 1),
                "total_attempts": total_attempts,
                # ISO format already includes timezone
                "last_updated": datetime.fromtimestamp(
                    last_updated, UTC
                ).isoformat(),
            }

            # success_rate is compared on a 0-1 scale here.
            if success_rate > 0.8:
                entry["status"] = "healthy"
            elif success_rate > 0.5:
                entry["status"] = "degraded"
            else:
                entry["status"] = "poor"

            current_limits.append(entry)

        response_body = {
            "status": "success",
            "current_limits": current_limits,
            "timestamp": datetime.now(UTC).isoformat(),
        }
        return jsonify(response_body)
    except Exception:
        logger.exception("Error getting current rate limits")
        failure = {
            "status": "error",
            "message": "Failed to retrieve current rate limits",
        }
        return jsonify(failure), 500

1043

1044

@metrics_bp.route("/api/metrics/research/<string:research_id>/links")
@login_required
def api_research_link_metrics(research_id):
    """Get link analytics for a specific research.

    Loads every ``ResearchResource`` row for ``research_id`` from the
    current user's database, counts links per domain (a leading ``www.``
    is stripped), and tallies the category reported by ``DomainClassifier``
    for each domain.

    Args:
        research_id: Identifier taken from the URL path.

    Returns:
        A JSON response with ``total_links``, ``unique_domains``, the top
        20 ``domains``, ``category_distribution`` / ``domain_categories``
        (the same mapping) and the first 10 ``resources``. Returns 401
        when the session has no username and 500 on unexpected errors.
    """
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        with get_user_db_session(username) as session:
            # Get all resources for this specific research
            resources = (
                session.query(ResearchResource)
                .filter(ResearchResource.research_id == research_id)
                .all()
            )

            if not resources:
                return jsonify(
                    {
                        "status": "success",
                        "data": {
                            "total_links": 0,
                            "unique_domains": 0,
                            "domains": [],
                            "category_distribution": {},
                            "domain_categories": {},
                            "resources": [],
                        },
                    }
                )

            from urllib.parse import urlparse
            from ...domain_classifier.classifier import DomainClassifier

            # Pass 1: count links per domain. A URL that cannot be parsed
            # is skipped instead of failing the whole request; urlparse
            # raises ValueError for e.g. a malformed IPv6 literal.
            domain_counts = {}
            for resource in resources:
                if not resource.url:
                    continue
                try:
                    domain = urlparse(resource.url).netloc.lower()
                except (AttributeError, KeyError, ValueError) as e:
                    logger.debug(
                        f"Error parsing resource URL {resource.url!r}: {e}"
                    )
                    continue
                if domain.startswith("www."):
                    domain = domain[4:]
                domain_counts[domain] = domain_counts.get(domain, 0) + 1

            # Pass 2: look up each unique domain once and weight its
            # category by the number of links pointing at that domain.
            domain_classifier = DomainClassifier(username=username)
            category_counts = {}
            for domain, link_count in domain_counts.items():
                try:
                    classification = domain_classifier.get_classification(
                        domain
                    )
                    category = (
                        classification.category
                        if classification
                        else "Unclassified"
                    )
                except (AttributeError, KeyError) as e:
                    logger.debug(f"Error classifying domain {domain}: {e}")
                    continue
                category_counts[category] = (
                    category_counts.get(category, 0) + link_count
                )

            # Sort domains by count (stable, so ties keep first-seen order)
            sorted_domains = sorted(
                domain_counts.items(), key=lambda x: x[1], reverse=True
            )

            return jsonify(
                {
                    "status": "success",
                    "data": {
                        "total_links": len(resources),
                        "unique_domains": len(domain_counts),
                        "domains": [
                            {
                                "domain": domain,
                                "count": count,
                                "percentage": round(
                                    count / len(resources) * 100, 1
                                ),
                            }
                            for domain, count in sorted_domains[
                                :20
                            ]  # Top 20 domains
                        ],
                        "category_distribution": category_counts,
                        "domain_categories": category_counts,  # Generic categories from LLM
                        "resources": [
                            {
                                "title": r.title or "Untitled",
                                "url": r.url,
                                "preview": r.content_preview[:200]
                                if r.content_preview
                                else None,
                            }
                            for r in resources[:10]  # First 10 resources
                        ],
                    },
                }
            )

    except Exception:
        logger.exception("Error getting research link metrics")
        return jsonify(
            {"status": "error", "message": "Failed to retrieve link metrics"}
        ), 500

1161

1162

@metrics_bp.route("/api/metrics/research/<string:research_id>")
@login_required
def api_research_metrics(research_id):
    """Return the ``TokenCounter`` metrics for one research as JSON.

    Any exception is logged and answered with a generic JSON error and
    HTTP 500.
    """
    try:
        research_metrics = TokenCounter().get_research_metrics(research_id)
        success_body = {"status": "success", "metrics": research_metrics}
        return jsonify(success_body)
    except Exception:
        logger.exception("Error getting research metrics")
        error_body = {
            "status": "error",
            "message": "An internal error occurred. Please try again later.",
        }
        return jsonify(error_body), 500

1182

1183

@metrics_bp.route("/api/metrics/research/<string:research_id>/timeline")
@login_required
def api_research_timeline_metrics(research_id):
    """Return the ``TokenCounter`` timeline metrics for one research as JSON.

    Any exception is logged and answered with a generic JSON error and
    HTTP 500.
    """
    try:
        counter = TokenCounter()
        timeline = counter.get_research_timeline_metrics(research_id)
        success_body = {"status": "success", "metrics": timeline}
        return jsonify(success_body)
    except Exception:
        logger.exception("Error getting research timeline metrics")
        error_body = {
            "status": "error",
            "message": "An internal error occurred. Please try again later.",
        }
        return jsonify(error_body), 500

1205

1206

@metrics_bp.route("/api/metrics/research/<string:research_id>/search")
@login_required
def api_research_search_metrics(research_id):
    """Return the search tracker's metrics for one research as JSON.

    Any exception is logged and answered with a generic JSON error and
    HTTP 500.
    """
    try:
        tracker = get_search_tracker()
        per_research = tracker.get_research_search_metrics(research_id)
        success_body = {"status": "success", "metrics": per_research}
        return jsonify(success_body)
    except Exception:
        logger.exception("Error getting research search metrics")
        error_body = {
            "status": "error",
            "message": "An internal error occurred. Please try again later.",
        }
        return jsonify(error_body), 500

1226

1227

@metrics_bp.route("/api/metrics/enhanced")
@login_required
def api_enhanced_metrics():
    """Return the enhanced tracking metrics for a period and research mode.

    Query parameters ``period`` (default ``"30d"``) and ``mode`` (default
    ``"all"``) select the data. The token counter's enhanced metrics are
    extended with the search time series (key ``search_time_series``) and
    with whatever keys ``get_rating_analytics`` returns. Any exception is
    logged and answered with a generic JSON error and HTTP 500.
    """
    try:
        # Filters come from the query string; the user from the session.
        selected_period = request.args.get("period", "30d")
        selected_mode = request.args.get("mode", "all")
        current_user = flask_session.get("username")

        counter = TokenCounter()
        tracker = get_search_tracker()

        metrics = counter.get_enhanced_metrics(
            period=selected_period, research_mode=selected_mode
        )

        # Time series backing the search chart.
        metrics["search_time_series"] = tracker.get_search_time_series(
            period=selected_period, research_mode=selected_mode
        )

        # Merge the rating analytics keys into the same mapping.
        metrics.update(
            get_rating_analytics(selected_period, selected_mode, current_user)
        )

        response_body = {
            "status": "success",
            "metrics": metrics,
            "period": selected_period,
            "research_mode": selected_mode,
        }
        return jsonify(response_body)
    except Exception:
        logger.exception("Error getting enhanced metrics")
        error_body = {
            "status": "error",
            "message": "An internal error occurred. Please try again later.",
        }
        return jsonify(error_body), 500

1274

1275

@metrics_bp.route("/api/ratings/<string:research_id>", methods=["GET"])
@login_required
def api_get_research_rating(research_id):
    """Return the stored rating for one research session.

    Responds with ``rating: null`` when no rating row exists, 401 when the
    session has no username, and a generic JSON error with HTTP 500 on any
    exception.
    """
    try:
        current_user = flask_session.get("username")
        if not current_user:
            unauthenticated = {
                "status": "error",
                "message": "No user session found",
            }
            return jsonify(unauthenticated), 401

        with get_user_db_session(current_user) as session:
            stored = (
                session.query(ResearchRating)
                .filter_by(research_id=research_id)
                .first()
            )

            # Guard clause: nothing rated yet.
            if not stored:
                return jsonify({"status": "success", "rating": None})

            found_body = {
                "status": "success",
                "rating": stored.rating,
                "created_at": stored.created_at.isoformat(),
                "updated_at": stored.updated_at.isoformat(),
            }
            return jsonify(found_body)

    except Exception:
        logger.exception("Error getting research rating")
        error_body = {
            "status": "error",
            "message": "An internal error occurred. Please try again later.",
        }
        return jsonify(error_body), 500

1317

1318

@metrics_bp.route("/api/ratings/<string:research_id>", methods=["POST"])
@login_required
def api_save_research_rating(research_id):
    """Save or update rating for a specific research session.

    Expects a JSON object body with an integer ``rating`` between 1 and 5.
    An existing ``ResearchRating`` row for ``research_id`` is updated in
    place; otherwise a new row is created.

    Args:
        research_id: Identifier taken from the URL path.

    Returns:
        A JSON success payload echoing the rating; 401 when the session
        has no username; 400 when the body is missing, is not a JSON
        object, or carries an invalid rating; 500 on unexpected errors.
    """
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        # silent=True makes a missing/malformed JSON body yield None instead
        # of raising, so bad client input is answered with 400 below rather
        # than being caught by the generic handler and reported as 500.
        data = request.get_json(silent=True)
        rating_value = data.get("rating") if isinstance(data, dict) else None

        # bool is a subclass of int, so JSON `true` must be rejected
        # explicitly or it would be accepted as a rating.
        is_valid_rating = (
            isinstance(rating_value, int)
            and not isinstance(rating_value, bool)
            and 1 <= rating_value <= 5
        )
        if not is_valid_rating:
            return (
                jsonify(
                    {
                        "status": "error",
                        "message": "Rating must be an integer between 1 and 5",
                    }
                ),
                400,
            )

        with get_user_db_session(username) as session:
            # Check if rating already exists
            existing_rating = (
                session.query(ResearchRating)
                .filter_by(research_id=research_id)
                .first()
            )

            if existing_rating:
                # Update existing rating
                existing_rating.rating = rating_value
                existing_rating.updated_at = func.now()
            else:
                # Create new rating
                new_rating = ResearchRating(
                    research_id=research_id, rating=rating_value
                )
                session.add(new_rating)

            session.commit()

            return jsonify(
                {
                    "status": "success",
                    "message": "Rating saved successfully",
                    "rating": rating_value,
                }
            )

    except Exception:
        logger.exception("Error saving research rating")
        return (
            jsonify(
                {
                    "status": "error",
                    "message": "An internal error occurred. Please try again later.",
                }
            ),
            500,
        )

1389

1390

@metrics_bp.route("/star-reviews")
@login_required
def star_reviews():
    """Render the star reviews metrics page template."""
    template_name = "pages/star_reviews.html"
    return render_template_with_defaults(template_name)

1396

1397

@metrics_bp.route("/costs")
@login_required
def cost_analytics():
    """Render the cost analytics page template."""
    template_name = "pages/cost_analytics.html"
    return render_template_with_defaults(template_name)

1403

1404

def _count_ratings_where(condition):
    """Build a SUM(CASE WHEN condition THEN 1 ELSE 0 END) SQL expression."""
    return func.sum(case((condition, 1), else_=0))


def _grouped_rating_payload(row, label_key, label_value):
    """Serialize one grouped rating row (per LLM model or per search engine)."""
    positive = row.positive_ratings or 0
    return {
        label_key: label_value,
        "avg_rating": round(row.avg_rating or 0, 2),
        "rating_count": row.rating_count or 0,
        "positive_ratings": positive,
        # max(..., 1) keeps the division safe when a group has no ratings.
        "satisfaction_rate": round(
            positive / max(row.rating_count or 1, 1) * 100,
            1,
        ),
    }


@metrics_bp.route("/api/star-reviews")
@login_required
def api_star_reviews():
    """Get star reviews analytics data.

    Reads the optional ``period`` query parameter (default ``"30d"``) and
    runs five queries against the current user's database, each limited to
    that period when ``get_time_filter_condition`` returns a condition:
    overall statistics, ratings grouped by LLM model, ratings grouped by
    search engine, daily rating trends, and the 20 most recent ratings.

    Returns:
        A JSON object with ``overall_stats``, ``llm_ratings``,
        ``search_engine_ratings``, ``rating_trends`` and ``recent_ratings``;
        401 when the session has no username; ``{"error": ...}`` with
        HTTP 500 on unexpected errors.
    """
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        period = request.args.get("period", "30d")

        with get_user_db_session(username) as session:
            time_condition = get_time_filter_condition(
                period, ResearchRating.created_at
            )

            def for_period(query):
                """Restrict *query* to the requested period, if one applies."""
                if time_condition is None:
                    return query
                return query.filter(time_condition)

            # Overall rating statistics
            overall_stats = for_period(
                session.query(
                    func.avg(ResearchRating.rating).label("avg_rating"),
                    func.count(ResearchRating.rating).label("total_ratings"),
                    _count_ratings_where(ResearchRating.rating == 5).label(
                        "five_star"
                    ),
                    _count_ratings_where(ResearchRating.rating == 4).label(
                        "four_star"
                    ),
                    _count_ratings_where(ResearchRating.rating == 3).label(
                        "three_star"
                    ),
                    _count_ratings_where(ResearchRating.rating == 2).label(
                        "two_star"
                    ),
                    _count_ratings_where(ResearchRating.rating == 1).label(
                        "one_star"
                    ),
                )
            ).first()

            # Ratings by LLM model (get from token_usage since Research
            # doesn't have model field).
            # NOTE(review): the join is on research_id only; if a research
            # has several TokenUsage rows, each rating is counted once per
            # row, which would inflate rating_count — confirm intended.
            llm_ratings = (
                for_period(
                    session.query(
                        func.coalesce(TokenUsage.model_name, "Unknown").label(
                            "model"
                        ),
                        func.avg(ResearchRating.rating).label("avg_rating"),
                        func.count(ResearchRating.rating).label(
                            "rating_count"
                        ),
                        _count_ratings_where(ResearchRating.rating >= 4).label(
                            "positive_ratings"
                        ),
                    ).outerjoin(
                        TokenUsage,
                        ResearchRating.research_id == TokenUsage.research_id,
                    )
                )
                .group_by(TokenUsage.model_name)
                .order_by(func.avg(ResearchRating.rating).desc())
                .all()
            )

            # Ratings by search engine (join with token_usage to get search
            # engine info); same join fan-out caveat as above.
            search_engine_ratings = (
                for_period(
                    session.query(
                        func.coalesce(
                            TokenUsage.search_engine_selected, "Unknown"
                        ).label("search_engine"),
                        func.avg(ResearchRating.rating).label("avg_rating"),
                        func.count(ResearchRating.rating).label(
                            "rating_count"
                        ),
                        _count_ratings_where(ResearchRating.rating >= 4).label(
                            "positive_ratings"
                        ),
                    ).outerjoin(
                        TokenUsage,
                        ResearchRating.research_id == TokenUsage.research_id,
                    )
                )
                .group_by(TokenUsage.search_engine_selected)
                .having(func.count(ResearchRating.rating) > 0)
                .order_by(func.avg(ResearchRating.rating).desc())
                .all()
            )

            # Rating trends over time (one row per calendar date)
            rating_trends = (
                for_period(
                    session.query(
                        func.date(ResearchRating.created_at).label("date"),
                        func.avg(ResearchRating.rating).label("avg_rating"),
                        func.count(ResearchRating.rating).label("daily_count"),
                    )
                )
                .group_by(func.date(ResearchRating.created_at))
                .order_by("date")
                .all()
            )

            # Recent ratings with research details.
            # NOTE(review): two selected columns are named created_at
            # (ResearchRating and Research); verify that row.created_at
            # below resolves to the rating's timestamp.
            recent_ratings = (
                for_period(
                    session.query(
                        ResearchRating.rating,
                        ResearchRating.created_at,
                        ResearchRating.research_id,
                        Research.query,
                        Research.mode,
                        TokenUsage.model_name,
                        Research.created_at,
                    )
                    .outerjoin(
                        Research, ResearchRating.research_id == Research.id
                    )
                    .outerjoin(
                        TokenUsage,
                        ResearchRating.research_id == TokenUsage.research_id,
                    )
                )
                .order_by(ResearchRating.created_at.desc())
                .limit(20)
                .all()
            )

            return jsonify(
                {
                    "overall_stats": {
                        "avg_rating": round(overall_stats.avg_rating or 0, 2),
                        "total_ratings": overall_stats.total_ratings or 0,
                        "rating_distribution": {
                            "5": overall_stats.five_star or 0,
                            "4": overall_stats.four_star or 0,
                            "3": overall_stats.three_star or 0,
                            "2": overall_stats.two_star or 0,
                            "1": overall_stats.one_star or 0,
                        },
                    },
                    "llm_ratings": [
                        _grouped_rating_payload(row, "model", row.model)
                        for row in llm_ratings
                    ],
                    "search_engine_ratings": [
                        _grouped_rating_payload(
                            row, "search_engine", row.search_engine
                        )
                        for row in search_engine_ratings
                    ],
                    "rating_trends": [
                        {
                            "date": str(trend.date),
                            "avg_rating": round(trend.avg_rating or 0, 2),
                            "count": trend.daily_count or 0,
                        }
                        for trend in rating_trends
                    ],
                    "recent_ratings": [
                        {
                            "rating": rating.rating,
                            "created_at": str(rating.created_at),
                            "research_id": rating.research_id,
                            "query": (
                                rating.query
                                if rating.query
                                else f"Research Session #{rating.research_id}"
                            ),
                            "mode": rating.mode
                            if rating.mode
                            else "Standard Research",
                            "llm_model": (
                                rating.model_name
                                if rating.model_name
                                else "LLM Model"
                            ),
                        }
                        for rating in recent_ratings
                    ],
                }
            )

    except Exception:
        logger.exception("Error getting star reviews data")
        return (
            jsonify(
                {"error": "An internal error occurred. Please try again later."}
            ),
            500,
        )

1632

1633

@metrics_bp.route("/api/pricing")
@login_required
def api_pricing():
    """Return the static LLM pricing table as JSON.

    The data comes from ``PricingFetcher().static_pricing``; no async or
    real-time lookup is performed. Any exception is logged and answered
    with ``{"error": ...}`` and HTTP 500.
    """
    try:
        from ...metrics.pricing.pricing_fetcher import PricingFetcher

        # Static configuration only; the async fetch path is not used here.
        static_prices = PricingFetcher().static_pricing

        response_body = {
            "status": "success",
            "pricing": static_prices,
            "last_updated": datetime.now(UTC).isoformat(),
            "note": "Pricing data is from static configuration. Real-time APIs not available for most providers.",
        }
        return jsonify(response_body)

    except Exception:
        logger.exception("Error fetching pricing data")
        failure = {"error": "Internal Server Error"}
        return jsonify(failure), 500

1657

1658

@metrics_bp.route("/api/pricing/<model_name>")
@login_required
def api_model_pricing(model_name):
    """Return pricing information for a single model as JSON.

    The calculator's cache is consulted first; when it yields nothing
    truthy, the ``pricing_used`` entry of a synchronous 1000/1000-token
    cost calculation is used instead (``{}`` if absent). The optional
    ``provider`` query parameter is echoed back in the response.
    """
    try:
        # Optional; only echoed in the response.
        requested_provider = request.args.get("provider")

        from ...metrics.pricing.cost_calculator import CostCalculator

        cost_calc = CostCalculator()

        # Cached/static pricing first, synchronous calculation as fallback.
        model_pricing = cost_calc.cache.get_model_pricing(model_name)
        if not model_pricing:
            sample_cost = cost_calc.calculate_cost_sync(model_name, 1000, 1000)
            model_pricing = sample_cost.get("pricing_used", {})

        response_body = {
            "status": "success",
            "model": model_name,
            "provider": requested_provider,
            "pricing": model_pricing,
            "last_updated": datetime.now(UTC).isoformat(),
        }
        return jsonify(response_body)

    except Exception:
        logger.exception(f"Error getting pricing for model: {model_name}")
        failure = {"error": "An internal error occurred"}
        return jsonify(failure), 500

1690

1691

def _is_valid_token_count(value):
    """Return True when *value* is a non-negative int/float (bool excluded)."""
    return (
        isinstance(value, (int, float))
        and not isinstance(value, bool)
        and value >= 0
    )


@metrics_bp.route("/api/cost-calculation", methods=["POST"])
@login_required
def api_cost_calculation():
    """Calculate cost for token usage.

    Expects a JSON object with ``model_name`` (required) and optional
    ``provider``, ``prompt_tokens`` and ``completion_tokens`` (both default
    to 0). The cost is computed with ``CostCalculator.calculate_cost_sync``.

    Returns:
        A JSON payload echoing the inputs, ``total_tokens`` and the keys of
        the calculator result; 400 for a missing/invalid body, a missing
        ``model_name`` or an invalid token count; 500 on unexpected errors.
    """
    try:
        # silent=True: a malformed body or wrong content type yields None
        # and is answered with 400 below instead of surfacing as a 500.
        data = request.get_json(silent=True)

        if not data:
            return jsonify({"error": "No data provided"}), 400

        if not isinstance(data, dict):
            return jsonify({"error": "Request body must be a JSON object"}), 400

        model_name = data.get("model_name")
        provider = data.get("provider")  # Optional provider parameter
        prompt_tokens = data.get("prompt_tokens", 0)
        completion_tokens = data.get("completion_tokens", 0)

        if not model_name:
            return jsonify({"error": "model_name is required"}), 400

        # Reject strings/null/negatives up front: they would otherwise fail
        # (or silently concatenate) in the total_tokens addition below.
        for field_name, field_value in (
            ("prompt_tokens", prompt_tokens),
            ("completion_tokens", completion_tokens),
        ):
            if not _is_valid_token_count(field_value):
                return (
                    jsonify(
                        {"error": f"{field_name} must be a non-negative number"}
                    ),
                    400,
                )

        from ...metrics.pricing.cost_calculator import CostCalculator

        # Use synchronous cost calculation
        calculator = CostCalculator()
        cost_data = calculator.calculate_cost_sync(
            model_name, prompt_tokens, completion_tokens
        )

        return jsonify(
            {
                "status": "success",
                "model_name": model_name,
                "provider": provider,
                "prompt_tokens": prompt_tokens,
                "completion_tokens": completion_tokens,
                "total_tokens": prompt_tokens + completion_tokens,
                **cost_data,
            }
        )

    except Exception:
        logger.exception("Error calculating cost")
        return jsonify({"error": "An internal error occurred"}), 500

1733

1734

@metrics_bp.route("/api/research-costs/<string:research_id>")
@login_required
def api_research_costs(research_id):
    """Return the total cost and token counts for one research session.

    Every ``TokenUsage`` row for ``research_id`` is priced with
    ``CostCalculator.calculate_cost_sync`` and the results are summed.
    Responds with a zero-cost payload when there are no rows, 401 when
    the session has no username, and ``{"error": ...}`` with HTTP 500 on
    any exception.
    """
    try:
        current_user = flask_session.get("username")
        if not current_user:
            unauthenticated = {
                "status": "error",
                "message": "No user session found",
            }
            return jsonify(unauthenticated), 401

        with get_user_db_session(current_user) as session:
            # All token usage rows recorded for this research.
            rows = (
                session.query(TokenUsage)
                .filter(TokenUsage.research_id == research_id)
                .all()
            )

            if not rows:
                empty_body = {
                    "status": "success",
                    "research_id": research_id,
                    "total_cost": 0.0,
                    "message": "No token usage data found for this research session",
                }
                return jsonify(empty_body)

            # Plain-dict snapshot of each row for the cost calculation.
            usage_data = [
                {
                    "model_name": row.model_name,
                    # Older rows may not carry a provider attribute.
                    "provider": getattr(row, "provider", None),
                    "prompt_tokens": row.prompt_tokens,
                    "completion_tokens": row.completion_tokens,
                    "timestamp": row.timestamp,
                }
                for row in rows
            ]

            from ...metrics.pricing.cost_calculator import CostCalculator

            # Synchronous pricing of every usage entry.
            cost_calc = CostCalculator()
            priced = []
            for entry in usage_data:
                entry_cost = cost_calc.calculate_cost_sync(
                    entry["model_name"],
                    entry["prompt_tokens"],
                    entry["completion_tokens"],
                )
                priced.append({**entry, **entry_cost})

            cost_sum = sum(item["total_cost"] for item in priced)
            prompt_sum = sum(entry["prompt_tokens"] for entry in usage_data)
            completion_sum = sum(
                entry["completion_tokens"] for entry in usage_data
            )

            response_body = {
                "status": "success",
                "research_id": research_id,
                "total_cost": round(cost_sum, 6),
                "total_tokens": prompt_sum + completion_sum,
                "prompt_tokens": prompt_sum,
                "completion_tokens": completion_sum,
            }
            return jsonify(response_body)

    except Exception:
        logger.exception(
            f"Error getting research costs for research: {research_id}"
        )
        failure = {"error": "An internal error occurred"}
        return jsonify(failure), 500

1818

1819

@metrics_bp.route("/api/cost-analytics")
@login_required
def api_cost_analytics():
    """Get cost analytics across all research sessions.

    Reads the optional ``period`` query parameter (default ``"30d"``),
    loads the matching ``TokenUsage`` rows (only the 1000 most recent when
    more exist), prices each row once with
    ``CostCalculator.calculate_cost_sync`` and aggregates the result both
    overall and per research.

    Returns:
        A JSON payload with ``overview`` totals, the ten most expensive
        research sessions and ``research_count``; 401 when the session has
        no username. On unexpected errors a zeroed payload carrying an
        ``error`` key is returned with HTTP 200 so the UI keeps rendering.
    """
    # Resolved before the try block so the fallback payload in the except
    # handler can always reference it, whatever fails inside the try.
    period = request.args.get("period", "30d")

    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        with get_user_db_session(username) as session:
            # Get token usage for the period
            query = session.query(TokenUsage)
            time_condition = get_time_filter_condition(
                period, TokenUsage.timestamp
            )
            if time_condition is not None:
                query = query.filter(time_condition)

            # First check if we have any records to avoid expensive queries
            record_count = query.count()

            if record_count == 0:
                return jsonify(
                    {
                        "status": "success",
                        "period": period,
                        "overview": {
                            "total_cost": 0.0,
                            "total_tokens": 0,
                            "prompt_tokens": 0,
                            "completion_tokens": 0,
                        },
                        "top_expensive_research": [],
                        "research_count": 0,
                        "message": "No token usage data found for this period",
                    }
                )

            # If we have too many records, limit to recent ones to avoid timeout
            if record_count > 1000:
                logger.warning(
                    f"Large dataset detected ({record_count} records), limiting to recent 1000 for performance"
                )
                usage_records = (
                    query.order_by(TokenUsage.timestamp.desc())
                    .limit(1000)
                    .all()
                )
            else:
                usage_records = query.all()

            from ...metrics.pricing.cost_calculator import CostCalculator

            # Use synchronous calculation
            calculator = CostCalculator()

            # Single pass: price each record once and feed both the overall
            # totals and the per-research totals from that one result.
            total_cost = 0
            total_prompt_tokens = 0
            total_completion_tokens = 0
            research_totals = {}
            for record in usage_records:
                cost_data = calculator.calculate_cost_sync(
                    record.model_name,
                    record.prompt_tokens,
                    record.completion_tokens,
                )
                record_cost = cost_data["total_cost"]

                total_cost += record_cost
                total_prompt_tokens += record.prompt_tokens
                total_completion_tokens += record.completion_tokens

                rid = record.research_id
                research_totals[rid] = research_totals.get(rid, 0) + record_cost

            cost_summary = {
                "total_cost": round(total_cost, 6),
                "total_tokens": total_prompt_tokens + total_completion_tokens,
                "prompt_tokens": total_prompt_tokens,
                "completion_tokens": total_completion_tokens,
            }

            # Rounded cost per research, in first-seen order
            research_summaries = {
                rid: {"total_cost": round(research_total, 6)}
                for rid, research_total in research_totals.items()
            }

            # Top expensive research sessions (stable sort keeps first-seen
            # order among equal costs)
            top_expensive = sorted(
                [
                    (rid, data["total_cost"])
                    for rid, data in research_summaries.items()
                ],
                key=lambda x: x[1],
                reverse=True,
            )[:10]

            return jsonify(
                {
                    "status": "success",
                    "period": period,
                    "overview": cost_summary,
                    "top_expensive_research": [
                        {"research_id": rid, "total_cost": cost}
                        for rid, cost in top_expensive
                    ],
                    "research_count": len(research_summaries),
                }
            )

    except Exception:
        logger.exception("Error getting cost analytics")
        # Return a more graceful error response
        return (
            jsonify(
                {
                    "status": "success",
                    "period": period,
                    "overview": {
                        "total_cost": 0.0,
                        "total_tokens": 0,
                        "prompt_tokens": 0,
                        "completion_tokens": 0,
                    },
                    "top_expensive_research": [],
                    "research_count": 0,
                    "error": "Cost analytics temporarily unavailable",
                }
            ),
            200,
        )  # Return 200 to avoid breaking the UI

1986

1987

@metrics_bp.route("/links")
@login_required
def link_analytics():
    """Render the link analytics page template."""
    template_name = "pages/link_analytics.html"
    return render_template_with_defaults(template_name)

1993

1994

@metrics_bp.route("/api/link-analytics")
@login_required
def api_link_analytics():
    """Return link analytics for the requested period as JSON.

    Reads the optional ``period`` query parameter (default ``"30d"``) and
    returns the ``link_analytics`` entry of ``get_link_analytics``.
    Responds with 401 when the session has no username and a generic JSON
    error with HTTP 500 on any exception.
    """
    try:
        current_user = flask_session.get("username")
        if not current_user:
            unauthenticated = {
                "status": "error",
                "message": "No user session found",
            }
            return jsonify(unauthenticated), 401

        requested_period = request.args.get("period", "30d")

        # Only the "link_analytics" entry of the helper's result is exposed.
        analytics = get_link_analytics(requested_period, current_user)

        response_body = {
            "status": "success",
            "data": analytics["link_analytics"],
            "period": requested_period,
        }
        return jsonify(response_body)

    except Exception:
        logger.exception("Error getting link analytics")
        error_body = {
            "status": "error",
            "message": "An internal error occurred. Please try again later.",
        }
        return jsonify(error_body), 500

2030

2031

@metrics_bp.route("/api/domain-classifications", methods=["GET"])
@login_required
def api_get_domain_classifications():
    """Return every domain classification for the current user as JSON.

    Each classification is serialized with its ``to_dict()`` method and
    the total count is included. Responds with 401 when the session has
    no username and a JSON error with HTTP 500 on any exception.
    """
    try:
        current_user = flask_session.get("username")
        if not current_user:
            unauthenticated = {
                "status": "error",
                "message": "No user session found",
            }
            return jsonify(unauthenticated), 401

        all_classifications = DomainClassifier(
            current_user
        ).get_all_classifications()

        serialized = [item.to_dict() for item in all_classifications]
        response_body = {
            "status": "success",
            "classifications": serialized,
            "total": len(all_classifications),
        }
        return jsonify(response_body)

    except Exception:
        logger.exception("Error getting domain classifications")
        failure = {
            "status": "error",
            "message": "Failed to retrieve classifications",
        }
        return jsonify(failure), 500

2059

2060

@metrics_bp.route("/api/domain-classifications/summary", methods=["GET"])
@login_required
def api_get_classifications_summary():
    """Return the per-category summary of domain classifications.

    Returns:
        A JSON success payload carrying the value of
        ``DomainClassifier.get_categories_summary()``; a 401 JSON error when
        the session has no username; or a 500 JSON error when anything raises.
    """
    try:
        username = flask_session.get("username")
        if not username:
            no_session = {"status": "error", "message": "No user session found"}
            return jsonify(no_session), 401

        category_summary = DomainClassifier(username).get_categories_summary()
        payload = {"status": "success", "summary": category_summary}
        return jsonify(payload)

    except Exception:
        logger.exception("Error getting classifications summary")
        failure = {"status": "error", "message": "Failed to retrieve summary"}
        return jsonify(failure), 500

2082

2083

@metrics_bp.route("/api/domain-classifications/classify", methods=["POST"])
@login_required
def api_classify_domains():
    """Classify a single domain, or every known domain, for the logged-in user.

    Expects an optional JSON object body with:
        domain: Domain to classify (single-domain mode).
        force_update: Forwarded to the classifier; defaults to ``False``.
        batch: When truthy, classify all domains and ignore ``domain``.

    Returns:
        200 with ``classification`` (single mode) or ``results`` (batch mode);
        400 when neither ``domain`` nor ``batch`` is supplied, or when the
        classifier returns nothing for the requested domain;
        401 when the session has no username;
        500 with a generic message when anything raises.
    """
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        # silent=True makes a missing, non-JSON or malformed body come back as
        # None instead of raising, so such requests get the 400 below rather
        # than being swallowed by the blanket handler as a 500.
        data = request.get_json(silent=True)
        if not isinstance(data, dict):
            # A JSON array/scalar has no .get(); treat it as an empty request.
            data = {}

        domain = data.get("domain")
        force_update = data.get("force_update", False)
        batch_mode = data.get("batch", False)

        # Reject unusable requests before opening a database session.
        if not batch_mode and not domain:
            return jsonify(
                {
                    "status": "error",
                    "message": "Must provide either 'domain' or set 'batch': true",
                }
            ), 400

        # Get settings snapshot for LLM configuration
        from ...settings.manager import SettingsManager

        with get_user_db_session(username) as db_session:
            settings_manager = SettingsManager(db_session=db_session)
            settings_snapshot = settings_manager.get_all_settings()

        classifier = DomainClassifier(
            username, settings_snapshot=settings_snapshot
        )

        if batch_mode:
            # NOTE: this call runs inside the request; the response is only
            # sent once classify_all_domains() returns. It should really be
            # moved to a background task.
            logger.info("Starting batch classification of all domains")
            results = classifier.classify_all_domains(force_update)
            return jsonify({"status": "success", "results": results})

        # Single-domain mode (domain is truthy and batch mode is off).
        logger.info(f"Classifying single domain: {domain}")
        classification = classifier.classify_domain(domain, force_update)
        if classification:
            return jsonify(
                {
                    "status": "success",
                    "classification": classification.to_dict(),
                }
            )
        return jsonify(
            {
                "status": "error",
                "message": f"Failed to classify domain: {domain}",
            }
        ), 400

    except Exception:
        logger.exception("Error classifying domains")
        return jsonify(
            {"status": "error", "message": "Failed to classify domains"}
        ), 500

2150

2151

@metrics_bp.route("/api/domain-classifications/progress", methods=["GET"])
@login_required
def api_classification_progress():
    """Report how many of the user's resource domains have been classified.

    The set of domains is derived from the distinct ``ResearchResource.url``
    values (lower-cased host, leading ``www.`` removed). The classified count
    is the number of ``DomainClassification`` rows.

    Returns:
        A JSON success payload with ``total_domains``, ``classified``,
        ``unclassified``, ``percentage`` (one decimal place) and the sorted
        ``all_domains`` list; a 401 JSON error when the session has no
        username; or a 500 JSON error when anything raises.
    """
    try:
        username = flask_session.get("username")
        if not username:
            return jsonify(
                {"status": "error", "message": "No user session found"}
            ), 401

        from urllib.parse import urlparse

        with get_user_db_session(username) as session:
            resources = session.query(ResearchResource.url).distinct().all()

            domains = set()
            for (url,) in resources:
                if not url:
                    continue
                try:
                    domain = urlparse(url).netloc.lower()
                except (ValueError, AttributeError):
                    # urlparse can raise ValueError for malformed URLs
                    continue
                domain = domain.removeprefix("www.")
                if domain:
                    domains.add(domain)

            # Count classified domains
            classified_count = session.query(DomainClassification).count()

        all_domains = sorted(domains)
        total_domains = len(all_domains)

        # classified_count covers every classification row, which is not
        # limited to the domains found in resources above, so it can exceed
        # total_domains. Bound the derived figures so the client never sees a
        # negative remainder or a percentage above 100.
        unclassified = max(total_domains - classified_count, 0)
        if total_domains > 0:
            percentage = round(
                min(classified_count / total_domains * 100, 100.0), 1
            )
        else:
            percentage = 0

        return jsonify(
            {
                "status": "success",
                "progress": {
                    "total_domains": total_domains,
                    "classified": classified_count,
                    "unclassified": unclassified,
                    "percentage": percentage,
                    "all_domains": all_domains,  # Return all domains for classification
                },
            }
        )

    except Exception:
        logger.exception("Error getting classification progress")
        return jsonify(
            {"status": "error", "message": "Failed to retrieve progress"}
        ), 500

Coverage for src / local_deep_research / web / routes / metrics_routes.py: 61%

691 statements