Coverage for src/local_deep_research/benchmarks/web_api/benchmark_routes.py: 27% (336 statements)
coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
1"""Flask routes for benchmark web interface."""
3import time
5from flask import Blueprint, jsonify, request
6from loguru import logger
8from ...database.session_context import get_user_db_session
9from ...web.auth.decorators import login_required
10from ...web.utils.rate_limiter import limiter
11from local_deep_research.settings import SettingsManager
12from ...web.utils.templates import render_template_with_defaults
13from .benchmark_service import benchmark_service
15# Create blueprint for benchmark routes
16benchmark_bp = Blueprint("benchmark", __name__, url_prefix="/benchmark")
19@benchmark_bp.route("/")
20@login_required
21def index():
22 """Benchmark dashboard page."""
23 from flask import session as flask_session
25 username = flask_session.get("username")
26 with get_user_db_session(username) as db_session:
27 settings_manager = SettingsManager(db_session)
29 # Load evaluation settings from database
30 eval_settings = {
31 "evaluation_provider": settings_manager.get_setting(
32 "benchmark.evaluation.provider", "openai_endpoint"
33 ),
34 "evaluation_model": settings_manager.get_setting(
35 "benchmark.evaluation.model", ""
36 ),
37 "evaluation_endpoint_url": settings_manager.get_setting(
38 "benchmark.evaluation.endpoint_url", ""
39 ),
40 "evaluation_temperature": settings_manager.get_setting(
41 "benchmark.evaluation.temperature", 0
42 ),
43 }
45 return render_template_with_defaults(
46 "pages/benchmark.html", eval_settings=eval_settings
47 )
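
# Illustrative sketch, not part of the original module: exercising the dashboard
# route with Flask's test client.  How the app object is built and which session
# keys satisfy @login_required are assumptions for illustration only.
def _example_render_dashboard(app):
    """Fetch the benchmark dashboard page with a simulated logged-in session."""
    with app.test_client() as client:
        with client.session_transaction() as sess:
            sess["username"] = "demo_user"  # assumed to satisfy login_required
        response = client.get("/benchmark/")
        assert response.status_code == 200
        return response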
50@benchmark_bp.route("/results")
51@login_required
52def results():
53 """Benchmark results history page."""
54 return render_template_with_defaults("pages/benchmark_results.html")
57@benchmark_bp.route("/api/start", methods=["POST"])
58@login_required
59def start_benchmark():
60 """Start a new benchmark run."""
61 try:
62 data = request.get_json()
64 if not data:
65 return jsonify({"error": "No data provided"}), 400
67 # Extract configuration
68 run_name = data.get("run_name")
70 # Get search config from database instead of request
71 from ...database.session_context import get_user_db_session
72 from local_deep_research.settings import SettingsManager
73 from flask import session as flask_session
75 username = flask_session.get("username")
76 session_id = flask_session.get("session_id")
78 # Try to get password from session store for background thread
79 from ...database.session_passwords import session_password_store
81 user_password = None
82 if session_id:
83 user_password = session_password_store.get_session_password(
84 username, session_id
85 )
87 search_config = {}
88 evaluation_config = {}
89 datasets_config = data.get("datasets_config", {})
91 with get_user_db_session(username) as db_session:
92 # Use the logged-in user's settings
93 settings_manager = SettingsManager(db_session)
95 # Build search config from database settings
96 search_config = {
97 "iterations": int(
98 settings_manager.get_setting("search.iterations", 8)
99 ),
100 "questions_per_iteration": int(
101 settings_manager.get_setting(
102 "search.questions_per_iteration", 5
103 )
104 ),
105 "search_tool": settings_manager.get_setting(
106 "search.tool", "searxng"
107 ),
108 "search_strategy": settings_manager.get_setting(
109 "search.search_strategy", "focused_iteration"
110 ),
111 "model_name": settings_manager.get_setting("llm.model"),
112 "provider": settings_manager.get_setting("llm.provider"),
113 "temperature": float(
114 settings_manager.get_setting("llm.temperature", 0.7)
115 ),
116 }
118 # Add provider-specific settings
119 provider = search_config.get("provider")
120 if provider == "openai_endpoint":
121 search_config["openai_endpoint_url"] = (
122 settings_manager.get_setting("llm.openai_endpoint.url")
123 )
124 search_config["openai_endpoint_api_key"] = (
125 settings_manager.get_setting("llm.openai_endpoint.api_key")
126 )
127 elif provider == "openai":
128 search_config["openai_api_key"] = settings_manager.get_setting(
129 "llm.openai.api_key"
130 )
131 elif provider == "anthropic":
132 search_config["anthropic_api_key"] = (
133 settings_manager.get_setting("llm.anthropic.api_key")
134 )
136 # Get evaluation config from database settings or request
137 if "evaluation_config" in data:
138 evaluation_config = data["evaluation_config"]
139 else:
140 # Read evaluation config from database settings
141 evaluation_provider = settings_manager.get_setting(
142 "benchmark.evaluation.provider", "openai_endpoint"
143 )
144 evaluation_model = settings_manager.get_setting(
145 "benchmark.evaluation.model", "anthropic/claude-3.7-sonnet"
146 )
147 evaluation_temperature = float(
148 settings_manager.get_setting(
149 "benchmark.evaluation.temperature", 0
150 )
151 )
153 evaluation_config = {
154 "provider": evaluation_provider,
155 "model_name": evaluation_model,
156 "temperature": evaluation_temperature,
157 }
159 # Add provider-specific settings for evaluation
160 if evaluation_provider == "openai_endpoint":
161 evaluation_config["openai_endpoint_url"] = (
162 settings_manager.get_setting(
163 "benchmark.evaluation.endpoint_url",
164 "https://openrouter.ai/api/v1",
165 )
166 )
167 evaluation_config["openai_endpoint_api_key"] = (
168 settings_manager.get_setting(
169 "llm.openai_endpoint.api_key"
170 )
171 )
172 elif evaluation_provider == "openai":
173 evaluation_config["openai_api_key"] = (
174 settings_manager.get_setting("llm.openai.api_key")
175 )
176 elif evaluation_provider == "anthropic":
177 evaluation_config["anthropic_api_key"] = (
178 settings_manager.get_setting("llm.anthropic.api_key")
179 )
181 # Validate datasets config
182 if not datasets_config or not any(
183 config.get("count", 0) > 0 for config in datasets_config.values()
184 ):
185 return jsonify(
186 {
187 "error": "At least one dataset with count > 0 must be specified"
188 }
189 ), 400
191 # Create benchmark run
192 benchmark_run_id = benchmark_service.create_benchmark_run(
193 run_name=run_name,
194 search_config=search_config,
195 evaluation_config=evaluation_config,
196 datasets_config=datasets_config,
197 username=username,
198 user_password=user_password,
199 )
201 # Start benchmark
202 success = benchmark_service.start_benchmark(
203 benchmark_run_id, username, user_password
204 )
206 if success:
207 return jsonify(
208 {
209 "success": True,
210 "benchmark_run_id": benchmark_run_id,
211 "message": "Benchmark started successfully",
212 }
213 )
214 else:
215 return jsonify(
216 {"success": False, "error": "Failed to start benchmark"}
217 ), 500
219 except Exception:
220 logger.exception("Error starting benchmark")
221 return jsonify(
222 {"success": False, "error": "An internal error has occurred."}
223 ), 500
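
# Illustrative sketch, not part of the original module: starting a run from an
# external client with the `requests` library.  The base URL and the
# pre-authenticated requests.Session passed as `http` are assumptions.
def _example_start_benchmark(http, base_url="http://localhost:5000"):
    """POST /benchmark/api/start with a small SimpleQA-only dataset config."""
    payload = {
        "run_name": "demo run",
        "datasets_config": {"simpleqa": {"count": 5}},
    }
    response = http.post(f"{base_url}/benchmark/api/start", json=payload)
    data = response.json()
    if not data.get("success"):
        raise RuntimeError(data.get("error", "benchmark failed to start"))
    return data["benchmark_run_id"]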
226@benchmark_bp.route("/api/running", methods=["GET"])
227@login_required
228def get_running_benchmark():
229 """Check if there's a running benchmark and return its ID."""
230 try:
231 from ...database.models.benchmark import BenchmarkRun, BenchmarkStatus
232 from ...database.session_context import get_user_db_session
233 from flask import session as flask_session
235 username = flask_session.get("username")
236 with get_user_db_session(username) as session:
237 # Find any benchmark that's currently running
238 running_benchmark = (
239 session.query(BenchmarkRun)
240 .filter(BenchmarkRun.status == BenchmarkStatus.IN_PROGRESS)
241 .order_by(BenchmarkRun.created_at.desc())
242 .first()
243 )
245 if running_benchmark:  # coverage: 245 ↛ 246, line 245 didn't jump to line 246 because the condition was never true
246 return jsonify(
247 {
248 "success": True,
249 "benchmark_run_id": running_benchmark.id,
250 "run_name": running_benchmark.run_name,
251 "total_examples": running_benchmark.total_examples,
252 "completed_examples": running_benchmark.completed_examples,
253 }
254 )
255 else:
256 return jsonify(
257 {"success": False, "message": "No running benchmark found"}
258 )
260 except Exception:
261 logger.exception("Error checking for running benchmark")
262 return jsonify(
263 {"success": False, "error": "An internal error has occurred."}
264 ), 500
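
# Illustrative sketch, not part of the original module: checking for an
# in-flight run on page load so a client can resume polling it.  The base URL
# and the authenticated session object `http` are assumptions.
def _example_find_running_run(http, base_url="http://localhost:5000"):
    """Return the id of the currently running benchmark, or None."""
    data = http.get(f"{base_url}/benchmark/api/running").json()
    return data["benchmark_run_id"] if data.get("success") else None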
267@benchmark_bp.route("/api/status/<int:benchmark_run_id>", methods=["GET"])
268@limiter.exempt
269@login_required
270def get_benchmark_status(benchmark_run_id: int):
271 """Get status of a benchmark run."""
272 try:
273 from flask import session as flask_session
275 username = flask_session.get("username")
276 status = benchmark_service.get_benchmark_status(
277 benchmark_run_id, username
278 )
280 if status:
281 logger.info(
282 f"Returning status for benchmark {benchmark_run_id}: "
283 f"completed={status.get('completed_examples')}, "
284 f"overall_acc={status.get('overall_accuracy')}, "
285 f"avg_time={status.get('avg_time_per_example')}, "
286 f"estimated_remaining={status.get('estimated_time_remaining')}"
287 )
288 return jsonify({"success": True, "status": status})
289 else:
290 return jsonify(
291 {"success": False, "error": "Benchmark run not found"}
292 ), 404
294 except Exception:
295 logger.exception("Error getting benchmark status")
296 return jsonify(
297 {"success": False, "error": "An internal error has occurred."}
298 ), 500
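
# Illustrative sketch, not part of the original module: polling the status
# endpoint until every example has completed.  The keys read here mirror the
# ones logged above; the base URL, the session object `http`, and anything in
# the status payload beyond those keys are assumptions.
def _example_poll_status(http, run_id, total_examples,
                         base_url="http://localhost:5000", interval=5.0):
    """Poll GET /benchmark/api/status/<id> until all examples are completed."""
    import time

    while True:
        data = http.get(f"{base_url}/benchmark/api/status/{run_id}").json()
        if not data.get("success"):
            return None
        status = data["status"]
        completed = status.get("completed_examples") or 0
        print(
            f"{completed}/{total_examples} done, "
            f"accuracy={status.get('overall_accuracy')}, "
            f"eta={status.get('estimated_time_remaining')}"
        )
        if completed >= total_examples:
            return status
        time.sleep(interval)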
301@benchmark_bp.route("/api/cancel/<int:benchmark_run_id>", methods=["POST"])
302@login_required
303def cancel_benchmark(benchmark_run_id: int):
304 """Cancel a running benchmark."""
305 try:
306 from flask import session as flask_session
308 username = flask_session.get("username")
309 success = benchmark_service.cancel_benchmark(benchmark_run_id, username)
311 if success:
312 return jsonify(
313 {"success": True, "message": "Benchmark cancelled successfully"}
314 )
315 else:
316 return jsonify(
317 {"success": False, "error": "Failed to cancel benchmark"}
318 ), 500
320 except Exception:
321 logger.exception("Error cancelling benchmark")
322 return jsonify(
323 {"success": False, "error": "An internal error has occurred."}
324 ), 500
327@benchmark_bp.route("/api/history", methods=["GET"])
328@login_required
329def get_benchmark_history():
330 """Get list of recent benchmark runs."""
331 try:
332 from ...database.models.benchmark import BenchmarkRun
333 from ...database.session_context import get_user_db_session
334 from flask import session as flask_session
336 username = flask_session.get("username")
337 with get_user_db_session(username) as session:
338 # Get all benchmark runs (completed, failed, cancelled, or in-progress)
339 runs = (
340 session.query(BenchmarkRun)
341 .order_by(BenchmarkRun.created_at.desc())
342 .limit(50)
343 .all()
344 )
346 # Format runs for display
347 formatted_runs = []
348 for run in runs:  # coverage: 348 ↛ 350, line 348 didn't jump to line 350 because the loop never started
349 # Calculate average processing time from results
350 avg_processing_time = None
351 avg_search_results = None
352 try:
353 from sqlalchemy import func
355 from ...database.models.benchmark import BenchmarkResult
357 avg_result = (
358 session.query(func.avg(BenchmarkResult.processing_time))
359 .filter(
360 BenchmarkResult.benchmark_run_id == run.id,
361 BenchmarkResult.processing_time.isnot(None),
362 BenchmarkResult.processing_time > 0,
363 )
364 .scalar()
365 )
367 if avg_result:
368 avg_processing_time = float(avg_result)
369 except Exception as e:
370 logger.warning(
371 f"Error calculating avg processing time for run {run.id}: {e}"
372 )
374 # Calculate average search results and total search requests from metrics
375 total_search_requests = None
376 try:
377 from ...database.models import SearchCall
378 from ...metrics.search_tracker import get_search_tracker
380 # Get all results for this run to find research_ids
381 results = (
382 session.query(BenchmarkResult)
383 .filter(BenchmarkResult.benchmark_run_id == run.id)
384 .all()
385 )
387 research_ids = [
388 r.research_id for r in results if r.research_id
389 ]
391 if research_ids:
392 tracker = get_search_tracker()
393 with tracker.db.get_session() as metric_session:
394 # Get all search calls for these research_ids
395 search_calls = (
396 metric_session.query(SearchCall)
397 .filter(
398 SearchCall.research_id.in_(research_ids)
399 )
400 .all()
401 )
403 # Group by research_id and calculate metrics per research session
404 research_results = {}
405 research_requests = {}
407 for call in search_calls:
408 if call.research_id:
409 if call.research_id not in research_results:
410 research_results[call.research_id] = 0
411 research_requests[call.research_id] = 0
412 research_results[call.research_id] += (
413 call.results_count or 0
414 )
415 research_requests[call.research_id] += 1
417 # Calculate averages across research sessions
418 if research_results:
419 total_results = sum(research_results.values())
420 avg_search_results = total_results / len(
421 research_results
422 )
424 total_requests = sum(research_requests.values())
425 total_search_requests = total_requests / len(
426 research_requests
427 )
429 except Exception as e:
430 logger.warning(
431 f"Error calculating search metrics for run {run.id}: {e}"
432 )
434 formatted_runs.append(
435 {
436 "id": run.id,
437 "run_name": run.run_name or f"Benchmark #{run.id}",
438 "created_at": run.created_at.isoformat(),
439 "total_examples": run.total_examples,
440 "completed_examples": run.completed_examples,
441 "overall_accuracy": run.overall_accuracy,
442 "status": run.status.value,
443 "search_config": run.search_config,
444 "evaluation_config": run.evaluation_config,
445 "datasets_config": run.datasets_config,
446 "avg_processing_time": avg_processing_time,
447 "avg_search_results": avg_search_results,
448 "total_search_requests": total_search_requests,
449 }
450 )
452 return jsonify({"success": True, "runs": formatted_runs})
454 except Exception:
455 logger.exception("Error getting benchmark history")
456 return jsonify(
457 {"success": False, "error": "An internal error has occurred."}
458 ), 500
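
# Illustrative sketch, not part of the original module: the per-run search
# metrics above are averaged per research session.  A self-contained rendering
# of that aggregation over plain (research_id, results_count) tuples, which
# stand in for SearchCall rows.
def _example_average_search_metrics(search_calls):
    """search_calls: iterable of (research_id, results_count) tuples."""
    research_results = {}
    research_requests = {}
    for research_id, results_count in search_calls:
        if research_id is None:
            continue
        research_results[research_id] = (
            research_results.get(research_id, 0) + (results_count or 0)
        )
        research_requests[research_id] = research_requests.get(research_id, 0) + 1
    if not research_results:
        return None, None
    avg_results = sum(research_results.values()) / len(research_results)
    avg_requests = sum(research_requests.values()) / len(research_requests)
    return avg_results, avg_requests

# e.g. _example_average_search_metrics([("r1", 10), ("r1", 6), ("r2", 4)])
# returns (10.0, 1.5): 20 results and 3 requests spread over 2 research sessions.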
461@benchmark_bp.route("/api/results/<int:benchmark_run_id>", methods=["GET"])
462@limiter.exempt
463@login_required
464def get_benchmark_results(benchmark_run_id: int):
465 """Get detailed results for a benchmark run."""
466 try:
467 from ...database.models.benchmark import BenchmarkResult
468 from ...database.session_context import get_user_db_session
469 from flask import session as flask_session
471 logger.info(f"Getting results for benchmark {benchmark_run_id}")
472 username = flask_session.get("username")
474 # First sync any pending results from active runs
475 benchmark_service.sync_pending_results(benchmark_run_id, username)
476 with get_user_db_session(username) as session:
477 # Get recent results (limit to last 10)
478 limit = int(request.args.get("limit", 10))
480 results = (
481 session.query(BenchmarkResult)
482 .filter(BenchmarkResult.benchmark_run_id == benchmark_run_id)
483 # Temporarily show all results including pending evaluations
484 # .filter(
485 # BenchmarkResult.is_correct.isnot(None)
486 # ) # Only completed evaluations
487 .order_by(BenchmarkResult.id.desc()) # Most recent first
488 .limit(limit)
489 .all()
490 )
492 logger.info(f"Found {len(results)} results")
494 # Build a map of research_id to total search results
495 search_results_by_research_id = {}
496 try:
497 from ...database.models import SearchCall
498 from ...metrics.search_tracker import get_search_tracker
500 tracker = get_search_tracker()
502 # Get all unique research_ids from our results
503 research_ids = [r.research_id for r in results if r.research_id]
505 if research_ids:
506 with tracker.db.get_session() as metric_session:
507 # Get all search calls for these research_ids
508 all_search_calls = (
509 metric_session.query(SearchCall)
510 .filter(SearchCall.research_id.in_(research_ids))
511 .all()
512 )
514 # Group search results by research_id
515 for call in all_search_calls:
516 if call.research_id:
517 if (
518 call.research_id
519 not in search_results_by_research_id
520 ):
521 search_results_by_research_id[
522 call.research_id
523 ] = 0
524 search_results_by_research_id[
525 call.research_id
526 ] += call.results_count or 0
528 logger.info(
529 f"Found search metrics for {len(search_results_by_research_id)} research IDs from {len(all_search_calls)} total search calls"
530 )
531 logger.debug(
532 f"Research IDs from results: {research_ids[:5] if len(research_ids) > 5 else research_ids}"
533 )
534 logger.debug(
535 f"Search results by research_id: {dict(list(search_results_by_research_id.items())[:5])}"
536 )
537 except Exception:
538 logger.exception(
539 f"Error getting search metrics for benchmark {benchmark_run_id}"
540 )
542 # Format results for UI display
543 formatted_results = []
544 for result in results:
545 # Get search result count using research_id
546 search_result_count = 0
548 try:
549 if (
550 result.research_id
551 and result.research_id in search_results_by_research_id
552 ):
553 search_result_count = search_results_by_research_id[
554 result.research_id
555 ]
556 logger.debug(
557 f"Found {search_result_count} search results for research_id {result.research_id}"
558 )
560 except Exception:
561 logger.exception(
562 f"Error getting search results for result {result.example_id}"
563 )
565 formatted_results.append(
566 {
567 "example_id": result.example_id,
568 "dataset_type": result.dataset_type.value,
569 "question": result.question,
570 "correct_answer": result.correct_answer,
571 "model_answer": result.extracted_answer,
572 "full_response": result.response,
573 "is_correct": result.is_correct,
574 "confidence": result.confidence,
575 "grader_response": result.grader_response,
576 "processing_time": result.processing_time,
577 "search_result_count": search_result_count,
578 "sources": result.sources,
579 "completed_at": result.completed_at.isoformat()
580 if result.completed_at
581 else None,
582 }
583 )
585 return jsonify({"success": True, "results": formatted_results})
587 except Exception:
588 logger.exception("Error getting benchmark results")
589 return jsonify(
590 {"success": False, "error": "An internal error has occurred."}
591 ), 500
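
# Illustrative sketch, not part of the original module: fetching the most
# recent graded examples for a run.  The `limit` query parameter mirrors the
# request.args handling above; the base URL and session object `http` are
# assumptions.
def _example_fetch_recent_results(http, run_id, limit=10,
                                  base_url="http://localhost:5000"):
    """Print a one-line summary per result row returned by the API."""
    data = http.get(
        f"{base_url}/benchmark/api/results/{run_id}", params={"limit": limit}
    ).json()
    for row in data.get("results", []):
        mark = "correct" if row["is_correct"] else "pending/incorrect"
        print(
            f"{row['example_id']} [{row['dataset_type']}]: {mark}, "
            f"{row['search_result_count']} search results"
        )
    return data.get("results", [])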
594@benchmark_bp.route("/api/configs", methods=["GET"])
595@login_required
596def get_saved_configs():
597 """Get list of saved benchmark configurations."""
598 try:
599 # TODO: Implement saved configs retrieval from database
600 # For now return default configs
601 default_configs = [
602 {
603 "id": 1,
604 "name": "Quick Test",
605 "description": "Fast benchmark with minimal examples",
606 "search_config": {
607 "iterations": 3,
608 "questions_per_iteration": 3,
609 "search_tool": "searxng",
610 "search_strategy": "focused_iteration",
611 },
612 "datasets_config": {
613 "simpleqa": {"count": 10},
614 "browsecomp": {"count": 5},
615 },
616 },
617 {
618 "id": 2,
619 "name": "Standard Evaluation",
620 "description": "Comprehensive benchmark with standard settings",
621 "search_config": {
622 "iterations": 8,
623 "questions_per_iteration": 5,
624 "search_tool": "searxng",
625 "search_strategy": "focused_iteration",
626 },
627 "datasets_config": {
628 "simpleqa": {"count": 50},
629 "browsecomp": {"count": 25},
630 },
631 },
632 ]
634 return jsonify({"success": True, "configs": default_configs})
636 except Exception:
637 logger.exception("Error getting saved configs")
638 return jsonify(
639 {"success": False, "error": "An internal error has occurred."}
640 ), 500
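
# Illustrative sketch, not part of the original module: picking one of the
# default configs returned above and feeding its datasets_config into the
# start endpoint.  The base URL and authenticated session object `http` are
# assumptions.
def _example_start_from_saved_config(http, config_name="Quick Test",
                                     base_url="http://localhost:5000"):
    """Start a run using the dataset counts from a named saved config."""
    configs = http.get(f"{base_url}/benchmark/api/configs").json().get("configs", [])
    chosen = next((c for c in configs if c["name"] == config_name), None)
    if chosen is None:
        raise ValueError(f"No saved config named {config_name!r}")
    response = http.post(
        f"{base_url}/benchmark/api/start",
        json={
            "run_name": chosen["name"],
            "datasets_config": chosen["datasets_config"],
        },
    )
    return response.json()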
643@benchmark_bp.route("/api/start-simple", methods=["POST"])
644@login_required
645def start_benchmark_simple():
646 """Start a benchmark using current database settings."""
647 try:
648 data = request.get_json()
649 datasets_config = data.get("datasets_config", {})
651 # Validate datasets
652 if not datasets_config or not any(  # coverage: 652 ↛ 662, line 652 didn't jump to line 662 because the condition was always true
653 config.get("count", 0) > 0 for config in datasets_config.values()
654 ):
655 return jsonify(
656 {
657 "error": "At least one dataset with count > 0 must be specified"
658 }
659 ), 400
661 # Get current settings from database
662 from flask import session as flask_session
664 username = flask_session.get("username")
665 session_id = flask_session.get("session_id")
667 # Try to get password from session store for background thread
668 from ...database.session_passwords import session_password_store
670 user_password = None
671 if session_id:
672 user_password = session_password_store.get_session_password(
673 username, session_id
674 )
676 with get_user_db_session(username, user_password) as session:
677 # For benchmarks, use a default test username
678 settings_manager = SettingsManager(session, "benchmark_user")
680 # Build search config from database settings
681 search_config = {
682 "iterations": int(
683 settings_manager.get_setting("search.iterations", 8)
684 ),
685 "questions_per_iteration": int(
686 settings_manager.get_setting(
687 "search.questions_per_iteration", 5
688 )
689 ),
690 "search_tool": settings_manager.get_setting(
691 "search.tool", "searxng"
692 ),
693 "search_strategy": settings_manager.get_setting(
694 "search.search_strategy", "focused_iteration"
695 ),
696 "model_name": settings_manager.get_setting("llm.model"),
697 "provider": settings_manager.get_setting("llm.provider"),
698 "temperature": float(
699 settings_manager.get_setting("llm.temperature", 0.7)
700 ),
701 }
703 # Add provider-specific settings
704 provider = search_config.get("provider")
705 if provider == "openai_endpoint":
706 search_config["openai_endpoint_url"] = (
707 settings_manager.get_setting("llm.openai_endpoint.url")
708 )
709 search_config["openai_endpoint_api_key"] = (
710 settings_manager.get_setting("llm.openai_endpoint.api_key")
711 )
712 elif provider == "openai":
713 search_config["openai_api_key"] = settings_manager.get_setting(
714 "llm.openai.api_key"
715 )
716 elif provider == "anthropic":
717 search_config["anthropic_api_key"] = (
718 settings_manager.get_setting("llm.anthropic.api_key")
719 )
721 # Read evaluation config from database settings
722 evaluation_provider = settings_manager.get_setting(
723 "benchmark.evaluation.provider", "openai_endpoint"
724 )
725 evaluation_model = settings_manager.get_setting(
726 "benchmark.evaluation.model", "anthropic/claude-3.7-sonnet"
727 )
728 evaluation_temperature = float(
729 settings_manager.get_setting(
730 "benchmark.evaluation.temperature", 0
731 )
732 )
734 evaluation_config = {
735 "provider": evaluation_provider,
736 "model_name": evaluation_model,
737 "temperature": evaluation_temperature,
738 }
740 # Add provider-specific settings for evaluation
741 if evaluation_provider == "openai_endpoint":
742 evaluation_config["openai_endpoint_url"] = (
743 settings_manager.get_setting(
744 "benchmark.evaluation.endpoint_url",
745 "https://openrouter.ai/api/v1",
746 )
747 )
748 evaluation_config["openai_endpoint_api_key"] = (
749 settings_manager.get_setting("llm.openai_endpoint.api_key")
750 )
751 elif evaluation_provider == "openai":
752 evaluation_config["openai_api_key"] = (
753 settings_manager.get_setting("llm.openai.api_key")
754 )
755 elif evaluation_provider == "anthropic":
756 evaluation_config["anthropic_api_key"] = (
757 settings_manager.get_setting("llm.anthropic.api_key")
758 )
760 # Create and start benchmark
761 benchmark_run_id = benchmark_service.create_benchmark_run(
762 run_name=f"Quick Benchmark - {data.get('run_name', '')}",
763 search_config=search_config,
764 evaluation_config=evaluation_config,
765 datasets_config=datasets_config,
766 username=username,
767 user_password=user_password,
768 )
770 success = benchmark_service.start_benchmark(
771 benchmark_run_id, username, user_password
772 )
774 if success:
775 return jsonify(
776 {
777 "success": True,
778 "benchmark_run_id": benchmark_run_id,
779 "message": "Benchmark started with current settings",
780 }
781 )
782 else:
783 return jsonify(
784 {"success": False, "error": "Failed to start benchmark"}
785 ), 500
787 except Exception:
788 logger.exception("Error starting simple benchmark")
789 return jsonify(
790 {"success": False, "error": "An internal error has occurred."}
791 ), 500
794@benchmark_bp.route("/api/validate-config", methods=["POST"])
795@login_required
796def validate_config():
797 """Validate a benchmark configuration."""
798 try:
799 data = request.get_json()
801 if not data:  # coverage: 801 ↛ 802, line 801 didn't jump to line 802 because the condition was never true
802 return jsonify({"valid": False, "errors": ["No data provided"]})
804 errors = []
806 # Validate search config
807 search_config = data.get("search_config", {})
808 if not search_config.get("search_tool"):  # coverage: 808 ↛ 810, line 808 didn't jump to line 810 because the condition was always true
809 errors.append("Search tool is required")
810 if not search_config.get("search_strategy"):  # coverage: 810 ↛ 814, line 810 didn't jump to line 814 because the condition was always true
811 errors.append("Search strategy is required")
813 # Validate datasets config
814 datasets_config = data.get("datasets_config", {})
815 if not datasets_config:  # coverage: 815 ↛ 818, line 815 didn't jump to line 818 because the condition was always true
816 errors.append("At least one dataset must be configured")
818 total_examples = sum(
819 config.get("count", 0) for config in datasets_config.values()
820 )
821 if total_examples == 0:  # coverage: 821 ↛ 824, line 821 didn't jump to line 824 because the condition was always true
822 errors.append("Total examples must be greater than 0")
824 if total_examples > 1000:  # coverage: 824 ↛ 825, line 824 didn't jump to line 825 because the condition was never true
825 errors.append(
826 "Total examples should not exceed 1000 for web interface"
827 )
829 return jsonify(
830 {
831 "valid": len(errors) == 0,
832 "errors": errors,
833 "total_examples": total_examples,
834 }
835 )
837 except Exception:
838 logger.exception("Error validating config")
839 return jsonify(
840 {"valid": False, "errors": ["An internal error has occurred."]}
841 ), 500
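
# Illustrative sketch, not part of the original module: the same checks the
# route applies, written as a plain function a client could run before posting
# so an obviously invalid payload never leaves the UI.
def _example_validate_locally(search_config, datasets_config, max_examples=1000):
    """Return the same {valid, errors, total_examples} shape as the endpoint."""
    errors = []
    if not search_config.get("search_tool"):
        errors.append("Search tool is required")
    if not search_config.get("search_strategy"):
        errors.append("Search strategy is required")
    if not datasets_config:
        errors.append("At least one dataset must be configured")
    total_examples = sum(
        config.get("count", 0) for config in datasets_config.values()
    )
    if total_examples == 0:
        errors.append("Total examples must be greater than 0")
    if total_examples > max_examples:
        errors.append(
            f"Total examples should not exceed {max_examples} for web interface"
        )
    return {
        "valid": len(errors) == 0,
        "errors": errors,
        "total_examples": total_examples,
    }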
844@benchmark_bp.route("/api/search-quality", methods=["GET"])
845@limiter.exempt
846@login_required
847def get_search_quality():
848 """Get current search quality metrics from rate limiting tracker."""
849 try:
850 from ...web_search_engines.rate_limiting import get_tracker
852 tracker = get_tracker()
853 quality_stats = tracker.get_search_quality_stats()
855 return jsonify(
856 {
857 "success": True,
858 "search_quality": quality_stats,
859 "timestamp": time.time(),
860 }
861 )
863 except Exception:
864 logger.exception("Error getting search quality")
865 return jsonify(
866 {"success": False, "error": "An internal error has occurred."}
867 ), 500
870@benchmark_bp.route("/api/delete/<int:benchmark_run_id>", methods=["DELETE"])
871@login_required
872def delete_benchmark_run(benchmark_run_id: int):
873 """Delete a benchmark run and all its results."""
874 try:
875 from ...database.models.benchmark import (
876 BenchmarkProgress,
877 BenchmarkResult,
878 BenchmarkRun,
879 )
880 from ...database.session_context import get_user_db_session
881 from flask import session as flask_session
883 username = flask_session.get("username")
884 with get_user_db_session(username) as session:
885 # Check if benchmark run exists
886 benchmark_run = (
887 session.query(BenchmarkRun)
888 .filter(BenchmarkRun.id == benchmark_run_id)
889 .first()
890 )
892 if not benchmark_run:
893 return jsonify(
894 {"success": False, "error": "Benchmark run not found"}
895 ), 404
897 # Prevent deletion of running benchmarks
898 if benchmark_run.status.value == "in_progress":
899 return jsonify(
900 {
901 "success": False,
902 "error": "Cannot delete a running benchmark. Cancel it first.",
903 }
904 ), 400
906 # Delete related records (cascade should handle this, but being explicit)
907 session.query(BenchmarkResult).filter(
908 BenchmarkResult.benchmark_run_id == benchmark_run_id
909 ).delete()
911 session.query(BenchmarkProgress).filter(
912 BenchmarkProgress.benchmark_run_id == benchmark_run_id
913 ).delete()
915 # Delete the benchmark run
916 session.delete(benchmark_run)
917 session.commit()
919 logger.info(f"Deleted benchmark run {benchmark_run_id}")
920 return jsonify(
921 {
922 "success": True,
923 "message": f"Benchmark run {benchmark_run_id} deleted successfully",
924 }
925 )
927 except Exception:
928 logger.exception(f"Error deleting benchmark run {benchmark_run_id}")
929 return jsonify(
930 {"success": False, "error": "An internal error has occurred."}
931 ), 500
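
# Illustrative sketch, not part of the original module: deleting a finished
# run.  The route refuses in-progress runs with a 400, so this sketch cancels
# first and retries; the base URL and authenticated session object `http` are
# assumptions.
def _example_delete_run(http, run_id, base_url="http://localhost:5000"):
    """Delete a benchmark run, cancelling it first if it is still running."""
    response = http.delete(f"{base_url}/benchmark/api/delete/{run_id}")
    if response.status_code == 400:
        # Still running: cancel, then retry the delete.
        http.post(f"{base_url}/benchmark/api/cancel/{run_id}")
        response = http.delete(f"{base_url}/benchmark/api/delete/{run_id}")
    return response.json().get("success", False)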