Coverage for src/local_deep_research/benchmarks/comparison/evaluator.py: 5%
301 statements
coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
1"""
2Configuration comparison for Local Deep Research.
4This module provides functions for comparing different parameter configurations
5and evaluating their performance across various metrics.
6"""
8import os
9from datetime import datetime, UTC
10from pathlib import Path
11from typing import Any, Dict, List, Optional
13import matplotlib.pyplot as plt
14import numpy as np
15from loguru import logger
16from matplotlib.patches import Circle, RegularPolygon
18from local_deep_research.benchmarks.efficiency.resource_monitor import (
19 ResourceMonitor,
20)
21from local_deep_research.benchmarks.efficiency.speed_profiler import (
22 SpeedProfiler,
23)
24from local_deep_research.benchmarks.optimization.metrics import (
25 calculate_combined_score,
26 calculate_quality_metrics,
27 calculate_resource_metrics,
28 calculate_speed_metrics,
29)
30from local_deep_research.config.llm_config import get_llm
31from local_deep_research.config.search_config import get_search
32from local_deep_research.search_system import AdvancedSearchSystem
35def compare_configurations(
36 query: str,
37 configurations: List[Dict[str, Any]],
38 output_dir: str = "comparison_results",
39 model_name: Optional[str] = None,
40 provider: Optional[str] = None,
41 search_tool: Optional[str] = None,
42 repetitions: int = 1,
43 metric_weights: Optional[Dict[str, float]] = None,
44) -> Dict[str, Any]:
45 """
46 Compare multiple parameter configurations.
48 Args:
49 query: Research query to use for evaluation
50 configurations: List of parameter configurations to compare
51 output_dir: Directory to save comparison results
52 model_name: Name of the LLM model to use
53 provider: LLM provider
54 search_tool: Search engine to use
55 repetitions: Number of repetitions for each configuration
56 metric_weights: Dictionary of weights for each metric type
58 Returns:
59 Dictionary with comparison results
60 """
61 os.makedirs(output_dir, exist_ok=True)
63 # Default metric weights if not provided
64 if metric_weights is None:
65 metric_weights = {
66 "quality": 0.6,
67 "speed": 0.4,
68 "resource": 0.0, # Disabled by default
69 }
71 # Verify valid configurations
72 if not configurations:
73 logger.error("No configurations provided for comparison")
74 return {"error": "No configurations provided"}
76 # Results storage
77 results = []
79 # Process each configuration
80 for i, config in enumerate(configurations):
81 logger.info(
82 f"Evaluating configuration {i + 1}/{len(configurations)}: {config}"
83 )
85 # Name for this configuration
86 config_name = config.get("name", f"Configuration {i + 1}")
88 # Results for all repetitions of this configuration
89 config_results = []
91 # Run multiple repetitions
92 for rep in range(repetitions):
93 logger.info(
94 f"Starting repetition {rep + 1}/{repetitions} for {config_name}"
95 )
97 try:
98 # Run the configuration
99 result = _evaluate_single_configuration(
100 query=query,
101 config=config,
102 model_name=model_name,
103 provider=provider,
104 search_tool=search_tool,
105 )
107 config_results.append(result)
108 logger.info(f"Completed repetition {rep + 1} for {config_name}")
110 except Exception as e:
111 logger.exception(
112 f"Error in {config_name}, repetition {rep + 1}: {e!s}"
113 )
114 # Add error info but continue with other configurations
115 config_results.append({"error": str(e), "success": False})
117 # Calculate aggregate metrics across repetitions
118 if config_results:
119 # Filter out failed runs
120 successful_runs = [
121 r for r in config_results if r.get("success", False)
122 ]
124 if successful_runs:
125 # Calculate average metrics
126 avg_metrics = _calculate_average_metrics(successful_runs)
128 # Calculate overall score
129 overall_score = calculate_combined_score(
130 quality_metrics=avg_metrics.get("quality_metrics", {}),
131 speed_metrics=avg_metrics.get("speed_metrics", {}),
132 resource_metrics=avg_metrics.get("resource_metrics", {}),
133 weights=metric_weights,
134 )
136 result_summary = {
137 "name": config_name,
138 "configuration": config,
139 "success": True,
140 "runs_completed": len(successful_runs),
141 "runs_failed": len(config_results) - len(successful_runs),
142 "avg_metrics": avg_metrics,
143 "overall_score": overall_score,
144 "individual_results": config_results,
145 }
146 else:
147 # All runs failed
148 result_summary = {
149 "name": config_name,
150 "configuration": config,
151 "success": False,
152 "runs_completed": 0,
153 "runs_failed": len(config_results),
154 "error": "All runs failed",
155 "individual_results": config_results,
156 }
158 results.append(result_summary)
160 # Sort results by overall score (if available)
161 sorted_results = sorted(
162 [r for r in results if r.get("success", False)],
163 key=lambda x: x.get("overall_score", 0),
164 reverse=True,
165 )
167 # Add failed configurations at the end
168 sorted_results.extend([r for r in results if not r.get("success", False)])
170 # Create comparison report
171 comparison_report = {
172 "query": query,
173 "configurations_tested": len(configurations),
174 "successful_configurations": len(
175 [r for r in results if r.get("success", False)]
176 ),
177 "failed_configurations": len(
178 [r for r in results if not r.get("success", False)]
179 ),
180 "repetitions": repetitions,
181 "metric_weights": metric_weights,
182 "timestamp": datetime.now(UTC).isoformat(),
183 "results": sorted_results,
184 }
186 # Save results to file
187 from ...security.file_write_verifier import write_json_verified
189 timestamp = datetime.now(UTC).strftime("%Y%m%d_%H%M%S")
190 result_file = str(Path(output_dir) / f"comparison_results_{timestamp}.json")
192 write_json_verified(
193 result_file,
194 comparison_report,
195 "benchmark.allow_file_output",
196 context="comparison results",
197 )
199 # Generate visualizations
200 visualizations_dir = Path(output_dir) / "visualizations"
201 visualizations_dir.mkdir(parents=True, exist_ok=True)
202 visualizations_dir = str(visualizations_dir)
204 _create_comparison_visualizations(
205 comparison_report, output_dir=visualizations_dir, timestamp=timestamp
206 )
208 logger.info(f"Comparison completed. Results saved to {result_file}")
210 # Add report path to the result
211 comparison_report["report_path"] = result_file
213 return comparison_report
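
# --- Illustrative usage sketch (not part of the original module) ---
# A minimal example of how compare_configurations might be called. The query,
# configuration names, and parameter values below are assumptions chosen for
# illustration; only the keys ("iterations", "questions_per_iteration",
# "search_strategy", "max_results") mirror those read by
# _evaluate_single_configuration.
#
#     report = compare_configurations(
#         query="What are the trade-offs between dense and sparse retrieval?",
#         configurations=[
#             {"name": "Fast", "iterations": 1, "questions_per_iteration": 1},
#             {"name": "Thorough", "iterations": 3, "questions_per_iteration": 3,
#              "search_strategy": "iterdrag", "max_results": 50},
#         ],
#         repetitions=2,
#         metric_weights={"quality": 0.7, "speed": 0.3, "resource": 0.0},
#     )
#     best = report["results"][0]["name"] if report["results"] else None
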

def _evaluate_single_configuration(
    query: str,
    config: Dict[str, Any],
    model_name: Optional[str] = None,
    provider: Optional[str] = None,
    search_tool: Optional[str] = None,
) -> Dict[str, Any]:
    """
    Evaluate a single configuration.

    Args:
        query: Research query to evaluate
        config: Configuration parameters
        model_name: Name of the LLM model to use
        provider: LLM provider
        search_tool: Search engine to use

    Returns:
        Dictionary with evaluation results
    """
    # Extract configuration parameters
    config_model_name = config.get("model_name", model_name)
    config_provider = config.get("provider", provider)
    config_search_tool = config.get("search_tool", search_tool)
    config_iterations = config.get("iterations", 2)
    config_questions_per_iteration = config.get("questions_per_iteration", 2)
    config_search_strategy = config.get("search_strategy", "iterdrag")
    config_max_results = config.get("max_results", 50)
    config_max_filtered_results = config.get("max_filtered_results", 20)

    # Initialize profiling tools
    speed_profiler = SpeedProfiler()
    resource_monitor = ResourceMonitor(sampling_interval=0.5)

    # Start profiling
    speed_profiler.start()
    resource_monitor.start()

    try:
        # Get LLM
        with speed_profiler.timer("llm_initialization"):
            llm = get_llm(
                temperature=config.get("temperature", 0.7),
                model_name=config_model_name,
                provider=config_provider,
            )

        # Set up search engine if specified
        with speed_profiler.timer("search_initialization"):
            search = None
            if config_search_tool:
                search = get_search(
                    config_search_tool,
                    llm_instance=llm,
                    max_results=config_max_results,
                    max_filtered_results=config_max_filtered_results,
                )

        # Create search system
        system = AdvancedSearchSystem(llm=llm, search=search)
        system.max_iterations = config_iterations
        system.questions_per_iteration = config_questions_per_iteration
        system.strategy_name = config_search_strategy

        # Run the analysis
        with speed_profiler.timer("analysis"):
            results = system.analyze_topic(query)

        # Stop profiling
        speed_profiler.stop()
        resource_monitor.stop()

        # Calculate metrics
        quality_metrics = calculate_quality_metrics(
            results=results,
            system_info={
                "all_links_of_system": getattr(
                    system, "all_links_of_system", []
                )
            },
        )

        speed_metrics = calculate_speed_metrics(
            timing_info=speed_profiler.get_summary(),
            system_info={
                "iterations": config_iterations,
                "questions_per_iteration": config_questions_per_iteration,
                "results": results,
            },
        )

        resource_metrics = calculate_resource_metrics(
            resource_info=resource_monitor.get_combined_stats(),
            system_info={
                "iterations": config_iterations,
                "questions_per_iteration": config_questions_per_iteration,
                "results": results,
            },
        )

        # Return comprehensive results
        return {
            "query": query,
            "config": config,
            "success": True,
            "findings_count": len(results.get("findings", [])),
            "knowledge_length": len(results.get("current_knowledge", "")),
            "quality_metrics": quality_metrics,
            "speed_metrics": speed_metrics,
            "resource_metrics": resource_metrics,
            "timing_details": speed_profiler.get_timings(),
            "resource_details": resource_monitor.get_combined_stats(),
        }

    except Exception as e:
        # Stop profiling on error
        speed_profiler.stop()
        resource_monitor.stop()

        # Log the error
        logger.exception("Error evaluating configuration")

        # Return error information
        return {
            "query": query,
            "config": config,
            "success": False,
            "error": str(e),
            "timing_details": speed_profiler.get_timings(),
            "resource_details": resource_monitor.get_combined_stats(),
        }
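
# --- Illustrative sketch (assumption, not part of the original module) ---
# _evaluate_single_configuration can in principle be exercised on its own when
# debugging one configuration; the keys of the returned dict are the ones
# built above ("success", "quality_metrics", "speed_metrics",
# "resource_metrics", "timing_details", "resource_details").
#
#     single = _evaluate_single_configuration(
#         query="example query",
#         config={"iterations": 1, "questions_per_iteration": 1},
#     )
#     if single["success"]:
#         print(single["speed_metrics"], single["quality_metrics"])
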

def _calculate_average_metrics(results: List[Dict[str, Any]]) -> Dict[str, Any]:
    """
    Calculate average metrics across multiple runs.

    Args:
        results: List of individual run results

    Returns:
        Dictionary with averaged metrics
    """
    # Check if there are any successful results
    if not results:
        return {}

    # Initialize average metrics
    avg_metrics = {
        "quality_metrics": {},
        "speed_metrics": {},
        "resource_metrics": {},
    }

    # Quality metrics
    quality_keys = set()
    for result in results:
        quality_metrics = result.get("quality_metrics", {})
        quality_keys.update(quality_metrics.keys())

    for key in quality_keys:
        values = [r.get("quality_metrics", {}).get(key) for r in results]
        values = [v for v in values if v is not None]
        if values:
            avg_metrics["quality_metrics"][key] = sum(values) / len(values)

    # Speed metrics
    speed_keys = set()
    for result in results:
        speed_metrics = result.get("speed_metrics", {})
        speed_keys.update(speed_metrics.keys())

    for key in speed_keys:
        values = [r.get("speed_metrics", {}).get(key) for r in results]
        values = [v for v in values if v is not None]
        if values:
            avg_metrics["speed_metrics"][key] = sum(values) / len(values)

    # Resource metrics
    resource_keys = set()
    for result in results:
        resource_metrics = result.get("resource_metrics", {})
        resource_keys.update(resource_metrics.keys())

    for key in resource_keys:
        values = [r.get("resource_metrics", {}).get(key) for r in results]
        values = [v for v in values if v is not None]
        if values:
            avg_metrics["resource_metrics"][key] = sum(values) / len(values)

    return avg_metrics
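
# --- Worked example (illustrative, not part of the original module) ---
# With two successful runs, each metric key present in either run is averaged
# over the runs that actually report it; the input values here are made up.
#
#     runs = [
#         {"quality_metrics": {"overall_quality": 0.75},
#          "speed_metrics": {"total_duration": 100.0}},
#         {"quality_metrics": {"overall_quality": 0.25},
#          "speed_metrics": {"total_duration": 140.0}},
#     ]
#     _calculate_average_metrics(runs)
#     # -> {"quality_metrics": {"overall_quality": 0.5},
#     #     "speed_metrics": {"total_duration": 120.0},
#     #     "resource_metrics": {}}
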

def _create_comparison_visualizations(
    comparison_report: Dict[str, Any], output_dir: str, timestamp: str
):
    """
    Create visualizations for the comparison results.

    Args:
        comparison_report: Comparison report dictionary
        output_dir: Directory to save visualizations
        timestamp: Timestamp string for filenames
    """
    # Check if there are successful results
    successful_results = [
        r
        for r in comparison_report.get("results", [])
        if r.get("success", False)
    ]

    if not successful_results:
        logger.warning("No successful configurations to visualize")
        return

    # Extract configuration names
    config_names = [
        r.get("name", f"Config {i + 1}")
        for i, r in enumerate(successful_results)
    ]

    # 1. Overall score comparison
    plt.figure(figsize=(12, 6))
    scores = [r.get("overall_score", 0) for r in successful_results]

    # Create horizontal bar chart
    plt.barh(config_names, scores, color="skyblue")
    plt.xlabel("Overall Score")
    plt.ylabel("Configuration")
    plt.title("Configuration Performance Comparison")
    plt.grid(axis="x", linestyle="--", alpha=0.7)
    plt.tight_layout()
    plt.savefig(
        str(Path(output_dir) / f"overall_score_comparison_{timestamp}.png")
    )
    plt.close()

    # 2. Quality metrics comparison
    quality_metrics = ["overall_quality", "source_count", "lexical_diversity"]
    _create_metric_comparison_chart(
        successful_results,
        config_names,
        quality_metrics,
        "quality_metrics",
        "Quality Metrics Comparison",
        str(Path(output_dir) / f"quality_metrics_comparison_{timestamp}.png"),
    )

    # 3. Speed metrics comparison
    speed_metrics = ["overall_speed", "total_duration", "duration_per_question"]
    _create_metric_comparison_chart(
        successful_results,
        config_names,
        speed_metrics,
        "speed_metrics",
        "Speed Metrics Comparison",
        str(Path(output_dir) / f"speed_metrics_comparison_{timestamp}.png"),
    )

    # 4. Resource metrics comparison
    resource_metrics = [
        "overall_resource",
        "process_memory_max_mb",
        "system_cpu_avg",
    ]
    _create_metric_comparison_chart(
        successful_results,
        config_names,
        resource_metrics,
        "resource_metrics",
        "Resource Usage Comparison",
        str(Path(output_dir) / f"resource_metrics_comparison_{timestamp}.png"),
    )

    # 5. Spider chart for multi-dimensional comparison
    _create_spider_chart(
        successful_results,
        config_names,
        str(Path(output_dir) / f"spider_chart_comparison_{timestamp}.png"),
    )

    # 6. Pareto frontier chart for quality vs. speed
    _create_pareto_chart(
        successful_results,
        str(Path(output_dir) / f"pareto_chart_comparison_{timestamp}.png"),
    )

def _create_metric_comparison_chart(
    results: List[Dict[str, Any]],
    config_names: List[str],
    metric_keys: List[str],
    metric_category: str,
    title: str,
    output_path: str,
):
    """
    Create a chart comparing specific metrics across configurations.

    Args:
        results: List of configuration results
        config_names: Names of configurations
        metric_keys: Keys of metrics to compare
        metric_category: Category of metrics (quality_metrics, speed_metrics, etc.)
        title: Chart title
        output_path: Path to save the chart
    """
    # Create figure with multiple subplots (one per metric)
    fig, axes = plt.subplots(
        len(metric_keys), 1, figsize=(12, 5 * len(metric_keys))
    )

    # Handle case with only one metric
    if len(metric_keys) == 1:
        axes = [axes]

    for i, metric_key in enumerate(metric_keys):
        ax = axes[i]
        axis_label = metric_key

        # Get metric values
        metric_values = []
        for result in results:
            metrics = result.get("avg_metrics", {}).get(metric_category, {})
            value = metrics.get(metric_key)
            metric_values.append(value if value is not None else 0)

        # Rescale duration metrics so every bar shares one unit:
        # hours if the largest value exceeds 3600 s, minutes if it exceeds 60 s,
        # otherwise seconds. (Scaling per result would mix units on one axis.)
        if "duration" in metric_key and metric_values:
            max_value = max(metric_values)
            if max_value > 3600:
                metric_values = [v / 3600 for v in metric_values]
                axis_label += " (hours)"
            elif max_value > 60:
                metric_values = [v / 60 for v in metric_values]
                axis_label += " (minutes)"
            else:
                axis_label += " (seconds)"

        # Create horizontal bar chart
        bars = ax.barh(config_names, metric_values, color="lightblue")
        ax.set_xlabel(axis_label.replace("_", " ").title())
        ax.set_title(axis_label.replace("_", " ").title())
        ax.grid(axis="x", linestyle="--", alpha=0.7)

        # Add value labels to bars
        for bar in bars:
            width = bar.get_width()
            label_x_pos = width * 1.01
            ax.text(
                label_x_pos,
                bar.get_y() + bar.get_height() / 2,
                f"{width:.2f}",
                va="center",
            )

    plt.suptitle(title, fontsize=16)
    plt.tight_layout()
    plt.savefig(output_path)
    plt.close()

def _create_spider_chart(
    results: List[Dict[str, Any]], config_names: List[str], output_path: str
):
    """
    Create a spider chart comparing metrics across configurations.

    Args:
        results: List of configuration results
        config_names: Names of configurations
        output_path: Path to save the chart
    """
    # Build a custom radar (spider) projection; fall back to a text placeholder on error
    try:
        from matplotlib.path import Path
        from matplotlib.projections import register_projection
        from matplotlib.projections.polar import PolarAxes
        from matplotlib.spines import Spine

        def radar_factory(num_vars, frame="circle"):
            """Create a radar chart with `num_vars` axes."""
            # Calculate evenly-spaced axis angles
            theta = np.linspace(0, 2 * np.pi, num_vars, endpoint=False)

            class RadarAxes(PolarAxes):
                name = "radar"

                def __init__(self, *args, **kwargs):
                    super().__init__(*args, **kwargs)
                    self.set_theta_zero_location("N")

                def fill(self, *args, closed=True, **kwargs):
                    return super().fill(closed=closed, *args, **kwargs)

                def plot(self, *args, **kwargs):
                    return super().plot(*args, **kwargs)

                def set_varlabels(self, labels):
                    self.set_thetagrids(np.degrees(theta), labels)

                def _gen_axes_patch(self):
                    if frame == "circle":
                        return Circle((0.5, 0.5), 0.5)
                    elif frame == "polygon":
                        return RegularPolygon(
                            (0.5, 0.5), num_vars, radius=0.5, edgecolor="k"
                        )
                    else:
                        raise ValueError(
                            "Unknown value for 'frame': %s" % frame
                        )

                def _gen_axes_spines(self):
                    if frame == "circle":
                        return super()._gen_axes_spines()
                    elif frame == "polygon":
                        spine_type = Spine.circular_spine
                        verts = unit_poly_verts(num_vars)
                        vertices = [(0.5, 0.5)] + verts
                        codes = (
                            [Path.MOVETO]
                            + [Path.LINETO] * num_vars
                            + [Path.CLOSEPOLY]
                        )
                        path = Path(vertices, codes)
                        spine = Spine(self, spine_type, path)
                        spine.set_transform(self.transAxes)
                        return {"polar": spine}
                    else:
                        raise ValueError(
                            "Unknown value for 'frame': %s" % frame
                        )

            def unit_poly_verts(num_vars):
                """Return vertices of polygon for radar chart."""
                verts = []
                for i in range(num_vars):
                    angle = theta[i]
                    verts.append(
                        (0.5 * (1 + np.cos(angle)), 0.5 * (1 + np.sin(angle)))
                    )
                return verts

            register_projection(RadarAxes)
            return theta

        # Select metrics for the spider chart
        metrics = [
            {"name": "Quality", "key": "quality_metrics.overall_quality"},
            {"name": "Speed", "key": "speed_metrics.overall_speed"},
            {
                "name": "Sources",
                "key": "quality_metrics.normalized_source_count",
            },
            {
                "name": "Content",
                "key": "quality_metrics.normalized_knowledge_length",
            },
            {
                "name": "Memory",
                "key": "resource_metrics.normalized_memory_usage",
                "invert": True,
            },
        ]

        # Extract metric values
        spoke_labels = [m["name"] for m in metrics]
        num_vars = len(spoke_labels)
        theta = radar_factory(num_vars)

        fig, ax = plt.subplots(
            figsize=(10, 10), subplot_kw=dict(projection="radar")
        )

        # Color map for different configurations
        colors = plt.cm.viridis(np.linspace(0, 1, len(results)))

        for i, result in enumerate(results):
            values = []
            for metric in metrics:
                # Extract metric value using the key path (e.g., "quality_metrics.overall_quality")
                key_parts = metric["key"].split(".")
                value = result.get("avg_metrics", {})
                for part in key_parts:
                    value = value.get(part, 0) if isinstance(value, dict) else 0

                # Invert if needed (for metrics where lower is better)
                if metric.get("invert", False):
                    value = 1.0 - value

                values.append(value)

            # Plot this configuration
            ax.plot(
                theta,
                values,
                color=colors[i],
                linewidth=2,
                label=config_names[i],
            )
            ax.fill(theta, values, color=colors[i], alpha=0.25)

        # Set chart properties
        ax.set_varlabels(spoke_labels)
        plt.legend(loc="best", bbox_to_anchor=(0.5, 0.1))
        plt.title("Multi-Dimensional Configuration Comparison", size=16, y=1.05)
        plt.tight_layout()

        # Save chart
        plt.savefig(output_path)
        plt.close()

    except Exception as e:
        logger.exception("Error creating spider chart")
        # Create a text-based chart as fallback
        plt.figure(figsize=(10, 6))
        plt.text(
            0.5,
            0.5,
            f"Spider chart could not be created: {e!s}",
            horizontalalignment="center",
            verticalalignment="center",
        )
        plt.axis("off")
        plt.savefig(output_path)
        plt.close()
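
# --- Illustrative note (not part of the original module) ---
# The dotted key paths in the metrics list above resolve against avg_metrics,
# one dictionary level per segment; the sample value below is made up.
#
#     result = {"avg_metrics": {"quality_metrics": {"overall_quality": 0.8}}}
#     value = result.get("avg_metrics", {})
#     for part in "quality_metrics.overall_quality".split("."):
#         value = value.get(part, 0) if isinstance(value, dict) else 0
#     # value == 0.8; missing segments fall back to 0
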

def _create_pareto_chart(results: List[Dict[str, Any]], output_path: str):
    """
    Create a Pareto frontier chart showing quality vs. speed tradeoff.

    Args:
        results: List of configuration results
        output_path: Path to save the chart
    """
    # Extract quality and speed metrics
    quality_scores = []
    speed_scores = []
    names = []

    for result in results:
        metrics = result.get("avg_metrics", {})
        quality = metrics.get("quality_metrics", {}).get("overall_quality", 0)

        # For speed, we use the inverse of duration (so higher is better)
        duration = metrics.get("speed_metrics", {}).get("total_duration", 1)
        speed = 1.0 / max(duration, 0.001)  # Avoid division by zero

        quality_scores.append(quality)
        speed_scores.append(speed)
        names.append(result.get("name", "Configuration"))

    # Create scatter plot
    plt.figure(figsize=(10, 8))
    plt.scatter(quality_scores, speed_scores, s=100, alpha=0.7)

    # Add labels for each point
    for i, name in enumerate(names):
        plt.annotate(
            name,
            (quality_scores[i], speed_scores[i]),
            xytext=(5, 5),
            textcoords="offset points",
        )

    # Identify Pareto frontier
    pareto_points = []
    for i, (q, s) in enumerate(zip(quality_scores, speed_scores, strict=False)):
        is_pareto = True
        for q2, s2 in zip(quality_scores, speed_scores, strict=False):
            if q2 > q and s2 > s:  # Dominated
                is_pareto = False
                break
        if is_pareto:
            pareto_points.append(i)

    # Highlight Pareto frontier
    pareto_quality = [quality_scores[i] for i in pareto_points]
    pareto_speed = [speed_scores[i] for i in pareto_points]

    # Sort Pareto points for line drawing
    pareto_sorted = sorted(
        zip(pareto_quality, pareto_speed, pareto_points, strict=False)
    )
    pareto_quality = [p[0] for p in pareto_sorted]
    pareto_speed = [p[1] for p in pareto_sorted]
    pareto_indices = [p[2] for p in pareto_sorted]

    # Draw Pareto frontier line
    plt.plot(pareto_quality, pareto_speed, "r--", linewidth=2)

    # Highlight Pareto optimal points
    plt.scatter(
        [quality_scores[i] for i in pareto_indices],
        [speed_scores[i] for i in pareto_indices],
        s=150,
        facecolors="none",
        edgecolors="r",
        linewidth=2,
    )

    # Add labels for Pareto optimal configurations
    for i in pareto_indices:
        plt.annotate(
            names[i],
            (quality_scores[i], speed_scores[i]),
            xytext=(8, 8),
            textcoords="offset points",
            bbox=dict(boxstyle="round,pad=0.5", fc="yellow", alpha=0.7),
        )

    # Set chart properties
    plt.xlabel("Quality Score (higher is better)")
    plt.ylabel("Speed Score (higher is better)")
    plt.title("Quality vs. Speed Tradeoff (Pareto Frontier)", size=14)
    plt.grid(True, linestyle="--", alpha=0.7)

    # Add explanation
    plt.figtext(
        0.5,
        0.01,
        "Points on the red line are Pareto optimal configurations\n"
        "(no other configuration is better in both quality and speed)",
        ha="center",
        fontsize=10,
        bbox=dict(boxstyle="round", fc="white", alpha=0.7),
    )

    plt.tight_layout()
    plt.savefig(output_path)
    plt.close()
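
# --- Worked example (illustrative, not part of the original module) ---
# A tiny, self-contained demo of the dominance rule used above: a point is
# Pareto optimal unless some other point is strictly better in both quality
# and speed. The numbers are made up purely to show the mechanics.
if __name__ == "__main__":
    demo_quality = [0.80, 0.60, 0.90, 0.70]
    demo_speed = [0.020, 0.050, 0.010, 0.015]

    demo_pareto = [
        i
        for i, (q, s) in enumerate(zip(demo_quality, demo_speed))
        if not any(
            q2 > q and s2 > s for q2, s2 in zip(demo_quality, demo_speed)
        )
    ]
    # Expected: indices 0, 1, and 2 survive; index 3 is dominated by index 0
    # (0.80 > 0.70 quality and 0.020 > 0.015 speed).
    print(f"Pareto-optimal demo indices: {demo_pareto}")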