Coverage for src/local_deep_research/benchmarks/evaluators/base.py: 60% (15 statements)
1"""
2Base class for benchmark evaluators.
4This module defines the abstract base class that all benchmark evaluators
5must implement, establishing a common interface for different benchmark types.
6"""
8from abc import ABC, abstractmethod
9from pathlib import Path
10from typing import Any, Dict
13class BaseBenchmarkEvaluator(ABC):
14 """
15 Abstract base class for benchmark evaluators.
17 All benchmark evaluator implementations must inherit from this class and
18 implement the evaluate method to run their specific benchmark type.
19 """
21 def __init__(self, name: str):
22 """
23 Initialize benchmark evaluator with a name.
25 Args:
26 name: Unique identifier for this benchmark type
27 """
28 self.name = name
30 def get_name(self) -> str:
31 """
32 Get the benchmark name.
34 Returns:
35 The benchmark identifier
36 """
37 return self.name
39 @abstractmethod
40 def evaluate(
41 self,
42 system_config: Dict[str, Any],
43 num_examples: int,
44 output_dir: str,
45 ) -> Dict[str, Any]:
46 """
47 Run benchmark evaluation with given system configuration.
49 Args:
50 system_config: Configuration parameters for the system under test
51 num_examples: Number of benchmark examples to evaluate
52 output_dir: Directory to save evaluation results
54 Returns:
55 Dictionary with evaluation metrics including quality_score (0-1)
56 """
57 pass
59 def _create_subdirectory(self, output_dir: str) -> str:
60 """
61 Create a benchmark-specific subdirectory for output.
63 Args:
64 output_dir: Parent directory for output
66 Returns:
67 Path to the benchmark-specific directory
68 """
69 benchmark_dir = Path(output_dir) / self.name
70 benchmark_dir.mkdir(parents=True, exist_ok=True)
71 return str(benchmark_dir)
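

# --- Example usage (not part of this module) ---
# A minimal sketch of a concrete evaluator, assuming a hypothetical benchmark
# that simply records how many examples were requested. The class name
# "ExampleEvaluator", the fixed quality_score, and the results.json output
# below are illustrative assumptions, not the behavior of any real evaluator
# in this package; they only demonstrate how a subclass would implement the
# interface defined above.
import json


class ExampleEvaluator(BaseBenchmarkEvaluator):
    """Illustrative evaluator that writes a placeholder results file."""

    def __init__(self):
        super().__init__(name="example")

    def evaluate(
        self,
        system_config: Dict[str, Any],
        num_examples: int,
        output_dir: str,
    ) -> Dict[str, Any]:
        # Use the base-class helper to get a per-benchmark output directory.
        benchmark_dir = self._create_subdirectory(output_dir)

        # Placeholder "evaluation": pretend every example passed.
        results = {
            "benchmark": self.get_name(),
            "num_examples": num_examples,
            "quality_score": 1.0,  # interface expects a value in the 0-1 range
        }

        # Persist metrics alongside any other artifacts for this benchmark.
        with open(Path(benchmark_dir) / "results.json", "w") as f:
            json.dump(results, f, indent=2)

        return results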