Coverage for src/local_deep_research/benchmarks/evaluators/base.py: 93%

"""
Base class for benchmark evaluators.

This module defines the abstract base class that all benchmark evaluators
must implement, establishing a common interface for different benchmark types.
"""

8from abc import ABC, abstractmethod

9from pathlib import Path

10from typing import Any, Dict

class BaseBenchmarkEvaluator(ABC):
    """
    Common contract shared by every benchmark evaluator.

    Concrete evaluators subclass this type and supply ``evaluate``. The base
    class itself only remembers the benchmark name and offers a helper that
    prepares a per-benchmark output directory.
    """

    def __init__(self, name: str):
        """
        Remember the benchmark name on the instance.

        Args:
            name: Unique identifier for this benchmark type. It is also the
                directory name used by ``_create_subdirectory``.
        """
        self.name = name

    def get_name(self) -> str:
        """
        Report the name this evaluator was constructed with.

        Returns:
            The benchmark identifier stored in ``self.name``.
        """
        return self.name

    @abstractmethod
    def evaluate(
        self,
        system_config: Dict[str, Any],
        num_examples: int,
        output_dir: str,
    ) -> Dict[str, Any]:
        """
        Execute the benchmark against the supplied system configuration.

        This is the abstract hook: the base implementation does nothing and
        returns ``None``; subclasses must override it.

        Args:
            system_config: Configuration parameters for the system under test.
            num_examples: How many benchmark examples to evaluate.
            output_dir: Directory in which evaluation results are saved.

        Returns:
            Dictionary with evaluation metrics including quality_score (0-1).
        """

    def _create_subdirectory(self, output_dir: str) -> str:
        """
        Ensure a directory named after this benchmark exists under output_dir.

        Missing parent directories are created as well, and an already
        existing directory is not treated as an error.

        Args:
            output_dir: Parent directory for output.

        Returns:
            The benchmark-specific directory path, as a string.
        """
        target = Path(output_dir, self.name)
        target.mkdir(parents=True, exist_ok=True)
        return str(target)

Coverage for src/local_deep_research/benchmarks/evaluators/base.py: 93%

15 statements