Coverage for src/local_deep_research/benchmarks/comparison/results.py: 24%
28 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
1import json
class Benchmark_results:
    """Keep benchmark records in memory and persist them to a JSON file.

    Records are plain dictionaries held in ``self.results``. The list is read
    from ``self.results_file`` when the object is created and written back
    after every call to :meth:`add_result`.
    """

    def __init__(self, results_file=None):
        """Load previously saved records, if any.

        Args:
            results_file: Path of the JSON file to read and write. Any falsy
                value selects ``"benchmark_results.json"``.
        """
        self.results_file = results_file or "benchmark_results.json"
        self.results = self._load_results()

    def add_result(
        self,
        model,
        hardware,
        accuracy_focused,
        accuracy_source,
        avg_time_per_question,
        context_window,
        temperature,
        ldr_version,
        date_tested,
        notes="",
    ):
        """Append one benchmark record and save the whole list.

        Every argument is stored unchanged under a key of the same name; no
        validation or conversion is applied here.

        Returns:
            ``True`` once the record has been appended and saved.
        """
        result = {
            "model": model,
            "hardware": hardware,
            "accuracy_focused": accuracy_focused,
            "accuracy_source": accuracy_source,
            "avg_time_per_question": avg_time_per_question,
            "context_window": context_window,
            "temperature": temperature,
            "ldr_version": ldr_version,
            "date_tested": date_tested,
            "notes": notes,
        }

        self.results.append(result)
        self._save_results()
        return True

    def get_all(self):
        """Return every stored record.

        The internal list itself is returned, not a copy.
        """
        return self.results

    def get_best(self, sort_by="accuracy_focused"):
        """Return the records ordered from best to worst for ``sort_by``.

        ``"avg_time_per_question"`` is sorted ascending (smaller is better);
        every other key is sorted descending. Records whose value for
        ``sort_by`` is missing or ``None`` cannot be ranked, so they are
        placed after the ranked ones in their stored order instead of
        aborting the sort.

        Args:
            sort_by: Key of the record dictionaries to order by.

        Returns:
            A new list; ``self.results`` is left untouched. Empty when there
            are no stored records.

        Raises:
            ValueError: If records exist and none of them has ``sort_by``.
        """
        if not self.results:
            return []

        # Collect keys from every record, not just the first: records loaded
        # from disk are not guaranteed to share one schema. ``dict.fromkeys``
        # keeps first-seen order, so the message is unchanged for uniform data.
        known_keys = list(
            dict.fromkeys(key for record in self.results for key in record)
        )
        if sort_by not in known_keys:
            raise ValueError(
                f"Invalid sort_by key: '{sort_by}'. Valid keys are: {known_keys}"
            )

        ranked = []
        unranked = []
        for record in self.results:
            bucket = unranked if record.get(sort_by) is None else ranked
            bucket.append(record)

        lower_is_better = sort_by == "avg_time_per_question"
        # list.sort is stable in both directions, so ties keep stored order.
        ranked.sort(
            key=lambda record: record[sort_by], reverse=not lower_is_better
        )
        return ranked + unranked

    def _load_results(self):
        """Read the saved records from ``self.results_file``.

        Returns:
            The decoded JSON content, or an empty list when the file does not
            exist.

        Only a missing file is handled. Any other failure, including invalid
        JSON, propagates so that a damaged file is not silently replaced by
        the next save.
        """
        try:
            # JSON text is UTF-8 by convention; do not depend on the locale.
            with open(self.results_file, "r", encoding="utf-8") as f:
                return json.load(f)
        except FileNotFoundError:
            return []

    def _save_results(self):
        """Write ``self.results`` to ``self.results_file``.

        Writing is delegated to the project's ``write_json_verified`` helper.
        """
        # NOTE(review): imported inside the method rather than at module
        # level, presumably to avoid an import cycle -- confirm.
        from ...security.file_write_verifier import write_json_verified

        # NOTE(review): the third argument looks like the name of the setting
        # that must permit file output -- confirm against the helper.
        write_json_verified(
            self.results_file,
            self.results,
            "benchmark.allow_file_output",
            context="benchmark results",
        )