Coverage for src / local_deep_research / benchmarks / comparison / results.py: 24%

28 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-01-11 00:51 +0000

1import json 

2 

3 

class Benchmark_results:
    """Keep benchmark results in memory and persist them to a JSON file.

    Results are held in ``self.results`` as a list of dictionaries, one per
    benchmark run. The list is read from ``results_file`` when the object is
    created and written back after every ``add_result`` call.
    """

    def __init__(self, results_file=None):
        """Load any previously stored results.

        Args:
            results_file: Path of the JSON results file. Any falsy value
                (``None``, ``""``) selects ``"benchmark_results.json"``.
        """
        self.results_file = results_file or "benchmark_results.json"
        self.results = self._load_results()

    def add_result(
        self,
        model,
        hardware,
        accuracy_focused,
        accuracy_source,
        avg_time_per_question,
        context_window,
        temperature,
        ldr_version,
        date_tested,
        notes="",
    ):
        """Append one benchmark result and write the full list to disk.

        Every argument is stored unchanged under the key of the same name.

        Returns:
            ``True`` once the record has been appended and
            ``_save_results`` has returned.
        """
        result = {
            "model": model,
            "hardware": hardware,
            "accuracy_focused": accuracy_focused,
            "accuracy_source": accuracy_source,
            "avg_time_per_question": avg_time_per_question,
            "context_window": context_window,
            "temperature": temperature,
            "ldr_version": ldr_version,
            "date_tested": date_tested,
            "notes": notes,
        }

        self.results.append(result)
        self._save_results()
        return True

    def get_all(self):
        """Return the internal list of results (the list itself, not a copy)."""
        return self.results

    def get_best(self, sort_by="accuracy_focused"):
        """Return a new list of results ordered from best to worst.

        Args:
            sort_by: Key of the result dictionaries to order by.
                ``"avg_time_per_question"`` sorts ascending (lower first);
                every other key sorts descending (higher first).

        Returns:
            A sorted copy of the results; ``self.results`` is not reordered.

        Raises:
            ValueError: If results exist and ``sort_by`` is not a key of the
                first result.
            KeyError: If a later result lacks ``sort_by`` (only the first
                result is checked up front).
        """
        if self.results and sort_by not in self.results[0]:
            raise ValueError(
                f"Invalid sort_by key: '{sort_by}'. Valid keys are: {list(self.results[0].keys())}"
            )

        # Time per question is the only metric where smaller is better.
        descending = sort_by != "avg_time_per_question"
        return sorted(
            self.results, key=lambda row: row[sort_by], reverse=descending
        )

    def _load_results(self):
        """Read the stored results from ``self.results_file``.

        Returns:
            The decoded JSON content, or an empty list when the file does
            not exist or contains only whitespace.

        Raises:
            json.JSONDecodeError: If the file has content that is not valid
                JSON. This is deliberately not swallowed, so a corrupt file
                is not silently replaced by the next save.
        """
        try:
            # Explicit encoding keeps reads independent of the platform
            # default; JSON text is UTF-8 by specification.
            with open(self.results_file, "r", encoding="utf-8") as f:
                raw = f.read()
        except FileNotFoundError:
            return []

        # An empty file holds no results; treat it like a missing file
        # instead of failing in the constructor.
        if not raw.strip():
            return []
        return json.loads(raw)

    def _save_results(self):
        """Write ``self.results`` to ``self.results_file``.

        Delegates to ``write_json_verified`` with the
        ``"benchmark.allow_file_output"`` setting name.
        """
        # Imported at call time, as in the original code
        # (presumably to avoid an import cycle - TODO confirm).
        from ...security.file_write_verifier import write_json_verified

        write_json_verified(
            self.results_file,
            self.results,
            "benchmark.allow_file_output",
            context="benchmark results",
        )