Coverage for src/local_deep_research/benchmarks/datasets/simpleqa.py: 42%

25 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-01-11 00:51 +0000

1""" 

2SimpleQA dataset implementation. 

3 

4This module provides a class for the SimpleQA benchmark dataset. 

5""" 

6 

7from typing import Any, Dict 

8 

9from loguru import logger 

10 

11from .base import BenchmarkDataset 

12 

13 

class SimpleQADataset(BenchmarkDataset):
    """Benchmark dataset wrapper for SimpleQA.

    Normalises SimpleQA records, which are plain question/answer pairs,
    and exposes accessors for the question and the answer of a record.
    """

    @classmethod
    def get_dataset_info(cls) -> Dict[str, str]:
        """Return the identifying metadata for this dataset.

        Returns:
            A dict with the keys ``id``, ``name``, ``description`` and
            ``url``; ``url`` is whatever ``get_default_dataset_path``
            returns.
        """
        info: Dict[str, str] = {
            "id": "simpleqa",
            "name": "SimpleQA",
            "description": "Simple question-answering evaluation dataset",
        }
        info["url"] = cls.get_default_dataset_path()
        return info

    @classmethod
    def get_default_dataset_path(cls) -> str:
        """Return the default location (a URL) of the dataset CSV."""
        default_url = "https://openaipublic.blob.core.windows.net/simple-evals/simple_qa_test_set.csv"
        return default_url

    def process_example(self, example: Dict[str, Any]) -> Dict[str, Any]:
        """Normalise one raw record so the expected keys are present.

        The input is never mutated; a shallow copy is filled in instead.
        A missing ``problem`` or ``answer`` key is logged as a warning and
        defaulted to the empty string. A missing ``correct_answer`` key is
        filled with the record's ``answer`` value.

        Args:
            example: Raw record from the dataset.

        Returns:
            A new dict containing at least ``problem``, ``answer`` and
            ``correct_answer``.
        """
        record = dict(example)

        # Required fields, checked in this order, each paired with the
        # warning emitted when the field is absent.
        required_fields = (
            ("problem", "SimpleQA example missing 'problem' field"),
            ("answer", "SimpleQA example missing 'answer' field"),
        )
        for field_name, warning_text in required_fields:
            if field_name in record:
                continue
            logger.warning(warning_text)
            record[field_name] = ""

        # "answer" is guaranteed to exist here, so it can act as fallback.
        record.setdefault("correct_answer", record["answer"])
        return record

    def get_question(self, example: Dict[str, Any]) -> str:
        """Return the ``problem`` value of a record, or ``""`` if absent."""
        question = example.get("problem", "")
        return question

    def get_answer(self, example: Dict[str, Any]) -> str:
        """Return the ``answer`` value of a record, or ``""`` if absent."""
        answer = example.get("answer", "")
        return answer