Coverage for src / local_deep_research / benchmarks / datasets / simpleqa.py: 42%
25 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
1"""
2SimpleQA dataset implementation.
4This module provides a class for the SimpleQA benchmark dataset.
5"""
7from typing import Any, Dict
9from loguru import logger
11from .base import BenchmarkDataset
class SimpleQADataset(BenchmarkDataset):
    """Benchmark dataset wrapper for SimpleQA.

    SimpleQA consists of plain question/answer pairs. This class exposes
    the dataset's metadata and default location, and normalizes each raw
    example so that the ``problem``, ``answer`` and ``correct_answer``
    keys are always present.
    """

    @classmethod
    def get_dataset_info(cls) -> Dict[str, str]:
        """Return the identifying metadata for this dataset.

        Returns:
            A dict with the keys ``id``, ``name``, ``description`` and
            ``url``; ``url`` is the value of
            :meth:`get_default_dataset_path`.
        """
        source_url = cls.get_default_dataset_path()
        return {
            "id": "simpleqa",
            "name": "SimpleQA",
            "description": "Simple question-answering evaluation dataset",
            "url": source_url,
        }

    @classmethod
    def get_default_dataset_path(cls) -> str:
        """Return the default URL of the SimpleQA test-set CSV."""
        default_url = "https://openaipublic.blob.core.windows.net/simple-evals/simple_qa_test_set.csv"
        return default_url

    def process_example(self, example: Dict[str, Any]) -> Dict[str, Any]:
        """Normalize one raw dataset example.

        The input is never mutated; a shallow copy is filled in and
        returned instead.

        Args:
            example: Raw example as loaded from the dataset.

        Returns:
            A copy of ``example`` in which ``problem`` and ``answer`` are
            guaranteed to exist (defaulting to ``""`` with a logged
            warning when absent) and ``correct_answer`` defaults to the
            value of ``answer``.
        """
        record: Dict[str, Any] = dict(example)

        # Required keys, checked in this order, paired with the warning
        # emitted when the key has to be back-filled.
        required_fields = (
            ("problem", "SimpleQA example missing 'problem' field"),
            ("answer", "SimpleQA example missing 'answer' field"),
        )
        for key, warning_text in required_fields:
            if key in record:
                continue
            logger.warning(warning_text)
            record[key] = ""

        # "answer" is guaranteed to exist here, so it can safely seed
        # "correct_answer" when the example did not supply one.
        record.setdefault("correct_answer", record["answer"])

        return record

    def get_question(self, example: Dict[str, Any]) -> str:
        """Return the ``problem`` entry of ``example``, or ``""`` if absent."""
        question = example.get("problem", "")
        return question

    def get_answer(self, example: Dict[str, Any]) -> str:
        """Return the ``answer`` entry of ``example``, or ``""`` if absent."""
        answer = example.get("answer", "")
        return answer