Coverage for src / local_deep_research / benchmarks / datasets / custom_dataset_template.py: 0%
25 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
1"""
2Custom dataset template.
4This module provides a template for creating custom benchmark datasets.
5Copy this file and modify it to create your own dataset class.
6"""
8from loguru import logger
9from typing import Any, Dict
11from .base import BenchmarkDataset
class CustomDataset(BenchmarkDataset):
    """Starting point for writing your own benchmark dataset.

    Duplicate this class, swap 'Custom' for the name of your dataset, and
    adjust each method below to match how your data is stored.
    """

    @classmethod
    def get_default_dataset_path(cls) -> str:
        """Return the default location (path or URL) of the dataset."""
        # Placeholder value: point this at your real dataset.
        return "path/to/your/dataset.csv"

    @classmethod
    def get_dataset_info(cls) -> Dict[str, str]:
        """Return the descriptive metadata for this dataset.

        Returns:
            A dict with the keys ``id`` (unique identifier), ``name``
            (human-readable name), ``description`` and ``url`` (the value
            of :meth:`get_default_dataset_path`).
        """
        info = {
            "id": "custom",
            "name": "Custom Dataset",
            "description": "Template for a custom benchmark dataset",
            "url": cls.get_default_dataset_path(),
        }
        return info

    def process_example(self, example: Dict[str, Any]) -> Dict[str, Any]:
        """Normalize one raw example into the form used downstream.

        Use this hook to transform, decrypt, or otherwise prepare the raw
        examples of your dataset.

        Args:
            example: Raw example from the dataset. It is not modified.

        Returns:
            A new dict that always contains the keys ``problem``,
            ``answer`` and ``correct_answer``.
        """
        # Work on a shallow copy so the caller's dict is left untouched.
        record = dict(example)

        # TODO: Add your custom processing here, for instance:
        #   - pull out the fields you need
        #   - convert data formats
        #   - deal with special cases
        #   - clean the data

        # Required fields, each paired with the warning logged when it is
        # absent; a missing field is filled with an empty string.
        required_fields = (
            ("problem", "Example missing 'problem' field"),
            ("answer", "Example missing 'answer' field"),
        )
        for field_name, warning_text in required_fields:
            if field_name not in record:
                logger.warning(warning_text)
                record[field_name] = ""

        # 'answer' is guaranteed to exist here, so it can seed
        # 'correct_answer' whenever that key is absent.
        record.setdefault("correct_answer", record["answer"])

        return record

    def get_question(self, example: Dict[str, Any]) -> str:
        """Return the question text of an example.

        Reads the 'problem' field and yields an empty string when it is
        absent. Override this if your dataset keeps the question under a
        different key, e.g. ``example.get("question", "")``.
        """
        question = example.get("problem", "")
        return question

    def get_answer(self, example: Dict[str, Any]) -> str:
        """Return the expected answer of an example.

        Prefers the 'correct_answer' field, then 'answer', then an empty
        string. Override this if your dataset keeps the answer under a
        different key.
        """
        fallback = example.get("answer", "")
        return example.get("correct_answer", fallback)
92# To register your dataset, add this at the bottom of your file:
93# DatasetRegistry.register(CustomDataset)
94#
95# Then import your dataset in the __init__.py file:
96# from .custom_dataset import CustomDataset