Coverage for src / local_deep_research / benchmarks / datasets / custom_dataset_template.py: 0%

25 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-01-11 00:51 +0000

"""
Custom dataset template.

This module provides a template for creating custom benchmark datasets.
Copy this file and modify it to create your own dataset class.
"""

7 

8from loguru import logger 

9from typing import Any, Dict 

10 

11from .base import BenchmarkDataset 

12 

13 

class CustomDataset(BenchmarkDataset):
    """Starting point for defining your own benchmark dataset.

    Duplicate this class, swap 'Custom' for the name of your dataset, and
    adapt each method to match your data.
    """

    @classmethod
    def get_dataset_info(cls) -> Dict[str, str]:
        """Return the descriptive metadata of this dataset.

        Returns:
            A dict with the keys "id", "name", "description" and "url".
        """
        info = {
            "id": "custom",  # Identifier; must be unique per dataset
            "name": "Custom Dataset",  # Label meant for humans
            "description": "Template for a custom benchmark dataset",
        }
        # The location comes from get_default_dataset_path(), so the path
        # is defined in a single place.
        info["url"] = cls.get_default_dataset_path()
        return info

    @classmethod
    def get_default_dataset_path(cls) -> str:
        """Return the default path or URL the dataset is loaded from."""
        # Placeholder value: replace with the location of your dataset.
        default_location = "path/to/your/dataset.csv"
        return default_location

    def process_example(self, example: Dict[str, Any]) -> Dict[str, Any]:
        """Normalize one raw example.

        Use this hook to transform, decrypt, or otherwise prepare the raw
        examples of your dataset.

        Args:
            example: Raw example as read from the dataset.

        Returns:
            A new dict holding every key of ``example`` plus the keys
            "problem", "answer" and "correct_answer".
        """
        # Work on a shallow copy so the caller's dict is left untouched.
        record = dict(example)

        # TODO: Add your custom processing here
        # For example:
        # - Extract relevant fields
        # - Transform data formats
        # - Handle special cases
        # - Apply data cleaning

        # Required fields, each paired with the warning logged when the
        # field is absent; an absent field is filled with an empty string.
        required_fields = (
            ("problem", "Example missing 'problem' field"),
            ("answer", "Example missing 'answer' field"),
        )
        for key, warning in required_fields:
            if key not in record:
                logger.warning(warning)
                record[key] = ""

        # "answer" is guaranteed to exist at this point, so it can serve as
        # the default whenever "correct_answer" was not supplied.
        record.setdefault("correct_answer", record["answer"])

        return record

    def get_question(self, example: Dict[str, Any]) -> str:
        """Return the question text of an example.

        Override this method when your dataset keeps the question under a
        key other than 'problem'.
        """
        # Example: return example.get("question", "")
        question = example.get("problem", "")
        return question

    def get_answer(self, example: Dict[str, Any]) -> str:
        """Return the answer of an example.

        Override this method when your dataset keeps the answer under a
        key other than 'answer' or 'correct_answer'.
        """
        # "correct_answer" takes priority; "answer" (or an empty string)
        # is used only when that key is absent.
        fallback = example.get("answer", "")
        return example.get("correct_answer", fallback)

90 

91 

# To register your dataset, add this at the bottom of your file:
# DatasetRegistry.register(CustomDataset)
#
# Then import your dataset in the __init__.py file:
# from .custom_dataset import CustomDataset