Coverage for src/local_deep_research/benchmarks/datasets/custom_dataset_template.py

"""
Custom dataset template.

This module provides a template for creating custom benchmark datasets.
Copy this file and modify it to create your own dataset class.
"""

8from loguru import logger

9from typing import Any, Dict

11from .base import BenchmarkDataset

class CustomDataset(BenchmarkDataset):
    """Starting point for writing your own benchmark dataset.

    Duplicate this class, rename it after your dataset (replacing
    'Custom'), and adapt each method below to your data.
    """

    @classmethod
    def get_dataset_info(cls) -> Dict[str, str]:
        """Return the descriptive metadata for this dataset.

        Returns:
            A mapping with the keys ``id``, ``name``, ``description`` and
            ``url``; ``url`` is the value of ``get_default_dataset_path()``.
        """
        info: Dict[str, str] = {}
        info["id"] = "custom"  # Unique identifier for the dataset
        info["name"] = "Custom Dataset"  # Human-readable name
        info["description"] = "Template for a custom benchmark dataset"
        info["url"] = cls.get_default_dataset_path()  # Default URL or path
        return info

    @classmethod
    def get_default_dataset_path(cls) -> str:
        """Return the default location (path or URL) of the dataset."""
        # Placeholder value: replace with the location of your dataset.
        default_location = "path/to/your/dataset.csv"
        return default_location

    def process_example(self, example: Dict[str, Any]) -> Dict[str, Any]:
        """Normalize one raw example into the shape used downstream.

        Use this hook to transform, decrypt, or otherwise clean the raw
        records of your dataset.

        Args:
            example: Raw example from the dataset.

        Returns:
            A new dict holding every entry of ``example`` plus guaranteed
            ``problem``, ``answer`` and ``correct_answer`` keys.
        """
        # Work on a shallow copy so the caller's dict is left untouched.
        processed = dict(example)

        # TODO: Add your custom processing here
        # For example:
        # - Extract relevant fields
        # - Transform data formats
        # - Handle special cases
        # - Apply data cleaning

        # Missing required fields are reported and filled with "".
        problem_missing = "problem" not in processed
        if problem_missing:
            logger.warning("Example missing 'problem' field")
            processed["problem"] = ""

        answer_missing = "answer" not in processed
        if answer_missing:
            logger.warning("Example missing 'answer' field")
            processed["answer"] = ""

        # 'answer' is guaranteed to exist here, so it can seed
        # 'correct_answer' whenever that key is absent.
        processed.setdefault("correct_answer", processed["answer"])

        return processed

    def get_question(self, example: Dict[str, Any]) -> str:
        """Return the question text of an example.

        Reads the 'problem' field and yields "" when it is absent.
        Override this if your dataset keeps the question under another
        key, e.g. ``example.get("question", "")``.
        """
        question = example.get("problem", "")
        return question

    def get_answer(self, example: Dict[str, Any]) -> str:
        """Return the expected answer of an example.

        'correct_answer' takes precedence; 'answer' is used only when
        'correct_answer' is absent, and "" when both are absent.
        Override this if your dataset keeps the answer under another key.
        """
        if "correct_answer" in example:
            return example["correct_answer"]
        return example.get("answer", "")

# To register your dataset, add this at the bottom of your file:
# DatasetRegistry.register(CustomDataset)
#
# Then import your dataset in the __init__.py file:
# from .custom_dataset import CustomDataset

Coverage for src / local_deep_research / benchmarks / datasets / custom_dataset_template.py: 0%

25 statements