Coverage for src/local_deep_research/exporters/base.py: 92%

1"""Base classes for document exporters.

3This module provides the abstract base class and data structures that all

4exporters must implement to participate in the export system.

5"""

7import re

8from abc import ABC, abstractmethod

9from dataclasses import dataclass, field

10from typing import Any, Dict, Optional

@dataclass
class ExportResult:
    """Result of an export operation.

    Attributes:
        content: The exported document as raw bytes
        filename: Filename for the exported document
        mimetype: MIME type of the exported document
    """

    content: bytes
    filename: str
    mimetype: str

@dataclass
class ExportOptions:
    """Common options for all exporters.

    Attributes:
        title: Optional document title
        metadata: Optional metadata dict (author, date, etc.)
        custom_options: Format-specific options (e.g., custom_css for PDF).
            Defaults to a new empty dict for every instance.
    """

    title: Optional[str] = None
    metadata: Optional[Dict[str, Any]] = None
    # default_factory (not a literal {}) so instances never share one dict.
    custom_options: Optional[Dict[str, Any]] = field(default_factory=dict)

class BaseExporter(ABC):
    """Abstract base class for document exporters.

    All exporters must inherit from this class and implement the required
    abstract methods to participate in the export registry.

    Example:
        class MyExporter(BaseExporter):
            @property
            def format_name(self) -> str:
                return "myformat"

            @property
            def file_extension(self) -> str:
                return ".myf"

            @property
            def mimetype(self) -> str:
                return "application/x-myformat"

            def export(self, markdown_content, options=None) -> ExportResult:
                # Implementation here
                ...
    """

    @property
    @abstractmethod
    def format_name(self) -> str:
        """Return the format identifier (e.g., 'pdf', 'odt', 'latex').

        This is used to look up the exporter in the registry.
        """

    @property
    @abstractmethod
    def file_extension(self) -> str:
        """Return the file extension including the dot (e.g., '.pdf', '.odt')."""

    @property
    @abstractmethod
    def mimetype(self) -> str:
        """Return the MIME type for the exported file."""

    @abstractmethod
    def export(
        self,
        markdown_content: str,
        options: Optional["ExportOptions"] = None,
    ) -> "ExportResult":
        """Export markdown content to the target format.

        Args:
            markdown_content: The markdown text to convert
            options: Optional export options

        Returns:
            ExportResult with content bytes, filename, and mimetype
        """

    def _generate_safe_filename(self, title: Optional[str]) -> str:
        """Generate a safe filename from the title.

        Characters other than word characters, whitespace and hyphens are
        removed, every remaining whitespace character becomes an underscore,
        and the result is truncated to 50 characters. If no title is given,
        or nothing usable is left after sanitizing, ``research_report`` is
        used instead.

        Args:
            title: Optional title to use in the filename

        Returns:
            A sanitized filename with the appropriate extension
        """
        safe_title = ""
        if title:
            cleaned = re.sub(r"[^\w\s-]", "", title).strip()
            # The pattern above keeps all whitespace (\s), so map tabs and
            # newlines to "_" too -- not only spaces -- to keep control
            # characters out of the filename.
            safe_title = re.sub(r"\s", "_", cleaned)[:50]
        if not safe_title:
            # Covers a missing title as well as one that sanitizes to
            # nothing (e.g. "???"), which would otherwise yield ".ext".
            safe_title = "research_report"
        return f"{safe_title}{self.file_extension}"

    def _prepend_title_if_needed(
        self, content: str, title: Optional[str]
    ) -> str:
        """Prepend title as H1 heading if content doesn't already have one.

        This method is used by exporters that render markdown documents
        (like PDF and ODT) to ensure the title appears in the output.
        Exporters that don't render documents (like RIS) should not use this.

        Args:
            content: The markdown content
            title: Optional title to prepend

        Returns:
            Content with title prepended if needed, otherwise unchanged
        """
        if not title:
            return content
        # Content that already opens with a heading (which includes one that
        # repeats this title) is left alone; leading whitespace is ignored
        # when looking for the heading marker.
        if content.lstrip().startswith("#"):
            return content
        return f"# {title}\n\n{content}"

Coverage for src / local_deep_research / exporters / base.py: 92%

40 statements