Coverage for src / local_deep_research / exporters / base.py: 92%
40 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-25 01:07 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-25 01:07 +0000
1"""Base classes for document exporters.
3This module provides the abstract base class and data structures that all
4exporters must implement to participate in the export system.
5"""
7import re
8from abc import ABC, abstractmethod
9from dataclasses import dataclass, field
10from typing import Any, Dict, Optional
@dataclass
class ExportResult:
    """Outcome of a single export operation.

    Attributes:
        content: The exported document as raw bytes.
        filename: File name associated with the exported document.
        mimetype: MIME type of the exported document.
    """

    content: bytes
    filename: str
    mimetype: str
@dataclass
class ExportOptions:
    """Settings shared by every exporter.

    Attributes:
        title: Document title, or None when no title is supplied.
        metadata: Document metadata dict (author, date, etc.), or None.
        custom_options: Format-specific settings (e.g., custom_css for PDF).
            Defaults to a fresh empty dict for each instance.
    """

    title: Optional[str] = field(default=None)
    metadata: Optional[Dict[str, Any]] = field(default=None)
    # default_factory gives every instance its own dict instead of a shared one.
    custom_options: Optional[Dict[str, Any]] = field(default_factory=dict)
class BaseExporter(ABC):
    """Abstract base class for document exporters.

    All exporters must inherit from this class and implement the required
    abstract members (``format_name``, ``file_extension``, ``mimetype`` and
    ``export``) to participate in the export registry.

    Example:
        class MyExporter(BaseExporter):
            @property
            def format_name(self) -> str:
                return "myformat"

            @property
            def file_extension(self) -> str:
                return ".myf"

            @property
            def mimetype(self) -> str:
                return "application/x-myformat"

            def export(self, markdown_content, options=None) -> ExportResult:
                # Implementation here
                ...
    """

    @property
    @abstractmethod
    def format_name(self) -> str:
        """Return the format identifier (e.g., 'pdf', 'odt', 'latex').

        This is used to look up the exporter in the registry.
        """

    @property
    @abstractmethod
    def file_extension(self) -> str:
        """Return the file extension including the dot (e.g., '.pdf', '.odt')."""

    @property
    @abstractmethod
    def mimetype(self) -> str:
        """Return the MIME type for the exported file."""

    # The annotations below are quoted forward references so this class does
    # not depend on where ExportOptions / ExportResult are defined.
    @abstractmethod
    def export(
        self,
        markdown_content: str,
        options: Optional["ExportOptions"] = None,
    ) -> "ExportResult":
        """Export markdown content to the target format.

        Args:
            markdown_content: The markdown text to convert
            options: Optional export options

        Returns:
            ExportResult with content bytes, filename, and mimetype
        """

    def _generate_safe_filename(self, title: Optional[str]) -> str:
        """Generate a safe filename from the title.

        Characters other than word characters, whitespace and hyphens are
        removed, surrounding whitespace is stripped, every remaining
        whitespace character becomes an underscore, and the result is
        truncated to 50 characters. When no title is given, or nothing
        survives sanitization, ``research_report`` is used instead.

        Args:
            title: Optional title to use in the filename

        Returns:
            A sanitized filename with the appropriate extension
        """
        safe_title = ""
        if title:
            cleaned = re.sub(r"[^\w\s-]", "", title).strip()
            # Replace each whitespace character (tabs and newlines included,
            # not just spaces) so none of them can end up in the filename.
            safe_title = re.sub(r"\s", "_", cleaned)[:50]
        if not safe_title:
            # Covers a missing title as well as one made entirely of
            # stripped characters, which would otherwise yield a bare
            # extension such as ".pdf".
            safe_title = "research_report"
        return f"{safe_title}{self.file_extension}"

    def _prepend_title_if_needed(
        self, content: str, title: Optional[str]
    ) -> str:
        """Prepend title as H1 heading if content doesn't already have one.

        This method is used by exporters that render markdown documents
        (like PDF and ODT) to ensure the title appears in the output.
        Exporters that don't render documents (like RIS) should not use this.

        Args:
            content: The markdown content
            title: Optional title to prepend

        Returns:
            Content with title prepended if needed, otherwise unchanged
        """
        if not title:
            return content
        # Content whose first non-whitespace character is "#" is treated as
        # already starting with a heading (this includes the case where it
        # starts with "# <title>"), so it is returned untouched.
        if content.lstrip().startswith("#"):
            return content
        return f"# {title}\n\n{content}"