Coverage for src/local_deep_research/research_library/downloaders/extraction/base.py: 100%
5 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-03 23:15 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-03 23:15 +0000
"""
Base class for HTML content extractors.
"""
from abc import ABC, abstractmethod
from typing import Optional
class BaseExtractor(ABC):
    """Abstract base for content extraction strategies.

    The class cannot be instantiated directly: ``extract`` is declared
    abstract, so a concrete subclass has to override it.
    """

    @abstractmethod
    def extract(self, html: str) -> Optional[str]:
        """Extract main content text from HTML.

        The base implementation consists of this docstring only, so
        calling it directly (for example via ``super()``) returns None.

        Args:
            html: Raw or partially cleaned HTML string.

        Returns:
            Extracted plain text, or None if extraction yielded nothing.
        """