Coverage for src / local_deep_research / research_library / downloaders / extraction / base.py: 100%
5 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
1"""
2Base class for HTML content extractors.
3"""
5from abc import ABC, abstractmethod
6from typing import Optional
class BaseExtractor(ABC):
    """Abstract base for content extraction strategies.

    Concrete extractors subclass this and implement :meth:`extract`; the
    class cannot be instantiated until that method is overridden.
    """

    @abstractmethod
    def extract(self, html: str) -> Optional[str]:
        """Extract main content text from HTML.

        Args:
            html: Raw or partially cleaned HTML string.

        Returns:
            Extracted plain text, or None if extraction yielded nothing.
        """