Coverage for src/local_deep_research/research_library/downloaders/extraction/__init__.py: 100%
8 statements
« prev | ^ index | » next — coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
1"""
2Pluggable HTML content extraction strategies.
4Extractors can be composed in a pipeline: e.g. Readability first
5(structural DOM scoping), then justext (statistical boilerplate removal).
6"""
8from .base import BaseExtractor
9from .justext_extractor import JustextExtractor
10from .metadata_extractor import extract_metadata, metadata_to_text
11from .newspaper_extractor import NewspaperExtractor
12from .pipeline import (
13 batch_fetch_and_extract,
14 extract_content,
15 extract_content_with_metadata,
16 fetch_and_extract,
17)
18from .readability_extractor import ReadabilityExtractor
19from .trafilatura_extractor import TrafilaturaExtractor
21__all__ = [
22 "BaseExtractor",
23 "JustextExtractor",
24 "NewspaperExtractor",
25 "ReadabilityExtractor",
26 "TrafilaturaExtractor",
27 "batch_fetch_and_extract",
28 "extract_content",
29 "extract_content_with_metadata",
30 "extract_metadata",
31 "fetch_and_extract",
32 "metadata_to_text",
33]