Coverage for src/local_deep_research/research_library/downloaders/extraction/__init__.py

1"""

2Pluggable HTML content extraction strategies.

4Extractors can be composed in a pipeline: e.g. Readability first

5(structural DOM scoping), then justext (statistical boilerplate removal).

6"""

8from .base import BaseExtractor

9from .justext_extractor import JustextExtractor

10from .metadata_extractor import extract_metadata, metadata_to_text

11from .newspaper_extractor import NewspaperExtractor

12from .pipeline import (

13 batch_fetch_and_extract,

14 extract_content,

15 extract_content_with_metadata,

16 fetch_and_extract,

17)

18from .readability_extractor import ReadabilityExtractor

19from .trafilatura_extractor import TrafilaturaExtractor

21__all__ = [

22 "BaseExtractor",

23 "JustextExtractor",

24 "NewspaperExtractor",

25 "ReadabilityExtractor",

26 "TrafilaturaExtractor",

27 "batch_fetch_and_extract",

28 "extract_content",

29 "extract_content_with_metadata",

30 "extract_metadata",

31 "fetch_and_extract",

32 "metadata_to_text",

33]

Coverage for src/local_deep_research/research_library/downloaders/extraction/__init__.py: 100%