Coverage for src/local_deep_research/citation_handlers/base_citation_handler.py

1"""

2Base class for all citation handlers.

3"""

5from abc import ABC, abstractmethod

6from typing import Any, Dict, List, Union

8from langchain_core.documents import Document

class BaseCitationHandler(ABC):
    """Abstract base class for citation handlers.

    Stores the LLM and a settings snapshot, and provides shared helpers for
    reading settings, converting search results into LangChain documents,
    and formatting those documents as numbered sources. Subclasses must
    implement ``analyze_initial`` and ``analyze_followup``.
    """

    def __init__(self, llm, settings_snapshot=None):
        """Store the LLM and the settings snapshot.

        Args:
            llm: Language model object; kept as-is on ``self.llm``.
            settings_snapshot: Mapping of setting keys to values. Any falsy
                value (including ``None``) is replaced by an empty dict.
        """
        self.llm = llm
        self.settings_snapshot = settings_snapshot or {}

    def get_setting(self, key: str, default=None):
        """Get a setting value from the snapshot.

        Args:
            key: Setting name to look up.
            default: Value returned when ``key`` is not in the snapshot.

        Returns:
            The stored value, unwrapped from ``{"value": ...}`` when the
            entry is a dict containing a ``"value"`` key; ``default`` when
            the key is absent.
        """
        if key not in self.settings_snapshot:
            return default

        value = self.settings_snapshot[key]
        # Entries may be wrapped as {"value": ...}; unwrap them if so.
        if isinstance(value, dict) and "value" in value:
            return value["value"]
        return value

    def _get_output_instruction_prefix(self) -> str:
        """Get formatted output instructions from settings if present.

        This allows users to customize output language, tone, style, and
        formatting for research answers and reports. Instructions are
        prepended to prompts sent to the LLM.

        Returns:
            str: Formatted instruction prefix if custom instructions are
            set, empty string otherwise (including when the setting is
            stored as ``None`` or is only whitespace).

        Examples:
            - "Respond in Spanish with formal academic tone"
            - "Use simple language suitable for beginners"
            - "Be concise with bullet points"
        """
        raw = self.get_setting("general.output_instructions", "")
        # The default only applies when the key is absent; a stored None
        # (or other non-string) must not reach .strip() directly.
        output_instructions = str(raw).strip() if raw else ""

        if output_instructions:
            return f"User-Specified Output Style: {output_instructions}\n\n"
        return ""

    def _create_documents(
        self, search_results: Union[str, List[Dict]], nr_of_links: int = 0
    ) -> List["Document"]:
        """Convert search results to LangChain documents.

        Also writes an ``"index"`` entry back into each original search
        result dict that does not already carry one.

        Args:
            search_results: List of result dicts. A string or an empty /
                ``None`` value yields an empty list. Non-dict items are
                skipped.
            nr_of_links: Offset added to the 1-based position when a new
                index has to be assigned.

        Returns:
            One ``Document`` per dict result, with ``source``, ``title``
            and integer ``index`` in its metadata.
        """
        documents = []
        if not search_results or isinstance(search_results, str):
            return documents

        for i, result in enumerate(search_results):
            if not isinstance(result, dict):
                continue

            # Preserve indices that were already set (e.g., for topic
            # organization); only fill in a missing or None index.
            if result.get("index") is None:
                result["index"] = str(i + nr_of_links + 1)

            # Fall back to the snippet when full_content is missing, None,
            # or empty, so page_content is always a value rather than None.
            content = result.get("full_content") or result.get("snippet") or ""

            documents.append(
                Document(
                    page_content=content,
                    metadata={
                        "source": result.get("link", f"source_{i + 1}"),
                        "title": result.get("title", f"Source {i + 1}"),
                        "index": int(result["index"]),
                    },
                )
            )
        return documents

    def _format_sources(self, documents: List["Document"]) -> str:
        """Format sources with numbers for citation.

        Args:
            documents: Documents whose metadata contains an ``"index"``.

        Returns:
            One ``[index] page_content`` entry per document, separated by
            blank lines; an empty string for an empty list.
        """
        return "\n\n".join(
            f"[{doc.metadata['index']}] {doc.page_content}" for doc in documents
        )

    @abstractmethod
    def analyze_initial(
        self, query: str, search_results: Union[str, List[Dict]]
    ) -> Dict[str, Any]:
        """Process initial analysis with citations."""

    @abstractmethod
    def analyze_followup(
        self,
        question: str,
        search_results: Union[str, List[Dict]],
        previous_knowledge: str,
        nr_of_links: int,
    ) -> Dict[str, Any]:
        """Process follow-up analysis with citations."""

Coverage for src / local_deep_research / citation_handlers / base_citation_handler.py: 95%

43 statements