Coverage for src / local_deep_research / citation_handlers / base_citation_handler.py: 95%
43 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
1"""
2Base class for all citation handlers.
3"""
5from abc import ABC, abstractmethod
6from typing import Any, Dict, List, Union
8from langchain_core.documents import Document
class BaseCitationHandler(ABC):
    """Abstract base class for citation handlers.

    Stores the LLM and a settings snapshot, and provides helpers shared by
    concrete handlers: setting lookup, an output-style prompt prefix,
    conversion of search results into LangChain documents, and numbered
    source formatting. Subclasses implement ``analyze_initial`` and
    ``analyze_followup``.
    """

    def __init__(self, llm, settings_snapshot=None):
        """Store the LLM and the settings snapshot.

        Args:
            llm: Language model object; it is only stored here.
            settings_snapshot: Mapping of setting keys to values. ``None``
                (or any other falsy value) is replaced by an empty dict.
        """
        self.llm = llm
        self.settings_snapshot = settings_snapshot or {}

    def get_setting(self, key: str, default=None):
        """Get a setting value from the snapshot.

        Args:
            key: Setting key to look up.
            default: Value returned when ``key`` is not in the snapshot.

        Returns:
            The stored value. When the stored value is a dict containing a
            ``"value"`` entry, that inner value is returned instead.
        """
        if key not in self.settings_snapshot:
            return default
        value = self.settings_snapshot[key]
        # Extract value from dict structure if needed
        if isinstance(value, dict) and "value" in value:
            return value["value"]
        return value

    def _get_output_instruction_prefix(self) -> str:
        """
        Get formatted output instructions from settings if present.

        This allows users to customize output language, tone, style, and
        formatting for research answers and reports. Instructions are
        prepended to prompts sent to the LLM.

        Returns:
            str: Formatted instruction prefix if custom instructions are set,
                empty string otherwise (including when the stored value is
                ``None``, empty, or whitespace only).

        Examples:
            - "Respond in Spanish with formal academic tone"
            - "Use simple language suitable for beginners"
            - "Be concise with bullet points"
        """
        raw = self.get_setting("general.output_instructions", "")
        # The snapshot may hold None (or a non-string) for this key; calling
        # .strip() on it directly would raise AttributeError.
        output_instructions = str(raw).strip() if raw else ""

        if output_instructions:
            return f"User-Specified Output Style: {output_instructions}\n\n"
        return ""

    def _create_documents(
        self, search_results: Union[str, List[Dict]], nr_of_links: int = 0
    ) -> List[Document]:
        """
        Convert search results to LangChain documents format and add index
        to original search results.

        Args:
            search_results: List of result dicts. A string (or an empty /
                ``None`` value) yields an empty list. Entries that are not
                dicts are skipped.
            nr_of_links: Offset added to the positional index of results
                that do not already carry an ``"index"``.

        Returns:
            One ``Document`` per dict entry, with ``source``, ``title`` and
            integer ``index`` metadata.

        Note:
            Each result dict without an ``"index"`` key is mutated in place:
            a string index is written back to it.
        """
        documents: List[Document] = []
        if not search_results or isinstance(search_results, str):
            return documents

        for i, result in enumerate(search_results):
            if not isinstance(result, dict):
                continue

            # Add index to the original search result dictionary if it
            # doesn't exist. This preserves indices that were already set
            # (e.g., for topic organization).
            if "index" not in result:
                result["index"] = str(i + nr_of_links + 1)

            # Fall back to the snippet when full_content is missing, None or
            # empty, and never hand None to Document as page_content.
            content = result.get("full_content") or result.get("snippet") or ""

            documents.append(
                Document(
                    page_content=content,
                    metadata={
                        "source": result.get("link", f"source_{i + 1}"),
                        "title": result.get("title", f"Source {i + 1}"),
                        # "index" is guaranteed to be present at this point.
                        "index": int(result["index"]),
                    },
                )
            )
        return documents

    def _format_sources(self, documents: List[Document]) -> str:
        """Format sources with numbers for citation.

        Each document becomes ``[<index>] <page_content>``; entries are
        separated by a blank line.
        """
        return "\n\n".join(
            f"[{doc.metadata['index']}] {doc.page_content}" for doc in documents
        )

    @abstractmethod
    def analyze_initial(
        self, query: str, search_results: Union[str, List[Dict]]
    ) -> Dict[str, Any]:
        """Process initial analysis with citations."""

    @abstractmethod
    def analyze_followup(
        self,
        question: str,
        search_results: Union[str, List[Dict]],
        previous_knowledge: str,
        nr_of_links: int,
    ) -> Dict[str, Any]:
        """Process follow-up analysis with citations."""