Coverage for src / local_deep_research / citation_handlers / base_citation_handler.py: 96%
54 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
1"""
2Base class for all citation handlers.
3"""
5from abc import ABC, abstractmethod
6from typing import Any, Dict, List, Union
8from langchain_core.documents import Document
9from loguru import logger
class BaseCitationHandler(ABC):
    """Abstract base class for citation handlers.

    Bundles the helpers shared by concrete handlers: reading values from a
    settings snapshot, building the optional output-style prompt prefix,
    converting raw search results into ``Document`` objects, and rendering
    those documents as a numbered source list. Subclasses must implement
    ``analyze_initial`` and ``analyze_followup``.
    """

    def __init__(self, llm, settings_snapshot=None):
        """Store the LLM and the settings snapshot.

        Args:
            llm: Language-model object kept on ``self.llm`` for subclasses;
                this base class never calls it.
            settings_snapshot: Mapping of setting keys to values. ``None``
                (or any falsy value) is replaced by an empty dict.
        """
        self.llm = llm
        self.settings_snapshot = settings_snapshot or {}
        # Guards the one-time log message in is_fact_checking_enabled().
        self._fact_checking_logged = False

    def get_setting(self, key: str, default=None):
        """Get a setting value from the snapshot.

        Args:
            key: Setting key to look up in ``self.settings_snapshot``.
            default: Value returned when ``key`` is not in the snapshot.

        Returns:
            The stored value. When the stored value is a dict containing a
            ``"value"`` key, that inner value is returned instead.
        """
        if key not in self.settings_snapshot:
            return default

        value = self.settings_snapshot[key]
        # Snapshot entries may be wrapped as {"value": ...}; unwrap those.
        if isinstance(value, dict) and "value" in value:
            return value["value"]
        return value

    def is_fact_checking_enabled(self) -> bool:
        """Check if fact-checking is enabled and log the state once.

        Reads ``general.enable_fact_checking`` (default ``True``). The state
        is logged on the first call only; later calls are silent.

        Returns:
            bool: Truthiness of the setting value.
        """
        enabled = self.get_setting("general.enable_fact_checking", True)
        if not self._fact_checking_logged:
            handler_name = type(self).__name__
            if enabled:
                logger.info(
                    f"[{handler_name}] Fact-checking is ENABLED — "
                    f"extra LLM call per synthesis"
                )
            else:
                logger.info(f"[{handler_name}] Fact-checking is DISABLED")
            self._fact_checking_logged = True
        return bool(enabled)

    def _get_output_instruction_prefix(self) -> str:
        """
        Get formatted output instructions from settings if present.

        This allows users to customize output language, tone, style, and
        formatting for research answers and reports. Instructions are
        prepended to prompts sent to the LLM.

        Returns:
            str: Formatted instruction prefix if custom instructions are set,
            empty string otherwise (including when the stored value is
            ``None`` or not a string).

        Examples:
            - "Respond in Spanish with formal academic tone"
            - "Use simple language suitable for beginners"
            - "Be concise with bullet points"
        """
        raw_instructions = self.get_setting("general.output_instructions", "")
        # The snapshot may hold None (or another non-string) for this key;
        # calling .strip() on it would raise, so treat it as "not set".
        if not isinstance(raw_instructions, str):
            return ""

        output_instructions = raw_instructions.strip()
        if output_instructions:
            return f"User-Specified Output Style: {output_instructions}\n\n"
        return ""

    def _create_documents(
        self, search_results: Union[str, List[Dict]], nr_of_links: int = 0
    ) -> List[Document]:
        """
        Convert search results to LangChain documents format and add index
        to original search results.

        Args:
            search_results: List of result dicts. A string, ``None`` or an
                empty list yields no documents. Entries that are not dicts
                are skipped.
            nr_of_links: Offset added to the position-based index so that
                numbering continues after previously collected links.

        Returns:
            List[Document]: One document per dict entry, with ``source``,
            ``title`` and integer ``index`` metadata.

        Raises:
            ValueError: If a result carries an ``"index"`` that cannot be
                converted to ``int``.

        Note:
            Result dicts without an ``"index"`` key are mutated in place: a
            string index (position + ``nr_of_links`` + 1) is stored on them.
            Existing indices are preserved (e.g. for topic organization).
        """
        documents: List[Document] = []
        if not search_results or isinstance(search_results, str):
            return documents

        for i, result in enumerate(search_results):
            if not isinstance(result, dict):
                continue

            # Only assign an index when the caller has not set one already.
            if "index" not in result:
                result["index"] = str(i + nr_of_links + 1)

            # Fall back to the snippet when full_content is missing, None or
            # empty, and never hand None to Document as page_content.
            content = result.get("full_content") or result.get("snippet") or ""

            documents.append(
                Document(
                    page_content=content,
                    metadata={
                        # Placeholder source/title use the plain position in
                        # this batch (no nr_of_links offset).
                        "source": result.get("link", f"source_{i + 1}"),
                        "title": result.get("title", f"Source {i + 1}"),
                        "index": int(result["index"]),
                    },
                )
            )
        return documents

    def _format_sources(self, documents: List[Document]) -> str:
        """Format sources with numbers for citation.

        Args:
            documents: Documents whose metadata contains an ``"index"`` entry.

        Returns:
            str: One ``[index] content`` entry per document, separated by
            blank lines; an empty string for no documents.

        Raises:
            KeyError: If a document's metadata has no ``"index"`` entry.
        """
        return "\n\n".join(
            f"[{doc.metadata['index']}] {doc.page_content}" for doc in documents
        )

    @abstractmethod
    def analyze_initial(
        self, query: str, search_results: Union[str, List[Dict]]
    ) -> Dict[str, Any]:
        """Process initial analysis with citations."""

    @abstractmethod
    def analyze_followup(
        self,
        question: str,
        search_results: Union[str, List[Dict]],
        previous_knowledge: str,
        nr_of_links: int,
    ) -> Dict[str, Any]:
        """Process follow-up analysis with citations."""