Coverage for src / local_deep_research / citation_handlers / base_citation_handler.py: 96%

54 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-14 23:55 +0000

1""" 

2Base class for all citation handlers. 

3""" 

4 

5from abc import ABC, abstractmethod 

6from typing import Any, Dict, List, Union 

7 

8from langchain_core.documents import Document 

9from loguru import logger 

10 

11 

class BaseCitationHandler(ABC):
    """Abstract base class for citation handlers.

    Stores the LLM handle and a settings snapshot, and provides the helpers
    shared by concrete handlers: settings lookup, the fact-checking toggle,
    the optional output-style prompt prefix, and conversion/formatting of
    search results as numbered sources. Subclasses must implement
    ``analyze_initial`` and ``analyze_followup``.
    """

    # String spellings that mean "disabled" when a boolean setting arrives
    # as text; without this, bool("false") would evaluate to True.
    _FALSE_STRINGS = frozenset({"", "0", "false", "no", "off"})

    def __init__(self, llm, settings_snapshot=None):
        """Store the LLM and the settings snapshot.

        Args:
            llm: The language model object used by concrete handlers.
            settings_snapshot: Mapping of setting keys to values (or to
                dicts carrying a ``"value"`` entry). ``None`` or any falsy
                value is replaced by an empty dict.
        """
        self.llm = llm
        self.settings_snapshot = settings_snapshot or {}
        # Ensures the fact-checking state is logged only once per instance.
        self._fact_checking_logged = False

    def get_setting(self, key: str, default=None):
        """Get a setting value from the snapshot.

        Args:
            key: Setting name to look up in ``settings_snapshot``.
            default: Value returned when ``key`` is not in the snapshot.

        Returns:
            The stored value; when the stored value is a dict containing a
            ``"value"`` key, that inner value is returned instead.
        """
        if key not in self.settings_snapshot:
            return default

        value = self.settings_snapshot[key]
        # Entries may be wrapped as {"value": ...}; unwrap when they are.
        if isinstance(value, dict) and "value" in value:
            return value["value"]
        return value

    def is_fact_checking_enabled(self) -> bool:
        """Check if fact-checking is enabled and log the state once.

        Reads ``general.enable_fact_checking`` (default ``True``). String
        values are interpreted textually, so ``"false"``, ``"0"``, ``"no"``,
        ``"off"`` and the empty string count as disabled; every other value
        is reduced with ``bool()``.

        Returns:
            bool: ``True`` when fact-checking is enabled.
        """
        raw = self.get_setting("general.enable_fact_checking", True)
        if isinstance(raw, str):
            enabled = raw.strip().lower() not in self._FALSE_STRINGS
        else:
            enabled = bool(raw)

        if not self._fact_checking_logged:
            handler_name = type(self).__name__
            if enabled:
                logger.info(
                    f"[{handler_name}] Fact-checking is ENABLED — "
                    "extra LLM call per synthesis"
                )
            else:
                logger.info(f"[{handler_name}] Fact-checking is DISABLED")
            self._fact_checking_logged = True
        return enabled

    def _get_output_instruction_prefix(self) -> str:
        """
        Get formatted output instructions from settings if present.

        This allows users to customize output language, tone, style, and
        formatting for research answers and reports. Instructions are
        prepended to prompts sent to the LLM.

        Returns:
            str: Formatted instruction prefix if custom instructions are set,
                empty string otherwise (including when the stored value is
                ``None`` or not a string).

        Examples:
            - "Respond in Spanish with formal academic tone"
            - "Use simple language suitable for beginners"
            - "Be concise with bullet points"
        """
        raw = self.get_setting("general.output_instructions", "")
        # The key may be present with a None/non-string value; calling
        # .strip() on that would raise, so treat it as "no instructions".
        output_instructions = raw.strip() if isinstance(raw, str) else ""

        if output_instructions:
            return f"User-Specified Output Style: {output_instructions}\n\n"
        return ""

    def _create_documents(
        self, search_results: Union[str, List[Dict]], nr_of_links: int = 0
    ) -> List[Document]:
        """
        Convert search results to LangChain documents format and add index
        to original search results.

        Args:
            search_results: List of result dicts. A string, ``None`` or an
                empty list yields no documents. Non-dict items are skipped.
            nr_of_links: Offset added to the positional index of results
                that do not already carry an ``"index"``.

        Returns:
            One ``Document`` per dict result, with ``source``, ``title`` and
            integer ``index`` metadata.

        Note:
            Result dicts lacking ``"index"`` are mutated in place: the
            computed index is written back as a string.
        """
        documents: List[Document] = []
        if not search_results or isinstance(search_results, str):
            return documents

        for i, result in enumerate(search_results):
            if not isinstance(result, dict):
                continue

            # Add index to the original search result dictionary if it
            # doesn't exist. This preserves indices that were already set
            # (e.g., for topic organization).
            if "index" not in result:
                result["index"] = str(i + nr_of_links + 1)

            # Prefer full content, but fall back to the snippet when the
            # full content is missing, None or empty.
            content = result.get("full_content") or result.get("snippet") or ""

            documents.append(
                Document(
                    page_content=content,
                    metadata={
                        "source": result.get("link", f"source_{i + 1}"),
                        "title": result.get("title", f"Source {i + 1}"),
                        # "index" is guaranteed to be present at this point.
                        "index": int(result["index"]),
                    },
                )
            )
        return documents

    def _format_sources(self, documents: List[Document]) -> str:
        """Format sources with numbers for citation.

        Each document becomes ``[<index>] <page_content>``, using the
        ``"index"`` entry of its metadata; entries are separated by a blank
        line.
        """
        return "\n\n".join(
            f"[{doc.metadata['index']}] {doc.page_content}"
            for doc in documents
        )

    @abstractmethod
    def analyze_initial(
        self, query: str, search_results: Union[str, List[Dict]]
    ) -> Dict[str, Any]:
        """Process initial analysis with citations."""
        pass

    @abstractmethod
    def analyze_followup(
        self,
        question: str,
        search_results: Union[str, List[Dict]],
        previous_knowledge: str,
        nr_of_links: int,
    ) -> Dict[str, Any]:
        """Process follow-up analysis with citations."""
        pass