Coverage for src / local_deep_research / citation_handlers / base_citation_handler.py: 95%

43 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-01-11 00:51 +0000

1""" 

2Base class for all citation handlers. 

3""" 

4 

5from abc import ABC, abstractmethod 

6from typing import Any, Dict, List, Union 

7 

8from langchain_core.documents import Document 

9 

10 

class BaseCitationHandler(ABC):
    """Abstract base class for citation handlers.

    Stores the LLM and a settings snapshot, and provides shared helpers for
    reading settings, converting search results into numbered documents and
    formatting those documents for citation. Subclasses must implement
    ``analyze_initial`` and ``analyze_followup``.
    """

    def __init__(self, llm, settings_snapshot=None):
        """Store the LLM and the settings snapshot.

        Args:
            llm: Language model object; stored as-is, not used by this base
                class.
            settings_snapshot: Mapping of setting keys to values. A falsy
                value (``None``, ``{}``) is replaced by an empty dict.
        """
        self.llm = llm
        self.settings_snapshot = settings_snapshot or {}

    def get_setting(self, key: str, default=None):
        """Get a setting value from the snapshot.

        Args:
            key: Setting key to look up.
            default: Value returned when ``key`` is not in the snapshot.

        Returns:
            The stored value. When the stored value is a dict containing a
            ``"value"`` entry, that entry is returned instead of the dict.
        """
        if key not in self.settings_snapshot:
            return default

        value = self.settings_snapshot[key]
        # Snapshot entries may be wrapped as {"value": ...}; unwrap them.
        if isinstance(value, dict) and "value" in value:
            return value["value"]
        return value

    def _get_output_instruction_prefix(self) -> str:
        """Get formatted output instructions from settings if present.

        Reads the ``general.output_instructions`` setting, which lets users
        customize output language, tone, style and formatting. The returned
        prefix is meant to be prepended to prompts sent to the LLM.

        Returns:
            str: ``"User-Specified Output Style: <instructions>\\n\\n"`` when
            non-blank instructions are set, empty string otherwise.

        Examples of instructions:
            - "Respond in Spanish with formal academic tone"
            - "Use simple language suitable for beginners"
            - "Be concise with bullet points"
        """
        raw_instructions = self.get_setting("general.output_instructions", "")
        # The setting may be present but unset (e.g. {"value": None}); treat
        # any non-string value as "no instructions" instead of failing on
        # .strip().
        if not isinstance(raw_instructions, str):
            return ""

        output_instructions = raw_instructions.strip()
        if output_instructions:
            return f"User-Specified Output Style: {output_instructions}\n\n"
        return ""

    def _create_documents(
        self, search_results: Union[str, List[Dict]], nr_of_links: int = 0
    ) -> "List[Document]":
        """Convert search results to LangChain documents and index them.

        Side effect: each dict in ``search_results`` that has no ``"index"``
        key gets one added (as a string), so callers can match citations back
        to the original results. Existing indices are preserved (e.g. for
        topic organization).

        Args:
            search_results: List of result dicts, or a string. A string or
                any empty/``None`` value yields no documents. Entries that
                are not dicts are skipped.
            nr_of_links: Offset added to the 1-based position when a new
                index is assigned.

        Returns:
            One ``Document`` per dict entry, with ``source``, ``title`` and
            integer ``index`` metadata.

        Raises:
            ValueError: If a pre-existing ``"index"`` cannot be converted to
                an int.
        """
        documents = []
        if not search_results or isinstance(search_results, str):
            return documents

        for i, result in enumerate(search_results):
            if not isinstance(result, dict):
                continue

            # Add index to the original search result dictionary only if it
            # doesn't exist, preserving indices that were already set.
            if "index" not in result:
                result["index"] = str(i + nr_of_links + 1)

            # Prefer the full content; fall back to the snippet when the full
            # content is missing or None so page_content is always a string.
            content = result.get("full_content")
            if content is None:
                content = result.get("snippet")
            if content is None:
                content = ""

            documents.append(
                Document(
                    page_content=content,
                    metadata={
                        "source": result.get("link", f"source_{i + 1}"),
                        "title": result.get("title", f"Source {i + 1}"),
                        "index": int(result["index"]),
                    },
                )
            )
        return documents

    def _format_sources(self, documents: "List[Document]") -> str:
        """Format sources with numbers for citation.

        Args:
            documents: Documents whose metadata contains an ``"index"`` entry.

        Returns:
            The documents rendered as ``"[<index>] <page_content>"`` and
            joined by blank lines.
        """
        return "\n\n".join(
            f"[{doc.metadata['index']}] {doc.page_content}" for doc in documents
        )

    @abstractmethod
    def analyze_initial(
        self, query: str, search_results: Union[str, List[Dict]]
    ) -> Dict[str, Any]:
        """Process initial analysis with citations."""

    @abstractmethod
    def analyze_followup(
        self,
        question: str,
        search_results: Union[str, List[Dict]],
        previous_knowledge: str,
        nr_of_links: int,
    ) -> Dict[str, Any]:
        """Process follow-up analysis with citations."""