Coverage for src/local_deep_research/citation_handlers/forced_answer_citation_handler.py: 100%

52 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-01-11 00:51 +0000

1""" 

2Forced answer citation handler - optimized for BrowseComp-style questions. 

3Always provides a specific answer, never returns "cannot determine". 

4""" 

5 

6from datetime import datetime, timezone 

7from typing import Any, Dict, List, Union 

8 

9from loguru import logger 

10 

11from .base_citation_handler import BaseCitationHandler 

12 

13 

class ForcedAnswerCitationHandler(BaseCitationHandler):
    """Citation handler that forces direct answers for benchmark questions.

    Optimized for BrowseComp-style questions: the prompts explicitly forbid
    "cannot determine"-style hedging, and a post-processing pass
    (``_needs_answer_extraction`` / ``_extract_direct_answer``) forces a
    concrete answer out of the LLM whenever the first response hedges.
    """

    @staticmethod
    def _response_text(response: Any) -> str:
        """Normalize an LLM response to plain text.

        LangChain-style chat models return a message object exposing
        ``.content``, while simpler callables may return a bare ``str``.
        Previously only ``analyze_initial`` handled both shapes; the
        follow-up, fact-check, and extraction paths assumed a message
        object and would raise ``AttributeError`` on a plain string.
        Routing every invoke through this helper makes all call sites
        behave consistently.
        """
        return response if isinstance(response, str) else response.content

    def analyze_initial(
        self, query: str, search_results: Union[str, List[Dict]]
    ) -> Dict[str, Any]:
        """Initial analysis with forced answer generation.

        Args:
            query: The question to answer.
            search_results: Raw search results (pre-formatted string or a
                list of result dicts), passed to ``_create_documents``.

        Returns:
            Dict with ``content`` (the answer text, with citations) and
            ``documents`` (the source documents built from the results).
        """
        documents = self._create_documents(search_results)
        formatted_sources = self._format_sources(documents)

        # Give the model "now" so it can sanity-check temporal references
        # ("last year", "upcoming", ...) found in the sources.
        current_timestamp = datetime.now(timezone.utc).strftime(
            "%Y-%m-%d %H:%M"
        )

        output_prefix = self._get_output_instruction_prefix()

        prompt = f"""{output_prefix}Analyze the following information and provide a DIRECT answer to the question. Include citations using numbers in square brackets [1], [2], etc.

Question: {query}

Sources:
{formatted_sources}

Current time is {current_timestamp} UTC for verifying temporal references in sources.

CRITICAL INSTRUCTIONS:
1. Start your response with a direct answer to the question
2. NEVER say "I cannot determine" or "insufficient information"
3. If unsure between options, choose the MOST LIKELY based on evidence
4. After the direct answer, provide supporting analysis with citations

Example response format:
"[Direct Answer]. According to [1], this is supported by..."
"""

        response = self._response_text(self.llm.invoke(prompt))

        # If the response still doesn't have a direct answer, extract one
        if self._needs_answer_extraction(response, query):
            response = self._extract_direct_answer(
                query, response, formatted_sources
            )

        return {"content": response, "documents": documents}

    def analyze_followup(
        self,
        question: str,
        search_results: Union[str, List[Dict]],
        previous_knowledge: str,
        nr_of_links: int,
    ) -> Dict[str, Any]:
        """Follow-up analysis with forced answer generation.

        Args:
            question: The follow-up question to answer.
            search_results: New raw search results for this round.
            previous_knowledge: Accumulated findings from earlier rounds,
                included verbatim in the prompt.
            nr_of_links: Citation-number offset so new sources continue the
                numbering established by earlier rounds.

        Returns:
            Dict with ``content`` (the answer text) and ``documents``
            (the new source documents).
        """
        documents = self._create_documents(
            search_results, nr_of_links=nr_of_links
        )
        formatted_sources = self._format_sources(documents)

        # Fact-checking step (if enabled)
        fact_check_response = ""
        if self.get_setting("general.enable_fact_checking", True):
            fact_check_prompt = f"""Analyze these sources for factual consistency:
1. Cross-reference major claims between sources
2. Identify the most frequently mentioned answer
3. Note any conflicts but identify the most likely correct answer

Previous Knowledge:
{previous_knowledge}

New Sources:
{formatted_sources}

Return the most likely answer based on evidence consistency."""
            # Normalized via _response_text: the original accessed .content
            # directly and broke on string-returning LLMs.
            fact_check_response = self._response_text(
                self.llm.invoke(fact_check_prompt)
            )

        current_timestamp = datetime.now(timezone.utc).strftime(
            "%Y-%m-%d %H:%M"
        )

        output_prefix = self._get_output_instruction_prefix()

        prompt = f"""{output_prefix}Using the previous knowledge and new sources, provide a DIRECT answer to the question. Include citations using numbers in square brackets.

Previous Knowledge:
{previous_knowledge}

Question: {question}

New Sources:
{formatted_sources}

Current time is {current_timestamp} UTC for verifying temporal references in sources.

Fact Analysis: {fact_check_response}

CRITICAL INSTRUCTIONS:
1. You MUST start with a direct, specific answer
2. NEVER say "I cannot determine" or similar phrases
3. If the question asks for a name, provide a specific name
4. If the question asks for a place, provide a specific place
5. If unsure, choose the answer with the most supporting evidence
6. Format: "[Direct Answer]. Supporting evidence from [1], [2]..."

Remember: A wrong answer is better than no answer for this task."""

        content = self._response_text(self.llm.invoke(prompt))

        # Final check - if still no direct answer, force extraction
        if self._needs_answer_extraction(content, question):
            content = self._extract_direct_answer(
                question, content, formatted_sources
            )
            logger.info(f"Forced answer extraction applied: {content[:100]}...")

        return {"content": content, "documents": documents}

    def _needs_answer_extraction(self, content: str, query: str) -> bool:
        """Check if the response needs forced answer extraction.

        Returns True when the response contains a hedging phrase, or when a
        direct question ("what/who/which/where/when/name ...") is not
        answered directly in the opening of the response.
        """
        no_answer_indicators = [
            "cannot determine",
            "unable to find",
            "insufficient",
            "unclear",
            "not enough",
            "cannot provide",
            "no specific answer",
            "cannot definitively",
        ]

        content_lower = content.lower()

        # Check for no-answer indicators
        if any(indicator in content_lower for indicator in no_answer_indicators):
            return True

        # Check if it's a direct question but no direct answer given
        if query.lower().startswith(
            ("what", "who", "which", "where", "when", "name")
        ):
            # Look for a direct answer pattern in first 100 chars.
            # Heuristic: a direct answer usually contains a copula or a
            # colon ("X is ...", "Answer: ...") near the start.
            first_part = content[:100].lower()
            if not any(
                word in first_part for word in ["is", "was", "are", "were", ":"]
            ):
                return True

        return False

    def _extract_direct_answer(
        self, query: str, content: str, sources: str
    ) -> str:
        """Force extraction of a direct answer using LLM.

        Sends a stripped-down extraction prompt (content and sources
        truncated to 1500 chars each to bound prompt size) and prepends
        the extracted answer to the original analysis. Falls back to a
        generic preamble if the extraction call itself fails.
        """
        extraction_prompt = f"""Based on the content below, extract a SINGLE, DIRECT answer to the question.

Question: {query}

Content: {content[:1500]}

Sources: {sources[:1500]}

RULES:
1. Respond with ONLY the answer itself (name, place, number, etc.)
2. No explanations, just the answer
3. If multiple candidates exist, pick the one mentioned most
4. If truly no information exists, make an educated guess

Answer:"""

        try:
            answer = self._response_text(
                self.llm.invoke(extraction_prompt)
            ).strip()

            # Format as a proper response
            return f"{answer}. Based on the available sources, this appears to be the most likely answer. {content}"

        except Exception as e:
            logger.exception(f"Error in forced answer extraction: {e!s}")
            # Fallback - just prepend a guess
            return f"Based on the available evidence, the most likely answer appears to be related to the search results. {content}"