Coverage for src/local_deep_research/citation_handlers/forced_answer_citation_handler.py: 100%

51 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-03 23:15 +0000

1""" 

2Forced answer citation handler - optimized for BrowseComp-style questions. 

3Always provides a specific answer, never returns "cannot determine". 

4""" 

5 

6from datetime import datetime, timezone 

7from typing import Any, Dict, List, Union 

8 

9from loguru import logger 

10 

11from .base_citation_handler import BaseCitationHandler 

12 

13 

class ForcedAnswerCitationHandler(BaseCitationHandler):
    """Citation handler that forces direct answers for benchmark questions.

    Both analysis entry points build a prompt that instructs the model to
    commit to a specific answer, then run a heuristic check on the response
    and, when the response still looks evasive, issue a second LLM call that
    extracts a single direct answer and prepends it to the response.

    The helpers reached through ``self`` that are not defined in this class
    (``_create_documents``, ``_format_sources``, ``_invoke_with_streaming``,
    ``_invoke_text``, ``_get_output_instruction_prefix`` and
    ``is_fact_checking_enabled``) are presumably provided by
    ``BaseCitationHandler`` -- confirm against that class.
    """

    # Lower-case phrases whose presence anywhere in a response marks it as a
    # non-answer.
    _NO_ANSWER_INDICATORS = (
        "cannot determine",
        "unable to find",
        "insufficient",
        "unclear",
        "not enough",
        "cannot provide",
        "no specific answer",
        "cannot definitively",
    )

    # Query openings treated as "direct" questions that expect a concrete
    # answer at the start of the response.
    _DIRECT_QUESTION_PREFIXES = ("what", "who", "which", "where", "when", "name")

    # Tokens looked for in the opening of a response as a sign that it states
    # an answer ("X is ...", "Answer: ...").
    _DIRECT_ANSWER_MARKERS = ("is", "was", "are", "were", ":")

    def analyze_initial(
        self, query: str, search_results: Union[str, List[Dict]]
    ) -> Dict[str, Any]:
        """Run the initial analysis with forced answer generation.

        Args:
            query: The question to answer.
            search_results: Raw search results, forwarded unchanged to
                ``self._create_documents``.

        Returns:
            A dict with ``"content"`` (the model response, possibly prefixed
            with a forcibly extracted answer) and ``"documents"`` (the value
            returned by ``self._create_documents``).
        """
        documents = self._create_documents(search_results)
        formatted_sources = self._format_sources(documents)

        # Timezone-aware UTC timestamp; embedded in the prompt so the model
        # can judge temporal references in the sources.
        current_timestamp = datetime.now(timezone.utc).strftime(
            "%Y-%m-%d %H:%M"
        )

        output_prefix = self._get_output_instruction_prefix()

        prompt = f"""{output_prefix}Analyze the following information and provide a DIRECT answer to the question. Include citations using numbers in square brackets [1], [2], etc.

Question: {query}

Sources:
{formatted_sources}

Current time is {current_timestamp} UTC for verifying temporal references in sources.

CRITICAL INSTRUCTIONS:
1. Start your response with a direct answer to the question
2. NEVER say "I cannot determine" or "insufficient information"
3. If unsure between options, choose the MOST LIKELY based on evidence
4. After the direct answer, provide supporting analysis with citations
5. Do not create the bibliography, it will be provided automatically.

Example response format:
"[Direct Answer]. According to [1], this is supported by..."
"""

        response = self._invoke_with_streaming(prompt)

        # If the response still doesn't have a direct answer, extract one
        if self._needs_answer_extraction(response, query):
            response = self._extract_direct_answer(
                query, response, formatted_sources
            )

        return {"content": response, "documents": documents}

    def analyze_followup(
        self,
        question: str,
        search_results: Union[str, List[Dict]],
        previous_knowledge: str,
        nr_of_links: int,
    ) -> Dict[str, Any]:
        """Run a follow-up analysis with forced answer generation.

        Args:
            question: The follow-up question to answer.
            search_results: Raw search results, forwarded to
                ``self._create_documents``.
            previous_knowledge: Text accumulated so far; embedded verbatim
                in the fact-check prompt and in the main prompt.
            nr_of_links: Forwarded to ``self._create_documents`` as the
                ``nr_of_links`` keyword argument.

        Returns:
            A dict with ``"content"`` (the model response, possibly prefixed
            with a forcibly extracted answer) and ``"documents"`` (the value
            returned by ``self._create_documents``).
        """
        documents = self._create_documents(
            search_results, nr_of_links=nr_of_links
        )
        formatted_sources = self._format_sources(documents)

        # Fact-checking step (if enabled). When disabled, the
        # "Fact Analysis:" line of the main prompt is left empty.
        fact_check_response = ""
        if self.is_fact_checking_enabled():
            fact_check_prompt = f"""Analyze these sources for factual consistency:
1. Cross-reference major claims between sources
2. Identify the most frequently mentioned answer
3. Note any conflicts but identify the most likely correct answer

Previous Knowledge:
{previous_knowledge}

New Sources:
{formatted_sources}

Return the most likely answer based on evidence consistency."""
            fact_check_response = self._invoke_text(fact_check_prompt)

        # Timezone-aware UTC timestamp; embedded in the prompt so the model
        # can judge temporal references in the sources.
        current_timestamp = datetime.now(timezone.utc).strftime(
            "%Y-%m-%d %H:%M"
        )

        output_prefix = self._get_output_instruction_prefix()

        prompt = f"""{output_prefix}Using the previous knowledge and new sources, provide a DIRECT answer to the question. Include citations using numbers in square brackets.

Previous Knowledge:
{previous_knowledge}

Question: {question}

New Sources:
{formatted_sources}

Current time is {current_timestamp} UTC for verifying temporal references in sources.

Fact Analysis: {fact_check_response}

CRITICAL INSTRUCTIONS:
1. You MUST start with a direct, specific answer
2. NEVER say "I cannot determine" or similar phrases
3. If the question asks for a name, provide a specific name
4. If the question asks for a place, provide a specific place
5. If unsure, choose the answer with the most supporting evidence
6. Format: "[Direct Answer]. Supporting evidence from [1], [2]..."
7. Do not create the bibliography, it will be provided automatically.

Remember: A wrong answer is better than no answer for this task."""

        content = self._invoke_with_streaming(prompt)

        # Final check - if still no direct answer, force extraction
        if self._needs_answer_extraction(content, question):
            content = self._extract_direct_answer(
                question, content, formatted_sources
            )
            logger.info(f"Forced answer extraction applied: {content[:100]}...")

        return {"content": content, "documents": documents}

    def _needs_answer_extraction(self, content: str, query: str) -> bool:
        """Check if the response needs forced answer extraction.

        Args:
            content: The model response to inspect.
            query: The question the response is supposed to answer.

        Returns:
            ``True`` when ``content`` contains a no-answer phrase, or when
            ``query`` opens with a direct-question word and the first 100
            characters of ``content`` contain none of the answer markers;
            ``False`` otherwise.
        """
        content_lower = content.lower()

        # Any hedging phrase anywhere in the response triggers extraction.
        if any(
            indicator in content_lower
            for indicator in self._NO_ANSWER_INDICATORS
        ):
            return True

        # Check if it's a direct question but no direct answer given
        if query.lower().startswith(self._DIRECT_QUESTION_PREFIXES):
            # Look for a direct answer pattern in first 100 chars.
            # NOTE(review): this is a plain substring test, so "is" also
            # matches inside words such as "this" or "Paris". Kept as-is to
            # preserve behaviour; tightening it to whole words would trigger
            # additional extraction calls.
            first_part = content_lower[:100]
            if not any(
                marker in first_part for marker in self._DIRECT_ANSWER_MARKERS
            ):
                return True

        return False

    def _extract_direct_answer(
        self, query: str, content: str, sources: str
    ) -> str:
        """Force extraction of a direct answer using the LLM.

        Args:
            query: The question being answered.
            content: The previous model response; only its first 1500
                characters are sent in the extraction prompt, but the full
                text is appended to the returned string.
            sources: Formatted sources; only the first 1500 characters are
                sent in the extraction prompt.

        Returns:
            ``"<answer>. Based on the available sources, ... <content>"``
            when the model returns a usable answer; ``content`` unchanged
            when the answer is empty after normalisation; a generic
            fallback sentence followed by ``content`` when the call raises.
        """
        extraction_prompt = f"""Based on the content below, extract a SINGLE, DIRECT answer to the question.

Question: {query}

Content: {content[:1500]}

Sources: {sources[:1500]}

RULES:
1. Respond with ONLY the answer itself (name, place, number, etc.)
2. No explanations, just the answer
3. If multiple candidates exist, pick the one mentioned most
4. If truly no information exists, make an educated guess

Answer:"""

        try:
            answer = self._invoke_text(extraction_prompt)

            # Normalise the raw model output before using it: surrounding
            # whitespace/newlines would otherwise be embedded verbatim, a
            # whitespace-only reply would slip past the emptiness check, and
            # a reply that already ends in "." would produce "..".
            if isinstance(answer, str):
                answer = answer.strip().rstrip(".").rstrip()

            if not answer:
                return content

            # Format as a proper response
            return f"{answer}. Based on the available sources, this appears to be the most likely answer. {content}"

        except Exception:
            # Deliberate best-effort: a failed extraction must not lose the
            # original response, so log the error and degrade gracefully.
            logger.exception("Error in forced answer extraction")
            # Fallback - just prepend a guess
            return f"Based on the available evidence, the most likely answer appears to be related to the search results. {content}"
196 return f"Based on the available evidence, the most likely answer appears to be related to the search results. {content}"