Coverage for src/local_deep_research/citation_handlers/forced_answer_citation_handler.py: 100%
52 statements
1"""
2Forced answer citation handler - optimized for BrowseComp-style questions.
3Always provides a specific answer, never returns "cannot determine".
4"""
6from datetime import datetime, timezone
7from typing import Any, Dict, List, Union
9from loguru import logger
11from .base_citation_handler import BaseCitationHandler
class ForcedAnswerCitationHandler(BaseCitationHandler):
    """Citation handler that forces direct answers for benchmark questions."""

    def analyze_initial(
        self, query: str, search_results: Union[str, List[Dict]]
    ) -> Dict[str, Any]:
        """Initial analysis with forced answer generation."""
        documents = self._create_documents(search_results)
        formatted_sources = self._format_sources(documents)

        current_timestamp = datetime.now(timezone.utc).strftime(
            "%Y-%m-%d %H:%M"
        )

        output_prefix = self._get_output_instruction_prefix()

        prompt = f"""{output_prefix}Analyze the following information and provide a DIRECT answer to the question. Include citations using numbers in square brackets [1], [2], etc.

Question: {query}

Sources:
{formatted_sources}

Current time is {current_timestamp} UTC for verifying temporal references in sources.

CRITICAL INSTRUCTIONS:
1. Start your response with a direct answer to the question
2. NEVER say "I cannot determine" or "insufficient information"
3. If unsure between options, choose the MOST LIKELY based on evidence
4. After the direct answer, provide supporting analysis with citations

Example response format:
"[Direct Answer]. According to [1], this is supported by..."
"""

        response = self.llm.invoke(prompt)
        if not isinstance(response, str):
            response = response.content

        # If the response still doesn't have a direct answer, extract one
        if self._needs_answer_extraction(response, query):
            response = self._extract_direct_answer(
                query, response, formatted_sources
            )

        return {"content": response, "documents": documents}

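    # Illustrative example for analyze_initial (hypothetical values, not
    # taken from this module): analyze_initial("Who wrote The Stranger?",
    # results) would return {"content": "Albert Camus. According to [1],
    # ...", "documents": [...]}, where "content" always opens with a
    # direct answer rather than a refusal.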
    def analyze_followup(
        self,
        question: str,
        search_results: Union[str, List[Dict]],
        previous_knowledge: str,
        nr_of_links: int,
    ) -> Dict[str, Any]:
        """Follow-up analysis with forced answer generation."""
        documents = self._create_documents(
            search_results, nr_of_links=nr_of_links
        )
        formatted_sources = self._format_sources(documents)

        # Fact-checking step (if enabled)
        fact_check_response = ""
        if self.get_setting("general.enable_fact_checking", True):
            fact_check_prompt = f"""Analyze these sources for factual consistency:
1. Cross-reference major claims between sources
2. Identify the most frequently mentioned answer
3. Note any conflicts but identify the most likely correct answer

Previous Knowledge:
{previous_knowledge}

New Sources:
{formatted_sources}

Return the most likely answer based on evidence consistency."""
            fact_check_response = self.llm.invoke(fact_check_prompt).content

        current_timestamp = datetime.now(timezone.utc).strftime(
            "%Y-%m-%d %H:%M"
        )

        output_prefix = self._get_output_instruction_prefix()

        prompt = f"""{output_prefix}Using the previous knowledge and new sources, provide a DIRECT answer to the question. Include citations using numbers in square brackets.

Previous Knowledge:
{previous_knowledge}

Question: {question}

New Sources:
{formatted_sources}

Current time is {current_timestamp} UTC for verifying temporal references in sources.

Fact Analysis: {fact_check_response}

CRITICAL INSTRUCTIONS:
1. You MUST start with a direct, specific answer
2. NEVER say "I cannot determine" or similar phrases
3. If the question asks for a name, provide a specific name
4. If the question asks for a place, provide a specific place
5. If unsure, choose the answer with the most supporting evidence
6. Format: "[Direct Answer]. Supporting evidence from [1], [2]..."

Remember: A wrong answer is better than no answer for this task."""

        response = self.llm.invoke(prompt)
        content = response.content

        # Final check - if still no direct answer, force extraction
        if self._needs_answer_extraction(content, question):
            content = self._extract_direct_answer(
                question, content, formatted_sources
            )
            logger.info(f"Forced answer extraction applied: {content[:100]}...")

        return {"content": content, "documents": documents}

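    # Note on analyze_followup's fact-check gate: when the
    # "general.enable_fact_checking" setting resolves to False, the extra
    # fact-check LLM call is skipped entirely and the prompt's
    # "Fact Analysis:" section is rendered with an empty string.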
    def _needs_answer_extraction(self, content: str, query: str) -> bool:
        """Check if the response needs forced answer extraction."""
        no_answer_indicators = [
            "cannot determine",
            "unable to find",
            "insufficient",
            "unclear",
            "not enough",
            "cannot provide",
            "no specific answer",
            "cannot definitively",
        ]

        content_lower = content.lower()

        # Check for no-answer indicators
        for indicator in no_answer_indicators:
            if indicator in content_lower:
                return True

        # Check if it's a direct question but no direct answer given
        if query.lower().startswith(
            ("what", "who", "which", "where", "when", "name")
        ):
            # Look for a direct answer pattern in first 100 chars
            first_part = content[:100].lower()
            if not any(
                word in first_part for word in ["is", "was", "are", "were", ":"]
            ):
                return True

        return False

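    # Illustrative examples for _needs_answer_extraction (hypothetical
    # strings): for the query "Who founded Acme Corp?", a response opening
    # with "The sources mention several candidates..." triggers extraction
    # (no "is"/"was"/"are"/"were"/":" substring in the first 100 characters),
    # while "Acme Corp was founded by Jane Doe [1]." passes as a direct answer.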
    def _extract_direct_answer(
        self, query: str, content: str, sources: str
    ) -> str:
        """Force extraction of a direct answer using LLM."""
        extraction_prompt = f"""Based on the content below, extract a SINGLE, DIRECT answer to the question.

Question: {query}

Content: {content[:1500]}

Sources: {sources[:1500]}

RULES:
1. Respond with ONLY the answer itself (name, place, number, etc.)
2. No explanations, just the answer
3. If multiple candidates exist, pick the one mentioned most
4. If truly no information exists, make an educated guess

Answer:"""

        try:
            answer = self.llm.invoke(extraction_prompt).content.strip()

            # Format as a proper response
            return f"{answer}. Based on the available sources, this appears to be the most likely answer. {content}"
        except Exception as e:
            logger.exception(f"Error in forced answer extraction: {e!s}")
            # Fallback - just prepend a guess
            return f"Based on the available evidence, the most likely answer appears to be related to the search results. {content}"