Coverage for src/local_deep_research/citation_handlers/forced_answer_citation_handler.py: 100%
51 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-03 23:15 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-03 23:15 +0000
1"""
2Forced answer citation handler - optimized for BrowseComp-style questions.
3Always provides a specific answer, never returns "cannot determine".
4"""
6from datetime import datetime, timezone
7from typing import Any, Dict, List, Union
9from loguru import logger
11from .base_citation_handler import BaseCitationHandler
class ForcedAnswerCitationHandler(BaseCitationHandler):
    """Citation handler that forces direct answers for benchmark questions.

    Both analysis entry points build a prompt that demands a direct answer,
    send it to the LLM, and then run a heuristic check on the reply. When the
    reply looks evasive, a second LLM call extracts a bare answer that is
    prepended to the original reply.
    """

    # Lower-case phrases that signal the model declined to commit to an answer.
    _NO_ANSWER_INDICATORS = (
        "cannot determine",
        "unable to find",
        "insufficient",
        "unclear",
        "not enough",
        "cannot provide",
        "no specific answer",
        "cannot definitively",
    )

    # Query openers that are treated as "direct" questions.
    _DIRECT_QUESTION_PREFIXES = (
        "what",
        "who",
        "which",
        "where",
        "when",
        "name",
    )

    # Words whose presence near the start of a reply suggests a statement of
    # the form "X is Y". Matched as whole tokens, not as substrings.
    _ANSWER_VERBS = frozenset({"is", "was", "are", "were"})

    # Punctuation / markdown characters trimmed from token edges before the
    # whole-word comparison (so "is," or "**is**" still count as "is").
    _TOKEN_EDGE_PUNCTUATION = ".,;!?\"'()[]{}*_"

    @staticmethod
    def _utc_timestamp() -> str:
        """Return the current UTC time formatted as ``YYYY-MM-DD HH:MM``."""
        return datetime.now(timezone.utc).strftime("%Y-%m-%d %H:%M")

    def analyze_initial(
        self, query: str, search_results: Union[str, List[Dict]]
    ) -> Dict[str, Any]:
        """Run the initial analysis with forced answer generation.

        Args:
            query: The question to answer.
            search_results: Raw search results, passed unchanged to
                ``_create_documents``.

        Returns:
            A dict with ``"content"`` (the LLM reply, possibly prefixed with
            a forcibly extracted answer) and ``"documents"`` (the value
            returned by ``_create_documents``).
        """
        documents = self._create_documents(search_results)
        formatted_sources = self._format_sources(documents)

        current_timestamp = self._utc_timestamp()
        output_prefix = self._get_output_instruction_prefix()

        prompt = f"""{output_prefix}Analyze the following information and provide a DIRECT answer to the question. Include citations using numbers in square brackets [1], [2], etc.

Question: {query}

Sources:
{formatted_sources}

Current time is {current_timestamp} UTC for verifying temporal references in sources.

CRITICAL INSTRUCTIONS:
1. Start your response with a direct answer to the question
2. NEVER say "I cannot determine" or "insufficient information"
3. If unsure between options, choose the MOST LIKELY based on evidence
4. After the direct answer, provide supporting analysis with citations
5. Do not create the bibliography, it will be provided automatically.

Example response format:
"[Direct Answer]. According to [1], this is supported by..."
"""

        response = self._invoke_with_streaming(prompt)

        # If the response still doesn't have a direct answer, extract one.
        response = self._ensure_direct_answer(
            query, response, formatted_sources
        )

        return {"content": response, "documents": documents}

    def analyze_followup(
        self,
        question: str,
        search_results: Union[str, List[Dict]],
        previous_knowledge: str,
        nr_of_links: int,
    ) -> Dict[str, Any]:
        """Run a follow-up analysis with forced answer generation.

        Args:
            question: The follow-up question to answer.
            search_results: Raw search results, passed unchanged to
                ``_create_documents``.
            previous_knowledge: Text accumulated so far; embedded verbatim in
                the prompts.
            nr_of_links: Forwarded to ``_create_documents`` as the
                ``nr_of_links`` keyword (presumably a citation-number
                offset; confirm against the base class).

        Returns:
            A dict with ``"content"`` (the LLM reply, possibly prefixed with
            a forcibly extracted answer) and ``"documents"``.
        """
        documents = self._create_documents(
            search_results, nr_of_links=nr_of_links
        )
        formatted_sources = self._format_sources(documents)

        # Fact-checking step (if enabled). When disabled, the main prompt
        # below still carries a "Fact Analysis:" line with an empty value.
        fact_check_response = ""
        if self.is_fact_checking_enabled():
            fact_check_prompt = f"""Analyze these sources for factual consistency:
1. Cross-reference major claims between sources
2. Identify the most frequently mentioned answer
3. Note any conflicts but identify the most likely correct answer

Previous Knowledge:
{previous_knowledge}

New Sources:
{formatted_sources}

Return the most likely answer based on evidence consistency."""
            fact_check_response = self._invoke_text(fact_check_prompt)

        current_timestamp = self._utc_timestamp()
        output_prefix = self._get_output_instruction_prefix()

        prompt = f"""{output_prefix}Using the previous knowledge and new sources, provide a DIRECT answer to the question. Include citations using numbers in square brackets.

Previous Knowledge:
{previous_knowledge}

Question: {question}

New Sources:
{formatted_sources}

Current time is {current_timestamp} UTC for verifying temporal references in sources.

Fact Analysis: {fact_check_response}

CRITICAL INSTRUCTIONS:
1. You MUST start with a direct, specific answer
2. NEVER say "I cannot determine" or similar phrases
3. If the question asks for a name, provide a specific name
4. If the question asks for a place, provide a specific place
5. If unsure, choose the answer with the most supporting evidence
6. Format: "[Direct Answer]. Supporting evidence from [1], [2]..."
7. Do not create the bibliography, it will be provided automatically.

Remember: A wrong answer is better than no answer for this task."""

        content = self._invoke_with_streaming(prompt)

        # Final check - if still no direct answer, force extraction.
        content = self._ensure_direct_answer(
            question, content, formatted_sources
        )

        return {"content": content, "documents": documents}

    def _ensure_direct_answer(
        self, query: str, content: str, formatted_sources: str
    ) -> str:
        """Return ``content``, forcing an answer extraction when needed.

        Shared by both analysis entry points so the check, the extraction
        and the log line behave the same way on each path.
        """
        if not self._needs_answer_extraction(content, query):
            return content

        forced = self._extract_direct_answer(
            query, content, formatted_sources
        )
        logger.info(f"Forced answer extraction applied: {forced[:100]}...")
        return forced

    def _needs_answer_extraction(self, content: str, query: str) -> bool:
        """Check if the response needs forced answer extraction.

        Returns True when ``content`` contains any no-answer phrase, or when
        ``query`` opens with a direct-question word and the first 100
        characters of ``content`` contain neither a colon nor one of
        "is"/"was"/"are"/"were" as a standalone word.
        """
        content_lower = content.lower()

        # Any explicit refusal phrase, anywhere in the reply, is decisive.
        if any(
            indicator in content_lower
            for indicator in self._NO_ANSWER_INDICATORS
        ):
            return True

        # Only direct questions get the answer-pattern check. Leading
        # whitespace is ignored so " What is ..." still qualifies.
        if not query.strip().lower().startswith(
            self._DIRECT_QUESTION_PREFIXES
        ):
            return False

        # Look for a direct answer pattern in the first 100 chars.
        first_part = content[:100].lower()
        if ":" in first_part:
            return False

        # Compare whole tokens: a plain substring test would find "is"
        # inside "this" or "Paris" and make the check nearly always pass.
        leading_words = {
            token.strip(self._TOKEN_EDGE_PUNCTUATION)
            for token in first_part.split()
        }
        return self._ANSWER_VERBS.isdisjoint(leading_words)

    def _extract_direct_answer(
        self, query: str, content: str, sources: str
    ) -> str:
        """Force extraction of a direct answer using the LLM.

        Args:
            query: The question being answered.
            content: The LLM reply that lacked a direct answer; only its
                first 1500 characters are sent in the extraction prompt.
            sources: Formatted sources; likewise truncated to 1500
                characters in the prompt.

        Returns:
            ``content`` prefixed with the extracted answer; ``content``
            unchanged when the extraction yields nothing usable; or
            ``content`` prefixed with a generic sentence when the LLM call
            raises.
        """
        extraction_prompt = f"""Based on the content below, extract a SINGLE, DIRECT answer to the question.

Question: {query}

Content: {content[:1500]}

Sources: {sources[:1500]}

RULES:
1. Respond with ONLY the answer itself (name, place, number, etc.)
2. No explanations, just the answer
3. If multiple candidates exist, pick the one mentioned most
4. If truly no information exists, make an educated guess

Answer:"""

        try:
            answer = self._invoke_text(extraction_prompt)
        except Exception:
            # Deliberate best-effort: never let the extraction step break
            # the analysis; fall back to a generic lead-in instead.
            logger.exception("Error in forced answer extraction")
            return (
                "Based on the available evidence, the most likely answer "
                "appears to be related to the search results. "
                f"{content}"
            )

        # Treat None, "" and whitespace-only replies as "no answer".
        answer_text = str(answer).strip() if answer else ""
        if not answer_text:
            return content

        # Avoid "Paris.." when the model already ended its answer with
        # sentence punctuation.
        terminator = "" if answer_text.endswith((".", "!", "?")) else "."

        # Format as a proper response.
        return (
            f"{answer_text}{terminator} Based on the available sources, "
            "this appears to be the most likely answer. "
            f"{content}"
        )