Coverage for src / local_deep_research / advanced_search_system / questions / atomic_fact_question.py: 10%
44 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
1"""
2Atomic fact question generator for complex queries.
3Decomposes complex queries into atomic, independently searchable facts.
4"""
import re
from typing import Dict, List, Optional

from loguru import logger

from .base_question import BaseQuestionGenerator
class AtomicFactQuestionGenerator(BaseQuestionGenerator):
    """
    Generates questions by decomposing complex queries into atomic facts.

    This approach prevents the system from searching for documents that match
    ALL criteria at once, instead finding facts independently and then reasoning
    about connections.

    The class relies on ``self.model`` (an object exposing ``invoke(prompt)``)
    and ``self._format_previous_questions(...)``; neither is defined here, so
    they are presumably provided by ``BaseQuestionGenerator`` -- verify there.
    """

    # Maximum number of questions returned by the first-iteration decomposition.
    _MAX_ATOMIC_FACTS = 5

    # Response lines of this length or shorter are treated as noise.
    _MIN_QUESTION_LENGTH = 10

    # Number of completed iterations after which prompts switch from
    # "gather more facts" to "connect facts / fill gaps".
    _REASONING_ITERATION_THRESHOLD = 3

    # One or more leading list markers: "1.", "12.", "3)", "-", "*", "•".
    # The negative lookahead keeps decimals such as "2.5 million" intact.
    _LIST_MARKER_RE = re.compile(r"^(?:(?:\d+[.)](?!\d)|[-*•])\s*)+")

    def generate_questions(
        self,
        current_knowledge: str,
        query: str,
        questions_per_iteration: int = 5,
        questions_by_iteration: Optional[Dict[int, List[str]]] = None,
    ) -> List[str]:
        """
        Generate atomic fact questions from a complex query.

        Args:
            current_knowledge: The accumulated knowledge so far
            query: The original research query
            questions_per_iteration: Number of questions to generate on
                follow-up iterations. The first-iteration decomposition is
                capped at ``_MAX_ATOMIC_FACTS`` instead.
            questions_by_iteration: Questions generated in previous iterations;
                ``None`` or an empty dict means this is the first iteration

        Returns:
            List of atomic fact questions
        """
        # On first iteration, decompose the query
        if not questions_by_iteration:
            return self._decompose_to_atomic_facts(query)

        # On subsequent iterations, fill knowledge gaps or explore connections
        return self._generate_gap_filling_questions(
            query,
            current_knowledge,
            questions_by_iteration,
            questions_per_iteration,
        )

    @staticmethod
    def _response_text(response) -> str:
        """Return the textual payload of a model response as a ``str``.

        Uses ``response.content`` when that attribute exists, otherwise the
        response object itself. Non-string payloads are coerced with ``str``
        so the parser never fails on ``.strip()``; ``None`` becomes ``""``.
        """
        content = response.content if hasattr(response, "content") else response
        if content is None:
            return ""
        return content if isinstance(content, str) else str(content)

    @classmethod
    def _parse_questions(cls, response_text: str) -> List[str]:
        """Split a model response into cleaned, one-per-line questions.

        Blank lines, lines starting with ``#`` and lines no longer than
        ``_MIN_QUESTION_LENGTH`` characters are skipped. Leading numbering or
        bullet markers are removed, and lines that consist only of markers
        (for example a ``-----------`` separator) are dropped.
        """
        questions = []
        for raw_line in response_text.strip().split("\n"):
            line = raw_line.strip()
            # The length test also rejects empty lines.
            if len(line) <= cls._MIN_QUESTION_LENGTH or line.startswith("#"):
                continue
            question = cls._LIST_MARKER_RE.sub("", line).strip()
            if question:
                questions.append(question)
        return questions

    def _decompose_to_atomic_facts(self, query: str) -> List[str]:
        """Decompose complex query into atomic, searchable facts.

        Args:
            query: The original research query

        Returns:
            At most ``_MAX_ATOMIC_FACTS`` questions parsed from the model
            response, in the order the model produced them
        """
        prompt = f"""Decompose this complex query into simple, atomic facts that can be searched independently.

Query: {query}

Break this down into individual facts that can be searched separately. Each fact should:
1. Be about ONE thing only
2. Be searchable on its own
3. Not depend on other facts
4. Use general terms (e.g., "body parts" not specific ones)

For example, if the query is about a location with multiple criteria, create separate questions for:
- The geographical/geological aspect
- The naming aspect
- The historical events
- The statistical comparisons

Return ONLY the questions, one per line.
Example format:
What locations were formed by glaciers?
What geographic features are named after body parts?
Where did falls occur between specific dates?
"""

        response = self.model.invoke(prompt)
        questions = self._parse_questions(self._response_text(response))

        # Limit the number of atomic facts before logging so the message
        # reflects what is actually returned.
        questions = questions[: self._MAX_ATOMIC_FACTS]
        logger.info(f"Decomposed query into {len(questions)} atomic facts")
        return questions

    def _generate_gap_filling_questions(
        self,
        original_query: str,
        current_knowledge: str,
        questions_by_iteration: Dict[int, List[str]],
        questions_per_iteration: int,
    ) -> List[str]:
        """Generate questions to fill knowledge gaps or make connections.

        Args:
            original_query: The original research query
            current_knowledge: The accumulated knowledge so far
            questions_by_iteration: Questions generated in previous iterations
            questions_per_iteration: Maximum number of questions to return

        Returns:
            At most ``questions_per_iteration`` questions parsed from the
            model response
        """
        # Format once; both prompt variants embed the same text.
        previous_questions = self._format_previous_questions(
            questions_by_iteration
        )

        # Check if we have enough information to start reasoning
        if len(questions_by_iteration) >= self._REASONING_ITERATION_THRESHOLD:
            prompt = f"""Based on the accumulated knowledge, generate questions that help connect the facts or fill remaining gaps.

Original Query: {original_query}

Current Knowledge:
{current_knowledge}

Previous Questions:
{previous_questions}

Generate {questions_per_iteration} questions that:
1. Connect different facts you've found
2. Fill specific gaps in knowledge
3. Search for locations that match multiple criteria
4. Verify specific details

Return ONLY the questions, one per line.
"""
        else:
            # Still gathering basic facts
            prompt = f"""Continue gathering atomic facts for this query.

Original Query: {original_query}

Previous Questions:
{previous_questions}

Current Knowledge:
{current_knowledge}

Generate {questions_per_iteration} more atomic fact questions that help build a complete picture.
Focus on facts not yet explored.

Return ONLY the questions, one per line.
"""

        response = self.model.invoke(prompt)
        questions = self._parse_questions(self._response_text(response))
        return questions[:questions_per_iteration]