Coverage for src / local_deep_research / advanced_search_system / questions / atomic_fact_question.py: 10%

44 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-01-11 00:51 +0000

1""" 

2Atomic fact question generator for complex queries. 

3Decomposes complex queries into atomic, independently searchable facts. 

4""" 

5 

import re
from typing import Dict, List, Optional

from loguru import logger

from .base_question import BaseQuestionGenerator

10 

11 

class AtomicFactQuestionGenerator(BaseQuestionGenerator):
    """
    Generates questions by decomposing complex queries into atomic facts.

    This approach prevents the system from searching for documents that match
    ALL criteria at once, instead finding facts independently and then reasoning
    about connections.

    NOTE(review): ``self.model`` and ``self._format_previous_questions`` are
    not defined in this class; presumably both are provided by
    ``BaseQuestionGenerator`` -- confirm against the base class.
    """

    # Maximum number of questions returned by the initial decomposition.
    _MAX_ATOMIC_FACTS = 5

    # Raw response lines must be longer than this many characters to be kept.
    _MIN_LINE_LENGTH = 10

    # Once this many iterations have been recorded, the prompt switches from
    # gathering more facts to connecting them and filling gaps.
    _REASONING_ITERATION_THRESHOLD = 3

    # Leading list markers: "1.", "12)", "-", "*", "•" (possibly stacked, e.g.
    # "1. - "). The (?!\d) lookahead keeps decimals such as "1.5 million"
    # intact instead of treating "1." as an enumeration marker.
    _LIST_PREFIX_RE = re.compile(r"^(?:(?:\d+[.)](?!\d)|[-*•])\s*)+")

    def generate_questions(
        self,
        current_knowledge: str,
        query: str,
        questions_per_iteration: int = 5,
        questions_by_iteration: Optional[Dict[int, List[str]]] = None,
    ) -> List[str]:
        """
        Generate atomic fact questions from a complex query.

        Args:
            current_knowledge: The accumulated knowledge so far
            query: The original research query
            questions_per_iteration: Number of questions to generate on
                follow-up iterations (the first iteration is capped by
                ``_MAX_ATOMIC_FACTS`` instead)
            questions_by_iteration: Questions generated in previous
                iterations; ``None`` or an empty dict means this is the
                first iteration

        Returns:
            List of atomic fact questions
        """
        # On first iteration, decompose the query
        if not questions_by_iteration:
            return self._decompose_to_atomic_facts(query)

        # On subsequent iterations, fill knowledge gaps or explore connections
        return self._generate_gap_filling_questions(
            query,
            current_knowledge,
            questions_by_iteration,
            questions_per_iteration,
        )

    @staticmethod
    def _extract_response_text(response) -> str:
        """Return the text of a model response as a string.

        Uses ``response.content`` when the attribute exists, otherwise the
        response object itself. ``None`` becomes an empty string and any
        other non-string value is converted with ``str()`` so that callers
        can always apply string methods to the result.
        """
        content = getattr(response, "content", response)
        if content is None:
            return ""
        return content if isinstance(content, str) else str(content)

    @classmethod
    def _parse_question_lines(cls, response_text: str, limit: int) -> List[str]:
        """Split a model response into cleaned question strings.

        Args:
            response_text: Raw text with one candidate question per line
            limit: Maximum number of questions to return; values below
                zero are treated as zero

        Returns:
            Up to ``limit`` questions, in response order, with leading
            numbering/bullet markers removed
        """
        questions: List[str] = []
        for raw_line in response_text.strip().split("\n"):
            line = raw_line.strip()
            # The length filter is applied to the raw line (before marker
            # removal); lines starting with "#" are skipped entirely.
            if len(line) <= cls._MIN_LINE_LENGTH or line.startswith("#"):
                continue
            cleaned = cls._LIST_PREFIX_RE.sub("", line, count=1).strip()
            # A line consisting only of markers leaves nothing to ask.
            if cleaned:
                questions.append(cleaned)
        return questions[: max(limit, 0)]

    def _decompose_to_atomic_facts(self, query: str) -> List[str]:
        """Decompose complex query into atomic, searchable facts.

        Sends a decomposition prompt to ``self.model`` and returns at most
        ``_MAX_ATOMIC_FACTS`` parsed questions.
        """
        prompt = f"""Decompose this complex query into simple, atomic facts that can be searched independently.

Query: {query}

Break this down into individual facts that can be searched separately. Each fact should:
1. Be about ONE thing only
2. Be searchable on its own
3. Not depend on other facts
4. Use general terms (e.g., "body parts" not specific ones)

For example, if the query is about a location with multiple criteria, create separate questions for:
- The geographical/geological aspect
- The naming aspect
- The historical events
- The statistical comparisons

Return ONLY the questions, one per line.
Example format:
What locations were formed by glaciers?
What geographic features are named after body parts?
Where did falls occur between specific dates?
"""

        response = self.model.invoke(prompt)
        questions = self._parse_question_lines(
            self._extract_response_text(response), self._MAX_ATOMIC_FACTS
        )

        # Log the number of facts actually returned (after the cap).
        logger.info(f"Decomposed query into {len(questions)} atomic facts")
        return questions

    def _generate_gap_filling_questions(
        self,
        original_query: str,
        current_knowledge: str,
        questions_by_iteration: Dict[int, List[str]],
        questions_per_iteration: int,
    ) -> List[str]:
        """Generate questions to fill knowledge gaps or make connections.

        Args:
            original_query: The original research query
            current_knowledge: The accumulated knowledge so far
            questions_by_iteration: Questions generated in previous iterations
            questions_per_iteration: Maximum number of questions to return

        Returns:
            Up to ``questions_per_iteration`` parsed questions
        """
        previous_questions = self._format_previous_questions(
            questions_by_iteration
        )

        # Check if we have enough information to start reasoning
        if len(questions_by_iteration) >= self._REASONING_ITERATION_THRESHOLD:
            prompt = f"""Based on the accumulated knowledge, generate questions that help connect the facts or fill remaining gaps.

Original Query: {original_query}

Current Knowledge:
{current_knowledge}

Previous Questions:
{previous_questions}

Generate {questions_per_iteration} questions that:
1. Connect different facts you've found
2. Fill specific gaps in knowledge
3. Search for locations that match multiple criteria
4. Verify specific details

Return ONLY the questions, one per line.
"""
        else:
            # Still gathering basic facts
            prompt = f"""Continue gathering atomic facts for this query.

Original Query: {original_query}

Previous Questions:
{previous_questions}

Current Knowledge:
{current_knowledge}

Generate {questions_per_iteration} more atomic fact questions that help build a complete picture.
Focus on facts not yet explored.

Return ONLY the questions, one per line.
"""

        response = self.model.invoke(prompt)
        return self._parse_question_lines(
            self._extract_response_text(response), questions_per_iteration
        )