Coverage for src/local_deep_research/advanced_search_system/questions/atomic_fact_question.py

"""
Atomic fact question generator for complex queries.

Decomposes complex queries into atomic, independently searchable facts.
"""

import re
from typing import Dict, List, Optional

from loguru import logger

from .base_question import BaseQuestionGenerator

# Leading list markers a model may add despite the "questions only" instruction:
# "1.", "12)", "-", "*", "•", possibly stacked ("- 1. ...").  The negative
# lookahead keeps numeric text such as "1.5 million ..." or "-5 degrees ..."
# intact instead of mistaking its first characters for a marker.
_LIST_MARKER_RE = re.compile(r"^(?:(?:\d+[.)]|[-*•])(?!\d)\s*)+")

# A response line must be longer than this (measured before marker removal)
# to be treated as a question.
_MIN_LINE_LENGTH = 10

# Upper bound on the number of facts returned by the initial decomposition.
_MAX_ATOMIC_FACTS = 5


def _response_to_text(response) -> str:
    """Return the text payload of a model response.

    Uses ``response.content`` when that attribute exists, otherwise the
    response object itself.  Anything that is not already a string is passed
    through ``str`` so that the subsequent string handling cannot fail.
    """
    payload = getattr(response, "content", response)
    return payload if isinstance(payload, str) else str(payload)


def _parse_question_lines(response_text: str) -> List[str]:
    """Split a model response into cleaned question strings.

    Lines that are empty, start with ``#``, or are not longer than
    ``_MIN_LINE_LENGTH`` characters are skipped.  Leading numbering or bullet
    markers are removed, and lines that consist of nothing but markers
    (for example a ``-----`` rule) are dropped.

    Args:
        response_text: Raw text returned by the model.

    Returns:
        The questions in the order they appear in the response.
    """
    questions = []
    for raw_line in response_text.strip().split("\n"):
        line = raw_line.strip()
        if len(line) <= _MIN_LINE_LENGTH or line.startswith("#"):
            continue
        question = _LIST_MARKER_RE.sub("", line, count=1).strip()
        if question:
            questions.append(question)
    return questions


class AtomicFactQuestionGenerator(BaseQuestionGenerator):
    """
    Generates questions by decomposing complex queries into atomic facts.

    This approach prevents the system from searching for documents that match
    ALL criteria at once, instead finding facts independently and then reasoning
    about connections.

    NOTE(review): uses ``self.model`` and ``self._format_previous_questions``,
    neither of which is defined in this class; presumably both come from
    ``BaseQuestionGenerator`` - confirm.
    """

    def generate_questions(
        self,
        current_knowledge: str,
        query: str,
        questions_per_iteration: int = 5,
        questions_by_iteration: Optional[Dict[int, List[str]]] = None,
    ) -> List[str]:
        """
        Generate atomic fact questions from a complex query.

        When no previous questions exist the query is decomposed into atomic
        facts; that first step is capped at a fixed number of facts and does
        not use ``questions_per_iteration``.  Otherwise gap-filling or
        connecting questions are generated.

        Args:
            current_knowledge: The accumulated knowledge so far
            query: The original research query
            questions_per_iteration: Number of questions to generate
            questions_by_iteration: Questions generated in previous iterations

        Returns:
            List of atomic fact questions
        """
        questions_by_iteration = questions_by_iteration or {}

        # On first iteration, decompose the query
        if not questions_by_iteration:
            return self._decompose_to_atomic_facts(query)

        # On subsequent iterations, fill knowledge gaps or explore connections
        return self._generate_gap_filling_questions(
            query,
            current_knowledge,
            questions_by_iteration,
            questions_per_iteration,
        )

    def _decompose_to_atomic_facts(self, query: str) -> List[str]:
        """Decompose complex query into atomic, searchable facts.

        Args:
            query: The original research query

        Returns:
            At most ``_MAX_ATOMIC_FACTS`` questions parsed from the model reply
        """
        prompt = f"""Decompose this complex query into simple, atomic facts that can be searched independently.

Query: {query}

Break this down into individual facts that can be searched separately. Each fact should:
1. Be about ONE thing only
2. Be searchable on its own
3. Not depend on other facts
4. Use general terms (e.g., "body parts" not specific ones)

For example, if the query is about a location with multiple criteria, create separate questions for:
- The geographical/geological aspect
- The naming aspect
- The historical events
- The statistical comparisons

Return ONLY the questions, one per line.
Example format:
What locations were formed by glaciers?
What geographic features are named after body parts?
Where did falls occur between specific dates?
"""

        response = self.model.invoke(prompt)
        questions = _parse_question_lines(_response_to_text(response))

        # The count is logged before truncation, i.e. it reports everything
        # the model produced, not just what is returned.
        logger.info(f"Decomposed query into {len(questions)} atomic facts")
        return questions[:_MAX_ATOMIC_FACTS]

    def _generate_gap_filling_questions(
        self,
        original_query: str,
        current_knowledge: str,
        questions_by_iteration: Dict[int, List[str]],
        questions_per_iteration: int,
    ) -> List[str]:
        """Generate questions to fill knowledge gaps or make connections.

        Args:
            original_query: The original research query
            current_knowledge: The accumulated knowledge so far
            questions_by_iteration: Questions generated in previous iterations
            questions_per_iteration: Maximum number of questions to return

        Returns:
            At most ``questions_per_iteration`` questions (none when that
            value is zero or negative)
        """
        previous_questions = self._format_previous_questions(
            questions_by_iteration
        )

        # Check if we have enough information to start reasoning
        if len(questions_by_iteration) >= 3:
            prompt = f"""Based on the accumulated knowledge, generate questions that help connect the facts or fill remaining gaps.

Original Query: {original_query}

Current Knowledge:
{current_knowledge}

Previous Questions:
{previous_questions}

Generate {questions_per_iteration} questions that:
1. Connect different facts you've found
2. Fill specific gaps in knowledge
3. Search for locations that match multiple criteria
4. Verify specific details

Return ONLY the questions, one per line.
"""
        else:
            # Still gathering basic facts
            prompt = f"""Continue gathering atomic facts for this query.

Original Query: {original_query}

Previous Questions:
{previous_questions}

Current Knowledge:
{current_knowledge}

Generate {questions_per_iteration} more atomic fact questions that help build a complete picture.
Focus on facts not yet explored.

Return ONLY the questions, one per line.
"""

        response = self.model.invoke(prompt)
        questions = _parse_question_lines(_response_to_text(response))

        # Clamp so a negative limit yields no questions rather than slicing
        # from the end of the list.
        return questions[: max(questions_per_iteration, 0)]

Coverage for src / local_deep_research / advanced_search_system / questions / atomic_fact_question.py: 100%

44 statements