Coverage for src / local_deep_research / advanced_search_system / questions / entity_aware_question.py: 98%

48 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-02-25 01:07 +0000

1""" 

2Entity-aware question generation for improved entity identification. 

3""" 

4 

5from datetime import datetime, UTC 

6 

7from loguru import logger 

8 

9from .base_question import BaseQuestionGenerator 

10 

11 

class EntityAwareQuestionGenerator(BaseQuestionGenerator):
    """Question generator that creates more targeted searches for entity identification.

    A query is treated as an "entity identification" query when it contains
    one of a fixed set of keywords (case-insensitive substring match).  Such
    queries get an entity-focused prompt; every other query is delegated
    unchanged to the ``BaseQuestionGenerator`` implementation.

    The model is reached through ``self.model`` (presumably set by the base
    class -- confirm against ``BaseQuestionGenerator``).
    """

    # Keywords that route ``generate_questions`` to the entity-focused prompt.
    _ENTITY_KEYWORDS: tuple[str, ...] = (
        "who",
        "what",
        "which",
        "identify",
        "name",
        "character",
        "person",
        "place",
        "organization",
        "company",
        "author",
        "scientist",
        "inventor",
        "city",
        "country",
        "book",
        "movie",
    )

    # ``generate_sub_questions`` has historically used this shorter list; it
    # is kept separate so that routing stays exactly as before.
    # NOTE(review): looks like a copy of the list above that was never
    # extended -- confirm whether the two should be unified.
    _SUB_QUESTION_ENTITY_KEYWORDS: tuple[str, ...] = (
        "who",
        "what",
        "which",
        "identify",
        "name",
        "character",
        "person",
        "place",
        "organization",
        "company",
    )

    @staticmethod
    def _mentions_keyword(query: str, keywords: tuple[str, ...]) -> bool:
        """Return True if any keyword occurs as a substring of the lower-cased query."""
        lowered = query.lower()  # lower-case once instead of once per keyword
        return any(keyword in lowered for keyword in keywords)

    @staticmethod
    def _extract_text(response):
        """Return ``response.content`` when that attribute exists, else ``str(response)``."""
        if hasattr(response, "content"):
            return response.content
        return str(response)

    @staticmethod
    def _parse_q_lines(text: str, limit: int) -> list[str]:
        """Collect the questions from lines that start with ``Q:``.

        Only the leading ``Q:`` marker is removed, so a question that itself
        contains "Q:" (e.g. "FAQ:") is left intact.  Lines that are empty
        after the marker are skipped so they do not use up one of the
        ``limit`` slots.
        """
        questions = []
        for raw_line in text.split("\n"):
            line = raw_line.strip()
            if not line.startswith("Q:"):
                continue
            question = line.removeprefix("Q:").strip()
            if question:
                questions.append(question)
        return questions[:limit]

    @staticmethod
    def _strip_list_marker(line: str) -> str:
        """Remove a leading ``1.`` / ``1)`` number or ``-`` bullet from a non-empty line.

        Only the marker at the start of the line is removed; periods later in
        the question (abbreviations, "Inc.", "U.S.") are preserved.
        """
        digits = 0
        while digits < len(line) and line[digits].isdigit():
            digits += 1
        # Drop the number only when it is really a list marker ("3." or "3)"),
        # not when the question merely starts with a number ("1960s ...").
        if digits and line[digits:digits + 1] in (".", ")"):
            line = line[digits + 1:]
        return line.lstrip("- ").strip()

    @classmethod
    def _parse_listed_lines(cls, text: str) -> list[str]:
        """Collect the questions from lines that start with a digit or ``-``."""
        questions = []
        for raw_line in text.strip().split("\n"):
            line = raw_line.strip()
            if not line or not (line[0].isdigit() or line.startswith("-")):
                continue
            question = cls._strip_list_marker(line)
            if question:
                questions.append(question)
        return questions

    def generate_questions(
        self,
        current_knowledge: str,
        query: str,
        questions_per_iteration: int = 2,
        questions_by_iteration: dict[int, list[str]] | None = None,
    ) -> list[str]:
        """Generate questions with entity-aware search patterns.

        Args:
            current_knowledge: Knowledge gathered so far; included in the
                prompt only when past questions exist.
            query: The user query to generate search questions for.
            questions_per_iteration: Number of questions requested in the
                prompt and the cap applied to the returned list.
            questions_by_iteration: Questions asked in earlier iterations;
                ``None`` is treated as an empty dict.

        Returns:
            The questions parsed from the model's ``Q:`` lines.  For queries
            without an entity keyword, whatever the base class
            ``generate_questions`` returns.

        Exceptions raised by ``self.model.invoke`` are not caught here.
        """
        now = datetime.now(UTC)
        current_time = now.strftime("%Y-%m-%d")
        questions_by_iteration = questions_by_iteration or {}

        logger.info("Generating entity-aware follow-up questions...")

        if not self._mentions_keyword(query, self._ENTITY_KEYWORDS):
            # Fall back to standard question generation for non-entity queries
            return super().generate_questions(
                current_knowledge,
                query,
                questions_per_iteration,
                questions_by_iteration,
            )

        # Use a more direct entity-focused prompt; the richer variant is only
        # used once there are past questions to show the model.
        if questions_by_iteration:
            prompt = f"""Generate {questions_per_iteration} targeted search queries to identify the specific entity in the query.

Query: {query}
Today: {current_time}
Past questions: {questions_by_iteration!s}
Current knowledge: {current_knowledge}

Create direct search queries that combine the key identifying features to find the specific name/entity.
Focus on:
1. Combining multiple constraints in a single search
2. Using quotation marks for exact phrases
3. Including specific details that narrow down results

Format: One question per line, e.g.
Q: "fictional character" "breaks fourth wall" "TV show" 1960s 1980s
Q: character name ascetics humor television fewer than 50 episodes
"""
        else:
            prompt = f"""Generate {questions_per_iteration} direct search queries to identify the specific entity in: {query}

Today: {current_time}

Create search queries that:
1. Combine multiple identifying features
2. Target the specific entity name/identification
3. Use variations of key terms

Format: One question per line, e.g.
Q: question1
Q: question2
"""

        response = self.model.invoke(prompt)

        # Handle both string responses and responses with .content attribute
        response_text = self._extract_text(response)

        questions = self._parse_q_lines(response_text, questions_per_iteration)

        logger.info(f"Generated {len(questions)} entity-aware questions")

        return questions

    def generate_sub_questions(
        self, query: str, context: str = ""
    ) -> list[str]:
        """Generate sub-questions with entity focus when appropriate.

        Args:
            query: The question to break down.
            context: Optional extra text inserted into the prompt directly
                below the original question.

        Returns:
            The sub-questions parsed from the model's numbered or dashed
            lines, or an empty list if invoking the model or parsing its
            response raises.  For queries without an entity keyword,
            whatever the base class ``generate_sub_questions`` returns.
        """
        if not self._mentions_keyword(
            query, self._SUB_QUESTION_ENTITY_KEYWORDS
        ):
            return super().generate_sub_questions(query, context)

        prompt = f"""Break down this entity identification query into targeted sub-questions.

Original Question: {query}
{context}

Generate 2-5 sub-questions that will help identify the specific entity.
Focus on:
1. Combining constraints to narrow down results
2. Finding the actual name/identity
3. Verifying the entity matches all criteria

Format your response as:
1. First sub-question
2. Second sub-question
...

Only provide the numbered sub-questions."""

        try:
            response = self.model.invoke(prompt)
            content = self._extract_text(response)

            # Extract numbered / bulleted questions
            return self._parse_listed_lines(content)

        except Exception:
            # Best-effort: a failed model call yields no sub-questions
            # rather than aborting the caller.
            logger.exception("Error generating sub-questions")
            return []

182 return []