Coverage for src / local_deep_research / advanced_search_system / questions / entity_aware_question.py: 11%

49 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-01-11 00:51 +0000

1""" 

2Entity-aware question generation for improved entity identification. 

3""" 

4 

5from datetime import datetime, UTC 

6from typing import List 

7 

8from loguru import logger 

9 

10from .base_question import BaseQuestionGenerator 

11 

12 

class EntityAwareQuestionGenerator(BaseQuestionGenerator):
    """Question generator that creates more targeted searches for entity identification.

    A query that contains any entity keyword (case-insensitive substring
    match) is sent to ``self.model`` with an entity-focused prompt. Every
    other query is delegated unchanged to the parent implementation.
    """

    # Substrings that mark a query as an entity-identification query in
    # ``generate_questions``.
    _ENTITY_KEYWORDS: tuple[str, ...] = (
        "who",
        "what",
        "which",
        "identify",
        "name",
        "character",
        "person",
        "place",
        "organization",
        "company",
        "author",
        "scientist",
        "inventor",
        "city",
        "country",
        "book",
        "movie",
    )

    # Substrings that mark a query as an entity-identification query in
    # ``generate_sub_questions``.
    # NOTE(review): this list is shorter than _ENTITY_KEYWORDS. It is kept
    # as-is so routing does not change; confirm whether the difference is
    # intentional.
    _SUB_QUESTION_ENTITY_KEYWORDS: tuple[str, ...] = (
        "who",
        "what",
        "which",
        "identify",
        "name",
        "character",
        "person",
        "place",
        "organization",
        "company",
    )

    @staticmethod
    def _is_entity_query(query: str, keywords: tuple[str, ...]) -> bool:
        """Return True if any keyword occurs as a substring of the lowercased query."""
        lowered = query.lower()
        return any(keyword in lowered for keyword in keywords)

    @staticmethod
    def _response_text(response):
        """Return ``response.content`` when that attribute exists, else ``str(response)``."""
        if hasattr(response, "content"):
            return response.content
        return str(response)

    @staticmethod
    def _parse_q_lines(text: str, limit: int) -> List[str]:
        """Extract up to ``limit`` non-empty questions from lines starting with "Q:"."""
        questions = []
        for raw_line in text.split("\n"):
            line = raw_line.strip()
            if not line.startswith("Q:"):
                continue
            # Remove only the leading marker; a later "Q:" inside the
            # question (e.g. "FAQ:") is part of the text and must survive.
            question = line.removeprefix("Q:").strip()
            if question:
                questions.append(question)
        return questions[:limit]

    @staticmethod
    def _strip_list_marker(line: str) -> str:
        """Return the text of a numbered or "-" bulleted line without its marker.

        Returns an empty string when the line is neither a bullet nor starts
        with a digit, or when nothing remains after the marker.
        """
        text = line.strip()
        if not text:
            return ""
        if text.startswith("-"):
            return text.lstrip("- ").strip()
        if not text[0].isdigit():
            return ""

        # Measure the leading run of digits ("1", "12", ...).
        end = 0
        while end < len(text) and text[end].isdigit():
            end += 1

        # Strip the number only when a list delimiter follows it directly.
        # Splitting on the first "." anywhere in the line would truncate
        # questions such as "1) Who wrote Dr. No?".
        if text[end : end + 1] in (".", ")"):
            return text[end + 1 :].strip()

        # Starts with digits but is not a list item (e.g. "1960s TV show ...").
        return text

    def generate_questions(
        self,
        current_knowledge: str,
        query: str,
        questions_per_iteration: int = 2,
        questions_by_iteration: dict | None = None,
    ) -> List[str]:
        """Generate questions with entity-aware search patterns.

        Args:
            current_knowledge: Knowledge gathered so far; included in the
                prompt only when past questions exist.
            query: The original user query.
            questions_per_iteration: Maximum number of questions to return.
            questions_by_iteration: Questions asked in earlier iterations;
                ``None`` is treated as an empty dict.

        Returns:
            Up to ``questions_per_iteration`` non-empty questions parsed from
            the model's "Q:" lines, or the parent implementation's result
            when the query contains no entity keyword.
        """
        questions_by_iteration = questions_by_iteration or {}

        logger.info("Generating entity-aware follow-up questions...")

        if not self._is_entity_query(query, self._ENTITY_KEYWORDS):
            # Fall back to standard question generation for non-entity queries
            return super().generate_questions(
                current_knowledge,
                query,
                questions_per_iteration,
                questions_by_iteration,
            )

        current_time = datetime.now(UTC).strftime("%Y-%m-%d")

        # Use a more direct entity-focused prompt; include history only when
        # there are past questions to show.
        if questions_by_iteration:
            prompt = f"""Generate {questions_per_iteration} targeted search queries to identify the specific entity in the query.

Query: {query}
Today: {current_time}
Past questions: {questions_by_iteration!s}
Current knowledge: {current_knowledge}

Create direct search queries that combine the key identifying features to find the specific name/entity.
Focus on:
1. Combining multiple constraints in a single search
2. Using quotation marks for exact phrases
3. Including specific details that narrow down results

Format: One question per line, e.g.
Q: "fictional character" "breaks fourth wall" "TV show" 1960s 1980s
Q: character name ascetics humor television fewer than 50 episodes
"""
        else:
            prompt = f"""Generate {questions_per_iteration} direct search queries to identify the specific entity in: {query}

Today: {current_time}

Create search queries that:
1. Combine multiple identifying features
2. Target the specific entity name/identification
3. Use variations of key terms

Format: One question per line, e.g.
Q: question1
Q: question2
"""

        response = self.model.invoke(prompt)

        # Handle both string responses and responses with .content attribute
        response_text = self._response_text(response)

        questions = self._parse_q_lines(response_text, questions_per_iteration)

        logger.info(f"Generated {len(questions)} entity-aware questions")

        return questions

    def generate_sub_questions(
        self, query: str, context: str = ""
    ) -> List[str]:
        """Generate sub-questions with entity focus when appropriate.

        Args:
            query: The original question to break down.
            context: Optional extra text inserted verbatim into the prompt.

        Returns:
            The sub-questions parsed from the model's numbered or "-"
            bulleted lines, the parent implementation's result when the
            query contains no entity keyword, or an empty list if invoking
            the model or parsing its response raises.
        """
        if not self._is_entity_query(query, self._SUB_QUESTION_ENTITY_KEYWORDS):
            return super().generate_sub_questions(query, context)

        prompt = f"""Break down this entity identification query into targeted sub-questions.

Original Question: {query}
{context}

Generate 2-5 sub-questions that will help identify the specific entity.
Focus on:
1. Combining constraints to narrow down results
2. Finding the actual name/identity
3. Verifying the entity matches all criteria

Format your response as:
1. First sub-question
2. Second sub-question
...

Only provide the numbered sub-questions."""

        try:
            response = self.model.invoke(prompt)
            content = self._response_text(response)

            # Extract numbered/bulleted questions, dropping lines that are
            # not list items or are empty after the marker is removed.
            questions = []
            for line in content.strip().split("\n"):
                question = self._strip_list_marker(line)
                if question:
                    questions.append(question)

            return questions

        except Exception as e:
            # Best-effort: log the failure and return no sub-questions.
            logger.exception(f"Error generating sub-questions: {e!s}")
            return []