Coverage for src/local_deep_research/advanced_search_system/questions/entity_aware_question.py

1"""

2Entity-aware question generation for improved entity identification.

3"""

5from datetime import datetime, UTC

7from loguru import logger

9from .base_question import BaseQuestionGenerator

class EntityAwareQuestionGenerator(BaseQuestionGenerator):
    """Question generator that creates more targeted searches for entity identification.

    A query counts as an "entity identification" query when it contains any
    keyword from a fixed list (case-insensitive *substring* match, so e.g.
    "name" also matches "tournament"). Entity queries get an entity-focused
    prompt sent to ``self.model``; every other query is delegated unchanged to
    the base-class implementation.
    """

    # Keywords that route generate_questions() to the entity-focused prompts.
    _ENTITY_KEYWORDS: tuple[str, ...] = (
        "who",
        "what",
        "which",
        "identify",
        "name",
        "character",
        "person",
        "place",
        "organization",
        "company",
        "author",
        "scientist",
        "inventor",
        "city",
        "country",
        "book",
        "movie",
    )

    # generate_sub_questions() has always used this shorter list; it is kept
    # separate so that its detection behavior does not change.
    _SUB_QUESTION_ENTITY_KEYWORDS: tuple[str, ...] = (
        "who",
        "what",
        "which",
        "identify",
        "name",
        "character",
        "person",
        "place",
        "organization",
        "company",
    )

    @staticmethod
    def _contains_keyword(query: str, keywords: tuple[str, ...]) -> bool:
        """Return True if any keyword occurs as a substring of the lowercased query."""
        lowered = query.lower()
        return any(keyword in lowered for keyword in keywords)

    @staticmethod
    def _response_text(response):
        """Return ``response.content`` when that attribute exists, else ``str(response)``."""
        if hasattr(response, "content"):
            return response.content
        return str(response)

    @staticmethod
    def _strip_list_marker(line: str) -> str:
        """Remove a leading ``1.`` / ``1)`` / ``-`` list marker from an already-stripped line.

        Only a marker at the very start is removed, so periods inside the
        question text (e.g. "U.S.", "Dr.") are left intact.
        """
        without_digits = line.lstrip("0123456789")
        if len(without_digits) < len(line):
            after_number = without_digits.lstrip()
            # An empty remainder slices to "", which is not in the tuple.
            if after_number[:1] in (".", ")"):
                line = after_number[1:]
        return line.lstrip("- ").strip()

    def generate_questions(
        self,
        current_knowledge: str,
        query: str,
        questions_per_iteration: int = 2,
        questions_by_iteration: dict[int, list[str]] | None = None,
    ) -> list[str]:
        """Generate questions with entity-aware search patterns.

        Args:
            current_knowledge: Text included in the prompt as "Current
                knowledge" when past questions exist.
            query: The user query; also used for entity-keyword detection.
            questions_per_iteration: Number of queries requested from the
                model and the maximum number returned.
            questions_by_iteration: Previously asked questions keyed by
                iteration; ``None`` is treated as an empty dict.

        Returns:
            Up to ``questions_per_iteration`` non-empty questions parsed from
            the ``Q:`` lines of the model response. For non-entity queries,
            whatever the base-class ``generate_questions`` returns.
        """
        questions_by_iteration = questions_by_iteration or {}

        logger.info("Generating entity-aware follow-up questions...")

        if not self._contains_keyword(query, self._ENTITY_KEYWORDS):
            # Fall back to standard question generation for non-entity queries
            return super().generate_questions(
                current_knowledge,
                query,
                questions_per_iteration,
                questions_by_iteration,
            )

        current_time = datetime.now(UTC).strftime("%Y-%m-%d")

        if questions_by_iteration:
            # Follow-up round: include history and what is known so far.
            prompt = f"""Generate {questions_per_iteration} targeted search queries to identify the specific entity in the query.

Query: {query}
Today: {current_time}
Past questions: {questions_by_iteration!s}
Current knowledge: {current_knowledge}

Create direct search queries that combine the key identifying features to find the specific name/entity.
Focus on:
1. Combining multiple constraints in a single search
2. Using quotation marks for exact phrases
3. Including specific details that narrow down results

Format: One question per line, e.g.
Q: "fictional character" "breaks fourth wall" "TV show" 1960s 1980s
Q: character name ascetics humor television fewer than 50 episodes
"""
        else:
            # First round: no history to show the model.
            prompt = f"""Generate {questions_per_iteration} direct search queries to identify the specific entity in: {query}

Today: {current_time}

Create search queries that:
1. Combine multiple identifying features
2. Target the specific entity name/identification
3. Use variations of key terms

Format: One question per line, e.g.
Q: question1
Q: question2
"""

        response = self.model.invoke(prompt)

        # Handle both string responses and responses with .content attribute
        response_text = self._response_text(response)

        questions = []
        for raw_line in response_text.split("\n"):
            line = raw_line.strip()
            if not line.startswith("Q:"):
                continue
            # Drop only the leading marker; a later "Q:" inside the text
            # (e.g. "FAQ:") belongs to the question itself.
            question = line.removeprefix("Q:").strip()
            if question:  # a bare "Q:" carries no usable search query
                questions.append(question)
        questions = questions[:questions_per_iteration]

        logger.info(f"Generated {len(questions)} entity-aware questions")

        return questions

    def generate_sub_questions(
        self, query: str, context: str = ""
    ) -> list[str]:
        """Generate sub-questions with entity focus when appropriate.

        Args:
            query: The original question; also used for entity-keyword
                detection.
            context: Extra text inserted verbatim into the prompt below the
                original question.

        Returns:
            The sub-questions parsed from numbered or dashed lines of the
            model response, or ``[]`` if invoking the model or parsing its
            response raises. For non-entity queries, whatever the base-class
            ``generate_sub_questions`` returns.
        """
        if not self._contains_keyword(
            query, self._SUB_QUESTION_ENTITY_KEYWORDS
        ):
            return super().generate_sub_questions(query, context)

        prompt = f"""Break down this entity identification query into targeted sub-questions.

Original Question: {query}
{context}

Generate 2-5 sub-questions that will help identify the specific entity.
Focus on:
1. Combining constraints to narrow down results
2. Finding the actual name/identity
3. Verifying the entity matches all criteria

Format your response as:
1. First sub-question
2. Second sub-question
...

Only provide the numbered sub-questions."""

        try:
            response = self.model.invoke(prompt)
            content = self._response_text(response)

            # Extract numbered questions
            questions = []
            for raw_line in content.strip().split("\n"):
                line = raw_line.strip()
                if not line:
                    continue
                if not (line[0].isdigit() or line.startswith("-")):
                    continue
                # Remove the number/bullet and clean up
                question = self._strip_list_marker(line)
                if question:
                    questions.append(question)

            return questions

        except Exception:
            # Best-effort: log with traceback and return no sub-questions.
            logger.exception("Error generating sub-questions")
            return []

Coverage for src / local_deep_research / advanced_search_system / questions / entity_aware_question.py: 98%

48 statements