Coverage for src / local_deep_research / advanced_search_system / questions / entity_aware_question.py: 98%
48 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-25 01:07 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-25 01:07 +0000
1"""
2Entity-aware question generation for improved entity identification.
3"""
5from datetime import datetime, UTC
7from loguru import logger
9from .base_question import BaseQuestionGenerator
class EntityAwareQuestionGenerator(BaseQuestionGenerator):
    """Question generator that creates more targeted searches for entity identification.

    A query is treated as an entity-identification query when its lower-cased
    text contains one of the entity keywords below. Such queries get an
    entity-focused prompt; every other query is delegated unchanged to
    ``BaseQuestionGenerator``.
    """

    # Keywords consulted by both ``generate_questions`` and
    # ``generate_sub_questions``.
    _CORE_ENTITY_KEYWORDS = (
        "who",
        "what",
        "which",
        "identify",
        "name",
        "character",
        "person",
        "place",
        "organization",
        "company",
    )

    # ``generate_questions`` additionally reacts to these; the two sets are
    # kept distinct so each method routes queries exactly as it did before.
    _ENTITY_KEYWORDS = _CORE_ENTITY_KEYWORDS + (
        "author",
        "scientist",
        "inventor",
        "city",
        "country",
        "book",
        "movie",
    )

    @staticmethod
    def _has_entity_keyword(query: str, keywords: tuple[str, ...]) -> bool:
        """Return True if any of *keywords* occurs in the lower-cased *query*.

        NOTE: this is a plain substring test, so "who" also matches "whole"
        and "city" matches "electricity". Detection is deliberately loose.
        """
        lowered = query.lower()
        return any(keyword in lowered for keyword in keywords)

    @staticmethod
    def _entity_response_text(response):
        """Return ``response.content`` when that attribute exists, else ``str(response)``."""
        if hasattr(response, "content"):
            return response.content
        return str(response)

    @staticmethod
    def _parse_q_lines(text: str, limit: int) -> list[str]:
        """Collect the questions from lines of the form ``Q: <question>``.

        Only the leading ``Q:`` marker is removed, so a question that itself
        contains ``Q:`` (for example "FAQ: ...") is kept intact. Lines that
        carry nothing after the marker are skipped so they do not use up one
        of the *limit* slots.
        """
        questions = []
        for raw_line in text.split("\n"):
            line = raw_line.strip()
            if not line.startswith("Q:"):
                continue
            question = line.removeprefix("Q:").strip()
            if question:
                questions.append(question)
        return questions[:limit]

    @staticmethod
    def _parse_list_lines(text: str) -> list[str]:
        """Collect the questions from numbered (``1.`` / ``1)``) or dashed lines.

        Only the leading list marker is removed. Splitting the whole line on
        its first "." would truncate bulleted or ``1)``-style lines whose
        question text contains a period (e.g. "U.S.").
        """
        questions = []
        for raw_line in text.strip().split("\n"):
            line = raw_line.strip()
            if not line:
                continue
            if line[0].isdigit():
                # Drop the enumerator digits and one optional "." or ")".
                remainder = line.lstrip("0123456789")
                if remainder[:1] in (".", ")"):
                    remainder = remainder[1:]
            elif line.startswith("-"):
                remainder = line
            else:
                # Not a list item (preamble, blank filler, ...): ignore.
                continue
            # Remove a dash bullet, including one that follows an enumerator.
            question = remainder.strip().lstrip("- ").strip()
            if question:
                questions.append(question)
        return questions

    def generate_questions(
        self,
        current_knowledge: str,
        query: str,
        questions_per_iteration: int = 2,
        questions_by_iteration: dict[int, list[str]] | None = None,
    ) -> list[str]:
        """Generate questions with entity-aware search patterns.

        Args:
            current_knowledge: Text interpolated into the follow-up prompt.
            query: The user query; also used to detect entity queries.
            questions_per_iteration: Number of questions requested in the
                prompt and the maximum number returned.
            questions_by_iteration: Earlier questions keyed by iteration.
                ``None`` or an empty dict selects the first-round prompt.

        Returns:
            The parsed questions for an entity query, otherwise whatever the
            base class returns for the same arguments.
        """
        questions_by_iteration = questions_by_iteration or {}

        logger.info("Generating entity-aware follow-up questions...")

        if not self._has_entity_keyword(query, self._ENTITY_KEYWORDS):
            # Fall back to standard question generation for non-entity queries
            return super().generate_questions(
                current_knowledge,
                query,
                questions_per_iteration,
                questions_by_iteration,
            )

        current_time = datetime.now(UTC).strftime("%Y-%m-%d")

        # Use a more direct entity-focused prompt
        if questions_by_iteration:
            prompt = f"""Generate {questions_per_iteration} targeted search queries to identify the specific entity in the query.

Query: {query}
Today: {current_time}
Past questions: {questions_by_iteration!s}
Current knowledge: {current_knowledge}

Create direct search queries that combine the key identifying features to find the specific name/entity.
Focus on:
1. Combining multiple constraints in a single search
2. Using quotation marks for exact phrases
3. Including specific details that narrow down results

Format: One question per line, e.g.
Q: "fictional character" "breaks fourth wall" "TV show" 1960s 1980s
Q: character name ascetics humor television fewer than 50 episodes
"""
        else:
            prompt = f"""Generate {questions_per_iteration} direct search queries to identify the specific entity in: {query}

Today: {current_time}

Create search queries that:
1. Combine multiple identifying features
2. Target the specific entity name/identification
3. Use variations of key terms

Format: One question per line, e.g.
Q: question1
Q: question2
"""

        response = self.model.invoke(prompt)

        # Handle both string responses and responses with .content attribute
        response_text = self._entity_response_text(response)
        questions = self._parse_q_lines(response_text, questions_per_iteration)

        logger.info(f"Generated {len(questions)} entity-aware questions")

        return questions

    def generate_sub_questions(
        self, query: str, context: str = ""
    ) -> list[str]:
        """Generate sub-questions with entity focus when appropriate.

        Args:
            query: The question to break down; also used to detect entity
                queries.
            context: Optional text inserted verbatim into the prompt.

        Returns:
            The parsed sub-questions for an entity query, or an empty list if
            the model call or the parsing raises. Non-entity queries return
            whatever the base class returns.
        """
        if not self._has_entity_keyword(query, self._CORE_ENTITY_KEYWORDS):
            return super().generate_sub_questions(query, context)

        prompt = f"""Break down this entity identification query into targeted sub-questions.

Original Question: {query}
{context}

Generate 2-5 sub-questions that will help identify the specific entity.
Focus on:
1. Combining constraints to narrow down results
2. Finding the actual name/identity
3. Verifying the entity matches all criteria

Format your response as:
1. First sub-question
2. Second sub-question
...

Only provide the numbered sub-questions."""

        try:
            response = self.model.invoke(prompt)
            content = self._entity_response_text(response)
            # Extract numbered questions
            return self._parse_list_lines(content)
        except Exception:
            # Best effort: a failed model call or malformed response is
            # logged and reported as "no sub-questions".
            logger.exception("Error generating sub-questions")
            return []