Coverage for src / local_deep_research / advanced_search_system / questions / entity_aware_question.py: 98%
48 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
1"""
2Entity-aware question generation for improved entity identification.
3"""
5from datetime import datetime, UTC
7from loguru import logger
9from .base_question import BaseQuestionGenerator
class EntityAwareQuestionGenerator(BaseQuestionGenerator):
    """Question generator that creates more targeted searches for entity identification.

    A query is treated as an "entity identification" query when it contains
    one of a fixed set of keywords as a case-insensitive substring. For such
    queries the language model is prompted for search queries that combine
    identifying constraints; for any other query both public methods return
    an empty list.

    NOTE(review): ``self.model`` is not set in this class; it is presumably
    provided by ``BaseQuestionGenerator`` -- confirm against the base class.
    """

    # Keywords that mark a query as entity-focused in ``generate_questions``.
    _QUESTION_ENTITY_KEYWORDS = (
        "who",
        "what",
        "which",
        "identify",
        "name",
        "character",
        "person",
        "place",
        "organization",
        "company",
        "author",
        "scientist",
        "inventor",
        "city",
        "country",
        "book",
        "movie",
    )

    # ``generate_sub_questions`` has always used a shorter list; it is kept
    # separate so that the set of queries each method reacts to is unchanged.
    _SUB_QUESTION_ENTITY_KEYWORDS = (
        "who",
        "what",
        "which",
        "identify",
        "name",
        "character",
        "person",
        "place",
        "organization",
        "company",
    )

    @staticmethod
    def _mentions_any(query: str, keywords) -> bool:
        """Return True if any keyword occurs in *query* (case-insensitive substring)."""
        lowered = query.lower()
        return any(keyword in lowered for keyword in keywords)

    @staticmethod
    def _response_text(response):
        """Return ``response.content`` when present, otherwise ``str(response)``."""
        if hasattr(response, "content"):
            return response.content
        return str(response)

    def generate_questions(
        self,
        current_knowledge: str,
        query: str,
        questions_per_iteration: int = 2,
        questions_by_iteration: dict[int, list[str]] | None = None,
    ) -> list[str]:
        """Generate questions with entity-aware search patterns.

        Args:
            current_knowledge: Knowledge gathered so far; only included in
                the prompt when previous questions exist.
            query: The research query being investigated.
            questions_per_iteration: Maximum number of questions to return.
            questions_by_iteration: Previously asked questions keyed by
                iteration; ``None`` is treated as an empty mapping.

        Returns:
            Up to ``questions_per_iteration`` non-empty questions parsed from
            the model's ``Q:``-prefixed lines, or an empty list when the
            query contains none of the entity keywords.
        """
        current_time = datetime.now(UTC).strftime("%Y-%m-%d")
        questions_by_iteration = questions_by_iteration or {}

        logger.info("Generating entity-aware follow-up questions...")

        if not self._mentions_any(query, self._QUESTION_ENTITY_KEYWORDS):
            # Fall back to empty list for non-entity queries
            # (base class method is abstract; subclasses handle their own generation)
            return []

        # Use more direct entity-focused prompt
        if questions_by_iteration:
            prompt = f"""Generate {questions_per_iteration} targeted search queries to identify the specific entity in the query.

Query: {query}
Today: {current_time}
Past questions: {questions_by_iteration!s}
Current knowledge: {current_knowledge}

Create direct search queries that combine the key identifying features to find the specific name/entity.
Focus on:
1. Combining multiple constraints in a single search
2. Using quotation marks for exact phrases
3. Including specific details that narrow down results

Format: One question per line, e.g.
Q: "fictional character" "breaks fourth wall" "TV show" 1960s 1980s
Q: character name ascetics humor television fewer than 50 episodes
"""
        else:
            prompt = f"""Generate {questions_per_iteration} direct search queries to identify the specific entity in: {query}

Today: {current_time}

Create search queries that:
1. Combine multiple identifying features
2. Target the specific entity name/identification
3. Use variations of key terms

Format: One question per line, e.g.
Q: question1
Q: question2
"""

        # Handle both string responses and responses with .content attribute
        response_text = self._response_text(self.model.invoke(prompt))

        questions = []
        for raw_line in response_text.split("\n"):
            line = raw_line.strip()
            if not line.startswith("Q:"):
                continue
            # Strip only the leading marker: a blanket replace("Q:", "")
            # would also eat "Q:" inside the question (e.g. "FAQ: ...").
            question = line.removeprefix("Q:").strip()
            # A bare "Q:" line carries no question; skip it so it does not
            # use up one of the requested slots.
            if question:
                questions.append(question)
        questions = questions[:questions_per_iteration]

        logger.info(f"Generated {len(questions)} entity-aware questions")

        return questions

    def generate_sub_questions(
        self, query: str, context: str = ""
    ) -> list[str]:
        """Generate sub-questions with entity focus when appropriate.

        Args:
            query: The question to break down.
            context: Optional extra text inserted verbatim into the prompt.

        Returns:
            The numbered or dash-bulleted lines of the model's answer with
            their list markers removed. An empty list is returned when the
            query contains none of the entity keywords or when anything
            raises while calling the model or parsing its answer (the error
            is logged).
        """
        # Function-scope import: keeps this block self-contained without
        # touching the module's import section.
        import re

        # Check if this is an entity identification query
        if not self._mentions_any(query, self._SUB_QUESTION_ENTITY_KEYWORDS):
            return []

        prompt = f"""Break down this entity identification query into targeted sub-questions.

Original Question: {query}
{context}

Generate 2-5 sub-questions that will help identify the specific entity.
Focus on:
1. Combining constraints to narrow down results
2. Finding the actual name/identity
3. Verifying the entity matches all criteria

Format your response as:
1. First sub-question
2. Second sub-question
...

Only provide the numbered sub-questions."""

        # Leading "1." / "1)" / "-" marker plus any dashes or spaces after it.
        # Anchoring at the start avoids cutting at a period that belongs to
        # the question itself (e.g. "- Who is Dr. Smith?").
        list_marker = re.compile(r"^(?:\d+[.)]|-)[-\s]*")

        try:
            content = self._response_text(self.model.invoke(prompt))

            # Extract numbered questions
            questions = []
            for raw_line in content.strip().split("\n"):
                line = raw_line.strip()
                if not line:
                    continue
                if not (line[0].isdigit() or line.startswith("-")):
                    continue
                # Remove the number/bullet and clean up
                question = list_marker.sub("", line, count=1).strip()
                if question:
                    questions.append(question)

            return questions

        except Exception:
            logger.exception("Error generating sub-questions")
            return []