Coverage for src / local_deep_research / advanced_search_system / questions / entity_aware_question.py: 11%
49 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
1"""
2Entity-aware question generation for improved entity identification.
3"""
5from datetime import datetime, UTC
6from typing import List
8from loguru import logger
10from .base_question import BaseQuestionGenerator
class EntityAwareQuestionGenerator(BaseQuestionGenerator):
    """Question generator that creates more targeted searches for entity identification.

    A query is treated as an entity-identification query when its lower-cased
    text contains any of a fixed set of keywords (plain substring match, so
    e.g. "who" also matches inside "whole"). Such queries get an
    entity-focused prompt; every other query is delegated unchanged to
    ``BaseQuestionGenerator``.

    Both public methods call ``self.model.invoke(prompt)``; ``self.model`` is
    not set in this class (presumably provided by the base class - confirm).
    """

    # Keywords checked by both generate_questions and generate_sub_questions.
    _CORE_ENTITY_KEYWORDS = (
        "who",
        "what",
        "which",
        "identify",
        "name",
        "character",
        "person",
        "place",
        "organization",
        "company",
    )

    # generate_questions recognises a wider set than generate_sub_questions;
    # the two sets are kept distinct so routing of existing queries is
    # unchanged.
    _EXTENDED_ENTITY_KEYWORDS = _CORE_ENTITY_KEYWORDS + (
        "author",
        "scientist",
        "inventor",
        "city",
        "country",
        "book",
        "movie",
    )

    @staticmethod
    def _mentions_any(query: str, keywords) -> bool:
        """Return True if any keyword occurs as a substring of the lower-cased query."""
        lowered = query.lower()
        return any(keyword in lowered for keyword in keywords)

    @staticmethod
    def _response_text(response) -> str:
        """Return ``response.content`` when that attribute exists, else ``str(response)``."""
        if hasattr(response, "content"):
            return response.content
        return str(response)

    @staticmethod
    def _strip_list_marker(line: str) -> str:
        """Remove a leading ``N.`` / ``N)`` list marker and leading dashes/spaces.

        Only a marker at the very start of the line is removed, so periods
        inside the question text (e.g. "U.S.") are left intact. A line that
        starts with digits not followed by "." or ")" keeps those digits.
        """
        digit_count = 0
        while digit_count < len(line) and line[digit_count].isdigit():
            digit_count += 1
        if digit_count and line[digit_count : digit_count + 1] in (".", ")"):
            line = line[digit_count + 1 :]
        return line.lstrip("- ").strip()

    def generate_questions(
        self,
        current_knowledge: str,
        query: str,
        questions_per_iteration: int = 2,
        questions_by_iteration: dict | None = None,
    ) -> List[str]:
        """Generate questions with entity-aware search patterns.

        Args:
            current_knowledge: Knowledge gathered so far; embedded in the
                prompt only when past questions exist.
            query: The user's original query.
            questions_per_iteration: Number of questions requested from the
                model and the maximum number returned.
            questions_by_iteration: Previously asked questions; ``None`` is
                treated as an empty dict.

        Returns:
            Up to ``questions_per_iteration`` non-empty questions parsed from
            the model reply lines that start with ``Q:``. For queries without
            any entity keyword, whatever the base class implementation
            returns.
        """
        questions_by_iteration = questions_by_iteration or {}

        logger.info("Generating entity-aware follow-up questions...")

        if not self._mentions_any(query, self._EXTENDED_ENTITY_KEYWORDS):
            # Fall back to standard question generation for non-entity queries
            return super().generate_questions(
                current_knowledge,
                query,
                questions_per_iteration,
                questions_by_iteration,
            )

        current_time = datetime.now(UTC).strftime("%Y-%m-%d")

        if questions_by_iteration:
            # Follow-up round: include past questions and current knowledge.
            prompt = f"""Generate {questions_per_iteration} targeted search queries to identify the specific entity in the query.

Query: {query}
Today: {current_time}
Past questions: {questions_by_iteration!s}
Current knowledge: {current_knowledge}

Create direct search queries that combine the key identifying features to find the specific name/entity.
Focus on:
1. Combining multiple constraints in a single search
2. Using quotation marks for exact phrases
3. Including specific details that narrow down results

Format: One question per line, e.g.
Q: "fictional character" "breaks fourth wall" "TV show" 1960s 1980s
Q: character name ascetics humor television fewer than 50 episodes
"""
        else:
            # First round: nothing has been asked yet.
            prompt = f"""Generate {questions_per_iteration} direct search queries to identify the specific entity in: {query}

Today: {current_time}

Create search queries that:
1. Combine multiple identifying features
2. Target the specific entity name/identification
3. Use variations of key terms

Format: One question per line, e.g.
Q: question1
Q: question2
"""

        response_text = self._response_text(self.model.invoke(prompt))

        questions = []
        for raw_line in response_text.split("\n"):
            line = raw_line.strip()
            if not line.startswith("Q:"):
                continue
            # Strip only the leading marker; a "Q:" occurring later in the
            # line (e.g. "FAQ: ...") belongs to the question text.
            question = line.removeprefix("Q:").strip()
            if question:
                # Bare "Q:" lines must not use up the per-iteration quota.
                questions.append(question)
        questions = questions[:questions_per_iteration]

        logger.info(f"Generated {len(questions)} entity-aware questions")

        return questions

    def generate_sub_questions(
        self, query: str, context: str = ""
    ) -> List[str]:
        """Generate sub-questions with entity focus when appropriate.

        Args:
            query: The question to break down.
            context: Optional extra text inserted verbatim into the prompt.

        Returns:
            The sub-questions parsed from numbered or dash-bulleted lines of
            the model reply, or an empty list if invoking the model or
            parsing its reply raises. For queries without any entity keyword,
            whatever the base class implementation returns.
        """
        if not self._mentions_any(query, self._CORE_ENTITY_KEYWORDS):
            return super().generate_sub_questions(query, context)

        prompt = f"""Break down this entity identification query into targeted sub-questions.

Original Question: {query}
{context}

Generate 2-5 sub-questions that will help identify the specific entity.
Focus on:
1. Combining constraints to narrow down results
2. Finding the actual name/identity
3. Verifying the entity matches all criteria

Format your response as:
1. First sub-question
2. Second sub-question
...

Only provide the numbered sub-questions."""

        # Best-effort by design: any failure is logged and yields no
        # sub-questions instead of propagating to the caller.
        try:
            content = self._response_text(self.model.invoke(prompt))

            # Extract numbered or dash-bulleted questions
            questions = []
            for raw_line in content.strip().split("\n"):
                line = raw_line.strip()
                if line and (line[0].isdigit() or line.startswith("-")):
                    question = self._strip_list_marker(line)
                    if question:
                        questions.append(question)

            return questions

        except Exception as e:
            logger.exception(f"Error generating sub-questions: {e!s}")
            return []