Coverage for src / local_deep_research / advanced_search_system / knowledge / followup_context_manager.py: 97%
107 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
1"""
2Follow-up Context Manager
4Manages and processes past research context for follow-up questions.
5This is a standalone class that doesn't inherit from BaseKnowledgeGenerator
6to avoid implementing many abstract methods.
7"""
9from typing import Dict, List, Any, Optional
10from loguru import logger
12from langchain_core.language_models.chat_models import BaseChatModel
13from ...utilities.search_utilities import remove_think_tags
class FollowUpContextHandler:
    """
    Manages past research context for follow-up research.

    This is a standalone class (it deliberately does not inherit from
    BaseKnowledgeGenerator) that handles:
    1. Loading and structuring past research data
    2. Summarizing findings for follow-up context
    3. Extracting relevant information for new searches
    4. Building comprehensive context for strategies
    """

    def __init__(
        self, model: BaseChatModel, settings_snapshot: Optional[Dict] = None
    ):
        """
        Initialize the context manager.

        Args:
            model: Language model for processing context. May be falsy, in
                which case summarization/extraction fall back to truncation
                or empty results.
            settings_snapshot: Optional settings snapshot (defaults to {}).
        """
        self.model = model
        self.settings_snapshot = settings_snapshot or {}
        # Cache of loaded past research. Not populated by this class itself;
        # reserved for callers/subclasses that memoize lookups.
        self.past_research_cache = {}

    def build_context(
        self, research_data: Dict[str, Any], follow_up_query: str
    ) -> Dict[str, Any]:
        """
        Build comprehensive context from past research.

        Args:
            research_data: Past research data including findings, sources, etc.
            follow_up_query: The follow-up question being asked

        Returns:
            Structured context dictionary for follow-up research
        """
        logger.info(f"Building context for follow-up: {follow_up_query}")

        # Extract all components into one flat context dict consumed by
        # downstream strategies.
        return {
            "parent_research_id": research_data.get("research_id", ""),
            "original_query": research_data.get("query", ""),
            "follow_up_query": follow_up_query,
            "past_findings": self._extract_findings(research_data),
            "past_sources": self._extract_sources(research_data),
            "key_entities": self._extract_entities(research_data),
            "summary": self._create_summary(research_data, follow_up_query),
            "report_content": research_data.get("report_content", ""),
            "formatted_findings": research_data.get("formatted_findings", ""),
            "all_links_of_system": research_data.get("all_links_of_system", []),
            "metadata": self._extract_metadata(research_data),
        }

    def _extract_findings(self, research_data: Dict) -> str:
        """
        Extract and format findings from past research.

        Prefers pre-formatted findings; falls back to the first 2000
        characters of the raw report when no formatted findings exist.

        Args:
            research_data: Past research data

        Returns:
            Formatted findings string (or a placeholder when none exist)
        """
        findings_parts = []

        # Check various possible locations for findings.
        if formatted := research_data.get("formatted_findings"):
            findings_parts.append(formatted)

        if report := research_data.get("report_content"):
            # Only use the report excerpt when formatted findings are absent.
            if not findings_parts:
                findings_parts.append(report[:2000])

        if not findings_parts:
            return "No previous findings available"

        return "\n\n".join(findings_parts)

    def _extract_sources(self, research_data: Dict) -> List[Dict]:
        """
        Extract and structure sources from past research.

        Sources are gathered from all known fields and deduplicated by URL.

        Args:
            research_data: Past research data

        Returns:
            List of source dictionaries
        """
        sources = []
        seen_urls = set()

        # Check all possible source fields.
        for field in ["resources", "all_links_of_system", "past_links"]:
            if field_sources := research_data.get(field, []):
                for source in field_sources:
                    url = source.get("url", "")
                    if url and url not in seen_urls:
                        # First occurrence of this URL wins.
                        sources.append(source)
                        seen_urls.add(url)
                    elif not url:
                        # Include sources without URLs (shouldn't happen but be safe)
                        sources.append(source)

        return sources

    def _extract_entities(self, research_data: Dict) -> List[str]:
        """
        Extract key entities from past research using the LLM.

        Args:
            research_data: Past research data

        Returns:
            List of up to 10 key entities; empty when no findings or no model,
            or when the LLM call fails.
        """
        findings = self._extract_findings(research_data)

        if not findings or not self.model:
            return []

        prompt = f"""
Extract key entities (names, places, organizations, concepts) from these research findings:

{findings[:2000]}

Return up to 10 most important entities, one per line.
"""

        try:
            response = self.model.invoke(prompt)
            entities = [
                line.strip()
                for line in remove_think_tags(response.content)
                .strip()
                .split("\n")
                if line.strip()
            ]
            return entities[:10]
        except Exception:
            # Best-effort: log the full traceback instead of a bare warning,
            # but still degrade gracefully to an empty list.
            logger.exception("Failed to extract entities")
            return []

    def _create_summary(self, research_data: Dict, follow_up_query: str) -> str:
        """
        Create a targeted summary of past research relevant to the follow-up question.
        This is used internally for building context.

        Args:
            research_data: Past research data
            follow_up_query: The follow-up question

        Returns:
            Targeted summary for context building
        """
        findings = self._extract_findings(research_data)
        original_query = research_data.get("query", "")

        # For internal context, create a brief targeted summary.
        return self._generate_summary(
            findings=findings,
            query=follow_up_query,
            original_query=original_query,
            max_sentences=5,
            purpose="context",
        )

    def _extract_metadata(self, research_data: Dict) -> Dict:
        """
        Extract metadata from past research.

        Args:
            research_data: Past research data

        Returns:
            Metadata dictionary (strategy, mode, created_at, research_meta)
        """
        return {
            "strategy": research_data.get("strategy", ""),
            "mode": research_data.get("mode", ""),
            "created_at": research_data.get("created_at", ""),
            "research_meta": research_data.get("research_meta", {}),
        }

    def summarize_for_followup(
        self, findings: str, query: str, max_length: int = 1000
    ) -> str:
        """
        Create a concise summary of findings for external use (e.g., in prompts).
        This creates a length-constrained summary suitable for inclusion in LLM prompts.

        Args:
            findings: Past research findings
            query: Follow-up query
            max_length: Maximum length of summary in characters

        Returns:
            Concise summary constrained to max_length
        """
        # Approximate sentences from the character budget (~100 chars per
        # sentence), clamped to at least 1 so a small max_length never
        # produces a prompt asking for a "0-sentence" summary.
        return self._generate_summary(
            findings=findings,
            query=query,
            original_query=None,
            max_sentences=max(1, max_length // 100),
            purpose="prompt",
            max_length=max_length,
        )

    def _generate_summary(
        self,
        findings: str,
        query: str,
        original_query: Optional[str] = None,
        max_sentences: int = 5,
        purpose: str = "context",
        max_length: Optional[int] = None,
    ) -> str:
        """
        Shared summary generation logic.

        Args:
            findings: Research findings to summarize
            query: Follow-up query
            original_query: Original research query (optional)
            max_sentences: Maximum number of sentences
            purpose: Purpose of summary ("context" or "prompt")
            max_length: Maximum character length (optional)

        Returns:
            Generated summary
        """
        if not findings:
            return ""

        # If findings are already short enough, return as-is.
        if max_length and len(findings) <= max_length:
            return findings

        if not self.model:
            # Fallback without model: plain truncation. Only append an
            # ellipsis when something was actually cut off.
            if max_length:
                return findings[:max_length] + "..."
            if len(findings) <= 500:
                return findings
            return findings[:500] + "..."

        # Build prompt based on purpose.
        if purpose == "context" and original_query:
            prompt = f"""
Create a brief summary of previous research findings that are relevant to this follow-up question:

Original research question: "{original_query}"
Follow-up question: "{query}"

Previous findings:
{findings[:3000]}

Provide a {max_sentences}-sentence summary focusing on aspects relevant to the follow-up question.
"""
        else:
            prompt = f"""
Summarize these research findings in relation to the follow-up question:

Follow-up question: "{query}"

Findings:
{findings[:4000]}

Create a summary of {max_sentences} sentences that captures the most relevant information.
"""

        try:
            response = self.model.invoke(prompt)
            summary = remove_think_tags(response.content).strip()

            # Apply length constraint if specified.
            if max_length and len(summary) > max_length:
                summary = summary[:max_length] + "..."

            return summary
        except Exception:
            # Best-effort: log the traceback and fall back to truncation.
            logger.exception("Summary generation failed")
            if max_length:
                return findings[:max_length] + "..."
            if len(findings) <= 500:
                return findings
            return findings[:500] + "..."

    def identify_gaps(
        self, research_data: Dict, follow_up_query: str
    ) -> List[str]:
        """
        Identify information gaps that the follow-up should address.

        Args:
            research_data: Past research data
            follow_up_query: Follow-up question

        Returns:
            List of up to 5 identified gaps; empty when no findings or no
            model, or when the LLM call fails.
        """
        findings = self._extract_findings(research_data)

        if not findings or not self.model:
            return []

        prompt = f"""
Based on the previous research and the follow-up question, identify information gaps:

Previous research findings:
{findings[:2000]}

Follow-up question: "{follow_up_query}"

What specific information is missing or needs clarification? List up to 5 gaps, one per line.
"""

        try:
            response = self.model.invoke(prompt)
            gaps = [
                line.strip()
                for line in remove_think_tags(response.content)
                .strip()
                .split("\n")
                if line.strip()
            ]
            return gaps[:5]
        except Exception:
            # Best-effort: log the traceback, degrade to no gaps.
            logger.exception("Failed to identify gaps")
            return []

    def format_for_settings_snapshot(
        self, context: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Format context for inclusion in settings snapshot.
        Only includes essential metadata, not actual content.

        Args:
            context: Full context dictionary

        Returns:
            Minimal metadata for settings snapshot
        """
        # Only include minimal metadata in settings snapshot.
        # Settings snapshot should be for settings, not data.
        return {
            "followup_metadata": {
                "parent_research_id": context.get("parent_research_id"),
                "is_followup": True,
                "has_context": bool(context.get("past_findings")),
            }
        }

    def get_relevant_context_for_llm(
        self, context: Dict[str, Any], max_tokens: int = 2000
    ) -> str:
        """
        Get a concise version of context for LLM prompts.

        Args:
            context: Full context dictionary
            max_tokens: Approximate maximum tokens

        Returns:
            Concise context string
        """
        parts = []

        # Add original and follow-up queries.
        parts.append(f"Original research: {context.get('original_query', '')}")
        parts.append(
            f"Follow-up question: {context.get('follow_up_query', '')}"
        )

        # Add summary.
        if summary := context.get("summary"):
            parts.append(f"\nPrevious findings summary:\n{summary}")

        # Add key entities (capped at 5).
        if entities := context.get("key_entities"):
            parts.append(f"\nKey entities: {', '.join(entities[:5])}")

        # Add source count.
        if sources := context.get("past_sources"):
            parts.append(f"\nAvailable sources: {len(sources)}")

        result = "\n".join(parts)

        # Truncate if needed (rough approximation: 4 chars per token).
        max_chars = max_tokens * 4
        if len(result) > max_chars:
            result = result[:max_chars] + "..."

        return result