Coverage for src/local_deep_research/advanced_search_system/knowledge/followup_context_manager.py: 11% (109 statements)
« prev ^ index » next — coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
"""
Follow-up Context Manager

Manages and processes past research context for follow-up questions.
This is a standalone class that doesn't inherit from BaseKnowledgeGenerator
to avoid implementing many abstract methods.
"""
9from typing import Dict, List, Any, Optional
10from loguru import logger
12from langchain_core.language_models.chat_models import BaseChatModel
13from ...utilities.search_utilities import remove_think_tags
class FollowUpContextHandler:
    """
    Manages past research context for follow-up research.

    This class handles:
    1. Loading and structuring past research data
    2. Summarizing findings for follow-up context
    3. Extracting relevant information for new searches
    4. Building comprehensive context for strategies
    """

    def __init__(
        self, model: "BaseChatModel", settings_snapshot: Optional[Dict] = None
    ):
        """
        Initialize the context manager.

        Args:
            model: Language model for processing context. May be falsy, in
                which case every LLM-backed helper degrades to truncation
                or an empty result instead of raising.
            settings_snapshot: Optional settings snapshot
        """
        self.model = model
        self.settings_snapshot = settings_snapshot or {}
        # Keyed by research id; reserved for memoizing loaded past research.
        self.past_research_cache = {}

    def build_context(
        self, research_data: Dict[str, Any], follow_up_query: str
    ) -> Dict[str, Any]:
        """
        Build comprehensive context from past research.

        Args:
            research_data: Past research data including findings, sources, etc.
            follow_up_query: The follow-up question being asked

        Returns:
            Structured context dictionary for follow-up research
        """
        logger.info(f"Building context for follow-up: {follow_up_query}")

        # Assemble every component a strategy may need; missing fields fall
        # back to empty values so downstream code never sees None.
        context = {
            "parent_research_id": research_data.get("research_id", ""),
            "original_query": research_data.get("query", ""),
            "follow_up_query": follow_up_query,
            "past_findings": self._extract_findings(research_data),
            "past_sources": self._extract_sources(research_data),
            "key_entities": self._extract_entities(research_data),
            "summary": self._create_summary(research_data, follow_up_query),
            "report_content": research_data.get("report_content", ""),
            "formatted_findings": research_data.get("formatted_findings", ""),
            "all_links_of_system": research_data.get("all_links_of_system", []),
            "metadata": self._extract_metadata(research_data),
        }

        return context

    def _extract_findings(self, research_data: Dict) -> str:
        """
        Extract and format findings from past research.

        Prefers ``formatted_findings``; falls back to the first 2000 chars
        of ``report_content`` only when no formatted findings exist.

        Args:
            research_data: Past research data

        Returns:
            Formatted findings string, or a placeholder when nothing is found.
        """
        findings_parts = []

        # Check various possible locations for findings
        if formatted := research_data.get("formatted_findings"):
            findings_parts.append(formatted)

        if report := research_data.get("report_content"):
            # Take first part of report if no formatted findings
            if not findings_parts:
                findings_parts.append(report[:2000])

        if not findings_parts:
            return "No previous findings available"

        return "\n\n".join(findings_parts)

    def _extract_sources(self, research_data: Dict) -> List[Dict]:
        """
        Extract and structure sources from past research.

        Scans every known source field and deduplicates by URL; sources
        without a URL are kept as-is.

        Args:
            research_data: Past research data

        Returns:
            List of source dictionaries
        """
        sources = []
        seen_urls = set()

        # Check all possible source fields; `or []` guards against an
        # explicit None value stored under one of these keys.
        for field in ["resources", "all_links_of_system", "past_links"]:
            for source in research_data.get(field, []) or []:
                url = source.get("url", "")
                # Avoid duplicates by URL
                if url and url not in seen_urls:
                    sources.append(source)
                    seen_urls.add(url)
                elif not url:
                    # Include sources without URLs (shouldn't happen but be safe)
                    sources.append(source)

        return sources

    def _extract_entities(self, research_data: Dict) -> List[str]:
        """
        Extract key entities from past research using the LLM.

        Args:
            research_data: Past research data

        Returns:
            List of up to 10 key entities; empty when there is no model or
            the model call fails.
        """
        findings = self._extract_findings(research_data)

        if not findings or not self.model:
            return []

        prompt = f"""
Extract key entities (names, places, organizations, concepts) from these research findings:

{findings[:2000]}

Return up to 10 most important entities, one per line.
"""
        return self._llm_lines(prompt, limit=10, action="extract entities")

    def _llm_lines(self, prompt: str, limit: int, action: str) -> List[str]:
        """
        Invoke the model and return its response as stripped, non-empty lines.

        Shared by `_extract_entities` and `identify_gaps`, which previously
        duplicated this parsing logic.

        Args:
            prompt: Prompt to send to the model
            limit: Maximum number of lines to return
            action: Short verb phrase used in the failure log message

        Returns:
            Up to ``limit`` lines; empty list if the model call fails.
        """
        try:
            response = self.model.invoke(prompt)
            lines = [
                line.strip()
                for line in remove_think_tags(response.content)
                .strip()
                .split("\n")
                if line.strip()
            ]
            return lines[:limit]
        except Exception as e:
            logger.warning(f"Failed to {action}: {e}")
            return []

    @staticmethod
    def _truncate(text: str, limit: int) -> str:
        """
        Clamp ``text`` to at most ``limit`` characters.

        Appends "..." only when the text was actually cut, and keeps the
        final length (ellipsis included) within ``limit``.
        """
        if len(text) <= limit:
            return text
        return text[: max(limit - 3, 0)] + "..."

    def _create_summary(self, research_data: Dict, follow_up_query: str) -> str:
        """
        Create a targeted summary of past research relevant to the follow-up question.
        This is used internally for building context.

        Args:
            research_data: Past research data
            follow_up_query: The follow-up question

        Returns:
            Targeted summary for context building
        """
        findings = self._extract_findings(research_data)
        original_query = research_data.get("query", "")

        # For internal context, create a brief targeted summary
        return self._generate_summary(
            findings=findings,
            query=follow_up_query,
            original_query=original_query,
            max_sentences=5,
            purpose="context",
        )

    def _extract_metadata(self, research_data: Dict) -> Dict:
        """
        Extract metadata from past research.

        Args:
            research_data: Past research data

        Returns:
            Metadata dictionary
        """
        return {
            "strategy": research_data.get("strategy", ""),
            "mode": research_data.get("mode", ""),
            "created_at": research_data.get("created_at", ""),
            "research_meta": research_data.get("research_meta", {}),
        }

    def summarize_for_followup(
        self, findings: str, query: str, max_length: int = 1000
    ) -> str:
        """
        Create a concise summary of findings for external use (e.g., in prompts).
        This creates a length-constrained summary suitable for inclusion in LLM prompts.

        Args:
            findings: Past research findings
            query: Follow-up query
            max_length: Maximum length of summary in characters

        Returns:
            Concise summary constrained to max_length
        """
        # Roughly one sentence per 100 characters, but never request zero
        # sentences (previously max_length < 100 produced a 0-sentence ask).
        return self._generate_summary(
            findings=findings,
            query=query,
            original_query=None,
            max_sentences=max(1, max_length // 100),
            purpose="prompt",
            max_length=max_length,
        )

    def _generate_summary(
        self,
        findings: str,
        query: str,
        original_query: Optional[str] = None,
        max_sentences: int = 5,
        purpose: str = "context",
        max_length: Optional[int] = None,
    ) -> str:
        """
        Shared summary generation logic.

        Args:
            findings: Research findings to summarize
            query: Follow-up query
            original_query: Original research query (optional)
            max_sentences: Maximum number of sentences
            purpose: Purpose of summary ("context" or "prompt")
            max_length: Maximum character length (optional)

        Returns:
            Generated summary; when ``max_length`` is given the result never
            exceeds it (previously the appended "..." could push it over).
        """
        if not findings:
            return ""

        # If findings are already short enough, return as-is
        if max_length and len(findings) <= max_length:
            return findings

        if not self.model:
            # Fallback without model: plain truncation.
            return self._truncate(findings, max_length or 500)

        # Build prompt based on purpose
        if purpose == "context" and original_query:
            prompt = f"""
Create a brief summary of previous research findings that are relevant to this follow-up question:

Original research question: "{original_query}"
Follow-up question: "{query}"

Previous findings:
{findings[:3000]}

Provide a {max_sentences}-sentence summary focusing on aspects relevant to the follow-up question.
"""
        else:
            prompt = f"""
Summarize these research findings in relation to the follow-up question:

Follow-up question: "{query}"

Findings:
{findings[:4000]}

Create a summary of {max_sentences} sentences that captures the most relevant information.
"""

        try:
            response = self.model.invoke(prompt)
            summary = remove_think_tags(response.content).strip()

            # Apply length constraint if specified
            if max_length:
                summary = self._truncate(summary, max_length)

            return summary
        except Exception as e:
            logger.warning(f"Summary generation failed: {e}")
            # Fallback to truncation
            return self._truncate(findings, max_length or 500)

    def identify_gaps(
        self, research_data: Dict, follow_up_query: str
    ) -> List[str]:
        """
        Identify information gaps that the follow-up should address.

        Args:
            research_data: Past research data
            follow_up_query: Follow-up question

        Returns:
            List of up to 5 identified gaps; empty without a model or on failure.
        """
        findings = self._extract_findings(research_data)

        if not findings or not self.model:
            return []

        prompt = f"""
Based on the previous research and the follow-up question, identify information gaps:

Previous research findings:
{findings[:2000]}

Follow-up question: "{follow_up_query}"

What specific information is missing or needs clarification? List up to 5 gaps, one per line.
"""
        return self._llm_lines(prompt, limit=5, action="identify gaps")

    def format_for_settings_snapshot(
        self, context: Dict[str, Any]
    ) -> Dict[str, Any]:
        """
        Format context for inclusion in settings snapshot.
        Only includes essential metadata, not actual content.

        Args:
            context: Full context dictionary

        Returns:
            Minimal metadata for settings snapshot
        """
        # Only include minimal metadata in settings snapshot
        # Settings snapshot should be for settings, not data
        return {
            "followup_metadata": {
                "parent_research_id": context.get("parent_research_id"),
                "is_followup": True,
                "has_context": bool(context.get("past_findings")),
            }
        }

    def get_relevant_context_for_llm(
        self, context: Dict[str, Any], max_tokens: int = 2000
    ) -> str:
        """
        Get a concise version of context for LLM prompts.

        Args:
            context: Full context dictionary
            max_tokens: Approximate maximum tokens

        Returns:
            Concise context string, clamped to roughly ``max_tokens`` tokens.
        """
        # Add original and follow-up queries
        parts = [
            f"Original research: {context.get('original_query', '')}",
            f"Follow-up question: {context.get('follow_up_query', '')}",
        ]

        # Add summary
        if summary := context.get("summary"):
            parts.append(f"\nPrevious findings summary:\n{summary}")

        # Add key entities
        if entities := context.get("key_entities"):
            parts.append(f"\nKey entities: {', '.join(entities[:5])}")

        # Add source count
        if sources := context.get("past_sources"):
            parts.append(f"\nAvailable sources: {len(sources)}")

        result = "\n".join(parts)

        # Truncate if needed (rough approximation: 4 chars per token)
        return self._truncate(result, max_tokens * 4)