Coverage for src / local_deep_research / api / research_functions.py: 92%

227 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-14 23:55 +0000

1""" 

2API module for Local Deep Research. 

3Provides programmatic access to search and research capabilities. 

4""" 

5 

6from datetime import datetime, UTC 

7from typing import Any, Callable 

8 

9from loguru import logger 

10from local_deep_research.settings.logger import log_settings 

11 

12from ..config.llm_config import get_llm 

13from ..config.search_config import get_search 

14from ..config.thread_settings import get_setting_from_snapshot 

15from ..report_generator import IntegratedReportGenerator 

16from ..search_system import AdvancedSearchSystem 

17from ..utilities.db_utils import no_db_settings 

18from ..utilities.thread_context import clear_search_context, set_search_context 

19from ..utilities.search_utilities import remove_think_tags 

20from .settings_utils import create_settings_snapshot 

21 

22 

def _close_system(system):
    """Release an AdvancedSearchSystem together with its nested resources."""
    from ..utilities.resource_utils import safe_close

    # Close the system itself first, then any attached engine / LLM.
    safe_close(system, "search system")
    for attr_name, label in (("search", "search engine"), ("model", "system LLM")):
        if hasattr(system, attr_name):
            safe_close(getattr(system, attr_name), label)

32 

33 

def _init_search_system(
    model_name: str | None = None,
    temperature: float = 0.7,
    provider: str | None = None,
    openai_endpoint_url: str | None = None,
    progress_callback: Callable[[str, int, dict], None] | None = None,
    search_tool: str | None = None,
    search_strategy: str = "source_based",
    iterations: int = 1,
    questions_per_iteration: int = 1,
    retrievers: dict[str, Any] | None = None,
    llms: dict[str, Any] | None = None,
    username: str | None = None,
    research_id: str | None = None,
    research_context: dict[str, Any] | None = None,
    programmatic_mode: bool = True,
    search_original_query: bool = True,
    settings_snapshot: dict[str, Any] | None = None,
    **kwargs: Any,
) -> AdvancedSearchSystem:
    """
    Initialize and configure an AdvancedSearchSystem.

    Registers any caller-supplied retrievers and LLMs, builds the language
    model and (optionally) a search engine, and wires them into a new
    AdvancedSearchSystem configured with the given strategy and iteration
    settings.

    Args:
        model_name: Name of the model to use (if None, uses database setting)
        temperature: LLM temperature for generation
        provider: Provider to use (if None, uses database setting)
        openai_endpoint_url: Custom endpoint URL to use (if None, uses database
            setting)
        progress_callback: Optional callback function to receive progress updates
        search_tool: Search engine to use (auto, wikipedia, arxiv, etc.). If
            None, falls back to the "search.tool" value in settings_snapshot
        search_strategy: Search strategy to use (modular, source_based, etc.)
        iterations: Number of research cycles to perform
        questions_per_iteration: Number of questions to generate per cycle
        retrievers: Optional dictionary of {name: retriever} pairs to use as search engines
        llms: Optional dictionary of {name: llm} pairs to use as language models
        username: Optional username for per-user cache isolation
        research_id: Optional research ID used to tag metrics for this run
        research_context: Optional context dict forwarded to the LLM and system
        programmatic_mode: If True, disables database operations and metrics tracking
        search_original_query: Whether to include the original query in the first iteration of search
        settings_snapshot: Immutable settings dict; if None, falls back to
            kwargs["settings_snapshot"]
        **kwargs: Extra options; only "settings_snapshot" is consulted here

    Returns:
        AdvancedSearchSystem: An instance of the configured AdvancedSearchSystem.
    """
    # Register retrievers if provided
    if retrievers:
        from ..web_search_engines.retriever_registry import retriever_registry

        retriever_registry.register_multiple(retrievers)
        logger.info(
            f"Registered {len(retrievers)} retrievers: {list(retrievers.keys())}"
        )

    # Register LLMs if provided
    if llms:
        from ..llm import register_llm

        for name, llm_instance in llms.items():
            register_llm(name, llm_instance)
        logger.info(f"Registered {len(llms)} LLMs: {list(llms.keys())}")

    # Use settings_snapshot from parameter, or fall back to kwargs
    if settings_snapshot is None:
        settings_snapshot = kwargs.get("settings_snapshot")

    # Get language model with custom temperature
    llm = get_llm(
        temperature=temperature,
        openai_endpoint_url=openai_endpoint_url,
        model_name=model_name,
        provider=provider,
        research_id=research_id,
        research_context=research_context,
        settings_snapshot=settings_snapshot,
    )

    # Set the search engine if specified or get from settings
    search_engine = None

    try:
        # If no search_tool provided, get from settings_snapshot
        if not search_tool and settings_snapshot:
            search_tool = get_setting_from_snapshot(
                "search.tool", settings_snapshot=settings_snapshot
            )

        if search_tool:
            search_engine = get_search(
                search_tool,
                llm_instance=llm,
                username=username,
                settings_snapshot=settings_snapshot,
                programmatic_mode=programmatic_mode,
            )
            if search_engine is None:
                # Non-fatal: AdvancedSearchSystem picks its own default when
                # search=None, so we only warn here.
                logger.warning(
                    f"Could not create search engine '{search_tool}', using default."
                )

        # Create search system with custom parameters
        logger.info("Search strategy: {}", search_strategy)
        system = AdvancedSearchSystem(
            llm=llm,
            search=search_engine,
            strategy_name=search_strategy,
            username=username,
            research_id=research_id,
            research_context=research_context,
            settings_snapshot=settings_snapshot,
            programmatic_mode=programmatic_mode,
            search_original_query=search_original_query,
        )
    except Exception:
        # The LLM was created above but never handed off to a system that
        # could close it — release it before propagating the failure.
        from ..utilities.resource_utils import safe_close

        safe_close(llm, "init LLM")
        raise

    # Override default settings with user-provided values
    system.max_iterations = iterations
    system.questions_per_iteration = questions_per_iteration

    # Set progress callback if provided
    if progress_callback:
        system.set_progress_callback(progress_callback)

    return system

164 

165 

@no_db_settings
def quick_summary(
    query: str,
    research_id: str | None = None,
    retrievers: dict[str, Any] | None = None,
    llms: dict[str, Any] | None = None,
    username: str | None = None,
    provider: str | None = None,
    api_key: str | None = None,
    temperature: float | None = None,
    max_search_results: int | None = None,
    settings: dict[str, Any] | None = None,
    settings_override: dict[str, Any] | None = None,
    search_original_query: bool = True,
    **kwargs: Any,
) -> dict[str, Any]:
    """
    Generate a quick research summary for a given query.

    Args:
        query: The research query to analyze
        research_id: Optional research ID (int or UUID string) for tracking metrics
        retrievers: Optional dictionary of {name: retriever} pairs to use as search engines
        llms: Optional dictionary of {name: llm} pairs to use as language models
        username: Optional username for per-user cache isolation and metrics
        provider: LLM provider to use (e.g., 'openai', 'anthropic'). For programmatic API only.
        api_key: API key for the provider. For programmatic API only.
        temperature: LLM temperature (0.0-1.0). For programmatic API only.
        max_search_results: Maximum number of search results to return. For programmatic API only.
        settings: Base settings dict to use instead of defaults. For programmatic API only.
        settings_override: Dictionary of settings to override (e.g., {"llm.max_tokens": 4000}). For programmatic API only.
        search_original_query: Whether to include the original query in the first iteration of search.
            Set to False for news searches to avoid sending long subscription prompts to search engines.
        **kwargs: Additional configuration for the search system. Will be forwarded to
            `_init_search_system()`. May include "settings_snapshot",
            "search_tool", "research_mode", and "user_password".

    Returns:
        Dictionary containing the research results with keys:
            - 'research_id': The (possibly generated) research identifier
            - 'summary': The generated summary text
            - 'findings': List of detailed findings from each search
            - 'iterations': Number of iterations performed
            - 'questions': Questions generated during research
            - 'formatted_findings': Rendered findings text
            - 'sources': All links collected by the system

    Examples:
        # Simple usage with defaults
        result = quick_summary("What is quantum computing?")

        # With custom provider
        result = quick_summary(
            "What is quantum computing?",
            provider="anthropic",
            api_key="sk-ant-..."
        )

        # With advanced settings
        result = quick_summary(
            "What is quantum computing?",
            temperature=0.2,
            settings_override={"search.engines.arxiv.enabled": True}
        )
    """
    logger.info("Generating quick summary for query: {}", query)

    # Build a settings snapshot from the convenience parameters unless the
    # caller already supplied one explicitly via kwargs.
    if "settings_snapshot" not in kwargs:
        snapshot_kwargs = {}
        if provider is not None:
            snapshot_kwargs["provider"] = provider
        if api_key is not None:
            snapshot_kwargs["api_key"] = api_key
        if temperature is not None:
            snapshot_kwargs["temperature"] = temperature
        if max_search_results is not None:
            snapshot_kwargs["max_search_results"] = max_search_results

        # Nothing explicit at all: warn that defaults/env vars will be used.
        if (
            not snapshot_kwargs
            and settings is None
            and settings_override is None
        ):
            logger.warning(
                "No settings_snapshot or explicit config provided to quick_summary(). "
                "Using defaults and environment variables. For explicit control, "
                "pass settings_snapshot=create_settings_snapshot(...)."
            )

        kwargs["settings_snapshot"] = create_settings_snapshot(
            base_settings=settings,
            overrides=settings_override,
            **snapshot_kwargs,
        )
        log_settings(
            kwargs["settings_snapshot"],
            "Created settings snapshot for programmatic API",
        )
    else:
        log_settings(
            kwargs["settings_snapshot"],
            "Using provided settings snapshot for programmatic API",
        )

    # Generate a research_id if none provided
    if research_id is None:
        import uuid

        research_id = str(uuid.uuid4())
        logger.debug(f"Generated research_id: {research_id}")

    # Register retrievers if provided
    if retrievers:
        from ..web_search_engines.retriever_registry import retriever_registry

        retriever_registry.register_multiple(retrievers)
        logger.info(
            f"Registered {len(retrievers)} retrievers: {list(retrievers.keys())}"
        )

    # Register LLMs if provided
    if llms:
        from ..llm import register_llm

        for name, llm_instance in llms.items():
            register_llm(name, llm_instance)
        logger.info(f"Registered {len(llms)} LLMs: {list(llms.keys())}")

    # Thread/async context consumed by metrics tracking during the search.
    search_context = {
        "research_id": research_id,  # Pass UUID or integer directly
        "research_query": query,
        "research_mode": kwargs.get("research_mode", "quick"),
        "research_phase": "init",
        "search_iteration": 0,
        "search_engine_selected": kwargs.get("search_tool"),
        "username": username,  # Include username for metrics tracking
        "user_password": kwargs.get(
            "user_password"
        ),  # Include password for metrics tracking
    }
    set_search_context(search_context)

    system = None
    try:
        # Remove research_mode from kwargs before passing to _init_search_system
        init_kwargs = {k: v for k, v in kwargs.items() if k != "research_mode"}
        # Make sure username is passed to the system
        init_kwargs["username"] = username
        init_kwargs["research_id"] = research_id
        init_kwargs["research_context"] = search_context
        init_kwargs["search_original_query"] = search_original_query
        system = _init_search_system(llms=llms, **init_kwargs)

        # Perform the search and analysis
        results = system.analyze_topic(query)

        # Extract the summary from the current knowledge
        if results and "current_knowledge" in results:
            summary = results["current_knowledge"]
        else:
            summary = "Unable to generate summary for the query."

        # Prepare the return value (guard against None results)
        if results is None:
            results = {}
        return {
            "research_id": research_id,
            "summary": summary,
            "findings": results.get("findings", []),
            "iterations": results.get("iterations", 0),
            "questions": results.get("questions", {}),
            "formatted_findings": results.get("formatted_findings", ""),
            "sources": results.get("all_links_of_system", []),
        }
    finally:
        # Always release system resources and the context var, even on error.
        if system is not None:
            _close_system(system)
        clear_search_context()

339 

340 

@no_db_settings
def generate_report(
    query: str,
    output_file: str | None = None,
    progress_callback: Callable | None = None,
    searches_per_section: int = 2,
    retrievers: dict[str, Any] | None = None,
    llms: dict[str, Any] | None = None,
    username: str | None = None,
    provider: str | None = None,
    api_key: str | None = None,
    temperature: float | None = None,
    max_search_results: int | None = None,
    settings: dict[str, Any] | None = None,
    settings_override: dict[str, Any] | None = None,
    **kwargs: Any,
) -> dict[str, Any]:
    """
    Generate a comprehensive, structured research report for a given query.

    Args:
        query: The research query to analyze
        output_file: Optional path to save report markdown file
        progress_callback: Optional callback function to receive progress updates
        searches_per_section: The number of searches to perform for each
            section in the report.
        retrievers: Optional dictionary of {name: retriever} pairs to use as search engines
        llms: Optional dictionary of {name: llm} pairs to use as language models
        username: Optional username for per-user cache isolation and metrics
        provider: LLM provider to use (e.g., 'openai', 'anthropic'). For programmatic API only.
        api_key: API key for the provider. For programmatic API only.
        temperature: LLM temperature (0.0-1.0). For programmatic API only.
        max_search_results: Maximum number of search results to return. For programmatic API only.
        settings: Base settings dict to use instead of defaults. For programmatic API only.
        settings_override: Dictionary of settings to override. For programmatic API only.
        **kwargs: Additional configuration for the search system. May include
            "settings_snapshot", "search_tool", and "user_password".

    Returns:
        Dictionary containing the research report with keys:
            - 'content': The full report content in markdown format
            - 'metadata': Report metadata including generated timestamp and query
            - 'file_path': Path to saved file (if output_file was provided)

    Examples:
        # Simple usage with settings snapshot
        from local_deep_research.api.settings_utils import create_settings_snapshot
        settings = create_settings_snapshot({"programmatic_mode": True})
        result = generate_report("AI research", settings_snapshot=settings)

        # Save to file
        result = generate_report(
            "AI research",
            output_file="report.md",
            settings_snapshot=settings
        )
    """
    logger.info("Generating comprehensive research report for query: {}", query)

    # Build a settings snapshot from the convenience parameters unless the
    # caller already supplied one explicitly via kwargs.
    if "settings_snapshot" not in kwargs:
        snapshot_kwargs = {}
        if provider is not None:
            snapshot_kwargs["provider"] = provider
        if api_key is not None:
            snapshot_kwargs["api_key"] = api_key
        if temperature is not None:
            snapshot_kwargs["temperature"] = temperature
        if max_search_results is not None:
            snapshot_kwargs["max_search_results"] = max_search_results

        # Nothing explicit at all: warn that defaults/env vars will be used.
        if (
            not snapshot_kwargs
            and settings is None
            and settings_override is None
        ):
            logger.warning(
                "No settings_snapshot or explicit config provided to generate_report(). "
                "Using defaults and environment variables. For explicit control, "
                "pass settings_snapshot=create_settings_snapshot(...)."
            )

        kwargs["settings_snapshot"] = create_settings_snapshot(
            base_settings=settings,
            overrides=settings_override,
            **snapshot_kwargs,
        )
        log_settings(
            kwargs["settings_snapshot"],
            "Created settings snapshot for programmatic API",
        )
    else:
        log_settings(
            kwargs["settings_snapshot"],
            "Using provided settings snapshot for programmatic API",
        )

    # Register retrievers if provided
    if retrievers:
        from ..web_search_engines.retriever_registry import retriever_registry

        retriever_registry.register_multiple(retrievers)
        logger.info(
            f"Registered {len(retrievers)} retrievers: {list(retrievers.keys())}"
        )

    # Register LLMs if provided
    if llms:
        from ..llm import register_llm

        for name, llm_instance in llms.items():
            register_llm(name, llm_instance)
        logger.info(f"Registered {len(llms)} LLMs: {list(llms.keys())}")

    import uuid

    # Thread/async context consumed by metrics tracking during the search.
    search_context = {
        "research_id": str(uuid.uuid4()),
        "research_query": query,
        "research_mode": "report",
        "research_phase": "init",
        "search_iteration": 0,
        "search_engine_selected": kwargs.get("search_tool"),
        "username": username,
        "user_password": kwargs.get("user_password"),
    }
    set_search_context(search_context)

    system = None
    try:
        system = _init_search_system(
            retrievers=retrievers, llms=llms, username=username, **kwargs
        )
        # Set progress callback if provided
        if progress_callback:
            system.set_progress_callback(progress_callback)

        # Perform the initial research
        initial_findings = system.analyze_topic(query)

        # Generate the structured report
        report_generator = IntegratedReportGenerator(
            search_system=system,
            llm=system.model,
            searches_per_section=searches_per_section,
            settings_snapshot=kwargs.get("settings_snapshot"),
        )
        report = report_generator.generate_report(initial_findings, query)

        # Save report to file if path is provided; the write is gated by the
        # "api.allow_file_output" setting via write_file_verified.
        if output_file and report and "content" in report:
            from ..security.file_write_verifier import write_file_verified

            write_file_verified(
                output_file,
                report["content"],
                "api.allow_file_output",
                context="API research report",
                settings_snapshot=kwargs.get("settings_snapshot"),
            )
            logger.info(f"Report saved to {output_file}")
            report["file_path"] = output_file
        return report
    finally:
        # Always release system resources and the context var, even on error.
        if system is not None:
            _close_system(system)
        clear_search_context()

505 

506 

@no_db_settings
def detailed_research(
    query: str,
    research_id: str | None = None,
    retrievers: dict[str, Any] | None = None,
    llms: dict[str, Any] | None = None,
    username: str | None = None,
    **kwargs: Any,
) -> dict[str, Any]:
    """
    Run a detailed research pass and return structured results.

    Similar to generate_report but returns structured data instead of markdown.

    Args:
        query: The research query to analyze
        research_id: Optional research ID (int or UUID string) for tracking metrics
        retrievers: Optional dictionary of {name: retriever} pairs to use as search engines
        llms: Optional dictionary of {name: llm} pairs to use as language models
        username: Optional username for per-user cache isolation
        **kwargs: Configuration for the search system. Pass settings_snapshot
            (via create_settings_snapshot()) to configure provider, temperature, etc.

    Returns:
        Dictionary containing detailed research results, including a
        'metadata' sub-dict with timestamp and configuration echoes.
    """
    logger.info("Performing detailed research for query: {}", query)

    # Fall back to a default snapshot when the caller did not supply one.
    if "settings_snapshot" not in kwargs:
        logger.warning(
            "No settings_snapshot provided to detailed_research(). "
            "Using defaults and environment variables. For explicit control, "
            "pass settings_snapshot=create_settings_snapshot(provider=..., "
            "overrides={'search.tool': ...})."
        )
        kwargs["settings_snapshot"] = create_settings_snapshot()

    # Assign a fresh UUID when no research_id was given.
    if research_id is None:
        import uuid

        research_id = str(uuid.uuid4())
        logger.debug(f"Generated research_id: {research_id}")

    # Make caller-supplied retrievers available to the engine registry.
    if retrievers:
        from ..web_search_engines.retriever_registry import retriever_registry

        retriever_registry.register_multiple(retrievers)
        logger.info(
            f"Registered {len(retrievers)} retrievers: {list(retrievers.keys())}"
        )

    # Likewise for caller-supplied language models.
    if llms:
        from ..llm import register_llm

        for llm_name, llm_obj in llms.items():
            register_llm(llm_name, llm_obj)
        logger.info(f"Registered {len(llms)} LLMs: {list(llms.keys())}")

    # Context consumed by metrics tracking for the duration of this call.
    tracking_context = {
        "research_id": research_id,
        "research_query": query,
        "research_mode": "detailed",
        "research_phase": "init",
        "search_iteration": 0,
        "search_engine_selected": kwargs.get("search_tool"),
        "username": username,
        "user_password": kwargs.get("user_password"),
    }
    set_search_context(tracking_context)

    system = None
    try:
        system = _init_search_system(
            retrievers=retrievers, llms=llms, username=username, **kwargs
        )

        results = system.analyze_topic(query)
        if results is None:
            # Guard against a None return so .get() calls below are safe.
            results = {}

        run_metadata = {
            "timestamp": datetime.now(UTC).isoformat(),
            "search_tool": kwargs.get("search_tool", "auto"),
            "iterations_requested": kwargs.get("iterations", 1),
            "strategy": kwargs.get("search_strategy", "source_based"),
        }
        return {
            "query": query,
            "research_id": research_id,
            "summary": results.get("current_knowledge", ""),
            "findings": results.get("findings", []),
            "iterations": results.get("iterations", 0),
            "questions": results.get("questions", {}),
            "formatted_findings": results.get("formatted_findings", ""),
            "sources": results.get("all_links_of_system", []),
            "metadata": run_metadata,
        }
    finally:
        # Release resources and clear the context var even on failure.
        if system is not None:
            _close_system(system)
        clear_search_context()

613 

614 

@no_db_settings
def analyze_documents(
    query: str,
    collection_name: str,
    max_results: int = 10,
    temperature: float = 0.7,
    force_reindex: bool = False,
    output_file: str | None = None,
) -> dict[str, Any]:
    """
    Search a specific local document collection and summarize the matches.

    Args:
        query: The search query
        collection_name: Name of the local document collection to search
        max_results: Maximum number of results to return
        temperature: LLM temperature for summary generation
        force_reindex: Whether to force reindexing the collection
        output_file: Optional path to save analysis results to a file

    Returns:
        Dictionary containing:
            - 'summary': Summary of the findings
            - 'documents': List of matching documents with content and metadata
        plus 'collection', 'document_count', and optionally 'file_path' on success.
    """
    logger.info(
        f"Analyzing documents in collection '{collection_name}' for query: {query}"
    )

    llm = None
    search = None
    try:
        # Build the LLM and the collection-backed search engine.
        llm = get_llm(temperature=temperature)
        search = get_search(collection_name, llm_instance=llm)

        if not search:
            # No engine for this collection: release the LLM and bail out.
            from ..utilities.resource_utils import safe_close

            safe_close(llm, "LLM")
            llm = None
            return {
                "summary": f"Error: Collection '{collection_name}' not found or not properly configured.",
                "documents": [],
            }

        search.max_results = max_results
        matches = search.run(query)

        if not matches:
            return {
                "summary": f"No documents found in collection '{collection_name}' for query: '{query}'",
                "documents": [],
            }

        # Build the excerpt text for the summary prompt: first 5 matches,
        # each truncated to 1000 characters.
        excerpt_parts = []
        for position, doc in enumerate(matches[:5], start=1):
            body = doc.get("content", doc.get("snippet", ""))[:1000]
            excerpt_parts.append(f"Document {position}: {body}")
        docs_text = "\n\n".join(excerpt_parts)

        summary_prompt = f"""Analyze these document excerpts related to the query: "{query}"

{docs_text}

Provide a concise summary of the key information found in these documents related to the query.
"""

        import time

        started_at = time.time()
        logger.info(
            f"Starting LLM summary generation (prompt length: {len(summary_prompt)} chars)..."
        )

        response = llm.invoke(summary_prompt)

        elapsed = time.time() - started_at
        logger.info(f"LLM summary generation completed in {elapsed:.2f}s")

        # Chat models return a message object with .content; fall back to str().
        summary = (
            remove_think_tags(response.content)
            if hasattr(response, "content")
            else str(response)
        )

        analysis_result = {
            "summary": summary,
            "documents": matches,
            "collection": collection_name,
            "document_count": len(matches),
        }

        if output_file:
            from ..security.file_write_verifier import write_file_verified

            # Assemble the markdown report, then write it in one verified call.
            sections = [
                f"# Document Analysis: {query}\n\n",
                f"## Summary\n\n{summary}\n\n",
                f"## Documents Found: {len(matches)}\n\n",
            ]
            for position, doc in enumerate(matches, start=1):
                sections.append(
                    f"### Document {position}: {doc.get('title', 'Untitled')}\n\n"
                )
                sections.append(f"**Source:** {doc.get('link', 'Unknown')}\n\n")
                sections.append(
                    f"**Content:**\n\n{doc.get('content', doc.get('snippet', 'No content available'))[:1000]}...\n\n"
                )
                sections.append("---\n\n")

            write_file_verified(
                output_file,
                "".join(sections),
                "api.allow_file_output",
                context="API document analysis",
                settings_snapshot=None,  # analyze_documents doesn't support programmatic mode yet
            )

            analysis_result["file_path"] = output_file
            logger.info(f"Analysis saved to {output_file}")

        return analysis_result
    finally:
        # Always release both resources, whichever path we took above.
        from ..utilities.resource_utils import safe_close

        safe_close(search, "search engine")
        safe_close(llm, "LLM")