Coverage for src / local_deep_research / search_system_factory.py: 95%

152 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-14 23:55 +0000

1""" 

2Factory for creating search strategies. 

3This module provides a centralized way to create search strategies 

4to avoid code duplication. 

5""" 

6 

7from loguru import logger 

8from typing import Optional, Dict, Any, List 

9from langchain_core.language_models import BaseChatModel 

10 

11# Re-export from constants so existing importers don't break 

12from .constants import ( # noqa: F401 

13 ALL_STRATEGIES, 

14 AVAILABLE_STRATEGIES, 

15 get_available_strategies, 

16) 

17 

18 

19def _get_setting( 

20 settings_snapshot: Optional[Dict], key: str, default: Any 

21) -> Any: 

22 """Get a setting value from the snapshot, handling nested dict structure.""" 

23 if not settings_snapshot or key not in settings_snapshot: 

24 return default 

25 value = settings_snapshot[key] 

26 # Extract value from dict structure if needed 

27 if isinstance(value, dict) and "value" in value: 

28 return value["value"] 

29 return value 

30 

31 

# Keyword defaults for the constraint-driven strategy family.  Each table
# extends the previous one; a later entry with the same key replaces the
# inherited default (e.g. ``early_stop_threshold``, ``parallel_workers``).
_CONSTRAINED_DEFAULTS: Dict[str, Any] = {
    "max_iterations": 20,
    "confidence_threshold": 0.95,
    "candidate_limit": 100,
    "evidence_threshold": 0.9,
    "max_search_iterations": 5,
    "questions_per_iteration": 3,
    "min_candidates_per_stage": 20,
}
_PARALLEL_CONSTRAINED_DEFAULTS: Dict[str, Any] = {
    **_CONSTRAINED_DEFAULTS,
    "parallel_workers": 100,
}
_EARLY_STOP_DEFAULTS: Dict[str, Any] = {
    **_PARALLEL_CONSTRAINED_DEFAULTS,
    "early_stop_threshold": 0.99,
    "concurrent_evaluation": True,
}
_SMART_QUERY_DEFAULTS: Dict[str, Any] = {
    **_EARLY_STOP_DEFAULTS,
    "use_llm_query_generation": True,
    "queries_per_combination": 3,
}
_DUAL_CONFIDENCE_DEFAULTS: Dict[str, Any] = {
    **_SMART_QUERY_DEFAULTS,
    "early_stop_threshold": 0.95,  # replaces the 0.99 inherited above
    "use_entity_seeding": True,
    "use_direct_property_search": True,
    "uncertainty_penalty": 0.2,
    "negative_weight": 0.5,
}
_DUAL_REJECTION_DEFAULTS: Dict[str, Any] = {
    **_DUAL_CONFIDENCE_DEFAULTS,
    "rejection_threshold": 0.3,
    "positive_threshold": 0.2,
    "critical_constraint_rejection": 0.2,
}
_CONCURRENT_DUAL_DEFAULTS: Dict[str, Any] = {
    **_DUAL_REJECTION_DEFAULTS,
    "parallel_workers": 10,  # replaces the 100 inherited above
    "min_good_candidates": 3,
    "target_candidates": 5,
    "max_candidates": 10,
    "min_score_threshold": 0.65,
    "exceptional_score": 0.95,
    "quality_plateau_threshold": 0.1,
    "max_search_time": 30.0,
    "max_evaluations": 30,
}


def _pick_options(
    kwargs: Dict[str, Any], defaults: Dict[str, Any]
) -> Dict[str, Any]:
    """Return one entry per key of ``defaults``, preferring the value in ``kwargs``."""
    return {name: kwargs.get(name, default) for name, default in defaults.items()}


def _create_focused_iteration(
    model: BaseChatModel,
    search: Any,
    all_links_of_system: List[Dict],
    settings_snapshot: Optional[Dict],
    kwargs: Dict[str, Any],
    *,
    standard_citation: bool,
):
    """Build a FocusedIterationStrategy from settings, with kwargs taking precedence.

    When ``standard_citation`` is true the strategy additionally receives a
    ``CitationHandler`` created with ``handler_type="standard"`` and
    ``use_browsecomp_optimization=True``.
    """
    from .advanced_search_system.strategies.focused_iteration_strategy import (
        FocusedIterationStrategy,
    )

    extra_options: Dict[str, Any] = {}
    if standard_citation:
        from .citation_handler import CitationHandler

        # Standard citation handler; the question generator selection below
        # is the same as for the regular focused-iteration strategy.
        extra_options["citation_handler"] = CitationHandler(
            model, handler_type="standard", settings_snapshot=settings_snapshot
        )
        extra_options["use_browsecomp_optimization"] = True  # Keep BrowseComp features

    def resolve(kwarg_name: str, setting_key: str, default: Any) -> Any:
        # An explicit kwarg wins; otherwise fall back to the settings
        # snapshot, then to the hard-coded default.
        return kwargs.get(
            kwarg_name, _get_setting(settings_snapshot, setting_key, default)
        )

    # adaptive_questions is stored as a 0/1 integer, convert to bool
    enable_adaptive = bool(
        resolve(
            "enable_adaptive_questions",
            "focused_iteration.adaptive_questions",
            0,
        )
    )
    knowledge_limit = resolve(
        "knowledge_summary_limit",
        "focused_iteration.knowledge_summary_limit",
        10,
    )
    snippet_truncate = resolve(
        "knowledge_snippet_truncate",
        "focused_iteration.snippet_truncate",
        200,
    )
    question_gen_type = resolve(
        "question_generator",
        "focused_iteration.question_generator",
        "browsecomp",
    )
    prompt_knowledge_truncate = resolve(
        "prompt_knowledge_truncate",
        "focused_iteration.prompt_knowledge_truncate",
        1500,
    )
    previous_searches_limit = resolve(
        "previous_searches_limit",
        "focused_iteration.previous_searches_limit",
        10,
    )

    # Convert 0 to None for "unlimited"
    if knowledge_limit == 0:
        knowledge_limit = None
    if snippet_truncate == 0:
        snippet_truncate = None
    if prompt_knowledge_truncate == 0:
        prompt_knowledge_truncate = None
    if previous_searches_limit == 0:
        previous_searches_limit = None

    strategy = FocusedIterationStrategy(
        model=model,
        search=search,
        all_links_of_system=all_links_of_system,
        max_iterations=kwargs.get("max_iterations", 8),
        questions_per_iteration=kwargs.get("questions_per_iteration", 5),
        settings_snapshot=settings_snapshot,
        # Options read from settings (with kwargs override)
        enable_adaptive_questions=enable_adaptive,
        enable_early_termination=kwargs.get("enable_early_termination", False),
        knowledge_summary_limit=knowledge_limit,
        knowledge_snippet_truncate=snippet_truncate,
        prompt_knowledge_truncate=prompt_knowledge_truncate,
        previous_searches_limit=previous_searches_limit,
        **extra_options,
    )

    # Override question generator if flexible is selected
    if question_gen_type == "flexible":
        from .advanced_search_system.questions.flexible_browsecomp_question import (
            FlexibleBrowseCompQuestionGenerator,
        )

        # Pass truncation settings to flexible generator
        strategy.question_generator = FlexibleBrowseCompQuestionGenerator(
            model,
            knowledge_truncate_length=prompt_knowledge_truncate,
            previous_searches_limit=previous_searches_limit,
        )

    return strategy


def create_strategy(
    strategy_name: str,
    model: BaseChatModel,
    search: Any,
    all_links_of_system: Optional[List[Dict]] = None,
    settings_snapshot: Optional[Dict] = None,
    research_context: Optional[Dict] = None,
    **kwargs,
):
    """
    Create a search strategy by name.

    The name is matched case-insensitively against the aliases listed in
    each branch below.  Strategy modules are imported lazily inside the
    matching branch, so only the selected strategy's module is imported.
    An unrecognised name logs a warning and yields a source-based strategy.

    Args:
        strategy_name: Name of the strategy to create
        model: Language model to use
        search: Search engine instance
        all_links_of_system: List of existing links; a new empty list is
            used when None
        settings_snapshot: Settings snapshot
        research_context: Research context for special strategies (accepted
            for interface compatibility; not read by this function)
        **kwargs: Additional strategy-specific parameters; each overrides
            the corresponding default (or settings value) of the selected
            strategy

    Returns:
        Strategy instance
    """
    if all_links_of_system is None:
        all_links_of_system = []

    strategy_name_lower = strategy_name.lower()

    # Arguments every strategy constructor below receives.
    base: Dict[str, Any] = {
        "model": model,
        "search": search,
        "all_links_of_system": all_links_of_system,
    }

    # Source-based strategy
    if strategy_name_lower in (
        "source-based",
        "source_based",
        "source_based_search",
    ):
        from .advanced_search_system.strategies.source_based_strategy import (
            SourceBasedSearchStrategy,
        )

        return SourceBasedSearchStrategy(
            **base,
            include_text_content=kwargs.get("include_text_content", True),
            use_cross_engine_filter=kwargs.get("use_cross_engine_filter", True),
            use_atomic_facts=kwargs.get("use_atomic_facts", False),
            settings_snapshot=settings_snapshot,
            search_original_query=kwargs.get("search_original_query", True),
        )

    # Focused iteration strategy
    if strategy_name_lower in ("focused-iteration", "focused_iteration"):
        return _create_focused_iteration(
            model,
            search,
            all_links_of_system,
            settings_snapshot,
            kwargs,
            standard_citation=False,
        )

    # Focused iteration strategy with standard citation handler
    if strategy_name_lower in (
        "focused-iteration-standard",
        "focused_iteration_standard",
    ):
        return _create_focused_iteration(
            model,
            search,
            all_links_of_system,
            settings_snapshot,
            kwargs,
            standard_citation=True,
        )

    # Iterative reasoning strategy (depth variant)
    if strategy_name_lower in (
        "iterative-reasoning",
        "iterative_reasoning",
        "iterative_reasoning_depth",
    ):
        from .advanced_search_system.strategies.iterative_reasoning_strategy import (
            IterativeReasoningStrategy,
        )

        return IterativeReasoningStrategy(**base)

    # News aggregation strategy
    if strategy_name_lower in ("news", "news_aggregation", "news-aggregation"):
        from .advanced_search_system.strategies.news_strategy import (
            NewsAggregationStrategy,
        )

        return NewsAggregationStrategy(**base)

    # IterDRAG strategy
    if strategy_name_lower == "iterdrag":
        from .advanced_search_system.strategies.iterdrag_strategy import (
            IterDRAGStrategy,
        )

        return IterDRAGStrategy(**base, settings_snapshot=settings_snapshot)

    # Parallel strategy
    if strategy_name_lower == "parallel":
        from .advanced_search_system.strategies.parallel_search_strategy import (
            ParallelSearchStrategy,
        )

        return ParallelSearchStrategy(
            **base,
            include_text_content=kwargs.get("include_text_content", True),
            use_cross_engine_filter=kwargs.get("use_cross_engine_filter", True),
            settings_snapshot=settings_snapshot,
        )

    # Rapid strategy
    if strategy_name_lower == "rapid":
        from .advanced_search_system.strategies.rapid_search_strategy import (
            RapidSearchStrategy,
        )

        return RapidSearchStrategy(**base, settings_snapshot=settings_snapshot)

    # Recursive decomposition strategy
    if strategy_name_lower in ("recursive", "recursive-decomposition"):
        from .advanced_search_system.strategies.recursive_decomposition_strategy import (
            RecursiveDecompositionStrategy,
        )

        return RecursiveDecompositionStrategy(
            **base, settings_snapshot=settings_snapshot
        )

    # Iterative reasoning strategy (different from iterative_reasoning_depth)
    if strategy_name_lower == "iterative":
        from .advanced_search_system.strategies.iterative_reasoning_strategy import (
            IterativeReasoningStrategy,
        )

        return IterativeReasoningStrategy(
            **base,
            max_iterations=kwargs.get("max_iterations", 20),
            confidence_threshold=kwargs.get("confidence_threshold", 0.95),
            search_iterations_per_round=kwargs.get(
                "search_iterations_per_round", 1
            ),
            # The caller-facing kwarg is "questions_per_iteration"; the
            # constructor parameter is "questions_per_search".
            questions_per_search=kwargs.get("questions_per_iteration", 3),
            settings_snapshot=settings_snapshot,
        )

    # Adaptive decomposition strategy
    if strategy_name_lower == "adaptive":
        from .advanced_search_system.strategies.adaptive_decomposition_strategy import (
            AdaptiveDecompositionStrategy,
        )

        return AdaptiveDecompositionStrategy(
            **base,
            # "max_steps" wins; "max_iterations" is accepted as a fallback.
            max_steps=kwargs.get("max_steps", kwargs.get("max_iterations", 5)),
            min_confidence=kwargs.get("min_confidence", 0.8),
            source_search_iterations=kwargs.get("source_search_iterations", 2),
            source_questions_per_iteration=kwargs.get(
                "source_questions_per_iteration",
                kwargs.get("questions_per_iteration", 3),
            ),
            settings_snapshot=settings_snapshot,
        )

    # Smart decomposition strategy
    if strategy_name_lower == "smart":
        from .advanced_search_system.strategies.smart_decomposition_strategy import (
            SmartDecompositionStrategy,
        )

        return SmartDecompositionStrategy(
            **base,
            max_iterations=kwargs.get("max_iterations", 5),
            source_search_iterations=kwargs.get("source_search_iterations", 2),
            source_questions_per_iteration=kwargs.get(
                "source_questions_per_iteration",
                kwargs.get("questions_per_iteration", 3),
            ),
            settings_snapshot=settings_snapshot,
        )

    # BrowseComp optimized strategy
    if strategy_name_lower == "browsecomp":
        from .advanced_search_system.strategies.browsecomp_optimized_strategy import (
            BrowseCompOptimizedStrategy,
        )

        return BrowseCompOptimizedStrategy(
            **base,
            max_browsecomp_iterations=kwargs.get("max_browsecomp_iterations", 15),
            confidence_threshold=kwargs.get("confidence_threshold", 0.9),
            max_iterations=kwargs.get("max_iterations", 5),
            questions_per_iteration=kwargs.get("questions_per_iteration", 3),
            settings_snapshot=settings_snapshot,
        )

    # Enhanced evidence-based strategy
    if strategy_name_lower == "evidence":
        from .advanced_search_system.strategies.evidence_based_strategy_v2 import (
            EnhancedEvidenceBasedStrategy,
        )

        return EnhancedEvidenceBasedStrategy(
            **base,
            max_iterations=kwargs.get("max_iterations", 20),
            confidence_threshold=kwargs.get("confidence_threshold", 0.95),
            candidate_limit=kwargs.get("candidate_limit", 20),
            evidence_threshold=kwargs.get("evidence_threshold", 0.9),
            max_search_iterations=kwargs.get("max_search_iterations", 5),
            questions_per_iteration=kwargs.get("questions_per_iteration", 3),
            min_candidates_threshold=kwargs.get("min_candidates_threshold", 10),
            enable_pattern_learning=kwargs.get("enable_pattern_learning", True),
            settings_snapshot=settings_snapshot,
        )

    # Constrained search strategy
    if strategy_name_lower == "constrained":
        from .advanced_search_system.strategies.constrained_search_strategy import (
            ConstrainedSearchStrategy,
        )

        return ConstrainedSearchStrategy(
            **base,
            **_pick_options(kwargs, _CONSTRAINED_DEFAULTS),
            settings_snapshot=settings_snapshot,
        )

    # Parallel constrained strategy
    if strategy_name_lower in ("parallel-constrained", "parallel_constrained"):
        from .advanced_search_system.strategies.parallel_constrained_strategy import (
            ParallelConstrainedStrategy,
        )

        return ParallelConstrainedStrategy(
            **base,
            **_pick_options(kwargs, _PARALLEL_CONSTRAINED_DEFAULTS),
            settings_snapshot=settings_snapshot,
        )

    # Early stop constrained strategy
    if strategy_name_lower in (
        "early-stop-constrained",
        "early_stop_constrained",
    ):
        from .advanced_search_system.strategies.early_stop_constrained_strategy import (
            EarlyStopConstrainedStrategy,
        )

        return EarlyStopConstrainedStrategy(
            **base,
            **_pick_options(kwargs, _EARLY_STOP_DEFAULTS),
            settings_snapshot=settings_snapshot,
        )

    # Smart query strategy
    if strategy_name_lower in ("smart-query", "smart_query"):
        from .advanced_search_system.strategies.smart_query_strategy import (
            SmartQueryStrategy,
        )

        return SmartQueryStrategy(
            **base,
            **_pick_options(kwargs, _SMART_QUERY_DEFAULTS),
            settings_snapshot=settings_snapshot,
        )

    # Dual confidence strategy
    if strategy_name_lower in ("dual-confidence", "dual_confidence"):
        from .advanced_search_system.strategies.dual_confidence_strategy import (
            DualConfidenceStrategy,
        )

        return DualConfidenceStrategy(
            **base,
            **_pick_options(kwargs, _DUAL_CONFIDENCE_DEFAULTS),
            settings_snapshot=settings_snapshot,
        )

    # Dual confidence with rejection strategy
    if strategy_name_lower in (
        "dual-confidence-with-rejection",
        "dual_confidence_with_rejection",
    ):
        from .advanced_search_system.strategies.dual_confidence_with_rejection import (
            DualConfidenceWithRejectionStrategy,
        )

        return DualConfidenceWithRejectionStrategy(
            **base,
            **_pick_options(kwargs, _DUAL_REJECTION_DEFAULTS),
            settings_snapshot=settings_snapshot,
        )

    # Concurrent dual confidence strategy
    if strategy_name_lower in (
        "concurrent-dual-confidence",
        "concurrent_dual_confidence",
    ):
        from .advanced_search_system.strategies.concurrent_dual_confidence_strategy import (
            ConcurrentDualConfidenceStrategy,
        )

        return ConcurrentDualConfidenceStrategy(
            **base,
            **_pick_options(kwargs, _CONCURRENT_DUAL_DEFAULTS),
            settings_snapshot=settings_snapshot,
        )

    # Constraint parallel strategy
    if strategy_name_lower in ("constraint-parallel", "constraint_parallel"):
        from .advanced_search_system.strategies.constraint_parallel_strategy import (
            ConstraintParallelStrategy,
        )

        return ConstraintParallelStrategy(
            **base,
            **_pick_options(kwargs, _DUAL_REJECTION_DEFAULTS),
            settings_snapshot=settings_snapshot,
        )

    # Modular strategy
    if strategy_name_lower in ("modular", "modular-strategy"):
        from .advanced_search_system.strategies.modular_strategy import (
            ModularStrategy,
        )

        return ModularStrategy(
            **base,
            constraint_checker_type=kwargs.get(
                "constraint_checker_type", "dual_confidence"
            ),
            exploration_strategy=kwargs.get("exploration_strategy", "adaptive"),
            early_rejection=kwargs.get("early_rejection", True),
            early_stopping=kwargs.get("early_stopping", True),
            llm_constraint_processing=kwargs.get(
                "llm_constraint_processing", True
            ),
            immediate_evaluation=kwargs.get("immediate_evaluation", True),
            settings_snapshot=settings_snapshot,
        )

    # Modular parallel strategy (fixed configuration; kwargs are not consulted)
    if strategy_name_lower in ("modular-parallel", "modular_parallel"):
        from .advanced_search_system.strategies.modular_strategy import (
            ModularStrategy,
        )

        return ModularStrategy(
            **base,
            constraint_checker_type="dual_confidence",
            exploration_strategy="parallel",
            settings_snapshot=settings_snapshot,
        )

    # BrowseComp entity strategy
    if strategy_name_lower in ("browsecomp-entity", "browsecomp_entity"):
        from .advanced_search_system.strategies.browsecomp_entity_strategy import (
            BrowseCompEntityStrategy,
        )

        return BrowseCompEntityStrategy(**base)

    # Topic organization strategy
    if strategy_name_lower in (
        "topic-organization",
        "topic_organization",
        "topic",
    ):
        from .advanced_search_system.strategies.topic_organization_strategy import (
            TopicOrganizationStrategy,
        )

        return TopicOrganizationStrategy(
            **base,
            settings_snapshot=settings_snapshot,
            min_sources_per_topic=1,  # Allow single-source topics
            use_cross_engine_filter=kwargs.get("use_cross_engine_filter", True),
            filter_reorder=kwargs.get("filter_reorder", True),
            filter_reindex=kwargs.get("filter_reindex", True),
            cross_engine_max_results=kwargs.get(  # type: ignore[arg-type]
                "cross_engine_max_results", None
            ),
            search_original_query=kwargs.get("search_original_query", True),
            max_topics=kwargs.get("max_topics", 5),
            similarity_threshold=kwargs.get("similarity_threshold", 0.3),
            use_focused_iteration=kwargs.get("use_focused_iteration", False),
            enable_refinement=kwargs.get(
                "enable_refinement", False
            ),  # Disable refinement iterations for now
            max_refinement_iterations=kwargs.get(
                "max_refinement_iterations",
                1,  # Set to 1 iteration for faster results
            ),
            generate_text=kwargs.get("generate_text", True),
        )

    # Iterative refinement strategy
    refinement_names = ("iterative-refinement", "iterative_refinement")
    if strategy_name_lower in refinement_names:
        from .advanced_search_system.strategies.iterative_refinement_strategy import (
            IterativeRefinementStrategy,
        )

        # Get the initial strategy to use (default to source-based)
        initial_strategy_name = kwargs.get("initial_strategy", "source-based")

        # Guard: wrapping iterative refinement in itself would make the
        # recursive create_strategy call below never terminate.
        if (
            isinstance(initial_strategy_name, str)
            and initial_strategy_name.lower() in refinement_names
        ):
            logger.warning(
                "initial_strategy cannot be iterative refinement itself, "
                "defaulting to source-based"
            )
            initial_strategy_name = "source-based"

        # Create the initial strategy
        initial_strategy = create_strategy(
            strategy_name=initial_strategy_name,
            model=model,
            search=search,
            all_links_of_system=[],  # Fresh list for initial strategy
            settings_snapshot=settings_snapshot,
            search_original_query=kwargs.get("search_original_query", True),
        )

        return IterativeRefinementStrategy(
            model=model,
            search=search,
            initial_strategy=initial_strategy,
            all_links_of_system=all_links_of_system,
            settings_snapshot=settings_snapshot,
            evaluation_frequency=kwargs.get("evaluation_frequency", 1),
            max_refinements=kwargs.get("max_refinements", 3),
            confidence_threshold=kwargs.get(
                "confidence_threshold", 0.95
            ),  # Increased from 0.8
        )

    # Standard strategy
    if strategy_name_lower == "standard":
        from .advanced_search_system.strategies.standard_strategy import (
            StandardSearchStrategy,
        )

        return StandardSearchStrategy(**base, settings_snapshot=settings_snapshot)

    # MCP strategy (ReAct pattern - agentic research)
    if strategy_name_lower in ("mcp", "agentic"):
        from .advanced_search_system.strategies.mcp_strategy import (
            MCPSearchStrategy,
        )

        # Get MCP server configurations from kwargs, falling back to settings
        mcp_servers = kwargs.get(
            "mcp_servers",
            _get_setting(settings_snapshot, "mcp.servers", []),
        )

        return MCPSearchStrategy(
            **base,
            mcp_servers=mcp_servers,
            max_iterations=kwargs.get("max_iterations", 10),
            include_web_search=kwargs.get("include_web_search", True),
            settings_snapshot=settings_snapshot,
        )

    # LangGraph agent strategy (parallel subagent research)
    if strategy_name_lower in ("langgraph-agent", "langgraph_agent"):
        from .advanced_search_system.strategies.langgraph_agent_strategy import (
            LangGraphAgentStrategy,
        )

        return LangGraphAgentStrategy(
            **base,
            max_iterations=kwargs.get(
                "max_iterations",
                _get_setting(
                    settings_snapshot, "langgraph_agent.max_iterations", 50
                ),
            ),
            max_sub_iterations=kwargs.get(
                "max_sub_iterations",
                _get_setting(
                    settings_snapshot, "langgraph_agent.max_sub_iterations", 8
                ),
            ),
            include_sub_research=kwargs.get(
                "include_sub_research",
                _get_setting(
                    settings_snapshot,
                    "langgraph_agent.include_sub_research",
                    True,
                ),
            ),
            settings_snapshot=settings_snapshot,
        )

    # Default to source-based if unknown.  Unlike the named source-based
    # branch, only search_original_query is taken from kwargs here.
    logger.warning(
        f"Unknown strategy: {strategy_name}, defaulting to source-based"
    )
    from .advanced_search_system.strategies.source_based_strategy import (
        SourceBasedSearchStrategy,
    )

    return SourceBasedSearchStrategy(
        **base,
        include_text_content=True,
        use_cross_engine_filter=True,
        use_atomic_facts=False,
        settings_snapshot=settings_snapshot,
        search_original_query=kwargs.get("search_original_query", True),
    )