Coverage for src / local_deep_research / search_system_factory.py: 92%

144 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-02-25 01:07 +0000

1""" 

2Factory for creating search strategies. 

3This module provides a centralized way to create search strategies 

4to avoid code duplication. 

5""" 

6 

7from loguru import logger 

8from typing import Optional, Dict, Any, List 

9from langchain_core.language_models import BaseChatModel 

10 

11 

def _get_setting(
    settings_snapshot: Optional[Dict], key: str, default: Any
) -> Any:
    """Look up ``key`` in a settings snapshot, unwrapping nested entries.

    A snapshot entry may be stored either as a bare value or as a dict that
    carries the real value under its ``"value"`` key; in the latter case the
    inner value is returned.

    Args:
        settings_snapshot: Mapping of setting keys to entries, or ``None``.
        key: Setting key to look up.
        default: Returned when the snapshot is ``None``/empty or has no
            entry for ``key``.

    Returns:
        The (unwrapped) setting value, or ``default`` when it is absent.
    """
    if settings_snapshot and key in settings_snapshot:
        entry = settings_snapshot[key]
        is_wrapped = isinstance(entry, dict) and "value" in entry
        return entry["value"] if is_wrapped else entry
    return default

23 

24 

# Defaults for the options shared by the constrained-search family of
# strategies. Each table extends the previous one, so a default common to
# several strategies is written down exactly once and the per-strategy
# differences are explicit.
_CONSTRAINED_DEFAULTS: Dict[str, Any] = {
    "max_iterations": 20,
    "confidence_threshold": 0.95,
    "candidate_limit": 100,
    "evidence_threshold": 0.9,
    "max_search_iterations": 5,
    "questions_per_iteration": 3,
    "min_candidates_per_stage": 20,
}
_PARALLEL_CONSTRAINED_DEFAULTS: Dict[str, Any] = {
    **_CONSTRAINED_DEFAULTS,
    "parallel_workers": 100,
}
_EARLY_STOP_CONSTRAINED_DEFAULTS: Dict[str, Any] = {
    **_PARALLEL_CONSTRAINED_DEFAULTS,
    "early_stop_threshold": 0.99,
    "concurrent_evaluation": True,
}
_SMART_QUERY_DEFAULTS: Dict[str, Any] = {
    **_EARLY_STOP_CONSTRAINED_DEFAULTS,
    "use_llm_query_generation": True,
    "queries_per_combination": 3,
}
_DUAL_CONFIDENCE_DEFAULTS: Dict[str, Any] = {
    **_SMART_QUERY_DEFAULTS,
    "early_stop_threshold": 0.95,  # overrides the 0.99 used by smart-query
    "use_entity_seeding": True,
    "use_direct_property_search": True,
    "uncertainty_penalty": 0.2,
    "negative_weight": 0.5,
}
_DUAL_CONFIDENCE_WITH_REJECTION_DEFAULTS: Dict[str, Any] = {
    **_DUAL_CONFIDENCE_DEFAULTS,
    "rejection_threshold": 0.3,
    "positive_threshold": 0.2,
    "critical_constraint_rejection": 0.2,
}
_CONCURRENT_DUAL_CONFIDENCE_DEFAULTS: Dict[str, Any] = {
    **_DUAL_CONFIDENCE_WITH_REJECTION_DEFAULTS,
    "parallel_workers": 10,  # overrides the 100 used by the rest of the family
    "min_good_candidates": 3,
    "target_candidates": 5,
    "max_candidates": 10,
    "min_score_threshold": 0.65,
    "exceptional_score": 0.95,
    "quality_plateau_threshold": 0.1,
    "max_search_time": 30.0,
    "max_evaluations": 30,
}


def _options_from_kwargs(
    kwargs: Dict[str, Any], defaults: Dict[str, Any]
) -> Dict[str, Any]:
    """Return every option named in ``defaults``, preferring the value in ``kwargs``."""
    return {name: kwargs.get(name, default) for name, default in defaults.items()}


def _zero_as_unlimited(limit: Any) -> Any:
    """Translate a limit equal to 0 into ``None`` (meaning "unlimited")."""
    return None if limit == 0 else limit


def _create_focused_iteration_strategy(
    model: Any,
    search: Any,
    all_links_of_system: List[Dict],
    settings_snapshot: Optional[Dict],
    kwargs: Dict[str, Any],
    standard_citations: bool = False,
):
    """Build a FocusedIterationStrategy for both focused-iteration variants.

    Tuning options are resolved with the precedence: explicit kwarg, then
    the ``focused_iteration.*`` entry in ``settings_snapshot``, then a
    built-in default.

    Args:
        model: Language model to use.
        search: Search engine instance.
        all_links_of_system: List of existing links.
        settings_snapshot: Settings snapshot (may be ``None``).
        kwargs: The caller's strategy-specific overrides.
        standard_citations: When true, build the "standard" variant: a
            ``CitationHandler`` with ``handler_type="standard"`` is passed
            as ``citation_handler`` and ``use_browsecomp_optimization`` is
            set to ``True``.

    Returns:
        The configured FocusedIterationStrategy instance.
    """
    from .advanced_search_system.strategies.focused_iteration_strategy import (
        FocusedIterationStrategy,
    )

    variant_options: Dict[str, Any] = {}
    if standard_citations:
        from .citation_handler import CitationHandler

        # Use standard citation handler (same question generator as
        # regular focused-iteration)
        variant_options["citation_handler"] = CitationHandler(
            model, handler_type="standard", settings_snapshot=settings_snapshot
        )
        # Keep BrowseComp features
        variant_options["use_browsecomp_optimization"] = True

    def resolve(kwarg_name: str, setting_key: str, default: Any) -> Any:
        """Pick the kwarg if given, else the stored setting, else the default."""
        return kwargs.get(
            kwarg_name, _get_setting(settings_snapshot, setting_key, default)
        )

    # adaptive_questions is stored as 0/1 integer, convert to bool
    enable_adaptive = bool(
        resolve(
            "enable_adaptive_questions",
            "focused_iteration.adaptive_questions",
            0,
        )
    )
    question_gen_type = resolve(
        "question_generator",
        "focused_iteration.question_generator",
        "browsecomp",
    )
    # For the four limits below, 0 means "unlimited" and is passed on as None.
    knowledge_limit = _zero_as_unlimited(
        resolve(
            "knowledge_summary_limit",
            "focused_iteration.knowledge_summary_limit",
            10,
        )
    )
    snippet_truncate = _zero_as_unlimited(
        resolve(
            "knowledge_snippet_truncate",
            "focused_iteration.snippet_truncate",
            200,
        )
    )
    prompt_knowledge_truncate = _zero_as_unlimited(
        resolve(
            "prompt_knowledge_truncate",
            "focused_iteration.prompt_knowledge_truncate",
            1500,
        )
    )
    previous_searches_limit = _zero_as_unlimited(
        resolve(
            "previous_searches_limit",
            "focused_iteration.previous_searches_limit",
            10,
        )
    )

    strategy = FocusedIterationStrategy(
        model=model,
        search=search,
        all_links_of_system=all_links_of_system,
        max_iterations=kwargs.get("max_iterations", 8),
        questions_per_iteration=kwargs.get("questions_per_iteration", 5),
        settings_snapshot=settings_snapshot,
        # Options read from settings (with kwargs override)
        enable_adaptive_questions=enable_adaptive,
        enable_early_termination=kwargs.get("enable_early_termination", False),
        knowledge_summary_limit=knowledge_limit,
        knowledge_snippet_truncate=snippet_truncate,
        prompt_knowledge_truncate=prompt_knowledge_truncate,
        previous_searches_limit=previous_searches_limit,
        **variant_options,
    )

    # Override question generator if flexible is selected
    if question_gen_type == "flexible":
        from .advanced_search_system.questions.flexible_browsecomp_question import (
            FlexibleBrowseCompQuestionGenerator,
        )

        # Pass truncation settings to flexible generator
        strategy.question_generator = FlexibleBrowseCompQuestionGenerator(
            model,
            knowledge_truncate_length=prompt_knowledge_truncate,
            previous_searches_limit=previous_searches_limit,
        )

    return strategy


def create_strategy(
    strategy_name: str,
    model: BaseChatModel,
    search: Any,
    all_links_of_system: Optional[List[Dict]] = None,
    settings_snapshot: Optional[Dict] = None,
    research_context: Optional[Dict] = None,
    **kwargs,
):
    """
    Create a search strategy by name.

    The name is matched case-insensitively against the aliases listed in
    each branch below. Strategy modules are imported lazily, inside the
    branch that needs them, so only the selected strategy's module is
    loaded. An unrecognised name logs a warning and falls back to the
    source-based strategy.

    Args:
        strategy_name: Name of the strategy to create
        model: Language model to use
        search: Search engine instance
        all_links_of_system: List of existing links; a new empty list is
            used when this is None
        settings_snapshot: Settings snapshot
        research_context: Research context for special strategies
            (accepted for interface compatibility; not read by this
            function)
        **kwargs: Additional strategy-specific parameters. Each strategy
            reads only the keys it knows; a key that is absent takes the
            default shown in the corresponding branch.

    Returns:
        Strategy instance
    """
    if all_links_of_system is None:
        all_links_of_system = []

    name = strategy_name.lower()

    # Constructor arguments that (almost) every strategy takes.
    base: Dict[str, Any] = {
        "model": model,
        "search": search,
        "all_links_of_system": all_links_of_system,
    }
    base_with_settings: Dict[str, Any] = {
        **base,
        "settings_snapshot": settings_snapshot,
    }

    # Source-based strategy
    if name in ("source-based", "source_based", "source_based_search"):
        from .advanced_search_system.strategies.source_based_strategy import (
            SourceBasedSearchStrategy,
        )

        return SourceBasedSearchStrategy(
            **base_with_settings,
            include_text_content=kwargs.get("include_text_content", True),
            use_cross_engine_filter=kwargs.get("use_cross_engine_filter", True),
            use_atomic_facts=kwargs.get("use_atomic_facts", False),
            search_original_query=kwargs.get("search_original_query", True),
        )

    # Focused iteration strategy
    if name in ("focused-iteration", "focused_iteration"):
        return _create_focused_iteration_strategy(
            model, search, all_links_of_system, settings_snapshot, kwargs
        )

    # Focused iteration strategy with standard citation handler
    if name in ("focused-iteration-standard", "focused_iteration_standard"):
        return _create_focused_iteration_strategy(
            model,
            search,
            all_links_of_system,
            settings_snapshot,
            kwargs,
            standard_citations=True,
        )

    # Iterative reasoning strategy (depth variant)
    if name in (
        "iterative-reasoning",
        "iterative_reasoning",
        "iterative_reasoning_depth",
    ):
        from .advanced_search_system.strategies.iterative_reasoning_strategy import (
            IterativeReasoningStrategy,
        )

        # This variant is built without a settings snapshot.
        return IterativeReasoningStrategy(**base)

    # News aggregation strategy
    if name in ("news", "news_aggregation", "news-aggregation"):
        from .advanced_search_system.strategies.news_strategy import (
            NewsAggregationStrategy,
        )

        return NewsAggregationStrategy(**base)

    # IterDRAG strategy
    if name == "iterdrag":
        from .advanced_search_system.strategies.iterdrag_strategy import (
            IterDRAGStrategy,
        )

        return IterDRAGStrategy(**base_with_settings)

    # Parallel strategy
    if name == "parallel":
        from .advanced_search_system.strategies.parallel_search_strategy import (
            ParallelSearchStrategy,
        )

        return ParallelSearchStrategy(
            **base_with_settings,
            include_text_content=kwargs.get("include_text_content", True),
            use_cross_engine_filter=kwargs.get("use_cross_engine_filter", True),
        )

    # Rapid strategy
    if name == "rapid":
        from .advanced_search_system.strategies.rapid_search_strategy import (
            RapidSearchStrategy,
        )

        return RapidSearchStrategy(**base_with_settings)

    # Recursive decomposition strategy
    if name in ("recursive", "recursive-decomposition"):
        from .advanced_search_system.strategies.recursive_decomposition_strategy import (
            RecursiveDecompositionStrategy,
        )

        return RecursiveDecompositionStrategy(**base_with_settings)

    # Iterative reasoning strategy (different from iterative_reasoning_depth)
    if name == "iterative":
        from .advanced_search_system.strategies.iterative_reasoning_strategy import (
            IterativeReasoningStrategy,
        )

        return IterativeReasoningStrategy(
            **base_with_settings,
            max_iterations=kwargs.get("max_iterations", 20),
            confidence_threshold=kwargs.get("confidence_threshold", 0.95),
            search_iterations_per_round=kwargs.get(
                "search_iterations_per_round", 1
            ),
            # The caller-facing kwarg is "questions_per_iteration"; the
            # strategy's parameter is named "questions_per_search".
            questions_per_search=kwargs.get("questions_per_iteration", 3),
        )

    # Adaptive decomposition strategy
    if name == "adaptive":
        from .advanced_search_system.strategies.adaptive_decomposition_strategy import (
            AdaptiveDecompositionStrategy,
        )

        return AdaptiveDecompositionStrategy(
            **base_with_settings,
            # "max_steps" wins; otherwise fall back to "max_iterations".
            max_steps=kwargs.get("max_steps", kwargs.get("max_iterations", 5)),
            min_confidence=kwargs.get("min_confidence", 0.8),
            source_search_iterations=kwargs.get("source_search_iterations", 2),
            source_questions_per_iteration=kwargs.get(
                "source_questions_per_iteration",
                kwargs.get("questions_per_iteration", 3),
            ),
        )

    # Smart decomposition strategy
    if name == "smart":
        from .advanced_search_system.strategies.smart_decomposition_strategy import (
            SmartDecompositionStrategy,
        )

        return SmartDecompositionStrategy(
            **base_with_settings,
            max_iterations=kwargs.get("max_iterations", 5),
            source_search_iterations=kwargs.get("source_search_iterations", 2),
            source_questions_per_iteration=kwargs.get(
                "source_questions_per_iteration",
                kwargs.get("questions_per_iteration", 3),
            ),
        )

    # BrowseComp optimized strategy
    if name == "browsecomp":
        from .advanced_search_system.strategies.browsecomp_optimized_strategy import (
            BrowseCompOptimizedStrategy,
        )

        return BrowseCompOptimizedStrategy(
            **base_with_settings,
            max_browsecomp_iterations=kwargs.get("max_browsecomp_iterations", 15),
            confidence_threshold=kwargs.get("confidence_threshold", 0.9),
            max_iterations=kwargs.get("max_iterations", 5),
            questions_per_iteration=kwargs.get("questions_per_iteration", 3),
        )

    # Enhanced evidence-based strategy
    if name == "evidence":
        from .advanced_search_system.strategies.evidence_based_strategy_v2 import (
            EnhancedEvidenceBasedStrategy,
        )

        return EnhancedEvidenceBasedStrategy(
            **base_with_settings,
            max_iterations=kwargs.get("max_iterations", 20),
            confidence_threshold=kwargs.get("confidence_threshold", 0.95),
            candidate_limit=kwargs.get("candidate_limit", 20),
            evidence_threshold=kwargs.get("evidence_threshold", 0.9),
            max_search_iterations=kwargs.get("max_search_iterations", 5),
            questions_per_iteration=kwargs.get("questions_per_iteration", 3),
            min_candidates_threshold=kwargs.get("min_candidates_threshold", 10),
            enable_pattern_learning=kwargs.get("enable_pattern_learning", True),
        )

    # Constrained search strategy
    if name == "constrained":
        from .advanced_search_system.strategies.constrained_search_strategy import (
            ConstrainedSearchStrategy,
        )

        return ConstrainedSearchStrategy(
            **base_with_settings,
            **_options_from_kwargs(kwargs, _CONSTRAINED_DEFAULTS),
        )

    # Parallel constrained strategy
    if name in ("parallel-constrained", "parallel_constrained"):
        from .advanced_search_system.strategies.parallel_constrained_strategy import (
            ParallelConstrainedStrategy,
        )

        return ParallelConstrainedStrategy(
            **base_with_settings,
            **_options_from_kwargs(kwargs, _PARALLEL_CONSTRAINED_DEFAULTS),
        )

    # Early stop constrained strategy
    if name in ("early-stop-constrained", "early_stop_constrained"):
        from .advanced_search_system.strategies.early_stop_constrained_strategy import (
            EarlyStopConstrainedStrategy,
        )

        return EarlyStopConstrainedStrategy(
            **base_with_settings,
            **_options_from_kwargs(kwargs, _EARLY_STOP_CONSTRAINED_DEFAULTS),
        )

    # Smart query strategy
    if name in ("smart-query", "smart_query"):
        from .advanced_search_system.strategies.smart_query_strategy import (
            SmartQueryStrategy,
        )

        return SmartQueryStrategy(
            **base_with_settings,
            **_options_from_kwargs(kwargs, _SMART_QUERY_DEFAULTS),
        )

    # Dual confidence strategy
    if name in ("dual-confidence", "dual_confidence"):
        from .advanced_search_system.strategies.dual_confidence_strategy import (
            DualConfidenceStrategy,
        )

        return DualConfidenceStrategy(
            **base_with_settings,
            **_options_from_kwargs(kwargs, _DUAL_CONFIDENCE_DEFAULTS),
        )

    # Dual confidence with rejection strategy
    if name in (
        "dual-confidence-with-rejection",
        "dual_confidence_with_rejection",
    ):
        from .advanced_search_system.strategies.dual_confidence_with_rejection import (
            DualConfidenceWithRejectionStrategy,
        )

        return DualConfidenceWithRejectionStrategy(
            **base_with_settings,
            **_options_from_kwargs(
                kwargs, _DUAL_CONFIDENCE_WITH_REJECTION_DEFAULTS
            ),
        )

    # Concurrent dual confidence strategy
    if name in ("concurrent-dual-confidence", "concurrent_dual_confidence"):
        from .advanced_search_system.strategies.concurrent_dual_confidence_strategy import (
            ConcurrentDualConfidenceStrategy,
        )

        return ConcurrentDualConfidenceStrategy(
            **base_with_settings,
            **_options_from_kwargs(kwargs, _CONCURRENT_DUAL_CONFIDENCE_DEFAULTS),
        )

    # Constraint parallel strategy
    if name in ("constraint-parallel", "constraint_parallel"):
        from .advanced_search_system.strategies.constraint_parallel_strategy import (
            ConstraintParallelStrategy,
        )

        # Takes the same options and defaults as dual-confidence-with-rejection.
        return ConstraintParallelStrategy(
            **base_with_settings,
            **_options_from_kwargs(
                kwargs, _DUAL_CONFIDENCE_WITH_REJECTION_DEFAULTS
            ),
        )

    # Modular strategy
    if name in ("modular", "modular-strategy"):
        from .advanced_search_system.strategies.modular_strategy import (
            ModularStrategy,
        )

        return ModularStrategy(
            **base_with_settings,
            constraint_checker_type=kwargs.get(
                "constraint_checker_type", "dual_confidence"
            ),
            exploration_strategy=kwargs.get("exploration_strategy", "adaptive"),
            early_rejection=kwargs.get("early_rejection", True),
            early_stopping=kwargs.get("early_stopping", True),
            llm_constraint_processing=kwargs.get(
                "llm_constraint_processing", True
            ),
            immediate_evaluation=kwargs.get("immediate_evaluation", True),
        )

    # Modular parallel strategy
    if name in ("modular-parallel", "modular_parallel"):
        from .advanced_search_system.strategies.modular_strategy import (
            ModularStrategy,
        )

        # Fixed configuration: kwargs are not consulted for this variant.
        return ModularStrategy(
            **base_with_settings,
            constraint_checker_type="dual_confidence",
            exploration_strategy="parallel",
        )

    # BrowseComp entity strategy
    if name in ("browsecomp-entity", "browsecomp_entity"):
        from .advanced_search_system.strategies.browsecomp_entity_strategy import (
            BrowseCompEntityStrategy,
        )

        return BrowseCompEntityStrategy(**base)

    # Topic organization strategy
    if name in ("topic-organization", "topic_organization", "topic"):
        from .advanced_search_system.strategies.topic_organization_strategy import (
            TopicOrganizationStrategy,
        )

        return TopicOrganizationStrategy(
            **base_with_settings,
            min_sources_per_topic=1,  # Allow single-source topics
            use_cross_engine_filter=kwargs.get("use_cross_engine_filter", True),
            filter_reorder=kwargs.get("filter_reorder", True),
            filter_reindex=kwargs.get("filter_reindex", True),
            cross_engine_max_results=kwargs.get("cross_engine_max_results", None),
            search_original_query=kwargs.get("search_original_query", True),
            max_topics=kwargs.get("max_topics", 5),
            similarity_threshold=kwargs.get("similarity_threshold", 0.3),
            # NOTE(review): HARDCODED TO TRUE for testing - original was
            # kwargs.get("use_focused_iteration", False). A caller-supplied
            # "use_focused_iteration" is currently ignored; confirm whether
            # this override should stay.
            use_focused_iteration=True,
            # Disable refinement iterations for now
            enable_refinement=kwargs.get("enable_refinement", False),
            # Set to 1 iteration for faster results
            max_refinement_iterations=kwargs.get("max_refinement_iterations", 1),
            generate_text=kwargs.get("generate_text", True),
        )

    # Iterative refinement strategy
    if name in ("iterative-refinement", "iterative_refinement"):
        from .advanced_search_system.strategies.iterative_refinement_strategy import (
            IterativeRefinementStrategy,
        )

        # Create the initial strategy to refine (default: source-based).
        # Only "search_original_query" is forwarded to it; the other kwargs
        # apply to the refinement wrapper alone.
        initial_strategy = create_strategy(
            strategy_name=kwargs.get("initial_strategy", "source-based"),
            model=model,
            search=search,
            all_links_of_system=[],  # Fresh list for initial strategy
            settings_snapshot=settings_snapshot,
            search_original_query=kwargs.get("search_original_query", True),
        )

        return IterativeRefinementStrategy(
            **base_with_settings,
            initial_strategy=initial_strategy,
            evaluation_frequency=kwargs.get("evaluation_frequency", 1),
            max_refinements=kwargs.get("max_refinements", 3),
            # Increased from 0.8
            confidence_threshold=kwargs.get("confidence_threshold", 0.95),
        )

    # Standard strategy
    if name == "standard":
        from .advanced_search_system.strategies.standard_strategy import (
            StandardSearchStrategy,
        )

        return StandardSearchStrategy(**base_with_settings)

    # Default to source-based if unknown
    logger.warning(
        f"Unknown strategy: {strategy_name}, defaulting to source-based"
    )
    from .advanced_search_system.strategies.source_based_strategy import (
        SourceBasedSearchStrategy,
    )

    # Unlike the explicit source-based branch, the fallback pins these three
    # flags and only honours "search_original_query" from kwargs.
    return SourceBasedSearchStrategy(
        **base_with_settings,
        include_text_content=True,
        use_cross_engine_filter=True,
        use_atomic_facts=False,
        search_original_query=kwargs.get("search_original_query", True),
    )

882 settings_snapshot=settings_snapshot, 

883 search_original_query=kwargs.get("search_original_query", True), 

884 )