Coverage for src/local_deep_research/search_system

1"""

2Factory for creating search strategies.

3This module provides a centralized way to create search strategies

4to avoid code duplication.

5"""

7from loguru import logger

8from typing import Optional, Dict, Any, List

9from langchain_core.language_models import BaseChatModel

12def _get_setting(

13 settings_snapshot: Optional[Dict], key: str, default: Any

14) -> Any:

15 """Get a setting value from the snapshot, handling nested dict structure."""

16 if not settings_snapshot or key not in settings_snapshot:

17 return default

18 value = settings_snapshot[key]

19 # Extract value from dict structure if needed

20 if isinstance(value, dict) and "value" in value:

21 return value["value"]

22 return value

# Default values for the keyword options shared by the constraint-driven
# strategies. Each table extends the previous one; a later key overrides an
# earlier one with the same name (e.g. ``early_stop_threshold``).
_CONSTRAINED_DEFAULTS: Dict[str, Any] = {
    "max_iterations": 20,
    "confidence_threshold": 0.95,
    "candidate_limit": 100,
    "evidence_threshold": 0.9,
    "max_search_iterations": 5,
    "questions_per_iteration": 3,
    "min_candidates_per_stage": 20,
}
_PARALLEL_CONSTRAINED_DEFAULTS: Dict[str, Any] = {
    **_CONSTRAINED_DEFAULTS,
    "parallel_workers": 100,
}
_EARLY_STOP_DEFAULTS: Dict[str, Any] = {
    **_PARALLEL_CONSTRAINED_DEFAULTS,
    "early_stop_threshold": 0.99,
    "concurrent_evaluation": True,
}
_SMART_QUERY_DEFAULTS: Dict[str, Any] = {
    **_EARLY_STOP_DEFAULTS,
    "use_llm_query_generation": True,
    "queries_per_combination": 3,
}
_DUAL_CONFIDENCE_DEFAULTS: Dict[str, Any] = {
    **_SMART_QUERY_DEFAULTS,
    "early_stop_threshold": 0.95,
    "use_entity_seeding": True,
    "use_direct_property_search": True,
    "uncertainty_penalty": 0.2,
    "negative_weight": 0.5,
}
_REJECTION_DEFAULTS: Dict[str, Any] = {
    **_DUAL_CONFIDENCE_DEFAULTS,
    "rejection_threshold": 0.3,
    "positive_threshold": 0.2,
    "critical_constraint_rejection": 0.2,
}
_CONCURRENT_DUAL_DEFAULTS: Dict[str, Any] = {
    **_REJECTION_DEFAULTS,
    "parallel_workers": 10,
    "min_good_candidates": 3,
    "target_candidates": 5,
    "max_candidates": 10,
    "min_score_threshold": 0.65,
    "exceptional_score": 0.95,
    "quality_plateau_threshold": 0.1,
    "max_search_time": 30.0,
    "max_evaluations": 30,
}


def _options_with_defaults(
    kwargs: Dict[str, Any], defaults: Dict[str, Any]
) -> Dict[str, Any]:
    """Return ``{name: kwargs.get(name, default)}`` for every entry of ``defaults``."""
    return {name: kwargs.get(name, default) for name, default in defaults.items()}


def _zero_means_unlimited(value: Any) -> Any:
    """Map a limit of ``0`` to ``None`` ("unlimited"); pass other values through."""
    return None if value == 0 else value


def _build_focused_iteration_strategy(
    model: Any,
    search: Any,
    all_links_of_system: List[Dict],
    settings_snapshot: Optional[Dict],
    kwargs: Dict[str, Any],
    standard_citations: bool = False,
):
    """Build a FocusedIterationStrategy from settings, with kwargs taking priority.

    Args:
        model: Language model handed to the strategy (and to the optional
            citation handler / flexible question generator).
        search: Search engine instance.
        all_links_of_system: Shared list of links collected so far.
        settings_snapshot: Settings snapshot read through ``_get_setting``.
        kwargs: Caller-supplied overrides; a key present here wins over the
            corresponding ``focused_iteration.*`` setting.
        standard_citations: When true, a ``CitationHandler`` with
            ``handler_type="standard"`` is created and passed to the
            strategy together with ``use_browsecomp_optimization=True``.

    Returns:
        The configured strategy instance.
    """
    from .advanced_search_system.strategies.focused_iteration_strategy import (
        FocusedIterationStrategy,
    )

    extra_init_kwargs: Dict[str, Any] = {}
    if standard_citations:
        from .citation_handler import CitationHandler

        extra_init_kwargs = {
            "citation_handler": CitationHandler(
                model,
                handler_type="standard",
                settings_snapshot=settings_snapshot,
            ),
            "use_browsecomp_optimization": True,  # Keep BrowseComp features
        }

    def option(kwarg_name: str, setting_key: str, default: Any) -> Any:
        # kwargs override the settings snapshot, which overrides the default.
        return kwargs.get(
            kwarg_name, _get_setting(settings_snapshot, setting_key, default)
        )

    # adaptive_questions is stored as a 0/1 integer, so convert to bool.
    enable_adaptive = bool(
        option(
            "enable_adaptive_questions",
            "focused_iteration.adaptive_questions",
            0,
        )
    )
    question_gen_type = option(
        "question_generator",
        "focused_iteration.question_generator",
        "browsecomp",
    )
    # For the four limits below, 0 means "unlimited" and is passed on as None.
    knowledge_limit = _zero_means_unlimited(
        option(
            "knowledge_summary_limit",
            "focused_iteration.knowledge_summary_limit",
            10,
        )
    )
    snippet_truncate = _zero_means_unlimited(
        option(
            "knowledge_snippet_truncate",
            "focused_iteration.snippet_truncate",
            200,
        )
    )
    prompt_knowledge_truncate = _zero_means_unlimited(
        option(
            "prompt_knowledge_truncate",
            "focused_iteration.prompt_knowledge_truncate",
            1500,
        )
    )
    previous_searches_limit = _zero_means_unlimited(
        option(
            "previous_searches_limit",
            "focused_iteration.previous_searches_limit",
            10,
        )
    )

    strategy = FocusedIterationStrategy(
        model=model,
        search=search,
        all_links_of_system=all_links_of_system,
        max_iterations=kwargs.get("max_iterations", 8),
        questions_per_iteration=kwargs.get("questions_per_iteration", 5),
        settings_snapshot=settings_snapshot,
        enable_adaptive_questions=enable_adaptive,
        enable_early_termination=kwargs.get("enable_early_termination", False),
        knowledge_summary_limit=knowledge_limit,
        knowledge_snippet_truncate=snippet_truncate,
        prompt_knowledge_truncate=prompt_knowledge_truncate,
        previous_searches_limit=previous_searches_limit,
        **extra_init_kwargs,
    )

    # Swap in the flexible question generator when it is selected, passing
    # it the same truncation settings the strategy received.
    if question_gen_type == "flexible":
        from .advanced_search_system.questions.flexible_browsecomp_question import (
            FlexibleBrowseCompQuestionGenerator,
        )

        strategy.question_generator = FlexibleBrowseCompQuestionGenerator(
            model,
            knowledge_truncate_length=prompt_knowledge_truncate,
            previous_searches_limit=previous_searches_limit,
        )

    return strategy


def create_strategy(
    strategy_name: str,
    model: BaseChatModel,
    search: Any,
    all_links_of_system: Optional[List[Dict]] = None,
    settings_snapshot: Optional[Dict] = None,
    research_context: Optional[Dict] = None,
    **kwargs,
):
    """
    Create a search strategy by name.

    The name is matched case-insensitively; most strategies accept both a
    hyphenated and an underscored spelling. Strategy classes are imported
    lazily inside the matching branch.

    Args:
        strategy_name: Name of the strategy to create
        model: Language model to use
        search: Search engine instance
        all_links_of_system: List of existing links (a new empty list is
            used when ``None``)
        settings_snapshot: Settings snapshot
        research_context: Research context for special strategies
            (accepted for interface compatibility; not read by this function)
        **kwargs: Additional strategy-specific parameters. Each option falls
            back to a per-strategy default when absent.

    Returns:
        Strategy instance. An unrecognised name logs a warning and yields a
        source-based strategy.
    """
    if all_links_of_system is None:
        all_links_of_system = []

    name = strategy_name.lower()
    opt = kwargs.get

    # Constructor arguments shared by (almost) every strategy.
    core = {
        "model": model,
        "search": search,
        "all_links_of_system": all_links_of_system,
    }
    core_with_settings = {**core, "settings_snapshot": settings_snapshot}

    # Source-based strategy
    if name in ("source-based", "source_based", "source_based_search"):
        from .advanced_search_system.strategies.source_based_strategy import (
            SourceBasedSearchStrategy,
        )

        return SourceBasedSearchStrategy(
            **core_with_settings,
            include_text_content=opt("include_text_content", True),
            use_cross_engine_filter=opt("use_cross_engine_filter", True),
            use_atomic_facts=opt("use_atomic_facts", False),
            search_original_query=opt("search_original_query", True),
        )

    # Focused iteration strategy
    if name in ("focused-iteration", "focused_iteration"):
        return _build_focused_iteration_strategy(
            model, search, all_links_of_system, settings_snapshot, kwargs
        )

    # Focused iteration strategy with standard citation handler
    # (same question generator as regular focused-iteration)
    if name in ("focused-iteration-standard", "focused_iteration_standard"):
        return _build_focused_iteration_strategy(
            model,
            search,
            all_links_of_system,
            settings_snapshot,
            kwargs,
            standard_citations=True,
        )

    # Iterative reasoning strategy (depth variant)
    if name in (
        "iterative-reasoning",
        "iterative_reasoning",
        "iterative_reasoning_depth",
    ):
        from .advanced_search_system.strategies.iterative_reasoning_strategy import (
            IterativeReasoningStrategy,
        )

        return IterativeReasoningStrategy(**core)

    # News aggregation strategy
    if name in ("news", "news_aggregation", "news-aggregation"):
        from .advanced_search_system.strategies.news_strategy import (
            NewsAggregationStrategy,
        )

        return NewsAggregationStrategy(**core)

    # IterDRAG strategy
    if name == "iterdrag":
        from .advanced_search_system.strategies.iterdrag_strategy import (
            IterDRAGStrategy,
        )

        return IterDRAGStrategy(**core_with_settings)

    # Parallel strategy
    if name == "parallel":
        from .advanced_search_system.strategies.parallel_search_strategy import (
            ParallelSearchStrategy,
        )

        return ParallelSearchStrategy(
            **core_with_settings,
            include_text_content=opt("include_text_content", True),
            use_cross_engine_filter=opt("use_cross_engine_filter", True),
        )

    # Rapid strategy
    if name == "rapid":
        from .advanced_search_system.strategies.rapid_search_strategy import (
            RapidSearchStrategy,
        )

        return RapidSearchStrategy(**core_with_settings)

    # Recursive decomposition strategy
    if name in ("recursive", "recursive-decomposition"):
        from .advanced_search_system.strategies.recursive_decomposition_strategy import (
            RecursiveDecompositionStrategy,
        )

        return RecursiveDecompositionStrategy(**core_with_settings)

    # Iterative reasoning strategy (different from iterative_reasoning_depth)
    if name == "iterative":
        from .advanced_search_system.strategies.iterative_reasoning_strategy import (
            IterativeReasoningStrategy,
        )

        return IterativeReasoningStrategy(
            **core_with_settings,
            max_iterations=opt("max_iterations", 20),
            confidence_threshold=opt("confidence_threshold", 0.95),
            search_iterations_per_round=opt("search_iterations_per_round", 1),
            # The caller-facing option is named questions_per_iteration.
            questions_per_search=opt("questions_per_iteration", 3),
        )

    # Adaptive decomposition strategy
    if name == "adaptive":
        from .advanced_search_system.strategies.adaptive_decomposition_strategy import (
            AdaptiveDecompositionStrategy,
        )

        return AdaptiveDecompositionStrategy(
            **core_with_settings,
            # max_steps falls back to max_iterations, then to 5.
            max_steps=opt("max_steps", opt("max_iterations", 5)),
            min_confidence=opt("min_confidence", 0.8),
            source_search_iterations=opt("source_search_iterations", 2),
            source_questions_per_iteration=opt(
                "source_questions_per_iteration",
                opt("questions_per_iteration", 3),
            ),
        )

    # Smart decomposition strategy
    if name == "smart":
        from .advanced_search_system.strategies.smart_decomposition_strategy import (
            SmartDecompositionStrategy,
        )

        return SmartDecompositionStrategy(
            **core_with_settings,
            max_iterations=opt("max_iterations", 5),
            source_search_iterations=opt("source_search_iterations", 2),
            source_questions_per_iteration=opt(
                "source_questions_per_iteration",
                opt("questions_per_iteration", 3),
            ),
        )

    # BrowseComp optimized strategy
    if name == "browsecomp":
        from .advanced_search_system.strategies.browsecomp_optimized_strategy import (
            BrowseCompOptimizedStrategy,
        )

        return BrowseCompOptimizedStrategy(
            **core_with_settings,
            max_browsecomp_iterations=opt("max_browsecomp_iterations", 15),
            confidence_threshold=opt("confidence_threshold", 0.9),
            max_iterations=opt("max_iterations", 5),
            questions_per_iteration=opt("questions_per_iteration", 3),
        )

    # Enhanced evidence-based strategy
    if name == "evidence":
        from .advanced_search_system.strategies.evidence_based_strategy_v2 import (
            EnhancedEvidenceBasedStrategy,
        )

        return EnhancedEvidenceBasedStrategy(
            **core_with_settings,
            max_iterations=opt("max_iterations", 20),
            confidence_threshold=opt("confidence_threshold", 0.95),
            candidate_limit=opt("candidate_limit", 20),
            evidence_threshold=opt("evidence_threshold", 0.9),
            max_search_iterations=opt("max_search_iterations", 5),
            questions_per_iteration=opt("questions_per_iteration", 3),
            min_candidates_threshold=opt("min_candidates_threshold", 10),
            enable_pattern_learning=opt("enable_pattern_learning", True),
        )

    # Constrained search strategy
    if name == "constrained":
        from .advanced_search_system.strategies.constrained_search_strategy import (
            ConstrainedSearchStrategy,
        )

        return ConstrainedSearchStrategy(
            **core_with_settings,
            **_options_with_defaults(kwargs, _CONSTRAINED_DEFAULTS),
        )

    # Parallel constrained strategy
    if name in ("parallel-constrained", "parallel_constrained"):
        from .advanced_search_system.strategies.parallel_constrained_strategy import (
            ParallelConstrainedStrategy,
        )

        return ParallelConstrainedStrategy(
            **core_with_settings,
            **_options_with_defaults(kwargs, _PARALLEL_CONSTRAINED_DEFAULTS),
        )

    # Early stop constrained strategy
    if name in ("early-stop-constrained", "early_stop_constrained"):
        from .advanced_search_system.strategies.early_stop_constrained_strategy import (
            EarlyStopConstrainedStrategy,
        )

        return EarlyStopConstrainedStrategy(
            **core_with_settings,
            **_options_with_defaults(kwargs, _EARLY_STOP_DEFAULTS),
        )

    # Smart query strategy
    if name in ("smart-query", "smart_query"):
        from .advanced_search_system.strategies.smart_query_strategy import (
            SmartQueryStrategy,
        )

        return SmartQueryStrategy(
            **core_with_settings,
            **_options_with_defaults(kwargs, _SMART_QUERY_DEFAULTS),
        )

    # Dual confidence strategy
    if name in ("dual-confidence", "dual_confidence"):
        from .advanced_search_system.strategies.dual_confidence_strategy import (
            DualConfidenceStrategy,
        )

        return DualConfidenceStrategy(
            **core_with_settings,
            **_options_with_defaults(kwargs, _DUAL_CONFIDENCE_DEFAULTS),
        )

    # Dual confidence with rejection strategy
    if name in (
        "dual-confidence-with-rejection",
        "dual_confidence_with_rejection",
    ):
        from .advanced_search_system.strategies.dual_confidence_with_rejection import (
            DualConfidenceWithRejectionStrategy,
        )

        return DualConfidenceWithRejectionStrategy(
            **core_with_settings,
            **_options_with_defaults(kwargs, _REJECTION_DEFAULTS),
        )

    # Concurrent dual confidence strategy
    if name in ("concurrent-dual-confidence", "concurrent_dual_confidence"):
        from .advanced_search_system.strategies.concurrent_dual_confidence_strategy import (
            ConcurrentDualConfidenceStrategy,
        )

        return ConcurrentDualConfidenceStrategy(
            **core_with_settings,
            **_options_with_defaults(kwargs, _CONCURRENT_DUAL_DEFAULTS),
        )

    # Constraint parallel strategy
    if name in ("constraint-parallel", "constraint_parallel"):
        from .advanced_search_system.strategies.constraint_parallel_strategy import (
            ConstraintParallelStrategy,
        )

        return ConstraintParallelStrategy(
            **core_with_settings,
            **_options_with_defaults(kwargs, _REJECTION_DEFAULTS),
        )

    # Modular strategy
    if name in ("modular", "modular-strategy"):
        from .advanced_search_system.strategies.modular_strategy import (
            ModularStrategy,
        )

        return ModularStrategy(
            **core_with_settings,
            constraint_checker_type=opt(
                "constraint_checker_type", "dual_confidence"
            ),
            exploration_strategy=opt("exploration_strategy", "adaptive"),
            early_rejection=opt("early_rejection", True),
            early_stopping=opt("early_stopping", True),
            llm_constraint_processing=opt("llm_constraint_processing", True),
            immediate_evaluation=opt("immediate_evaluation", True),
        )

    # Modular parallel strategy (fixed checker/exploration configuration)
    if name in ("modular-parallel", "modular_parallel"):
        from .advanced_search_system.strategies.modular_strategy import (
            ModularStrategy,
        )

        return ModularStrategy(
            **core_with_settings,
            constraint_checker_type="dual_confidence",
            exploration_strategy="parallel",
        )

    # BrowseComp entity strategy
    if name in ("browsecomp-entity", "browsecomp_entity"):
        from .advanced_search_system.strategies.browsecomp_entity_strategy import (
            BrowseCompEntityStrategy,
        )

        return BrowseCompEntityStrategy(**core)

    # Topic organization strategy
    if name in ("topic-organization", "topic_organization", "topic"):
        from .advanced_search_system.strategies.topic_organization_strategy import (
            TopicOrganizationStrategy,
        )

        return TopicOrganizationStrategy(
            **core_with_settings,
            min_sources_per_topic=1,  # Allow single-source topics
            use_cross_engine_filter=opt("use_cross_engine_filter", True),
            filter_reorder=opt("filter_reorder", True),
            filter_reindex=opt("filter_reindex", True),
            cross_engine_max_results=opt("cross_engine_max_results", None),
            search_original_query=opt("search_original_query", True),
            max_topics=opt("max_topics", 5),
            similarity_threshold=opt("similarity_threshold", 0.3),
            # Previously hard-coded to True, which ignored the caller's
            # keyword. The default stays True; an explicit value now wins.
            use_focused_iteration=opt("use_focused_iteration", True),
            # Refinement iterations are off unless requested.
            enable_refinement=opt("enable_refinement", False),
            # Default of 1 iteration for faster results.
            max_refinement_iterations=opt("max_refinement_iterations", 1),
            generate_text=opt("generate_text", True),
        )

    # Iterative refinement strategy
    if name in ("iterative-refinement", "iterative_refinement"):
        from .advanced_search_system.strategies.iterative_refinement_strategy import (
            IterativeRefinementStrategy,
        )

        # Build the wrapped initial strategy (source-based by default). It
        # gets its own fresh link list; only search_original_query is
        # forwarded from kwargs.
        initial_strategy = create_strategy(
            strategy_name=opt("initial_strategy", "source-based"),
            model=model,
            search=search,
            all_links_of_system=[],
            settings_snapshot=settings_snapshot,
            search_original_query=opt("search_original_query", True),
        )

        return IterativeRefinementStrategy(
            **core_with_settings,
            initial_strategy=initial_strategy,
            evaluation_frequency=opt("evaluation_frequency", 1),
            max_refinements=opt("max_refinements", 3),
            confidence_threshold=opt("confidence_threshold", 0.95),
        )

    # Standard strategy
    if name == "standard":
        from .advanced_search_system.strategies.standard_strategy import (
            StandardSearchStrategy,
        )

        return StandardSearchStrategy(**core_with_settings)

    # Default to source-based if unknown
    logger.warning(
        f"Unknown strategy: {strategy_name}, defaulting to source-based"
    )
    from .advanced_search_system.strategies.source_based_strategy import (
        SourceBasedSearchStrategy,
    )

    return SourceBasedSearchStrategy(
        **core_with_settings,
        include_text_content=True,
        use_cross_engine_filter=True,
        use_atomic_facts=False,
        search_original_query=opt("search_original_query", True),
    )

Coverage for src / local_deep_research / search_system_factory.py: 92%

144 statements