Coverage for src / local_deep_research / search_system_factory.py: 95%
152 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
1"""
2Factory for creating search strategies.
3This module provides a centralized way to create search strategies
4to avoid code duplication.
5"""
7from loguru import logger
8from typing import Optional, Dict, Any, List
9from langchain_core.language_models import BaseChatModel
11# Re-export from constants so existing importers don't break
12from .constants import ( # noqa: F401
13 ALL_STRATEGIES,
14 AVAILABLE_STRATEGIES,
15 get_available_strategies,
16)
19def _get_setting(
20 settings_snapshot: Optional[Dict], key: str, default: Any
21) -> Any:
22 """Get a setting value from the snapshot, handling nested dict structure."""
23 if not settings_snapshot or key not in settings_snapshot:
24 return default
25 value = settings_snapshot[key]
26 # Extract value from dict structure if needed
27 if isinstance(value, dict) and "value" in value:
28 return value["value"]
29 return value
def _build_focused_iteration_strategy(
    model: BaseChatModel,
    search: Any,
    all_links_of_system: List[Dict],
    settings_snapshot: Optional[Dict],
    options: Dict[str, Any],
    **strategy_overrides: Any,
):
    """Build a FocusedIterationStrategy shared by both focused-iteration variants.

    Each option is taken from ``options`` (the caller's kwargs) when present,
    otherwise from the matching ``focused_iteration.*`` key of
    ``settings_snapshot``, otherwise from the hard-coded default below.

    Args:
        model: Language model handed to the strategy (and to the flexible
            question generator when that one is selected).
        search: Search engine instance.
        all_links_of_system: Link list passed through to the strategy.
        settings_snapshot: Settings snapshot, may be ``None``.
        options: The caller's keyword overrides.
        **strategy_overrides: Extra constructor keywords that differ between
            the variants (e.g. ``citation_handler``).

    Returns:
        The configured FocusedIterationStrategy instance.
    """
    from .advanced_search_system.strategies.focused_iteration_strategy import (
        FocusedIterationStrategy,
    )

    def _option(kwarg_name: str, setting_key: str, default: Any) -> Any:
        # kwargs win over the settings snapshot, which wins over the default.
        return options.get(
            kwarg_name, _get_setting(settings_snapshot, setting_key, default)
        )

    def _unlimited(limit: Any) -> Any:
        # A stored 0 means "unlimited", which the strategy expects as None.
        return None if limit == 0 else limit

    # adaptive_questions is stored as 0/1 integer, convert to bool
    enable_adaptive = bool(
        _option(
            "enable_adaptive_questions",
            "focused_iteration.adaptive_questions",
            0,
        )
    )
    knowledge_limit = _option(
        "knowledge_summary_limit",
        "focused_iteration.knowledge_summary_limit",
        10,
    )
    snippet_truncate = _option(
        "knowledge_snippet_truncate",
        "focused_iteration.snippet_truncate",
        200,
    )
    question_gen_type = _option(
        "question_generator",
        "focused_iteration.question_generator",
        "browsecomp",
    )
    prompt_knowledge_truncate = _option(
        "prompt_knowledge_truncate",
        "focused_iteration.prompt_knowledge_truncate",
        1500,
    )
    previous_searches_limit = _option(
        "previous_searches_limit",
        "focused_iteration.previous_searches_limit",
        10,
    )

    # Convert 0 to None for "unlimited"
    knowledge_limit = _unlimited(knowledge_limit)
    snippet_truncate = _unlimited(snippet_truncate)
    prompt_knowledge_truncate = _unlimited(prompt_knowledge_truncate)
    previous_searches_limit = _unlimited(previous_searches_limit)

    strategy = FocusedIterationStrategy(
        model=model,
        search=search,
        all_links_of_system=all_links_of_system,
        max_iterations=options.get("max_iterations", 8),
        questions_per_iteration=options.get("questions_per_iteration", 5),
        settings_snapshot=settings_snapshot,
        # Options read from settings (with kwargs override)
        enable_adaptive_questions=enable_adaptive,
        enable_early_termination=options.get("enable_early_termination", False),
        knowledge_summary_limit=knowledge_limit,
        knowledge_snippet_truncate=snippet_truncate,
        prompt_knowledge_truncate=prompt_knowledge_truncate,
        previous_searches_limit=previous_searches_limit,
        **strategy_overrides,
    )

    # Override question generator if flexible is selected
    if question_gen_type == "flexible":
        from .advanced_search_system.questions.flexible_browsecomp_question import (
            FlexibleBrowseCompQuestionGenerator,
        )

        # Pass truncation settings to flexible generator
        strategy.question_generator = FlexibleBrowseCompQuestionGenerator(
            model,
            knowledge_truncate_length=prompt_knowledge_truncate,
            previous_searches_limit=previous_searches_limit,
        )

    return strategy


def create_strategy(
    strategy_name: str,
    model: BaseChatModel,
    search: Any,
    all_links_of_system: Optional[List[Dict]] = None,
    settings_snapshot: Optional[Dict] = None,
    research_context: Optional[Dict] = None,
    **kwargs,
):
    """
    Create a search strategy by name.

    The name is matched case-insensitively against the aliases listed in each
    branch below. Strategy classes are imported inside their branch, so only
    the selected strategy's module is imported. An unrecognised name logs a
    warning and falls back to the source-based strategy.

    Args:
        strategy_name: Name of the strategy to create
        model: Language model to use
        search: Search engine instance
        all_links_of_system: List of existing links (a new empty list is
            used when None)
        settings_snapshot: Settings snapshot
        research_context: Research context for special strategies (accepted
            for interface compatibility; not read in this function)
        **kwargs: Additional strategy-specific parameters; each branch reads
            only the keys it knows and applies its own defaults

    Returns:
        Strategy instance
    """
    if all_links_of_system is None:
        all_links_of_system = []

    strategy_name_lower = strategy_name.lower()

    # Source-based strategy
    if strategy_name_lower in (
        "source-based",
        "source_based",
        "source_based_search",
    ):
        from .advanced_search_system.strategies.source_based_strategy import (
            SourceBasedSearchStrategy,
        )

        return SourceBasedSearchStrategy(
            model=model,
            search=search,
            include_text_content=kwargs.get("include_text_content", True),
            use_cross_engine_filter=kwargs.get("use_cross_engine_filter", True),
            all_links_of_system=all_links_of_system,
            use_atomic_facts=kwargs.get("use_atomic_facts", False),
            settings_snapshot=settings_snapshot,
            search_original_query=kwargs.get("search_original_query", True),
        )

    # Focused iteration strategy
    if strategy_name_lower in ("focused-iteration", "focused_iteration"):
        return _build_focused_iteration_strategy(
            model,
            search,
            all_links_of_system,
            settings_snapshot,
            kwargs,
        )

    # Focused iteration strategy with standard citation handler
    if strategy_name_lower in (
        "focused-iteration-standard",
        "focused_iteration_standard",
    ):
        from .citation_handler import CitationHandler

        # Use standard citation handler (same question generator as regular focused-iteration)
        standard_citation_handler = CitationHandler(
            model, handler_type="standard", settings_snapshot=settings_snapshot
        )

        return _build_focused_iteration_strategy(
            model,
            search,
            all_links_of_system,
            settings_snapshot,
            kwargs,
            citation_handler=standard_citation_handler,
            use_browsecomp_optimization=True,  # Keep BrowseComp features
        )

    # Iterative reasoning strategy (depth variant)
    if strategy_name_lower in (
        "iterative-reasoning",
        "iterative_reasoning",
        "iterative_reasoning_depth",
    ):
        from .advanced_search_system.strategies.iterative_reasoning_strategy import (
            IterativeReasoningStrategy,
        )

        return IterativeReasoningStrategy(
            model=model,
            search=search,
            all_links_of_system=all_links_of_system,
        )

    # News aggregation strategy
    if strategy_name_lower in (
        "news",
        "news_aggregation",
        "news-aggregation",
    ):
        from .advanced_search_system.strategies.news_strategy import (
            NewsAggregationStrategy,
        )

        return NewsAggregationStrategy(
            model=model,
            search=search,
            all_links_of_system=all_links_of_system,
        )

    # IterDRAG strategy
    if strategy_name_lower == "iterdrag":
        from .advanced_search_system.strategies.iterdrag_strategy import (
            IterDRAGStrategy,
        )

        return IterDRAGStrategy(
            model=model,
            search=search,
            all_links_of_system=all_links_of_system,
            settings_snapshot=settings_snapshot,
        )

    # Parallel strategy
    if strategy_name_lower == "parallel":
        from .advanced_search_system.strategies.parallel_search_strategy import (
            ParallelSearchStrategy,
        )

        return ParallelSearchStrategy(
            model=model,
            search=search,
            include_text_content=kwargs.get("include_text_content", True),
            use_cross_engine_filter=kwargs.get("use_cross_engine_filter", True),
            all_links_of_system=all_links_of_system,
            settings_snapshot=settings_snapshot,
        )

    # Rapid strategy
    if strategy_name_lower == "rapid":
        from .advanced_search_system.strategies.rapid_search_strategy import (
            RapidSearchStrategy,
        )

        return RapidSearchStrategy(
            model=model,
            search=search,
            all_links_of_system=all_links_of_system,
            settings_snapshot=settings_snapshot,
        )

    # Recursive decomposition strategy
    if strategy_name_lower in ("recursive", "recursive-decomposition"):
        from .advanced_search_system.strategies.recursive_decomposition_strategy import (
            RecursiveDecompositionStrategy,
        )

        return RecursiveDecompositionStrategy(
            model=model,
            search=search,
            all_links_of_system=all_links_of_system,
            settings_snapshot=settings_snapshot,
        )

    # Iterative reasoning strategy (different from iterative_reasoning_depth)
    if strategy_name_lower == "iterative":
        from .advanced_search_system.strategies.iterative_reasoning_strategy import (
            IterativeReasoningStrategy,
        )

        # Get iteration settings from kwargs or use defaults
        max_iterations = kwargs.get("max_iterations", 20)
        questions_per_iteration = kwargs.get("questions_per_iteration", 3)
        search_iterations_per_round = kwargs.get(
            "search_iterations_per_round", 1
        )

        return IterativeReasoningStrategy(
            model=model,
            search=search,
            all_links_of_system=all_links_of_system,
            max_iterations=max_iterations,
            confidence_threshold=kwargs.get("confidence_threshold", 0.95),
            search_iterations_per_round=search_iterations_per_round,
            questions_per_search=questions_per_iteration,
            settings_snapshot=settings_snapshot,
        )

    # Adaptive decomposition strategy
    if strategy_name_lower == "adaptive":
        from .advanced_search_system.strategies.adaptive_decomposition_strategy import (
            AdaptiveDecompositionStrategy,
        )

        return AdaptiveDecompositionStrategy(
            model=model,
            search=search,
            all_links_of_system=all_links_of_system,
            max_steps=kwargs.get("max_steps", kwargs.get("max_iterations", 5)),
            min_confidence=kwargs.get("min_confidence", 0.8),
            source_search_iterations=kwargs.get("source_search_iterations", 2),
            source_questions_per_iteration=kwargs.get(
                "source_questions_per_iteration",
                kwargs.get("questions_per_iteration", 3),
            ),
            settings_snapshot=settings_snapshot,
        )

    # Smart decomposition strategy
    if strategy_name_lower == "smart":
        from .advanced_search_system.strategies.smart_decomposition_strategy import (
            SmartDecompositionStrategy,
        )

        return SmartDecompositionStrategy(
            model=model,
            search=search,
            all_links_of_system=all_links_of_system,
            max_iterations=kwargs.get("max_iterations", 5),
            source_search_iterations=kwargs.get("source_search_iterations", 2),
            source_questions_per_iteration=kwargs.get(
                "source_questions_per_iteration",
                kwargs.get("questions_per_iteration", 3),
            ),
            settings_snapshot=settings_snapshot,
        )

    # BrowseComp optimized strategy
    if strategy_name_lower == "browsecomp":
        from .advanced_search_system.strategies.browsecomp_optimized_strategy import (
            BrowseCompOptimizedStrategy,
        )

        return BrowseCompOptimizedStrategy(
            model=model,
            search=search,
            all_links_of_system=all_links_of_system,
            max_browsecomp_iterations=kwargs.get(
                "max_browsecomp_iterations", 15
            ),
            confidence_threshold=kwargs.get("confidence_threshold", 0.9),
            max_iterations=kwargs.get("max_iterations", 5),
            questions_per_iteration=kwargs.get("questions_per_iteration", 3),
            settings_snapshot=settings_snapshot,
        )

    # Enhanced evidence-based strategy
    if strategy_name_lower == "evidence":
        from .advanced_search_system.strategies.evidence_based_strategy_v2 import (
            EnhancedEvidenceBasedStrategy,
        )

        return EnhancedEvidenceBasedStrategy(
            model=model,
            search=search,
            all_links_of_system=all_links_of_system,
            max_iterations=kwargs.get("max_iterations", 20),
            confidence_threshold=kwargs.get("confidence_threshold", 0.95),
            candidate_limit=kwargs.get("candidate_limit", 20),
            evidence_threshold=kwargs.get("evidence_threshold", 0.9),
            max_search_iterations=kwargs.get("max_search_iterations", 5),
            questions_per_iteration=kwargs.get("questions_per_iteration", 3),
            min_candidates_threshold=kwargs.get("min_candidates_threshold", 10),
            enable_pattern_learning=kwargs.get("enable_pattern_learning", True),
            settings_snapshot=settings_snapshot,
        )

    # Constrained search strategy
    if strategy_name_lower == "constrained":
        from .advanced_search_system.strategies.constrained_search_strategy import (
            ConstrainedSearchStrategy,
        )

        return ConstrainedSearchStrategy(
            model=model,
            search=search,
            all_links_of_system=all_links_of_system,
            max_iterations=kwargs.get("max_iterations", 20),
            confidence_threshold=kwargs.get("confidence_threshold", 0.95),
            candidate_limit=kwargs.get("candidate_limit", 100),
            evidence_threshold=kwargs.get("evidence_threshold", 0.9),
            max_search_iterations=kwargs.get("max_search_iterations", 5),
            questions_per_iteration=kwargs.get("questions_per_iteration", 3),
            min_candidates_per_stage=kwargs.get("min_candidates_per_stage", 20),
            settings_snapshot=settings_snapshot,
        )

    # Parallel constrained strategy
    if strategy_name_lower in (
        "parallel-constrained",
        "parallel_constrained",
    ):
        from .advanced_search_system.strategies.parallel_constrained_strategy import (
            ParallelConstrainedStrategy,
        )

        return ParallelConstrainedStrategy(
            model=model,
            search=search,
            all_links_of_system=all_links_of_system,
            max_iterations=kwargs.get("max_iterations", 20),
            confidence_threshold=kwargs.get("confidence_threshold", 0.95),
            candidate_limit=kwargs.get("candidate_limit", 100),
            evidence_threshold=kwargs.get("evidence_threshold", 0.9),
            max_search_iterations=kwargs.get("max_search_iterations", 5),
            questions_per_iteration=kwargs.get("questions_per_iteration", 3),
            min_candidates_per_stage=kwargs.get("min_candidates_per_stage", 20),
            parallel_workers=kwargs.get("parallel_workers", 100),
            settings_snapshot=settings_snapshot,
        )

    # Early stop constrained strategy
    if strategy_name_lower in (
        "early-stop-constrained",
        "early_stop_constrained",
    ):
        from .advanced_search_system.strategies.early_stop_constrained_strategy import (
            EarlyStopConstrainedStrategy,
        )

        return EarlyStopConstrainedStrategy(
            model=model,
            search=search,
            all_links_of_system=all_links_of_system,
            max_iterations=kwargs.get("max_iterations", 20),
            confidence_threshold=kwargs.get("confidence_threshold", 0.95),
            candidate_limit=kwargs.get("candidate_limit", 100),
            evidence_threshold=kwargs.get("evidence_threshold", 0.9),
            max_search_iterations=kwargs.get("max_search_iterations", 5),
            questions_per_iteration=kwargs.get("questions_per_iteration", 3),
            min_candidates_per_stage=kwargs.get("min_candidates_per_stage", 20),
            parallel_workers=kwargs.get("parallel_workers", 100),
            early_stop_threshold=kwargs.get("early_stop_threshold", 0.99),
            concurrent_evaluation=kwargs.get("concurrent_evaluation", True),
            settings_snapshot=settings_snapshot,
        )

    # Smart query strategy
    if strategy_name_lower in ("smart-query", "smart_query"):
        from .advanced_search_system.strategies.smart_query_strategy import (
            SmartQueryStrategy,
        )

        return SmartQueryStrategy(
            model=model,
            search=search,
            all_links_of_system=all_links_of_system,
            max_iterations=kwargs.get("max_iterations", 20),
            confidence_threshold=kwargs.get("confidence_threshold", 0.95),
            candidate_limit=kwargs.get("candidate_limit", 100),
            evidence_threshold=kwargs.get("evidence_threshold", 0.9),
            max_search_iterations=kwargs.get("max_search_iterations", 5),
            questions_per_iteration=kwargs.get("questions_per_iteration", 3),
            min_candidates_per_stage=kwargs.get("min_candidates_per_stage", 20),
            parallel_workers=kwargs.get("parallel_workers", 100),
            early_stop_threshold=kwargs.get("early_stop_threshold", 0.99),
            concurrent_evaluation=kwargs.get("concurrent_evaluation", True),
            use_llm_query_generation=kwargs.get(
                "use_llm_query_generation", True
            ),
            queries_per_combination=kwargs.get("queries_per_combination", 3),
            settings_snapshot=settings_snapshot,
        )

    # Dual confidence strategy
    if strategy_name_lower in ("dual-confidence", "dual_confidence"):
        from .advanced_search_system.strategies.dual_confidence_strategy import (
            DualConfidenceStrategy,
        )

        return DualConfidenceStrategy(
            model=model,
            search=search,
            all_links_of_system=all_links_of_system,
            max_iterations=kwargs.get("max_iterations", 20),
            confidence_threshold=kwargs.get("confidence_threshold", 0.95),
            candidate_limit=kwargs.get("candidate_limit", 100),
            evidence_threshold=kwargs.get("evidence_threshold", 0.9),
            max_search_iterations=kwargs.get("max_search_iterations", 5),
            questions_per_iteration=kwargs.get("questions_per_iteration", 3),
            min_candidates_per_stage=kwargs.get("min_candidates_per_stage", 20),
            parallel_workers=kwargs.get("parallel_workers", 100),
            early_stop_threshold=kwargs.get("early_stop_threshold", 0.95),
            concurrent_evaluation=kwargs.get("concurrent_evaluation", True),
            use_llm_query_generation=kwargs.get(
                "use_llm_query_generation", True
            ),
            queries_per_combination=kwargs.get("queries_per_combination", 3),
            use_entity_seeding=kwargs.get("use_entity_seeding", True),
            use_direct_property_search=kwargs.get(
                "use_direct_property_search", True
            ),
            uncertainty_penalty=kwargs.get("uncertainty_penalty", 0.2),
            negative_weight=kwargs.get("negative_weight", 0.5),
            settings_snapshot=settings_snapshot,
        )

    # Dual confidence with rejection strategy
    if strategy_name_lower in (
        "dual-confidence-with-rejection",
        "dual_confidence_with_rejection",
    ):
        from .advanced_search_system.strategies.dual_confidence_with_rejection import (
            DualConfidenceWithRejectionStrategy,
        )

        return DualConfidenceWithRejectionStrategy(
            model=model,
            search=search,
            all_links_of_system=all_links_of_system,
            max_iterations=kwargs.get("max_iterations", 20),
            confidence_threshold=kwargs.get("confidence_threshold", 0.95),
            candidate_limit=kwargs.get("candidate_limit", 100),
            evidence_threshold=kwargs.get("evidence_threshold", 0.9),
            max_search_iterations=kwargs.get("max_search_iterations", 5),
            questions_per_iteration=kwargs.get("questions_per_iteration", 3),
            min_candidates_per_stage=kwargs.get("min_candidates_per_stage", 20),
            parallel_workers=kwargs.get("parallel_workers", 100),
            early_stop_threshold=kwargs.get("early_stop_threshold", 0.95),
            concurrent_evaluation=kwargs.get("concurrent_evaluation", True),
            use_llm_query_generation=kwargs.get(
                "use_llm_query_generation", True
            ),
            queries_per_combination=kwargs.get("queries_per_combination", 3),
            use_entity_seeding=kwargs.get("use_entity_seeding", True),
            use_direct_property_search=kwargs.get(
                "use_direct_property_search", True
            ),
            uncertainty_penalty=kwargs.get("uncertainty_penalty", 0.2),
            negative_weight=kwargs.get("negative_weight", 0.5),
            rejection_threshold=kwargs.get("rejection_threshold", 0.3),
            positive_threshold=kwargs.get("positive_threshold", 0.2),
            critical_constraint_rejection=kwargs.get(
                "critical_constraint_rejection", 0.2
            ),
            settings_snapshot=settings_snapshot,
        )

    # Concurrent dual confidence strategy
    if strategy_name_lower in (
        "concurrent-dual-confidence",
        "concurrent_dual_confidence",
    ):
        from .advanced_search_system.strategies.concurrent_dual_confidence_strategy import (
            ConcurrentDualConfidenceStrategy,
        )

        return ConcurrentDualConfidenceStrategy(
            model=model,
            search=search,
            all_links_of_system=all_links_of_system,
            max_iterations=kwargs.get("max_iterations", 20),
            confidence_threshold=kwargs.get("confidence_threshold", 0.95),
            candidate_limit=kwargs.get("candidate_limit", 100),
            evidence_threshold=kwargs.get("evidence_threshold", 0.9),
            max_search_iterations=kwargs.get("max_search_iterations", 5),
            questions_per_iteration=kwargs.get("questions_per_iteration", 3),
            min_candidates_per_stage=kwargs.get("min_candidates_per_stage", 20),
            parallel_workers=kwargs.get("parallel_workers", 10),
            early_stop_threshold=kwargs.get("early_stop_threshold", 0.95),
            concurrent_evaluation=kwargs.get("concurrent_evaluation", True),
            use_llm_query_generation=kwargs.get(
                "use_llm_query_generation", True
            ),
            queries_per_combination=kwargs.get("queries_per_combination", 3),
            use_entity_seeding=kwargs.get("use_entity_seeding", True),
            use_direct_property_search=kwargs.get(
                "use_direct_property_search", True
            ),
            uncertainty_penalty=kwargs.get("uncertainty_penalty", 0.2),
            negative_weight=kwargs.get("negative_weight", 0.5),
            rejection_threshold=kwargs.get("rejection_threshold", 0.3),
            positive_threshold=kwargs.get("positive_threshold", 0.2),
            critical_constraint_rejection=kwargs.get(
                "critical_constraint_rejection", 0.2
            ),
            min_good_candidates=kwargs.get("min_good_candidates", 3),
            target_candidates=kwargs.get("target_candidates", 5),
            max_candidates=kwargs.get("max_candidates", 10),
            min_score_threshold=kwargs.get("min_score_threshold", 0.65),
            exceptional_score=kwargs.get("exceptional_score", 0.95),
            quality_plateau_threshold=kwargs.get(
                "quality_plateau_threshold", 0.1
            ),
            max_search_time=kwargs.get("max_search_time", 30.0),
            max_evaluations=kwargs.get("max_evaluations", 30),
            settings_snapshot=settings_snapshot,
        )

    # Constraint parallel strategy
    if strategy_name_lower in (
        "constraint-parallel",
        "constraint_parallel",
    ):
        from .advanced_search_system.strategies.constraint_parallel_strategy import (
            ConstraintParallelStrategy,
        )

        return ConstraintParallelStrategy(
            model=model,
            search=search,
            all_links_of_system=all_links_of_system,
            max_iterations=kwargs.get("max_iterations", 20),
            confidence_threshold=kwargs.get("confidence_threshold", 0.95),
            candidate_limit=kwargs.get("candidate_limit", 100),
            evidence_threshold=kwargs.get("evidence_threshold", 0.9),
            max_search_iterations=kwargs.get("max_search_iterations", 5),
            questions_per_iteration=kwargs.get("questions_per_iteration", 3),
            min_candidates_per_stage=kwargs.get("min_candidates_per_stage", 20),
            parallel_workers=kwargs.get("parallel_workers", 100),
            early_stop_threshold=kwargs.get("early_stop_threshold", 0.95),
            concurrent_evaluation=kwargs.get("concurrent_evaluation", True),
            use_llm_query_generation=kwargs.get(
                "use_llm_query_generation", True
            ),
            queries_per_combination=kwargs.get("queries_per_combination", 3),
            use_entity_seeding=kwargs.get("use_entity_seeding", True),
            use_direct_property_search=kwargs.get(
                "use_direct_property_search", True
            ),
            uncertainty_penalty=kwargs.get("uncertainty_penalty", 0.2),
            negative_weight=kwargs.get("negative_weight", 0.5),
            rejection_threshold=kwargs.get("rejection_threshold", 0.3),
            positive_threshold=kwargs.get("positive_threshold", 0.2),
            critical_constraint_rejection=kwargs.get(
                "critical_constraint_rejection", 0.2
            ),
            settings_snapshot=settings_snapshot,
        )

    # Modular strategy
    if strategy_name_lower in ("modular", "modular-strategy"):
        from .advanced_search_system.strategies.modular_strategy import (
            ModularStrategy,
        )

        return ModularStrategy(
            model=model,
            search=search,
            all_links_of_system=all_links_of_system,
            constraint_checker_type=kwargs.get(
                "constraint_checker_type", "dual_confidence"
            ),
            exploration_strategy=kwargs.get("exploration_strategy", "adaptive"),
            early_rejection=kwargs.get("early_rejection", True),
            early_stopping=kwargs.get("early_stopping", True),
            llm_constraint_processing=kwargs.get(
                "llm_constraint_processing", True
            ),
            immediate_evaluation=kwargs.get("immediate_evaluation", True),
            settings_snapshot=settings_snapshot,
        )

    # Modular parallel strategy
    if strategy_name_lower in ("modular-parallel", "modular_parallel"):
        from .advanced_search_system.strategies.modular_strategy import (
            ModularStrategy,
        )

        # This variant pins its configuration; kwargs are not consulted.
        return ModularStrategy(
            model=model,
            search=search,
            all_links_of_system=all_links_of_system,
            constraint_checker_type="dual_confidence",
            exploration_strategy="parallel",
            settings_snapshot=settings_snapshot,
        )

    # BrowseComp entity strategy
    if strategy_name_lower in ("browsecomp-entity", "browsecomp_entity"):
        from .advanced_search_system.strategies.browsecomp_entity_strategy import (
            BrowseCompEntityStrategy,
        )

        return BrowseCompEntityStrategy(
            model=model,
            search=search,
            all_links_of_system=all_links_of_system,
        )

    # Topic organization strategy
    if strategy_name_lower in (
        "topic-organization",
        "topic_organization",
        "topic",
    ):
        from .advanced_search_system.strategies.topic_organization_strategy import (
            TopicOrganizationStrategy,
        )

        return TopicOrganizationStrategy(
            model=model,
            search=search,
            all_links_of_system=all_links_of_system,
            settings_snapshot=settings_snapshot,
            min_sources_per_topic=1,  # Allow single-source topics
            use_cross_engine_filter=kwargs.get("use_cross_engine_filter", True),
            filter_reorder=kwargs.get("filter_reorder", True),
            filter_reindex=kwargs.get("filter_reindex", True),
            cross_engine_max_results=kwargs.get(  # type: ignore[arg-type]
                "cross_engine_max_results", None
            ),
            search_original_query=kwargs.get("search_original_query", True),
            max_topics=kwargs.get("max_topics", 5),
            similarity_threshold=kwargs.get("similarity_threshold", 0.3),
            use_focused_iteration=kwargs.get("use_focused_iteration", False),
            enable_refinement=kwargs.get(
                "enable_refinement", False
            ),  # Disable refinement iterations for now
            max_refinement_iterations=kwargs.get(
                "max_refinement_iterations",
                1,  # Set to 1 iteration for faster results
            ),
            generate_text=kwargs.get("generate_text", True),
        )

    # Iterative refinement strategy
    if strategy_name_lower in (
        "iterative-refinement",
        "iterative_refinement",
    ):
        from .advanced_search_system.strategies.iterative_refinement_strategy import (
            IterativeRefinementStrategy,
        )

        # Get the initial strategy to use (default to source-based)
        initial_strategy_name = kwargs.get("initial_strategy", "source-based")

        # Create the initial strategy. Only search_original_query is
        # forwarded, so the nested call never sees "initial_strategy" and
        # cannot recurse more than one level.
        initial_strategy = create_strategy(
            strategy_name=initial_strategy_name,
            model=model,
            search=search,
            all_links_of_system=[],  # Fresh list for initial strategy
            settings_snapshot=settings_snapshot,
            search_original_query=kwargs.get("search_original_query", True),
        )

        return IterativeRefinementStrategy(
            model=model,
            search=search,
            initial_strategy=initial_strategy,
            all_links_of_system=all_links_of_system,
            settings_snapshot=settings_snapshot,
            evaluation_frequency=kwargs.get("evaluation_frequency", 1),
            max_refinements=kwargs.get("max_refinements", 3),
            confidence_threshold=kwargs.get(
                "confidence_threshold", 0.95
            ),  # Increased from 0.8
        )

    # Standard strategy
    if strategy_name_lower == "standard":
        from .advanced_search_system.strategies.standard_strategy import (
            StandardSearchStrategy,
        )

        return StandardSearchStrategy(
            model=model,
            search=search,
            all_links_of_system=all_links_of_system,
            settings_snapshot=settings_snapshot,
        )

    # MCP strategy (ReAct pattern - agentic research)
    if strategy_name_lower in ("mcp", "agentic"):
        from .advanced_search_system.strategies.mcp_strategy import (
            MCPSearchStrategy,
        )

        # Get MCP server configurations from settings
        mcp_servers = kwargs.get(
            "mcp_servers",
            _get_setting(settings_snapshot, "mcp.servers", []),
        )

        return MCPSearchStrategy(
            model=model,
            search=search,
            mcp_servers=mcp_servers,
            max_iterations=kwargs.get("max_iterations", 10),
            include_web_search=kwargs.get("include_web_search", True),
            all_links_of_system=all_links_of_system,
            settings_snapshot=settings_snapshot,
        )

    # LangGraph agent strategy (parallel subagent research)
    if strategy_name_lower in ("langgraph-agent", "langgraph_agent"):
        from .advanced_search_system.strategies.langgraph_agent_strategy import (
            LangGraphAgentStrategy,
        )

        return LangGraphAgentStrategy(
            model=model,
            search=search,
            max_iterations=kwargs.get(
                "max_iterations",
                _get_setting(
                    settings_snapshot, "langgraph_agent.max_iterations", 50
                ),
            ),
            max_sub_iterations=kwargs.get(
                "max_sub_iterations",
                _get_setting(
                    settings_snapshot, "langgraph_agent.max_sub_iterations", 8
                ),
            ),
            include_sub_research=kwargs.get(
                "include_sub_research",
                _get_setting(
                    settings_snapshot,
                    "langgraph_agent.include_sub_research",
                    True,
                ),
            ),
            all_links_of_system=all_links_of_system,
            settings_snapshot=settings_snapshot,
        )

    # Default to source-based if unknown
    logger.warning(
        f"Unknown strategy: {strategy_name}, defaulting to source-based"
    )
    from .advanced_search_system.strategies.source_based_strategy import (
        SourceBasedSearchStrategy,
    )

    # The fallback pins its flags; only search_original_query honours kwargs.
    return SourceBasedSearchStrategy(
        model=model,
        search=search,
        include_text_content=True,
        use_cross_engine_filter=True,
        all_links_of_system=all_links_of_system,
        use_atomic_facts=False,
        settings_snapshot=settings_snapshot,
        search_original_query=kwargs.get("search_original_query", True),
    )