Coverage for src / local_deep_research / search_system_factory.py: 92%
144 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-25 01:07 +0000
"""
Factory for creating search strategies.

This module provides a centralized way to create search strategies
to avoid code duplication.
"""
7from loguru import logger
8from typing import Optional, Dict, Any, List
9from langchain_core.language_models import BaseChatModel
12def _get_setting(
13 settings_snapshot: Optional[Dict], key: str, default: Any
14) -> Any:
15 """Get a setting value from the snapshot, handling nested dict structure."""
16 if not settings_snapshot or key not in settings_snapshot:
17 return default
18 value = settings_snapshot[key]
19 # Extract value from dict structure if needed
20 if isinstance(value, dict) and "value" in value:
21 return value["value"]
22 return value
# Default keyword arguments for the constraint-based strategy family.
# Each table extends the previous one, so the differences between the
# variants (extra options, changed defaults) are visible in one place.
_CONSTRAINED_DEFAULTS: Dict[str, Any] = {
    "max_iterations": 20,
    "confidence_threshold": 0.95,
    "candidate_limit": 100,
    "evidence_threshold": 0.9,
    "max_search_iterations": 5,
    "questions_per_iteration": 3,
    "min_candidates_per_stage": 20,
}
_PARALLEL_CONSTRAINED_DEFAULTS: Dict[str, Any] = {
    **_CONSTRAINED_DEFAULTS,
    "parallel_workers": 100,
}
_EARLY_STOP_CONSTRAINED_DEFAULTS: Dict[str, Any] = {
    **_PARALLEL_CONSTRAINED_DEFAULTS,
    "early_stop_threshold": 0.99,
    "concurrent_evaluation": True,
}
_SMART_QUERY_DEFAULTS: Dict[str, Any] = {
    **_EARLY_STOP_CONSTRAINED_DEFAULTS,
    "use_llm_query_generation": True,
    "queries_per_combination": 3,
}
_DUAL_CONFIDENCE_DEFAULTS: Dict[str, Any] = {
    **_SMART_QUERY_DEFAULTS,
    # The dual-confidence variants stop early at a lower threshold.
    "early_stop_threshold": 0.95,
    "use_entity_seeding": True,
    "use_direct_property_search": True,
    "uncertainty_penalty": 0.2,
    "negative_weight": 0.5,
}
_DUAL_CONFIDENCE_REJECTION_DEFAULTS: Dict[str, Any] = {
    **_DUAL_CONFIDENCE_DEFAULTS,
    "rejection_threshold": 0.3,
    "positive_threshold": 0.2,
    "critical_constraint_rejection": 0.2,
}
_CONCURRENT_DUAL_CONFIDENCE_DEFAULTS: Dict[str, Any] = {
    **_DUAL_CONFIDENCE_REJECTION_DEFAULTS,
    # The concurrent variant defaults to far fewer workers.
    "parallel_workers": 10,
    "min_good_candidates": 3,
    "target_candidates": 5,
    "max_candidates": 10,
    "min_score_threshold": 0.65,
    "exceptional_score": 0.95,
    "quality_plateau_threshold": 0.1,
    "max_search_time": 30.0,
    "max_evaluations": 30,
}


def _pick_kwargs(
    overrides: Dict[str, Any], defaults: Dict[str, Any]
) -> Dict[str, Any]:
    """Return a copy of ``defaults`` with caller-supplied overrides applied.

    Only names listed in ``defaults`` are returned; unrelated entries in
    ``overrides`` are ignored.
    """
    return {
        name: overrides.get(name, default)
        for name, default in defaults.items()
    }


def _zero_to_none(value: Any) -> Any:
    """Map a limit of 0 to ``None``, which the settings use for "unlimited"."""
    return None if value == 0 else value


def _create_focused_iteration_strategy(
    model: BaseChatModel,
    search: Any,
    all_links_of_system: List[Dict],
    settings_snapshot: Optional[Dict],
    kwargs: Dict[str, Any],
    use_standard_citations: bool = False,
):
    """Build a FocusedIterationStrategy shared by both focused-iteration names.

    Options are resolved in this order: explicit entry in ``kwargs``, then
    the ``focused_iteration.*`` entry in ``settings_snapshot``, then a
    built-in default.

    Args:
        model: Language model to use.
        search: Search engine instance.
        all_links_of_system: List of existing links.
        settings_snapshot: Settings snapshot.
        kwargs: The ``**kwargs`` given to ``create_strategy``.
        use_standard_citations: When true, a ``CitationHandler`` of type
            ``"standard"`` is passed to the strategy together with
            ``use_browsecomp_optimization=True``.

    Returns:
        The configured FocusedIterationStrategy instance.
    """
    from .advanced_search_system.strategies.focused_iteration_strategy import (
        FocusedIterationStrategy,
    )

    extra_args: Dict[str, Any] = {}
    if use_standard_citations:
        from .citation_handler import CitationHandler

        # Use standard citation handler (same question generator as the
        # regular focused-iteration strategy).
        extra_args["citation_handler"] = CitationHandler(
            model, handler_type="standard", settings_snapshot=settings_snapshot
        )
        extra_args["use_browsecomp_optimization"] = True  # Keep BrowseComp features

    def option(kwarg_name: str, setting_key: str, default: Any) -> Any:
        # kwargs win over the settings snapshot, which wins over the default.
        return kwargs.get(
            kwarg_name, _get_setting(settings_snapshot, setting_key, default)
        )

    # adaptive_questions is stored as 0/1 integer, convert to bool
    enable_adaptive = bool(
        option(
            "enable_adaptive_questions",
            "focused_iteration.adaptive_questions",
            0,
        )
    )
    question_gen_type = option(
        "question_generator",
        "focused_iteration.question_generator",
        "browsecomp",
    )
    # Convert 0 to None for "unlimited"
    knowledge_limit = _zero_to_none(
        option(
            "knowledge_summary_limit",
            "focused_iteration.knowledge_summary_limit",
            10,
        )
    )
    snippet_truncate = _zero_to_none(
        option(
            "knowledge_snippet_truncate",
            "focused_iteration.snippet_truncate",
            200,
        )
    )
    prompt_knowledge_truncate = _zero_to_none(
        option(
            "prompt_knowledge_truncate",
            "focused_iteration.prompt_knowledge_truncate",
            1500,
        )
    )
    previous_searches_limit = _zero_to_none(
        option(
            "previous_searches_limit",
            "focused_iteration.previous_searches_limit",
            10,
        )
    )

    strategy = FocusedIterationStrategy(
        model=model,
        search=search,
        all_links_of_system=all_links_of_system,
        max_iterations=kwargs.get("max_iterations", 8),
        questions_per_iteration=kwargs.get("questions_per_iteration", 5),
        settings_snapshot=settings_snapshot,
        # Options read from settings (with kwargs override)
        enable_adaptive_questions=enable_adaptive,
        enable_early_termination=kwargs.get("enable_early_termination", False),
        knowledge_summary_limit=knowledge_limit,
        knowledge_snippet_truncate=snippet_truncate,
        prompt_knowledge_truncate=prompt_knowledge_truncate,
        previous_searches_limit=previous_searches_limit,
        **extra_args,
    )

    # Override question generator if flexible is selected
    if question_gen_type == "flexible":
        from .advanced_search_system.questions.flexible_browsecomp_question import (
            FlexibleBrowseCompQuestionGenerator,
        )

        # Pass truncation settings to flexible generator
        strategy.question_generator = FlexibleBrowseCompQuestionGenerator(
            model,
            knowledge_truncate_length=prompt_knowledge_truncate,
            previous_searches_limit=previous_searches_limit,
        )

    return strategy


def create_strategy(
    strategy_name: str,
    model: BaseChatModel,
    search: Any,
    all_links_of_system: Optional[List[Dict]] = None,
    settings_snapshot: Optional[Dict] = None,
    research_context: Optional[Dict] = None,
    **kwargs,
):
    """
    Create a search strategy by name.

    The name is matched case-insensitively; most strategies accept both
    hyphenated and underscored spellings. An unrecognised name logs a
    warning and falls back to the source-based strategy.

    Args:
        strategy_name: Name of the strategy to create
        model: Language model to use
        search: Search engine instance
        all_links_of_system: List of existing links (a new empty list is
            used when omitted)
        settings_snapshot: Settings snapshot
        research_context: Research context for special strategies
            (accepted for interface compatibility; not read by this factory)
        **kwargs: Additional strategy-specific parameters; each strategy
            only reads the names it understands

    Returns:
        Strategy instance
    """
    if all_links_of_system is None:
        all_links_of_system = []

    strategy_name_lower = strategy_name.lower()

    # Arguments every strategy constructor below receives.
    base: Dict[str, Any] = {
        "model": model,
        "search": search,
        "all_links_of_system": all_links_of_system,
    }

    # Source-based strategy
    if strategy_name_lower in (
        "source-based",
        "source_based",
        "source_based_search",
    ):
        from .advanced_search_system.strategies.source_based_strategy import (
            SourceBasedSearchStrategy,
        )

        return SourceBasedSearchStrategy(
            **base,
            include_text_content=kwargs.get("include_text_content", True),
            use_cross_engine_filter=kwargs.get("use_cross_engine_filter", True),
            use_atomic_facts=kwargs.get("use_atomic_facts", False),
            settings_snapshot=settings_snapshot,
            search_original_query=kwargs.get("search_original_query", True),
        )

    # Focused iteration strategy
    if strategy_name_lower in ("focused-iteration", "focused_iteration"):
        return _create_focused_iteration_strategy(
            model,
            search,
            all_links_of_system,
            settings_snapshot,
            kwargs,
            use_standard_citations=False,
        )

    # Focused iteration strategy with standard citation handler
    if strategy_name_lower in (
        "focused-iteration-standard",
        "focused_iteration_standard",
    ):
        return _create_focused_iteration_strategy(
            model,
            search,
            all_links_of_system,
            settings_snapshot,
            kwargs,
            use_standard_citations=True,
        )

    # Iterative reasoning strategy (depth variant)
    if strategy_name_lower in (
        "iterative-reasoning",
        "iterative_reasoning",
        "iterative_reasoning_depth",
    ):
        from .advanced_search_system.strategies.iterative_reasoning_strategy import (
            IterativeReasoningStrategy,
        )

        return IterativeReasoningStrategy(**base)

    # News aggregation strategy
    if strategy_name_lower in ("news", "news_aggregation", "news-aggregation"):
        from .advanced_search_system.strategies.news_strategy import (
            NewsAggregationStrategy,
        )

        return NewsAggregationStrategy(**base)

    # IterDRAG strategy
    if strategy_name_lower == "iterdrag":
        from .advanced_search_system.strategies.iterdrag_strategy import (
            IterDRAGStrategy,
        )

        return IterDRAGStrategy(**base, settings_snapshot=settings_snapshot)

    # Parallel strategy
    if strategy_name_lower == "parallel":
        from .advanced_search_system.strategies.parallel_search_strategy import (
            ParallelSearchStrategy,
        )

        return ParallelSearchStrategy(
            **base,
            include_text_content=kwargs.get("include_text_content", True),
            use_cross_engine_filter=kwargs.get("use_cross_engine_filter", True),
            settings_snapshot=settings_snapshot,
        )

    # Rapid strategy
    if strategy_name_lower == "rapid":
        from .advanced_search_system.strategies.rapid_search_strategy import (
            RapidSearchStrategy,
        )

        return RapidSearchStrategy(**base, settings_snapshot=settings_snapshot)

    # Recursive decomposition strategy
    if strategy_name_lower in ("recursive", "recursive-decomposition"):
        from .advanced_search_system.strategies.recursive_decomposition_strategy import (
            RecursiveDecompositionStrategy,
        )

        return RecursiveDecompositionStrategy(
            **base, settings_snapshot=settings_snapshot
        )

    # Iterative reasoning strategy (different from iterative_reasoning_depth)
    if strategy_name_lower == "iterative":
        from .advanced_search_system.strategies.iterative_reasoning_strategy import (
            IterativeReasoningStrategy,
        )

        # Iteration settings come from kwargs or fall back to defaults.
        # Note the caller-facing "questions_per_iteration" feeds the
        # constructor's "questions_per_search" parameter.
        return IterativeReasoningStrategy(
            **base,
            max_iterations=kwargs.get("max_iterations", 20),
            confidence_threshold=kwargs.get("confidence_threshold", 0.95),
            search_iterations_per_round=kwargs.get(
                "search_iterations_per_round", 1
            ),
            questions_per_search=kwargs.get("questions_per_iteration", 3),
            settings_snapshot=settings_snapshot,
        )

    # Adaptive decomposition strategy
    if strategy_name_lower == "adaptive":
        from .advanced_search_system.strategies.adaptive_decomposition_strategy import (
            AdaptiveDecompositionStrategy,
        )

        return AdaptiveDecompositionStrategy(
            **base,
            # "max_iterations" is accepted as a fallback spelling of "max_steps".
            max_steps=kwargs.get("max_steps", kwargs.get("max_iterations", 5)),
            min_confidence=kwargs.get("min_confidence", 0.8),
            source_search_iterations=kwargs.get("source_search_iterations", 2),
            source_questions_per_iteration=kwargs.get(
                "source_questions_per_iteration",
                kwargs.get("questions_per_iteration", 3),
            ),
            settings_snapshot=settings_snapshot,
        )

    # Smart decomposition strategy
    if strategy_name_lower == "smart":
        from .advanced_search_system.strategies.smart_decomposition_strategy import (
            SmartDecompositionStrategy,
        )

        return SmartDecompositionStrategy(
            **base,
            max_iterations=kwargs.get("max_iterations", 5),
            source_search_iterations=kwargs.get("source_search_iterations", 2),
            source_questions_per_iteration=kwargs.get(
                "source_questions_per_iteration",
                kwargs.get("questions_per_iteration", 3),
            ),
            settings_snapshot=settings_snapshot,
        )

    # BrowseComp optimized strategy
    if strategy_name_lower == "browsecomp":
        from .advanced_search_system.strategies.browsecomp_optimized_strategy import (
            BrowseCompOptimizedStrategy,
        )

        return BrowseCompOptimizedStrategy(
            **base,
            max_browsecomp_iterations=kwargs.get("max_browsecomp_iterations", 15),
            confidence_threshold=kwargs.get("confidence_threshold", 0.9),
            max_iterations=kwargs.get("max_iterations", 5),
            questions_per_iteration=kwargs.get("questions_per_iteration", 3),
            settings_snapshot=settings_snapshot,
        )

    # Enhanced evidence-based strategy
    if strategy_name_lower == "evidence":
        from .advanced_search_system.strategies.evidence_based_strategy_v2 import (
            EnhancedEvidenceBasedStrategy,
        )

        return EnhancedEvidenceBasedStrategy(
            **base,
            max_iterations=kwargs.get("max_iterations", 20),
            confidence_threshold=kwargs.get("confidence_threshold", 0.95),
            candidate_limit=kwargs.get("candidate_limit", 20),
            evidence_threshold=kwargs.get("evidence_threshold", 0.9),
            max_search_iterations=kwargs.get("max_search_iterations", 5),
            questions_per_iteration=kwargs.get("questions_per_iteration", 3),
            min_candidates_threshold=kwargs.get("min_candidates_threshold", 10),
            enable_pattern_learning=kwargs.get("enable_pattern_learning", True),
            settings_snapshot=settings_snapshot,
        )

    # Constrained search strategy
    if strategy_name_lower == "constrained":
        from .advanced_search_system.strategies.constrained_search_strategy import (
            ConstrainedSearchStrategy,
        )

        return ConstrainedSearchStrategy(
            **base,
            **_pick_kwargs(kwargs, _CONSTRAINED_DEFAULTS),
            settings_snapshot=settings_snapshot,
        )

    # Parallel constrained strategy
    if strategy_name_lower in ("parallel-constrained", "parallel_constrained"):
        from .advanced_search_system.strategies.parallel_constrained_strategy import (
            ParallelConstrainedStrategy,
        )

        return ParallelConstrainedStrategy(
            **base,
            **_pick_kwargs(kwargs, _PARALLEL_CONSTRAINED_DEFAULTS),
            settings_snapshot=settings_snapshot,
        )

    # Early stop constrained strategy
    if strategy_name_lower in (
        "early-stop-constrained",
        "early_stop_constrained",
    ):
        from .advanced_search_system.strategies.early_stop_constrained_strategy import (
            EarlyStopConstrainedStrategy,
        )

        return EarlyStopConstrainedStrategy(
            **base,
            **_pick_kwargs(kwargs, _EARLY_STOP_CONSTRAINED_DEFAULTS),
            settings_snapshot=settings_snapshot,
        )

    # Smart query strategy
    if strategy_name_lower in ("smart-query", "smart_query"):
        from .advanced_search_system.strategies.smart_query_strategy import (
            SmartQueryStrategy,
        )

        return SmartQueryStrategy(
            **base,
            **_pick_kwargs(kwargs, _SMART_QUERY_DEFAULTS),
            settings_snapshot=settings_snapshot,
        )

    # Dual confidence strategy
    if strategy_name_lower in ("dual-confidence", "dual_confidence"):
        from .advanced_search_system.strategies.dual_confidence_strategy import (
            DualConfidenceStrategy,
        )

        return DualConfidenceStrategy(
            **base,
            **_pick_kwargs(kwargs, _DUAL_CONFIDENCE_DEFAULTS),
            settings_snapshot=settings_snapshot,
        )

    # Dual confidence with rejection strategy
    if strategy_name_lower in (
        "dual-confidence-with-rejection",
        "dual_confidence_with_rejection",
    ):
        from .advanced_search_system.strategies.dual_confidence_with_rejection import (
            DualConfidenceWithRejectionStrategy,
        )

        return DualConfidenceWithRejectionStrategy(
            **base,
            **_pick_kwargs(kwargs, _DUAL_CONFIDENCE_REJECTION_DEFAULTS),
            settings_snapshot=settings_snapshot,
        )

    # Concurrent dual confidence strategy
    if strategy_name_lower in (
        "concurrent-dual-confidence",
        "concurrent_dual_confidence",
    ):
        from .advanced_search_system.strategies.concurrent_dual_confidence_strategy import (
            ConcurrentDualConfidenceStrategy,
        )

        return ConcurrentDualConfidenceStrategy(
            **base,
            **_pick_kwargs(kwargs, _CONCURRENT_DUAL_CONFIDENCE_DEFAULTS),
            settings_snapshot=settings_snapshot,
        )

    # Constraint parallel strategy
    if strategy_name_lower in ("constraint-parallel", "constraint_parallel"):
        from .advanced_search_system.strategies.constraint_parallel_strategy import (
            ConstraintParallelStrategy,
        )

        return ConstraintParallelStrategy(
            **base,
            **_pick_kwargs(kwargs, _DUAL_CONFIDENCE_REJECTION_DEFAULTS),
            settings_snapshot=settings_snapshot,
        )

    # Modular strategy
    if strategy_name_lower in ("modular", "modular-strategy"):
        from .advanced_search_system.strategies.modular_strategy import (
            ModularStrategy,
        )

        return ModularStrategy(
            **base,
            constraint_checker_type=kwargs.get(
                "constraint_checker_type", "dual_confidence"
            ),
            exploration_strategy=kwargs.get("exploration_strategy", "adaptive"),
            early_rejection=kwargs.get("early_rejection", True),
            early_stopping=kwargs.get("early_stopping", True),
            llm_constraint_processing=kwargs.get(
                "llm_constraint_processing", True
            ),
            immediate_evaluation=kwargs.get("immediate_evaluation", True),
            settings_snapshot=settings_snapshot,
        )

    # Modular parallel strategy
    if strategy_name_lower in ("modular-parallel", "modular_parallel"):
        from .advanced_search_system.strategies.modular_strategy import (
            ModularStrategy,
        )

        # Fixed configuration: kwargs are not consulted for this variant.
        return ModularStrategy(
            **base,
            constraint_checker_type="dual_confidence",
            exploration_strategy="parallel",
            settings_snapshot=settings_snapshot,
        )

    # BrowseComp entity strategy
    if strategy_name_lower in ("browsecomp-entity", "browsecomp_entity"):
        from .advanced_search_system.strategies.browsecomp_entity_strategy import (
            BrowseCompEntityStrategy,
        )

        return BrowseCompEntityStrategy(**base)

    # Topic organization strategy
    if strategy_name_lower in (
        "topic-organization",
        "topic_organization",
        "topic",
    ):
        from .advanced_search_system.strategies.topic_organization_strategy import (
            TopicOrganizationStrategy,
        )

        return TopicOrganizationStrategy(
            **base,
            settings_snapshot=settings_snapshot,
            min_sources_per_topic=1,  # Allow single-source topics
            use_cross_engine_filter=kwargs.get("use_cross_engine_filter", True),
            filter_reorder=kwargs.get("filter_reorder", True),
            filter_reindex=kwargs.get("filter_reindex", True),
            cross_engine_max_results=kwargs.get("cross_engine_max_results", None),
            search_original_query=kwargs.get("search_original_query", True),
            max_topics=kwargs.get("max_topics", 5),
            similarity_threshold=kwargs.get("similarity_threshold", 0.3),
            # NOTE(review): hardcoded to True for testing, so a caller's
            # "use_focused_iteration" kwarg is ignored. The original was
            # kwargs.get("use_focused_iteration", False) - confirm whether
            # this should be restored.
            use_focused_iteration=True,
            # Refinement iterations are disabled by default for now
            enable_refinement=kwargs.get("enable_refinement", False),
            # Defaults to 1 iteration for faster results
            max_refinement_iterations=kwargs.get("max_refinement_iterations", 1),
            generate_text=kwargs.get("generate_text", True),
        )

    # Iterative refinement strategy
    if strategy_name_lower in ("iterative-refinement", "iterative_refinement"):
        from .advanced_search_system.strategies.iterative_refinement_strategy import (
            IterativeRefinementStrategy,
        )

        # Create the initial strategy to refine (default: source-based).
        # It gets a fresh link list, and only "search_original_query" is
        # forwarded to it from kwargs.
        initial_strategy = create_strategy(
            strategy_name=kwargs.get("initial_strategy", "source-based"),
            model=model,
            search=search,
            all_links_of_system=[],
            settings_snapshot=settings_snapshot,
            search_original_query=kwargs.get("search_original_query", True),
        )

        return IterativeRefinementStrategy(
            **base,
            initial_strategy=initial_strategy,
            settings_snapshot=settings_snapshot,
            evaluation_frequency=kwargs.get("evaluation_frequency", 1),
            max_refinements=kwargs.get("max_refinements", 3),
            # Increased from 0.8
            confidence_threshold=kwargs.get("confidence_threshold", 0.95),
        )

    # Standard strategy
    if strategy_name_lower == "standard":
        from .advanced_search_system.strategies.standard_strategy import (
            StandardSearchStrategy,
        )

        return StandardSearchStrategy(
            **base, settings_snapshot=settings_snapshot
        )

    # Default to source-based if unknown. Unlike the named source-based
    # branch, only "search_original_query" is taken from kwargs here.
    logger.warning(
        f"Unknown strategy: {strategy_name}, defaulting to source-based"
    )
    from .advanced_search_system.strategies.source_based_strategy import (
        SourceBasedSearchStrategy,
    )

    return SourceBasedSearchStrategy(
        **base,
        include_text_content=True,
        use_cross_engine_filter=True,
        use_atomic_facts=False,
        settings_snapshot=settings_snapshot,
        search_original_query=kwargs.get("search_original_query", True),
    )