Coverage for src / local_deep_research / search_system_factory.py: 13%
144 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
1"""
2Factory for creating search strategies.
3This module provides a centralized way to create search strategies
4to avoid code duplication.
5"""
7from loguru import logger
8from typing import Optional, Dict, Any, List
9from langchain_core.language_models import BaseChatModel
12def _get_setting(
13 settings_snapshot: Optional[Dict], key: str, default: Any
14) -> Any:
15 """Get a setting value from the snapshot, handling nested dict structure."""
16 if not settings_snapshot or key not in settings_snapshot:
17 return default
18 value = settings_snapshot[key]
19 # Extract value from dict structure if needed
20 if isinstance(value, dict) and "value" in value:
21 return value["value"]
22 return value
# Default option tables for the constraint-driven strategy family. Each table
# extends the previous one, mirroring how those strategies layer additional
# options on top of each other. Any entry can be overridden through the
# ``**kwargs`` of ``create_strategy``. The tables are never mutated.
_CONSTRAINED_DEFAULTS: Dict[str, Any] = {
    "max_iterations": 20,
    "confidence_threshold": 0.95,
    "candidate_limit": 100,
    "evidence_threshold": 0.9,
    "max_search_iterations": 5,
    "questions_per_iteration": 3,
    "min_candidates_per_stage": 20,
}
_PARALLEL_CONSTRAINED_DEFAULTS: Dict[str, Any] = {
    **_CONSTRAINED_DEFAULTS,
    "parallel_workers": 100,
}
_EARLY_STOP_CONSTRAINED_DEFAULTS: Dict[str, Any] = {
    **_PARALLEL_CONSTRAINED_DEFAULTS,
    "early_stop_threshold": 0.99,
    "concurrent_evaluation": True,
}
_SMART_QUERY_DEFAULTS: Dict[str, Any] = {
    **_EARLY_STOP_CONSTRAINED_DEFAULTS,
    "use_llm_query_generation": True,
    "queries_per_combination": 3,
}
_DUAL_CONFIDENCE_DEFAULTS: Dict[str, Any] = {
    **_SMART_QUERY_DEFAULTS,
    "early_stop_threshold": 0.95,
    "use_entity_seeding": True,
    "use_direct_property_search": True,
    "uncertainty_penalty": 0.2,
    "negative_weight": 0.5,
}
_DUAL_CONFIDENCE_REJECTION_DEFAULTS: Dict[str, Any] = {
    **_DUAL_CONFIDENCE_DEFAULTS,
    "rejection_threshold": 0.3,
    "positive_threshold": 0.2,
    "critical_constraint_rejection": 0.2,
}
_CONCURRENT_DUAL_CONFIDENCE_DEFAULTS: Dict[str, Any] = {
    **_DUAL_CONFIDENCE_REJECTION_DEFAULTS,
    "parallel_workers": 10,
    "min_good_candidates": 3,
    "target_candidates": 5,
    "max_candidates": 10,
    "min_score_threshold": 0.65,
    "exceptional_score": 0.95,
    "quality_plateau_threshold": 0.1,
    "max_search_time": 30.0,
    "max_evaluations": 30,
}


def _resolve_options(
    overrides: Dict[str, Any], defaults: Dict[str, Any]
) -> Dict[str, Any]:
    """Return a new dict with each default replaced by its override, if any."""
    return {
        name: overrides.get(name, fallback)
        for name, fallback in defaults.items()
    }


def _build_focused_iteration(
    strategy_cls: Any,
    model: Any,
    search: Any,
    all_links_of_system: List[Dict],
    settings_snapshot: Optional[Dict],
    overrides: Dict[str, Any],
    **extra_init_kwargs: Any,
):
    """Instantiate a focused-iteration strategy shared by both its variants.

    Options are resolved with the precedence: explicit kwarg override, then
    the ``focused_iteration.*`` entry of the settings snapshot, then a
    built-in fallback. ``extra_init_kwargs`` are forwarded unchanged to
    ``strategy_cls`` (used for the variant-specific constructor arguments).
    """

    def option(kwarg_name: str, setting_key: str, fallback: Any) -> Any:
        return overrides.get(
            kwarg_name, _get_setting(settings_snapshot, setting_key, fallback)
        )

    # adaptive_questions is stored as a 0/1 integer, so coerce it to bool.
    enable_adaptive = bool(
        option(
            "enable_adaptive_questions",
            "focused_iteration.adaptive_questions",
            0,
        )
    )
    question_gen_type = option(
        "question_generator",
        "focused_iteration.question_generator",
        "browsecomp",
    )
    limits = {
        "knowledge_summary_limit": option(
            "knowledge_summary_limit",
            "focused_iteration.knowledge_summary_limit",
            10,
        ),
        "knowledge_snippet_truncate": option(
            "knowledge_snippet_truncate",
            "focused_iteration.snippet_truncate",
            200,
        ),
        "prompt_knowledge_truncate": option(
            "prompt_knowledge_truncate",
            "focused_iteration.prompt_knowledge_truncate",
            1500,
        ),
        "previous_searches_limit": option(
            "previous_searches_limit",
            "focused_iteration.previous_searches_limit",
            10,
        ),
    }
    # A configured value of 0 means "unlimited", which is passed on as None.
    limits = {
        name: (None if value == 0 else value) for name, value in limits.items()
    }

    strategy = strategy_cls(
        model=model,
        search=search,
        all_links_of_system=all_links_of_system,
        max_iterations=overrides.get("max_iterations", 8),
        questions_per_iteration=overrides.get("questions_per_iteration", 5),
        settings_snapshot=settings_snapshot,
        enable_adaptive_questions=enable_adaptive,
        enable_early_termination=overrides.get(
            "enable_early_termination", False
        ),
        **limits,
        **extra_init_kwargs,
    )

    # Override the question generator if "flexible" is selected.
    if question_gen_type == "flexible":
        from .advanced_search_system.questions.flexible_browsecomp_question import (
            FlexibleBrowseCompQuestionGenerator,
        )

        # Pass the truncation settings on to the flexible generator.
        strategy.question_generator = FlexibleBrowseCompQuestionGenerator(
            model,
            knowledge_truncate_length=limits["prompt_knowledge_truncate"],
            previous_searches_limit=limits["previous_searches_limit"],
        )

    return strategy


def create_strategy(
    strategy_name: str,
    model: BaseChatModel,
    search: Any,
    all_links_of_system: Optional[List[Dict]] = None,
    settings_snapshot: Optional[Dict] = None,
    research_context: Optional[Dict] = None,
    **kwargs,
):
    """
    Create a search strategy by name.

    The name is matched case-insensitively; most strategies accept both a
    hyphenated and an underscored spelling. An unrecognised name logs a
    warning and falls back to the source-based strategy. Strategy classes
    are imported lazily, only for the branch that is selected.

    Args:
        strategy_name: Name of the strategy to create
        model: Language model to use
        search: Search engine instance
        all_links_of_system: List of existing links; a new empty list is
            used when None
        settings_snapshot: Settings snapshot
        research_context: Research context for special strategies (not read
            by this function)
        **kwargs: Additional strategy-specific parameters; each one
            overrides the corresponding built-in default

    Returns:
        Strategy instance
    """
    if all_links_of_system is None:
        all_links_of_system = []

    name = strategy_name.lower()

    # Constructor arguments shared by (almost) every strategy.
    base = {
        "model": model,
        "search": search,
        "all_links_of_system": all_links_of_system,
    }
    with_settings = {**base, "settings_snapshot": settings_snapshot}

    # Source-based strategy
    if name in ("source-based", "source_based", "source_based_search"):
        from .advanced_search_system.strategies.source_based_strategy import (
            SourceBasedSearchStrategy,
        )

        return SourceBasedSearchStrategy(
            **with_settings,
            include_text_content=kwargs.get("include_text_content", True),
            use_cross_engine_filter=kwargs.get("use_cross_engine_filter", True),
            use_atomic_facts=kwargs.get("use_atomic_facts", False),
            search_original_query=kwargs.get("search_original_query", True),
        )

    # Focused iteration strategy
    if name in ("focused-iteration", "focused_iteration"):
        from .advanced_search_system.strategies.focused_iteration_strategy import (
            FocusedIterationStrategy,
        )

        return _build_focused_iteration(
            FocusedIterationStrategy,
            model,
            search,
            all_links_of_system,
            settings_snapshot,
            kwargs,
        )

    # Focused iteration strategy with standard citation handler
    if name in ("focused-iteration-standard", "focused_iteration_standard"):
        from .advanced_search_system.strategies.focused_iteration_strategy import (
            FocusedIterationStrategy,
        )
        from .citation_handler import CitationHandler

        # Use standard citation handler (same question generator as regular
        # focused-iteration)
        standard_citation_handler = CitationHandler(
            model, handler_type="standard", settings_snapshot=settings_snapshot
        )

        return _build_focused_iteration(
            FocusedIterationStrategy,
            model,
            search,
            all_links_of_system,
            settings_snapshot,
            kwargs,
            citation_handler=standard_citation_handler,
            use_browsecomp_optimization=True,  # Keep BrowseComp features
        )

    # Iterative reasoning (depth) strategy
    if name in (
        "iterative-reasoning",
        "iterative_reasoning",
        "iterative_reasoning_depth",
    ):
        from .advanced_search_system.strategies.iterative_reasoning_strategy import (
            IterativeReasoningDepthStrategy,
        )

        return IterativeReasoningDepthStrategy(
            **with_settings,
            use_atomic_facts=kwargs.get("use_atomic_facts", True),
        )

    # News aggregation strategy (takes no settings snapshot)
    if name in ("news", "news_aggregation", "news-aggregation"):
        from .advanced_search_system.strategies.news_strategy import (
            NewsAggregationStrategy,
        )

        return NewsAggregationStrategy(**base)

    # IterDRAG strategy
    if name == "iterdrag":
        from .advanced_search_system.strategies.iterdrag_strategy import (
            IterDRAGStrategy,
        )

        return IterDRAGStrategy(**with_settings)

    # Parallel strategy
    if name == "parallel":
        from .advanced_search_system.strategies.parallel_search_strategy import (
            ParallelSearchStrategy,
        )

        return ParallelSearchStrategy(
            **with_settings,
            include_text_content=kwargs.get("include_text_content", True),
            use_cross_engine_filter=kwargs.get("use_cross_engine_filter", True),
        )

    # Rapid strategy
    if name == "rapid":
        from .advanced_search_system.strategies.rapid_search_strategy import (
            RapidSearchStrategy,
        )

        return RapidSearchStrategy(**with_settings)

    # Recursive decomposition strategy
    if name in ("recursive", "recursive-decomposition"):
        from .advanced_search_system.strategies.recursive_decomposition_strategy import (
            RecursiveDecompositionStrategy,
        )

        return RecursiveDecompositionStrategy(**with_settings)

    # Iterative reasoning strategy (different from iterative_reasoning_depth)
    if name == "iterative":
        from .advanced_search_system.strategies.iterative_reasoning_strategy import (
            IterativeReasoningStrategy,
        )

        return IterativeReasoningStrategy(
            **with_settings,
            max_iterations=kwargs.get("max_iterations", 20),
            confidence_threshold=kwargs.get("confidence_threshold", 0.95),
            search_iterations_per_round=kwargs.get(
                "search_iterations_per_round", 1
            ),
            # The generic "questions_per_iteration" kwarg feeds this option.
            questions_per_search=kwargs.get("questions_per_iteration", 3),
        )

    # Adaptive decomposition strategy
    if name == "adaptive":
        from .advanced_search_system.strategies.adaptive_decomposition_strategy import (
            AdaptiveDecompositionStrategy,
        )

        return AdaptiveDecompositionStrategy(
            **with_settings,
            # Specific kwargs win over the generic ones they fall back to.
            max_steps=kwargs.get("max_steps", kwargs.get("max_iterations", 5)),
            min_confidence=kwargs.get("min_confidence", 0.8),
            source_search_iterations=kwargs.get("source_search_iterations", 2),
            source_questions_per_iteration=kwargs.get(
                "source_questions_per_iteration",
                kwargs.get("questions_per_iteration", 3),
            ),
        )

    # Smart decomposition strategy
    if name == "smart":
        from .advanced_search_system.strategies.smart_decomposition_strategy import (
            SmartDecompositionStrategy,
        )

        return SmartDecompositionStrategy(
            **with_settings,
            max_iterations=kwargs.get("max_iterations", 5),
            source_search_iterations=kwargs.get("source_search_iterations", 2),
            source_questions_per_iteration=kwargs.get(
                "source_questions_per_iteration",
                kwargs.get("questions_per_iteration", 3),
            ),
        )

    # BrowseComp optimized strategy
    if name == "browsecomp":
        from .advanced_search_system.strategies.browsecomp_optimized_strategy import (
            BrowseCompOptimizedStrategy,
        )

        return BrowseCompOptimizedStrategy(
            **with_settings,
            max_browsecomp_iterations=kwargs.get(
                "max_browsecomp_iterations", 15
            ),
            confidence_threshold=kwargs.get("confidence_threshold", 0.9),
            max_iterations=kwargs.get("max_iterations", 5),
            questions_per_iteration=kwargs.get("questions_per_iteration", 3),
        )

    # Enhanced evidence-based strategy
    if name == "evidence":
        from .advanced_search_system.strategies.evidence_based_strategy_v2 import (
            EnhancedEvidenceBasedStrategy,
        )

        return EnhancedEvidenceBasedStrategy(
            **with_settings,
            max_iterations=kwargs.get("max_iterations", 20),
            confidence_threshold=kwargs.get("confidence_threshold", 0.95),
            candidate_limit=kwargs.get("candidate_limit", 20),
            evidence_threshold=kwargs.get("evidence_threshold", 0.9),
            max_search_iterations=kwargs.get("max_search_iterations", 5),
            questions_per_iteration=kwargs.get("questions_per_iteration", 3),
            min_candidates_threshold=kwargs.get("min_candidates_threshold", 10),
            enable_pattern_learning=kwargs.get("enable_pattern_learning", True),
        )

    # Constrained search strategy
    if name == "constrained":
        from .advanced_search_system.strategies.constrained_search_strategy import (
            ConstrainedSearchStrategy,
        )

        return ConstrainedSearchStrategy(
            **with_settings,
            **_resolve_options(kwargs, _CONSTRAINED_DEFAULTS),
        )

    # Parallel constrained strategy
    if name in ("parallel-constrained", "parallel_constrained"):
        from .advanced_search_system.strategies.parallel_constrained_strategy import (
            ParallelConstrainedStrategy,
        )

        return ParallelConstrainedStrategy(
            **with_settings,
            **_resolve_options(kwargs, _PARALLEL_CONSTRAINED_DEFAULTS),
        )

    # Early stop constrained strategy
    if name in ("early-stop-constrained", "early_stop_constrained"):
        from .advanced_search_system.strategies.early_stop_constrained_strategy import (
            EarlyStopConstrainedStrategy,
        )

        return EarlyStopConstrainedStrategy(
            **with_settings,
            **_resolve_options(kwargs, _EARLY_STOP_CONSTRAINED_DEFAULTS),
        )

    # Smart query strategy
    if name in ("smart-query", "smart_query"):
        from .advanced_search_system.strategies.smart_query_strategy import (
            SmartQueryStrategy,
        )

        return SmartQueryStrategy(
            **with_settings,
            **_resolve_options(kwargs, _SMART_QUERY_DEFAULTS),
        )

    # Dual confidence strategy
    if name in ("dual-confidence", "dual_confidence"):
        from .advanced_search_system.strategies.dual_confidence_strategy import (
            DualConfidenceStrategy,
        )

        return DualConfidenceStrategy(
            **with_settings,
            **_resolve_options(kwargs, _DUAL_CONFIDENCE_DEFAULTS),
        )

    # Dual confidence with rejection strategy
    if name in (
        "dual-confidence-with-rejection",
        "dual_confidence_with_rejection",
    ):
        from .advanced_search_system.strategies.dual_confidence_with_rejection import (
            DualConfidenceWithRejectionStrategy,
        )

        return DualConfidenceWithRejectionStrategy(
            **with_settings,
            **_resolve_options(kwargs, _DUAL_CONFIDENCE_REJECTION_DEFAULTS),
        )

    # Concurrent dual confidence strategy
    if name in ("concurrent-dual-confidence", "concurrent_dual_confidence"):
        from .advanced_search_system.strategies.concurrent_dual_confidence_strategy import (
            ConcurrentDualConfidenceStrategy,
        )

        return ConcurrentDualConfidenceStrategy(
            **with_settings,
            **_resolve_options(kwargs, _CONCURRENT_DUAL_CONFIDENCE_DEFAULTS),
        )

    # Constraint parallel strategy (same option set as dual confidence with
    # rejection)
    if name in ("constraint-parallel", "constraint_parallel"):
        from .advanced_search_system.strategies.constraint_parallel_strategy import (
            ConstraintParallelStrategy,
        )

        return ConstraintParallelStrategy(
            **with_settings,
            **_resolve_options(kwargs, _DUAL_CONFIDENCE_REJECTION_DEFAULTS),
        )

    # Modular strategy
    if name in ("modular", "modular-strategy"):
        from .advanced_search_system.strategies.modular_strategy import (
            ModularStrategy,
        )

        return ModularStrategy(
            **with_settings,
            constraint_checker_type=kwargs.get(
                "constraint_checker_type", "dual_confidence"
            ),
            exploration_strategy=kwargs.get("exploration_strategy", "adaptive"),
            early_rejection=kwargs.get("early_rejection", True),
            early_stopping=kwargs.get("early_stopping", True),
            llm_constraint_processing=kwargs.get(
                "llm_constraint_processing", True
            ),
            immediate_evaluation=kwargs.get("immediate_evaluation", True),
        )

    # Modular parallel strategy (fixed configuration; kwargs are not consulted)
    if name in ("modular-parallel", "modular_parallel"):
        from .advanced_search_system.strategies.modular_strategy import (
            ModularStrategy,
        )

        return ModularStrategy(
            **with_settings,
            constraint_checker_type="dual_confidence",
            exploration_strategy="parallel",
        )

    # BrowseComp entity strategy (takes no settings snapshot)
    if name in ("browsecomp-entity", "browsecomp_entity"):
        from .advanced_search_system.strategies.browsecomp_entity_strategy import (
            BrowseCompEntityStrategy,
        )

        return BrowseCompEntityStrategy(**base)

    # Topic organization strategy
    if name in ("topic-organization", "topic_organization", "topic"):
        from .advanced_search_system.strategies.topic_organization_strategy import (
            TopicOrganizationStrategy,
        )

        return TopicOrganizationStrategy(
            **with_settings,
            min_sources_per_topic=1,  # Allow single-source topics
            use_cross_engine_filter=kwargs.get("use_cross_engine_filter", True),
            filter_reorder=kwargs.get("filter_reorder", True),
            filter_reindex=kwargs.get("filter_reindex", True),
            cross_engine_max_results=kwargs.get(
                "cross_engine_max_results", None
            ),
            search_original_query=kwargs.get("search_original_query", True),
            max_topics=kwargs.get("max_topics", 5),
            similarity_threshold=kwargs.get("similarity_threshold", 0.3),
            # NOTE(review): HARDCODED TO TRUE for testing, so a caller's
            # "use_focused_iteration" kwarg is currently ignored.
            # Original: kwargs.get("use_focused_iteration", False)
            use_focused_iteration=True,
            # Refinement iterations are disabled by default for now.
            enable_refinement=kwargs.get("enable_refinement", False),
            # Default of 1 iteration for faster results.
            max_refinement_iterations=kwargs.get(
                "max_refinement_iterations", 1
            ),
            generate_text=kwargs.get("generate_text", True),
        )

    # Iterative refinement strategy
    if name in ("iterative-refinement", "iterative_refinement"):
        from .advanced_search_system.strategies.iterative_refinement_strategy import (
            IterativeRefinementStrategy,
        )

        # Build the wrapped initial strategy (defaults to source-based). Only
        # "search_original_query" is forwarded to it, and it gets a fresh
        # link list rather than sharing the caller's.
        initial_strategy = create_strategy(
            strategy_name=kwargs.get("initial_strategy", "source-based"),
            model=model,
            search=search,
            all_links_of_system=[],
            settings_snapshot=settings_snapshot,
            search_original_query=kwargs.get("search_original_query", True),
        )

        return IterativeRefinementStrategy(
            **with_settings,
            initial_strategy=initial_strategy,
            evaluation_frequency=kwargs.get("evaluation_frequency", 1),
            max_refinements=kwargs.get("max_refinements", 3),
            # Increased from 0.8
            confidence_threshold=kwargs.get("confidence_threshold", 0.95),
        )

    # Standard strategy
    if name == "standard":
        from .advanced_search_system.strategies.standard_strategy import (
            StandardSearchStrategy,
        )

        return StandardSearchStrategy(**with_settings)

    # Default to source-based if unknown
    logger.warning(
        f"Unknown strategy: {strategy_name}, defaulting to source-based"
    )
    from .advanced_search_system.strategies.source_based_strategy import (
        SourceBasedSearchStrategy,
    )

    return SourceBasedSearchStrategy(
        **with_settings,
        include_text_content=True,
        use_cross_engine_filter=True,
        use_atomic_facts=False,
        search_original_query=kwargs.get("search_original_query", True),
    )