Coverage for src/local_deep_research/advanced_search_system/candidate_exploration/constraint_guided_explorer.py: 11%
140 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
1"""
2Constraint-guided candidate explorer implementation.
4This explorer uses constraints to guide the search process, prioritizing
5searches that are likely to find candidates satisfying the constraints.
6"""
8import time
9from typing import List, Optional
11from loguru import logger
13from ..candidates.base_candidate import Candidate
14from ..constraints.base_constraint import Constraint, ConstraintType
15from .base_explorer import (
16 BaseCandidateExplorer,
17 ExplorationResult,
18 ExplorationStrategy,
19)
class ConstraintGuidedExplorer(BaseCandidateExplorer):
    """
    Constraint-guided candidate explorer.

    This explorer:
    1. Prioritizes searches based on constraint importance
    2. Uses constraint-specific search strategies
    3. Validates candidates against constraints early
    4. Focuses on constraint satisfaction over quantity

    Searching, candidate extraction, de-duplication and relevance ranking
    are delegated to helpers that are not defined in this class
    (``_execute_search``, ``_extract_candidates_from_results``,
    ``_deduplicate_candidates``, ``_rank_candidates_by_relevance``,
    ``_should_continue_exploration``) - presumably inherited from
    ``BaseCandidateExplorer``.
    """

    # Vocabulary used for "body part" name-pattern constraints. Shared by
    # query generation and quick name validation so the two cannot drift.
    _BODY_PARTS = (
        "arm",
        "leg",
        "foot",
        "hand",
        "eye",
        "ear",
        "head",
        "tooth",
        "nose",
        "heart",
    )

    def __init__(
        self,
        *args,
        constraint_weight_threshold: float = 0.7,  # Focus on constraints above this weight
        early_validation: bool = True,  # Validate candidates during search
        **kwargs,
    ):
        """
        Initialize constraint-guided explorer.

        Args:
            *args: Positional arguments forwarded to the base explorer.
            constraint_weight_threshold: Focus on constraints above this
                weight. NOTE: the value is stored on the instance but no
                method of this class currently reads it.
            early_validation: Whether to validate candidates during
                exploration.
            **kwargs: Keyword arguments forwarded to the base explorer.
        """
        super().__init__(*args, **kwargs)
        self.constraint_weight_threshold = constraint_weight_threshold
        self.early_validation = early_validation

    def explore(
        self,
        initial_query: str,
        constraints: Optional[List[Constraint]] = None,
        entity_type: Optional[str] = None,
    ) -> ExplorationResult:
        """
        Explore candidates using constraint-guided strategy.

        Constraints are processed in priority order; for each one up to
        three constraint-specific queries are executed. When more than one
        constraint is given, one extra query combining the two
        highest-priority constraints is run. The merged candidates are
        de-duplicated, ranked by constraint alignment and truncated to
        ``self.max_candidates``.

        Args:
            initial_query: The user's base search query.
            constraints: Constraints guiding the search. When empty or
                ``None`` a single basic search on ``initial_query`` is
                performed instead.
            entity_type: Optional entity type used when building queries
                and extracting candidates.

        Returns:
            An ``ExplorationResult`` describing the candidates found, the
            number of searches executed and the paths that were explored.
        """
        start_time = time.time()
        logger.info(
            f"Starting constraint-guided exploration for: {initial_query}"
        )

        if not constraints:
            logger.warning(
                "No constraints provided - falling back to basic search"
            )
            return self._basic_exploration(
                initial_query, entity_type, start_time
            )

        all_candidates: List[Candidate] = []
        exploration_paths: List[str] = []
        total_searched = 0

        # Prioritize constraints by weight and type
        prioritized_constraints = self._prioritize_constraints(constraints)

        # Search for each constraint
        for i, constraint in enumerate(prioritized_constraints, start=1):
            if not self._should_continue_exploration(
                start_time, len(all_candidates)
            ):
                break

            logger.info(
                f"Exploring constraint {i}/{len(prioritized_constraints)}: {constraint.value}"
            )

            # Generate constraint-specific queries
            constraint_queries = self._generate_constraint_queries(
                constraint, initial_query, entity_type
            )

            constraint_candidates: List[Candidate] = []

            for query in constraint_queries[:3]:  # Limit queries per constraint
                # Skip queries that were already run (compared lowercased).
                if query.lower() in self.explored_queries:
                    continue

                results = self._execute_search(query)
                candidates = self._extract_candidates_from_results(
                    results, entity_type
                )

                # Early validation if enabled
                if self.early_validation:
                    constraint_candidates.extend(
                        self._early_validate_candidates(candidates, constraint)
                    )
                else:
                    constraint_candidates.extend(candidates)

                total_searched += 1
                # The path records the number of extracted candidates,
                # i.e. the count before early validation filtered any out.
                exploration_paths.append(
                    f"Constraint '{constraint.value}': {query} -> {len(candidates)} candidates"
                )

            all_candidates.extend(constraint_candidates)
            logger.info(
                f"Found {len(constraint_candidates)} candidates for constraint: {constraint.value}"
            )

        # Cross-constraint exploration
        if len(prioritized_constraints) > 1:
            top_constraints = prioritized_constraints[:2]
            cross_query = self._combine_constraints_query(
                initial_query, top_constraints
            )
            # Only count and log the cross-constraint search when it will
            # actually run; _cross_constraint_exploration silently skips a
            # combined query that has already been explored.
            if cross_query and cross_query.lower() not in self.explored_queries:
                cross_candidates = self._cross_constraint_exploration(
                    top_constraints, initial_query, entity_type
                )
                all_candidates.extend(cross_candidates)
                exploration_paths.append(
                    f"Cross-constraint search -> {len(cross_candidates)} candidates"
                )
                total_searched += 1

        # Process final results
        unique_candidates = self._deduplicate_candidates(all_candidates)
        ranked_candidates = self._rank_by_constraint_alignment(
            unique_candidates, constraints, initial_query
        )
        final_candidates = ranked_candidates[: self.max_candidates]

        elapsed_time = time.time() - start_time
        logger.info(
            f"Constraint-guided exploration completed: {len(final_candidates)} candidates in {elapsed_time:.1f}s"
        )

        return ExplorationResult(
            candidates=final_candidates,
            total_searched=total_searched,
            unique_candidates=len(unique_candidates),
            exploration_paths=exploration_paths,
            metadata={
                "strategy": "constraint_guided",
                "constraints_used": len(prioritized_constraints),
                "early_validation": self.early_validation,
                "entity_type": entity_type,
            },
            elapsed_time=elapsed_time,
            strategy_used=ExplorationStrategy.CONSTRAINT_GUIDED,
        )

    def generate_exploration_queries(
        self,
        base_query: str,
        found_candidates: List[Candidate],
        constraints: Optional[List[Constraint]] = None,
    ) -> List[str]:
        """
        Generate constraint-guided exploration queries.

        Args:
            base_query: The base search query.
            found_candidates: Candidates found so far (not used by this
                implementation).
            constraints: Constraints to build queries from. Only the first
                three are considered.

        Returns:
            ``[base_query]`` when no constraints are given; otherwise up to
            two queries per considered constraint, followed by one query
            combining the first two constraints when at least two exist.
        """
        if not constraints:
            return [base_query]

        queries: List[str] = []

        # Generate queries for each constraint
        for constraint in constraints[:3]:  # Limit to avoid too many queries
            constraint_queries = self._generate_constraint_queries(
                constraint, base_query
            )
            queries.extend(constraint_queries[:2])  # Top 2 per constraint

        # Generate queries combining multiple constraints
        if len(constraints) > 1:
            combined_query = self._combine_constraints_query(
                base_query, constraints[:2]
            )
            if combined_query:
                queries.append(combined_query)

        return queries

    def _prioritize_constraints(
        self, constraints: List[Constraint]
    ) -> List[Constraint]:
        """
        Prioritize constraints by weight and type.

        Constraints are ordered by weight, highest first. Ties are broken
        by type priority, where a lower number means a higher priority and
        types missing from the table rank last.

        Args:
            constraints: Constraints to order.

        Returns:
            A new list; the input list is not modified.
        """
        type_priority = {
            ConstraintType.NAME_PATTERN: 1,
            ConstraintType.PROPERTY: 2,
            ConstraintType.EVENT: 3,
            ConstraintType.LOCATION: 4,
            ConstraintType.TEMPORAL: 5,
            ConstraintType.STATISTIC: 6,
            ConstraintType.COMPARISON: 7,
            ConstraintType.EXISTENCE: 8,
        }

        # Negate the weight instead of using reverse=True: reversing the
        # whole tuple would also invert the type tie-break and put the
        # lowest-priority types first among equally weighted constraints.
        return sorted(
            constraints,
            key=lambda c: (-c.weight, type_priority.get(c.type, 9)),
        )

    def _generate_constraint_queries(
        self,
        constraint: Constraint,
        base_query: str,
        entity_type: Optional[str] = None,
    ) -> List[str]:
        """
        Generate search queries specific to a constraint.

        The first query pairs the constraint value with ``entity_type``
        (or with ``base_query`` when no entity type is given). Name
        pattern, property, event and location constraints then contribute
        additional type-specific queries; other types contribute none.

        Args:
            constraint: The constraint to build queries for.
            base_query: The base search query.
            entity_type: Optional entity type to search for.

        Returns:
            The generated queries, generic query first.
        """
        queries: List[str] = []

        # Base constraint query
        if entity_type:
            queries.append(f"{entity_type} {constraint.value}")
        else:
            queries.append(f"{base_query} {constraint.value}")

        # Constraint-type specific queries
        if constraint.type == ConstraintType.NAME_PATTERN:
            queries.extend(
                self._name_pattern_queries(constraint, base_query, entity_type)
            )
        elif constraint.type == ConstraintType.PROPERTY:
            queries.extend(
                self._property_queries(constraint, base_query, entity_type)
            )
        elif constraint.type == ConstraintType.EVENT:
            queries.extend(
                self._event_queries(constraint, base_query, entity_type)
            )
        elif constraint.type == ConstraintType.LOCATION:
            queries.extend(
                self._location_queries(constraint, base_query, entity_type)
            )

        return queries

    def _name_pattern_queries(
        self,
        constraint: Constraint,
        base_query: str,
        entity_type: Optional[str],
    ) -> List[str]:
        """
        Generate queries for name pattern constraints.

        Only constraints whose value mentions "body part" (case
        insensitive) are handled; any other name pattern yields no extra
        queries.

        Args:
            constraint: The name pattern constraint.
            base_query: The base search query.
            entity_type: Optional entity type; when set it replaces
                ``base_query`` in the generated queries.

        Returns:
            One query for each of the first three body-part terms, or an
            empty list.
        """
        if "body part" not in constraint.value.lower():
            return []

        sample_parts = self._BODY_PARTS[:3]  # Sample a few
        if entity_type:
            return [f"{entity_type} {part}" for part in sample_parts]
        return [f"{base_query} {part} name" for part in sample_parts]

    def _property_queries(
        self,
        constraint: Constraint,
        base_query: str,
        entity_type: Optional[str],
    ) -> List[str]:
        """
        Generate queries for property constraints.

        Args:
            constraint: The property constraint.
            base_query: The base search query, used when ``entity_type``
                is empty or ``None``.
            entity_type: Optional entity type, preferred over
                ``base_query``.

        Returns:
            Three phrasings combining the subject and the constraint value.
        """
        base = entity_type or base_query
        return [
            f"{base} with {constraint.value}",
            f"{base} that {constraint.value}",
            f"{constraint.value} {base}",
        ]

    def _event_queries(
        self,
        constraint: Constraint,
        base_query: str,
        entity_type: Optional[str],
    ) -> List[str]:
        """
        Generate queries for event constraints.

        Args:
            constraint: The event constraint.
            base_query: The base search query, used when ``entity_type``
                is empty or ``None``.
            entity_type: Optional entity type, preferred over
                ``base_query``.

        Returns:
            Three event-oriented phrasings of the constraint value.
        """
        base = entity_type or base_query
        return [
            f"{base} {constraint.value} incident",
            f"{base} {constraint.value} accident",
            f"{constraint.value} at {base}",
        ]

    def _location_queries(
        self,
        constraint: Constraint,
        base_query: str,
        entity_type: Optional[str],
    ) -> List[str]:
        """
        Generate queries for location constraints.

        Args:
            constraint: The location constraint.
            base_query: The base search query.
            entity_type: Accepted for signature parity with the other
                query builders; not used here.

        Returns:
            Three location-oriented phrasings of the constraint value.
        """
        return [
            f"{constraint.value} {base_query}",
            f"{base_query} in {constraint.value}",
            f"{constraint.value} locations",
        ]

    def _cross_constraint_exploration(
        self,
        constraints: List[Constraint],
        base_query: str,
        entity_type: Optional[str],
    ) -> List[Candidate]:
        """
        Explore candidates satisfying multiple constraints.

        Args:
            constraints: Constraints to combine; only the first two are
                used.
            base_query: The base search query.
            entity_type: Optional entity type passed to candidate
                extraction.

        Returns:
            Candidates extracted from one combined search, or an empty
            list when fewer than two constraints are given or the combined
            query was already explored.
        """
        if len(constraints) < 2:
            return []

        # Combine top 2 constraints
        combined_query = self._combine_constraints_query(
            base_query, constraints
        )

        if (
            combined_query
            and combined_query.lower() not in self.explored_queries
        ):
            results = self._execute_search(combined_query)
            return self._extract_candidates_from_results(results, entity_type)

        return []

    def _combine_constraints_query(
        self, base_query: str, constraints: List[Constraint]
    ) -> Optional[str]:
        """
        Combine multiple constraints into a single query.

        Args:
            base_query: The base search query.
            constraints: Constraints to combine; only the first two are
                used.

        Returns:
            ``base_query`` followed by the two constraint values joined
            with ``" AND "``, or ``None`` when fewer than two constraints
            are given.
        """
        if len(constraints) < 2:
            return None

        constraint_values = [c.value for c in constraints[:2]]
        return f"{base_query} {' AND '.join(constraint_values)}"

    def _early_validate_candidates(
        self, candidates: List[Candidate], constraint: Constraint
    ) -> List[Candidate]:
        """
        Perform early validation of candidates against constraint.

        Args:
            candidates: Candidates extracted from a search.
            constraint: The constraint the search was run for.

        Returns:
            The input list unchanged when it is empty or the constraint is
            not a name pattern; otherwise a new list holding only the
            candidates whose name passes the quick name check.
        """
        if not candidates or constraint.type != ConstraintType.NAME_PATTERN:
            return candidates  # Only validate name patterns for now

        return [
            candidate
            for candidate in candidates
            if self._quick_name_validation(candidate.name, constraint)
        ]

    def _quick_name_validation(
        self, candidate_name: str, constraint: Constraint
    ) -> bool:
        """
        Quick validation of candidate name against constraint.

        Args:
            candidate_name: The candidate's name.
            constraint: The constraint to check against.

        Returns:
            For "body part" constraints, whether the lowercased name
            contains any body-part term; ``True`` for every other
            constraint.
        """
        if "body part" in constraint.value.lower():
            name_lower = candidate_name.lower()
            # NOTE: substring match, so e.g. "ear" also matches names that
            # merely contain those letters (such as "heart" or "year").
            return any(part in name_lower for part in self._BODY_PARTS)

        return True  # Default to accepting if can't validate

    def _rank_by_constraint_alignment(
        self,
        candidates: List[Candidate],
        constraints: List[Constraint],
        base_query: str,
    ) -> List[Candidate]:
        """
        Rank candidates by alignment with constraints.

        Each candidate is given a ``constraint_alignment_score`` attribute:
        the summed weight of every name pattern constraint its name
        passes. Candidates are first ordered by relevance, then stably
        re-sorted by that score, highest first, so relevance order is kept
        among equal scores.

        Args:
            candidates: Candidates to rank; each is mutated to carry its
                score.
            constraints: Constraints used for scoring.
            base_query: Query passed to the relevance ranking.

        Returns:
            A new list of the candidates in ranked order.
        """
        for candidate in candidates:
            # Simple scoring based on name pattern constraints
            candidate.constraint_alignment_score = sum(
                (
                    constraint.weight
                    for constraint in constraints
                    if constraint.type == ConstraintType.NAME_PATTERN
                    and self._quick_name_validation(candidate.name, constraint)
                ),
                0.0,
            )

        # Sort by constraint alignment, then by relevance
        ranked = self._rank_candidates_by_relevance(candidates, base_query)
        return sorted(
            ranked,
            key=lambda c: getattr(c, "constraint_alignment_score", 0.0),
            reverse=True,
        )

    def _basic_exploration(
        self, initial_query: str, entity_type: Optional[str], start_time: float
    ) -> ExplorationResult:
        """
        Fallback basic exploration when no constraints provided.

        Runs a single search on ``initial_query`` and de-duplicates the
        extracted candidates, so that ``unique_candidates`` is computed
        the same way as in ``explore``.

        Args:
            initial_query: The query to search for.
            entity_type: Optional entity type passed to candidate
                extraction.
            start_time: ``time.time()`` value taken when exploration
                began; used to compute the elapsed time.

        Returns:
            An ``ExplorationResult`` for the single search that was run.
        """
        results = self._execute_search(initial_query)
        candidates = self._deduplicate_candidates(
            self._extract_candidates_from_results(results, entity_type)
        )

        elapsed_time = time.time() - start_time

        return ExplorationResult(
            candidates=candidates[: self.max_candidates],
            total_searched=1,
            unique_candidates=len(candidates),
            exploration_paths=[f"Basic search: {initial_query}"],
            metadata={"strategy": "basic_fallback"},
            elapsed_time=elapsed_time,
            strategy_used=ExplorationStrategy.BREADTH_FIRST,
        )