Coverage for src / local_deep_research / advanced_search_system / candidate_exploration / constraint_guided_explorer.py: 11%

140 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-01-11 00:51 +0000

1""" 

2Constraint-guided candidate explorer implementation. 

3 

4This explorer uses constraints to guide the search process, prioritizing 

5searches that are likely to find candidates satisfying the constraints. 

6""" 

7 

8import time 

9from typing import List, Optional 

10 

11from loguru import logger 

12 

13from ..candidates.base_candidate import Candidate 

14from ..constraints.base_constraint import Constraint, ConstraintType 

15from .base_explorer import ( 

16 BaseCandidateExplorer, 

17 ExplorationResult, 

18 ExplorationStrategy, 

19) 

20 

21 

class ConstraintGuidedExplorer(BaseCandidateExplorer):
    """
    Constraint-guided candidate explorer.

    This explorer:
    1. Prioritizes searches based on constraint importance
    2. Uses constraint-specific search strategies
    3. Validates candidates against constraints early
    4. Focuses on constraint satisfaction over quantity

    NOTE(review): the search/extraction/dedup/ranking helpers and the
    ``max_candidates`` / ``explored_queries`` attributes used below are not
    defined in this class; presumably they come from BaseCandidateExplorer.
    """

    # Single source of truth for the "body part" name-pattern heuristic.
    # Used both to build search queries and to validate candidate names, so
    # the two code paths cannot drift apart.
    _BODY_PARTS = (
        "arm",
        "leg",
        "foot",
        "hand",
        "eye",
        "ear",
        "head",
        "tooth",
        "nose",
        "heart",
    )

    def __init__(
        self,
        *args,
        constraint_weight_threshold: float = 0.7,  # Focus on constraints above this weight
        early_validation: bool = True,  # Validate candidates during search
        **kwargs,
    ):
        """
        Initialize constraint-guided explorer.

        Args:
            *args: Positional arguments forwarded to the base explorer.
            constraint_weight_threshold: Focus on constraints above this weight
            early_validation: Whether to validate candidates during exploration
            **kwargs: Keyword arguments forwarded to the base explorer.
        """
        super().__init__(*args, **kwargs)
        # NOTE(review): stored for callers/subclasses; no method in this
        # class currently reads the threshold.
        self.constraint_weight_threshold = constraint_weight_threshold
        self.early_validation = early_validation

    def explore(
        self,
        initial_query: str,
        constraints: Optional[List[Constraint]] = None,
        entity_type: Optional[str] = None,
    ) -> ExplorationResult:
        """
        Explore candidates using constraint-guided strategy.

        Args:
            initial_query: The user's base search query.
            constraints: Constraints guiding the search. When empty or None
                the method falls back to a single basic search.
            entity_type: Optional entity type; forwarded to query generation
                and candidate extraction.

        Returns:
            ExplorationResult holding at most ``self.max_candidates``
            candidates ranked by constraint alignment, plus bookkeeping
            (search count, exploration paths, metadata, elapsed time).
        """
        start_time = time.time()
        logger.info(
            f"Starting constraint-guided exploration for: {initial_query}"
        )

        if not constraints:
            logger.warning(
                "No constraints provided - falling back to basic search"
            )
            return self._basic_exploration(
                initial_query, entity_type, start_time
            )

        all_candidates = []
        exploration_paths = []
        total_searched = 0

        # Prioritize constraints by weight and type
        prioritized_constraints = self._prioritize_constraints(constraints)

        # Search for each constraint
        for i, constraint in enumerate(prioritized_constraints, start=1):
            if not self._should_continue_exploration(
                start_time, len(all_candidates)
            ):
                break

            logger.info(
                f"Exploring constraint {i}/{len(prioritized_constraints)}: {constraint.value}"
            )

            # Generate constraint-specific queries
            constraint_queries = self._generate_constraint_queries(
                constraint, initial_query, entity_type
            )

            constraint_candidates = []

            for query in constraint_queries[:3]:  # Limit queries per constraint
                # Skip queries already recorded as explored (compared
                # case-insensitively via the lowercased form).
                if query.lower() in self.explored_queries:
                    continue

                results = self._execute_search(query)
                candidates = self._extract_candidates_from_results(
                    results, entity_type
                )

                # Early validation if enabled
                if self.early_validation:
                    validated_candidates = self._early_validate_candidates(
                        candidates, constraint
                    )
                    constraint_candidates.extend(validated_candidates)
                else:
                    constraint_candidates.extend(candidates)

                total_searched += 1
                # The path entry reports the raw extracted count, i.e. before
                # any early validation filtering.
                exploration_paths.append(
                    f"Constraint '{constraint.value}': {query} -> {len(candidates)} candidates"
                )

            all_candidates.extend(constraint_candidates)
            logger.info(
                f"Found {len(constraint_candidates)} candidates for constraint: {constraint.value}"
            )

        # Cross-constraint exploration
        if len(prioritized_constraints) > 1:
            cross_candidates = self._cross_constraint_exploration(
                prioritized_constraints[:2], initial_query, entity_type
            )
            all_candidates.extend(cross_candidates)
            exploration_paths.append(
                f"Cross-constraint search -> {len(cross_candidates)} candidates"
            )
            # NOTE(review): counted even when the combined query was skipped
            # as already explored; kept as-is to preserve reported totals.
            total_searched += 1

        # Process final results
        unique_candidates = self._deduplicate_candidates(all_candidates)
        ranked_candidates = self._rank_by_constraint_alignment(
            unique_candidates, constraints, initial_query
        )
        final_candidates = ranked_candidates[: self.max_candidates]

        elapsed_time = time.time() - start_time
        logger.info(
            f"Constraint-guided exploration completed: {len(final_candidates)} candidates in {elapsed_time:.1f}s"
        )

        return ExplorationResult(
            candidates=final_candidates,
            total_searched=total_searched,
            unique_candidates=len(unique_candidates),
            exploration_paths=exploration_paths,
            metadata={
                "strategy": "constraint_guided",
                "constraints_used": len(prioritized_constraints),
                "early_validation": self.early_validation,
                "entity_type": entity_type,
            },
            elapsed_time=elapsed_time,
            strategy_used=ExplorationStrategy.CONSTRAINT_GUIDED,
        )

    def generate_exploration_queries(
        self,
        base_query: str,
        found_candidates: List[Candidate],
        constraints: Optional[List[Constraint]] = None,
    ) -> List[str]:
        """
        Generate constraint-guided exploration queries.

        Args:
            base_query: Query the generated queries are built around.
            found_candidates: Candidates found so far. Accepted for interface
                compatibility; not used by this implementation.
            constraints: Constraints to derive queries from. Only the first
                three are used, in the order given.

        Returns:
            ``[base_query]`` when no constraints are given; otherwise up to
            two queries per constraint plus, when at least two constraints
            exist, one query combining the first two.
        """
        if not constraints:
            return [base_query]

        queries = []

        # Generate queries for each constraint
        for constraint in constraints[:3]:  # Limit to avoid too many queries
            constraint_queries = self._generate_constraint_queries(
                constraint, base_query
            )
            queries.extend(constraint_queries[:2])  # Top 2 per constraint

        # Generate queries combining multiple constraints
        if len(constraints) > 1:
            combined_query = self._combine_constraints_query(
                base_query, constraints[:2]
            )
            if combined_query:
                queries.append(combined_query)

        return queries

    def _prioritize_constraints(
        self, constraints: List[Constraint]
    ) -> List[Constraint]:
        """
        Prioritize constraints by weight and type.

        Constraints are ordered by weight, highest first. Constraints of
        equal weight are ordered by type priority, where a lower number in
        the table below means higher priority; types missing from the table
        rank last.

        Args:
            constraints: Constraints to order. The input list is not mutated.

        Returns:
            A new list with the constraints in search order.
        """
        type_priority = {
            ConstraintType.NAME_PATTERN: 1,
            ConstraintType.PROPERTY: 2,
            ConstraintType.EVENT: 3,
            ConstraintType.LOCATION: 4,
            ConstraintType.TEMPORAL: 5,
            ConstraintType.STATISTIC: 6,
            ConstraintType.COMPARISON: 7,
            ConstraintType.EXISTENCE: 8,
        }

        # Negate the weight instead of using reverse=True: reversing the
        # whole tuple would also flip the type tie-break and put the
        # lowest-priority types first among equally weighted constraints.
        return sorted(
            constraints,
            key=lambda c: (-c.weight, type_priority.get(c.type, 9)),
        )

    def _generate_constraint_queries(
        self,
        constraint: Constraint,
        base_query: str,
        entity_type: Optional[str] = None,
    ) -> List[str]:
        """
        Generate search queries specific to a constraint.

        The first query always pairs the constraint value with the entity
        type (when given) or the base query. Further queries are appended
        for NAME_PATTERN, PROPERTY, EVENT and LOCATION constraints; other
        constraint types yield only the first query.

        Args:
            constraint: Constraint to build queries for.
            base_query: Base query used when no entity type is given.
            entity_type: Optional entity type preferred over the base query.

        Returns:
            List of query strings, most generic first.
        """
        queries = []

        # Base constraint query
        if entity_type:
            queries.append(f"{entity_type} {constraint.value}")
        else:
            queries.append(f"{base_query} {constraint.value}")

        # Constraint-type specific queries
        if constraint.type == ConstraintType.NAME_PATTERN:
            queries.extend(
                self._name_pattern_queries(constraint, base_query, entity_type)
            )
        elif constraint.type == ConstraintType.PROPERTY:
            queries.extend(
                self._property_queries(constraint, base_query, entity_type)
            )
        elif constraint.type == ConstraintType.EVENT:
            queries.extend(
                self._event_queries(constraint, base_query, entity_type)
            )
        elif constraint.type == ConstraintType.LOCATION:
            queries.extend(
                self._location_queries(constraint, base_query, entity_type)
            )

        return queries

    def _name_pattern_queries(
        self,
        constraint: Constraint,
        base_query: str,
        entity_type: Optional[str],
    ) -> List[str]:
        """
        Generate queries for name pattern constraints.

        Only the "body part" pattern is recognized (case-insensitive
        substring of the constraint value); any other pattern yields no
        extra queries.

        Args:
            constraint: Name-pattern constraint.
            base_query: Base query used when no entity type is given.
            entity_type: Optional entity type preferred over the base query.

        Returns:
            One query for each of the first three body-part words, or an
            empty list.
        """
        queries = []

        if "body part" in constraint.value.lower():
            for part in self._BODY_PARTS[:3]:  # Sample a few
                if entity_type:
                    queries.append(f"{entity_type} {part}")
                else:
                    queries.append(f"{base_query} {part} name")

        return queries

    def _property_queries(
        self,
        constraint: Constraint,
        base_query: str,
        entity_type: Optional[str],
    ) -> List[str]:
        """
        Generate queries for property constraints.

        Args:
            constraint: Property constraint whose value is inserted verbatim.
            base_query: Fallback subject when ``entity_type`` is falsy.
            entity_type: Preferred subject of the queries.

        Returns:
            Three phrasings combining the subject and the constraint value.
        """
        base = entity_type or base_query
        return [
            f"{base} with {constraint.value}",
            f"{base} that {constraint.value}",
            f"{constraint.value} {base}",
        ]

    def _event_queries(
        self,
        constraint: Constraint,
        base_query: str,
        entity_type: Optional[str],
    ) -> List[str]:
        """
        Generate queries for event constraints.

        Args:
            constraint: Event constraint whose value is inserted verbatim.
            base_query: Fallback subject when ``entity_type`` is falsy.
            entity_type: Preferred subject of the queries.

        Returns:
            Three phrasings combining the subject and the constraint value.
        """
        base = entity_type or base_query
        return [
            f"{base} {constraint.value} incident",
            f"{base} {constraint.value} accident",
            f"{constraint.value} at {base}",
        ]

    def _location_queries(
        self,
        constraint: Constraint,
        base_query: str,
        entity_type: Optional[str],
    ) -> List[str]:
        """
        Generate queries for location constraints.

        Args:
            constraint: Location constraint whose value is inserted verbatim.
            base_query: Query combined with the location.
            entity_type: Accepted for signature parity with the other query
                builders; not used here.

        Returns:
            Three phrasings combining the location and the base query.
        """
        return [
            f"{constraint.value} {base_query}",
            f"{base_query} in {constraint.value}",
            f"{constraint.value} locations",
        ]

    def _cross_constraint_exploration(
        self,
        constraints: List[Constraint],
        base_query: str,
        entity_type: Optional[str],
    ) -> List[Candidate]:
        """
        Explore candidates satisfying multiple constraints.

        Runs one search on a query combining the first two constraints,
        unless that query (lowercased) is already in
        ``self.explored_queries``.

        Args:
            constraints: Constraints to combine; fewer than two yields no
                search.
            base_query: Base query the constraint values are appended to.
            entity_type: Optional entity type forwarded to extraction.

        Returns:
            Candidates extracted from the combined search, or an empty list
            when no search was run.
        """
        if len(constraints) < 2:
            return []

        # Combine top 2 constraints
        combined_query = self._combine_constraints_query(
            base_query, constraints
        )

        if (
            combined_query
            and combined_query.lower() not in self.explored_queries
        ):
            results = self._execute_search(combined_query)
            return self._extract_candidates_from_results(results, entity_type)

        return []

    def _combine_constraints_query(
        self, base_query: str, constraints: List[Constraint]
    ) -> Optional[str]:
        """
        Combine multiple constraints into a single query.

        Args:
            base_query: Query the constraint values are appended to.
            constraints: Constraints to combine; only the first two are used.

        Returns:
            ``"<base_query> <value1> AND <value2>"``, or None when fewer
            than two constraints are given.
        """
        if len(constraints) < 2:
            return None

        constraint_values = [c.value for c in constraints[:2]]
        return f"{base_query} {' AND '.join(constraint_values)}"

    def _early_validate_candidates(
        self, candidates: List[Candidate], constraint: Constraint
    ) -> List[Candidate]:
        """
        Perform early validation of candidates against constraint.

        Args:
            candidates: Candidates extracted from one search.
            constraint: Constraint the search was run for.

        Returns:
            ``candidates`` unchanged when it is empty or the constraint is
            not a NAME_PATTERN; otherwise a new list with only the
            candidates whose name passes the quick name check.
        """
        if not candidates or constraint.type != ConstraintType.NAME_PATTERN:
            return candidates  # Only validate name patterns for now

        return [
            candidate
            for candidate in candidates
            if self._quick_name_validation(candidate.name, constraint)
        ]

    def _quick_name_validation(
        self, candidate_name: str, constraint: Constraint
    ) -> bool:
        """
        Quick validation of candidate name against constraint.

        Args:
            candidate_name: Name of the candidate to check.
            constraint: Name-pattern constraint to check against.

        Returns:
            For "body part" constraints, True when the lowercased name
            contains any body-part word. For every other constraint, True.
        """
        if "body part" in constraint.value.lower():
            name_lower = candidate_name.lower()
            # Plain substring test: deliberately lenient, so a word such as
            # "ear" also matches inside longer words.
            return any(part in name_lower for part in self._BODY_PARTS)

        return True  # Default to accepting if can't validate

    def _rank_by_constraint_alignment(
        self,
        candidates: List[Candidate],
        constraints: List[Constraint],
        base_query: str,
    ) -> List[Candidate]:
        """
        Rank candidates by alignment with constraints.

        Each candidate gets a ``constraint_alignment_score`` attribute: the
        summed weight of every NAME_PATTERN constraint its name passes.

        Args:
            candidates: Candidates to score and order (scored in place).
            constraints: Constraints to score against.
            base_query: Query forwarded to the relevance ranking.

        Returns:
            Candidates sorted by alignment score, highest first.
        """
        for candidate in candidates:
            # Simple scoring based on constraint alignment
            score = 0.0

            # Score based on name pattern constraints
            for constraint in constraints:
                if constraint.type == ConstraintType.NAME_PATTERN:
                    if self._quick_name_validation(candidate.name, constraint):
                        score += constraint.weight

            candidate.constraint_alignment_score = score

        # Sort by constraint alignment, then by relevance. sorted() is
        # stable, so the order produced by the relevance ranking is kept
        # among candidates with equal alignment scores.
        ranked = self._rank_candidates_by_relevance(candidates, base_query)
        return sorted(
            ranked,
            key=lambda c: getattr(c, "constraint_alignment_score", 0.0),
            reverse=True,
        )

    def _basic_exploration(
        self, initial_query: str, entity_type: Optional[str], start_time: float
    ) -> ExplorationResult:
        """
        Fallback basic exploration when no constraints provided.

        Runs a single search on ``initial_query`` and de-duplicates the
        extracted candidates, so ``unique_candidates`` is computed the same
        way as in the constraint-guided path.

        Args:
            initial_query: Query to search for.
            entity_type: Optional entity type forwarded to extraction.
            start_time: ``time.time()`` value captured when exploration
                began; used to compute the elapsed time.

        Returns:
            ExplorationResult with at most ``self.max_candidates``
            candidates, tagged with the "basic_fallback" strategy.
        """
        results = self._execute_search(initial_query)
        candidates = self._extract_candidates_from_results(results, entity_type)
        unique_candidates = self._deduplicate_candidates(candidates)

        elapsed_time = time.time() - start_time

        return ExplorationResult(
            candidates=unique_candidates[: self.max_candidates],
            total_searched=1,
            unique_candidates=len(unique_candidates),
            exploration_paths=[f"Basic search: {initial_query}"],
            metadata={"strategy": "basic_fallback"},
            elapsed_time=elapsed_time,
            strategy_used=ExplorationStrategy.BREADTH_FIRST,
        )