Coverage for src/local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py: 10%

116 statements  

coverage.py v7.12.0, created at 2026-01-11 00:51 +0000

1""" 

2Dual confidence constraint checker implementation. 

3 

4This implementation uses dual confidence scoring (positive/negative/uncertainty) 

5to evaluate constraints and make rejection decisions. 

6""" 

7 

8from typing import Dict, List, Tuple 

9 

10from loguru import logger 

11 

12from ..candidates.base_candidate import Candidate 

13from ..constraints.base_constraint import Constraint 

14from .base_constraint_checker import ( 

15 BaseConstraintChecker, 

16 ConstraintCheckResult, 

17) 

18from .evidence_analyzer import ConstraintEvidence, EvidenceAnalyzer 

19 

20 

21class DualConfidenceChecker(BaseConstraintChecker): 

22 """ 

23 Constraint checker using dual confidence scoring. 

24 

25 This checker: 

26 1. Analyzes evidence using positive/negative/uncertainty scores 

27 2. Makes rejection decisions based on confidence thresholds 

28 3. Provides detailed scoring breakdown 

29 """ 

30 

31 def __init__( 

32 self, 

33 *args, 

34 negative_threshold: float = 0.25, # Reject if negative evidence > 25% 

35 positive_threshold: float = 0.4, # Reject if positive evidence < 40% 

36 uncertainty_penalty: float = 0.2, 

37 negative_weight: float = 0.5, 

38 uncertainty_threshold: float = 0.6, # Re-evaluate if uncertainty > 60% 

39 max_reevaluations: int = 2, # Maximum re-evaluation rounds 

40 **kwargs, 

41 ): 

42 """ 

43 Initialize dual confidence checker. 

44 

45 Args: 

46 negative_threshold: Threshold for negative evidence rejection 

47 positive_threshold: Minimum positive evidence required 

48 uncertainty_penalty: Penalty for uncertain evidence 

49 negative_weight: Weight for negative evidence in scoring 

50 uncertainty_threshold: Re-evaluate if uncertainty exceeds this 

51 max_reevaluations: Maximum number of re-evaluation rounds 

52 """ 

53 super().__init__(*args, **kwargs) 

54 

55 self.negative_threshold = negative_threshold 

56 self.positive_threshold = positive_threshold 

57 self.uncertainty_penalty = uncertainty_penalty 

58 self.negative_weight = negative_weight 

59 self.uncertainty_threshold = uncertainty_threshold 

60 self.max_reevaluations = max_reevaluations 

61 

62 # Initialize evidence analyzer 

63 self.evidence_analyzer = EvidenceAnalyzer(self.model) 

64 

65 def check_candidate( 

66 self, 

67 candidate: Candidate, 

68 constraints: List[Constraint], 

69 original_query: str = None, 

70 ) -> ConstraintCheckResult: 

71 """Check candidate using dual confidence analysis with LLM pre-screening.""" 

72 logger.info(f"Checking candidate: {candidate.name} (dual confidence)") 

73 

74 # LLM PRE-SCREENING: Check all constraints in one call to save SearXNG capacity 

75 pre_screen_result = self._llm_prescreen_candidate( 

76 candidate, constraints, original_query 

77 ) 

78 if pre_screen_result["should_reject"]: 

79 logger.info( 

80 f"🚫 LLM pre-screen rejected {candidate.name}: {pre_screen_result['reason']}" 

81 ) 

82 return ConstraintCheckResult( 

83 should_reject=True, 

84 rejection_reason=pre_screen_result["reason"], 

85 total_score=0.0, 

86 detailed_results=pre_screen_result["detailed_results"], 

87 ) 

88 

89 constraint_scores = {} 

90 detailed_results = [] 

91 rejection_reason = None 

92 should_reject = False 

93 

94 for constraint in constraints: 

95 # Perform initial evaluation with re-evaluation for uncertain constraints 

96 result = self._evaluate_constraint_with_reevaluation( 

97 candidate, constraint 

98 ) 

99 

100 avg_positive = result["positive"] 

101 avg_negative = result["negative"] 

102 avg_uncertainty = result["uncertainty"] 

103 score = result["score"] 

104 reevaluation_count = result.get("reevaluation_count", 0) 

105 

106 # Check for rejection based on final results 

107 reject, reason = self.should_reject_candidate_from_averages( 

108 candidate, constraint, avg_positive, avg_negative 

109 ) 

110 

111 if reject and not should_reject: # Only record first rejection 

112 should_reject = True 

113 rejection_reason = reason 

114 

115 # Store results 

116 constraint_scores[constraint.value] = { 

117 "total": score, 

118 "positive": avg_positive, 

119 "negative": avg_negative, 

120 "uncertainty": avg_uncertainty, 

121 "weight": constraint.weight, 

122 "reevaluation_count": reevaluation_count, 

123 } 

124 

125 detailed_results.append( 

126 { 

127 "constraint": constraint.value, 

128 "score": score, 

129 "positive": avg_positive, 

130 "negative": avg_negative, 

131 "uncertainty": avg_uncertainty, 

132 "weight": constraint.weight, 

133 "type": constraint.type.value, 

134 "reevaluation_count": reevaluation_count, 

135 } 

136 ) 

137 

138 # Log detailed result with re-evaluation info 

139 self._log_constraint_result_detailed( 

140 candidate, 

141 constraint, 

142 score, 

143 avg_positive, 

144 avg_negative, 

145 avg_uncertainty, 

146 reevaluation_count, 

147 ) 

148 

149 # Calculate total score 

150 if should_reject: 

151 total_score = 0.0 

152 else: 

153 if detailed_results: 

154 weights = [r["weight"] for r in detailed_results] 

155 scores = [r["score"] for r in detailed_results] 

156 total_score = self._calculate_weighted_score(scores, weights) 

157 else: 

158 total_score = 0.0 

159 

160 logger.info(f"Final score for {candidate.name}: {total_score:.2%}") 

161 

162 return ConstraintCheckResult( 

163 candidate=candidate, 

164 total_score=total_score, 

165 constraint_scores=constraint_scores, 

166 should_reject=should_reject, 

167 rejection_reason=rejection_reason, 

168 detailed_results=detailed_results, 

169 ) 
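
    # NOTE: _calculate_weighted_score() (used above) and
    # _gather_evidence_for_constraint() (used below) are not defined in this
    # module; they are presumably inherited from BaseConstraintChecker.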

    def _evaluate_constraint_with_reevaluation(
        self, candidate: Candidate, constraint: Constraint
    ) -> Dict:
        """Evaluate constraint with potential re-evaluation for uncertain results."""
        reevaluation_count = 0
        evidence_list = []

        while reevaluation_count <= self.max_reevaluations:
            # Gather evidence (fresh each time for re-evaluation)
            evidence_list = self._gather_evidence_for_constraint(
                candidate, constraint
            )

            if not evidence_list:
                # No evidence found
                return {
                    "positive": 0.0,
                    "negative": 0.0,
                    "uncertainty": 1.0,
                    "score": 0.5 - self.uncertainty_penalty,
                    "evidence_list": [],
                    "reevaluation_count": reevaluation_count,
                }

            # Analyze with dual confidence
            dual_evidence = [
                self.evidence_analyzer.analyze_evidence_dual_confidence(
                    e, constraint
                )
                for e in evidence_list
            ]

            # Calculate averages
            avg_positive = sum(
                e.positive_confidence for e in dual_evidence
            ) / len(dual_evidence)
            avg_negative = sum(
                e.negative_confidence for e in dual_evidence
            ) / len(dual_evidence)
            avg_uncertainty = sum(e.uncertainty for e in dual_evidence) / len(
                dual_evidence
            )

            # Calculate score
            score = self.evidence_analyzer.evaluate_evidence_list(
                evidence_list,
                constraint,
                self.uncertainty_penalty,
                self.negative_weight,
            )

            # Check if we need re-evaluation
            if (
                reevaluation_count < self.max_reevaluations
                and avg_uncertainty > self.uncertainty_threshold
                and not self._should_early_reject(avg_positive, avg_negative)
            ):
                reevaluation_count += 1
                logger.info(
                    f"🔄 Re-evaluating {candidate.name} | {constraint.value} "
                    f"(round {reevaluation_count}) - high uncertainty: {avg_uncertainty:.0%}"
                )
                continue
            else:
                # Final result or early rejection
                if reevaluation_count > 0:
                    logger.info(
                        f"✅ Final evaluation for {candidate.name} | {constraint.value} "
                        f"after {reevaluation_count} re-evaluation(s)"
                    )

                return {
                    "positive": avg_positive,
                    "negative": avg_negative,
                    "uncertainty": avg_uncertainty,
                    "score": score,
                    "evidence_list": evidence_list,
                    "reevaluation_count": reevaluation_count,
                }

        # Should not reach here, but fallback
        return {
            "positive": avg_positive,
            "negative": avg_negative,
            "uncertainty": avg_uncertainty,
            "score": score,
            "evidence_list": evidence_list,
            "reevaluation_count": reevaluation_count,
        }

    def _should_early_reject(
        self, avg_positive: float, avg_negative: float
    ) -> bool:
        """Check if candidate should be rejected early (before re-evaluation)."""
        return (
            avg_negative > self.negative_threshold
            or avg_positive < self.positive_threshold
        )

    def should_reject_candidate_from_averages(
        self,
        candidate: Candidate,
        constraint: Constraint,
        avg_positive: float,
        avg_negative: float,
    ) -> Tuple[bool, str]:
        """Determine rejection based on average confidence scores."""
        # PRIMARY REJECTION: High negative evidence
        if avg_negative > self.negative_threshold:
            reason = f"High negative evidence ({avg_negative:.0%}) for constraint '{constraint.value}'"
            logger.info(f"❌ REJECTION: {candidate.name} - {reason}")
            return True, reason

        # SECONDARY REJECTION: Low positive evidence
        if avg_positive < self.positive_threshold:
            reason = f"Insufficient positive evidence ({avg_positive:.0%}) for constraint '{constraint.value}'"
            logger.info(f"❌ REJECTION: {candidate.name} - {reason}")
            return True, reason

        return False, ""
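
    # Worked example for the rejection rules above (illustrative numbers): with
    # the defaults negative_threshold=0.25 and positive_threshold=0.4, averages
    # of positive=0.55 / negative=0.30 fail the negative check (0.30 > 0.25),
    # and positive=0.35 / negative=0.10 fail the positive check (0.35 < 0.40);
    # only positive >= 0.40 combined with negative <= 0.25 passes both.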

    def should_reject_candidate(
        self,
        candidate: Candidate,
        constraint: Constraint,
        dual_evidence: List[ConstraintEvidence],
    ) -> Tuple[bool, str]:
        """Determine rejection based on dual confidence scores."""
        if not dual_evidence:
            return False, ""

        # Calculate averages
        avg_positive = sum(e.positive_confidence for e in dual_evidence) / len(
            dual_evidence
        )
        avg_negative = sum(e.negative_confidence for e in dual_evidence) / len(
            dual_evidence
        )

        # PRIMARY REJECTION: High negative evidence
        if avg_negative > self.negative_threshold:
            reason = f"High negative evidence ({avg_negative:.0%}) for constraint '{constraint.value}'"
            logger.info(f"❌ REJECTION: {candidate.name} - {reason}")
            return True, reason

        # SECONDARY REJECTION: Low positive evidence
        if avg_positive < self.positive_threshold:
            reason = f"Insufficient positive evidence ({avg_positive:.0%}) for constraint '{constraint.value}'"
            logger.info(f"❌ REJECTION: {candidate.name} - {reason}")
            return True, reason

        return False, ""

    def _log_constraint_result_detailed(
        self,
        candidate,
        constraint,
        score,
        positive,
        negative,
        uncertainty,
        reevaluation_count=0,
    ):
        """Log detailed constraint result."""
        symbol = "✓" if score >= 0.8 else "○" if score >= 0.5 else "✗"

        # Add re-evaluation indicator
        reeval_indicator = (
            f" [R{reevaluation_count}]" if reevaluation_count > 0 else ""
        )

        logger.info(
            f"{symbol} {candidate.name} | {constraint.value}: {int(score * 100)}% "
            f"(+{int(positive * 100)}% -{int(negative * 100)}% ?{int(uncertainty * 100)}%){reeval_indicator}"
        )

    def _llm_prescreen_candidate(
        self, candidate, constraints, original_query=None
    ):
        """Simple quality check for answer candidates."""

        if not original_query:
            return {
                "should_reject": False,
                "reason": "No original query provided",
                "detailed_results": [],
            }

        prompt = f"""Question: {original_query}
Answer: {candidate.name}

Is this a good answer to the question? Rate 0-100 where:
- 90-100: Excellent direct answer
- 70-89: Good answer
- 50-69: Partial answer
- 30-49: Weak answer
- 0-29: Poor/wrong answer

Just give the number:"""

        try:
            response = self.model.generate(prompt)

            # Parse confidence score
            import re

            confidence_match = re.search(r"(\d{1,3})", response.strip())

            if confidence_match:
                quality_score = int(confidence_match.group(1))

                # Accept good answers (50+ out of 100)
                if quality_score >= 50:
                    return {
                        "should_reject": False,
                        "reason": f"Good answer quality: {quality_score}%",
                        "detailed_results": [
                            {
                                "constraint": "answer_quality",
                                "positive_confidence": quality_score / 100.0,
                                "source": "answer_quality_check",
                            }
                        ],
                    }
                else:
                    return {
                        "should_reject": True,
                        "reason": f"Poor answer quality: {quality_score}%",
                        "detailed_results": [
                            {
                                "constraint": "answer_quality",
                                "negative_confidence": (100 - quality_score)
                                / 100.0,
                                "source": "answer_quality_check",
                            }
                        ],
                    }

            # Parsing failed - accept by default
            return {
                "should_reject": False,
                "reason": "Could not parse quality score - accepting",
                "detailed_results": [],
            }

        except Exception as e:
            logger.warning(
                f"Fast LLM pre-screening failed for {candidate.name}: {e}"
            )
            return {
                "should_reject": False,
                "reason": "",
                "detailed_results": [],
            }
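
For orientation, here is a minimal usage sketch. It is illustrative only: it assumes the package is importable as local_deep_research, that BaseConstraintChecker accepts the LLM via a model keyword argument (this file only shows that the checker reads self.model and calls self.model.generate(...)), and that llm, candidate, and constraints stand in for real instances of your model wrapper, Candidate, and Constraint.

from local_deep_research.advanced_search_system.constraint_checking.dual_confidence_checker import (
    DualConfidenceChecker,
)

# Hypothetical inputs: an LLM wrapper exposing .generate(prompt) -> str, plus
# Candidate/Constraint objects built elsewhere in the search system.
checker = DualConfidenceChecker(
    model=llm,
    negative_threshold=0.25,  # reject when average negative evidence exceeds 25%
    positive_threshold=0.4,  # reject when average positive evidence is below 40%
)

result = checker.check_candidate(
    candidate=candidate,  # Candidate (the checker reads .name)
    constraints=constraints,  # List[Constraint] (.value, .weight, .type are read)
    original_query="Which city hosted the 1900 Summer Olympics?",
)

if result.should_reject:
    print(f"Rejected: {result.rejection_reason}")
else:
    print(f"Accepted with total score {result.total_score:.0%}")

With the default thresholds, evidence averaging 35% positive and 10% negative is still rejected (insufficient positive evidence, 35% < 40%) even though the negative-evidence check passes.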