Coverage for src/local_deep_research/advanced_search_system/constraint_checking/dual_confidence_checker.py: 90%
116 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
"""
Dual confidence constraint checker implementation.

This implementation uses dual confidence scoring (positive/negative/uncertainty)
to evaluate constraints and make rejection decisions.
"""
import re
from typing import Dict, List, Optional, Tuple

from loguru import logger

from ..candidates.base_candidate import Candidate
from ..constraints.base_constraint import Constraint
from .base_constraint_checker import (
    BaseConstraintChecker,
    ConstraintCheckResult,
)
from .evidence_analyzer import ConstraintEvidence, EvidenceAnalyzer
class DualConfidenceChecker(BaseConstraintChecker):
    """
    Constraint checker using dual confidence scoring.

    This checker:
    1. Analyzes evidence using positive/negative/uncertainty scores
    2. Makes rejection decisions based on confidence thresholds
    3. Provides detailed scoring breakdown
    """

    def __init__(
        self,
        *args,
        negative_threshold: float = 0.25,  # Reject if negative evidence > 25%
        positive_threshold: float = 0.4,  # Reject if positive evidence < 40%
        uncertainty_penalty: float = 0.2,
        negative_weight: float = 0.5,
        uncertainty_threshold: float = 0.6,  # Re-evaluate if uncertainty > 60%
        max_reevaluations: int = 2,  # Maximum re-evaluation rounds
        **kwargs,
    ):
        """
        Initialize dual confidence checker.

        Args:
            negative_threshold: Threshold for negative evidence rejection
            positive_threshold: Minimum positive evidence required
            uncertainty_penalty: Penalty for uncertain evidence
            negative_weight: Weight for negative evidence in scoring
            uncertainty_threshold: Re-evaluate if uncertainty exceeds this
            max_reevaluations: Maximum number of re-evaluation rounds
        """
        super().__init__(*args, **kwargs)

        self.negative_threshold = negative_threshold
        self.positive_threshold = positive_threshold
        self.uncertainty_penalty = uncertainty_penalty
        self.negative_weight = negative_weight
        self.uncertainty_threshold = uncertainty_threshold
        self.max_reevaluations = max_reevaluations

        # Shared analyzer that scores individual evidence items with
        # positive/negative/uncertainty confidence (uses the same LLM).
        self.evidence_analyzer = EvidenceAnalyzer(self.model)

    def check_candidate(
        self,
        candidate: Candidate,
        constraints: List[Constraint],
        original_query: Optional[str] = None,
    ) -> ConstraintCheckResult:
        """Check candidate using dual confidence analysis with LLM pre-screening.

        Args:
            candidate: Candidate answer to evaluate.
            constraints: Constraints the candidate must satisfy.
            original_query: The user's original question. When provided, a
                cheap single-call LLM pre-screen can reject obviously-bad
                answers before any per-constraint evidence gathering.

        Returns:
            ConstraintCheckResult with per-constraint scores and an overall
            weighted score (0.0 when the candidate is rejected).
        """
        logger.info(f"Checking candidate: {candidate.name} (dual confidence)")

        # LLM PRE-SCREENING: Check all constraints in one call to save SearXNG capacity
        pre_screen_result = self._llm_prescreen_candidate(
            candidate, constraints, original_query
        )
        if pre_screen_result["should_reject"]:
            logger.info(
                f"🚫 LLM pre-screen rejected {candidate.name}: {pre_screen_result['reason']}"
            )
            return ConstraintCheckResult(
                candidate=candidate,
                constraint_scores={},
                should_reject=True,
                rejection_reason=pre_screen_result["reason"],
                total_score=0.0,
                detailed_results=pre_screen_result["detailed_results"],
            )

        constraint_scores = {}
        detailed_results = []
        rejection_reason = None
        should_reject = False

        for constraint in constraints:
            # Perform initial evaluation with re-evaluation for uncertain constraints
            result = self._evaluate_constraint_with_reevaluation(
                candidate, constraint
            )

            avg_positive = result["positive"]
            avg_negative = result["negative"]
            avg_uncertainty = result["uncertainty"]
            score = result["score"]
            reevaluation_count = result.get("reevaluation_count", 0)

            # Check for rejection based on final results
            reject, reason = self.should_reject_candidate_from_averages(
                candidate, constraint, avg_positive, avg_negative
            )

            if reject and not should_reject:  # Only record first rejection
                should_reject = True
                rejection_reason = reason

            # Store results
            constraint_scores[constraint.value] = {
                "total": score,
                "positive": avg_positive,
                "negative": avg_negative,
                "uncertainty": avg_uncertainty,
                "weight": constraint.weight,
                "reevaluation_count": reevaluation_count,
            }

            detailed_results.append(
                {
                    "constraint": constraint.value,
                    "score": score,
                    "positive": avg_positive,
                    "negative": avg_negative,
                    "uncertainty": avg_uncertainty,
                    "weight": constraint.weight,
                    "type": constraint.type.value,
                    "reevaluation_count": reevaluation_count,
                }
            )

            # Log detailed result with re-evaluation info
            self._log_constraint_result_detailed(
                candidate,
                constraint,
                score,
                avg_positive,
                avg_negative,
                avg_uncertainty,
                reevaluation_count,
            )

        # Calculate total score: rejected candidates (and empty constraint
        # lists) score 0.0; otherwise combine per-constraint scores using
        # the constraint weights.
        if should_reject or not detailed_results:
            total_score = 0.0
        else:
            weights = [r["weight"] for r in detailed_results]
            scores = [r["score"] for r in detailed_results]
            total_score = self._calculate_weighted_score(scores, weights)

        logger.info(f"Final score for {candidate.name}: {total_score:.2%}")

        return ConstraintCheckResult(
            candidate=candidate,
            total_score=total_score,
            constraint_scores=constraint_scores,
            should_reject=should_reject,
            rejection_reason=rejection_reason,
            detailed_results=detailed_results,
        )

    def _evaluate_constraint_with_reevaluation(
        self, candidate: Candidate, constraint: Constraint
    ) -> Dict:
        """Evaluate constraint with potential re-evaluation for uncertain results.

        Gathers evidence, scores it with dual confidence, and repeats (with
        freshly gathered evidence) up to ``max_reevaluations`` times while
        average uncertainty exceeds ``uncertainty_threshold`` and the
        candidate is not already clearly rejectable.

        Returns:
            Dict with keys "positive", "negative", "uncertainty", "score",
            "evidence_list", and "reevaluation_count".
        """
        reevaluation_count = 0
        # Defaults returned if the loop body never runs (possible only when
        # max_reevaluations is configured below 0): fully uncertain result.
        # Previously these names were referenced unbound in that path.
        avg_positive = 0.0
        avg_negative = 0.0
        avg_uncertainty = 1.0
        score = 0.5 - self.uncertainty_penalty
        evidence_list: List = []

        while reevaluation_count <= self.max_reevaluations:
            # Gather evidence (fresh each time for re-evaluation)
            evidence_list = self._gather_evidence_for_constraint(
                candidate, constraint
            )

            if not evidence_list:
                # No evidence found: fully uncertain, mid score minus the
                # uncertainty penalty.
                return {
                    "positive": 0.0,
                    "negative": 0.0,
                    "uncertainty": 1.0,
                    "score": 0.5 - self.uncertainty_penalty,
                    "evidence_list": [],
                    "reevaluation_count": reevaluation_count,
                }

            # Analyze with dual confidence
            dual_evidence = [
                self.evidence_analyzer.analyze_evidence_dual_confidence(
                    e, constraint
                )
                for e in evidence_list
            ]

            # Calculate averages over all evidence items
            n = len(dual_evidence)
            avg_positive = (
                sum(e.positive_confidence for e in dual_evidence) / n
            )
            avg_negative = (
                sum(e.negative_confidence for e in dual_evidence) / n
            )
            avg_uncertainty = sum(e.uncertainty for e in dual_evidence) / n

            # Calculate combined score for this round
            score = self.evidence_analyzer.evaluate_evidence_list(
                evidence_list,
                constraint,
                self.uncertainty_penalty,
                self.negative_weight,
            )

            # Re-evaluate only while rounds remain, uncertainty is high, and
            # the candidate is not already clearly rejectable (no point in
            # spending more searches on a lost cause).
            if (
                reevaluation_count < self.max_reevaluations
                and avg_uncertainty > self.uncertainty_threshold
                and not self._should_early_reject(avg_positive, avg_negative)
            ):
                reevaluation_count += 1
                logger.info(
                    f"🔄 Re-evaluating {candidate.name} | {constraint.value} "
                    f"(round {reevaluation_count}) - high uncertainty: {avg_uncertainty:.0%}"
                )
                continue

            # Final result or early rejection
            if reevaluation_count > 0:
                logger.info(
                    f"✅ Final evaluation for {candidate.name} | {constraint.value} "
                    f"after {reevaluation_count} re-evaluation(s)"
                )

            break

        return {
            "positive": avg_positive,
            "negative": avg_negative,
            "uncertainty": avg_uncertainty,
            "score": score,
            "evidence_list": evidence_list,
            "reevaluation_count": reevaluation_count,
        }

    def _should_early_reject(
        self, avg_positive: float, avg_negative: float
    ) -> bool:
        """Check if candidate should be rejected early (before re-evaluation)."""
        return (
            avg_negative > self.negative_threshold
            or avg_positive < self.positive_threshold
        )

    def should_reject_candidate_from_averages(
        self,
        candidate: Candidate,
        constraint: Constraint,
        avg_positive: float,
        avg_negative: float,
    ) -> Tuple[bool, str]:
        """Determine rejection based on average confidence scores.

        Returns:
            (should_reject, reason) — reason is "" when not rejected.
        """
        # PRIMARY REJECTION: High negative evidence
        if avg_negative > self.negative_threshold:
            reason = f"High negative evidence ({avg_negative:.0%}) for constraint '{constraint.value}'"
            logger.info(f"❌ REJECTION: {candidate.name} - {reason}")
            return True, reason

        # SECONDARY REJECTION: Low positive evidence
        if avg_positive < self.positive_threshold:
            reason = f"Insufficient positive evidence ({avg_positive:.0%}) for constraint '{constraint.value}'"
            logger.info(f"❌ REJECTION: {candidate.name} - {reason}")
            return True, reason

        return False, ""

    def should_reject_candidate(
        self,
        candidate: Candidate,
        constraint: Constraint,
        dual_evidence: List[ConstraintEvidence],
    ) -> Tuple[bool, str]:
        """Determine rejection based on dual confidence scores.

        Averages the evidence list and delegates to
        ``should_reject_candidate_from_averages`` so both entry points share
        a single set of threshold rules (previously duplicated verbatim).
        """
        if not dual_evidence:
            return False, ""

        # Calculate averages
        n = len(dual_evidence)
        avg_positive = sum(e.positive_confidence for e in dual_evidence) / n
        avg_negative = sum(e.negative_confidence for e in dual_evidence) / n

        return self.should_reject_candidate_from_averages(
            candidate, constraint, avg_positive, avg_negative
        )

    def _log_constraint_result_detailed(
        self,
        candidate: Candidate,
        constraint: Constraint,
        score: float,
        positive: float,
        negative: float,
        uncertainty: float,
        reevaluation_count: int = 0,
    ):
        """Log one summary line for a constraint evaluation."""
        # ✓ strong (>= 80%), ○ middling (>= 50%), ✗ weak
        symbol = "✓" if score >= 0.8 else "○" if score >= 0.5 else "✗"

        # Add re-evaluation indicator
        reeval_indicator = (
            f" [R{reevaluation_count}]" if reevaluation_count > 0 else ""
        )

        logger.info(
            f"{symbol} {candidate.name} | {constraint.value}: {int(score * 100)}% "
            f"(+{int(positive * 100)}% -{int(negative * 100)}% ?{int(uncertainty * 100)}%){reeval_indicator}"
        )

    def _llm_prescreen_candidate(
        self, candidate, constraints, original_query=None
    ) -> Dict:
        """Simple quality check for answer candidates.

        Asks the LLM for a single 0-100 rating of the candidate as an answer
        to the original query; scores below 50 are rejected before any
        per-constraint searches run. Fails open: on parse failure or LLM
        error the candidate is accepted.
        """
        if not original_query:
            return {
                "should_reject": False,
                "reason": "No original query provided",
                "detailed_results": [],
            }

        prompt = f"""Question: {original_query}
Answer: {candidate.name}

Is this a good answer to the question? Rate 0-100 where:
- 90-100: Excellent direct answer
- 70-89: Good answer
- 50-69: Partial answer
- 30-49: Weak answer
- 0-29: Poor/wrong answer

Just give the number:"""

        try:
            response = self.model.invoke(prompt).content

            # Parse the first 1-3 digit run as the score; clamp to 100 in
            # case the model answers outside the requested range.
            confidence_match = re.search(r"(\d{1,3})", response.strip())

            if confidence_match:
                quality_score = min(int(confidence_match.group(1)), 100)

                # Accept good answers (50+ out of 100)
                if quality_score >= 50:
                    return {
                        "should_reject": False,
                        "reason": f"Good answer quality: {quality_score}%",
                        "detailed_results": [
                            {
                                "constraint": "answer_quality",
                                "positive_confidence": quality_score / 100.0,
                                "source": "answer_quality_check",
                            }
                        ],
                    }
                return {
                    "should_reject": True,
                    "reason": f"Poor answer quality: {quality_score}%",
                    "detailed_results": [
                        {
                            "constraint": "answer_quality",
                            "negative_confidence": (100 - quality_score)
                            / 100.0,
                            "source": "answer_quality_check",
                        }
                    ],
                }

            # Parsing failed - accept by default
            return {
                "should_reject": False,
                "reason": "Could not parse quality score - accepting",
                "detailed_results": [],
            }

        except Exception:
            # Fail open: pre-screening is an optimization, not a gate.
            # logger.exception records the traceback for diagnosis.
            logger.exception(
                f"Fast LLM pre-screening failed for {candidate.name}"
            )
            return {
                "should_reject": False,
                "reason": "",
                "detailed_results": [],
            }