Coverage for src/local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py: 59%

61 statements  

coverage.py v7.12.0, created at 2026-01-11 00:51 +0000

1""" 

2Evidence analysis for constraint checking. 

3 

4This module provides dual confidence evidence analysis that separates 

5positive evidence, negative evidence, and uncertainty. 

6""" 

7 

8import re 

9from dataclasses import dataclass 

10from typing import Dict, List 

11 

12from langchain_core.language_models import BaseChatModel 

13from loguru import logger 

14 

15from ..constraints.base_constraint import Constraint 

16 

17 

18@dataclass 

19class ConstraintEvidence: 

20 """Evidence for a constraint with dual confidence scores.""" 

21 

22 positive_confidence: float # How sure we are the constraint IS satisfied 

23 negative_confidence: ( 

24 float # How sure we are the constraint is NOT satisfied 

25 ) 

26 uncertainty: float # How uncertain we are (neither positive nor negative) 

27 evidence_text: str 

28 source: str 
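
# Illustrative example (not part of the analyzed module): a value this
# dataclass is meant to hold, with the three confidence scores summing to
# roughly 1.0:
#
#     ConstraintEvidence(
#         positive_confidence=0.7,
#         negative_confidence=0.1,
#         uncertainty=0.2,
#         evidence_text="Snippet of the retrieved source text...",
#         source="search",
#     )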


class EvidenceAnalyzer:
    """
    Analyzes evidence using dual confidence scoring.

    This approach separates:
    - Positive confidence: Evidence that constraint IS satisfied
    - Negative confidence: Evidence that constraint is NOT satisfied
    - Uncertainty: Lack of clear evidence either way
    """

    def __init__(self, model: BaseChatModel):
        """Initialize the evidence analyzer."""
        self.model = model

    def analyze_evidence_dual_confidence(
        self, evidence: Dict, constraint: Constraint
    ) -> ConstraintEvidence:
        """Analyze evidence to extract dual confidence scores."""
        text = evidence.get("text", "")

        # Use LLM to analyze evidence with dual confidence
        prompt = f"""
Analyze this evidence for the constraint "{constraint.value}" (type: {constraint.type.value}).

Evidence:
{text[:1000]}

Provide three confidence scores (0-1):
1. POSITIVE_CONFIDENCE: How confident are you that this constraint IS satisfied?
2. NEGATIVE_CONFIDENCE: How confident are you that this constraint is NOT satisfied?
3. UNCERTAINTY: How uncertain are you (lack of clear evidence)?

The three scores should approximately sum to 1.0.

Format:
POSITIVE: [score]
NEGATIVE: [score]
UNCERTAINTY: [score]
"""

        try:
            response = self.model.invoke(prompt).content

            # Extract scores
            positive = self._extract_score(response, "POSITIVE")
            negative = self._extract_score(response, "NEGATIVE")
            uncertainty = self._extract_score(response, "UNCERTAINTY")

            # Normalize if needed
            total = positive + negative + uncertainty
            # coverage.py note: this condition was always true in the recorded
            # test run, so the else branch below was never executed.
            if total > 0:
                positive /= total
                negative /= total
                uncertainty /= total
            else:
                # Default to high uncertainty
                uncertainty = 0.8
                positive = 0.1
                negative = 0.1

            return ConstraintEvidence(
                positive_confidence=positive,
                negative_confidence=negative,
                uncertainty=uncertainty,
                evidence_text=text[:500],
                source=evidence.get("source", "search"),
            )

        except Exception:
            logger.exception("Error analyzing evidence")
            # Default to high uncertainty
            return ConstraintEvidence(
                positive_confidence=0.1,
                negative_confidence=0.1,
                uncertainty=0.8,
                evidence_text=text[:500],
                source=evidence.get("source", "search"),
            )
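
    # Worked example (illustrative, not in the original source): if the model
    # returned POSITIVE 0.6, NEGATIVE 0.2, UNCERTAINTY 0.4 (total 1.2), the
    # normalization above yields roughly 0.50, 0.17, and 0.33, so the three
    # confidences again sum to 1.0.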

    def _extract_score(self, text: str, label: str) -> float:
        """Extract confidence score from LLM response."""
        pattern = rf"{label}:\s*\[?(\d*\.?\d+)\]?"
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            try:
                return float(match.group(1))
            except ValueError:
                pass
        return 0.1  # Default low score
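
    # Example (illustrative): for a response containing
    #     "POSITIVE: 0.7\nNEGATIVE: [0.1]\nUNCERTAINTY: 0.2"
    # _extract_score(response, "POSITIVE") returns 0.7 and
    # _extract_score(response, "NEGATIVE") returns 0.1 (the optional brackets
    # are accepted by the regex); a label with no parsable number falls back
    # to the 0.1 default.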

    def evaluate_evidence_list(
        self,
        evidence_list: List[Dict],
        constraint: Constraint,
        uncertainty_penalty: float = 0.2,
        negative_weight: float = 0.5,
    ) -> float:
        """
        Evaluate a list of evidence using dual confidence scoring.

        Args:
            evidence_list: List of evidence dictionaries
            constraint: The constraint being evaluated
            uncertainty_penalty: Weight of the penalty applied to average
                uncertainty when it is subtracted from the score
            negative_weight: Weight applied to average negative confidence
                when it is subtracted from the score

        Returns:
            float: Overall score between 0.0 and 1.0
        """
        if not evidence_list:
            # No evidence means high uncertainty
            return 0.5 - uncertainty_penalty

        # Convert evidence to dual confidence format
        constraint_evidence = []
        for evidence in evidence_list:
            dual_evidence = self.analyze_evidence_dual_confidence(
                evidence, constraint
            )
            constraint_evidence.append(dual_evidence)

        # Calculate overall score
        total_positive = sum(e.positive_confidence for e in constraint_evidence)
        total_negative = sum(e.negative_confidence for e in constraint_evidence)
        total_uncertainty = sum(e.uncertainty for e in constraint_evidence)

        # Normalize
        evidence_count = len(constraint_evidence)
        avg_positive = total_positive / evidence_count
        avg_negative = total_negative / evidence_count
        avg_uncertainty = total_uncertainty / evidence_count

        # Calculate final score:
        # high positive + low negative = high score,
        # low positive + high negative = low score,
        # high uncertainty = penalty.
        score = (
            avg_positive
            - (avg_negative * negative_weight)
            - (avg_uncertainty * uncertainty_penalty)
        )

        # Clamp to [0, 1]
        return max(0.0, min(1.0, score))
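
# Worked example (illustrative): with evidence averaging avg_positive = 0.6,
# avg_negative = 0.2 and avg_uncertainty = 0.2 under the default weights
# (negative_weight = 0.5, uncertainty_penalty = 0.2), the score is
#
#     0.6 - (0.2 * 0.5) - (0.2 * 0.2) = 0.6 - 0.10 - 0.04 = 0.46
#
# which already lies in [0, 1], so the final clamp leaves it unchanged. With
# an empty evidence list the method instead returns 0.5 - 0.2 = 0.3.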