Coverage for src/local_deep_research/advanced_search_system/constraint_checking/evidence_analyzer.py: 59%

61 statements  

coverage.py v7.12.0, created at 2026-01-11 00:51 +0000

1""" 

2Evidence analysis for constraint checking. 

3 

4This module provides dual confidence evidence analysis that separates 

5positive evidence, negative evidence, and uncertainty. 

6""" 

7 

8import re 

9from dataclasses import dataclass 

10from typing import Dict, List 

11 

12from langchain_core.language_models import BaseChatModel 

13from loguru import logger 

14 

15from ..constraints.base_constraint import Constraint 

16 

17 

18@dataclass 

19class ConstraintEvidence: 

20 """Evidence for a constraint with dual confidence scores.""" 

21 

22 positive_confidence: float # How sure we are the constraint IS satisfied 

23 negative_confidence: ( 

24 float # How sure we are the constraint is NOT satisfied 

25 ) 

26 uncertainty: float # How uncertain we are (neither positive nor negative) 

27 evidence_text: str 

28 source: str 
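
# Illustrative example (not part of the analyzed module): a value this
# dataclass is meant to hold, with the three confidence scores summing to
# roughly 1.0:
#
#     ConstraintEvidence(
#         positive_confidence=0.7,
#         negative_confidence=0.1,
#         uncertainty=0.2,
#         evidence_text="Snippet of the retrieved source text...",
#         source="search",
#     )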


class EvidenceAnalyzer:
    """
    Analyzes evidence using dual confidence scoring.

    This approach separates:
    - Positive confidence: Evidence that constraint IS satisfied
    - Negative confidence: Evidence that constraint is NOT satisfied
    - Uncertainty: Lack of clear evidence either way
    """

    def __init__(self, model: BaseChatModel):
        """Initialize the evidence analyzer."""
        self.model = model

    def analyze_evidence_dual_confidence(
        self, evidence: Dict, constraint: Constraint
    ) -> ConstraintEvidence:
        """Analyze evidence to extract dual confidence scores."""
        text = evidence.get("text", "")

        # Use LLM to analyze evidence with dual confidence
        prompt = f"""
Analyze this evidence for the constraint "{constraint.value}" (type: {constraint.type.value}).

Evidence:
{text[:1000]}

Provide three confidence scores (0-1):
1. POSITIVE_CONFIDENCE: How confident are you that this constraint IS satisfied?
2. NEGATIVE_CONFIDENCE: How confident are you that this constraint is NOT satisfied?
3. UNCERTAINTY: How uncertain are you (lack of clear evidence)?

The three scores should approximately sum to 1.0.

Format:
POSITIVE: [score]
NEGATIVE: [score]
UNCERTAINTY: [score]
"""

        try:
            response = self.model.invoke(prompt).content

            # Extract scores
            positive = self._extract_score(response, "POSITIVE")
            negative = self._extract_score(response, "NEGATIVE")
            uncertainty = self._extract_score(response, "UNCERTAINTY")

            # Normalize if needed
            total = positive + negative + uncertainty
            # coverage.py note: this condition was always true in the recorded
            # test run, so the else branch below was never executed.
            if total > 0:
                positive /= total
                negative /= total
                uncertainty /= total
            else:
                # Default to high uncertainty
                uncertainty = 0.8
                positive = 0.1
                negative = 0.1

            return ConstraintEvidence(
                positive_confidence=positive,
                negative_confidence=negative,
                uncertainty=uncertainty,
                evidence_text=text[:500],
                source=evidence.get("source", "search"),
            )

        except Exception:
            logger.exception("Error analyzing evidence")
            # Default to high uncertainty
            return ConstraintEvidence(
                positive_confidence=0.1,
                negative_confidence=0.1,
                uncertainty=0.8,
                evidence_text=text[:500],
                source=evidence.get("source", "search"),
            )
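
    # Worked example (illustrative, not in the original source): if the model
    # returned POSITIVE 0.6, NEGATIVE 0.2, UNCERTAINTY 0.4 (total 1.2), the
    # normalization above yields roughly 0.50, 0.17, and 0.33, so the three
    # confidences again sum to 1.0.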

    def _extract_score(self, text: str, label: str) -> float:
        """Extract confidence score from LLM response."""
        pattern = rf"{label}:\s*\[?(\d*\.?\d+)\]?"
        match = re.search(pattern, text, re.IGNORECASE)
        if match:
            try:
                return float(match.group(1))
            except ValueError:
                pass
        return 0.1  # Default low score
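
    # Example (illustrative): for a response containing
    #     "POSITIVE: 0.7\nNEGATIVE: [0.1]\nUNCERTAINTY: 0.2"
    # _extract_score(response, "POSITIVE") returns 0.7 and
    # _extract_score(response, "NEGATIVE") returns 0.1 (the optional brackets
    # are accepted by the regex); a label with no parsable number falls back
    # to the 0.1 default.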

    def evaluate_evidence_list(
        self,
        evidence_list: List[Dict],
        constraint: Constraint,
        uncertainty_penalty: float = 0.2,
        negative_weight: float = 0.5,
    ) -> float:
        """
        Evaluate a list of evidence using dual confidence scoring.

        Args:
            evidence_list: List of evidence dictionaries
            constraint: The constraint being evaluated
            uncertainty_penalty: Weight of the penalty applied to average
                uncertainty when it is subtracted from the score
            negative_weight: Weight applied to average negative confidence
                when it is subtracted from the score

        Returns:
            float: Overall score between 0.0 and 1.0
        """
        if not evidence_list:
            # No evidence means high uncertainty
            return 0.5 - uncertainty_penalty

        # Convert evidence to dual confidence format
        constraint_evidence = []
        for evidence in evidence_list:
            dual_evidence = self.analyze_evidence_dual_confidence(
                evidence, constraint
            )
            constraint_evidence.append(dual_evidence)

        # Calculate overall score
        total_positive = sum(e.positive_confidence for e in constraint_evidence)
        total_negative = sum(e.negative_confidence for e in constraint_evidence)
        total_uncertainty = sum(e.uncertainty for e in constraint_evidence)

        # Normalize
        evidence_count = len(constraint_evidence)
        avg_positive = total_positive / evidence_count
        avg_negative = total_negative / evidence_count
        avg_uncertainty = total_uncertainty / evidence_count

        # Calculate final score:
        # high positive + low negative = high score,
        # low positive + high negative = low score,
        # high uncertainty = penalty.
        score = (
            avg_positive
            - (avg_negative * negative_weight)
            - (avg_uncertainty * uncertainty_penalty)
        )

        # Clamp to [0, 1]
        return max(0.0, min(1.0, score))
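
# Worked example (illustrative): with evidence averaging avg_positive = 0.6,
# avg_negative = 0.2 and avg_uncertainty = 0.2 under the default weights
# (negative_weight = 0.5, uncertainty_penalty = 0.2), the score is
#
#     0.6 - (0.2 * 0.5) - (0.2 * 0.2) = 0.6 - 0.10 - 0.04 = 0.46
#
# which already lies in [0, 1], so the final clamp leaves it unchanged. With
# an empty evidence list the method instead returns 0.5 - 0.2 = 0.3.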