Coverage for src/local_deep_research/advanced_search_system/evidence/evaluator.py: 19%

50 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-01-11 00:51 +0000

1""" 

2Evidence evaluator for assessing evidence quality and relevance. 

3""" 

4 

from __future__ import annotations

import re
from typing import Dict

from langchain_core.language_models import BaseChatModel
from loguru import logger

from ...utilities.search_utilities import remove_think_tags
from ..constraints.base_constraint import Constraint
from .base_evidence import Evidence, EvidenceType

13 

14 

class EvidenceEvaluator:
    """Evaluates evidence quality and relevance.

    Uses an LLM to extract structured evidence about whether a candidate
    satisfies a constraint, then parses the response into an ``Evidence``
    object whose confidence is clamped to [0, 1] and scaled by a simple
    lexical match-quality heuristic.
    """

    # Matches the first (possibly fractional) number in a string, e.g.
    # "0.8 (high)" -> "0.8". Precompiled once instead of per call.
    _CONFIDENCE_PATTERN = re.compile(r"(\d*\.?\d+)")

    # Keys we accept from the LLM's "KEY: value" formatted response.
    _RESPONSE_KEYS = frozenset(
        {"claim", "type", "source", "confidence", "reasoning", "quote"}
    )

    def __init__(self, model: BaseChatModel):
        """Initialize the evidence evaluator.

        Args:
            model: Chat model used to extract evidence from search results.
        """
        self.model = model
        # Reliability weight per source category; higher means more trusted.
        # NOTE(review): not consumed by any method visible in this file —
        # presumably read by callers; confirm before removing.
        self.source_reliability = {
            "official": 1.0,
            "research": 0.95,
            "news": 0.8,
            "community": 0.6,
            "inference": 0.5,
            "speculation": 0.3,
        }

    def extract_evidence(
        self, search_result: str, candidate: str, constraint: Constraint
    ) -> Evidence:
        """Extract evidence from search results for a specific constraint.

        Args:
            search_result: Raw search-result text; only the first 3000
                characters are sent to the model.
            candidate: The candidate answer being evaluated.
            constraint: The constraint the evidence should address.

        Returns:
            An ``Evidence`` object; its confidence is the model's reported
            confidence (clamped to [0, 1]) scaled by
            :meth:`_assess_match_quality`.
        """
        prompt = f"""
Extract evidence regarding whether "{candidate}" satisfies this constraint:

Constraint: {constraint.description}
Constraint Type: {constraint.type.value}
Required Value: {constraint.value}

Search Results:
{search_result[:3000]}

Provide:
1. CLAIM: What the evidence claims about the constraint
2. TYPE: direct_statement, official_record, research_finding, news_report, statistical_data, inference, correlation, or speculation
3. SOURCE: Where this evidence comes from
4. CONFIDENCE: How confident you are this evidence is accurate (0.0-1.0)
5. REASONING: Why this evidence supports or refutes the constraint
6. QUOTE: Relevant quote from the search results (if any)

Format:
CLAIM: [specific claim]
TYPE: [evidence type]
SOURCE: [source description]
CONFIDENCE: [0.0-1.0]
REASONING: [explanation]
QUOTE: [relevant text]
"""

        response = self.model.invoke(prompt)
        content = remove_think_tags(response.content)

        # Parse the formatted response into its labelled components.
        parsed = self._parse_evidence_response(content)

        # Parse confidence defensively: despite the format instructions the
        # model may emit junk. Fall back to a neutral 0.5 and clamp to [0, 1].
        confidence_str = parsed.get("confidence", "0.5")
        try:
            confidence = max(0.0, min(1.0, float(confidence_str)))
        except ValueError:
            logger.warning(
                f"Failed to parse confidence value: {confidence_str}"
            )
            confidence = 0.5

        evidence = Evidence(
            claim=parsed.get("claim", "No clear claim"),
            type=self._parse_evidence_type(parsed.get("type", "speculation")),
            source=parsed.get("source", "Unknown"),
            confidence=confidence,
            reasoning=parsed.get("reasoning", ""),
            raw_text=parsed.get("quote", ""),
            metadata={
                "candidate": candidate,
                "constraint_id": constraint.id,
                "constraint_type": constraint.type.value,
            },
        )

        # Adjust confidence based on how well it matches the constraint.
        evidence.confidence *= self._assess_match_quality(evidence, constraint)

        return evidence

    def _parse_evidence_response(self, content: str) -> Dict[str, str]:
        """Parse the LLM response into evidence components.

        Collects ``KEY: value`` lines for the known keys; for ``confidence``
        only the first numeric token is kept. Lines without a colon or with
        an unrecognized key are ignored.
        """
        parsed: Dict[str, str] = {}

        for line in content.strip().split("\n"):
            if ":" not in line:
                continue
            key, value = line.split(":", 1)
            key = key.strip().lower()
            value = value.strip()

            if key not in self._RESPONSE_KEYS:
                continue

            if key == "confidence":
                # Keep only the numeric part, e.g. "0.8 (high)" -> "0.8".
                match = self._CONFIDENCE_PATTERN.search(value)
                parsed[key] = match.group(1) if match else value
            else:
                parsed[key] = value

        return parsed

    def _parse_evidence_type(self, type_str: str) -> EvidenceType:
        """Parse evidence type from string.

        Unknown or misspelled types fall back to the weakest category,
        ``EvidenceType.SPECULATION``.
        """
        type_map = {
            "direct_statement": EvidenceType.DIRECT_STATEMENT,
            "official_record": EvidenceType.OFFICIAL_RECORD,
            "research_finding": EvidenceType.RESEARCH_FINDING,
            "news_report": EvidenceType.NEWS_REPORT,
            "statistical_data": EvidenceType.STATISTICAL_DATA,
            "inference": EvidenceType.INFERENCE,
            "correlation": EvidenceType.CORRELATION,
            "speculation": EvidenceType.SPECULATION,
        }
        return type_map.get(type_str.lower(), EvidenceType.SPECULATION)

    def _assess_match_quality(
        self, evidence: Evidence, constraint: Constraint
    ) -> float:
        """Score how well the evidence claim matches the constraint value.

        Simplified lexical heuristic: full substring match scores 1.0, any
        shared word scores 0.8, otherwise 0.6.
        """
        claim = evidence.claim.lower()
        value = constraint.value.lower()

        # Guard: an empty constraint value is a substring of every claim and
        # would otherwise spuriously score a perfect 1.0.
        if not value.strip():
            return 0.6

        if value in claim:
            return 1.0
        elif any(word in claim for word in value.split()):
            return 0.8
        else:
            return 0.6  # Partial match at best