Coverage for src/local_deep_research/advanced_search_system/evidence/evaluator.py: 19%

50 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-01-11 00:51 +0000

1""" 

2Evidence evaluator for assessing evidence quality and relevance. 

3""" 

4 

from __future__ import annotations

import re
from typing import Dict

from langchain_core.language_models import BaseChatModel
from loguru import logger

from ...utilities.search_utilities import remove_think_tags
from ..constraints.base_constraint import Constraint
from .base_evidence import Evidence, EvidenceType

13 

14 

class EvidenceEvaluator:
    """Evaluates evidence quality and relevance.

    Uses an LLM to extract structured evidence about whether a candidate
    satisfies a constraint, then parses the response into an ``Evidence``
    object whose confidence is clamped to [0, 1] and scaled by a simple
    lexical match-quality heuristic.
    """

    # Matches the first (possibly fractional) number in a string, e.g.
    # "0.8 (high)" -> "0.8". Precompiled once instead of per call.
    _CONFIDENCE_PATTERN = re.compile(r"(\d*\.?\d+)")

    # Keys we accept from the LLM's "KEY: value" formatted response.
    _RESPONSE_KEYS = frozenset(
        {"claim", "type", "source", "confidence", "reasoning", "quote"}
    )

    def __init__(self, model: BaseChatModel):
        """Initialize the evidence evaluator.

        Args:
            model: Chat model used to extract evidence from search results.
        """
        self.model = model
        # Reliability weight per source category; higher means more trusted.
        # NOTE(review): not consumed by any method visible in this file —
        # presumably read by callers; confirm before removing.
        self.source_reliability = {
            "official": 1.0,
            "research": 0.95,
            "news": 0.8,
            "community": 0.6,
            "inference": 0.5,
            "speculation": 0.3,
        }

    def extract_evidence(
        self, search_result: str, candidate: str, constraint: Constraint
    ) -> Evidence:
        """Extract evidence from search results for a specific constraint.

        Args:
            search_result: Raw search-result text; only the first 3000
                characters are sent to the model.
            candidate: The candidate answer being evaluated.
            constraint: The constraint the evidence should address.

        Returns:
            An ``Evidence`` object; its confidence is the model's reported
            confidence (clamped to [0, 1]) scaled by
            :meth:`_assess_match_quality`.
        """
        prompt = f"""
Extract evidence regarding whether "{candidate}" satisfies this constraint:

Constraint: {constraint.description}
Constraint Type: {constraint.type.value}
Required Value: {constraint.value}

Search Results:
{search_result[:3000]}

Provide:
1. CLAIM: What the evidence claims about the constraint
2. TYPE: direct_statement, official_record, research_finding, news_report, statistical_data, inference, correlation, or speculation
3. SOURCE: Where this evidence comes from
4. CONFIDENCE: How confident you are this evidence is accurate (0.0-1.0)
5. REASONING: Why this evidence supports or refutes the constraint
6. QUOTE: Relevant quote from the search results (if any)

Format:
CLAIM: [specific claim]
TYPE: [evidence type]
SOURCE: [source description]
CONFIDENCE: [0.0-1.0]
REASONING: [explanation]
QUOTE: [relevant text]
"""

        response = self.model.invoke(prompt)
        content = remove_think_tags(response.content)

        # Parse the formatted response into its labelled components.
        parsed = self._parse_evidence_response(content)

        # Parse confidence defensively: despite the format instructions the
        # model may emit junk. Fall back to a neutral 0.5 and clamp to [0, 1].
        confidence_str = parsed.get("confidence", "0.5")
        try:
            confidence = max(0.0, min(1.0, float(confidence_str)))
        except ValueError:
            logger.warning(
                f"Failed to parse confidence value: {confidence_str}"
            )
            confidence = 0.5

        evidence = Evidence(
            claim=parsed.get("claim", "No clear claim"),
            type=self._parse_evidence_type(parsed.get("type", "speculation")),
            source=parsed.get("source", "Unknown"),
            confidence=confidence,
            reasoning=parsed.get("reasoning", ""),
            raw_text=parsed.get("quote", ""),
            metadata={
                "candidate": candidate,
                "constraint_id": constraint.id,
                "constraint_type": constraint.type.value,
            },
        )

        # Adjust confidence based on how well it matches the constraint.
        evidence.confidence *= self._assess_match_quality(evidence, constraint)

        return evidence

    def _parse_evidence_response(self, content: str) -> Dict[str, str]:
        """Parse the LLM response into evidence components.

        Collects ``KEY: value`` lines for the known keys; for ``confidence``
        only the first numeric token is kept. Lines without a colon or with
        an unrecognized key are ignored.
        """
        parsed: Dict[str, str] = {}

        for line in content.strip().split("\n"):
            if ":" not in line:
                continue
            key, value = line.split(":", 1)
            key = key.strip().lower()
            value = value.strip()

            if key not in self._RESPONSE_KEYS:
                continue

            if key == "confidence":
                # Keep only the numeric part, e.g. "0.8 (high)" -> "0.8".
                match = self._CONFIDENCE_PATTERN.search(value)
                parsed[key] = match.group(1) if match else value
            else:
                parsed[key] = value

        return parsed

    def _parse_evidence_type(self, type_str: str) -> EvidenceType:
        """Parse evidence type from string.

        Unknown or misspelled types fall back to the weakest category,
        ``EvidenceType.SPECULATION``.
        """
        type_map = {
            "direct_statement": EvidenceType.DIRECT_STATEMENT,
            "official_record": EvidenceType.OFFICIAL_RECORD,
            "research_finding": EvidenceType.RESEARCH_FINDING,
            "news_report": EvidenceType.NEWS_REPORT,
            "statistical_data": EvidenceType.STATISTICAL_DATA,
            "inference": EvidenceType.INFERENCE,
            "correlation": EvidenceType.CORRELATION,
            "speculation": EvidenceType.SPECULATION,
        }
        return type_map.get(type_str.lower(), EvidenceType.SPECULATION)

    def _assess_match_quality(
        self, evidence: Evidence, constraint: Constraint
    ) -> float:
        """Score how well the evidence claim matches the constraint value.

        Simplified lexical heuristic: full substring match scores 1.0, any
        shared word scores 0.8, otherwise 0.6.
        """
        claim = evidence.claim.lower()
        value = constraint.value.lower()

        # Guard: an empty constraint value is a substring of every claim and
        # would otherwise spuriously score a perfect 1.0.
        if not value.strip():
            return 0.6

        if value in claim:
            return 1.0
        elif any(word in claim for word in value.split()):
            return 0.8
        else:
            return 0.6  # Partial match at best