Coverage for src / local_deep_research / advanced_search_system / evidence / evaluator.py: 19%
50 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
1"""
2Evidence evaluator for assessing evidence quality and relevance.
3"""
import re
from typing import Dict

from langchain_core.language_models import BaseChatModel
from loguru import logger

from ...utilities.search_utilities import remove_think_tags
from ..constraints.base_constraint import Constraint
from .base_evidence import Evidence, EvidenceType
class EvidenceEvaluator:
    """Evaluates evidence quality and relevance.

    Uses an LLM to extract structured evidence (claim, type, source,
    confidence, reasoning, quote) from raw search-result text about whether
    a candidate satisfies a given constraint, then adjusts the confidence
    by how well the extracted claim matches the constraint value.
    """

    # Keys recognized when parsing the LLM's "KEY: value" response lines.
    _RESPONSE_KEYS = frozenset(
        {"claim", "type", "source", "confidence", "reasoning", "quote"}
    )

    def __init__(self, model: BaseChatModel):
        """Initialize the evidence evaluator.

        Args:
            model: Chat model used to extract structured evidence from
                search-result text.
        """
        self.model = model
        # Baseline reliability weights per source category. NOTE(review):
        # defined but not consulted anywhere in this class — presumably
        # used by callers or intended for future weighting; verify.
        self.source_reliability = {
            "official": 1.0,
            "research": 0.95,
            "news": 0.8,
            "community": 0.6,
            "inference": 0.5,
            "speculation": 0.3,
        }

    def extract_evidence(
        self, search_result: str, candidate: str, constraint: Constraint
    ) -> Evidence:
        """Extract evidence from search results for a specific constraint.

        Args:
            search_result: Raw search-result text (truncated to 3000 chars
                before being sent to the model).
            candidate: The candidate answer being evaluated.
            constraint: The constraint the evidence is assessed against.

        Returns:
            An Evidence object whose confidence has been scaled by how
            well the claim matches the constraint value.
        """
        prompt = f"""
Extract evidence regarding whether "{candidate}" satisfies this constraint:

Constraint: {constraint.description}
Constraint Type: {constraint.type.value}
Required Value: {constraint.value}

Search Results:
{search_result[:3000]}

Provide:
1. CLAIM: What the evidence claims about the constraint
2. TYPE: direct_statement, official_record, research_finding, news_report, statistical_data, inference, correlation, or speculation
3. SOURCE: Where this evidence comes from
4. CONFIDENCE: How confident you are this evidence is accurate (0.0-1.0)
5. REASONING: Why this evidence supports or refutes the constraint
6. QUOTE: Relevant quote from the search results (if any)

Format:
CLAIM: [specific claim]
TYPE: [evidence type]
SOURCE: [source description]
CONFIDENCE: [0.0-1.0]
REASONING: [explanation]
QUOTE: [relevant text]
"""

        response = self.model.invoke(prompt)
        content = remove_think_tags(response.content)

        # Parse the model's structured "KEY: value" response.
        parsed = self._parse_evidence_response(content)

        # Safely parse the confidence value, falling back to 0.5 on error.
        confidence = self._parse_confidence(parsed.get("confidence", "0.5"))

        evidence = Evidence(
            claim=parsed.get("claim", "No clear claim"),
            type=self._parse_evidence_type(parsed.get("type", "speculation")),
            source=parsed.get("source", "Unknown"),
            confidence=confidence,
            reasoning=parsed.get("reasoning", ""),
            raw_text=parsed.get("quote", ""),
            metadata={
                "candidate": candidate,
                "constraint_id": constraint.id,
                "constraint_type": constraint.type.value,
            },
        )

        # Adjust confidence based on how well it matches the constraint.
        evidence.confidence *= self._assess_match_quality(evidence, constraint)

        return evidence

    @staticmethod
    def _parse_confidence(confidence_str: str) -> float:
        """Parse a confidence string into a float clamped to [0.0, 1.0].

        Returns 0.5 (neutral) when the value cannot be parsed; also guards
        TypeError in case a non-string slips through.
        """
        try:
            confidence = float(confidence_str)
        except (TypeError, ValueError):
            logger.warning(
                f"Failed to parse confidence value: {confidence_str}"
            )
            return 0.5
        # Ensure confidence is between 0 and 1.
        return max(0.0, min(1.0, confidence))

    def _parse_evidence_response(self, content: str) -> Dict[str, str]:
        """Parse the LLM response into evidence components.

        Lines of the form "KEY: value" are collected for the recognized
        keys; for "confidence" only the first numeric token is kept so
        trailing commentary does not break float parsing.
        """
        parsed: Dict[str, str] = {}

        for line in content.strip().split("\n"):
            if ":" not in line:
                continue
            key, value = line.split(":", 1)
            key = key.strip().lower()
            value = value.strip()

            if key not in self._RESPONSE_KEYS:
                continue

            # Special handling for confidence to extract just the float value.
            if key == "confidence":
                match = re.search(r"(\d*\.?\d+)", value)
                parsed[key] = match.group(1) if match else value
            else:
                parsed[key] = value

        return parsed

    def _parse_evidence_type(self, type_str: str) -> EvidenceType:
        """Parse evidence type from string (case-insensitive).

        Unrecognized types default to SPECULATION, the least reliable tier.
        """
        type_map = {
            "direct_statement": EvidenceType.DIRECT_STATEMENT,
            "official_record": EvidenceType.OFFICIAL_RECORD,
            "research_finding": EvidenceType.RESEARCH_FINDING,
            "news_report": EvidenceType.NEWS_REPORT,
            "statistical_data": EvidenceType.STATISTICAL_DATA,
            "inference": EvidenceType.INFERENCE,
            "correlation": EvidenceType.CORRELATION,
            "speculation": EvidenceType.SPECULATION,
        }
        return type_map.get(type_str.lower(), EvidenceType.SPECULATION)

    def _assess_match_quality(
        self, evidence: Evidence, constraint: Constraint
    ) -> float:
        """Assess how well the evidence claim matches the constraint value.

        Returns a multiplier in {0.6, 0.8, 1.0}: full substring match of
        the constraint value, any word overlap, or neither. This is a
        simplified heuristic — could be made more sophisticated.
        """
        value = constraint.value.lower()
        claim = evidence.claim.lower()

        # Guard: an empty constraint value would trivially substring-match
        # everything ("" in s is always True) — treat it as no match signal.
        if not value:
            return 0.6

        if value in claim:
            return 1.0
        elif any(word in claim for word in value.split()):
            return 0.8
        else:
            return 0.6  # Partial match at best
    