Coverage for src / local_deep_research / web_search_engines / engines / search_engine_retriever.py: 100%
57 statements
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
« prev ^ index » next coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
1"""
2Search engine implementation that wraps any LangChain retriever.
3This allows using vector stores, databases, or any custom retriever as a search source in LDR.
4"""
6from typing import Any, Dict, List
8from langchain_core.retrievers import BaseRetriever, Document
9from loguru import logger
11from ..search_engine_base import BaseSearchEngine
class RetrieverSearchEngine(BaseSearchEngine):
    """
    Search engine that uses any LangChain retriever.

    This allows users to plug in any LangChain retriever (vector stores,
    databases, custom implementations) and use it as a search engine in LDR.
    """

    def __init__(
        self,
        retriever: BaseRetriever,
        max_results: int = 10,
        name: str | None = None,
        **kwargs,
    ):
        """
        Initialize the retriever-based search engine.

        Args:
            retriever: Any LangChain BaseRetriever instance
            max_results: Maximum number of results to return
            name: Display name for this retriever (defaults to retriever class name)
            **kwargs: Additional parameters passed to parent
        """
        super().__init__(max_results=max_results, **kwargs)
        self.retriever = retriever
        self.name = name if name is not None else retriever.__class__.__name__

    def run(
        self, query: str, research_context: Dict[str, Any] | None = None
    ) -> List[Dict[str, Any]]:
        """
        Execute search using the LangChain retriever.

        Args:
            query: Search query
            research_context: Context from previous research. Accepted for
                interface compatibility; this implementation does not use it.

        Returns:
            List of search results in LDR format. Any exception raised while
            retrieving or converting documents is logged and an empty list
            is returned instead.
        """
        try:
            # Use the retriever to get relevant documents
            docs = self.retriever.invoke(query)

            # Convert LangChain documents to LDR search result format
            results = self._convert_documents(docs)

            logger.info(
                f"Retriever '{self.name}' returned {len(results)} results for query: {query}"
            )
            return results

        except Exception:
            # Deliberate best-effort: a failing retriever yields no results
            # rather than propagating the error to the caller.
            logger.exception("Error in retriever search")
            return []

    def _convert_documents(self, docs: List[Document]) -> List[Dict[str, Any]]:
        """
        Convert at most ``self.max_results`` documents to LDR result dicts.

        Args:
            docs: Documents returned by the retriever, in ranked order

        Returns:
            One result dictionary per kept document, in the same order
        """
        return [
            self._convert_document_to_result(doc, index)
            for index, doc in enumerate(docs[: self.max_results])
        ]

    def _convert_document_to_result(
        self, doc: Document, index: int
    ) -> Dict[str, Any]:
        """
        Convert a LangChain Document to LDR search result format.

        Args:
            doc: LangChain Document
            index: Zero-based position of the document in the result list;
                used to build fallback title and URL values

        Returns:
            Search result in LDR format
        """
        # Extract metadata (treat a missing/None metadata mapping as empty)
        metadata = doc.metadata or {}

        # Build the result
        result = {
            # Required fields for LDR
            "title": metadata.get("title", f"Document {index + 1}"),
            # Prefer "source", then "url", then a synthetic retriever:// URL
            "url": metadata.get(
                "source",
                metadata.get("url", f"retriever://{self.name}/doc_{index}"),
            ),
            "snippet": doc.page_content[:500] if doc.page_content else "",
            # Optional fields
            "full_content": doc.page_content,
            "author": metadata.get("author", ""),
            "date": metadata.get("date", ""),
            # Include all metadata for flexibility
            "metadata": metadata,
            # Score if available
            "score": metadata.get("score", 1.0),
            # Source information
            "source": self.name,
            "retriever_type": self.retriever.__class__.__name__,
        }

        return result

    def _get_previews(self, query: str) -> List[Dict[str, Any]]:
        """
        Get preview information from the retriever.

        Args:
            query: Search query

        Returns:
            List of preview dictionaries (same shape as ``run`` results);
            an empty list if retrieval or conversion raises.
        """
        try:
            # Use the retriever to get relevant documents
            docs = self.retriever.invoke(query)

            # Convert to preview format
            previews = self._convert_documents(docs)

            logger.info(
                f"Retriever '{self.name}' returned {len(previews)} previews for query: {query}"
            )
            return previews

        except Exception:
            logger.exception("Error getting previews from retriever")
            return []

    def _get_full_content(
        self, relevant_items: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """
        For retrievers, previews already contain full content.

        Args:
            relevant_items: List of relevant preview dictionaries

        Returns:
            The same list object; items lacking ``full_content`` are updated
            in place to reuse their ``snippet``.
        """
        # For retrievers, the preview already contains the full content
        # Just ensure the 'full_content' field is present
        for item in relevant_items:
            if "full_content" not in item and "snippet" in item:
                item["full_content"] = item["snippet"]
        return relevant_items

    async def arun(
        self, query: str, research_context: Dict[str, Any] | None = None
    ) -> List[Dict[str, Any]]:
        """
        Async version of search using the retriever.

        Args:
            query: Search query
            research_context: Context from previous research. Only forwarded
                to ``run`` when falling back to the synchronous path.

        Returns:
            List of search results in LDR format; an empty list if retrieval
            or conversion raises.
        """
        try:
            # Prefer ``ainvoke`` so the async path mirrors the ``invoke`` call
            # used by ``run``. ``aget_relevant_documents`` is kept only as a
            # fallback for retriever-like objects exposing the older API.
            if hasattr(self.retriever, "ainvoke"):
                docs = await self.retriever.ainvoke(query)
            elif hasattr(self.retriever, "aget_relevant_documents"):
                docs = await self.retriever.aget_relevant_documents(query)
            else:
                # Fall back to sync version
                logger.debug(
                    f"Retriever '{self.name}' doesn't support async, using sync version"
                )
                return self.run(query, research_context)

            # Convert documents to results
            results = self._convert_documents(docs)

            logger.info(
                f"Retriever '{self.name}' returned {len(results)} async results for query: {query}"
            )
            return results

        except Exception:
            logger.exception("Error in async retriever search")
            return []