Coverage for src / local_deep_research / web_search_engines / engines / search_engine_retriever.py: 100%
58 statements
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-25 01:07 +0000
« prev ^ index » next coverage.py v7.13.4, created at 2026-02-25 01:07 +0000
1"""
2Search engine implementation that wraps any LangChain retriever.
3This allows using vector stores, databases, or any custom retriever as a search source in LDR.
4"""
6from typing import Any, Dict, List
8from langchain_core.retrievers import BaseRetriever, Document
9from loguru import logger
11from ...constants import SNIPPET_LENGTH_LONG
12from ..search_engine_base import BaseSearchEngine
class RetrieverSearchEngine(BaseSearchEngine):
    """
    Search engine that uses any LangChain retriever.

    This allows users to plug in any LangChain retriever (vector stores,
    databases, custom implementations) and use it as a search engine in LDR.

    Every retrieved document is converted into an LDR result dictionary by
    ``_convert_document_to_result``. Retrieval errors are logged and turned
    into an empty result list rather than propagated to the caller.
    """

    def __init__(
        self,
        retriever: BaseRetriever,
        max_results: int = 10,
        name: str | None = None,
        **kwargs,
    ):
        """
        Initialize the retriever-based search engine.

        Args:
            retriever: Any LangChain BaseRetriever instance
            max_results: Maximum number of results to return
            name: Display name for this retriever (defaults to retriever class name)
            **kwargs: Additional parameters passed to parent
        """
        super().__init__(max_results=max_results, **kwargs)
        self.retriever = retriever
        self.name = name if name is not None else retriever.__class__.__name__

    def run(
        self, query: str, research_context: Dict[str, Any] | None = None
    ) -> List[Dict[str, Any]]:
        """
        Execute search using the LangChain retriever.

        Args:
            query: Search query
            research_context: Context from previous research. Accepted for
                interface compatibility; this implementation does not use it.

        Returns:
            List of search results in LDR format, or an empty list if the
            retriever (or the conversion) raised.
        """
        try:
            # Use the retriever to get relevant documents
            docs = self.retriever.invoke(query)
            results = self._convert_documents(docs)

            logger.info(
                f"Retriever '{self.name}' returned {len(results)} results for query: {query}"
            )
            return results

        except Exception:
            # Best effort by design: a failing retriever yields no results.
            logger.exception("Error in retriever search")
            return []

    def _convert_documents(self, docs: List[Document]) -> List[Dict[str, Any]]:
        """
        Convert at most ``max_results`` documents to LDR search results.

        Args:
            docs: Documents returned by the retriever, in ranking order

        Returns:
            One result dictionary per kept document, in the same order
        """
        return [
            self._convert_document_to_result(doc, index)
            for index, doc in enumerate(docs[: self.max_results])
        ]

    def _convert_document_to_result(
        self, doc: Document, index: int
    ) -> Dict[str, Any]:
        """
        Convert a LangChain Document to LDR search result format.

        Args:
            doc: LangChain Document
            index: Zero-based result index, used to build fallback values
                for the title and URL when the metadata has none

        Returns:
            Search result in LDR format
        """
        # Extract metadata
        metadata = doc.metadata or {}

        # Build the result
        result = {
            # Required fields for LDR
            "title": metadata.get("title", f"Document {index + 1}"),
            # Prefer "source", then "url", then a synthetic retriever:// URL
            "url": metadata.get(
                "source",
                metadata.get("url", f"retriever://{self.name}/doc_{index}"),
            ),
            "snippet": doc.page_content[:SNIPPET_LENGTH_LONG]
            if doc.page_content
            else "",
            # Optional fields
            "full_content": doc.page_content,
            "author": metadata.get("author", ""),
            "date": metadata.get("date", ""),
            # Include all metadata for flexibility
            "metadata": metadata,
            # Score if available
            "score": metadata.get("score", 1.0),
            # Source information
            "source": self.name,
            "retriever_type": self.retriever.__class__.__name__,
        }

        return result

    def _get_previews(self, query: str) -> List[Dict[str, Any]]:
        """
        Get preview information from the retriever.

        The previews are complete LDR results (including ``full_content``),
        built the same way as in ``run``.

        Args:
            query: Search query

        Returns:
            List of preview dictionaries, or an empty list on error
        """
        try:
            # Use the retriever to get relevant documents
            docs = self.retriever.invoke(query)
            previews = self._convert_documents(docs)

            logger.info(
                f"Retriever '{self.name}' returned {len(previews)} previews for query: {query}"
            )
            return previews

        except Exception:
            logger.exception("Error getting previews from retriever")
            return []

    def _get_full_content(
        self, relevant_items: List[Dict[str, Any]]
    ) -> List[Dict[str, Any]]:
        """
        For retrievers, previews already contain full content.

        Items are updated in place: an item that lacks ``full_content`` but
        has a ``snippet`` gets the snippet copied into ``full_content``.

        Args:
            relevant_items: List of relevant preview dictionaries

        Returns:
            The same list object that was passed in
        """
        # For retrievers, the preview already contains the full content
        # Just ensure the 'full_content' field is present
        for item in relevant_items:
            if "full_content" not in item and "snippet" in item:
                item["full_content"] = item["snippet"]
        return relevant_items

    async def arun(
        self, query: str, research_context: Dict[str, Any] | None = None
    ) -> List[Dict[str, Any]]:
        """
        Async version of search using the retriever.

        The retriever's ``ainvoke`` coroutine is preferred, mirroring the
        ``invoke`` call used by ``run``. ``aget_relevant_documents`` is kept
        as a fallback for retriever objects that only expose that legacy
        (deprecated) coroutine. If neither exists, the synchronous ``run``
        is called directly.

        Args:
            query: Search query
            research_context: Context from previous research. Only forwarded
                to ``run`` when the synchronous fallback is taken.

        Returns:
            List of search results in LDR format, or an empty list on error
        """
        try:
            retriever = self.retriever
            if hasattr(retriever, "ainvoke"):
                docs = await retriever.ainvoke(query)
            elif hasattr(retriever, "aget_relevant_documents"):
                # Legacy async API, deprecated in favour of ainvoke
                docs = await retriever.aget_relevant_documents(query)
            else:
                # Fall back to sync version
                logger.debug(
                    f"Retriever '{self.name}' doesn't support async, using sync version"
                )
                return self.run(query, research_context=research_context)

            # Convert documents to results
            results = self._convert_documents(docs)

            logger.info(
                f"Retriever '{self.name}' returned {len(results)} async results for query: {query}"
            )
            return results

        except Exception:
            logger.exception("Error in async retriever search")
            return []