Coverage for src/local_deep_research/web_search_engines/engines/local_embedding_manager.py

1import threading

2from typing import Any, Dict, Optional

5from ...database.models.library import DocumentChunk

6from ...database.session_context import get_user_db_session

7from ...security.log_sanitizer import scrub_error

8from ...security.secure_logging import logger

9from ...utilities.url_utils import normalize_url

class LocalEmbeddingManager:
    """Handles embedding generation and storage for local document search"""

    def __init__(
        self,
        embedding_model: str = "all-MiniLM-L6-v2",
        embedding_device: str = "cpu",
        embedding_model_type: str = "sentence_transformers",  # or 'ollama'
        ollama_base_url: Optional[str] = None,
        settings_snapshot: Optional[Dict[str, Any]] = None,
    ):
        """
        Initialize the embedding manager for local document search.

        No model is loaded here; see the ``embeddings`` property.

        Args:
            embedding_model: Name of the embedding model to use
            embedding_device: Device to run embeddings on ('cpu' or 'cuda')
            embedding_model_type: Type of embedding model ('sentence_transformers' or 'ollama')
            ollama_base_url: Base URL for Ollama API if using ollama embeddings
            settings_snapshot: Optional settings snapshot for background threads
        """
        self.embedding_model = embedding_model
        self.embedding_device = embedding_device
        self.embedding_model_type = embedding_model_type
        self.ollama_base_url = ollama_base_url
        self.settings_snapshot = settings_snapshot or {}

        # Username for database access (extracted from settings if available)
        self.username = (
            settings_snapshot.get("_username") if settings_snapshot else None
        )
        # Password for encrypted database access (can be set later)
        self.db_password = None

        # Initialize the embedding model (with lock for thread-safe lazy init)
        self._embeddings = None
        self._embedding_lock = threading.Lock()

        # Track if this manager has been closed
        self._closed = False

    def close(self):
        """Release embedding model resources.

        Idempotent: a second call returns immediately.

        For Ollama embeddings, this also closes the underlying per-instance
        ``httpx.Client`` / ``httpx.AsyncClient`` pair. langchain_ollama's
        ``OllamaEmbeddings`` eagerly constructs both clients in its Pydantic
        ``@model_validator(mode="after")``, so dropping the Python reference
        alone leaks ~2 FDs per instance — see the migration regression note
        in docs/developing/resource-cleanup.md. Non-Ollama providers
        (sentence_transformers, OpenAI's lru_cache'd shared client) are
        no-ops via the module-prefix check inside ``_close_base_llm``.
        """
        if self._closed:
            return
        self._closed = True
        if self._embeddings is not None:
            # Imported lazily, only when there is actually something to close.
            from ...utilities.llm_utils import _close_base_llm

            _close_base_llm(self._embeddings)
            self._embeddings = None
        logger.debug("LocalEmbeddingManager closed")

    def __enter__(self):
        """Context manager entry."""
        return self

    def __exit__(self, exc_type, exc_val, exc_tb):
        """Context manager exit - ensures resources are released."""
        self.close()
        # Returning False lets any in-flight exception propagate.
        return False

    @property
    def embeddings(self):
        """
        Lazily initialize embeddings when first accessed.
        This allows the LocalEmbeddingManager to be created without
        immediately loading models, which is helpful when no local search is performed.

        Uses double-checked locking to ensure thread-safe initialization.
        Concurrent SentenceTransformer model loading causes meta tensor errors
        in PyTorch when multiple threads call model.to(device) simultaneously.

        NOTE(review): this property does not consult ``_closed``, so reading
        it after ``close()`` initializes a fresh model — confirm that is the
        intended behavior.
        """
        if self._embeddings is None:
            with self._embedding_lock:
                # Re-check under the lock: another thread may have finished
                # initialization while this one was waiting.
                if self._embeddings is None:
                    logger.info("Initializing embeddings on first use")
                    self._embeddings = self._initialize_embeddings()
        return self._embeddings

    def _initialize_embeddings(self):
        """Initialize the embedding model based on configuration.

        Returns:
            Whatever ``get_embeddings`` returns for the configured provider,
            or for ``sentence_transformers`` if the configured provider
            raised ``ImportError``.

        Raises:
            Any non-``ImportError`` exception from ``get_embeddings``, and any
            exception raised by the fallback call itself.
        """
        try:
            # Use the new unified embedding system
            from ...embeddings import get_embeddings

            # Prepare kwargs for provider-specific parameters
            kwargs = {}

            # Add device for sentence transformers
            if self.embedding_model_type == "sentence_transformers":
                kwargs["device"] = self.embedding_device

            # Add base_url for ollama if specified
            if self.embedding_model_type == "ollama" and self.ollama_base_url:
                kwargs["base_url"] = normalize_url(self.ollama_base_url)

            logger.info(
                f"Initializing embeddings with provider={self.embedding_model_type}, model={self.embedding_model}"
            )

            return get_embeddings(
                provider=self.embedding_model_type,
                model=self.embedding_model,
                settings_snapshot=self.settings_snapshot,
                **kwargs,
            )
        except ImportError as exc:
            # Only fall back when the configured provider's dependency
            # genuinely isn't installed — that's a deployment shape, not
            # a transient runtime error. Any OTHER exception (Ollama
            # DNS hiccup, provider validation, policy denial) must
            # propagate so we don't silently fetch from huggingface.co
            # when the user has explicitly opted into local embeddings.
            safe_msg = scrub_error(exc)
            logger.exception(
                f"Embedding provider import failed ({type(exc).__name__}) — falling back to local SBERT: {safe_msg}"
            )
            # Route the fallback through get_embeddings(sentence_transformers)
            # rather than constructing HuggingFaceEmbeddings directly: the SBERT
            # model is fetched from huggingface.co on a cache miss, which the
            # provider gate refuses under PRIVATE_ONLY / embeddings.require_local.
            # Constructing it raw here would bypass that gate and leak an
            # outbound HF download in offline mode. PolicyDeniedError from the
            # gate propagates (fail closed).
            from ...embeddings import get_embeddings

            return get_embeddings(
                provider="sentence_transformers",
                model=None,  # provider default (all-MiniLM-L6-v2)
                settings_snapshot=self.settings_snapshot,
            )

    # NOTE(cutover): the old ``_store_chunks_to_db`` (LangChain-Document-list
    # -> DocumentChunk rows, with chunk-hash+collection dedup/reuse for a
    # FAISS uuid-keyed docstore) was deleted here. Its caller
    # ``LibraryRAGService.index_document`` now writes chunk rows via
    # ``vector_stores.facade.VectorIndex.index`` instead (int-id keyed,
    # one row per source's chunk, no cross-source hash dedup/reuse; see
    # the "One row = one vector" note in vector_stores/facade.py). Grep
    # confirmed no other production callers before deletion.

    @staticmethod
    def _chunk_filters(
        collection_name: str,
        source_path: Optional[str] = None,
        source_id: Optional[int] = None,
    ) -> Dict[str, Any]:
        """Build the ``filter_by`` keyword arguments for a chunk deletion.

        Args:
            collection_name: Name of the collection (always part of the filter)
            source_path: Path to source file; ignored when empty or None
            source_id: ID of source document; ignored only when None

        Returns:
            Mapping of column name to the value it must equal.
        """
        filters: Dict[str, Any] = {"collection_name": collection_name}
        if source_path:
            filters["source_path"] = str(source_path)
        # Compare against None rather than relying on truthiness: a falsy but
        # explicitly supplied id (e.g. 0) must still narrow the delete, not
        # silently widen it to every chunk in the collection.
        if source_id is not None:
            filters["source_id"] = source_id
        return filters

    def _delete_chunks_from_db(
        self,
        collection_name: str,
        source_path: Optional[str] = None,
        source_id: Optional[int] = None,
    ) -> int:
        """
        Delete chunks from database.

        Args:
            collection_name: Name of the collection
            source_path: Path to source file (for local files)
            source_id: ID of source document (for library documents)

        Returns:
            Number of chunks deleted; 0 when no username is set or when the
            deletion raised (the error is logged, not re-raised)
        """
        if not self.username:
            logger.warning(
                "No username available, cannot delete chunks from database"
            )
            return 0

        try:
            with get_user_db_session(
                self.username, self.db_password
            ) as session:
                filters = self._chunk_filters(
                    collection_name, source_path, source_id
                )
                query = session.query(DocumentChunk).filter_by(**filters)

                count = int(query.delete())
                session.commit()

                logger.info(
                    f"Deleted {count} chunks from database for collection '{collection_name}'"
                )
                return count

        except Exception as e:
            # Best-effort cleanup: scrub the password from the message, log
            # with traceback, and report zero deletions to the caller.
            safe_msg = scrub_error(e, self.db_password)
            logger.exception(
                f"Error deleting chunks from database for collection '{collection_name}' ({type(e).__name__}): {safe_msg}"
            )
            return 0

Coverage for src/local_deep_research/web_search_engines/engines/local_embedding_manager.py: 98%

75 statements