Coverage for src/local_deep_research/embeddings/providers/implementations/ollama.py: 97%

86 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-03 23:15 +0000

1"""Ollama embedding provider.""" 

2 

3import weakref 

4from typing import Any, Dict, List, Optional 

5 

6from langchain_ollama import OllamaEmbeddings 

7from langchain_core.embeddings import Embeddings 

8from loguru import logger 

9 

10from ....config.thread_settings import get_setting_from_snapshot 

11from ....utilities.llm_utils import ( 

12 _close_inner_ollama_clients, 

13 get_ollama_base_url, 

14) 

15from ..base import BaseEmbeddingProvider 

16from ....security import safe_get, safe_post 

17 

18 

class OllamaEmbeddingsProvider(BaseEmbeddingProvider):
    """
    Embedding provider backed by an Ollama server.

    Builds ``OllamaEmbeddings`` instances and queries the Ollama HTTP API
    (``/api/tags`` and ``/api/show``) to probe availability and model
    capabilities. No API key is required.
    """

    provider_name = "Ollama"
    provider_key = "OLLAMA"
    requires_api_key = False
    supports_local = True
    default_model = "nomic-embed-text"  # type: ignore[assignment]

    @classmethod
    def create_embeddings(
        cls,
        model: Optional[str] = None,
        settings_snapshot: Optional[Dict[str, Any]] = None,
        **kwargs,
    ) -> Embeddings:
        """
        Build an ``OllamaEmbeddings`` instance.

        Args:
            model: Model name. When ``None`` it is read from the
                ``embeddings.ollama.model`` setting, falling back to
                ``default_model``.
            settings_snapshot: Optional settings snapshot used for every
                settings lookup in this method.
            **kwargs: Only ``base_url`` is read here; any other keyword is
                ignored by this method.

        Returns:
            The newly constructed ``OllamaEmbeddings`` instance.
        """
        # Fall back to the configured model only when the caller gave none.
        if model is None:
            model = get_setting_from_snapshot(
                "embeddings.ollama.model",
                default=cls.default_model,
                settings_snapshot=settings_snapshot,
            )

        # An explicit ``base_url`` keyword takes precedence over settings.
        explicit_url = kwargs.get("base_url")
        base_url = (
            get_ollama_base_url(settings_snapshot)
            if explicit_url is None
            else explicit_url
        )

        # Without an explicit num_ctx, Ollama uses the model's modelfile
        # default (often 2048). Inputs longer than that return HTTP 500
        # ("input length exceeds the context length") rather than being
        # truncated, which aborts indexing mid-batch.
        num_ctx = get_setting_from_snapshot(
            "embeddings.ollama.num_ctx",
            default=8192,
            settings_snapshot=settings_snapshot,
        )

        logger.info(
            f"Creating OllamaEmbeddings with model={model}, "
            f"base_url={base_url}, num_ctx={num_ctx}"
        )

        # A falsy num_ctx (0, None, "") means "send no num_ctx at all".
        context_option: Dict[str, Any] = (
            {"num_ctx": int(num_ctx)} if num_ctx else {}
        )
        instance = OllamaEmbeddings(
            model=model, base_url=base_url, **context_option
        )

        # Safety net for callers that bypass LocalEmbeddingManager (e.g.,
        # the programmatic-API examples in examples/api_usage, direct
        # constructions in test fixtures). The manager-driven explicit
        # close remains the primary path; this finalizer only fires when
        # the instance is garbage-collected without an explicit close.
        # The finalizer receives the inner sync/async client objects, not
        # ``instance`` itself: a strong reference back to the wrapper
        # would keep it alive forever and the finalizer would never run.
        try:
            inner_clients = (instance._client, instance._async_client)
            weakref.finalize(
                instance, _close_inner_ollama_clients, *inner_clients
            )
        except AttributeError:
            # Future langchain_ollama versions may reshape the private
            # attrs; don't crash the factory if the introspection misses.
            logger.debug(
                "OllamaEmbeddings shape changed — finalizer not registered"
            )

        return instance

    @classmethod
    def is_available(
        cls, settings_snapshot: Optional[Dict[str, Any]] = None
    ) -> bool:
        """Report whether the Ollama server answers ``/api/tags``.

        Returns ``True`` only when the GET request completes with HTTP
        200. A ``requests`` request error yields ``False`` silently; any
        other exception is logged and also yields ``False``.
        """
        try:
            import requests

            tags_url = f"{get_ollama_base_url(settings_snapshot)}/api/tags"

            # Only transport-level failures are treated as a quiet "not
            # running"; anything else falls through to the logging handler.
            try:
                return (
                    safe_get(
                        tags_url,
                        timeout=3,
                        allow_localhost=True,
                        allow_private_ips=True,
                    ).status_code
                    == 200
                )
            except requests.exceptions.RequestException:
                return False

        except Exception:
            logger.exception("Error checking Ollama availability")
            return False

    @classmethod
    def _get_model_capabilities(
        cls, base_url: str, model_name: str
    ) -> Optional[List[str]]:
        """Ask Ollama's ``/api/show`` endpoint for a model's capabilities.

        Returns the value of the ``capabilities`` field of the JSON reply
        (e.g. ``["embedding"]``), or ``None`` when the request fails, the
        status is not 200, or the field is absent.
        """
        try:
            reply = safe_post(
                f"{base_url}/api/show",
                json={"model": model_name},
                timeout=5,
                allow_localhost=True,
                allow_private_ips=True,
            )
            if reply.status_code != 200:
                return None
            return reply.json().get("capabilities")  # type: ignore[no-any-return]
        except Exception:
            # Best effort: any failure simply means "capabilities unknown".
            logger.debug(f"Could not fetch capabilities for {model_name}")
            return None

    @classmethod
    def is_embedding_model(
        cls,
        model: str,
        settings_snapshot: Optional[Dict[str, Any]] = None,
    ) -> Optional[bool]:
        """Tell whether an Ollama model advertises embedding support.

        The answer comes solely from the ``/api/show`` capabilities list.
        ``None`` is returned when that list is unavailable (older Ollama
        servers); callers must read ``None`` as "unknown", not as "no",
        so models stay listed even when support can't be confirmed. The
        model name is deliberately never used as a heuristic.
        """
        capabilities = cls._get_model_capabilities(
            get_ollama_base_url(settings_snapshot), model
        )
        return None if capabilities is None else "embedding" in capabilities

    @classmethod
    def get_available_models(
        cls, settings_snapshot: Optional[Dict[str, Any]] = None
    ) -> List[Dict[str, Any]]:
        """List every Ollama model, tagging those with known capabilities.

        Nothing is filtered out: each model reported by the server is
        returned as a copy of its entry. An ``is_embedding`` flag is added
        only when ``/api/show`` returns a capabilities list; entries
        without one are returned untagged so the user can still pick
        them. The model name is never used to guess.

        Returns:
            Embedding-capable entries first, then untagged entries, then
            entries explicitly reported as non-embedding. An empty list
            when no models are reported.
        """
        from ....utilities.llm_utils import fetch_ollama_models

        base_url = get_ollama_base_url(settings_snapshot)
        # The fetched list is passed through unfiltered — no name
        # heuristic, no exclusions.
        installed = fetch_ollama_models(base_url, timeout=3.0)
        if not installed:
            return []

        embedding_models: List[Dict[str, Any]] = []
        untagged_models: List[Dict[str, Any]] = []
        other_models: List[Dict[str, Any]] = []

        for listed in installed:
            capabilities = cls._get_model_capabilities(
                base_url, listed["value"]
            )
            # Copy so the fetched entry itself is never mutated.
            record: Dict[str, Any] = dict(listed)

            if capabilities is None:
                # No capability signal from the server → don't guess;
                # keep the model selectable but untagged.
                untagged_models.append(record)
            elif "embedding" in capabilities:
                # Capabilities come from the API, not from a name match,
                # so they are safe to use for the flag.
                record["is_embedding"] = True
                embedding_models.append(record)
            else:
                record["is_embedding"] = False
                other_models.append(record)

        logger.info(
            "Found {} embedding-capable, {} non-embedding, and {} "
            "untagged models from Ollama",
            len(embedding_models),
            len(other_models),
            len(untagged_models),
        )

        # Embedding-tagged first so they're the default pick; untagged
        # next (capability unknown — user decides); then explicit
        # non-embedding. Nothing is dropped.
        return [*embedding_models, *untagged_models, *other_models]