Coverage for src / local_deep_research / embeddings / providers / implementations / ollama.py: 97%

76 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-02-25 01:07 +0000

1"""Ollama embedding provider.""" 

2 

3from typing import Any, Dict, List, Optional 

4 

5from langchain_community.embeddings import OllamaEmbeddings 

6from langchain_core.embeddings import Embeddings 

7from loguru import logger 

8 

9from ....config.thread_settings import get_setting_from_snapshot 

10from ....utilities.llm_utils import get_ollama_base_url 

11from ..base import BaseEmbeddingProvider 

12from ....security import safe_get, safe_post 

13 

14 

class OllamaEmbeddingsProvider(BaseEmbeddingProvider):
    """
    Ollama embedding provider.

    Uses Ollama API for local embedding models.
    No API key required, runs locally.
    """

    provider_name = "Ollama"
    provider_key = "OLLAMA"
    requires_api_key = False
    supports_local = True
    default_model = "nomic-embed-text"

    @classmethod
    def create_embeddings(
        cls,
        model: Optional[str] = None,
        settings_snapshot: Optional[Dict[str, Any]] = None,
        **kwargs,
    ) -> Embeddings:
        """
        Create Ollama embeddings instance.

        Args:
            model: Model name (defaults to nomic-embed-text)
            settings_snapshot: Optional settings snapshot
            **kwargs: Additional parameters (base_url, etc.)

        Returns:
            OllamaEmbeddings instance
        """
        # Resolve the model: explicit argument wins, then settings, then default.
        if model is None:
            model = get_setting_from_snapshot(
                "embeddings.ollama.model",
                default=cls.default_model,
                settings_snapshot=settings_snapshot,
            )

        # Resolve the server URL: explicit kwarg wins over configured URL.
        endpoint = kwargs.get("base_url")
        if endpoint is None:
            endpoint = get_ollama_base_url(settings_snapshot)

        logger.info(
            f"Creating OllamaEmbeddings with model={model}, base_url={endpoint}"
        )

        return OllamaEmbeddings(model=model, base_url=endpoint)

    @classmethod
    def is_available(
        cls, settings_snapshot: Optional[Dict[str, Any]] = None
    ) -> bool:
        """Check if Ollama is available."""
        try:
            import requests

            url = get_ollama_base_url(settings_snapshot)

            # Ping the tags endpoint; a 200 means the server is up.
            try:
                resp = safe_get(
                    f"{url}/api/tags",
                    timeout=3.0,
                    allow_localhost=True,
                    allow_private_ips=True,
                )
            except requests.exceptions.RequestException:
                # Connection-level failures mean "not available" — no log needed.
                return False
            return resp.status_code == 200

        except Exception:
            # Unexpected failures (bad config, import error) are worth logging.
            logger.exception("Error checking Ollama availability")
            return False

    @classmethod
    def _get_model_capabilities(
        cls, base_url: str, model_name: str
    ) -> Optional[List[str]]:
        """Query Ollama /api/show for a model's capabilities.

        Returns the capabilities list (e.g. ["embedding"]) or None on failure.
        """
        try:
            reply = safe_post(
                f"{base_url}/api/show",
                json={"model": model_name},
                timeout=5,
                allow_localhost=True,
                allow_private_ips=True,
            )
            if reply.status_code == 200:
                # Older Ollama versions omit "capabilities" — .get keeps None.
                return reply.json().get("capabilities")
        except Exception:
            logger.debug(f"Could not fetch capabilities for {model_name}")
        return None

    @classmethod
    def is_embedding_model(
        cls,
        model: str,
        settings_snapshot: Optional[Dict[str, Any]] = None,
    ) -> Optional[bool]:
        """Check whether an Ollama model supports embeddings.

        Uses the /api/show capabilities field. Falls back to name heuristics
        for older Ollama versions that don't expose capabilities.
        """
        capabilities = cls._get_model_capabilities(
            get_ollama_base_url(settings_snapshot), model
        )
        if capabilities is None:
            # Older Ollama without a capabilities field — guess from the name.
            return _name_looks_like_embedding(model)
        return "embedding" in capabilities

    @classmethod
    def get_available_models(
        cls, settings_snapshot: Optional[Dict[str, Any]] = None
    ) -> List[Dict[str, str]]:
        """Get all Ollama models with embedding compatibility info.

        Queries each model's capabilities via /api/show and marks models
        with an `is_embedding` flag. Returns embedding models first,
        then non-embedding models.
        """
        from ....utilities.llm_utils import fetch_ollama_models

        base_url = get_ollama_base_url(settings_snapshot)
        models = fetch_ollama_models(base_url, timeout=3.0)
        if not models:
            return []

        embedders: List[Dict[str, str]] = []
        others: List[Dict[str, str]] = []

        for entry in models:
            name = entry["value"]
            capabilities = cls._get_model_capabilities(base_url, name)

            if capabilities is None:
                # Older Ollama without capabilities — use name heuristic
                flag = _name_looks_like_embedding(name)
            else:
                flag = "embedding" in capabilities

            entry["is_embedding"] = flag
            (embedders if flag else others).append(entry)

        logger.info(
            f"Found {len(embedders)} embedding models and "
            f"{len(others)} other models from Ollama"
        )

        # Embedding models first, then the rest
        return embedders + others

182 

183 

184def _name_looks_like_embedding(model_name: str) -> bool: 

185 """Heuristic: check if a model name suggests it's an embedding model. 

186 

187 Used as fallback for Ollama versions that don't expose capabilities. 

188 """ 

189 name_lower = model_name.lower() 

190 return "embed" in name_lower or "bge" in name_lower