Coverage for src/local_deep_research/embeddings/providers/implementations/openai.py: 100%
62 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
1"""OpenAI embedding provider."""
3from typing import Any, Dict, List, Optional
5from langchain_core.embeddings import Embeddings
6from loguru import logger
8from ....config.thread_settings import get_setting_from_snapshot
9from ..base import BaseEmbeddingProvider
class OpenAIEmbeddingsProvider(BaseEmbeddingProvider):
    """
    OpenAI embedding provider.

    Uses OpenAI API for cloud-based embeddings.
    Requires API key.

    Configuration is read through ``get_setting_from_snapshot`` from the
    ``embeddings.openai.api_key``, ``embeddings.openai.model``,
    ``embeddings.openai.base_url`` and ``embeddings.openai.dimensions``
    settings.
    """

    provider_name = "OpenAI"
    provider_key = "OPENAI"
    requires_api_key = True
    supports_local = False
    default_model = "text-embedding-3-small"  # type: ignore[assignment]

    @classmethod
    def create_embeddings(
        cls,
        model: Optional[str] = None,
        settings_snapshot: Optional[Dict[str, Any]] = None,
        **kwargs,
    ) -> Embeddings:
        """
        Create OpenAI embeddings instance.

        For ``api_key``, ``base_url`` and ``dimensions`` an explicit keyword
        argument takes precedence; the matching ``embeddings.openai.*``
        setting is consulted only when the keyword argument is absent or
        ``None``.

        Args:
            model: Model name. When ``None``, read from the
                ``embeddings.openai.model`` setting, falling back to
                ``cls.default_model``.
            settings_snapshot: Optional settings snapshot
            **kwargs: Additional parameters (``api_key``, ``base_url``,
                ``dimensions``)

        Returns:
            OpenAIEmbeddings instance

        Raises:
            ValueError: If API key is not configured
        """
        # Imported lazily so the module can be imported without
        # langchain_openai being loaded.
        from langchain_openai import OpenAIEmbeddings

        # Get API key
        api_key = kwargs.get("api_key")
        if api_key is None:
            api_key = get_setting_from_snapshot(
                "embeddings.openai.api_key",
                default=None,
                settings_snapshot=settings_snapshot,
            )

        # Covers both "missing" (None) and "empty string".
        if not api_key:
            logger.error("OpenAI API key not found in settings")
            raise ValueError(
                "OpenAI API key not configured. "
                "Please set embeddings.openai.api_key in settings."
            )

        # Get model from settings if not specified
        if model is None:
            model = get_setting_from_snapshot(
                "embeddings.openai.model",
                default=cls.default_model,
                settings_snapshot=settings_snapshot,
            )

        # Get optional parameters
        base_url = kwargs.get("base_url")
        if base_url is None:
            base_url = get_setting_from_snapshot(
                "embeddings.openai.base_url",
                default=None,
                settings_snapshot=settings_snapshot,
            )

        dimensions = kwargs.get("dimensions")
        if dimensions is None:
            dimensions = get_setting_from_snapshot(
                "embeddings.openai.dimensions",
                default=None,
                settings_snapshot=settings_snapshot,
            )

        logger.info(f"Creating OpenAIEmbeddings with model={model}")

        # Build parameters
        params = {
            "model": model,
            "openai_api_key": api_key,
        }

        # Only override the endpoint when one was actually configured.
        if base_url:
            params["openai_api_base"] = base_url

        # For text-embedding-3 models, dimensions can be customized;
        # for any other model name the value is ignored.
        if dimensions and model.startswith("text-embedding-3"):
            params["dimensions"] = int(dimensions)

        return OpenAIEmbeddings(**params)

    @classmethod
    def is_available(
        cls, settings_snapshot: Optional[Dict[str, Any]] = None
    ) -> bool:
        """
        Check if OpenAI embeddings are available.

        Availability here only means that a non-empty
        ``embeddings.openai.api_key`` setting can be read; no request is
        made to the API.

        Args:
            settings_snapshot: Optional settings snapshot

        Returns:
            True if an API key is configured, False otherwise (including
            when reading the setting raises).
        """
        try:
            # Check for API key
            api_key = get_setting_from_snapshot(
                "embeddings.openai.api_key",
                default=None,
                settings_snapshot=settings_snapshot,
            )
            return bool(api_key)
        except Exception:
            # loguru has no ``exc_info`` keyword (that is the stdlib
            # ``logging`` API); ``opt(exception=True)`` is what attaches
            # the active traceback to the record.
            logger.opt(exception=True).debug(
                "Error checking OpenAI embedding availability"
            )
            return False

    @classmethod
    def get_available_models(
        cls, settings_snapshot: Optional[Dict[str, Any]] = None
    ) -> List[Dict[str, str]]:
        """
        Get list of available OpenAI embedding models from API.

        The client is built with the configured API key and, when set, the
        ``embeddings.openai.base_url`` setting, so the listing is fetched
        from the same endpoint ``create_embeddings`` would use.

        Args:
            settings_snapshot: Optional settings snapshot

        Returns:
            One ``{"value": model_id, "label": model_id}`` dict per model
            whose id contains "embedding" (case-insensitive). An empty list
            when no API key is configured or when any error occurs.
        """
        try:
            from openai import OpenAI

            # Get API key
            api_key = get_setting_from_snapshot(
                "embeddings.openai.api_key",
                default=None,
                settings_snapshot=settings_snapshot,
            )

            if not api_key:
                logger.warning("OpenAI API key not configured")
                return []

            # Honour a custom endpoint, consistent with create_embeddings.
            base_url = get_setting_from_snapshot(
                "embeddings.openai.base_url",
                default=None,
                settings_snapshot=settings_snapshot,
            )

            # Create client and fetch models; base_url is passed only when
            # configured so the library default endpoint applies otherwise.
            client_kwargs: Dict[str, Any] = {"api_key": api_key}
            if base_url:
                client_kwargs["base_url"] = base_url
            client = OpenAI(**client_kwargs)
            models_response = client.models.list()

            # Filter for embedding models only.
            # OpenAI embedding models typically have "embedding" in the name
            return [
                {"value": entry.id, "label": entry.id}
                for entry in models_response.data
                if "embedding" in entry.id.lower()
            ]

        except Exception:
            # Best-effort listing: log with traceback and report no models.
            logger.exception("Error fetching OpenAI embedding models")
            return []