Coverage for src / local_deep_research / embeddings / providers / implementations / openai.py: 100%

62 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-14 23:55 +0000

1"""OpenAI embedding provider.""" 

2 

3from typing import Any, Dict, List, Optional 

4 

5from langchain_core.embeddings import Embeddings 

6from loguru import logger 

7 

8from ....config.thread_settings import get_setting_from_snapshot 

9from ..base import BaseEmbeddingProvider 

10 

11 

class OpenAIEmbeddingsProvider(BaseEmbeddingProvider):
    """
    OpenAI embedding provider.

    Uses OpenAI API for cloud-based embeddings.
    Requires API key.
    """

    provider_name = "OpenAI"
    provider_key = "OPENAI"
    requires_api_key = True
    supports_local = False
    default_model = "text-embedding-3-small"  # type: ignore[assignment]

    @classmethod
    def create_embeddings(
        cls,
        model: Optional[str] = None,
        settings_snapshot: Optional[Dict[str, Any]] = None,
        **kwargs,
    ) -> Embeddings:
        """
        Create OpenAI embeddings instance.

        Explicit keyword arguments take precedence; any value left as
        ``None`` is looked up under the ``embeddings.openai.*`` settings keys.

        Args:
            model: Model name (defaults to the ``embeddings.openai.model``
                setting, then to ``default_model``)
            settings_snapshot: Optional settings snapshot
            **kwargs: Additional parameters. The keys read here are
                ``api_key``, ``base_url`` and ``dimensions``.

        Returns:
            OpenAIEmbeddings instance

        Raises:
            ValueError: If API key is not configured
        """
        from langchain_openai import OpenAIEmbeddings

        # Get API key
        api_key = kwargs.get("api_key")
        if api_key is None:
            api_key = get_setting_from_snapshot(
                "embeddings.openai.api_key",
                default=None,
                settings_snapshot=settings_snapshot,
            )

        if not api_key:
            logger.error("OpenAI API key not found in settings")
            raise ValueError(
                "OpenAI API key not configured. "
                "Please set embeddings.openai.api_key in settings."
            )

        # Get model from settings if not specified
        if model is None:
            model = get_setting_from_snapshot(
                "embeddings.openai.model",
                default=cls.default_model,
                settings_snapshot=settings_snapshot,
            )

        # Get optional parameters
        base_url = kwargs.get("base_url")
        if base_url is None:
            base_url = get_setting_from_snapshot(
                "embeddings.openai.base_url",
                default=None,
                settings_snapshot=settings_snapshot,
            )

        dimensions = kwargs.get("dimensions")
        if dimensions is None:
            dimensions = get_setting_from_snapshot(
                "embeddings.openai.dimensions",
                default=None,
                settings_snapshot=settings_snapshot,
            )

        logger.info(f"Creating OpenAIEmbeddings with model={model}")

        # Build parameters
        params = {
            "model": model,
            "openai_api_key": api_key,
        }

        if base_url:
            params["openai_api_base"] = base_url

        # For text-embedding-3 models, dimensions can be customized;
        # for any other model name the value is not forwarded.
        if dimensions and model.startswith("text-embedding-3"):
            params["dimensions"] = int(dimensions)

        return OpenAIEmbeddings(**params)

    @classmethod
    def is_available(
        cls, settings_snapshot: Optional[Dict[str, Any]] = None
    ) -> bool:
        """
        Check if OpenAI embeddings are available.

        Args:
            settings_snapshot: Optional settings snapshot

        Returns:
            True when a non-empty ``embeddings.openai.api_key`` setting is
            found; False when it is missing/empty or the lookup raises.
        """
        try:
            # Check for API key
            api_key = get_setting_from_snapshot(
                "embeddings.openai.api_key",
                default=None,
                settings_snapshot=settings_snapshot,
            )
            return bool(api_key)
        except Exception:
            # loguru does not interpret the stdlib-style ``exc_info=True``
            # keyword, so the traceback must be requested through
            # ``opt(exception=True)`` for it to reach the log.
            logger.opt(exception=True).debug(
                "Error checking OpenAI embedding availability"
            )
            return False

    @classmethod
    def get_available_models(
        cls, settings_snapshot: Optional[Dict[str, Any]] = None
    ) -> List[Dict[str, str]]:
        """
        Get list of available OpenAI embedding models from API.

        Args:
            settings_snapshot: Optional settings snapshot

        Returns:
            One ``{"value": id, "label": id}`` dict per model whose id
            contains "embedding" (case-insensitive). An empty list is
            returned when no API key is configured or when any error occurs
            while querying the API.
        """
        try:
            from openai import OpenAI

            # Get API key
            api_key = get_setting_from_snapshot(
                "embeddings.openai.api_key",
                default=None,
                settings_snapshot=settings_snapshot,
            )

            if not api_key:
                logger.warning("OpenAI API key not configured")
                return []

            # Honour the same custom endpoint that create_embeddings uses, so
            # the models listed come from the server that will serve them.
            base_url = get_setting_from_snapshot(
                "embeddings.openai.base_url",
                default=None,
                settings_snapshot=settings_snapshot,
            )

            # Create client and fetch models
            client_kwargs: Dict[str, Any] = {"api_key": api_key}
            if base_url:
                client_kwargs["base_url"] = base_url
            client = OpenAI(**client_kwargs)
            models_response = client.models.list()

            # Filter for embedding models only.
            # OpenAI embedding models typically have "embedding" in the name
            return [
                {"value": entry.id, "label": entry.id}
                for entry in models_response.data
                if "embedding" in entry.id.lower()
            ]

        except Exception:
            logger.exception("Error fetching OpenAI embedding models")
            return []