Coverage for src / local_deep_research / embeddings / providers / implementations / openai.py: 21%

61 statements  

« prev     ^ index     » next       coverage.py v7.12.0, created at 2026-01-11 00:51 +0000

1"""OpenAI embedding provider.""" 

2 

3from typing import Any, Dict, List, Optional 

4 

5from langchain_core.embeddings import Embeddings 

6from loguru import logger 

7 

8from ....config.thread_settings import get_setting_from_snapshot 

9from ..base import BaseEmbeddingProvider 

10 

11 

class OpenAIEmbeddingsProvider(BaseEmbeddingProvider):
    """
    OpenAI embedding provider.

    Uses OpenAI API for cloud-based embeddings.
    Requires API key.

    All configuration is read from the ``embeddings.openai.*`` settings
    (``api_key``, ``model``, ``base_url``, ``dimensions``) via
    ``get_setting_from_snapshot``.
    """

    provider_name = "OpenAI"
    provider_key = "OPENAI"
    requires_api_key = True
    supports_local = False
    default_model = "text-embedding-3-small"

    @classmethod
    def create_embeddings(
        cls,
        model: Optional[str] = None,
        settings_snapshot: Optional[Dict[str, Any]] = None,
        **kwargs,
    ) -> Embeddings:
        """
        Create OpenAI embeddings instance.

        Explicit keyword arguments take precedence; any value left as
        ``None`` is looked up in the settings snapshot instead.

        Args:
            model: Model name (defaults to the ``embeddings.openai.model``
                setting, then to ``default_model``)
            settings_snapshot: Optional settings snapshot
            **kwargs: Additional parameters. Recognized keys are
                ``api_key``, ``base_url`` and ``dimensions``.

        Returns:
            OpenAIEmbeddings instance

        Raises:
            ValueError: If API key is not configured
        """
        # Imported lazily so the module can be loaded without langchain_openai.
        from langchain_openai import OpenAIEmbeddings

        # Get API key
        api_key = kwargs.get("api_key")
        if api_key is None:
            api_key = get_setting_from_snapshot(
                "embeddings.openai.api_key",
                default=None,
                settings_snapshot=settings_snapshot,
            )

        # An empty string counts as "not configured" as well.
        if not api_key:
            logger.error("OpenAI API key not found in settings")
            raise ValueError(
                "OpenAI API key not configured. "
                "Please set embeddings.openai.api_key in settings."
            )

        # Get model from settings if not specified
        if model is None:
            model = get_setting_from_snapshot(
                "embeddings.openai.model",
                default=cls.default_model,
                settings_snapshot=settings_snapshot,
            )

        # Get optional parameters
        base_url = kwargs.get("base_url")
        if base_url is None:
            base_url = get_setting_from_snapshot(
                "embeddings.openai.base_url",
                default=None,
                settings_snapshot=settings_snapshot,
            )

        dimensions = kwargs.get("dimensions")
        if dimensions is None:
            dimensions = get_setting_from_snapshot(
                "embeddings.openai.dimensions",
                default=None,
                settings_snapshot=settings_snapshot,
            )

        logger.info(f"Creating OpenAIEmbeddings with model={model}")

        # Build parameters
        params = {
            "model": model,
            "openai_api_key": api_key,
        }

        # Only override the endpoint when one is actually configured.
        if base_url:
            params["openai_api_base"] = base_url

        # For text-embedding-3 models, dimensions can be customized
        if dimensions and model.startswith("text-embedding-3"):
            params["dimensions"] = int(dimensions)

        return OpenAIEmbeddings(**params)

    @classmethod
    def is_available(
        cls, settings_snapshot: Optional[Dict[str, Any]] = None
    ) -> bool:
        """
        Check if OpenAI embeddings are available.

        Args:
            settings_snapshot: Optional settings snapshot

        Returns:
            True when a non-empty ``embeddings.openai.api_key`` setting is
            found; False otherwise, including when the lookup itself raises.
        """
        try:
            # Check for API key
            api_key = get_setting_from_snapshot(
                "embeddings.openai.api_key",
                default=None,
                settings_snapshot=settings_snapshot,
            )
            return bool(api_key)
        except Exception:
            # Best-effort probe: any failure means "not available".
            return False

    @classmethod
    def get_available_models(
        cls, settings_snapshot: Optional[Dict[str, Any]] = None
    ) -> List[Dict[str, str]]:
        """
        Get list of available OpenAI embedding models from API.

        The client is pointed at the same ``embeddings.openai.base_url``
        endpoint that ``create_embeddings`` uses, so the listing matches the
        server the embeddings will actually be requested from.

        Args:
            settings_snapshot: Optional settings snapshot

        Returns:
            One ``{"value": model_id, "label": model_id}`` dict per model
            whose id contains "embedding" (case-insensitive). An empty list
            when no API key is configured or when any error occurs.
        """
        try:
            from openai import OpenAI

            # Get API key
            api_key = get_setting_from_snapshot(
                "embeddings.openai.api_key",
                default=None,
                settings_snapshot=settings_snapshot,
            )

            if not api_key:
                logger.warning("OpenAI API key not configured")
                return []

            # Honor a custom endpoint, consistent with create_embeddings.
            base_url = get_setting_from_snapshot(
                "embeddings.openai.base_url",
                default=None,
                settings_snapshot=settings_snapshot,
            )

            client_kwargs = {"api_key": api_key}
            if base_url:
                client_kwargs["base_url"] = base_url

            # Create client and fetch models
            client = OpenAI(**client_kwargs)
            models_response = client.models.list()

            # Filter for embedding models only.
            # OpenAI embedding models typically have "embedding" in the name.
            return [
                {"value": entry.id, "label": entry.id}
                for entry in models_response.data
                if "embedding" in entry.id.lower()
            ]

        except Exception:
            logger.exception("Error fetching OpenAI embedding models")
            return []