Coverage for src/local_deep_research/llm/providers/openai_base.py: 95%

125 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-03 23:15 +0000

1"""Base OpenAI-compatible endpoint provider for Local Deep Research.""" 

2 

3from langchain_openai import ChatOpenAI 

4from loguru import logger 

5 

6from ...config.thread_settings import ( 

7 get_setting_from_snapshot, 

8 NoSettingsContextError, 

9) 

10from ...utilities.url_utils import normalize_url 

11from .base import BaseLLMProvider 

12 

13 

class OpenAICompatibleProvider(BaseLLMProvider):
    """Base class for OpenAI-compatible API providers.

    This class provides a common implementation for any service that offers
    an OpenAI-compatible API endpoint (Google, OpenRouter, Groq, Together, etc.)

    Subclasses customise behaviour by overriding the class attributes below
    and, where needed, ``requires_auth_for_models``.
    """

    # Override these in subclasses
    provider_name = "openai_endpoint"  # Name used in logs
    api_key_setting = "llm.openai_endpoint.api_key"  # Settings key for API key
    url_setting = None  # Settings key for URL (e.g., "llm.lmstudio.url")
    default_base_url = "https://api.openai.com/v1"  # Default endpoint URL
    default_model = (
        ""  # User must explicitly configure llm.model — no silent fallback
    )

    # Optional ChatOpenAI keyword arguments that are copied verbatim from the
    # "llm.<name>" setting whenever that setting resolves to a non-None value.
    _PASSTHROUGH_SETTINGS = ("streaming", "max_retries", "request_timeout")

    @classmethod
    def _require_model_name(cls, model_name):
        """Raise ``ValueError`` unless ``model_name`` is a non-blank string.

        There is deliberately no fallback to a hardcoded default model: the
        user must configure ``llm.model`` explicitly.
        """
        if not model_name or not model_name.strip():
            logger.error(f"{cls.provider_name} model name not provided")
            raise ValueError(
                f"{cls.provider_name} model not configured. "
                f"Please set llm.model in settings."
            )

    @classmethod
    def _resolve_base_url(cls, kwargs):
        """Return the endpoint URL, honouring a ``base_url`` override in kwargs.

        A missing or falsy override falls back to ``cls.default_base_url``;
        any other value is passed through ``normalize_url`` first.
        """
        base_url = kwargs.get("base_url", cls.default_base_url)
        return normalize_url(base_url) if base_url else cls.default_base_url

    @staticmethod
    def _optional_setting(key, settings_snapshot):
        """Look up an optional setting, returning ``None`` when unavailable.

        ``NoSettingsContextError`` is treated as "not configured" because all
        settings read through this helper are optional tuning parameters.
        """
        try:
            return get_setting_from_snapshot(
                key,
                default=None,
                settings_snapshot=settings_snapshot,
            )
        except NoSettingsContextError:
            return None

    @classmethod
    def _build_llm_params(
        cls, model_name, temperature, api_key, base_url, settings_snapshot
    ):
        """Assemble the keyword arguments handed to ``ChatOpenAI``.

        The four mandatory parameters are always present. ``max_tokens``,
        ``streaming``, ``max_retries`` and ``request_timeout`` are added only
        when the corresponding ``llm.*`` setting is configured.
        """
        llm_params = {
            "model": model_name,
            "api_key": api_key,
            "base_url": base_url,
            "temperature": temperature,
        }

        # max_tokens is skipped for any falsy value (None, 0, "") and is
        # coerced to int; a non-numeric value raises ValueError from int().
        max_tokens = cls._optional_setting("llm.max_tokens", settings_snapshot)
        if max_tokens:
            llm_params["max_tokens"] = int(max_tokens)

        # The remaining options are forwarded as-is; only None means "unset",
        # so explicit values such as False or 0 are still passed through.
        for name in cls._PASSTHROUGH_SETTINGS:
            value = cls._optional_setting(f"llm.{name}", settings_snapshot)
            if value is not None:
                llm_params[name] = value

        return llm_params

    @classmethod
    def create_llm(cls, model_name=None, temperature=0.7, **kwargs):
        """Factory function for OpenAI-compatible LLMs.

        Args:
            model_name: Name of the model to use
            temperature: Model temperature (0.0-1.0)
            **kwargs: Additional arguments including settings_snapshot;
                ``base_url`` overrides the endpoint, and ``api_key`` is used
                only when the provider defines no ``api_key_setting``

        Returns:
            A configured ChatOpenAI instance

        Raises:
            ValueError: If the API key or the model name is not configured
        """
        settings_snapshot = kwargs.get("settings_snapshot")

        # Get API key from settings (if provider requires one)
        if cls.api_key_setting:
            api_key = get_setting_from_snapshot(
                cls.api_key_setting,
                default=None,
                settings_snapshot=settings_snapshot,
            )

            # Treat a whitespace-only key as missing, matching is_available().
            if not api_key or not str(api_key).strip():
                logger.error(
                    f"{cls.provider_name} API key not found in settings"
                )
                raise ValueError(
                    f"{cls.provider_name} API key not configured. "
                    f"Please set {cls.api_key_setting} in settings."
                )
        else:
            # Provider doesn't require API key (e.g., LM Studio)
            api_key = kwargs.get("api_key", "dummy-key")

        # Require an explicit model — no silent fallback to a hardcoded default.
        cls._require_model_name(model_name)

        # Get endpoint URL (can be overridden in kwargs for flexibility)
        base_url = cls._resolve_base_url(kwargs)

        llm_params = cls._build_llm_params(
            model_name, temperature, api_key, base_url, settings_snapshot
        )

        logger.info(
            f"Creating {cls.provider_name} LLM with model: {model_name}, "
            f"temperature: {temperature}, endpoint: {base_url}"
        )

        return ChatOpenAI(**llm_params)

    @classmethod
    def _create_llm_instance(cls, model_name=None, temperature=0.7, **kwargs):
        """Internal method to create LLM instance with provided parameters.

        This bypasses API key checking for providers that handle auth
        differently: the key is taken from ``kwargs["api_key"]`` (defaulting
        to a placeholder) instead of being read from settings. The optional
        ``llm.*`` tuning settings are applied exactly as in ``create_llm``.

        Args:
            model_name: Name of the model to use
            temperature: Model temperature
            **kwargs: May contain settings_snapshot, base_url and api_key

        Returns:
            A configured ChatOpenAI instance

        Raises:
            ValueError: If the model name is missing or blank
        """
        settings_snapshot = kwargs.get("settings_snapshot")

        # Require an explicit model — no silent fallback to a hardcoded default.
        cls._require_model_name(model_name)

        # Get endpoint URL (can be overridden in kwargs for flexibility)
        base_url = cls._resolve_base_url(kwargs)

        # Get API key from kwargs (caller is responsible for providing it)
        api_key = kwargs.get("api_key", "dummy-key")

        llm_params = cls._build_llm_params(
            model_name, temperature, api_key, base_url, settings_snapshot
        )

        return ChatOpenAI(**llm_params)

    @classmethod
    def is_available(cls, settings_snapshot=None):
        """Check if this provider is available.

        Args:
            settings_snapshot: Optional settings snapshot to use

        Returns:
            True if API key is configured (or not needed), False otherwise.
            Any error raised while reading the setting also yields False.
        """
        try:
            # If provider doesn't require API key, it's available
            if not cls.api_key_setting:
                return True

            # Check if API key is configured
            api_key = get_setting_from_snapshot(
                cls.api_key_setting,
                default=None,
                settings_snapshot=settings_snapshot,
            )
            return bool(api_key and str(api_key).strip())
        except Exception:
            # Availability probing is best-effort: never propagate errors.
            return False

    @classmethod
    def requires_auth_for_models(cls):
        """Check if this provider requires authentication for listing models.

        Override in subclasses that don't require auth.

        Returns:
            True if authentication is required, False otherwise
        """
        return True

    # Resolves base URL from settings; called by list_models().
    @classmethod
    def _get_base_url_for_models(cls, settings_snapshot=None):
        """Get the base URL to use for listing models.

        Reads from url_setting if defined, otherwise uses default_base_url.

        Args:
            settings_snapshot: Optional settings snapshot dict

        Returns:
            The base URL string to use for model listing. A URL read from
            settings has trailing slashes removed.
        """
        if cls.url_setting:
            # Use get_setting_from_snapshot which handles both settings_snapshot
            # and thread-local context, with proper fallback
            url = get_setting_from_snapshot(
                cls.url_setting,
                default=None,
                settings_snapshot=settings_snapshot,
            )
            if url:
                return url.rstrip("/")

        return cls.default_base_url

    @classmethod
    def list_models_for_api(cls, api_key=None, base_url=None):
        """List available models for API endpoint use.

        This method is designed to be called from Flask routes.

        Args:
            api_key: Optional API key (if None and required, returns empty list)
            base_url: Optional base URL to use (if None, uses cls.default_base_url)

        Returns:
            List of model dictionaries with 'value' and 'label' keys. An empty
            list is returned on any failure; this method does not raise.
        """
        try:
            # Defense-in-depth: never send a non-string credential to the SDK.
            # The OpenAI client coerces the api_key into "Authorization: Bearer
            # <repr(api_key)>" — passing a dict would leak its contents to the
            # endpoint we're listing models from.
            if api_key is not None and not isinstance(api_key, str):
                logger.error(
                    f"{cls.provider_name}.list_models_for_api received "
                    f"non-string api_key of type {type(api_key).__name__}; "
                    f"refusing to send."
                )
                return []

            # Check if auth is required
            if cls.requires_auth_for_models():
                if not api_key:
                    logger.debug(
                        f"{cls.provider_name} requires API key for model listing"
                    )
                    return []
            else:
                # Use a dummy key for providers that don't require auth
                api_key = api_key or "dummy-key-for-models-list"

            # Imported lazily so the SDK is only loaded when models are listed.
            from openai import OpenAI

            # Use provided base_url or fall back to class default
            if not base_url:
                base_url = cls.default_base_url

            # Create OpenAI client (uses library defaults for timeout)
            client = OpenAI(api_key=api_key, base_url=base_url)

            # Fetch models
            logger.debug(
                f"Fetching models from {cls.provider_name} at {base_url}"
            )
            models_response = client.models.list()

            # Entries without an id are skipped; the id doubles as the label.
            models = [
                {"value": model.id, "label": model.id}
                for model in models_response.data
                if model.id
            ]

            logger.info(f"Found {len(models)} models from {cls.provider_name}")
            return models

        except Exception:
            # Use warning level since connection failures are expected
            # when the provider is not running (e.g., LM Studio not started)
            logger.warning(f"Could not list models from {cls.provider_name}")
            return []

    # High-level settings-aware wrapper around list_models_for_api().
    # Documented in docs/developing/EXTENDING.md as the provider interface
    # for custom providers.
    @classmethod
    def list_models(cls, settings_snapshot=None):
        """List available models from this provider.

        Args:
            settings_snapshot: Optional settings snapshot to use

        Returns:
            List of model dictionaries with 'value' and 'label' keys. An empty
            list is returned if reading settings or listing models fails.
        """
        try:
            # Get API key from settings if auth is required
            api_key = None
            if cls.requires_auth_for_models():
                api_key = get_setting_from_snapshot(
                    cls.api_key_setting,
                    default=None,
                    settings_snapshot=settings_snapshot,
                )

            # Get base URL from settings if provider has configurable URL
            base_url = cls._get_base_url_for_models(settings_snapshot)

            return cls.list_models_for_api(api_key, base_url)

        except Exception:
            logger.exception(f"Error listing models from {cls.provider_name}")
            return []