Coverage for src/local_deep_research/config/llm_config.py: 95%

334 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-03 23:15 +0000

1from functools import cache 

2from typing import Any 

3 

4from langchain_anthropic import ChatAnthropic 

5from langchain_core.language_models import BaseChatModel 

6from langchain_core.messages import AIMessage 

7from langchain_ollama import ChatOllama 

8from langchain_openai import ChatOpenAI 

9from loguru import logger 

10 

11from ..llm import get_llm_from_registry, is_llm_registered 

12from ..utilities.search_utilities import remove_think_tags 

13from ..utilities.url_utils import normalize_url 

14from .constants import ( 

15 DEFAULT_LLAMACPP_URL, 

16 DEFAULT_LMSTUDIO_URL, 

17 DEFAULT_OLLAMA_URL, 

18) 

19 

20# Import providers module to trigger auto-discovery 

21try: 

22 from ..llm.providers import discover_providers # noqa: F401 

23 # Auto-discovery happens on module import 

24except ImportError: 

25 logger.debug("Providers module not available yet") 

26from ..llm.providers.base import normalize_provider 

27from .thread_settings import ( 

28 get_setting_from_snapshot, 

29 NoSettingsContextError, 

30) 

31 

# Canonical (lowercase) names of the built-in providers that ``get_llm``
# accepts once custom registered LLMs have been ruled out. "none" is the
# explicit "nothing configured" marker.
VALID_PROVIDERS = [
    "ollama",
    "openai",
    "anthropic",
    "google",
    "openrouter",
    "openai_endpoint",
    "lmstudio",
    "llamacpp",
    "none",
]

44 

45 

def is_openai_available(settings_snapshot=None):
    """Report whether the OpenAI provider considers itself usable.

    The provider class is imported lazily and asked through its
    ``is_available`` hook. A failed import, or any error raised while
    checking, is reported as "not available".

    Args:
        settings_snapshot: Optional settings snapshot forwarded unchanged
            to ``OpenAIProvider.is_available``.

    Returns:
        Whatever ``OpenAIProvider.is_available`` returns, or ``False`` when
        the import or the check fails.
    """
    available = False
    try:
        from ..llm.providers.implementations.openai import (
            OpenAIProvider as provider_cls,
        )

        available = provider_cls.is_available(settings_snapshot)
    except ImportError:
        # Provider module (or something it imports) is missing.
        available = False
    except Exception:
        logger.debug("Error checking OpenAI availability", exc_info=True)
        available = False
    return available

57 

58 

def is_anthropic_available(settings_snapshot=None):
    """Report whether the Anthropic provider considers itself usable.

    The provider class is imported lazily and asked through its
    ``is_available`` hook. A failed import, or any error raised while
    checking, is reported as "not available".

    Args:
        settings_snapshot: Optional settings snapshot forwarded unchanged
            to ``AnthropicProvider.is_available``.

    Returns:
        Whatever ``AnthropicProvider.is_available`` returns, or ``False``
        when the import or the check fails.
    """
    available = False
    try:
        from ..llm.providers.implementations.anthropic import (
            AnthropicProvider as provider_cls,
        )

        available = provider_cls.is_available(settings_snapshot)
    except ImportError:
        # Provider module (or something it imports) is missing.
        available = False
    except Exception:
        logger.debug("Error checking Anthropic availability", exc_info=True)
        available = False
    return available

70 

71 

def is_openai_endpoint_available(settings_snapshot=None):
    """Report whether the custom OpenAI-compatible endpoint provider is usable.

    The provider class is imported lazily and asked through its
    ``is_available`` hook. A failed import, or any error raised while
    checking, is reported as "not available".

    Args:
        settings_snapshot: Optional settings snapshot forwarded unchanged
            to ``CustomOpenAIEndpointProvider.is_available``.

    Returns:
        Whatever ``CustomOpenAIEndpointProvider.is_available`` returns, or
        ``False`` when the import or the check fails.
    """
    available = False
    try:
        from ..llm.providers.implementations.custom_openai_endpoint import (
            CustomOpenAIEndpointProvider as provider_cls,
        )

        available = provider_cls.is_available(settings_snapshot)
    except ImportError:
        # Provider module (or something it imports) is missing.
        available = False
    except Exception:
        logger.debug(
            "Error checking OpenAI endpoint availability", exc_info=True
        )
        available = False
    return available

87 

88 

def is_ollama_available(settings_snapshot=None):
    """Report whether the Ollama provider considers itself usable.

    The provider class is imported lazily and asked through its
    ``is_available`` hook. A failed import, or any error raised while
    checking, is reported as "not available".

    Args:
        settings_snapshot: Optional settings snapshot forwarded unchanged
            to ``OllamaProvider.is_available``.

    Returns:
        Whatever ``OllamaProvider.is_available`` returns, or ``False`` when
        the import or the check fails.
    """
    available = False
    try:
        from ..llm.providers.implementations.ollama import (
            OllamaProvider as provider_cls,
        )

        available = provider_cls.is_available(settings_snapshot)
    except ImportError:
        # Provider module (or something it imports) is missing.
        available = False
    except Exception:
        logger.debug("Error checking Ollama availability", exc_info=True)
        available = False
    return available

100 

101 

def is_lmstudio_available(settings_snapshot=None):
    """Report whether the LM Studio provider considers itself usable.

    The provider class is imported lazily and asked through its
    ``is_available`` hook. A failed import, or any error raised while
    checking, is reported as "not available".

    Args:
        settings_snapshot: Optional settings snapshot forwarded unchanged
            to ``LMStudioProvider.is_available``.

    Returns:
        Whatever ``LMStudioProvider.is_available`` returns, or ``False``
        when the import or the check fails.
    """
    available = False
    try:
        from ..llm.providers.implementations.lmstudio import (
            LMStudioProvider as provider_cls,
        )

        available = provider_cls.is_available(settings_snapshot)
    except ImportError:
        # Provider module (or something it imports) is missing.
        available = False
    except Exception:
        logger.debug("Error checking LM Studio availability", exc_info=True)
        available = False
    return available

113 

114 

def is_llamacpp_available(settings_snapshot=None):
    """Report whether the llama.cpp provider considers itself usable.

    The integration talks to `llama-server`'s OpenAI-compatible HTTP
    endpoint, so no in-process `llama-cpp-python` binding is required.
    The provider class is imported lazily and asked through its
    ``is_available`` hook. A failed import, or any error raised while
    checking, is reported as "not available".

    Args:
        settings_snapshot: Optional settings snapshot forwarded unchanged
            to ``LlamaCppProvider.is_available``.

    Returns:
        Whatever ``LlamaCppProvider.is_available`` returns, or ``False``
        when the import or the check fails.
    """
    available = False
    try:
        from ..llm.providers.implementations.llamacpp import (
            LlamaCppProvider as provider_cls,
        )

        available = provider_cls.is_available(settings_snapshot)
    except ImportError:
        # Provider module (or something it imports) is missing.
        available = False
    except Exception:
        logger.debug("Error checking llama.cpp availability", exc_info=True)
        available = False
    return available

130 

131 

def is_google_available(settings_snapshot=None):
    """Report whether the Google/Gemini provider considers itself usable.

    The provider class is imported lazily and asked through its
    ``is_available`` hook. A failed import, or any error raised while
    checking, is reported as "not available".

    Args:
        settings_snapshot: Optional settings snapshot forwarded unchanged
            to ``GoogleProvider.is_available``.

    Returns:
        Whatever ``GoogleProvider.is_available`` returns, or ``False`` when
        the import or the check fails.
    """
    available = False
    try:
        from ..llm.providers.implementations.google import (
            GoogleProvider as provider_cls,
        )

        available = provider_cls.is_available(settings_snapshot)
    except ImportError:
        # Provider module (or something it imports) is missing.
        available = False
    except Exception:
        logger.debug("Error checking Google availability", exc_info=True)
        available = False
    return available

143 

144 

def is_openrouter_available(settings_snapshot=None):
    """Report whether the OpenRouter provider considers itself usable.

    The provider class is imported lazily and asked through its
    ``is_available`` hook. A failed import, or any error raised while
    checking, is reported as "not available".

    Args:
        settings_snapshot: Optional settings snapshot forwarded unchanged
            to ``OpenRouterProvider.is_available``.

    Returns:
        Whatever ``OpenRouterProvider.is_available`` returns, or ``False``
        when the import or the check fails.
    """
    available = False
    try:
        from ..llm.providers.implementations.openrouter import (
            OpenRouterProvider as provider_cls,
        )

        available = provider_cls.is_available(settings_snapshot)
    except ImportError:
        # Provider module (or something it imports) is missing.
        available = False
    except Exception:
        logger.debug("Error checking OpenRouter availability", exc_info=True)
        available = False
    return available

158 

159 

def _collect_available_providers(settings_snapshot=None):
    """Probe each built-in provider and map the available ones to labels."""
    # The probe functions are looked up when this function runs (not at
    # import time), so replacing a module-level ``is_*_available`` takes
    # effect on the next uncached call.
    probes = (
        ("ollama", "Ollama (local models)", is_ollama_available),
        ("openai", "OpenAI API", is_openai_available),
        ("anthropic", "Anthropic API", is_anthropic_available),
        ("google", "Google Gemini API", is_google_available),
        ("openrouter", "OpenRouter API", is_openrouter_available),
        (
            "openai_endpoint",
            "OpenAI-Compatible Endpoint",
            is_openai_endpoint_available,
        ),
        ("lmstudio", "LM Studio (local models)", is_lmstudio_available),
        ("llamacpp", "LlamaCpp (local models)", is_llamacpp_available),
    )
    providers = {
        key: label for key, label, probe in probes if probe(settings_snapshot)
    }

    # Default fallback
    if not providers:
        providers["none"] = "No model providers available"

    return providers


# Memoized variant; only usable when the snapshot argument is hashable.
_cached_available_providers = cache(_collect_available_providers)


def get_available_providers(settings_snapshot=None):
    """Return available model providers.

    Results are memoized per ``settings_snapshot`` value. ``functools.cache``
    hashes its arguments, so an unhashable snapshot (for example a plain
    ``dict``) cannot be used as a cache key; in that case the providers are
    probed directly on every call instead of raising ``TypeError``.

    Args:
        settings_snapshot: Optional settings snapshot forwarded to each
            ``is_*_available`` probe.

    Returns:
        dict: Provider key -> human-readable label, in probe order. When no
        provider is available the dict is ``{"none": "No model providers
        available"}``. For hashable snapshots the same dict object is
        returned on repeated calls, so callers should not mutate it.
    """
    try:
        hash(settings_snapshot)
    except TypeError:
        # Unhashable snapshot: bypass the cache rather than fail.
        return _collect_available_providers(settings_snapshot)
    return _cached_available_providers(settings_snapshot)


# Preserve the cache-management surface that ``@cache`` exposed on the
# public function so existing callers keep working.
get_available_providers.cache_clear = _cached_available_providers.cache_clear
get_available_providers.cache_info = _cached_available_providers.cache_info
get_available_providers.__wrapped__ = _collect_available_providers

194 

195 

def get_selected_llm_provider(settings_snapshot=None):
    """Return the configured LLM provider name in normalized form.

    Reads the ``llm.provider`` setting (falling back to ``"ollama"`` when
    it is not set) and passes it through ``normalize_provider``.

    Args:
        settings_snapshot: Optional settings snapshot to read from.
    """
    configured = get_setting_from_snapshot(
        "llm.provider", "ollama", settings_snapshot=settings_snapshot
    )
    return normalize_provider(configured)

202 

203 

def _get_context_window_for_provider(provider_type, settings_snapshot=None):
    """Resolve the context window size to use for ``provider_type``.

    Local providers (ollama, llamacpp, lmstudio) read
    ``llm.local_context_window_size`` with a small default (8192) to
    prevent memory issues. Every other provider is treated as a cloud
    provider: when ``llm.context_window_unrestricted`` is truthy no limit
    is imposed, otherwise ``llm.context_window_size`` (default 128000) is
    used.

    Args:
        provider_type: Provider name to resolve the window for.
        settings_snapshot: Optional settings snapshot to read from.

    Returns:
        int or None: The context window size, or None for unrestricted
        cloud providers.
    """

    def _int_setting(key, fallback):
        # A stored value of None falls back to the default; anything else
        # is coerced to int.
        raw = get_setting_from_snapshot(
            key, fallback, settings_snapshot=settings_snapshot
        )
        return fallback if raw is None else int(raw)

    runs_locally = provider_type in ("ollama", "llamacpp", "lmstudio")
    if runs_locally:
        return _int_setting("llm.local_context_window_size", 8192)

    unrestricted = get_setting_from_snapshot(
        "llm.context_window_unrestricted",
        True,
        settings_snapshot=settings_snapshot,
    )
    if unrestricted:
        # Cloud provider manages its own context; handled per provider.
        return None

    # User-specified cap for cloud providers.
    return _int_setting("llm.context_window_size", 128000)

238 

239 

def get_llm(
    model_name=None,
    temperature=None,
    provider=None,
    openai_endpoint_url=None,
    research_id=None,
    research_context=None,
    settings_snapshot=None,
):
    """
    Get LLM instance based on model name and provider.

    Resolution order: a custom LLM registered under the provider name wins;
    otherwise the provider must be one of ``VALID_PROVIDERS`` and a model
    name must be configured.

    Args:
        model_name: Name of the model to use (if None, uses database setting)
        temperature: Model temperature (if None, uses database setting)
        provider: Provider to use (if None, uses database setting)
        openai_endpoint_url: Custom endpoint URL to use (if None, uses database
            setting)
        research_id: Optional research ID for token tracking
        research_context: Optional research context for enhanced token tracking;
            a ``context_limit`` entry is written into it when a limit applies
        settings_snapshot: Optional settings snapshot used for every setting
            lookup

    Returns:
        A LangChain LLM instance with automatic think-tag removal

    Raises:
        TypeError: A registered factory rejected the
            ``model_name``/``temperature``/``settings_snapshot`` keywords.
        ValueError: The provider is unknown, "none", or unhandled; the model
            name is empty; a required API key is missing; or a registered
            LLM/factory is not (or does not return) a ``BaseChatModel``.
    """

    # Use database values for parameters if not provided
    if model_name is None:
        model_name = get_setting_from_snapshot(
            "llm.model", "", settings_snapshot=settings_snapshot
        )
    if temperature is None:
        temperature = get_setting_from_snapshot(
            "llm.temperature", 0.7, settings_snapshot=settings_snapshot
        )
    if provider is None:
        provider = get_setting_from_snapshot(
            "llm.provider", "ollama", settings_snapshot=settings_snapshot
        )

    # Clean model name: remove quotes and extra whitespace
    if model_name:
        model_name = model_name.strip().strip("\"'").strip()

    # Clean provider: remove quotes and extra whitespace
    if provider:
        provider = provider.strip().strip("\"'").strip()

    # Normalize provider: convert to lowercase canonical form
    provider = normalize_provider(provider)

    # Check if this is a registered custom LLM first
    if provider and is_llm_registered(provider):
        logger.info(f"Using registered custom LLM: {provider}")
        custom_llm = get_llm_from_registry(provider)

        # Check if it's a callable (factory function) or a BaseChatModel instance
        if callable(custom_llm) and not isinstance(custom_llm, BaseChatModel):
            # It's a callable (factory function), call it with parameters
            try:
                llm_instance = custom_llm(
                    model_name=model_name,
                    temperature=temperature,
                    settings_snapshot=settings_snapshot,
                )
            except TypeError as e:
                # Re-raise with a clearer message, keeping the original
                # error attached as the explicit cause.
                raise TypeError(
                    f"Registered LLM factory '{provider}' has invalid signature. "
                    f"Factory functions must accept 'model_name', 'temperature', and 'settings_snapshot' parameters. "
                    f"Error: {e}"
                ) from e

            # Validate the result is a BaseChatModel
            if not isinstance(llm_instance, BaseChatModel):
                raise ValueError(
                    f"Factory function for {provider} must return a BaseChatModel instance, "
                    f"got {type(llm_instance).__name__}"
                )
        elif isinstance(custom_llm, BaseChatModel):
            # It's already a proper LLM instance, use it directly
            llm_instance = custom_llm
        else:
            raise ValueError(
                f"Registered LLM {provider} must be either a BaseChatModel instance "
                f"or a callable factory function. Got: {type(custom_llm).__name__}"
            )

        return wrap_llm_without_think_tags(
            llm_instance,
            research_id=research_id,
            provider=provider,
            research_context=research_context,
            settings_snapshot=settings_snapshot,
        )

    # Validate provider
    if provider not in VALID_PROVIDERS:
        logger.error(f"Invalid provider in settings: {provider}")
        raise ValueError(
            f"Invalid provider: {provider}. Must be one of: {VALID_PROVIDERS}"
        )

    # Require an explicit model for built-in providers. Mirrors the
    # API-key-not-configured pattern in openai_base.py and the URL-not-
    # configured pattern in providers/implementations/ollama.py: no silent
    # substitution to a hardcoded default model.
    if not model_name or not model_name.strip():
        logger.error("llm.model is not configured (empty/None after lookup)")
        raise ValueError(
            "LLM model not configured. Please open Settings, choose an LLM "
            "provider, and select a model name (e.g. 'gpt-4o-mini' for "
            "OpenAI, 'claude-3-5-sonnet-20241022' for Anthropic, "
            "'llama3.1:8b' for Ollama). The 'llm.model' setting is required."
        )
    logger.info(
        f"Getting LLM with model: {model_name}, temperature: {temperature}, provider: {provider}"
    )

    # Common parameters for all models
    common_params = {
        "temperature": temperature,
    }

    context_window_size = _get_context_window_for_provider(
        provider, settings_snapshot
    )

    # Add context limit to research context for overflow detection
    if research_context and context_window_size:
        research_context["context_limit"] = context_window_size
        logger.info(
            f"Set context_limit={context_window_size} in research_context"
        )
    else:
        logger.debug(
            f"Context limit not set: research_context={bool(research_context)}, context_window_size={context_window_size}"
        )

    max_tokens = None
    if get_setting_from_snapshot(
        "llm.supports_max_tokens", True, settings_snapshot=settings_snapshot
    ):
        max_tokens = int(
            get_setting_from_snapshot(
                "llm.max_tokens",
                100000,
                settings_snapshot=settings_snapshot,
            )
        )
        if context_window_size is not None:
            # Use at most 80% of context window to leave room for prompts.
            # With an unrestricted context the configured value is used as-is.
            max_tokens = min(max_tokens, int(context_window_size * 0.8))
        common_params["max_tokens"] = max_tokens

    # Handle different providers
    if provider == "anthropic":
        api_key = get_setting_from_snapshot(
            "llm.anthropic.api_key", settings_snapshot=settings_snapshot
        )

        if not api_key:
            raise ValueError(
                "Anthropic API key not configured. Please set llm.anthropic.api_key in settings."
            )

        llm: BaseChatModel = ChatAnthropic(
            model=model_name,
            anthropic_api_key=api_key,
            **common_params,  # type: ignore[call-arg]
        )
        return wrap_llm_without_think_tags(
            llm,
            research_id=research_id,
            provider=provider,
            research_context=research_context,
            settings_snapshot=settings_snapshot,
        )

    if provider == "openai":
        api_key = get_setting_from_snapshot(
            "llm.openai.api_key", settings_snapshot=settings_snapshot
        )

        if not api_key:
            raise ValueError(
                "OpenAI API key not configured. Please set llm.openai.api_key in settings."
            )

        # Build OpenAI-specific parameters
        openai_params = {
            "model": model_name,
            "api_key": api_key,
            **common_params,
        }

        # Add optional parameters if they exist in settings
        try:
            api_base = get_setting_from_snapshot(
                "llm.openai.api_base",
                default=None,
                settings_snapshot=settings_snapshot,
            )
            if api_base:
                openai_params["openai_api_base"] = api_base
        except NoSettingsContextError:
            pass  # Optional parameter

        try:
            organization = get_setting_from_snapshot(
                "llm.openai.organization",
                default=None,
                settings_snapshot=settings_snapshot,
            )
            if organization:
                openai_params["openai_organization"] = organization
        except NoSettingsContextError:
            pass  # Optional parameter

        try:
            streaming = get_setting_from_snapshot(
                "llm.streaming",
                default=None,
                settings_snapshot=settings_snapshot,
            )
        except NoSettingsContextError:
            streaming = None  # Optional parameter
        if streaming is not None:
            openai_params["streaming"] = streaming

        try:
            max_retries = get_setting_from_snapshot(
                "llm.max_retries",
                default=None,
                settings_snapshot=settings_snapshot,
            )
            if max_retries is not None:
                openai_params["max_retries"] = max_retries
        except NoSettingsContextError:
            pass  # Optional parameter

        try:
            request_timeout = get_setting_from_snapshot(
                "llm.request_timeout",
                default=None,
                settings_snapshot=settings_snapshot,
            )
            if request_timeout is not None:
                openai_params["request_timeout"] = request_timeout
        except NoSettingsContextError:
            pass  # Optional parameter

        llm = ChatOpenAI(**openai_params)  # type: ignore[assignment]
        return wrap_llm_without_think_tags(
            llm,
            research_id=research_id,
            provider=provider,
            research_context=research_context,
            settings_snapshot=settings_snapshot,
        )

    if provider == "openai_endpoint":
        api_key = get_setting_from_snapshot(
            "llm.openai_endpoint.api_key", settings_snapshot=settings_snapshot
        )

        # Local servers (e.g. llama.cpp) don't require an API key.
        # Use a placeholder so ChatOpenAI doesn't reject the request.
        if not api_key:
            logger.info(
                "No API key configured for openai_endpoint provider. "
                "Using placeholder key. If you are connecting to a hosted "
                "service, set llm.openai_endpoint.api_key in settings."
            )
            api_key = "not-needed"  # noqa: S105 # gitleaks:allow

        # Get endpoint URL from settings
        if openai_endpoint_url is None:
            openai_endpoint_url = get_setting_from_snapshot(
                "llm.openai_endpoint.url",
                "https://openrouter.ai/api/v1",
                settings_snapshot=settings_snapshot,
            )
        openai_endpoint_url = normalize_url(openai_endpoint_url)

        llm = ChatOpenAI(  # type: ignore[assignment, call-arg]
            model=model_name,
            api_key=api_key,
            openai_api_base=openai_endpoint_url,
            **common_params,
        )
        return wrap_llm_without_think_tags(
            llm,
            research_id=research_id,
            provider=provider,
            research_context=research_context,
            settings_snapshot=settings_snapshot,
        )

    if provider == "ollama":
        try:
            # Use the configurable Ollama base URL
            raw_base_url = get_setting_from_snapshot(
                "llm.ollama.url",
                DEFAULT_OLLAMA_URL,
                settings_snapshot=settings_snapshot,
            )
            base_url = (
                normalize_url(raw_base_url)
                if raw_base_url
                else DEFAULT_OLLAMA_URL
            )

            logger.info(
                f"Creating ChatOllama with model={model_name}, base_url={base_url}"
            )
            try:
                # Add num_ctx parameter for Ollama context window size
                ollama_params = {**common_params}
                if context_window_size is not None:
                    ollama_params["num_ctx"] = context_window_size

                # Thinking/reasoning handling for models like deepseek-r1:
                # The 'reasoning' parameter controls both:
                # 1. Whether the model performs thinking (makes it smarter when True)
                # 2. Whether thinking is separated from the answer (always separated when True)
                #
                # When reasoning=True:
                # - Model performs thinking/reasoning
                # - Thinking goes to additional_kwargs["reasoning_content"] (discarded by LDR)
                # - Only the final answer appears in response.content
                #
                # When reasoning=False:
                # - Model does NOT perform thinking (faster but less smart)
                # - Gives direct answers

                enable_thinking = get_setting_from_snapshot(
                    "llm.ollama.enable_thinking",
                    True,  # Default: enable thinking (smarter responses)
                    settings_snapshot=settings_snapshot,
                )

                # Only a real bool is forwarded; any other stored value
                # leaves ChatOllama's own default in place.
                if enable_thinking is not None and isinstance(
                    enable_thinking, bool
                ):
                    ollama_params["reasoning"] = enable_thinking
                    logger.debug(
                        f"Ollama thinking enabled: {enable_thinking} "
                        f"(thinking will be {'shown internally but discarded' if enable_thinking else 'disabled'})"
                    )

                llm = ChatOllama(  # type: ignore[assignment]
                    model=model_name, base_url=base_url, **ollama_params
                )

                # Log the actual client configuration after creation
                logger.debug(
                    f"ChatOllama created - base_url attribute: {getattr(llm, 'base_url', 'not found')}"
                )
                if hasattr(llm, "_client"):
                    client = llm._client
                    logger.debug(f"ChatOllama _client type: {type(client)}")
                    if hasattr(client, "_client"):
                        inner_client = client._client
                        logger.debug(
                            f"ChatOllama inner client type: {type(inner_client)}"
                        )
                        if hasattr(inner_client, "base_url"):
                            logger.debug(
                                f"ChatOllama inner client base_url: {inner_client.base_url}"
                            )

                return wrap_llm_without_think_tags(
                    llm,
                    research_id=research_id,
                    provider=provider,
                    research_context=research_context,
                    settings_snapshot=settings_snapshot,
                )
            except Exception:
                logger.exception("Error creating or testing ChatOllama")
                raise
        except Exception:
            logger.exception("Error in Ollama provider section")
            raise

    elif provider == "lmstudio":
        # LM Studio supports OpenAI API format, so we can use ChatOpenAI directly
        lmstudio_url = get_setting_from_snapshot(
            "llm.lmstudio.url",
            DEFAULT_LMSTUDIO_URL,
            settings_snapshot=settings_snapshot,
        )
        # Use URL as-is (default already includes /v1)
        base_url = normalize_url(lmstudio_url)
        # Optional API key for LM Studio with auth enabled. Empty/whitespace
        # falls back to a placeholder ChatOpenAI accepts; a no-auth LM Studio
        # ignores it. Whitespace stripping mirrors LMStudioProvider.create_llm.
        lmstudio_auth_raw = get_setting_from_snapshot(  # gitleaks:allow
            "llm.lmstudio.api_key", "", settings_snapshot=settings_snapshot
        )
        lmstudio_auth = (
            str(lmstudio_auth_raw or "").strip()
            or "lm-studio"  # gitleaks:allow
        )

        llm = ChatOpenAI(  # type: ignore[assignment, call-arg, arg-type]
            model=model_name,
            api_key=lmstudio_auth,  # gitleaks:allow
            base_url=base_url,
            temperature=temperature,
            max_tokens=max_tokens,  # Use calculated max_tokens based on context size
        )
        return wrap_llm_without_think_tags(
            llm,
            research_id=research_id,
            provider=provider,
            research_context=research_context,
            settings_snapshot=settings_snapshot,
        )

    elif provider == "llamacpp":
        # llama.cpp's `llama-server` exposes an OpenAI-compatible API, so we
        # use ChatOpenAI directly (same pattern as lmstudio above).
        llamacpp_url = get_setting_from_snapshot(
            "llm.llamacpp.url",
            DEFAULT_LLAMACPP_URL,
            settings_snapshot=settings_snapshot,
        )
        base_url = normalize_url(llamacpp_url)
        # Optional API key for setups behind an auth proxy. Empty/whitespace
        # falls back to a placeholder ChatOpenAI accepts; a no-auth
        # llama-server ignores it. Handled the same way as the lmstudio key
        # above so a whitespace-only value is never sent as a credential.
        llamacpp_auth_raw = get_setting_from_snapshot(  # gitleaks:allow
            "llm.llamacpp.api_key", "", settings_snapshot=settings_snapshot
        )
        llamacpp_auth = (
            str(llamacpp_auth_raw or "").strip()
            or "lm-studio"  # gitleaks:allow
        )

        llm = ChatOpenAI(  # type: ignore[assignment, call-arg, arg-type]
            model=model_name,
            api_key=llamacpp_auth,  # gitleaks:allow
            base_url=base_url,
            temperature=temperature,
            max_tokens=max_tokens,
        )
        return wrap_llm_without_think_tags(
            llm,
            research_id=research_id,
            provider=provider,
            research_context=research_context,
            settings_snapshot=settings_snapshot,
        )

    elif provider == "none":
        raise ValueError(
            "No LLM provider configured. Please set llm.provider in settings "
            "to a valid provider (e.g., 'ollama', 'openai', 'anthropic')."
        )

    else:
        # Provider passed the VALID_PROVIDERS check but has no branch here
        # and was not found in the LLM registry above.
        raise ValueError(
            f"Provider '{provider}' is valid but not implemented. "
            f"This is a bug - please report it."
        )

718 

719 

def wrap_llm_without_think_tags(
    llm,
    research_id=None,
    provider=None,
    research_context=None,
    settings_snapshot=None,
):
    """Wrap ``llm`` so its responses pass through ``remove_think_tags``.

    Before wrapping, this function optionally:

    * applies the LLM rate-limiting wrapper when the
      ``rate_limiting.llm_enabled`` setting is truthy;
    * records ``context_limit`` in ``research_context`` if it is not
      already present and a limit applies to ``provider``;
    * attaches a token-counting callback when ``research_id`` is given.

    Args:
        llm: The LLM object to wrap.
        research_id: Optional research ID; enables token counting.
        provider: Optional provider name, used for rate limiting, the
            context-limit lookup and the token callback's preset provider.
        research_context: Optional dict; may be mutated to add
            ``context_limit``.
        settings_snapshot: Optional settings snapshot for setting lookups.

    Returns:
        A ``ProcessingLLMWrapper`` exposing ``invoke``/``ainvoke``/``close``
        and delegating every other attribute to the wrapped LLM.
    """

    # First apply rate limiting if enabled
    from ..web_search_engines.rate_limiting.llm import (
        create_rate_limited_llm_wrapper,
    )

    # Check if LLM rate limiting is enabled (independent of search rate limiting)
    if get_setting_from_snapshot(
        "rate_limiting.llm_enabled", False, settings_snapshot=settings_snapshot
    ):
        llm = create_rate_limited_llm_wrapper(llm, provider)

    # Set context_limit in research_context for overflow detection.
    # This is needed for providers that go through the registered provider path
    # (which returns before the code in get_llm that sets context_limit).
    if research_context is not None and provider is not None:
        if "context_limit" not in research_context:
            context_limit = _get_context_window_for_provider(
                provider, settings_snapshot
            )
            if context_limit is not None:
                research_context["context_limit"] = context_limit
                logger.info(
                    f"Set context_limit={context_limit} in wrap_llm for provider={provider}"
                )

    # Import token counting functionality if research_id is provided
    callbacks = []
    if research_id is not None:
        from ..metrics import TokenCounter

        token_counter = TokenCounter()
        token_callback = token_counter.create_callback(
            research_id, research_context
        )
        # Set provider and model info on the callback
        if provider:
            token_callback.preset_provider = provider
        # Try to extract model name from the LLM instance
        if hasattr(llm, "model_name"):
            token_callback.preset_model = llm.model_name
        elif hasattr(llm, "model"):
            token_callback.preset_model = llm.model
        callbacks.append(token_callback)

    # Add callbacks to the LLM if it supports them
    # NOTE(review): when ``llm`` is a shared instance (e.g. one stored in the
    # LLM registry) the callback list is extended in place on every call -
    # confirm whether callbacks from earlier calls should be kept.
    if callbacks and hasattr(llm, "callbacks"):
        if llm.callbacks is None:
            llm.callbacks = callbacks
        else:
            llm.callbacks.extend(callbacks)

    class ProcessingLLMWrapper:
        """Proxy that post-processes ``invoke``/``ainvoke`` results."""

        def __init__(self, base_llm):
            self.base_llm = base_llm

        @staticmethod
        def _normalize_response(response: Any) -> Any:
            """Strip <think> tags and normalize the response shape.

            A message keeps its object identity (only ``.content`` is rewritten,
            so ``additional_kwargs``/``reasoning_content``/``tool_calls`` survive).
            A bare-string return (some providers/wrappers) is wrapped into an
            ``AIMessage`` so callers can always rely on ``.content``. Anything
            else is passed through unchanged.
            """
            if hasattr(response, "content"):
                response.content = remove_think_tags(response.content)
            elif isinstance(response, str):
                response = AIMessage(content=remove_think_tags(response))
            return response

        @staticmethod
        def _log_llm_error(error: Exception) -> None:
            """Log an LLM call failure, surfacing any URL embedded in the error."""
            logger.exception("LLM Request - Failed with error")
            error_str = str(error)
            if "http://" in error_str or "https://" in error_str:
                logger.exception(
                    f"LLM Request - Error contains URL info: {error_str}"
                )

        def invoke(self, *args: Any, **kwargs: Any) -> Any:
            """Call the wrapped LLM's ``invoke`` and normalize the result.

            Failures are logged and re-raised unchanged.
            """
            try:
                response = self.base_llm.invoke(*args, **kwargs)
            except Exception as e:
                self._log_llm_error(e)
                raise
            return self._normalize_response(response)

        async def ainvoke(self, *args: Any, **kwargs: Any) -> Any:
            """Async counterpart of ``invoke``.

            Without this, ``ainvoke()`` would fall through ``__getattr__`` to
            the base LLM and bypass think-tag stripping.
            """
            try:
                response = await self.base_llm.ainvoke(*args, **kwargs)
            except Exception as e:
                self._log_llm_error(e)
                raise
            return self._normalize_response(response)

        # Pass through any other attributes to the base LLM
        def __getattr__(self, name):
            # __getattr__ only runs after normal lookup has failed. If
            # ``base_llm`` itself is the missing attribute (an instance
            # created without __init__, e.g. while being copied or
            # unpickled), reading ``self.base_llm`` below would re-enter
            # this method without end. Fail with AttributeError instead.
            if name == "base_llm":
                raise AttributeError(name)
            return getattr(self.base_llm, name)

        def close(self):
            """Close underlying HTTP clients held by this LLM. Idempotent."""
            try:
                from ..utilities.llm_utils import _close_base_llm

                _close_base_llm(self.base_llm)
            except Exception:
                # Best effort: cleanup problems are logged, never raised.
                logger.debug(
                    "best-effort cleanup of HTTP clients on shutdown",
                    exc_info=True,
                )

    return ProcessingLLMWrapper(llm)