Coverage for src/local_deep_research/config/llm_config.py: 95%
334 statements
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-03 23:15 +0000
« prev ^ index » next coverage.py v7.14.1, created at 2026-06-03 23:15 +0000
1from functools import cache
2from typing import Any
4from langchain_anthropic import ChatAnthropic
5from langchain_core.language_models import BaseChatModel
6from langchain_core.messages import AIMessage
7from langchain_ollama import ChatOllama
8from langchain_openai import ChatOpenAI
9from loguru import logger
11from ..llm import get_llm_from_registry, is_llm_registered
12from ..utilities.search_utilities import remove_think_tags
13from ..utilities.url_utils import normalize_url
14from .constants import (
15 DEFAULT_LLAMACPP_URL,
16 DEFAULT_LMSTUDIO_URL,
17 DEFAULT_OLLAMA_URL,
18)
20# Import providers module to trigger auto-discovery
21try:
22 from ..llm.providers import discover_providers # noqa: F401
23 # Auto-discovery happens on module import
24except ImportError:
25 logger.debug("Providers module not available yet")
26from ..llm.providers.base import normalize_provider
27from .thread_settings import (
28 get_setting_from_snapshot,
29 NoSettingsContextError,
30)
# Valid provider options.
# Provider identifiers accepted for built-in providers. Kept as a list because
# it is interpolated verbatim into the "Invalid provider" error message.
VALID_PROVIDERS = [
    "ollama",
    "openai",
    "anthropic",
    "google",
    "openrouter",
    "openai_endpoint",
    "lmstudio",
    "llamacpp",
    "none",
]
def is_openai_available(settings_snapshot=None):
    """Tell whether the OpenAI provider can be used.

    The answer comes from ``OpenAIProvider.is_available(settings_snapshot)``.
    An ``ImportError`` yields ``False`` silently; any other exception is
    logged at debug level and also yields ``False``.
    """
    try:
        from ..llm.providers.implementations.openai import OpenAIProvider

        available = OpenAIProvider.is_available(settings_snapshot)
    except ImportError:
        available = False
    except Exception:
        logger.debug("Error checking OpenAI availability", exc_info=True)
        available = False
    return available
def is_anthropic_available(settings_snapshot=None):
    """Tell whether the Anthropic provider can be used.

    The answer comes from ``AnthropicProvider.is_available(settings_snapshot)``.
    An ``ImportError`` yields ``False`` silently; any other exception is
    logged at debug level and also yields ``False``.
    """
    try:
        from ..llm.providers.implementations.anthropic import AnthropicProvider

        available = AnthropicProvider.is_available(settings_snapshot)
    except ImportError:
        available = False
    except Exception:
        logger.debug("Error checking Anthropic availability", exc_info=True)
        available = False
    return available
def is_openai_endpoint_available(settings_snapshot=None):
    """Tell whether the custom OpenAI-compatible endpoint provider can be used.

    The answer comes from
    ``CustomOpenAIEndpointProvider.is_available(settings_snapshot)``.
    An ``ImportError`` yields ``False`` silently; any other exception is
    logged at debug level and also yields ``False``.
    """
    try:
        from ..llm.providers.implementations.custom_openai_endpoint import (
            CustomOpenAIEndpointProvider,
        )

        available = CustomOpenAIEndpointProvider.is_available(settings_snapshot)
    except ImportError:
        available = False
    except Exception:
        logger.debug(
            "Error checking OpenAI endpoint availability", exc_info=True
        )
        available = False
    return available
def is_ollama_available(settings_snapshot=None):
    """Tell whether the Ollama provider can be used.

    The answer comes from ``OllamaProvider.is_available(settings_snapshot)``.
    An ``ImportError`` yields ``False`` silently; any other exception is
    logged at debug level and also yields ``False``.
    """
    try:
        from ..llm.providers.implementations.ollama import OllamaProvider

        available = OllamaProvider.is_available(settings_snapshot)
    except ImportError:
        available = False
    except Exception:
        logger.debug("Error checking Ollama availability", exc_info=True)
        available = False
    return available
def is_lmstudio_available(settings_snapshot=None):
    """Tell whether the LM Studio provider can be used.

    The answer comes from ``LMStudioProvider.is_available(settings_snapshot)``.
    An ``ImportError`` yields ``False`` silently; any other exception is
    logged at debug level and also yields ``False``.
    """
    try:
        from ..llm.providers.implementations.lmstudio import LMStudioProvider

        available = LMStudioProvider.is_available(settings_snapshot)
    except ImportError:
        available = False
    except Exception:
        logger.debug("Error checking LM Studio availability", exc_info=True)
        available = False
    return available
def is_llamacpp_available(settings_snapshot=None):
    """Tell whether llama.cpp's HTTP server is reachable.

    The check targets `llama-server`'s OpenAI-compatible endpoint (no
    in-process `llama-cpp-python` binding required) and is delegated to
    ``LlamaCppProvider.is_available(settings_snapshot)``. An ``ImportError``
    yields ``False`` silently; any other exception is logged at debug level
    and also yields ``False``.
    """
    try:
        from ..llm.providers.implementations.llamacpp import LlamaCppProvider

        available = LlamaCppProvider.is_available(settings_snapshot)
    except ImportError:
        available = False
    except Exception:
        logger.debug("Error checking llama.cpp availability", exc_info=True)
        available = False
    return available
def is_google_available(settings_snapshot=None):
    """Tell whether the Google/Gemini provider can be used.

    The answer comes from ``GoogleProvider.is_available(settings_snapshot)``.
    An ``ImportError`` yields ``False`` silently; any other exception is
    logged at debug level and also yields ``False``.
    """
    try:
        from ..llm.providers.implementations.google import GoogleProvider

        available = GoogleProvider.is_available(settings_snapshot)
    except ImportError:
        available = False
    except Exception:
        logger.debug("Error checking Google availability", exc_info=True)
        available = False
    return available
def is_openrouter_available(settings_snapshot=None):
    """Tell whether the OpenRouter provider can be used.

    The answer comes from ``OpenRouterProvider.is_available(settings_snapshot)``.
    An ``ImportError`` yields ``False`` silently; any other exception is
    logged at debug level and also yields ``False``.
    """
    try:
        from ..llm.providers.implementations.openrouter import (
            OpenRouterProvider,
        )

        available = OpenRouterProvider.is_available(settings_snapshot)
    except ImportError:
        available = False
    except Exception:
        logger.debug("Error checking OpenRouter availability", exc_info=True)
        available = False
    return available
def _probe_available_providers(settings_snapshot=None):
    """Run every provider availability check and collect the usable ones.

    Returns:
        dict: provider key -> display label, in the fixed probe order below.
        Contains the single entry ``"none"`` when no provider is available.
    """
    # (key, display label, availability check) in the order results are listed.
    checks = (
        ("ollama", "Ollama (local models)", is_ollama_available),
        ("openai", "OpenAI API", is_openai_available),
        ("anthropic", "Anthropic API", is_anthropic_available),
        ("google", "Google Gemini API", is_google_available),
        ("openrouter", "OpenRouter API", is_openrouter_available),
        (
            "openai_endpoint",
            "OpenAI-Compatible Endpoint",
            is_openai_endpoint_available,
        ),
        ("lmstudio", "LM Studio (local models)", is_lmstudio_available),
        ("llamacpp", "LlamaCpp (local models)", is_llamacpp_available),
    )
    providers = {
        key: label
        for key, label, is_available in checks
        if is_available(settings_snapshot)
    }

    # Default fallback
    if not providers:
        providers["none"] = "No model providers available"

    return providers


# Memoized variant. functools.cache hashes its arguments, so it can only be
# used when settings_snapshot is hashable (e.g. None).
_cached_available_providers = cache(_probe_available_providers)


def get_available_providers(settings_snapshot=None):
    """Return available model providers.

    Results are memoized per ``settings_snapshot`` when that argument is
    hashable. An unhashable snapshot (such as a plain ``dict``) cannot be used
    as a cache key, so in that case the providers are probed directly instead
    of raising ``TypeError``.

    Args:
        settings_snapshot: Optional settings snapshot forwarded to every
            ``is_*_available`` check.

    Returns:
        dict: provider key -> display label, or ``{"none": ...}`` when no
        provider is available. Memoized calls return the same dict object
        on every call.
    """
    try:
        hash(settings_snapshot)
    except TypeError:
        return _probe_available_providers(settings_snapshot)
    return _cached_available_providers(settings_snapshot)


# Keep the cache-management API callers had on the @cache-decorated function.
get_available_providers.cache_clear = _cached_available_providers.cache_clear
get_available_providers.cache_info = _cached_available_providers.cache_info
get_available_providers.__wrapped__ = _probe_available_providers
def get_selected_llm_provider(settings_snapshot=None):
    """Return the ``llm.provider`` setting passed through ``normalize_provider``.

    ``"ollama"`` is the default handed to the settings lookup.
    """
    configured_provider = get_setting_from_snapshot(
        "llm.provider", "ollama", settings_snapshot=settings_snapshot
    )
    return normalize_provider(configured_provider)
def _get_context_window_for_provider(provider_type, settings_snapshot=None):
    """Look up the context window size that applies to a provider.

    Local providers (ollama, llamacpp, lmstudio) read
    ``llm.local_context_window_size`` with a smaller default to prevent memory
    issues. All other providers first consult
    ``llm.context_window_unrestricted``; only when that is falsy is
    ``llm.context_window_size`` read.

    Returns:
        int or None: The context window size, or None for unrestricted cloud providers.
    """
    local_default = 8192
    cloud_default = 128000

    def _as_int(raw_value, fallback):
        # A stored None falls back to the default; everything else is coerced.
        if raw_value is None:
            return fallback
        return int(raw_value)

    is_local = provider_type in ("ollama", "llamacpp", "lmstudio")
    if is_local:
        configured = get_setting_from_snapshot(
            "llm.local_context_window_size",
            local_default,
            settings_snapshot=settings_snapshot,
        )
        return _as_int(configured, local_default)

    # Cloud providers: unrestricted mode lets the provider manage its context.
    unrestricted = get_setting_from_snapshot(
        "llm.context_window_unrestricted",
        True,
        settings_snapshot=settings_snapshot,
    )
    if unrestricted:
        return None  # Will be handled per provider

    # Otherwise honor the user-specified limit.
    configured = get_setting_from_snapshot(
        "llm.context_window_size",
        cloud_default,
        settings_snapshot=settings_snapshot,
    )
    return _as_int(configured, cloud_default)
def get_llm(
    model_name=None,
    temperature=None,
    provider=None,
    openai_endpoint_url=None,
    research_id=None,
    research_context=None,
    settings_snapshot=None,
):
    """
    Get LLM instance based on model name and provider.

    Args:
        model_name: Name of the model to use (if None, uses database setting)
        temperature: Model temperature (if None, uses database setting)
        provider: Provider to use (if None, uses database setting)
        openai_endpoint_url: Custom endpoint URL to use (if None, uses database
            setting)
        research_id: Optional research ID for token tracking
        research_context: Optional research context for enhanced token tracking.
            When truthy and a context window size applies, its
            ``"context_limit"`` key is set here.
        settings_snapshot: Optional settings snapshot forwarded to every
            settings lookup

    Returns:
        A LangChain LLM instance with automatic think-tag removal

    Raises:
        TypeError: A registered factory raised ``TypeError`` when called with
            ``model_name``, ``temperature`` and ``settings_snapshot``.
        ValueError: The provider is unknown, is ``"none"``, or has no branch
            here; the model name is empty; a required API key is missing; or
            a registered entry is not (and does not produce) a
            ``BaseChatModel``.
    """

    # Use database values for parameters if not provided
    if model_name is None:
        model_name = get_setting_from_snapshot(
            "llm.model", "", settings_snapshot=settings_snapshot
        )
    if temperature is None:
        temperature = get_setting_from_snapshot(
            "llm.temperature", 0.7, settings_snapshot=settings_snapshot
        )
    if provider is None:
        provider = get_setting_from_snapshot(
            "llm.provider", "ollama", settings_snapshot=settings_snapshot
        )

    # Clean model name: remove quotes and extra whitespace
    if model_name:
        model_name = model_name.strip().strip("\"'").strip()

    # Clean provider: remove quotes and extra whitespace
    if provider:
        provider = provider.strip().strip("\"'").strip()

    # Normalize provider: convert to lowercase canonical form
    provider = normalize_provider(provider)

    # Check if this is a registered custom LLM first
    if provider and is_llm_registered(provider):
        logger.info(f"Using registered custom LLM: {provider}")
        custom_llm = get_llm_from_registry(provider)

        # Check if it's a callable (factory function) or a BaseChatModel instance
        if callable(custom_llm) and not isinstance(custom_llm, BaseChatModel):
            # It's a callable (factory function), call it with parameters
            try:
                llm_instance = custom_llm(
                    model_name=model_name,
                    temperature=temperature,
                    settings_snapshot=settings_snapshot,
                )
            except TypeError as e:
                # Re-raise TypeError with better message, keeping the original
                # exception as the explicit cause.
                raise TypeError(
                    f"Registered LLM factory '{provider}' has invalid signature. "
                    f"Factory functions must accept 'model_name', 'temperature', and 'settings_snapshot' parameters. "
                    f"Error: {e}"
                ) from e

            # Validate the result is a BaseChatModel
            if not isinstance(llm_instance, BaseChatModel):
                raise ValueError(
                    f"Factory function for {provider} must return a BaseChatModel instance, "
                    f"got {type(llm_instance).__name__}"
                )
        elif isinstance(custom_llm, BaseChatModel):
            # It's already a proper LLM instance, use it directly
            llm_instance = custom_llm
        else:
            raise ValueError(
                f"Registered LLM {provider} must be either a BaseChatModel instance "
                f"or a callable factory function. Got: {type(custom_llm).__name__}"
            )

        return wrap_llm_without_think_tags(
            llm_instance,
            research_id=research_id,
            provider=provider,
            research_context=research_context,
            settings_snapshot=settings_snapshot,
        )

    # Validate provider
    if provider not in VALID_PROVIDERS:
        logger.error(f"Invalid provider in settings: {provider}")
        raise ValueError(
            f"Invalid provider: {provider}. Must be one of: {VALID_PROVIDERS}"
        )

    # Require an explicit model for built-in providers. Mirrors the
    # API-key-not-configured pattern in openai_base.py and the URL-not-
    # configured pattern in providers/implementations/ollama.py: no silent
    # substitution to a hardcoded default model.
    if not model_name or not model_name.strip():
        logger.error("llm.model is not configured (empty/None after lookup)")
        raise ValueError(
            "LLM model not configured. Please open Settings, choose an LLM "
            "provider, and select a model name (e.g. 'gpt-4o-mini' for "
            "OpenAI, 'claude-3-5-sonnet-20241022' for Anthropic, "
            "'llama3.1:8b' for Ollama). The 'llm.model' setting is required."
        )
    logger.info(
        f"Getting LLM with model: {model_name}, temperature: {temperature}, provider: {provider}"
    )

    # Common parameters for all models
    common_params = {
        "temperature": temperature,
    }

    context_window_size = _get_context_window_for_provider(
        provider, settings_snapshot
    )

    # Add context limit to research context for overflow detection
    if research_context and context_window_size:
        research_context["context_limit"] = context_window_size
        logger.info(
            f"Set context_limit={context_window_size} in research_context"
        )
    else:
        logger.debug(
            f"Context limit not set: research_context={bool(research_context)}, context_window_size={context_window_size}"
        )

    max_tokens = None
    if get_setting_from_snapshot(
        "llm.supports_max_tokens", True, settings_snapshot=settings_snapshot
    ):
        max_tokens = int(
            get_setting_from_snapshot(
                "llm.max_tokens",
                100000,
                settings_snapshot=settings_snapshot,
            )
        )
        if context_window_size is not None:
            # Use at most 80% of context window to leave room for prompts.
            # With an unrestricted context (None) the configured value is
            # used as-is.
            max_tokens = min(max_tokens, int(context_window_size * 0.8))
        common_params["max_tokens"] = max_tokens

    # Handle different providers
    if provider == "anthropic":
        api_key = get_setting_from_snapshot(
            "llm.anthropic.api_key", settings_snapshot=settings_snapshot
        )

        if not api_key:
            raise ValueError(
                "Anthropic API key not configured. Please set llm.anthropic.api_key in settings."
            )

        llm: BaseChatModel = ChatAnthropic(
            model=model_name,
            anthropic_api_key=api_key,
            **common_params,  # type: ignore[call-arg]
        )
        return wrap_llm_without_think_tags(
            llm,
            research_id=research_id,
            provider=provider,
            research_context=research_context,
            settings_snapshot=settings_snapshot,
        )

    if provider == "openai":
        api_key = get_setting_from_snapshot(
            "llm.openai.api_key", settings_snapshot=settings_snapshot
        )

        if not api_key:
            raise ValueError(
                "OpenAI API key not configured. Please set llm.openai.api_key in settings."
            )

        # Build OpenAI-specific parameters
        openai_params = {
            "model": model_name,
            "api_key": api_key,
            **common_params,
        }

        # Add optional parameters if they exist in settings
        try:
            api_base = get_setting_from_snapshot(
                "llm.openai.api_base",
                default=None,
                settings_snapshot=settings_snapshot,
            )
            if api_base:
                openai_params["openai_api_base"] = api_base
        except NoSettingsContextError:
            pass  # Optional parameter

        try:
            organization = get_setting_from_snapshot(
                "llm.openai.organization",
                default=None,
                settings_snapshot=settings_snapshot,
            )
            if organization:
                openai_params["openai_organization"] = organization
        except NoSettingsContextError:
            pass  # Optional parameter

        try:
            streaming = get_setting_from_snapshot(
                "llm.streaming",
                default=None,
                settings_snapshot=settings_snapshot,
            )
        except NoSettingsContextError:
            streaming = None  # Optional parameter
        if streaming is not None:
            openai_params["streaming"] = streaming

        try:
            max_retries = get_setting_from_snapshot(
                "llm.max_retries",
                default=None,
                settings_snapshot=settings_snapshot,
            )
            if max_retries is not None:
                openai_params["max_retries"] = max_retries
        except NoSettingsContextError:
            pass  # Optional parameter

        try:
            request_timeout = get_setting_from_snapshot(
                "llm.request_timeout",
                default=None,
                settings_snapshot=settings_snapshot,
            )
            if request_timeout is not None:
                openai_params["request_timeout"] = request_timeout
        except NoSettingsContextError:
            pass  # Optional parameter

        llm = ChatOpenAI(**openai_params)  # type: ignore[assignment]
        return wrap_llm_without_think_tags(
            llm,
            research_id=research_id,
            provider=provider,
            research_context=research_context,
            settings_snapshot=settings_snapshot,
        )

    if provider == "openai_endpoint":
        api_key = get_setting_from_snapshot(
            "llm.openai_endpoint.api_key", settings_snapshot=settings_snapshot
        )

        # Local servers (e.g. llama.cpp) don't require an API key.
        # Use a placeholder so ChatOpenAI doesn't reject the request.
        if not api_key:
            logger.info(
                "No API key configured for openai_endpoint provider. "
                "Using placeholder key. If you are connecting to a hosted "
                "service, set llm.openai_endpoint.api_key in settings."
            )
            api_key = "not-needed"  # noqa: S105 # gitleaks:allow

        # Get endpoint URL from settings
        if openai_endpoint_url is None:
            openai_endpoint_url = get_setting_from_snapshot(
                "llm.openai_endpoint.url",
                "https://openrouter.ai/api/v1",
                settings_snapshot=settings_snapshot,
            )
        openai_endpoint_url = normalize_url(openai_endpoint_url)

        llm = ChatOpenAI(  # type: ignore[assignment, call-arg]
            model=model_name,
            api_key=api_key,
            openai_api_base=openai_endpoint_url,
            **common_params,
        )
        return wrap_llm_without_think_tags(
            llm,
            research_id=research_id,
            provider=provider,
            research_context=research_context,
            settings_snapshot=settings_snapshot,
        )

    if provider == "ollama":
        try:
            # Use the configurable Ollama base URL
            raw_base_url = get_setting_from_snapshot(
                "llm.ollama.url",
                DEFAULT_OLLAMA_URL,
                settings_snapshot=settings_snapshot,
            )
            base_url = (
                normalize_url(raw_base_url)
                if raw_base_url
                else DEFAULT_OLLAMA_URL
            )

            logger.info(
                f"Creating ChatOllama with model={model_name}, base_url={base_url}"
            )
            try:
                # Add num_ctx parameter for Ollama context window size
                ollama_params = {**common_params}
                if context_window_size is not None:
                    ollama_params["num_ctx"] = context_window_size

                # Thinking/reasoning handling for models like deepseek-r1:
                # The 'reasoning' parameter controls both:
                # 1. Whether the model performs thinking (makes it smarter when True)
                # 2. Whether thinking is separated from the answer (always separated when True)
                #
                # When reasoning=True:
                # - Model performs thinking/reasoning
                # - Thinking goes to additional_kwargs["reasoning_content"] (discarded by LDR)
                # - Only the final answer appears in response.content
                #
                # When reasoning=False:
                # - Model does NOT perform thinking (faster but less smart)
                # - Gives direct answers

                enable_thinking = get_setting_from_snapshot(
                    "llm.ollama.enable_thinking",
                    True,  # Default: enable thinking (smarter responses)
                    settings_snapshot=settings_snapshot,
                )

                # Only a real bool is forwarded; any other stored value is
                # ignored and 'reasoning' is left unset.
                if enable_thinking is not None and isinstance(
                    enable_thinking, bool
                ):
                    ollama_params["reasoning"] = enable_thinking
                    logger.debug(
                        f"Ollama thinking enabled: {enable_thinking} "
                        f"(thinking will be {'shown internally but discarded' if enable_thinking else 'disabled'})"
                    )

                llm = ChatOllama(  # type: ignore[assignment]
                    model=model_name, base_url=base_url, **ollama_params
                )

                # Log the actual client configuration after creation
                logger.debug(
                    f"ChatOllama created - base_url attribute: {getattr(llm, 'base_url', 'not found')}"
                )
                if hasattr(llm, "_client"):
                    client = llm._client
                    logger.debug(f"ChatOllama _client type: {type(client)}")
                    if hasattr(client, "_client"):
                        inner_client = client._client
                        logger.debug(
                            f"ChatOllama inner client type: {type(inner_client)}"
                        )
                        if hasattr(inner_client, "base_url"):
                            logger.debug(
                                f"ChatOllama inner client base_url: {inner_client.base_url}"
                            )

                return wrap_llm_without_think_tags(
                    llm,
                    research_id=research_id,
                    provider=provider,
                    research_context=research_context,
                    settings_snapshot=settings_snapshot,
                )
            except Exception:
                logger.exception("Error creating or testing ChatOllama")
                raise
        except Exception:
            logger.exception("Error in Ollama provider section")
            raise

    elif provider == "lmstudio":
        # LM Studio supports OpenAI API format, so we can use ChatOpenAI directly
        lmstudio_url = get_setting_from_snapshot(
            "llm.lmstudio.url",
            DEFAULT_LMSTUDIO_URL,
            settings_snapshot=settings_snapshot,
        )
        # Use URL as-is (default already includes /v1)
        base_url = normalize_url(lmstudio_url)
        # Optional API key for LM Studio with auth enabled. Empty/whitespace
        # falls back to a placeholder ChatOpenAI accepts; a no-auth LM Studio
        # ignores it. Whitespace stripping mirrors LMStudioProvider.create_llm.
        lmstudio_auth_raw = get_setting_from_snapshot(  # gitleaks:allow
            "llm.lmstudio.api_key", "", settings_snapshot=settings_snapshot
        )
        lmstudio_auth = (
            str(lmstudio_auth_raw or "").strip()
            or "lm-studio"  # gitleaks:allow
        )

        llm = ChatOpenAI(  # type: ignore[assignment, call-arg, arg-type]
            model=model_name,
            api_key=lmstudio_auth,  # gitleaks:allow
            base_url=base_url,
            temperature=temperature,
            max_tokens=max_tokens,  # Use calculated max_tokens based on context size
        )
        return wrap_llm_without_think_tags(
            llm,
            research_id=research_id,
            provider=provider,
            research_context=research_context,
            settings_snapshot=settings_snapshot,
        )

    elif provider == "llamacpp":
        # llama.cpp's `llama-server` exposes an OpenAI-compatible API, so we
        # use ChatOpenAI directly (same pattern as lmstudio above).
        llamacpp_url = get_setting_from_snapshot(
            "llm.llamacpp.url",
            DEFAULT_LLAMACPP_URL,
            settings_snapshot=settings_snapshot,
        )
        base_url = normalize_url(llamacpp_url)
        # Optional API key for setups behind an auth proxy. Empty/whitespace
        # falls back to a placeholder ChatOpenAI accepts; a no-auth
        # llama-server ignores it. Stripped and str-coerced exactly like the
        # lmstudio branch so a whitespace-only value is never sent as a key.
        llamacpp_auth_raw = get_setting_from_snapshot(  # gitleaks:allow
            "llm.llamacpp.api_key", "", settings_snapshot=settings_snapshot
        )
        llamacpp_auth = (
            str(llamacpp_auth_raw or "").strip()
            or "lm-studio"  # gitleaks:allow
        )

        llm = ChatOpenAI(  # type: ignore[assignment, call-arg, arg-type]
            model=model_name,
            api_key=llamacpp_auth,  # gitleaks:allow
            base_url=base_url,
            temperature=temperature,
            max_tokens=max_tokens,
        )
        return wrap_llm_without_think_tags(
            llm,
            research_id=research_id,
            provider=provider,
            research_context=research_context,
            settings_snapshot=settings_snapshot,
        )

    elif provider == "none":
        raise ValueError(
            "No LLM provider configured. Please set llm.provider in settings "
            "to a valid provider (e.g., 'ollama', 'openai', 'anthropic')."
        )

    else:
        # Reached when a provider passes the VALID_PROVIDERS check, was not
        # found in the LLM registry, and has no branch in this function
        # ("google" and "openrouter" have no branch here).
        raise ValueError(
            f"Provider '{provider}' is valid but not implemented. "
            f"This is a bug - please report it."
        )
def wrap_llm_without_think_tags(
    llm,
    research_id=None,
    provider=None,
    research_context=None,
    settings_snapshot=None,
):
    """Wrap an LLM so its outputs pass through remove_think_tags, with optional token counting.

    Args:
        llm: The LLM instance to wrap.
        research_id: When not None, a ``TokenCounter`` callback is created for
            it and attached to ``llm.callbacks`` (if the LLM has that
            attribute).
        provider: Provider name; passed to the rate-limit wrapper, used to
            look up the context limit, and preset on the token callback.
        research_context: Optional dict; ``"context_limit"`` is filled in here
            when it is missing and a limit applies to ``provider``.
        settings_snapshot: Optional settings snapshot forwarded to the
            settings lookups.

    Returns:
        A ``ProcessingLLMWrapper`` around ``llm`` (or around its rate-limited
        wrapper) whose ``invoke``/``ainvoke`` strip think tags from responses;
        other attribute access is forwarded to the wrapped LLM.
    """

    # First apply rate limiting if enabled
    from ..web_search_engines.rate_limiting.llm import (
        create_rate_limited_llm_wrapper,
    )

    # Check if LLM rate limiting is enabled (independent of search rate limiting)
    if get_setting_from_snapshot(
        "rate_limiting.llm_enabled", False, settings_snapshot=settings_snapshot
    ):
        llm = create_rate_limited_llm_wrapper(llm, provider)

    # Set context_limit in research_context for overflow detection.
    # This is needed for providers that go through the registered provider path
    # (which returns before the code in get_llm that sets context_limit).
    if research_context is not None and provider is not None:
        if "context_limit" not in research_context:
            context_limit = _get_context_window_for_provider(
                provider, settings_snapshot
            )
            if context_limit is not None:
                research_context["context_limit"] = context_limit
                logger.info(
                    f"Set context_limit={context_limit} in wrap_llm for provider={provider}"
                )

    # Import token counting functionality if research_id is provided
    callbacks = []
    if research_id is not None:
        from ..metrics import TokenCounter

        token_counter = TokenCounter()
        token_callback = token_counter.create_callback(
            research_id, research_context
        )
        # Set provider and model info on the callback
        if provider:
            token_callback.preset_provider = provider
        # Try to extract model name from the LLM instance
        if hasattr(llm, "model_name"):
            token_callback.preset_model = llm.model_name
        elif hasattr(llm, "model"):
            token_callback.preset_model = llm.model
        callbacks.append(token_callback)

    # Add callbacks to the LLM if it supports them
    if callbacks and hasattr(llm, "callbacks"):
        if llm.callbacks is None:
            llm.callbacks = callbacks
        else:
            # NOTE(review): this mutates the LLM's existing callbacks in
            # place and assumes they support .extend() - confirm for LLM
            # instances that are reused across calls.
            llm.callbacks.extend(callbacks)

    class ProcessingLLMWrapper:
        def __init__(self, base_llm):
            self.base_llm = base_llm

        @staticmethod
        def _normalize_response(response: Any) -> Any:
            """Strip <think> tags and normalize the response shape.

            A message keeps its object identity (only ``.content`` is rewritten,
            so ``additional_kwargs``/``reasoning_content``/``tool_calls`` survive).
            A bare-string return (some providers/wrappers) is wrapped into an
            ``AIMessage`` so callers can always rely on ``.content``. Anything
            else is passed through unchanged.
            """
            if hasattr(response, "content"):
                response.content = remove_think_tags(response.content)
            elif isinstance(response, str):
                response = AIMessage(content=remove_think_tags(response))
            return response

        @staticmethod
        def _log_llm_error(error: Exception) -> None:
            """Log an LLM call failure, surfacing any URL embedded in the error."""
            logger.exception("LLM Request - Failed with error")
            error_str = str(error)
            if "http://" in error_str or "https://" in error_str:
                logger.exception(
                    f"LLM Request - Error contains URL info: {error_str}"
                )

        def invoke(self, *args: Any, **kwargs: Any) -> Any:
            try:
                response = self.base_llm.invoke(*args, **kwargs)
            except Exception as e:
                self._log_llm_error(e)
                raise
            return self._normalize_response(response)

        async def ainvoke(self, *args: Any, **kwargs: Any) -> Any:
            # Async counterpart of invoke(); without this, ainvoke() would fall
            # through __getattr__ to the base LLM and bypass think-tag stripping.
            try:
                response = await self.base_llm.ainvoke(*args, **kwargs)
            except Exception as e:
                self._log_llm_error(e)
                raise
            return self._normalize_response(response)

        # Pass through any other attributes to the base LLM
        def __getattr__(self, name):
            # __getattr__ only runs after normal lookup has failed. If
            # base_llm itself is the missing attribute (an instance created
            # without __init__, e.g. by copy or pickle), reading
            # self.base_llm below would re-enter this method and recurse
            # until RecursionError, so fail with AttributeError instead.
            if name == "base_llm":
                raise AttributeError(name)
            return getattr(self.base_llm, name)

        def close(self):
            """Close underlying HTTP clients held by this LLM. Idempotent."""
            try:
                from ..utilities.llm_utils import _close_base_llm

                _close_base_llm(self.base_llm)
            except Exception:
                logger.debug(
                    "best-effort cleanup of HTTP clients on shutdown",
                    exc_info=True,
                )

    return ProcessingLLMWrapper(llm)