Coverage for src / local_deep_research / config / llm_config.py: 94%
351 statements
« prev ^ index » next coverage.py v7.13.5, created at 2026-04-14 23:55 +0000
1from functools import cache
3from langchain_anthropic import ChatAnthropic
4from langchain_core.language_models import BaseChatModel
5from langchain_ollama import ChatOllama
6from langchain_openai import ChatOpenAI
7from loguru import logger
9from ..llm import get_llm_from_registry, is_llm_registered
10from ..utilities.search_utilities import remove_think_tags
11from ..utilities.url_utils import normalize_url
12from .constants import DEFAULT_OLLAMA_URL, DEFAULT_LMSTUDIO_URL
14# Import providers module to trigger auto-discovery
15try:
16 from ..llm.providers import discover_providers # noqa: F401
17 # Auto-discovery happens on module import
18except ImportError:
19 logger.debug("Providers module not available yet")
20from ..llm.providers.base import normalize_provider
21from .thread_settings import (
22 get_setting_from_snapshot,
23 NoSettingsContextError,
24)
# Valid provider options accepted by get_llm().  "none" is a sentinel meaning
# no provider is configured; get_llm() raises a ValueError for it.
VALID_PROVIDERS = [
    "ollama",
    "openai",
    "anthropic",
    "google",
    "openrouter",
    "openai_endpoint",
    "lmstudio",
    "llamacpp",
    "none",
]
def is_openai_available(settings_snapshot=None):
    """Return True when the OpenAI provider reports itself as usable.

    The real check lives in ``OpenAIProvider.is_available``; this wrapper
    only shields callers from import or probe failures.
    """
    try:
        from ..llm.providers.implementations.openai import OpenAIProvider

        available = OpenAIProvider.is_available(settings_snapshot)
    except ImportError:
        # Provider implementation not installed.
        return False
    except Exception:
        logger.debug("Error checking OpenAI availability", exc_info=True)
        return False
    return available
def is_anthropic_available(settings_snapshot=None):
    """Return True when the Anthropic provider reports itself as usable.

    Delegates to ``AnthropicProvider.is_available``; any failure during
    the probe is treated as "not available".
    """
    try:
        from ..llm.providers.implementations.anthropic import AnthropicProvider

        available = AnthropicProvider.is_available(settings_snapshot)
    except ImportError:
        # Provider implementation not installed.
        return False
    except Exception:
        logger.debug("Error checking Anthropic availability", exc_info=True)
        return False
    return available
def is_openai_endpoint_available(settings_snapshot=None):
    """Return True when a custom OpenAI-compatible endpoint is usable.

    Delegates to ``CustomOpenAIEndpointProvider.is_available``; any
    failure during the probe is treated as "not available".
    """
    try:
        from ..llm.providers.implementations.custom_openai_endpoint import (
            CustomOpenAIEndpointProvider,
        )

        available = CustomOpenAIEndpointProvider.is_available(settings_snapshot)
    except ImportError:
        # Provider implementation not installed.
        return False
    except Exception:
        logger.debug(
            "Error checking OpenAI endpoint availability", exc_info=True
        )
        return False
    return available
def is_ollama_available(settings_snapshot=None):
    """Return True when a reachable Ollama instance is detected.

    Delegates the actual probe to ``OllamaProvider.is_available``;
    failures of any kind simply mean "not available".
    """
    try:
        from ..llm.providers.implementations.ollama import OllamaProvider

        available = OllamaProvider.is_available(settings_snapshot)
    except ImportError:
        # Provider implementation not installed.
        return False
    except Exception:
        logger.debug("Error checking Ollama availability", exc_info=True)
        return False
    return available
def is_lmstudio_available(settings_snapshot=None):
    """Return True when an LM Studio server is detected.

    Delegates the actual probe to ``LMStudioProvider.is_available``;
    failures of any kind simply mean "not available".
    """
    try:
        from ..llm.providers.implementations.lmstudio import LMStudioProvider

        available = LMStudioProvider.is_available(settings_snapshot)
    except ImportError:
        # Provider implementation not installed.
        return False
    except Exception:
        logger.debug("Error checking LM Studio availability", exc_info=True)
        return False
    return available
def is_llamacpp_available(settings_snapshot=None):
    """Check if LlamaCpp is available and properly configured.

    Requires the ``langchain_community`` LlamaCpp class to be importable
    and a model path to be present in settings.  For llama.cpp *server*
    connections, use the 'openai_endpoint' provider instead.
    """
    try:
        # Import check only — the class itself is not used here.
        from langchain_community.llms import LlamaCpp  # noqa: F401

        # A non-empty model path must be configured; the path itself is
        # validated later, when the model is actually loaded.
        configured_path = get_setting_from_snapshot(
            "llm.llamacpp_model_path",
            default=None,
            settings_snapshot=settings_snapshot,
        )
        return bool(configured_path)

    except ImportError:
        # LlamaCpp library not installed
        return False

    except Exception:
        logger.debug("Error checking LlamaCpp availability", exc_info=True)
        return False
def is_google_available(settings_snapshot=None):
    """Return True when the Google/Gemini provider reports itself as usable.

    Delegates to ``GoogleProvider.is_available``; any failure during the
    probe is treated as "not available".
    """
    try:
        from ..llm.providers.implementations.google import GoogleProvider

        available = GoogleProvider.is_available(settings_snapshot)
    except ImportError:
        # Provider implementation not installed.
        return False
    except Exception:
        logger.debug("Error checking Google availability", exc_info=True)
        return False
    return available
def is_openrouter_available(settings_snapshot=None):
    """Return True when the OpenRouter provider reports itself as usable.

    Delegates to ``OpenRouterProvider.is_available``; any failure during
    the probe is treated as "not available".
    """
    try:
        from ..llm.providers.implementations.openrouter import (
            OpenRouterProvider,
        )

        available = OpenRouterProvider.is_available(settings_snapshot)
    except ImportError:
        # Provider implementation not installed.
        return False
    except Exception:
        logger.debug("Error checking OpenRouter availability", exc_info=True)
        return False
    return available
def get_available_providers(settings_snapshot=None):
    """Return available model providers as {key: human-readable label}.

    Probes each known provider in a fixed display order and collects the
    ones that report themselves as available.  When nothing is reachable
    or configured, a single ``"none"`` fallback entry is returned.

    Note: the previous ``@cache`` decorator was removed.  ``functools.cache``
    requires hashable arguments, so calling this with a dict
    ``settings_snapshot`` raised ``TypeError: unhashable type: 'dict'``.
    It also froze the result for the process lifetime, even though local
    providers (e.g. an Ollama server) can start or stop at any time.

    Args:
        settings_snapshot: Optional settings dict forwarded to each
            availability check.

    Returns:
        dict: provider key -> human-readable description.
    """
    providers = {}

    # Fixed probe order matches the desired display order.
    checks = (
        (is_ollama_available, "ollama", "Ollama (local models)"),
        (is_openai_available, "openai", "OpenAI API"),
        (is_anthropic_available, "anthropic", "Anthropic API"),
        (is_google_available, "google", "Google Gemini API"),
        (is_openrouter_available, "openrouter", "OpenRouter API"),
        (
            is_openai_endpoint_available,
            "openai_endpoint",
            "OpenAI-Compatible Endpoint",
        ),
        (is_lmstudio_available, "lmstudio", "LM Studio (local models)"),
        (is_llamacpp_available, "llamacpp", "LlamaCpp (local models)"),
    )
    for check, key, label in checks:
        if check(settings_snapshot):
            providers[key] = label

    # Default fallback
    if not providers:
        providers["none"] = "No model providers available"

    return providers
def get_selected_llm_provider(settings_snapshot=None):
    """Return the configured LLM provider name in canonical form."""
    raw_provider = get_setting_from_snapshot(
        "llm.provider", "ollama", settings_snapshot=settings_snapshot
    )
    return normalize_provider(raw_provider)
def _get_context_window_for_provider(provider_type, settings_snapshot=None):
    """Resolve the context window size for a provider from settings.

    Local providers (ollama, llamacpp, lmstudio) use a smaller default to
    prevent memory issues.  Cloud providers first check whether the
    unrestricted mode is enabled.

    Returns:
        int or None: the context window size, or None when a cloud
        provider is left unrestricted (the provider decides itself).
    """
    local_providers = ("ollama", "llamacpp", "lmstudio")
    if provider_type in local_providers:
        # Local providers: smaller default to keep memory usage in check.
        size = get_setting_from_snapshot(
            "llm.local_context_window_size",
            8192,
            settings_snapshot=settings_snapshot,
        )
        # Settings may come back as strings/None; coerce to int.
        return 8192 if size is None else int(size)

    # Cloud providers: honour the unrestricted flag first.
    unrestricted = get_setting_from_snapshot(
        "llm.context_window_unrestricted",
        True,
        settings_snapshot=settings_snapshot,
    )
    if unrestricted:
        # Let the cloud provider auto-handle its own context size.
        return None

    # Otherwise apply the user-specified limit.
    size = get_setting_from_snapshot(
        "llm.context_window_size",
        128000,
        settings_snapshot=settings_snapshot,
    )
    return 128000 if size is None else int(size)
def get_llm(
    model_name=None,
    temperature=None,
    provider=None,
    openai_endpoint_url=None,
    research_id=None,
    research_context=None,
    settings_snapshot=None,
):
    """
    Get LLM instance based on model name and provider.

    Args:
        model_name: Name of the model to use (if None, uses database setting)
        temperature: Model temperature (if None, uses database setting)
        provider: Provider to use (if None, uses database setting)
        openai_endpoint_url: Custom endpoint URL to use (if None, uses database
            setting)
        research_id: Optional research ID for token tracking
        research_context: Optional research context for enhanced token tracking
        settings_snapshot: Optional settings dict consulted instead of the
            live settings store

    Returns:
        A LangChain LLM instance with automatic think-tag removal

    Raises:
        ValueError: if the provider is unknown, unconfigured ("none"), or
            missing required configuration (API key, model path).
        TypeError: if a registered custom LLM factory has a bad signature.
    """

    # Use database values for parameters if not provided
    if model_name is None:
        model_name = get_setting_from_snapshot(
            "llm.model", "gemma3:12b", settings_snapshot=settings_snapshot
        )
    if temperature is None:
        temperature = get_setting_from_snapshot(
            "llm.temperature", 0.7, settings_snapshot=settings_snapshot
        )
    if provider is None:
        provider = get_setting_from_snapshot(
            "llm.provider", "ollama", settings_snapshot=settings_snapshot
        )

    # Clean model name: remove quotes and extra whitespace
    if model_name:
        model_name = model_name.strip().strip("\"'").strip()

    # Clean provider: remove quotes and extra whitespace
    if provider:
        provider = provider.strip().strip("\"'").strip()

    # Normalize provider: convert to lowercase canonical form
    provider = normalize_provider(provider)

    # Check if this is a registered custom LLM first.  Registered LLMs
    # bypass the built-in provider branches below entirely.
    if provider and is_llm_registered(provider):
        logger.info(f"Using registered custom LLM: {provider}")
        custom_llm = get_llm_from_registry(provider)

        # Check if it's a callable (factory function) or a BaseChatModel instance
        if callable(custom_llm) and not isinstance(custom_llm, BaseChatModel):
            # It's a callable (factory function), call it with parameters
            try:
                llm_instance = custom_llm(
                    model_name=model_name,
                    temperature=temperature,
                    settings_snapshot=settings_snapshot,
                )
            except TypeError as e:
                # Re-raise TypeError with better message
                raise TypeError(
                    f"Registered LLM factory '{provider}' has invalid signature. "
                    f"Factory functions must accept 'model_name', 'temperature', and 'settings_snapshot' parameters. "
                    f"Error: {e}"
                )

            # Validate the result is a BaseChatModel
            if not isinstance(llm_instance, BaseChatModel):
                raise ValueError(
                    f"Factory function for {provider} must return a BaseChatModel instance, "
                    f"got {type(llm_instance).__name__}"
                )
        elif isinstance(custom_llm, BaseChatModel):
            # It's already a proper LLM instance, use it directly
            llm_instance = custom_llm
        else:
            raise ValueError(
                f"Registered LLM {provider} must be either a BaseChatModel instance "
                f"or a callable factory function. Got: {type(custom_llm).__name__}"
            )

        return wrap_llm_without_think_tags(
            llm_instance,
            research_id=research_id,
            provider=provider,
            research_context=research_context,
            settings_snapshot=settings_snapshot,
        )

    # Validate provider
    if provider not in VALID_PROVIDERS:
        logger.error(f"Invalid provider in settings: {provider}")
        raise ValueError(
            f"Invalid provider: {provider}. Must be one of: {VALID_PROVIDERS}"
        )
    logger.info(
        f"Getting LLM with model: {model_name}, temperature: {temperature}, provider: {provider}"
    )

    # Common parameters for all models
    common_params = {
        "temperature": temperature,
    }

    context_window_size = _get_context_window_for_provider(
        provider, settings_snapshot
    )

    # Add context limit to research context for overflow detection
    if research_context and context_window_size:
        research_context["context_limit"] = context_window_size
        logger.info(
            f"Set context_limit={context_window_size} in research_context"
        )
    else:
        logger.debug(
            f"Context limit not set: research_context={bool(research_context)}, context_window_size={context_window_size}"
        )

    # max_tokens is derived from settings, capped by the context window
    # when one is known.
    max_tokens = None
    if get_setting_from_snapshot(
        "llm.supports_max_tokens", True, settings_snapshot=settings_snapshot
    ):
        # Use 80% of context window to leave room for prompts
        if context_window_size is not None:
            max_tokens = min(
                int(
                    get_setting_from_snapshot(
                        "llm.max_tokens",
                        100000,
                        settings_snapshot=settings_snapshot,
                    )
                ),
                int(context_window_size * 0.8),
            )
            common_params["max_tokens"] = max_tokens
        else:
            # Unrestricted context: use provider's default max_tokens
            max_tokens = int(
                get_setting_from_snapshot(
                    "llm.max_tokens",
                    100000,
                    settings_snapshot=settings_snapshot,
                )
            )
            common_params["max_tokens"] = max_tokens

    # Handle different providers
    if provider == "anthropic":
        api_key = get_setting_from_snapshot(
            "llm.anthropic.api_key", settings_snapshot=settings_snapshot
        )

        if not api_key:
            raise ValueError(
                "Anthropic API key not configured. Please set llm.anthropic.api_key in settings."
            )

        llm: BaseChatModel = ChatAnthropic(
            model=model_name,
            anthropic_api_key=api_key,
            **common_params,  # type: ignore[call-arg]
        )
        return wrap_llm_without_think_tags(
            llm,
            research_id=research_id,
            provider=provider,
            research_context=research_context,
            settings_snapshot=settings_snapshot,
        )

    if provider == "openai":
        api_key = get_setting_from_snapshot(
            "llm.openai.api_key", settings_snapshot=settings_snapshot
        )

        if not api_key:
            raise ValueError(
                "OpenAI API key not configured. Please set llm.openai.api_key in settings."
            )

        # Build OpenAI-specific parameters
        openai_params = {
            "model": model_name,
            "api_key": api_key,
            **common_params,
        }

        # Add optional parameters if they exist in settings.  Each lookup
        # may raise NoSettingsContextError when no settings source is
        # available, in which case the parameter is simply skipped.
        try:
            api_base = get_setting_from_snapshot(
                "llm.openai.api_base",
                default=None,
                settings_snapshot=settings_snapshot,
            )
            if api_base:
                openai_params["openai_api_base"] = api_base
        except NoSettingsContextError:
            pass  # Optional parameter

        try:
            organization = get_setting_from_snapshot(
                "llm.openai.organization",
                default=None,
                settings_snapshot=settings_snapshot,
            )
            if organization:
                openai_params["openai_organization"] = organization
        except NoSettingsContextError:
            pass  # Optional parameter

        try:
            streaming = get_setting_from_snapshot(
                "llm.streaming",
                default=None,
                settings_snapshot=settings_snapshot,
            )
        except NoSettingsContextError:
            streaming = None  # Optional parameter
        if streaming is not None:
            openai_params["streaming"] = streaming

        try:
            max_retries = get_setting_from_snapshot(
                "llm.max_retries",
                default=None,
                settings_snapshot=settings_snapshot,
            )
            if max_retries is not None:
                openai_params["max_retries"] = max_retries
        except NoSettingsContextError:
            pass  # Optional parameter

        try:
            request_timeout = get_setting_from_snapshot(
                "llm.request_timeout",
                default=None,
                settings_snapshot=settings_snapshot,
            )
            if request_timeout is not None:
                openai_params["request_timeout"] = request_timeout
        except NoSettingsContextError:
            pass  # Optional parameter

        llm = ChatOpenAI(**openai_params)  # type: ignore[assignment]
        return wrap_llm_without_think_tags(
            llm,
            research_id=research_id,
            provider=provider,
            research_context=research_context,
            settings_snapshot=settings_snapshot,
        )

    if provider == "openai_endpoint":
        api_key = get_setting_from_snapshot(
            "llm.openai_endpoint.api_key", settings_snapshot=settings_snapshot
        )

        # Local servers (e.g. llama.cpp) don't require an API key.
        # Use a placeholder so ChatOpenAI doesn't reject the request.
        if not api_key:
            logger.info(
                "No API key configured for openai_endpoint provider. "
                "Using placeholder key. If you are connecting to a hosted "
                "service, set llm.openai_endpoint.api_key in settings."
            )
            api_key = "not-needed"  # noqa: S105 # gitleaks:allow

        # Get endpoint URL from settings
        if openai_endpoint_url is None:
            openai_endpoint_url = get_setting_from_snapshot(
                "llm.openai_endpoint.url",
                "https://openrouter.ai/api/v1",
                settings_snapshot=settings_snapshot,
            )
        openai_endpoint_url = normalize_url(openai_endpoint_url)

        llm = ChatOpenAI(  # type: ignore[assignment, call-arg]
            model=model_name,
            api_key=api_key,
            openai_api_base=openai_endpoint_url,
            **common_params,
        )
        return wrap_llm_without_think_tags(
            llm,
            research_id=research_id,
            provider=provider,
            research_context=research_context,
            settings_snapshot=settings_snapshot,
        )

    if provider == "ollama":
        try:
            # Use the configurable Ollama base URL
            raw_base_url = get_setting_from_snapshot(
                "llm.ollama.url",
                DEFAULT_OLLAMA_URL,
                settings_snapshot=settings_snapshot,
            )
            base_url = (
                normalize_url(raw_base_url)
                if raw_base_url
                else DEFAULT_OLLAMA_URL
            )

            logger.info(
                f"Creating ChatOllama with model={model_name}, base_url={base_url}"
            )
            try:
                # Add num_ctx parameter for Ollama context window size
                ollama_params = {**common_params}
                if context_window_size is not None:
                    ollama_params["num_ctx"] = context_window_size

                # Thinking/reasoning handling for models like deepseek-r1:
                # The 'reasoning' parameter controls both:
                # 1. Whether the model performs thinking (makes it smarter when True)
                # 2. Whether thinking is separated from the answer (always separated when True)
                #
                # When reasoning=True:
                # - Model performs thinking/reasoning
                # - Thinking goes to additional_kwargs["reasoning_content"] (discarded by LDR)
                # - Only the final answer appears in response.content
                #
                # When reasoning=False:
                # - Model does NOT perform thinking (faster but less smart)
                # - Gives direct answers

                enable_thinking = get_setting_from_snapshot(
                    "llm.ollama.enable_thinking",
                    True,  # Default: enable thinking (smarter responses)
                    settings_snapshot=settings_snapshot,
                )

                # Only pass 'reasoning' through when the setting is a real
                # bool; other values (e.g. strings) are ignored.
                if enable_thinking is not None and isinstance(
                    enable_thinking, bool
                ):
                    ollama_params["reasoning"] = enable_thinking
                    logger.debug(
                        f"Ollama thinking enabled: {enable_thinking} "
                        f"(thinking will be {'shown internally but discarded' if enable_thinking else 'disabled'})"
                    )

                llm = ChatOllama(  # type: ignore[assignment]
                    model=model_name, base_url=base_url, **ollama_params
                )

                # Log the actual client configuration after creation
                logger.debug(
                    f"ChatOllama created - base_url attribute: {getattr(llm, 'base_url', 'not found')}"
                )
                # Introspect private client attributes purely for debug
                # logging of the effective base URL.
                if hasattr(llm, "_client"):
                    client = llm._client
                    logger.debug(f"ChatOllama _client type: {type(client)}")
                    if hasattr(client, "_client"):
                        inner_client = client._client
                        logger.debug(
                            f"ChatOllama inner client type: {type(inner_client)}"
                        )
                        if hasattr(inner_client, "base_url"):
                            logger.debug(
                                f"ChatOllama inner client base_url: {inner_client.base_url}"
                            )

                return wrap_llm_without_think_tags(
                    llm,
                    research_id=research_id,
                    provider=provider,
                    research_context=research_context,
                    settings_snapshot=settings_snapshot,
                )
            except Exception:
                logger.exception("Error creating or testing ChatOllama")
                raise
        except Exception:
            logger.exception("Error in Ollama provider section")
            raise

    elif provider == "lmstudio":
        # LM Studio supports OpenAI API format, so we can use ChatOpenAI directly
        lmstudio_url = get_setting_from_snapshot(
            "llm.lmstudio.url",
            DEFAULT_LMSTUDIO_URL,
            settings_snapshot=settings_snapshot,
        )
        # Use URL as-is (default already includes /v1)
        base_url = normalize_url(lmstudio_url)

        llm = ChatOpenAI(  # type: ignore[assignment, call-arg, arg-type]
            model=model_name,
            api_key="lm-studio",  # LM Studio doesn't require a real API key # pragma: allowlist secret
            base_url=base_url,
            temperature=temperature,
            max_tokens=max_tokens,  # Use calculated max_tokens based on context size
        )
        return wrap_llm_without_think_tags(
            llm,
            research_id=research_id,
            provider=provider,
            research_context=research_context,
            settings_snapshot=settings_snapshot,
        )

    # Update the llamacpp section in get_llm function
    elif provider == "llamacpp":
        # Import LlamaCpp
        from langchain_community.llms import LlamaCpp

        # Note: For llama.cpp server connections, use 'openai_endpoint' provider
        # with the server's /v1 URL (e.g., 'http://localhost:8000/v1')

        # Get LlamaCpp model path from settings
        model_path = get_setting_from_snapshot(
            "llm.llamacpp_model_path", settings_snapshot=settings_snapshot
        )
        if not model_path:
            logger.error("llamacpp_model_path not set in settings")
            raise ValueError(
                "LlamaCpp model path not configured. Either:\n"
                "1. Set 'llm.llamacpp_model_path' to your .gguf file path, or\n"
                "2. For llama.cpp server connections, use 'openai_endpoint' provider "
                "with the server's /v1 endpoint (e.g., 'http://localhost:8000/v1')"
            )

        # Validate model path for security FIRST using centralized validator
        # This MUST happen before any filesystem operations on user input
        from ..security.path_validator import PathValidator
        from .paths import get_models_directory

        try:
            validated_path = PathValidator.validate_model_path(model_path)
        except ValueError as e:
            error_msg = str(e)
            # If the path is not a file, try to provide helpful directory listing
            # Only do this after path has passed security validation (safe_join check)
            if "not a file" in error_msg:
                helpful_message = None
                try:
                    model_root = str(get_models_directory())
                    safe_path = PathValidator.validate_safe_path(
                        model_path, model_root, allow_absolute=False
                    )
                    if safe_path and safe_path.is_dir():
                        gguf_files = list(safe_path.glob("*.gguf"))
                        if gguf_files:
                            # Show at most 5 candidate files in the hint.
                            files_list = ", ".join(
                                f.name for f in gguf_files[:5]
                            )
                            if len(gguf_files) > 5:
                                files_list += (
                                    f" (and {len(gguf_files) - 5} more)"
                                )
                            suggestion = f"Found .gguf files: {files_list}"
                        else:
                            suggestion = (
                                "No .gguf files found in this directory"
                            )
                        helpful_message = (
                            f"Model path is a directory, not a file: {model_path}\n"
                            f"Please specify the full path to a .gguf model file.\n"
                            f"{suggestion}"
                        )
                except ValueError:
                    pass  # Secondary validation failed, use original error
                if helpful_message:
                    raise ValueError(helpful_message) from e
            logger.exception("Model path validation failed")
            raise

        model_path = str(validated_path)

        # Validate file extension - LlamaCpp requires .gguf or .bin files
        # Safe to use validated_path here since it passed security validation
        if validated_path.suffix.lower() not in (".gguf", ".bin"):
            raise ValueError(
                f"Invalid model file extension: {validated_path.suffix}\n"
                f"LlamaCpp requires .gguf or .bin model files.\n"
                f"File: {validated_path.name}"
            )

        # Get additional LlamaCpp parameters
        n_gpu_layers = get_setting_from_snapshot(
            "llm.llamacpp_n_gpu_layers",
            1,
            settings_snapshot=settings_snapshot,
        )
        n_batch = get_setting_from_snapshot(
            "llm.llamacpp_n_batch", 512, settings_snapshot=settings_snapshot
        )
        f16_kv = get_setting_from_snapshot(
            "llm.llamacpp_f16_kv", True, settings_snapshot=settings_snapshot
        )

        # Create LlamaCpp instance
        llm = LlamaCpp(
            model_path=model_path,
            temperature=temperature,
            max_tokens=max_tokens,  # Use calculated max_tokens
            n_gpu_layers=n_gpu_layers,
            n_batch=n_batch,
            f16_kv=f16_kv,
            n_ctx=context_window_size,  # Set context window size directly (None = use default)
            verbose=True,
        )

        return wrap_llm_without_think_tags(
            llm,
            research_id=research_id,
            provider=provider,
            research_context=research_context,
            settings_snapshot=settings_snapshot,
        )

    elif provider == "none":
        raise ValueError(
            "No LLM provider configured. Please set llm.provider in settings "
            "to a valid provider (e.g., 'ollama', 'openai', 'anthropic')."
        )

    else:
        # Provider validated above but not handled - this shouldn't happen
        # since VALID_PROVIDERS check above would catch unknown providers
        # NOTE(review): "google" and "openrouter" are in VALID_PROVIDERS but
        # have no branch above, so they land here unless served through the
        # registered-LLM path — confirm they are registered elsewhere.
        raise ValueError(
            f"Provider '{provider}' is valid but not implemented. "
            f"This is a bug - please report it."
        )
def wrap_llm_without_think_tags(
    llm,
    research_id=None,
    provider=None,
    research_context=None,
    settings_snapshot=None,
):
    """Create a wrapper class that processes LLM outputs with remove_think_tags and token counting.

    Optionally layers rate limiting and a token-counting callback onto the
    given LLM, then returns a ProcessingLLMWrapper whose ``invoke`` strips
    think tags from responses.  All other attribute access is delegated to
    the wrapped LLM.
    """

    # First apply rate limiting if enabled
    from ..web_search_engines.rate_limiting.llm import (
        create_rate_limited_llm_wrapper,
    )

    # Check if LLM rate limiting is enabled (independent of search rate limiting)
    # Use the thread-safe get_db_setting defined in this module
    if get_setting_from_snapshot(
        "rate_limiting.llm_enabled", False, settings_snapshot=settings_snapshot
    ):
        llm = create_rate_limited_llm_wrapper(llm, provider)

    # Set context_limit in research_context for overflow detection.
    # This is needed for providers that go through the registered provider path
    # (which returns before the code in get_llm that sets context_limit).
    if research_context is not None and provider is not None:
        if "context_limit" not in research_context:
            context_limit = _get_context_window_for_provider(
                provider, settings_snapshot
            )
            if context_limit is not None:
                research_context["context_limit"] = context_limit
                logger.info(
                    f"Set context_limit={context_limit} in wrap_llm for provider={provider}"
                )

    # Import token counting functionality if research_id is provided
    callbacks = []
    if research_id is not None:
        from ..metrics import TokenCounter

        token_counter = TokenCounter()
        token_callback = token_counter.create_callback(
            research_id, research_context
        )
        # Set provider and model info on the callback
        if provider:
            token_callback.preset_provider = provider
        # Try to extract model name from the LLM instance
        if hasattr(llm, "model_name"):
            token_callback.preset_model = llm.model_name
        elif hasattr(llm, "model"):
            token_callback.preset_model = llm.model
        callbacks.append(token_callback)

    # Add callbacks to the LLM if it supports them (mutates the wrapped
    # LLM in place rather than re-instantiating it).
    if callbacks and hasattr(llm, "callbacks"):
        if llm.callbacks is None:
            llm.callbacks = callbacks
        else:
            llm.callbacks.extend(callbacks)

    class ProcessingLLMWrapper:
        """Thin proxy around an LLM that strips think tags from outputs."""

        def __init__(self, base_llm):
            # The wrapped LLM; all unhandled attribute access is forwarded
            # to it via __getattr__ below.
            self.base_llm = base_llm

        def invoke(self, *args, **kwargs):
            """Invoke the wrapped LLM and strip think tags from the result."""
            # Removed verbose debug logging to reduce log clutter
            # Uncomment the lines below if you need to debug LLM requests
            try:
                response = self.base_llm.invoke(*args, **kwargs)
            except Exception as e:
                logger.exception("LLM Request - Failed with error")
                # Log any URL information from the error
                error_str = str(e)
                if "http://" in error_str or "https://" in error_str:
                    logger.exception(
                        f"LLM Request - Error contains URL info: {error_str}"
                    )
                raise

            # Process the response content if it has a content attribute
            if hasattr(response, "content"):
                response.content = remove_think_tags(response.content)
            elif isinstance(response, str):
                response = remove_think_tags(response)

            return response

        # Pass through any other attributes to the base LLM
        def __getattr__(self, name):
            return getattr(self.base_llm, name)

        def close(self):
            """Close underlying HTTP clients held by this LLM. Idempotent."""
            try:
                from ..utilities.llm_utils import _close_base_llm

                _close_base_llm(self.base_llm)
            except Exception:
                logger.debug(
                    "best-effort cleanup of HTTP clients on shutdown",
                    exc_info=True,
                )

    return ProcessingLLMWrapper(llm)