Coverage for src/local_deep_research/config/llm_config.py: 70%
386 statements
coverage.py v7.13.4, created at 2026-02-25 01:07 +0000
1import os
2from functools import cache
4from langchain_anthropic import ChatAnthropic
5from langchain_core.language_models import BaseChatModel, FakeListChatModel
6from langchain_ollama import ChatOllama
7from langchain_openai import ChatOpenAI
8from loguru import logger
10from ..llm import get_llm_from_registry, is_llm_registered
11from ..utilities.search_utilities import remove_think_tags
12from ..utilities.url_utils import normalize_url
13from ..security import safe_get
15# Import providers module to trigger auto-discovery
16try:
17 from ..llm.providers import discover_providers # noqa: F401
18 # Auto-discovery happens on module import
19except ImportError:
20 pass # Providers module not available yet
21from .thread_settings import (
22 get_llm_setting_from_snapshot as get_setting_from_snapshot,
23 NoSettingsContextError,
24)
26# Valid provider options
27VALID_PROVIDERS = [
28 "ollama",
29 "openai",
30 "anthropic",
31 "google",
32 "openrouter",
33 "vllm",
34 "openai_endpoint",
35 "lmstudio",
36 "llamacpp",
37 "none",
38]
41def is_openai_available(settings_snapshot=None):
42 """Check if OpenAI is available by delegating to the provider class."""
43 try:
44 from ..llm.providers.implementations.openai import OpenAIProvider
46 return OpenAIProvider.is_available(settings_snapshot)
47 except ImportError:
48 return False
49 except Exception:
50 return False
53def is_anthropic_available(settings_snapshot=None):
54 """Check if Anthropic is available by delegating to the provider class."""
55 try:
56 from ..llm.providers.implementations.anthropic import AnthropicProvider
58 return AnthropicProvider.is_available(settings_snapshot)
59 except ImportError:
60 return False
61 except Exception:
62 return False
65def is_openai_endpoint_available(settings_snapshot=None):
66 """Check if OpenAI endpoint is available by delegating to the provider class."""
67 try:
68 from ..llm.providers.implementations.custom_openai_endpoint import (
69 CustomOpenAIEndpointProvider,
70 )
72 return CustomOpenAIEndpointProvider.is_available(settings_snapshot)
73 except ImportError:
74 return False
75 except Exception:
76 return False
79def is_ollama_available(settings_snapshot=None):
80 """Check if Ollama is running by delegating to the provider class."""
81 try:
82 from ..llm.providers.implementations.ollama import OllamaProvider
84 return OllamaProvider.is_available(settings_snapshot)
85 except ImportError:
86 return False
87 except Exception:
88 return False
91def is_vllm_available():
92 """Check if VLLM capability is available"""
93 try:
94 import torch # noqa: F401
95 import transformers # noqa: F401
97 # Only try to import VLLM if the dependencies are available
98 # The VLLM class itself might not fail to import, but using it will fail
99 # without the proper dependencies
100 import vllm # noqa: F401
101 from langchain_community.llms import VLLM # noqa: F401
103 return True
104 except ImportError:
105 return False
108def is_lmstudio_available(settings_snapshot=None):
109 """Check if LM Studio is available by delegating to the provider class."""
110 try:
111 from ..llm.providers.implementations.lmstudio import LMStudioProvider
113 return LMStudioProvider.is_available(settings_snapshot)
114 except ImportError:
115 return False
116 except Exception:
117 return False
120def is_llamacpp_available(settings_snapshot=None):
121 """Check if LlamaCpp is available and properly configured.
123 Checks that the library is installed and a model path is configured.
124 For llama.cpp server connections, use 'openai_endpoint' provider instead.
125 """
126 try:
127 # Import check
128 from langchain_community.llms import LlamaCpp # noqa: F401
130 # Check if model path is configured and looks valid
131 # Note: For llama.cpp server connections, use 'openai_endpoint' provider instead
132 model_path_str = get_setting_from_snapshot(
133 "llm.llamacpp_model_path",
134 default=None,
135 settings_snapshot=settings_snapshot,
136 )
138 # If no path configured, LlamaCpp is not available
139 if not model_path_str:
140 return False
142 # Path is configured, actual validation happens when model loads
143 return True
145 except ImportError:
146 # LlamaCpp library not installed
147 return False
149 except Exception:
150 return False
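As the docstring above notes, in-process LlamaCpp needs a local model file, while llama.cpp servers should go through the 'openai_endpoint' provider. In terms of the setting keys this module actually reads, that distinction looks roughly like this (the model path is an illustrative value, not a default from the code; the URL comes from the docstring example further down):

# Local in-process LlamaCpp: point the provider at a .gguf file on disk.
#   llm.provider            = "llamacpp"
#   llm.llamacpp_model_path = "/models/example-model.gguf"   # hypothetical path
# llama.cpp running as a server (OpenAI-compatible API): use the endpoint provider.
#   llm.provider            = "openai_endpoint"
#   llm.openai_endpoint.url = "http://localhost:8000/v1"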
153def is_google_available(settings_snapshot=None):
154 """Check if Google/Gemini is available"""
155 try:
156 from ..llm.providers.google import GoogleProvider
158 return GoogleProvider.is_available(settings_snapshot)
159 except ImportError:
160 return False
161 except Exception:
162 return False
165def is_openrouter_available(settings_snapshot=None):
166 """Check if OpenRouter is available"""
167 try:
168 from ..llm.providers.openrouter import OpenRouterProvider
170 return OpenRouterProvider.is_available(settings_snapshot)
171 except ImportError:
172 return False
173 except Exception:
174 return False
177@cache
178def get_available_providers(settings_snapshot=None):
179 """Return available model providers"""
180 providers = {}
182 if is_ollama_available(settings_snapshot):
183 providers["ollama"] = "Ollama (local models)"
185 if is_openai_available(settings_snapshot):    [185 ↛ 186: condition was never true]
186 providers["openai"] = "OpenAI API"
188 if is_anthropic_available(settings_snapshot):    [188 ↛ 189: condition was never true]
189 providers["anthropic"] = "Anthropic API"
191 if is_google_available(settings_snapshot):    [191 ↛ 192: condition was never true]
192 providers["google"] = "Google Gemini API"
194 if is_openrouter_available(settings_snapshot):    [194 ↛ 195: condition was never true]
195 providers["openrouter"] = "OpenRouter API"
197 if is_openai_endpoint_available(settings_snapshot):    [197 ↛ 198: condition was never true]
198 providers["openai_endpoint"] = "OpenAI-compatible Endpoint"
200 if is_lmstudio_available(settings_snapshot):    [200 ↛ 201: condition was never true]
201 providers["lmstudio"] = "LM Studio (local models)"
203 if is_llamacpp_available(settings_snapshot):    [203 ↛ 204: condition was never true]
204 providers["llamacpp"] = "LlamaCpp (local models)"
206 # Check for VLLM capability
207 if is_vllm_available():    [207 ↛ 208: condition was never true]
208 providers["vllm"] = "VLLM (local models)"
210 # Default fallback
211 if not providers:
212 providers["none"] = "No model providers available"
214 return providers
217def get_selected_llm_provider(settings_snapshot=None):
218 return get_setting_from_snapshot(
219 "llm.provider", "ollama", settings_snapshot=settings_snapshot
220 ).lower()
223def _get_context_window_for_provider(provider_type, settings_snapshot=None):
224 """Get context window size from settings based on provider type.
226 Local providers (ollama, llamacpp, lmstudio) use a smaller default to prevent
227 memory issues. Cloud providers check if unrestricted mode is enabled.
229 Returns:
230 int or None: The context window size, or None for unrestricted cloud providers.
231 """
232 if provider_type in ["ollama", "llamacpp", "lmstudio"]:
233 # Local providers: use smaller default to prevent memory issues
234 window_size = get_setting_from_snapshot(
235 "llm.local_context_window_size",
236 4096,
237 settings_snapshot=settings_snapshot,
238 )
239 # Ensure it's an integer
240 return int(window_size) if window_size is not None else 4096
241 else:
242 # Cloud providers: check if unrestricted mode is enabled
243 use_unrestricted = get_setting_from_snapshot(
244 "llm.context_window_unrestricted",
245 True,
246 settings_snapshot=settings_snapshot,
247 )
248 if use_unrestricted:    [248 ↛ 253: condition was always true]
249 # Let cloud providers auto-handle context (return None or very large value)
250 return None # Will be handled per provider
251 else:
252 # Use user-specified limit
253 window_size = get_setting_from_snapshot(
254 "llm.context_window_size",
255 128000,
256 settings_snapshot=settings_snapshot,
257 )
258 return int(window_size) if window_size is not None else 128000
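The context-window resolution above reduces to a small decision table. The sketch below (a standalone illustration, not part of the measured module; the real function reads these values through get_setting_from_snapshot rather than taking them as arguments) captures the same branching:

def resolve_context_window(provider_type, local_size=4096, unrestricted=True, cloud_size=128000):
    # Local backends always get an explicit, conservative window to limit memory use.
    if provider_type in ("ollama", "llamacpp", "lmstudio"):
        return local_size
    # Cloud backends: None means "let the provider apply its own limit".
    return None if unrestricted else cloud_size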
261def get_llm(
262 model_name=None,
263 temperature=None,
264 provider=None,
265 openai_endpoint_url=None,
266 research_id=None,
267 research_context=None,
268 settings_snapshot=None,
269):
270 """
271 Get LLM instance based on model name and provider.
273 Args:
274 model_name: Name of the model to use (if None, uses database setting)
275 temperature: Model temperature (if None, uses database setting)
276 provider: Provider to use (if None, uses database setting)
277 openai_endpoint_url: Custom endpoint URL to use (if None, uses database
278 setting)
279 research_id: Optional research ID for token tracking
280 research_context: Optional research context for enhanced token tracking
282 Returns:
283 A LangChain LLM instance with automatic think-tag removal
284 """
286 # Use database values for parameters if not provided
287 if model_name is None:
288 model_name = get_setting_from_snapshot(
289 "llm.model", "gemma:latest", settings_snapshot=settings_snapshot
290 )
291 if temperature is None:
292 temperature = get_setting_from_snapshot(
293 "llm.temperature", 0.7, settings_snapshot=settings_snapshot
294 )
295 if provider is None:
296 provider = get_setting_from_snapshot(
297 "llm.provider", "ollama", settings_snapshot=settings_snapshot
298 )
300 # Clean model name: remove quotes and extra whitespace
301 if model_name:
302 model_name = model_name.strip().strip("\"'").strip()
304 # Clean provider: remove quotes and extra whitespace
305 if provider:    [305 ↛ 309: condition was always true]
306 provider = provider.strip().strip("\"'").strip()
308 # Normalize provider: convert to lowercase
309 provider = provider.lower() if provider else None
311 # Check if this is a registered custom LLM first
312 if provider and is_llm_registered(provider):
313 logger.info(f"Using registered custom LLM: {provider}")
314 custom_llm = get_llm_from_registry(provider)
316 # Check if it's a callable (factory function) or a BaseChatModel instance
317 if callable(custom_llm) and not isinstance(custom_llm, BaseChatModel):
318 # It's a callable (factory function), call it with parameters
319 try:
320 llm_instance = custom_llm(
321 model_name=model_name,
322 temperature=temperature,
323 settings_snapshot=settings_snapshot,
324 )
325 except TypeError as e:
326 # Re-raise TypeError with better message
327 raise TypeError(
328 f"Registered LLM factory '{provider}' has invalid signature. "
329 f"Factory functions must accept 'model_name', 'temperature', and 'settings_snapshot' parameters. "
330 f"Error: {e}"
331 )
333 # Validate the result is a BaseChatModel
334 if not isinstance(llm_instance, BaseChatModel):
335 raise ValueError(
336 f"Factory function for {provider} must return a BaseChatModel instance, "
337 f"got {type(llm_instance).__name__}"
338 )
339 elif isinstance(custom_llm, BaseChatModel):    [339 ↛ 343: condition was always true]
340 # It's already a proper LLM instance, use it directly
341 llm_instance = custom_llm
342 else:
343 raise ValueError(
344 f"Registered LLM {provider} must be either a BaseChatModel instance "
345 f"or a callable factory function. Got: {type(custom_llm).__name__}"
346 )
348 return wrap_llm_without_think_tags(
349 llm_instance,
350 research_id=research_id,
351 provider=provider,
352 research_context=research_context,
353 settings_snapshot=settings_snapshot,
354 )
356 # Check if we're in testing mode and should use fallback (but only when no API keys are configured)
357 # Skip fallback check if we're in test mode with mocks
358 if os.environ.get("LDR_USE_FALLBACK_LLM", "") and not os.environ.get(    [358 ↛ 362: condition was never true]
359 "LDR_TESTING_WITH_MOCKS", ""
360 ):
361 # Only use fallback if the provider has no valid configuration
362 provider_has_config = False
364 if (
365 (
366 provider == "openai"
367 and get_setting_from_snapshot(
368 "llm.openai.api_key",
369 default=None,
370 settings_snapshot=settings_snapshot,
371 )
372 )
373 or (
374 provider == "anthropic"
375 and get_setting_from_snapshot(
376 "llm.anthropic.api_key",
377 default=None,
378 settings_snapshot=settings_snapshot,
379 )
380 )
381 or (
382 provider == "openai_endpoint"
383 and get_setting_from_snapshot(
384 "llm.openai_endpoint.api_key",
385 settings_snapshot=settings_snapshot,
386 )
387 )
388 or (
389 provider == "ollama"
390 and is_ollama_available(settings_snapshot=settings_snapshot)
391 )
392 ):
393 provider_has_config = True
394 elif provider in ["vllm", "lmstudio", "llamacpp"]:
395 # These are local providers, check their availability
396 if (
397 (provider == "vllm" and is_vllm_available())
398 or (
399 provider == "lmstudio"
400 and is_lmstudio_available(
401 settings_snapshot=settings_snapshot
402 )
403 )
404 or (
405 provider == "llamacpp"
406 and is_llamacpp_available(
407 settings_snapshot=settings_snapshot
408 )
409 )
410 ):
411 provider_has_config = True
413 if not provider_has_config:
414 logger.info(
415 "LDR_USE_FALLBACK_LLM is set and no valid provider config found, using fallback model"
416 )
417 return wrap_llm_without_think_tags(
418 get_fallback_model(temperature),
419 research_id=research_id,
420 provider="fallback",
421 research_context=research_context,
422 settings_snapshot=settings_snapshot,
423 )
425 # Validate provider
426 if provider not in VALID_PROVIDERS:
427 logger.error(f"Invalid provider in settings: {provider}")
428 raise ValueError(
429 f"Invalid provider: {provider}. Must be one of: {VALID_PROVIDERS}"
430 )
431 logger.info(
432 f"Getting LLM with model: {model_name}, temperature: {temperature}, provider: {provider}"
433 )
435 # Common parameters for all models
436 common_params = {
437 "temperature": temperature,
438 }
440 context_window_size = _get_context_window_for_provider(
441 provider, settings_snapshot
442 )
444 # Add context limit to research context for overflow detection
445 if research_context and context_window_size:    [445 ↛ 446: condition was never true]
446 research_context["context_limit"] = context_window_size
447 logger.info(
448 f"Set context_limit={context_window_size} in research_context"
449 )
450 else:
451 logger.debug(
452 f"Context limit not set: research_context={bool(research_context)}, context_window_size={context_window_size}"
453 )
455 if get_setting_from_snapshot(
456 "llm.supports_max_tokens", True, settings_snapshot=settings_snapshot
457 ):
458 # Use 80% of context window to leave room for prompts
459 if context_window_size is not None:
460 max_tokens = min(
461 int(
462 get_setting_from_snapshot(
463 "llm.max_tokens",
464 100000,
465 settings_snapshot=settings_snapshot,
466 )
467 ),
468 int(context_window_size * 0.8),
469 )
470 common_params["max_tokens"] = max_tokens
471 else:
472 # Unrestricted context: use provider's default max_tokens
473 max_tokens = int(
474 get_setting_from_snapshot(
475 "llm.max_tokens",
476 100000,
477 settings_snapshot=settings_snapshot,
478 )
479 )
480 common_params["max_tokens"] = max_tokens
482 # Handle different providers
483 if provider == "anthropic":
484 api_key = get_setting_from_snapshot(
485 "llm.anthropic.api_key", settings_snapshot=settings_snapshot
486 )
488 if not api_key:
489 logger.warning(
490 "Anthropic API key not found in settings. Falling back to default model."
491 )
492 return get_fallback_model(temperature)
494 llm = ChatAnthropic(
495 model=model_name, anthropic_api_key=api_key, **common_params
496 )
497 return wrap_llm_without_think_tags(
498 llm,
499 research_id=research_id,
500 provider=provider,
501 research_context=research_context,
502 settings_snapshot=settings_snapshot,
503 )
505 elif provider == "openai":
506 api_key = get_setting_from_snapshot(
507 "llm.openai.api_key", settings_snapshot=settings_snapshot
508 )
510 if not api_key:
511 logger.warning(
512 "OpenAI API key not found in settings. Falling back to default model."
513 )
514 return get_fallback_model(temperature)
516 # Build OpenAI-specific parameters
517 openai_params = {
518 "model": model_name,
519 "api_key": api_key,
520 **common_params,
521 }
523 # Add optional parameters if they exist in settings
524 try:
525 api_base = get_setting_from_snapshot(
526 "llm.openai.api_base",
527 default=None,
528 settings_snapshot=settings_snapshot,
529 )
530 if api_base:
531 openai_params["openai_api_base"] = api_base
532 except NoSettingsContextError:
533 pass # Optional parameter
535 try:
536 organization = get_setting_from_snapshot(
537 "llm.openai.organization",
538 default=None,
539 settings_snapshot=settings_snapshot,
540 )
541 if organization:
542 openai_params["openai_organization"] = organization
543 except NoSettingsContextError:
544 pass # Optional parameter
546 try:
547 streaming = get_setting_from_snapshot(
548 "llm.streaming",
549 default=None,
550 settings_snapshot=settings_snapshot,
551 )
552 except NoSettingsContextError:
553 streaming = None # Optional parameter
554 if streaming is not None:    [554 ↛ 555: condition was never true]
555 openai_params["streaming"] = streaming
557 try:
558 max_retries = get_setting_from_snapshot(
559 "llm.max_retries",
560 default=None,
561 settings_snapshot=settings_snapshot,
562 )
563 if max_retries is not None:    [563 ↛ 564: condition was never true]
564 openai_params["max_retries"] = max_retries
565 except NoSettingsContextError:
566 pass # Optional parameter
568 try:
569 request_timeout = get_setting_from_snapshot(
570 "llm.request_timeout",
571 default=None,
572 settings_snapshot=settings_snapshot,
573 )
574 if request_timeout is not None:    [574 ↛ 575: condition was never true]
575 openai_params["request_timeout"] = request_timeout
576 except NoSettingsContextError:
577 pass # Optional parameter
579 llm = ChatOpenAI(**openai_params)
580 return wrap_llm_without_think_tags(
581 llm,
582 research_id=research_id,
583 provider=provider,
584 research_context=research_context,
585 settings_snapshot=settings_snapshot,
586 )
588 elif provider == "openai_endpoint":
589 api_key = get_setting_from_snapshot(
590 "llm.openai_endpoint.api_key", settings_snapshot=settings_snapshot
591 )
593 if not api_key:    [593 ↛ 594: condition was never true]
594 logger.warning(
595 "OpenAI endpoint API key not found in settings. Falling back to default model."
596 )
597 return get_fallback_model(temperature)
599 # Get endpoint URL from settings
600 if openai_endpoint_url is None:    [600 ↛ 606: condition was always true]
601 openai_endpoint_url = get_setting_from_snapshot(
602 "llm.openai_endpoint.url",
603 "https://openrouter.ai/api/v1",
604 settings_snapshot=settings_snapshot,
605 )
606 openai_endpoint_url = normalize_url(openai_endpoint_url)
608 llm = ChatOpenAI(
609 model=model_name,
610 api_key=api_key,
611 openai_api_base=openai_endpoint_url,
612 **common_params,
613 )
614 return wrap_llm_without_think_tags(
615 llm,
616 research_id=research_id,
617 provider=provider,
618 research_context=research_context,
619 settings_snapshot=settings_snapshot,
620 )
622 elif provider == "vllm":
623 if not is_vllm_available():
624 logger.warning(
625 "VLLM dependencies are not available. Falling back to default model."
626 )
627 return get_fallback_model(temperature)
629 try:
630 from langchain_community.llms import VLLM
632 llm = VLLM(
633 model=model_name,
634 trust_remote_code=True,
635 max_new_tokens=128,
636 top_k=10,
637 top_p=0.95,
638 temperature=temperature,
639 )
640 return wrap_llm_without_think_tags(
641 llm,
642 research_id=research_id,
643 provider=provider,
644 research_context=research_context,
645 )
646 except Exception:
647 logger.exception("Error loading VLLM model")
648 return get_fallback_model(temperature)
650 elif provider == "ollama":
651 try:
652 # Use the configurable Ollama base URL
653 raw_base_url = get_setting_from_snapshot(
654 "llm.ollama.url",
655 "http://localhost:11434",
656 settings_snapshot=settings_snapshot,
657 )
658 base_url = (
659 normalize_url(raw_base_url)
660 if raw_base_url
661 else "http://localhost:11434"
662 )
664 # Check if Ollama is available before trying to use it
665 if not is_ollama_available(settings_snapshot=settings_snapshot):
666 logger.error(
667 f"Ollama not available at {base_url}. Falling back to dummy model."
668 )
669 return get_fallback_model(temperature)
671 # Check if the requested model exists
672 try:
673 logger.info(
674 f"Checking if model '{model_name}' exists in Ollama"
675 )
676 response = safe_get(
677 f"{base_url}/api/tags",
678 timeout=3.0,
679 allow_localhost=True,
680 allow_private_ips=True,
681 )
682 if response.status_code == 200:    [682 ↛ 710: condition was always true]
683 # Handle both newer and older Ollama API formats
684 data = response.json()
685 models = []
686 if "models" in data: 686 ↛ 691line 686 didn't jump to line 691 because the condition on line 686 was always true
687 # Newer Ollama API
688 models = data.get("models", [])
689 else:
690 # Older Ollama API format
691 models = data
693 # Get list of model names
694 model_names = [m.get("name", "").lower() for m in models]
695 logger.info(
696 f"Available Ollama models: {', '.join(model_names[:5])}{' and more' if len(model_names) > 5 else ''}"
697 )
699 if model_name.lower() not in model_names:
700 logger.error(
701 f"Model '{model_name}' not found in Ollama. Available models: {', '.join(model_names[:5])}"
702 )
703 return get_fallback_model(temperature)
704 except Exception:
705 logger.exception(
706 f"Error checking for model '{model_name}' in Ollama"
707 )
708 # Continue anyway, let ChatOllama handle potential errors
710 logger.info(
711 f"Creating ChatOllama with model={model_name}, base_url={base_url}"
712 )
713 try:
714 # Add num_ctx parameter for Ollama context window size
715 ollama_params = {**common_params}
716 if context_window_size is not None:    [716 ↛ 733: condition was always true]
717 ollama_params["num_ctx"] = context_window_size
719 # Thinking/reasoning handling for models like deepseek-r1:
720 # The 'reasoning' parameter controls both:
721 # 1. Whether the model performs thinking (makes it smarter when True)
722 # 2. Whether thinking is separated from the answer (always separated when True)
723 #
724 # When reasoning=True:
725 # - Model performs thinking/reasoning
726 # - Thinking goes to additional_kwargs["reasoning_content"] (discarded by LDR)
727 # - Only the final answer appears in response.content
728 #
729 # When reasoning=False:
730 # - Model does NOT perform thinking (faster but less smart)
731 # - Gives direct answers
733 enable_thinking = get_setting_from_snapshot(
734 "llm.ollama.enable_thinking",
735 True, # Default: enable thinking (smarter responses)
736 settings_snapshot=settings_snapshot,
737 )
739 if enable_thinking is not None and isinstance(    [739 ↛ 748: condition was always true]
740 enable_thinking, bool
741 ):
742 ollama_params["reasoning"] = enable_thinking
743 logger.debug(
744 f"Ollama thinking enabled: {enable_thinking} "
745 f"(thinking will be {'shown internally but discarded' if enable_thinking else 'disabled'})"
746 )
748 llm = ChatOllama(
749 model=model_name, base_url=base_url, **ollama_params
750 )
752 # Log the actual client configuration after creation
753 logger.debug(
754 f"ChatOllama created - base_url attribute: {getattr(llm, 'base_url', 'not found')}"
755 )
756 if hasattr(llm, "_client"):    [756 ↛ 769: condition was always true]
757 client = llm._client
758 logger.debug(f"ChatOllama _client type: {type(client)}")
759 if hasattr(client, "_client"):    [759 ↛ 769: condition was always true]
760 inner_client = client._client
761 logger.debug(
762 f"ChatOllama inner client type: {type(inner_client)}"
763 )
764 if hasattr(inner_client, "base_url"):    [764 ↛ 769: condition was always true]
765 logger.debug(
766 f"ChatOllama inner client base_url: {inner_client.base_url}"
767 )
769 return wrap_llm_without_think_tags(
770 llm,
771 research_id=research_id,
772 provider=provider,
773 research_context=research_context,
774 settings_snapshot=settings_snapshot,
775 )
776 except Exception:
777 logger.exception("Error creating or testing ChatOllama")
778 return get_fallback_model(temperature)
779 except Exception:
780 logger.exception("Error in Ollama provider section")
781 return get_fallback_model(temperature)
783 elif provider == "lmstudio":
784 # LM Studio supports OpenAI API format, so we can use ChatOpenAI directly
785 lmstudio_url = get_setting_from_snapshot(
786 "llm.lmstudio.url",
787 "http://localhost:1234/v1",
788 settings_snapshot=settings_snapshot,
789 )
790 # Use URL as-is (default already includes /v1)
791 base_url = normalize_url(lmstudio_url)
793 llm = ChatOpenAI(
794 model=model_name,
795 api_key="lm-studio", # LM Studio doesn't require a real API key # pragma: allowlist secret
796 base_url=base_url,
797 temperature=temperature,
798 max_tokens=max_tokens, # Use calculated max_tokens based on context size
799 )
800 return wrap_llm_without_think_tags(
801 llm,
802 research_id=research_id,
803 provider=provider,
804 research_context=research_context,
805 settings_snapshot=settings_snapshot,
806 )
808 # Update the llamacpp section in get_llm function
809 elif provider == "llamacpp":
810 # Import LlamaCpp
811 from langchain_community.llms import LlamaCpp
813 # Note: For llama.cpp server connections, use 'openai_endpoint' provider
814 # with the server's /v1 URL (e.g., 'http://localhost:8000/v1')
816 # Get LlamaCpp model path from settings
817 model_path = get_setting_from_snapshot(
818 "llm.llamacpp_model_path", settings_snapshot=settings_snapshot
819 )
820 if not model_path:
821 logger.error("llamacpp_model_path not set in settings")
822 raise ValueError(
823 "LlamaCpp model path not configured. Either:\n"
824 "1. Set 'llm.llamacpp_model_path' to your .gguf file path, or\n"
825 "2. For llama.cpp server connections, use 'openai_endpoint' provider "
826 "with the server's /v1 endpoint (e.g., 'http://localhost:8000/v1')"
827 )
829 # Validate model path for security FIRST using centralized validator
830 # This MUST happen before any filesystem operations on user input
831 from ..security.path_validator import PathValidator
832 from .paths import get_models_directory
834 try:
835 validated_path = PathValidator.validate_model_path(model_path)
836 except ValueError as e:
837 error_msg = str(e)
838 # If the path is not a file, try to provide helpful directory listing
839 # Only do this after path has passed security validation (safe_join check)
840 if "not a file" in error_msg:
841 try:
842 model_root = str(get_models_directory())
843 safe_path = PathValidator.validate_safe_path(
844 model_path, model_root, allow_absolute=False
845 )
846 if safe_path and safe_path.is_dir():
847 gguf_files = list(safe_path.glob("*.gguf"))
848 if gguf_files:
849 files_list = ", ".join(
850 f.name for f in gguf_files[:5]
851 )
852 if len(gguf_files) > 5:
853 files_list += (
854 f" (and {len(gguf_files) - 5} more)"
855 )
856 suggestion = f"Found .gguf files: {files_list}"
857 else:
858 suggestion = (
859 "No .gguf files found in this directory"
860 )
861 raise ValueError(
862 f"Model path is a directory, not a file: {model_path}\n"
863 f"Please specify the full path to a .gguf model file.\n"
864 f"{suggestion}"
865 ) from e
866 except ValueError:
867 pass # Re-raise original error if secondary validation fails
868 logger.exception("Model path validation failed")
869 raise
871 model_path = str(validated_path)
873 # Validate file extension - LlamaCpp requires .gguf or .bin files
874 # Safe to use validated_path here since it passed security validation
875 if validated_path.suffix.lower() not in (".gguf", ".bin"):
876 raise ValueError(
877 f"Invalid model file extension: {validated_path.suffix}\n"
878 f"LlamaCpp requires .gguf or .bin model files.\n"
879 f"File: {validated_path.name}"
880 )
882 # Get additional LlamaCpp parameters
883 n_gpu_layers = get_setting_from_snapshot(
884 "llm.llamacpp_n_gpu_layers",
885 1,
886 settings_snapshot=settings_snapshot,
887 )
888 n_batch = get_setting_from_snapshot(
889 "llm.llamacpp_n_batch", 512, settings_snapshot=settings_snapshot
890 )
891 f16_kv = get_setting_from_snapshot(
892 "llm.llamacpp_f16_kv", True, settings_snapshot=settings_snapshot
893 )
895 # Create LlamaCpp instance
896 llm = LlamaCpp(
897 model_path=model_path,
898 temperature=temperature,
899 max_tokens=max_tokens, # Use calculated max_tokens
900 n_gpu_layers=n_gpu_layers,
901 n_batch=n_batch,
902 f16_kv=f16_kv,
903 n_ctx=context_window_size, # Set context window size directly (None = use default)
904 verbose=True,
905 )
907 return wrap_llm_without_think_tags(
908 llm,
909 research_id=research_id,
910 provider=provider,
911 research_context=research_context,
912 settings_snapshot=settings_snapshot,
913 )
915 else:
916 return wrap_llm_without_think_tags(
917 get_fallback_model(temperature),
918 research_id=research_id,
919 provider=provider,
920 research_context=research_context,
921 settings_snapshot=settings_snapshot,
922 )
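For orientation, a minimal usage sketch of the function above (assuming the package is importable as local_deep_research and that an active settings context or a settings_snapshot supplies the remaining llm.* settings; the prompt string is illustrative):

from local_deep_research.config.llm_config import get_available_providers, get_llm

providers = get_available_providers()  # e.g. {"ollama": "Ollama (local models)"}
llm = get_llm(model_name="gemma:latest", temperature=0.7, provider="ollama")
response = llm.invoke("Summarize this configuration module in one sentence.")
# The wrapper returned by get_llm has already stripped any <think> tags.
print(getattr(response, "content", response))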
925def get_fallback_model(temperature=None):
926 """Create a dummy model for when no providers are available"""
927 return FakeListChatModel(
928 responses=[
929 "No language models are available. Please install Ollama or set up API keys."
930 ]
931 )
934def wrap_llm_without_think_tags(
935 llm,
936 research_id=None,
937 provider=None,
938 research_context=None,
939 settings_snapshot=None,
940):
941 """Create a wrapper class that processes LLM outputs with remove_think_tags and token counting"""
943 # First apply rate limiting if enabled
944 from ..web_search_engines.rate_limiting.llm import (
945 create_rate_limited_llm_wrapper,
946 )
948 # Check if LLM rate limiting is enabled (independent of search rate limiting)
949 # Use the thread-safe get_db_setting defined in this module
950 if get_setting_from_snapshot(
951 "rate_limiting.llm_enabled", False, settings_snapshot=settings_snapshot
952 ):
953 llm = create_rate_limited_llm_wrapper(llm, provider)
955 # Set context_limit in research_context for overflow detection.
956 # This is needed for providers that go through the registered provider path
957 # (which returns before the code in get_llm that sets context_limit).
958 if research_context is not None and provider is not None:
959 if "context_limit" not in research_context: 959 ↛ 970line 959 didn't jump to line 970 because the condition on line 959 was always true
960 context_limit = _get_context_window_for_provider(
961 provider, settings_snapshot
962 )
963 if context_limit is not None:    [963 ↛ 964: condition was never true]
964 research_context["context_limit"] = context_limit
965 logger.info(
966 f"Set context_limit={context_limit} in wrap_llm for provider={provider}"
967 )
969 # Import token counting functionality if research_id is provided
970 callbacks = []
971 if research_id is not None:    [971 ↛ 972: condition was never true]
972 from ..metrics import TokenCounter
974 token_counter = TokenCounter()
975 token_callback = token_counter.create_callback(
976 research_id, research_context
977 )
978 # Set provider and model info on the callback
979 if provider:
980 token_callback.preset_provider = provider
981 # Try to extract model name from the LLM instance
982 if hasattr(llm, "model_name"):
983 token_callback.preset_model = llm.model_name
984 elif hasattr(llm, "model"):
985 token_callback.preset_model = llm.model
986 callbacks.append(token_callback)
988 # Add callbacks to the LLM if it supports them
989 if callbacks and hasattr(llm, "callbacks"):    [989 ↛ 990: condition was never true]
990 if llm.callbacks is None:
991 llm.callbacks = callbacks
992 else:
993 llm.callbacks.extend(callbacks)
995 class ProcessingLLMWrapper:
996 def __init__(self, base_llm):
997 self.base_llm = base_llm
999 def invoke(self, *args, **kwargs):
1000 # Removed verbose debug logging to reduce log clutter
1001 # Uncomment the lines below if you need to debug LLM requests
1002 # if hasattr(self.base_llm, "base_url"):
1003 # logger.debug(f"LLM Request - Base URL: {self.base_llm.base_url}")
1004 # logger.debug(f"LLM Request - Model: {getattr(self.base_llm, 'model', 'unknown')}")
1006 try:
1007 response = self.base_llm.invoke(*args, **kwargs)
1008 # logger.debug(f"LLM Response - Success, type: {type(response)}")
1009 except Exception as e:
1010 logger.exception("LLM Request - Failed with error")
1011 # Log any URL information from the error
1012 error_str = str(e)
1013 if "http://" in error_str or "https://" in error_str: 1013 ↛ 1014line 1013 didn't jump to line 1014 because the condition on line 1013 was never true
1014 logger.exception(
1015 f"LLM Request - Error contains URL info: {error_str}"
1016 )
1017 raise
1019 # Process the response content if it has a content attribute
1020 if hasattr(response, "content"):
1021 response.content = remove_think_tags(response.content)
1022 elif isinstance(response, str):    [1022 ↛ 1025: condition was always true]
1023 response = remove_think_tags(response)
1025 return response
1027 # Pass through any other attributes to the base LLM
1028 def __getattr__(self, name):
1029 return getattr(self.base_llm, name)
1031 return ProcessingLLMWrapper(llm)
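Finally, the registered custom LLM branch near the top of get_llm expects factory callables that accept model_name, temperature and settings_snapshot and return a BaseChatModel (per the error messages in that branch). A conforming factory might look like the sketch below; how it is registered with the ..llm registry happens outside this module and is not shown here:

from langchain_core.language_models import BaseChatModel, FakeListChatModel

def my_provider_factory(model_name=None, temperature=None, settings_snapshot=None) -> BaseChatModel:
    # get_llm() calls registered factories with exactly these keyword arguments
    # and validates that a BaseChatModel comes back; FakeListChatModel is a stand-in.
    return FakeListChatModel(responses=[f"stub response from {model_name}"])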