Coverage for src/local_deep_research/config/llm_config.py: 57%
377 statements
coverage.py v7.12.0, created at 2026-01-11 00:51 +0000
1import os
2from functools import cache
4from langchain_anthropic import ChatAnthropic
5from langchain_core.language_models import BaseChatModel, FakeListChatModel
6from langchain_ollama import ChatOllama
7from langchain_openai import ChatOpenAI
8from loguru import logger
10from ..llm import get_llm_from_registry, is_llm_registered
11from ..utilities.search_utilities import remove_think_tags
12from ..utilities.url_utils import normalize_url
13from ..security import safe_get
15# Import providers module to trigger auto-discovery
16try:
17 from ..llm.providers import discover_providers # noqa: F401
18 # Auto-discovery happens on module import
19except ImportError:
20 pass # Providers module not available yet
21from .thread_settings import (
22 get_setting_from_snapshot as _get_setting_from_snapshot,
23 NoSettingsContextError,
24)
26# Valid provider options
27VALID_PROVIDERS = [
28 "ollama",
29 "openai",
30 "anthropic",
31 "google",
32 "openrouter",
33 "vllm",
34 "openai_endpoint",
35 "lmstudio",
36 "llamacpp",
37 "none",
38]
41def get_setting_from_snapshot(
42 key, default=None, username=None, settings_snapshot=None
43):
44 """Get setting from context only - no database access from threads.
46 This is a wrapper around the shared function with the fallback-LLM check enabled.
47 """
48 return _get_setting_from_snapshot(
49 key, default, username, settings_snapshot, check_fallback_llm=True
50 )
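
For orientation, a minimal usage sketch of this wrapper (illustrative only, not part of the covered file). The flat key-to-value snapshot shown here is an assumption; the real snapshot layout is whatever thread_settings expects.

# Illustrative sketch only; the snapshot layout is assumed, not taken from the module.
example_snapshot = {
    "llm.provider": "ollama",
    "llm.model": "gemma:latest",
    "llm.temperature": 0.7,
}
provider = get_setting_from_snapshot(
    "llm.provider", default="ollama", settings_snapshot=example_snapshot
)
temperature = get_setting_from_snapshot(
    "llm.temperature", default=0.7, settings_snapshot=example_snapshot
)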
53def is_openai_available(settings_snapshot=None):
54 """Check if OpenAI is available"""
55 try:
56 api_key = get_setting_from_snapshot(
57 "llm.openai.api_key",
58 default=None,
59 settings_snapshot=settings_snapshot,
60 )
61 return bool(api_key)
62 except Exception:
63 return False
66def is_anthropic_available(settings_snapshot=None):
67 """Check if Anthropic is available"""
68 try:
69 api_key = get_setting_from_snapshot(
70 "llm.anthropic.api_key",
71 default=None,
72 settings_snapshot=settings_snapshot,
73 )
74 return bool(api_key)
75 except Exception:
76 return False
79def is_openai_endpoint_available(settings_snapshot=None):
80 """Check if OpenAI endpoint is available"""
81 try:
82 api_key = get_setting_from_snapshot(
83 "llm.openai_endpoint.api_key",
84 default=None,
85 settings_snapshot=settings_snapshot,
86 )
87 return bool(api_key)
88 except Exception:
89 return False
92def is_ollama_available(settings_snapshot=None):
93 """Check if Ollama is running"""
94 try:
95 import requests
97 raw_base_url = get_setting_from_snapshot(
98 "llm.ollama.url",
99 "http://localhost:11434",
100 settings_snapshot=settings_snapshot,
101 )
102 base_url = (
103 normalize_url(raw_base_url)
104 if raw_base_url
105 else "http://localhost:11434"
106 )
107 logger.info(f"Checking Ollama availability at {base_url}/api/tags")
109 try:
110 response = safe_get(
111 f"{base_url}/api/tags",
112 timeout=3.0,
113 allow_localhost=True,
114 allow_private_ips=True,
115 )
116 if response.status_code == 200:
117 logger.info(
118 f"Ollama is available. Status code: {response.status_code}"
119 )
120 # Log first 100 chars of response to debug
121 logger.info(f"Response preview: {str(response.text)[:100]}")
122 return True
123 else:
124 logger.warning(
125 f"Ollama API returned status code: {response.status_code}"
126 )
127 return False
128 except requests.exceptions.RequestException as req_error:
129 logger.exception(
130 f"Request error when checking Ollama: {req_error!s}"
131 )
132 return False
133 except Exception:
134 logger.exception("Unexpected error when checking Ollama")
135 return False
136 except Exception:
137 logger.exception("Error in is_ollama_available")
138 return False
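
As a side note, a small sketch of how the /api/tags payload can be reduced to model names, mirroring what the Ollama branch of get_llm does further down; the helper is hypothetical and the exact response shape is an assumption based on common Ollama versions.

# Illustrative sketch only (hypothetical helper, not part of the covered file).
def _extract_ollama_model_names(data):
    # Newer Ollama returns {"models": [{"name": ...}, ...]}; older versions
    # returned the list of model dicts directly.
    models = data.get("models", []) if isinstance(data, dict) else data
    return [m.get("name", "").lower() for m in models]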
141def is_vllm_available():
142 """Check if VLLM capability is available"""
143 try:
144 import torch # noqa: F401
145 import transformers # noqa: F401
147 # Only try to import VLLM if the dependencies are available
148 # The VLLM class itself might not fail to import, but using it will fail
149 # without the proper dependencies
150 import vllm # noqa: F401
151 from langchain_community.llms import VLLM # noqa: F401
153 return True
154 except ImportError:
155 return False
158def is_lmstudio_available(settings_snapshot=None):
159 """Check if LM Studio is available"""
160 try:
161 lmstudio_url = get_setting_from_snapshot(
162 "llm.lmstudio.url",
163 "http://localhost:1234/v1",
164 settings_snapshot=settings_snapshot,
165 )
166 # Use URL as-is (default already includes /v1)
167 base_url = normalize_url(lmstudio_url)
168 # LM Studio typically uses OpenAI-compatible endpoints
169 response = safe_get(
170 f"{base_url}/models",
171 timeout=1.0,
172 allow_localhost=True,
173 allow_private_ips=True,
174 )
175 return response.status_code == 200
176 except Exception:
177 return False
180def is_llamacpp_available(settings_snapshot=None):
181 """Check if LlamaCpp is available and configured"""
182 try:
183 # Import check
184 from langchain_community.llms import LlamaCpp # noqa: F401
186 # Get the configured model path
187 model_path_str = get_setting_from_snapshot(
188 "llm.llamacpp_model_path",
189 default=None,
190 settings_snapshot=settings_snapshot,
191 )
193 # If no path configured, LlamaCpp is not available
194 if not model_path_str:
195 return False
197 # Security Note: CodeQL may flag filesystem operations with user input,
198 # so this check deliberately performs no filesystem access at all.
199 # Path validation against allowed directories is deferred until load time.
201 # Here we only verify that a model path is configured.
202 # The actual path validation happens when the model is loaded
203 # (the llamacpp branch of get_llm validates via PathValidator),
204 # and the LlamaCpp library itself validates the path when loading.
205 return True
207 except ImportError:
208 # LlamaCpp library not installed
209 return False
211 except Exception:
212 return False
215def is_google_available(settings_snapshot=None):
216 """Check if Google/Gemini is available"""
217 try:
218 from ..llm.providers.google import GoogleProvider
220 return GoogleProvider.is_available(settings_snapshot)
221 except ImportError:
222 return False
223 except Exception:
224 return False
227def is_openrouter_available(settings_snapshot=None):
228 """Check if OpenRouter is available"""
229 try:
230 from ..llm.providers.openrouter import OpenRouterProvider
232 return OpenRouterProvider.is_available(settings_snapshot)
233 except ImportError:
234 return False
235 except Exception:
236 return False
239@cache
240def get_available_providers(settings_snapshot=None):
241 """Return available model providers"""
242 providers = {}
244 if is_ollama_available(settings_snapshot):
245 providers["ollama"] = "Ollama (local models)"
247 if is_openai_available(settings_snapshot):  # 247 ↛ 248: condition was never true
248 providers["openai"] = "OpenAI API"
250 if is_anthropic_available(settings_snapshot):  # 250 ↛ 251: condition was never true
251 providers["anthropic"] = "Anthropic API"
253 if is_google_available(settings_snapshot):  # 253 ↛ 254: condition was never true
254 providers["google"] = "Google Gemini API"
256 if is_openrouter_available(settings_snapshot):  # 256 ↛ 257: condition was never true
257 providers["openrouter"] = "OpenRouter API"
259 if is_openai_endpoint_available(settings_snapshot):  # 259 ↛ 260: condition was never true
260 providers["openai_endpoint"] = "OpenAI-compatible Endpoint"
262 if is_lmstudio_available(settings_snapshot):  # 262 ↛ 263: condition was never true
263 providers["lmstudio"] = "LM Studio (local models)"
265 if is_llamacpp_available(settings_snapshot):  # 265 ↛ 266: condition was never true
266 providers["llamacpp"] = "LlamaCpp (local models)"
268 # Check for VLLM capability
269 if is_vllm_available():  # 269 ↛ 270: condition was never true
270 providers["vllm"] = "VLLM (local models)"
272 # Default fallback
273 if not providers:
274 providers["none"] = "No model providers available"
276 return providers
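
A brief usage sketch (illustrative only): because the function is decorated with functools.cache, repeated calls with the same hashable argument reuse the first round of availability probes.

# Illustrative sketch only.
providers = get_available_providers()
for key, label in providers.items():
    print(f"{key}: {label}")
# A second call with the same argument is answered from the cache.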
279def get_selected_llm_provider(settings_snapshot=None):
280 return get_setting_from_snapshot(
281 "llm.provider", "ollama", settings_snapshot=settings_snapshot
282 ).lower()
285def get_llm(
286 model_name=None,
287 temperature=None,
288 provider=None,
289 openai_endpoint_url=None,
290 research_id=None,
291 research_context=None,
292 settings_snapshot=None,
293):
294 """
295 Get LLM instance based on model name and provider.
297 Args:
298 model_name: Name of the model to use (if None, uses database setting)
299 temperature: Model temperature (if None, uses database setting)
300 provider: Provider to use (if None, uses database setting)
301 openai_endpoint_url: Custom endpoint URL to use (if None, uses database
302 setting)
303 research_id: Optional research ID for token tracking
304 research_context: Optional research context for enhanced token tracking
305 settings_snapshot: Optional settings snapshot used instead of per-thread database access
306 Returns:
307 A LangChain LLM instance with automatic think-tag removal
308 """
310 # Use database values for parameters if not provided
311 if model_name is None:
312 model_name = get_setting_from_snapshot(
313 "llm.model", "gemma:latest", settings_snapshot=settings_snapshot
314 )
315 if temperature is None:
316 temperature = get_setting_from_snapshot(
317 "llm.temperature", 0.7, settings_snapshot=settings_snapshot
318 )
319 if provider is None:
320 provider = get_setting_from_snapshot(
321 "llm.provider", "ollama", settings_snapshot=settings_snapshot
322 )
324 # Clean model name: remove quotes and extra whitespace
325 if model_name:  # 325 ↛ 329: condition was always true
326 model_name = model_name.strip().strip("\"'").strip()
328 # Clean provider: remove quotes and extra whitespace
329 if provider:  # 329 ↛ 333: condition was always true
330 provider = provider.strip().strip("\"'").strip()
332 # Normalize provider: convert to lowercase
333 provider = provider.lower() if provider else None
335 # Check if this is a registered custom LLM first
336 if provider and is_llm_registered(provider):
337 logger.info(f"Using registered custom LLM: {provider}")
338 custom_llm = get_llm_from_registry(provider)
340 # Check if it's a callable (factory function) or a BaseChatModel instance
341 if callable(custom_llm) and not isinstance(custom_llm, BaseChatModel):
342 # It's a callable (factory function), call it with parameters
343 try:
344 llm_instance = custom_llm(
345 model_name=model_name,
346 temperature=temperature,
347 settings_snapshot=settings_snapshot,
348 )
349 except TypeError as e:
350 # Re-raise TypeError with better message
351 raise TypeError(
352 f"Registered LLM factory '{provider}' has invalid signature. "
353 f"Factory functions must accept 'model_name', 'temperature', and 'settings_snapshot' parameters. "
354 f"Error: {e}"
355 )
357 # Validate the result is a BaseChatModel
358 if not isinstance(llm_instance, BaseChatModel):
359 raise ValueError(
360 f"Factory function for {provider} must return a BaseChatModel instance, "
361 f"got {type(llm_instance).__name__}"
362 )
363 elif isinstance(custom_llm, BaseChatModel):  # 363 ↛ 367: condition was always true
364 # It's already a proper LLM instance, use it directly
365 llm_instance = custom_llm
366 else:
367 raise ValueError(
368 f"Registered LLM {provider} must be either a BaseChatModel instance "
369 f"or a callable factory function. Got: {type(custom_llm).__name__}"
370 )
372 return wrap_llm_without_think_tags(
373 llm_instance,
374 research_id=research_id,
375 provider=provider,
376 research_context=research_context,
377 settings_snapshot=settings_snapshot,
378 )
380 # Check if we're in testing mode and should use fallback (but only when no API keys are configured)
381 # Skip fallback check if we're in test mode with mocks
382 if os.environ.get("LDR_USE_FALLBACK_LLM", "") and not os.environ.get(  # 382 ↛ 386: condition was never true
383 "LDR_TESTING_WITH_MOCKS", ""
384 ):
385 # Only use fallback if the provider has no valid configuration
386 provider_has_config = False
388 if (
389 (
390 provider == "openai"
391 and get_setting_from_snapshot(
392 "llm.openai.api_key",
393 default=None,
394 settings_snapshot=settings_snapshot,
395 )
396 )
397 or (
398 provider == "anthropic"
399 and get_setting_from_snapshot(
400 "llm.anthropic.api_key",
401 default=None,
402 settings_snapshot=settings_snapshot,
403 )
404 )
405 or (
406 provider == "openai_endpoint"
407 and get_setting_from_snapshot(
408 "llm.openai_endpoint.api_key",
409 settings_snapshot=settings_snapshot,
410 )
411 )
412 or (
413 provider == "ollama"
414 and is_ollama_available(settings_snapshot=settings_snapshot)
415 )
416 ):
417 provider_has_config = True
418 elif provider in ["vllm", "lmstudio", "llamacpp"]:
419 # These are local providers, check their availability
420 if (
421 (provider == "vllm" and is_vllm_available())
422 or (
423 provider == "lmstudio"
424 and is_lmstudio_available(
425 settings_snapshot=settings_snapshot
426 )
427 )
428 or (
429 provider == "llamacpp"
430 and is_llamacpp_available(
431 settings_snapshot=settings_snapshot
432 )
433 )
434 ):
435 provider_has_config = True
437 if not provider_has_config:
438 logger.info(
439 "LDR_USE_FALLBACK_LLM is set and no valid provider config found, using fallback model"
440 )
441 return wrap_llm_without_think_tags(
442 get_fallback_model(temperature),
443 research_id=research_id,
444 provider="fallback",
445 research_context=research_context,
446 settings_snapshot=settings_snapshot,
447 )
449 # Validate provider
450 if provider not in VALID_PROVIDERS:
451 logger.error(f"Invalid provider in settings: {provider}")
452 raise ValueError(
453 f"Invalid provider: {provider}. Must be one of: {VALID_PROVIDERS}"
454 )
455 logger.info(
456 f"Getting LLM with model: {model_name}, temperature: {temperature}, provider: {provider}"
457 )
459 # Common parameters for all models
460 common_params = {
461 "temperature": temperature,
462 }
464 # Get context window size from settings (use different defaults for local vs cloud providers)
465 def get_context_window_size(provider_type):
466 if provider_type in ["ollama", "llamacpp", "lmstudio"]:
467 # Local providers: use smaller default to prevent memory issues
468 window_size = get_setting_from_snapshot(
469 "llm.local_context_window_size",
470 4096,
471 settings_snapshot=settings_snapshot,
472 )
473 # Ensure it's an integer
474 return int(window_size) if window_size is not None else 4096
475 else:
476 # Cloud providers: check if unrestricted mode is enabled
477 use_unrestricted = get_setting_from_snapshot(
478 "llm.context_window_unrestricted",
479 True,
480 settings_snapshot=settings_snapshot,
481 )
482 if use_unrestricted:  # 482 ↛ 487: condition was always true
483 # Let cloud providers auto-handle context (return None or very large value)
484 return None # Will be handled per provider
485 else:
486 # Use user-specified limit
487 window_size = get_setting_from_snapshot(
488 "llm.context_window_size",
489 128000,
490 settings_snapshot=settings_snapshot,
491 )
492 return int(window_size) if window_size is not None else 128000
494 context_window_size = get_context_window_size(provider)
496 # Add context limit to research context for overflow detection
497 if research_context and context_window_size:
498 research_context["context_limit"] = context_window_size
499 logger.info(
500 f"Set context_limit={context_window_size} in research_context"
501 )
502 else:
503 logger.debug(
504 f"Context limit not set: research_context={bool(research_context)}, context_window_size={context_window_size}"
505 )
507 if get_setting_from_snapshot(  # 507 ↛ 535: condition was always true
508 "llm.supports_max_tokens", True, settings_snapshot=settings_snapshot
509 ):
510 # Use 80% of context window to leave room for prompts
511 if context_window_size is not None:
512 max_tokens = min(
513 int(
514 get_setting_from_snapshot(
515 "llm.max_tokens",
516 100000,
517 settings_snapshot=settings_snapshot,
518 )
519 ),
520 int(context_window_size * 0.8),
521 )
522 common_params["max_tokens"] = max_tokens
523 else:
524 # Unrestricted context: use the configured max_tokens without a context-window cap
525 max_tokens = int(
526 get_setting_from_snapshot(
527 "llm.max_tokens",
528 100000,
529 settings_snapshot=settings_snapshot,
530 )
531 )
532 common_params["max_tokens"] = max_tokens
534 # Handle different providers
535 if provider == "anthropic":
536 api_key = get_setting_from_snapshot(
537 "llm.anthropic.api_key", settings_snapshot=settings_snapshot
538 )
540 if not api_key:
541 logger.warning(
542 "Anthropic API key not found in settings. Falling back to default model."
543 )
544 return get_fallback_model(temperature)
546 llm = ChatAnthropic(
547 model=model_name, anthropic_api_key=api_key, **common_params
548 )
549 return wrap_llm_without_think_tags(
550 llm,
551 research_id=research_id,
552 provider=provider,
553 research_context=research_context,
554 settings_snapshot=settings_snapshot,
555 )
557 elif provider == "openai":
558 api_key = get_setting_from_snapshot(
559 "llm.openai.api_key", settings_snapshot=settings_snapshot
560 )
562 if not api_key:
563 logger.warning(
564 "OpenAI API key not found in settings. Falling back to default model."
565 )
566 return get_fallback_model(temperature)
568 # Build OpenAI-specific parameters
569 openai_params = {
570 "model": model_name,
571 "api_key": api_key,
572 **common_params,
573 }
575 # Add optional parameters if they exist in settings
576 try:
577 api_base = get_setting_from_snapshot(
578 "llm.openai.api_base",
579 default=None,
580 settings_snapshot=settings_snapshot,
581 )
582 if api_base:  # 582 ↛ 583: condition was never true
583 openai_params["openai_api_base"] = api_base
584 except NoSettingsContextError:
585 pass # Optional parameter
587 try:
588 organization = get_setting_from_snapshot(
589 "llm.openai.organization",
590 default=None,
591 settings_snapshot=settings_snapshot,
592 )
593 if organization:  # 593 ↛ 594: condition was never true
594 openai_params["openai_organization"] = organization
595 except NoSettingsContextError:
596 pass # Optional parameter
598 try:
599 streaming = get_setting_from_snapshot(
600 "llm.streaming",
601 default=None,
602 settings_snapshot=settings_snapshot,
603 )
604 except NoSettingsContextError:
605 streaming = None # Optional parameter
606 if streaming is not None:  # 606 ↛ 607: condition was never true
607 openai_params["streaming"] = streaming
609 try:
610 max_retries = get_setting_from_snapshot(
611 "llm.max_retries",
612 default=None,
613 settings_snapshot=settings_snapshot,
614 )
615 if max_retries is not None:  # 615 ↛ 616: condition was never true
616 openai_params["max_retries"] = max_retries
617 except NoSettingsContextError:
618 pass # Optional parameter
620 try:
621 request_timeout = get_setting_from_snapshot(
622 "llm.request_timeout",
623 default=None,
624 settings_snapshot=settings_snapshot,
625 )
626 if request_timeout is not None:  # 626 ↛ 627: condition was never true
627 openai_params["request_timeout"] = request_timeout
628 except NoSettingsContextError:
629 pass # Optional parameter
631 llm = ChatOpenAI(**openai_params)
632 return wrap_llm_without_think_tags(
633 llm,
634 research_id=research_id,
635 provider=provider,
636 research_context=research_context,
637 settings_snapshot=settings_snapshot,
638 )
640 elif provider == "openai_endpoint":
641 api_key = get_setting_from_snapshot(
642 "llm.openai_endpoint.api_key", settings_snapshot=settings_snapshot
643 )
645 if not api_key:  # 645 ↛ 646: condition was never true
646 logger.warning(
647 "OpenAI endpoint API key not found in settings. Falling back to default model."
648 )
649 return get_fallback_model(temperature)
651 # Get endpoint URL from settings
652 if openai_endpoint_url is None:  # 652 ↛ 658: condition was always true
653 openai_endpoint_url = get_setting_from_snapshot(
654 "llm.openai_endpoint.url",
655 "https://openrouter.ai/api/v1",
656 settings_snapshot=settings_snapshot,
657 )
658 openai_endpoint_url = normalize_url(openai_endpoint_url)
660 llm = ChatOpenAI(
661 model=model_name,
662 api_key=api_key,
663 openai_api_base=openai_endpoint_url,
664 **common_params,
665 )
666 return wrap_llm_without_think_tags(
667 llm,
668 research_id=research_id,
669 provider=provider,
670 research_context=research_context,
671 settings_snapshot=settings_snapshot,
672 )
674 elif provider == "vllm":  # 674 ↛ 675: condition was never true
675 if not is_vllm_available():
676 logger.warning(
677 "VLLM dependencies are not available. Falling back to default model."
678 )
679 return get_fallback_model(temperature)
681 try:
682 from langchain_community.llms import VLLM
684 llm = VLLM(
685 model=model_name,
686 trust_remote_code=True,
687 max_new_tokens=128,
688 top_k=10,
689 top_p=0.95,
690 temperature=temperature,
691 )
692 return wrap_llm_without_think_tags(
693 llm,
694 research_id=research_id,
695 provider=provider,
696 research_context=research_context,
697 )
698 except Exception:
699 logger.exception("Error loading VLLM model")
700 return get_fallback_model(temperature)
702 elif provider == "ollama":
703 try:
704 # Use the configurable Ollama base URL
705 raw_base_url = get_setting_from_snapshot(
706 "llm.ollama.url",
707 "http://localhost:11434",
708 settings_snapshot=settings_snapshot,
709 )
710 base_url = (
711 normalize_url(raw_base_url)
712 if raw_base_url
713 else "http://localhost:11434"
714 )
716 # Check if Ollama is available before trying to use it
717 if not is_ollama_available(settings_snapshot=settings_snapshot):  # 717 ↛ 724: condition was always true
718 logger.error(
719 f"Ollama not available at {base_url}. Falling back to dummy model."
720 )
721 return get_fallback_model(temperature)
723 # Check if the requested model exists
724 try:
725 logger.info(
726 f"Checking if model '{model_name}' exists in Ollama"
727 )
728 response = safe_get(
729 f"{base_url}/api/tags",
730 timeout=3.0,
731 allow_localhost=True,
732 allow_private_ips=True,
733 )
734 if response.status_code == 200:
735 # Handle both newer and older Ollama API formats
736 data = response.json()
737 models = []
738 if "models" in data:
739 # Newer Ollama API
740 models = data.get("models", [])
741 else:
742 # Older Ollama API format
743 models = data
745 # Get list of model names
746 model_names = [m.get("name", "").lower() for m in models]
747 logger.info(
748 f"Available Ollama models: {', '.join(model_names[:5])}{' and more' if len(model_names) > 5 else ''}"
749 )
751 if model_name.lower() not in model_names:
752 logger.error(
753 f"Model '{model_name}' not found in Ollama. Available models: {', '.join(model_names[:5])}"
754 )
755 return get_fallback_model(temperature)
756 except Exception:
757 logger.exception(
758 f"Error checking for model '{model_name}' in Ollama"
759 )
760 # Continue anyway, let ChatOllama handle potential errors
762 logger.info(
763 f"Creating ChatOllama with model={model_name}, base_url={base_url}"
764 )
765 try:
766 # Add num_ctx parameter for Ollama context window size
767 ollama_params = {**common_params}
768 if context_window_size is not None:
769 ollama_params["num_ctx"] = context_window_size
771 # Thinking/reasoning handling for models like deepseek-r1:
772 # The 'reasoning' parameter controls both:
773 # 1. Whether the model performs thinking (makes it smarter when True)
774 # 2. Whether thinking is separated from the answer (always separated when True)
775 #
776 # When reasoning=True:
777 # - Model performs thinking/reasoning
778 # - Thinking goes to additional_kwargs["reasoning_content"] (discarded by LDR)
779 # - Only the final answer appears in response.content
780 #
781 # When reasoning=False:
782 # - Model does NOT perform thinking (faster but less smart)
783 # - Gives direct answers
785 enable_thinking = get_setting_from_snapshot(
786 "llm.ollama.enable_thinking",
787 True, # Default: enable thinking (smarter responses)
788 settings_snapshot=settings_snapshot,
789 )
791 if enable_thinking is not None and isinstance(
792 enable_thinking, bool
793 ):
794 ollama_params["reasoning"] = enable_thinking
795 logger.debug(
796 f"Ollama thinking enabled: {enable_thinking} "
797 f"(thinking will be {'shown internally but discarded' if enable_thinking else 'disabled'})"
798 )
800 llm = ChatOllama(
801 model=model_name, base_url=base_url, **ollama_params
802 )
804 # Log the actual client configuration after creation
805 logger.debug(
806 f"ChatOllama created - base_url attribute: {getattr(llm, 'base_url', 'not found')}"
807 )
808 if hasattr(llm, "_client"):
809 client = llm._client
810 logger.debug(f"ChatOllama _client type: {type(client)}")
811 if hasattr(client, "_client"):
812 inner_client = client._client
813 logger.debug(
814 f"ChatOllama inner client type: {type(inner_client)}"
815 )
816 if hasattr(inner_client, "base_url"):
817 logger.debug(
818 f"ChatOllama inner client base_url: {inner_client.base_url}"
819 )
821 return wrap_llm_without_think_tags(
822 llm,
823 research_id=research_id,
824 provider=provider,
825 research_context=research_context,
826 settings_snapshot=settings_snapshot,
827 )
828 except Exception:
829 logger.exception("Error creating or testing ChatOllama")
830 return get_fallback_model(temperature)
831 except Exception:
832 logger.exception("Error in Ollama provider section")
833 return get_fallback_model(temperature)
835 elif provider == "lmstudio":
836 # LM Studio supports OpenAI API format, so we can use ChatOpenAI directly
837 lmstudio_url = get_setting_from_snapshot(
838 "llm.lmstudio.url",
839 "http://localhost:1234/v1",
840 settings_snapshot=settings_snapshot,
841 )
842 # Use URL as-is (default already includes /v1)
843 base_url = normalize_url(lmstudio_url)
845 llm = ChatOpenAI(
846 model=model_name,
847 api_key="lm-studio", # LM Studio doesn't require a real API key # pragma: allowlist secret
848 base_url=base_url,
849 temperature=temperature,
850 max_tokens=max_tokens, # Use calculated max_tokens based on context size
851 )
852 return wrap_llm_without_think_tags(
853 llm,
854 research_id=research_id,
855 provider=provider,
856 research_context=research_context,
857 settings_snapshot=settings_snapshot,
858 )
860 # LlamaCpp provider: direct model loading or HTTP server client mode
861 elif provider == "llamacpp":  # 861 ↛ 863: condition was never true
862 # Import LlamaCpp
863 from langchain_community.llms import LlamaCpp
865 # Get LlamaCpp connection mode from settings
866 connection_mode = get_setting_from_snapshot(
867 "llm.llamacpp_connection_mode",
868 "local",
869 settings_snapshot=settings_snapshot,
870 )
872 if connection_mode == "http":
873 # Use HTTP client mode
874 from langchain_community.llms.llamacpp_client import LlamaCppClient
876 server_url = get_setting_from_snapshot(
877 "llm.llamacpp_server_url",
878 "http://localhost:8000",
879 settings_snapshot=settings_snapshot,
880 )
882 llm = LlamaCppClient(
883 server_url=server_url,
884 temperature=temperature,
885 max_tokens=get_setting_from_snapshot(
886 "llm.max_tokens", 8192, settings_snapshot=settings_snapshot
887 ),
888 )
889 else:
890 # Use direct model loading (existing code)
891 # Get LlamaCpp model path from settings
892 model_path = get_setting_from_snapshot(
893 "llm.llamacpp_model_path", settings_snapshot=settings_snapshot
894 )
895 if not model_path:
896 logger.error("llamacpp_model_path not set in settings")
897 raise ValueError("llamacpp_model_path not set in settings")
899 # Validate model path for security using centralized validator
900 from ..security.path_validator import PathValidator
902 try:
903 validated_path = PathValidator.validate_model_path(model_path)
904 model_path = str(validated_path)
905 except ValueError:
906 logger.exception("Model path validation failed")
907 raise
909 # Get additional LlamaCpp parameters
910 n_gpu_layers = get_setting_from_snapshot(
911 "llm.llamacpp_n_gpu_layers",
912 1,
913 settings_snapshot=settings_snapshot,
914 )
915 n_batch = get_setting_from_snapshot(
916 "llm.llamacpp_n_batch", 512, settings_snapshot=settings_snapshot
917 )
918 f16_kv = get_setting_from_snapshot(
919 "llm.llamacpp_f16_kv", True, settings_snapshot=settings_snapshot
920 )
922 # Create LlamaCpp instance
923 llm = LlamaCpp(
924 model_path=model_path,
925 temperature=temperature,
926 max_tokens=max_tokens, # Use calculated max_tokens
927 n_gpu_layers=n_gpu_layers,
928 n_batch=n_batch,
929 f16_kv=f16_kv,
930 n_ctx=context_window_size, # Set context window size directly (None = use default)
931 verbose=True,
932 )
934 return wrap_llm_without_think_tags(
935 llm,
936 research_id=research_id,
937 provider=provider,
938 research_context=research_context,
939 settings_snapshot=settings_snapshot,
940 )
942 else:
943 return wrap_llm_without_think_tags(
944 get_fallback_model(temperature),
945 research_id=research_id,
946 provider=provider,
947 research_context=research_context,
948 settings_snapshot=settings_snapshot,
949 )
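
A hedged end-to-end sketch of calling get_llm with an explicit snapshot; the keys mirror the setting names read above, but the snapshot layout and the resulting behaviour (for example, falling back when Ollama is unreachable) are assumptions for illustration.

# Illustrative sketch only.
example_snapshot = {
    "llm.provider": "ollama",
    "llm.model": "gemma:latest",
    "llm.temperature": 0.7,
    "llm.ollama.url": "http://localhost:11434",
    "llm.ollama.enable_thinking": True,
}
llm = get_llm(settings_snapshot=example_snapshot)
result = llm.invoke("Briefly explain what a settings snapshot is used for here.")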
952def get_fallback_model(temperature=None):
953 """Create a dummy model for when no providers are available"""
954 return FakeListChatModel(
955 responses=[
956 "No language models are available. Please install Ollama or set up API keys."
957 ]
958 )
961def wrap_llm_without_think_tags(
962 llm,
963 research_id=None,
964 provider=None,
965 research_context=None,
966 settings_snapshot=None,
967):
968 """Create a wrapper class that processes LLM outputs with remove_think_tags and token counting"""
970 # First apply rate limiting if enabled
971 from ..web_search_engines.rate_limiting.llm import (
972 create_rate_limited_llm_wrapper,
973 )
975 # Check if LLM rate limiting is enabled (independent of search rate limiting)
977 # Use the thread-safe get_setting_from_snapshot wrapper defined in this module
977 if get_setting_from_snapshot(
978 "rate_limiting.llm_enabled", False, settings_snapshot=settings_snapshot
979 ):
980 llm = create_rate_limited_llm_wrapper(llm, provider)
982 # Import token counting functionality if research_id is provided
983 callbacks = []
984 if research_id is not None:  # 984 ↛ 985: condition was never true
985 from ..metrics import TokenCounter
987 token_counter = TokenCounter()
988 token_callback = token_counter.create_callback(
989 research_id, research_context
990 )
991 # Set provider and model info on the callback
992 if provider:
993 token_callback.preset_provider = provider
994 # Try to extract model name from the LLM instance
995 if hasattr(llm, "model_name"):
996 token_callback.preset_model = llm.model_name
997 elif hasattr(llm, "model"):
998 token_callback.preset_model = llm.model
999 callbacks.append(token_callback)
1001 # Add callbacks to the LLM if it supports them
1002 if callbacks and hasattr(llm, "callbacks"):  # 1002 ↛ 1003: condition was never true
1003 if llm.callbacks is None:
1004 llm.callbacks = callbacks
1005 else:
1006 llm.callbacks.extend(callbacks)
1008 class ProcessingLLMWrapper:
1009 def __init__(self, base_llm):
1010 self.base_llm = base_llm
1012 def invoke(self, *args, **kwargs):
1013 # Removed verbose debug logging to reduce log clutter
1014 # Uncomment the lines below if you need to debug LLM requests
1015 # if hasattr(self.base_llm, "base_url"):
1016 # logger.debug(f"LLM Request - Base URL: {self.base_llm.base_url}")
1017 # logger.debug(f"LLM Request - Model: {getattr(self.base_llm, 'model', 'unknown')}")
1019 try:
1020 response = self.base_llm.invoke(*args, **kwargs)
1021 # logger.debug(f"LLM Response - Success, type: {type(response)}")
1022 except Exception as e:
1023 logger.exception("LLM Request - Failed with error")
1024 # Log any URL information from the error
1025 error_str = str(e)
1026 if "http://" in error_str or "https://" in error_str: 1026 ↛ 1027line 1026 didn't jump to line 1027 because the condition on line 1026 was never true
1027 logger.exception(
1028 f"LLM Request - Error contains URL info: {error_str}"
1029 )
1030 raise
1032 # Process the response content if it has a content attribute
1033 if hasattr(response, "content"):
1034 response.content = remove_think_tags(response.content)
1035 elif isinstance(response, str):  # 1035 ↛ 1038: condition was always true
1036 response = remove_think_tags(response)
1038 return response
1040 # Pass through any other attributes to the base LLM
1041 def __getattr__(self, name):
1042 return getattr(self.base_llm, name)
1044 return ProcessingLLMWrapper(llm)
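
Finally, a minimal sketch of the wrapper's post-processing, assuming remove_think_tags strips <think>...</think> blocks (its exact behaviour lives in search_utilities):

# Illustrative sketch only.
raw_llm = FakeListChatModel(
    responses=["<think>internal reasoning</think>The final answer."]
)
wrapped = wrap_llm_without_think_tags(
    raw_llm, provider="fallback", settings_snapshot={}
)
reply = wrapped.invoke("anything")
print(reply.content)  # expected: "The final answer." if think tags are removed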