Coverage for src/local_deep_research/config/llm_config.py

1import os

2from functools import cache

4from langchain_anthropic import ChatAnthropic

5from langchain_core.language_models import BaseChatModel, FakeListChatModel

6from langchain_ollama import ChatOllama

7from langchain_openai import ChatOpenAI

8from loguru import logger

10from ..llm import get_llm_from_registry, is_llm_registered

11from ..utilities.search_utilities import remove_think_tags

12from ..utilities.url_utils import normalize_url

13from ..security import safe_get

15# Import providers module to trigger auto-discovery

16try:

17 from ..llm.providers import discover_providers # noqa: F401

18 # Auto-discovery happens on module import

19except ImportError:

20 pass # Providers module not available yet

21from .thread_settings import (

22 get_llm_setting_from_snapshot as get_setting_from_snapshot,

23 NoSettingsContextError,

24)

# Valid provider options.
# get_llm() rejects any provider name that is neither registered as a custom
# LLM nor listed here. Kept as a list because it is interpolated verbatim into
# the "Invalid provider" error message.
VALID_PROVIDERS = [
    "ollama",
    "openai",
    "anthropic",
    "google",
    "openrouter",
    "vllm",
    "openai_endpoint",
    "lmstudio",
    "llamacpp",
    "none",
]

def is_openai_available(settings_snapshot=None):
    """Check if OpenAI is available by delegating to the provider class.

    Args:
        settings_snapshot: Optional settings snapshot, forwarded unchanged to
            ``OpenAIProvider.is_available``.

    Returns:
        Whatever ``OpenAIProvider.is_available`` returns, or ``False`` when the
        provider module cannot be imported or the check raises.
    """
    try:
        from ..llm.providers.implementations.openai import OpenAIProvider

        return OpenAIProvider.is_available(settings_snapshot)
    except ImportError:
        # Provider implementation (or one of its dependencies) is not installed.
        return False
    except Exception as exc:
        # Best-effort probe: a failing check must not break provider discovery,
        # but leave a trace so a misconfiguration can be diagnosed.
        logger.debug(f"OpenAI availability check failed: {exc!r}")
        return False

def is_anthropic_available(settings_snapshot=None):
    """Check if Anthropic is available by delegating to the provider class.

    Args:
        settings_snapshot: Optional settings snapshot, forwarded unchanged to
            ``AnthropicProvider.is_available``.

    Returns:
        Whatever ``AnthropicProvider.is_available`` returns, or ``False`` when
        the provider module cannot be imported or the check raises.
    """
    try:
        from ..llm.providers.implementations.anthropic import AnthropicProvider

        return AnthropicProvider.is_available(settings_snapshot)
    except ImportError:
        # Provider implementation (or one of its dependencies) is not installed.
        return False
    except Exception as exc:
        # Best-effort probe: never propagate, but record why the check failed.
        logger.debug(f"Anthropic availability check failed: {exc!r}")
        return False

def is_openai_endpoint_available(settings_snapshot=None):
    """Check if OpenAI endpoint is available by delegating to the provider class.

    Args:
        settings_snapshot: Optional settings snapshot, forwarded unchanged to
            ``CustomOpenAIEndpointProvider.is_available``.

    Returns:
        Whatever ``CustomOpenAIEndpointProvider.is_available`` returns, or
        ``False`` when the provider module cannot be imported or the check
        raises.
    """
    try:
        from ..llm.providers.implementations.custom_openai_endpoint import (
            CustomOpenAIEndpointProvider,
        )

        return CustomOpenAIEndpointProvider.is_available(settings_snapshot)
    except ImportError:
        # Provider implementation (or one of its dependencies) is not installed.
        return False
    except Exception as exc:
        # Best-effort probe: never propagate, but record why the check failed.
        logger.debug(f"OpenAI endpoint availability check failed: {exc!r}")
        return False

def is_ollama_available(settings_snapshot=None):
    """Check if Ollama is running by delegating to the provider class.

    Args:
        settings_snapshot: Optional settings snapshot, forwarded unchanged to
            ``OllamaProvider.is_available``.

    Returns:
        Whatever ``OllamaProvider.is_available`` returns, or ``False`` when the
        provider module cannot be imported or the check raises.
    """
    try:
        from ..llm.providers.implementations.ollama import OllamaProvider

        return OllamaProvider.is_available(settings_snapshot)
    except ImportError:
        # Provider implementation (or one of its dependencies) is not installed.
        return False
    except Exception as exc:
        # Best-effort probe: never propagate, but record why the check failed.
        logger.debug(f"Ollama availability check failed: {exc!r}")
        return False

def is_vllm_available():
    """Check if VLLM capability is available.

    The check imports ``torch``, ``transformers``, ``vllm`` and the LangChain
    ``VLLM`` class; all four imports must succeed.

    Returns:
        bool: ``True`` when every import succeeds, ``False`` otherwise.
    """
    try:
        import torch  # noqa: F401
        import transformers  # noqa: F401

        # Only try to import VLLM if the dependencies are available.
        # The VLLM class itself might not fail to import, but using it will
        # fail without the proper dependencies.
        import vllm  # noqa: F401
        from langchain_community.llms import VLLM  # noqa: F401
    except ImportError:
        return False
    except Exception as exc:
        # A broken installation can fail with something other than
        # ImportError; treat that as "not available" like the other probes
        # in this module instead of crashing provider discovery.
        logger.debug(f"VLLM availability check failed: {exc!r}")
        return False
    return True

106

107

def is_lmstudio_available(settings_snapshot=None):
    """Check if LM Studio is available by delegating to the provider class.

    Args:
        settings_snapshot: Optional settings snapshot, forwarded unchanged to
            ``LMStudioProvider.is_available``.

    Returns:
        Whatever ``LMStudioProvider.is_available`` returns, or ``False`` when
        the provider module cannot be imported or the check raises.
    """
    try:
        from ..llm.providers.implementations.lmstudio import LMStudioProvider

        return LMStudioProvider.is_available(settings_snapshot)
    except ImportError:
        # Provider implementation (or one of its dependencies) is not installed.
        return False
    except Exception as exc:
        # Best-effort probe: never propagate, but record why the check failed.
        logger.debug(f"LM Studio availability check failed: {exc!r}")
        return False

118

119

def is_llamacpp_available(settings_snapshot=None):
    """Check if LlamaCpp is available and properly configured.

    Checks that the library is installed and a model path is configured.
    For llama.cpp server connections, use 'openai_endpoint' provider instead.

    Args:
        settings_snapshot: Optional settings snapshot used to look up
            ``llm.llamacpp_model_path``.

    Returns:
        bool: ``True`` when the LlamaCpp class can be imported and a non-empty
        model path is configured; ``False`` otherwise, including when the
        lookup raises.
    """
    try:
        # Import check
        from langchain_community.llms import LlamaCpp  # noqa: F401

        model_path_str = get_setting_from_snapshot(
            "llm.llamacpp_model_path",
            default=None,
            settings_snapshot=settings_snapshot,
        )
    except ImportError:
        # LlamaCpp library not installed
        return False
    except Exception as exc:
        # Best-effort probe: never propagate, but record why the check failed.
        logger.debug(f"LlamaCpp availability check failed: {exc!r}")
        return False

    # Only the presence of a path is checked here; the path itself is
    # validated when the model is actually loaded.
    return bool(model_path_str)

151

152

def is_google_available(settings_snapshot=None):
    """Check if Google/Gemini is available by delegating to the provider class.

    Args:
        settings_snapshot: Optional settings snapshot, forwarded unchanged to
            ``GoogleProvider.is_available``.

    Returns:
        Whatever ``GoogleProvider.is_available`` returns, or ``False`` when the
        provider module cannot be imported or the check raises.
    """
    try:
        # NOTE(review): the other provider checks in this module import from
        # ``..llm.providers.implementations.<name>``; confirm that
        # ``..llm.providers.google`` is the intended module path, otherwise
        # this check always returns False via ImportError.
        from ..llm.providers.google import GoogleProvider

        return GoogleProvider.is_available(settings_snapshot)
    except ImportError:
        return False
    except Exception as exc:
        # Best-effort probe: never propagate, but record why the check failed.
        logger.debug(f"Google availability check failed: {exc!r}")
        return False

163

164

def is_openrouter_available(settings_snapshot=None):
    """Check if OpenRouter is available by delegating to the provider class.

    Args:
        settings_snapshot: Optional settings snapshot, forwarded unchanged to
            ``OpenRouterProvider.is_available``.

    Returns:
        Whatever ``OpenRouterProvider.is_available`` returns, or ``False`` when
        the provider module cannot be imported or the check raises.
    """
    try:
        # NOTE(review): the other provider checks in this module import from
        # ``..llm.providers.implementations.<name>``; confirm that
        # ``..llm.providers.openrouter`` is the intended module path, otherwise
        # this check always returns False via ImportError.
        from ..llm.providers.openrouter import OpenRouterProvider

        return OpenRouterProvider.is_available(settings_snapshot)
    except ImportError:
        return False
    except Exception as exc:
        # Best-effort probe: never propagate, but record why the check failed.
        logger.debug(f"OpenRouter availability check failed: {exc!r}")
        return False

175

176

def _collect_available_providers(settings_snapshot=None):
    """Probe every known provider and return the available ones as {key: label}."""
    # The probe table is built on each call so the module-level ``is_*``
    # functions are looked up at call time.
    probes = (
        ("ollama", "Ollama (local models)", is_ollama_available),
        ("openai", "OpenAI API", is_openai_available),
        ("anthropic", "Anthropic API", is_anthropic_available),
        ("google", "Google Gemini API", is_google_available),
        ("openrouter", "OpenRouter API", is_openrouter_available),
        (
            "openai_endpoint",
            "OpenAI-compatible Endpoint",
            is_openai_endpoint_available,
        ),
        ("lmstudio", "LM Studio (local models)", is_lmstudio_available),
        ("llamacpp", "LlamaCpp (local models)", is_llamacpp_available),
    )
    providers = {
        key: label for key, label, probe in probes if probe(settings_snapshot)
    }

    # Check for VLLM capability (does not depend on settings)
    if is_vllm_available():
        providers["vllm"] = "VLLM (local models)"

    # Default fallback
    if not providers:
        providers["none"] = "No model providers available"

    return providers


@cache
def _get_default_available_providers():
    """Cached provider discovery for calls made without a settings snapshot."""
    return _collect_available_providers(None)


def get_available_providers(settings_snapshot=None):
    """Return available model providers.

    Args:
        settings_snapshot: Optional settings snapshot passed to each
            provider availability check.

    Returns:
        dict: Maps provider key to a human-readable label, in a fixed probe
        order. Contains the single key ``"none"`` when no provider is
        available.

    Only the call without a snapshot is memoised. The function used to be
    decorated with ``functools.cache`` directly, which raises ``TypeError``
    for unhashable arguments such as a ``dict`` snapshot; calls that pass a
    snapshot are therefore evaluated fresh each time.
    """
    if settings_snapshot is None:
        return _get_default_available_providers()
    return _collect_available_providers(settings_snapshot)


# Keep the cache-management API that ``functools.cache`` exposed on the
# original function, so existing ``get_available_providers.cache_clear()``
# calls keep working.
get_available_providers.cache_clear = _get_default_available_providers.cache_clear
get_available_providers.cache_info = _get_default_available_providers.cache_info

215

216

def get_selected_llm_provider(settings_snapshot=None):
    """Return the configured LLM provider name, lowercased.

    Args:
        settings_snapshot: Optional settings snapshot used for the lookup.

    Returns:
        str: The value of the ``llm.provider`` setting in lowercase;
        ``"ollama"`` is passed as the default to the settings lookup.
    """
    selected_provider = get_setting_from_snapshot(
        "llm.provider", "ollama", settings_snapshot=settings_snapshot
    )
    return selected_provider.lower()

221

222

def _get_context_window_for_provider(provider_type, settings_snapshot=None):
    """Get context window size from settings based on provider type.

    Local providers (ollama, llamacpp, lmstudio) use a smaller default to prevent
    memory issues. Cloud providers check if unrestricted mode is enabled.

    Args:
        provider_type: Provider name; expected to be already lowercased, since
            it is compared against lowercase literals.
        settings_snapshot: Optional settings snapshot used for the lookups.

    Returns:
        int or None: The context window size, or None for unrestricted cloud providers.
    """
    if provider_type in ("ollama", "llamacpp", "lmstudio"):
        # Local providers: use smaller default to prevent memory issues
        local_default = 4096
        window_size = get_setting_from_snapshot(
            "llm.local_context_window_size",
            local_default,
            settings_snapshot=settings_snapshot,
        )
        # The setting may come back as None or as a non-int; normalise it.
        return int(window_size) if window_size is not None else local_default

    # Cloud providers: check if unrestricted mode is enabled
    use_unrestricted = get_setting_from_snapshot(
        "llm.context_window_unrestricted",
        True,
        settings_snapshot=settings_snapshot,
    )
    if use_unrestricted:
        # Let the cloud provider handle context size itself.
        return None

    # Use user-specified limit
    cloud_default = 128000
    window_size = get_setting_from_snapshot(
        "llm.context_window_size",
        cloud_default,
        settings_snapshot=settings_snapshot,
    )
    return int(window_size) if window_size is not None else cloud_default

259

260

def _provider_has_usable_config(provider, settings_snapshot=None):
    """Return True when *provider* has an API key configured or is reachable locally."""
    api_key_settings = {
        "openai": "llm.openai.api_key",
        "anthropic": "llm.anthropic.api_key",
    }
    if provider in api_key_settings:
        return bool(
            get_setting_from_snapshot(
                api_key_settings[provider],
                default=None,
                settings_snapshot=settings_snapshot,
            )
        )
    if provider == "openai_endpoint":
        return bool(
            get_setting_from_snapshot(
                "llm.openai_endpoint.api_key",
                settings_snapshot=settings_snapshot,
            )
        )
    if provider == "ollama":
        return bool(is_ollama_available(settings_snapshot=settings_snapshot))
    # Remaining local providers: check their availability
    if provider == "vllm":
        return bool(is_vllm_available())
    if provider == "lmstudio":
        return bool(is_lmstudio_available(settings_snapshot=settings_snapshot))
    if provider == "llamacpp":
        return bool(is_llamacpp_available(settings_snapshot=settings_snapshot))
    return False


def _get_optional_setting(key, settings_snapshot=None):
    """Read an optional setting; return None when it is unset or no settings context exists."""
    try:
        return get_setting_from_snapshot(
            key, default=None, settings_snapshot=settings_snapshot
        )
    except NoSettingsContextError:
        return None


def _validate_llamacpp_model_path(model_path):
    """Validate the configured LlamaCpp model path and return the validated path.

    Raises:
        ValueError: If validation fails. When the path turns out to be a
            directory inside the models directory, the message lists the
            ``.gguf`` files found there.
    """
    # Validate model path for security FIRST using centralized validator.
    # This MUST happen before any filesystem operations on user input.
    from ..security.path_validator import PathValidator
    from .paths import get_models_directory

    try:
        return PathValidator.validate_model_path(model_path)
    except ValueError as e:
        # If the path is not a file, try to provide a helpful directory listing.
        suggestion = None
        if "not a file" in str(e):
            try:
                model_root = str(get_models_directory())
                safe_path = PathValidator.validate_safe_path(
                    model_path, model_root, allow_absolute=False
                )
                if safe_path and safe_path.is_dir():
                    gguf_files = list(safe_path.glob("*.gguf"))
                    if gguf_files:
                        files_list = ", ".join(f.name for f in gguf_files[:5])
                        if len(gguf_files) > 5:
                            files_list += f" (and {len(gguf_files) - 5} more)"
                        suggestion = f"Found .gguf files: {files_list}"
                    else:
                        suggestion = "No .gguf files found in this directory"
            except ValueError:
                # Secondary validation failed: re-raise the original error below.
                suggestion = None

        if suggestion is not None:
            # Raised OUTSIDE the inner try: previously this error was raised
            # inside it and immediately swallowed by ``except ValueError: pass``,
            # so callers never saw the directory hint.
            raise ValueError(
                f"Model path is a directory, not a file: {model_path}\n"
                f"Please specify the full path to a .gguf model file.\n"
                f"{suggestion}"
            ) from e

        logger.exception("Model path validation failed")
        raise


def get_llm(
    model_name=None,
    temperature=None,
    provider=None,
    openai_endpoint_url=None,
    research_id=None,
    research_context=None,
    settings_snapshot=None,
):
    """
    Get LLM instance based on model name and provider.

    Args:
        model_name: Name of the model to use (if None, uses database setting)
        temperature: Model temperature (if None, uses database setting)
        provider: Provider to use (if None, uses database setting)
        openai_endpoint_url: Custom endpoint URL to use (if None, uses database
            setting)
        research_id: Optional research ID for token tracking
        research_context: Optional research context for enhanced token tracking.
            When truthy and a context window size is known, its
            ``"context_limit"`` key is set.
        settings_snapshot: Optional settings snapshot used for every settings
            lookup.

    Returns:
        A LangChain LLM instance with automatic think-tag removal. Several
        "provider not usable" paths return the bare fallback model from
        ``get_fallback_model`` without the wrapper.

    Raises:
        ValueError: If the provider is neither registered nor listed in
            ``VALID_PROVIDERS``, if a registered LLM or factory result has the
            wrong type, or if the LlamaCpp model path is missing or invalid.
        TypeError: If a registered factory raises ``TypeError`` when called
            with ``model_name``, ``temperature`` and ``settings_snapshot``.
    """
    # Use database values for parameters if not provided
    if model_name is None:
        model_name = get_setting_from_snapshot(
            "llm.model", "gemma:latest", settings_snapshot=settings_snapshot
        )
    if temperature is None:
        temperature = get_setting_from_snapshot(
            "llm.temperature", 0.7, settings_snapshot=settings_snapshot
        )
    if provider is None:
        provider = get_setting_from_snapshot(
            "llm.provider", "ollama", settings_snapshot=settings_snapshot
        )

    # Clean model name and provider: remove quotes and extra whitespace
    if model_name:
        model_name = model_name.strip().strip("\"'").strip()
    if provider:
        provider = provider.strip().strip("\"'").strip()

    # Normalize provider: convert to lowercase
    provider = provider.lower() if provider else None

    def _wrap(llm_instance, provider_label=provider):
        """Apply the shared think-tag / token-counting wrapper."""
        return wrap_llm_without_think_tags(
            llm_instance,
            research_id=research_id,
            provider=provider_label,
            research_context=research_context,
            settings_snapshot=settings_snapshot,
        )

    # Check if this is a registered custom LLM first
    if provider and is_llm_registered(provider):
        logger.info(f"Using registered custom LLM: {provider}")
        custom_llm = get_llm_from_registry(provider)

        if isinstance(custom_llm, BaseChatModel):
            # It's already a proper LLM instance, use it directly
            llm_instance = custom_llm
        elif callable(custom_llm):
            # It's a callable (factory function), call it with parameters
            try:
                llm_instance = custom_llm(
                    model_name=model_name,
                    temperature=temperature,
                    settings_snapshot=settings_snapshot,
                )
            except TypeError as e:
                # Re-raise TypeError with better message, keeping the cause
                raise TypeError(
                    f"Registered LLM factory '{provider}' has invalid signature. "
                    f"Factory functions must accept 'model_name', 'temperature', and 'settings_snapshot' parameters. "
                    f"Error: {e}"
                ) from e

            # Validate the result is a BaseChatModel
            if not isinstance(llm_instance, BaseChatModel):
                raise ValueError(
                    f"Factory function for {provider} must return a BaseChatModel instance, "
                    f"got {type(llm_instance).__name__}"
                )
        else:
            raise ValueError(
                f"Registered LLM {provider} must be either a BaseChatModel instance "
                f"or a callable factory function. Got: {type(custom_llm).__name__}"
            )

        return _wrap(llm_instance)

    # Check if we're in testing mode and should use fallback (but only when no
    # API keys are configured). Skip fallback check if we're in test mode with mocks.
    if os.environ.get("LDR_USE_FALLBACK_LLM", "") and not os.environ.get(
        "LDR_TESTING_WITH_MOCKS", ""
    ):
        # Only use fallback if the provider has no valid configuration
        if not _provider_has_usable_config(provider, settings_snapshot):
            logger.info(
                "LDR_USE_FALLBACK_LLM is set and no valid provider config found, using fallback model"
            )
            return _wrap(get_fallback_model(temperature), "fallback")

    # Validate provider
    if provider not in VALID_PROVIDERS:
        logger.error(f"Invalid provider in settings: {provider}")
        raise ValueError(
            f"Invalid provider: {provider}. Must be one of: {VALID_PROVIDERS}"
        )
    logger.info(
        f"Getting LLM with model: {model_name}, temperature: {temperature}, provider: {provider}"
    )

    # Common parameters for all models
    common_params = {
        "temperature": temperature,
    }

    context_window_size = _get_context_window_for_provider(
        provider, settings_snapshot
    )

    # Add context limit to research context for overflow detection
    if research_context and context_window_size:
        research_context["context_limit"] = context_window_size
        logger.info(
            f"Set context_limit={context_window_size} in research_context"
        )
    else:
        logger.debug(
            f"Context limit not set: research_context={bool(research_context)}, context_window_size={context_window_size}"
        )

    # ``max_tokens`` ends up in ``common_params`` only when the model supports
    # it. Providers below take it from ``common_params`` so it is never read
    # while unbound.
    if get_setting_from_snapshot(
        "llm.supports_max_tokens", True, settings_snapshot=settings_snapshot
    ):
        max_tokens = int(
            get_setting_from_snapshot(
                "llm.max_tokens",
                100000,
                settings_snapshot=settings_snapshot,
            )
        )
        if context_window_size is not None:
            # Use at most 80% of context window to leave room for prompts
            max_tokens = min(max_tokens, int(context_window_size * 0.8))
        common_params["max_tokens"] = max_tokens

    # Handle different providers
    if provider == "anthropic":
        api_key = get_setting_from_snapshot(
            "llm.anthropic.api_key", settings_snapshot=settings_snapshot
        )

        if not api_key:
            logger.warning(
                "Anthropic API key not found in settings. Falling back to default model."
            )
            return get_fallback_model(temperature)

        llm = ChatAnthropic(
            model=model_name, anthropic_api_key=api_key, **common_params
        )
        return _wrap(llm)

    elif provider == "openai":
        api_key = get_setting_from_snapshot(
            "llm.openai.api_key", settings_snapshot=settings_snapshot
        )

        if not api_key:
            logger.warning(
                "OpenAI API key not found in settings. Falling back to default model."
            )
            return get_fallback_model(temperature)

        # Build OpenAI-specific parameters
        openai_params = {
            "model": model_name,
            "api_key": api_key,
            **common_params,
        }

        # Add optional parameters if they exist in settings.
        # Base URL and organization are skipped when empty ...
        api_base = _get_optional_setting(
            "llm.openai.api_base", settings_snapshot
        )
        if api_base:
            openai_params["openai_api_base"] = api_base

        organization = _get_optional_setting(
            "llm.openai.organization", settings_snapshot
        )
        if organization:
            openai_params["openai_organization"] = organization

        # ... while these are forwarded whenever they are set at all, so
        # falsy values such as False or 0 are preserved.
        for setting_key, param_name in (
            ("llm.streaming", "streaming"),
            ("llm.max_retries", "max_retries"),
            ("llm.request_timeout", "request_timeout"),
        ):
            value = _get_optional_setting(setting_key, settings_snapshot)
            if value is not None:
                openai_params[param_name] = value

        llm = ChatOpenAI(**openai_params)
        return _wrap(llm)

    elif provider == "openai_endpoint":
        api_key = get_setting_from_snapshot(
            "llm.openai_endpoint.api_key", settings_snapshot=settings_snapshot
        )

        if not api_key:
            logger.warning(
                "OpenAI endpoint API key not found in settings. Falling back to default model."
            )
            return get_fallback_model(temperature)

        # Get endpoint URL from settings
        if openai_endpoint_url is None:
            openai_endpoint_url = get_setting_from_snapshot(
                "llm.openai_endpoint.url",
                "https://openrouter.ai/api/v1",
                settings_snapshot=settings_snapshot,
            )
        openai_endpoint_url = normalize_url(openai_endpoint_url)

        llm = ChatOpenAI(
            model=model_name,
            api_key=api_key,
            openai_api_base=openai_endpoint_url,
            **common_params,
        )
        return _wrap(llm)

    elif provider == "vllm":
        if not is_vllm_available():
            logger.warning(
                "VLLM dependencies are not available. Falling back to default model."
            )
            return get_fallback_model(temperature)

        try:
            from langchain_community.llms import VLLM

            llm = VLLM(
                model=model_name,
                trust_remote_code=True,
                max_new_tokens=128,
                top_k=10,
                top_p=0.95,
                temperature=temperature,
            )
            # The wrapper now receives settings_snapshot here too, like every
            # other provider branch.
            return _wrap(llm)
        except Exception:
            logger.exception("Error loading VLLM model")
            return get_fallback_model(temperature)

    elif provider == "ollama":
        try:
            # Use the configurable Ollama base URL
            raw_base_url = get_setting_from_snapshot(
                "llm.ollama.url",
                "http://localhost:11434",
                settings_snapshot=settings_snapshot,
            )
            base_url = (
                normalize_url(raw_base_url)
                if raw_base_url
                else "http://localhost:11434"
            )

            # Check if Ollama is available before trying to use it
            if not is_ollama_available(settings_snapshot=settings_snapshot):
                logger.error(
                    f"Ollama not available at {base_url}. Falling back to dummy model."
                )
                return get_fallback_model(temperature)

            # Check if the requested model exists
            try:
                logger.info(
                    f"Checking if model '{model_name}' exists in Ollama"
                )
                response = safe_get(
                    f"{base_url}/api/tags",
                    timeout=3.0,
                    allow_localhost=True,
                    allow_private_ips=True,
                )
                if response.status_code == 200:
                    # Handle both newer and older Ollama API formats:
                    # newer responses nest the list under "models", older
                    # ones return the list directly.
                    data = response.json()
                    models = data.get("models", []) if "models" in data else data

                    # Get list of model names
                    model_names = [m.get("name", "").lower() for m in models]
                    preview = ", ".join(model_names[:5])
                    more_suffix = " and more" if len(model_names) > 5 else ""
                    logger.info(
                        f"Available Ollama models: {preview}{more_suffix}"
                    )

                    if model_name.lower() not in model_names:
                        logger.error(
                            f"Model '{model_name}' not found in Ollama. Available models: {preview}"
                        )
                        return get_fallback_model(temperature)
            except Exception:
                logger.exception(
                    f"Error checking for model '{model_name}' in Ollama"
                )
                # Continue anyway, let ChatOllama handle potential errors

            logger.info(
                f"Creating ChatOllama with model={model_name}, base_url={base_url}"
            )
            try:
                # Add num_ctx parameter for Ollama context window size
                ollama_params = {**common_params}
                if context_window_size is not None:
                    ollama_params["num_ctx"] = context_window_size

                # Thinking/reasoning handling for models like deepseek-r1:
                # The 'reasoning' parameter controls both:
                # 1. Whether the model performs thinking (makes it smarter when True)
                # 2. Whether thinking is separated from the answer (always separated when True)
                #
                # When reasoning=True:
                # - Model performs thinking/reasoning
                # - Thinking goes to additional_kwargs["reasoning_content"] (discarded by LDR)
                # - Only the final answer appears in response.content
                #
                # When reasoning=False:
                # - Model does NOT perform thinking (faster but less smart)
                # - Gives direct answers
                enable_thinking = get_setting_from_snapshot(
                    "llm.ollama.enable_thinking",
                    True,  # Default: enable thinking (smarter responses)
                    settings_snapshot=settings_snapshot,
                )

                # Only a real bool is forwarded; any other value is ignored.
                if isinstance(enable_thinking, bool):
                    ollama_params["reasoning"] = enable_thinking
                    logger.debug(
                        f"Ollama thinking enabled: {enable_thinking} "
                        f"(thinking will be {'shown internally but discarded' if enable_thinking else 'disabled'})"
                    )

                llm = ChatOllama(
                    model=model_name, base_url=base_url, **ollama_params
                )

                # Log the actual client configuration after creation
                logger.debug(
                    f"ChatOllama created - base_url attribute: {getattr(llm, 'base_url', 'not found')}"
                )
                if hasattr(llm, "_client"):
                    client = llm._client
                    logger.debug(f"ChatOllama _client type: {type(client)}")
                    if hasattr(client, "_client"):
                        inner_client = client._client
                        logger.debug(
                            f"ChatOllama inner client type: {type(inner_client)}"
                        )
                        if hasattr(inner_client, "base_url"):
                            logger.debug(
                                f"ChatOllama inner client base_url: {inner_client.base_url}"
                            )

                return _wrap(llm)
            except Exception:
                logger.exception("Error creating or testing ChatOllama")
                return get_fallback_model(temperature)
        except Exception:
            logger.exception("Error in Ollama provider section")
            return get_fallback_model(temperature)

    elif provider == "lmstudio":
        # LM Studio supports OpenAI API format, so we can use ChatOpenAI directly
        lmstudio_url = get_setting_from_snapshot(
            "llm.lmstudio.url",
            "http://localhost:1234/v1",
            settings_snapshot=settings_snapshot,
        )
        # Use URL as-is (default already includes /v1)
        base_url = normalize_url(lmstudio_url)

        llm = ChatOpenAI(
            model=model_name,
            api_key="lm-studio",  # LM Studio doesn't require a real API key  # pragma: allowlist secret
            base_url=base_url,
            # temperature, plus max_tokens when the model supports it
            **common_params,
        )
        return _wrap(llm)

    elif provider == "llamacpp":
        # Import LlamaCpp
        from langchain_community.llms import LlamaCpp

        # Note: For llama.cpp server connections, use 'openai_endpoint' provider
        # with the server's /v1 URL (e.g., 'http://localhost:8000/v1')

        # Get LlamaCpp model path from settings
        model_path = get_setting_from_snapshot(
            "llm.llamacpp_model_path", settings_snapshot=settings_snapshot
        )
        if not model_path:
            logger.error("llamacpp_model_path not set in settings")
            raise ValueError(
                "LlamaCpp model path not configured. Either:\n"
                "1. Set 'llm.llamacpp_model_path' to your .gguf file path, or\n"
                "2. For llama.cpp server connections, use 'openai_endpoint' provider "
                "with the server's /v1 endpoint (e.g., 'http://localhost:8000/v1')"
            )

        validated_path = _validate_llamacpp_model_path(model_path)
        model_path = str(validated_path)

        # Validate file extension - LlamaCpp requires .gguf or .bin files
        # Safe to use validated_path here since it passed security validation
        if validated_path.suffix.lower() not in (".gguf", ".bin"):
            raise ValueError(
                f"Invalid model file extension: {validated_path.suffix}\n"
                f"LlamaCpp requires .gguf or .bin model files.\n"
                f"File: {validated_path.name}"
            )

        # Get additional LlamaCpp parameters
        n_gpu_layers = get_setting_from_snapshot(
            "llm.llamacpp_n_gpu_layers",
            1,
            settings_snapshot=settings_snapshot,
        )
        n_batch = get_setting_from_snapshot(
            "llm.llamacpp_n_batch", 512, settings_snapshot=settings_snapshot
        )
        f16_kv = get_setting_from_snapshot(
            "llm.llamacpp_f16_kv", True, settings_snapshot=settings_snapshot
        )

        # Create LlamaCpp instance
        llm = LlamaCpp(
            model_path=model_path,
            n_gpu_layers=n_gpu_layers,
            n_batch=n_batch,
            f16_kv=f16_kv,
            n_ctx=context_window_size,  # Set context window size directly
            verbose=True,
            # temperature, plus max_tokens when the model supports it
            **common_params,
        )

        return _wrap(llm)

    else:
        # Valid provider name without a dedicated branch here: use the
        # (wrapped) fallback model.
        return _wrap(get_fallback_model(temperature))

923

924

def get_fallback_model(temperature=None):
    """Create a dummy model for when no providers are available.

    Args:
        temperature: Accepted so callers can pass it uniformly; it is not
            used by the dummy model.

    Returns:
        FakeListChatModel: A model configured with a single canned response
        telling the user to install Ollama or set up API keys.
    """
    unavailable_message = (
        "No language models are available. Please install Ollama or set up API keys."
    )
    return FakeListChatModel(responses=[unavailable_message])

932

933

def wrap_llm_without_think_tags(
    llm,
    research_id=None,
    provider=None,
    research_context=None,
    settings_snapshot=None,
):
    """Create a wrapper that processes LLM outputs with remove_think_tags and token counting.

    Args:
        llm: The LLM object to wrap; it must provide ``invoke``.
        research_id: When not None, a token-counting callback is created and
            attached to ``llm.callbacks`` (if the LLM has that attribute).
        provider: Provider name; used for rate limiting, the context-limit
            lookup and as ``preset_provider`` on the token callback.
        research_context: Optional dict; its ``"context_limit"`` key is filled
            in when missing and a limit is known for ``provider``.
        settings_snapshot: Optional settings snapshot used for settings lookups.

    Returns:
        A ``ProcessingLLMWrapper`` whose ``invoke`` strips think tags from the
        response and which forwards every other attribute to the wrapped LLM.
    """
    # First apply rate limiting if enabled
    from ..web_search_engines.rate_limiting.llm import (
        create_rate_limited_llm_wrapper,
    )

    # Check if LLM rate limiting is enabled (independent of search rate limiting)
    if get_setting_from_snapshot(
        "rate_limiting.llm_enabled", False, settings_snapshot=settings_snapshot
    ):
        llm = create_rate_limited_llm_wrapper(llm, provider)

    # Set context_limit in research_context for overflow detection.
    # This is needed for providers that go through the registered provider path
    # (which returns before the code in get_llm that sets context_limit).
    if (
        research_context is not None
        and provider is not None
        and "context_limit" not in research_context
    ):
        context_limit = _get_context_window_for_provider(
            provider, settings_snapshot
        )
        if context_limit is not None:
            research_context["context_limit"] = context_limit
            logger.info(
                f"Set context_limit={context_limit} in wrap_llm for provider={provider}"
            )

    # Import token counting functionality if research_id is provided
    callbacks = []
    if research_id is not None:
        from ..metrics import TokenCounter

        token_counter = TokenCounter()
        token_callback = token_counter.create_callback(
            research_id, research_context
        )
        # Set provider and model info on the callback
        if provider:
            token_callback.preset_provider = provider
            # Try to extract model name from the LLM instance
            if hasattr(llm, "model_name"):
                token_callback.preset_model = llm.model_name
            elif hasattr(llm, "model"):
                token_callback.preset_model = llm.model
        callbacks.append(token_callback)

    # Add callbacks to the LLM if it supports them
    if callbacks and hasattr(llm, "callbacks"):
        if llm.callbacks is None:
            llm.callbacks = callbacks
        else:
            llm.callbacks.extend(callbacks)

    class ProcessingLLMWrapper:
        """Proxy around an LLM that strips think tags from ``invoke`` results."""

        def __init__(self, base_llm):
            self.base_llm = base_llm

        def invoke(self, *args, **kwargs):
            """Invoke the wrapped LLM and remove think tags from its response."""
            try:
                response = self.base_llm.invoke(*args, **kwargs)
            except Exception as e:
                logger.exception("LLM Request - Failed with error")
                # Log any URL information from the error. The traceback was
                # already recorded above, so a plain error record is enough.
                error_str = str(e)
                if "http://" in error_str or "https://" in error_str:
                    logger.error(
                        f"LLM Request - Error contains URL info: {error_str}"
                    )
                raise

            # Process the response content if it has a content attribute
            if hasattr(response, "content"):
                response.content = remove_think_tags(response.content)
            elif isinstance(response, str):
                response = remove_think_tags(response)

            return response

        # Pass through any other attributes to the base LLM
        def __getattr__(self, name):
            # __getattr__ only runs when normal lookup fails. If ``base_llm``
            # itself is missing (e.g. an instance created without __init__
            # by copy/pickle), delegating would recurse forever.
            if name == "base_llm":
                raise AttributeError(name)
            return getattr(self.base_llm, name)

    return ProcessingLLMWrapper(llm)

Coverage for src / local_deep_research / config / llm_config.py: 70%

386 statements