Coverage for src / local_deep_research / config / llm_config.py: 94%

351 statements  

« prev     ^ index     » next       coverage.py v7.13.5, created at 2026-04-14 23:55 +0000

1from functools import cache 

2 

3from langchain_anthropic import ChatAnthropic 

4from langchain_core.language_models import BaseChatModel 

5from langchain_ollama import ChatOllama 

6from langchain_openai import ChatOpenAI 

7from loguru import logger 

8 

9from ..llm import get_llm_from_registry, is_llm_registered 

10from ..utilities.search_utilities import remove_think_tags 

11from ..utilities.url_utils import normalize_url 

12from .constants import DEFAULT_OLLAMA_URL, DEFAULT_LMSTUDIO_URL 

13 

14# Import providers module to trigger auto-discovery 

15try: 

16 from ..llm.providers import discover_providers # noqa: F401 

17 # Auto-discovery happens on module import 

18except ImportError: 

19 logger.debug("Providers module not available yet") 

20from ..llm.providers.base import normalize_provider 

21from .thread_settings import ( 

22 get_setting_from_snapshot, 

23 NoSettingsContextError, 

24) 

25 

# Valid provider options
# Canonical (lowercase) provider identifiers accepted by get_llm().
# "none" is a sentinel meaning no provider is configured; get_llm raises
# a ValueError when it is selected.
VALID_PROVIDERS = [
    "ollama",
    "openai",
    "anthropic",
    "google",
    "openrouter",
    "openai_endpoint",
    "lmstudio",
    "llamacpp",
    "none",
]

38 

39 

def is_openai_available(settings_snapshot=None):
    """Report whether the OpenAI provider is usable.

    Delegates to ``OpenAIProvider.is_available``; any failure (missing
    dependency or unexpected error) is treated as "unavailable".
    """
    available = False
    try:
        from ..llm.providers.implementations.openai import OpenAIProvider

        available = OpenAIProvider.is_available(settings_snapshot)
    except ImportError:
        available = False
    except Exception:
        logger.debug("Error checking OpenAI availability", exc_info=True)
        available = False
    return available

51 

52 

def is_anthropic_available(settings_snapshot=None):
    """Report whether the Anthropic provider is usable.

    Delegates to ``AnthropicProvider.is_available``; returns False on any
    import failure or unexpected error.
    """
    available = False
    try:
        from ..llm.providers.implementations.anthropic import AnthropicProvider

        available = AnthropicProvider.is_available(settings_snapshot)
    except ImportError:
        available = False
    except Exception:
        logger.debug("Error checking Anthropic availability", exc_info=True)
        available = False
    return available

64 

65 

def is_openai_endpoint_available(settings_snapshot=None):
    """Report whether a custom OpenAI-compatible endpoint is usable.

    Delegates to ``CustomOpenAIEndpointProvider.is_available``; returns
    False on any import failure or unexpected error.
    """
    try:
        from ..llm.providers.implementations.custom_openai_endpoint import (
            CustomOpenAIEndpointProvider,
        )
    except ImportError:
        return False
    try:
        return CustomOpenAIEndpointProvider.is_available(settings_snapshot)
    except ImportError:
        return False
    except Exception:
        logger.debug(
            "Error checking OpenAI endpoint availability", exc_info=True
        )
        return False

81 

82 

def is_ollama_available(settings_snapshot=None):
    """Report whether a reachable Ollama instance is available.

    Delegates to ``OllamaProvider.is_available``; returns False on any
    import failure or unexpected error.
    """
    try:
        from ..llm.providers.implementations.ollama import OllamaProvider
    except ImportError:
        return False
    try:
        return OllamaProvider.is_available(settings_snapshot)
    except ImportError:
        return False
    except Exception:
        logger.debug("Error checking Ollama availability", exc_info=True)
        return False

94 

95 

def is_lmstudio_available(settings_snapshot=None):
    """Report whether LM Studio is usable.

    Delegates to ``LMStudioProvider.is_available``; returns False on any
    import failure or unexpected error.
    """
    available = False
    try:
        from ..llm.providers.implementations.lmstudio import LMStudioProvider

        available = LMStudioProvider.is_available(settings_snapshot)
    except ImportError:
        available = False
    except Exception:
        logger.debug("Error checking LM Studio availability", exc_info=True)
        available = False
    return available

107 

108 

def is_llamacpp_available(settings_snapshot=None):
    """Check if LlamaCpp is available and properly configured.

    Requires both the langchain-community LlamaCpp binding and a configured
    model path. For llama.cpp server connections, use 'openai_endpoint'
    provider instead.
    """
    try:
        # Import check only; the binding itself is unused here.
        from langchain_community.llms import LlamaCpp  # noqa: F401

        # Check if model path is configured and looks valid.
        # Note: For llama.cpp server connections, use 'openai_endpoint' provider instead
        configured_path = get_setting_from_snapshot(
            "llm.llamacpp_model_path",
            default=None,
            settings_snapshot=settings_snapshot,
        )

        # A truthy path is sufficient; real validation happens when the
        # model actually loads.
        return bool(configured_path)

    except ImportError:
        # LlamaCpp library not installed
        return False

    except Exception:
        logger.debug("Error checking LlamaCpp availability", exc_info=True)
        return False

141 

142 

def is_google_available(settings_snapshot=None):
    """Check if Google/Gemini is available; False on any failure."""
    try:
        from ..llm.providers.implementations.google import GoogleProvider
    except ImportError:
        return False
    try:
        return GoogleProvider.is_available(settings_snapshot)
    except ImportError:
        return False
    except Exception:
        logger.debug("Error checking Google availability", exc_info=True)
        return False

154 

155 

def is_openrouter_available(settings_snapshot=None):
    """Check if OpenRouter is available; False on any failure."""
    available = False
    try:
        from ..llm.providers.implementations.openrouter import (
            OpenRouterProvider,
        )

        available = OpenRouterProvider.is_available(settings_snapshot)
    except ImportError:
        available = False
    except Exception:
        logger.debug("Error checking OpenRouter availability", exc_info=True)
        available = False
    return available

169 

170 

def _list_available_providers(settings_snapshot=None):
    """Probe each known provider and return a {key: label} mapping.

    Falls back to a single "none" entry when nothing is available.
    """
    providers = {}

    if is_ollama_available(settings_snapshot):
        providers["ollama"] = "Ollama (local models)"

    if is_openai_available(settings_snapshot):
        providers["openai"] = "OpenAI API"

    if is_anthropic_available(settings_snapshot):
        providers["anthropic"] = "Anthropic API"

    if is_google_available(settings_snapshot):
        providers["google"] = "Google Gemini API"

    if is_openrouter_available(settings_snapshot):
        providers["openrouter"] = "OpenRouter API"

    if is_openai_endpoint_available(settings_snapshot):
        providers["openai_endpoint"] = "OpenAI-Compatible Endpoint"

    if is_lmstudio_available(settings_snapshot):
        providers["lmstudio"] = "LM Studio (local models)"

    if is_llamacpp_available(settings_snapshot):
        providers["llamacpp"] = "LlamaCpp (local models)"

    # Default fallback
    if not providers:
        providers["none"] = "No model providers available"

    return providers


@cache
def _cached_default_providers():
    """Memoized no-snapshot lookup (safe: takes no arguments)."""
    return _list_available_providers(None)


def get_available_providers(settings_snapshot=None):
    """Return available model providers.

    Args:
        settings_snapshot: Optional settings snapshot (typically a dict)
            used by the individual availability checks.

    Returns:
        dict: Provider key -> human-readable label.

    Note:
        Only the default (no-snapshot) lookup is cached. The previous
        implementation applied @cache directly to this function, which
        raised ``TypeError: unhashable type: 'dict'`` whenever a snapshot
        dict was passed, and would otherwise have returned stale results
        across different snapshots.
    """
    if settings_snapshot is None:
        return _cached_default_providers()
    return _list_available_providers(settings_snapshot)

205 

206 

def get_selected_llm_provider(settings_snapshot=None):
    """Return the configured LLM provider name in canonical (lowercase) form."""
    raw_provider = get_setting_from_snapshot(
        "llm.provider", "ollama", settings_snapshot=settings_snapshot
    )
    return normalize_provider(raw_provider)

213 

214 

def _get_context_window_for_provider(provider_type, settings_snapshot=None):
    """Resolve the context window size for a provider from settings.

    Local providers (ollama, llamacpp, lmstudio) default to a smaller
    window to prevent memory issues. Cloud providers honor the
    "unrestricted" toggle, in which case no limit is imposed here.

    Args:
        provider_type: Canonical provider key (e.g. "ollama", "openai").
        settings_snapshot: Optional settings snapshot for lookups.

    Returns:
        int or None: The context window size, or None for unrestricted
        cloud providers.
    """
    local_providers = ("ollama", "llamacpp", "lmstudio")

    if provider_type in local_providers:
        # Local providers: smaller default keeps memory usage in check.
        raw_size = get_setting_from_snapshot(
            "llm.local_context_window_size",
            8192,
            settings_snapshot=settings_snapshot,
        )
        # Coerce to int; fall back to the default when unset.
        return 8192 if raw_size is None else int(raw_size)

    # Cloud providers: unrestricted mode defers to the provider itself.
    unrestricted = get_setting_from_snapshot(
        "llm.context_window_unrestricted",
        True,
        settings_snapshot=settings_snapshot,
    )
    if unrestricted:
        return None  # Handled per provider downstream.

    # Otherwise apply the user-specified limit.
    raw_size = get_setting_from_snapshot(
        "llm.context_window_size",
        128000,
        settings_snapshot=settings_snapshot,
    )
    return 128000 if raw_size is None else int(raw_size)

249 

250 

def get_llm(
    model_name=None,
    temperature=None,
    provider=None,
    openai_endpoint_url=None,
    research_id=None,
    research_context=None,
    settings_snapshot=None,
):
    """
    Get LLM instance based on model name and provider.

    Args:
        model_name: Name of the model to use (if None, uses database setting)
        temperature: Model temperature (if None, uses database setting)
        provider: Provider to use (if None, uses database setting)
        openai_endpoint_url: Custom endpoint URL to use (if None, uses database
            setting)
        research_id: Optional research ID for token tracking
        research_context: Optional research context for enhanced token tracking;
            mutated in place to record "context_limit" when known
        settings_snapshot: Optional settings snapshot used for all
            configuration lookups

    Returns:
        A LangChain LLM instance with automatic think-tag removal

    Raises:
        ValueError: If the provider is invalid or unimplemented, a required
            API key / model path is missing, or a registered custom LLM is
            neither a BaseChatModel nor a factory callable.
        TypeError: If a registered factory callable rejects the expected
            keyword arguments.
    """

    # Use database values for parameters if not provided
    if model_name is None:
        model_name = get_setting_from_snapshot(
            "llm.model", "gemma3:12b", settings_snapshot=settings_snapshot
        )
    if temperature is None:
        temperature = get_setting_from_snapshot(
            "llm.temperature", 0.7, settings_snapshot=settings_snapshot
        )
    if provider is None:
        provider = get_setting_from_snapshot(
            "llm.provider", "ollama", settings_snapshot=settings_snapshot
        )

    # Clean model name: remove quotes and extra whitespace
    if model_name:
        model_name = model_name.strip().strip("\"'").strip()

    # Clean provider: remove quotes and extra whitespace
    if provider:
        provider = provider.strip().strip("\"'").strip()

    # Normalize provider: convert to lowercase canonical form
    provider = normalize_provider(provider)

    # Check if this is a registered custom LLM first (takes precedence over
    # built-in providers).
    if provider and is_llm_registered(provider):
        logger.info(f"Using registered custom LLM: {provider}")
        custom_llm = get_llm_from_registry(provider)

        # Check if it's a callable (factory function) or a BaseChatModel instance
        if callable(custom_llm) and not isinstance(custom_llm, BaseChatModel):
            # It's a callable (factory function), call it with parameters
            try:
                llm_instance = custom_llm(
                    model_name=model_name,
                    temperature=temperature,
                    settings_snapshot=settings_snapshot,
                )
            except TypeError as e:
                # Re-raise TypeError with better message
                raise TypeError(
                    f"Registered LLM factory '{provider}' has invalid signature. "
                    f"Factory functions must accept 'model_name', 'temperature', and 'settings_snapshot' parameters. "
                    f"Error: {e}"
                )

            # Validate the result is a BaseChatModel
            if not isinstance(llm_instance, BaseChatModel):
                raise ValueError(
                    f"Factory function for {provider} must return a BaseChatModel instance, "
                    f"got {type(llm_instance).__name__}"
                )
        elif isinstance(custom_llm, BaseChatModel):
            # It's already a proper LLM instance, use it directly
            llm_instance = custom_llm
        else:
            raise ValueError(
                f"Registered LLM {provider} must be either a BaseChatModel instance "
                f"or a callable factory function. Got: {type(custom_llm).__name__}"
            )

        return wrap_llm_without_think_tags(
            llm_instance,
            research_id=research_id,
            provider=provider,
            research_context=research_context,
            settings_snapshot=settings_snapshot,
        )

    # Validate provider
    if provider not in VALID_PROVIDERS:
        logger.error(f"Invalid provider in settings: {provider}")
        raise ValueError(
            f"Invalid provider: {provider}. Must be one of: {VALID_PROVIDERS}"
        )
    logger.info(
        f"Getting LLM with model: {model_name}, temperature: {temperature}, provider: {provider}"
    )

    # Common parameters for all models
    common_params = {
        "temperature": temperature,
    }

    context_window_size = _get_context_window_for_provider(
        provider, settings_snapshot
    )

    # Add context limit to research context for overflow detection
    if research_context and context_window_size:
        research_context["context_limit"] = context_window_size
        logger.info(
            f"Set context_limit={context_window_size} in research_context"
        )
    else:
        logger.debug(
            f"Context limit not set: research_context={bool(research_context)}, context_window_size={context_window_size}"
        )

    max_tokens = None
    if get_setting_from_snapshot(
        "llm.supports_max_tokens", True, settings_snapshot=settings_snapshot
    ):
        # Use 80% of context window to leave room for prompts
        if context_window_size is not None:
            max_tokens = min(
                int(
                    get_setting_from_snapshot(
                        "llm.max_tokens",
                        100000,
                        settings_snapshot=settings_snapshot,
                    )
                ),
                int(context_window_size * 0.8),
            )
            common_params["max_tokens"] = max_tokens
        else:
            # Unrestricted context: use provider's default max_tokens
            max_tokens = int(
                get_setting_from_snapshot(
                    "llm.max_tokens",
                    100000,
                    settings_snapshot=settings_snapshot,
                )
            )
            common_params["max_tokens"] = max_tokens

    # Handle different providers
    if provider == "anthropic":
        api_key = get_setting_from_snapshot(
            "llm.anthropic.api_key", settings_snapshot=settings_snapshot
        )

        if not api_key:
            raise ValueError(
                "Anthropic API key not configured. Please set llm.anthropic.api_key in settings."
            )

        llm: BaseChatModel = ChatAnthropic(
            model=model_name,
            anthropic_api_key=api_key,
            **common_params,  # type: ignore[call-arg]
        )
        return wrap_llm_without_think_tags(
            llm,
            research_id=research_id,
            provider=provider,
            research_context=research_context,
            settings_snapshot=settings_snapshot,
        )

    if provider == "openai":
        api_key = get_setting_from_snapshot(
            "llm.openai.api_key", settings_snapshot=settings_snapshot
        )

        if not api_key:
            raise ValueError(
                "OpenAI API key not configured. Please set llm.openai.api_key in settings."
            )

        # Build OpenAI-specific parameters
        openai_params = {
            "model": model_name,
            "api_key": api_key,
            **common_params,
        }

        # Add optional parameters if they exist in settings
        try:
            api_base = get_setting_from_snapshot(
                "llm.openai.api_base",
                default=None,
                settings_snapshot=settings_snapshot,
            )
            if api_base:
                openai_params["openai_api_base"] = api_base
        except NoSettingsContextError:
            pass  # Optional parameter

        try:
            organization = get_setting_from_snapshot(
                "llm.openai.organization",
                default=None,
                settings_snapshot=settings_snapshot,
            )
            if organization:
                openai_params["openai_organization"] = organization
        except NoSettingsContextError:
            pass  # Optional parameter

        try:
            streaming = get_setting_from_snapshot(
                "llm.streaming",
                default=None,
                settings_snapshot=settings_snapshot,
            )
        except NoSettingsContextError:
            streaming = None  # Optional parameter
        if streaming is not None:
            openai_params["streaming"] = streaming

        try:
            max_retries = get_setting_from_snapshot(
                "llm.max_retries",
                default=None,
                settings_snapshot=settings_snapshot,
            )
            if max_retries is not None:
                openai_params["max_retries"] = max_retries
        except NoSettingsContextError:
            pass  # Optional parameter

        try:
            request_timeout = get_setting_from_snapshot(
                "llm.request_timeout",
                default=None,
                settings_snapshot=settings_snapshot,
            )
            if request_timeout is not None:
                openai_params["request_timeout"] = request_timeout
        except NoSettingsContextError:
            pass  # Optional parameter

        llm = ChatOpenAI(**openai_params)  # type: ignore[assignment]
        return wrap_llm_without_think_tags(
            llm,
            research_id=research_id,
            provider=provider,
            research_context=research_context,
            settings_snapshot=settings_snapshot,
        )

    if provider == "openai_endpoint":
        api_key = get_setting_from_snapshot(
            "llm.openai_endpoint.api_key", settings_snapshot=settings_snapshot
        )

        # Local servers (e.g. llama.cpp) don't require an API key.
        # Use a placeholder so ChatOpenAI doesn't reject the request.
        if not api_key:
            logger.info(
                "No API key configured for openai_endpoint provider. "
                "Using placeholder key. If you are connecting to a hosted "
                "service, set llm.openai_endpoint.api_key in settings."
            )
            api_key = "not-needed"  # noqa: S105 # gitleaks:allow

        # Get endpoint URL from settings
        if openai_endpoint_url is None:
            openai_endpoint_url = get_setting_from_snapshot(
                "llm.openai_endpoint.url",
                "https://openrouter.ai/api/v1",
                settings_snapshot=settings_snapshot,
            )
        openai_endpoint_url = normalize_url(openai_endpoint_url)

        llm = ChatOpenAI(  # type: ignore[assignment, call-arg]
            model=model_name,
            api_key=api_key,
            openai_api_base=openai_endpoint_url,
            **common_params,
        )
        return wrap_llm_without_think_tags(
            llm,
            research_id=research_id,
            provider=provider,
            research_context=research_context,
            settings_snapshot=settings_snapshot,
        )

    if provider == "ollama":
        try:
            # Use the configurable Ollama base URL
            raw_base_url = get_setting_from_snapshot(
                "llm.ollama.url",
                DEFAULT_OLLAMA_URL,
                settings_snapshot=settings_snapshot,
            )
            base_url = (
                normalize_url(raw_base_url)
                if raw_base_url
                else DEFAULT_OLLAMA_URL
            )

            logger.info(
                f"Creating ChatOllama with model={model_name}, base_url={base_url}"
            )
            try:
                # Add num_ctx parameter for Ollama context window size
                ollama_params = {**common_params}
                if context_window_size is not None:
                    ollama_params["num_ctx"] = context_window_size

                # Thinking/reasoning handling for models like deepseek-r1:
                # The 'reasoning' parameter controls both:
                # 1. Whether the model performs thinking (makes it smarter when True)
                # 2. Whether thinking is separated from the answer (always separated when True)
                #
                # When reasoning=True:
                # - Model performs thinking/reasoning
                # - Thinking goes to additional_kwargs["reasoning_content"] (discarded by LDR)
                # - Only the final answer appears in response.content
                #
                # When reasoning=False:
                # - Model does NOT perform thinking (faster but less smart)
                # - Gives direct answers

                enable_thinking = get_setting_from_snapshot(
                    "llm.ollama.enable_thinking",
                    True,  # Default: enable thinking (smarter responses)
                    settings_snapshot=settings_snapshot,
                )

                # Only pass 'reasoning' through when the setting is a real bool
                # (guards against string/None values from the settings store).
                if enable_thinking is not None and isinstance(
                    enable_thinking, bool
                ):
                    ollama_params["reasoning"] = enable_thinking
                    logger.debug(
                        f"Ollama thinking enabled: {enable_thinking} "
                        f"(thinking will be {'shown internally but discarded' if enable_thinking else 'disabled'})"
                    )

                llm = ChatOllama(  # type: ignore[assignment]
                    model=model_name, base_url=base_url, **ollama_params
                )

                # Log the actual client configuration after creation
                logger.debug(
                    f"ChatOllama created - base_url attribute: {getattr(llm, 'base_url', 'not found')}"
                )
                if hasattr(llm, "_client"):
                    client = llm._client
                    logger.debug(f"ChatOllama _client type: {type(client)}")
                    if hasattr(client, "_client"):
                        inner_client = client._client
                        logger.debug(
                            f"ChatOllama inner client type: {type(inner_client)}"
                        )
                        if hasattr(inner_client, "base_url"):
                            logger.debug(
                                f"ChatOllama inner client base_url: {inner_client.base_url}"
                            )

                return wrap_llm_without_think_tags(
                    llm,
                    research_id=research_id,
                    provider=provider,
                    research_context=research_context,
                    settings_snapshot=settings_snapshot,
                )
            except Exception:
                logger.exception("Error creating or testing ChatOllama")
                raise
        except Exception:
            logger.exception("Error in Ollama provider section")
            raise

    elif provider == "lmstudio":
        # LM Studio supports OpenAI API format, so we can use ChatOpenAI directly
        lmstudio_url = get_setting_from_snapshot(
            "llm.lmstudio.url",
            DEFAULT_LMSTUDIO_URL,
            settings_snapshot=settings_snapshot,
        )
        # Use URL as-is (default already includes /v1)
        base_url = normalize_url(lmstudio_url)

        llm = ChatOpenAI(  # type: ignore[assignment, call-arg, arg-type]
            model=model_name,
            api_key="lm-studio",  # LM Studio doesn't require a real API key # pragma: allowlist secret
            base_url=base_url,
            temperature=temperature,
            max_tokens=max_tokens,  # Use calculated max_tokens based on context size
        )
        return wrap_llm_without_think_tags(
            llm,
            research_id=research_id,
            provider=provider,
            research_context=research_context,
            settings_snapshot=settings_snapshot,
        )

    # Update the llamacpp section in get_llm function
    elif provider == "llamacpp":
        # Import LlamaCpp
        from langchain_community.llms import LlamaCpp

        # Note: For llama.cpp server connections, use 'openai_endpoint' provider
        # with the server's /v1 URL (e.g., 'http://localhost:8000/v1')

        # Get LlamaCpp model path from settings
        model_path = get_setting_from_snapshot(
            "llm.llamacpp_model_path", settings_snapshot=settings_snapshot
        )
        if not model_path:
            logger.error("llamacpp_model_path not set in settings")
            raise ValueError(
                "LlamaCpp model path not configured. Either:\n"
                "1. Set 'llm.llamacpp_model_path' to your .gguf file path, or\n"
                "2. For llama.cpp server connections, use 'openai_endpoint' provider "
                "with the server's /v1 endpoint (e.g., 'http://localhost:8000/v1')"
            )

        # Validate model path for security FIRST using centralized validator
        # This MUST happen before any filesystem operations on user input
        from ..security.path_validator import PathValidator
        from .paths import get_models_directory

        try:
            validated_path = PathValidator.validate_model_path(model_path)
        except ValueError as e:
            error_msg = str(e)
            # If the path is not a file, try to provide helpful directory listing
            # Only do this after path has passed security validation (safe_join check)
            if "not a file" in error_msg:
                helpful_message = None
                try:
                    model_root = str(get_models_directory())
                    safe_path = PathValidator.validate_safe_path(
                        model_path, model_root, allow_absolute=False
                    )
                    if safe_path and safe_path.is_dir():
                        gguf_files = list(safe_path.glob("*.gguf"))
                        if gguf_files:
                            # Show at most five candidate model files.
                            files_list = ", ".join(
                                f.name for f in gguf_files[:5]
                            )
                            if len(gguf_files) > 5:
                                files_list += (
                                    f" (and {len(gguf_files) - 5} more)"
                                )
                            suggestion = f"Found .gguf files: {files_list}"
                        else:
                            suggestion = (
                                "No .gguf files found in this directory"
                            )
                        helpful_message = (
                            f"Model path is a directory, not a file: {model_path}\n"
                            f"Please specify the full path to a .gguf model file.\n"
                            f"{suggestion}"
                        )
                except ValueError:
                    pass  # Secondary validation failed, use original error
                if helpful_message:
                    raise ValueError(helpful_message) from e
            logger.exception("Model path validation failed")
            raise

        model_path = str(validated_path)

        # Validate file extension - LlamaCpp requires .gguf or .bin files
        # Safe to use validated_path here since it passed security validation
        if validated_path.suffix.lower() not in (".gguf", ".bin"):
            raise ValueError(
                f"Invalid model file extension: {validated_path.suffix}\n"
                f"LlamaCpp requires .gguf or .bin model files.\n"
                f"File: {validated_path.name}"
            )

        # Get additional LlamaCpp parameters
        n_gpu_layers = get_setting_from_snapshot(
            "llm.llamacpp_n_gpu_layers",
            1,
            settings_snapshot=settings_snapshot,
        )
        n_batch = get_setting_from_snapshot(
            "llm.llamacpp_n_batch", 512, settings_snapshot=settings_snapshot
        )
        f16_kv = get_setting_from_snapshot(
            "llm.llamacpp_f16_kv", True, settings_snapshot=settings_snapshot
        )

        # Create LlamaCpp instance
        llm = LlamaCpp(
            model_path=model_path,
            temperature=temperature,
            max_tokens=max_tokens,  # Use calculated max_tokens
            n_gpu_layers=n_gpu_layers,
            n_batch=n_batch,
            f16_kv=f16_kv,
            n_ctx=context_window_size,  # Set context window size directly (None = use default)
            verbose=True,
        )

        return wrap_llm_without_think_tags(
            llm,
            research_id=research_id,
            provider=provider,
            research_context=research_context,
            settings_snapshot=settings_snapshot,
        )

    elif provider == "none":
        raise ValueError(
            "No LLM provider configured. Please set llm.provider in settings "
            "to a valid provider (e.g., 'ollama', 'openai', 'anthropic')."
        )

    else:
        # Provider validated above but not handled - this shouldn't happen
        # since VALID_PROVIDERS check above would catch unknown providers
        raise ValueError(
            f"Provider '{provider}' is valid but not implemented. "
            f"This is a bug - please report it."
        )

783 

784 

def wrap_llm_without_think_tags(
    llm,
    research_id=None,
    provider=None,
    research_context=None,
    settings_snapshot=None,
):
    """Create a wrapper class that processes LLM outputs with remove_think_tags and token counting.

    Args:
        llm: The base LangChain LLM (or LLM-like object) to wrap.
        research_id: Optional research ID; when provided, a token-counting
            callback is attached for metrics tracking.
        provider: Provider name, used for rate limiting and token metrics.
        research_context: Optional mutable dict; "context_limit" is filled in
            here for overflow detection when not already set.
        settings_snapshot: Optional settings snapshot for configuration lookups.

    Returns:
        A ProcessingLLMWrapper that delegates to *llm* and strips think
        tags from response content.
    """

    # First apply rate limiting if enabled
    from ..web_search_engines.rate_limiting.llm import (
        create_rate_limited_llm_wrapper,
    )

    # Check if LLM rate limiting is enabled (independent of search rate limiting)
    # Resolved via get_setting_from_snapshot so thread/snapshot settings apply.
    if get_setting_from_snapshot(
        "rate_limiting.llm_enabled", False, settings_snapshot=settings_snapshot
    ):
        llm = create_rate_limited_llm_wrapper(llm, provider)

    # Set context_limit in research_context for overflow detection.
    # This is needed for providers that go through the registered provider path
    # (which returns before the code in get_llm that sets context_limit).
    if research_context is not None and provider is not None:
        if "context_limit" not in research_context:
            context_limit = _get_context_window_for_provider(
                provider, settings_snapshot
            )
            if context_limit is not None:
                research_context["context_limit"] = context_limit
                logger.info(
                    f"Set context_limit={context_limit} in wrap_llm for provider={provider}"
                )

    # Import token counting functionality if research_id is provided
    callbacks = []
    if research_id is not None:
        from ..metrics import TokenCounter

        token_counter = TokenCounter()
        token_callback = token_counter.create_callback(
            research_id, research_context
        )
        # Set provider and model info on the callback
        if provider:
            token_callback.preset_provider = provider
        # Try to extract model name from the LLM instance
        if hasattr(llm, "model_name"):
            token_callback.preset_model = llm.model_name
        elif hasattr(llm, "model"):
            token_callback.preset_model = llm.model
        callbacks.append(token_callback)

    # Add callbacks to the LLM if it supports them
    if callbacks and hasattr(llm, "callbacks"):
        if llm.callbacks is None:
            llm.callbacks = callbacks
        else:
            llm.callbacks.extend(callbacks)

    class ProcessingLLMWrapper:
        """Delegating proxy that strips think-tags from every response."""

        def __init__(self, base_llm):
            # The wrapped LLM; all unknown attribute access falls through to it.
            self.base_llm = base_llm

        def invoke(self, *args, **kwargs):
            """Invoke the wrapped LLM, then remove think tags from the result."""
            # Removed verbose debug logging to reduce log clutter
            # Uncomment the lines below if you need to debug LLM requests
            try:
                response = self.base_llm.invoke(*args, **kwargs)
            except Exception as e:
                logger.exception("LLM Request - Failed with error")
                # Log any URL information from the error
                error_str = str(e)
                if "http://" in error_str or "https://" in error_str:
                    logger.exception(
                        f"LLM Request - Error contains URL info: {error_str}"
                    )
                raise

            # Process the response content if it has a content attribute
            if hasattr(response, "content"):
                response.content = remove_think_tags(response.content)
            elif isinstance(response, str):
                response = remove_think_tags(response)

            return response

        # Pass through any other attributes to the base LLM
        def __getattr__(self, name):
            return getattr(self.base_llm, name)

        def close(self):
            """Close underlying HTTP clients held by this LLM. Idempotent."""
            try:
                from ..utilities.llm_utils import _close_base_llm

                _close_base_llm(self.base_llm)
            except Exception:
                logger.debug(
                    "best-effort cleanup of HTTP clients on shutdown",
                    exc_info=True,
                )

    return ProcessingLLMWrapper(llm)