Coverage for src / local_deep_research / config / llm_config.py: 70%

386 statements  

« prev     ^ index     » next       coverage.py v7.13.4, created at 2026-02-25 01:07 +0000

1import os 

2from functools import cache 

3 

4from langchain_anthropic import ChatAnthropic 

5from langchain_core.language_models import BaseChatModel, FakeListChatModel 

6from langchain_ollama import ChatOllama 

7from langchain_openai import ChatOpenAI 

8from loguru import logger 

9 

10from ..llm import get_llm_from_registry, is_llm_registered 

11from ..utilities.search_utilities import remove_think_tags 

12from ..utilities.url_utils import normalize_url 

13from ..security import safe_get 

14 

15# Import providers module to trigger auto-discovery 

16try: 

17 from ..llm.providers import discover_providers # noqa: F401 

18 # Auto-discovery happens on module import 

19except ImportError: 

20 pass # Providers module not available yet 

21from .thread_settings import ( 

22 get_llm_setting_from_snapshot as get_setting_from_snapshot, 

23 NoSettingsContextError, 

24) 

25 

# Valid provider options
# Accepted values for the "llm.provider" setting, validated in get_llm().
# "none" is the sentinel entry used when no real provider is available
# (see get_available_providers()).
VALID_PROVIDERS = [
    "ollama",
    "openai",
    "anthropic",
    "google",
    "openrouter",
    "vllm",
    "openai_endpoint",
    "lmstudio",
    "llamacpp",
    "none",
]

39 

40 

def is_openai_available(settings_snapshot=None):
    """Report whether the OpenAI provider can be used.

    The actual check is delegated to ``OpenAIProvider.is_available``;
    a missing provider package or any probe failure yields False.
    """
    try:
        from ..llm.providers.implementations.openai import OpenAIProvider
    except ImportError:
        return False
    try:
        return OpenAIProvider.is_available(settings_snapshot)
    except Exception:
        return False

51 

52 

def is_anthropic_available(settings_snapshot=None):
    """Report whether the Anthropic provider can be used.

    Delegates to ``AnthropicProvider.is_available``; every failure mode
    (missing package, probe error) collapses to False.
    """
    try:
        from ..llm.providers.implementations.anthropic import (
            AnthropicProvider,
        )

        return AnthropicProvider.is_available(settings_snapshot)
    except Exception:
        # Covers ImportError (package absent) and any runtime probe failure.
        return False

63 

64 

def is_openai_endpoint_available(settings_snapshot=None):
    """Report whether a custom OpenAI-compatible endpoint can be used.

    Delegates to ``CustomOpenAIEndpointProvider.is_available``; a missing
    package or any probe failure is treated as "not available".
    """
    try:
        from ..llm.providers.implementations.custom_openai_endpoint import (
            CustomOpenAIEndpointProvider,
        )
    except ImportError:
        return False
    try:
        return CustomOpenAIEndpointProvider.is_available(settings_snapshot)
    except Exception:
        return False

77 

78 

def is_ollama_available(settings_snapshot=None):
    """Report whether a usable Ollama instance is detected.

    Delegates to ``OllamaProvider.is_available``; missing package or any
    probe failure means "not available".
    """
    try:
        from ..llm.providers.implementations.ollama import OllamaProvider

        available = OllamaProvider.is_available(settings_snapshot)
    except Exception:
        # ImportError (package absent) or any runtime probe failure.
        return False
    return available

89 

90 

def is_vllm_available():
    """Report whether the local VLLM capability is present.

    Requires torch, transformers, the vllm package itself, and the
    LangChain VLLM wrapper; any missing piece means False.
    """
    try:
        import torch  # noqa: F401
        import transformers  # noqa: F401

        # The VLLM wrapper class may import fine even when it is unusable,
        # so its runtime dependencies (torch/transformers) are probed first.
        import vllm  # noqa: F401
        from langchain_community.llms import VLLM  # noqa: F401
    except ImportError:
        return False
    return True

106 

107 

def is_lmstudio_available(settings_snapshot=None):
    """Report whether LM Studio can be used.

    Delegates to ``LMStudioProvider.is_available``; missing package or
    any probe failure yields False.
    """
    try:
        from ..llm.providers.implementations.lmstudio import LMStudioProvider
    except ImportError:
        return False
    try:
        return LMStudioProvider.is_available(settings_snapshot)
    except Exception:
        return False

118 

119 

def is_llamacpp_available(settings_snapshot=None):
    """Report whether the local LlamaCpp backend is usable.

    Usable means the LangChain ``LlamaCpp`` class can be imported AND a
    model path is configured under ``llm.llamacpp_model_path``.  The path
    itself is only validated later, when the model is actually loaded.
    For llama.cpp *server* connections, use the 'openai_endpoint'
    provider instead.
    """
    try:
        # Library must be importable before anything else matters.
        from langchain_community.llms import LlamaCpp  # noqa: F401

        configured_path = get_setting_from_snapshot(
            "llm.llamacpp_model_path",
            default=None,
            settings_snapshot=settings_snapshot,
        )
        # No configured path -> backend cannot be used.
        return bool(configured_path)
    except ImportError:
        # LlamaCpp library not installed.
        return False
    except Exception:
        return False

151 

152 

def is_google_available(settings_snapshot=None):
    """Report whether the Google/Gemini provider can be used."""
    try:
        from ..llm.providers.google import GoogleProvider

        return GoogleProvider.is_available(settings_snapshot)
    except Exception:
        # ImportError (package absent) or any runtime probe failure.
        return False

163 

164 

def is_openrouter_available(settings_snapshot=None):
    """Report whether the OpenRouter provider can be used."""
    try:
        from ..llm.providers.openrouter import OpenRouterProvider
    except ImportError:
        return False
    try:
        return OpenRouterProvider.is_available(settings_snapshot)
    except Exception:
        return False

175 

176 

def get_available_providers(settings_snapshot=None):
    """Return a mapping of currently available model providers.

    Probes each supported provider's availability helper and collects the
    responsive ones as ``{provider_key: human-readable label}``.

    NOTE: this function is intentionally NOT memoized.  The previous
    ``@functools.cache`` decorator was a defect: ``settings_snapshot`` is
    a dict (unhashable), so any snapshot-bearing call raised ``TypeError``,
    and caching also froze availability (a provider started after the
    first call would never be detected).

    Args:
        settings_snapshot: Optional settings dict forwarded to the
            individual availability checks.

    Returns:
        dict: provider key -> description.  Contains the single sentinel
        entry ``{"none": "No model providers available"}`` when nothing
        is available.
    """
    providers = {}

    if is_ollama_available(settings_snapshot):
        providers["ollama"] = "Ollama (local models)"

    if is_openai_available(settings_snapshot):
        providers["openai"] = "OpenAI API"

    if is_anthropic_available(settings_snapshot):
        providers["anthropic"] = "Anthropic API"

    if is_google_available(settings_snapshot):
        providers["google"] = "Google Gemini API"

    if is_openrouter_available(settings_snapshot):
        providers["openrouter"] = "OpenRouter API"

    if is_openai_endpoint_available(settings_snapshot):
        providers["openai_endpoint"] = "OpenAI-compatible Endpoint"

    if is_lmstudio_available(settings_snapshot):
        providers["lmstudio"] = "LM Studio (local models)"

    if is_llamacpp_available(settings_snapshot):
        providers["llamacpp"] = "LlamaCpp (local models)"

    # Check for VLLM capability (no snapshot needed: pure import probe)
    if is_vllm_available():
        providers["vllm"] = "VLLM (local models)"

    # Default fallback sentinel
    if not providers:
        providers["none"] = "No model providers available"

    return providers

215 

216 

def get_selected_llm_provider(settings_snapshot=None):
    """Return the configured LLM provider name, lower-cased.

    Falls back to "ollama" when the "llm.provider" setting is absent.
    """
    selected = get_setting_from_snapshot(
        "llm.provider", "ollama", settings_snapshot=settings_snapshot
    )
    return selected.lower()

221 

222 

def _get_context_window_for_provider(provider_type, settings_snapshot=None):
    """Resolve the context window size for a provider from settings.

    Local providers (ollama, llamacpp, lmstudio) default to a small
    window to keep memory usage down.  Cloud providers are unrestricted
    by default (returning None so the provider decides), unless the user
    disabled unrestricted mode and set an explicit limit.

    Returns:
        int or None: context window size; None means the cloud provider
        handles its own context limit.
    """
    if provider_type in ("ollama", "llamacpp", "lmstudio"):
        # Local providers: use smaller default to prevent memory issues.
        size = get_setting_from_snapshot(
            "llm.local_context_window_size",
            4096,
            settings_snapshot=settings_snapshot,
        )
        # Coerce to int; a missing value falls back to the default.
        return 4096 if size is None else int(size)

    # Cloud providers: check whether unrestricted mode is enabled.
    unrestricted = get_setting_from_snapshot(
        "llm.context_window_unrestricted",
        True,
        settings_snapshot=settings_snapshot,
    )
    if unrestricted:
        # Let the cloud provider auto-handle its context window.
        return None

    # Use the user-specified limit.
    size = get_setting_from_snapshot(
        "llm.context_window_size",
        128000,
        settings_snapshot=settings_snapshot,
    )
    return 128000 if size is None else int(size)

259 

260 

def get_llm(
    model_name=None,
    temperature=None,
    provider=None,
    openai_endpoint_url=None,
    research_id=None,
    research_context=None,
    settings_snapshot=None,
):
    """
    Get LLM instance based on model name and provider.

    Fixes applied in this revision:
    - The vllm branch now forwards ``settings_snapshot`` to
      ``wrap_llm_without_think_tags``, consistent with every other
      provider path (it was previously dropped, so the wrapper could not
      resolve settings such as rate limiting from the snapshot).
    - In the llamacpp branch, the helpful "directory, not a file"
      ValueError was previously raised inside its own
      ``except ValueError: pass`` and therefore always swallowed; it is
      now raised after the secondary validation so callers actually see it.

    Args:
        model_name: Name of the model to use (if None, uses database setting)
        temperature: Model temperature (if None, uses database setting)
        provider: Provider to use (if None, uses database setting)
        openai_endpoint_url: Custom endpoint URL to use (if None, uses database
            setting)
        research_id: Optional research ID for token tracking
        research_context: Optional research context for enhanced token tracking
        settings_snapshot: Optional settings dict used for all settings lookups

    Returns:
        A LangChain LLM instance with automatic think-tag removal
    """

    # Use database values for parameters if not provided
    if model_name is None:
        model_name = get_setting_from_snapshot(
            "llm.model", "gemma:latest", settings_snapshot=settings_snapshot
        )
    if temperature is None:
        temperature = get_setting_from_snapshot(
            "llm.temperature", 0.7, settings_snapshot=settings_snapshot
        )
    if provider is None:
        provider = get_setting_from_snapshot(
            "llm.provider", "ollama", settings_snapshot=settings_snapshot
        )

    # Clean model name: remove quotes and extra whitespace
    if model_name:
        model_name = model_name.strip().strip("\"'").strip()

    # Clean provider: remove quotes and extra whitespace
    if provider:
        provider = provider.strip().strip("\"'").strip()

    # Normalize provider: convert to lowercase
    provider = provider.lower() if provider else None

    # Check if this is a registered custom LLM first
    if provider and is_llm_registered(provider):
        logger.info(f"Using registered custom LLM: {provider}")
        custom_llm = get_llm_from_registry(provider)

        # Check if it's a callable (factory function) or a BaseChatModel instance
        if callable(custom_llm) and not isinstance(custom_llm, BaseChatModel):
            # It's a callable (factory function), call it with parameters
            try:
                llm_instance = custom_llm(
                    model_name=model_name,
                    temperature=temperature,
                    settings_snapshot=settings_snapshot,
                )
            except TypeError as e:
                # Re-raise TypeError with better message
                raise TypeError(
                    f"Registered LLM factory '{provider}' has invalid signature. "
                    f"Factory functions must accept 'model_name', 'temperature', and 'settings_snapshot' parameters. "
                    f"Error: {e}"
                )

            # Validate the result is a BaseChatModel
            if not isinstance(llm_instance, BaseChatModel):
                raise ValueError(
                    f"Factory function for {provider} must return a BaseChatModel instance, "
                    f"got {type(llm_instance).__name__}"
                )
        elif isinstance(custom_llm, BaseChatModel):
            # It's already a proper LLM instance, use it directly
            llm_instance = custom_llm
        else:
            raise ValueError(
                f"Registered LLM {provider} must be either a BaseChatModel instance "
                f"or a callable factory function. Got: {type(custom_llm).__name__}"
            )

        return wrap_llm_without_think_tags(
            llm_instance,
            research_id=research_id,
            provider=provider,
            research_context=research_context,
            settings_snapshot=settings_snapshot,
        )

    # Check if we're in testing mode and should use fallback (but only when no API keys are configured)
    # Skip fallback check if we're in test mode with mocks
    if os.environ.get("LDR_USE_FALLBACK_LLM", "") and not os.environ.get(
        "LDR_TESTING_WITH_MOCKS", ""
    ):
        # Only use fallback if the provider has no valid configuration
        provider_has_config = False

        if (
            (
                provider == "openai"
                and get_setting_from_snapshot(
                    "llm.openai.api_key",
                    default=None,
                    settings_snapshot=settings_snapshot,
                )
            )
            or (
                provider == "anthropic"
                and get_setting_from_snapshot(
                    "llm.anthropic.api_key",
                    default=None,
                    settings_snapshot=settings_snapshot,
                )
            )
            or (
                provider == "openai_endpoint"
                and get_setting_from_snapshot(
                    "llm.openai_endpoint.api_key",
                    settings_snapshot=settings_snapshot,
                )
            )
            or (
                provider == "ollama"
                and is_ollama_available(settings_snapshot=settings_snapshot)
            )
        ):
            provider_has_config = True
        elif provider in ["vllm", "lmstudio", "llamacpp"]:
            # These are local providers, check their availability
            if (
                (provider == "vllm" and is_vllm_available())
                or (
                    provider == "lmstudio"
                    and is_lmstudio_available(
                        settings_snapshot=settings_snapshot
                    )
                )
                or (
                    provider == "llamacpp"
                    and is_llamacpp_available(
                        settings_snapshot=settings_snapshot
                    )
                )
            ):
                provider_has_config = True

        if not provider_has_config:
            logger.info(
                "LDR_USE_FALLBACK_LLM is set and no valid provider config found, using fallback model"
            )
            return wrap_llm_without_think_tags(
                get_fallback_model(temperature),
                research_id=research_id,
                provider="fallback",
                research_context=research_context,
                settings_snapshot=settings_snapshot,
            )

    # Validate provider
    if provider not in VALID_PROVIDERS:
        logger.error(f"Invalid provider in settings: {provider}")
        raise ValueError(
            f"Invalid provider: {provider}. Must be one of: {VALID_PROVIDERS}"
        )
    logger.info(
        f"Getting LLM with model: {model_name}, temperature: {temperature}, provider: {provider}"
    )

    # Common parameters for all models
    common_params = {
        "temperature": temperature,
    }

    context_window_size = _get_context_window_for_provider(
        provider, settings_snapshot
    )

    # Add context limit to research context for overflow detection
    if research_context and context_window_size:
        research_context["context_limit"] = context_window_size
        logger.info(
            f"Set context_limit={context_window_size} in research_context"
        )
    else:
        logger.debug(
            f"Context limit not set: research_context={bool(research_context)}, context_window_size={context_window_size}"
        )

    # NOTE(review): max_tokens is only bound inside this branch.  The
    # lmstudio and llamacpp paths below reference it unconditionally and
    # would raise NameError if "llm.supports_max_tokens" were configured
    # False — confirm whether those providers are ever used with that
    # setting disabled.
    if get_setting_from_snapshot(
        "llm.supports_max_tokens", True, settings_snapshot=settings_snapshot
    ):
        # Use 80% of context window to leave room for prompts
        if context_window_size is not None:
            max_tokens = min(
                int(
                    get_setting_from_snapshot(
                        "llm.max_tokens",
                        100000,
                        settings_snapshot=settings_snapshot,
                    )
                ),
                int(context_window_size * 0.8),
            )
            common_params["max_tokens"] = max_tokens
        else:
            # Unrestricted context: use provider's default max_tokens
            max_tokens = int(
                get_setting_from_snapshot(
                    "llm.max_tokens",
                    100000,
                    settings_snapshot=settings_snapshot,
                )
            )
            common_params["max_tokens"] = max_tokens

    # Handle different providers
    if provider == "anthropic":
        api_key = get_setting_from_snapshot(
            "llm.anthropic.api_key", settings_snapshot=settings_snapshot
        )

        if not api_key:
            logger.warning(
                "Anthropic API key not found in settings. Falling back to default model."
            )
            return get_fallback_model(temperature)

        llm = ChatAnthropic(
            model=model_name, anthropic_api_key=api_key, **common_params
        )
        return wrap_llm_without_think_tags(
            llm,
            research_id=research_id,
            provider=provider,
            research_context=research_context,
            settings_snapshot=settings_snapshot,
        )

    elif provider == "openai":
        api_key = get_setting_from_snapshot(
            "llm.openai.api_key", settings_snapshot=settings_snapshot
        )

        if not api_key:
            logger.warning(
                "OpenAI API key not found in settings. Falling back to default model."
            )
            return get_fallback_model(temperature)

        # Build OpenAI-specific parameters
        openai_params = {
            "model": model_name,
            "api_key": api_key,
            **common_params,
        }

        # Add optional parameters if they exist in settings
        try:
            api_base = get_setting_from_snapshot(
                "llm.openai.api_base",
                default=None,
                settings_snapshot=settings_snapshot,
            )
            if api_base:
                openai_params["openai_api_base"] = api_base
        except NoSettingsContextError:
            pass  # Optional parameter

        try:
            organization = get_setting_from_snapshot(
                "llm.openai.organization",
                default=None,
                settings_snapshot=settings_snapshot,
            )
            if organization:
                openai_params["openai_organization"] = organization
        except NoSettingsContextError:
            pass  # Optional parameter

        try:
            streaming = get_setting_from_snapshot(
                "llm.streaming",
                default=None,
                settings_snapshot=settings_snapshot,
            )
        except NoSettingsContextError:
            streaming = None  # Optional parameter
        if streaming is not None:
            openai_params["streaming"] = streaming

        try:
            max_retries = get_setting_from_snapshot(
                "llm.max_retries",
                default=None,
                settings_snapshot=settings_snapshot,
            )
            if max_retries is not None:
                openai_params["max_retries"] = max_retries
        except NoSettingsContextError:
            pass  # Optional parameter

        try:
            request_timeout = get_setting_from_snapshot(
                "llm.request_timeout",
                default=None,
                settings_snapshot=settings_snapshot,
            )
            if request_timeout is not None:
                openai_params["request_timeout"] = request_timeout
        except NoSettingsContextError:
            pass  # Optional parameter

        llm = ChatOpenAI(**openai_params)
        return wrap_llm_without_think_tags(
            llm,
            research_id=research_id,
            provider=provider,
            research_context=research_context,
            settings_snapshot=settings_snapshot,
        )

    elif provider == "openai_endpoint":
        api_key = get_setting_from_snapshot(
            "llm.openai_endpoint.api_key", settings_snapshot=settings_snapshot
        )

        if not api_key:
            logger.warning(
                "OpenAI endpoint API key not found in settings. Falling back to default model."
            )
            return get_fallback_model(temperature)

        # Get endpoint URL from settings
        if openai_endpoint_url is None:
            openai_endpoint_url = get_setting_from_snapshot(
                "llm.openai_endpoint.url",
                "https://openrouter.ai/api/v1",
                settings_snapshot=settings_snapshot,
            )
        openai_endpoint_url = normalize_url(openai_endpoint_url)

        llm = ChatOpenAI(
            model=model_name,
            api_key=api_key,
            openai_api_base=openai_endpoint_url,
            **common_params,
        )
        return wrap_llm_without_think_tags(
            llm,
            research_id=research_id,
            provider=provider,
            research_context=research_context,
            settings_snapshot=settings_snapshot,
        )

    elif provider == "vllm":
        if not is_vllm_available():
            logger.warning(
                "VLLM dependencies are not available. Falling back to default model."
            )
            return get_fallback_model(temperature)

        try:
            from langchain_community.llms import VLLM

            llm = VLLM(
                model=model_name,
                trust_remote_code=True,
                max_new_tokens=128,
                top_k=10,
                top_p=0.95,
                temperature=temperature,
            )
            return wrap_llm_without_think_tags(
                llm,
                research_id=research_id,
                provider=provider,
                research_context=research_context,
                # Fix: forward the snapshot like every other provider path.
                settings_snapshot=settings_snapshot,
            )
        except Exception:
            logger.exception("Error loading VLLM model")
            return get_fallback_model(temperature)

    elif provider == "ollama":
        try:
            # Use the configurable Ollama base URL
            raw_base_url = get_setting_from_snapshot(
                "llm.ollama.url",
                "http://localhost:11434",
                settings_snapshot=settings_snapshot,
            )
            base_url = (
                normalize_url(raw_base_url)
                if raw_base_url
                else "http://localhost:11434"
            )

            # Check if Ollama is available before trying to use it
            if not is_ollama_available(settings_snapshot=settings_snapshot):
                logger.error(
                    f"Ollama not available at {base_url}. Falling back to dummy model."
                )
                return get_fallback_model(temperature)

            # Check if the requested model exists
            try:
                logger.info(
                    f"Checking if model '{model_name}' exists in Ollama"
                )
                response = safe_get(
                    f"{base_url}/api/tags",
                    timeout=3.0,
                    allow_localhost=True,
                    allow_private_ips=True,
                )
                if response.status_code == 200:
                    # Handle both newer and older Ollama API formats
                    data = response.json()
                    models = []
                    if "models" in data:
                        # Newer Ollama API
                        models = data.get("models", [])
                    else:
                        # Older Ollama API format
                        models = data

                    # Get list of model names
                    model_names = [m.get("name", "").lower() for m in models]
                    logger.info(
                        f"Available Ollama models: {', '.join(model_names[:5])}{' and more' if len(model_names) > 5 else ''}"
                    )

                    if model_name.lower() not in model_names:
                        logger.error(
                            f"Model '{model_name}' not found in Ollama. Available models: {', '.join(model_names[:5])}"
                        )
                        return get_fallback_model(temperature)
            except Exception:
                logger.exception(
                    f"Error checking for model '{model_name}' in Ollama"
                )
                # Continue anyway, let ChatOllama handle potential errors

            logger.info(
                f"Creating ChatOllama with model={model_name}, base_url={base_url}"
            )
            try:
                # Add num_ctx parameter for Ollama context window size
                ollama_params = {**common_params}
                if context_window_size is not None:
                    ollama_params["num_ctx"] = context_window_size

                # Thinking/reasoning handling for models like deepseek-r1.
                # reasoning=True: model thinks; thinking goes to
                # additional_kwargs["reasoning_content"] (discarded by LDR)
                # and only the final answer lands in response.content.
                # reasoning=False: no thinking (faster, direct answers).
                enable_thinking = get_setting_from_snapshot(
                    "llm.ollama.enable_thinking",
                    True,  # Default: enable thinking (smarter responses)
                    settings_snapshot=settings_snapshot,
                )

                if enable_thinking is not None and isinstance(
                    enable_thinking, bool
                ):
                    ollama_params["reasoning"] = enable_thinking
                    logger.debug(
                        f"Ollama thinking enabled: {enable_thinking} "
                        f"(thinking will be {'shown internally but discarded' if enable_thinking else 'disabled'})"
                    )

                llm = ChatOllama(
                    model=model_name, base_url=base_url, **ollama_params
                )

                # Log the actual client configuration after creation
                logger.debug(
                    f"ChatOllama created - base_url attribute: {getattr(llm, 'base_url', 'not found')}"
                )
                if hasattr(llm, "_client"):
                    client = llm._client
                    logger.debug(f"ChatOllama _client type: {type(client)}")
                    if hasattr(client, "_client"):
                        inner_client = client._client
                        logger.debug(
                            f"ChatOllama inner client type: {type(inner_client)}"
                        )
                        if hasattr(inner_client, "base_url"):
                            logger.debug(
                                f"ChatOllama inner client base_url: {inner_client.base_url}"
                            )

                return wrap_llm_without_think_tags(
                    llm,
                    research_id=research_id,
                    provider=provider,
                    research_context=research_context,
                    settings_snapshot=settings_snapshot,
                )
            except Exception:
                logger.exception("Error creating or testing ChatOllama")
                return get_fallback_model(temperature)
        except Exception:
            logger.exception("Error in Ollama provider section")
            return get_fallback_model(temperature)

    elif provider == "lmstudio":
        # LM Studio supports OpenAI API format, so we can use ChatOpenAI directly
        lmstudio_url = get_setting_from_snapshot(
            "llm.lmstudio.url",
            "http://localhost:1234/v1",
            settings_snapshot=settings_snapshot,
        )
        # Use URL as-is (default already includes /v1)
        base_url = normalize_url(lmstudio_url)

        llm = ChatOpenAI(
            model=model_name,
            api_key="lm-studio",  # LM Studio doesn't require a real API key  # pragma: allowlist secret
            base_url=base_url,
            temperature=temperature,
            max_tokens=max_tokens,  # Use calculated max_tokens based on context size
        )
        return wrap_llm_without_think_tags(
            llm,
            research_id=research_id,
            provider=provider,
            research_context=research_context,
            settings_snapshot=settings_snapshot,
        )

    elif provider == "llamacpp":
        # Import LlamaCpp
        from langchain_community.llms import LlamaCpp

        # Note: For llama.cpp server connections, use 'openai_endpoint' provider
        # with the server's /v1 URL (e.g., 'http://localhost:8000/v1')

        # Get LlamaCpp model path from settings
        model_path = get_setting_from_snapshot(
            "llm.llamacpp_model_path", settings_snapshot=settings_snapshot
        )
        if not model_path:
            logger.error("llamacpp_model_path not set in settings")
            raise ValueError(
                "LlamaCpp model path not configured. Either:\n"
                "1. Set 'llm.llamacpp_model_path' to your .gguf file path, or\n"
                "2. For llama.cpp server connections, use 'openai_endpoint' provider "
                "with the server's /v1 endpoint (e.g., 'http://localhost:8000/v1')"
            )

        # Validate model path for security FIRST using centralized validator
        # This MUST happen before any filesystem operations on user input
        from ..security.path_validator import PathValidator
        from .paths import get_models_directory

        try:
            validated_path = PathValidator.validate_model_path(model_path)
        except ValueError as e:
            error_msg = str(e)
            # If the path is not a file, try to provide helpful directory listing
            # Only do this after path has passed security validation (safe_join check)
            if "not a file" in error_msg:
                # Build the helpful error first, then raise it OUTSIDE the
                # try below.  Previously the raise lived inside its own
                # `except ValueError: pass`, so the helpful message was
                # always swallowed and never reached the caller.
                directory_error = None
                try:
                    model_root = str(get_models_directory())
                    safe_path = PathValidator.validate_safe_path(
                        model_path, model_root, allow_absolute=False
                    )
                    if safe_path and safe_path.is_dir():
                        gguf_files = list(safe_path.glob("*.gguf"))
                        if gguf_files:
                            files_list = ", ".join(
                                f.name for f in gguf_files[:5]
                            )
                            if len(gguf_files) > 5:
                                files_list += (
                                    f" (and {len(gguf_files) - 5} more)"
                                )
                            suggestion = f"Found .gguf files: {files_list}"
                        else:
                            suggestion = (
                                "No .gguf files found in this directory"
                            )
                        directory_error = ValueError(
                            f"Model path is a directory, not a file: {model_path}\n"
                            f"Please specify the full path to a .gguf model file.\n"
                            f"{suggestion}"
                        )
                except ValueError:
                    pass  # Fall through to re-raise the original error
                if directory_error is not None:
                    raise directory_error from e
            logger.exception("Model path validation failed")
            raise

        model_path = str(validated_path)

        # Validate file extension - LlamaCpp requires .gguf or .bin files
        # Safe to use validated_path here since it passed security validation
        if validated_path.suffix.lower() not in (".gguf", ".bin"):
            raise ValueError(
                f"Invalid model file extension: {validated_path.suffix}\n"
                f"LlamaCpp requires .gguf or .bin model files.\n"
                f"File: {validated_path.name}"
            )

        # Get additional LlamaCpp parameters
        n_gpu_layers = get_setting_from_snapshot(
            "llm.llamacpp_n_gpu_layers",
            1,
            settings_snapshot=settings_snapshot,
        )
        n_batch = get_setting_from_snapshot(
            "llm.llamacpp_n_batch", 512, settings_snapshot=settings_snapshot
        )
        f16_kv = get_setting_from_snapshot(
            "llm.llamacpp_f16_kv", True, settings_snapshot=settings_snapshot
        )

        # Create LlamaCpp instance
        llm = LlamaCpp(
            model_path=model_path,
            temperature=temperature,
            max_tokens=max_tokens,  # Use calculated max_tokens
            n_gpu_layers=n_gpu_layers,
            n_batch=n_batch,
            f16_kv=f16_kv,
            n_ctx=context_window_size,  # Set context window size directly (None = use default)
            verbose=True,
        )

        return wrap_llm_without_think_tags(
            llm,
            research_id=research_id,
            provider=provider,
            research_context=research_context,
            settings_snapshot=settings_snapshot,
        )

    else:
        # Unknown-but-valid provider (e.g. "none"): hand back the dummy model.
        return wrap_llm_without_think_tags(
            get_fallback_model(temperature),
            research_id=research_id,
            provider=provider,
            research_context=research_context,
            settings_snapshot=settings_snapshot,
        )

923 

924 

def get_fallback_model(temperature=None):
    """Return a stub chat model used when no real LLM provider is configured.

    Args:
        temperature: Accepted for signature compatibility with the real
            model factories; the fake model ignores it.

    Returns:
        A ``FakeListChatModel`` that always replies with a fixed message
        telling the user to install Ollama or configure API keys.
    """
    # Runtime message must stay identical; split only for line length.
    fallback_reply = (
        "No language models are available. "
        "Please install Ollama or set up API keys."
    )
    return FakeListChatModel(responses=[fallback_reply])

932 

933 

def wrap_llm_without_think_tags(
    llm,
    research_id=None,
    provider=None,
    research_context=None,
    settings_snapshot=None,
):
    """Wrap an LLM so its outputs are stripped of <think> tags, with optional
    rate limiting and token counting.

    Args:
        llm: The base LangChain chat model to wrap.
        research_id: When given, a token-counting callback is attached.
        provider: Provider name; used for rate limiting, token metrics,
            and context-window lookup.
        research_context: Mutable dict; ``context_limit`` is stored here
            for overflow detection when it can be determined.
        settings_snapshot: Settings snapshot for thread-safe lookups.

    Returns:
        A ``ProcessingLLMWrapper`` delegating to ``llm``.
    """
    # Rate limiting is applied first so all downstream layers see it.
    from ..web_search_engines.rate_limiting.llm import (
        create_rate_limited_llm_wrapper,
    )

    # LLM rate limiting is toggled independently of search rate limiting.
    rate_limiting_enabled = get_setting_from_snapshot(
        "rate_limiting.llm_enabled", False, settings_snapshot=settings_snapshot
    )
    if rate_limiting_enabled:
        llm = create_rate_limited_llm_wrapper(llm, provider)

    # Record the provider's context window for overflow detection. Needed
    # here because registered-provider paths return before the code in
    # get_llm that would otherwise set context_limit.
    if research_context is not None and provider is not None:
        if "context_limit" not in research_context:
            limit = _get_context_window_for_provider(
                provider, settings_snapshot
            )
            if limit is not None:
                research_context["context_limit"] = limit
                logger.info(
                    f"Set context_limit={limit} in wrap_llm for provider={provider}"
                )

    # Attach token-usage metrics only when a research run is identified.
    callbacks = []
    if research_id is not None:
        from ..metrics import TokenCounter

        counter = TokenCounter()
        usage_callback = counter.create_callback(research_id, research_context)
        # Pre-seed provider/model info so the callback need not infer it.
        if provider:
            usage_callback.preset_provider = provider
        if hasattr(llm, "model_name"):
            usage_callback.preset_model = llm.model_name
        elif hasattr(llm, "model"):
            usage_callback.preset_model = llm.model
        callbacks.append(usage_callback)

    # Merge our callbacks into the model's existing callback list, if any.
    if callbacks and hasattr(llm, "callbacks"):
        if llm.callbacks is None:
            llm.callbacks = callbacks
        else:
            llm.callbacks.extend(callbacks)

    class ProcessingLLMWrapper:
        """Delegating wrapper that strips <think> tags from responses."""

        def __init__(self, base_llm):
            self.base_llm = base_llm

        def invoke(self, *args, **kwargs):
            # Delegate to the wrapped model; on failure, log (including any
            # URL that leaked into the error text) and re-raise unchanged.
            try:
                result = self.base_llm.invoke(*args, **kwargs)
            except Exception as e:
                logger.exception("LLM Request - Failed with error")
                error_str = str(e)
                if "http://" in error_str or "https://" in error_str:
                    logger.exception(
                        f"LLM Request - Error contains URL info: {error_str}"
                    )
                raise

            # Strip think tags from either a message object or a raw string.
            if hasattr(result, "content"):
                result.content = remove_think_tags(result.content)
            elif isinstance(result, str):
                result = remove_think_tags(result)

            return result

        def __getattr__(self, name):
            # Transparently forward everything else to the wrapped model.
            return getattr(self.base_llm, name)

    return ProcessingLLMWrapper(llm)