Coverage for src/local_deep_research/config/llm_config.py: 57%

377 statements  

coverage.py v7.12.0, created at 2026-01-11 00:51 +0000

1import os 

2from functools import cache 

3 

4from langchain_anthropic import ChatAnthropic 

5from langchain_core.language_models import BaseChatModel, FakeListChatModel 

6from langchain_ollama import ChatOllama 

7from langchain_openai import ChatOpenAI 

8from loguru import logger 

9 

10from ..llm import get_llm_from_registry, is_llm_registered 

11from ..utilities.search_utilities import remove_think_tags 

12from ..utilities.url_utils import normalize_url 

13from ..security import safe_get 

14 

15# Import providers module to trigger auto-discovery 

16try: 

17 from ..llm.providers import discover_providers # noqa: F401 

18 # Auto-discovery happens on module import 

19except ImportError: 

20 pass # Providers module not available yet 

21from .thread_settings import ( 

22 get_setting_from_snapshot as _get_setting_from_snapshot, 

23 NoSettingsContextError, 

24) 

25 

26# Valid provider options 

27VALID_PROVIDERS = [ 

28 "ollama", 

29 "openai", 

30 "anthropic", 

31 "google", 

32 "openrouter", 

33 "vllm", 

34 "openai_endpoint", 

35 "lmstudio", 

36 "llamacpp", 

37 "none", 

38] 

39 

40 

41def get_setting_from_snapshot( 

42 key, default=None, username=None, settings_snapshot=None 

43): 

44 """Get setting from context only - no database access from threads. 

45 

46 This is a wrapper around the shared function that enables the fallback-LLM check. 

47 """ 

48 return _get_setting_from_snapshot( 

49 key, default, username, settings_snapshot, check_fallback_llm=True 

50 ) 

51 

52 
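
For orientation, here is a minimal usage sketch of the wrapper above. The flat {key: value} snapshot layout is an assumption made only for illustration; the real structure is whatever thread_settings produces.

# Hypothetical flat snapshot layout (assumed for illustration only).
example_snapshot = {
    "llm.provider": "ollama",
    "llm.model": "gemma:latest",
    "llm.temperature": 0.7,
}

provider = get_setting_from_snapshot(
    "llm.provider", default="ollama", settings_snapshot=example_snapshot
)
# Missing keys fall back to the supplied default instead of touching the database.
timeout = get_setting_from_snapshot(
    "llm.request_timeout", default=None, settings_snapshot=example_snapshot
)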

53def is_openai_available(settings_snapshot=None): 

54 """Check if OpenAI is available""" 

55 try: 

56 api_key = get_setting_from_snapshot( 

57 "llm.openai.api_key", 

58 default=None, 

59 settings_snapshot=settings_snapshot, 

60 ) 

61 return bool(api_key) 

62 except Exception: 

63 return False 

64 

65 

66def is_anthropic_available(settings_snapshot=None): 

67 """Check if Anthropic is available""" 

68 try: 

69 api_key = get_setting_from_snapshot( 

70 "llm.anthropic.api_key", 

71 default=None, 

72 settings_snapshot=settings_snapshot, 

73 ) 

74 return bool(api_key) 

75 except Exception: 

76 return False 

77 

78 

79def is_openai_endpoint_available(settings_snapshot=None): 

80 """Check if OpenAI endpoint is available""" 

81 try: 

82 api_key = get_setting_from_snapshot( 

83 "llm.openai_endpoint.api_key", 

84 default=None, 

85 settings_snapshot=settings_snapshot, 

86 ) 

87 return bool(api_key) 

88 except Exception: 

89 return False 

90 

91 

92def is_ollama_available(settings_snapshot=None): 

93 """Check if Ollama is running""" 

94 try: 

95 import requests 

96 

97 raw_base_url = get_setting_from_snapshot( 

98 "llm.ollama.url", 

99 "http://localhost:11434", 

100 settings_snapshot=settings_snapshot, 

101 ) 

102 base_url = ( 

103 normalize_url(raw_base_url) 

104 if raw_base_url 

105 else "http://localhost:11434" 

106 ) 

107 logger.info(f"Checking Ollama availability at {base_url}/api/tags") 

108 

109 try: 

110 response = safe_get( 

111 f"{base_url}/api/tags", 

112 timeout=3.0, 

113 allow_localhost=True, 

114 allow_private_ips=True, 

115 ) 

116 if response.status_code == 200: 

117 logger.info( 

118 f"Ollama is available. Status code: {response.status_code}" 

119 ) 

120 # Log first 100 chars of response to debug 

121 logger.info(f"Response preview: {str(response.text)[:100]}") 

122 return True 

123 else: 

124 logger.warning( 

125 f"Ollama API returned status code: {response.status_code}" 

126 ) 

127 return False 

128 except requests.exceptions.RequestException as req_error: 

129 logger.exception( 

130 f"Request error when checking Ollama: {req_error!s}" 

131 ) 

132 return False 

133 except Exception: 

134 logger.exception("Unexpected error when checking Ollama") 

135 return False 

136 except Exception: 

137 logger.exception("Error in is_ollama_available") 

138 return False 

139 

140 

141def is_vllm_available(): 

142 """Check if VLLM capability is available""" 

143 try: 

144 import torch # noqa: F401 

145 import transformers # noqa: F401 

146 

147 # Only try to import VLLM if the dependencies are available 

148 # The VLLM class itself might not fail to import, but using it will fail 

149 # without the proper dependencies 

150 import vllm # noqa: F401 

151 from langchain_community.llms import VLLM # noqa: F401 

152 

153 return True 

154 except ImportError: 

155 return False 

156 

157 

158def is_lmstudio_available(settings_snapshot=None): 

159 """Check if LM Studio is available""" 

160 try: 

161 lmstudio_url = get_setting_from_snapshot( 

162 "llm.lmstudio.url", 

163 "http://localhost:1234/v1", 

164 settings_snapshot=settings_snapshot, 

165 ) 

166 # Use URL as-is (default already includes /v1) 

167 base_url = normalize_url(lmstudio_url) 

168 # LM Studio typically uses OpenAI-compatible endpoints 

169 response = safe_get( 

170 f"{base_url}/models", 

171 timeout=1.0, 

172 allow_localhost=True, 

173 allow_private_ips=True, 

174 ) 

175 return response.status_code == 200 

176 except Exception: 

177 return False 

178 

179 

180def is_llamacpp_available(settings_snapshot=None): 

181 """Check if LlamaCpp is available and configured""" 

182 try: 

183 # Import check 

184 from langchain_community.llms import LlamaCpp # noqa: F401 

185 

186 # Get the configured model path 

187 model_path_str = get_setting_from_snapshot( 

188 "llm.llamacpp_model_path", 

189 default=None, 

190 settings_snapshot=settings_snapshot, 

191 ) 

192 

193 # If no path configured, LlamaCpp is not available 

194 if not model_path_str: 

195 return False 

196 

197 # Security Note: Path validation is critical here 

198 # CodeQL may flag filesystem operations with user input 

199 # We validate paths are within allowed directories before any filesystem access 

200 

201 # For security, we simply check if a path is configured 

202 # The actual path validation will happen when the model is loaded 

203 # This avoids CodeQL alerts about filesystem access with user input 

204 # The LlamaCpp library itself will validate the path when loading 

205 return True 

206 

207 except ImportError: 

208 # LlamaCpp library not installed 

209 return False 

210 

211 except Exception: 

212 return False 

213 

214 

215def is_google_available(settings_snapshot=None): 

216 """Check if Google/Gemini is available""" 

217 try: 

218 from ..llm.providers.google import GoogleProvider 

219 

220 return GoogleProvider.is_available(settings_snapshot) 

221 except ImportError: 

222 return False 

223 except Exception: 

224 return False 

225 

226 

227def is_openrouter_available(settings_snapshot=None): 

228 """Check if OpenRouter is available""" 

229 try: 

230 from ..llm.providers.openrouter import OpenRouterProvider 

231 

232 return OpenRouterProvider.is_available(settings_snapshot) 

233 except ImportError: 

234 return False 

235 except Exception: 

236 return False 

237 

238 

239@cache 

240def get_available_providers(settings_snapshot=None): 

241 """Return available model providers""" 

242 providers = {} 

243 

244 if is_ollama_available(settings_snapshot): 

245 providers["ollama"] = "Ollama (local models)" 

246 

247 if is_openai_available(settings_snapshot):  [247 ↛ 248: the condition was never true]

248 providers["openai"] = "OpenAI API" 

249 

250 if is_anthropic_available(settings_snapshot):  [250 ↛ 251: the condition was never true]

251 providers["anthropic"] = "Anthropic API" 

252 

253 if is_google_available(settings_snapshot):  [253 ↛ 254: the condition was never true]

254 providers["google"] = "Google Gemini API" 

255 

256 if is_openrouter_available(settings_snapshot):  [256 ↛ 257: the condition was never true]

257 providers["openrouter"] = "OpenRouter API" 

258 

259 if is_openai_endpoint_available(settings_snapshot):  [259 ↛ 260: the condition was never true]

260 providers["openai_endpoint"] = "OpenAI-compatible Endpoint" 

261 

262 if is_lmstudio_available(settings_snapshot):  [262 ↛ 263: the condition was never true]

263 providers["lmstudio"] = "LM Studio (local models)" 

264 

265 if is_llamacpp_available(settings_snapshot):  [265 ↛ 266: the condition was never true]

266 providers["llamacpp"] = "LlamaCpp (local models)" 

267 

268 # Check for VLLM capability 

269 if is_vllm_available():  [269 ↛ 270: the condition was never true]

270 providers["vllm"] = "VLLM (local models)" 

271 

272 # Default fallback 

273 if not providers: 

274 providers["none"] = "No model providers available" 

275 

276 return providers 

277 

278 
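
A short sketch of how the provider map might be consumed (not part of the covered module). Note that functools.cache only memoizes hashable arguments, so the cached call is shown with the default settings_snapshot=None.

providers = get_available_providers()  # memoized by @cache after the first call

for key, label in providers.items():
    print(f"{key}: {label}")

if "ollama" not in providers:
    # In this case get_llm() would return the fallback model (see below).
    print("Ollama was not detected on this machine")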

279def get_selected_llm_provider(settings_snapshot=None): 

280 return get_setting_from_snapshot( 

281 "llm.provider", "ollama", settings_snapshot=settings_snapshot 

282 ).lower() 

283 

284 

285def get_llm( 

286 model_name=None, 

287 temperature=None, 

288 provider=None, 

289 openai_endpoint_url=None, 

290 research_id=None, 

291 research_context=None, 

292 settings_snapshot=None, 

293): 

294 """ 

295 Get LLM instance based on model name and provider. 

296 

297 Args: 

298 model_name: Name of the model to use (if None, uses database setting) 

299 temperature: Model temperature (if None, uses database setting) 

300 provider: Provider to use (if None, uses database setting) 

301 openai_endpoint_url: Custom endpoint URL to use (if None, uses database 

302 setting) 

303 research_id: Optional research ID for token tracking 

304 research_context: Optional research context for enhanced token tracking 

305 

306 Returns: 

307 A LangChain LLM instance with automatic think-tag removal 

308 """ 

309 

310 # Use database values for parameters if not provided 

311 if model_name is None: 

312 model_name = get_setting_from_snapshot( 

313 "llm.model", "gemma:latest", settings_snapshot=settings_snapshot 

314 ) 

315 if temperature is None: 

316 temperature = get_setting_from_snapshot( 

317 "llm.temperature", 0.7, settings_snapshot=settings_snapshot 

318 ) 

319 if provider is None: 

320 provider = get_setting_from_snapshot( 

321 "llm.provider", "ollama", settings_snapshot=settings_snapshot 

322 ) 

323 

324 # Clean model name: remove quotes and extra whitespace 

325 if model_name:  [325 ↛ 329: the condition was always true]

326 model_name = model_name.strip().strip("\"'").strip() 

327 

328 # Clean provider: remove quotes and extra whitespace 

329 if provider:  [329 ↛ 333: the condition was always true]

330 provider = provider.strip().strip("\"'").strip() 

331 

332 # Normalize provider: convert to lowercase 

333 provider = provider.lower() if provider else None 

334 

335 # Check if this is a registered custom LLM first 

336 if provider and is_llm_registered(provider): 

337 logger.info(f"Using registered custom LLM: {provider}") 

338 custom_llm = get_llm_from_registry(provider) 

339 

340 # Check if it's a callable (factory function) or a BaseChatModel instance 

341 if callable(custom_llm) and not isinstance(custom_llm, BaseChatModel): 

342 # It's a callable (factory function), call it with parameters 

343 try: 

344 llm_instance = custom_llm( 

345 model_name=model_name, 

346 temperature=temperature, 

347 settings_snapshot=settings_snapshot, 

348 ) 

349 except TypeError as e: 

350 # Re-raise TypeError with better message 

351 raise TypeError( 

352 f"Registered LLM factory '{provider}' has invalid signature. " 

353 f"Factory functions must accept 'model_name', 'temperature', and 'settings_snapshot' parameters. " 

354 f"Error: {e}" 

355 ) 

356 

357 # Validate the result is a BaseChatModel 

358 if not isinstance(llm_instance, BaseChatModel): 

359 raise ValueError( 

360 f"Factory function for {provider} must return a BaseChatModel instance, " 

361 f"got {type(llm_instance).__name__}" 

362 ) 

363 elif isinstance(custom_llm, BaseChatModel):  [363 ↛ 367: the condition was always true]

364 # It's already a proper LLM instance, use it directly 

365 llm_instance = custom_llm 

366 else: 

367 raise ValueError( 

368 f"Registered LLM {provider} must be either a BaseChatModel instance " 

369 f"or a callable factory function. Got: {type(custom_llm).__name__}" 

370 ) 

371 

372 return wrap_llm_without_think_tags( 

373 llm_instance, 

374 research_id=research_id, 

375 provider=provider, 

376 research_context=research_context, 

377 settings_snapshot=settings_snapshot, 

378 ) 

379 

380 # Check if we're in testing mode and should use fallback (but only when no API keys are configured) 

381 # Skip fallback check if we're in test mode with mocks 

382 if os.environ.get("LDR_USE_FALLBACK_LLM", "") and not os.environ.get(  [382 ↛ 386: the condition was never true]

383 "LDR_TESTING_WITH_MOCKS", "" 

384 ): 

385 # Only use fallback if the provider has no valid configuration 

386 provider_has_config = False 

387 

388 if ( 

389 ( 

390 provider == "openai" 

391 and get_setting_from_snapshot( 

392 "llm.openai.api_key", 

393 default=None, 

394 settings_snapshot=settings_snapshot, 

395 ) 

396 ) 

397 or ( 

398 provider == "anthropic" 

399 and get_setting_from_snapshot( 

400 "llm.anthropic.api_key", 

401 default=None, 

402 settings_snapshot=settings_snapshot, 

403 ) 

404 ) 

405 or ( 

406 provider == "openai_endpoint" 

407 and get_setting_from_snapshot( 

408 "llm.openai_endpoint.api_key", 

409 settings_snapshot=settings_snapshot, 

410 ) 

411 ) 

412 or ( 

413 provider == "ollama" 

414 and is_ollama_available(settings_snapshot=settings_snapshot) 

415 ) 

416 ): 

417 provider_has_config = True 

418 elif provider in ["vllm", "lmstudio", "llamacpp"]: 

419 # These are local providers, check their availability 

420 if ( 

421 (provider == "vllm" and is_vllm_available()) 

422 or ( 

423 provider == "lmstudio" 

424 and is_lmstudio_available( 

425 settings_snapshot=settings_snapshot 

426 ) 

427 ) 

428 or ( 

429 provider == "llamacpp" 

430 and is_llamacpp_available( 

431 settings_snapshot=settings_snapshot 

432 ) 

433 ) 

434 ): 

435 provider_has_config = True 

436 

437 if not provider_has_config: 

438 logger.info( 

439 "LDR_USE_FALLBACK_LLM is set and no valid provider config found, using fallback model" 

440 ) 

441 return wrap_llm_without_think_tags( 

442 get_fallback_model(temperature), 

443 research_id=research_id, 

444 provider="fallback", 

445 research_context=research_context, 

446 settings_snapshot=settings_snapshot, 

447 ) 

448 

449 # Validate provider 

450 if provider not in VALID_PROVIDERS: 

451 logger.error(f"Invalid provider in settings: {provider}") 

452 raise ValueError( 

453 f"Invalid provider: {provider}. Must be one of: {VALID_PROVIDERS}" 

454 ) 

455 logger.info( 

456 f"Getting LLM with model: {model_name}, temperature: {temperature}, provider: {provider}" 

457 ) 

458 

459 # Common parameters for all models 

460 common_params = { 

461 "temperature": temperature, 

462 } 

463 

464 # Get context window size from settings (use different defaults for local vs cloud providers) 

465 def get_context_window_size(provider_type): 

466 if provider_type in ["ollama", "llamacpp", "lmstudio"]: 

467 # Local providers: use smaller default to prevent memory issues 

468 window_size = get_setting_from_snapshot( 

469 "llm.local_context_window_size", 

470 4096, 

471 settings_snapshot=settings_snapshot, 

472 ) 

473 # Ensure it's an integer 

474 return int(window_size) if window_size is not None else 4096 

475 else: 

476 # Cloud providers: check if unrestricted mode is enabled 

477 use_unrestricted = get_setting_from_snapshot( 

478 "llm.context_window_unrestricted", 

479 True, 

480 settings_snapshot=settings_snapshot, 

481 ) 

482 if use_unrestricted:  [482 ↛ 487: the condition was always true]

483 # Let cloud providers auto-handle context (return None or very large value) 

484 return None # Will be handled per provider 

485 else: 

486 # Use user-specified limit 

487 window_size = get_setting_from_snapshot( 

488 "llm.context_window_size", 

489 128000, 

490 settings_snapshot=settings_snapshot, 

491 ) 

492 return int(window_size) if window_size is not None else 128000 

493 

494 context_window_size = get_context_window_size(provider) 

495 

496 # Add context limit to research context for overflow detection 

497 if research_context and context_window_size: 

498 research_context["context_limit"] = context_window_size 

499 logger.info( 

500 f"Set context_limit={context_window_size} in research_context" 

501 ) 

502 else: 

503 logger.debug( 

504 f"Context limit not set: research_context={bool(research_context)}, context_window_size={context_window_size}" 

505 ) 

506 

507 if get_setting_from_snapshot(  [507 ↛ 535: the condition was always true]

508 "llm.supports_max_tokens", True, settings_snapshot=settings_snapshot 

509 ): 

510 # Use 80% of context window to leave room for prompts 

511 if context_window_size is not None: 

512 max_tokens = min( 

513 int( 

514 get_setting_from_snapshot( 

515 "llm.max_tokens", 

516 100000, 

517 settings_snapshot=settings_snapshot, 

518 ) 

519 ), 

520 int(context_window_size * 0.8), 

521 ) 

522 common_params["max_tokens"] = max_tokens 

523 else: 

524 # Unrestricted context: use provider's default max_tokens 

525 max_tokens = int( 

526 get_setting_from_snapshot( 

527 "llm.max_tokens", 

528 100000, 

529 settings_snapshot=settings_snapshot, 

530 ) 

531 ) 

532 common_params["max_tokens"] = max_tokens 

533 

534 # Handle different providers 

535 if provider == "anthropic": 

536 api_key = get_setting_from_snapshot( 

537 "llm.anthropic.api_key", settings_snapshot=settings_snapshot 

538 ) 

539 

540 if not api_key: 

541 logger.warning( 

542 "Anthropic API key not found in settings. Falling back to default model." 

543 ) 

544 return get_fallback_model(temperature) 

545 

546 llm = ChatAnthropic( 

547 model=model_name, anthropic_api_key=api_key, **common_params 

548 ) 

549 return wrap_llm_without_think_tags( 

550 llm, 

551 research_id=research_id, 

552 provider=provider, 

553 research_context=research_context, 

554 settings_snapshot=settings_snapshot, 

555 ) 

556 

557 elif provider == "openai": 

558 api_key = get_setting_from_snapshot( 

559 "llm.openai.api_key", settings_snapshot=settings_snapshot 

560 ) 

561 

562 if not api_key: 

563 logger.warning( 

564 "OpenAI API key not found in settings. Falling back to default model." 

565 ) 

566 return get_fallback_model(temperature) 

567 

568 # Build OpenAI-specific parameters 

569 openai_params = { 

570 "model": model_name, 

571 "api_key": api_key, 

572 **common_params, 

573 } 

574 

575 # Add optional parameters if they exist in settings 

576 try: 

577 api_base = get_setting_from_snapshot( 

578 "llm.openai.api_base", 

579 default=None, 

580 settings_snapshot=settings_snapshot, 

581 ) 

582 if api_base:  [582 ↛ 583: the condition was never true]

583 openai_params["openai_api_base"] = api_base 

584 except NoSettingsContextError: 

585 pass # Optional parameter 

586 

587 try: 

588 organization = get_setting_from_snapshot( 

589 "llm.openai.organization", 

590 default=None, 

591 settings_snapshot=settings_snapshot, 

592 ) 

593 if organization:  [593 ↛ 594: the condition was never true]

594 openai_params["openai_organization"] = organization 

595 except NoSettingsContextError: 

596 pass # Optional parameter 

597 

598 try: 

599 streaming = get_setting_from_snapshot( 

600 "llm.streaming", 

601 default=None, 

602 settings_snapshot=settings_snapshot, 

603 ) 

604 except NoSettingsContextError: 

605 streaming = None # Optional parameter 

606 if streaming is not None:  [606 ↛ 607: the condition was never true]

607 openai_params["streaming"] = streaming 

608 

609 try: 

610 max_retries = get_setting_from_snapshot( 

611 "llm.max_retries", 

612 default=None, 

613 settings_snapshot=settings_snapshot, 

614 ) 

615 if max_retries is not None:  [615 ↛ 616: the condition was never true]

616 openai_params["max_retries"] = max_retries 

617 except NoSettingsContextError: 

618 pass # Optional parameter 

619 

620 try: 

621 request_timeout = get_setting_from_snapshot( 

622 "llm.request_timeout", 

623 default=None, 

624 settings_snapshot=settings_snapshot, 

625 ) 

626 if request_timeout is not None:  [626 ↛ 627: the condition was never true]

627 openai_params["request_timeout"] = request_timeout 

628 except NoSettingsContextError: 

629 pass # Optional parameter 

630 

631 llm = ChatOpenAI(**openai_params) 

632 return wrap_llm_without_think_tags( 

633 llm, 

634 research_id=research_id, 

635 provider=provider, 

636 research_context=research_context, 

637 settings_snapshot=settings_snapshot, 

638 ) 

639 

640 elif provider == "openai_endpoint": 

641 api_key = get_setting_from_snapshot( 

642 "llm.openai_endpoint.api_key", settings_snapshot=settings_snapshot 

643 ) 

644 

645 if not api_key:  [645 ↛ 646: the condition was never true]

646 logger.warning( 

647 "OpenAI endpoint API key not found in settings. Falling back to default model." 

648 ) 

649 return get_fallback_model(temperature) 

650 

651 # Get endpoint URL from settings 

652 if openai_endpoint_url is None:  [652 ↛ 658: the condition was always true]

653 openai_endpoint_url = get_setting_from_snapshot( 

654 "llm.openai_endpoint.url", 

655 "https://openrouter.ai/api/v1", 

656 settings_snapshot=settings_snapshot, 

657 ) 

658 openai_endpoint_url = normalize_url(openai_endpoint_url) 

659 

660 llm = ChatOpenAI( 

661 model=model_name, 

662 api_key=api_key, 

663 openai_api_base=openai_endpoint_url, 

664 **common_params, 

665 ) 

666 return wrap_llm_without_think_tags( 

667 llm, 

668 research_id=research_id, 

669 provider=provider, 

670 research_context=research_context, 

671 settings_snapshot=settings_snapshot, 

672 ) 

673 

674 elif provider == "vllm":  [674 ↛ 675: the condition was never true]

675 if not is_vllm_available(): 

676 logger.warning( 

677 "VLLM dependencies are not available. Falling back to default model." 

678 ) 

679 return get_fallback_model(temperature) 

680 

681 try: 

682 from langchain_community.llms import VLLM 

683 

684 llm = VLLM( 

685 model=model_name, 

686 trust_remote_code=True, 

687 max_new_tokens=128, 

688 top_k=10, 

689 top_p=0.95, 

690 temperature=temperature, 

691 ) 

692 return wrap_llm_without_think_tags( 

693 llm, 

694 research_id=research_id, 

695 provider=provider, 

696 research_context=research_context, 

697 ) 

698 except Exception: 

699 logger.exception("Error loading VLLM model") 

700 return get_fallback_model(temperature) 

701 

702 elif provider == "ollama": 

703 try: 

704 # Use the configurable Ollama base URL 

705 raw_base_url = get_setting_from_snapshot( 

706 "llm.ollama.url", 

707 "http://localhost:11434", 

708 settings_snapshot=settings_snapshot, 

709 ) 

710 base_url = ( 

711 normalize_url(raw_base_url) 

712 if raw_base_url 

713 else "http://localhost:11434" 

714 ) 

715 

716 # Check if Ollama is available before trying to use it 

717 if not is_ollama_available(settings_snapshot=settings_snapshot):  [717 ↛ 724: the condition was always true]

718 logger.error( 

719 f"Ollama not available at {base_url}. Falling back to dummy model." 

720 ) 

721 return get_fallback_model(temperature) 

722 

723 # Check if the requested model exists 

724 try: 

725 logger.info( 

726 f"Checking if model '{model_name}' exists in Ollama" 

727 ) 

728 response = safe_get( 

729 f"{base_url}/api/tags", 

730 timeout=3.0, 

731 allow_localhost=True, 

732 allow_private_ips=True, 

733 ) 

734 if response.status_code == 200: 

735 # Handle both newer and older Ollama API formats 

736 data = response.json() 

737 models = [] 

738 if "models" in data: 

739 # Newer Ollama API 

740 models = data.get("models", []) 

741 else: 

742 # Older Ollama API format 

743 models = data 

744 

745 # Get list of model names 

746 model_names = [m.get("name", "").lower() for m in models] 

747 logger.info( 

748 f"Available Ollama models: {', '.join(model_names[:5])}{' and more' if len(model_names) > 5 else ''}" 

749 ) 

750 

751 if model_name.lower() not in model_names: 

752 logger.error( 

753 f"Model '{model_name}' not found in Ollama. Available models: {', '.join(model_names[:5])}" 

754 ) 

755 return get_fallback_model(temperature) 

756 except Exception: 

757 logger.exception( 

758 f"Error checking for model '{model_name}' in Ollama" 

759 ) 

760 # Continue anyway, let ChatOllama handle potential errors 

761 

762 logger.info( 

763 f"Creating ChatOllama with model={model_name}, base_url={base_url}" 

764 ) 

765 try: 

766 # Add num_ctx parameter for Ollama context window size 

767 ollama_params = {**common_params} 

768 if context_window_size is not None: 

769 ollama_params["num_ctx"] = context_window_size 

770 

771 # Thinking/reasoning handling for models like deepseek-r1: 

772 # The 'reasoning' parameter controls both: 

773 # 1. Whether the model performs thinking (makes it smarter when True) 

774 # 2. Whether thinking is separated from the answer (always separated when True) 

775 # 

776 # When reasoning=True: 

777 # - Model performs thinking/reasoning 

778 # - Thinking goes to additional_kwargs["reasoning_content"] (discarded by LDR) 

779 # - Only the final answer appears in response.content 

780 # 

781 # When reasoning=False: 

782 # - Model does NOT perform thinking (faster but less smart) 

783 # - Gives direct answers 

784 

785 enable_thinking = get_setting_from_snapshot( 

786 "llm.ollama.enable_thinking", 

787 True, # Default: enable thinking (smarter responses) 

788 settings_snapshot=settings_snapshot, 

789 ) 

790 

791 if enable_thinking is not None and isinstance( 

792 enable_thinking, bool 

793 ): 

794 ollama_params["reasoning"] = enable_thinking 

795 logger.debug( 

796 f"Ollama thinking enabled: {enable_thinking} " 

797 f"(thinking will be {'shown internally but discarded' if enable_thinking else 'disabled'})" 

798 ) 

799 

800 llm = ChatOllama( 

801 model=model_name, base_url=base_url, **ollama_params 

802 ) 

803 

804 # Log the actual client configuration after creation 

805 logger.debug( 

806 f"ChatOllama created - base_url attribute: {getattr(llm, 'base_url', 'not found')}" 

807 ) 

808 if hasattr(llm, "_client"): 

809 client = llm._client 

810 logger.debug(f"ChatOllama _client type: {type(client)}") 

811 if hasattr(client, "_client"): 

812 inner_client = client._client 

813 logger.debug( 

814 f"ChatOllama inner client type: {type(inner_client)}" 

815 ) 

816 if hasattr(inner_client, "base_url"): 

817 logger.debug( 

818 f"ChatOllama inner client base_url: {inner_client.base_url}" 

819 ) 

820 

821 return wrap_llm_without_think_tags( 

822 llm, 

823 research_id=research_id, 

824 provider=provider, 

825 research_context=research_context, 

826 settings_snapshot=settings_snapshot, 

827 ) 

828 except Exception: 

829 logger.exception("Error creating or testing ChatOllama") 

830 return get_fallback_model(temperature) 

831 except Exception: 

832 logger.exception("Error in Ollama provider section") 

833 return get_fallback_model(temperature) 

834 

835 elif provider == "lmstudio": 

836 # LM Studio supports OpenAI API format, so we can use ChatOpenAI directly 

837 lmstudio_url = get_setting_from_snapshot( 

838 "llm.lmstudio.url", 

839 "http://localhost:1234/v1", 

840 settings_snapshot=settings_snapshot, 

841 ) 

842 # Use URL as-is (default already includes /v1) 

843 base_url = normalize_url(lmstudio_url) 

844 

845 llm = ChatOpenAI( 

846 model=model_name, 

847 api_key="lm-studio", # LM Studio doesn't require a real API key # pragma: allowlist secret 

848 base_url=base_url, 

849 temperature=temperature, 

850 max_tokens=max_tokens, # Use calculated max_tokens based on context size 

851 ) 

852 return wrap_llm_without_think_tags( 

853 llm, 

854 research_id=research_id, 

855 provider=provider, 

856 research_context=research_context, 

857 settings_snapshot=settings_snapshot, 

858 ) 

859 

860 # LlamaCpp provider: supports direct model loading or an HTTP server mode 

861 elif provider == "llamacpp":  [861 ↛ 863: the condition was never true]

862 # Import LlamaCpp 

863 from langchain_community.llms import LlamaCpp 

864 

865 # Get LlamaCpp connection mode from settings 

866 connection_mode = get_setting_from_snapshot( 

867 "llm.llamacpp_connection_mode", 

868 "local", 

869 settings_snapshot=settings_snapshot, 

870 ) 

871 

872 if connection_mode == "http": 

873 # Use HTTP client mode 

874 from langchain_community.llms.llamacpp_client import LlamaCppClient 

875 

876 server_url = get_setting_from_snapshot( 

877 "llm.llamacpp_server_url", 

878 "http://localhost:8000", 

879 settings_snapshot=settings_snapshot, 

880 ) 

881 

882 llm = LlamaCppClient( 

883 server_url=server_url, 

884 temperature=temperature, 

885 max_tokens=get_setting_from_snapshot( 

886 "llm.max_tokens", 8192, settings_snapshot=settings_snapshot 

887 ), 

888 ) 

889 else: 

890 # Use direct model loading (existing code) 

891 # Get LlamaCpp model path from settings 

892 model_path = get_setting_from_snapshot( 

893 "llm.llamacpp_model_path", settings_snapshot=settings_snapshot 

894 ) 

895 if not model_path: 

896 logger.error("llamacpp_model_path not set in settings") 

897 raise ValueError("llamacpp_model_path not set in settings") 

898 

899 # Validate model path for security using centralized validator 

900 from ..security.path_validator import PathValidator 

901 

902 try: 

903 validated_path = PathValidator.validate_model_path(model_path) 

904 model_path = str(validated_path) 

905 except ValueError: 

906 logger.exception("Model path validation failed") 

907 raise 

908 

909 # Get additional LlamaCpp parameters 

910 n_gpu_layers = get_setting_from_snapshot( 

911 "llm.llamacpp_n_gpu_layers", 

912 1, 

913 settings_snapshot=settings_snapshot, 

914 ) 

915 n_batch = get_setting_from_snapshot( 

916 "llm.llamacpp_n_batch", 512, settings_snapshot=settings_snapshot 

917 ) 

918 f16_kv = get_setting_from_snapshot( 

919 "llm.llamacpp_f16_kv", True, settings_snapshot=settings_snapshot 

920 ) 

921 

922 # Create LlamaCpp instance 

923 llm = LlamaCpp( 

924 model_path=model_path, 

925 temperature=temperature, 

926 max_tokens=max_tokens, # Use calculated max_tokens 

927 n_gpu_layers=n_gpu_layers, 

928 n_batch=n_batch, 

929 f16_kv=f16_kv, 

930 n_ctx=context_window_size, # Set context window size directly (None = use default) 

931 verbose=True, 

932 ) 

933 

934 return wrap_llm_without_think_tags( 

935 llm, 

936 research_id=research_id, 

937 provider=provider, 

938 research_context=research_context, 

939 settings_snapshot=settings_snapshot, 

940 ) 

941 

942 else: 

943 return wrap_llm_without_think_tags( 

944 get_fallback_model(temperature), 

945 research_id=research_id, 

946 provider=provider, 

947 research_context=research_context, 

948 settings_snapshot=settings_snapshot, 

949 ) 

950 

951 
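
A hedged usage sketch of get_llm, restating the token budgeting from the function body with its default numbers (local context window 4096, llm.max_tokens default 100000, 80% budget); the model name and snapshot are placeholders.

# Local providers default to a 4096-token context window, so the request budget
# works out to max_tokens = min(100000, int(4096 * 0.8)) = 3276.
llm = get_llm(
    model_name="gemma:latest",           # placeholder model name
    temperature=0.2,
    provider="ollama",
    settings_snapshot=example_snapshot,  # hypothetical snapshot from the earlier sketch
)
reply = llm.invoke("Summarize the trade-offs of local vs. hosted models.")
print(getattr(reply, "content", reply))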

952def get_fallback_model(temperature=None): 

953 """Create a dummy model for when no providers are available""" 

954 return FakeListChatModel( 

955 responses=[ 

956 "No language models are available. Please install Ollama or set up API keys." 

957 ] 

958 ) 

959 

960 
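
The fallback is a plain FakeListChatModel from langchain_core that replays its canned message, so a quick sanity check (illustrative only) looks like this:

fallback = get_fallback_model()
print(fallback.invoke("anything").content)
# -> "No language models are available. Please install Ollama or set up API keys."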

961def wrap_llm_without_think_tags( 

962 llm, 

963 research_id=None, 

964 provider=None, 

965 research_context=None, 

966 settings_snapshot=None, 

967): 

968 """Create a wrapper class that processes LLM outputs with remove_think_tags and token counting""" 

969 

970 # First apply rate limiting if enabled 

971 from ..web_search_engines.rate_limiting.llm import ( 

972 create_rate_limited_llm_wrapper, 

973 ) 

974 

975 # Check if LLM rate limiting is enabled (independent of search rate limiting) 

976 # Use the thread-safe get_setting_from_snapshot defined in this module 

977 if get_setting_from_snapshot( 

978 "rate_limiting.llm_enabled", False, settings_snapshot=settings_snapshot 

979 ): 

980 llm = create_rate_limited_llm_wrapper(llm, provider) 

981 

982 # Import token counting functionality if research_id is provided 

983 callbacks = [] 

984 if research_id is not None:  [984 ↛ 985: the condition was never true]

985 from ..metrics import TokenCounter 

986 

987 token_counter = TokenCounter() 

988 token_callback = token_counter.create_callback( 

989 research_id, research_context 

990 ) 

991 # Set provider and model info on the callback 

992 if provider: 

993 token_callback.preset_provider = provider 

994 # Try to extract model name from the LLM instance 

995 if hasattr(llm, "model_name"): 

996 token_callback.preset_model = llm.model_name 

997 elif hasattr(llm, "model"): 

998 token_callback.preset_model = llm.model 

999 callbacks.append(token_callback) 

1000 

1001 # Add callbacks to the LLM if it supports them 

1002 if callbacks and hasattr(llm, "callbacks"):  [1002 ↛ 1003: the condition was never true]

1003 if llm.callbacks is None: 

1004 llm.callbacks = callbacks 

1005 else: 

1006 llm.callbacks.extend(callbacks) 

1007 

1008 class ProcessingLLMWrapper: 

1009 def __init__(self, base_llm): 

1010 self.base_llm = base_llm 

1011 

1012 def invoke(self, *args, **kwargs): 

1013 # Removed verbose debug logging to reduce log clutter 

1014 # Uncomment the lines below if you need to debug LLM requests 

1015 # if hasattr(self.base_llm, "base_url"): 

1016 # logger.debug(f"LLM Request - Base URL: {self.base_llm.base_url}") 

1017 # logger.debug(f"LLM Request - Model: {getattr(self.base_llm, 'model', 'unknown')}") 

1018 

1019 try: 

1020 response = self.base_llm.invoke(*args, **kwargs) 

1021 # logger.debug(f"LLM Response - Success, type: {type(response)}") 

1022 except Exception as e: 

1023 logger.exception("LLM Request - Failed with error") 

1024 # Log any URL information from the error 

1025 error_str = str(e) 

1026 if "http://" in error_str or "https://" in error_str: 1026 ↛ 1027line 1026 didn't jump to line 1027 because the condition on line 1026 was never true

1027 logger.exception( 

1028 f"LLM Request - Error contains URL info: {error_str}" 

1029 ) 

1030 raise 

1031 

1032 # Process the response content if it has a content attribute 

1033 if hasattr(response, "content"): 

1034 response.content = remove_think_tags(response.content) 

1035 elif isinstance(response, str):  [1035 ↛ 1038: the condition was always true]

1036 response = remove_think_tags(response) 

1037 

1038 return response 

1039 

1040 # Pass through any other attributes to the base LLM 

1041 def __getattr__(self, name): 

1042 return getattr(self.base_llm, name) 

1043 

1044 return ProcessingLLMWrapper(llm)
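
To close, a sketch of the wrapper's post-processing using a fake model; the exact <think>...</think> markup stripped by remove_think_tags is an assumption based on its name, and the flat snapshot layout is the same hypothetical one used earlier.

from langchain_core.language_models import FakeListChatModel

fake = FakeListChatModel(
    responses=["<think>internal reasoning</think>The final answer."]
)
wrapped = wrap_llm_without_think_tags(
    fake,
    provider="fake",
    settings_snapshot={"rate_limiting.llm_enabled": False},  # assumed flat layout
)
# Assuming remove_think_tags strips the <think> block, only the answer remains.
print(wrapped.invoke("question").content)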