Coverage for src/local_deep_research/llm/providers/implementations/llamacpp.py: 100%

35 statements  

« prev     ^ index     » next       coverage.py v7.14.1, created at 2026-06-03 23:15 +0000

1"""llama.cpp LLM provider for Local Deep Research. 

2 

3Talks to llama.cpp's OpenAI-compatible HTTP server (`llama-server`) instead 

4of loading models in-process via `llama-cpp-python`. Modeled after 

5`LMStudioProvider`. For setups that need API key auth or non-default URLs 

6beyond a single endpoint, use the `openai_endpoint` provider directly. 

7""" 

8 

9from ....config.constants import DEFAULT_LLAMACPP_URL 

10from ....utilities.url_utils import normalize_url 

11from ..openai_base import OpenAICompatibleProvider 

12 

13 

class LlamaCppProvider(OpenAICompatibleProvider):
    """Provider that reaches llama.cpp through its OpenAI-compatible server.

    Start `llama-server -m <model.gguf>` (it listens on port 8080 by
    default) and set `llm.llamacpp.url` to that server's `/v1` endpoint.
    """

    provider_name = "llama.cpp"
    # A key is not *required*, which is what api_key_setting=None signals to
    # the parent class. `create_llm` below nevertheless consults
    # `llm.llamacpp.api_key` for auth-enabled setups and substitutes a
    # placeholder whenever that setting is empty.
    api_key_setting = None  # type: ignore[assignment]
    url_setting = "llm.llamacpp.url"  # type: ignore[assignment]
    default_base_url = DEFAULT_LLAMACPP_URL
    default_model = ""  # Blank on purpose: the user names the loaded model

    # Auto-discovery metadata
    provider_key = "LLAMACPP"
    company_name = "llama.cpp"
    is_cloud = False  # Runs locally

    @classmethod
    def create_llm(cls, model_name=None, temperature=0.7, **kwargs):
        """Build a ChatOpenAI client aimed at the configured llama-server.

        Args:
            model_name: Passed through unchanged to `_create_llm_instance`.
            temperature: Passed through unchanged to `_create_llm_instance`.
            **kwargs: Extra options forwarded to `_create_llm_instance`.
                `settings_snapshot`, when present, is used for the setting
                lookups; any incoming `base_url` / `api_key` entries are
                overwritten here.

        Returns:
            Whatever the parent class's `_create_llm_instance` returns.
        """
        from ....config.thread_settings import get_setting_from_snapshot

        snapshot = kwargs.get("settings_snapshot")

        server_url = get_setting_from_snapshot(
            "llm.llamacpp.url",
            cls.default_base_url,
            settings_snapshot=snapshot,
        )
        configured_key = get_setting_from_snapshot(
            "llm.llamacpp.api_key",
            "",
            settings_snapshot=snapshot,
        )

        kwargs["base_url"] = normalize_url(server_url)
        # A user-supplied key (for instance when llama-server sits behind an
        # auth proxy) wins. With no key configured, hand ChatOpenAI a
        # placeholder so it doesn't reject the request — a llama-server
        # without auth simply ignores it.
        if configured_key:
            kwargs["api_key"] = configured_key
        else:
            kwargs["api_key"] = "lm-studio"  # gitleaks:allow

        return super()._create_llm_instance(model_name, temperature, **kwargs)

    @classmethod
    def is_available(cls, settings_snapshot=None):
        """Report whether llama-server responds at the configured URL.

        Requests `<base_url>/models` through `safe_get` with a one-second
        timeout, permitting localhost and private addresses.

        Args:
            settings_snapshot: Optional snapshot handed to the
                `llm.llamacpp.url` lookup.

        Returns:
            True only when the response status code is 200. Any exception
            raised along the way (imports included) yields False.
        """
        try:
            from ....config.thread_settings import get_setting_from_snapshot
            from ....security import safe_get

            configured_url = get_setting_from_snapshot(
                "llm.llamacpp.url",
                cls.default_base_url,
                settings_snapshot=settings_snapshot,
            )
            server_root = normalize_url(configured_url)
            probe = safe_get(
                f"{server_root}/models",
                timeout=1,
                allow_localhost=True,
                allow_private_ips=True,
            )
            return probe.status_code == 200
        except Exception:
            # Best-effort probe: every failure means "not available".
            return False

    @classmethod
    def requires_auth_for_models(cls):
        """Return False: listing models on llama-server needs no auth."""
        return False