Estimate the VRAM for GGUF models + autoset gpu-layers (#6980)

This commit is contained in:
oobabooga 2025-05-16 00:07:37 -03:00 committed by GitHub
parent c4a715fd1e
commit 5534d01da0
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 193 additions and 4 deletions

View file

@@ -71,7 +71,6 @@ def llama_cpp_server_loader(model_name):
else:
model_file = sorted(Path(f'{shared.args.model_dir}/{model_name}').glob('*.gguf'))[0]
logger.info(f"llama.cpp weights detected: \"{model_file}\"")
try:
model = LlamaServer(model_file)
return model, model