mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2026-04-05 14:45:28 +00:00
llama.cpp: Use -1 instead of 0 for auto gpu_layers
This commit is contained in:
parent
d45aa6606a
commit
fbfcd59fe0
4 changed files with 8 additions and 7 deletions
|
|
@@ -347,7 +347,7 @@ class LlamaServer:
|
|||
"--flash-attn", "on",
|
||||
]
|
||||
|
||||
- if shared.args.gpu_layers > 0:
|
||||
+ if shared.args.gpu_layers >= 0:
|
||||
cmd += ["--gpu-layers", str(shared.args.gpu_layers), "--fit", "off"]
|
||||
else:
|
||||
cmd += ["--fit", "on"]
|
||||
|
|
@@ -448,7 +448,8 @@ class LlamaServer:
|
|||
print(' '.join(str(item) for item in cmd[1:]))
|
||||
print()
|
||||
|
||||
- logger.info(f"Using gpu_layers={shared.args.gpu_layers} | ctx_size={shared.args.ctx_size} | cache_type={cache_type}")
|
||||
+ gpu_layers_str = "auto" if shared.args.gpu_layers < 0 else str(shared.args.gpu_layers)
|
||||
+ logger.info(f"Using gpu_layers={gpu_layers_str} | ctx_size={shared.args.ctx_size} | cache_type={cache_type}")
|
||||
# Start the server with pipes for output
|
||||
self.process = subprocess.Popen(
|
||||
cmd,
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue