Mirror of https://github.com/oobabooga/text-generation-webui.git, synced 2026-04-05 14:45:28 +00:00.
Delegate GPU layer allocation to llama.cpp's --fit
This commit is contained in:
parent
8a3d866401
commit
f4d787ab8d
5 changed files with 26 additions and 145 deletions
|
|
@@ -329,7 +329,6 @@ class LlamaServer:
|
|||
self.server_path,
|
||||
"--model", self.model_path,
|
||||
"--ctx-size", str(shared.args.ctx_size),
|
||||
"--gpu-layers", str(shared.args.gpu_layers),
|
||||
"--batch-size", str(shared.args.batch_size),
|
||||
"--ubatch-size", str(shared.args.ubatch_size),
|
||||
"--port", str(self.port),
|
||||
|
|
@@ -337,6 +336,11 @@
|
|||
"--flash-attn", "on",
|
||||
]
|
||||
|
||||
if shared.args.gpu_layers > 0:
|
||||
cmd += ["--gpu-layers", str(shared.args.gpu_layers), "--fit", "off"]
|
||||
else:
|
||||
cmd += ["--fit", "on"]
|
||||
|
||||
if shared.args.threads > 0:
|
||||
cmd += ["--threads", str(shared.args.threads)]
|
||||
if shared.args.threads_batch > 0:
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue