llama.cpp: allow ctx_size=0 for auto context via --fit
Parent: fbfcd59fe0
Commit: 69fa4dd0b1
6 changed files with 13 additions and 9 deletions
@@ -339,7 +339,6 @@ class LlamaServer:
         cmd = [
             self.server_path,
             "--model", self.model_path,
-            "--ctx-size", str(shared.args.ctx_size),
             "--batch-size", str(shared.args.batch_size),
             "--ubatch-size", str(shared.args.ubatch_size),
             "--port", str(self.port),
@@ -347,6 +346,9 @@ class LlamaServer:
             "--flash-attn", "on",
         ]
 
+        if shared.args.ctx_size > 0:
+            cmd += ["--ctx-size", str(shared.args.ctx_size)]
+
         if shared.args.gpu_layers >= 0:
             cmd += ["--gpu-layers", str(shared.args.gpu_layers), "--fit", "off"]
         else:
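Taken together, the two new branches mean: leaving ctx_size at 0 drops --ctx-size from the command line entirely, letting llama-server's --fit logic size the context on its own, while an explicit gpu_layers value pins the layer count and disables fitting ("--fit", "off"). The following is a minimal, self-contained sketch of that flag assembly, assuming a hypothetical Args stand-in for shared.args; the else branch is truncated in the hunk above and therefore omitted here.

# Sketch only, not the repo's actual module; Args stands in for shared.args.
from dataclasses import dataclass


@dataclass
class Args:
    ctx_size: int = 0      # 0 = let llama-server fit the context automatically
    gpu_layers: int = -1   # < 0 = let llama-server decide the GPU offload


def build_flags(args: Args) -> list[str]:
    cmd = []
    # Only pass --ctx-size for an explicit value; with ctx_size=0 the flag
    # is omitted and llama-server's --fit sizes the context.
    if args.ctx_size > 0:
        cmd += ["--ctx-size", str(args.ctx_size)]
    # An explicit layer count disables automatic fitting entirely.
    if args.gpu_layers >= 0:
        cmd += ["--gpu-layers", str(args.gpu_layers), "--fit", "off"]
    return cmd


print(build_flags(Args()))                              # [] -> fully automatic
print(build_flags(Args(ctx_size=8192, gpu_layers=33)))  # explicit values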
@@ -449,7 +451,8 @@ class LlamaServer:
         print()
 
         gpu_layers_str = "auto" if shared.args.gpu_layers < 0 else str(shared.args.gpu_layers)
-        logger.info(f"Using gpu_layers={gpu_layers_str} | ctx_size={shared.args.ctx_size} | cache_type={cache_type}")
+        ctx_size_str = "auto" if shared.args.ctx_size == 0 else str(shared.args.ctx_size)
+        logger.info(f"Using gpu_layers={gpu_layers_str} | ctx_size={ctx_size_str} | cache_type={cache_type}")
         # Start the server with pipes for output
         self.process = subprocess.Popen(
             cmd,
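With both values left at their automatic defaults, the updated startup log would read along the lines of "Using gpu_layers=auto | ctx_size=auto | cache_type=fp16" (the cache_type value here is illustrative, not taken from the diff); previously ctx_size was always printed as a raw number, so 0 gave no hint that automatic fitting was in effect.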