llama.cpp: pass --swa-full to llama-server when streaming-llm is checked

This commit is contained in:
oobabooga 2025-08-11 15:17:25 -07:00
parent 0e88a621fd
commit 0e3def449a

View file

@@ -356,6 +356,7 @@ class LlamaServer:
cmd += ["--ctx-size-draft", str(shared.args.ctx_size_draft)]
if shared.args.streaming_llm:
cmd += ["--cache-reuse", "1"]
cmd += ["--swa-full"]
if shared.args.extra_flags:
# Clean up the input
extra_flags = shared.args.extra_flags.strip()