From 0e3def449a8bf71ab40c052e4206f612aeba0a60 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 11 Aug 2025 15:17:25 -0700 Subject: [PATCH] llama.cpp: pass --swa-full to llama-server when streaming-llm is checked --- modules/llama_cpp_server.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/llama_cpp_server.py b/modules/llama_cpp_server.py index 63c8eda0..58534f26 100644 --- a/modules/llama_cpp_server.py +++ b/modules/llama_cpp_server.py @@ -356,6 +356,7 @@ class LlamaServer: cmd += ["--ctx-size-draft", str(shared.args.ctx_size_draft)] if shared.args.streaming_llm: cmd += ["--cache-reuse", "1"] + cmd += ["--swa-full"] if shared.args.extra_flags: # Clean up the input extra_flags = shared.args.extra_flags.strip()