llama.cpp: pass --swa-full to llama-server when streaming-llm is checked

This commit is contained in:
oobabooga 2025-08-11 15:17:25 -07:00
parent 0e88a621fd
commit 0e3def449a

View file

@@ -356,6 +356,7 @@ class LlamaServer:
cmd += ["--ctx-size-draft", str(shared.args.ctx_size_draft)]
if shared.args.streaming_llm:
cmd += ["--cache-reuse", "1"]
cmd += ["--swa-full"]
if shared.args.extra_flags:
# Clean up the input
extra_flags = shared.args.extra_flags.strip()