llama.cpp: Remove the --flash-attn flag (it's always on now)

This commit is contained in:
oobabooga 2025-08-30 20:27:32 -07:00
parent 7b80e9a2ad
commit 13876a1ee8
5 changed files with 0 additions and 9 deletions

View file

@@ -318,8 +318,6 @@ class LlamaServer:
     "--no-webui",
 ]
-if shared.args.flash_attn:
-    cmd.append("--flash-attn")
 if shared.args.threads > 0:
     cmd += ["--threads", str(shared.args.threads)]
 if shared.args.threads_batch > 0: