From f3829b268a870c8113dc4146a13e5d9e07fd1aea Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Tue, 2 Sep 2025 12:12:17 -0700
Subject: [PATCH] llama.cpp: Always pass --flash-attn on

---
 modules/llama_cpp_server.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modules/llama_cpp_server.py b/modules/llama_cpp_server.py
index 6a094c9d..38589cf2 100644
--- a/modules/llama_cpp_server.py
+++ b/modules/llama_cpp_server.py
@@ -316,6 +316,7 @@ class LlamaServer:
             "--batch-size", str(shared.args.batch_size),
             "--port", str(self.port),
             "--no-webui",
+            "--flash-attn", "on",
         ]
 
         if shared.args.threads > 0:
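
The patch appends `"--flash-attn", "on"` unconditionally to the argument list that `LlamaServer` builds for the `llama-server` binary, so flash attention is always requested rather than being gated on a separate option. Below is a minimal, self-contained sketch (not the project's actual class) of how such a command line might be assembled; the `Args` dataclass and `build_server_cmd` function are hypothetical stand-ins for `shared.args` and the server launcher.

```python
# Hypothetical sketch of assembling a llama-server command line,
# with "--flash-attn", "on" always appended as in the patch above.
from dataclasses import dataclass


@dataclass
class Args:
    model_path: str = "model.gguf"
    batch_size: int = 256
    threads: int = 0


def build_server_cmd(args: Args, port: int) -> list[str]:
    cmd = [
        "llama-server",
        "--model", args.model_path,
        "--batch-size", str(args.batch_size),
        "--port", str(port),
        "--no-webui",
        # Flash attention is now passed unconditionally, mirroring the change above.
        "--flash-attn", "on",
    ]
    if args.threads > 0:
        cmd += ["--threads", str(args.threads)]
    return cmd


if __name__ == "__main__":
    print(" ".join(build_server_cmd(Args(), port=8080)))
```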