llama.cpp: Always pass --flash-attn on

oobabooga 2025-09-02 12:12:17 -07:00
parent 2395c647d4
commit f3829b268a


@@ -316,6 +316,7 @@ class LlamaServer:
"--batch-size", str(shared.args.batch_size),
"--port", str(self.port),
"--no-webui",
"--flash-attn", "on",
]
if shared.args.threads > 0:
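
For reference, a minimal sketch of how a llama-server command line with flash attention forced on might be assembled and launched. The binary name, model path, and port below are placeholder assumptions for illustration, not values taken from this repository:

    import subprocess

    # Hypothetical invocation: binary name, model path, and port are placeholders.
    cmd = [
        "llama-server",
        "--model", "models/example.gguf",
        "--port", "8080",
        "--no-webui",
        "--flash-attn", "on",  # the flag this commit now passes unconditionally
    ]

    # Launch the server as a background process (sketch only).
    process = subprocess.Popen(cmd)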