diff --git a/modules/llama_cpp_server.py b/modules/llama_cpp_server.py index 6a094c9d..38589cf2 100644 --- a/modules/llama_cpp_server.py +++ b/modules/llama_cpp_server.py @@ -316,6 +316,7 @@ class LlamaServer: "--batch-size", str(shared.args.batch_size), "--port", str(self.port), "--no-webui", + "--flash-attn", "on", ] if shared.args.threads > 0: