llama.cpp: Always pass --flash-attn on

oobabooga 2025-09-02 12:12:17 -07:00
parent 2395c647d4
commit f3829b268a


@@ -316,6 +316,7 @@ class LlamaServer:
"--batch-size", str(shared.args.batch_size),
"--port", str(self.port),
"--no-webui",
"--flash-attn", "on",
]
if shared.args.threads > 0:
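
For reference, a minimal sketch of how a llama-server command line with flash attention forced on might be assembled and launched. The binary name, model path, and port below are placeholder assumptions for illustration, not values taken from this repository:

    import subprocess

    # Hypothetical invocation: binary name, model path, and port are placeholders.
    cmd = [
        "llama-server",
        "--model", "models/example.gguf",
        "--port", "8080",
        "--no-webui",
        "--flash-attn", "on",  # the flag this commit now passes unconditionally
    ]

    # Launch the server as a background process (sketch only).
    process = subprocess.Popen(cmd)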