Add slider for --ubatch-size for llama.cpp loader, change defaults for better MoE performance (#7316)

This commit is contained in:
GodEmperor785 2025-11-21 20:56:02 +01:00 committed by GitHub
parent 8f0048663d
commit 400bb0694b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 6 additions and 1 deletion

View file

@@ -317,6 +317,7 @@ class LlamaServer:
"--ctx-size", str(shared.args.ctx_size),
"--gpu-layers", str(shared.args.gpu_layers),
"--batch-size", str(shared.args.batch_size),
"--ubatch-size", str(shared.args.ubatch_size),
"--port", str(self.port),
"--no-webui",
"--flash-attn", "on",