Add a --cpu-moe option for llama.cpp

This commit is contained in:
oobabooga 2025-11-19 05:23:43 -08:00
parent d6f39e1fef
commit 0d4eff284c
5 changed files with 7 additions and 1 deletion

View file

@@ -326,6 +326,8 @@ class LlamaServer:
cmd += ["--threads", str(shared.args.threads)]
if shared.args.threads_batch > 0:
cmd += ["--threads-batch", str(shared.args.threads_batch)]
if shared.args.cpu_moe:
cmd.append("--cpu-moe")
if shared.args.no_mmap:
cmd.append("--no-mmap")
if shared.args.mlock: