Add a --cpu-moe option for llama.cpp

2026-04-20 22:13:43 +00:00 · 2025-11-19 05:23:43 -08:00 · 2025-11-19 05:23:43 -08:00 · 0d4eff284c
commit 0d4eff284c
parent d6f39e1fef
5 changed files with 7 additions and 1 deletion
--- a/modules/llama_cpp_server.py
+++ b/modules/llama_cpp_server.py
@@ -326,6 +326,8 @@ class LlamaServer:
            cmd += ["--threads", str(shared.args.threads)]
        if shared.args.threads_batch > 0:
            cmd += ["--threads-batch", str(shared.args.threads_batch)]
+        if shared.args.cpu_moe:
+            cmd.append("--cpu-moe")
        if shared.args.no_mmap:
            cmd.append("--no-mmap")
        if shared.args.mlock: