llama.cpp: Explicitly send cache_prompt = True

Author: oobabooga
Date: 2025-04-30 15:24:07 -07:00
Parent: 195a45c6e1
Commit: a6c3ec2299


@@ -135,6 +135,7 @@ class LlamaServer:
"prompt": token_ids,
"n_predict": max_new_tokens,
"stream": True,
"cache_prompt": True
})
if shared.args.verbose:
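
For context, `cache_prompt` is an option of the llama.cpp server's `/completion` endpoint: when enabled, the server reuses the KV cache for any prompt prefix that matches a previous request, which speeds up multi-turn generation. Sending it explicitly presumably avoids relying on whatever default the running server uses. Below is a minimal sketch of such a request made directly against a llama.cpp server, not code from this repository; the URL, port, and token ids are placeholders.

```python
# Minimal sketch (not code from this repository): post a completion request to a
# locally running llama.cpp server with cache_prompt enabled so the server can
# reuse the KV cache for a prompt prefix it has already processed.
import requests

payload = {
    "prompt": [1, 15043, 3186],   # placeholder token ids
    "n_predict": 64,              # maximum number of new tokens to generate
    "stream": False,              # no streaming, to keep the example short
    "cache_prompt": True,         # reuse the cached prompt KV state when possible
}

response = requests.post("http://127.0.0.1:8080/completion", json=payload)
print(response.json().get("content", ""))
```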