From a6c3ec2299fe6dc766bf661ab8171781c8167245 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Wed, 30 Apr 2025 15:24:07 -0700
Subject: [PATCH] llama.cpp: Explicitly send cache_prompt = True

---
 modules/llama_cpp_server.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modules/llama_cpp_server.py b/modules/llama_cpp_server.py
index 1f2db670..71d498b1 100644
--- a/modules/llama_cpp_server.py
+++ b/modules/llama_cpp_server.py
@@ -135,6 +135,7 @@ class LlamaServer:
             "prompt": token_ids,
             "n_predict": max_new_tokens,
             "stream": True,
+            "cache_prompt": True
         })
 
         if shared.args.verbose:
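
For reference, a minimal standalone sketch (not the project's code) of the request shape this patch produces against a running llama.cpp server. The base URL, the plain-text prompt (the actual module sends token IDs), and the generation length are placeholder assumptions; only the "cache_prompt" field mirrors the patched line.

    # Sketch: stream a completion from llama-server with prompt caching enabled.
    import json

    import requests

    SERVER_URL = "http://127.0.0.1:8080"  # assumed default llama-server address

    payload = {
        "prompt": "Once upon a time",   # placeholder; the module sends token IDs
        "n_predict": 64,                # placeholder for max_new_tokens
        "stream": True,
        # The patched line: ask the server to reuse its KV cache for any prompt
        # prefix it has already evaluated, instead of reprocessing it each turn.
        "cache_prompt": True,
    }

    with requests.post(f"{SERVER_URL}/completion", json=payload, stream=True) as r:
        r.raise_for_status()
        for line in r.iter_lines():
            # The streaming endpoint emits SSE-style "data: {...}" lines.
            if not line or not line.startswith(b"data: "):
                continue
            chunk = json.loads(line[len(b"data: "):])
            print(chunk.get("content", ""), end="", flush=True)
            if chunk.get("stop"):
                break

Sending cache_prompt explicitly makes the behavior independent of the server's default, so repeated requests that share a prompt prefix (e.g. a growing chat history) skip re-evaluating the cached portion.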