diff --git a/modules/llama_cpp_server.py b/modules/llama_cpp_server.py
index 05c07748..c3a8d105 100644
--- a/modules/llama_cpp_server.py
+++ b/modules/llama_cpp_server.py
@@ -378,6 +378,7 @@ class LlamaServer:
             cmd += ["--gpu-layers", str(shared.args.gpu_layers), "--fit", "off"]
         else:
             cmd += ["--fit", "on"]
+            cmd += ["--fit-ctx", "8192"]
 
         if shared.args.fit_target:
             cmd += ["--fit-target", shared.args.fit_target]