llama.cpp: Add StreamingLLM (--streaming-llm)

This commit is contained in:
oobabooga 2025-04-25 16:21:35 -07:00
parent d35818f4e1
commit 877cf44c08
3 changed files with 4 additions and 0 deletions

View file

@@ -301,6 +301,8 @@ class LlamaServer:
cmd += ["--device-draft", shared.args.device_draft]
if shared.args.ctx_size_draft > 0:
cmd += ["--ctx-size-draft", str(shared.args.ctx_size_draft)]
if shared.args.streaming_llm:
cmd += ["--cache-reuse", "1"]
if shared.args.extra_flags:
# Clean up the input
extra_flags = shared.args.extra_flags.strip()