mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2026-04-06 15:13:38 +00:00
ik_llama.cpp: Auto-enable Hadamard KV cache rotation with quantized cache
This commit is contained in:
parent
be6fc0663a
commit
0466b6e271
1 changed file with 6 additions and 0 deletions
|
|
@@ -631,6 +631,12 @@ def _patch_cmd_for_ik(cmd):
|
|||
--cache-reuse → (removed, unsupported)
|
||||
--swa-full → (removed, unsupported)
|
||||
"""
|
||||
# Add Hadamard KV cache rotation when using quantized cache types.
|
||||
# This significantly improves quantized cache quality (especially q4_0)
|
||||
# and is a no-op for MLA models like DeepSeek.
|
||||
if shared.args.cache_type in ("q8_0", "q4_0"):
|
||||
cmd += ["-khad", "-vhad"]
|
||||
|
||||
patched = []
|
||||
i = 0
|
||||
while i < len(cmd):
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue