ik_llama.cpp: Auto-enable Hadamard KV cache rotation with quantized cache

This commit is contained in:
oobabooga 2026-03-29 15:52:36 -07:00
parent be6fc0663a
commit 0466b6e271

View file

@@ -631,6 +631,12 @@ def _patch_cmd_for_ik(cmd):
--cache-reuse (removed, unsupported)
--swa-full (removed, unsupported)
"""
# Add Hadamard KV cache rotation when using quantized cache types.
# This significantly improves quantized cache quality (especially q4_0)
# and is a no-op for MLA models like DeepSeek.
if shared.args.cache_type in ("q8_0", "q4_0"):
cmd += ["-khad", "-vhad"]
patched = []
i = 0
while i < len(cmd):