Fix row_split not working with ik_llama.cpp (closes #7489)

This commit is contained in:
oobabooga 2026-04-14 23:18:36 -03:00
parent a8a0f8dc82
commit 9e323425d8

View file

@ -675,6 +675,7 @@ def _patch_cmd_for_ik(cmd):
--fit-target -> --fit-margin
--cache-reuse -> (removed, unsupported)
--swa-full -> (removed, unsupported)
--split-mode row -> --split-mode graph
"""
# Add Hadamard KV cache rotation when using quantized cache types.
# This significantly improves quantized cache quality (especially q4_0)
@ -702,6 +703,9 @@ def _patch_cmd_for_ik(cmd):
patched.append("--fit-margin")
elif arg == "--cache-reuse":
i += 1 # skip the value
elif arg == "--split-mode" and i + 1 < len(cmd) and cmd[i + 1] == "row":
patched += ["--split-mode", "graph"]
i += 1 # skip the value
elif arg == "--swa-full":
pass # bare flag, just drop it
else: