Allow more granular KV cache settings (#6561)

2026-04-20 22:13:43 +00:00 · 2024-12-17 15:43:48 -05:00 · 2024-12-17 15:43:48 -05:00 · addad3c63e
commit addad3c63e
parent c43ee5db11
8 changed files with 140 additions and 37 deletions
--- a/modules/loaders.py
+++ b/modules/loaders.py
@@ -31,8 +31,7 @@ loaders_and_params = OrderedDict({
    'llama.cpp': [
        'n_ctx',
        'n_gpu_layers',
-        'cache_8bit',
-        'cache_4bit',
+        'cache_type',
        'tensor_split',
        'n_batch',
        'threads',
@@ -54,8 +53,7 @@ loaders_and_params = OrderedDict({
    'llamacpp_HF': [
        'n_ctx',
        'n_gpu_layers',
-        'cache_8bit',
-        'cache_4bit',
+        'cache_type',
        'tensor_split',
        'n_batch',
        'threads',
@@ -87,8 +85,7 @@ loaders_and_params = OrderedDict({
        'no_xformers',
        'no_sdpa',
        'num_experts_per_token',
-        'cache_8bit',
-        'cache_4bit',
+        'cache_type',
        'autosplit',
        'enable_tp',
        'alpha_value',
@@ -103,8 +100,7 @@ loaders_and_params = OrderedDict({
        'no_xformers',
        'no_sdpa',
        'num_experts_per_token',
-        'cache_8bit',
-        'cache_4bit',
+        'cache_type',
        'autosplit',
        'enable_tp',
        'alpha_value',