From 1443612e72619e063d57226d05df3c159ec6d0ca Mon Sep 17 00:00:00 2001
From: Miriam
Date: Mon, 9 Jun 2025 19:22:01 -0700
Subject: [PATCH] check .attention.head_count if .attention.head_count_kv doesn't exist (#7048)

---
 modules/models_settings.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/modules/models_settings.py b/modules/models_settings.py
index c914bdea..283a9744 100644
--- a/modules/models_settings.py
+++ b/modules/models_settings.py
@@ -329,6 +329,7 @@ def estimate_vram(gguf_file, gpu_layers, ctx_size, cache_type):
     # Extract values from metadata
     n_layers = None
     n_kv_heads = None
+    n_attention_heads = None  # Fallback for models without separate KV heads
     embedding_dim = None
 
     for key, value in metadata.items():
@@ -336,9 +337,14 @@
         if key.endswith('.block_count'):
             n_layers = value
         elif key.endswith('.attention.head_count_kv'):
             n_kv_heads = max(value) if isinstance(value, list) else value
+        elif key.endswith('.attention.head_count'):
+            n_attention_heads = max(value) if isinstance(value, list) else value
         elif key.endswith('.embedding_length'):
             embedding_dim = value
 
+    if n_kv_heads is None:
+        n_kv_heads = n_attention_heads
+
     if gpu_layers > n_layers:
         gpu_layers = n_layers
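
Note: to see the fallback in isolation, below is a minimal, runnable sketch of the metadata
scan this patch modifies. It is not part of the patched module: the function name
extract_kv_heads and the sample metadata dicts are illustrative assumptions, though the
key names follow real GGUF metadata conventions.

    # Minimal sketch of the head-count fallback, assuming `metadata` is a dict
    # of key/value pairs parsed from a GGUF header.
    def extract_kv_heads(metadata):
        """Return the KV head count, falling back to the full attention head
        count when the model has no separate `.attention.head_count_kv` key
        (no grouped-query attention, so every head has its own KV pair)."""
        n_kv_heads = None
        n_attention_heads = None

        for key, value in metadata.items():
            # Per-layer head counts can be stored as a list; take the maximum.
            if key.endswith('.attention.head_count_kv'):
                n_kv_heads = max(value) if isinstance(value, list) else value
            elif key.endswith('.attention.head_count'):
                n_attention_heads = max(value) if isinstance(value, list) else value

        # The fix from this patch: use head_count when head_count_kv is absent.
        if n_kv_heads is None:
            n_kv_heads = n_attention_heads

        return n_kv_heads

    # Hypothetical metadata for a model without grouped-query attention:
    # only `.attention.head_count` is present, so the fallback applies.
    print(extract_kv_heads({'llama.attention.head_count': 32}))  # -> 32

    # With grouped-query attention, `.head_count_kv` takes precedence.
    print(extract_kv_heads({
        'llama.attention.head_count': 32,
        'llama.attention.head_count_kv': 8,
    }))  # -> 8

Before this patch, a missing `.attention.head_count_kv` key left n_kv_heads as None,
breaking the VRAM estimate; the fallback restores a correct value for such models,
where the KV head count equals the attention head count.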