check .attention.head_count if .attention.head_count_kv doesn't exist (#7048)

Miriam, 2025-06-09 19:22:01 -07:00 (committed by GitHub)
parent d085dc6a93
commit 1443612e72


@@ -329,6 +329,7 @@ def estimate_vram(gguf_file, gpu_layers, ctx_size, cache_type):
     # Extract values from metadata
     n_layers = None
     n_kv_heads = None
+    n_attention_heads = None  # Fallback for models without separate KV heads
     embedding_dim = None
 
     for key, value in metadata.items():
@@ -336,9 +337,14 @@ def estimate_vram(gguf_file, gpu_layers, ctx_size, cache_type):
             n_layers = value
         elif key.endswith('.attention.head_count_kv'):
            n_kv_heads = max(value) if isinstance(value, list) else value
+        elif key.endswith('.attention.head_count'):
+            n_attention_heads = max(value) if isinstance(value, list) else value
         elif key.endswith('.embedding_length'):
             embedding_dim = value
 
+    if n_kv_heads is None:
+        n_kv_heads = n_attention_heads
+
     if gpu_layers > n_layers:
         gpu_layers = n_layers
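
For context, GGUF treats the `*.attention.head_count_kv` key as optional: when it is absent, the model uses plain multi-head attention, so the KV head count equals the attention head count. Below is a minimal standalone sketch of the fallback logic introduced by this commit; `resolve_kv_heads` is a hypothetical helper name and the metadata dicts are illustrative, but the key matching mirrors the diff above.

    # Sketch of the KV-head fallback, assuming metadata is a flat dict
    # of GGUF keys -> values as returned by a GGUF metadata reader.

    def resolve_kv_heads(metadata):
        n_kv_heads = None
        n_attention_heads = None
        for key, value in metadata.items():
            if key.endswith('.attention.head_count_kv'):
                n_kv_heads = max(value) if isinstance(value, list) else value
            elif key.endswith('.attention.head_count'):
                n_attention_heads = max(value) if isinstance(value, list) else value
        # head_count_kv is optional in GGUF; absent means MHA, where the
        # KV head count equals the attention head count.
        return n_kv_heads if n_kv_heads is not None else n_attention_heads

    # GQA model: a separate, smaller KV head count is present.
    print(resolve_kv_heads({'llama.attention.head_count': 32,
                            'llama.attention.head_count_kv': 8}))   # -> 8
    # MHA model: no head_count_kv key, fall back to head_count.
    print(resolve_kv_heads({'gpt2.attention.head_count': 12}))      # -> 12

Note that the elif ordering is safe: a key ending in `.attention.head_count_kv` does not also end in `.attention.head_count`, so the two branches never shadow each other.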