From 1443612e72619e063d57226d05df3c159ec6d0ca Mon Sep 17 00:00:00 2001
From: Miriam
Date: Mon, 9 Jun 2025 19:22:01 -0700
Subject: [PATCH] check .attention.head_count if .attention.head_count_kv doesn't exist (#7048)

---
 modules/models_settings.py | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/modules/models_settings.py b/modules/models_settings.py
index c914bdea..283a9744 100644
--- a/modules/models_settings.py
+++ b/modules/models_settings.py
@@ -329,6 +329,7 @@ def estimate_vram(gguf_file, gpu_layers, ctx_size, cache_type):
     # Extract values from metadata
     n_layers = None
     n_kv_heads = None
+    n_attention_heads = None  # Fallback for models without separate KV heads
     embedding_dim = None
 
     for key, value in metadata.items():
@@ -336,9 +337,14 @@
         if key.endswith('.block_count'):
             n_layers = value
         elif key.endswith('.attention.head_count_kv'):
             n_kv_heads = max(value) if isinstance(value, list) else value
+        elif key.endswith('.attention.head_count'):
+            n_attention_heads = max(value) if isinstance(value, list) else value
         elif key.endswith('.embedding_length'):
             embedding_dim = value
 
+    if n_kv_heads is None:
+        n_kv_heads = n_attention_heads
+
     if gpu_layers > n_layers:
         gpu_layers = n_layers
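
Note: to see the fallback in isolation, below is a minimal, runnable sketch of the metadata
scan this patch modifies. It is not part of the patched module: the function name
extract_kv_heads and the sample metadata dicts are illustrative assumptions, though the
key names follow real GGUF metadata conventions.

    # Minimal sketch of the head-count fallback, assuming `metadata` is a dict
    # of key/value pairs parsed from a GGUF header.
    def extract_kv_heads(metadata):
        """Return the KV head count, falling back to the full attention head
        count when the model has no separate `.attention.head_count_kv` key
        (no grouped-query attention, so every head has its own KV pair)."""
        n_kv_heads = None
        n_attention_heads = None

        for key, value in metadata.items():
            # Per-layer head counts can be stored as a list; take the maximum.
            if key.endswith('.attention.head_count_kv'):
                n_kv_heads = max(value) if isinstance(value, list) else value
            elif key.endswith('.attention.head_count'):
                n_attention_heads = max(value) if isinstance(value, list) else value

        # The fix from this patch: use head_count when head_count_kv is absent.
        if n_kv_heads is None:
            n_kv_heads = n_attention_heads

        return n_kv_heads

    # Hypothetical metadata for a model without grouped-query attention:
    # only `.attention.head_count` is present, so the fallback applies.
    print(extract_kv_heads({'llama.attention.head_count': 32}))  # -> 32

    # With grouped-query attention, `.head_count_kv` takes precedence.
    print(extract_kv_heads({
        'llama.attention.head_count': 32,
        'llama.attention.head_count_kv': 8,
    }))  # -> 8

Before this patch, a missing `.attention.head_count_kv` key left n_kv_heads as None,
breaking the VRAM estimate; the fallback restores a correct value for such models,
where the KV head count equals the attention head count.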