mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2026-01-26 10:24:25 +01:00
Check `.attention.head_count` if `.attention.head_count_kv` doesn't exist (#7048)
This commit is contained in:
parent
d085dc6a93
commit
1443612e72
|
|
@ -329,6 +329,7 @@ def estimate_vram(gguf_file, gpu_layers, ctx_size, cache_type):
|
|||
# Extract values from metadata
|
||||
n_layers = None
|
||||
n_kv_heads = None
|
||||
n_attention_heads = None # Fallback for models without separate KV heads
|
||||
embedding_dim = None
|
||||
|
||||
for key, value in metadata.items():
|
||||
|
|
@ -336,9 +337,14 @@ def estimate_vram(gguf_file, gpu_layers, ctx_size, cache_type):
|
|||
n_layers = value
|
||||
elif key.endswith('.attention.head_count_kv'):
|
||||
n_kv_heads = max(value) if isinstance(value, list) else value
|
||||
elif key.endswith('.attention.head_count'):
|
||||
n_attention_heads = max(value) if isinstance(value, list) else value
|
||||
elif key.endswith('.embedding_length'):
|
||||
embedding_dim = value
|
||||
|
||||
if n_kv_heads is None:
|
||||
n_kv_heads = n_attention_heads
|
||||
|
||||
if gpu_layers > n_layers:
|
||||
gpu_layers = n_layers
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue