diff --git a/modules/models_settings.py b/modules/models_settings.py
index 3fdf3c84..6715d494 100644
--- a/modules/models_settings.py
+++ b/modules/models_settings.py
@@ -72,6 +72,7 @@ def get_model_metadata(model):
                 model_settings['compress_pos_emb'] = metadata[k]
             elif k.endswith('block_count'):
                 model_settings['gpu_layers'] = metadata[k] + 1
+                model_settings['max_gpu_layers'] = metadata[k] + 1
 
         if 'tokenizer.chat_template' in metadata:
             template = metadata['tokenizer.chat_template']
@@ -450,17 +451,28 @@ def update_gpu_layers_and_vram(loader, model, gpu_layers, ctx_size, cache_type,
 
     max_layers = gpu_layers
     if auto_adjust:
-        # Get max layers from model metadata
+        # Get model settings including user preferences
         model_settings = get_model_metadata(model)
-        max_layers = model_settings.get('gpu_layers', gpu_layers)
 
-        # Auto-adjust based on available VRAM
-        available_vram = get_nvidia_free_vram()
-        if available_vram > 0:
-            tolerance = 906
-            current_layers = max_layers
-            while current_layers > 0 and estimate_vram(model, current_layers, ctx_size, cache_type) > available_vram - tolerance:
-                current_layers -= 1
+        # Check if the value is from user config-user.yaml
+        user_config = shared.user_config
+        model_regex = Path(model).name + '$'
+        has_user_setting = model_regex in user_config and 'gpu_layers' in user_config[model_regex]
+
+        if has_user_setting:
+            # Just return the current user value without adjustment
+            max_layers = model_settings.get('max_gpu_layers', 256)
+        else:
+            # No user setting, use model's max and auto-adjust
+            max_layers = model_settings.get('max_gpu_layers', model_settings.get('gpu_layers', gpu_layers))
+            current_layers = max_layers  # Start from max
+
+            # Auto-adjust based on available VRAM
+            available_vram = get_nvidia_free_vram()
+            if available_vram > 0:
+                tolerance = 906
+                while current_layers > 0 and estimate_vram(model, current_layers, ctx_size, cache_type) > available_vram - tolerance:
+                    current_layers -= 1
 
     # Calculate VRAM with current layers
     vram_usage = estimate_vram(model, current_layers, ctx_size, cache_type)
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index 59bb6759..5b7dfdd8 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -318,7 +318,7 @@ def get_initial_vram_info():
 def get_initial_gpu_layers_max():
     if shared.model_name != 'None' and shared.args.loader == 'llama.cpp':
         model_settings = get_model_metadata(shared.model_name)
-        return model_settings.get('gpu_layers', 256)
+        return model_settings.get('max_gpu_layers', model_settings.get('gpu_layers', 256))
 
     return 256
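
A minimal sketch of the config-user.yaml lookup the patch gates the auto-adjustment on, assuming shared.user_config is a plain dict keyed by per-model regex patterns (mocked below with a local dict and a hypothetical model filename rather than the real modules.shared object):

from pathlib import Path

# Hypothetical stand-in for shared.user_config as loaded from config-user.yaml
user_config = {
    'Llama-3.1-8B-Instruct-Q4_K_M.gguf$': {'gpu_layers': 20, 'ctx_size': 8192},
}

model = 'models/Llama-3.1-8B-Instruct-Q4_K_M.gguf'
model_regex = Path(model).name + '$'  # same key format the patch builds

# True only when the user pinned gpu_layers for this exact model file;
# in that case the patch keeps the user value instead of shrinking it to fit VRAM
has_user_setting = model_regex in user_config and 'gpu_layers' in user_config[model_regex]
print(has_user_setting)  # True for the mocked config above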