diff --git a/modules/models_settings.py b/modules/models_settings.py
index 15ff2830..f0767b1d 100644
--- a/modules/models_settings.py
+++ b/modules/models_settings.py
@@ -26,6 +26,23 @@ def get_fallback_settings():
     }
 
 
+def get_loader_defaults():
+    '''
+    Returns default values for all loader parameters from shared.args_defaults.
+    Used to reset UI when switching to a model without saved settings.
+    '''
+    defaults = {}
+    for param in loaders.get_all_params():
+        if hasattr(shared.args_defaults, param):
+            value = getattr(shared.args_defaults, param)
+            if value is None and param in ('tensor_split', 'gpu_split', 'extra_flags', 'device_draft'):
+                value = ''
+            elif value is None and param in ('model_draft', 'mmproj'):
+                value = 'None'
+            defaults[param] = value
+    return defaults
+
+
 def get_model_metadata(model):
     model_path = resolve_model_path(model)
     model_settings = {}
@@ -253,6 +270,12 @@ def apply_model_settings_to_state(model, state):
     '''
     UI: update the state variable with the model settings
     '''
+    # Reset loader parameters to defaults before applying model-specific settings
+    loader_defaults = get_loader_defaults()
+    for k, v in loader_defaults.items():
+        if k in state:
+            state[k] = v
+
     model_settings = get_model_metadata(model)
     if 'loader' in model_settings:
         loader = model_settings.pop('loader')
@@ -472,7 +495,7 @@ def get_nvidia_vram(return_free=True):
     return -1
 
 
-def update_gpu_layers_and_vram(loader, model, gpu_layers, ctx_size, cache_type, auto_adjust=False, for_ui=True):
+def update_gpu_layers_and_vram(loader, model, gpu_layers, ctx_size, cache_type, lock_settings=False, auto_adjust=False, for_ui=True):
     """
     Unified function to handle GPU layers and VRAM updates.
 
@@ -483,12 +506,16 @@ def update_gpu_layers_and_vram(loader, model, gpu_layers, ctx_size, cache_type,
         - If for_ui=True: (vram_info_update, gpu_layers_update) or just vram_info_update
         - If for_ui=False: (vram_usage, adjusted_layers) or just vram_usage
     """
+    should_adjust = auto_adjust
+    if lock_settings and should_adjust:
+        should_adjust = False
+
     if loader != 'llama.cpp' or model in ["None", None] or not model.endswith(".gguf"):
         vram_info = "Estimated VRAM to load the model:"
         if for_ui:
             return (vram_info, gr.update()) if auto_adjust else vram_info
         else:
-            return (0, gpu_layers) if auto_adjust else 0
+            return (0, gpu_layers) if should_adjust else 0
 
     # Get model settings including user preferences
     model_settings = get_model_metadata(model)
@@ -496,7 +523,7 @@ def update_gpu_layers_and_vram(loader, model, gpu_layers, ctx_size, cache_type,
     current_layers = gpu_layers
     max_layers = model_settings.get('max_gpu_layers', 256)
 
-    if auto_adjust:
+    if should_adjust:
         # Check if this is a user-saved setting
         user_config = shared.user_config
         model_regex = Path(model).name + '$'
@@ -521,7 +548,10 @@ def update_gpu_layers_and_vram(loader, model, gpu_layers, ctx_size, cache_type,
     if for_ui:
         vram_info = f"Estimated VRAM to load the model: {vram_usage:.0f} MiB"
         if auto_adjust:
-            return vram_info, gr.update(value=current_layers, maximum=max_layers)
+            if should_adjust:
+                return vram_info, gr.update(value=current_layers, maximum=max_layers)
+            else:
+                return vram_info, gr.update()
         else:
             return vram_info
     else:
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index 86adc229..e3889d86 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -41,6 +41,7 @@ def create_ui():
     gr.Markdown("## Main options")
     with gr.Row():
         with gr.Column():
+            shared.gradio['lock_model_settings'] = gr.Checkbox(label="Lock settings", value=False, info='Prevent automatic changes to loader settings')
             shared.gradio['gpu_layers'] = gr.Slider(label="gpu-layers", minimum=0, maximum=get_initial_gpu_layers_max(), step=1, value=shared.args.gpu_layers, info='Must be greater than 0 for the GPU to be used. ⚠️ Lower this value if you can\'t load the model.')
             shared.gradio['ctx_size'] = gr.Slider(label='ctx-size', minimum=256, maximum=131072, step=256, value=shared.args.ctx_size, info='Context length. Common values: 4096, 8192, 16384, 32768, 65536, 131072.')
             shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7')
@@ -146,7 +147,7 @@ def create_event_handlers():
     # with the model defaults (if any), and then the model is loaded
     shared.gradio['model_menu'].change(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
-        handle_load_model_event_initial, gradio('model_menu', 'interface_state'), gradio(ui.list_interface_input_elements()) + gradio('interface_state') + gradio('vram_info'), show_progress=False).then(
+        handle_load_model_event_initial, gradio('model_menu', 'interface_state', 'lock_model_settings'), gradio(ui.list_interface_input_elements()) + gradio('interface_state') + gradio('vram_info'), show_progress=False).then(
         partial(load_model_wrapper, autoload=False), gradio('model_menu', 'loader'), gradio('model_status'), show_progress=True).success(
         handle_load_model_event_final, gradio('truncation_length', 'loader', 'interface_state'), gradio('truncation_length', 'filter_by_loader'), show_progress=False)
 
@@ -157,7 +158,7 @@ def create_event_handlers():
         handle_load_model_event_final, gradio('truncation_length', 'loader', 'interface_state'), gradio('truncation_length', 'filter_by_loader'), show_progress=False)
 
     shared.gradio['unload_model'].click(handle_unload_model_click, None, gradio('model_status'), show_progress=False).then(
-        partial(update_gpu_layers_and_vram, auto_adjust=True), gradio('loader', 'model_menu', 'gpu_layers', 'ctx_size', 'cache_type'), gradio('vram_info', 'gpu_layers'), show_progress=False)
+        partial(update_gpu_layers_and_vram, auto_adjust=True), gradio('loader', 'model_menu', 'gpu_layers', 'ctx_size', 'cache_type', 'lock_model_settings'), gradio('vram_info', 'gpu_layers'), show_progress=False)
 
     shared.gradio['save_model_settings'].click(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
@@ -167,7 +168,7 @@ def create_event_handlers():
     for param in ['ctx_size', 'cache_type']:
         shared.gradio[param].change(
             partial(update_gpu_layers_and_vram, auto_adjust=True),
-            gradio('loader', 'model_menu', 'gpu_layers', 'ctx_size', 'cache_type'),
+            gradio('loader', 'model_menu', 'gpu_layers', 'ctx_size', 'cache_type', 'lock_model_settings'),
             gradio('vram_info', 'gpu_layers'), show_progress=False)
 
     # For manual gpu_layers changes - only update VRAM
@@ -176,6 +177,12 @@ def create_event_handlers():
         gradio('loader', 'model_menu', 'gpu_layers', 'ctx_size', 'cache_type'),
         gradio('vram_info'), show_progress=False)
 
+    # When lock is unchecked, apply settings for current model
+    shared.gradio['lock_model_settings'].change(
+        ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+        handle_unlock_settings, gradio('model_menu', 'interface_state', 'lock_model_settings'),
+        gradio(ui.list_interface_input_elements()) + gradio('interface_state') + gradio('vram_info'), show_progress=False)
+
     if not shared.args.portable:
         shared.gradio['lora_menu_apply'].click(load_lora_wrapper, gradio('lora_menu'), gradio('model_status'), show_progress=False)
 
@@ -401,8 +408,9 @@ def get_initial_gpu_layers_max():
     return 256
 
 
-def handle_load_model_event_initial(model, state):
-    state = apply_model_settings_to_state(model, state)
+def handle_load_model_event_initial(model, state, lock_settings):
+    if not lock_settings:
+        state = apply_model_settings_to_state(model, state)
     output = ui.apply_interface_values(state)
     update_model_parameters(state)  # This updates the command-line flags
 
@@ -410,6 +418,15 @@ def handle_load_model_event_initial(model, state):
     return output + [state] + [vram_info]
 
 
+def handle_unlock_settings(model, state, lock_settings):
+    if not lock_settings:
+        state = apply_model_settings_to_state(model, state)
+        output = ui.apply_interface_values(state)
+        vram_info = state.get('vram_info', "Estimated VRAM to load the model:")
+        return output + [state] + [vram_info]
+    return [gr.update()] * len(ui.list_interface_input_elements()) + [state, gr.update()]
+
+
 def handle_load_model_event_final(truncation_length, loader, state):
     truncation_length = update_truncation_length(truncation_length, state)
     return [truncation_length, loader]