From 2eef90a32346f5acea1771803460504294b9d9be Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 4 Apr 2026 11:00:14 -0700 Subject: [PATCH] API: Remove deprecated "settings" parameter from model load endpoint --- modules/api/models.py | 14 +------------- modules/api/script.py | 20 ++++---------------- modules/api/typing.py | 1 - 3 files changed, 5 insertions(+), 30 deletions(-) diff --git a/modules/api/models.py b/modules/api/models.py index e0bd21f3..5dd77850 100644 --- a/modules/api/models.py +++ b/modules/api/models.py @@ -1,5 +1,4 @@ from modules import loaders, shared -from modules.logging_colors import logger from modules.LoRA import add_lora_to_model from modules.models import load_model, unload_model from modules.models_settings import get_model_metadata, update_model_parameters @@ -42,8 +41,7 @@ def model_info_dict(model_name: str) -> dict: def _load_model(data): model_name = data["model_name"] - args = data["args"] - settings = data["settings"] + args = data.get("args") unload_model() model_settings = get_model_metadata(model_name) @@ -71,16 +69,6 @@ def _load_model(data): shared.model, shared.tokenizer = load_model(model_name) - # Update shared.settings with custom generation defaults - if settings: - for k in settings: - if k in shared.settings: - shared.settings[k] = settings[k] - if k == 'truncation_length': - logger.info(f"CONTEXT LENGTH (UPDATED): {shared.settings['truncation_length']}") - elif k == 'instruction_template': - logger.info(f"INSTRUCTION TEMPLATE (UPDATED): {shared.settings['instruction_template']}") - def list_loras(): return {'lora_names': get_available_loras()[1:]} diff --git a/modules/api/script.py b/modules/api/script.py index 85f4974f..beed3d06 100644 --- a/modules/api/script.py +++ b/modules/api/script.py @@ -475,10 +475,8 @@ async def handle_list_models(): @app.post("/v1/internal/model/load", dependencies=check_admin_key) async def handle_load_model(request_data: LoadModelRequest): ''' - This endpoint is experimental and may change in the future. - - The "args" parameter can be used to modify flags like "--load-in-4bit" - or "--n-gpu-layers" before loading a model. Example: + The "args" parameter can be used to modify loader flags before loading + a model. Example: ``` "args": { @@ -487,18 +485,8 @@ async def handle_load_model(request_data: LoadModelRequest): } ``` - Note that those settings will remain after loading the model. So you - may need to change them back to load a second model. - - The "settings" parameter is also a dict but with keys for the - shared.settings object. It can be used to modify the default instruction - template like this: - - ``` - "settings": { - "instruction_template": "Alpaca" - } - ``` + Loader args are reset to their startup defaults between loads, so + settings from a previous load do not leak into the next one. ''' try: diff --git a/modules/api/typing.py b/modules/api/typing.py index 1d486e8f..a758743e 100644 --- a/modules/api/typing.py +++ b/modules/api/typing.py @@ -271,7 +271,6 @@ class ModelListResponse(BaseModel): class LoadModelRequest(BaseModel): model_name: str args: dict | None = None - settings: dict | None = None class LoraListResponse(BaseModel):