Keep things more modular

oobabooga 2025-11-27 15:32:01 -08:00
parent 0adda7a5c5
commit 148a5d1e44
3 changed files with 433 additions and 323 deletions


@@ -52,11 +52,12 @@ group.add_argument('--idle-timeout', type=int, default=0, help='Unload model aft
# Image generation
group = parser.add_argument_group('Image model')
group.add_argument('--image-model', type=str, help='Name of the image model to load by default.')
group.add_argument('--image-model', type=str, help='Name of the image model to select on startup (overrides saved setting).')
group.add_argument('--image-model-dir', type=str, default='user_data/image_models', help='Path to directory with all the image models.')
group.add_argument('--image-dtype', type=str, default='bfloat16', choices=['bfloat16', 'float16'], help='Data type for image model.')
group.add_argument('--image-attn-backend', type=str, default='sdpa', choices=['sdpa', 'flash_attention_2', 'flash_attention_3'], help='Attention backend for image model.')
group.add_argument('--image-dtype', type=str, default=None, choices=['bfloat16', 'float16'], help='Data type for image model.')
group.add_argument('--image-attn-backend', type=str, default=None, choices=['sdpa', 'flash_attention_2', 'flash_attention_3'], help='Attention backend for image model.')
group.add_argument('--image-cpu-offload', action='store_true', help='Enable CPU offloading for image model.')
group.add_argument('--image-compile', action='store_true', help='Compile the image model for faster inference.')
# Model loader
group = parser.add_argument_group('Model loader')
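
Note on the None defaults above: --image-dtype and --image-attn-backend now only override when passed explicitly, so per-model settings saved to yaml can take effect. A minimal sketch of the resolution order (CLI > yaml > defaults) follows; the real logic lives in get_effective_settings() in modules/image_model_settings.py, which is not part of this diff, so the helper and names below are illustrative assumptions only.

DEFAULTS = {'dtype': 'bfloat16', 'attn_backend': 'sdpa'}

def resolve_image_setting(key, cli_value, saved_settings):
    # Illustrative only: an explicit CLI flag (not None) wins,
    # then the per-model yaml entry, then the built-in default.
    if cli_value is not None:
        return cli_value
    if saved_settings.get(key) is not None:
        return saved_settings[key]
    return DEFAULTS[key]

# resolve_image_setting('dtype', None, {'dtype': 'float16'})       -> 'float16'
# resolve_image_setting('dtype', 'bfloat16', {'dtype': 'float16'}) -> 'bfloat16'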


@@ -1,12 +1,16 @@
# modules/ui_image_generation.py
import os
import traceback
from datetime import datetime
from pathlib import Path
import gradio as gr
import numpy as np
import torch
from modules import shared
from modules import shared, utils
from modules.image_models import load_image_model, unload_image_model
from modules.image_model_settings import get_effective_settings, save_image_model_settings
# Aspect ratio definitions: name -> (width_ratio, height_ratio)
@@ -21,16 +25,113 @@ ASPECT_RATIOS = {
STEP = 32 # Slider step for rounding
def round_to_step(value, step=STEP):
"""Round a value to the nearest step."""
return round(value / step) * step
def clamp(value, min_val, max_val):
"""Clamp value between min and max."""
return max(min_val, min(max_val, value))
def apply_aspect_ratio(aspect_ratio, current_width, current_height):
"""
Apply an aspect ratio preset.
Logic to prevent dimension creep:
- For tall ratios (like 9:16): keep width fixed, calculate height
- For wide ratios (like 16:9): keep height fixed, calculate width
- For square (1:1): use the smaller of the current dimensions
Returns (new_width, new_height).
"""
if aspect_ratio == "Custom" or aspect_ratio not in ASPECT_RATIOS:
return current_width, current_height
w_ratio, h_ratio = ASPECT_RATIOS[aspect_ratio]
if w_ratio == h_ratio:
# Square ratio - use the smaller current dimension to prevent creep
base = min(current_width, current_height)
new_width = base
new_height = base
elif w_ratio < h_ratio:
# Tall ratio (like 9:16) - width is the smaller side, keep it fixed
new_width = current_width
new_height = round_to_step(current_width * h_ratio / w_ratio)
else:
# Wide ratio (like 16:9) - height is the smaller side, keep it fixed
new_height = current_height
new_width = round_to_step(current_height * w_ratio / h_ratio)
# Clamp to slider bounds
new_width = clamp(new_width, 256, 2048)
new_height = clamp(new_height, 256, 2048)
return int(new_width), int(new_height)
def update_height_from_width(width, aspect_ratio):
"""Update height when width changes (if not Custom)."""
if aspect_ratio == "Custom" or aspect_ratio not in ASPECT_RATIOS:
return gr.update()
w_ratio, h_ratio = ASPECT_RATIOS[aspect_ratio]
new_height = round_to_step(width * h_ratio / w_ratio)
new_height = clamp(new_height, 256, 2048)
return int(new_height)
def update_width_from_height(height, aspect_ratio):
"""Update width when height changes (if not Custom)."""
if aspect_ratio == "Custom" or aspect_ratio not in ASPECT_RATIOS:
return gr.update()
w_ratio, h_ratio = ASPECT_RATIOS[aspect_ratio]
new_width = round_to_step(height * w_ratio / h_ratio)
new_width = clamp(new_width, 256, 2048)
return int(new_width)
def swap_dimensions_and_update_ratio(width, height, aspect_ratio):
"""Swap dimensions and update aspect ratio to match (or set to Custom)."""
new_width, new_height = height, width
# Try to find a matching aspect ratio for the swapped dimensions
new_ratio = "Custom"
for name, ratios in ASPECT_RATIOS.items():
if ratios is None:
continue
w_r, h_r = ratios
# Check if the swapped dimensions match this ratio (within tolerance)
expected_height = new_width * h_r / w_r
if abs(expected_height - new_height) < STEP:
new_ratio = name
break
return new_width, new_height, new_ratio
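# Worked example (illustrative, not part of the commit, assuming a "9:16" preset
# key as the docstring suggests): starting from 1024x1024,
# apply_aspect_ratio("9:16", 1024, 1024) keeps the width and returns (1024, 1824),
# since round_to_step(1024 * 16 / 9) = round(1820.4 / 32) * 32 = 1824. Applying
# "16:9" next keeps the height and computes round_to_step(1824 * 16 / 9) = 3232,
# which clamp() caps at 2048, so toggling presets cannot push either dimension
# past the 256-2048 slider bounds.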
def create_ui():
# Get effective settings (CLI > yaml > defaults)
settings = get_effective_settings()
# Update shared state (but don't load the model yet)
if settings['model_name'] != 'None':
shared.image_model_name = settings['model_name']
with gr.Tab("Image AI", elem_id="image-ai-tab"):
with gr.Tabs():
# TAB 1: GENERATION STUDIO
with gr.TabItem("Generate Images"):
with gr.TabItem("Generate"):
with gr.Row():
# === LEFT COLUMN: CONTROLS ===
with gr.Column(scale=4, min_width=350):
# 1. PROMPT
prompt = gr.Textbox(label="Prompt", placeholder="Describe your imagination...", lines=3, autofocus=True)
neg_prompt = gr.Textbox(label="Negative Prompt", placeholder="Low quality...", lines=3)
@@ -58,7 +159,7 @@ def create_ui():
swap_btn = gr.Button("⇄ Swap", elem_classes='refresh-button', scale=0, min_width=80)
# 4. SETTINGS & BATCHING
gr.Markdown("### ⚙️ Config")
gr.Markdown("### ⚙️ Config")
with gr.Row():
with gr.Column():
steps_slider = gr.Slider(1, 15, value=9, step=1, label="Steps")
@@ -68,15 +169,15 @@ def create_ui():
with gr.Column():
batch_size_parallel = gr.Slider(1, 32, value=1, step=1, label="Batch Size (VRAM Heavy)", info="Generates N images at once.")
batch_count_seq = gr.Slider(1, 128, value=1, step=1, label="Sequential Count (Loop)", info="Repeats the generation N times.")
# === RIGHT COLUMN: VIEWPORT ===
with gr.Column(scale=6, min_width=500):
with gr.Column(elem_classes=["viewport-container"]):
output_gallery = gr.Gallery(
label="Output", show_label=False, columns=2, rows=2, height="80vh", object_fit="contain", preview=True
label="Output", show_label=False, columns=2, rows=2, height="80vh", object_fit="contain", preview=True
)
with gr.Row():
used_seed = gr.Markdown(label="Info", interactive=False, lines=3)
used_seed = gr.Markdown(label="Info", interactive=False)
# TAB 2: HISTORY VIEWER
with gr.TabItem("Gallery"):
@@ -87,8 +188,67 @@ def create_ui():
label="History", show_label=False, columns=6, object_fit="cover", height="auto", allow_preview=True
)
# === WIRING ===
# TAB 3: MODEL SETTINGS
with gr.TabItem("Model"):
with gr.Row():
with gr.Column():
with gr.Row():
image_model_menu = gr.Dropdown(
choices=utils.get_available_image_models(),
value=settings['model_name'],
label='Model',
elem_classes='slim-dropdown'
)
image_refresh_models = gr.Button("🔄", elem_classes='refresh-button', scale=0, min_width=40)
with gr.Row():
image_load_model = gr.Button("Load", variant='primary')
image_unload_model = gr.Button("Unload")
gr.Markdown("### Settings")
image_dtype = gr.Dropdown(
choices=['bfloat16', 'float16'],
value=settings['dtype'],
label='Data Type',
info='bfloat16 recommended for modern GPUs'
)
image_attn_backend = gr.Dropdown(
choices=['sdpa', 'flash_attention_2', 'flash_attention_3'],
value=settings['attn_backend'],
label='Attention Backend',
info='SDPA is default. Flash Attention requires compatible GPU.'
)
image_cpu_offload = gr.Checkbox(
value=settings['cpu_offload'],
label='CPU Offload',
info='Enable for low VRAM GPUs. Slower but uses less memory.'
)
image_compile = gr.Checkbox(
value=settings['compile_model'],
label='Compile Model',
info='Faster inference after first run. First run will be slow.'
)
image_model_status = gr.Markdown(
value=f"Model: **{settings['model_name']}** (not loaded)" if settings['model_name'] != 'None' else "No model selected"
)
with gr.Column():
gr.Markdown("### Download Model")
image_download_path = gr.Textbox(
label="Hugging Face Model",
placeholder="Tongyi-MAI/Z-Image-Turbo",
info="Enter the HuggingFace model path. Use : for branch, e.g. model:main"
)
image_download_btn = gr.Button("Download", variant='primary')
image_download_status = gr.Markdown("")
# === WIRING ===
# Aspect ratio preset changes -> update dimensions
preset_radio.change(
fn=apply_aspect_ratio,
@@ -96,7 +256,7 @@ def create_ui():
outputs=[width_slider, height_slider],
show_progress=False
)
# Width slider changes -> update height (if not Custom)
width_slider.release(
fn=update_height_from_width,
@@ -104,7 +264,7 @@ def create_ui():
outputs=[height_slider],
show_progress=False
)
# Height slider changes -> update width (if not Custom)
height_slider.release(
fn=update_width_from_height,
@@ -112,7 +272,7 @@ def create_ui():
outputs=[width_slider],
show_progress=False
)
# Swap button -> swap dimensions and update aspect ratio
swap_btn.click(
fn=swap_dimensions_and_update_ratio,
@@ -125,62 +285,92 @@ def create_ui():
inputs = [prompt, neg_prompt, width_slider, height_slider, steps_slider, seed_input, batch_size_parallel, batch_count_seq]
outputs = [output_gallery, used_seed]
generate_btn.click(fn=generate, inputs=inputs, outputs=outputs)
prompt.submit(fn=generate, inputs=inputs, outputs=outputs)
neg_prompt.submit(fn=generate, inputs=inputs, outputs=outputs)
generate_btn.click(
fn=lambda *args: generate(*args, image_model_menu, image_dtype, image_attn_backend, image_cpu_offload, image_compile),
inputs=inputs,
outputs=outputs
)
prompt.submit(
fn=lambda *args: generate(*args, image_model_menu, image_dtype, image_attn_backend, image_cpu_offload, image_compile),
inputs=inputs,
outputs=outputs
)
neg_prompt.submit(
fn=lambda *args: generate(*args, image_model_menu, image_dtype, image_attn_backend, image_cpu_offload, image_compile),
inputs=inputs,
outputs=outputs
)
# Model tab events
image_refresh_models.click(
fn=lambda: gr.update(choices=utils.get_available_image_models()),
inputs=None,
outputs=[image_model_menu],
show_progress=False
)
image_load_model.click(
fn=load_image_model_wrapper,
inputs=[image_model_menu, image_dtype, image_attn_backend, image_cpu_offload, image_compile],
outputs=[image_model_status],
show_progress=True
)
image_unload_model.click(
fn=unload_image_model_wrapper,
inputs=None,
outputs=[image_model_status],
show_progress=False
)
image_download_btn.click(
fn=download_image_model_wrapper,
inputs=[image_download_path],
outputs=[image_download_status, image_model_menu],
show_progress=True
)
# History
# refresh_btn.click(fn=get_history_images, inputs=None, outputs=history_gallery)
# Aspect Buttons
# btn_sq.click(lambda: set_dims(1024, 1024), outputs=[width_slider, height_slider])
# btn_port.click(lambda: set_dims(720, 1280), outputs=[width_slider, height_slider])
# btn_land.click(lambda: set_dims(1280, 720), outputs=[width_slider, height_slider])
# btn_wide.click(lambda: set_dims(1536, 640), outputs=[width_slider, height_slider])
# Generation
inputs = [prompt, neg_prompt, width_slider, height_slider, steps_slider, seed_input, batch_size_parallel, batch_count_seq]
outputs = [output_gallery, used_seed]
generate_btn.click(fn=generate, inputs=inputs, outputs=outputs)
prompt.submit(fn=generate, inputs=inputs, outputs=outputs)
neg_prompt.submit(fn=generate, inputs=inputs, outputs=outputs)
# System
# load_btn.click(fn=load_pipeline, inputs=[backend_drop, compile_check, offload_check, gr.State("bfloat16")], outputs=None)
# History
# refresh_btn.click(fn=get_history_images, inputs=None, outputs=history_gallery)
# Load history on app launch
# demo.load(fn=get_history_images, inputs=None, outputs=history_gallery)
refresh_btn.click(fn=get_history_images, inputs=None, outputs=history_gallery, show_progress=False)
def generate(prompt, neg_prompt, width, height, steps, seed, batch_size_parallel, batch_count_seq):
import numpy as np
import torch
from modules import shared
from modules.image_models import load_image_model
def generate(prompt, neg_prompt, width, height, steps, seed, batch_size_parallel, batch_count_seq,
model_menu, dtype_dropdown, attn_dropdown, cpu_offload_checkbox, compile_checkbox):
"""Generate images with the current model settings."""
# Get current UI values (these are Gradio components, we need their values)
model_name = shared.image_model_name
if model_name == 'None':
return [], "No image model selected. Go to the Model tab and select a model."
# Auto-load model if not loaded
if shared.image_model is None:
if shared.image_model_name == 'None':
return [], "No image model selected. Please load a model first."
load_image_model(shared.image_model_name)
if shared.image_model is None:
return [], "Failed to load image model."
# Load saved settings for the model
saved_settings = load_image_model_settings()
result = load_image_model(
model_name,
dtype=saved_settings['dtype'],
attn_backend=saved_settings['attn_backend'],
cpu_offload=saved_settings['cpu_offload'],
compile_model=saved_settings['compile_model']
)
if result is None:
return [], f"Failed to load model `{model_name}`."
if seed == -1:
seed = np.random.randint(0, 2**32 - 1)
generator = torch.Generator("cuda").manual_seed(int(seed))
all_images = []
# Sequential loop (easier on VRAM)
for i in range(int(batch_count_seq)):
current_seed = seed + i
generator.manual_seed(int(current_seed))
# Parallel generation
batch_results = shared.image_model(
prompt=prompt,
@@ -192,150 +382,128 @@ def generate(prompt, neg_prompt, width, height, steps, seed, batch_size_parallel
num_images_per_prompt=int(batch_size_parallel),
generator=generator,
).images
all_images.extend(batch_results)
# Save to disk
save_generated_images(all_images, prompt, seed)
return all_images, f"Seed: {seed}"
# --- File Saving Logic ---
def load_image_model_wrapper(model_name, dtype, attn_backend, cpu_offload, compile_model):
"""Load model and save settings."""
if model_name == 'None' or not model_name:
yield "No model selected"
return
try:
yield f"Loading `{model_name}`..."
# Unload existing model first
unload_image_model()
# Load the new model
result = load_image_model(
model_name,
dtype=dtype,
attn_backend=attn_backend,
cpu_offload=cpu_offload,
compile_model=compile_model
)
if result is not None:
# Save settings to yaml
save_image_model_settings(model_name, dtype, attn_backend, cpu_offload, compile_model)
yield f"✓ Loaded **{model_name}**"
else:
yield f"✗ Failed to load `{model_name}`"
except Exception:
exc = traceback.format_exc()
yield f"Error:\n```\n{exc}\n```"
def unload_image_model_wrapper():
"""Unload model wrapper."""
unload_image_model()
if shared.image_model_name != 'None':
return f"Model: **{shared.image_model_name}** (not loaded)"
else:
return "No model loaded"
def download_image_model_wrapper(model_path):
"""Download a model from Hugging Face."""
from huggingface_hub import snapshot_download
if not model_path:
yield "No model specified", gr.update()
return
try:
# Parse model name and branch
if ':' in model_path:
model_id, branch = model_path.rsplit(':', 1)
else:
model_id, branch = model_path, 'main'
# Output folder name
folder_name = model_id.split('/')[-1]
output_folder = Path(shared.args.image_model_dir) / folder_name
yield f"Downloading `{model_id}` (branch: {branch})...", gr.update()
snapshot_download(
repo_id=model_id,
revision=branch,
local_dir=output_folder,
local_dir_use_symlinks=False,
)
# Refresh the model list
new_choices = utils.get_available_image_models()
yield f"✓ Downloaded to `{output_folder}`", gr.update(choices=new_choices, value=folder_name)
except Exception:
exc = traceback.format_exc()
yield f"Error:\n```\n{exc}\n```", gr.update()
def save_generated_images(images, prompt, seed):
# Create folder structure: outputs/YYYY-MM-DD/
"""Save generated images to disk."""
date_str = datetime.now().strftime("%Y-%m-%d")
folder_path = os.path.join("outputs", date_str)
folder_path = os.path.join("user_data", "image_outputs", date_str)
os.makedirs(folder_path, exist_ok=True)
saved_paths = []
for idx, img in enumerate(images):
timestamp = datetime.now().strftime("%H-%M-%S")
# Filename: Time_Seed_Index.png
filename = f"{timestamp}_{seed}_{idx}.png"
full_path = os.path.join(folder_path, filename)
# Save image
img.save(full_path)
saved_paths.append(full_path)
# Optional: Save prompt metadata in a text file next to it?
# For now, we just save the image.
return saved_paths
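# Example output path (illustrative values): an image generated with seed 12345
# on 2025-11-27 at 15:32:01 would be saved as
# user_data/image_outputs/2025-11-27/15-32-01_12345_0.png.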
# --- History Logic ---
def get_history_images():
"""Scans the outputs folder and returns all images, newest first"""
if not os.path.exists("outputs"):
"""Scan the outputs folder and return all images, newest first."""
output_dir = os.path.join("user_data", "image_outputs")
if not os.path.exists(output_dir):
return []
image_files = []
for root, dirs, files in os.walk("outputs"):
for root, dirs, files in os.walk(output_dir):
for file in files:
if file.endswith((".png", ".jpg", ".jpeg")):
full_path = os.path.join(root, file)
# Get creation time for sorting
mtime = os.path.getmtime(full_path)
image_files.append((full_path, mtime))
# Sort by time, newest first
image_files.sort(key=lambda x: x[1], reverse=True)
return [x[0] for x in image_files]
def round_to_step(value, step=STEP):
"""Round a value to the nearest step."""
return round(value / step) * step
def clamp(value, min_val, max_val):
"""Clamp value between min and max."""
return max(min_val, min(max_val, value))
def apply_aspect_ratio(aspect_ratio, current_width, current_height):
"""
Apply an aspect ratio preset.
Logic to prevent dimension creep:
- For tall ratios (like 9:16): keep width fixed, calculate height
- For wide ratios (like 16:9): keep height fixed, calculate width
- For square (1:1): use the smaller of the current dimensions
Returns (new_width, new_height).
"""
if aspect_ratio == "Custom" or aspect_ratio not in ASPECT_RATIOS:
return current_width, current_height
w_ratio, h_ratio = ASPECT_RATIOS[aspect_ratio]
if w_ratio == h_ratio:
# Square ratio - use the smaller current dimension to prevent creep
base = min(current_width, current_height)
new_width = base
new_height = base
elif w_ratio < h_ratio:
# Tall ratio (like 9:16) - width is the smaller side, keep it fixed
new_width = current_width
new_height = round_to_step(current_width * h_ratio / w_ratio)
else:
# Wide ratio (like 16:9) - height is the smaller side, keep it fixed
new_height = current_height
new_width = round_to_step(current_height * w_ratio / h_ratio)
# Clamp to slider bounds
new_width = clamp(new_width, 256, 2048)
new_height = clamp(new_height, 256, 2048)
return int(new_width), int(new_height)
def update_height_from_width(width, aspect_ratio):
"""Update height when width changes (if not Custom)."""
if aspect_ratio == "Custom" or aspect_ratio not in ASPECT_RATIOS:
return gr.update()
w_ratio, h_ratio = ASPECT_RATIOS[aspect_ratio]
new_height = round_to_step(width * h_ratio / w_ratio)
new_height = clamp(new_height, 256, 2048)
return int(new_height)
def update_width_from_height(height, aspect_ratio):
"""Update width when height changes (if not Custom)."""
if aspect_ratio == "Custom" or aspect_ratio not in ASPECT_RATIOS:
return gr.update()
w_ratio, h_ratio = ASPECT_RATIOS[aspect_ratio]
new_width = round_to_step(height * w_ratio / h_ratio)
new_width = clamp(new_width, 256, 2048)
return int(new_width)
def swap_dimensions(width, height):
"""Swap width and height values."""
return height, width
def swap_dimensions_and_update_ratio(width, height, aspect_ratio):
"""Swap dimensions and update aspect ratio to match (or set to Custom)."""
new_width, new_height = height, width
# Try to find a matching aspect ratio for the swapped dimensions
new_ratio = "Custom"
for name, ratios in ASPECT_RATIOS.items():
if ratios is None:
continue
w_r, h_r = ratios
# Check if the swapped dimensions match this ratio (within tolerance)
expected_height = new_width * h_r / w_r
if abs(expected_height - new_height) < STEP:
new_ratio = name
break
return new_width, new_height, new_ratio


@@ -27,149 +27,112 @@ def create_ui():
mu = shared.args.multi_user
with gr.Tab("Model", elem_id="model-tab"):
with gr.Tab("Text model"):
with gr.Row():
with gr.Column():
with gr.Row():
shared.gradio['model_menu'] = gr.Dropdown(choices=utils.get_available_models(), value=lambda: shared.model_name, label='Model', elem_classes='slim-dropdown', interactive=not mu)
ui.create_refresh_button(shared.gradio['model_menu'], lambda: None, lambda: {'choices': utils.get_available_models()}, 'refresh-button', interactive=not mu)
shared.gradio['load_model'] = gr.Button("Load", elem_classes='refresh-button', interactive=not mu)
shared.gradio['unload_model'] = gr.Button("Unload", elem_classes='refresh-button', interactive=not mu)
shared.gradio['save_model_settings'] = gr.Button("Save settings", elem_classes='refresh-button', interactive=not mu)
with gr.Row():
with gr.Column():
with gr.Row():
shared.gradio['model_menu'] = gr.Dropdown(choices=utils.get_available_models(), value=lambda: shared.model_name, label='Model', elem_classes='slim-dropdown', interactive=not mu)
ui.create_refresh_button(shared.gradio['model_menu'], lambda: None, lambda: {'choices': utils.get_available_models()}, 'refresh-button', interactive=not mu)
shared.gradio['load_model'] = gr.Button("Load", elem_classes='refresh-button', interactive=not mu)
shared.gradio['unload_model'] = gr.Button("Unload", elem_classes='refresh-button', interactive=not mu)
shared.gradio['save_model_settings'] = gr.Button("Save settings", elem_classes='refresh-button', interactive=not mu)
shared.gradio['loader'] = gr.Dropdown(label="Model loader", choices=loaders.loaders_and_params.keys() if not shared.args.portable else ['llama.cpp'], value=None)
with gr.Blocks():
gr.Markdown("## Main options")
shared.gradio['loader'] = gr.Dropdown(label="Model loader", choices=loaders.loaders_and_params.keys() if not shared.args.portable else ['llama.cpp'], value=None)
with gr.Blocks():
gr.Markdown("## Main options")
with gr.Row():
with gr.Column():
shared.gradio['gpu_layers'] = gr.Slider(label="gpu-layers", minimum=0, maximum=get_initial_gpu_layers_max(), step=1, value=shared.args.gpu_layers, info='Must be greater than 0 for the GPU to be used. ⚠️ Lower this value if you can\'t load the model.')
shared.gradio['ctx_size'] = gr.Slider(label='ctx-size', minimum=256, maximum=131072, step=256, value=shared.args.ctx_size, info='Context length. Common values: 4096, 8192, 16384, 32768, 65536, 131072.')
shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7')
shared.gradio['attn_implementation'] = gr.Dropdown(label="attn-implementation", choices=['sdpa', 'eager', 'flash_attention_2'], value=shared.args.attn_implementation, info='Attention implementation.')
shared.gradio['cache_type'] = gr.Dropdown(label="cache-type", choices=['fp16', 'q8_0', 'q4_0', 'fp8', 'q8', 'q7', 'q6', 'q5', 'q4', 'q3', 'q2'], value=shared.args.cache_type, allow_custom_value=True, info='Valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4; ExLlamaV3 - fp16, q2 to q8. For ExLlamaV3, you can type custom combinations for separate k/v bits (e.g. q4_q8).')
shared.gradio['tp_backend'] = gr.Dropdown(label="tp-backend", choices=['native', 'nccl'], value=shared.args.tp_backend, info='The backend for tensor parallelism.')
with gr.Column():
shared.gradio['vram_info'] = gr.HTML(value=get_initial_vram_info())
shared.gradio['cpu_moe'] = gr.Checkbox(label="cpu-moe", value=shared.args.cpu_moe, info='Move the experts to the CPU. Saves VRAM on MoE models.')
shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming-llm", value=shared.args.streaming_llm, info='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
shared.gradio['load_in_8bit'] = gr.Checkbox(label="load-in-8bit", value=shared.args.load_in_8bit)
shared.gradio['load_in_4bit'] = gr.Checkbox(label="load-in-4bit", value=shared.args.load_in_4bit)
shared.gradio['use_double_quant'] = gr.Checkbox(label="use_double_quant", value=shared.args.use_double_quant, info='Used by load-in-4bit.')
shared.gradio['autosplit'] = gr.Checkbox(label="autosplit", value=shared.args.autosplit, info='Automatically split the model tensors across the available GPUs.')
shared.gradio['enable_tp'] = gr.Checkbox(label="enable_tp", value=shared.args.enable_tp, info='Enable tensor parallelism (TP).')
shared.gradio['cpp_runner'] = gr.Checkbox(label="cpp-runner", value=shared.args.cpp_runner, info='Enable inference with ModelRunnerCpp, which is faster than the default ModelRunner.')
shared.gradio['tensorrt_llm_info'] = gr.Markdown('* TensorRT-LLM has to be installed manually in a separate Python 3.10 environment at the moment. For a guide, consult the description of [this PR](https://github.com/oobabooga/text-generation-webui/pull/5715). \n\n* `ctx_size` is only used when `cpp-runner` is checked.\n\n* `cpp_runner` does not support streaming at the moment.')
# Multimodal
with gr.Accordion("Multimodal (vision)", open=False, elem_classes='tgw-accordion') as shared.gradio['mmproj_accordion']:
with gr.Row():
shared.gradio['mmproj'] = gr.Dropdown(label="mmproj file", choices=utils.get_available_mmproj(), value=lambda: shared.args.mmproj or 'None', elem_classes='slim-dropdown', info='Select a file that matches your model. Must be placed in user_data/mmproj/', interactive=not mu)
ui.create_refresh_button(shared.gradio['mmproj'], lambda: None, lambda: {'choices': utils.get_available_mmproj()}, 'refresh-button', interactive=not mu)
# Speculative decoding
with gr.Accordion("Speculative decoding", open=False, elem_classes='tgw-accordion') as shared.gradio['speculative_decoding_accordion']:
with gr.Row():
shared.gradio['model_draft'] = gr.Dropdown(label="model-draft", choices=['None'] + utils.get_available_models(), value=lambda: shared.args.model_draft, elem_classes='slim-dropdown', info='Draft model. Speculative decoding only works with models sharing the same vocabulary (e.g., same model family).', interactive=not mu)
ui.create_refresh_button(shared.gradio['model_draft'], lambda: None, lambda: {'choices': ['None'] + utils.get_available_models()}, 'refresh-button', interactive=not mu)
shared.gradio['gpu_layers_draft'] = gr.Slider(label="gpu-layers-draft", minimum=0, maximum=256, value=shared.args.gpu_layers_draft, info='Number of layers to offload to the GPU for the draft model.')
shared.gradio['draft_max'] = gr.Number(label="draft-max", precision=0, step=1, value=shared.args.draft_max, info='Number of tokens to draft for speculative decoding. Recommended value: 4.')
shared.gradio['device_draft'] = gr.Textbox(label="device-draft", value=shared.args.device_draft, info='Comma-separated list of devices to use for offloading the draft model. Example: CUDA0,CUDA1')
shared.gradio['ctx_size_draft'] = gr.Number(label="ctx-size-draft", precision=0, step=256, value=shared.args.ctx_size_draft, info='Size of the prompt context for the draft model. If 0, uses the same as the main model.')
gr.Markdown("## Other options")
with gr.Accordion("See more options", open=False, elem_classes='tgw-accordion'):
with gr.Row():
with gr.Column():
shared.gradio['gpu_layers'] = gr.Slider(label="gpu-layers", minimum=0, maximum=get_initial_gpu_layers_max(), step=1, value=shared.args.gpu_layers, info='Must be greater than 0 for the GPU to be used. ⚠️ Lower this value if you can\'t load the model.')
shared.gradio['ctx_size'] = gr.Slider(label='ctx-size', minimum=256, maximum=131072, step=256, value=shared.args.ctx_size, info='Context length. Common values: 4096, 8192, 16384, 32768, 65536, 131072.')
shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7')
shared.gradio['attn_implementation'] = gr.Dropdown(label="attn-implementation", choices=['sdpa', 'eager', 'flash_attention_2'], value=shared.args.attn_implementation, info='Attention implementation.')
shared.gradio['cache_type'] = gr.Dropdown(label="cache-type", choices=['fp16', 'q8_0', 'q4_0', 'fp8', 'q8', 'q7', 'q6', 'q5', 'q4', 'q3', 'q2'], value=shared.args.cache_type, allow_custom_value=True, info='Valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4; ExLlamaV3 - fp16, q2 to q8. For ExLlamaV3, you can type custom combinations for separate k/v bits (e.g. q4_q8).')
shared.gradio['tp_backend'] = gr.Dropdown(label="tp-backend", choices=['native', 'nccl'], value=shared.args.tp_backend, info='The backend for tensor parallelism.')
shared.gradio['threads'] = gr.Slider(label="threads", minimum=0, step=1, maximum=256, value=shared.args.threads)
shared.gradio['threads_batch'] = gr.Slider(label="threads_batch", minimum=0, step=1, maximum=256, value=shared.args.threads_batch)
shared.gradio['batch_size'] = gr.Slider(label="batch_size", minimum=1, maximum=4096, step=1, value=shared.args.batch_size)
shared.gradio['ubatch_size'] = gr.Slider(label="ubatch_size", minimum=1, maximum=4096, step=1, value=shared.args.ubatch_size)
shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='List of proportions to split the model across multiple GPUs. Example: 60,40')
shared.gradio['extra_flags'] = gr.Textbox(label='extra-flags', info='Additional flags to pass to llama-server. Format: "flag1=value1,flag2,flag3=value3". Example: "override-tensor=exps=CPU"', value=shared.args.extra_flags)
shared.gradio['cpu_memory'] = gr.Number(label="Maximum CPU memory in GiB. Use this for CPU offloading.", value=shared.args.cpu_memory)
shared.gradio['alpha_value'] = gr.Number(label='alpha_value', value=shared.args.alpha_value, precision=2, info='Positional embeddings alpha factor for NTK RoPE scaling. Recommended values (NTKv1): 1.75 for 1.5x context, 2.5 for 2x context. Use either this or compress_pos_emb, not both.')
shared.gradio['rope_freq_base'] = gr.Number(label='rope_freq_base', value=shared.args.rope_freq_base, precision=0, info='Positional embeddings frequency base for NTK RoPE scaling. Related to alpha_value by rope_freq_base = 10000 * alpha_value ^ (64 / 63). 0 = from model.')
shared.gradio['compress_pos_emb'] = gr.Number(label='compress_pos_emb', value=shared.args.compress_pos_emb, precision=2, info='Positional embeddings compression factor. Should be set to (context length) / (model\'s original context length). Equal to 1/rope_freq_scale.')
shared.gradio['compute_dtype'] = gr.Dropdown(label="compute_dtype", choices=["bfloat16", "float16", "float32"], value=shared.args.compute_dtype, info='Used by load-in-4bit.')
shared.gradio['quant_type'] = gr.Dropdown(label="quant_type", choices=["nf4", "fp4"], value=shared.args.quant_type, info='Used by load-in-4bit.')
shared.gradio['num_experts_per_token'] = gr.Number(label="Number of experts per token", value=shared.args.num_experts_per_token, info='Only applies to MoE models like Mixtral.')
with gr.Column():
shared.gradio['vram_info'] = gr.HTML(value=get_initial_vram_info())
shared.gradio['cpu_moe'] = gr.Checkbox(label="cpu-moe", value=shared.args.cpu_moe, info='Move the experts to the CPU. Saves VRAM on MoE models.')
shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming-llm", value=shared.args.streaming_llm, info='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
shared.gradio['load_in_8bit'] = gr.Checkbox(label="load-in-8bit", value=shared.args.load_in_8bit)
shared.gradio['load_in_4bit'] = gr.Checkbox(label="load-in-4bit", value=shared.args.load_in_4bit)
shared.gradio['use_double_quant'] = gr.Checkbox(label="use_double_quant", value=shared.args.use_double_quant, info='Used by load-in-4bit.')
shared.gradio['autosplit'] = gr.Checkbox(label="autosplit", value=shared.args.autosplit, info='Automatically split the model tensors across the available GPUs.')
shared.gradio['enable_tp'] = gr.Checkbox(label="enable_tp", value=shared.args.enable_tp, info='Enable tensor parallelism (TP).')
shared.gradio['cpp_runner'] = gr.Checkbox(label="cpp-runner", value=shared.args.cpp_runner, info='Enable inference with ModelRunnerCpp, which is faster than the default ModelRunner.')
shared.gradio['tensorrt_llm_info'] = gr.Markdown('* TensorRT-LLM has to be installed manually in a separate Python 3.10 environment at the moment. For a guide, consult the description of [this PR](https://github.com/oobabooga/text-generation-webui/pull/5715). \n\n* `ctx_size` is only used when `cpp-runner` is checked.\n\n* `cpp_runner` does not support streaming at the moment.')
# Multimodal
with gr.Accordion("Multimodal (vision)", open=False, elem_classes='tgw-accordion') as shared.gradio['mmproj_accordion']:
shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='Use PyTorch in CPU mode.')
shared.gradio['disk'] = gr.Checkbox(label="disk", value=shared.args.disk)
shared.gradio['row_split'] = gr.Checkbox(label="row_split", value=shared.args.row_split, info='Split the model by rows across GPUs. This may improve multi-gpu performance.')
shared.gradio['no_kv_offload'] = gr.Checkbox(label="no_kv_offload", value=shared.args.no_kv_offload, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.')
shared.gradio['no_mmap'] = gr.Checkbox(label="no-mmap", value=shared.args.no_mmap)
shared.gradio['mlock'] = gr.Checkbox(label="mlock", value=shared.args.mlock)
shared.gradio['numa'] = gr.Checkbox(label="numa", value=shared.args.numa, info='NUMA support can help on some systems with non-uniform memory access.')
shared.gradio['bf16'] = gr.Checkbox(label="bf16", value=shared.args.bf16)
shared.gradio['no_flash_attn'] = gr.Checkbox(label="no_flash_attn", value=shared.args.no_flash_attn)
shared.gradio['no_xformers'] = gr.Checkbox(label="no_xformers", value=shared.args.no_xformers)
shared.gradio['no_sdpa'] = gr.Checkbox(label="no_sdpa", value=shared.args.no_sdpa)
shared.gradio['cfg_cache'] = gr.Checkbox(label="cfg-cache", value=shared.args.cfg_cache, info='Necessary to use CFG with this loader.')
shared.gradio['no_use_fast'] = gr.Checkbox(label="no_use_fast", value=shared.args.no_use_fast, info='Set use_fast=False while loading the tokenizer.')
if not shared.args.portable:
with gr.Row():
shared.gradio['mmproj'] = gr.Dropdown(label="mmproj file", choices=utils.get_available_mmproj(), value=lambda: shared.args.mmproj or 'None', elem_classes='slim-dropdown', info='Select a file that matches your model. Must be placed in user_data/mmproj/', interactive=not mu)
ui.create_refresh_button(shared.gradio['mmproj'], lambda: None, lambda: {'choices': utils.get_available_mmproj()}, 'refresh-button', interactive=not mu)
# Speculative decoding
with gr.Accordion("Speculative decoding", open=False, elem_classes='tgw-accordion') as shared.gradio['speculative_decoding_accordion']:
with gr.Row():
shared.gradio['model_draft'] = gr.Dropdown(label="model-draft", choices=['None'] + utils.get_available_models(), value=lambda: shared.args.model_draft, elem_classes='slim-dropdown', info='Draft model. Speculative decoding only works with models sharing the same vocabulary (e.g., same model family).', interactive=not mu)
ui.create_refresh_button(shared.gradio['model_draft'], lambda: None, lambda: {'choices': ['None'] + utils.get_available_models()}, 'refresh-button', interactive=not mu)
shared.gradio['gpu_layers_draft'] = gr.Slider(label="gpu-layers-draft", minimum=0, maximum=256, value=shared.args.gpu_layers_draft, info='Number of layers to offload to the GPU for the draft model.')
shared.gradio['draft_max'] = gr.Number(label="draft-max", precision=0, step=1, value=shared.args.draft_max, info='Number of tokens to draft for speculative decoding. Recommended value: 4.')
shared.gradio['device_draft'] = gr.Textbox(label="device-draft", value=shared.args.device_draft, info='Comma-separated list of devices to use for offloading the draft model. Example: CUDA0,CUDA1')
shared.gradio['ctx_size_draft'] = gr.Number(label="ctx-size-draft", precision=0, step=256, value=shared.args.ctx_size_draft, info='Size of the prompt context for the draft model. If 0, uses the same as the main model.')
gr.Markdown("## Other options")
with gr.Accordion("See more options", open=False, elem_classes='tgw-accordion'):
with gr.Row():
with gr.Column():
shared.gradio['threads'] = gr.Slider(label="threads", minimum=0, step=1, maximum=256, value=shared.args.threads)
shared.gradio['threads_batch'] = gr.Slider(label="threads_batch", minimum=0, step=1, maximum=256, value=shared.args.threads_batch)
shared.gradio['batch_size'] = gr.Slider(label="batch_size", minimum=1, maximum=4096, step=1, value=shared.args.batch_size)
shared.gradio['ubatch_size'] = gr.Slider(label="ubatch_size", minimum=1, maximum=4096, step=1, value=shared.args.ubatch_size)
shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='List of proportions to split the model across multiple GPUs. Example: 60,40')
shared.gradio['extra_flags'] = gr.Textbox(label='extra-flags', info='Additional flags to pass to llama-server. Format: "flag1=value1,flag2,flag3=value3". Example: "override-tensor=exps=CPU"', value=shared.args.extra_flags)
shared.gradio['cpu_memory'] = gr.Number(label="Maximum CPU memory in GiB. Use this for CPU offloading.", value=shared.args.cpu_memory)
shared.gradio['alpha_value'] = gr.Number(label='alpha_value', value=shared.args.alpha_value, precision=2, info='Positional embeddings alpha factor for NTK RoPE scaling. Recommended values (NTKv1): 1.75 for 1.5x context, 2.5 for 2x context. Use either this or compress_pos_emb, not both.')
shared.gradio['rope_freq_base'] = gr.Number(label='rope_freq_base', value=shared.args.rope_freq_base, precision=0, info='Positional embeddings frequency base for NTK RoPE scaling. Related to alpha_value by rope_freq_base = 10000 * alpha_value ^ (64 / 63). 0 = from model.')
shared.gradio['compress_pos_emb'] = gr.Number(label='compress_pos_emb', value=shared.args.compress_pos_emb, precision=2, info='Positional embeddings compression factor. Should be set to (context length) / (model\'s original context length). Equal to 1/rope_freq_scale.')
shared.gradio['compute_dtype'] = gr.Dropdown(label="compute_dtype", choices=["bfloat16", "float16", "float32"], value=shared.args.compute_dtype, info='Used by load-in-4bit.')
shared.gradio['quant_type'] = gr.Dropdown(label="quant_type", choices=["nf4", "fp4"], value=shared.args.quant_type, info='Used by load-in-4bit.')
shared.gradio['num_experts_per_token'] = gr.Number(label="Number of experts per token", value=shared.args.num_experts_per_token, info='Only applies to MoE models like Mixtral.')
with gr.Column():
shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='Use PyTorch in CPU mode.')
shared.gradio['disk'] = gr.Checkbox(label="disk", value=shared.args.disk)
shared.gradio['row_split'] = gr.Checkbox(label="row_split", value=shared.args.row_split, info='Split the model by rows across GPUs. This may improve multi-gpu performance.')
shared.gradio['no_kv_offload'] = gr.Checkbox(label="no_kv_offload", value=shared.args.no_kv_offload, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.')
shared.gradio['no_mmap'] = gr.Checkbox(label="no-mmap", value=shared.args.no_mmap)
shared.gradio['mlock'] = gr.Checkbox(label="mlock", value=shared.args.mlock)
shared.gradio['numa'] = gr.Checkbox(label="numa", value=shared.args.numa, info='NUMA support can help on some systems with non-uniform memory access.')
shared.gradio['bf16'] = gr.Checkbox(label="bf16", value=shared.args.bf16)
shared.gradio['no_flash_attn'] = gr.Checkbox(label="no_flash_attn", value=shared.args.no_flash_attn)
shared.gradio['no_xformers'] = gr.Checkbox(label="no_xformers", value=shared.args.no_xformers)
shared.gradio['no_sdpa'] = gr.Checkbox(label="no_sdpa", value=shared.args.no_sdpa)
shared.gradio['cfg_cache'] = gr.Checkbox(label="cfg-cache", value=shared.args.cfg_cache, info='Necessary to use CFG with this loader.')
shared.gradio['no_use_fast'] = gr.Checkbox(label="no_use_fast", value=shared.args.no_use_fast, info='Set use_fast=False while loading the tokenizer.')
if not shared.args.portable:
with gr.Row():
shared.gradio['lora_menu'] = gr.Dropdown(multiselect=True, choices=utils.get_available_loras(), value=shared.lora_names, label='LoRA(s)', elem_classes='slim-dropdown', interactive=not mu)
ui.create_refresh_button(shared.gradio['lora_menu'], lambda: None, lambda: {'choices': utils.get_available_loras(), 'value': shared.lora_names}, 'refresh-button', interactive=not mu)
shared.gradio['lora_menu_apply'] = gr.Button(value='Apply LoRAs', elem_classes='refresh-button', interactive=not mu)
with gr.Column():
with gr.Tab("Download"):
shared.gradio['custom_model_menu'] = gr.Textbox(label="Download model or LoRA", info="Enter the Hugging Face username/model path, for instance: facebook/galactica-125m. To specify a branch, add it at the end after a \":\" character like this: facebook/galactica-125m:main. To download a single file, enter its name in the second box.", interactive=not mu)
shared.gradio['download_specific_file'] = gr.Textbox(placeholder="File name (for GGUF models)", show_label=False, max_lines=1, interactive=not mu)
with gr.Row():
shared.gradio['download_model_button'] = gr.Button("Download", variant='primary', interactive=not mu)
shared.gradio['get_file_list'] = gr.Button("Get file list", interactive=not mu)
with gr.Tab("Customize instruction template"):
with gr.Row():
shared.gradio['customized_template'] = gr.Dropdown(choices=utils.get_available_instruction_templates(), value='None', label='Select the desired instruction template', elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['customized_template'], lambda: None, lambda: {'choices': utils.get_available_instruction_templates()}, 'refresh-button', interactive=not mu)
shared.gradio['customized_template_submit'] = gr.Button("Submit", variant="primary", interactive=not mu)
gr.Markdown("This allows you to set a customized template for the model currently selected in the \"Model loader\" menu. Whenever the model gets loaded, this template will be used in place of the template specified in the model's medatada, which sometimes is wrong.")
shared.gradio['lora_menu'] = gr.Dropdown(multiselect=True, choices=utils.get_available_loras(), value=shared.lora_names, label='LoRA(s)', elem_classes='slim-dropdown', interactive=not mu)
ui.create_refresh_button(shared.gradio['lora_menu'], lambda: None, lambda: {'choices': utils.get_available_loras(), 'value': shared.lora_names}, 'refresh-button', interactive=not mu)
shared.gradio['lora_menu_apply'] = gr.Button(value='Apply LoRAs', elem_classes='refresh-button', interactive=not mu)
with gr.Column():
with gr.Tab("Download"):
shared.gradio['custom_model_menu'] = gr.Textbox(label="Download model or LoRA", info="Enter the Hugging Face username/model path, for instance: facebook/galactica-125m. To specify a branch, add it at the end after a \":\" character like this: facebook/galactica-125m:main. To download a single file, enter its name in the second box.", interactive=not mu)
shared.gradio['download_specific_file'] = gr.Textbox(placeholder="File name (for GGUF models)", show_label=False, max_lines=1, interactive=not mu)
with gr.Row():
shared.gradio['model_status'] = gr.Markdown('No model is loaded' if shared.model_name == 'None' else 'Ready')
shared.gradio['download_model_button'] = gr.Button("Download", variant='primary', interactive=not mu)
shared.gradio['get_file_list'] = gr.Button("Get file list", interactive=not mu)
with gr.Tab("Image model"):
with gr.Row():
with gr.Column():
with gr.Tab("Customize instruction template"):
with gr.Row():
shared.gradio['image_model_menu'] = gr.Dropdown(choices=utils.get_available_image_models(), value=lambda: shared.image_model_name, label='Model', elem_classes='slim-dropdown', interactive=not mu)
ui.create_refresh_button(shared.gradio['image_model_menu'], lambda: None, lambda: {'choices': utils.get_available_image_models()}, 'refresh-button', interactive=not mu)
shared.gradio['image_load_model'] = gr.Button("Load", elem_classes='refresh-button', interactive=not mu)
shared.gradio['image_unload_model'] = gr.Button("Unload", elem_classes='refresh-button', interactive=not mu)
shared.gradio['image_save_model_settings'] = gr.Button("Save settings", elem_classes='refresh-button', interactive=not mu)
shared.gradio['customized_template'] = gr.Dropdown(choices=utils.get_available_instruction_templates(), value='None', label='Select the desired instruction template', elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['customized_template'], lambda: None, lambda: {'choices': utils.get_available_instruction_templates()}, 'refresh-button', interactive=not mu)
with gr.Blocks():
gr.Markdown("## Main options")
with gr.Row():
with gr.Column():
pass
shared.gradio['customized_template_submit'] = gr.Button("Submit", variant="primary", interactive=not mu)
gr.Markdown("This allows you to set a customized template for the model currently selected in the \"Model loader\" menu. Whenever the model gets loaded, this template will be used in place of the template specified in the model's medatada, which sometimes is wrong.")
with gr.Column():
pass
gr.Markdown("## Other options")
with gr.Accordion("See more options", open=False, elem_classes='tgw-accordion'):
with gr.Row():
with gr.Column():
pass
with gr.Column():
pass
with gr.Column():
shared.gradio['image_custom_model_menu'] = gr.Textbox(label="Download model (diffusers format)", info="Enter the Hugging Face username/model path, for instance: Tongyi-MAI/Z-Image-Turbo. To specify a branch, add it at the end after a \":\" character like this: Tongyi-MAI/Z-Image-Turbo:main.", interactive=not mu)
with gr.Row():
shared.gradio['image_download_model_button'] = gr.Button("Download", variant='primary', interactive=not mu)
with gr.Row():
shared.gradio['image_model_status'] = gr.Markdown('No model is loaded' if shared.image_model_name == 'None' else 'Ready')
with gr.Row():
shared.gradio['model_status'] = gr.Markdown('No model is loaded' if shared.model_name == 'None' else 'Ready')
def create_event_handlers():
@@ -220,28 +183,6 @@ def create_event_handlers():
shared.gradio['get_file_list'].click(partial(download_model_wrapper, return_links=True), gradio('custom_model_menu', 'download_specific_file'), gradio('model_status'), show_progress=True)
shared.gradio['customized_template_submit'].click(save_instruction_template, gradio('model_menu', 'customized_template'), gradio('model_status'), show_progress=True)
# Image model event handlers
shared.gradio['image_load_model'].click(
load_image_model_wrapper,
gradio('image_model_menu'),
gradio('image_model_status'),
show_progress=True
)
shared.gradio['image_unload_model'].click(
handle_unload_image_model_click,
None,
gradio('image_model_status'),
show_progress=False
)
shared.gradio['image_download_model_button'].click(
download_image_model_wrapper,
gradio('image_custom_model_menu'),
gradio('image_model_status'),
show_progress=True
)
def load_model_wrapper(selected_model, loader, autoload=False):
try: