Keep things more modular

oobabooga 2025-11-27 15:32:01 -08:00
parent 0adda7a5c5
commit 148a5d1e44
3 changed files with 433 additions and 323 deletions


@@ -52,11 +52,12 @@ group.add_argument('--idle-timeout', type=int, default=0, help='Unload model aft
# Image generation
group = parser.add_argument_group('Image model')
group.add_argument('--image-model', type=str, help='Name of the image model to load by default.')
group.add_argument('--image-model', type=str, help='Name of the image model to select on startup (overrides saved setting).')
group.add_argument('--image-model-dir', type=str, default='user_data/image_models', help='Path to directory with all the image models.')
group.add_argument('--image-dtype', type=str, default='bfloat16', choices=['bfloat16', 'float16'], help='Data type for image model.')
group.add_argument('--image-attn-backend', type=str, default='sdpa', choices=['sdpa', 'flash_attention_2', 'flash_attention_3'], help='Attention backend for image model.')
group.add_argument('--image-dtype', type=str, default=None, choices=['bfloat16', 'float16'], help='Data type for image model.')
group.add_argument('--image-attn-backend', type=str, default=None, choices=['sdpa', 'flash_attention_2', 'flash_attention_3'], help='Attention backend for image model.')
group.add_argument('--image-cpu-offload', action='store_true', help='Enable CPU offloading for image model.')
group.add_argument('--image-compile', action='store_true', help='Compile the image model for faster inference.')
# Model loader
group = parser.add_argument_group('Model loader')
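
Note on the None defaults above: --image-dtype and --image-attn-backend now only override when passed explicitly, so per-model settings saved to yaml can take effect. A minimal sketch of the resolution order (CLI > yaml > defaults) follows; the real logic lives in get_effective_settings() in modules/image_model_settings.py, which is not part of this diff, so the helper and names below are illustrative assumptions only.

DEFAULTS = {'dtype': 'bfloat16', 'attn_backend': 'sdpa'}

def resolve_image_setting(key, cli_value, saved_settings):
    # Illustrative only: an explicit CLI flag (not None) wins,
    # then the per-model yaml entry, then the built-in default.
    if cli_value is not None:
        return cli_value
    if saved_settings.get(key) is not None:
        return saved_settings[key]
    return DEFAULTS[key]

# resolve_image_setting('dtype', None, {'dtype': 'float16'})       -> 'float16'
# resolve_image_setting('dtype', 'bfloat16', {'dtype': 'float16'}) -> 'bfloat16'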


@@ -1,12 +1,16 @@
# modules/ui_image_generation.py
import os
import traceback
from datetime import datetime
from pathlib import Path
import gradio as gr
import numpy as np
import torch
from modules import shared
from modules import shared, utils
from modules.image_models import load_image_model, unload_image_model
from modules.image_model_settings import get_effective_settings, save_image_model_settings
# Aspect ratio definitions: name -> (width_ratio, height_ratio)
@@ -21,16 +25,113 @@ ASPECT_RATIOS = {
STEP = 32 # Slider step for rounding
def round_to_step(value, step=STEP):
"""Round a value to the nearest step."""
return round(value / step) * step
def clamp(value, min_val, max_val):
"""Clamp value between min and max."""
return max(min_val, min(max_val, value))
def apply_aspect_ratio(aspect_ratio, current_width, current_height):
"""
Apply an aspect ratio preset.
Logic to prevent dimension creep:
- For tall ratios (like 9:16): keep width fixed, calculate height
- For wide ratios (like 16:9): keep height fixed, calculate width
- For square (1:1): use the smaller of the current dimensions
Returns (new_width, new_height).
"""
if aspect_ratio == "Custom" or aspect_ratio not in ASPECT_RATIOS:
return current_width, current_height
w_ratio, h_ratio = ASPECT_RATIOS[aspect_ratio]
if w_ratio == h_ratio:
# Square ratio - use the smaller current dimension to prevent creep
base = min(current_width, current_height)
new_width = base
new_height = base
elif w_ratio < h_ratio:
# Tall ratio (like 9:16) - width is the smaller side, keep it fixed
new_width = current_width
new_height = round_to_step(current_width * h_ratio / w_ratio)
else:
# Wide ratio (like 16:9) - height is the smaller side, keep it fixed
new_height = current_height
new_width = round_to_step(current_height * w_ratio / h_ratio)
# Clamp to slider bounds
new_width = clamp(new_width, 256, 2048)
new_height = clamp(new_height, 256, 2048)
return int(new_width), int(new_height)
def update_height_from_width(width, aspect_ratio):
"""Update height when width changes (if not Custom)."""
if aspect_ratio == "Custom" or aspect_ratio not in ASPECT_RATIOS:
return gr.update()
w_ratio, h_ratio = ASPECT_RATIOS[aspect_ratio]
new_height = round_to_step(width * h_ratio / w_ratio)
new_height = clamp(new_height, 256, 2048)
return int(new_height)
def update_width_from_height(height, aspect_ratio):
"""Update width when height changes (if not Custom)."""
if aspect_ratio == "Custom" or aspect_ratio not in ASPECT_RATIOS:
return gr.update()
w_ratio, h_ratio = ASPECT_RATIOS[aspect_ratio]
new_width = round_to_step(height * w_ratio / h_ratio)
new_width = clamp(new_width, 256, 2048)
return int(new_width)
def swap_dimensions_and_update_ratio(width, height, aspect_ratio):
"""Swap dimensions and update aspect ratio to match (or set to Custom)."""
new_width, new_height = height, width
# Try to find a matching aspect ratio for the swapped dimensions
new_ratio = "Custom"
for name, ratios in ASPECT_RATIOS.items():
if ratios is None:
continue
w_r, h_r = ratios
# Check if the swapped dimensions match this ratio (within tolerance)
expected_height = new_width * h_r / w_r
if abs(expected_height - new_height) < STEP:
new_ratio = name
break
return new_width, new_height, new_ratio
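# Worked example (illustrative, not part of the commit, assuming a "9:16" preset
# key as the docstring suggests): starting from 1024x1024,
# apply_aspect_ratio("9:16", 1024, 1024) keeps the width and returns (1024, 1824),
# since round_to_step(1024 * 16 / 9) = round(1820.4 / 32) * 32 = 1824. Applying
# "16:9" next keeps the height and computes round_to_step(1824 * 16 / 9) = 3232,
# which clamp() caps at 2048, so toggling presets cannot push either dimension
# past the 256-2048 slider bounds.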
def create_ui():
# Get effective settings (CLI > yaml > defaults)
settings = get_effective_settings()
# Update shared state (but don't load the model yet)
if settings['model_name'] != 'None':
shared.image_model_name = settings['model_name']
with gr.Tab("Image AI", elem_id="image-ai-tab"):
with gr.Tabs():
# TAB 1: GENERATION STUDIO
with gr.TabItem("Generate Images"):
with gr.TabItem("Generate"):
with gr.Row():
# === LEFT COLUMN: CONTROLS ===
with gr.Column(scale=4, min_width=350):
# 1. PROMPT
prompt = gr.Textbox(label="Prompt", placeholder="Describe your imagination...", lines=3, autofocus=True)
neg_prompt = gr.Textbox(label="Negative Prompt", placeholder="Low quality...", lines=3)
@@ -58,7 +159,7 @@ def create_ui():
swap_btn = gr.Button("⇄ Swap", elem_classes='refresh-button', scale=0, min_width=80)
# 4. SETTINGS & BATCHING
gr.Markdown("### ⚙️ Config")
gr.Markdown("### ⚙️ Config")
with gr.Row():
with gr.Column():
steps_slider = gr.Slider(1, 15, value=9, step=1, label="Steps")
@@ -68,15 +169,15 @@ def create_ui():
with gr.Column():
batch_size_parallel = gr.Slider(1, 32, value=1, step=1, label="Batch Size (VRAM Heavy)", info="Generates N images at once.")
batch_count_seq = gr.Slider(1, 128, value=1, step=1, label="Sequential Count (Loop)", info="Repeats the generation N times.")
# === RIGHT COLUMN: VIEWPORT ===
with gr.Column(scale=6, min_width=500):
with gr.Column(elem_classes=["viewport-container"]):
output_gallery = gr.Gallery(
label="Output", show_label=False, columns=2, rows=2, height="80vh", object_fit="contain", preview=True
label="Output", show_label=False, columns=2, rows=2, height="80vh", object_fit="contain", preview=True
)
with gr.Row():
used_seed = gr.Markdown(label="Info", interactive=False, lines=3)
used_seed = gr.Markdown(label="Info", interactive=False)
# TAB 2: HISTORY VIEWER
with gr.TabItem("Gallery"):
@@ -87,8 +188,67 @@ def create_ui():
label="History", show_label=False, columns=6, object_fit="cover", height="auto", allow_preview=True
)
# === WIRING ===
# TAB 3: MODEL SETTINGS
with gr.TabItem("Model"):
with gr.Row():
with gr.Column():
with gr.Row():
image_model_menu = gr.Dropdown(
choices=utils.get_available_image_models(),
value=settings['model_name'],
label='Model',
elem_classes='slim-dropdown'
)
image_refresh_models = gr.Button("🔄", elem_classes='refresh-button', scale=0, min_width=40)
with gr.Row():
image_load_model = gr.Button("Load", variant='primary')
image_unload_model = gr.Button("Unload")
gr.Markdown("### Settings")
image_dtype = gr.Dropdown(
choices=['bfloat16', 'float16'],
value=settings['dtype'],
label='Data Type',
info='bfloat16 recommended for modern GPUs'
)
image_attn_backend = gr.Dropdown(
choices=['sdpa', 'flash_attention_2', 'flash_attention_3'],
value=settings['attn_backend'],
label='Attention Backend',
info='SDPA is default. Flash Attention requires compatible GPU.'
)
image_cpu_offload = gr.Checkbox(
value=settings['cpu_offload'],
label='CPU Offload',
info='Enable for low VRAM GPUs. Slower but uses less memory.'
)
image_compile = gr.Checkbox(
value=settings['compile_model'],
label='Compile Model',
info='Faster inference after first run. First run will be slow.'
)
image_model_status = gr.Markdown(
value=f"Model: **{settings['model_name']}** (not loaded)" if settings['model_name'] != 'None' else "No model selected"
)
with gr.Column():
gr.Markdown("### Download Model")
image_download_path = gr.Textbox(
label="Hugging Face Model",
placeholder="Tongyi-MAI/Z-Image-Turbo",
info="Enter the HuggingFace model path. Use : for branch, e.g. model:main"
)
image_download_btn = gr.Button("Download", variant='primary')
image_download_status = gr.Markdown("")
# === WIRING ===
# Aspect ratio preset changes -> update dimensions
preset_radio.change(
fn=apply_aspect_ratio,
@@ -96,7 +256,7 @@ def create_ui():
outputs=[width_slider, height_slider],
show_progress=False
)
# Width slider changes -> update height (if not Custom)
width_slider.release(
fn=update_height_from_width,
@@ -104,7 +264,7 @@ def create_ui():
outputs=[height_slider],
show_progress=False
)
# Height slider changes -> update width (if not Custom)
height_slider.release(
fn=update_width_from_height,
@@ -112,7 +272,7 @@ def create_ui():
outputs=[width_slider],
show_progress=False
)
# Swap button -> swap dimensions and update aspect ratio
swap_btn.click(
fn=swap_dimensions_and_update_ratio,
@@ -125,62 +285,92 @@ def create_ui():
inputs = [prompt, neg_prompt, width_slider, height_slider, steps_slider, seed_input, batch_size_parallel, batch_count_seq]
outputs = [output_gallery, used_seed]
generate_btn.click(fn=generate, inputs=inputs, outputs=outputs)
prompt.submit(fn=generate, inputs=inputs, outputs=outputs)
neg_prompt.submit(fn=generate, inputs=inputs, outputs=outputs)
generate_btn.click(
fn=lambda *args: generate(*args, image_model_menu, image_dtype, image_attn_backend, image_cpu_offload, image_compile),
inputs=inputs,
outputs=outputs
)
prompt.submit(
fn=lambda *args: generate(*args, image_model_menu, image_dtype, image_attn_backend, image_cpu_offload, image_compile),
inputs=inputs,
outputs=outputs
)
neg_prompt.submit(
fn=lambda *args: generate(*args, image_model_menu, image_dtype, image_attn_backend, image_cpu_offload, image_compile),
inputs=inputs,
outputs=outputs
)
# Model tab events
image_refresh_models.click(
fn=lambda: gr.update(choices=utils.get_available_image_models()),
inputs=None,
outputs=[image_model_menu],
show_progress=False
)
image_load_model.click(
fn=load_image_model_wrapper,
inputs=[image_model_menu, image_dtype, image_attn_backend, image_cpu_offload, image_compile],
outputs=[image_model_status],
show_progress=True
)
image_unload_model.click(
fn=unload_image_model_wrapper,
inputs=None,
outputs=[image_model_status],
show_progress=False
)
image_download_btn.click(
fn=download_image_model_wrapper,
inputs=[image_download_path],
outputs=[image_download_status, image_model_menu],
show_progress=True
)
# History
# refresh_btn.click(fn=get_history_images, inputs=None, outputs=history_gallery)
# Aspect Buttons
# btn_sq.click(lambda: set_dims(1024, 1024), outputs=[width_slider, height_slider])
# btn_port.click(lambda: set_dims(720, 1280), outputs=[width_slider, height_slider])
# btn_land.click(lambda: set_dims(1280, 720), outputs=[width_slider, height_slider])
# btn_wide.click(lambda: set_dims(1536, 640), outputs=[width_slider, height_slider])
# Generation
inputs = [prompt, neg_prompt, width_slider, height_slider, steps_slider, seed_input, batch_size_parallel, batch_count_seq]
outputs = [output_gallery, used_seed]
generate_btn.click(fn=generate, inputs=inputs, outputs=outputs)
prompt.submit(fn=generate, inputs=inputs, outputs=outputs)
neg_prompt.submit(fn=generate, inputs=inputs, outputs=outputs)
# System
# load_btn.click(fn=load_pipeline, inputs=[backend_drop, compile_check, offload_check, gr.State("bfloat16")], outputs=None)
# History
# refresh_btn.click(fn=get_history_images, inputs=None, outputs=history_gallery)
# Load history on app launch
# demo.load(fn=get_history_images, inputs=None, outputs=history_gallery)
refresh_btn.click(fn=get_history_images, inputs=None, outputs=history_gallery, show_progress=False)
def generate(prompt, neg_prompt, width, height, steps, seed, batch_size_parallel, batch_count_seq):
import numpy as np
import torch
from modules import shared
from modules.image_models import load_image_model
def generate(prompt, neg_prompt, width, height, steps, seed, batch_size_parallel, batch_count_seq,
model_menu, dtype_dropdown, attn_dropdown, cpu_offload_checkbox, compile_checkbox):
"""Generate images with the current model settings."""
# Get current UI values (these are Gradio components, we need their values)
model_name = shared.image_model_name
if model_name == 'None':
return [], "No image model selected. Go to the Model tab and select a model."
# Auto-load model if not loaded
if shared.image_model is None:
if shared.image_model_name == 'None':
return [], "No image model selected. Please load a model first."
load_image_model(shared.image_model_name)
if shared.image_model is None:
return [], "Failed to load image model."
# Load saved settings for the model
saved_settings = load_image_model_settings()
result = load_image_model(
model_name,
dtype=saved_settings['dtype'],
attn_backend=saved_settings['attn_backend'],
cpu_offload=saved_settings['cpu_offload'],
compile_model=saved_settings['compile_model']
)
if result is None:
return [], f"Failed to load model `{model_name}`."
if seed == -1:
seed = np.random.randint(0, 2**32 - 1)
generator = torch.Generator("cuda").manual_seed(int(seed))
all_images = []
# Sequential loop (easier on VRAM)
for i in range(int(batch_count_seq)):
current_seed = seed + i
generator.manual_seed(int(current_seed))
# Parallel generation
batch_results = shared.image_model(
prompt=prompt,
@@ -192,150 +382,128 @@ def generate(prompt, neg_prompt, width, height, steps, seed, batch_size_parallel
num_images_per_prompt=int(batch_size_parallel),
generator=generator,
).images
all_images.extend(batch_results)
# Save to disk
save_generated_images(all_images, prompt, seed)
return all_images, f"Seed: {seed}"
# --- File Saving Logic ---
def load_image_model_wrapper(model_name, dtype, attn_backend, cpu_offload, compile_model):
"""Load model and save settings."""
if model_name == 'None' or not model_name:
yield "No model selected"
return
try:
yield f"Loading `{model_name}`..."
# Unload existing model first
unload_image_model()
# Load the new model
result = load_image_model(
model_name,
dtype=dtype,
attn_backend=attn_backend,
cpu_offload=cpu_offload,
compile_model=compile_model
)
if result is not None:
# Save settings to yaml
save_image_model_settings(model_name, dtype, attn_backend, cpu_offload, compile_model)
yield f"✓ Loaded **{model_name}**"
else:
yield f"✗ Failed to load `{model_name}`"
except Exception:
exc = traceback.format_exc()
yield f"Error:\n```\n{exc}\n```"
def unload_image_model_wrapper():
"""Unload model wrapper."""
unload_image_model()
if shared.image_model_name != 'None':
return f"Model: **{shared.image_model_name}** (not loaded)"
else:
return "No model loaded"
def download_image_model_wrapper(model_path):
"""Download a model from Hugging Face."""
from huggingface_hub import snapshot_download
if not model_path:
yield "No model specified", gr.update()
return
try:
# Parse model name and branch
if ':' in model_path:
model_id, branch = model_path.rsplit(':', 1)
else:
model_id, branch = model_path, 'main'
# Output folder name
folder_name = model_id.split('/')[-1]
output_folder = Path(shared.args.image_model_dir) / folder_name
yield f"Downloading `{model_id}` (branch: {branch})...", gr.update()
snapshot_download(
repo_id=model_id,
revision=branch,
local_dir=output_folder,
local_dir_use_symlinks=False,
)
# Refresh the model list
new_choices = utils.get_available_image_models()
yield f"✓ Downloaded to `{output_folder}`", gr.update(choices=new_choices, value=folder_name)
except Exception:
exc = traceback.format_exc()
yield f"Error:\n```\n{exc}\n```", gr.update()
def save_generated_images(images, prompt, seed):
# Create folder structure: outputs/YYYY-MM-DD/
"""Save generated images to disk."""
date_str = datetime.now().strftime("%Y-%m-%d")
folder_path = os.path.join("outputs", date_str)
folder_path = os.path.join("user_data", "image_outputs", date_str)
os.makedirs(folder_path, exist_ok=True)
saved_paths = []
for idx, img in enumerate(images):
timestamp = datetime.now().strftime("%H-%M-%S")
# Filename: Time_Seed_Index.png
filename = f"{timestamp}_{seed}_{idx}.png"
full_path = os.path.join(folder_path, filename)
# Save image
img.save(full_path)
saved_paths.append(full_path)
# Optional: Save prompt metadata in a text file next to it?
# For now, we just save the image.
return saved_paths
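# Example output path (illustrative values): an image generated with seed 12345
# on 2025-11-27 at 15:32:01 would be saved as
# user_data/image_outputs/2025-11-27/15-32-01_12345_0.png.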
# --- History Logic ---
def get_history_images():
"""Scans the outputs folder and returns all images, newest first"""
if not os.path.exists("outputs"):
"""Scan the outputs folder and return all images, newest first."""
output_dir = os.path.join("user_data", "image_outputs")
if not os.path.exists(output_dir):
return []
image_files = []
for root, dirs, files in os.walk("outputs"):
for root, dirs, files in os.walk(output_dir):
for file in files:
if file.endswith((".png", ".jpg", ".jpeg")):
full_path = os.path.join(root, file)
# Get creation time for sorting
mtime = os.path.getmtime(full_path)
image_files.append((full_path, mtime))
# Sort by time, newest first
image_files.sort(key=lambda x: x[1], reverse=True)
return [x[0] for x in image_files]
def round_to_step(value, step=STEP):
"""Round a value to the nearest step."""
return round(value / step) * step
def clamp(value, min_val, max_val):
"""Clamp value between min and max."""
return max(min_val, min(max_val, value))
def apply_aspect_ratio(aspect_ratio, current_width, current_height):
"""
Apply an aspect ratio preset.
Logic to prevent dimension creep:
- For tall ratios (like 9:16): keep width fixed, calculate height
- For wide ratios (like 16:9): keep height fixed, calculate width
- For square (1:1): use the smaller of the current dimensions
Returns (new_width, new_height).
"""
if aspect_ratio == "Custom" or aspect_ratio not in ASPECT_RATIOS:
return current_width, current_height
w_ratio, h_ratio = ASPECT_RATIOS[aspect_ratio]
if w_ratio == h_ratio:
# Square ratio - use the smaller current dimension to prevent creep
base = min(current_width, current_height)
new_width = base
new_height = base
elif w_ratio < h_ratio:
# Tall ratio (like 9:16) - width is the smaller side, keep it fixed
new_width = current_width
new_height = round_to_step(current_width * h_ratio / w_ratio)
else:
# Wide ratio (like 16:9) - height is the smaller side, keep it fixed
new_height = current_height
new_width = round_to_step(current_height * w_ratio / h_ratio)
# Clamp to slider bounds
new_width = clamp(new_width, 256, 2048)
new_height = clamp(new_height, 256, 2048)
return int(new_width), int(new_height)
def update_height_from_width(width, aspect_ratio):
"""Update height when width changes (if not Custom)."""
if aspect_ratio == "Custom" or aspect_ratio not in ASPECT_RATIOS:
return gr.update()
w_ratio, h_ratio = ASPECT_RATIOS[aspect_ratio]
new_height = round_to_step(width * h_ratio / w_ratio)
new_height = clamp(new_height, 256, 2048)
return int(new_height)
def update_width_from_height(height, aspect_ratio):
"""Update width when height changes (if not Custom)."""
if aspect_ratio == "Custom" or aspect_ratio not in ASPECT_RATIOS:
return gr.update()
w_ratio, h_ratio = ASPECT_RATIOS[aspect_ratio]
new_width = round_to_step(height * w_ratio / h_ratio)
new_width = clamp(new_width, 256, 2048)
return int(new_width)
def swap_dimensions(width, height):
"""Swap width and height values."""
return height, width
def swap_dimensions_and_update_ratio(width, height, aspect_ratio):
"""Swap dimensions and update aspect ratio to match (or set to Custom)."""
new_width, new_height = height, width
# Try to find a matching aspect ratio for the swapped dimensions
new_ratio = "Custom"
for name, ratios in ASPECT_RATIOS.items():
if ratios is None:
continue
w_r, h_r = ratios
# Check if the swapped dimensions match this ratio (within tolerance)
expected_height = new_width * h_r / w_r
if abs(expected_height - new_height) < STEP:
new_ratio = name
break
return new_width, new_height, new_ratio


@@ -27,149 +27,112 @@ def create_ui():
mu = shared.args.multi_user
with gr.Tab("Model", elem_id="model-tab"):
with gr.Tab("Text model"):
with gr.Row():
with gr.Column():
with gr.Row():
shared.gradio['model_menu'] = gr.Dropdown(choices=utils.get_available_models(), value=lambda: shared.model_name, label='Model', elem_classes='slim-dropdown', interactive=not mu)
ui.create_refresh_button(shared.gradio['model_menu'], lambda: None, lambda: {'choices': utils.get_available_models()}, 'refresh-button', interactive=not mu)
shared.gradio['load_model'] = gr.Button("Load", elem_classes='refresh-button', interactive=not mu)
shared.gradio['unload_model'] = gr.Button("Unload", elem_classes='refresh-button', interactive=not mu)
shared.gradio['save_model_settings'] = gr.Button("Save settings", elem_classes='refresh-button', interactive=not mu)
with gr.Row():
with gr.Column():
with gr.Row():
shared.gradio['model_menu'] = gr.Dropdown(choices=utils.get_available_models(), value=lambda: shared.model_name, label='Model', elem_classes='slim-dropdown', interactive=not mu)
ui.create_refresh_button(shared.gradio['model_menu'], lambda: None, lambda: {'choices': utils.get_available_models()}, 'refresh-button', interactive=not mu)
shared.gradio['load_model'] = gr.Button("Load", elem_classes='refresh-button', interactive=not mu)
shared.gradio['unload_model'] = gr.Button("Unload", elem_classes='refresh-button', interactive=not mu)
shared.gradio['save_model_settings'] = gr.Button("Save settings", elem_classes='refresh-button', interactive=not mu)
shared.gradio['loader'] = gr.Dropdown(label="Model loader", choices=loaders.loaders_and_params.keys() if not shared.args.portable else ['llama.cpp'], value=None)
with gr.Blocks():
gr.Markdown("## Main options")
shared.gradio['loader'] = gr.Dropdown(label="Model loader", choices=loaders.loaders_and_params.keys() if not shared.args.portable else ['llama.cpp'], value=None)
with gr.Blocks():
gr.Markdown("## Main options")
with gr.Row():
with gr.Column():
shared.gradio['gpu_layers'] = gr.Slider(label="gpu-layers", minimum=0, maximum=get_initial_gpu_layers_max(), step=1, value=shared.args.gpu_layers, info='Must be greater than 0 for the GPU to be used. ⚠️ Lower this value if you can\'t load the model.')
shared.gradio['ctx_size'] = gr.Slider(label='ctx-size', minimum=256, maximum=131072, step=256, value=shared.args.ctx_size, info='Context length. Common values: 4096, 8192, 16384, 32768, 65536, 131072.')
shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7')
shared.gradio['attn_implementation'] = gr.Dropdown(label="attn-implementation", choices=['sdpa', 'eager', 'flash_attention_2'], value=shared.args.attn_implementation, info='Attention implementation.')
shared.gradio['cache_type'] = gr.Dropdown(label="cache-type", choices=['fp16', 'q8_0', 'q4_0', 'fp8', 'q8', 'q7', 'q6', 'q5', 'q4', 'q3', 'q2'], value=shared.args.cache_type, allow_custom_value=True, info='Valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4; ExLlamaV3 - fp16, q2 to q8. For ExLlamaV3, you can type custom combinations for separate k/v bits (e.g. q4_q8).')
shared.gradio['tp_backend'] = gr.Dropdown(label="tp-backend", choices=['native', 'nccl'], value=shared.args.tp_backend, info='The backend for tensor parallelism.')
with gr.Column():
shared.gradio['vram_info'] = gr.HTML(value=get_initial_vram_info())
shared.gradio['cpu_moe'] = gr.Checkbox(label="cpu-moe", value=shared.args.cpu_moe, info='Move the experts to the CPU. Saves VRAM on MoE models.')
shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming-llm", value=shared.args.streaming_llm, info='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
shared.gradio['load_in_8bit'] = gr.Checkbox(label="load-in-8bit", value=shared.args.load_in_8bit)
shared.gradio['load_in_4bit'] = gr.Checkbox(label="load-in-4bit", value=shared.args.load_in_4bit)
shared.gradio['use_double_quant'] = gr.Checkbox(label="use_double_quant", value=shared.args.use_double_quant, info='Used by load-in-4bit.')
shared.gradio['autosplit'] = gr.Checkbox(label="autosplit", value=shared.args.autosplit, info='Automatically split the model tensors across the available GPUs.')
shared.gradio['enable_tp'] = gr.Checkbox(label="enable_tp", value=shared.args.enable_tp, info='Enable tensor parallelism (TP).')
shared.gradio['cpp_runner'] = gr.Checkbox(label="cpp-runner", value=shared.args.cpp_runner, info='Enable inference with ModelRunnerCpp, which is faster than the default ModelRunner.')
shared.gradio['tensorrt_llm_info'] = gr.Markdown('* TensorRT-LLM has to be installed manually in a separate Python 3.10 environment at the moment. For a guide, consult the description of [this PR](https://github.com/oobabooga/text-generation-webui/pull/5715). \n\n* `ctx_size` is only used when `cpp-runner` is checked.\n\n* `cpp_runner` does not support streaming at the moment.')
# Multimodal
with gr.Accordion("Multimodal (vision)", open=False, elem_classes='tgw-accordion') as shared.gradio['mmproj_accordion']:
with gr.Row():
shared.gradio['mmproj'] = gr.Dropdown(label="mmproj file", choices=utils.get_available_mmproj(), value=lambda: shared.args.mmproj or 'None', elem_classes='slim-dropdown', info='Select a file that matches your model. Must be placed in user_data/mmproj/', interactive=not mu)
ui.create_refresh_button(shared.gradio['mmproj'], lambda: None, lambda: {'choices': utils.get_available_mmproj()}, 'refresh-button', interactive=not mu)
# Speculative decoding
with gr.Accordion("Speculative decoding", open=False, elem_classes='tgw-accordion') as shared.gradio['speculative_decoding_accordion']:
with gr.Row():
shared.gradio['model_draft'] = gr.Dropdown(label="model-draft", choices=['None'] + utils.get_available_models(), value=lambda: shared.args.model_draft, elem_classes='slim-dropdown', info='Draft model. Speculative decoding only works with models sharing the same vocabulary (e.g., same model family).', interactive=not mu)
ui.create_refresh_button(shared.gradio['model_draft'], lambda: None, lambda: {'choices': ['None'] + utils.get_available_models()}, 'refresh-button', interactive=not mu)
shared.gradio['gpu_layers_draft'] = gr.Slider(label="gpu-layers-draft", minimum=0, maximum=256, value=shared.args.gpu_layers_draft, info='Number of layers to offload to the GPU for the draft model.')
shared.gradio['draft_max'] = gr.Number(label="draft-max", precision=0, step=1, value=shared.args.draft_max, info='Number of tokens to draft for speculative decoding. Recommended value: 4.')
shared.gradio['device_draft'] = gr.Textbox(label="device-draft", value=shared.args.device_draft, info='Comma-separated list of devices to use for offloading the draft model. Example: CUDA0,CUDA1')
shared.gradio['ctx_size_draft'] = gr.Number(label="ctx-size-draft", precision=0, step=256, value=shared.args.ctx_size_draft, info='Size of the prompt context for the draft model. If 0, uses the same as the main model.')
gr.Markdown("## Other options")
with gr.Accordion("See more options", open=False, elem_classes='tgw-accordion'):
with gr.Row():
with gr.Column():
shared.gradio['gpu_layers'] = gr.Slider(label="gpu-layers", minimum=0, maximum=get_initial_gpu_layers_max(), step=1, value=shared.args.gpu_layers, info='Must be greater than 0 for the GPU to be used. ⚠️ Lower this value if you can\'t load the model.')
shared.gradio['ctx_size'] = gr.Slider(label='ctx-size', minimum=256, maximum=131072, step=256, value=shared.args.ctx_size, info='Context length. Common values: 4096, 8192, 16384, 32768, 65536, 131072.')
shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7')
shared.gradio['attn_implementation'] = gr.Dropdown(label="attn-implementation", choices=['sdpa', 'eager', 'flash_attention_2'], value=shared.args.attn_implementation, info='Attention implementation.')
shared.gradio['cache_type'] = gr.Dropdown(label="cache-type", choices=['fp16', 'q8_0', 'q4_0', 'fp8', 'q8', 'q7', 'q6', 'q5', 'q4', 'q3', 'q2'], value=shared.args.cache_type, allow_custom_value=True, info='Valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4; ExLlamaV3 - fp16, q2 to q8. For ExLlamaV3, you can type custom combinations for separate k/v bits (e.g. q4_q8).')
shared.gradio['tp_backend'] = gr.Dropdown(label="tp-backend", choices=['native', 'nccl'], value=shared.args.tp_backend, info='The backend for tensor parallelism.')
shared.gradio['threads'] = gr.Slider(label="threads", minimum=0, step=1, maximum=256, value=shared.args.threads)
shared.gradio['threads_batch'] = gr.Slider(label="threads_batch", minimum=0, step=1, maximum=256, value=shared.args.threads_batch)
shared.gradio['batch_size'] = gr.Slider(label="batch_size", minimum=1, maximum=4096, step=1, value=shared.args.batch_size)
shared.gradio['ubatch_size'] = gr.Slider(label="ubatch_size", minimum=1, maximum=4096, step=1, value=shared.args.ubatch_size)
shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='List of proportions to split the model across multiple GPUs. Example: 60,40')
shared.gradio['extra_flags'] = gr.Textbox(label='extra-flags', info='Additional flags to pass to llama-server. Format: "flag1=value1,flag2,flag3=value3". Example: "override-tensor=exps=CPU"', value=shared.args.extra_flags)
shared.gradio['cpu_memory'] = gr.Number(label="Maximum CPU memory in GiB. Use this for CPU offloading.", value=shared.args.cpu_memory)
shared.gradio['alpha_value'] = gr.Number(label='alpha_value', value=shared.args.alpha_value, precision=2, info='Positional embeddings alpha factor for NTK RoPE scaling. Recommended values (NTKv1): 1.75 for 1.5x context, 2.5 for 2x context. Use either this or compress_pos_emb, not both.')
shared.gradio['rope_freq_base'] = gr.Number(label='rope_freq_base', value=shared.args.rope_freq_base, precision=0, info='Positional embeddings frequency base for NTK RoPE scaling. Related to alpha_value by rope_freq_base = 10000 * alpha_value ^ (64 / 63). 0 = from model.')
shared.gradio['compress_pos_emb'] = gr.Number(label='compress_pos_emb', value=shared.args.compress_pos_emb, precision=2, info='Positional embeddings compression factor. Should be set to (context length) / (model\'s original context length). Equal to 1/rope_freq_scale.')
shared.gradio['compute_dtype'] = gr.Dropdown(label="compute_dtype", choices=["bfloat16", "float16", "float32"], value=shared.args.compute_dtype, info='Used by load-in-4bit.')
shared.gradio['quant_type'] = gr.Dropdown(label="quant_type", choices=["nf4", "fp4"], value=shared.args.quant_type, info='Used by load-in-4bit.')
shared.gradio['num_experts_per_token'] = gr.Number(label="Number of experts per token", value=shared.args.num_experts_per_token, info='Only applies to MoE models like Mixtral.')
with gr.Column():
shared.gradio['vram_info'] = gr.HTML(value=get_initial_vram_info())
shared.gradio['cpu_moe'] = gr.Checkbox(label="cpu-moe", value=shared.args.cpu_moe, info='Move the experts to the CPU. Saves VRAM on MoE models.')
shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming-llm", value=shared.args.streaming_llm, info='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
shared.gradio['load_in_8bit'] = gr.Checkbox(label="load-in-8bit", value=shared.args.load_in_8bit)
shared.gradio['load_in_4bit'] = gr.Checkbox(label="load-in-4bit", value=shared.args.load_in_4bit)
shared.gradio['use_double_quant'] = gr.Checkbox(label="use_double_quant", value=shared.args.use_double_quant, info='Used by load-in-4bit.')
shared.gradio['autosplit'] = gr.Checkbox(label="autosplit", value=shared.args.autosplit, info='Automatically split the model tensors across the available GPUs.')
shared.gradio['enable_tp'] = gr.Checkbox(label="enable_tp", value=shared.args.enable_tp, info='Enable tensor parallelism (TP).')
shared.gradio['cpp_runner'] = gr.Checkbox(label="cpp-runner", value=shared.args.cpp_runner, info='Enable inference with ModelRunnerCpp, which is faster than the default ModelRunner.')
shared.gradio['tensorrt_llm_info'] = gr.Markdown('* TensorRT-LLM has to be installed manually in a separate Python 3.10 environment at the moment. For a guide, consult the description of [this PR](https://github.com/oobabooga/text-generation-webui/pull/5715). \n\n* `ctx_size` is only used when `cpp-runner` is checked.\n\n* `cpp_runner` does not support streaming at the moment.')
# Multimodal
with gr.Accordion("Multimodal (vision)", open=False, elem_classes='tgw-accordion') as shared.gradio['mmproj_accordion']:
shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='Use PyTorch in CPU mode.')
shared.gradio['disk'] = gr.Checkbox(label="disk", value=shared.args.disk)
shared.gradio['row_split'] = gr.Checkbox(label="row_split", value=shared.args.row_split, info='Split the model by rows across GPUs. This may improve multi-gpu performance.')
shared.gradio['no_kv_offload'] = gr.Checkbox(label="no_kv_offload", value=shared.args.no_kv_offload, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.')
shared.gradio['no_mmap'] = gr.Checkbox(label="no-mmap", value=shared.args.no_mmap)
shared.gradio['mlock'] = gr.Checkbox(label="mlock", value=shared.args.mlock)
shared.gradio['numa'] = gr.Checkbox(label="numa", value=shared.args.numa, info='NUMA support can help on some systems with non-uniform memory access.')
shared.gradio['bf16'] = gr.Checkbox(label="bf16", value=shared.args.bf16)
shared.gradio['no_flash_attn'] = gr.Checkbox(label="no_flash_attn", value=shared.args.no_flash_attn)
shared.gradio['no_xformers'] = gr.Checkbox(label="no_xformers", value=shared.args.no_xformers)
shared.gradio['no_sdpa'] = gr.Checkbox(label="no_sdpa", value=shared.args.no_sdpa)
shared.gradio['cfg_cache'] = gr.Checkbox(label="cfg-cache", value=shared.args.cfg_cache, info='Necessary to use CFG with this loader.')
shared.gradio['no_use_fast'] = gr.Checkbox(label="no_use_fast", value=shared.args.no_use_fast, info='Set use_fast=False while loading the tokenizer.')
if not shared.args.portable:
with gr.Row():
shared.gradio['mmproj'] = gr.Dropdown(label="mmproj file", choices=utils.get_available_mmproj(), value=lambda: shared.args.mmproj or 'None', elem_classes='slim-dropdown', info='Select a file that matches your model. Must be placed in user_data/mmproj/', interactive=not mu)
ui.create_refresh_button(shared.gradio['mmproj'], lambda: None, lambda: {'choices': utils.get_available_mmproj()}, 'refresh-button', interactive=not mu)
# Speculative decoding
with gr.Accordion("Speculative decoding", open=False, elem_classes='tgw-accordion') as shared.gradio['speculative_decoding_accordion']:
with gr.Row():
shared.gradio['model_draft'] = gr.Dropdown(label="model-draft", choices=['None'] + utils.get_available_models(), value=lambda: shared.args.model_draft, elem_classes='slim-dropdown', info='Draft model. Speculative decoding only works with models sharing the same vocabulary (e.g., same model family).', interactive=not mu)
ui.create_refresh_button(shared.gradio['model_draft'], lambda: None, lambda: {'choices': ['None'] + utils.get_available_models()}, 'refresh-button', interactive=not mu)
shared.gradio['gpu_layers_draft'] = gr.Slider(label="gpu-layers-draft", minimum=0, maximum=256, value=shared.args.gpu_layers_draft, info='Number of layers to offload to the GPU for the draft model.')
shared.gradio['draft_max'] = gr.Number(label="draft-max", precision=0, step=1, value=shared.args.draft_max, info='Number of tokens to draft for speculative decoding. Recommended value: 4.')
shared.gradio['device_draft'] = gr.Textbox(label="device-draft", value=shared.args.device_draft, info='Comma-separated list of devices to use for offloading the draft model. Example: CUDA0,CUDA1')
shared.gradio['ctx_size_draft'] = gr.Number(label="ctx-size-draft", precision=0, step=256, value=shared.args.ctx_size_draft, info='Size of the prompt context for the draft model. If 0, uses the same as the main model.')
gr.Markdown("## Other options")
with gr.Accordion("See more options", open=False, elem_classes='tgw-accordion'):
with gr.Row():
with gr.Column():
shared.gradio['threads'] = gr.Slider(label="threads", minimum=0, step=1, maximum=256, value=shared.args.threads)
shared.gradio['threads_batch'] = gr.Slider(label="threads_batch", minimum=0, step=1, maximum=256, value=shared.args.threads_batch)
shared.gradio['batch_size'] = gr.Slider(label="batch_size", minimum=1, maximum=4096, step=1, value=shared.args.batch_size)
shared.gradio['ubatch_size'] = gr.Slider(label="ubatch_size", minimum=1, maximum=4096, step=1, value=shared.args.ubatch_size)
shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='List of proportions to split the model across multiple GPUs. Example: 60,40')
shared.gradio['extra_flags'] = gr.Textbox(label='extra-flags', info='Additional flags to pass to llama-server. Format: "flag1=value1,flag2,flag3=value3". Example: "override-tensor=exps=CPU"', value=shared.args.extra_flags)
shared.gradio['cpu_memory'] = gr.Number(label="Maximum CPU memory in GiB. Use this for CPU offloading.", value=shared.args.cpu_memory)
shared.gradio['alpha_value'] = gr.Number(label='alpha_value', value=shared.args.alpha_value, precision=2, info='Positional embeddings alpha factor for NTK RoPE scaling. Recommended values (NTKv1): 1.75 for 1.5x context, 2.5 for 2x context. Use either this or compress_pos_emb, not both.')
shared.gradio['rope_freq_base'] = gr.Number(label='rope_freq_base', value=shared.args.rope_freq_base, precision=0, info='Positional embeddings frequency base for NTK RoPE scaling. Related to alpha_value by rope_freq_base = 10000 * alpha_value ^ (64 / 63). 0 = from model.')
shared.gradio['compress_pos_emb'] = gr.Number(label='compress_pos_emb', value=shared.args.compress_pos_emb, precision=2, info='Positional embeddings compression factor. Should be set to (context length) / (model\'s original context length). Equal to 1/rope_freq_scale.')
shared.gradio['compute_dtype'] = gr.Dropdown(label="compute_dtype", choices=["bfloat16", "float16", "float32"], value=shared.args.compute_dtype, info='Used by load-in-4bit.')
shared.gradio['quant_type'] = gr.Dropdown(label="quant_type", choices=["nf4", "fp4"], value=shared.args.quant_type, info='Used by load-in-4bit.')
shared.gradio['num_experts_per_token'] = gr.Number(label="Number of experts per token", value=shared.args.num_experts_per_token, info='Only applies to MoE models like Mixtral.')
with gr.Column():
shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='Use PyTorch in CPU mode.')
shared.gradio['disk'] = gr.Checkbox(label="disk", value=shared.args.disk)
shared.gradio['row_split'] = gr.Checkbox(label="row_split", value=shared.args.row_split, info='Split the model by rows across GPUs. This may improve multi-gpu performance.')
shared.gradio['no_kv_offload'] = gr.Checkbox(label="no_kv_offload", value=shared.args.no_kv_offload, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.')
shared.gradio['no_mmap'] = gr.Checkbox(label="no-mmap", value=shared.args.no_mmap)
shared.gradio['mlock'] = gr.Checkbox(label="mlock", value=shared.args.mlock)
shared.gradio['numa'] = gr.Checkbox(label="numa", value=shared.args.numa, info='NUMA support can help on some systems with non-uniform memory access.')
shared.gradio['bf16'] = gr.Checkbox(label="bf16", value=shared.args.bf16)
shared.gradio['no_flash_attn'] = gr.Checkbox(label="no_flash_attn", value=shared.args.no_flash_attn)
shared.gradio['no_xformers'] = gr.Checkbox(label="no_xformers", value=shared.args.no_xformers)
shared.gradio['no_sdpa'] = gr.Checkbox(label="no_sdpa", value=shared.args.no_sdpa)
shared.gradio['cfg_cache'] = gr.Checkbox(label="cfg-cache", value=shared.args.cfg_cache, info='Necessary to use CFG with this loader.')
shared.gradio['no_use_fast'] = gr.Checkbox(label="no_use_fast", value=shared.args.no_use_fast, info='Set use_fast=False while loading the tokenizer.')
if not shared.args.portable:
with gr.Row():
shared.gradio['lora_menu'] = gr.Dropdown(multiselect=True, choices=utils.get_available_loras(), value=shared.lora_names, label='LoRA(s)', elem_classes='slim-dropdown', interactive=not mu)
ui.create_refresh_button(shared.gradio['lora_menu'], lambda: None, lambda: {'choices': utils.get_available_loras(), 'value': shared.lora_names}, 'refresh-button', interactive=not mu)
shared.gradio['lora_menu_apply'] = gr.Button(value='Apply LoRAs', elem_classes='refresh-button', interactive=not mu)
with gr.Column():
with gr.Tab("Download"):
shared.gradio['custom_model_menu'] = gr.Textbox(label="Download model or LoRA", info="Enter the Hugging Face username/model path, for instance: facebook/galactica-125m. To specify a branch, add it at the end after a \":\" character like this: facebook/galactica-125m:main. To download a single file, enter its name in the second box.", interactive=not mu)
shared.gradio['download_specific_file'] = gr.Textbox(placeholder="File name (for GGUF models)", show_label=False, max_lines=1, interactive=not mu)
with gr.Row():
shared.gradio['download_model_button'] = gr.Button("Download", variant='primary', interactive=not mu)
shared.gradio['get_file_list'] = gr.Button("Get file list", interactive=not mu)
with gr.Tab("Customize instruction template"):
with gr.Row():
shared.gradio['customized_template'] = gr.Dropdown(choices=utils.get_available_instruction_templates(), value='None', label='Select the desired instruction template', elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['customized_template'], lambda: None, lambda: {'choices': utils.get_available_instruction_templates()}, 'refresh-button', interactive=not mu)
shared.gradio['customized_template_submit'] = gr.Button("Submit", variant="primary", interactive=not mu)
gr.Markdown("This allows you to set a customized template for the model currently selected in the \"Model loader\" menu. Whenever the model gets loaded, this template will be used in place of the template specified in the model's medatada, which sometimes is wrong.")
shared.gradio['lora_menu'] = gr.Dropdown(multiselect=True, choices=utils.get_available_loras(), value=shared.lora_names, label='LoRA(s)', elem_classes='slim-dropdown', interactive=not mu)
ui.create_refresh_button(shared.gradio['lora_menu'], lambda: None, lambda: {'choices': utils.get_available_loras(), 'value': shared.lora_names}, 'refresh-button', interactive=not mu)
shared.gradio['lora_menu_apply'] = gr.Button(value='Apply LoRAs', elem_classes='refresh-button', interactive=not mu)
with gr.Column():
with gr.Tab("Download"):
shared.gradio['custom_model_menu'] = gr.Textbox(label="Download model or LoRA", info="Enter the Hugging Face username/model path, for instance: facebook/galactica-125m. To specify a branch, add it at the end after a \":\" character like this: facebook/galactica-125m:main. To download a single file, enter its name in the second box.", interactive=not mu)
shared.gradio['download_specific_file'] = gr.Textbox(placeholder="File name (for GGUF models)", show_label=False, max_lines=1, interactive=not mu)
with gr.Row():
shared.gradio['model_status'] = gr.Markdown('No model is loaded' if shared.model_name == 'None' else 'Ready')
shared.gradio['download_model_button'] = gr.Button("Download", variant='primary', interactive=not mu)
shared.gradio['get_file_list'] = gr.Button("Get file list", interactive=not mu)
with gr.Tab("Image model"):
with gr.Row():
with gr.Column():
with gr.Tab("Customize instruction template"):
with gr.Row():
shared.gradio['image_model_menu'] = gr.Dropdown(choices=utils.get_available_image_models(), value=lambda: shared.image_model_name, label='Model', elem_classes='slim-dropdown', interactive=not mu)
ui.create_refresh_button(shared.gradio['image_model_menu'], lambda: None, lambda: {'choices': utils.get_available_image_models()}, 'refresh-button', interactive=not mu)
shared.gradio['image_load_model'] = gr.Button("Load", elem_classes='refresh-button', interactive=not mu)
shared.gradio['image_unload_model'] = gr.Button("Unload", elem_classes='refresh-button', interactive=not mu)
shared.gradio['image_save_model_settings'] = gr.Button("Save settings", elem_classes='refresh-button', interactive=not mu)
shared.gradio['customized_template'] = gr.Dropdown(choices=utils.get_available_instruction_templates(), value='None', label='Select the desired instruction template', elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['customized_template'], lambda: None, lambda: {'choices': utils.get_available_instruction_templates()}, 'refresh-button', interactive=not mu)
with gr.Blocks():
gr.Markdown("## Main options")
with gr.Row():
with gr.Column():
pass
shared.gradio['customized_template_submit'] = gr.Button("Submit", variant="primary", interactive=not mu)
gr.Markdown("This allows you to set a customized template for the model currently selected in the \"Model loader\" menu. Whenever the model gets loaded, this template will be used in place of the template specified in the model's medatada, which sometimes is wrong.")
with gr.Column():
pass
gr.Markdown("## Other options")
with gr.Accordion("See more options", open=False, elem_classes='tgw-accordion'):
with gr.Row():
with gr.Column():
pass
with gr.Column():
pass
with gr.Column():
shared.gradio['image_custom_model_menu'] = gr.Textbox(label="Download model (diffusers format)", info="Enter the Hugging Face username/model path, for instance: Tongyi-MAI/Z-Image-Turbo. To specify a branch, add it at the end after a \":\" character like this: Tongyi-MAI/Z-Image-Turbo:main.", interactive=not mu)
with gr.Row():
shared.gradio['image_download_model_button'] = gr.Button("Download", variant='primary', interactive=not mu)
with gr.Row():
shared.gradio['image_model_status'] = gr.Markdown('No model is loaded' if shared.image_model_name == 'None' else 'Ready')
with gr.Row():
shared.gradio['model_status'] = gr.Markdown('No model is loaded' if shared.model_name == 'None' else 'Ready')
def create_event_handlers():
@@ -220,28 +183,6 @@ def create_event_handlers():
shared.gradio['get_file_list'].click(partial(download_model_wrapper, return_links=True), gradio('custom_model_menu', 'download_specific_file'), gradio('model_status'), show_progress=True)
shared.gradio['customized_template_submit'].click(save_instruction_template, gradio('model_menu', 'customized_template'), gradio('model_status'), show_progress=True)
# Image model event handlers
shared.gradio['image_load_model'].click(
load_image_model_wrapper,
gradio('image_model_menu'),
gradio('image_model_status'),
show_progress=True
)
shared.gradio['image_unload_model'].click(
handle_unload_image_model_click,
None,
gradio('image_model_status'),
show_progress=False
)
shared.gradio['image_download_model_button'].click(
download_image_model_wrapper,
gradio('image_custom_model_menu'),
gradio('image_model_status'),
show_progress=True
)
def load_model_wrapper(selected_model, loader, autoload=False):
try: