From e24ba92ef231963387f5719125943acd83b46c46 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 1 Dec 2025 08:05:21 -0800 Subject: [PATCH 01/61] UI: Optimize typing in all textareas --- js/main.js | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/js/main.js b/js/main.js index 67f60279..337131c2 100644 --- a/js/main.js +++ b/js/main.js @@ -1105,3 +1105,64 @@ document.fonts.addEventListener("loadingdone", (event) => { // Initial call to set the margin based on current state updateMargin(); })(); + +//------------------------------------------------ +// Optimize typing in all textareas +//------------------------------------------------ + +(function() { + document.querySelectorAll("textarea").forEach(textarea => { + const computedStyle = getComputedStyle(textarea); + const MIN_HEIGHT = parseInt(computedStyle.minHeight) || textarea.offsetHeight || 42; + const configuredMax = parseInt(computedStyle.maxHeight) || 400; + + let rafId = null; + let isOurResize = false; + + function doResize() { + rafId = null; + isOurResize = true; + + // Recalculate max height each time + const maxHeight = Math.min(configuredMax, window.innerHeight * 0.5); + + textarea.style.height = "auto"; + const contentHeight = textarea.scrollHeight; + const clampedHeight = Math.min(maxHeight, Math.max(MIN_HEIGHT, contentHeight)); + + textarea.style.height = clampedHeight + "px"; + textarea.style.overflowY = contentHeight > maxHeight ? "auto" : "hidden"; + + isOurResize = false; + } + + function scheduleResize() { + if (rafId === null) { + rafId = requestAnimationFrame(doResize); + } + } + + const desc = Object.getOwnPropertyDescriptor(CSSStyleDeclaration.prototype, "height"); + const originalSet = desc?.set || function(v) { this.setProperty("height", v); }; + const originalGet = desc?.get || function() { return this.getPropertyValue("height"); }; + + Object.defineProperty(textarea.style, "height", { + get() { return originalGet.call(this); }, + set(value) { + if (isOurResize) originalSet.call(this, value); + else scheduleResize(); + }, + configurable: true + }); + + textarea.addEventListener("input", scheduleResize, { passive: true }); + doResize(); + }); + + // Trigger resize on all textareas when window resizes + window.addEventListener("resize", () => { + document.querySelectorAll("textarea").forEach(ta => { + ta.dispatchEvent(new Event("input")); + }); + }, { passive: true }); +})(); From a83821e941f8f6d33556d101d7aa241b41db562e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 1 Dec 2025 10:34:23 -0800 Subject: [PATCH 02/61] Revert "UI: Optimize typing in all textareas" This reverts commit e24ba92ef231963387f5719125943acd83b46c46. 
--- js/main.js | 61 ------------------------------------------------------ 1 file changed, 61 deletions(-) diff --git a/js/main.js b/js/main.js index 337131c2..67f60279 100644 --- a/js/main.js +++ b/js/main.js @@ -1105,64 +1105,3 @@ document.fonts.addEventListener("loadingdone", (event) => { // Initial call to set the margin based on current state updateMargin(); })(); - -//------------------------------------------------ -// Optimize typing in all textareas -//------------------------------------------------ - -(function() { - document.querySelectorAll("textarea").forEach(textarea => { - const computedStyle = getComputedStyle(textarea); - const MIN_HEIGHT = parseInt(computedStyle.minHeight) || textarea.offsetHeight || 42; - const configuredMax = parseInt(computedStyle.maxHeight) || 400; - - let rafId = null; - let isOurResize = false; - - function doResize() { - rafId = null; - isOurResize = true; - - // Recalculate max height each time - const maxHeight = Math.min(configuredMax, window.innerHeight * 0.5); - - textarea.style.height = "auto"; - const contentHeight = textarea.scrollHeight; - const clampedHeight = Math.min(maxHeight, Math.max(MIN_HEIGHT, contentHeight)); - - textarea.style.height = clampedHeight + "px"; - textarea.style.overflowY = contentHeight > maxHeight ? "auto" : "hidden"; - - isOurResize = false; - } - - function scheduleResize() { - if (rafId === null) { - rafId = requestAnimationFrame(doResize); - } - } - - const desc = Object.getOwnPropertyDescriptor(CSSStyleDeclaration.prototype, "height"); - const originalSet = desc?.set || function(v) { this.setProperty("height", v); }; - const originalGet = desc?.get || function() { return this.getPropertyValue("height"); }; - - Object.defineProperty(textarea.style, "height", { - get() { return originalGet.call(this); }, - set(value) { - if (isOurResize) originalSet.call(this, value); - else scheduleResize(); - }, - configurable: true - }); - - textarea.addEventListener("input", scheduleResize, { passive: true }); - doResize(); - }); - - // Trigger resize on all textareas when window resizes - window.addEventListener("resize", () => { - document.querySelectorAll("textarea").forEach(ta => { - ta.dispatchEvent(new Event("input")); - }); - }, { passive: true }); -})(); From b3666e140de349a651aea22a6f418572925a5c62 Mon Sep 17 00:00:00 2001 From: oobabooga Date: Tue, 2 Dec 2025 14:55:38 -0300 Subject: [PATCH 03/61] Add image generation support (#7328) --- README.md | 7 +- css/main.css | 96 +- docs/Image Generation Tutorial.md | 20 + modules/image_models.py | 183 ++++ modules/shared.py | 53 +- modules/ui.py | 40 +- modules/ui_image_generation.py | 847 ++++++++++++++++++ modules/utils.py | 22 +- requirements/full/requirements.txt | 4 + requirements/full/requirements_amd.txt | 4 + requirements/full/requirements_amd_noavx2.txt | 4 + .../full/requirements_apple_intel.txt | 4 + .../full/requirements_apple_silicon.txt | 4 + requirements/full/requirements_cpu_only.txt | 4 + .../full/requirements_cpu_only_noavx2.txt | 4 + requirements/full/requirements_noavx2.txt | 4 + requirements/full/requirements_nowheels.txt | 4 + server.py | 24 + 18 files changed, 1314 insertions(+), 14 deletions(-) create mode 100644 docs/Image Generation Tutorial.md create mode 100644 modules/image_models.py create mode 100644 modules/ui_image_generation.py diff --git a/README.md b/README.md index d350d959..174fc2ac 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,8 @@ A Gradio web UI for Large Language Models. 
- 100% offline and private, with zero telemetry, external resources, or remote update requests. - **File attachments**: Upload text files, PDF documents, and .docx documents to talk about their contents. - **Vision (multimodal models)**: Attach images to messages for visual understanding ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Multimodal-Tutorial)). +Image generation: A dedicated tab for diffusers models like Z-Image-Turbo and Qwen-Image. Features 4-bit/8-bit quantization and a persistent gallery with metadata (tutorial). +- **Image generation**: A dedicated tab for `diffusers` models like **Z-Image-Turbo** and **Qwen-Image**. Features 4-bit/8-bit quantization and a persistent gallery with metadata ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Image-Generation-Tutorial)). - **Web search**: Optionally search the internet with LLM-generated queries to add context to the conversation. - Aesthetic UI with dark and light themes. - Syntax highlighting for code blocks and LaTeX rendering for mathematical expressions. @@ -432,6 +434,7 @@ https://colab.research.google.com/github/oobabooga/text-generation-webui/blob/ma https://www.reddit.com/r/Oobabooga/ -## Acknowledgment +## Acknowledgments -In August 2023, [Andreessen Horowitz](https://a16z.com/) (a16z) provided a generous grant to encourage and support my independent work on this project. I am **extremely** grateful for their trust and recognition. +- In August 2023, [Andreessen Horowitz](https://a16z.com/) (a16z) provided a generous grant to encourage and support my independent work on this project. I am **extremely** grateful for their trust and recognition. +- This project was inspired by [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) and wouldn't exist without it. 
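The image-generation bullet added to the README above is implemented later in this patch by `modules/image_models.py` and `modules/ui_image_generation.py`: a `diffusers` pipeline is loaded with an optional pipeline-level quantization config and then called with the values from the UI sliders. The snippet below is a minimal standalone sketch of that flow for Z-Image-Turbo with bnb-4bit quantization (commentary on the patch, not part of the diff). It assumes a CUDA GPU, the `diffusers` git revision pinned in the requirements files later in this patch, and an installed `bitsandbytes`; the prompt, seed, and output filename are purely illustrative.

```python
import torch
from diffusers import BitsAndBytesConfig, DiffusionPipeline
from diffusers.quantizers import PipelineQuantizationConfig

# Quantize only the transformer, mirroring the 'bnb-4bit' branch of
# get_quantization_config() in modules/image_models.py.
quant_config = PipelineQuantizationConfig(
    quant_mapping={
        "transformer": BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_use_double_quant=True,
        )
    }
)

# DiffusionPipeline auto-detects the concrete pipeline class (ZImagePipeline here).
pipe = DiffusionPipeline.from_pretrained(
    "Tongyi-MAI/Z-Image-Turbo",
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    quantization_config=quant_config,
)
pipe.to("cuda")

# Z-Image-Turbo is tuned for few steps and CFG 0, as the tutorial added below notes.
image = pipe(
    prompt="a lighthouse on a cliff at sunset, cinematic lighting",  # illustrative prompt
    negative_prompt="low quality",
    width=1024,
    height=1024,
    num_inference_steps=9,
    guidance_scale=0.0,
    generator=torch.Generator("cuda").manual_seed(42),  # illustrative seed
).images[0]
image.save("z_image_turbo_example.png")  # illustrative filename
```

In the web UI itself the model is loaded from `user_data/image_models/` (the download handler replaces `/` with `_` in the repo name), and the same options are exposed through the new `--image-model`, `--image-quant`, `--image-cpu-offload`, and related flags added to `modules/shared.py`, which `server.py` uses to load an image model at startup.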
diff --git a/css/main.css b/css/main.css index fd79d24c..5c1c356d 100644 --- a/css/main.css +++ b/css/main.css @@ -93,11 +93,11 @@ ol li p, ul li p { display: inline-block; } -#notebook-parent-tab, #chat-tab, #parameters, #chat-settings, #lora, #training-tab, #model-tab, #session-tab, #character-tab { +#notebook-parent-tab, #chat-tab, #parameters, #chat-settings, #lora, #training-tab, #model-tab, #session-tab, #character-tab, #image-ai-tab { border: 0; } -#notebook-parent-tab, #parameters, #chat-settings, #lora, #training-tab, #model-tab, #session-tab, #character-tab { +#notebook-parent-tab, #parameters, #chat-settings, #lora, #training-tab, #model-tab, #session-tab, #character-tab, #image-ai-tab { padding: 1rem; } @@ -244,37 +244,46 @@ button { font-size: 100% !important; } -.pretty_scrollbar::-webkit-scrollbar { +.pretty_scrollbar::-webkit-scrollbar, +#image-history-gallery > :nth-child(2)::-webkit-scrollbar { width: 8px; height: 8px; } -.pretty_scrollbar::-webkit-scrollbar-track { +.pretty_scrollbar::-webkit-scrollbar-track, +#image-history-gallery > :nth-child(2)::-webkit-scrollbar-track { background: transparent; } .pretty_scrollbar::-webkit-scrollbar-thumb, -.pretty_scrollbar::-webkit-scrollbar-thumb:hover { +.pretty_scrollbar::-webkit-scrollbar-thumb:hover, +#image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb, +#image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb:hover { background: var(--neutral-300); border-radius: 30px; } .dark .pretty_scrollbar::-webkit-scrollbar-thumb, -.dark .pretty_scrollbar::-webkit-scrollbar-thumb:hover { +.dark .pretty_scrollbar::-webkit-scrollbar-thumb:hover, +.dark #image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb, +.dark #image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb:hover { background: rgb(255 255 255 / 6.25%); border-radius: 10px; } -.pretty_scrollbar::-webkit-resizer { +.pretty_scrollbar::-webkit-resizer, +#image-history-gallery > :nth-child(2)::-webkit-resizer { background: #c5c5d2; } -.dark .pretty_scrollbar::-webkit-resizer { +.dark .pretty_scrollbar::-webkit-resizer, +.dark #image-history-gallery > :nth-child(2)::-webkit-resizer { background: #ccc; border-radius: 10px; } -.pretty_scrollbar::-webkit-scrollbar-corner { +.pretty_scrollbar::-webkit-scrollbar-corner, +#image-history-gallery > :nth-child(2)::-webkit-scrollbar-corner { background: transparent; } @@ -1674,3 +1683,72 @@ button:focus { .dark .sidebar-vertical-separator { border-bottom: 1px solid rgb(255 255 255 / 10%); } + +button#swap-height-width { + position: absolute; + top: -50px; + right: 0; + border: 0; +} + +#image-output-gallery, #image-output-gallery > :nth-child(2) { + height: calc(100vh - 83px); + max-height: calc(100vh - 83px); +} + +#image-history-gallery, #image-history-gallery > :nth-child(2) { + height: calc(100vh - 174px); + max-height: calc(100vh - 174px); +} + +/* Additional CSS for the paginated image gallery */ + +/* Page info styling */ +#image-page-info { + display: flex; + align-items: center; + justify-content: center; + min-width: 200px; + font-size: 0.9em; + color: var(--body-text-color-subdued); +} + +/* Settings display panel */ +#image-ai-tab .settings-display-panel { + background: var(--background-fill-secondary); + padding: 12px; + border-radius: 8px; + font-size: 0.9em; + max-height: 300px; + overflow-y: auto; + margin-top: 8px; +} + +/* Gallery status message */ +#image-ai-tab .gallery-status { + color: var(--color-accent); + font-size: 0.85em; + margin-top: 4px; +} + +/* Pagination button row 
alignment */ +#image-ai-tab .pagination-controls { + display: flex; + align-items: center; + gap: 8px; + flex-wrap: wrap; +} + +/* Selected image preview container */ +#image-ai-tab .selected-preview-container { + border: 1px solid var(--border-color-primary); + border-radius: 8px; + padding: 8px; + background: var(--background-fill-secondary); +} + +/* Fix a gr.Markdown UI glitch when clicking Next in the + * Image AI > Gallery tab */ +.min.svelte-1yrv54 { + min-height: 0; +} diff --git a/docs/Image Generation Tutorial.md b/docs/Image Generation Tutorial.md new file mode 100644 index 00000000..e7022c34 --- /dev/null +++ b/docs/Image Generation Tutorial.md @@ -0,0 +1,20 @@ +# Image Generation Tutorial + +This feature allows you to generate images using high-speed models like Z-Image-Turbo directly within the web UI. + +## How to use + +1. Click on the **Image AI** tab at the top of the interface. +2. Select the **Model** sub-tab. +3. Copy and paste the following link into the **Download model** box: + +``` +https://huggingface.co/Tongyi-MAI/Z-Image-Turbo +``` + +4. Click the **Download** button and wait for the confirmation message. +5. In the **Model** dropdown menu, select the model you just downloaded (if you don't see it, click the 🔄 refresh button). +6. Click **Load**. +7. Go to the **Generate** sub-tab, type a prompt, and click **GENERATE**. + +> **Note for Z-Image-Turbo:** For the best results with this specific model, keep the **CFG Scale** slider at **0**. diff --git a/modules/image_models.py b/modules/image_models.py new file mode 100644 index 00000000..e6f9a172 --- /dev/null +++ b/modules/image_models.py @@ -0,0 +1,183 @@ +import time + +import modules.shared as shared +from modules.logging_colors import logger +from modules.torch_utils import get_device +from modules.utils import resolve_model_path + + +def get_quantization_config(quant_method): + """ + Get the appropriate quantization config based on the selected method. + + Args: + quant_method: One of 'none', 'bnb-8bit', 'bnb-4bit', 'quanto-8bit', 'quanto-4bit', 'quanto-2bit' + + Returns: + PipelineQuantizationConfig or None + """ + import torch + from diffusers import BitsAndBytesConfig, QuantoConfig + from diffusers.quantizers import PipelineQuantizationConfig + + if quant_method == 'none' or not quant_method: + return None + + # Bitsandbytes 8-bit quantization + elif quant_method == 'bnb-8bit': + return PipelineQuantizationConfig( + quant_mapping={ + "transformer": BitsAndBytesConfig( + load_in_8bit=True + ) + } + ) + + # Bitsandbytes 4-bit quantization + elif quant_method == 'bnb-4bit': + return PipelineQuantizationConfig( + quant_mapping={ + "transformer": BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_quant_type="nf4", + bnb_4bit_compute_dtype=torch.bfloat16, + bnb_4bit_use_double_quant=True + ) + } + ) + + # Quanto 8-bit quantization + elif quant_method == 'quanto-8bit': + return PipelineQuantizationConfig( + quant_mapping={ + "transformer": QuantoConfig(weights_dtype="int8") + } + ) + + # Quanto 4-bit quantization + elif quant_method == 'quanto-4bit': + return PipelineQuantizationConfig( + quant_mapping={ + "transformer": QuantoConfig(weights_dtype="int4") + } + ) + + # Quanto 2-bit quantization + elif quant_method == 'quanto-2bit': + return PipelineQuantizationConfig( + quant_mapping={ + "transformer": QuantoConfig(weights_dtype="int2") + } + ) + + else: + logger.warning(f"Unknown quantization method: {quant_method}. 
Loading without quantization.") + return None + + +def get_pipeline_type(pipe): + """ + Detect the pipeline type based on the loaded pipeline class. + + Returns: + str: 'zimage', 'qwenimage', or 'unknown' + """ + class_name = pipe.__class__.__name__ + if class_name == 'ZImagePipeline': + return 'zimage' + elif class_name == 'QwenImagePipeline': + return 'qwenimage' + else: + return 'unknown' + + +def load_image_model(model_name, dtype='bfloat16', attn_backend='sdpa', cpu_offload=False, compile_model=False, quant_method='none'): + """ + Load a diffusers image generation model. + + Args: + model_name: Name of the model directory + dtype: 'bfloat16' or 'float16' + attn_backend: 'sdpa', 'flash_attention_2', or 'flash_attention_3' + cpu_offload: Enable CPU offloading for low VRAM + compile_model: Compile the model for faster inference (slow first run) + quant_method: Quantization method - 'none', 'bnb-8bit', 'bnb-4bit', 'quanto-8bit', 'quanto-4bit', 'quanto-2bit' + """ + import torch + from diffusers import DiffusionPipeline + + logger.info(f"Loading image model \"{model_name}\" with quantization: {quant_method}") + t0 = time.time() + + dtype_map = {"bfloat16": torch.bfloat16, "float16": torch.float16} + target_dtype = dtype_map.get(dtype, torch.bfloat16) + + model_path = resolve_model_path(model_name, image_model=True) + + try: + # Get quantization config based on selected method + pipeline_quant_config = get_quantization_config(quant_method) + + # Load the pipeline + load_kwargs = { + "torch_dtype": target_dtype, + "low_cpu_mem_usage": True, + } + + if pipeline_quant_config is not None: + load_kwargs["quantization_config"] = pipeline_quant_config + + # Use DiffusionPipeline for automatic pipeline detection + # This handles both ZImagePipeline and QwenImagePipeline + pipe = DiffusionPipeline.from_pretrained( + str(model_path), + **load_kwargs + ) + + pipeline_type = get_pipeline_type(pipe) + + if not cpu_offload: + pipe.to(get_device()) + + # Set attention backend (if supported by the pipeline) + if hasattr(pipe, 'transformer') and hasattr(pipe.transformer, 'set_attention_backend'): + if attn_backend == 'flash_attention_2': + pipe.transformer.set_attention_backend("flash") + elif attn_backend == 'flash_attention_3': + pipe.transformer.set_attention_backend("_flash_3") + # sdpa is the default, no action needed + + if compile_model: + if hasattr(pipe, 'transformer') and hasattr(pipe.transformer, 'compile'): + logger.info("Compiling model (first run will be slow)...") + pipe.transformer.compile() + + if cpu_offload: + pipe.enable_model_cpu_offload() + + shared.image_model = pipe + shared.image_model_name = model_name + shared.image_pipeline_type = pipeline_type + + logger.info(f"Loaded image model \"{model_name}\" in {(time.time() - t0):.2f} seconds.") + return pipe + + except Exception as e: + logger.error(f"Failed to load image model: {str(e)}") + return None + + +def unload_image_model(): + """Unload the current image model and free VRAM.""" + if shared.image_model is None: + return + + del shared.image_model + shared.image_model = None + shared.image_model_name = 'None' + shared.image_pipeline_type = None + + from modules.torch_utils import clear_torch_cache + clear_torch_cache() + + logger.info("Image model unloaded.") diff --git a/modules/shared.py b/modules/shared.py index 134c0cac..316f7729 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -11,7 +11,7 @@ import yaml from modules.logging_colors import logger from modules.presets import default_preset -# Model variables +# Text 
model variables model = None tokenizer = None model_name = 'None' @@ -20,6 +20,11 @@ is_multimodal = False model_dirty_from_training = False lora_names = [] +# Image model variables +image_model = None +image_model_name = 'None' +image_pipeline_type = None + # Generation variables stop_everything = False generation_lock = None @@ -46,6 +51,18 @@ group.add_argument('--extensions', type=str, nargs='+', help='The list of extens group.add_argument('--verbose', action='store_true', help='Print the prompts to the terminal.') group.add_argument('--idle-timeout', type=int, default=0, help='Unload model after this many minutes of inactivity. It will be automatically reloaded when you try to use it again.') +# Image generation +group = parser.add_argument_group('Image model') +group.add_argument('--image-model', type=str, help='Name of the image model to select on startup (overrides saved setting).') +group.add_argument('--image-model-dir', type=str, default='user_data/image_models', help='Path to directory with all the image models.') +group.add_argument('--image-dtype', type=str, default=None, choices=['bfloat16', 'float16'], help='Data type for image model.') +group.add_argument('--image-attn-backend', type=str, default=None, choices=['sdpa', 'flash_attention_2', 'flash_attention_3'], help='Attention backend for image model.') +group.add_argument('--image-cpu-offload', action='store_true', help='Enable CPU offloading for image model.') +group.add_argument('--image-compile', action='store_true', help='Compile the image model for faster inference.') +group.add_argument('--image-quant', type=str, default=None, + choices=['none', 'bnb-8bit', 'bnb-4bit', 'quanto-8bit', 'quanto-4bit', 'quanto-2bit'], + help='Quantization method for image model.') + # Model loader group = parser.add_argument_group('Model loader') group.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. 
Valid options: Transformers, llama.cpp, ExLlamav3_HF, ExLlamav2_HF, ExLlamav2, TensorRT-LLM.') @@ -290,6 +307,24 @@ settings = { # Extensions 'default_extensions': [], + + # Image generation settings + 'image_prompt': '', + 'image_neg_prompt': '', + 'image_width': 1024, + 'image_height': 1024, + 'image_aspect_ratio': '1:1 Square', + 'image_steps': 9, + 'image_cfg_scale': 0.0, + 'image_seed': -1, + 'image_batch_size': 1, + 'image_batch_count': 1, + 'image_model_menu': 'None', + 'image_dtype': 'bfloat16', + 'image_attn_backend': 'sdpa', + 'image_cpu_offload': False, + 'image_compile': False, + 'image_quant': 'none', } default_settings = copy.deepcopy(settings) @@ -314,6 +349,22 @@ def do_cmd_flags_warnings(): logger.warning('\nThe multi-user mode is highly experimental and should not be shared publicly.') +def apply_image_model_cli_overrides(): + """Apply command-line overrides for image model settings.""" + if args.image_model is not None: + settings['image_model_menu'] = args.image_model + if args.image_dtype is not None: + settings['image_dtype'] = args.image_dtype + if args.image_attn_backend is not None: + settings['image_attn_backend'] = args.image_attn_backend + if args.image_cpu_offload: + settings['image_cpu_offload'] = True + if args.image_compile: + settings['image_compile'] = True + if args.image_quant is not None: + settings['image_quant'] = args.image_quant + + def fix_loader_name(name): if not name: return name diff --git a/modules/ui.py b/modules/ui.py index f99e8b6a..9700d297 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -280,6 +280,26 @@ def list_interface_input_elements(): 'include_past_attachments', ] + # Image generation elements + elements += [ + 'image_prompt', + 'image_neg_prompt', + 'image_width', + 'image_height', + 'image_aspect_ratio', + 'image_steps', + 'image_cfg_scale', + 'image_seed', + 'image_batch_size', + 'image_batch_count', + 'image_model_menu', + 'image_dtype', + 'image_attn_backend', + 'image_compile', + 'image_cpu_offload', + 'image_quant', + ] + return elements @@ -509,7 +529,25 @@ def setup_auto_save(): 'theme_state', 'show_two_notebook_columns', 'paste_to_attachment', - 'include_past_attachments' + 'include_past_attachments', + + # Image generation tab (ui_image_generation.py) + 'image_prompt', + 'image_neg_prompt', + 'image_width', + 'image_height', + 'image_aspect_ratio', + 'image_steps', + 'image_cfg_scale', + 'image_seed', + 'image_batch_size', + 'image_batch_count', + 'image_model_menu', + 'image_dtype', + 'image_attn_backend', + 'image_compile', + 'image_cpu_offload', + 'image_quant', ] for element_name in change_elements: diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py new file mode 100644 index 00000000..92c447c8 --- /dev/null +++ b/modules/ui_image_generation.py @@ -0,0 +1,847 @@ +import json +import os +import time +import traceback +from datetime import datetime +from pathlib import Path + +import gradio as gr +import numpy as np +from PIL import Image +from PIL.PngImagePlugin import PngInfo + +from modules import shared, ui, utils +from modules.image_models import ( + get_pipeline_type, + load_image_model, + unload_image_model +) +from modules.logging_colors import logger +from modules.text_generation import stop_everything_event +from modules.torch_utils import get_device +from modules.utils import gradio + +ASPECT_RATIOS = { + "1:1 Square": (1, 1), + "16:9 Cinema": (16, 9), + "9:16 Mobile": (9, 16), + "4:3 Photo": (4, 3), + "Custom": None, +} + +STEP = 16 +IMAGES_PER_PAGE = 64 + +# Settings keys to save 
in PNG metadata (Generate tab only) +METADATA_SETTINGS_KEYS = [ + 'image_prompt', + 'image_neg_prompt', + 'image_width', + 'image_height', + 'image_aspect_ratio', + 'image_steps', + 'image_seed', + 'image_batch_size', + 'image_batch_count', + 'image_cfg_scale', +] + +# Cache for all image paths +_image_cache = [] +_cache_timestamp = 0 + + +def round_to_step(value, step=STEP): + return round(value / step) * step + + +def clamp(value, min_val, max_val): + return max(min_val, min(max_val, value)) + + +def apply_aspect_ratio(aspect_ratio, current_width, current_height): + if aspect_ratio == "Custom" or aspect_ratio not in ASPECT_RATIOS: + return current_width, current_height + + w_ratio, h_ratio = ASPECT_RATIOS[aspect_ratio] + + if w_ratio == h_ratio: + base = min(current_width, current_height) + new_width = base + new_height = base + elif w_ratio < h_ratio: + new_width = current_width + new_height = round_to_step(current_width * h_ratio / w_ratio) + else: + new_height = current_height + new_width = round_to_step(current_height * w_ratio / h_ratio) + + new_width = clamp(new_width, 256, 2048) + new_height = clamp(new_height, 256, 2048) + + return int(new_width), int(new_height) + + +def update_height_from_width(width, aspect_ratio): + if aspect_ratio == "Custom" or aspect_ratio not in ASPECT_RATIOS: + return gr.update() + + w_ratio, h_ratio = ASPECT_RATIOS[aspect_ratio] + new_height = round_to_step(width * h_ratio / w_ratio) + new_height = clamp(new_height, 256, 2048) + + return int(new_height) + + +def update_width_from_height(height, aspect_ratio): + if aspect_ratio == "Custom" or aspect_ratio not in ASPECT_RATIOS: + return gr.update() + + w_ratio, h_ratio = ASPECT_RATIOS[aspect_ratio] + new_width = round_to_step(height * w_ratio / h_ratio) + new_width = clamp(new_width, 256, 2048) + + return int(new_width) + + +def swap_dimensions_and_update_ratio(width, height, aspect_ratio): + new_width, new_height = height, width + + new_ratio = "Custom" + for name, ratios in ASPECT_RATIOS.items(): + if ratios is None: + continue + w_r, h_r = ratios + expected_height = new_width * h_r / w_r + if abs(expected_height - new_height) < STEP: + new_ratio = name + break + + return new_width, new_height, new_ratio + + +def build_generation_metadata(state, actual_seed): + """Build metadata dict from generation settings.""" + metadata = {} + for key in METADATA_SETTINGS_KEYS: + if key in state: + metadata[key] = state[key] + + # Store the actual seed used (not -1) + metadata['image_seed'] = actual_seed + metadata['generated_at'] = datetime.now().isoformat() + metadata['model'] = shared.image_model_name + + return metadata + + +def save_generated_images(images, state, actual_seed): + """Save images with generation metadata embedded in PNG.""" + date_str = datetime.now().strftime("%Y-%m-%d") + folder_path = os.path.join("user_data", "image_outputs", date_str) + os.makedirs(folder_path, exist_ok=True) + + metadata = build_generation_metadata(state, actual_seed) + metadata_json = json.dumps(metadata, ensure_ascii=False) + + for idx, img in enumerate(images): + timestamp = datetime.now().strftime("%H-%M-%S") + filename = f"{timestamp}_{actual_seed:010d}_{idx:03d}.png" + filepath = os.path.join(folder_path, filename) + + # Create PNG metadata + png_info = PngInfo() + png_info.add_text("image_gen_settings", metadata_json) + + # Save with metadata + img.save(filepath, pnginfo=png_info) + + +def read_image_metadata(image_path): + """Read generation metadata from PNG file.""" + try: + with Image.open(image_path) as img: + if 
hasattr(img, 'text') and 'image_gen_settings' in img.text: + return json.loads(img.text['image_gen_settings']) + except Exception as e: + logger.debug(f"Could not read metadata from {image_path}: {e}") + return None + + +def format_metadata_for_display(metadata): + """Format metadata as readable text.""" + if not metadata: + return "No generation settings found in this image." + + lines = ["**Generation Settings**", ""] + + # Display in a nice order + display_order = [ + ('image_prompt', 'Prompt'), + ('image_neg_prompt', 'Negative Prompt'), + ('image_width', 'Width'), + ('image_height', 'Height'), + ('image_aspect_ratio', 'Aspect Ratio'), + ('image_steps', 'Steps'), + ('image_cfg_scale', 'CFG Scale'), + ('image_seed', 'Seed'), + ('image_batch_size', 'Batch Size'), + ('image_batch_count', 'Batch Count'), + ('model', 'Model'), + ('generated_at', 'Generated At'), + ] + + for key, label in display_order: + if key in metadata: + value = metadata[key] + if key in ['image_prompt', 'image_neg_prompt'] and value: + # Truncate long prompts for display + if len(str(value)) > 200: + value = str(value)[:200] + "..." + lines.append(f"**{label}:** {value}") + + return "\n\n".join(lines) + + +def get_all_history_images(force_refresh=False): + """Get all history images sorted by modification time (newest first). Uses caching.""" + global _image_cache, _cache_timestamp + + output_dir = os.path.join("user_data", "image_outputs") + if not os.path.exists(output_dir): + return [] + + # Check if we need to refresh cache + current_time = time.time() + if not force_refresh and _image_cache and (current_time - _cache_timestamp) < 2: + return _image_cache + + image_files = [] + for root, _, files in os.walk(output_dir): + for file in files: + if file.endswith((".png", ".jpg", ".jpeg")): + full_path = os.path.join(root, file) + image_files.append((full_path, os.path.getmtime(full_path))) + + image_files.sort(key=lambda x: x[1], reverse=True) + _image_cache = [x[0] for x in image_files] + _cache_timestamp = current_time + + return _image_cache + + +def get_paginated_images(page=0, force_refresh=False): + """Get images for a specific page.""" + all_images = get_all_history_images(force_refresh) + total_images = len(all_images) + total_pages = max(1, (total_images + IMAGES_PER_PAGE - 1) // IMAGES_PER_PAGE) + + # Clamp page to valid range + page = max(0, min(page, total_pages - 1)) + + start_idx = page * IMAGES_PER_PAGE + end_idx = min(start_idx + IMAGES_PER_PAGE, total_images) + + page_images = all_images[start_idx:end_idx] + + return page_images, page, total_pages, total_images + + +def get_initial_page_info(): + """Get page info string for initial load.""" + _, page, total_pages, total_images = get_paginated_images(0) + return f"Page {page + 1} of {total_pages} ({total_images} total images)" + + +def refresh_gallery(current_page=0): + """Refresh gallery with current page.""" + images, page, total_pages, total_images = get_paginated_images(current_page, force_refresh=True) + page_info = f"Page {page + 1} of {total_pages} ({total_images} total images)" + return images, page, page_info + + +def go_to_page(page_num, current_page): + """Go to a specific page (1-indexed input).""" + try: + page = int(page_num) - 1 # Convert to 0-indexed + except (ValueError, TypeError): + page = current_page + + images, page, total_pages, total_images = get_paginated_images(page) + page_info = f"Page {page + 1} of {total_pages} ({total_images} total images)" + return images, page, page_info + + +def next_page(current_page): + """Go to next 
page.""" + images, page, total_pages, total_images = get_paginated_images(current_page + 1) + page_info = f"Page {page + 1} of {total_pages} ({total_images} total images)" + return images, page, page_info + + +def prev_page(current_page): + """Go to previous page.""" + images, page, total_pages, total_images = get_paginated_images(current_page - 1) + page_info = f"Page {page + 1} of {total_pages} ({total_images} total images)" + return images, page, page_info + + +def on_gallery_select(evt: gr.SelectData, current_page): + """Handle image selection from gallery.""" + if evt.index is None: + return "", "Select an image to view its settings" + + # Get the current page's images to find the actual file path + all_images = get_all_history_images() + total_images = len(all_images) + + # Calculate the actual index in the full list + start_idx = current_page * IMAGES_PER_PAGE + actual_idx = start_idx + evt.index + + if actual_idx >= total_images: + return "", "Image not found" + + image_path = all_images[actual_idx] + metadata = read_image_metadata(image_path) + metadata_display = format_metadata_for_display(metadata) + + return image_path, metadata_display + + +def send_to_generate(selected_image_path): + """Load settings from selected image and return updates for all Generate tab inputs.""" + if not selected_image_path or not os.path.exists(selected_image_path): + return [gr.update()] * 10 + ["No image selected"] + + metadata = read_image_metadata(selected_image_path) + if not metadata: + return [gr.update()] * 10 + ["No settings found in this image"] + + # Return updates for each input element in order + updates = [ + gr.update(value=metadata.get('image_prompt', '')), + gr.update(value=metadata.get('image_neg_prompt', '')), + gr.update(value=metadata.get('image_width', 1024)), + gr.update(value=metadata.get('image_height', 1024)), + gr.update(value=metadata.get('image_aspect_ratio', '1:1 Square')), + gr.update(value=metadata.get('image_steps', 9)), + gr.update(value=metadata.get('image_seed', -1)), + gr.update(value=metadata.get('image_batch_size', 1)), + gr.update(value=metadata.get('image_batch_count', 1)), + gr.update(value=metadata.get('image_cfg_scale', 0.0)), + ] + + status = f"✓ Settings loaded from image (seed: {metadata.get('image_seed', 'unknown')})" + return updates + [status] + + +def read_dropped_image_metadata(image_path): + """Read metadata from a dropped/uploaded image.""" + if not image_path: + return "Drop an image to view its generation settings." + + metadata = read_image_metadata(image_path) + return format_metadata_for_display(metadata) + + +def create_ui(): + if shared.settings['image_model_menu'] != 'None': + shared.image_model_name = shared.settings['image_model_menu'] + + with gr.Tab("Image AI", elem_id="image-ai-tab"): + with gr.Tabs(): + # TAB 1: GENERATE + with gr.TabItem("Generate"): + with gr.Row(): + with gr.Column(scale=4, min_width=350): + shared.gradio['image_prompt'] = gr.Textbox( + label="Prompt", + placeholder="Describe your imagination...", + lines=3, + autofocus=True, + value=shared.settings['image_prompt'] + ) + shared.gradio['image_neg_prompt'] = gr.Textbox( + label="Negative Prompt", + placeholder="Low quality...", + lines=3, + value=shared.settings['image_neg_prompt'] + ) + + shared.gradio['image_generate_btn'] = gr.Button("Generate", variant="primary", size="lg") + shared.gradio['image_stop_btn'] = gr.Button("Stop", size="lg", visible=False) + gr.HTML("
") + + gr.Markdown("### Dimensions") + with gr.Row(): + with gr.Column(): + shared.gradio['image_width'] = gr.Slider(256, 2048, value=shared.settings['image_width'], step=STEP, label="Width") + with gr.Column(): + shared.gradio['image_height'] = gr.Slider(256, 2048, value=shared.settings['image_height'], step=STEP, label="Height") + shared.gradio['image_swap_btn'] = gr.Button("⇄ Swap", elem_classes='refresh-button', scale=0, min_width=80, elem_id="swap-height-width") + + with gr.Row(): + shared.gradio['image_aspect_ratio'] = gr.Radio( + choices=["1:1 Square", "16:9 Cinema", "9:16 Mobile", "4:3 Photo", "Custom"], + value=shared.settings['image_aspect_ratio'], + label="Aspect Ratio", + interactive=True + ) + + gr.Markdown("### Config") + with gr.Row(): + with gr.Column(): + shared.gradio['image_steps'] = gr.Slider(1, 100, value=shared.settings['image_steps'], step=1, label="Steps") + shared.gradio['image_cfg_scale'] = gr.Slider( + 0.0, 10.0, + value=shared.settings['image_cfg_scale'], + step=0.1, + label="CFG Scale", + info="Z-Image Turbo: 0.0 | Qwen: 4.0" + ) + shared.gradio['image_seed'] = gr.Number(label="Seed", value=shared.settings['image_seed'], precision=0, info="-1 = Random") + with gr.Column(): + shared.gradio['image_batch_size'] = gr.Slider(1, 32, value=shared.settings['image_batch_size'], step=1, label="Batch Size (VRAM Heavy)", info="Generates N images at once.") + shared.gradio['image_batch_count'] = gr.Slider(1, 128, value=shared.settings['image_batch_count'], step=1, label="Sequential Count (Loop)", info="Repeats the generation N times.") + + with gr.Column(scale=6, min_width=500): + with gr.Column(elem_classes=["viewport-container"]): + shared.gradio['image_output_gallery'] = gr.Gallery(label="Output", show_label=False, columns=2, rows=2, height="80vh", object_fit="contain", preview=True, elem_id="image-output-gallery") + + # TAB 2: GALLERY (with pagination) + with gr.TabItem("Gallery"): + with gr.Row(): + with gr.Column(scale=3): + # Pagination controls + with gr.Row(): + shared.gradio['image_refresh_history'] = gr.Button("🔄 Refresh", elem_classes="refresh-button") + shared.gradio['image_prev_page'] = gr.Button("◀ Prev", elem_classes="refresh-button") + shared.gradio['image_page_info'] = gr.Markdown(value=get_initial_page_info, elem_id="image-page-info") + shared.gradio['image_next_page'] = gr.Button("Next ▶", elem_classes="refresh-button") + shared.gradio['image_page_input'] = gr.Number(value=1, label="Page", precision=0, minimum=1, scale=0, min_width=80) + shared.gradio['image_go_to_page'] = gr.Button("Go", elem_classes="refresh-button", scale=0, min_width=50) + + # State for current page and selected image path + shared.gradio['image_current_page'] = gr.State(value=0) + shared.gradio['image_selected_path'] = gr.State(value="") + + # Paginated gallery using gr.Gallery + shared.gradio['image_history_gallery'] = gr.Gallery( + value=lambda: get_paginated_images(0)[0], + label="Image History", + show_label=False, + columns=6, + object_fit="cover", + height="auto", + allow_preview=True, + elem_id="image-history-gallery" + ) + + with gr.Column(scale=1): + gr.Markdown("### Selected Image") + shared.gradio['image_settings_display'] = gr.Markdown("Select an image to view its settings") + shared.gradio['image_send_to_generate'] = gr.Button("Send to Generate", variant="primary") + shared.gradio['image_gallery_status'] = gr.Markdown("") + + gr.Markdown("### Import Image") + shared.gradio['image_drop_upload'] = gr.Image( + label="Drop image here to view settings", + type="filepath", + 
height=150 + ) + + # TAB 3: MODEL + with gr.TabItem("Model"): + with gr.Row(): + with gr.Column(): + with gr.Row(): + shared.gradio['image_model_menu'] = gr.Dropdown( + choices=utils.get_available_image_models(), + value=shared.settings['image_model_menu'], + label='Model', + elem_classes='slim-dropdown' + ) + shared.gradio['image_refresh_models'] = gr.Button("🔄", elem_classes='refresh-button', scale=0, min_width=40) + shared.gradio['image_load_model'] = gr.Button("Load", variant='primary', elem_classes='refresh-button') + shared.gradio['image_unload_model'] = gr.Button("Unload", elem_classes='refresh-button') + + gr.Markdown("## Settings") + with gr.Row(): + with gr.Column(): + shared.gradio['image_quant'] = gr.Dropdown( + label='Quantization', + choices=['none', 'bnb-8bit', 'bnb-4bit', 'quanto-8bit', 'quanto-4bit', 'quanto-2bit'], + value=shared.settings['image_quant'], + info='Quantization method for reduced VRAM usage. Quanto supports lower precisions (2-bit, 4-bit, 8-bit).' + ) + + shared.gradio['image_dtype'] = gr.Dropdown( + choices=['bfloat16', 'float16'], + value=shared.settings['image_dtype'], + label='Data Type', + info='bfloat16 recommended for modern GPUs' + ) + shared.gradio['image_attn_backend'] = gr.Dropdown( + choices=['sdpa', 'flash_attention_2', 'flash_attention_3'], + value=shared.settings['image_attn_backend'], + label='Attention Backend', + info='SDPA is default. Flash Attention requires compatible GPU.' + ) + with gr.Column(): + shared.gradio['image_compile'] = gr.Checkbox( + value=shared.settings['image_compile'], + label='Compile Model', + info='Faster inference after first run. First run will be slow.' + ) + shared.gradio['image_cpu_offload'] = gr.Checkbox( + value=shared.settings['image_cpu_offload'], + label='CPU Offload', + info='Enable for low VRAM GPUs. Slower but uses less memory.' + ) + + with gr.Column(): + shared.gradio['image_download_path'] = gr.Textbox( + label="Download model", + placeholder="Tongyi-MAI/Z-Image-Turbo", + info="Enter HuggingFace path. Use : for branch, e.g. 
user/model:main" + ) + shared.gradio['image_download_btn'] = gr.Button("Download", variant='primary') + shared.gradio['image_model_status'] = gr.Markdown( + value=f"Model: **{shared.settings['image_model_menu']}** (not loaded)" if shared.settings['image_model_menu'] != 'None' else "No model selected" + ) + + +def create_event_handlers(): + # Dimension controls + shared.gradio['image_aspect_ratio'].change( + apply_aspect_ratio, + gradio('image_aspect_ratio', 'image_width', 'image_height'), + gradio('image_width', 'image_height'), + show_progress=False + ) + + shared.gradio['image_width'].release( + update_height_from_width, + gradio('image_width', 'image_aspect_ratio'), + gradio('image_height'), + show_progress=False + ) + + shared.gradio['image_height'].release( + update_width_from_height, + gradio('image_height', 'image_aspect_ratio'), + gradio('image_width'), + show_progress=False + ) + + shared.gradio['image_swap_btn'].click( + swap_dimensions_and_update_ratio, + gradio('image_width', 'image_height', 'image_aspect_ratio'), + gradio('image_width', 'image_height', 'image_aspect_ratio'), + show_progress=False + ) + + # Generation + shared.gradio['image_generate_btn'].click( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + lambda: [gr.update(visible=True), gr.update(visible=False)], None, gradio('image_stop_btn', 'image_generate_btn')).then( + generate, gradio('interface_state'), gradio('image_output_gallery'), show_progress=False).then( + lambda: [gr.update(visible=False), gr.update(visible=True)], None, gradio('image_stop_btn', 'image_generate_btn')) + + shared.gradio['image_prompt'].submit( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + lambda: [gr.update(visible=True), gr.update(visible=False)], None, gradio('image_stop_btn', 'image_generate_btn')).then( + generate, gradio('interface_state'), gradio('image_output_gallery'), show_progress=False).then( + lambda: [gr.update(visible=False), gr.update(visible=True)], None, gradio('image_stop_btn', 'image_generate_btn')) + + shared.gradio['image_neg_prompt'].submit( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + lambda: [gr.update(visible=True), gr.update(visible=False)], None, gradio('image_stop_btn', 'image_generate_btn')).then( + generate, gradio('interface_state'), gradio('image_output_gallery'), show_progress=False).then( + lambda: [gr.update(visible=False), gr.update(visible=True)], None, gradio('image_stop_btn', 'image_generate_btn')) + + # Stop button + shared.gradio['image_stop_btn'].click( + stop_everything_event, None, None, show_progress=False + ) + + # Model management + shared.gradio['image_refresh_models'].click( + lambda: gr.update(choices=utils.get_available_image_models()), + None, + gradio('image_model_menu'), + show_progress=False + ) + + shared.gradio['image_load_model'].click( + load_image_model_wrapper, + gradio('image_model_menu', 'image_dtype', 'image_attn_backend', 'image_cpu_offload', 'image_compile', 'image_quant'), + gradio('image_model_status'), + show_progress=True + ) + + shared.gradio['image_unload_model'].click( + unload_image_model_wrapper, + None, + gradio('image_model_status'), + show_progress=False + ) + + shared.gradio['image_download_btn'].click( + download_image_model_wrapper, + gradio('image_download_path'), + gradio('image_model_status', 'image_model_menu'), + show_progress=True + ) + + # Gallery pagination handlers + 
shared.gradio['image_refresh_history'].click( + refresh_gallery, + gradio('image_current_page'), + gradio('image_history_gallery', 'image_current_page', 'image_page_info'), + show_progress=False + ) + + shared.gradio['image_next_page'].click( + next_page, + gradio('image_current_page'), + gradio('image_history_gallery', 'image_current_page', 'image_page_info'), + show_progress=False + ) + + shared.gradio['image_prev_page'].click( + prev_page, + gradio('image_current_page'), + gradio('image_history_gallery', 'image_current_page', 'image_page_info'), + show_progress=False + ) + + shared.gradio['image_go_to_page'].click( + go_to_page, + gradio('image_page_input', 'image_current_page'), + gradio('image_history_gallery', 'image_current_page', 'image_page_info'), + show_progress=False + ) + + # Image selection from gallery + shared.gradio['image_history_gallery'].select( + on_gallery_select, + gradio('image_current_page'), + gradio('image_selected_path', 'image_settings_display'), + show_progress=False + ) + + # Send to Generate + shared.gradio['image_send_to_generate'].click( + send_to_generate, + gradio('image_selected_path'), + gradio( + 'image_prompt', + 'image_neg_prompt', + 'image_width', + 'image_height', + 'image_aspect_ratio', + 'image_steps', + 'image_seed', + 'image_batch_size', + 'image_batch_count', + 'image_cfg_scale', + 'image_gallery_status' + ), + show_progress=False + ) + + shared.gradio['image_drop_upload'].change( + read_dropped_image_metadata, + gradio('image_drop_upload'), + gradio('image_settings_display'), + show_progress=False + ) + + +def generate(state): + """ + Generate images using the loaded model. + Automatically adjusts parameters based on pipeline type. + """ + import torch + + from modules.torch_utils import clear_torch_cache + + clear_torch_cache() + + try: + model_name = state['image_model_menu'] + + if not model_name or model_name == 'None': + logger.error("No image model selected. 
Go to the Model tab and select a model.") + return [] + + if shared.image_model is None: + result = load_image_model( + model_name, + dtype=state['image_dtype'], + attn_backend=state['image_attn_backend'], + cpu_offload=state['image_cpu_offload'], + compile_model=state['image_compile'], + quant_method=state['image_quant'] + ) + if result is None: + logger.error(f"Failed to load model `{model_name}`.") + return [] + + shared.image_model_name = model_name + + seed = state['image_seed'] + if seed == -1: + seed = np.random.randint(0, 2**32 - 1) + + device = get_device() + if device is None: + device = "cpu" + generator = torch.Generator(device).manual_seed(int(seed)) + + all_images = [] + + # Get pipeline type for parameter adjustment + pipeline_type = getattr(shared, 'image_pipeline_type', None) + if pipeline_type is None: + pipeline_type = get_pipeline_type(shared.image_model) + + # Process Prompt + prompt = state['image_prompt'] + + # Apply "Positive Magic" for Qwen models only + if pipeline_type == 'qwenimage': + magic_suffix = ", Ultra HD, 4K, cinematic composition" + # Avoid duplication if user already added it + if magic_suffix.strip(", ") not in prompt: + prompt += magic_suffix + + # Reset stop flag at start + shared.stop_everything = False + + # Callback to check for interruption during diffusion steps + def interrupt_callback(pipe, step_index, timestep, callback_kwargs): + if shared.stop_everything: + pipe._interrupt = True + + return callback_kwargs + + # Build generation kwargs + gen_kwargs = { + "prompt": prompt, + "negative_prompt": state['image_neg_prompt'], + "height": int(state['image_height']), + "width": int(state['image_width']), + "num_inference_steps": int(state['image_steps']), + "num_images_per_prompt": int(state['image_batch_size']), + "generator": generator, + "callback_on_step_end": interrupt_callback, + } + + # Add pipeline-specific parameters for CFG + cfg_val = state.get('image_cfg_scale', 0.0) + + if pipeline_type == 'qwenimage': + # Qwen-Image uses true_cfg_scale (typically 4.0) + gen_kwargs["true_cfg_scale"] = cfg_val + else: + # Z-Image and others use guidance_scale (typically 0.0 for Turbo) + gen_kwargs["guidance_scale"] = cfg_val + + t0 = time.time() + for i in range(int(state['image_batch_count'])): + if shared.stop_everything: + break + + generator.manual_seed(int(seed + i)) + batch_results = shared.image_model(**gen_kwargs).images + all_images.extend(batch_results) + + t1 = time.time() + save_generated_images(all_images, state, seed) + + total_images = int(state['image_batch_count']) * int(state['image_batch_size']) + total_steps = state["image_steps"] * int(state['image_batch_count']) + logger.info(f'Generated {total_images} {"image" if total_images == 1 else "images"} in {(t1 - t0):.2f} seconds ({total_steps / (t1 - t0):.2f} steps/s, seed {seed})') + + return all_images + + except Exception as e: + logger.error(f"Image generation failed: {e}") + traceback.print_exc() + return [] + + +def load_image_model_wrapper(model_name, dtype, attn_backend, cpu_offload, compile_model, quant_method): + if not model_name or model_name == 'None': + yield "No model selected" + return + + try: + yield f"Loading `{model_name}`..." 
+ unload_image_model() + + result = load_image_model( + model_name, + dtype=dtype, + attn_backend=attn_backend, + cpu_offload=cpu_offload, + compile_model=compile_model, + quant_method=quant_method + ) + + if result is not None: + shared.image_model_name = model_name + yield f"✓ Loaded **{model_name}** (quantization: {quant_method})" + else: + yield f"✗ Failed to load `{model_name}`" + except Exception: + yield f"Error:\n```\n{traceback.format_exc()}\n```" + + +def unload_image_model_wrapper(): + previous_name = shared.image_model_name + unload_image_model() + if previous_name != 'None': + return f"Model: **{previous_name}** (unloaded)" + return "No model loaded" + + +def download_image_model_wrapper(model_path): + from huggingface_hub import snapshot_download + + if not model_path: + yield "No model specified", gr.update() + return + + try: + model_path = model_path.strip() + if model_path.startswith('https://huggingface.co/'): + model_path = model_path[len('https://huggingface.co/'):] + elif model_path.startswith('huggingface.co/'): + model_path = model_path[len('huggingface.co/'):] + + if ':' in model_path: + model_id, branch = model_path.rsplit(':', 1) + else: + model_id, branch = model_path, 'main' + + folder_name = model_id.replace('/', '_') + output_folder = Path(shared.args.image_model_dir) / folder_name + + yield f"Downloading `{model_id}` (branch: {branch})...", gr.update() + + snapshot_download( + repo_id=model_id, + revision=branch, + local_dir=output_folder, + local_dir_use_symlinks=False, + ) + + new_choices = utils.get_available_image_models() + yield f"✓ Downloaded to `{output_folder}`", gr.update(choices=new_choices, value=folder_name) + except Exception: + yield f"Error:\n```\n{traceback.format_exc()}\n```", gr.update() diff --git a/modules/utils.py b/modules/utils.py index e8d23a02..b478f066 100644 --- a/modules/utils.py +++ b/modules/utils.py @@ -86,7 +86,7 @@ def check_model_loaded(): return True, None -def resolve_model_path(model_name_or_path): +def resolve_model_path(model_name_or_path, image_model=False): """ Resolves a model path, checking for a direct path before the default models directory. 
@@ -95,6 +95,8 @@ def resolve_model_path(model_name_or_path): path_candidate = Path(model_name_or_path) if path_candidate.exists(): return path_candidate + elif image_model: + return Path(f'{shared.args.image_model_dir}/{model_name_or_path}') else: return Path(f'{shared.args.model_dir}/{model_name_or_path}') @@ -153,6 +155,24 @@ def get_available_models(): return filtered_gguf_files + model_dirs +def get_available_image_models(): + model_dir = Path(shared.args.image_model_dir) + model_dir.mkdir(parents=True, exist_ok=True) + + # Find valid model directories + model_dirs = [] + for item in os.listdir(model_dir): + item_path = model_dir / item + if not item_path.is_dir(): + continue + + model_dirs.append(item) + + model_dirs = sorted(model_dirs, key=natural_keys) + + return model_dirs + + def get_available_ggufs(): model_list = [] model_dir = Path(shared.args.model_dir) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index 4be87c56..7076df4f 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -11,6 +11,7 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* +optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 @@ -34,6 +35,9 @@ wandb gradio==4.37.* https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl +# Diffusers +diffusers @ git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6 + # API flask_cloudflared==0.0.14 sse-starlette==1.6.5 diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index d9bf64cb..d6264089 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -9,6 +9,7 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* +optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 @@ -32,6 +33,9 @@ wandb gradio==4.37.* https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl +# Diffusers +diffusers @ git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6 + # API flask_cloudflared==0.0.14 sse-starlette==1.6.5 diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index 2c06c22f..985511fa 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -9,6 +9,7 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* +optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 @@ -32,6 +33,9 @@ wandb gradio==4.37.* https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl +# Diffusers +diffusers @ git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6 + # API flask_cloudflared==0.0.14 sse-starlette==1.6.5 diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index a4929d40..1c75806d 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -9,6 +9,7 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* +optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 @@ -32,6 +33,9 @@ wandb gradio==4.37.* https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl +# Diffusers +diffusers @ 
git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6 + # API flask_cloudflared==0.0.14 sse-starlette==1.6.5 diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index 6a7ce8a6..0f536b6a 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -9,6 +9,7 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* +optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 @@ -32,6 +33,9 @@ wandb gradio==4.37.* https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl +# Diffusers +diffusers @ git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6 + # API flask_cloudflared==0.0.14 sse-starlette==1.6.5 diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index 05957700..0f8970d0 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -9,6 +9,7 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* +optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 @@ -32,6 +33,9 @@ wandb gradio==4.37.* https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl +# Diffusers +diffusers @ git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6 + # API flask_cloudflared==0.0.14 sse-starlette==1.6.5 diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index 9e43c409..e862c1a1 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -9,6 +9,7 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* +optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 @@ -32,6 +33,9 @@ wandb gradio==4.37.* https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl +# Diffusers +diffusers @ git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6 + # API flask_cloudflared==0.0.14 sse-starlette==1.6.5 diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index 9931e1dc..a734ce82 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -11,6 +11,7 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* +optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 @@ -34,6 +35,9 @@ wandb gradio==4.37.* https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl +# Diffusers +diffusers @ git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6 + # API flask_cloudflared==0.0.14 sse-starlette==1.6.5 diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt index 818d2244..8561462c 100644 --- a/requirements/full/requirements_nowheels.txt +++ b/requirements/full/requirements_nowheels.txt @@ -9,6 +9,7 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* +optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 @@ -32,6 +33,9 @@ wandb gradio==4.37.* https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl +# Diffusers +diffusers @ 
git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6 + # API flask_cloudflared==0.0.14 sse-starlette==1.6.5 diff --git a/server.py b/server.py index c804c342..b02c50a2 100644 --- a/server.py +++ b/server.py @@ -5,6 +5,7 @@ from pathlib import Path from modules import shared from modules.block_requests import OpenMonkeyPatch, RequestBlocker +from modules.image_models import load_image_model from modules.logging_colors import logger from modules.prompts import load_prompt @@ -50,6 +51,7 @@ from modules import ( ui_chat, ui_default, ui_file_saving, + ui_image_generation, ui_model_menu, ui_notebook, ui_parameters, @@ -163,6 +165,7 @@ def create_interface(): ui_chat.create_character_settings_ui() # Character tab ui_model_menu.create_ui() # Model tab if not shared.args.portable: + ui_image_generation.create_ui() # Image generation tab training.create_ui() # Training tab ui_session.create_ui() # Session tab @@ -170,6 +173,8 @@ def create_interface(): ui_chat.create_event_handlers() ui_default.create_event_handlers() ui_notebook.create_event_handlers() + if not shared.args.portable: + ui_image_generation.create_event_handlers() # Other events ui_file_saving.create_event_handlers() @@ -256,6 +261,9 @@ if __name__ == "__main__": if new_settings: shared.settings.update(new_settings) + # Apply CLI overrides for image model settings (CLI flags take precedence over saved settings) + shared.apply_image_model_cli_overrides() + # Fallback settings for models shared.model_config['.*'] = get_fallback_settings() shared.model_config.move_to_end('.*', last=False) # Move to the beginning @@ -313,6 +321,22 @@ if __name__ == "__main__": if shared.args.lora: add_lora_to_model(shared.args.lora) + # Load image model if specified via CLI + if shared.args.image_model: + logger.info(f"Loading image model: {shared.args.image_model}") + result = load_image_model( + shared.args.image_model, + dtype=shared.settings.get('image_dtype', 'bfloat16'), + attn_backend=shared.settings.get('image_attn_backend', 'sdpa'), + cpu_offload=shared.settings.get('image_cpu_offload', False), + compile_model=shared.settings.get('image_compile', False), + quant_method=shared.settings.get('image_quant', 'none') + ) + if result is not None: + shared.image_model_name = shared.args.image_model + else: + logger.error(f"Failed to load image model: {shared.args.image_model}") + shared.generation_lock = Lock() if shared.args.idle_timeout > 0: From 6291e72129aa807768aa86390498613ee1408419 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 2 Dec 2025 09:57:04 -0800 Subject: [PATCH 04/61] Remove quanto for now (requires messy compilation) --- modules/image_models.py | 28 ++----------------- modules/shared.py | 2 +- modules/ui_image_generation.py | 2 +- requirements/full/requirements.txt | 1 - requirements/full/requirements_amd.txt | 1 - requirements/full/requirements_amd_noavx2.txt | 1 - .../full/requirements_apple_intel.txt | 1 - .../full/requirements_apple_silicon.txt | 1 - requirements/full/requirements_cpu_only.txt | 1 - .../full/requirements_cpu_only_noavx2.txt | 1 - requirements/full/requirements_noavx2.txt | 1 - requirements/full/requirements_nowheels.txt | 1 - 12 files changed, 4 insertions(+), 37 deletions(-) diff --git a/modules/image_models.py b/modules/image_models.py index e6f9a172..4d7cc5f9 100644 --- a/modules/image_models.py +++ b/modules/image_models.py @@ -11,7 +11,7 @@ def get_quantization_config(quant_method): Get the appropriate quantization config 
based on the selected method. Args: - quant_method: One of 'none', 'bnb-8bit', 'bnb-4bit', 'quanto-8bit', 'quanto-4bit', 'quanto-2bit' + quant_method: One of 'none', 'bnb-8bit', 'bnb-4bit' Returns: PipelineQuantizationConfig or None @@ -46,30 +46,6 @@ def get_quantization_config(quant_method): } ) - # Quanto 8-bit quantization - elif quant_method == 'quanto-8bit': - return PipelineQuantizationConfig( - quant_mapping={ - "transformer": QuantoConfig(weights_dtype="int8") - } - ) - - # Quanto 4-bit quantization - elif quant_method == 'quanto-4bit': - return PipelineQuantizationConfig( - quant_mapping={ - "transformer": QuantoConfig(weights_dtype="int4") - } - ) - - # Quanto 2-bit quantization - elif quant_method == 'quanto-2bit': - return PipelineQuantizationConfig( - quant_mapping={ - "transformer": QuantoConfig(weights_dtype="int2") - } - ) - else: logger.warning(f"Unknown quantization method: {quant_method}. Loading without quantization.") return None @@ -101,7 +77,7 @@ def load_image_model(model_name, dtype='bfloat16', attn_backend='sdpa', cpu_offl attn_backend: 'sdpa', 'flash_attention_2', or 'flash_attention_3' cpu_offload: Enable CPU offloading for low VRAM compile_model: Compile the model for faster inference (slow first run) - quant_method: Quantization method - 'none', 'bnb-8bit', 'bnb-4bit', 'quanto-8bit', 'quanto-4bit', 'quanto-2bit' + quant_method: Quantization method - 'none', 'bnb-8bit', 'bnb-4bit' """ import torch from diffusers import DiffusionPipeline diff --git a/modules/shared.py b/modules/shared.py index 316f7729..a96cd70c 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -60,7 +60,7 @@ group.add_argument('--image-attn-backend', type=str, default=None, choices=['sdp group.add_argument('--image-cpu-offload', action='store_true', help='Enable CPU offloading for image model.') group.add_argument('--image-compile', action='store_true', help='Compile the image model for faster inference.') group.add_argument('--image-quant', type=str, default=None, - choices=['none', 'bnb-8bit', 'bnb-4bit', 'quanto-8bit', 'quanto-4bit', 'quanto-2bit'], + choices=['none', 'bnb-8bit', 'bnb-4bit'], help='Quantization method for image model.') # Model loader diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 92c447c8..5ef66820 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -471,7 +471,7 @@ def create_ui(): with gr.Column(): shared.gradio['image_quant'] = gr.Dropdown( label='Quantization', - choices=['none', 'bnb-8bit', 'bnb-4bit', 'quanto-8bit', 'quanto-4bit', 'quanto-2bit'], + choices=['none', 'bnb-8bit', 'bnb-4bit'], value=shared.settings['image_quant'], info='Quantization method for reduced VRAM usage. Quanto supports lower precisions (2-bit, 4-bit, 8-bit).' 
) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index 7076df4f..a031bbab 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -11,7 +11,6 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* -optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index d6264089..ee8d67ac 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -9,7 +9,6 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* -optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index 985511fa..7b36b151 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -9,7 +9,6 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* -optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index 1c75806d..4f72d5ac 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -9,7 +9,6 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* -optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index 0f536b6a..7942b9f0 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -9,7 +9,6 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* -optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index 0f8970d0..96013c35 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -9,7 +9,6 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* -optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index e862c1a1..0813f06c 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -9,7 +9,6 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* -optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index a734ce82..ea7edf6c 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -11,7 +11,6 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* -optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt index 8561462c..15247d72 100644 --- a/requirements/full/requirements_nowheels.txt +++ b/requirements/full/requirements_nowheels.txt @@ -9,7 +9,6 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* -optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 From 9d07d3a229e3e949cfb32e9ef1fe32b328759985 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 2 Dec 2025 10:06:57 -0800 Subject: [PATCH 05/61] Make portable builds functional again 
after b3666e140de349a651aea22a6f418572925a5c62 --- modules/image_models.py | 3 +- modules/ui.py | 77 ++++++++++++++++++---------------- modules/ui_image_generation.py | 3 +- 3 files changed, 44 insertions(+), 39 deletions(-) diff --git a/modules/image_models.py b/modules/image_models.py index 4d7cc5f9..f2e0276a 100644 --- a/modules/image_models.py +++ b/modules/image_models.py @@ -2,7 +2,6 @@ import time import modules.shared as shared from modules.logging_colors import logger -from modules.torch_utils import get_device from modules.utils import resolve_model_path @@ -82,6 +81,8 @@ def load_image_model(model_name, dtype='bfloat16', attn_backend='sdpa', cpu_offl import torch from diffusers import DiffusionPipeline + from modules.torch_utils import get_device + logger.info(f"Loading image model \"{model_name}\" with quantization: {quant_method}") t0 = time.time() diff --git a/modules/ui.py b/modules/ui.py index 9700d297..ff5686e8 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -280,25 +280,26 @@ def list_interface_input_elements(): 'include_past_attachments', ] - # Image generation elements - elements += [ - 'image_prompt', - 'image_neg_prompt', - 'image_width', - 'image_height', - 'image_aspect_ratio', - 'image_steps', - 'image_cfg_scale', - 'image_seed', - 'image_batch_size', - 'image_batch_count', - 'image_model_menu', - 'image_dtype', - 'image_attn_backend', - 'image_compile', - 'image_cpu_offload', - 'image_quant', - ] + if not shared.args.portable: + # Image generation elements + elements += [ + 'image_prompt', + 'image_neg_prompt', + 'image_width', + 'image_height', + 'image_aspect_ratio', + 'image_steps', + 'image_cfg_scale', + 'image_seed', + 'image_batch_size', + 'image_batch_count', + 'image_model_menu', + 'image_dtype', + 'image_attn_backend', + 'image_compile', + 'image_cpu_offload', + 'image_quant', + ] return elements @@ -531,25 +532,29 @@ def setup_auto_save(): 'paste_to_attachment', 'include_past_attachments', - # Image generation tab (ui_image_generation.py) - 'image_prompt', - 'image_neg_prompt', - 'image_width', - 'image_height', - 'image_aspect_ratio', - 'image_steps', - 'image_cfg_scale', - 'image_seed', - 'image_batch_size', - 'image_batch_count', - 'image_model_menu', - 'image_dtype', - 'image_attn_backend', - 'image_compile', - 'image_cpu_offload', - 'image_quant', ] + if not shared.args.portable: + # Image generation tab (ui_image_generation.py) + change_elements += [ + 'image_prompt', + 'image_neg_prompt', + 'image_width', + 'image_height', + 'image_aspect_ratio', + 'image_steps', + 'image_cfg_scale', + 'image_seed', + 'image_batch_size', + 'image_batch_count', + 'image_model_menu', + 'image_dtype', + 'image_attn_backend', + 'image_compile', + 'image_cpu_offload', + 'image_quant', + ] + for element_name in change_elements: if element_name in shared.gradio: shared.gradio[element_name].change( diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 5ef66820..4f90d352 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -18,7 +18,6 @@ from modules.image_models import ( ) from modules.logging_colors import logger from modules.text_generation import stop_everything_event -from modules.torch_utils import get_device from modules.utils import gradio ASPECT_RATIOS = { @@ -667,7 +666,7 @@ def generate(state): """ import torch - from modules.torch_utils import clear_torch_cache + from modules.torch_utils import clear_torch_cache, get_device clear_torch_cache() From 97281ff83120c20180840ce8db3be80a255c861e Mon Sep 17 
00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 2 Dec 2025 11:20:52 -0800 Subject: [PATCH 06/61] UI: Fix an index error in the new image gallery --- modules/ui_image_generation.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 4f90d352..8ec879e9 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -290,8 +290,11 @@ def on_gallery_select(evt: gr.SelectData, current_page): if evt.index is None: return "", "Select an image to view its settings" + if not _image_cache: + get_all_history_images() + # Get the current page's images to find the actual file path - all_images = get_all_history_images() + all_images = _image_cache total_images = len(all_images) # Calculate the actual index in the full list @@ -506,9 +509,7 @@ def create_ui(): info="Enter HuggingFace path. Use : for branch, e.g. user/model:main" ) shared.gradio['image_download_btn'] = gr.Button("Download", variant='primary') - shared.gradio['image_model_status'] = gr.Markdown( - value=f"Model: **{shared.settings['image_model_menu']}** (not loaded)" if shared.settings['image_model_menu'] != 'None' else "No model selected" - ) + shared.gradio['image_model_status'] = gr.Markdown(value="") def create_event_handlers(): From 9448bf1caa8ad5bd704ae2610bc0b2738c9ca51f Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 2 Dec 2025 14:22:51 -0800 Subject: [PATCH 07/61] Image generation: add torchao quantization (supports torch.compile) --- modules/image_models.py | 31 +++++++++++++++++-- modules/shared.py | 2 +- modules/ui_image_generation.py | 4 +-- requirements/full/requirements.txt | 1 + requirements/full/requirements_amd.txt | 1 + requirements/full/requirements_amd_noavx2.txt | 1 + .../full/requirements_apple_intel.txt | 1 + .../full/requirements_apple_silicon.txt | 1 + requirements/full/requirements_cpu_only.txt | 1 + .../full/requirements_cpu_only_noavx2.txt | 1 + requirements/full/requirements_noavx2.txt | 1 + requirements/full/requirements_nowheels.txt | 1 + 12 files changed, 40 insertions(+), 6 deletions(-) diff --git a/modules/image_models.py b/modules/image_models.py index f2e0276a..2ef1e730 100644 --- a/modules/image_models.py +++ b/modules/image_models.py @@ -10,13 +10,14 @@ def get_quantization_config(quant_method): Get the appropriate quantization config based on the selected method. 
Args: - quant_method: One of 'none', 'bnb-8bit', 'bnb-4bit' + quant_method: One of 'none', 'bnb-8bit', 'bnb-4bit', + 'torchao-int8wo', 'torchao-fp4', 'torchao-float8wo' Returns: PipelineQuantizationConfig or None """ import torch - from diffusers import BitsAndBytesConfig, QuantoConfig + from diffusers import BitsAndBytesConfig, TorchAoConfig from diffusers.quantizers import PipelineQuantizationConfig if quant_method == 'none' or not quant_method: @@ -45,6 +46,30 @@ def get_quantization_config(quant_method): } ) + # torchao int8 weight-only + elif quant_method == 'torchao-int8wo': + return PipelineQuantizationConfig( + quant_mapping={ + "transformer": TorchAoConfig("int8wo") + } + ) + + # torchao fp4 (e2m1) + elif quant_method == 'torchao-fp4': + return PipelineQuantizationConfig( + quant_mapping={ + "transformer": TorchAoConfig("fp4_e2m1") + } + ) + + # torchao float8 weight-only + elif quant_method == 'torchao-float8wo': + return PipelineQuantizationConfig( + quant_mapping={ + "transformer": TorchAoConfig("float8wo") + } + ) + else: logger.warning(f"Unknown quantization method: {quant_method}. Loading without quantization.") return None @@ -76,7 +101,7 @@ def load_image_model(model_name, dtype='bfloat16', attn_backend='sdpa', cpu_offl attn_backend: 'sdpa', 'flash_attention_2', or 'flash_attention_3' cpu_offload: Enable CPU offloading for low VRAM compile_model: Compile the model for faster inference (slow first run) - quant_method: Quantization method - 'none', 'bnb-8bit', 'bnb-4bit' + quant_method: 'none', 'bnb-8bit', 'bnb-4bit', or torchao options (int8wo, fp4, float8wo) """ import torch from diffusers import DiffusionPipeline diff --git a/modules/shared.py b/modules/shared.py index a96cd70c..4a0fd986 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -60,7 +60,7 @@ group.add_argument('--image-attn-backend', type=str, default=None, choices=['sdp group.add_argument('--image-cpu-offload', action='store_true', help='Enable CPU offloading for image model.') group.add_argument('--image-compile', action='store_true', help='Compile the image model for faster inference.') group.add_argument('--image-quant', type=str, default=None, - choices=['none', 'bnb-8bit', 'bnb-4bit'], + choices=['none', 'bnb-8bit', 'bnb-4bit', 'torchao-int8wo', 'torchao-fp4', 'torchao-float8wo'], help='Quantization method for image model.') # Model loader diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 8ec879e9..1b0e58f3 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -473,9 +473,9 @@ def create_ui(): with gr.Column(): shared.gradio['image_quant'] = gr.Dropdown( label='Quantization', - choices=['none', 'bnb-8bit', 'bnb-4bit'], + choices=['none', 'bnb-8bit', 'bnb-4bit', 'torchao-int8wo', 'torchao-fp4', 'torchao-float8wo'], value=shared.settings['image_quant'], - info='Quantization method for reduced VRAM usage. Quanto supports lower precisions (2-bit, 4-bit, 8-bit).' + info='BnB: bitsandbytes quantization. torchao: int8wo, fp4, float8wo.' 
) shared.gradio['image_dtype'] = gr.Dropdown( diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index a031bbab..4e02d76f 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -25,6 +25,7 @@ safetensors==0.6.* scipy sentencepiece tensorboard +torchao==0.14.* transformers==4.57.* triton-windows==3.5.1.post21; platform_system == "Windows" tqdm diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index ee8d67ac..c0fbd9ab 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -23,6 +23,7 @@ safetensors==0.6.* scipy sentencepiece tensorboard +torchao==0.14.* transformers==4.57.* triton-windows==3.5.1.post21; platform_system == "Windows" tqdm diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index 7b36b151..b330646a 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -23,6 +23,7 @@ safetensors==0.6.* scipy sentencepiece tensorboard +torchao==0.14.* transformers==4.57.* triton-windows==3.5.1.post21; platform_system == "Windows" tqdm diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index 4f72d5ac..c2c64337 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -23,6 +23,7 @@ safetensors==0.6.* scipy sentencepiece tensorboard +torchao==0.14.* transformers==4.57.* triton-windows==3.5.1.post21; platform_system == "Windows" tqdm diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index 7942b9f0..10889215 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -23,6 +23,7 @@ safetensors==0.6.* scipy sentencepiece tensorboard +torchao==0.14.* transformers==4.57.* triton-windows==3.5.1.post21; platform_system == "Windows" tqdm diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index 96013c35..ffcd6473 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -23,6 +23,7 @@ safetensors==0.6.* scipy sentencepiece tensorboard +torchao==0.14.* transformers==4.57.* triton-windows==3.5.1.post21; platform_system == "Windows" tqdm diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index 0813f06c..daa2444f 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -23,6 +23,7 @@ safetensors==0.6.* scipy sentencepiece tensorboard +torchao==0.14.* transformers==4.57.* triton-windows==3.5.1.post21; platform_system == "Windows" tqdm diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index ea7edf6c..a43a7724 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -25,6 +25,7 @@ safetensors==0.6.* scipy sentencepiece tensorboard +torchao==0.14.* transformers==4.57.* triton-windows==3.5.1.post21; platform_system == "Windows" tqdm diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt index 15247d72..ac24f0cb 100644 --- a/requirements/full/requirements_nowheels.txt +++ b/requirements/full/requirements_nowheels.txt @@ -23,6 +23,7 @@ 
safetensors==0.6.* scipy sentencepiece tensorboard +torchao==0.14.* transformers==4.57.* triton-windows==3.5.1.post21; platform_system == "Windows" tqdm From 5433ef33331b3b24da9d58b73287398457459de1 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 3 Dec 2025 11:50:35 -0800 Subject: [PATCH 08/61] Add an API endpoint for generating images --- extensions/openai/images.py | 184 ++++++++++++++++++++++++++---------- extensions/openai/script.py | 48 +++++----- extensions/openai/typing.py | 40 ++++++++ 3 files changed, 200 insertions(+), 72 deletions(-) diff --git a/extensions/openai/images.py b/extensions/openai/images.py index 92bd85f0..3a5288e6 100644 --- a/extensions/openai/images.py +++ b/extensions/openai/images.py @@ -1,70 +1,154 @@ +""" +OpenAI-compatible image generation using local diffusion models. +""" + +import base64 +import io +import json import os import time +from datetime import datetime -import requests - +import numpy as np from extensions.openai.errors import ServiceUnavailableError +from modules import shared +from modules.logging_colors import logger +from PIL.PngImagePlugin import PngInfo -def generations(prompt: str, size: str, response_format: str, n: int): - # Stable Diffusion callout wrapper for txt2img - # Low effort implementation for compatibility. With only "prompt" being passed and assuming DALL-E - # the results will be limited and likely poor. SD has hundreds of models and dozens of settings. - # If you want high quality tailored results you should just use the Stable Diffusion API directly. - # it's too general an API to try and shape the result with specific tags like negative prompts - # or "masterpiece", etc. SD configuration is beyond the scope of this API. - # At this point I will not add the edits and variations endpoints (ie. img2img) because they - # require changing the form data handling to accept multipart form data, also to properly support - # url return types will require file management and a web serving files... Perhaps later! - base_model_size = 512 if 'SD_BASE_MODEL_SIZE' not in os.environ else int(os.environ.get('SD_BASE_MODEL_SIZE', 512)) - sd_defaults = { - 'sampler_name': 'DPM++ 2M Karras', # vast improvement - 'steps': 30, +def generations(prompt: str, size: str, response_format: str, n: int, + negative_prompt: str = "", steps: int = 9, seed: int = -1, + cfg_scale: float = 0.0, batch_count: int = 1): + """ + Generate images using the loaded diffusion model. + + Args: + prompt: Text description of the desired image + size: Image dimensions as "WIDTHxHEIGHT" + response_format: 'url' or 'b64_json' + n: Number of images per batch + negative_prompt: What to avoid in the image + steps: Number of inference steps + seed: Random seed (-1 for random) + cfg_scale: Classifier-free guidance scale + batch_count: Number of sequential batches + + Returns: + dict with 'created' timestamp and 'data' list of images + """ + import torch + from modules.image_models import get_pipeline_type + from modules.torch_utils import clear_torch_cache, get_device + + if shared.image_model is None: + raise ServiceUnavailableError("No image model loaded. 
Load a model via the UI first.") + + clear_torch_cache() + + # Parse dimensions + try: + width, height = [int(x) for x in size.split('x')] + except (ValueError, IndexError): + width, height = 1024, 1024 + + # Handle seed + if seed == -1: + seed = np.random.randint(0, 2**32 - 1) + + device = get_device() or "cpu" + generator = torch.Generator(device).manual_seed(int(seed)) + + # Get pipeline type for CFG parameter name + pipeline_type = getattr(shared, 'image_pipeline_type', None) or get_pipeline_type(shared.image_model) + + # Build generation kwargs + gen_kwargs = { + "prompt": prompt, + "negative_prompt": negative_prompt, + "height": height, + "width": width, + "num_inference_steps": steps, + "num_images_per_prompt": n, + "generator": generator, } - width, height = [int(x) for x in size.split('x')] # ignore the restrictions on size + # Pipeline-specific CFG parameter + if pipeline_type == 'qwenimage': + gen_kwargs["true_cfg_scale"] = cfg_scale + else: + gen_kwargs["guidance_scale"] = cfg_scale - # to hack on better generation, edit default payload. - payload = { - 'prompt': prompt, # ignore prompt limit of 1000 characters - 'width': width, - 'height': height, - 'batch_size': n, - } - payload.update(sd_defaults) + # Generate + all_images = [] + t0 = time.time() - scale = min(width, height) / base_model_size - if scale >= 1.2: - # for better performance with the default size (1024), and larger res. - scaler = { - 'width': width // scale, - 'height': height // scale, - 'hr_scale': scale, - 'enable_hr': True, - 'hr_upscaler': 'Latent', - 'denoising_strength': 0.68, - } - payload.update(scaler) + shared.stop_everything = False + def interrupt_callback(pipe, step_index, timestep, callback_kwargs): + if shared.stop_everything: + pipe._interrupt = True + return callback_kwargs + + gen_kwargs["callback_on_step_end"] = interrupt_callback + + for i in range(batch_count): + if shared.stop_everything: + break + generator.manual_seed(int(seed + i)) + batch_results = shared.image_model(**gen_kwargs).images + all_images.extend(batch_results) + + t1 = time.time() + total_images = len(all_images) + total_steps = steps * batch_count + logger.info(f'Generated {total_images} {"image" if total_images == 1 else "images"} in {(t1 - t0):.2f} seconds ({total_steps / (t1 - t0):.2f} steps/s, seed {seed})') + + # Save images + _save_images(all_images, prompt, negative_prompt, width, height, steps, seed, cfg_scale) + + # Build response resp = { 'created': int(time.time()), 'data': [] } - from extensions.openai.script import params - # TODO: support SD_WEBUI_AUTH username:password pair. - sd_url = f"{os.environ.get('SD_WEBUI_URL', params.get('sd_webui_url', ''))}/sdapi/v1/txt2img" - - response = requests.post(url=sd_url, json=payload) - r = response.json() - if response.status_code != 200 or 'images' not in r: - print(r) - raise ServiceUnavailableError(r.get('error', 'Unknown error calling Stable Diffusion'), code=response.status_code, internal_message=r.get('errors', None)) - # r['parameters']... - for b64_json in r['images']: + for img in all_images: + b64 = _image_to_base64(img) if response_format == 'b64_json': - resp['data'].extend([{'b64_json': b64_json}]) + resp['data'].append({'b64_json': b64}) else: - resp['data'].extend([{'url': f'data:image/png;base64,{b64_json}'}]) # yeah it's lazy. 
requests.get() will not work with this + resp['data'].append({'url': f'data:image/png;base64,{b64}'}) return resp + + +def _image_to_base64(image) -> str: + buffered = io.BytesIO() + image.save(buffered, format="PNG") + return base64.b64encode(buffered.getvalue()).decode('utf-8') + + +def _save_images(images, prompt, negative_prompt, width, height, steps, seed, cfg_scale): + """Save images with metadata.""" + date_str = datetime.now().strftime("%Y-%m-%d") + folder = os.path.join("user_data", "image_outputs", date_str) + os.makedirs(folder, exist_ok=True) + + metadata = { + 'image_prompt': prompt, + 'image_neg_prompt': negative_prompt, + 'image_width': width, + 'image_height': height, + 'image_steps': steps, + 'image_seed': seed, + 'image_cfg_scale': cfg_scale, + 'model': getattr(shared, 'image_model_name', 'unknown'), + } + + for idx, img in enumerate(images): + ts = datetime.now().strftime("%H-%M-%S") + filepath = os.path.join(folder, f"{ts}_{seed:010d}_{idx:03d}.png") + + png_info = PngInfo() + png_info.add_text("image_gen_settings", json.dumps(metadata)) + img.save(filepath, pnginfo=png_info) diff --git a/extensions/openai/script.py b/extensions/openai/script.py index 9440cb1e..1e982731 100644 --- a/extensions/openai/script.py +++ b/extensions/openai/script.py @@ -7,26 +7,23 @@ import traceback from collections import deque from threading import Thread +import extensions.openai.completions as OAIcompletions +import extensions.openai.logits as OAIlogits +import extensions.openai.models as OAImodels import uvicorn +from extensions.openai.tokens import token_count, token_decode, token_encode +from extensions.openai.utils import _start_cloudflared from fastapi import Depends, FastAPI, Header, HTTPException from fastapi.middleware.cors import CORSMiddleware from fastapi.requests import Request from fastapi.responses import JSONResponse -from pydub import AudioSegment -from sse_starlette import EventSourceResponse -from starlette.concurrency import iterate_in_threadpool - -import extensions.openai.completions as OAIcompletions -import extensions.openai.images as OAIimages -import extensions.openai.logits as OAIlogits -import extensions.openai.models as OAImodels -from extensions.openai.errors import ServiceUnavailableError -from extensions.openai.tokens import token_count, token_decode, token_encode -from extensions.openai.utils import _start_cloudflared from modules import shared from modules.logging_colors import logger from modules.models import unload_model from modules.text_generation import stop_everything_event +from pydub import AudioSegment +from sse_starlette import EventSourceResponse +from starlette.concurrency import iterate_in_threadpool from .typing import ( ChatCompletionRequest, @@ -40,6 +37,8 @@ from .typing import ( EmbeddingsResponse, EncodeRequest, EncodeResponse, + ImageGenerationRequest, + ImageGenerationResponse, LoadLorasRequest, LoadModelRequest, LogitsRequest, @@ -228,19 +227,24 @@ async def handle_audio_transcription(request: Request): return JSONResponse(content=transcription) -@app.post('/v1/images/generations', dependencies=check_key) -async def handle_image_generation(request: Request): +@app.post('/v1/images/generations', response_model=ImageGenerationResponse, dependencies=check_key) +async def handle_image_generation(request_data: ImageGenerationRequest): + import extensions.openai.images as OAIimages - if not os.environ.get('SD_WEBUI_URL', params.get('sd_webui_url', '')): - raise ServiceUnavailableError("Stable Diffusion not available. 
SD_WEBUI_URL not set.") + width, height = request_data.get_width_height() - body = await request.json() - prompt = body['prompt'] - size = body.get('size', '1024x1024') - response_format = body.get('response_format', 'url') # or b64_json - n = body.get('n', 1) # ignore the batch limits of max 10 - - response = await OAIimages.generations(prompt=prompt, size=size, response_format=response_format, n=n) + response = await asyncio.to_thread( + OAIimages.generations, + prompt=request_data.prompt, + size=f"{width}x{height}", + response_format=request_data.response_format, + n=request_data.batch_size, # <-- use resolved batch_size + negative_prompt=request_data.negative_prompt, + steps=request_data.steps, + seed=request_data.seed, + cfg_scale=request_data.cfg_scale, + batch_count=request_data.batch_count, + ) return JSONResponse(response) diff --git a/extensions/openai/typing.py b/extensions/openai/typing.py index 56d91582..a24b844b 100644 --- a/extensions/openai/typing.py +++ b/extensions/openai/typing.py @@ -264,6 +264,46 @@ class LoadLorasRequest(BaseModel): lora_names: List[str] +class ImageGenerationRequest(BaseModel): + """OpenAI-compatible image generation request with extended parameters.""" + # Required + prompt: str + + # Generation parameters + negative_prompt: str = "" + size: str = Field(default="1024x1024", description="'WIDTHxHEIGHT'") + steps: int = Field(default=9, ge=1) + cfg_scale: float = Field(default=0.0, ge=0.0) + seed: int = Field(default=-1, description="-1 for random") + batch_size: int | None = Field(default=None, ge=1, description="Parallel batch size (VRAM heavy)") + n: int = Field(default=1, ge=1, description="Alias for batch_size (OpenAI compatibility)") + batch_count: int = Field(default=1, ge=1, description="Sequential batch count") + + # OpenAI compatibility (unused) + model: str | None = None + response_format: str = "b64_json" + user: str | None = None + + @model_validator(mode='after') + def resolve_batch_size(self): + """Use batch_size if provided, otherwise fall back to n.""" + if self.batch_size is None: + self.batch_size = self.n + return self + + def get_width_height(self) -> tuple[int, int]: + try: + parts = self.size.lower().split('x') + return int(parts[0]), int(parts[1]) + except (ValueError, IndexError): + return 1024, 1024 + + +class ImageGenerationResponse(BaseModel): + created: int = int(time.time()) + data: List[dict] + + def to_json(obj): return json.dumps(obj.__dict__, indent=4) From 5ad174fad2afbbe2c06e841c87e06515a33093ed Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 3 Dec 2025 11:58:54 -0800 Subject: [PATCH 09/61] docs: Add an image generation API example --- docs/12 - OpenAI API.md | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/docs/12 - OpenAI API.md b/docs/12 - OpenAI API.md index 227541a3..94a95b10 100644 --- a/docs/12 - OpenAI API.md +++ b/docs/12 - OpenAI API.md @@ -139,6 +139,35 @@ curl http://127.0.0.1:5000/v1/completions \ For base64-encoded images, just replace the inner "url" values with this format: `data:image/FORMAT;base64,BASE64_STRING` where FORMAT is the file type (png, jpeg, gif, etc.) and BASE64_STRING is your base64-encoded image data. +#### Image generation + +```shell +curl http://127.0.0.1:5000/v1/images/generations \ + -H "Content-Type: application/json" \ + -d '{ + "prompt": "an orange tree", + "steps": 9, + "cfg_scale": 0, + "batch_size": 1, + "batch_count": 1 + }' +``` + +You need to load an image model first. 
You can do this via the UI, or by adding `--image-model your_model_name` when launching the server. + +The output is a JSON object containing a `data` array. Each element has a `b64_json` field with the base64-encoded PNG image: + +```json +{ + "created": 1764791227, + "data": [ + { + "b64_json": "iVBORw0KGgo..." + } + ] +} +``` + #### SSE streaming ```shell @@ -419,7 +448,6 @@ The following environment variables can be used (they take precedence over every | `OPENEDAI_CERT_PATH` | SSL certificate file path | cert.pem | | `OPENEDAI_KEY_PATH` | SSL key file path | key.pem | | `OPENEDAI_DEBUG` | Enable debugging (set to 1) | 1 | -| `SD_WEBUI_URL` | WebUI URL (used by endpoint) | http://127.0.0.1:7861 | | `OPENEDAI_EMBEDDING_MODEL` | Embedding model (if applicable) | sentence-transformers/all-mpnet-base-v2 | | `OPENEDAI_EMBEDDING_DEVICE` | Embedding device (if applicable) | cuda | @@ -430,7 +458,6 @@ You can also set the following variables in your `settings.yaml` file: ``` openai-embedding_device: cuda openai-embedding_model: "sentence-transformers/all-mpnet-base-v2" -openai-sd_webui_url: http://127.0.0.1:7861 openai-debug: 1 ``` From 4468c49439685dc8bc68e9d7a6109694a2eab72b Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 3 Dec 2025 12:02:47 -0800 Subject: [PATCH 10/61] Add semaphore to image generation API endpoint --- extensions/openai/script.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/extensions/openai/script.py b/extensions/openai/script.py index 1e982731..65805629 100644 --- a/extensions/openai/script.py +++ b/extensions/openai/script.py @@ -53,12 +53,12 @@ from .typing import ( params = { 'embedding_device': 'cpu', 'embedding_model': 'sentence-transformers/all-mpnet-base-v2', - 'sd_webui_url': '', 'debug': 0 } streaming_semaphore = asyncio.Semaphore(1) +image_generation_semaphore = asyncio.Semaphore(1) def verify_api_key(authorization: str = Header(None)) -> None: @@ -231,21 +231,22 @@ async def handle_audio_transcription(request: Request): async def handle_image_generation(request_data: ImageGenerationRequest): import extensions.openai.images as OAIimages - width, height = request_data.get_width_height() + async with image_generation_semaphore: + width, height = request_data.get_width_height() - response = await asyncio.to_thread( - OAIimages.generations, - prompt=request_data.prompt, - size=f"{width}x{height}", - response_format=request_data.response_format, - n=request_data.batch_size, # <-- use resolved batch_size - negative_prompt=request_data.negative_prompt, - steps=request_data.steps, - seed=request_data.seed, - cfg_scale=request_data.cfg_scale, - batch_count=request_data.batch_count, - ) - return JSONResponse(response) + response = await asyncio.to_thread( + OAIimages.generations, + prompt=request_data.prompt, + size=f"{width}x{height}", + response_format=request_data.response_format, + n=request_data.batch_size, # <-- use resolved batch_size + negative_prompt=request_data.negative_prompt, + steps=request_data.steps, + seed=request_data.seed, + cfg_scale=request_data.cfg_scale, + batch_count=request_data.batch_count, + ) + return JSONResponse(response) @app.post("/v1/embeddings", response_model=EmbeddingsResponse, dependencies=check_key) From 906dc549697478be5b8816e12b7dc94fd34133b0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 3 Dec 2025 12:15:38 -0800 Subject: [PATCH 11/61] Load `--image-model` before `--model` --- 
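A minimal Python client sketch for the `/v1/images/generations` endpoint added in the patches above, assuming the server is running at `http://127.0.0.1:5000` with an image model already loaded; it uses the `requests` library, the field names mirror `ImageGenerationRequest`, and the output filenames are illustrative.

```python
import base64

import requests

# Assumes a local server started with an image model loaded
# (selected in the UI or passed via --image-model at launch).
url = "http://127.0.0.1:5000/v1/images/generations"
payload = {
    "prompt": "an orange tree",
    "negative_prompt": "",
    "size": "1024x1024",            # "WIDTHxHEIGHT"
    "steps": 9,
    "cfg_scale": 0,
    "seed": -1,                     # -1 picks a random seed
    "batch_size": 1,                # images per batch (alias: "n")
    "batch_count": 1,               # sequential batches
    "response_format": "b64_json",
}

response = requests.post(url, json=payload, timeout=600)
response.raise_for_status()

# Each entry in "data" holds a base64-encoded PNG in "b64_json".
for i, item in enumerate(response.json()["data"]):
    with open(f"image_{i}.png", "wb") as f:  # illustrative filename
        f.write(base64.b64decode(item["b64_json"]))
```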
server.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/server.py b/server.py index b02c50a2..0c5d14ce 100644 --- a/server.py +++ b/server.py @@ -275,6 +275,22 @@ if __name__ == "__main__": if extension not in shared.args.extensions: shared.args.extensions.append(extension) + # Load image model if specified via CLI + if shared.args.image_model: + logger.info(f"Loading image model: {shared.args.image_model}") + result = load_image_model( + shared.args.image_model, + dtype=shared.settings.get('image_dtype', 'bfloat16'), + attn_backend=shared.settings.get('image_attn_backend', 'sdpa'), + cpu_offload=shared.settings.get('image_cpu_offload', False), + compile_model=shared.settings.get('image_compile', False), + quant_method=shared.settings.get('image_quant', 'none') + ) + if result is not None: + shared.image_model_name = shared.args.image_model + else: + logger.error(f"Failed to load image model: {shared.args.image_model}") + available_models = utils.get_available_models() # Model defined through --model @@ -321,22 +337,6 @@ if __name__ == "__main__": if shared.args.lora: add_lora_to_model(shared.args.lora) - # Load image model if specified via CLI - if shared.args.image_model: - logger.info(f"Loading image model: {shared.args.image_model}") - result = load_image_model( - shared.args.image_model, - dtype=shared.settings.get('image_dtype', 'bfloat16'), - attn_backend=shared.settings.get('image_attn_backend', 'sdpa'), - cpu_offload=shared.settings.get('image_cpu_offload', False), - compile_model=shared.settings.get('image_compile', False), - quant_method=shared.settings.get('image_quant', 'none') - ) - if result is not None: - shared.image_model_name = shared.args.image_model - else: - logger.error(f"Failed to load image model: {shared.args.image_model}") - shared.generation_lock = Lock() if shared.args.idle_timeout > 0: From 373baa5c9cb46b629b5f7362986ac3ceda97bc08 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 3 Dec 2025 14:43:56 -0800 Subject: [PATCH 12/61] UI: Minor image gallery improvements --- modules/ui_image_generation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 1b0e58f3..2ea34551 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -29,7 +29,7 @@ ASPECT_RATIOS = { } STEP = 16 -IMAGES_PER_PAGE = 64 +IMAGES_PER_PAGE = 32 # Settings keys to save in PNG metadata (Generate tab only) METADATA_SETTINGS_KEYS = [ @@ -172,7 +172,7 @@ def format_metadata_for_display(metadata): if not metadata: return "No generation settings found in this image." 
- lines = ["**Generation Settings**", ""] + lines = [] # Display in a nice order display_order = [ @@ -441,7 +441,7 @@ def create_ui(): ) with gr.Column(scale=1): - gr.Markdown("### Selected Image") + gr.Markdown("### Generation Settings") shared.gradio['image_settings_display'] = gr.Markdown("Select an image to view its settings") shared.gradio['image_send_to_generate'] = gr.Button("Send to Generate", variant="primary") shared.gradio['image_gallery_status'] = gr.Markdown("") From 59285d501d505e0e1939861baea161870c4ed4c6 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 3 Dec 2025 16:03:31 -0800 Subject: [PATCH 13/61] Image generation: Small UI improvements --- js/switch_tabs.js | 14 ++++++++++++++ modules/ui_image_generation.py | 5 +++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/js/switch_tabs.js b/js/switch_tabs.js index 7fb78aea..36e5736b 100644 --- a/js/switch_tabs.js +++ b/js/switch_tabs.js @@ -36,3 +36,17 @@ function switch_to_character() { document.getElementById("character-tab-button").click(); scrollToTop(); } + +function switch_to_image_ai_generate() { + const container = document.querySelector("#image-ai-tab"); + const buttons = container.getElementsByTagName("button"); + + for (let i = 0; i < buttons.length; i++) { + if (buttons[i].textContent.trim() === "Generate") { + buttons[i].click(); + break; + } + } + + scrollToTop(); +} diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 2ea34551..3a64bc8b 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -418,9 +418,9 @@ def create_ui(): # Pagination controls with gr.Row(): shared.gradio['image_refresh_history'] = gr.Button("🔄 Refresh", elem_classes="refresh-button") - shared.gradio['image_prev_page'] = gr.Button("◀ Prev", elem_classes="refresh-button") + shared.gradio['image_prev_page'] = gr.Button("◀ Prev Page", elem_classes="refresh-button") shared.gradio['image_page_info'] = gr.Markdown(value=get_initial_page_info, elem_id="image-page-info") - shared.gradio['image_next_page'] = gr.Button("Next ▶", elem_classes="refresh-button") + shared.gradio['image_next_page'] = gr.Button("Next Page ▶", elem_classes="refresh-button") shared.gradio['image_page_input'] = gr.Number(value=1, label="Page", precision=0, minimum=1, scale=0, min_width=80) shared.gradio['image_go_to_page'] = gr.Button("Go", elem_classes="refresh-button", scale=0, min_width=50) @@ -649,6 +649,7 @@ def create_event_handlers(): 'image_cfg_scale', 'image_gallery_status' ), + js=f'() => {{{ui.switch_tabs_js}; switch_to_image_ai_generate()}}', show_progress=False ) From 49c60882bf8f4806d0f47e890d7f434007941796 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 3 Dec 2025 16:07:51 -0800 Subject: [PATCH 14/61] Image generation: Safer image uploading --- modules/chat.py | 16 +--------------- modules/image_utils.py | 20 ++++++++++++++++---- modules/ui_image_generation.py | 4 ++-- 3 files changed, 19 insertions(+), 21 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index 55984d7a..acfc2f66 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -3,7 +3,6 @@ import copy import functools import html import json -import os import pprint import re import shutil @@ -26,6 +25,7 @@ from modules.html_generator import ( convert_to_markdown, make_thumbnail ) +from modules.image_utils import open_image_safely from modules.logging_colors import logger from modules.text_generation import ( generate_reply, 
@@ -1516,20 +1516,6 @@ def load_instruction_template_memoized(template): return load_instruction_template(template) -def open_image_safely(path): - if path is None or not isinstance(path, str) or not Path(path).exists(): - return None - - if os.path.islink(path): - return None - - try: - return Image.open(path) - except Exception as e: - logger.error(f"Failed to open image file: {path}. Reason: {e}") - return None - - def upload_character(file, img_path, tavern=False): img = open_image_safely(img_path) decoded_file = file if isinstance(file, str) else file.decode('utf-8') diff --git a/modules/image_utils.py b/modules/image_utils.py index 658f00d7..d2809fef 100644 --- a/modules/image_utils.py +++ b/modules/image_utils.py @@ -1,9 +1,7 @@ -""" -Shared image processing utilities for multimodal support. -Used by both ExLlamaV3 and llama.cpp implementations. -""" import base64 import io +import os +from pathlib import Path from typing import Any, List, Tuple from PIL import Image @@ -11,6 +9,20 @@ from PIL import Image from modules.logging_colors import logger +def open_image_safely(path): + if path is None or not isinstance(path, str) or not Path(path).exists(): + return None + + if os.path.islink(path): + return None + + try: + return Image.open(path) + except Exception as e: + logger.error(f"Failed to open image file: {path}. Reason: {e}") + return None + + def convert_pil_to_base64(image: Image.Image) -> str: """Converts a PIL Image to a base64 encoded string.""" buffered = io.BytesIO() diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 3a64bc8b..06ef3e82 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -7,7 +7,6 @@ from pathlib import Path import gradio as gr import numpy as np -from PIL import Image from PIL.PngImagePlugin import PngInfo from modules import shared, ui, utils @@ -16,6 +15,7 @@ from modules.image_models import ( load_image_model, unload_image_model ) +from modules.image_utils import open_image_safely from modules.logging_colors import logger from modules.text_generation import stop_everything_event from modules.utils import gradio @@ -159,7 +159,7 @@ def save_generated_images(images, state, actual_seed): def read_image_metadata(image_path): """Read generation metadata from PNG file.""" try: - with Image.open(image_path) as img: + with open_image_safely(image_path) as img: if hasattr(img, 'text') and 'image_gen_settings' in img.text: return json.loads(img.text['image_gen_settings']) except Exception as e: From fbca54957eec4ca49c73eac56159fa92603724b7 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 3 Dec 2025 16:13:07 -0800 Subject: [PATCH 15/61] Image generation: Yield partial results for batch count > 1 --- modules/ui_image_generation.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 06ef3e82..08cf3f64 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -677,7 +677,8 @@ def generate(state): if not model_name or model_name == 'None': logger.error("No image model selected. 
Go to the Model tab and select a model.") - return [] + yield [] + return if shared.image_model is None: result = load_image_model( @@ -690,7 +691,8 @@ def generate(state): ) if result is None: logger.error(f"Failed to load model `{model_name}`.") - return [] + yield [] + return shared.image_model_name = model_name @@ -760,6 +762,7 @@ def generate(state): generator.manual_seed(int(seed + i)) batch_results = shared.image_model(**gen_kwargs).images all_images.extend(batch_results) + yield all_images t1 = time.time() save_generated_images(all_images, state, seed) @@ -768,12 +771,12 @@ def generate(state): total_steps = state["image_steps"] * int(state['image_batch_count']) logger.info(f'Generated {total_images} {"image" if total_images == 1 else "images"} in {(t1 - t0):.2f} seconds ({total_steps / (t1 - t0):.2f} steps/s, seed {seed})') - return all_images + yield all_images except Exception as e: logger.error(f"Image generation failed: {e}") traceback.print_exc() - return [] + yield [] def load_image_model_wrapper(model_name, dtype, attn_backend, cpu_offload, compile_model, quant_method): From c93d27add3b15c0494d436f1a3361c0b4862c626 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 3 Dec 2025 18:29:43 -0800 Subject: [PATCH 16/61] Update llama.cpp --- requirements/full/requirements.txt | 4 ++-- requirements/full/requirements_amd.txt | 4 ++-- requirements/full/requirements_amd_noavx2.txt | 4 ++-- requirements/full/requirements_apple_intel.txt | 4 ++-- requirements/full/requirements_apple_silicon.txt | 4 ++-- requirements/full/requirements_cpu_only.txt | 4 ++-- requirements/full/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/full/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements.txt | 4 ++-- requirements/portable/requirements_amd.txt | 4 ++-- requirements/portable/requirements_amd_noavx2.txt | 4 ++-- requirements/portable/requirements_apple_intel.txt | 4 ++-- requirements/portable/requirements_apple_silicon.txt | 4 ++-- requirements/portable/requirements_cpu_only.txt | 4 ++-- requirements/portable/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/portable/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements_vulkan.txt | 4 ++-- requirements/portable/requirements_vulkan_noavx2.txt | 4 ++-- 18 files changed, 36 insertions(+), 36 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index 4e02d76f..c1f87988 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -44,8 +44,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.16/exllamav3-0.0.16+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" 
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.16/exllamav3-0.0.16+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index c0fbd9ab..512231e0 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -42,7 +42,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index b330646a..1ccc507c 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -42,7 +42,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index c2c64337..4d8aa771 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -42,5 +42,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels 
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index 10889215..5290aa71 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -42,5 +42,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index ffcd6473..d9c76a31 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -42,5 +42,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index daa2444f..1a796c21 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -42,5 +42,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine 
== "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index a43a7724..8105abaa 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -44,8 +44,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.16/exllamav3-0.0.16+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.16/exllamav3-0.0.16+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt index 5f9b69e8..0b944d48 100644 --- a/requirements/portable/requirements.txt +++ b/requirements/portable/requirements.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_amd.txt b/requirements/portable/requirements_amd.txt index 10a6b5af..9b43c901 100644 --- a/requirements/portable/requirements_amd.txt +++ b/requirements/portable/requirements_amd.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+vulkan-py3-none-win_amd64.whl; 
platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_amd_noavx2.txt b/requirements/portable/requirements_amd_noavx2.txt index 4686b9de..825e6dec 100644 --- a/requirements/portable/requirements_amd_noavx2.txt +++ b/requirements/portable/requirements_amd_noavx2.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+rocm6.4.4avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+rocm6.4.4avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt index 890568f4..864b6775 100644 --- a/requirements/portable/requirements_apple_intel.txt +++ b/requirements/portable/requirements_apple_intel.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt index e480db8f..03e090cf 100644 --- a/requirements/portable/requirements_apple_silicon.txt +++ b/requirements/portable/requirements_apple_silicon.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < 
"24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt index c2a7d040..7dd851e8 100644 --- a/requirements/portable/requirements_cpu_only.txt +++ b/requirements/portable/requirements_cpu_only.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt index e243d4be..63949d9f 100644 --- a/requirements/portable/requirements_cpu_only_noavx2.txt +++ b/requirements/portable/requirements_cpu_only_noavx2.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt index 3db163b0..34652264 100644 --- a/requirements/portable/requirements_noavx2.txt +++ b/requirements/portable/requirements_noavx2.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and 
platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt index 7758131c..28849ff7 100644 --- a/requirements/portable/requirements_vulkan.txt +++ b/requirements/portable/requirements_vulkan.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # Vulkan wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt index dcee8d05..365a798e 100644 --- a/requirements/portable/requirements_vulkan_noavx2.txt +++ b/requirements/portable/requirements_vulkan_noavx2.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" From c357eed4c73099343418f5340dc1c28013cb7486 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 3 Dec 2025 18:40:34 -0800 Subject: [PATCH 17/61] Image: Remove the flash_attention_3 option (no idea how to get it working) --- modules/image_models.py | 4 +--- modules/shared.py | 2 +- modules/ui_image_generation.py | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/modules/image_models.py b/modules/image_models.py index 2ef1e730..8363533b 100644 --- a/modules/image_models.py +++ b/modules/image_models.py @@ -98,7 +98,7 @@ def load_image_model(model_name, dtype='bfloat16', attn_backend='sdpa', cpu_offl Args: model_name: Name of the model directory dtype: 'bfloat16' or 'float16' - attn_backend: 'sdpa', 'flash_attention_2', or 'flash_attention_3' + attn_backend: 'sdpa' or 'flash_attention_2' cpu_offload: Enable CPU offloading for low VRAM compile_model: Compile the model for faster inference (slow first run) quant_method: 'none', 'bnb-8bit', 'bnb-4bit', or torchao options (int8wo, fp4, float8wo) @@ -145,8 +145,6 @@ def load_image_model(model_name, dtype='bfloat16', attn_backend='sdpa', cpu_offl if hasattr(pipe, 'transformer') and hasattr(pipe.transformer, 'set_attention_backend'): if attn_backend == 'flash_attention_2': pipe.transformer.set_attention_backend("flash") - elif attn_backend == 'flash_attention_3': - pipe.transformer.set_attention_backend("_flash_3") # 
sdpa is the default, no action needed if compile_model: diff --git a/modules/shared.py b/modules/shared.py index 4a0fd986..4e17497b 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -56,7 +56,7 @@ group = parser.add_argument_group('Image model') group.add_argument('--image-model', type=str, help='Name of the image model to select on startup (overrides saved setting).') group.add_argument('--image-model-dir', type=str, default='user_data/image_models', help='Path to directory with all the image models.') group.add_argument('--image-dtype', type=str, default=None, choices=['bfloat16', 'float16'], help='Data type for image model.') -group.add_argument('--image-attn-backend', type=str, default=None, choices=['sdpa', 'flash_attention_2', 'flash_attention_3'], help='Attention backend for image model.') +group.add_argument('--image-attn-backend', type=str, default=None, choices=['sdpa', 'flash_attention_2'], help='Attention backend for image model.') group.add_argument('--image-cpu-offload', action='store_true', help='Enable CPU offloading for image model.') group.add_argument('--image-compile', action='store_true', help='Compile the image model for faster inference.') group.add_argument('--image-quant', type=str, default=None, diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 08cf3f64..fdf1af86 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -485,7 +485,7 @@ def create_ui(): info='bfloat16 recommended for modern GPUs' ) shared.gradio['image_attn_backend'] = gr.Dropdown( - choices=['sdpa', 'flash_attention_2', 'flash_attention_3'], + choices=['sdpa', 'flash_attention_2'], value=shared.settings['image_attn_backend'], label='Attention Backend', info='SDPA is default. Flash Attention requires compatible GPU.' 
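For reference, a minimal usage sketch (not part of any patch in this series) of how the remaining attention backends are selected through load_image_model, based only on the signature, docstring, and CLI choices shown in the hunks above; the model directory name is hypothetical:

    # Sketch assuming the modules.image_models API introduced in this series.
    # After this patch, 'flash_attention_2' is routed to
    # pipe.transformer.set_attention_backend("flash"); 'sdpa' is the diffusers
    # default and needs no extra call.
    from modules.image_models import load_image_model

    pipe = load_image_model(
        "Z-Image-Turbo",                   # hypothetical folder under user_data/image_models
        dtype="bfloat16",                  # or "float16", per --image-dtype
        attn_backend="flash_attention_2",  # or "sdpa" after this patch
    )
    if pipe is None:
        # The UI's generate() treats a None return as a load failure.
        raise RuntimeError("Image model failed to load; check the log for details.")

The same two backends are what --image-attn-backend accepts on the command line after this change.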
From 235b94f097d7efa37f3f33de6b1787ce5673b742 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 3 Dec 2025 18:43:30 -0800 Subject: [PATCH 18/61] Image: Add placeholder file for user_data/image_models --- user_data/image_models/place-your-models-here.txt | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 user_data/image_models/place-your-models-here.txt diff --git a/user_data/image_models/place-your-models-here.txt b/user_data/image_models/place-your-models-here.txt new file mode 100644 index 00000000..e69de29b From 14dbc3488e5780be50a7b6211742ec2137cdc503 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 4 Dec 2025 05:32:58 -0800 Subject: [PATCH 19/61] Image: Clear the torch cache after generation, not before --- modules/ui_image_generation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index fdf1af86..7c393f13 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -670,8 +670,6 @@ def generate(state): from modules.torch_utils import clear_torch_cache, get_device - clear_torch_cache() - try: model_name = state['image_model_menu'] @@ -772,11 +770,13 @@ def generate(state): logger.info(f'Generated {total_images} {"image" if total_images == 1 else "images"} in {(t1 - t0):.2f} seconds ({total_steps / (t1 - t0):.2f} steps/s, seed {seed})') yield all_images + clear_torch_cache() except Exception as e: logger.error(f"Image generation failed: {e}") traceback.print_exc() yield [] + clear_torch_cache() def load_image_model_wrapper(model_name, dtype, attn_backend, cpu_offload, compile_model, quant_method): From a838223d185597071ba4d346190f24708fa51062 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 4 Dec 2025 05:49:57 -0800 Subject: [PATCH 20/61] Image: Add a progress bar during generation --- modules/ui_image_generation.py | 98 +++++++++++++++++++++++++--------- 1 file changed, 72 insertions(+), 26 deletions(-) diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 7c393f13..62760a8a 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -373,7 +373,10 @@ def create_ui(): shared.gradio['image_generate_btn'] = gr.Button("Generate", variant="primary", size="lg") shared.gradio['image_stop_btn'] = gr.Button("Stop", size="lg", visible=False) - gr.HTML("
") + shared.gradio['image_progress'] = gr.HTML( + value=progress_bar_html(), + elem_id="image-progress" + ) gr.Markdown("### Dimensions") with gr.Row(): @@ -546,19 +549,19 @@ def create_event_handlers(): shared.gradio['image_generate_btn'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda: [gr.update(visible=True), gr.update(visible=False)], None, gradio('image_stop_btn', 'image_generate_btn')).then( - generate, gradio('interface_state'), gradio('image_output_gallery'), show_progress=False).then( + generate, gradio('interface_state'), gradio('image_output_gallery', 'image_progress'), show_progress=False).then( lambda: [gr.update(visible=False), gr.update(visible=True)], None, gradio('image_stop_btn', 'image_generate_btn')) shared.gradio['image_prompt'].submit( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda: [gr.update(visible=True), gr.update(visible=False)], None, gradio('image_stop_btn', 'image_generate_btn')).then( - generate, gradio('interface_state'), gradio('image_output_gallery'), show_progress=False).then( + generate, gradio('interface_state'), gradio('image_output_gallery', 'image_progress'), show_progress=False).then( lambda: [gr.update(visible=False), gr.update(visible=True)], None, gradio('image_stop_btn', 'image_generate_btn')) shared.gradio['image_neg_prompt'].submit( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda: [gr.update(visible=True), gr.update(visible=False)], None, gradio('image_stop_btn', 'image_generate_btn')).then( - generate, gradio('interface_state'), gradio('image_output_gallery'), show_progress=False).then( + generate, gradio('interface_state'), gradio('image_output_gallery', 'image_progress'), show_progress=False).then( lambda: [gr.update(visible=False), gr.update(visible=True)], None, gradio('image_stop_btn', 'image_generate_btn')) # Stop button @@ -661,11 +664,27 @@ def create_event_handlers(): ) +def progress_bar_html(progress=0, text=""): + """Generate HTML for progress bar. Empty div when progress <= 0.""" + if progress <= 0: + return '
' + + return f'''
+
+
+
+
{text}
+
''' + + def generate(state): """ Generate images using the loaded model. Automatically adjusts parameters based on pipeline type. """ + import queue + import threading + import torch from modules.torch_utils import clear_torch_cache, get_device @@ -675,7 +694,7 @@ def generate(state): if not model_name or model_name == 'None': logger.error("No image model selected. Go to the Model tab and select a model.") - yield [] + yield [], progress_bar_html() return if shared.image_model is None: @@ -689,7 +708,7 @@ def generate(state): ) if result is None: logger.error(f"Failed to load model `{model_name}`.") - yield [] + yield [], progress_bar_html() return shared.image_model_name = model_name @@ -713,69 +732,96 @@ def generate(state): # Process Prompt prompt = state['image_prompt'] - # Apply "Positive Magic" for Qwen models only if pipeline_type == 'qwenimage': magic_suffix = ", Ultra HD, 4K, cinematic composition" - # Avoid duplication if user already added it if magic_suffix.strip(", ") not in prompt: prompt += magic_suffix - # Reset stop flag at start shared.stop_everything = False - # Callback to check for interruption during diffusion steps + batch_count = int(state['image_batch_count']) + steps_per_batch = int(state['image_steps']) + total_steps = steps_per_batch * batch_count + + # Queue for progress updates from callback + progress_queue = queue.Queue() + def interrupt_callback(pipe, step_index, timestep, callback_kwargs): if shared.stop_everything: pipe._interrupt = True - + progress_queue.put(step_index + 1) return callback_kwargs - # Build generation kwargs gen_kwargs = { "prompt": prompt, "negative_prompt": state['image_neg_prompt'], "height": int(state['image_height']), "width": int(state['image_width']), - "num_inference_steps": int(state['image_steps']), + "num_inference_steps": steps_per_batch, "num_images_per_prompt": int(state['image_batch_size']), "generator": generator, "callback_on_step_end": interrupt_callback, } - # Add pipeline-specific parameters for CFG cfg_val = state.get('image_cfg_scale', 0.0) - if pipeline_type == 'qwenimage': - # Qwen-Image uses true_cfg_scale (typically 4.0) gen_kwargs["true_cfg_scale"] = cfg_val else: - # Z-Image and others use guidance_scale (typically 0.0 for Turbo) gen_kwargs["guidance_scale"] = cfg_val t0 = time.time() - for i in range(int(state['image_batch_count'])): + + for batch_idx in range(batch_count): if shared.stop_everything: break - generator.manual_seed(int(seed + i)) - batch_results = shared.image_model(**gen_kwargs).images - all_images.extend(batch_results) - yield all_images + generator.manual_seed(int(seed + batch_idx)) + + # Run generation in thread so we can yield progress + result_holder = [] + error_holder = [] + + def run_batch(): + try: + result_holder.extend(shared.image_model(**gen_kwargs).images) + except Exception as e: + error_holder.append(e) + + thread = threading.Thread(target=run_batch) + thread.start() + + # Yield progress updates while generation runs + while thread.is_alive(): + try: + step = progress_queue.get(timeout=0.1) + absolute_step = batch_idx * steps_per_batch + step + pct = absolute_step / total_steps + text = f"Batch {batch_idx + 1}/{batch_count} — Step {step}/{steps_per_batch}" + yield all_images, progress_bar_html(pct, text) + except queue.Empty: + pass + + thread.join() + + if error_holder: + raise error_holder[0] + + all_images.extend(result_holder) + yield all_images, progress_bar_html((batch_idx + 1) / batch_count, f"Batch {batch_idx + 1}/{batch_count} complete") t1 = time.time() 
save_generated_images(all_images, state, seed) - total_images = int(state['image_batch_count']) * int(state['image_batch_size']) - total_steps = state["image_steps"] * int(state['image_batch_count']) + total_images = batch_count * int(state['image_batch_size']) logger.info(f'Generated {total_images} {"image" if total_images == 1 else "images"} in {(t1 - t0):.2f} seconds ({total_steps / (t1 - t0):.2f} steps/s, seed {seed})') - yield all_images + yield all_images, progress_bar_html() clear_torch_cache() except Exception as e: logger.error(f"Image generation failed: {e}") traceback.print_exc() - yield [] + yield [], progress_bar_html() clear_torch_cache() From 7fb9f19bd8866ceb47765cfc428e6a21d105d498 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 4 Dec 2025 06:17:40 -0800 Subject: [PATCH 21/61] Progress bar style improvements --- css/main.css | 39 ++++++++++++++++++++++++++++++++++ modules/ui_image_generation.py | 10 ++++----- 2 files changed, 44 insertions(+), 5 deletions(-) diff --git a/css/main.css b/css/main.css index 5c1c356d..4dea6b9e 100644 --- a/css/main.css +++ b/css/main.css @@ -1752,3 +1752,42 @@ button#swap-height-width { .min.svelte-1yrv54 { min-height: 0; } + +/* Image Generation Progress Bar */ +#image-progress .image-ai-separator { + height: 24px; + margin: 20px 0; + border-top: 1px solid var(--input-border-color); +} + +#image-progress .image-ai-progress-wrapper { + height: 24px; + margin: 20px 0; +} + +#image-progress .image-ai-progress-track { + background: #e5e7eb; + border-radius: 4px; + overflow: hidden; + height: 8px; +} + +.dark #image-progress .image-ai-progress-track { + background: #333; +} + +#image-progress .image-ai-progress-fill { + background: #4a9eff; + height: 100%; +} + +#image-progress .image-ai-progress-text { + text-align: center; + font-size: 12px; + color: #666; + margin-top: 4px; +} + +.dark #image-progress .image-ai-progress-text { + color: #888; +} diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 62760a8a..e85f1520 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -667,13 +667,13 @@ def create_event_handlers(): def progress_bar_html(progress=0, text=""): """Generate HTML for progress bar. Empty div when progress <= 0.""" if progress <= 0: - return '
' + return '
' - return f'''
-
-
+ return f'''
+
+
-
{text}
+
{text}
''' From 27931537176fef1bc1335815097ebc780cbf1dbf Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 4 Dec 2025 07:57:23 -0800 Subject: [PATCH 22/61] Image: Add LLM-generated prompt variations --- modules/shared.py | 1 + modules/ui.py | 2 ++ modules/ui_image_generation.py | 58 ++++++++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+) diff --git a/modules/shared.py b/modules/shared.py index 4e17497b..1ecc0d28 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -319,6 +319,7 @@ settings = { 'image_seed': -1, 'image_batch_size': 1, 'image_batch_count': 1, + 'image_llm_variations': False, 'image_model_menu': 'None', 'image_dtype': 'bfloat16', 'image_attn_backend': 'sdpa', diff --git a/modules/ui.py b/modules/ui.py index ff5686e8..d95f7bc6 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -293,6 +293,7 @@ def list_interface_input_elements(): 'image_seed', 'image_batch_size', 'image_batch_count', + 'image_llm_variations', 'image_model_menu', 'image_dtype', 'image_attn_backend', @@ -547,6 +548,7 @@ def setup_auto_save(): 'image_seed', 'image_batch_size', 'image_batch_count', + 'image_llm_variations', 'image_model_menu', 'image_dtype', 'image_attn_backend', diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index e85f1520..ceb470ff 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -10,6 +10,7 @@ import numpy as np from PIL.PngImagePlugin import PngInfo from modules import shared, ui, utils +from modules.utils import check_model_loaded from modules.image_models import ( get_pipeline_type, load_image_model, @@ -409,6 +410,11 @@ def create_ui(): with gr.Column(): shared.gradio['image_batch_size'] = gr.Slider(1, 32, value=shared.settings['image_batch_size'], step=1, label="Batch Size (VRAM Heavy)", info="Generates N images at once.") shared.gradio['image_batch_count'] = gr.Slider(1, 128, value=shared.settings['image_batch_count'], step=1, label="Sequential Count (Loop)", info="Repeats the generation N times.") + shared.gradio['image_llm_variations'] = gr.Checkbox( + value=shared.settings['image_llm_variations'], + label='LLM Prompt Variations', + info='Use the loaded LLM to generate creative prompt variations for each sequential batch.' + ) with gr.Column(scale=6, min_width=500): with gr.Column(elem_classes=["viewport-container"]): @@ -664,6 +670,54 @@ def create_event_handlers(): ) +def generate_prompt_variation(state): + """Generate a creative variation of the image prompt using the LLM.""" + from modules.chat import generate_chat_prompt + from modules.text_generation import generate_reply + + prompt = state['image_prompt'] + + # Check if LLM is loaded + model_loaded, _ = check_model_loaded() + if not model_loaded: + logger.warning("No LLM loaded for prompt variation. Using original prompt.") + return prompt + + augmented_message = f"{prompt}\n\n=====\n\nPlease create a creative variation of the image generation prompt above. Keep the same general subject and style, but vary the details, composition, lighting, or mood. Respond with only the new prompt, nothing else." 
+ + # Use minimal state for generation + var_state = state.copy() + var_state['history'] = {'internal': [], 'visible': [], 'metadata': {}} + var_state['auto_max_new_tokens'] = True + var_state['enable_thinking'] = False + var_state['reasoning_effort'] = 'low' + var_state['start_with'] = "" + + formatted_prompt = generate_chat_prompt(augmented_message, var_state) + + variation = "" + for reply in generate_reply(formatted_prompt, var_state, stopping_strings=[], is_chat=True): + variation = reply + + # Strip thinking blocks if present + if "" in variation: + variation = variation.rsplit("", 1)[1] + elif "<|start|>assistant<|channel|>final<|message|>" in variation: + variation = variation.rsplit("<|start|>assistant<|channel|>final<|message|>", 1)[1] + elif "" in variation: + variation = variation.rsplit("", 1)[1] + + variation = variation.strip() + if len(variation) >= 2 and variation.startswith('"') and variation.endswith('"'): + variation = variation[1:-1] + + if variation: + logger.info(f"Prompt variation: {variation}...") + return variation + + return prompt + + def progress_bar_html(progress=0, text=""): """Generate HTML for progress bar. Empty div when progress <= 0.""" if progress <= 0: @@ -777,6 +831,10 @@ def generate(state): generator.manual_seed(int(seed + batch_idx)) + # Generate prompt variation if enabled + if state['image_llm_variations']: + gen_kwargs["prompt"] = generate_prompt_variation(state) + # Run generation in thread so we can yield progress result_holder = [] error_holder = [] From 5763947c375eef6cb1df55e6788538ab30ac4e99 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 4 Dec 2025 10:23:00 -0800 Subject: [PATCH 23/61] Image: Simplify the API code, add the llm_variations option --- extensions/openai/images.py | 145 ++++++--------------------------- extensions/openai/script.py | 32 +++----- extensions/openai/typing.py | 15 ++-- modules/ui_image_generation.py | 10 +-- 4 files changed, 49 insertions(+), 153 deletions(-) diff --git a/extensions/openai/images.py b/extensions/openai/images.py index 3a5288e6..1ecb1e63 100644 --- a/extensions/openai/images.py +++ b/extensions/openai/images.py @@ -4,117 +4,50 @@ OpenAI-compatible image generation using local diffusion models. import base64 import io -import json -import os import time -from datetime import datetime -import numpy as np from extensions.openai.errors import ServiceUnavailableError from modules import shared -from modules.logging_colors import logger -from PIL.PngImagePlugin import PngInfo -def generations(prompt: str, size: str, response_format: str, n: int, - negative_prompt: str = "", steps: int = 9, seed: int = -1, - cfg_scale: float = 0.0, batch_count: int = 1): +def generations(request): """ Generate images using the loaded diffusion model. - - Args: - prompt: Text description of the desired image - size: Image dimensions as "WIDTHxHEIGHT" - response_format: 'url' or 'b64_json' - n: Number of images per batch - negative_prompt: What to avoid in the image - steps: Number of inference steps - seed: Random seed (-1 for random) - cfg_scale: Classifier-free guidance scale - batch_count: Number of sequential batches - - Returns: - dict with 'created' timestamp and 'data' list of images + Returns dict with 'created' timestamp and 'data' list of images. 
""" - import torch - from modules.image_models import get_pipeline_type - from modules.torch_utils import clear_torch_cache, get_device + from modules.ui_image_generation import generate if shared.image_model is None: raise ServiceUnavailableError("No image model loaded. Load a model via the UI first.") - clear_torch_cache() + width, height = request.get_width_height() - # Parse dimensions - try: - width, height = [int(x) for x in size.split('x')] - except (ValueError, IndexError): - width, height = 1024, 1024 + # Build state dict: GenerationOptions fields + image-specific keys + state = request.model_dump() + state.update({ + 'image_model_menu': shared.image_model_name, + 'image_prompt': request.prompt, + 'image_neg_prompt': request.negative_prompt, + 'image_width': width, + 'image_height': height, + 'image_steps': request.steps, + 'image_seed': request.image_seed, + 'image_batch_size': request.batch_size, + 'image_batch_count': request.batch_count, + 'image_cfg_scale': request.cfg_scale, + 'image_llm_variations': request.llm_variations, + }) - # Handle seed - if seed == -1: - seed = np.random.randint(0, 2**32 - 1) - - device = get_device() or "cpu" - generator = torch.Generator(device).manual_seed(int(seed)) - - # Get pipeline type for CFG parameter name - pipeline_type = getattr(shared, 'image_pipeline_type', None) or get_pipeline_type(shared.image_model) - - # Build generation kwargs - gen_kwargs = { - "prompt": prompt, - "negative_prompt": negative_prompt, - "height": height, - "width": width, - "num_inference_steps": steps, - "num_images_per_prompt": n, - "generator": generator, - } - - # Pipeline-specific CFG parameter - if pipeline_type == 'qwenimage': - gen_kwargs["true_cfg_scale"] = cfg_scale - else: - gen_kwargs["guidance_scale"] = cfg_scale - - # Generate - all_images = [] - t0 = time.time() - - shared.stop_everything = False - - def interrupt_callback(pipe, step_index, timestep, callback_kwargs): - if shared.stop_everything: - pipe._interrupt = True - return callback_kwargs - - gen_kwargs["callback_on_step_end"] = interrupt_callback - - for i in range(batch_count): - if shared.stop_everything: - break - generator.manual_seed(int(seed + i)) - batch_results = shared.image_model(**gen_kwargs).images - all_images.extend(batch_results) - - t1 = time.time() - total_images = len(all_images) - total_steps = steps * batch_count - logger.info(f'Generated {total_images} {"image" if total_images == 1 else "images"} in {(t1 - t0):.2f} seconds ({total_steps / (t1 - t0):.2f} steps/s, seed {seed})') - - # Save images - _save_images(all_images, prompt, negative_prompt, width, height, steps, seed, cfg_scale) + # Exhaust generator, keep final result + images = [] + for images, _ in generate(state, save_images=False): + pass # Build response - resp = { - 'created': int(time.time()), - 'data': [] - } - - for img in all_images: + resp = {'created': int(time.time()), 'data': []} + for img in images: b64 = _image_to_base64(img) - if response_format == 'b64_json': + if request.response_format == 'b64_json': resp['data'].append({'b64_json': b64}) else: resp['data'].append({'url': f'data:image/png;base64,{b64}'}) @@ -126,29 +59,3 @@ def _image_to_base64(image) -> str: buffered = io.BytesIO() image.save(buffered, format="PNG") return base64.b64encode(buffered.getvalue()).decode('utf-8') - - -def _save_images(images, prompt, negative_prompt, width, height, steps, seed, cfg_scale): - """Save images with metadata.""" - date_str = datetime.now().strftime("%Y-%m-%d") - folder = os.path.join("user_data", 
"image_outputs", date_str) - os.makedirs(folder, exist_ok=True) - - metadata = { - 'image_prompt': prompt, - 'image_neg_prompt': negative_prompt, - 'image_width': width, - 'image_height': height, - 'image_steps': steps, - 'image_seed': seed, - 'image_cfg_scale': cfg_scale, - 'model': getattr(shared, 'image_model_name', 'unknown'), - } - - for idx, img in enumerate(images): - ts = datetime.now().strftime("%H-%M-%S") - filepath = os.path.join(folder, f"{ts}_{seed:010d}_{idx:03d}.png") - - png_info = PngInfo() - png_info.add_text("image_gen_settings", json.dumps(metadata)) - img.save(filepath, pnginfo=png_info) diff --git a/extensions/openai/script.py b/extensions/openai/script.py index 65805629..12f99ba4 100644 --- a/extensions/openai/script.py +++ b/extensions/openai/script.py @@ -7,23 +7,24 @@ import traceback from collections import deque from threading import Thread -import extensions.openai.completions as OAIcompletions -import extensions.openai.logits as OAIlogits -import extensions.openai.models as OAImodels import uvicorn -from extensions.openai.tokens import token_count, token_decode, token_encode -from extensions.openai.utils import _start_cloudflared from fastapi import Depends, FastAPI, Header, HTTPException from fastapi.middleware.cors import CORSMiddleware from fastapi.requests import Request from fastapi.responses import JSONResponse +from pydub import AudioSegment +from sse_starlette import EventSourceResponse +from starlette.concurrency import iterate_in_threadpool + +import extensions.openai.completions as OAIcompletions +import extensions.openai.logits as OAIlogits +import extensions.openai.models as OAImodels +from extensions.openai.tokens import token_count, token_decode, token_encode +from extensions.openai.utils import _start_cloudflared from modules import shared from modules.logging_colors import logger from modules.models import unload_model from modules.text_generation import stop_everything_event -from pydub import AudioSegment -from sse_starlette import EventSourceResponse -from starlette.concurrency import iterate_in_threadpool from .typing import ( ChatCompletionRequest, @@ -232,20 +233,7 @@ async def handle_image_generation(request_data: ImageGenerationRequest): import extensions.openai.images as OAIimages async with image_generation_semaphore: - width, height = request_data.get_width_height() - - response = await asyncio.to_thread( - OAIimages.generations, - prompt=request_data.prompt, - size=f"{width}x{height}", - response_format=request_data.response_format, - n=request_data.batch_size, # <-- use resolved batch_size - negative_prompt=request_data.negative_prompt, - steps=request_data.steps, - seed=request_data.seed, - cfg_scale=request_data.cfg_scale, - batch_count=request_data.batch_count, - ) + response = await asyncio.to_thread(OAIimages.generations, request_data) return JSONResponse(response) diff --git a/extensions/openai/typing.py b/extensions/openai/typing.py index a24b844b..dfdb9a7e 100644 --- a/extensions/openai/typing.py +++ b/extensions/openai/typing.py @@ -264,20 +264,18 @@ class LoadLorasRequest(BaseModel): lora_names: List[str] -class ImageGenerationRequest(BaseModel): - """OpenAI-compatible image generation request with extended parameters.""" - # Required +class ImageGenerationRequestParams(BaseModel): + """Image-specific parameters for generation.""" prompt: str - - # Generation parameters negative_prompt: str = "" size: str = Field(default="1024x1024", description="'WIDTHxHEIGHT'") steps: int = Field(default=9, ge=1) cfg_scale: float = 
Field(default=0.0, ge=0.0) - seed: int = Field(default=-1, description="-1 for random") + image_seed: int = Field(default=-1, description="-1 for random") batch_size: int | None = Field(default=None, ge=1, description="Parallel batch size (VRAM heavy)") n: int = Field(default=1, ge=1, description="Alias for batch_size (OpenAI compatibility)") batch_count: int = Field(default=1, ge=1, description="Sequential batch count") + llm_variations: bool = False # OpenAI compatibility (unused) model: str | None = None @@ -286,7 +284,6 @@ class ImageGenerationRequest(BaseModel): @model_validator(mode='after') def resolve_batch_size(self): - """Use batch_size if provided, otherwise fall back to n.""" if self.batch_size is None: self.batch_size = self.n return self @@ -299,6 +296,10 @@ class ImageGenerationRequest(BaseModel): return 1024, 1024 +class ImageGenerationRequest(GenerationOptions, ImageGenerationRequestParams): + pass + + class ImageGenerationResponse(BaseModel): created: int = int(time.time()) data: List[dict] diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index ceb470ff..6ac0bc24 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -10,7 +10,6 @@ import numpy as np from PIL.PngImagePlugin import PngInfo from modules import shared, ui, utils -from modules.utils import check_model_loaded from modules.image_models import ( get_pipeline_type, load_image_model, @@ -19,7 +18,7 @@ from modules.image_models import ( from modules.image_utils import open_image_safely from modules.logging_colors import logger from modules.text_generation import stop_everything_event -from modules.utils import gradio +from modules.utils import check_model_loaded, gradio ASPECT_RATIOS = { "1:1 Square": (1, 1), @@ -725,13 +724,13 @@ def progress_bar_html(progress=0, text=""): return f'''
-
+
{text}
''' -def generate(state): +def generate(state, save_images=True): """ Generate images using the loaded model. Automatically adjusts parameters based on pipeline type. @@ -868,7 +867,8 @@ def generate(state): yield all_images, progress_bar_html((batch_idx + 1) / batch_count, f"Batch {batch_idx + 1}/{batch_count} complete") t1 = time.time() - save_generated_images(all_images, state, seed) + if save_images: + save_generated_images(all_images, state, seed) total_images = batch_count * int(state['image_batch_size']) logger.info(f'Generated {total_images} {"image" if total_images == 1 else "images"} in {(t1 - t0):.2f} seconds ({total_steps / (t1 - t0):.2f} steps/s, seed {seed})') From ffef3c7b1dc5f3678a03abfa98fd54547de23796 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 4 Dec 2025 10:44:35 -0800 Subject: [PATCH 24/61] Image: Make the LLM Variations prompt configurable --- extensions/openai/images.py | 1 + extensions/openai/typing.py | 1 + modules/shared.py | 1 + modules/ui.py | 2 ++ modules/ui_image_generation.py | 36 ++++++++++++++++++++++++++++------ 5 files changed, 35 insertions(+), 6 deletions(-) diff --git a/extensions/openai/images.py b/extensions/openai/images.py index 1ecb1e63..0bb91a1e 100644 --- a/extensions/openai/images.py +++ b/extensions/openai/images.py @@ -36,6 +36,7 @@ def generations(request): 'image_batch_count': request.batch_count, 'image_cfg_scale': request.cfg_scale, 'image_llm_variations': request.llm_variations, + 'image_llm_variations_prompt': request.llm_variations_prompt or shared.settings.get('image_llm_variations_prompt', ''), }) # Exhaust generator, keep final result diff --git a/extensions/openai/typing.py b/extensions/openai/typing.py index dfdb9a7e..31a5dc6d 100644 --- a/extensions/openai/typing.py +++ b/extensions/openai/typing.py @@ -276,6 +276,7 @@ class ImageGenerationRequestParams(BaseModel): n: int = Field(default=1, ge=1, description="Alias for batch_size (OpenAI compatibility)") batch_count: int = Field(default=1, ge=1, description="Sequential batch count") llm_variations: bool = False + llm_variations_prompt: str | None = None # OpenAI compatibility (unused) model: str | None = None diff --git a/modules/shared.py b/modules/shared.py index 1ecc0d28..7fe9dbcf 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -320,6 +320,7 @@ settings = { 'image_batch_size': 1, 'image_batch_count': 1, 'image_llm_variations': False, + 'image_llm_variations_prompt': 'Please create a creative variation of the image generation prompt above. Keep the same general subject and style, but vary the details, composition, lighting, or mood. 
Respond with only the new prompt, nothing else.', 'image_model_menu': 'None', 'image_dtype': 'bfloat16', 'image_attn_backend': 'sdpa', diff --git a/modules/ui.py b/modules/ui.py index d95f7bc6..919a5740 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -294,6 +294,7 @@ def list_interface_input_elements(): 'image_batch_size', 'image_batch_count', 'image_llm_variations', + 'image_llm_variations_prompt', 'image_model_menu', 'image_dtype', 'image_attn_backend', @@ -549,6 +550,7 @@ def setup_auto_save(): 'image_batch_size', 'image_batch_count', 'image_llm_variations', + 'image_llm_variations_prompt', 'image_model_menu', 'image_dtype', 'image_attn_backend', diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 6ac0bc24..6eeb3d51 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -406,14 +406,25 @@ def create_ui(): info="Z-Image Turbo: 0.0 | Qwen: 4.0" ) shared.gradio['image_seed'] = gr.Number(label="Seed", value=shared.settings['image_seed'], precision=0, info="-1 = Random") + with gr.Column(): shared.gradio['image_batch_size'] = gr.Slider(1, 32, value=shared.settings['image_batch_size'], step=1, label="Batch Size (VRAM Heavy)", info="Generates N images at once.") shared.gradio['image_batch_count'] = gr.Slider(1, 128, value=shared.settings['image_batch_count'], step=1, label="Sequential Count (Loop)", info="Repeats the generation N times.") - shared.gradio['image_llm_variations'] = gr.Checkbox( - value=shared.settings['image_llm_variations'], - label='LLM Prompt Variations', - info='Use the loaded LLM to generate creative prompt variations for each sequential batch.' - ) + + gr.Markdown("### LLM Variations") + shared.gradio['image_llm_variations'] = gr.Checkbox( + value=shared.settings['image_llm_variations'], + label='Activate', + info='Use the loaded LLM to generate creative prompt variations for each sequential batch.' + ) + shared.gradio['image_llm_variations_prompt'] = gr.Textbox( + value=shared.settings['image_llm_variations_prompt'], + label='Variation Prompt', + lines=3, + placeholder='Instructions for generating prompt variations...', + visible=shared.settings['image_llm_variations'], + info='The instruction given to the LLM for generating variations.' + ) with gr.Column(scale=6, min_width=500): with gr.Column(elem_classes=["viewport-container"]): @@ -668,6 +679,14 @@ def create_event_handlers(): show_progress=False ) + # LLM Variations visibility toggle + shared.gradio['image_llm_variations'].change( + lambda x: gr.update(visible=x), + gradio('image_llm_variations'), + gradio('image_llm_variations_prompt'), + show_progress=False + ) + def generate_prompt_variation(state): """Generate a creative variation of the image prompt using the LLM.""" @@ -682,7 +701,12 @@ def generate_prompt_variation(state): logger.warning("No LLM loaded for prompt variation. Using original prompt.") return prompt - augmented_message = f"{prompt}\n\n=====\n\nPlease create a creative variation of the image generation prompt above. Keep the same general subject and style, but vary the details, composition, lighting, or mood. Respond with only the new prompt, nothing else." + # Get the custom variation prompt or use default + variation_instruction = state.get('image_llm_variations_prompt', '') + if not variation_instruction: + variation_instruction = 'Please create a creative variation of the image generation prompt above. Keep the same general subject and style, but vary the details, composition, lighting, or mood. 
Respond with only the new prompt, nothing else.' + + augmented_message = f"{prompt}\n\n=====\n\n{variation_instruction}" # Use minimal state for generation var_state = state.copy() From a90739f498e333ff3ffa0cba418139b1d1e00822 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 4 Dec 2025 10:50:40 -0800 Subject: [PATCH 25/61] Image: Better LLM variation default prompt --- modules/shared.py | 2 +- modules/ui_image_generation.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index 7fe9dbcf..2f39e495 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -320,7 +320,7 @@ settings = { 'image_batch_size': 1, 'image_batch_count': 1, 'image_llm_variations': False, - 'image_llm_variations_prompt': 'Please create a creative variation of the image generation prompt above. Keep the same general subject and style, but vary the details, composition, lighting, or mood. Respond with only the new prompt, nothing else.', + 'image_llm_variations_prompt': 'Your task is to create a creative variation of the image generation prompt above. Keep the main subject but feel free to add an interesting setting, scenario, pose, atmosphere, or stylistic twist. Be specific and vivid. Respond with only the new prompt, nothing else.', 'image_model_menu': 'None', 'image_dtype': 'bfloat16', 'image_attn_backend': 'sdpa', diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 6eeb3d51..1cf1b955 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -704,7 +704,7 @@ def generate_prompt_variation(state): # Get the custom variation prompt or use default variation_instruction = state.get('image_llm_variations_prompt', '') if not variation_instruction: - variation_instruction = 'Please create a creative variation of the image generation prompt above. Keep the same general subject and style, but vary the details, composition, lighting, or mood. Respond with only the new prompt, nothing else.' + variation_instruction = 'Your task is to create a creative variation of the image generation prompt above. Keep the main subject but feel free to add an interesting setting, scenario, pose, atmosphere, or stylistic twist. Be specific and vivid. Respond with only the new prompt, nothing else.' 
augmented_message = f"{prompt}\n\n=====\n\n{variation_instruction}" From ac31a7c0082c08ae126c7140434c7fa053e53ea9 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 4 Dec 2025 15:45:04 -0800 Subject: [PATCH 26/61] Image: Organize the UI --- css/main.css | 6 ++++++ modules/ui_image_generation.py | 30 ++++++++++++++---------------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/css/main.css b/css/main.css index 4dea6b9e..e7586960 100644 --- a/css/main.css +++ b/css/main.css @@ -1791,3 +1791,9 @@ button#swap-height-width { .dark #image-progress .image-ai-progress-text { color: #888; } + +#llm-prompt-variations { + position: absolute; + top: 0; + left: calc(100% - 174px); +} diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 1cf1b955..5a1a8f79 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -370,6 +370,19 @@ def create_ui(): lines=3, value=shared.settings['image_neg_prompt'] ) + shared.gradio['image_llm_variations'] = gr.Checkbox( + value=shared.settings['image_llm_variations'], + label='LLM Prompt Variations', + elem_id="llm-prompt-variations", + ) + shared.gradio['image_llm_variations_prompt'] = gr.Textbox( + value=shared.settings['image_llm_variations_prompt'], + label='Variation Prompt', + lines=3, + placeholder='Instructions for generating prompt variations...', + visible=shared.settings['image_llm_variations'], + info='Use the loaded LLM to generate creative prompt variations for each sequential batch.' + ) shared.gradio['image_generate_btn'] = gr.Button("Generate", variant="primary", size="lg") shared.gradio['image_stop_btn'] = gr.Button("Stop", size="lg", visible=False) @@ -411,21 +424,6 @@ def create_ui(): shared.gradio['image_batch_size'] = gr.Slider(1, 32, value=shared.settings['image_batch_size'], step=1, label="Batch Size (VRAM Heavy)", info="Generates N images at once.") shared.gradio['image_batch_count'] = gr.Slider(1, 128, value=shared.settings['image_batch_count'], step=1, label="Sequential Count (Loop)", info="Repeats the generation N times.") - gr.Markdown("### LLM Variations") - shared.gradio['image_llm_variations'] = gr.Checkbox( - value=shared.settings['image_llm_variations'], - label='Activate', - info='Use the loaded LLM to generate creative prompt variations for each sequential batch.' - ) - shared.gradio['image_llm_variations_prompt'] = gr.Textbox( - value=shared.settings['image_llm_variations_prompt'], - label='Variation Prompt', - lines=3, - placeholder='Instructions for generating prompt variations...', - visible=shared.settings['image_llm_variations'], - info='The instruction given to the LLM for generating variations.' 
- ) - with gr.Column(scale=6, min_width=500): with gr.Column(elem_classes=["viewport-container"]): shared.gradio['image_output_gallery'] = gr.Gallery(label="Output", show_label=False, columns=2, rows=2, height="80vh", object_fit="contain", preview=True, elem_id="image-output-gallery") @@ -735,7 +733,7 @@ def generate_prompt_variation(state): variation = variation[1:-1] if variation: - logger.info(f"Prompt variation: {variation}...") + logger.info(f"Prompt variation: {variation}") return variation return prompt From 47a0fcd614b89a5705b65d93eaf89de544ab8880 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 4 Dec 2025 16:11:35 -0800 Subject: [PATCH 27/61] Image: PNG metadata improvements --- modules/ui_image_generation.py | 38 +++++++++++++++++----------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 5a1a8f79..a377f329 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -40,8 +40,6 @@ METADATA_SETTINGS_KEYS = [ 'image_aspect_ratio', 'image_steps', 'image_seed', - 'image_batch_size', - 'image_batch_count', 'image_cfg_scale', ] @@ -184,8 +182,6 @@ def format_metadata_for_display(metadata): ('image_steps', 'Steps'), ('image_cfg_scale', 'CFG Scale'), ('image_seed', 'Seed'), - ('image_batch_size', 'Batch Size'), - ('image_batch_count', 'Batch Count'), ('model', 'Model'), ('generated_at', 'Generated At'), ] @@ -314,11 +310,11 @@ def on_gallery_select(evt: gr.SelectData, current_page): def send_to_generate(selected_image_path): """Load settings from selected image and return updates for all Generate tab inputs.""" if not selected_image_path or not os.path.exists(selected_image_path): - return [gr.update()] * 10 + ["No image selected"] + return [gr.update()] * 8 + ["No image selected"] metadata = read_image_metadata(selected_image_path) if not metadata: - return [gr.update()] * 10 + ["No settings found in this image"] + return [gr.update()] * 8 + ["No settings found in this image"] # Return updates for each input element in order updates = [ @@ -329,8 +325,6 @@ def send_to_generate(selected_image_path): gr.update(value=metadata.get('image_aspect_ratio', '1:1 Square')), gr.update(value=metadata.get('image_steps', 9)), gr.update(value=metadata.get('image_seed', -1)), - gr.update(value=metadata.get('image_batch_size', 1)), - gr.update(value=metadata.get('image_batch_count', 1)), gr.update(value=metadata.get('image_cfg_scale', 0.0)), ] @@ -661,8 +655,6 @@ def create_event_handlers(): 'image_aspect_ratio', 'image_steps', 'image_seed', - 'image_batch_size', - 'image_batch_count', 'image_cfg_scale', 'image_gallery_status' ), @@ -795,7 +787,7 @@ def generate(state, save_images=True): device = get_device() if device is None: device = "cpu" - generator = torch.Generator(device).manual_seed(int(seed)) + generator = torch.Generator(device) all_images = [] @@ -804,14 +796,8 @@ def generate(state, save_images=True): if pipeline_type is None: pipeline_type = get_pipeline_type(shared.image_model) - # Process Prompt prompt = state['image_prompt'] - if pipeline_type == 'qwenimage': - magic_suffix = ", Ultra HD, 4K, cinematic composition" - if magic_suffix.strip(", ") not in prompt: - prompt += magic_suffix - shared.stop_everything = False batch_count = int(state['image_batch_count']) @@ -862,7 +848,15 @@ def generate(state, save_images=True): def run_batch(): try: + # Apply magic suffix only at generation time for qwenimage + clean_prompt = 
gen_kwargs["prompt"] + if pipeline_type == 'qwenimage': + magic_suffix = ", Ultra HD, 4K, cinematic composition" + if magic_suffix.strip(", ") not in clean_prompt: + gen_kwargs["prompt"] = clean_prompt + magic_suffix + result_holder.extend(shared.image_model(**gen_kwargs).images) + gen_kwargs["prompt"] = clean_prompt # restore except Exception as e: error_holder.append(e) @@ -885,12 +879,18 @@ def generate(state, save_images=True): if error_holder: raise error_holder[0] + # Save this batch's images with the actual prompt and seed used + if save_images: + batch_seed = seed + batch_idx + original_prompt = state['image_prompt'] + state['image_prompt'] = gen_kwargs["prompt"] + save_generated_images(result_holder, state, batch_seed) + state['image_prompt'] = original_prompt + all_images.extend(result_holder) yield all_images, progress_bar_html((batch_idx + 1) / batch_count, f"Batch {batch_idx + 1}/{batch_count} complete") t1 = time.time() - if save_images: - save_generated_images(all_images, state, seed) total_images = batch_count * int(state['image_batch_size']) logger.info(f'Generated {total_images} {"image" if total_images == 1 else "images"} in {(t1 - t0):.2f} seconds ({total_steps / (t1 - t0):.2f} steps/s, seed {seed})') From b451bac082fadf00eca81e1e569d59a4df3ec8ac Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 4 Dec 2025 16:33:46 -0800 Subject: [PATCH 28/61] Image: Improve a log message --- modules/ui_image_generation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index a377f329..2cad5dc4 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -725,7 +725,8 @@ def generate_prompt_variation(state): variation = variation[1:-1] if variation: - logger.info(f"Prompt variation: {variation}") + logger.info("Prompt variation:") + print(variation) return variation return prompt From c7ad28a4cd076a025addc0656e42a74d91555dd8 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 4 Dec 2025 17:20:50 -0800 Subject: [PATCH 29/61] Image: Add the LLM-generated prompt to the API result --- extensions/openai/images.py | 10 ++++++++-- modules/ui_image_generation.py | 8 +++++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/extensions/openai/images.py b/extensions/openai/images.py index 0bb91a1e..e60470c3 100644 --- a/extensions/openai/images.py +++ b/extensions/openai/images.py @@ -48,10 +48,16 @@ def generations(request): resp = {'created': int(time.time()), 'data': []} for img in images: b64 = _image_to_base64(img) + image_obj = { + 'revised_prompt': img.info.get('revised_prompt', request.prompt) + } + if request.response_format == 'b64_json': - resp['data'].append({'b64_json': b64}) + image_obj['b64_json'] = b64 else: - resp['data'].append({'url': f'data:image/png;base64,{b64}'}) + image_obj['url'] = f'data:image/png;base64,{b64}' + + resp['data'].append(image_obj) return resp diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 2cad5dc4..424589b6 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -856,7 +856,13 @@ def generate(state, save_images=True): if magic_suffix.strip(", ") not in clean_prompt: gen_kwargs["prompt"] = clean_prompt + magic_suffix - result_holder.extend(shared.image_model(**gen_kwargs).images) + batch_results = shared.image_model(**gen_kwargs).images + + # Store the modified prompt in the 
metadata + for img in batch_results: + img.info["revised_prompt"] = clean_prompt + + result_holder.extend(batch_results) gen_kwargs["prompt"] = clean_prompt # restore except Exception as e: error_holder.append(e) From 3ef428efaa0e447d8d553e9387990b890aac5c6b Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 4 Dec 2025 17:34:17 -0800 Subject: [PATCH 30/61] Image: Remove llm_variations from the API --- extensions/openai/images.py | 3 +-- extensions/openai/typing.py | 8 +------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/extensions/openai/images.py b/extensions/openai/images.py index e60470c3..f46d549d 100644 --- a/extensions/openai/images.py +++ b/extensions/openai/images.py @@ -35,8 +35,7 @@ def generations(request): 'image_batch_size': request.batch_size, 'image_batch_count': request.batch_count, 'image_cfg_scale': request.cfg_scale, - 'image_llm_variations': request.llm_variations, - 'image_llm_variations_prompt': request.llm_variations_prompt or shared.settings.get('image_llm_variations_prompt', ''), + 'image_llm_variations': False, }) # Exhaust generator, keep final result diff --git a/extensions/openai/typing.py b/extensions/openai/typing.py index 31a5dc6d..5ac9f6ef 100644 --- a/extensions/openai/typing.py +++ b/extensions/openai/typing.py @@ -264,7 +264,7 @@ class LoadLorasRequest(BaseModel): lora_names: List[str] -class ImageGenerationRequestParams(BaseModel): +class ImageGenerationRequest(BaseModel): """Image-specific parameters for generation.""" prompt: str negative_prompt: str = "" @@ -275,8 +275,6 @@ class ImageGenerationRequestParams(BaseModel): batch_size: int | None = Field(default=None, ge=1, description="Parallel batch size (VRAM heavy)") n: int = Field(default=1, ge=1, description="Alias for batch_size (OpenAI compatibility)") batch_count: int = Field(default=1, ge=1, description="Sequential batch count") - llm_variations: bool = False - llm_variations_prompt: str | None = None # OpenAI compatibility (unused) model: str | None = None @@ -297,10 +295,6 @@ class ImageGenerationRequestParams(BaseModel): return 1024, 1024 -class ImageGenerationRequest(GenerationOptions, ImageGenerationRequestParams): - pass - - class ImageGenerationResponse(BaseModel): created: int = int(time.time()) data: List[dict] From 56f2a9512fbbc178d3ea5e5393c1dcc45219fed3 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 4 Dec 2025 17:34:27 -0800 Subject: [PATCH 31/61] Revert "Image: Add the LLM-generated prompt to the API result" This reverts commit c7ad28a4cd076a025addc0656e42a74d91555dd8. 
--- extensions/openai/images.py | 10 ++-------- modules/ui_image_generation.py | 8 +------- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/extensions/openai/images.py b/extensions/openai/images.py index f46d549d..ef3f4169 100644 --- a/extensions/openai/images.py +++ b/extensions/openai/images.py @@ -47,16 +47,10 @@ def generations(request): resp = {'created': int(time.time()), 'data': []} for img in images: b64 = _image_to_base64(img) - image_obj = { - 'revised_prompt': img.info.get('revised_prompt', request.prompt) - } - if request.response_format == 'b64_json': - image_obj['b64_json'] = b64 + resp['data'].append({'b64_json': b64}) else: - image_obj['url'] = f'data:image/png;base64,{b64}' - - resp['data'].append(image_obj) + resp['data'].append({'url': f'data:image/png;base64,{b64}'}) return resp diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 424589b6..2cad5dc4 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -856,13 +856,7 @@ def generate(state, save_images=True): if magic_suffix.strip(", ") not in clean_prompt: gen_kwargs["prompt"] = clean_prompt + magic_suffix - batch_results = shared.image_model(**gen_kwargs).images - - # Store the modified prompt in the metadata - for img in batch_results: - img.info["revised_prompt"] = clean_prompt - - result_holder.extend(batch_results) + result_holder.extend(shared.image_model(**gen_kwargs).images) gen_kwargs["prompt"] = clean_prompt # restore except Exception as e: error_holder.append(e) From 15c6e43597203a0468501621f0e9ea443d22e8fa Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 4 Dec 2025 17:41:09 -0800 Subject: [PATCH 32/61] Image: Add a revised_prompt field to API results for OpenAI compatibility --- extensions/openai/images.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/extensions/openai/images.py b/extensions/openai/images.py index ef3f4169..1337771a 100644 --- a/extensions/openai/images.py +++ b/extensions/openai/images.py @@ -47,10 +47,15 @@ def generations(request): resp = {'created': int(time.time()), 'data': []} for img in images: b64 = _image_to_base64(img) + + image_obj = {'revised_prompt': request.prompt} + if request.response_format == 'b64_json': - resp['data'].append({'b64_json': b64}) + image_obj['b64_json'] = b64 else: - resp['data'].append({'url': f'data:image/png;base64,{b64}'}) + image_obj['url'] = f'data:image/png;base64,{b64}' + + resp['data'].append(image_obj) return resp From b4f06a50b024f5c279d031f0d188e81249b1f98d Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 4 Dec 2025 19:11:31 -0800 Subject: [PATCH 33/61] fix: Pass bos_token and eos_token from metadata to jinja2 Fixes loading Seed-Instruct-36B --- modules/chat.py | 6 ++++-- modules/models_settings.py | 10 +++++++--- modules/shared.py | 2 ++ 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index acfc2f66..d1474cfe 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -112,7 +112,9 @@ def generate_chat_prompt(user_input, state, **kwargs): add_generation_prompt=False, enable_thinking=state['enable_thinking'], reasoning_effort=state['reasoning_effort'], - thinking_budget=-1 if state.get('enable_thinking', True) else 0 + thinking_budget=-1 if state.get('enable_thinking', True) else 0, + bos_token=shared.bos_token, + eos_token=shared.eos_token, ) chat_renderer = partial( @@ -475,7 +477,7 @@ def 
get_stopping_strings(state): if state['mode'] in ['instruct', 'chat-instruct']: template = jinja_env.from_string(state['instruction_template_str']) - renderer = partial(template.render, add_generation_prompt=False) + renderer = partial(template.render, add_generation_prompt=False, bos_token=shared.bos_token, eos_token=shared.eos_token) renderers.append(renderer) if state['mode'] in ['chat']: diff --git a/modules/models_settings.py b/modules/models_settings.py index 6dc000b4..d333e269 100644 --- a/modules/models_settings.py +++ b/modules/models_settings.py @@ -89,8 +89,9 @@ def get_model_metadata(model): else: bos_token = "" - template = template.replace('eos_token', "'{}'".format(eos_token)) - template = template.replace('bos_token', "'{}'".format(bos_token)) + + shared.bos_token = bos_token + shared.eos_token = eos_token template = re.sub(r"\{\{-?\s*raise_exception\(.*?\)\s*-?\}\}", "", template, flags=re.DOTALL) template = re.sub(r'raise_exception\([^)]*\)', "''", template) @@ -160,13 +161,16 @@ def get_model_metadata(model): # 4. If a template was found from any source, process it if template: + shared.bos_token = '' + shared.eos_token = '' + for k in ['eos_token', 'bos_token']: if k in metadata: value = metadata[k] if isinstance(value, dict): value = value['content'] - template = template.replace(k, "'{}'".format(value)) + setattr(shared, k, value) template = re.sub(r"\{\{-?\s*raise_exception\(.*?\)\s*-?\}\}", "", template, flags=re.DOTALL) template = re.sub(r'raise_exception\([^)]*\)', "''", template) diff --git a/modules/shared.py b/modules/shared.py index 2f39e495..7b572dec 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -19,6 +19,8 @@ is_seq2seq = False is_multimodal = False model_dirty_from_training = False lora_names = [] +bos_token = '' +eos_token = '' # Image model variables image_model = None From 8eac99599ad9a645f608d3814c63a0297e877de8 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 4 Dec 2025 19:58:06 -0800 Subject: [PATCH 34/61] Image: Better LLM variation default prompt --- modules/shared.py | 2 +- modules/ui_image_generation.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index 7b572dec..f6e86bdf 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -322,7 +322,7 @@ settings = { 'image_batch_size': 1, 'image_batch_count': 1, 'image_llm_variations': False, - 'image_llm_variations_prompt': 'Your task is to create a creative variation of the image generation prompt above. Keep the main subject but feel free to add an interesting setting, scenario, pose, atmosphere, or stylistic twist. Be specific and vivid. Respond with only the new prompt, nothing else.', + 'image_llm_variations_prompt': 'Write a variation of the image generation prompt above. Consider the intent of the user with that prompt and write something that will likely please them, with added details. 
Your reply should contain the new prompt and nothing else.', 'image_model_menu': 'None', 'image_dtype': 'bfloat16', 'image_attn_backend': 'sdpa', diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 2cad5dc4..139fd891 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -694,7 +694,7 @@ def generate_prompt_variation(state): # Get the custom variation prompt or use default variation_instruction = state.get('image_llm_variations_prompt', '') if not variation_instruction: - variation_instruction = 'Your task is to create a creative variation of the image generation prompt above. Keep the main subject but feel free to add an interesting setting, scenario, pose, atmosphere, or stylistic twist. Be specific and vivid. Respond with only the new prompt, nothing else.' + variation_instruction = 'Write a variation of the image generation prompt above. Consider the intent of the user with that prompt and write something that will likely please them, with added details. Your reply should contain the new prompt and nothing else.' augmented_message = f"{prompt}\n\n=====\n\n{variation_instruction}" From afa29b9554c2f55213799af1d8e79735f2129eee Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 5 Dec 2025 05:53:22 -0800 Subject: [PATCH 35/61] Image: Several fixes --- README.md | 3 +-- extensions/openai/images.py | 3 +++ modules/image_models.py | 24 ++++++++++++------- modules/ui_image_generation.py | 42 +++++++++++++--------------------- 4 files changed, 36 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index 174fc2ac..ef4b2160 100644 --- a/README.md +++ b/README.md @@ -28,8 +28,7 @@ A Gradio web UI for Large Language Models. - 100% offline and private, with zero telemetry, external resources, or remote update requests. - **File attachments**: Upload text files, PDF documents, and .docx documents to talk about their contents. - **Vision (multimodal models)**: Attach images to messages for visual understanding ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Multimodal-Tutorial)). -Image generation: A dedicated tab for diffusers models like Z-Image-Turbo and Qwen-Image. Features 4-bit/8-bit quantization and a persistent gallery with metadata (tutorial). -- **Image generation**: A dedicated tab for `diffusers` models like **Z-Image-Turbo** and **Qwen-Image**. Features 4-bit/8-bit quantization and a persistent gallery with metadata ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Image-Generation-Tutorial)). +- **Image generation**: A dedicated tab for `diffusers` models like **Z-Image-Turbo**. Features 4-bit/8-bit quantization and a persistent gallery with metadata ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Image-Generation-Tutorial)). - **Web search**: Optionally search the internet with LLM-generated queries to add context to the conversation. - Aesthetic UI with dark and light themes. - Syntax highlighting for code blocks and LaTeX rendering for mathematical expressions. 
diff --git a/extensions/openai/images.py b/extensions/openai/images.py index 1337771a..f7be3d22 100644 --- a/extensions/openai/images.py +++ b/extensions/openai/images.py @@ -43,6 +43,9 @@ def generations(request): for images, _ in generate(state, save_images=False): pass + if not images: + raise ServiceUnavailableError("Image generation failed or produced no images.") + # Build response resp = {'created': int(time.time()), 'data': []} for img in images: diff --git a/modules/image_models.py b/modules/image_models.py index 8363533b..28b2bb4f 100644 --- a/modules/image_models.py +++ b/modules/image_models.py @@ -141,16 +141,24 @@ def load_image_model(model_name, dtype='bfloat16', attn_backend='sdpa', cpu_offl if not cpu_offload: pipe.to(get_device()) - # Set attention backend (if supported by the pipeline) - if hasattr(pipe, 'transformer') and hasattr(pipe.transformer, 'set_attention_backend'): - if attn_backend == 'flash_attention_2': - pipe.transformer.set_attention_backend("flash") - # sdpa is the default, no action needed + modules = ["transformer", "unet"] + # Set attention backend + if attn_backend == 'flash_attention_2': + for name in modules: + mod = getattr(pipe, name, None) + if hasattr(mod, "set_attention_backend"): + mod.set_attention_backend("flash") + break + + # Compile model if compile_model: - if hasattr(pipe, 'transformer') and hasattr(pipe.transformer, 'compile'): - logger.info("Compiling model (first run will be slow)...") - pipe.transformer.compile() + for name in modules: + mod = getattr(pipe, name, None) + if hasattr(mod, "compile"): + logger.info("Compiling model (first run will be slow)...") + mod.compile() + break if cpu_offload: pipe.enable_model_cpu_offload() diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 139fd891..0a1b6891 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -43,10 +43,6 @@ METADATA_SETTINGS_KEYS = [ 'image_cfg_scale', ] -# Cache for all image paths -_image_cache = [] -_cache_timestamp = 0 - def round_to_step(value, step=STEP): return round(value / step) * step @@ -134,6 +130,9 @@ def build_generation_metadata(state, actual_seed): def save_generated_images(images, state, actual_seed): """Save images with generation metadata embedded in PNG.""" + if shared.args.multi_user: + return + date_str = datetime.now().strftime("%Y-%m-%d") folder_path = os.path.join("user_data", "image_outputs", date_str) os.makedirs(folder_path, exist_ok=True) @@ -157,9 +156,14 @@ def save_generated_images(images, state, actual_seed): def read_image_metadata(image_path): """Read generation metadata from PNG file.""" try: - with open_image_safely(image_path) as img: + img = open_image_safely(image_path) + if img is None: + return None + try: if hasattr(img, 'text') and 'image_gen_settings' in img.text: return json.loads(img.text['image_gen_settings']) + finally: + img.close() except Exception as e: logger.debug(f"Could not read metadata from {image_path}: {e}") return None @@ -198,19 +202,12 @@ def format_metadata_for_display(metadata): return "\n\n".join(lines) -def get_all_history_images(force_refresh=False): - """Get all history images sorted by modification time (newest first). 
Uses caching.""" - global _image_cache, _cache_timestamp - +def get_all_history_images(): + """Get all history images sorted by modification time (newest first).""" output_dir = os.path.join("user_data", "image_outputs") if not os.path.exists(output_dir): return [] - # Check if we need to refresh cache - current_time = time.time() - if not force_refresh and _image_cache and (current_time - _cache_timestamp) < 2: - return _image_cache - image_files = [] for root, _, files in os.walk(output_dir): for file in files: @@ -219,15 +216,12 @@ def get_all_history_images(force_refresh=False): image_files.append((full_path, os.path.getmtime(full_path))) image_files.sort(key=lambda x: x[1], reverse=True) - _image_cache = [x[0] for x in image_files] - _cache_timestamp = current_time - - return _image_cache + return [x[0] for x in image_files] -def get_paginated_images(page=0, force_refresh=False): +def get_paginated_images(page=0): """Get images for a specific page.""" - all_images = get_all_history_images(force_refresh) + all_images = get_all_history_images() total_images = len(all_images) total_pages = max(1, (total_images + IMAGES_PER_PAGE - 1) // IMAGES_PER_PAGE) @@ -250,7 +244,7 @@ def get_initial_page_info(): def refresh_gallery(current_page=0): """Refresh gallery with current page.""" - images, page, total_pages, total_images = get_paginated_images(current_page, force_refresh=True) + images, page, total_pages, total_images = get_paginated_images(current_page) page_info = f"Page {page + 1} of {total_pages} ({total_images} total images)" return images, page, page_info @@ -286,11 +280,7 @@ def on_gallery_select(evt: gr.SelectData, current_page): if evt.index is None: return "", "Select an image to view its settings" - if not _image_cache: - get_all_history_images() - - # Get the current page's images to find the actual file path - all_images = _image_cache + all_images = get_all_history_images() total_images = len(all_images) # Calculate the actual index in the full list From b63d57158d02be08b76d215479a9e6b9ccef2881 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 5 Dec 2025 05:59:54 -0800 Subject: [PATCH 36/61] Image: Add TGW as a prefix to output images --- modules/ui_image_generation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 0a1b6891..92704834 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -142,7 +142,7 @@ def save_generated_images(images, state, actual_seed): for idx, img in enumerate(images): timestamp = datetime.now().strftime("%H-%M-%S") - filename = f"{timestamp}_{actual_seed:010d}_{idx:03d}.png" + filename = f"TGW_{timestamp}_{actual_seed:010d}_{idx:03d}.png" filepath = os.path.join(folder_path, filename) # Create PNG metadata From 0dd468245c41ccd649c725689418ffd4116ef1e8 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 5 Dec 2025 07:11:38 -0800 Subject: [PATCH 37/61] Image: Add back the gallery cache (for performance) --- modules/ui_image_generation.py | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 92704834..28d4c627 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -43,6 +43,10 @@ METADATA_SETTINGS_KEYS = [ 'image_cfg_scale', ] +# Cache for all image paths +_image_cache = [] +_cache_timestamp = 0 + def 
round_to_step(value, step=STEP): return round(value / step) * step @@ -202,12 +206,19 @@ def format_metadata_for_display(metadata): return "\n\n".join(lines) -def get_all_history_images(): - """Get all history images sorted by modification time (newest first).""" +def get_all_history_images(force_refresh=False): + """Get all history images sorted by modification time (newest first). Uses caching.""" + global _image_cache, _cache_timestamp + output_dir = os.path.join("user_data", "image_outputs") if not os.path.exists(output_dir): return [] + # Check if we need to refresh cache + current_time = time.time() + if not force_refresh and _image_cache and (current_time - _cache_timestamp) < 2: + return _image_cache + image_files = [] for root, _, files in os.walk(output_dir): for file in files: @@ -216,12 +227,15 @@ def get_all_history_images(): image_files.append((full_path, os.path.getmtime(full_path))) image_files.sort(key=lambda x: x[1], reverse=True) - return [x[0] for x in image_files] + _image_cache = [x[0] for x in image_files] + _cache_timestamp = current_time + + return _image_cache -def get_paginated_images(page=0): +def get_paginated_images(page=0, force_refresh=False): """Get images for a specific page.""" - all_images = get_all_history_images() + all_images = get_all_history_images(force_refresh) total_images = len(all_images) total_pages = max(1, (total_images + IMAGES_PER_PAGE - 1) // IMAGES_PER_PAGE) @@ -244,7 +258,7 @@ def get_initial_page_info(): def refresh_gallery(current_page=0): """Refresh gallery with current page.""" - images, page, total_pages, total_images = get_paginated_images(current_page) + images, page, total_pages, total_images = get_paginated_images(current_page, force_refresh=True) page_info = f"Page {page + 1} of {total_pages} ({total_images} total images)" return images, page, page_info @@ -280,7 +294,10 @@ def on_gallery_select(evt: gr.SelectData, current_page): if evt.index is None: return "", "Select an image to view its settings" - all_images = get_all_history_images() + if not _image_cache: + get_all_history_images() + + all_images = _image_cache total_images = len(all_images) # Calculate the actual index in the full list From c11c14590aa5d34985fd12a0b1100d4e51343ae9 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 5 Dec 2025 08:08:11 -0800 Subject: [PATCH 38/61] Image: Better LLM variation default prompt --- modules/shared.py | 2 +- modules/ui_image_generation.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index f6e86bdf..3f3742de 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -322,7 +322,7 @@ settings = { 'image_batch_size': 1, 'image_batch_count': 1, 'image_llm_variations': False, - 'image_llm_variations_prompt': 'Write a variation of the image generation prompt above. Consider the intent of the user with that prompt and write something that will likely please them, with added details. Your reply should contain the new prompt and nothing else.', + 'image_llm_variations_prompt': 'Write a variation of the image generation prompt above. Consider the intent of the user with that prompt and write something that will likely please them, with added details. Output only the new prompt. 
Do not add any explanations, prefixes, or additional text.', 'image_model_menu': 'None', 'image_dtype': 'bfloat16', 'image_attn_backend': 'sdpa', diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 28d4c627..c55d2438 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -701,7 +701,7 @@ def generate_prompt_variation(state): # Get the custom variation prompt or use default variation_instruction = state.get('image_llm_variations_prompt', '') if not variation_instruction: - variation_instruction = 'Write a variation of the image generation prompt above. Consider the intent of the user with that prompt and write something that will likely please them, with added details. Your reply should contain the new prompt and nothing else.' + variation_instruction = 'Write a variation of the image generation prompt above. Consider the intent of the user with that prompt and write something that will likely please them, with added details. Output only the new prompt. Do not add any explanations, prefixes, or additional text.' augmented_message = f"{prompt}\n\n=====\n\n{variation_instruction}" From 5848c7884d641ec3c76799585e6f44f379d8b5da Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 5 Dec 2025 10:24:51 -0800 Subject: [PATCH 39/61] Increase the height of the image output gallery --- css/main.css | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/css/main.css b/css/main.css index e7586960..dad53c30 100644 --- a/css/main.css +++ b/css/main.css @@ -1692,8 +1692,8 @@ button#swap-height-width { } #image-output-gallery, #image-output-gallery > :nth-child(2) { - height: calc(100vh - 83px); - max-height: calc(100vh - 83px); + height: calc(100vh - 66px); + max-height: calc(100vh - 66px); } #image-history-gallery, #image-history-gallery > :nth-child(2) { From eba8a594665cd9407f2657fb26a1c4bbac60baf0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 5 Dec 2025 12:10:41 -0800 Subject: [PATCH 40/61] docs: Improve the image generation tutorial --- docs/Image Generation Tutorial.md | 67 ++++++++++++++++++++++++++----- 1 file changed, 56 insertions(+), 11 deletions(-) diff --git a/docs/Image Generation Tutorial.md b/docs/Image Generation Tutorial.md index e7022c34..0c9eb848 100644 --- a/docs/Image Generation Tutorial.md +++ b/docs/Image Generation Tutorial.md @@ -1,20 +1,65 @@ # Image Generation Tutorial -This feature allows you to generate images using high-speed models like Z-Image-Turbo directly within the web UI. +This feature allows you to generate images using `diffusers` models like [Tongyi-MAI/Z-Image-Turbo](https://huggingface.co/Tongyi-MAI/Z-Image-Turbo) directly within the web UI. -## How to use +## Installation -1. Click on the **Image AI** tab at the top of the interface. -2. Select the **Model** sub-tab. -3. Copy and paste the following link into the **Download model** box: +1. Clone the repository with ``` -https://huggingface.co/Tongyi-MAI/Z-Image-Turbo +git clone https://github.com/oobabooga/text-generation-webui ``` -4. Click the **Download** button and wait for the confirmation message. -5. In the **Model** dropdown menu, select the model you just downloaded (if you don't see it, click the 🔄 refresh button). -6. Click **Load**. -7. Go to the **Generate** sub-tab, type a prompt, and click **GENERATE**. +or download it from [here](https://github.com/oobabooga/text-generation-webui/archive/refs/heads/main.zip) and unzip it. 
+ +2. Use the one-click installer. + +- Windows: Double click on `start_windows.bat` +- Linux: Run `./start_linux.sh` +- macOS: Run `./start_macos.sh` + +Note: Image generation does not work with the portable builds in `.zip` format in the [Releases page](https://github.com/oobabooga/text-generation-webui/releases). You need the "full" version of the web UI. + +## Downloading a model + +1. Once installation ends, browse to `http://127.0.0.1:7860/`. +2. Click on "Image AI" on the left. +3. Click on "Model" at the top. +4. In the "Download model" field, paste `https://huggingface.co/Tongyi-MAI/Z-Image-Turbo` and click "Download". +5. Wait for the download to finish (it's 31 GB). + +## Loading the model + +Select the quantization option in the "Quantization" menu and click "Load". + +The memory usage for `Z-Image-Turbo` for each option is: + +If you have less GPU memory than _, check the "CPU Offload" option. + +Note: The next time you launch the web UI, the model will get automatically loaded with your last settings when you try to generate an image. You do not need to go to the Model tab and click "Load" each time. + +## Generating images: + +1. While still in the "Image AI" page, go to the "Generate" tab. +2. Type your prompt and click on the Generate button. + +### LLM Prompt Variations + +To use this feature, you need to load an LLM in the main "Model" tab on the left. + +If you have no idea what to use, do this to get started: + +1. Download [Qwen3-4B-Q3_K_M.gguf](https://huggingface.co/unsloth/Qwen3-4B-GGUF/resolve/main/Qwen3-4B-Q3_K_M.gguf) to your `text-generation-webui/user_data/models` folder. +2. Select the model in the dropdown menu in the "Model" page. +3. Click Load. + +Then go back to the "Image AI" page and check "LLM Prompt Variations". + +After that, your prompts will be automatically updated by the LLM each time you generate an image. If you use sequential batch count value greater than 1, a new prompt will be created for each sequential batch. + +The improvement in creativity is striking: + +### Model-specific settings + +- For Z-Image-Turbo, make sure to keep CFG Scale at 0 and Steps at 9. Do not write a Negative Prompt as it will get ignored with this CFG Scale value. -> **Note for Z-Image-Turbo:** For the best results with this specific model, keep the **CFG Scale** slider at **0**. 
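The tutorial above covers image generation through the UI. For API users, the sketch below shows one way to call the OpenAI-compatible endpoint wired up in patches 30 and 32. The `/v1/images/generations` route and port 5000 are assumptions (the usual OpenAI-style layout), not something these diffs confirm; the `prompt`, `n`, `response_format`, `revised_prompt`, and `b64_json` fields do come from the diffs.

```
# Hedged sketch of a client for the image endpoint from patches 30 and 32.
# Assumptions: the API is enabled, listens on port 5000, and exposes the
# standard OpenAI images route; neither detail is shown in these diffs.
import base64
import requests

resp = requests.post(
    "http://127.0.0.1:5000/v1/images/generations",  # assumed route and port
    json={
        "prompt": "a lighthouse at dusk, dramatic sky",
        "n": 1,                          # alias for batch_size (OpenAI compatibility)
        "response_format": "b64_json",   # otherwise a base64 data URL is returned
    },
    timeout=300,
)
resp.raise_for_status()

for i, item in enumerate(resp.json()["data"]):
    print(item.get("revised_prompt"))   # echoes the prompt (patch 32)
    with open(f"out_{i}.png", "wb") as f:
        f.write(base64.b64decode(item["b64_json"]))
```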
From 11937de517e6b661c3a112faa29852f474a3b9bc Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 5 Dec 2025 12:13:24 -0800 Subject: [PATCH 41/61] Use flash attention for image generation by default --- modules/shared.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index 3f3742de..0a27f33d 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -58,7 +58,7 @@ group = parser.add_argument_group('Image model') group.add_argument('--image-model', type=str, help='Name of the image model to select on startup (overrides saved setting).') group.add_argument('--image-model-dir', type=str, default='user_data/image_models', help='Path to directory with all the image models.') group.add_argument('--image-dtype', type=str, default=None, choices=['bfloat16', 'float16'], help='Data type for image model.') -group.add_argument('--image-attn-backend', type=str, default=None, choices=['sdpa', 'flash_attention_2'], help='Attention backend for image model.') +group.add_argument('--image-attn-backend', type=str, default=None, choices=['flash_attention_2', 'sdpa'], help='Attention backend for image model.') group.add_argument('--image-cpu-offload', action='store_true', help='Enable CPU offloading for image model.') group.add_argument('--image-compile', action='store_true', help='Compile the image model for faster inference.') group.add_argument('--image-quant', type=str, default=None, @@ -325,7 +325,7 @@ settings = { 'image_llm_variations_prompt': 'Write a variation of the image generation prompt above. Consider the intent of the user with that prompt and write something that will likely please them, with added details. Output only the new prompt. Do not add any explanations, prefixes, or additional text.', 'image_model_menu': 'None', 'image_dtype': 'bfloat16', - 'image_attn_backend': 'sdpa', + 'image_attn_backend': 'flash_attention_2', 'image_cpu_offload': False, 'image_compile': False, 'image_quant': 'none', From 6ca99910ba82a8b2212562ccaaf2e584aa369642 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 5 Dec 2025 13:08:46 -0800 Subject: [PATCH 42/61] Image: Quantize the text encoder for lower VRAM --- modules/image_models.py | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/modules/image_models.py b/modules/image_models.py index 28b2bb4f..290aaf19 100644 --- a/modules/image_models.py +++ b/modules/image_models.py @@ -8,17 +8,14 @@ from modules.utils import resolve_model_path def get_quantization_config(quant_method): """ Get the appropriate quantization config based on the selected method. - - Args: - quant_method: One of 'none', 'bnb-8bit', 'bnb-4bit', - 'torchao-int8wo', 'torchao-fp4', 'torchao-float8wo' - - Returns: - PipelineQuantizationConfig or None + Applies quantization to both the transformer and the text_encoder. 
""" import torch - from diffusers import BitsAndBytesConfig, TorchAoConfig + # Import BitsAndBytesConfig from BOTH libraries to be safe + from diffusers import BitsAndBytesConfig as DiffusersBnBConfig + from diffusers import TorchAoConfig from diffusers.quantizers import PipelineQuantizationConfig + from transformers import BitsAndBytesConfig as TransformersBnBConfig if quant_method == 'none' or not quant_method: return None @@ -27,7 +24,10 @@ def get_quantization_config(quant_method): elif quant_method == 'bnb-8bit': return PipelineQuantizationConfig( quant_mapping={ - "transformer": BitsAndBytesConfig( + "transformer": DiffusersBnBConfig( + load_in_8bit=True + ), + "text_encoder": TransformersBnBConfig( load_in_8bit=True ) } @@ -37,7 +37,13 @@ def get_quantization_config(quant_method): elif quant_method == 'bnb-4bit': return PipelineQuantizationConfig( quant_mapping={ - "transformer": BitsAndBytesConfig( + "transformer": DiffusersBnBConfig( + load_in_4bit=True, + bnb_4bit_quant_type="nf4", + bnb_4bit_compute_dtype=torch.bfloat16, + bnb_4bit_use_double_quant=True + ), + "text_encoder": TransformersBnBConfig( load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16, @@ -50,7 +56,8 @@ def get_quantization_config(quant_method): elif quant_method == 'torchao-int8wo': return PipelineQuantizationConfig( quant_mapping={ - "transformer": TorchAoConfig("int8wo") + "transformer": TorchAoConfig("int8wo"), + "text_encoder": TorchAoConfig("int8wo") } ) @@ -58,7 +65,8 @@ def get_quantization_config(quant_method): elif quant_method == 'torchao-fp4': return PipelineQuantizationConfig( quant_mapping={ - "transformer": TorchAoConfig("fp4_e2m1") + "transformer": TorchAoConfig("fp4_e2m1"), + "text_encoder": TorchAoConfig("fp4_e2m1") } ) @@ -66,7 +74,8 @@ def get_quantization_config(quant_method): elif quant_method == 'torchao-float8wo': return PipelineQuantizationConfig( quant_mapping={ - "transformer": TorchAoConfig("float8wo") + "transformer": TorchAoConfig("float8wo"), + "text_encoder": TorchAoConfig("float8wo") } ) From e20b2d38ff38fbd6451c8ff53c9e12fc9a327a14 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 5 Dec 2025 14:12:08 -0800 Subject: [PATCH 43/61] docs: Add VRAM measurements for Z-Image-Turbo --- docs/Image Generation Tutorial.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/docs/Image Generation Tutorial.md b/docs/Image Generation Tutorial.md index 0c9eb848..a74a4ddd 100644 --- a/docs/Image Generation Tutorial.md +++ b/docs/Image Generation Tutorial.md @@ -34,7 +34,15 @@ Select the quantization option in the "Quantization" menu and click "Load". The memory usage for `Z-Image-Turbo` for each option is: -If you have less GPU memory than _, check the "CPU Offload" option. +| Quantization Method | VRAM Usage | +| :--- | :--- | +| **None (FP16/BF16)** | 25613 MiB | +| **bnb-8bit** | 16301 MiB | +| **bnb-8bit + CPU Offload** | 16235 MiB | +| **bnb-4bit** | 11533 MiB | +| **bnb-4bit + CPU Offload** | 7677 MiB | + +The `torchao` options support `torch.compile` for faster image generation, with `float8wo` specifically providing native hardware acceleration for RTX 40-series and newer GPUs. Note: The next time you launch the web UI, the model will get automatically loaded with your last settings when you try to generate an image. You do not need to go to the Model tab and click "Load" each time. 
From 17b12567d82e7459438cd54f361805993ca4ca59 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 5 Dec 2025 14:15:15 -0800 Subject: [PATCH 44/61] docs: Small changes --- docs/Image Generation Tutorial.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/docs/Image Generation Tutorial.md b/docs/Image Generation Tutorial.md index a74a4ddd..85f891d5 100644 --- a/docs/Image Generation Tutorial.md +++ b/docs/Image Generation Tutorial.md @@ -51,6 +51,10 @@ Note: The next time you launch the web UI, the model will get automatically load 1. While still in the "Image AI" page, go to the "Generate" tab. 2. Type your prompt and click on the Generate button. +### Model-specific settings + +- For Z-Image-Turbo, make sure to keep CFG Scale at 0 and Steps at 9. Do not write a Negative Prompt as it will get ignored with this CFG Scale value. + ### LLM Prompt Variations To use this feature, you need to load an LLM in the main "Model" tab on the left. @@ -63,11 +67,6 @@ If you have no idea what to use, do this to get started: Then go back to the "Image AI" page and check "LLM Prompt Variations". -After that, your prompts will be automatically updated by the LLM each time you generate an image. If you use sequential batch count value greater than 1, a new prompt will be created for each sequential batch. +After that, your prompts will be automatically updated by the LLM each time you generate an image. If you use a "Sequential Count" value greater than 1, a new prompt will be created for each sequential batch. The improvement in creativity is striking: - -### Model-specific settings - -- For Z-Image-Turbo, make sure to keep CFG Scale at 0 and Steps at 9. Do not write a Negative Prompt as it will get ignored with this CFG Scale value. - From 1a9ed1fe98a2d51a860d3c4f747ba46c543d0bf3 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 6 Dec 2025 05:21:26 -0800 Subject: [PATCH 45/61] Fix the height of the image output gallery --- css/main.css | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/css/main.css b/css/main.css index dad53c30..e7586960 100644 --- a/css/main.css +++ b/css/main.css @@ -1692,8 +1692,8 @@ button#swap-height-width { } #image-output-gallery, #image-output-gallery > :nth-child(2) { - height: calc(100vh - 66px); - max-height: calc(100vh - 66px); + height: calc(100vh - 83px); + max-height: calc(100vh - 83px); } #image-history-gallery, #image-history-gallery > :nth-child(2) { From 455dc06db08fc347f1bbd09c4ac74134954fc641 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 6 Dec 2025 05:43:00 -0800 Subject: [PATCH 46/61] Serve the original PNG images in the UI instead of webp --- modules/ui_image_generation.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index c55d2438..2fb93fd8 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -133,9 +133,9 @@ def build_generation_metadata(state, actual_seed): def save_generated_images(images, state, actual_seed): - """Save images with generation metadata embedded in PNG.""" + """Save images with generation metadata embedded in PNG. 
Returns list of saved file paths.""" if shared.args.multi_user: - return + return [] date_str = datetime.now().strftime("%Y-%m-%d") folder_path = os.path.join("user_data", "image_outputs", date_str) @@ -144,6 +144,7 @@ def save_generated_images(images, state, actual_seed): metadata = build_generation_metadata(state, actual_seed) metadata_json = json.dumps(metadata, ensure_ascii=False) + saved_paths = [] for idx, img in enumerate(images): timestamp = datetime.now().strftime("%H-%M-%S") filename = f"TGW_{timestamp}_{actual_seed:010d}_{idx:03d}.png" @@ -155,6 +156,9 @@ def save_generated_images(images, state, actual_seed): # Save with metadata img.save(filepath, pnginfo=png_info) + saved_paths.append(filepath) + + return saved_paths def read_image_metadata(image_path): @@ -892,10 +896,14 @@ def generate(state, save_images=True): batch_seed = seed + batch_idx original_prompt = state['image_prompt'] state['image_prompt'] = gen_kwargs["prompt"] - save_generated_images(result_holder, state, batch_seed) + saved_paths = save_generated_images(result_holder, state, batch_seed) state['image_prompt'] = original_prompt + # Use file paths so gallery serves actual PNGs with metadata + all_images.extend(saved_paths) + else: + # Fallback to PIL objects if not saving + all_images.extend(result_holder) - all_images.extend(result_holder) yield all_images, progress_bar_html((batch_idx + 1) / batch_count, f"Batch {batch_idx + 1}/{batch_count} complete") t1 = time.time() From 6411142111db1736a4fffac72fd5ebc63ea5de11 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 6 Dec 2025 06:36:16 -0800 Subject: [PATCH 47/61] docs: Small changes --- docs/Image Generation Tutorial.md | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/docs/Image Generation Tutorial.md b/docs/Image Generation Tutorial.md index 85f891d5..875d1d44 100644 --- a/docs/Image Generation Tutorial.md +++ b/docs/Image Generation Tutorial.md @@ -2,6 +2,9 @@ This feature allows you to generate images using `diffusers` models like [Tongyi-MAI/Z-Image-Turbo](https://huggingface.co/Tongyi-MAI/Z-Image-Turbo) directly within the web UI. +print + + ## Installation 1. Clone the repository with @@ -36,11 +39,11 @@ The memory usage for `Z-Image-Turbo` for each option is: | Quantization Method | VRAM Usage | | :--- | :--- | -| **None (FP16/BF16)** | 25613 MiB | -| **bnb-8bit** | 16301 MiB | -| **bnb-8bit + CPU Offload** | 16235 MiB | -| **bnb-4bit** | 11533 MiB | -| **bnb-4bit + CPU Offload** | 7677 MiB | +| None (FP16/BF16) | 25613 MiB | +| bnb-8bit | 16301 MiB | +| bnb-8bit + CPU Offload | 16235 MiB | +| bnb-4bit | 11533 MiB | +| bnb-4bit + CPU Offload | 7677 MiB | The `torchao` options support `torch.compile` for faster image generation, with `float8wo` specifically providing native hardware acceleration for RTX 40-series and newer GPUs. @@ -70,3 +73,5 @@ Then go back to the "Image AI" page and check "LLM Prompt Variations". After that, your prompts will be automatically updated by the LLM each time you generate an image. If you use a "Sequential Count" value greater than 1, a new prompt will be created for each sequential batch. 
The improvement in creativity is striking: + +comparison_collage From 0100ad1bd7c0fbb068585902db78d0edce3d497d Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 6 Dec 2025 06:39:30 -0800 Subject: [PATCH 48/61] Add user_data/image_outputs to the Gradio allowed paths --- server.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/server.py b/server.py index 0c5d14ce..d8fb2c22 100644 --- a/server.py +++ b/server.py @@ -101,6 +101,11 @@ def create_interface(): auth.extend(x.strip() for line in file for x in line.split(',') if x.strip()) auth = [tuple(cred.split(':')) for cred in auth] + # Allowed paths + allowed_paths = ["css", "js", "extensions", "user_data/cache"] + if not shared.args.multi_user: + allowed_paths.append("user_data/image_outputs") + # Import the extensions and execute their setup() functions if shared.args.extensions is not None and len(shared.args.extensions) > 0: extensions_module.load_extensions() @@ -237,7 +242,7 @@ def create_interface(): ssl_keyfile=shared.args.ssl_keyfile, ssl_certfile=shared.args.ssl_certfile, root_path=shared.args.subpath, - allowed_paths=["css", "js", "extensions", "user_data/cache"] + allowed_paths=allowed_paths, ) From 02518a96a9c9e75947a4d41e22a758fe9d83310c Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 6 Dec 2025 06:55:06 -0800 Subject: [PATCH 49/61] Lint --- modules/models_settings.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/models_settings.py b/modules/models_settings.py index d333e269..e9f19a06 100644 --- a/modules/models_settings.py +++ b/modules/models_settings.py @@ -89,7 +89,6 @@ def get_model_metadata(model): else: bos_token = "" - shared.bos_token = bos_token shared.eos_token = eos_token From 1c36559e2b1b453a526b682b7db89486f9c3753c Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 6 Dec 2025 07:05:00 -0800 Subject: [PATCH 50/61] Add a News section to the README --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index ef4b2160..8065ca71 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,10 @@ A Gradio web UI for Large Language Models. |:---:|:---:| |![Image1](https://github.com/oobabooga/screenshots/raw/main/DEFAULT-3.5.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/PARAMETERS-3.5.png) | +## 🔥 News + +- The project now supports **image generation**! Including Z-Image-Turbo, 4bit/8bit quantization, `torch.compile`, and LLM-generated prompt variations ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Image-Generation-Tutorial)). + ## Features - Supports multiple local text generation backends, including [llama.cpp](https://github.com/ggerganov/llama.cpp), [Transformers](https://github.com/huggingface/transformers), [ExLlamaV3](https://github.com/turboderp-org/exllamav3), [ExLlamaV2](https://github.com/turboderp-org/exllamav2), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) (the latter via its own [Dockerfile](https://github.com/oobabooga/text-generation-webui/blob/main/docker/TensorRT-LLM/Dockerfile)). 
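The News entry above advertises 4-bit/8-bit quantization; as a standalone illustration of the bnb-4bit row in the VRAM table from patch 43, here is a minimal sketch of the pipeline-level quantization mapping that patch 42 builds in `get_quantization_config()`, applied to both the transformer and the text encoder. It assumes a recent `diffusers` with `PipelineQuantizationConfig` support plus `bitsandbytes`; the model path is illustrative.

```
# Minimal sketch of the bnb-4bit mapping from patch 42, used outside the web UI.
# Assumes diffusers with pipeline-level quantization support and bitsandbytes.
import torch
from diffusers import BitsAndBytesConfig as DiffusersBnBConfig
from diffusers import DiffusionPipeline
from diffusers.quantizers import PipelineQuantizationConfig
from transformers import BitsAndBytesConfig as TransformersBnBConfig

bnb_4bit = dict(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

quant_config = PipelineQuantizationConfig(
    quant_mapping={
        "transformer": DiffusersBnBConfig(**bnb_4bit),      # diffusers-side config
        "text_encoder": TransformersBnBConfig(**bnb_4bit),  # transformers-side config
    }
)

pipe = DiffusionPipeline.from_pretrained(
    "user_data/image_models/Z-Image-Turbo",  # illustrative local path
    torch_dtype=torch.bfloat16,
    quantization_config=quant_config,
)
pipe.to("cuda")  # mirrors load_image_model() when CPU offload is disabled
```

The same kwargs feed both configs, matching the patch; swapping the mapping values for `TorchAoConfig("int8wo")` gives the torchao variant mentioned in the note under the VRAM table.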
From 194e4c285fe0358a5ed15109cda1aaad204ee023 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 6 Dec 2025 08:14:48 -0800 Subject: [PATCH 51/61] Update llama.cpp --- requirements/full/requirements.txt | 4 ++-- requirements/full/requirements_amd.txt | 4 ++-- requirements/full/requirements_amd_noavx2.txt | 4 ++-- requirements/full/requirements_apple_intel.txt | 4 ++-- requirements/full/requirements_apple_silicon.txt | 4 ++-- requirements/full/requirements_cpu_only.txt | 4 ++-- requirements/full/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/full/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements.txt | 4 ++-- requirements/portable/requirements_amd.txt | 4 ++-- requirements/portable/requirements_amd_noavx2.txt | 4 ++-- requirements/portable/requirements_apple_intel.txt | 4 ++-- requirements/portable/requirements_apple_silicon.txt | 4 ++-- requirements/portable/requirements_cpu_only.txt | 4 ++-- requirements/portable/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/portable/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements_vulkan.txt | 4 ++-- requirements/portable/requirements_vulkan_noavx2.txt | 4 ++-- 18 files changed, 36 insertions(+), 36 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index c1f87988..2f1c0f09 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -44,8 +44,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.16/exllamav3-0.0.16+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.16/exllamav3-0.0.16+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index 512231e0..8d9f8c43 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -42,7 +42,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" 
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index 1ccc507c..f69c2466 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -42,7 +42,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index 4d8aa771..fd2dad7f 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -42,5 +42,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index 5290aa71..d71dfbe5 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ 
b/requirements/full/requirements_apple_silicon.txt @@ -42,5 +42,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index d9c76a31..2d174713 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -42,5 +42,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index 1a796c21..2e047b42 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -42,5 +42,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index 8105abaa..4baca6c9 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -44,8 +44,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" 
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.16/exllamav3-0.0.16+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.16/exllamav3-0.0.16+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt index 0b944d48..21cef622 100644 --- a/requirements/portable/requirements.txt +++ b/requirements/portable/requirements.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_amd.txt b/requirements/portable/requirements_amd.txt index 9b43c901..bce5a9e0 100644 --- a/requirements/portable/requirements_amd.txt +++ b/requirements/portable/requirements_amd.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_amd_noavx2.txt b/requirements/portable/requirements_amd_noavx2.txt index 825e6dec..18257eea 100644 --- a/requirements/portable/requirements_amd_noavx2.txt +++ b/requirements/portable/requirements_amd_noavx2.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # AMD wheels 
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+rocm6.4.4avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+rocm6.4.4avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt index 864b6775..e97ff558 100644 --- a/requirements/portable/requirements_apple_intel.txt +++ b/requirements/portable/requirements_apple_intel.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt index 03e090cf..d113431c 100644 --- a/requirements/portable/requirements_apple_silicon.txt +++ b/requirements/portable/requirements_apple_silicon.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt index 7dd851e8..1553096c 100644 --- a/requirements/portable/requirements_cpu_only.txt +++ b/requirements/portable/requirements_cpu_only.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == 
"Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt index 63949d9f..ce259dcc 100644 --- a/requirements/portable/requirements_cpu_only_noavx2.txt +++ b/requirements/portable/requirements_cpu_only_noavx2.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt index 34652264..887a4542 100644 --- a/requirements/portable/requirements_noavx2.txt +++ b/requirements/portable/requirements_noavx2.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt index 28849ff7..d9eff1ac 100644 --- a/requirements/portable/requirements_vulkan.txt +++ b/requirements/portable/requirements_vulkan.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # Vulkan wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" 
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt index 365a798e..4a5ee8a3 100644 --- a/requirements/portable/requirements_vulkan_noavx2.txt +++ b/requirements/portable/requirements_vulkan_noavx2.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" From c026dbaf647a1a6ead61f88848db4bb5f292b47a Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 6 Dec 2025 08:23:21 -0800 Subject: [PATCH 52/61] Fix API requests always returning the same 'created' time --- extensions/openai/typing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/extensions/openai/typing.py b/extensions/openai/typing.py index 5ac9f6ef..5fea2324 100644 --- a/extensions/openai/typing.py +++ b/extensions/openai/typing.py @@ -130,7 +130,7 @@ class CompletionRequest(GenerationOptions, CompletionRequestParams): class CompletionResponse(BaseModel): id: str choices: List[dict] - created: int = int(time.time()) + created: int = Field(default_factory=lambda: int(time.time())) model: str object: str = "text_completion" usage: dict @@ -178,7 +178,7 @@ class ChatCompletionRequest(GenerationOptions, ChatCompletionRequestParams): class ChatCompletionResponse(BaseModel): id: str choices: List[dict] - created: int = int(time.time()) + created: int = Field(default_factory=lambda: int(time.time())) model: str object: str = "chat.completion" usage: dict @@ -296,7 +296,7 @@ class ImageGenerationRequest(BaseModel): class ImageGenerationResponse(BaseModel): - created: int = int(time.time()) + created: int = Field(default_factory=lambda: int(time.time())) data: List[dict] From f93cc4b5c37a929aa1a91c2648c571908d9a47cf Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 6 Dec 2025 08:33:06 -0800 Subject: [PATCH 53/61] Add an API example to the image generation tutorial --- docs/Image Generation Tutorial.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/docs/Image Generation Tutorial.md b/docs/Image Generation Tutorial.md index 875d1d44..0d562041 100644 --- a/docs/Image Generation Tutorial.md +++ b/docs/Image Generation Tutorial.md @@ -75,3 +75,24 @@ After that, your prompts will be automatically updated by the LLM each time you The improvement in creativity is striking: comparison_collage + +## Generating images over API + +It is possible to generate images usign the project's API. Just make sure to start the server with `--api`, either by + +1. 
Passing the `--api` flag to your `start` script, like `./start_linux.sh --api`, or +2. Writing `--api` to your `user_data/CMD_FLAGS.txt` file and relaunching the web UI. + +Here is an API call example: + +``` +curl http://127.0.0.1:5000/v1/images/generations \ + -H "Content-Type: application/json" \ + -d '{ + "prompt": "an orange tree", + "steps": 9, + "cfg_scale": 0, + "batch_size": 1, + "batch_count": 1 + }' +``` From 160a25165af5f5a0b759269657a241fe75f684d5 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 6 Dec 2025 08:41:12 -0800 Subject: [PATCH 54/61] docs: Small change --- docs/Image Generation Tutorial.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Image Generation Tutorial.md b/docs/Image Generation Tutorial.md index 0d562041..b285bbc1 100644 --- a/docs/Image Generation Tutorial.md +++ b/docs/Image Generation Tutorial.md @@ -72,7 +72,7 @@ Then go back to the "Image AI" page and check "LLM Prompt Variations". After that, your prompts will be automatically updated by the LLM each time you generate an image. If you use a "Sequential Count" value greater than 1, a new prompt will be created for each sequential batch. -The improvement in creativity is striking: +The improvement in creativity is striking (prompt: `Photo of a beautiful woman at night under moonlight`): comparison_collage From 1762312fb48e74cca45c916b92d616ed14d6ee9e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 6 Dec 2025 20:10:32 -0800 Subject: [PATCH 55/61] Use random instead of np.random for image seeds (makes it work on Windows) --- modules/ui_image_generation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 2fb93fd8..6b39c5b5 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -1,12 +1,12 @@ import json import os +import random import time import traceback from datetime import datetime from pathlib import Path import gradio as gr -import numpy as np from PIL.PngImagePlugin import PngInfo from modules import shared, ui, utils @@ -794,7 +794,7 @@ def generate(state, save_images=True): seed = state['image_seed'] if seed == -1: - seed = np.random.randint(0, 2**32 - 1) + seed = random.randint(0, 2**32 - 1) device = get_device() if device is None: From 85f2df92e9e736708cab5ba8ea9b7484036d7ded Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 7 Dec 2025 06:56:58 -0800 Subject: [PATCH 56/61] Use flash_attention_2 by default for Transformers models --- modules/shared.py | 2 +- modules/ui_model_menu.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index 0a27f33d..b2aeadc6 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -112,7 +112,7 @@ group.add_argument('--no-cache', action='store_true', help='Set use_cache to Fal group.add_argument('--trust-remote-code', action='store_true', help='Set trust_remote_code=True while loading the model. Necessary for some models.') group.add_argument('--force-safetensors', action='store_true', help='Set use_safetensors=True while loading the model. This prevents arbitrary code execution.') group.add_argument('--no_use_fast', action='store_true', help='Set use_fast=False while loading the tokenizer (it\'s True by default). 
Use this if you have any problems related to use_fast.') -group.add_argument('--attn-implementation', type=str, default='sdpa', metavar="IMPLEMENTATION", help='Attention implementation. Valid options: sdpa, eager, flash_attention_2.') +group.add_argument('--attn-implementation', type=str, default='flash_attention_2', metavar="IMPLEMENTATION", help='Attention implementation. Valid options: flash_attention_2, sdpa, eager.') # bitsandbytes 4-bit group = parser.add_argument_group('bitsandbytes 4-bit') diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index 86adc229..aab0fcaf 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -44,7 +44,7 @@ def create_ui(): shared.gradio['gpu_layers'] = gr.Slider(label="gpu-layers", minimum=0, maximum=get_initial_gpu_layers_max(), step=1, value=shared.args.gpu_layers, info='Must be greater than 0 for the GPU to be used. ⚠️ Lower this value if you can\'t load the model.') shared.gradio['ctx_size'] = gr.Slider(label='ctx-size', minimum=256, maximum=131072, step=256, value=shared.args.ctx_size, info='Context length. Common values: 4096, 8192, 16384, 32768, 65536, 131072.') shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7') - shared.gradio['attn_implementation'] = gr.Dropdown(label="attn-implementation", choices=['sdpa', 'eager', 'flash_attention_2'], value=shared.args.attn_implementation, info='Attention implementation.') + shared.gradio['attn_implementation'] = gr.Dropdown(label="attn-implementation", choices=['flash_attention_2', 'sdpa', 'eager'], value=shared.args.attn_implementation, info='Attention implementation.') shared.gradio['cache_type'] = gr.Dropdown(label="cache-type", choices=['fp16', 'q8_0', 'q4_0', 'fp8', 'q8', 'q7', 'q6', 'q5', 'q4', 'q3', 'q2'], value=shared.args.cache_type, allow_custom_value=True, info='Valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4; ExLlamaV3 - fp16, q2 to q8. For ExLlamaV3, you can type custom combinations for separate k/v bits (e.g. 
q4_q8).') shared.gradio['tp_backend'] = gr.Dropdown(label="tp-backend", choices=['native', 'nccl'], value=shared.args.tp_backend, info='The backend for tensor parallelism.') From 17bd8d10f080fc744080bc104dff0b2c47371d80 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 7 Dec 2025 09:37:18 -0800 Subject: [PATCH 57/61] Update exllamav3 to 0.0.17 --- requirements/full/requirements.txt | 4 ++-- requirements/full/requirements_noavx2.txt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index 2f1c0f09..b1dd9d83 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -46,8 +46,8 @@ tiktoken # CUDA wheels https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/turboderp-org/exllamav3/releases/download/v0.0.16/exllamav3-0.0.16+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/turboderp-org/exllamav3/releases/download/v0.0.16/exllamav3-0.0.16+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/turboderp-org/exllamav3/releases/download/v0.0.17/exllamav3-0.0.17+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/turboderp-org/exllamav3/releases/download/v0.0.17/exllamav3-0.0.17+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index 4baca6c9..072f65bb 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -46,8 +46,8 @@ tiktoken # CUDA wheels https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/turboderp-org/exllamav3/releases/download/v0.0.16/exllamav3-0.0.16+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/turboderp-org/exllamav3/releases/download/v0.0.16/exllamav3-0.0.16+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version 
== "3.11" +https://github.com/turboderp-org/exllamav3/releases/download/v0.0.17/exllamav3-0.0.17+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/turboderp-org/exllamav3/releases/download/v0.0.17/exllamav3-0.0.17+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" From 058e78411d97737011420d4c4341dc8d2d4643b9 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 7 Dec 2025 10:16:08 -0800 Subject: [PATCH 58/61] docs: Small changes --- docs/Image Generation Tutorial.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/Image Generation Tutorial.md b/docs/Image Generation Tutorial.md index b285bbc1..43d6de7c 100644 --- a/docs/Image Generation Tutorial.md +++ b/docs/Image Generation Tutorial.md @@ -60,7 +60,7 @@ Note: The next time you launch the web UI, the model will get automatically load ### LLM Prompt Variations -To use this feature, you need to load an LLM in the main "Model" tab on the left. +To use this feature, you need to load an LLM in the main "Model" page on the left. If you have no idea what to use, do this to get started: @@ -78,7 +78,7 @@ The improvement in creativity is striking (prompt: `Photo of a beautiful woman a ## Generating images over API -It is possible to generate images usign the project's API. Just make sure to start the server with `--api`, either by +It is possible to generate images using the project's API. Just make sure to start the server with `--api`, either by 1. Passing the `--api` flag to your `start` script, like `./start_linux.sh --api`, or 2. Writing `--api` to your `user_data/CMD_FLAGS.txt` file and relaunching the web UI. 
From 3b8369a6798fbc504a980e50f579b7e042336762 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 7 Dec 2025 11:18:36 -0800 Subject: [PATCH 59/61] Update llama.cpp --- requirements/full/requirements.txt | 4 ++-- requirements/full/requirements_amd.txt | 4 ++-- requirements/full/requirements_amd_noavx2.txt | 4 ++-- requirements/full/requirements_apple_intel.txt | 4 ++-- requirements/full/requirements_apple_silicon.txt | 4 ++-- requirements/full/requirements_cpu_only.txt | 4 ++-- requirements/full/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/full/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements.txt | 4 ++-- requirements/portable/requirements_amd.txt | 4 ++-- requirements/portable/requirements_amd_noavx2.txt | 4 ++-- requirements/portable/requirements_apple_intel.txt | 4 ++-- requirements/portable/requirements_apple_silicon.txt | 4 ++-- requirements/portable/requirements_cpu_only.txt | 4 ++-- requirements/portable/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/portable/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements_vulkan.txt | 4 ++-- requirements/portable/requirements_vulkan_noavx2.txt | 4 ++-- 18 files changed, 36 insertions(+), 36 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index b1dd9d83..8bfa1318 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -44,8 +44,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.17/exllamav3-0.0.17+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.17/exllamav3-0.0.17+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index 8d9f8c43..088f7713 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -42,7 +42,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" 
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index f69c2466..e4e55e4d 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -42,7 +42,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index fd2dad7f..5903fe60 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -42,5 +42,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index d71dfbe5..47517578 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ 
b/requirements/full/requirements_apple_silicon.txt @@ -42,5 +42,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index 2d174713..1fee7121 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -42,5 +42,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index 2e047b42..204af32a 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -42,5 +42,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index 072f65bb..86208b9f 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -44,8 +44,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" 
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.17/exllamav3-0.0.17+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.17/exllamav3-0.0.17+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt index 21cef622..4a92fb8d 100644 --- a/requirements/portable/requirements.txt +++ b/requirements/portable/requirements.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_amd.txt b/requirements/portable/requirements_amd.txt index bce5a9e0..5ad1fe3a 100644 --- a/requirements/portable/requirements_amd.txt +++ b/requirements/portable/requirements_amd.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_amd_noavx2.txt b/requirements/portable/requirements_amd_noavx2.txt index 18257eea..9c3e2851 100644 --- a/requirements/portable/requirements_amd_noavx2.txt +++ b/requirements/portable/requirements_amd_noavx2.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # AMD wheels 
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+rocm6.4.4avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+rocm6.4.4avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt index e97ff558..8a618e35 100644 --- a/requirements/portable/requirements_apple_intel.txt +++ b/requirements/portable/requirements_apple_intel.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt index d113431c..c1c643e8 100644 --- a/requirements/portable/requirements_apple_silicon.txt +++ b/requirements/portable/requirements_apple_silicon.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt index 1553096c..f4bc8cab 100644 --- a/requirements/portable/requirements_cpu_only.txt +++ b/requirements/portable/requirements_cpu_only.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == 
"Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt index ce259dcc..81f9bf2e 100644 --- a/requirements/portable/requirements_cpu_only_noavx2.txt +++ b/requirements/portable/requirements_cpu_only_noavx2.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt index 887a4542..6cc87a4e 100644 --- a/requirements/portable/requirements_noavx2.txt +++ b/requirements/portable/requirements_noavx2.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt index d9eff1ac..25c3638f 100644 --- a/requirements/portable/requirements_vulkan.txt +++ b/requirements/portable/requirements_vulkan.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # Vulkan wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" 
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt index 4a5ee8a3..54d0cd1a 100644 --- a/requirements/portable/requirements_vulkan_noavx2.txt +++ b/requirements/portable/requirements_vulkan_noavx2.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" From 1ec9f708e5cbb7a2d74fa35f56735d80991163c2 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 7 Dec 2025 11:49:22 -0800 Subject: [PATCH 60/61] Clear the torch cache between sequential image generations --- modules/ui_image_generation.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 6b39c5b5..bcc5bb2d 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -848,6 +848,9 @@ def generate(state, save_images=True): if shared.stop_everything: break + if batch_idx > 0: + clear_torch_cache() + generator.manual_seed(int(seed + batch_idx)) # Generate prompt variation if enabled From b758059e950c2442cf6750082a1914e1c20b9e10 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 7 Dec 2025 12:23:19 -0800 Subject: [PATCH 61/61] Revert "Clear the torch cache between sequential image generations" This reverts commit 1ec9f708e5cbb7a2d74fa35f56735d80991163c2. --- modules/ui_image_generation.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index bcc5bb2d..6b39c5b5 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -848,9 +848,6 @@ def generate(state, save_images=True): if shared.stop_everything: break - if batch_idx > 0: - clear_torch_cache() - generator.manual_seed(int(seed + batch_idx)) # Generate prompt variation if enabled
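On the seed handling visible in the last two hunks: PATCH 55/61 swapped `np.random.randint(0, 2**32 - 1)` for the standard library's `random.randint(0, 2**32 - 1)`, presumably because NumPy's legacy `randint` is bounded by the platform's default integer width, which is 32 bits on Windows, while the stdlib function accepts arbitrarily large Python integers. A small illustrative sketch of the resulting pattern, including the per-batch offset that mirrors the `generator.manual_seed(int(seed + batch_idx))` line above (variable names are illustrative):

```
import random

requested_seed = -1  # -1 means "pick a random seed", as in the UI
batch_count = 3      # illustrative value

seed = requested_seed
if seed == -1:
    # Works on Windows; np.random.randint rejects an upper bound this large there.
    seed = random.randint(0, 2**32 - 1)

for batch_idx in range(batch_count):
    # Each sequential batch reseeds with an offset derived from the single
    # recorded seed, matching the manual_seed(int(seed + batch_idx)) call above.
    batch_seed = int(seed + batch_idx)
    print(batch_idx, batch_seed)
```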