From e24ba92ef231963387f5719125943acd83b46c46 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 1 Dec 2025 08:05:21 -0800 Subject: [PATCH 01/61] UI: Optimize typing in all textareas --- js/main.js | 61 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 61 insertions(+) diff --git a/js/main.js b/js/main.js index 67f60279..337131c2 100644 --- a/js/main.js +++ b/js/main.js @@ -1105,3 +1105,64 @@ document.fonts.addEventListener("loadingdone", (event) => { // Initial call to set the margin based on current state updateMargin(); })(); + +//------------------------------------------------ +// Optimize typing in all textareas +//------------------------------------------------ + +(function() { + document.querySelectorAll("textarea").forEach(textarea => { + const computedStyle = getComputedStyle(textarea); + const MIN_HEIGHT = parseInt(computedStyle.minHeight) || textarea.offsetHeight || 42; + const configuredMax = parseInt(computedStyle.maxHeight) || 400; + + let rafId = null; + let isOurResize = false; + + function doResize() { + rafId = null; + isOurResize = true; + + // Recalculate max height each time + const maxHeight = Math.min(configuredMax, window.innerHeight * 0.5); + + textarea.style.height = "auto"; + const contentHeight = textarea.scrollHeight; + const clampedHeight = Math.min(maxHeight, Math.max(MIN_HEIGHT, contentHeight)); + + textarea.style.height = clampedHeight + "px"; + textarea.style.overflowY = contentHeight > maxHeight ? "auto" : "hidden"; + + isOurResize = false; + } + + function scheduleResize() { + if (rafId === null) { + rafId = requestAnimationFrame(doResize); + } + } + + const desc = Object.getOwnPropertyDescriptor(CSSStyleDeclaration.prototype, "height"); + const originalSet = desc?.set || function(v) { this.setProperty("height", v); }; + const originalGet = desc?.get || function() { return this.getPropertyValue("height"); }; + + Object.defineProperty(textarea.style, "height", { + get() { return originalGet.call(this); }, + set(value) { + if (isOurResize) originalSet.call(this, value); + else scheduleResize(); + }, + configurable: true + }); + + textarea.addEventListener("input", scheduleResize, { passive: true }); + doResize(); + }); + + // Trigger resize on all textareas when window resizes + window.addEventListener("resize", () => { + document.querySelectorAll("textarea").forEach(ta => { + ta.dispatchEvent(new Event("input")); + }); + }, { passive: true }); +})(); From a83821e941f8f6d33556d101d7aa241b41db562e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 1 Dec 2025 10:34:23 -0800 Subject: [PATCH 02/61] Revert "UI: Optimize typing in all textareas" This reverts commit e24ba92ef231963387f5719125943acd83b46c46. 
--- js/main.js | 61 ------------------------------------------------------ 1 file changed, 61 deletions(-) diff --git a/js/main.js b/js/main.js index 337131c2..67f60279 100644 --- a/js/main.js +++ b/js/main.js @@ -1105,64 +1105,3 @@ document.fonts.addEventListener("loadingdone", (event) => { // Initial call to set the margin based on current state updateMargin(); })(); - -//------------------------------------------------ -// Optimize typing in all textareas -//------------------------------------------------ - -(function() { - document.querySelectorAll("textarea").forEach(textarea => { - const computedStyle = getComputedStyle(textarea); - const MIN_HEIGHT = parseInt(computedStyle.minHeight) || textarea.offsetHeight || 42; - const configuredMax = parseInt(computedStyle.maxHeight) || 400; - - let rafId = null; - let isOurResize = false; - - function doResize() { - rafId = null; - isOurResize = true; - - // Recalculate max height each time - const maxHeight = Math.min(configuredMax, window.innerHeight * 0.5); - - textarea.style.height = "auto"; - const contentHeight = textarea.scrollHeight; - const clampedHeight = Math.min(maxHeight, Math.max(MIN_HEIGHT, contentHeight)); - - textarea.style.height = clampedHeight + "px"; - textarea.style.overflowY = contentHeight > maxHeight ? "auto" : "hidden"; - - isOurResize = false; - } - - function scheduleResize() { - if (rafId === null) { - rafId = requestAnimationFrame(doResize); - } - } - - const desc = Object.getOwnPropertyDescriptor(CSSStyleDeclaration.prototype, "height"); - const originalSet = desc?.set || function(v) { this.setProperty("height", v); }; - const originalGet = desc?.get || function() { return this.getPropertyValue("height"); }; - - Object.defineProperty(textarea.style, "height", { - get() { return originalGet.call(this); }, - set(value) { - if (isOurResize) originalSet.call(this, value); - else scheduleResize(); - }, - configurable: true - }); - - textarea.addEventListener("input", scheduleResize, { passive: true }); - doResize(); - }); - - // Trigger resize on all textareas when window resizes - window.addEventListener("resize", () => { - document.querySelectorAll("textarea").forEach(ta => { - ta.dispatchEvent(new Event("input")); - }); - }, { passive: true }); -})(); From b3666e140de349a651aea22a6f418572925a5c62 Mon Sep 17 00:00:00 2001 From: oobabooga Date: Tue, 2 Dec 2025 14:55:38 -0300 Subject: [PATCH 03/61] Add image generation support (#7328) --- README.md | 7 +- css/main.css | 96 +- docs/Image Generation Tutorial.md | 20 + modules/image_models.py | 183 ++++ modules/shared.py | 53 +- modules/ui.py | 40 +- modules/ui_image_generation.py | 847 ++++++++++++++++++ modules/utils.py | 22 +- requirements/full/requirements.txt | 4 + requirements/full/requirements_amd.txt | 4 + requirements/full/requirements_amd_noavx2.txt | 4 + .../full/requirements_apple_intel.txt | 4 + .../full/requirements_apple_silicon.txt | 4 + requirements/full/requirements_cpu_only.txt | 4 + .../full/requirements_cpu_only_noavx2.txt | 4 + requirements/full/requirements_noavx2.txt | 4 + requirements/full/requirements_nowheels.txt | 4 + server.py | 24 + 18 files changed, 1314 insertions(+), 14 deletions(-) create mode 100644 docs/Image Generation Tutorial.md create mode 100644 modules/image_models.py create mode 100644 modules/ui_image_generation.py diff --git a/README.md b/README.md index d350d959..174fc2ac 100644 --- a/README.md +++ b/README.md @@ -28,6 +28,8 @@ A Gradio web UI for Large Language Models. 
- 100% offline and private, with zero telemetry, external resources, or remote update requests. - **File attachments**: Upload text files, PDF documents, and .docx documents to talk about their contents. - **Vision (multimodal models)**: Attach images to messages for visual understanding ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Multimodal-Tutorial)). +Image generation: A dedicated tab for diffusers models like Z-Image-Turbo and Qwen-Image. Features 4-bit/8-bit quantization and a persistent gallery with metadata (tutorial). +- **Image generation**: A dedicated tab for `diffusers` models like **Z-Image-Turbo** and **Qwen-Image**. Features 4-bit/8-bit quantization and a persistent gallery with metadata ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Image-Generation-Tutorial)). - **Web search**: Optionally search the internet with LLM-generated queries to add context to the conversation. - Aesthetic UI with dark and light themes. - Syntax highlighting for code blocks and LaTeX rendering for mathematical expressions. @@ -432,6 +434,7 @@ https://colab.research.google.com/github/oobabooga/text-generation-webui/blob/ma https://www.reddit.com/r/Oobabooga/ -## Acknowledgment +## Acknowledgments -In August 2023, [Andreessen Horowitz](https://a16z.com/) (a16z) provided a generous grant to encourage and support my independent work on this project. I am **extremely** grateful for their trust and recognition. +- In August 2023, [Andreessen Horowitz](https://a16z.com/) (a16z) provided a generous grant to encourage and support my independent work on this project. I am **extremely** grateful for their trust and recognition. +- This project was inspired by [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) and wouldn't exist without it. 
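The image-generation bullet added to the README above is implemented later in this patch by `modules/image_models.py` and `modules/ui_image_generation.py`: a `diffusers` pipeline is loaded with an optional pipeline-level quantization config and then called with the values from the UI sliders. The snippet below is a minimal standalone sketch of that flow for Z-Image-Turbo with bnb-4bit quantization (commentary on the patch, not part of the diff). It assumes a CUDA GPU, the `diffusers` git revision pinned in the requirements files later in this patch, and an installed `bitsandbytes`; the prompt, seed, and output filename are purely illustrative.

```python
import torch
from diffusers import BitsAndBytesConfig, DiffusionPipeline
from diffusers.quantizers import PipelineQuantizationConfig

# Quantize only the transformer, mirroring the 'bnb-4bit' branch of
# get_quantization_config() in modules/image_models.py.
quant_config = PipelineQuantizationConfig(
    quant_mapping={
        "transformer": BitsAndBytesConfig(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_use_double_quant=True,
        )
    }
)

# DiffusionPipeline auto-detects the concrete pipeline class (ZImagePipeline here).
pipe = DiffusionPipeline.from_pretrained(
    "Tongyi-MAI/Z-Image-Turbo",
    torch_dtype=torch.bfloat16,
    low_cpu_mem_usage=True,
    quantization_config=quant_config,
)
pipe.to("cuda")

# Z-Image-Turbo is tuned for few steps and CFG 0, as the tutorial added below notes.
image = pipe(
    prompt="a lighthouse on a cliff at sunset, cinematic lighting",  # illustrative prompt
    negative_prompt="low quality",
    width=1024,
    height=1024,
    num_inference_steps=9,
    guidance_scale=0.0,
    generator=torch.Generator("cuda").manual_seed(42),  # illustrative seed
).images[0]
image.save("z_image_turbo_example.png")  # illustrative filename
```

In the web UI itself the model is loaded from `user_data/image_models/` (the download handler replaces `/` with `_` in the repo name), and the same options are exposed through the new `--image-model`, `--image-quant`, `--image-cpu-offload`, and related flags added to `modules/shared.py`, which `server.py` uses to load an image model at startup.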
diff --git a/css/main.css b/css/main.css index fd79d24c..5c1c356d 100644 --- a/css/main.css +++ b/css/main.css @@ -93,11 +93,11 @@ ol li p, ul li p { display: inline-block; } -#notebook-parent-tab, #chat-tab, #parameters, #chat-settings, #lora, #training-tab, #model-tab, #session-tab, #character-tab { +#notebook-parent-tab, #chat-tab, #parameters, #chat-settings, #lora, #training-tab, #model-tab, #session-tab, #character-tab, #image-ai-tab { border: 0; } -#notebook-parent-tab, #parameters, #chat-settings, #lora, #training-tab, #model-tab, #session-tab, #character-tab { +#notebook-parent-tab, #parameters, #chat-settings, #lora, #training-tab, #model-tab, #session-tab, #character-tab, #image-ai-tab { padding: 1rem; } @@ -244,37 +244,46 @@ button { font-size: 100% !important; } -.pretty_scrollbar::-webkit-scrollbar { +.pretty_scrollbar::-webkit-scrollbar, +#image-history-gallery > :nth-child(2)::-webkit-scrollbar { width: 8px; height: 8px; } -.pretty_scrollbar::-webkit-scrollbar-track { +.pretty_scrollbar::-webkit-scrollbar-track, +#image-history-gallery > :nth-child(2)::-webkit-scrollbar-track { background: transparent; } .pretty_scrollbar::-webkit-scrollbar-thumb, -.pretty_scrollbar::-webkit-scrollbar-thumb:hover { +.pretty_scrollbar::-webkit-scrollbar-thumb:hover, +#image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb, +#image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb:hover { background: var(--neutral-300); border-radius: 30px; } .dark .pretty_scrollbar::-webkit-scrollbar-thumb, -.dark .pretty_scrollbar::-webkit-scrollbar-thumb:hover { +.dark .pretty_scrollbar::-webkit-scrollbar-thumb:hover, +.dark #image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb, +.dark #image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb:hover { background: rgb(255 255 255 / 6.25%); border-radius: 10px; } -.pretty_scrollbar::-webkit-resizer { +.pretty_scrollbar::-webkit-resizer, +#image-history-gallery > :nth-child(2)::-webkit-resizer { background: #c5c5d2; } -.dark .pretty_scrollbar::-webkit-resizer { +.dark .pretty_scrollbar::-webkit-resizer, +.dark #image-history-gallery > :nth-child(2)::-webkit-resizer { background: #ccc; border-radius: 10px; } -.pretty_scrollbar::-webkit-scrollbar-corner { +.pretty_scrollbar::-webkit-scrollbar-corner, +#image-history-gallery > :nth-child(2)::-webkit-scrollbar-corner { background: transparent; } @@ -1674,3 +1683,72 @@ button:focus { .dark .sidebar-vertical-separator { border-bottom: 1px solid rgb(255 255 255 / 10%); } + +button#swap-height-width { + position: absolute; + top: -50px; + right: 0; + border: 0; +} + +#image-output-gallery, #image-output-gallery > :nth-child(2) { + height: calc(100vh - 83px); + max-height: calc(100vh - 83px); +} + +#image-history-gallery, #image-history-gallery > :nth-child(2) { + height: calc(100vh - 174px); + max-height: calc(100vh - 174px); +} + +/* Additional CSS for the paginated image gallery */ + +/* Page info styling */ +#image-page-info { + display: flex; + align-items: center; + justify-content: center; + min-width: 200px; + font-size: 0.9em; + color: var(--body-text-color-subdued); +} + +/* Settings display panel */ +#image-ai-tab .settings-display-panel { + background: var(--background-fill-secondary); + padding: 12px; + border-radius: 8px; + font-size: 0.9em; + max-height: 300px; + overflow-y: auto; + margin-top: 8px; +} + +/* Gallery status message */ +#image-ai-tab .gallery-status { + color: var(--color-accent); + font-size: 0.85em; + margin-top: 4px; +} + +/* Pagination button row 
alignment */ +#image-ai-tab .pagination-controls { + display: flex; + align-items: center; + gap: 8px; + flex-wrap: wrap; +} + +/* Selected image preview container */ +#image-ai-tab .selected-preview-container { + border: 1px solid var(--border-color-primary); + border-radius: 8px; + padding: 8px; + background: var(--background-fill-secondary); +} + +/* Fix a gr.Markdown UI glitch when clicking Next in the + * Image AI > Gallery tab */ +.min.svelte-1yrv54 { + min-height: 0; +} diff --git a/docs/Image Generation Tutorial.md b/docs/Image Generation Tutorial.md new file mode 100644 index 00000000..e7022c34 --- /dev/null +++ b/docs/Image Generation Tutorial.md @@ -0,0 +1,20 @@ +# Image Generation Tutorial + +This feature allows you to generate images using high-speed models like Z-Image-Turbo directly within the web UI. + +## How to use + +1. Click on the **Image AI** tab at the top of the interface. +2. Select the **Model** sub-tab. +3. Copy and paste the following link into the **Download model** box: + +``` +https://huggingface.co/Tongyi-MAI/Z-Image-Turbo +``` + +4. Click the **Download** button and wait for the confirmation message. +5. In the **Model** dropdown menu, select the model you just downloaded (if you don't see it, click the 🔄 refresh button). +6. Click **Load**. +7. Go to the **Generate** sub-tab, type a prompt, and click **GENERATE**. + +> **Note for Z-Image-Turbo:** For the best results with this specific model, keep the **CFG Scale** slider at **0**. diff --git a/modules/image_models.py b/modules/image_models.py new file mode 100644 index 00000000..e6f9a172 --- /dev/null +++ b/modules/image_models.py @@ -0,0 +1,183 @@ +import time + +import modules.shared as shared +from modules.logging_colors import logger +from modules.torch_utils import get_device +from modules.utils import resolve_model_path + + +def get_quantization_config(quant_method): + """ + Get the appropriate quantization config based on the selected method. + + Args: + quant_method: One of 'none', 'bnb-8bit', 'bnb-4bit', 'quanto-8bit', 'quanto-4bit', 'quanto-2bit' + + Returns: + PipelineQuantizationConfig or None + """ + import torch + from diffusers import BitsAndBytesConfig, QuantoConfig + from diffusers.quantizers import PipelineQuantizationConfig + + if quant_method == 'none' or not quant_method: + return None + + # Bitsandbytes 8-bit quantization + elif quant_method == 'bnb-8bit': + return PipelineQuantizationConfig( + quant_mapping={ + "transformer": BitsAndBytesConfig( + load_in_8bit=True + ) + } + ) + + # Bitsandbytes 4-bit quantization + elif quant_method == 'bnb-4bit': + return PipelineQuantizationConfig( + quant_mapping={ + "transformer": BitsAndBytesConfig( + load_in_4bit=True, + bnb_4bit_quant_type="nf4", + bnb_4bit_compute_dtype=torch.bfloat16, + bnb_4bit_use_double_quant=True + ) + } + ) + + # Quanto 8-bit quantization + elif quant_method == 'quanto-8bit': + return PipelineQuantizationConfig( + quant_mapping={ + "transformer": QuantoConfig(weights_dtype="int8") + } + ) + + # Quanto 4-bit quantization + elif quant_method == 'quanto-4bit': + return PipelineQuantizationConfig( + quant_mapping={ + "transformer": QuantoConfig(weights_dtype="int4") + } + ) + + # Quanto 2-bit quantization + elif quant_method == 'quanto-2bit': + return PipelineQuantizationConfig( + quant_mapping={ + "transformer": QuantoConfig(weights_dtype="int2") + } + ) + + else: + logger.warning(f"Unknown quantization method: {quant_method}. 
Loading without quantization.") + return None + + +def get_pipeline_type(pipe): + """ + Detect the pipeline type based on the loaded pipeline class. + + Returns: + str: 'zimage', 'qwenimage', or 'unknown' + """ + class_name = pipe.__class__.__name__ + if class_name == 'ZImagePipeline': + return 'zimage' + elif class_name == 'QwenImagePipeline': + return 'qwenimage' + else: + return 'unknown' + + +def load_image_model(model_name, dtype='bfloat16', attn_backend='sdpa', cpu_offload=False, compile_model=False, quant_method='none'): + """ + Load a diffusers image generation model. + + Args: + model_name: Name of the model directory + dtype: 'bfloat16' or 'float16' + attn_backend: 'sdpa', 'flash_attention_2', or 'flash_attention_3' + cpu_offload: Enable CPU offloading for low VRAM + compile_model: Compile the model for faster inference (slow first run) + quant_method: Quantization method - 'none', 'bnb-8bit', 'bnb-4bit', 'quanto-8bit', 'quanto-4bit', 'quanto-2bit' + """ + import torch + from diffusers import DiffusionPipeline + + logger.info(f"Loading image model \"{model_name}\" with quantization: {quant_method}") + t0 = time.time() + + dtype_map = {"bfloat16": torch.bfloat16, "float16": torch.float16} + target_dtype = dtype_map.get(dtype, torch.bfloat16) + + model_path = resolve_model_path(model_name, image_model=True) + + try: + # Get quantization config based on selected method + pipeline_quant_config = get_quantization_config(quant_method) + + # Load the pipeline + load_kwargs = { + "torch_dtype": target_dtype, + "low_cpu_mem_usage": True, + } + + if pipeline_quant_config is not None: + load_kwargs["quantization_config"] = pipeline_quant_config + + # Use DiffusionPipeline for automatic pipeline detection + # This handles both ZImagePipeline and QwenImagePipeline + pipe = DiffusionPipeline.from_pretrained( + str(model_path), + **load_kwargs + ) + + pipeline_type = get_pipeline_type(pipe) + + if not cpu_offload: + pipe.to(get_device()) + + # Set attention backend (if supported by the pipeline) + if hasattr(pipe, 'transformer') and hasattr(pipe.transformer, 'set_attention_backend'): + if attn_backend == 'flash_attention_2': + pipe.transformer.set_attention_backend("flash") + elif attn_backend == 'flash_attention_3': + pipe.transformer.set_attention_backend("_flash_3") + # sdpa is the default, no action needed + + if compile_model: + if hasattr(pipe, 'transformer') and hasattr(pipe.transformer, 'compile'): + logger.info("Compiling model (first run will be slow)...") + pipe.transformer.compile() + + if cpu_offload: + pipe.enable_model_cpu_offload() + + shared.image_model = pipe + shared.image_model_name = model_name + shared.image_pipeline_type = pipeline_type + + logger.info(f"Loaded image model \"{model_name}\" in {(time.time() - t0):.2f} seconds.") + return pipe + + except Exception as e: + logger.error(f"Failed to load image model: {str(e)}") + return None + + +def unload_image_model(): + """Unload the current image model and free VRAM.""" + if shared.image_model is None: + return + + del shared.image_model + shared.image_model = None + shared.image_model_name = 'None' + shared.image_pipeline_type = None + + from modules.torch_utils import clear_torch_cache + clear_torch_cache() + + logger.info("Image model unloaded.") diff --git a/modules/shared.py b/modules/shared.py index 134c0cac..316f7729 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -11,7 +11,7 @@ import yaml from modules.logging_colors import logger from modules.presets import default_preset -# Model variables +# Text 
model variables model = None tokenizer = None model_name = 'None' @@ -20,6 +20,11 @@ is_multimodal = False model_dirty_from_training = False lora_names = [] +# Image model variables +image_model = None +image_model_name = 'None' +image_pipeline_type = None + # Generation variables stop_everything = False generation_lock = None @@ -46,6 +51,18 @@ group.add_argument('--extensions', type=str, nargs='+', help='The list of extens group.add_argument('--verbose', action='store_true', help='Print the prompts to the terminal.') group.add_argument('--idle-timeout', type=int, default=0, help='Unload model after this many minutes of inactivity. It will be automatically reloaded when you try to use it again.') +# Image generation +group = parser.add_argument_group('Image model') +group.add_argument('--image-model', type=str, help='Name of the image model to select on startup (overrides saved setting).') +group.add_argument('--image-model-dir', type=str, default='user_data/image_models', help='Path to directory with all the image models.') +group.add_argument('--image-dtype', type=str, default=None, choices=['bfloat16', 'float16'], help='Data type for image model.') +group.add_argument('--image-attn-backend', type=str, default=None, choices=['sdpa', 'flash_attention_2', 'flash_attention_3'], help='Attention backend for image model.') +group.add_argument('--image-cpu-offload', action='store_true', help='Enable CPU offloading for image model.') +group.add_argument('--image-compile', action='store_true', help='Compile the image model for faster inference.') +group.add_argument('--image-quant', type=str, default=None, + choices=['none', 'bnb-8bit', 'bnb-4bit', 'quanto-8bit', 'quanto-4bit', 'quanto-2bit'], + help='Quantization method for image model.') + # Model loader group = parser.add_argument_group('Model loader') group.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. 
Valid options: Transformers, llama.cpp, ExLlamav3_HF, ExLlamav2_HF, ExLlamav2, TensorRT-LLM.') @@ -290,6 +307,24 @@ settings = { # Extensions 'default_extensions': [], + + # Image generation settings + 'image_prompt': '', + 'image_neg_prompt': '', + 'image_width': 1024, + 'image_height': 1024, + 'image_aspect_ratio': '1:1 Square', + 'image_steps': 9, + 'image_cfg_scale': 0.0, + 'image_seed': -1, + 'image_batch_size': 1, + 'image_batch_count': 1, + 'image_model_menu': 'None', + 'image_dtype': 'bfloat16', + 'image_attn_backend': 'sdpa', + 'image_cpu_offload': False, + 'image_compile': False, + 'image_quant': 'none', } default_settings = copy.deepcopy(settings) @@ -314,6 +349,22 @@ def do_cmd_flags_warnings(): logger.warning('\nThe multi-user mode is highly experimental and should not be shared publicly.') +def apply_image_model_cli_overrides(): + """Apply command-line overrides for image model settings.""" + if args.image_model is not None: + settings['image_model_menu'] = args.image_model + if args.image_dtype is not None: + settings['image_dtype'] = args.image_dtype + if args.image_attn_backend is not None: + settings['image_attn_backend'] = args.image_attn_backend + if args.image_cpu_offload: + settings['image_cpu_offload'] = True + if args.image_compile: + settings['image_compile'] = True + if args.image_quant is not None: + settings['image_quant'] = args.image_quant + + def fix_loader_name(name): if not name: return name diff --git a/modules/ui.py b/modules/ui.py index f99e8b6a..9700d297 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -280,6 +280,26 @@ def list_interface_input_elements(): 'include_past_attachments', ] + # Image generation elements + elements += [ + 'image_prompt', + 'image_neg_prompt', + 'image_width', + 'image_height', + 'image_aspect_ratio', + 'image_steps', + 'image_cfg_scale', + 'image_seed', + 'image_batch_size', + 'image_batch_count', + 'image_model_menu', + 'image_dtype', + 'image_attn_backend', + 'image_compile', + 'image_cpu_offload', + 'image_quant', + ] + return elements @@ -509,7 +529,25 @@ def setup_auto_save(): 'theme_state', 'show_two_notebook_columns', 'paste_to_attachment', - 'include_past_attachments' + 'include_past_attachments', + + # Image generation tab (ui_image_generation.py) + 'image_prompt', + 'image_neg_prompt', + 'image_width', + 'image_height', + 'image_aspect_ratio', + 'image_steps', + 'image_cfg_scale', + 'image_seed', + 'image_batch_size', + 'image_batch_count', + 'image_model_menu', + 'image_dtype', + 'image_attn_backend', + 'image_compile', + 'image_cpu_offload', + 'image_quant', ] for element_name in change_elements: diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py new file mode 100644 index 00000000..92c447c8 --- /dev/null +++ b/modules/ui_image_generation.py @@ -0,0 +1,847 @@ +import json +import os +import time +import traceback +from datetime import datetime +from pathlib import Path + +import gradio as gr +import numpy as np +from PIL import Image +from PIL.PngImagePlugin import PngInfo + +from modules import shared, ui, utils +from modules.image_models import ( + get_pipeline_type, + load_image_model, + unload_image_model +) +from modules.logging_colors import logger +from modules.text_generation import stop_everything_event +from modules.torch_utils import get_device +from modules.utils import gradio + +ASPECT_RATIOS = { + "1:1 Square": (1, 1), + "16:9 Cinema": (16, 9), + "9:16 Mobile": (9, 16), + "4:3 Photo": (4, 3), + "Custom": None, +} + +STEP = 16 +IMAGES_PER_PAGE = 64 + +# Settings keys to save 
in PNG metadata (Generate tab only) +METADATA_SETTINGS_KEYS = [ + 'image_prompt', + 'image_neg_prompt', + 'image_width', + 'image_height', + 'image_aspect_ratio', + 'image_steps', + 'image_seed', + 'image_batch_size', + 'image_batch_count', + 'image_cfg_scale', +] + +# Cache for all image paths +_image_cache = [] +_cache_timestamp = 0 + + +def round_to_step(value, step=STEP): + return round(value / step) * step + + +def clamp(value, min_val, max_val): + return max(min_val, min(max_val, value)) + + +def apply_aspect_ratio(aspect_ratio, current_width, current_height): + if aspect_ratio == "Custom" or aspect_ratio not in ASPECT_RATIOS: + return current_width, current_height + + w_ratio, h_ratio = ASPECT_RATIOS[aspect_ratio] + + if w_ratio == h_ratio: + base = min(current_width, current_height) + new_width = base + new_height = base + elif w_ratio < h_ratio: + new_width = current_width + new_height = round_to_step(current_width * h_ratio / w_ratio) + else: + new_height = current_height + new_width = round_to_step(current_height * w_ratio / h_ratio) + + new_width = clamp(new_width, 256, 2048) + new_height = clamp(new_height, 256, 2048) + + return int(new_width), int(new_height) + + +def update_height_from_width(width, aspect_ratio): + if aspect_ratio == "Custom" or aspect_ratio not in ASPECT_RATIOS: + return gr.update() + + w_ratio, h_ratio = ASPECT_RATIOS[aspect_ratio] + new_height = round_to_step(width * h_ratio / w_ratio) + new_height = clamp(new_height, 256, 2048) + + return int(new_height) + + +def update_width_from_height(height, aspect_ratio): + if aspect_ratio == "Custom" or aspect_ratio not in ASPECT_RATIOS: + return gr.update() + + w_ratio, h_ratio = ASPECT_RATIOS[aspect_ratio] + new_width = round_to_step(height * w_ratio / h_ratio) + new_width = clamp(new_width, 256, 2048) + + return int(new_width) + + +def swap_dimensions_and_update_ratio(width, height, aspect_ratio): + new_width, new_height = height, width + + new_ratio = "Custom" + for name, ratios in ASPECT_RATIOS.items(): + if ratios is None: + continue + w_r, h_r = ratios + expected_height = new_width * h_r / w_r + if abs(expected_height - new_height) < STEP: + new_ratio = name + break + + return new_width, new_height, new_ratio + + +def build_generation_metadata(state, actual_seed): + """Build metadata dict from generation settings.""" + metadata = {} + for key in METADATA_SETTINGS_KEYS: + if key in state: + metadata[key] = state[key] + + # Store the actual seed used (not -1) + metadata['image_seed'] = actual_seed + metadata['generated_at'] = datetime.now().isoformat() + metadata['model'] = shared.image_model_name + + return metadata + + +def save_generated_images(images, state, actual_seed): + """Save images with generation metadata embedded in PNG.""" + date_str = datetime.now().strftime("%Y-%m-%d") + folder_path = os.path.join("user_data", "image_outputs", date_str) + os.makedirs(folder_path, exist_ok=True) + + metadata = build_generation_metadata(state, actual_seed) + metadata_json = json.dumps(metadata, ensure_ascii=False) + + for idx, img in enumerate(images): + timestamp = datetime.now().strftime("%H-%M-%S") + filename = f"{timestamp}_{actual_seed:010d}_{idx:03d}.png" + filepath = os.path.join(folder_path, filename) + + # Create PNG metadata + png_info = PngInfo() + png_info.add_text("image_gen_settings", metadata_json) + + # Save with metadata + img.save(filepath, pnginfo=png_info) + + +def read_image_metadata(image_path): + """Read generation metadata from PNG file.""" + try: + with Image.open(image_path) as img: + if 
hasattr(img, 'text') and 'image_gen_settings' in img.text: + return json.loads(img.text['image_gen_settings']) + except Exception as e: + logger.debug(f"Could not read metadata from {image_path}: {e}") + return None + + +def format_metadata_for_display(metadata): + """Format metadata as readable text.""" + if not metadata: + return "No generation settings found in this image." + + lines = ["**Generation Settings**", ""] + + # Display in a nice order + display_order = [ + ('image_prompt', 'Prompt'), + ('image_neg_prompt', 'Negative Prompt'), + ('image_width', 'Width'), + ('image_height', 'Height'), + ('image_aspect_ratio', 'Aspect Ratio'), + ('image_steps', 'Steps'), + ('image_cfg_scale', 'CFG Scale'), + ('image_seed', 'Seed'), + ('image_batch_size', 'Batch Size'), + ('image_batch_count', 'Batch Count'), + ('model', 'Model'), + ('generated_at', 'Generated At'), + ] + + for key, label in display_order: + if key in metadata: + value = metadata[key] + if key in ['image_prompt', 'image_neg_prompt'] and value: + # Truncate long prompts for display + if len(str(value)) > 200: + value = str(value)[:200] + "..." + lines.append(f"**{label}:** {value}") + + return "\n\n".join(lines) + + +def get_all_history_images(force_refresh=False): + """Get all history images sorted by modification time (newest first). Uses caching.""" + global _image_cache, _cache_timestamp + + output_dir = os.path.join("user_data", "image_outputs") + if not os.path.exists(output_dir): + return [] + + # Check if we need to refresh cache + current_time = time.time() + if not force_refresh and _image_cache and (current_time - _cache_timestamp) < 2: + return _image_cache + + image_files = [] + for root, _, files in os.walk(output_dir): + for file in files: + if file.endswith((".png", ".jpg", ".jpeg")): + full_path = os.path.join(root, file) + image_files.append((full_path, os.path.getmtime(full_path))) + + image_files.sort(key=lambda x: x[1], reverse=True) + _image_cache = [x[0] for x in image_files] + _cache_timestamp = current_time + + return _image_cache + + +def get_paginated_images(page=0, force_refresh=False): + """Get images for a specific page.""" + all_images = get_all_history_images(force_refresh) + total_images = len(all_images) + total_pages = max(1, (total_images + IMAGES_PER_PAGE - 1) // IMAGES_PER_PAGE) + + # Clamp page to valid range + page = max(0, min(page, total_pages - 1)) + + start_idx = page * IMAGES_PER_PAGE + end_idx = min(start_idx + IMAGES_PER_PAGE, total_images) + + page_images = all_images[start_idx:end_idx] + + return page_images, page, total_pages, total_images + + +def get_initial_page_info(): + """Get page info string for initial load.""" + _, page, total_pages, total_images = get_paginated_images(0) + return f"Page {page + 1} of {total_pages} ({total_images} total images)" + + +def refresh_gallery(current_page=0): + """Refresh gallery with current page.""" + images, page, total_pages, total_images = get_paginated_images(current_page, force_refresh=True) + page_info = f"Page {page + 1} of {total_pages} ({total_images} total images)" + return images, page, page_info + + +def go_to_page(page_num, current_page): + """Go to a specific page (1-indexed input).""" + try: + page = int(page_num) - 1 # Convert to 0-indexed + except (ValueError, TypeError): + page = current_page + + images, page, total_pages, total_images = get_paginated_images(page) + page_info = f"Page {page + 1} of {total_pages} ({total_images} total images)" + return images, page, page_info + + +def next_page(current_page): + """Go to next 
page.""" + images, page, total_pages, total_images = get_paginated_images(current_page + 1) + page_info = f"Page {page + 1} of {total_pages} ({total_images} total images)" + return images, page, page_info + + +def prev_page(current_page): + """Go to previous page.""" + images, page, total_pages, total_images = get_paginated_images(current_page - 1) + page_info = f"Page {page + 1} of {total_pages} ({total_images} total images)" + return images, page, page_info + + +def on_gallery_select(evt: gr.SelectData, current_page): + """Handle image selection from gallery.""" + if evt.index is None: + return "", "Select an image to view its settings" + + # Get the current page's images to find the actual file path + all_images = get_all_history_images() + total_images = len(all_images) + + # Calculate the actual index in the full list + start_idx = current_page * IMAGES_PER_PAGE + actual_idx = start_idx + evt.index + + if actual_idx >= total_images: + return "", "Image not found" + + image_path = all_images[actual_idx] + metadata = read_image_metadata(image_path) + metadata_display = format_metadata_for_display(metadata) + + return image_path, metadata_display + + +def send_to_generate(selected_image_path): + """Load settings from selected image and return updates for all Generate tab inputs.""" + if not selected_image_path or not os.path.exists(selected_image_path): + return [gr.update()] * 10 + ["No image selected"] + + metadata = read_image_metadata(selected_image_path) + if not metadata: + return [gr.update()] * 10 + ["No settings found in this image"] + + # Return updates for each input element in order + updates = [ + gr.update(value=metadata.get('image_prompt', '')), + gr.update(value=metadata.get('image_neg_prompt', '')), + gr.update(value=metadata.get('image_width', 1024)), + gr.update(value=metadata.get('image_height', 1024)), + gr.update(value=metadata.get('image_aspect_ratio', '1:1 Square')), + gr.update(value=metadata.get('image_steps', 9)), + gr.update(value=metadata.get('image_seed', -1)), + gr.update(value=metadata.get('image_batch_size', 1)), + gr.update(value=metadata.get('image_batch_count', 1)), + gr.update(value=metadata.get('image_cfg_scale', 0.0)), + ] + + status = f"✓ Settings loaded from image (seed: {metadata.get('image_seed', 'unknown')})" + return updates + [status] + + +def read_dropped_image_metadata(image_path): + """Read metadata from a dropped/uploaded image.""" + if not image_path: + return "Drop an image to view its generation settings." + + metadata = read_image_metadata(image_path) + return format_metadata_for_display(metadata) + + +def create_ui(): + if shared.settings['image_model_menu'] != 'None': + shared.image_model_name = shared.settings['image_model_menu'] + + with gr.Tab("Image AI", elem_id="image-ai-tab"): + with gr.Tabs(): + # TAB 1: GENERATE + with gr.TabItem("Generate"): + with gr.Row(): + with gr.Column(scale=4, min_width=350): + shared.gradio['image_prompt'] = gr.Textbox( + label="Prompt", + placeholder="Describe your imagination...", + lines=3, + autofocus=True, + value=shared.settings['image_prompt'] + ) + shared.gradio['image_neg_prompt'] = gr.Textbox( + label="Negative Prompt", + placeholder="Low quality...", + lines=3, + value=shared.settings['image_neg_prompt'] + ) + + shared.gradio['image_generate_btn'] = gr.Button("Generate", variant="primary", size="lg") + shared.gradio['image_stop_btn'] = gr.Button("Stop", size="lg", visible=False) + gr.HTML("
") + + gr.Markdown("### Dimensions") + with gr.Row(): + with gr.Column(): + shared.gradio['image_width'] = gr.Slider(256, 2048, value=shared.settings['image_width'], step=STEP, label="Width") + with gr.Column(): + shared.gradio['image_height'] = gr.Slider(256, 2048, value=shared.settings['image_height'], step=STEP, label="Height") + shared.gradio['image_swap_btn'] = gr.Button("⇄ Swap", elem_classes='refresh-button', scale=0, min_width=80, elem_id="swap-height-width") + + with gr.Row(): + shared.gradio['image_aspect_ratio'] = gr.Radio( + choices=["1:1 Square", "16:9 Cinema", "9:16 Mobile", "4:3 Photo", "Custom"], + value=shared.settings['image_aspect_ratio'], + label="Aspect Ratio", + interactive=True + ) + + gr.Markdown("### Config") + with gr.Row(): + with gr.Column(): + shared.gradio['image_steps'] = gr.Slider(1, 100, value=shared.settings['image_steps'], step=1, label="Steps") + shared.gradio['image_cfg_scale'] = gr.Slider( + 0.0, 10.0, + value=shared.settings['image_cfg_scale'], + step=0.1, + label="CFG Scale", + info="Z-Image Turbo: 0.0 | Qwen: 4.0" + ) + shared.gradio['image_seed'] = gr.Number(label="Seed", value=shared.settings['image_seed'], precision=0, info="-1 = Random") + with gr.Column(): + shared.gradio['image_batch_size'] = gr.Slider(1, 32, value=shared.settings['image_batch_size'], step=1, label="Batch Size (VRAM Heavy)", info="Generates N images at once.") + shared.gradio['image_batch_count'] = gr.Slider(1, 128, value=shared.settings['image_batch_count'], step=1, label="Sequential Count (Loop)", info="Repeats the generation N times.") + + with gr.Column(scale=6, min_width=500): + with gr.Column(elem_classes=["viewport-container"]): + shared.gradio['image_output_gallery'] = gr.Gallery(label="Output", show_label=False, columns=2, rows=2, height="80vh", object_fit="contain", preview=True, elem_id="image-output-gallery") + + # TAB 2: GALLERY (with pagination) + with gr.TabItem("Gallery"): + with gr.Row(): + with gr.Column(scale=3): + # Pagination controls + with gr.Row(): + shared.gradio['image_refresh_history'] = gr.Button("🔄 Refresh", elem_classes="refresh-button") + shared.gradio['image_prev_page'] = gr.Button("◀ Prev", elem_classes="refresh-button") + shared.gradio['image_page_info'] = gr.Markdown(value=get_initial_page_info, elem_id="image-page-info") + shared.gradio['image_next_page'] = gr.Button("Next ▶", elem_classes="refresh-button") + shared.gradio['image_page_input'] = gr.Number(value=1, label="Page", precision=0, minimum=1, scale=0, min_width=80) + shared.gradio['image_go_to_page'] = gr.Button("Go", elem_classes="refresh-button", scale=0, min_width=50) + + # State for current page and selected image path + shared.gradio['image_current_page'] = gr.State(value=0) + shared.gradio['image_selected_path'] = gr.State(value="") + + # Paginated gallery using gr.Gallery + shared.gradio['image_history_gallery'] = gr.Gallery( + value=lambda: get_paginated_images(0)[0], + label="Image History", + show_label=False, + columns=6, + object_fit="cover", + height="auto", + allow_preview=True, + elem_id="image-history-gallery" + ) + + with gr.Column(scale=1): + gr.Markdown("### Selected Image") + shared.gradio['image_settings_display'] = gr.Markdown("Select an image to view its settings") + shared.gradio['image_send_to_generate'] = gr.Button("Send to Generate", variant="primary") + shared.gradio['image_gallery_status'] = gr.Markdown("") + + gr.Markdown("### Import Image") + shared.gradio['image_drop_upload'] = gr.Image( + label="Drop image here to view settings", + type="filepath", + 
height=150 + ) + + # TAB 3: MODEL + with gr.TabItem("Model"): + with gr.Row(): + with gr.Column(): + with gr.Row(): + shared.gradio['image_model_menu'] = gr.Dropdown( + choices=utils.get_available_image_models(), + value=shared.settings['image_model_menu'], + label='Model', + elem_classes='slim-dropdown' + ) + shared.gradio['image_refresh_models'] = gr.Button("🔄", elem_classes='refresh-button', scale=0, min_width=40) + shared.gradio['image_load_model'] = gr.Button("Load", variant='primary', elem_classes='refresh-button') + shared.gradio['image_unload_model'] = gr.Button("Unload", elem_classes='refresh-button') + + gr.Markdown("## Settings") + with gr.Row(): + with gr.Column(): + shared.gradio['image_quant'] = gr.Dropdown( + label='Quantization', + choices=['none', 'bnb-8bit', 'bnb-4bit', 'quanto-8bit', 'quanto-4bit', 'quanto-2bit'], + value=shared.settings['image_quant'], + info='Quantization method for reduced VRAM usage. Quanto supports lower precisions (2-bit, 4-bit, 8-bit).' + ) + + shared.gradio['image_dtype'] = gr.Dropdown( + choices=['bfloat16', 'float16'], + value=shared.settings['image_dtype'], + label='Data Type', + info='bfloat16 recommended for modern GPUs' + ) + shared.gradio['image_attn_backend'] = gr.Dropdown( + choices=['sdpa', 'flash_attention_2', 'flash_attention_3'], + value=shared.settings['image_attn_backend'], + label='Attention Backend', + info='SDPA is default. Flash Attention requires compatible GPU.' + ) + with gr.Column(): + shared.gradio['image_compile'] = gr.Checkbox( + value=shared.settings['image_compile'], + label='Compile Model', + info='Faster inference after first run. First run will be slow.' + ) + shared.gradio['image_cpu_offload'] = gr.Checkbox( + value=shared.settings['image_cpu_offload'], + label='CPU Offload', + info='Enable for low VRAM GPUs. Slower but uses less memory.' + ) + + with gr.Column(): + shared.gradio['image_download_path'] = gr.Textbox( + label="Download model", + placeholder="Tongyi-MAI/Z-Image-Turbo", + info="Enter HuggingFace path. Use : for branch, e.g. 
user/model:main" + ) + shared.gradio['image_download_btn'] = gr.Button("Download", variant='primary') + shared.gradio['image_model_status'] = gr.Markdown( + value=f"Model: **{shared.settings['image_model_menu']}** (not loaded)" if shared.settings['image_model_menu'] != 'None' else "No model selected" + ) + + +def create_event_handlers(): + # Dimension controls + shared.gradio['image_aspect_ratio'].change( + apply_aspect_ratio, + gradio('image_aspect_ratio', 'image_width', 'image_height'), + gradio('image_width', 'image_height'), + show_progress=False + ) + + shared.gradio['image_width'].release( + update_height_from_width, + gradio('image_width', 'image_aspect_ratio'), + gradio('image_height'), + show_progress=False + ) + + shared.gradio['image_height'].release( + update_width_from_height, + gradio('image_height', 'image_aspect_ratio'), + gradio('image_width'), + show_progress=False + ) + + shared.gradio['image_swap_btn'].click( + swap_dimensions_and_update_ratio, + gradio('image_width', 'image_height', 'image_aspect_ratio'), + gradio('image_width', 'image_height', 'image_aspect_ratio'), + show_progress=False + ) + + # Generation + shared.gradio['image_generate_btn'].click( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + lambda: [gr.update(visible=True), gr.update(visible=False)], None, gradio('image_stop_btn', 'image_generate_btn')).then( + generate, gradio('interface_state'), gradio('image_output_gallery'), show_progress=False).then( + lambda: [gr.update(visible=False), gr.update(visible=True)], None, gradio('image_stop_btn', 'image_generate_btn')) + + shared.gradio['image_prompt'].submit( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + lambda: [gr.update(visible=True), gr.update(visible=False)], None, gradio('image_stop_btn', 'image_generate_btn')).then( + generate, gradio('interface_state'), gradio('image_output_gallery'), show_progress=False).then( + lambda: [gr.update(visible=False), gr.update(visible=True)], None, gradio('image_stop_btn', 'image_generate_btn')) + + shared.gradio['image_neg_prompt'].submit( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + lambda: [gr.update(visible=True), gr.update(visible=False)], None, gradio('image_stop_btn', 'image_generate_btn')).then( + generate, gradio('interface_state'), gradio('image_output_gallery'), show_progress=False).then( + lambda: [gr.update(visible=False), gr.update(visible=True)], None, gradio('image_stop_btn', 'image_generate_btn')) + + # Stop button + shared.gradio['image_stop_btn'].click( + stop_everything_event, None, None, show_progress=False + ) + + # Model management + shared.gradio['image_refresh_models'].click( + lambda: gr.update(choices=utils.get_available_image_models()), + None, + gradio('image_model_menu'), + show_progress=False + ) + + shared.gradio['image_load_model'].click( + load_image_model_wrapper, + gradio('image_model_menu', 'image_dtype', 'image_attn_backend', 'image_cpu_offload', 'image_compile', 'image_quant'), + gradio('image_model_status'), + show_progress=True + ) + + shared.gradio['image_unload_model'].click( + unload_image_model_wrapper, + None, + gradio('image_model_status'), + show_progress=False + ) + + shared.gradio['image_download_btn'].click( + download_image_model_wrapper, + gradio('image_download_path'), + gradio('image_model_status', 'image_model_menu'), + show_progress=True + ) + + # Gallery pagination handlers + 
shared.gradio['image_refresh_history'].click( + refresh_gallery, + gradio('image_current_page'), + gradio('image_history_gallery', 'image_current_page', 'image_page_info'), + show_progress=False + ) + + shared.gradio['image_next_page'].click( + next_page, + gradio('image_current_page'), + gradio('image_history_gallery', 'image_current_page', 'image_page_info'), + show_progress=False + ) + + shared.gradio['image_prev_page'].click( + prev_page, + gradio('image_current_page'), + gradio('image_history_gallery', 'image_current_page', 'image_page_info'), + show_progress=False + ) + + shared.gradio['image_go_to_page'].click( + go_to_page, + gradio('image_page_input', 'image_current_page'), + gradio('image_history_gallery', 'image_current_page', 'image_page_info'), + show_progress=False + ) + + # Image selection from gallery + shared.gradio['image_history_gallery'].select( + on_gallery_select, + gradio('image_current_page'), + gradio('image_selected_path', 'image_settings_display'), + show_progress=False + ) + + # Send to Generate + shared.gradio['image_send_to_generate'].click( + send_to_generate, + gradio('image_selected_path'), + gradio( + 'image_prompt', + 'image_neg_prompt', + 'image_width', + 'image_height', + 'image_aspect_ratio', + 'image_steps', + 'image_seed', + 'image_batch_size', + 'image_batch_count', + 'image_cfg_scale', + 'image_gallery_status' + ), + show_progress=False + ) + + shared.gradio['image_drop_upload'].change( + read_dropped_image_metadata, + gradio('image_drop_upload'), + gradio('image_settings_display'), + show_progress=False + ) + + +def generate(state): + """ + Generate images using the loaded model. + Automatically adjusts parameters based on pipeline type. + """ + import torch + + from modules.torch_utils import clear_torch_cache + + clear_torch_cache() + + try: + model_name = state['image_model_menu'] + + if not model_name or model_name == 'None': + logger.error("No image model selected. 
Go to the Model tab and select a model.") + return [] + + if shared.image_model is None: + result = load_image_model( + model_name, + dtype=state['image_dtype'], + attn_backend=state['image_attn_backend'], + cpu_offload=state['image_cpu_offload'], + compile_model=state['image_compile'], + quant_method=state['image_quant'] + ) + if result is None: + logger.error(f"Failed to load model `{model_name}`.") + return [] + + shared.image_model_name = model_name + + seed = state['image_seed'] + if seed == -1: + seed = np.random.randint(0, 2**32 - 1) + + device = get_device() + if device is None: + device = "cpu" + generator = torch.Generator(device).manual_seed(int(seed)) + + all_images = [] + + # Get pipeline type for parameter adjustment + pipeline_type = getattr(shared, 'image_pipeline_type', None) + if pipeline_type is None: + pipeline_type = get_pipeline_type(shared.image_model) + + # Process Prompt + prompt = state['image_prompt'] + + # Apply "Positive Magic" for Qwen models only + if pipeline_type == 'qwenimage': + magic_suffix = ", Ultra HD, 4K, cinematic composition" + # Avoid duplication if user already added it + if magic_suffix.strip(", ") not in prompt: + prompt += magic_suffix + + # Reset stop flag at start + shared.stop_everything = False + + # Callback to check for interruption during diffusion steps + def interrupt_callback(pipe, step_index, timestep, callback_kwargs): + if shared.stop_everything: + pipe._interrupt = True + + return callback_kwargs + + # Build generation kwargs + gen_kwargs = { + "prompt": prompt, + "negative_prompt": state['image_neg_prompt'], + "height": int(state['image_height']), + "width": int(state['image_width']), + "num_inference_steps": int(state['image_steps']), + "num_images_per_prompt": int(state['image_batch_size']), + "generator": generator, + "callback_on_step_end": interrupt_callback, + } + + # Add pipeline-specific parameters for CFG + cfg_val = state.get('image_cfg_scale', 0.0) + + if pipeline_type == 'qwenimage': + # Qwen-Image uses true_cfg_scale (typically 4.0) + gen_kwargs["true_cfg_scale"] = cfg_val + else: + # Z-Image and others use guidance_scale (typically 0.0 for Turbo) + gen_kwargs["guidance_scale"] = cfg_val + + t0 = time.time() + for i in range(int(state['image_batch_count'])): + if shared.stop_everything: + break + + generator.manual_seed(int(seed + i)) + batch_results = shared.image_model(**gen_kwargs).images + all_images.extend(batch_results) + + t1 = time.time() + save_generated_images(all_images, state, seed) + + total_images = int(state['image_batch_count']) * int(state['image_batch_size']) + total_steps = state["image_steps"] * int(state['image_batch_count']) + logger.info(f'Generated {total_images} {"image" if total_images == 1 else "images"} in {(t1 - t0):.2f} seconds ({total_steps / (t1 - t0):.2f} steps/s, seed {seed})') + + return all_images + + except Exception as e: + logger.error(f"Image generation failed: {e}") + traceback.print_exc() + return [] + + +def load_image_model_wrapper(model_name, dtype, attn_backend, cpu_offload, compile_model, quant_method): + if not model_name or model_name == 'None': + yield "No model selected" + return + + try: + yield f"Loading `{model_name}`..." 
+ unload_image_model() + + result = load_image_model( + model_name, + dtype=dtype, + attn_backend=attn_backend, + cpu_offload=cpu_offload, + compile_model=compile_model, + quant_method=quant_method + ) + + if result is not None: + shared.image_model_name = model_name + yield f"✓ Loaded **{model_name}** (quantization: {quant_method})" + else: + yield f"✗ Failed to load `{model_name}`" + except Exception: + yield f"Error:\n```\n{traceback.format_exc()}\n```" + + +def unload_image_model_wrapper(): + previous_name = shared.image_model_name + unload_image_model() + if previous_name != 'None': + return f"Model: **{previous_name}** (unloaded)" + return "No model loaded" + + +def download_image_model_wrapper(model_path): + from huggingface_hub import snapshot_download + + if not model_path: + yield "No model specified", gr.update() + return + + try: + model_path = model_path.strip() + if model_path.startswith('https://huggingface.co/'): + model_path = model_path[len('https://huggingface.co/'):] + elif model_path.startswith('huggingface.co/'): + model_path = model_path[len('huggingface.co/'):] + + if ':' in model_path: + model_id, branch = model_path.rsplit(':', 1) + else: + model_id, branch = model_path, 'main' + + folder_name = model_id.replace('/', '_') + output_folder = Path(shared.args.image_model_dir) / folder_name + + yield f"Downloading `{model_id}` (branch: {branch})...", gr.update() + + snapshot_download( + repo_id=model_id, + revision=branch, + local_dir=output_folder, + local_dir_use_symlinks=False, + ) + + new_choices = utils.get_available_image_models() + yield f"✓ Downloaded to `{output_folder}`", gr.update(choices=new_choices, value=folder_name) + except Exception: + yield f"Error:\n```\n{traceback.format_exc()}\n```", gr.update() diff --git a/modules/utils.py b/modules/utils.py index e8d23a02..b478f066 100644 --- a/modules/utils.py +++ b/modules/utils.py @@ -86,7 +86,7 @@ def check_model_loaded(): return True, None -def resolve_model_path(model_name_or_path): +def resolve_model_path(model_name_or_path, image_model=False): """ Resolves a model path, checking for a direct path before the default models directory. 
@@ -95,6 +95,8 @@ def resolve_model_path(model_name_or_path): path_candidate = Path(model_name_or_path) if path_candidate.exists(): return path_candidate + elif image_model: + return Path(f'{shared.args.image_model_dir}/{model_name_or_path}') else: return Path(f'{shared.args.model_dir}/{model_name_or_path}') @@ -153,6 +155,24 @@ def get_available_models(): return filtered_gguf_files + model_dirs +def get_available_image_models(): + model_dir = Path(shared.args.image_model_dir) + model_dir.mkdir(parents=True, exist_ok=True) + + # Find valid model directories + model_dirs = [] + for item in os.listdir(model_dir): + item_path = model_dir / item + if not item_path.is_dir(): + continue + + model_dirs.append(item) + + model_dirs = sorted(model_dirs, key=natural_keys) + + return model_dirs + + def get_available_ggufs(): model_list = [] model_dir = Path(shared.args.model_dir) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index 4be87c56..7076df4f 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -11,6 +11,7 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* +optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 @@ -34,6 +35,9 @@ wandb gradio==4.37.* https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl +# Diffusers +diffusers @ git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6 + # API flask_cloudflared==0.0.14 sse-starlette==1.6.5 diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index d9bf64cb..d6264089 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -9,6 +9,7 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* +optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 @@ -32,6 +33,9 @@ wandb gradio==4.37.* https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl +# Diffusers +diffusers @ git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6 + # API flask_cloudflared==0.0.14 sse-starlette==1.6.5 diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index 2c06c22f..985511fa 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -9,6 +9,7 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* +optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 @@ -32,6 +33,9 @@ wandb gradio==4.37.* https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl +# Diffusers +diffusers @ git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6 + # API flask_cloudflared==0.0.14 sse-starlette==1.6.5 diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index a4929d40..1c75806d 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -9,6 +9,7 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* +optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 @@ -32,6 +33,9 @@ wandb gradio==4.37.* https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl +# Diffusers +diffusers @ 
git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6 + # API flask_cloudflared==0.0.14 sse-starlette==1.6.5 diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index 6a7ce8a6..0f536b6a 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -9,6 +9,7 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* +optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 @@ -32,6 +33,9 @@ wandb gradio==4.37.* https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl +# Diffusers +diffusers @ git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6 + # API flask_cloudflared==0.0.14 sse-starlette==1.6.5 diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index 05957700..0f8970d0 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -9,6 +9,7 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* +optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 @@ -32,6 +33,9 @@ wandb gradio==4.37.* https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl +# Diffusers +diffusers @ git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6 + # API flask_cloudflared==0.0.14 sse-starlette==1.6.5 diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index 9e43c409..e862c1a1 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -9,6 +9,7 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* +optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 @@ -32,6 +33,9 @@ wandb gradio==4.37.* https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl +# Diffusers +diffusers @ git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6 + # API flask_cloudflared==0.0.14 sse-starlette==1.6.5 diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index 9931e1dc..a734ce82 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -11,6 +11,7 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* +optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 @@ -34,6 +35,9 @@ wandb gradio==4.37.* https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl +# Diffusers +diffusers @ git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6 + # API flask_cloudflared==0.0.14 sse-starlette==1.6.5 diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt index 818d2244..8561462c 100644 --- a/requirements/full/requirements_nowheels.txt +++ b/requirements/full/requirements_nowheels.txt @@ -9,6 +9,7 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* +optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 @@ -32,6 +33,9 @@ wandb gradio==4.37.* https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl +# Diffusers +diffusers @ 
git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6 + # API flask_cloudflared==0.0.14 sse-starlette==1.6.5 diff --git a/server.py b/server.py index c804c342..b02c50a2 100644 --- a/server.py +++ b/server.py @@ -5,6 +5,7 @@ from pathlib import Path from modules import shared from modules.block_requests import OpenMonkeyPatch, RequestBlocker +from modules.image_models import load_image_model from modules.logging_colors import logger from modules.prompts import load_prompt @@ -50,6 +51,7 @@ from modules import ( ui_chat, ui_default, ui_file_saving, + ui_image_generation, ui_model_menu, ui_notebook, ui_parameters, @@ -163,6 +165,7 @@ def create_interface(): ui_chat.create_character_settings_ui() # Character tab ui_model_menu.create_ui() # Model tab if not shared.args.portable: + ui_image_generation.create_ui() # Image generation tab training.create_ui() # Training tab ui_session.create_ui() # Session tab @@ -170,6 +173,8 @@ def create_interface(): ui_chat.create_event_handlers() ui_default.create_event_handlers() ui_notebook.create_event_handlers() + if not shared.args.portable: + ui_image_generation.create_event_handlers() # Other events ui_file_saving.create_event_handlers() @@ -256,6 +261,9 @@ if __name__ == "__main__": if new_settings: shared.settings.update(new_settings) + # Apply CLI overrides for image model settings (CLI flags take precedence over saved settings) + shared.apply_image_model_cli_overrides() + # Fallback settings for models shared.model_config['.*'] = get_fallback_settings() shared.model_config.move_to_end('.*', last=False) # Move to the beginning @@ -313,6 +321,22 @@ if __name__ == "__main__": if shared.args.lora: add_lora_to_model(shared.args.lora) + # Load image model if specified via CLI + if shared.args.image_model: + logger.info(f"Loading image model: {shared.args.image_model}") + result = load_image_model( + shared.args.image_model, + dtype=shared.settings.get('image_dtype', 'bfloat16'), + attn_backend=shared.settings.get('image_attn_backend', 'sdpa'), + cpu_offload=shared.settings.get('image_cpu_offload', False), + compile_model=shared.settings.get('image_compile', False), + quant_method=shared.settings.get('image_quant', 'none') + ) + if result is not None: + shared.image_model_name = shared.args.image_model + else: + logger.error(f"Failed to load image model: {shared.args.image_model}") + shared.generation_lock = Lock() if shared.args.idle_timeout > 0: From 6291e72129aa807768aa86390498613ee1408419 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 2 Dec 2025 09:57:04 -0800 Subject: [PATCH 04/61] Remove quanto for now (requires messy compilation) --- modules/image_models.py | 28 ++----------------- modules/shared.py | 2 +- modules/ui_image_generation.py | 2 +- requirements/full/requirements.txt | 1 - requirements/full/requirements_amd.txt | 1 - requirements/full/requirements_amd_noavx2.txt | 1 - .../full/requirements_apple_intel.txt | 1 - .../full/requirements_apple_silicon.txt | 1 - requirements/full/requirements_cpu_only.txt | 1 - .../full/requirements_cpu_only_noavx2.txt | 1 - requirements/full/requirements_noavx2.txt | 1 - requirements/full/requirements_nowheels.txt | 1 - 12 files changed, 4 insertions(+), 37 deletions(-) diff --git a/modules/image_models.py b/modules/image_models.py index e6f9a172..4d7cc5f9 100644 --- a/modules/image_models.py +++ b/modules/image_models.py @@ -11,7 +11,7 @@ def get_quantization_config(quant_method): Get the appropriate quantization config 
based on the selected method. Args: - quant_method: One of 'none', 'bnb-8bit', 'bnb-4bit', 'quanto-8bit', 'quanto-4bit', 'quanto-2bit' + quant_method: One of 'none', 'bnb-8bit', 'bnb-4bit' Returns: PipelineQuantizationConfig or None @@ -46,30 +46,6 @@ def get_quantization_config(quant_method): } ) - # Quanto 8-bit quantization - elif quant_method == 'quanto-8bit': - return PipelineQuantizationConfig( - quant_mapping={ - "transformer": QuantoConfig(weights_dtype="int8") - } - ) - - # Quanto 4-bit quantization - elif quant_method == 'quanto-4bit': - return PipelineQuantizationConfig( - quant_mapping={ - "transformer": QuantoConfig(weights_dtype="int4") - } - ) - - # Quanto 2-bit quantization - elif quant_method == 'quanto-2bit': - return PipelineQuantizationConfig( - quant_mapping={ - "transformer": QuantoConfig(weights_dtype="int2") - } - ) - else: logger.warning(f"Unknown quantization method: {quant_method}. Loading without quantization.") return None @@ -101,7 +77,7 @@ def load_image_model(model_name, dtype='bfloat16', attn_backend='sdpa', cpu_offl attn_backend: 'sdpa', 'flash_attention_2', or 'flash_attention_3' cpu_offload: Enable CPU offloading for low VRAM compile_model: Compile the model for faster inference (slow first run) - quant_method: Quantization method - 'none', 'bnb-8bit', 'bnb-4bit', 'quanto-8bit', 'quanto-4bit', 'quanto-2bit' + quant_method: Quantization method - 'none', 'bnb-8bit', 'bnb-4bit' """ import torch from diffusers import DiffusionPipeline diff --git a/modules/shared.py b/modules/shared.py index 316f7729..a96cd70c 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -60,7 +60,7 @@ group.add_argument('--image-attn-backend', type=str, default=None, choices=['sdp group.add_argument('--image-cpu-offload', action='store_true', help='Enable CPU offloading for image model.') group.add_argument('--image-compile', action='store_true', help='Compile the image model for faster inference.') group.add_argument('--image-quant', type=str, default=None, - choices=['none', 'bnb-8bit', 'bnb-4bit', 'quanto-8bit', 'quanto-4bit', 'quanto-2bit'], + choices=['none', 'bnb-8bit', 'bnb-4bit'], help='Quantization method for image model.') # Model loader diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 92c447c8..5ef66820 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -471,7 +471,7 @@ def create_ui(): with gr.Column(): shared.gradio['image_quant'] = gr.Dropdown( label='Quantization', - choices=['none', 'bnb-8bit', 'bnb-4bit', 'quanto-8bit', 'quanto-4bit', 'quanto-2bit'], + choices=['none', 'bnb-8bit', 'bnb-4bit'], value=shared.settings['image_quant'], info='Quantization method for reduced VRAM usage. Quanto supports lower precisions (2-bit, 4-bit, 8-bit).' 
) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index 7076df4f..a031bbab 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -11,7 +11,6 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* -optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index d6264089..ee8d67ac 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -9,7 +9,6 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* -optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index 985511fa..7b36b151 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -9,7 +9,6 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* -optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index 1c75806d..4f72d5ac 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -9,7 +9,6 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* -optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index 0f536b6a..7942b9f0 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -9,7 +9,6 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* -optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index 0f8970d0..96013c35 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -9,7 +9,6 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* -optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index e862c1a1..0813f06c 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -9,7 +9,6 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* -optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index a734ce82..ea7edf6c 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -11,7 +11,6 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* -optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt index 8561462c..15247d72 100644 --- a/requirements/full/requirements_nowheels.txt +++ b/requirements/full/requirements_nowheels.txt @@ -9,7 +9,6 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* -optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 From 9d07d3a229e3e949cfb32e9ef1fe32b328759985 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 2 Dec 2025 10:06:57 -0800 Subject: [PATCH 05/61] Make portable builds functional again 
after b3666e140de349a651aea22a6f418572925a5c62 --- modules/image_models.py | 3 +- modules/ui.py | 77 ++++++++++++++++++---------------- modules/ui_image_generation.py | 3 +- 3 files changed, 44 insertions(+), 39 deletions(-) diff --git a/modules/image_models.py b/modules/image_models.py index 4d7cc5f9..f2e0276a 100644 --- a/modules/image_models.py +++ b/modules/image_models.py @@ -2,7 +2,6 @@ import time import modules.shared as shared from modules.logging_colors import logger -from modules.torch_utils import get_device from modules.utils import resolve_model_path @@ -82,6 +81,8 @@ def load_image_model(model_name, dtype='bfloat16', attn_backend='sdpa', cpu_offl import torch from diffusers import DiffusionPipeline + from modules.torch_utils import get_device + logger.info(f"Loading image model \"{model_name}\" with quantization: {quant_method}") t0 = time.time() diff --git a/modules/ui.py b/modules/ui.py index 9700d297..ff5686e8 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -280,25 +280,26 @@ def list_interface_input_elements(): 'include_past_attachments', ] - # Image generation elements - elements += [ - 'image_prompt', - 'image_neg_prompt', - 'image_width', - 'image_height', - 'image_aspect_ratio', - 'image_steps', - 'image_cfg_scale', - 'image_seed', - 'image_batch_size', - 'image_batch_count', - 'image_model_menu', - 'image_dtype', - 'image_attn_backend', - 'image_compile', - 'image_cpu_offload', - 'image_quant', - ] + if not shared.args.portable: + # Image generation elements + elements += [ + 'image_prompt', + 'image_neg_prompt', + 'image_width', + 'image_height', + 'image_aspect_ratio', + 'image_steps', + 'image_cfg_scale', + 'image_seed', + 'image_batch_size', + 'image_batch_count', + 'image_model_menu', + 'image_dtype', + 'image_attn_backend', + 'image_compile', + 'image_cpu_offload', + 'image_quant', + ] return elements @@ -531,25 +532,29 @@ def setup_auto_save(): 'paste_to_attachment', 'include_past_attachments', - # Image generation tab (ui_image_generation.py) - 'image_prompt', - 'image_neg_prompt', - 'image_width', - 'image_height', - 'image_aspect_ratio', - 'image_steps', - 'image_cfg_scale', - 'image_seed', - 'image_batch_size', - 'image_batch_count', - 'image_model_menu', - 'image_dtype', - 'image_attn_backend', - 'image_compile', - 'image_cpu_offload', - 'image_quant', ] + if not shared.args.portable: + # Image generation tab (ui_image_generation.py) + change_elements += [ + 'image_prompt', + 'image_neg_prompt', + 'image_width', + 'image_height', + 'image_aspect_ratio', + 'image_steps', + 'image_cfg_scale', + 'image_seed', + 'image_batch_size', + 'image_batch_count', + 'image_model_menu', + 'image_dtype', + 'image_attn_backend', + 'image_compile', + 'image_cpu_offload', + 'image_quant', + ] + for element_name in change_elements: if element_name in shared.gradio: shared.gradio[element_name].change( diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 5ef66820..4f90d352 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -18,7 +18,6 @@ from modules.image_models import ( ) from modules.logging_colors import logger from modules.text_generation import stop_everything_event -from modules.torch_utils import get_device from modules.utils import gradio ASPECT_RATIOS = { @@ -667,7 +666,7 @@ def generate(state): """ import torch - from modules.torch_utils import clear_torch_cache + from modules.torch_utils import clear_torch_cache, get_device clear_torch_cache() From 97281ff83120c20180840ce8db3be80a255c861e Mon Sep 17 
00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 2 Dec 2025 11:20:52 -0800 Subject: [PATCH 06/61] UI: Fix an index error in the new image gallery --- modules/ui_image_generation.py | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 4f90d352..8ec879e9 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -290,8 +290,11 @@ def on_gallery_select(evt: gr.SelectData, current_page): if evt.index is None: return "", "Select an image to view its settings" + if not _image_cache: + get_all_history_images() + # Get the current page's images to find the actual file path - all_images = get_all_history_images() + all_images = _image_cache total_images = len(all_images) # Calculate the actual index in the full list @@ -506,9 +509,7 @@ def create_ui(): info="Enter HuggingFace path. Use : for branch, e.g. user/model:main" ) shared.gradio['image_download_btn'] = gr.Button("Download", variant='primary') - shared.gradio['image_model_status'] = gr.Markdown( - value=f"Model: **{shared.settings['image_model_menu']}** (not loaded)" if shared.settings['image_model_menu'] != 'None' else "No model selected" - ) + shared.gradio['image_model_status'] = gr.Markdown(value="") def create_event_handlers(): From 9448bf1caa8ad5bd704ae2610bc0b2738c9ca51f Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 2 Dec 2025 14:22:51 -0800 Subject: [PATCH 07/61] Image generation: add torchao quantization (supports torch.compile) --- modules/image_models.py | 31 +++++++++++++++++-- modules/shared.py | 2 +- modules/ui_image_generation.py | 4 +-- requirements/full/requirements.txt | 1 + requirements/full/requirements_amd.txt | 1 + requirements/full/requirements_amd_noavx2.txt | 1 + .../full/requirements_apple_intel.txt | 1 + .../full/requirements_apple_silicon.txt | 1 + requirements/full/requirements_cpu_only.txt | 1 + .../full/requirements_cpu_only_noavx2.txt | 1 + requirements/full/requirements_noavx2.txt | 1 + requirements/full/requirements_nowheels.txt | 1 + 12 files changed, 40 insertions(+), 6 deletions(-) diff --git a/modules/image_models.py b/modules/image_models.py index f2e0276a..2ef1e730 100644 --- a/modules/image_models.py +++ b/modules/image_models.py @@ -10,13 +10,14 @@ def get_quantization_config(quant_method): Get the appropriate quantization config based on the selected method. 
Args: - quant_method: One of 'none', 'bnb-8bit', 'bnb-4bit' + quant_method: One of 'none', 'bnb-8bit', 'bnb-4bit', + 'torchao-int8wo', 'torchao-fp4', 'torchao-float8wo' Returns: PipelineQuantizationConfig or None """ import torch - from diffusers import BitsAndBytesConfig, QuantoConfig + from diffusers import BitsAndBytesConfig, TorchAoConfig from diffusers.quantizers import PipelineQuantizationConfig if quant_method == 'none' or not quant_method: @@ -45,6 +46,30 @@ def get_quantization_config(quant_method): } ) + # torchao int8 weight-only + elif quant_method == 'torchao-int8wo': + return PipelineQuantizationConfig( + quant_mapping={ + "transformer": TorchAoConfig("int8wo") + } + ) + + # torchao fp4 (e2m1) + elif quant_method == 'torchao-fp4': + return PipelineQuantizationConfig( + quant_mapping={ + "transformer": TorchAoConfig("fp4_e2m1") + } + ) + + # torchao float8 weight-only + elif quant_method == 'torchao-float8wo': + return PipelineQuantizationConfig( + quant_mapping={ + "transformer": TorchAoConfig("float8wo") + } + ) + else: logger.warning(f"Unknown quantization method: {quant_method}. Loading without quantization.") return None @@ -76,7 +101,7 @@ def load_image_model(model_name, dtype='bfloat16', attn_backend='sdpa', cpu_offl attn_backend: 'sdpa', 'flash_attention_2', or 'flash_attention_3' cpu_offload: Enable CPU offloading for low VRAM compile_model: Compile the model for faster inference (slow first run) - quant_method: Quantization method - 'none', 'bnb-8bit', 'bnb-4bit' + quant_method: 'none', 'bnb-8bit', 'bnb-4bit', or torchao options (int8wo, fp4, float8wo) """ import torch from diffusers import DiffusionPipeline diff --git a/modules/shared.py b/modules/shared.py index a96cd70c..4a0fd986 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -60,7 +60,7 @@ group.add_argument('--image-attn-backend', type=str, default=None, choices=['sdp group.add_argument('--image-cpu-offload', action='store_true', help='Enable CPU offloading for image model.') group.add_argument('--image-compile', action='store_true', help='Compile the image model for faster inference.') group.add_argument('--image-quant', type=str, default=None, - choices=['none', 'bnb-8bit', 'bnb-4bit'], + choices=['none', 'bnb-8bit', 'bnb-4bit', 'torchao-int8wo', 'torchao-fp4', 'torchao-float8wo'], help='Quantization method for image model.') # Model loader diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 8ec879e9..1b0e58f3 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -473,9 +473,9 @@ def create_ui(): with gr.Column(): shared.gradio['image_quant'] = gr.Dropdown( label='Quantization', - choices=['none', 'bnb-8bit', 'bnb-4bit'], + choices=['none', 'bnb-8bit', 'bnb-4bit', 'torchao-int8wo', 'torchao-fp4', 'torchao-float8wo'], value=shared.settings['image_quant'], - info='Quantization method for reduced VRAM usage. Quanto supports lower precisions (2-bit, 4-bit, 8-bit).' + info='BnB: bitsandbytes quantization. torchao: int8wo, fp4, float8wo.' 
) shared.gradio['image_dtype'] = gr.Dropdown( diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index a031bbab..4e02d76f 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -25,6 +25,7 @@ safetensors==0.6.* scipy sentencepiece tensorboard +torchao==0.14.* transformers==4.57.* triton-windows==3.5.1.post21; platform_system == "Windows" tqdm diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index ee8d67ac..c0fbd9ab 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -23,6 +23,7 @@ safetensors==0.6.* scipy sentencepiece tensorboard +torchao==0.14.* transformers==4.57.* triton-windows==3.5.1.post21; platform_system == "Windows" tqdm diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index 7b36b151..b330646a 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -23,6 +23,7 @@ safetensors==0.6.* scipy sentencepiece tensorboard +torchao==0.14.* transformers==4.57.* triton-windows==3.5.1.post21; platform_system == "Windows" tqdm diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index 4f72d5ac..c2c64337 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -23,6 +23,7 @@ safetensors==0.6.* scipy sentencepiece tensorboard +torchao==0.14.* transformers==4.57.* triton-windows==3.5.1.post21; platform_system == "Windows" tqdm diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index 7942b9f0..10889215 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -23,6 +23,7 @@ safetensors==0.6.* scipy sentencepiece tensorboard +torchao==0.14.* transformers==4.57.* triton-windows==3.5.1.post21; platform_system == "Windows" tqdm diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index 96013c35..ffcd6473 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -23,6 +23,7 @@ safetensors==0.6.* scipy sentencepiece tensorboard +torchao==0.14.* transformers==4.57.* triton-windows==3.5.1.post21; platform_system == "Windows" tqdm diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index 0813f06c..daa2444f 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -23,6 +23,7 @@ safetensors==0.6.* scipy sentencepiece tensorboard +torchao==0.14.* transformers==4.57.* triton-windows==3.5.1.post21; platform_system == "Windows" tqdm diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index ea7edf6c..a43a7724 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -25,6 +25,7 @@ safetensors==0.6.* scipy sentencepiece tensorboard +torchao==0.14.* transformers==4.57.* triton-windows==3.5.1.post21; platform_system == "Windows" tqdm diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt index 15247d72..ac24f0cb 100644 --- a/requirements/full/requirements_nowheels.txt +++ b/requirements/full/requirements_nowheels.txt @@ -23,6 +23,7 @@ 
safetensors==0.6.* scipy sentencepiece tensorboard +torchao==0.14.* transformers==4.57.* triton-windows==3.5.1.post21; platform_system == "Windows" tqdm From 5433ef33331b3b24da9d58b73287398457459de1 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 3 Dec 2025 11:50:35 -0800 Subject: [PATCH 08/61] Add an API endpoint for generating images --- extensions/openai/images.py | 184 ++++++++++++++++++++++++++---------- extensions/openai/script.py | 48 +++++----- extensions/openai/typing.py | 40 ++++++++ 3 files changed, 200 insertions(+), 72 deletions(-) diff --git a/extensions/openai/images.py b/extensions/openai/images.py index 92bd85f0..3a5288e6 100644 --- a/extensions/openai/images.py +++ b/extensions/openai/images.py @@ -1,70 +1,154 @@ +""" +OpenAI-compatible image generation using local diffusion models. +""" + +import base64 +import io +import json import os import time +from datetime import datetime -import requests - +import numpy as np from extensions.openai.errors import ServiceUnavailableError +from modules import shared +from modules.logging_colors import logger +from PIL.PngImagePlugin import PngInfo -def generations(prompt: str, size: str, response_format: str, n: int): - # Stable Diffusion callout wrapper for txt2img - # Low effort implementation for compatibility. With only "prompt" being passed and assuming DALL-E - # the results will be limited and likely poor. SD has hundreds of models and dozens of settings. - # If you want high quality tailored results you should just use the Stable Diffusion API directly. - # it's too general an API to try and shape the result with specific tags like negative prompts - # or "masterpiece", etc. SD configuration is beyond the scope of this API. - # At this point I will not add the edits and variations endpoints (ie. img2img) because they - # require changing the form data handling to accept multipart form data, also to properly support - # url return types will require file management and a web serving files... Perhaps later! - base_model_size = 512 if 'SD_BASE_MODEL_SIZE' not in os.environ else int(os.environ.get('SD_BASE_MODEL_SIZE', 512)) - sd_defaults = { - 'sampler_name': 'DPM++ 2M Karras', # vast improvement - 'steps': 30, +def generations(prompt: str, size: str, response_format: str, n: int, + negative_prompt: str = "", steps: int = 9, seed: int = -1, + cfg_scale: float = 0.0, batch_count: int = 1): + """ + Generate images using the loaded diffusion model. + + Args: + prompt: Text description of the desired image + size: Image dimensions as "WIDTHxHEIGHT" + response_format: 'url' or 'b64_json' + n: Number of images per batch + negative_prompt: What to avoid in the image + steps: Number of inference steps + seed: Random seed (-1 for random) + cfg_scale: Classifier-free guidance scale + batch_count: Number of sequential batches + + Returns: + dict with 'created' timestamp and 'data' list of images + """ + import torch + from modules.image_models import get_pipeline_type + from modules.torch_utils import clear_torch_cache, get_device + + if shared.image_model is None: + raise ServiceUnavailableError("No image model loaded. 
Load a model via the UI first.") + + clear_torch_cache() + + # Parse dimensions + try: + width, height = [int(x) for x in size.split('x')] + except (ValueError, IndexError): + width, height = 1024, 1024 + + # Handle seed + if seed == -1: + seed = np.random.randint(0, 2**32 - 1) + + device = get_device() or "cpu" + generator = torch.Generator(device).manual_seed(int(seed)) + + # Get pipeline type for CFG parameter name + pipeline_type = getattr(shared, 'image_pipeline_type', None) or get_pipeline_type(shared.image_model) + + # Build generation kwargs + gen_kwargs = { + "prompt": prompt, + "negative_prompt": negative_prompt, + "height": height, + "width": width, + "num_inference_steps": steps, + "num_images_per_prompt": n, + "generator": generator, } - width, height = [int(x) for x in size.split('x')] # ignore the restrictions on size + # Pipeline-specific CFG parameter + if pipeline_type == 'qwenimage': + gen_kwargs["true_cfg_scale"] = cfg_scale + else: + gen_kwargs["guidance_scale"] = cfg_scale - # to hack on better generation, edit default payload. - payload = { - 'prompt': prompt, # ignore prompt limit of 1000 characters - 'width': width, - 'height': height, - 'batch_size': n, - } - payload.update(sd_defaults) + # Generate + all_images = [] + t0 = time.time() - scale = min(width, height) / base_model_size - if scale >= 1.2: - # for better performance with the default size (1024), and larger res. - scaler = { - 'width': width // scale, - 'height': height // scale, - 'hr_scale': scale, - 'enable_hr': True, - 'hr_upscaler': 'Latent', - 'denoising_strength': 0.68, - } - payload.update(scaler) + shared.stop_everything = False + def interrupt_callback(pipe, step_index, timestep, callback_kwargs): + if shared.stop_everything: + pipe._interrupt = True + return callback_kwargs + + gen_kwargs["callback_on_step_end"] = interrupt_callback + + for i in range(batch_count): + if shared.stop_everything: + break + generator.manual_seed(int(seed + i)) + batch_results = shared.image_model(**gen_kwargs).images + all_images.extend(batch_results) + + t1 = time.time() + total_images = len(all_images) + total_steps = steps * batch_count + logger.info(f'Generated {total_images} {"image" if total_images == 1 else "images"} in {(t1 - t0):.2f} seconds ({total_steps / (t1 - t0):.2f} steps/s, seed {seed})') + + # Save images + _save_images(all_images, prompt, negative_prompt, width, height, steps, seed, cfg_scale) + + # Build response resp = { 'created': int(time.time()), 'data': [] } - from extensions.openai.script import params - # TODO: support SD_WEBUI_AUTH username:password pair. - sd_url = f"{os.environ.get('SD_WEBUI_URL', params.get('sd_webui_url', ''))}/sdapi/v1/txt2img" - - response = requests.post(url=sd_url, json=payload) - r = response.json() - if response.status_code != 200 or 'images' not in r: - print(r) - raise ServiceUnavailableError(r.get('error', 'Unknown error calling Stable Diffusion'), code=response.status_code, internal_message=r.get('errors', None)) - # r['parameters']... - for b64_json in r['images']: + for img in all_images: + b64 = _image_to_base64(img) if response_format == 'b64_json': - resp['data'].extend([{'b64_json': b64_json}]) + resp['data'].append({'b64_json': b64}) else: - resp['data'].extend([{'url': f'data:image/png;base64,{b64_json}'}]) # yeah it's lazy. 
requests.get() will not work with this + resp['data'].append({'url': f'data:image/png;base64,{b64}'}) return resp + + +def _image_to_base64(image) -> str: + buffered = io.BytesIO() + image.save(buffered, format="PNG") + return base64.b64encode(buffered.getvalue()).decode('utf-8') + + +def _save_images(images, prompt, negative_prompt, width, height, steps, seed, cfg_scale): + """Save images with metadata.""" + date_str = datetime.now().strftime("%Y-%m-%d") + folder = os.path.join("user_data", "image_outputs", date_str) + os.makedirs(folder, exist_ok=True) + + metadata = { + 'image_prompt': prompt, + 'image_neg_prompt': negative_prompt, + 'image_width': width, + 'image_height': height, + 'image_steps': steps, + 'image_seed': seed, + 'image_cfg_scale': cfg_scale, + 'model': getattr(shared, 'image_model_name', 'unknown'), + } + + for idx, img in enumerate(images): + ts = datetime.now().strftime("%H-%M-%S") + filepath = os.path.join(folder, f"{ts}_{seed:010d}_{idx:03d}.png") + + png_info = PngInfo() + png_info.add_text("image_gen_settings", json.dumps(metadata)) + img.save(filepath, pnginfo=png_info) diff --git a/extensions/openai/script.py b/extensions/openai/script.py index 9440cb1e..1e982731 100644 --- a/extensions/openai/script.py +++ b/extensions/openai/script.py @@ -7,26 +7,23 @@ import traceback from collections import deque from threading import Thread +import extensions.openai.completions as OAIcompletions +import extensions.openai.logits as OAIlogits +import extensions.openai.models as OAImodels import uvicorn +from extensions.openai.tokens import token_count, token_decode, token_encode +from extensions.openai.utils import _start_cloudflared from fastapi import Depends, FastAPI, Header, HTTPException from fastapi.middleware.cors import CORSMiddleware from fastapi.requests import Request from fastapi.responses import JSONResponse -from pydub import AudioSegment -from sse_starlette import EventSourceResponse -from starlette.concurrency import iterate_in_threadpool - -import extensions.openai.completions as OAIcompletions -import extensions.openai.images as OAIimages -import extensions.openai.logits as OAIlogits -import extensions.openai.models as OAImodels -from extensions.openai.errors import ServiceUnavailableError -from extensions.openai.tokens import token_count, token_decode, token_encode -from extensions.openai.utils import _start_cloudflared from modules import shared from modules.logging_colors import logger from modules.models import unload_model from modules.text_generation import stop_everything_event +from pydub import AudioSegment +from sse_starlette import EventSourceResponse +from starlette.concurrency import iterate_in_threadpool from .typing import ( ChatCompletionRequest, @@ -40,6 +37,8 @@ from .typing import ( EmbeddingsResponse, EncodeRequest, EncodeResponse, + ImageGenerationRequest, + ImageGenerationResponse, LoadLorasRequest, LoadModelRequest, LogitsRequest, @@ -228,19 +227,24 @@ async def handle_audio_transcription(request: Request): return JSONResponse(content=transcription) -@app.post('/v1/images/generations', dependencies=check_key) -async def handle_image_generation(request: Request): +@app.post('/v1/images/generations', response_model=ImageGenerationResponse, dependencies=check_key) +async def handle_image_generation(request_data: ImageGenerationRequest): + import extensions.openai.images as OAIimages - if not os.environ.get('SD_WEBUI_URL', params.get('sd_webui_url', '')): - raise ServiceUnavailableError("Stable Diffusion not available. 
SD_WEBUI_URL not set.") + width, height = request_data.get_width_height() - body = await request.json() - prompt = body['prompt'] - size = body.get('size', '1024x1024') - response_format = body.get('response_format', 'url') # or b64_json - n = body.get('n', 1) # ignore the batch limits of max 10 - - response = await OAIimages.generations(prompt=prompt, size=size, response_format=response_format, n=n) + response = await asyncio.to_thread( + OAIimages.generations, + prompt=request_data.prompt, + size=f"{width}x{height}", + response_format=request_data.response_format, + n=request_data.batch_size, # <-- use resolved batch_size + negative_prompt=request_data.negative_prompt, + steps=request_data.steps, + seed=request_data.seed, + cfg_scale=request_data.cfg_scale, + batch_count=request_data.batch_count, + ) return JSONResponse(response) diff --git a/extensions/openai/typing.py b/extensions/openai/typing.py index 56d91582..a24b844b 100644 --- a/extensions/openai/typing.py +++ b/extensions/openai/typing.py @@ -264,6 +264,46 @@ class LoadLorasRequest(BaseModel): lora_names: List[str] +class ImageGenerationRequest(BaseModel): + """OpenAI-compatible image generation request with extended parameters.""" + # Required + prompt: str + + # Generation parameters + negative_prompt: str = "" + size: str = Field(default="1024x1024", description="'WIDTHxHEIGHT'") + steps: int = Field(default=9, ge=1) + cfg_scale: float = Field(default=0.0, ge=0.0) + seed: int = Field(default=-1, description="-1 for random") + batch_size: int | None = Field(default=None, ge=1, description="Parallel batch size (VRAM heavy)") + n: int = Field(default=1, ge=1, description="Alias for batch_size (OpenAI compatibility)") + batch_count: int = Field(default=1, ge=1, description="Sequential batch count") + + # OpenAI compatibility (unused) + model: str | None = None + response_format: str = "b64_json" + user: str | None = None + + @model_validator(mode='after') + def resolve_batch_size(self): + """Use batch_size if provided, otherwise fall back to n.""" + if self.batch_size is None: + self.batch_size = self.n + return self + + def get_width_height(self) -> tuple[int, int]: + try: + parts = self.size.lower().split('x') + return int(parts[0]), int(parts[1]) + except (ValueError, IndexError): + return 1024, 1024 + + +class ImageGenerationResponse(BaseModel): + created: int = int(time.time()) + data: List[dict] + + def to_json(obj): return json.dumps(obj.__dict__, indent=4) From 5ad174fad2afbbe2c06e841c87e06515a33093ed Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 3 Dec 2025 11:58:54 -0800 Subject: [PATCH 09/61] docs: Add an image generation API example --- docs/12 - OpenAI API.md | 31 +++++++++++++++++++++++++++++-- 1 file changed, 29 insertions(+), 2 deletions(-) diff --git a/docs/12 - OpenAI API.md b/docs/12 - OpenAI API.md index 227541a3..94a95b10 100644 --- a/docs/12 - OpenAI API.md +++ b/docs/12 - OpenAI API.md @@ -139,6 +139,35 @@ curl http://127.0.0.1:5000/v1/completions \ For base64-encoded images, just replace the inner "url" values with this format: `data:image/FORMAT;base64,BASE64_STRING` where FORMAT is the file type (png, jpeg, gif, etc.) and BASE64_STRING is your base64-encoded image data. +#### Image generation + +```shell +curl http://127.0.0.1:5000/v1/images/generations \ + -H "Content-Type: application/json" \ + -d '{ + "prompt": "an orange tree", + "steps": 9, + "cfg_scale": 0, + "batch_size": 1, + "batch_count": 1 + }' +``` + +You need to load an image model first. 
You can do this via the UI, or by adding `--image-model your_model_name` when launching the server. + +The output is a JSON object containing a `data` array. Each element has a `b64_json` field with the base64-encoded PNG image: + +```json +{ + "created": 1764791227, + "data": [ + { + "b64_json": "iVBORw0KGgo..." + } + ] +} +``` + #### SSE streaming ```shell @@ -419,7 +448,6 @@ The following environment variables can be used (they take precedence over every | `OPENEDAI_CERT_PATH` | SSL certificate file path | cert.pem | | `OPENEDAI_KEY_PATH` | SSL key file path | key.pem | | `OPENEDAI_DEBUG` | Enable debugging (set to 1) | 1 | -| `SD_WEBUI_URL` | WebUI URL (used by endpoint) | http://127.0.0.1:7861 | | `OPENEDAI_EMBEDDING_MODEL` | Embedding model (if applicable) | sentence-transformers/all-mpnet-base-v2 | | `OPENEDAI_EMBEDDING_DEVICE` | Embedding device (if applicable) | cuda | @@ -430,7 +458,6 @@ You can also set the following variables in your `settings.yaml` file: ``` openai-embedding_device: cuda openai-embedding_model: "sentence-transformers/all-mpnet-base-v2" -openai-sd_webui_url: http://127.0.0.1:7861 openai-debug: 1 ``` From 4468c49439685dc8bc68e9d7a6109694a2eab72b Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 3 Dec 2025 12:02:47 -0800 Subject: [PATCH 10/61] Add semaphore to image generation API endpoint --- extensions/openai/script.py | 31 ++++++++++++++++--------------- 1 file changed, 16 insertions(+), 15 deletions(-) diff --git a/extensions/openai/script.py b/extensions/openai/script.py index 1e982731..65805629 100644 --- a/extensions/openai/script.py +++ b/extensions/openai/script.py @@ -53,12 +53,12 @@ from .typing import ( params = { 'embedding_device': 'cpu', 'embedding_model': 'sentence-transformers/all-mpnet-base-v2', - 'sd_webui_url': '', 'debug': 0 } streaming_semaphore = asyncio.Semaphore(1) +image_generation_semaphore = asyncio.Semaphore(1) def verify_api_key(authorization: str = Header(None)) -> None: @@ -231,21 +231,22 @@ async def handle_audio_transcription(request: Request): async def handle_image_generation(request_data: ImageGenerationRequest): import extensions.openai.images as OAIimages - width, height = request_data.get_width_height() + async with image_generation_semaphore: + width, height = request_data.get_width_height() - response = await asyncio.to_thread( - OAIimages.generations, - prompt=request_data.prompt, - size=f"{width}x{height}", - response_format=request_data.response_format, - n=request_data.batch_size, # <-- use resolved batch_size - negative_prompt=request_data.negative_prompt, - steps=request_data.steps, - seed=request_data.seed, - cfg_scale=request_data.cfg_scale, - batch_count=request_data.batch_count, - ) - return JSONResponse(response) + response = await asyncio.to_thread( + OAIimages.generations, + prompt=request_data.prompt, + size=f"{width}x{height}", + response_format=request_data.response_format, + n=request_data.batch_size, # <-- use resolved batch_size + negative_prompt=request_data.negative_prompt, + steps=request_data.steps, + seed=request_data.seed, + cfg_scale=request_data.cfg_scale, + batch_count=request_data.batch_count, + ) + return JSONResponse(response) @app.post("/v1/embeddings", response_model=EmbeddingsResponse, dependencies=check_key) From 906dc549697478be5b8816e12b7dc94fd34133b0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 3 Dec 2025 12:15:38 -0800 Subject: [PATCH 11/61] Load `--image-model` before `--model` --- 
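A minimal Python client sketch for the `/v1/images/generations` endpoint added in the patches above, assuming the server is running at `http://127.0.0.1:5000` with an image model already loaded; it uses the `requests` library, the field names mirror `ImageGenerationRequest`, and the output filenames are illustrative.

```python
import base64

import requests

# Assumes a local server started with an image model loaded
# (selected in the UI or passed via --image-model at launch).
url = "http://127.0.0.1:5000/v1/images/generations"
payload = {
    "prompt": "an orange tree",
    "negative_prompt": "",
    "size": "1024x1024",            # "WIDTHxHEIGHT"
    "steps": 9,
    "cfg_scale": 0,
    "seed": -1,                     # -1 picks a random seed
    "batch_size": 1,                # images per batch (alias: "n")
    "batch_count": 1,               # sequential batches
    "response_format": "b64_json",
}

response = requests.post(url, json=payload, timeout=600)
response.raise_for_status()

# Each entry in "data" holds a base64-encoded PNG in "b64_json".
for i, item in enumerate(response.json()["data"]):
    with open(f"image_{i}.png", "wb") as f:  # illustrative filename
        f.write(base64.b64decode(item["b64_json"]))
```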
server.py | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) diff --git a/server.py b/server.py index b02c50a2..0c5d14ce 100644 --- a/server.py +++ b/server.py @@ -275,6 +275,22 @@ if __name__ == "__main__": if extension not in shared.args.extensions: shared.args.extensions.append(extension) + # Load image model if specified via CLI + if shared.args.image_model: + logger.info(f"Loading image model: {shared.args.image_model}") + result = load_image_model( + shared.args.image_model, + dtype=shared.settings.get('image_dtype', 'bfloat16'), + attn_backend=shared.settings.get('image_attn_backend', 'sdpa'), + cpu_offload=shared.settings.get('image_cpu_offload', False), + compile_model=shared.settings.get('image_compile', False), + quant_method=shared.settings.get('image_quant', 'none') + ) + if result is not None: + shared.image_model_name = shared.args.image_model + else: + logger.error(f"Failed to load image model: {shared.args.image_model}") + available_models = utils.get_available_models() # Model defined through --model @@ -321,22 +337,6 @@ if __name__ == "__main__": if shared.args.lora: add_lora_to_model(shared.args.lora) - # Load image model if specified via CLI - if shared.args.image_model: - logger.info(f"Loading image model: {shared.args.image_model}") - result = load_image_model( - shared.args.image_model, - dtype=shared.settings.get('image_dtype', 'bfloat16'), - attn_backend=shared.settings.get('image_attn_backend', 'sdpa'), - cpu_offload=shared.settings.get('image_cpu_offload', False), - compile_model=shared.settings.get('image_compile', False), - quant_method=shared.settings.get('image_quant', 'none') - ) - if result is not None: - shared.image_model_name = shared.args.image_model - else: - logger.error(f"Failed to load image model: {shared.args.image_model}") - shared.generation_lock = Lock() if shared.args.idle_timeout > 0: From 373baa5c9cb46b629b5f7362986ac3ceda97bc08 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 3 Dec 2025 14:43:56 -0800 Subject: [PATCH 12/61] UI: Minor image gallery improvements --- modules/ui_image_generation.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 1b0e58f3..2ea34551 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -29,7 +29,7 @@ ASPECT_RATIOS = { } STEP = 16 -IMAGES_PER_PAGE = 64 +IMAGES_PER_PAGE = 32 # Settings keys to save in PNG metadata (Generate tab only) METADATA_SETTINGS_KEYS = [ @@ -172,7 +172,7 @@ def format_metadata_for_display(metadata): if not metadata: return "No generation settings found in this image." 
- lines = ["**Generation Settings**", ""] + lines = [] # Display in a nice order display_order = [ @@ -441,7 +441,7 @@ def create_ui(): ) with gr.Column(scale=1): - gr.Markdown("### Selected Image") + gr.Markdown("### Generation Settings") shared.gradio['image_settings_display'] = gr.Markdown("Select an image to view its settings") shared.gradio['image_send_to_generate'] = gr.Button("Send to Generate", variant="primary") shared.gradio['image_gallery_status'] = gr.Markdown("") From 59285d501d505e0e1939861baea161870c4ed4c6 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 3 Dec 2025 16:03:31 -0800 Subject: [PATCH 13/61] Image generation: Small UI improvements --- js/switch_tabs.js | 14 ++++++++++++++ modules/ui_image_generation.py | 5 +++-- 2 files changed, 17 insertions(+), 2 deletions(-) diff --git a/js/switch_tabs.js b/js/switch_tabs.js index 7fb78aea..36e5736b 100644 --- a/js/switch_tabs.js +++ b/js/switch_tabs.js @@ -36,3 +36,17 @@ function switch_to_character() { document.getElementById("character-tab-button").click(); scrollToTop(); } + +function switch_to_image_ai_generate() { + const container = document.querySelector("#image-ai-tab"); + const buttons = container.getElementsByTagName("button"); + + for (let i = 0; i < buttons.length; i++) { + if (buttons[i].textContent.trim() === "Generate") { + buttons[i].click(); + break; + } + } + + scrollToTop(); +} diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 2ea34551..3a64bc8b 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -418,9 +418,9 @@ def create_ui(): # Pagination controls with gr.Row(): shared.gradio['image_refresh_history'] = gr.Button("🔄 Refresh", elem_classes="refresh-button") - shared.gradio['image_prev_page'] = gr.Button("◀ Prev", elem_classes="refresh-button") + shared.gradio['image_prev_page'] = gr.Button("◀ Prev Page", elem_classes="refresh-button") shared.gradio['image_page_info'] = gr.Markdown(value=get_initial_page_info, elem_id="image-page-info") - shared.gradio['image_next_page'] = gr.Button("Next ▶", elem_classes="refresh-button") + shared.gradio['image_next_page'] = gr.Button("Next Page ▶", elem_classes="refresh-button") shared.gradio['image_page_input'] = gr.Number(value=1, label="Page", precision=0, minimum=1, scale=0, min_width=80) shared.gradio['image_go_to_page'] = gr.Button("Go", elem_classes="refresh-button", scale=0, min_width=50) @@ -649,6 +649,7 @@ def create_event_handlers(): 'image_cfg_scale', 'image_gallery_status' ), + js=f'() => {{{ui.switch_tabs_js}; switch_to_image_ai_generate()}}', show_progress=False ) From 49c60882bf8f4806d0f47e890d7f434007941796 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 3 Dec 2025 16:07:51 -0800 Subject: [PATCH 14/61] Image generation: Safer image uploading --- modules/chat.py | 16 +--------------- modules/image_utils.py | 20 ++++++++++++++++---- modules/ui_image_generation.py | 4 ++-- 3 files changed, 19 insertions(+), 21 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index 55984d7a..acfc2f66 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -3,7 +3,6 @@ import copy import functools import html import json -import os import pprint import re import shutil @@ -26,6 +25,7 @@ from modules.html_generator import ( convert_to_markdown, make_thumbnail ) +from modules.image_utils import open_image_safely from modules.logging_colors import logger from modules.text_generation import ( generate_reply, 
@@ -1516,20 +1516,6 @@ def load_instruction_template_memoized(template): return load_instruction_template(template) -def open_image_safely(path): - if path is None or not isinstance(path, str) or not Path(path).exists(): - return None - - if os.path.islink(path): - return None - - try: - return Image.open(path) - except Exception as e: - logger.error(f"Failed to open image file: {path}. Reason: {e}") - return None - - def upload_character(file, img_path, tavern=False): img = open_image_safely(img_path) decoded_file = file if isinstance(file, str) else file.decode('utf-8') diff --git a/modules/image_utils.py b/modules/image_utils.py index 658f00d7..d2809fef 100644 --- a/modules/image_utils.py +++ b/modules/image_utils.py @@ -1,9 +1,7 @@ -""" -Shared image processing utilities for multimodal support. -Used by both ExLlamaV3 and llama.cpp implementations. -""" import base64 import io +import os +from pathlib import Path from typing import Any, List, Tuple from PIL import Image @@ -11,6 +9,20 @@ from PIL import Image from modules.logging_colors import logger +def open_image_safely(path): + if path is None or not isinstance(path, str) or not Path(path).exists(): + return None + + if os.path.islink(path): + return None + + try: + return Image.open(path) + except Exception as e: + logger.error(f"Failed to open image file: {path}. Reason: {e}") + return None + + def convert_pil_to_base64(image: Image.Image) -> str: """Converts a PIL Image to a base64 encoded string.""" buffered = io.BytesIO() diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 3a64bc8b..06ef3e82 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -7,7 +7,6 @@ from pathlib import Path import gradio as gr import numpy as np -from PIL import Image from PIL.PngImagePlugin import PngInfo from modules import shared, ui, utils @@ -16,6 +15,7 @@ from modules.image_models import ( load_image_model, unload_image_model ) +from modules.image_utils import open_image_safely from modules.logging_colors import logger from modules.text_generation import stop_everything_event from modules.utils import gradio @@ -159,7 +159,7 @@ def save_generated_images(images, state, actual_seed): def read_image_metadata(image_path): """Read generation metadata from PNG file.""" try: - with Image.open(image_path) as img: + with open_image_safely(image_path) as img: if hasattr(img, 'text') and 'image_gen_settings' in img.text: return json.loads(img.text['image_gen_settings']) except Exception as e: From fbca54957eec4ca49c73eac56159fa92603724b7 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 3 Dec 2025 16:13:07 -0800 Subject: [PATCH 15/61] Image generation: Yield partial results for batch count > 1 --- modules/ui_image_generation.py | 11 +++++++---- 1 file changed, 7 insertions(+), 4 deletions(-) diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 06ef3e82..08cf3f64 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -677,7 +677,8 @@ def generate(state): if not model_name or model_name == 'None': logger.error("No image model selected. 
Go to the Model tab and select a model.") - return [] + yield [] + return if shared.image_model is None: result = load_image_model( @@ -690,7 +691,8 @@ def generate(state): ) if result is None: logger.error(f"Failed to load model `{model_name}`.") - return [] + yield [] + return shared.image_model_name = model_name @@ -760,6 +762,7 @@ def generate(state): generator.manual_seed(int(seed + i)) batch_results = shared.image_model(**gen_kwargs).images all_images.extend(batch_results) + yield all_images t1 = time.time() save_generated_images(all_images, state, seed) @@ -768,12 +771,12 @@ def generate(state): total_steps = state["image_steps"] * int(state['image_batch_count']) logger.info(f'Generated {total_images} {"image" if total_images == 1 else "images"} in {(t1 - t0):.2f} seconds ({total_steps / (t1 - t0):.2f} steps/s, seed {seed})') - return all_images + yield all_images except Exception as e: logger.error(f"Image generation failed: {e}") traceback.print_exc() - return [] + yield [] def load_image_model_wrapper(model_name, dtype, attn_backend, cpu_offload, compile_model, quant_method): From c93d27add3b15c0494d436f1a3361c0b4862c626 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 3 Dec 2025 18:29:43 -0800 Subject: [PATCH 16/61] Update llama.cpp --- requirements/full/requirements.txt | 4 ++-- requirements/full/requirements_amd.txt | 4 ++-- requirements/full/requirements_amd_noavx2.txt | 4 ++-- requirements/full/requirements_apple_intel.txt | 4 ++-- requirements/full/requirements_apple_silicon.txt | 4 ++-- requirements/full/requirements_cpu_only.txt | 4 ++-- requirements/full/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/full/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements.txt | 4 ++-- requirements/portable/requirements_amd.txt | 4 ++-- requirements/portable/requirements_amd_noavx2.txt | 4 ++-- requirements/portable/requirements_apple_intel.txt | 4 ++-- requirements/portable/requirements_apple_silicon.txt | 4 ++-- requirements/portable/requirements_cpu_only.txt | 4 ++-- requirements/portable/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/portable/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements_vulkan.txt | 4 ++-- requirements/portable/requirements_vulkan_noavx2.txt | 4 ++-- 18 files changed, 36 insertions(+), 36 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index 4e02d76f..c1f87988 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -44,8 +44,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.16/exllamav3-0.0.16+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" 
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.16/exllamav3-0.0.16+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index c0fbd9ab..512231e0 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -42,7 +42,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index b330646a..1ccc507c 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -42,7 +42,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index c2c64337..4d8aa771 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -42,5 +42,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels 
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index 10889215..5290aa71 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -42,5 +42,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index ffcd6473..d9c76a31 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -42,5 +42,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index daa2444f..1a796c21 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -42,5 +42,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine 
== "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index a43a7724..8105abaa 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -44,8 +44,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.16/exllamav3-0.0.16+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.16/exllamav3-0.0.16+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt index 5f9b69e8..0b944d48 100644 --- a/requirements/portable/requirements.txt +++ b/requirements/portable/requirements.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_amd.txt b/requirements/portable/requirements_amd.txt index 10a6b5af..9b43c901 100644 --- a/requirements/portable/requirements_amd.txt +++ b/requirements/portable/requirements_amd.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+vulkan-py3-none-win_amd64.whl; 
platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_amd_noavx2.txt b/requirements/portable/requirements_amd_noavx2.txt index 4686b9de..825e6dec 100644 --- a/requirements/portable/requirements_amd_noavx2.txt +++ b/requirements/portable/requirements_amd_noavx2.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+rocm6.4.4avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+rocm6.4.4avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt index 890568f4..864b6775 100644 --- a/requirements/portable/requirements_apple_intel.txt +++ b/requirements/portable/requirements_apple_intel.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt index e480db8f..03e090cf 100644 --- a/requirements/portable/requirements_apple_silicon.txt +++ b/requirements/portable/requirements_apple_silicon.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < 
"24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt index c2a7d040..7dd851e8 100644 --- a/requirements/portable/requirements_cpu_only.txt +++ b/requirements/portable/requirements_cpu_only.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt index e243d4be..63949d9f 100644 --- a/requirements/portable/requirements_cpu_only_noavx2.txt +++ b/requirements/portable/requirements_cpu_only_noavx2.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt index 3db163b0..34652264 100644 --- a/requirements/portable/requirements_noavx2.txt +++ b/requirements/portable/requirements_noavx2.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and 
platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt index 7758131c..28849ff7 100644 --- a/requirements/portable/requirements_vulkan.txt +++ b/requirements/portable/requirements_vulkan.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # Vulkan wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt index dcee8d05..365a798e 100644 --- a/requirements/portable/requirements_vulkan_noavx2.txt +++ b/requirements/portable/requirements_vulkan_noavx2.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" From c357eed4c73099343418f5340dc1c28013cb7486 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 3 Dec 2025 18:40:34 -0800 Subject: [PATCH 17/61] Image: Remove the flash_attention_3 option (no idea how to get it working) --- modules/image_models.py | 4 +--- modules/shared.py | 2 +- modules/ui_image_generation.py | 2 +- 3 files changed, 3 insertions(+), 5 deletions(-) diff --git a/modules/image_models.py b/modules/image_models.py index 2ef1e730..8363533b 100644 --- a/modules/image_models.py +++ b/modules/image_models.py @@ -98,7 +98,7 @@ def load_image_model(model_name, dtype='bfloat16', attn_backend='sdpa', cpu_offl Args: model_name: Name of the model directory dtype: 'bfloat16' or 'float16' - attn_backend: 'sdpa', 'flash_attention_2', or 'flash_attention_3' + attn_backend: 'sdpa' or 'flash_attention_2' cpu_offload: Enable CPU offloading for low VRAM compile_model: Compile the model for faster inference (slow first run) quant_method: 'none', 'bnb-8bit', 'bnb-4bit', or torchao options (int8wo, fp4, float8wo) @@ -145,8 +145,6 @@ def load_image_model(model_name, dtype='bfloat16', attn_backend='sdpa', cpu_offl if hasattr(pipe, 'transformer') and hasattr(pipe.transformer, 'set_attention_backend'): if attn_backend == 'flash_attention_2': pipe.transformer.set_attention_backend("flash") - elif attn_backend == 'flash_attention_3': - pipe.transformer.set_attention_backend("_flash_3") # 
sdpa is the default, no action needed if compile_model: diff --git a/modules/shared.py b/modules/shared.py index 4a0fd986..4e17497b 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -56,7 +56,7 @@ group = parser.add_argument_group('Image model') group.add_argument('--image-model', type=str, help='Name of the image model to select on startup (overrides saved setting).') group.add_argument('--image-model-dir', type=str, default='user_data/image_models', help='Path to directory with all the image models.') group.add_argument('--image-dtype', type=str, default=None, choices=['bfloat16', 'float16'], help='Data type for image model.') -group.add_argument('--image-attn-backend', type=str, default=None, choices=['sdpa', 'flash_attention_2', 'flash_attention_3'], help='Attention backend for image model.') +group.add_argument('--image-attn-backend', type=str, default=None, choices=['sdpa', 'flash_attention_2'], help='Attention backend for image model.') group.add_argument('--image-cpu-offload', action='store_true', help='Enable CPU offloading for image model.') group.add_argument('--image-compile', action='store_true', help='Compile the image model for faster inference.') group.add_argument('--image-quant', type=str, default=None, diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 08cf3f64..fdf1af86 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -485,7 +485,7 @@ def create_ui(): info='bfloat16 recommended for modern GPUs' ) shared.gradio['image_attn_backend'] = gr.Dropdown( - choices=['sdpa', 'flash_attention_2', 'flash_attention_3'], + choices=['sdpa', 'flash_attention_2'], value=shared.settings['image_attn_backend'], label='Attention Backend', info='SDPA is default. Flash Attention requires compatible GPU.' 
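For reference, a minimal usage sketch (not part of any patch in this series) of how the remaining attention backends are selected through load_image_model, based only on the signature, docstring, and CLI choices shown in the hunks above; the model directory name is hypothetical:

    # Sketch assuming the modules.image_models API introduced in this series.
    # After this patch, 'flash_attention_2' is routed to
    # pipe.transformer.set_attention_backend("flash"); 'sdpa' is the diffusers
    # default and needs no extra call.
    from modules.image_models import load_image_model

    pipe = load_image_model(
        "Z-Image-Turbo",                   # hypothetical folder under user_data/image_models
        dtype="bfloat16",                  # or "float16", per --image-dtype
        attn_backend="flash_attention_2",  # or "sdpa" after this patch
    )
    if pipe is None:
        # The UI's generate() treats a None return as a load failure.
        raise RuntimeError("Image model failed to load; check the log for details.")

The same two backends are what --image-attn-backend accepts on the command line after this change.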
From 235b94f097d7efa37f3f33de6b1787ce5673b742 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 3 Dec 2025 18:43:30 -0800 Subject: [PATCH 18/61] Image: Add placeholder file for user_data/image_models --- user_data/image_models/place-your-models-here.txt | 0 1 file changed, 0 insertions(+), 0 deletions(-) create mode 100644 user_data/image_models/place-your-models-here.txt diff --git a/user_data/image_models/place-your-models-here.txt b/user_data/image_models/place-your-models-here.txt new file mode 100644 index 00000000..e69de29b From 14dbc3488e5780be50a7b6211742ec2137cdc503 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 4 Dec 2025 05:32:58 -0800 Subject: [PATCH 19/61] Image: Clear the torch cache after generation, not before --- modules/ui_image_generation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index fdf1af86..7c393f13 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -670,8 +670,6 @@ def generate(state): from modules.torch_utils import clear_torch_cache, get_device - clear_torch_cache() - try: model_name = state['image_model_menu'] @@ -772,11 +770,13 @@ def generate(state): logger.info(f'Generated {total_images} {"image" if total_images == 1 else "images"} in {(t1 - t0):.2f} seconds ({total_steps / (t1 - t0):.2f} steps/s, seed {seed})') yield all_images + clear_torch_cache() except Exception as e: logger.error(f"Image generation failed: {e}") traceback.print_exc() yield [] + clear_torch_cache() def load_image_model_wrapper(model_name, dtype, attn_backend, cpu_offload, compile_model, quant_method): From a838223d185597071ba4d346190f24708fa51062 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 4 Dec 2025 05:49:57 -0800 Subject: [PATCH 20/61] Image: Add a progress bar during generation --- modules/ui_image_generation.py | 98 +++++++++++++++++++++++++--------- 1 file changed, 72 insertions(+), 26 deletions(-) diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 7c393f13..62760a8a 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -373,7 +373,10 @@ def create_ui(): shared.gradio['image_generate_btn'] = gr.Button("Generate", variant="primary", size="lg") shared.gradio['image_stop_btn'] = gr.Button("Stop", size="lg", visible=False) - gr.HTML("
") + shared.gradio['image_progress'] = gr.HTML( + value=progress_bar_html(), + elem_id="image-progress" + ) gr.Markdown("### Dimensions") with gr.Row(): @@ -546,19 +549,19 @@ def create_event_handlers(): shared.gradio['image_generate_btn'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda: [gr.update(visible=True), gr.update(visible=False)], None, gradio('image_stop_btn', 'image_generate_btn')).then( - generate, gradio('interface_state'), gradio('image_output_gallery'), show_progress=False).then( + generate, gradio('interface_state'), gradio('image_output_gallery', 'image_progress'), show_progress=False).then( lambda: [gr.update(visible=False), gr.update(visible=True)], None, gradio('image_stop_btn', 'image_generate_btn')) shared.gradio['image_prompt'].submit( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda: [gr.update(visible=True), gr.update(visible=False)], None, gradio('image_stop_btn', 'image_generate_btn')).then( - generate, gradio('interface_state'), gradio('image_output_gallery'), show_progress=False).then( + generate, gradio('interface_state'), gradio('image_output_gallery', 'image_progress'), show_progress=False).then( lambda: [gr.update(visible=False), gr.update(visible=True)], None, gradio('image_stop_btn', 'image_generate_btn')) shared.gradio['image_neg_prompt'].submit( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( lambda: [gr.update(visible=True), gr.update(visible=False)], None, gradio('image_stop_btn', 'image_generate_btn')).then( - generate, gradio('interface_state'), gradio('image_output_gallery'), show_progress=False).then( + generate, gradio('interface_state'), gradio('image_output_gallery', 'image_progress'), show_progress=False).then( lambda: [gr.update(visible=False), gr.update(visible=True)], None, gradio('image_stop_btn', 'image_generate_btn')) # Stop button @@ -661,11 +664,27 @@ def create_event_handlers(): ) +def progress_bar_html(progress=0, text=""): + """Generate HTML for progress bar. Empty div when progress <= 0.""" + if progress <= 0: + return '
' + + return f'''
+
+
+
+
{text}
+
''' + + def generate(state): """ Generate images using the loaded model. Automatically adjusts parameters based on pipeline type. """ + import queue + import threading + import torch from modules.torch_utils import clear_torch_cache, get_device @@ -675,7 +694,7 @@ def generate(state): if not model_name or model_name == 'None': logger.error("No image model selected. Go to the Model tab and select a model.") - yield [] + yield [], progress_bar_html() return if shared.image_model is None: @@ -689,7 +708,7 @@ def generate(state): ) if result is None: logger.error(f"Failed to load model `{model_name}`.") - yield [] + yield [], progress_bar_html() return shared.image_model_name = model_name @@ -713,69 +732,96 @@ def generate(state): # Process Prompt prompt = state['image_prompt'] - # Apply "Positive Magic" for Qwen models only if pipeline_type == 'qwenimage': magic_suffix = ", Ultra HD, 4K, cinematic composition" - # Avoid duplication if user already added it if magic_suffix.strip(", ") not in prompt: prompt += magic_suffix - # Reset stop flag at start shared.stop_everything = False - # Callback to check for interruption during diffusion steps + batch_count = int(state['image_batch_count']) + steps_per_batch = int(state['image_steps']) + total_steps = steps_per_batch * batch_count + + # Queue for progress updates from callback + progress_queue = queue.Queue() + def interrupt_callback(pipe, step_index, timestep, callback_kwargs): if shared.stop_everything: pipe._interrupt = True - + progress_queue.put(step_index + 1) return callback_kwargs - # Build generation kwargs gen_kwargs = { "prompt": prompt, "negative_prompt": state['image_neg_prompt'], "height": int(state['image_height']), "width": int(state['image_width']), - "num_inference_steps": int(state['image_steps']), + "num_inference_steps": steps_per_batch, "num_images_per_prompt": int(state['image_batch_size']), "generator": generator, "callback_on_step_end": interrupt_callback, } - # Add pipeline-specific parameters for CFG cfg_val = state.get('image_cfg_scale', 0.0) - if pipeline_type == 'qwenimage': - # Qwen-Image uses true_cfg_scale (typically 4.0) gen_kwargs["true_cfg_scale"] = cfg_val else: - # Z-Image and others use guidance_scale (typically 0.0 for Turbo) gen_kwargs["guidance_scale"] = cfg_val t0 = time.time() - for i in range(int(state['image_batch_count'])): + + for batch_idx in range(batch_count): if shared.stop_everything: break - generator.manual_seed(int(seed + i)) - batch_results = shared.image_model(**gen_kwargs).images - all_images.extend(batch_results) - yield all_images + generator.manual_seed(int(seed + batch_idx)) + + # Run generation in thread so we can yield progress + result_holder = [] + error_holder = [] + + def run_batch(): + try: + result_holder.extend(shared.image_model(**gen_kwargs).images) + except Exception as e: + error_holder.append(e) + + thread = threading.Thread(target=run_batch) + thread.start() + + # Yield progress updates while generation runs + while thread.is_alive(): + try: + step = progress_queue.get(timeout=0.1) + absolute_step = batch_idx * steps_per_batch + step + pct = absolute_step / total_steps + text = f"Batch {batch_idx + 1}/{batch_count} — Step {step}/{steps_per_batch}" + yield all_images, progress_bar_html(pct, text) + except queue.Empty: + pass + + thread.join() + + if error_holder: + raise error_holder[0] + + all_images.extend(result_holder) + yield all_images, progress_bar_html((batch_idx + 1) / batch_count, f"Batch {batch_idx + 1}/{batch_count} complete") t1 = time.time() 
save_generated_images(all_images, state, seed) - total_images = int(state['image_batch_count']) * int(state['image_batch_size']) - total_steps = state["image_steps"] * int(state['image_batch_count']) + total_images = batch_count * int(state['image_batch_size']) logger.info(f'Generated {total_images} {"image" if total_images == 1 else "images"} in {(t1 - t0):.2f} seconds ({total_steps / (t1 - t0):.2f} steps/s, seed {seed})') - yield all_images + yield all_images, progress_bar_html() clear_torch_cache() except Exception as e: logger.error(f"Image generation failed: {e}") traceback.print_exc() - yield [] + yield [], progress_bar_html() clear_torch_cache() From 7fb9f19bd8866ceb47765cfc428e6a21d105d498 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 4 Dec 2025 06:17:40 -0800 Subject: [PATCH 21/61] Progress bar style improvements --- css/main.css | 39 ++++++++++++++++++++++++++++++++++ modules/ui_image_generation.py | 10 ++++----- 2 files changed, 44 insertions(+), 5 deletions(-) diff --git a/css/main.css b/css/main.css index 5c1c356d..4dea6b9e 100644 --- a/css/main.css +++ b/css/main.css @@ -1752,3 +1752,42 @@ button#swap-height-width { .min.svelte-1yrv54 { min-height: 0; } + +/* Image Generation Progress Bar */ +#image-progress .image-ai-separator { + height: 24px; + margin: 20px 0; + border-top: 1px solid var(--input-border-color); +} + +#image-progress .image-ai-progress-wrapper { + height: 24px; + margin: 20px 0; +} + +#image-progress .image-ai-progress-track { + background: #e5e7eb; + border-radius: 4px; + overflow: hidden; + height: 8px; +} + +.dark #image-progress .image-ai-progress-track { + background: #333; +} + +#image-progress .image-ai-progress-fill { + background: #4a9eff; + height: 100%; +} + +#image-progress .image-ai-progress-text { + text-align: center; + font-size: 12px; + color: #666; + margin-top: 4px; +} + +.dark #image-progress .image-ai-progress-text { + color: #888; +} diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 62760a8a..e85f1520 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -667,13 +667,13 @@ def create_event_handlers(): def progress_bar_html(progress=0, text=""): """Generate HTML for progress bar. Empty div when progress <= 0.""" if progress <= 0: - return '
' + return '
' - return f'''
-
-
+ return f'''
+
+
-
{text}
+
{text}
''' From 27931537176fef1bc1335815097ebc780cbf1dbf Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 4 Dec 2025 07:57:23 -0800 Subject: [PATCH 22/61] Image: Add LLM-generated prompt variations --- modules/shared.py | 1 + modules/ui.py | 2 ++ modules/ui_image_generation.py | 58 ++++++++++++++++++++++++++++++++++ 3 files changed, 61 insertions(+) diff --git a/modules/shared.py b/modules/shared.py index 4e17497b..1ecc0d28 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -319,6 +319,7 @@ settings = { 'image_seed': -1, 'image_batch_size': 1, 'image_batch_count': 1, + 'image_llm_variations': False, 'image_model_menu': 'None', 'image_dtype': 'bfloat16', 'image_attn_backend': 'sdpa', diff --git a/modules/ui.py b/modules/ui.py index ff5686e8..d95f7bc6 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -293,6 +293,7 @@ def list_interface_input_elements(): 'image_seed', 'image_batch_size', 'image_batch_count', + 'image_llm_variations', 'image_model_menu', 'image_dtype', 'image_attn_backend', @@ -547,6 +548,7 @@ def setup_auto_save(): 'image_seed', 'image_batch_size', 'image_batch_count', + 'image_llm_variations', 'image_model_menu', 'image_dtype', 'image_attn_backend', diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index e85f1520..ceb470ff 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -10,6 +10,7 @@ import numpy as np from PIL.PngImagePlugin import PngInfo from modules import shared, ui, utils +from modules.utils import check_model_loaded from modules.image_models import ( get_pipeline_type, load_image_model, @@ -409,6 +410,11 @@ def create_ui(): with gr.Column(): shared.gradio['image_batch_size'] = gr.Slider(1, 32, value=shared.settings['image_batch_size'], step=1, label="Batch Size (VRAM Heavy)", info="Generates N images at once.") shared.gradio['image_batch_count'] = gr.Slider(1, 128, value=shared.settings['image_batch_count'], step=1, label="Sequential Count (Loop)", info="Repeats the generation N times.") + shared.gradio['image_llm_variations'] = gr.Checkbox( + value=shared.settings['image_llm_variations'], + label='LLM Prompt Variations', + info='Use the loaded LLM to generate creative prompt variations for each sequential batch.' + ) with gr.Column(scale=6, min_width=500): with gr.Column(elem_classes=["viewport-container"]): @@ -664,6 +670,54 @@ def create_event_handlers(): ) +def generate_prompt_variation(state): + """Generate a creative variation of the image prompt using the LLM.""" + from modules.chat import generate_chat_prompt + from modules.text_generation import generate_reply + + prompt = state['image_prompt'] + + # Check if LLM is loaded + model_loaded, _ = check_model_loaded() + if not model_loaded: + logger.warning("No LLM loaded for prompt variation. Using original prompt.") + return prompt + + augmented_message = f"{prompt}\n\n=====\n\nPlease create a creative variation of the image generation prompt above. Keep the same general subject and style, but vary the details, composition, lighting, or mood. Respond with only the new prompt, nothing else." 
+ + # Use minimal state for generation + var_state = state.copy() + var_state['history'] = {'internal': [], 'visible': [], 'metadata': {}} + var_state['auto_max_new_tokens'] = True + var_state['enable_thinking'] = False + var_state['reasoning_effort'] = 'low' + var_state['start_with'] = "" + + formatted_prompt = generate_chat_prompt(augmented_message, var_state) + + variation = "" + for reply in generate_reply(formatted_prompt, var_state, stopping_strings=[], is_chat=True): + variation = reply + + # Strip thinking blocks if present + if "" in variation: + variation = variation.rsplit("", 1)[1] + elif "<|start|>assistant<|channel|>final<|message|>" in variation: + variation = variation.rsplit("<|start|>assistant<|channel|>final<|message|>", 1)[1] + elif "" in variation: + variation = variation.rsplit("", 1)[1] + + variation = variation.strip() + if len(variation) >= 2 and variation.startswith('"') and variation.endswith('"'): + variation = variation[1:-1] + + if variation: + logger.info(f"Prompt variation: {variation}...") + return variation + + return prompt + + def progress_bar_html(progress=0, text=""): """Generate HTML for progress bar. Empty div when progress <= 0.""" if progress <= 0: @@ -777,6 +831,10 @@ def generate(state): generator.manual_seed(int(seed + batch_idx)) + # Generate prompt variation if enabled + if state['image_llm_variations']: + gen_kwargs["prompt"] = generate_prompt_variation(state) + # Run generation in thread so we can yield progress result_holder = [] error_holder = [] From 5763947c375eef6cb1df55e6788538ab30ac4e99 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 4 Dec 2025 10:23:00 -0800 Subject: [PATCH 23/61] Image: Simplify the API code, add the llm_variations option --- extensions/openai/images.py | 145 ++++++--------------------------- extensions/openai/script.py | 32 +++----- extensions/openai/typing.py | 15 ++-- modules/ui_image_generation.py | 10 +-- 4 files changed, 49 insertions(+), 153 deletions(-) diff --git a/extensions/openai/images.py b/extensions/openai/images.py index 3a5288e6..1ecb1e63 100644 --- a/extensions/openai/images.py +++ b/extensions/openai/images.py @@ -4,117 +4,50 @@ OpenAI-compatible image generation using local diffusion models. import base64 import io -import json -import os import time -from datetime import datetime -import numpy as np from extensions.openai.errors import ServiceUnavailableError from modules import shared -from modules.logging_colors import logger -from PIL.PngImagePlugin import PngInfo -def generations(prompt: str, size: str, response_format: str, n: int, - negative_prompt: str = "", steps: int = 9, seed: int = -1, - cfg_scale: float = 0.0, batch_count: int = 1): +def generations(request): """ Generate images using the loaded diffusion model. - - Args: - prompt: Text description of the desired image - size: Image dimensions as "WIDTHxHEIGHT" - response_format: 'url' or 'b64_json' - n: Number of images per batch - negative_prompt: What to avoid in the image - steps: Number of inference steps - seed: Random seed (-1 for random) - cfg_scale: Classifier-free guidance scale - batch_count: Number of sequential batches - - Returns: - dict with 'created' timestamp and 'data' list of images + Returns dict with 'created' timestamp and 'data' list of images. 
""" - import torch - from modules.image_models import get_pipeline_type - from modules.torch_utils import clear_torch_cache, get_device + from modules.ui_image_generation import generate if shared.image_model is None: raise ServiceUnavailableError("No image model loaded. Load a model via the UI first.") - clear_torch_cache() + width, height = request.get_width_height() - # Parse dimensions - try: - width, height = [int(x) for x in size.split('x')] - except (ValueError, IndexError): - width, height = 1024, 1024 + # Build state dict: GenerationOptions fields + image-specific keys + state = request.model_dump() + state.update({ + 'image_model_menu': shared.image_model_name, + 'image_prompt': request.prompt, + 'image_neg_prompt': request.negative_prompt, + 'image_width': width, + 'image_height': height, + 'image_steps': request.steps, + 'image_seed': request.image_seed, + 'image_batch_size': request.batch_size, + 'image_batch_count': request.batch_count, + 'image_cfg_scale': request.cfg_scale, + 'image_llm_variations': request.llm_variations, + }) - # Handle seed - if seed == -1: - seed = np.random.randint(0, 2**32 - 1) - - device = get_device() or "cpu" - generator = torch.Generator(device).manual_seed(int(seed)) - - # Get pipeline type for CFG parameter name - pipeline_type = getattr(shared, 'image_pipeline_type', None) or get_pipeline_type(shared.image_model) - - # Build generation kwargs - gen_kwargs = { - "prompt": prompt, - "negative_prompt": negative_prompt, - "height": height, - "width": width, - "num_inference_steps": steps, - "num_images_per_prompt": n, - "generator": generator, - } - - # Pipeline-specific CFG parameter - if pipeline_type == 'qwenimage': - gen_kwargs["true_cfg_scale"] = cfg_scale - else: - gen_kwargs["guidance_scale"] = cfg_scale - - # Generate - all_images = [] - t0 = time.time() - - shared.stop_everything = False - - def interrupt_callback(pipe, step_index, timestep, callback_kwargs): - if shared.stop_everything: - pipe._interrupt = True - return callback_kwargs - - gen_kwargs["callback_on_step_end"] = interrupt_callback - - for i in range(batch_count): - if shared.stop_everything: - break - generator.manual_seed(int(seed + i)) - batch_results = shared.image_model(**gen_kwargs).images - all_images.extend(batch_results) - - t1 = time.time() - total_images = len(all_images) - total_steps = steps * batch_count - logger.info(f'Generated {total_images} {"image" if total_images == 1 else "images"} in {(t1 - t0):.2f} seconds ({total_steps / (t1 - t0):.2f} steps/s, seed {seed})') - - # Save images - _save_images(all_images, prompt, negative_prompt, width, height, steps, seed, cfg_scale) + # Exhaust generator, keep final result + images = [] + for images, _ in generate(state, save_images=False): + pass # Build response - resp = { - 'created': int(time.time()), - 'data': [] - } - - for img in all_images: + resp = {'created': int(time.time()), 'data': []} + for img in images: b64 = _image_to_base64(img) - if response_format == 'b64_json': + if request.response_format == 'b64_json': resp['data'].append({'b64_json': b64}) else: resp['data'].append({'url': f'data:image/png;base64,{b64}'}) @@ -126,29 +59,3 @@ def _image_to_base64(image) -> str: buffered = io.BytesIO() image.save(buffered, format="PNG") return base64.b64encode(buffered.getvalue()).decode('utf-8') - - -def _save_images(images, prompt, negative_prompt, width, height, steps, seed, cfg_scale): - """Save images with metadata.""" - date_str = datetime.now().strftime("%Y-%m-%d") - folder = os.path.join("user_data", 
"image_outputs", date_str) - os.makedirs(folder, exist_ok=True) - - metadata = { - 'image_prompt': prompt, - 'image_neg_prompt': negative_prompt, - 'image_width': width, - 'image_height': height, - 'image_steps': steps, - 'image_seed': seed, - 'image_cfg_scale': cfg_scale, - 'model': getattr(shared, 'image_model_name', 'unknown'), - } - - for idx, img in enumerate(images): - ts = datetime.now().strftime("%H-%M-%S") - filepath = os.path.join(folder, f"{ts}_{seed:010d}_{idx:03d}.png") - - png_info = PngInfo() - png_info.add_text("image_gen_settings", json.dumps(metadata)) - img.save(filepath, pnginfo=png_info) diff --git a/extensions/openai/script.py b/extensions/openai/script.py index 65805629..12f99ba4 100644 --- a/extensions/openai/script.py +++ b/extensions/openai/script.py @@ -7,23 +7,24 @@ import traceback from collections import deque from threading import Thread -import extensions.openai.completions as OAIcompletions -import extensions.openai.logits as OAIlogits -import extensions.openai.models as OAImodels import uvicorn -from extensions.openai.tokens import token_count, token_decode, token_encode -from extensions.openai.utils import _start_cloudflared from fastapi import Depends, FastAPI, Header, HTTPException from fastapi.middleware.cors import CORSMiddleware from fastapi.requests import Request from fastapi.responses import JSONResponse +from pydub import AudioSegment +from sse_starlette import EventSourceResponse +from starlette.concurrency import iterate_in_threadpool + +import extensions.openai.completions as OAIcompletions +import extensions.openai.logits as OAIlogits +import extensions.openai.models as OAImodels +from extensions.openai.tokens import token_count, token_decode, token_encode +from extensions.openai.utils import _start_cloudflared from modules import shared from modules.logging_colors import logger from modules.models import unload_model from modules.text_generation import stop_everything_event -from pydub import AudioSegment -from sse_starlette import EventSourceResponse -from starlette.concurrency import iterate_in_threadpool from .typing import ( ChatCompletionRequest, @@ -232,20 +233,7 @@ async def handle_image_generation(request_data: ImageGenerationRequest): import extensions.openai.images as OAIimages async with image_generation_semaphore: - width, height = request_data.get_width_height() - - response = await asyncio.to_thread( - OAIimages.generations, - prompt=request_data.prompt, - size=f"{width}x{height}", - response_format=request_data.response_format, - n=request_data.batch_size, # <-- use resolved batch_size - negative_prompt=request_data.negative_prompt, - steps=request_data.steps, - seed=request_data.seed, - cfg_scale=request_data.cfg_scale, - batch_count=request_data.batch_count, - ) + response = await asyncio.to_thread(OAIimages.generations, request_data) return JSONResponse(response) diff --git a/extensions/openai/typing.py b/extensions/openai/typing.py index a24b844b..dfdb9a7e 100644 --- a/extensions/openai/typing.py +++ b/extensions/openai/typing.py @@ -264,20 +264,18 @@ class LoadLorasRequest(BaseModel): lora_names: List[str] -class ImageGenerationRequest(BaseModel): - """OpenAI-compatible image generation request with extended parameters.""" - # Required +class ImageGenerationRequestParams(BaseModel): + """Image-specific parameters for generation.""" prompt: str - - # Generation parameters negative_prompt: str = "" size: str = Field(default="1024x1024", description="'WIDTHxHEIGHT'") steps: int = Field(default=9, ge=1) cfg_scale: float = 
Field(default=0.0, ge=0.0) - seed: int = Field(default=-1, description="-1 for random") + image_seed: int = Field(default=-1, description="-1 for random") batch_size: int | None = Field(default=None, ge=1, description="Parallel batch size (VRAM heavy)") n: int = Field(default=1, ge=1, description="Alias for batch_size (OpenAI compatibility)") batch_count: int = Field(default=1, ge=1, description="Sequential batch count") + llm_variations: bool = False # OpenAI compatibility (unused) model: str | None = None @@ -286,7 +284,6 @@ class ImageGenerationRequest(BaseModel): @model_validator(mode='after') def resolve_batch_size(self): - """Use batch_size if provided, otherwise fall back to n.""" if self.batch_size is None: self.batch_size = self.n return self @@ -299,6 +296,10 @@ class ImageGenerationRequest(BaseModel): return 1024, 1024 +class ImageGenerationRequest(GenerationOptions, ImageGenerationRequestParams): + pass + + class ImageGenerationResponse(BaseModel): created: int = int(time.time()) data: List[dict] diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index ceb470ff..6ac0bc24 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -10,7 +10,6 @@ import numpy as np from PIL.PngImagePlugin import PngInfo from modules import shared, ui, utils -from modules.utils import check_model_loaded from modules.image_models import ( get_pipeline_type, load_image_model, @@ -19,7 +18,7 @@ from modules.image_models import ( from modules.image_utils import open_image_safely from modules.logging_colors import logger from modules.text_generation import stop_everything_event -from modules.utils import gradio +from modules.utils import check_model_loaded, gradio ASPECT_RATIOS = { "1:1 Square": (1, 1), @@ -725,13 +724,13 @@ def progress_bar_html(progress=0, text=""): return f'''
-
+
{text}
''' -def generate(state): +def generate(state, save_images=True): """ Generate images using the loaded model. Automatically adjusts parameters based on pipeline type. @@ -868,7 +867,8 @@ def generate(state): yield all_images, progress_bar_html((batch_idx + 1) / batch_count, f"Batch {batch_idx + 1}/{batch_count} complete") t1 = time.time() - save_generated_images(all_images, state, seed) + if save_images: + save_generated_images(all_images, state, seed) total_images = batch_count * int(state['image_batch_size']) logger.info(f'Generated {total_images} {"image" if total_images == 1 else "images"} in {(t1 - t0):.2f} seconds ({total_steps / (t1 - t0):.2f} steps/s, seed {seed})') From ffef3c7b1dc5f3678a03abfa98fd54547de23796 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 4 Dec 2025 10:44:35 -0800 Subject: [PATCH 24/61] Image: Make the LLM Variations prompt configurable --- extensions/openai/images.py | 1 + extensions/openai/typing.py | 1 + modules/shared.py | 1 + modules/ui.py | 2 ++ modules/ui_image_generation.py | 36 ++++++++++++++++++++++++++++------ 5 files changed, 35 insertions(+), 6 deletions(-) diff --git a/extensions/openai/images.py b/extensions/openai/images.py index 1ecb1e63..0bb91a1e 100644 --- a/extensions/openai/images.py +++ b/extensions/openai/images.py @@ -36,6 +36,7 @@ def generations(request): 'image_batch_count': request.batch_count, 'image_cfg_scale': request.cfg_scale, 'image_llm_variations': request.llm_variations, + 'image_llm_variations_prompt': request.llm_variations_prompt or shared.settings.get('image_llm_variations_prompt', ''), }) # Exhaust generator, keep final result diff --git a/extensions/openai/typing.py b/extensions/openai/typing.py index dfdb9a7e..31a5dc6d 100644 --- a/extensions/openai/typing.py +++ b/extensions/openai/typing.py @@ -276,6 +276,7 @@ class ImageGenerationRequestParams(BaseModel): n: int = Field(default=1, ge=1, description="Alias for batch_size (OpenAI compatibility)") batch_count: int = Field(default=1, ge=1, description="Sequential batch count") llm_variations: bool = False + llm_variations_prompt: str | None = None # OpenAI compatibility (unused) model: str | None = None diff --git a/modules/shared.py b/modules/shared.py index 1ecc0d28..7fe9dbcf 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -320,6 +320,7 @@ settings = { 'image_batch_size': 1, 'image_batch_count': 1, 'image_llm_variations': False, + 'image_llm_variations_prompt': 'Please create a creative variation of the image generation prompt above. Keep the same general subject and style, but vary the details, composition, lighting, or mood. 
Respond with only the new prompt, nothing else.', 'image_model_menu': 'None', 'image_dtype': 'bfloat16', 'image_attn_backend': 'sdpa', diff --git a/modules/ui.py b/modules/ui.py index d95f7bc6..919a5740 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -294,6 +294,7 @@ def list_interface_input_elements(): 'image_batch_size', 'image_batch_count', 'image_llm_variations', + 'image_llm_variations_prompt', 'image_model_menu', 'image_dtype', 'image_attn_backend', @@ -549,6 +550,7 @@ def setup_auto_save(): 'image_batch_size', 'image_batch_count', 'image_llm_variations', + 'image_llm_variations_prompt', 'image_model_menu', 'image_dtype', 'image_attn_backend', diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 6ac0bc24..6eeb3d51 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -406,14 +406,25 @@ def create_ui(): info="Z-Image Turbo: 0.0 | Qwen: 4.0" ) shared.gradio['image_seed'] = gr.Number(label="Seed", value=shared.settings['image_seed'], precision=0, info="-1 = Random") + with gr.Column(): shared.gradio['image_batch_size'] = gr.Slider(1, 32, value=shared.settings['image_batch_size'], step=1, label="Batch Size (VRAM Heavy)", info="Generates N images at once.") shared.gradio['image_batch_count'] = gr.Slider(1, 128, value=shared.settings['image_batch_count'], step=1, label="Sequential Count (Loop)", info="Repeats the generation N times.") - shared.gradio['image_llm_variations'] = gr.Checkbox( - value=shared.settings['image_llm_variations'], - label='LLM Prompt Variations', - info='Use the loaded LLM to generate creative prompt variations for each sequential batch.' - ) + + gr.Markdown("### LLM Variations") + shared.gradio['image_llm_variations'] = gr.Checkbox( + value=shared.settings['image_llm_variations'], + label='Activate', + info='Use the loaded LLM to generate creative prompt variations for each sequential batch.' + ) + shared.gradio['image_llm_variations_prompt'] = gr.Textbox( + value=shared.settings['image_llm_variations_prompt'], + label='Variation Prompt', + lines=3, + placeholder='Instructions for generating prompt variations...', + visible=shared.settings['image_llm_variations'], + info='The instruction given to the LLM for generating variations.' + ) with gr.Column(scale=6, min_width=500): with gr.Column(elem_classes=["viewport-container"]): @@ -668,6 +679,14 @@ def create_event_handlers(): show_progress=False ) + # LLM Variations visibility toggle + shared.gradio['image_llm_variations'].change( + lambda x: gr.update(visible=x), + gradio('image_llm_variations'), + gradio('image_llm_variations_prompt'), + show_progress=False + ) + def generate_prompt_variation(state): """Generate a creative variation of the image prompt using the LLM.""" @@ -682,7 +701,12 @@ def generate_prompt_variation(state): logger.warning("No LLM loaded for prompt variation. Using original prompt.") return prompt - augmented_message = f"{prompt}\n\n=====\n\nPlease create a creative variation of the image generation prompt above. Keep the same general subject and style, but vary the details, composition, lighting, or mood. Respond with only the new prompt, nothing else." + # Get the custom variation prompt or use default + variation_instruction = state.get('image_llm_variations_prompt', '') + if not variation_instruction: + variation_instruction = 'Please create a creative variation of the image generation prompt above. Keep the same general subject and style, but vary the details, composition, lighting, or mood. 
Respond with only the new prompt, nothing else.' + + augmented_message = f"{prompt}\n\n=====\n\n{variation_instruction}" # Use minimal state for generation var_state = state.copy() From a90739f498e333ff3ffa0cba418139b1d1e00822 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 4 Dec 2025 10:50:40 -0800 Subject: [PATCH 25/61] Image: Better LLM variation default prompt --- modules/shared.py | 2 +- modules/ui_image_generation.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index 7fe9dbcf..2f39e495 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -320,7 +320,7 @@ settings = { 'image_batch_size': 1, 'image_batch_count': 1, 'image_llm_variations': False, - 'image_llm_variations_prompt': 'Please create a creative variation of the image generation prompt above. Keep the same general subject and style, but vary the details, composition, lighting, or mood. Respond with only the new prompt, nothing else.', + 'image_llm_variations_prompt': 'Your task is to create a creative variation of the image generation prompt above. Keep the main subject but feel free to add an interesting setting, scenario, pose, atmosphere, or stylistic twist. Be specific and vivid. Respond with only the new prompt, nothing else.', 'image_model_menu': 'None', 'image_dtype': 'bfloat16', 'image_attn_backend': 'sdpa', diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 6eeb3d51..1cf1b955 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -704,7 +704,7 @@ def generate_prompt_variation(state): # Get the custom variation prompt or use default variation_instruction = state.get('image_llm_variations_prompt', '') if not variation_instruction: - variation_instruction = 'Please create a creative variation of the image generation prompt above. Keep the same general subject and style, but vary the details, composition, lighting, or mood. Respond with only the new prompt, nothing else.' + variation_instruction = 'Your task is to create a creative variation of the image generation prompt above. Keep the main subject but feel free to add an interesting setting, scenario, pose, atmosphere, or stylistic twist. Be specific and vivid. Respond with only the new prompt, nothing else.' 
augmented_message = f"{prompt}\n\n=====\n\n{variation_instruction}" From ac31a7c0082c08ae126c7140434c7fa053e53ea9 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 4 Dec 2025 15:45:04 -0800 Subject: [PATCH 26/61] Image: Organize the UI --- css/main.css | 6 ++++++ modules/ui_image_generation.py | 30 ++++++++++++++---------------- 2 files changed, 20 insertions(+), 16 deletions(-) diff --git a/css/main.css b/css/main.css index 4dea6b9e..e7586960 100644 --- a/css/main.css +++ b/css/main.css @@ -1791,3 +1791,9 @@ button#swap-height-width { .dark #image-progress .image-ai-progress-text { color: #888; } + +#llm-prompt-variations { + position: absolute; + top: 0; + left: calc(100% - 174px); +} diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 1cf1b955..5a1a8f79 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -370,6 +370,19 @@ def create_ui(): lines=3, value=shared.settings['image_neg_prompt'] ) + shared.gradio['image_llm_variations'] = gr.Checkbox( + value=shared.settings['image_llm_variations'], + label='LLM Prompt Variations', + elem_id="llm-prompt-variations", + ) + shared.gradio['image_llm_variations_prompt'] = gr.Textbox( + value=shared.settings['image_llm_variations_prompt'], + label='Variation Prompt', + lines=3, + placeholder='Instructions for generating prompt variations...', + visible=shared.settings['image_llm_variations'], + info='Use the loaded LLM to generate creative prompt variations for each sequential batch.' + ) shared.gradio['image_generate_btn'] = gr.Button("Generate", variant="primary", size="lg") shared.gradio['image_stop_btn'] = gr.Button("Stop", size="lg", visible=False) @@ -411,21 +424,6 @@ def create_ui(): shared.gradio['image_batch_size'] = gr.Slider(1, 32, value=shared.settings['image_batch_size'], step=1, label="Batch Size (VRAM Heavy)", info="Generates N images at once.") shared.gradio['image_batch_count'] = gr.Slider(1, 128, value=shared.settings['image_batch_count'], step=1, label="Sequential Count (Loop)", info="Repeats the generation N times.") - gr.Markdown("### LLM Variations") - shared.gradio['image_llm_variations'] = gr.Checkbox( - value=shared.settings['image_llm_variations'], - label='Activate', - info='Use the loaded LLM to generate creative prompt variations for each sequential batch.' - ) - shared.gradio['image_llm_variations_prompt'] = gr.Textbox( - value=shared.settings['image_llm_variations_prompt'], - label='Variation Prompt', - lines=3, - placeholder='Instructions for generating prompt variations...', - visible=shared.settings['image_llm_variations'], - info='The instruction given to the LLM for generating variations.' 
- ) - with gr.Column(scale=6, min_width=500): with gr.Column(elem_classes=["viewport-container"]): shared.gradio['image_output_gallery'] = gr.Gallery(label="Output", show_label=False, columns=2, rows=2, height="80vh", object_fit="contain", preview=True, elem_id="image-output-gallery") @@ -735,7 +733,7 @@ def generate_prompt_variation(state): variation = variation[1:-1] if variation: - logger.info(f"Prompt variation: {variation}...") + logger.info(f"Prompt variation: {variation}") return variation return prompt From 47a0fcd614b89a5705b65d93eaf89de544ab8880 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 4 Dec 2025 16:11:35 -0800 Subject: [PATCH 27/61] Image: PNG metadata improvements --- modules/ui_image_generation.py | 38 +++++++++++++++++----------------- 1 file changed, 19 insertions(+), 19 deletions(-) diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 5a1a8f79..a377f329 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -40,8 +40,6 @@ METADATA_SETTINGS_KEYS = [ 'image_aspect_ratio', 'image_steps', 'image_seed', - 'image_batch_size', - 'image_batch_count', 'image_cfg_scale', ] @@ -184,8 +182,6 @@ def format_metadata_for_display(metadata): ('image_steps', 'Steps'), ('image_cfg_scale', 'CFG Scale'), ('image_seed', 'Seed'), - ('image_batch_size', 'Batch Size'), - ('image_batch_count', 'Batch Count'), ('model', 'Model'), ('generated_at', 'Generated At'), ] @@ -314,11 +310,11 @@ def on_gallery_select(evt: gr.SelectData, current_page): def send_to_generate(selected_image_path): """Load settings from selected image and return updates for all Generate tab inputs.""" if not selected_image_path or not os.path.exists(selected_image_path): - return [gr.update()] * 10 + ["No image selected"] + return [gr.update()] * 8 + ["No image selected"] metadata = read_image_metadata(selected_image_path) if not metadata: - return [gr.update()] * 10 + ["No settings found in this image"] + return [gr.update()] * 8 + ["No settings found in this image"] # Return updates for each input element in order updates = [ @@ -329,8 +325,6 @@ def send_to_generate(selected_image_path): gr.update(value=metadata.get('image_aspect_ratio', '1:1 Square')), gr.update(value=metadata.get('image_steps', 9)), gr.update(value=metadata.get('image_seed', -1)), - gr.update(value=metadata.get('image_batch_size', 1)), - gr.update(value=metadata.get('image_batch_count', 1)), gr.update(value=metadata.get('image_cfg_scale', 0.0)), ] @@ -661,8 +655,6 @@ def create_event_handlers(): 'image_aspect_ratio', 'image_steps', 'image_seed', - 'image_batch_size', - 'image_batch_count', 'image_cfg_scale', 'image_gallery_status' ), @@ -795,7 +787,7 @@ def generate(state, save_images=True): device = get_device() if device is None: device = "cpu" - generator = torch.Generator(device).manual_seed(int(seed)) + generator = torch.Generator(device) all_images = [] @@ -804,14 +796,8 @@ def generate(state, save_images=True): if pipeline_type is None: pipeline_type = get_pipeline_type(shared.image_model) - # Process Prompt prompt = state['image_prompt'] - if pipeline_type == 'qwenimage': - magic_suffix = ", Ultra HD, 4K, cinematic composition" - if magic_suffix.strip(", ") not in prompt: - prompt += magic_suffix - shared.stop_everything = False batch_count = int(state['image_batch_count']) @@ -862,7 +848,15 @@ def generate(state, save_images=True): def run_batch(): try: + # Apply magic suffix only at generation time for qwenimage + clean_prompt = 
gen_kwargs["prompt"] + if pipeline_type == 'qwenimage': + magic_suffix = ", Ultra HD, 4K, cinematic composition" + if magic_suffix.strip(", ") not in clean_prompt: + gen_kwargs["prompt"] = clean_prompt + magic_suffix + result_holder.extend(shared.image_model(**gen_kwargs).images) + gen_kwargs["prompt"] = clean_prompt # restore except Exception as e: error_holder.append(e) @@ -885,12 +879,18 @@ def generate(state, save_images=True): if error_holder: raise error_holder[0] + # Save this batch's images with the actual prompt and seed used + if save_images: + batch_seed = seed + batch_idx + original_prompt = state['image_prompt'] + state['image_prompt'] = gen_kwargs["prompt"] + save_generated_images(result_holder, state, batch_seed) + state['image_prompt'] = original_prompt + all_images.extend(result_holder) yield all_images, progress_bar_html((batch_idx + 1) / batch_count, f"Batch {batch_idx + 1}/{batch_count} complete") t1 = time.time() - if save_images: - save_generated_images(all_images, state, seed) total_images = batch_count * int(state['image_batch_size']) logger.info(f'Generated {total_images} {"image" if total_images == 1 else "images"} in {(t1 - t0):.2f} seconds ({total_steps / (t1 - t0):.2f} steps/s, seed {seed})') From b451bac082fadf00eca81e1e569d59a4df3ec8ac Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 4 Dec 2025 16:33:46 -0800 Subject: [PATCH 28/61] Image: Improve a log message --- modules/ui_image_generation.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index a377f329..2cad5dc4 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -725,7 +725,8 @@ def generate_prompt_variation(state): variation = variation[1:-1] if variation: - logger.info(f"Prompt variation: {variation}") + logger.info("Prompt variation:") + print(variation) return variation return prompt From c7ad28a4cd076a025addc0656e42a74d91555dd8 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 4 Dec 2025 17:20:50 -0800 Subject: [PATCH 29/61] Image: Add the LLM-generated prompt to the API result --- extensions/openai/images.py | 10 ++++++++-- modules/ui_image_generation.py | 8 +++++++- 2 files changed, 15 insertions(+), 3 deletions(-) diff --git a/extensions/openai/images.py b/extensions/openai/images.py index 0bb91a1e..e60470c3 100644 --- a/extensions/openai/images.py +++ b/extensions/openai/images.py @@ -48,10 +48,16 @@ def generations(request): resp = {'created': int(time.time()), 'data': []} for img in images: b64 = _image_to_base64(img) + image_obj = { + 'revised_prompt': img.info.get('revised_prompt', request.prompt) + } + if request.response_format == 'b64_json': - resp['data'].append({'b64_json': b64}) + image_obj['b64_json'] = b64 else: - resp['data'].append({'url': f'data:image/png;base64,{b64}'}) + image_obj['url'] = f'data:image/png;base64,{b64}' + + resp['data'].append(image_obj) return resp diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 2cad5dc4..424589b6 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -856,7 +856,13 @@ def generate(state, save_images=True): if magic_suffix.strip(", ") not in clean_prompt: gen_kwargs["prompt"] = clean_prompt + magic_suffix - result_holder.extend(shared.image_model(**gen_kwargs).images) + batch_results = shared.image_model(**gen_kwargs).images + + # Store the modified prompt in the 
metadata + for img in batch_results: + img.info["revised_prompt"] = clean_prompt + + result_holder.extend(batch_results) gen_kwargs["prompt"] = clean_prompt # restore except Exception as e: error_holder.append(e) From 3ef428efaa0e447d8d553e9387990b890aac5c6b Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 4 Dec 2025 17:34:17 -0800 Subject: [PATCH 30/61] Image: Remove llm_variations from the API --- extensions/openai/images.py | 3 +-- extensions/openai/typing.py | 8 +------- 2 files changed, 2 insertions(+), 9 deletions(-) diff --git a/extensions/openai/images.py b/extensions/openai/images.py index e60470c3..f46d549d 100644 --- a/extensions/openai/images.py +++ b/extensions/openai/images.py @@ -35,8 +35,7 @@ def generations(request): 'image_batch_size': request.batch_size, 'image_batch_count': request.batch_count, 'image_cfg_scale': request.cfg_scale, - 'image_llm_variations': request.llm_variations, - 'image_llm_variations_prompt': request.llm_variations_prompt or shared.settings.get('image_llm_variations_prompt', ''), + 'image_llm_variations': False, }) # Exhaust generator, keep final result diff --git a/extensions/openai/typing.py b/extensions/openai/typing.py index 31a5dc6d..5ac9f6ef 100644 --- a/extensions/openai/typing.py +++ b/extensions/openai/typing.py @@ -264,7 +264,7 @@ class LoadLorasRequest(BaseModel): lora_names: List[str] -class ImageGenerationRequestParams(BaseModel): +class ImageGenerationRequest(BaseModel): """Image-specific parameters for generation.""" prompt: str negative_prompt: str = "" @@ -275,8 +275,6 @@ class ImageGenerationRequestParams(BaseModel): batch_size: int | None = Field(default=None, ge=1, description="Parallel batch size (VRAM heavy)") n: int = Field(default=1, ge=1, description="Alias for batch_size (OpenAI compatibility)") batch_count: int = Field(default=1, ge=1, description="Sequential batch count") - llm_variations: bool = False - llm_variations_prompt: str | None = None # OpenAI compatibility (unused) model: str | None = None @@ -297,10 +295,6 @@ class ImageGenerationRequestParams(BaseModel): return 1024, 1024 -class ImageGenerationRequest(GenerationOptions, ImageGenerationRequestParams): - pass - - class ImageGenerationResponse(BaseModel): created: int = int(time.time()) data: List[dict] From 56f2a9512fbbc178d3ea5e5393c1dcc45219fed3 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 4 Dec 2025 17:34:27 -0800 Subject: [PATCH 31/61] Revert "Image: Add the LLM-generated prompt to the API result" This reverts commit c7ad28a4cd076a025addc0656e42a74d91555dd8. 
--- extensions/openai/images.py | 10 ++-------- modules/ui_image_generation.py | 8 +------- 2 files changed, 3 insertions(+), 15 deletions(-) diff --git a/extensions/openai/images.py b/extensions/openai/images.py index f46d549d..ef3f4169 100644 --- a/extensions/openai/images.py +++ b/extensions/openai/images.py @@ -47,16 +47,10 @@ def generations(request): resp = {'created': int(time.time()), 'data': []} for img in images: b64 = _image_to_base64(img) - image_obj = { - 'revised_prompt': img.info.get('revised_prompt', request.prompt) - } - if request.response_format == 'b64_json': - image_obj['b64_json'] = b64 + resp['data'].append({'b64_json': b64}) else: - image_obj['url'] = f'data:image/png;base64,{b64}' - - resp['data'].append(image_obj) + resp['data'].append({'url': f'data:image/png;base64,{b64}'}) return resp diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 424589b6..2cad5dc4 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -856,13 +856,7 @@ def generate(state, save_images=True): if magic_suffix.strip(", ") not in clean_prompt: gen_kwargs["prompt"] = clean_prompt + magic_suffix - batch_results = shared.image_model(**gen_kwargs).images - - # Store the modified prompt in the metadata - for img in batch_results: - img.info["revised_prompt"] = clean_prompt - - result_holder.extend(batch_results) + result_holder.extend(shared.image_model(**gen_kwargs).images) gen_kwargs["prompt"] = clean_prompt # restore except Exception as e: error_holder.append(e) From 15c6e43597203a0468501621f0e9ea443d22e8fa Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 4 Dec 2025 17:41:09 -0800 Subject: [PATCH 32/61] Image: Add a revised_prompt field to API results for OpenAI compatibility --- extensions/openai/images.py | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/extensions/openai/images.py b/extensions/openai/images.py index ef3f4169..1337771a 100644 --- a/extensions/openai/images.py +++ b/extensions/openai/images.py @@ -47,10 +47,15 @@ def generations(request): resp = {'created': int(time.time()), 'data': []} for img in images: b64 = _image_to_base64(img) + + image_obj = {'revised_prompt': request.prompt} + if request.response_format == 'b64_json': - resp['data'].append({'b64_json': b64}) + image_obj['b64_json'] = b64 else: - resp['data'].append({'url': f'data:image/png;base64,{b64}'}) + image_obj['url'] = f'data:image/png;base64,{b64}' + + resp['data'].append(image_obj) return resp From b4f06a50b024f5c279d031f0d188e81249b1f98d Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 4 Dec 2025 19:11:31 -0800 Subject: [PATCH 33/61] fix: Pass bos_token and eos_token from metadata to jinja2 Fixes loading Seed-Instruct-36B --- modules/chat.py | 6 ++++-- modules/models_settings.py | 10 +++++++--- modules/shared.py | 2 ++ 3 files changed, 13 insertions(+), 5 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index acfc2f66..d1474cfe 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -112,7 +112,9 @@ def generate_chat_prompt(user_input, state, **kwargs): add_generation_prompt=False, enable_thinking=state['enable_thinking'], reasoning_effort=state['reasoning_effort'], - thinking_budget=-1 if state.get('enable_thinking', True) else 0 + thinking_budget=-1 if state.get('enable_thinking', True) else 0, + bos_token=shared.bos_token, + eos_token=shared.eos_token, ) chat_renderer = partial( @@ -475,7 +477,7 @@ def 
get_stopping_strings(state): if state['mode'] in ['instruct', 'chat-instruct']: template = jinja_env.from_string(state['instruction_template_str']) - renderer = partial(template.render, add_generation_prompt=False) + renderer = partial(template.render, add_generation_prompt=False, bos_token=shared.bos_token, eos_token=shared.eos_token) renderers.append(renderer) if state['mode'] in ['chat']: diff --git a/modules/models_settings.py b/modules/models_settings.py index 6dc000b4..d333e269 100644 --- a/modules/models_settings.py +++ b/modules/models_settings.py @@ -89,8 +89,9 @@ def get_model_metadata(model): else: bos_token = "" - template = template.replace('eos_token', "'{}'".format(eos_token)) - template = template.replace('bos_token', "'{}'".format(bos_token)) + + shared.bos_token = bos_token + shared.eos_token = eos_token template = re.sub(r"\{\{-?\s*raise_exception\(.*?\)\s*-?\}\}", "", template, flags=re.DOTALL) template = re.sub(r'raise_exception\([^)]*\)', "''", template) @@ -160,13 +161,16 @@ def get_model_metadata(model): # 4. If a template was found from any source, process it if template: + shared.bos_token = '' + shared.eos_token = '' + for k in ['eos_token', 'bos_token']: if k in metadata: value = metadata[k] if isinstance(value, dict): value = value['content'] - template = template.replace(k, "'{}'".format(value)) + setattr(shared, k, value) template = re.sub(r"\{\{-?\s*raise_exception\(.*?\)\s*-?\}\}", "", template, flags=re.DOTALL) template = re.sub(r'raise_exception\([^)]*\)', "''", template) diff --git a/modules/shared.py b/modules/shared.py index 2f39e495..7b572dec 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -19,6 +19,8 @@ is_seq2seq = False is_multimodal = False model_dirty_from_training = False lora_names = [] +bos_token = '' +eos_token = '' # Image model variables image_model = None From 8eac99599ad9a645f608d3814c63a0297e877de8 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 4 Dec 2025 19:58:06 -0800 Subject: [PATCH 34/61] Image: Better LLM variation default prompt --- modules/shared.py | 2 +- modules/ui_image_generation.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index 7b572dec..f6e86bdf 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -322,7 +322,7 @@ settings = { 'image_batch_size': 1, 'image_batch_count': 1, 'image_llm_variations': False, - 'image_llm_variations_prompt': 'Your task is to create a creative variation of the image generation prompt above. Keep the main subject but feel free to add an interesting setting, scenario, pose, atmosphere, or stylistic twist. Be specific and vivid. Respond with only the new prompt, nothing else.', + 'image_llm_variations_prompt': 'Write a variation of the image generation prompt above. Consider the intent of the user with that prompt and write something that will likely please them, with added details. 
Your reply should contain the new prompt and nothing else.', 'image_model_menu': 'None', 'image_dtype': 'bfloat16', 'image_attn_backend': 'sdpa', diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 2cad5dc4..139fd891 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -694,7 +694,7 @@ def generate_prompt_variation(state): # Get the custom variation prompt or use default variation_instruction = state.get('image_llm_variations_prompt', '') if not variation_instruction: - variation_instruction = 'Your task is to create a creative variation of the image generation prompt above. Keep the main subject but feel free to add an interesting setting, scenario, pose, atmosphere, or stylistic twist. Be specific and vivid. Respond with only the new prompt, nothing else.' + variation_instruction = 'Write a variation of the image generation prompt above. Consider the intent of the user with that prompt and write something that will likely please them, with added details. Your reply should contain the new prompt and nothing else.' augmented_message = f"{prompt}\n\n=====\n\n{variation_instruction}" From afa29b9554c2f55213799af1d8e79735f2129eee Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 5 Dec 2025 05:53:22 -0800 Subject: [PATCH 35/61] Image: Several fixes --- README.md | 3 +-- extensions/openai/images.py | 3 +++ modules/image_models.py | 24 ++++++++++++------- modules/ui_image_generation.py | 42 +++++++++++++--------------------- 4 files changed, 36 insertions(+), 36 deletions(-) diff --git a/README.md b/README.md index 174fc2ac..ef4b2160 100644 --- a/README.md +++ b/README.md @@ -28,8 +28,7 @@ A Gradio web UI for Large Language Models. - 100% offline and private, with zero telemetry, external resources, or remote update requests. - **File attachments**: Upload text files, PDF documents, and .docx documents to talk about their contents. - **Vision (multimodal models)**: Attach images to messages for visual understanding ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Multimodal-Tutorial)). -Image generation: A dedicated tab for diffusers models like Z-Image-Turbo and Qwen-Image. Features 4-bit/8-bit quantization and a persistent gallery with metadata (tutorial). -- **Image generation**: A dedicated tab for `diffusers` models like **Z-Image-Turbo** and **Qwen-Image**. Features 4-bit/8-bit quantization and a persistent gallery with metadata ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Image-Generation-Tutorial)). +- **Image generation**: A dedicated tab for `diffusers` models like **Z-Image-Turbo**. Features 4-bit/8-bit quantization and a persistent gallery with metadata ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Image-Generation-Tutorial)). - **Web search**: Optionally search the internet with LLM-generated queries to add context to the conversation. - Aesthetic UI with dark and light themes. - Syntax highlighting for code blocks and LaTeX rendering for mathematical expressions. 
diff --git a/extensions/openai/images.py b/extensions/openai/images.py index 1337771a..f7be3d22 100644 --- a/extensions/openai/images.py +++ b/extensions/openai/images.py @@ -43,6 +43,9 @@ def generations(request): for images, _ in generate(state, save_images=False): pass + if not images: + raise ServiceUnavailableError("Image generation failed or produced no images.") + # Build response resp = {'created': int(time.time()), 'data': []} for img in images: diff --git a/modules/image_models.py b/modules/image_models.py index 8363533b..28b2bb4f 100644 --- a/modules/image_models.py +++ b/modules/image_models.py @@ -141,16 +141,24 @@ def load_image_model(model_name, dtype='bfloat16', attn_backend='sdpa', cpu_offl if not cpu_offload: pipe.to(get_device()) - # Set attention backend (if supported by the pipeline) - if hasattr(pipe, 'transformer') and hasattr(pipe.transformer, 'set_attention_backend'): - if attn_backend == 'flash_attention_2': - pipe.transformer.set_attention_backend("flash") - # sdpa is the default, no action needed + modules = ["transformer", "unet"] + # Set attention backend + if attn_backend == 'flash_attention_2': + for name in modules: + mod = getattr(pipe, name, None) + if hasattr(mod, "set_attention_backend"): + mod.set_attention_backend("flash") + break + + # Compile model if compile_model: - if hasattr(pipe, 'transformer') and hasattr(pipe.transformer, 'compile'): - logger.info("Compiling model (first run will be slow)...") - pipe.transformer.compile() + for name in modules: + mod = getattr(pipe, name, None) + if hasattr(mod, "compile"): + logger.info("Compiling model (first run will be slow)...") + mod.compile() + break if cpu_offload: pipe.enable_model_cpu_offload() diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 139fd891..0a1b6891 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -43,10 +43,6 @@ METADATA_SETTINGS_KEYS = [ 'image_cfg_scale', ] -# Cache for all image paths -_image_cache = [] -_cache_timestamp = 0 - def round_to_step(value, step=STEP): return round(value / step) * step @@ -134,6 +130,9 @@ def build_generation_metadata(state, actual_seed): def save_generated_images(images, state, actual_seed): """Save images with generation metadata embedded in PNG.""" + if shared.args.multi_user: + return + date_str = datetime.now().strftime("%Y-%m-%d") folder_path = os.path.join("user_data", "image_outputs", date_str) os.makedirs(folder_path, exist_ok=True) @@ -157,9 +156,14 @@ def save_generated_images(images, state, actual_seed): def read_image_metadata(image_path): """Read generation metadata from PNG file.""" try: - with open_image_safely(image_path) as img: + img = open_image_safely(image_path) + if img is None: + return None + try: if hasattr(img, 'text') and 'image_gen_settings' in img.text: return json.loads(img.text['image_gen_settings']) + finally: + img.close() except Exception as e: logger.debug(f"Could not read metadata from {image_path}: {e}") return None @@ -198,19 +202,12 @@ def format_metadata_for_display(metadata): return "\n\n".join(lines) -def get_all_history_images(force_refresh=False): - """Get all history images sorted by modification time (newest first). 
Uses caching.""" - global _image_cache, _cache_timestamp - +def get_all_history_images(): + """Get all history images sorted by modification time (newest first).""" output_dir = os.path.join("user_data", "image_outputs") if not os.path.exists(output_dir): return [] - # Check if we need to refresh cache - current_time = time.time() - if not force_refresh and _image_cache and (current_time - _cache_timestamp) < 2: - return _image_cache - image_files = [] for root, _, files in os.walk(output_dir): for file in files: @@ -219,15 +216,12 @@ def get_all_history_images(force_refresh=False): image_files.append((full_path, os.path.getmtime(full_path))) image_files.sort(key=lambda x: x[1], reverse=True) - _image_cache = [x[0] for x in image_files] - _cache_timestamp = current_time - - return _image_cache + return [x[0] for x in image_files] -def get_paginated_images(page=0, force_refresh=False): +def get_paginated_images(page=0): """Get images for a specific page.""" - all_images = get_all_history_images(force_refresh) + all_images = get_all_history_images() total_images = len(all_images) total_pages = max(1, (total_images + IMAGES_PER_PAGE - 1) // IMAGES_PER_PAGE) @@ -250,7 +244,7 @@ def get_initial_page_info(): def refresh_gallery(current_page=0): """Refresh gallery with current page.""" - images, page, total_pages, total_images = get_paginated_images(current_page, force_refresh=True) + images, page, total_pages, total_images = get_paginated_images(current_page) page_info = f"Page {page + 1} of {total_pages} ({total_images} total images)" return images, page, page_info @@ -286,11 +280,7 @@ def on_gallery_select(evt: gr.SelectData, current_page): if evt.index is None: return "", "Select an image to view its settings" - if not _image_cache: - get_all_history_images() - - # Get the current page's images to find the actual file path - all_images = _image_cache + all_images = get_all_history_images() total_images = len(all_images) # Calculate the actual index in the full list From b63d57158d02be08b76d215479a9e6b9ccef2881 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 5 Dec 2025 05:59:54 -0800 Subject: [PATCH 36/61] Image: Add TGW as a prefix to output images --- modules/ui_image_generation.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 0a1b6891..92704834 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -142,7 +142,7 @@ def save_generated_images(images, state, actual_seed): for idx, img in enumerate(images): timestamp = datetime.now().strftime("%H-%M-%S") - filename = f"{timestamp}_{actual_seed:010d}_{idx:03d}.png" + filename = f"TGW_{timestamp}_{actual_seed:010d}_{idx:03d}.png" filepath = os.path.join(folder_path, filename) # Create PNG metadata From 0dd468245c41ccd649c725689418ffd4116ef1e8 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 5 Dec 2025 07:11:38 -0800 Subject: [PATCH 37/61] Image: Add back the gallery cache (for performance) --- modules/ui_image_generation.py | 31 ++++++++++++++++++++++++------- 1 file changed, 24 insertions(+), 7 deletions(-) diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 92704834..28d4c627 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -43,6 +43,10 @@ METADATA_SETTINGS_KEYS = [ 'image_cfg_scale', ] +# Cache for all image paths +_image_cache = [] +_cache_timestamp = 0 + def 
round_to_step(value, step=STEP): return round(value / step) * step @@ -202,12 +206,19 @@ def format_metadata_for_display(metadata): return "\n\n".join(lines) -def get_all_history_images(): - """Get all history images sorted by modification time (newest first).""" +def get_all_history_images(force_refresh=False): + """Get all history images sorted by modification time (newest first). Uses caching.""" + global _image_cache, _cache_timestamp + output_dir = os.path.join("user_data", "image_outputs") if not os.path.exists(output_dir): return [] + # Check if we need to refresh cache + current_time = time.time() + if not force_refresh and _image_cache and (current_time - _cache_timestamp) < 2: + return _image_cache + image_files = [] for root, _, files in os.walk(output_dir): for file in files: @@ -216,12 +227,15 @@ def get_all_history_images(): image_files.append((full_path, os.path.getmtime(full_path))) image_files.sort(key=lambda x: x[1], reverse=True) - return [x[0] for x in image_files] + _image_cache = [x[0] for x in image_files] + _cache_timestamp = current_time + + return _image_cache -def get_paginated_images(page=0): +def get_paginated_images(page=0, force_refresh=False): """Get images for a specific page.""" - all_images = get_all_history_images() + all_images = get_all_history_images(force_refresh) total_images = len(all_images) total_pages = max(1, (total_images + IMAGES_PER_PAGE - 1) // IMAGES_PER_PAGE) @@ -244,7 +258,7 @@ def get_initial_page_info(): def refresh_gallery(current_page=0): """Refresh gallery with current page.""" - images, page, total_pages, total_images = get_paginated_images(current_page) + images, page, total_pages, total_images = get_paginated_images(current_page, force_refresh=True) page_info = f"Page {page + 1} of {total_pages} ({total_images} total images)" return images, page, page_info @@ -280,7 +294,10 @@ def on_gallery_select(evt: gr.SelectData, current_page): if evt.index is None: return "", "Select an image to view its settings" - all_images = get_all_history_images() + if not _image_cache: + get_all_history_images() + + all_images = _image_cache total_images = len(all_images) # Calculate the actual index in the full list From c11c14590aa5d34985fd12a0b1100d4e51343ae9 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 5 Dec 2025 08:08:11 -0800 Subject: [PATCH 38/61] Image: Better LLM variation default prompt --- modules/shared.py | 2 +- modules/ui_image_generation.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index f6e86bdf..3f3742de 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -322,7 +322,7 @@ settings = { 'image_batch_size': 1, 'image_batch_count': 1, 'image_llm_variations': False, - 'image_llm_variations_prompt': 'Write a variation of the image generation prompt above. Consider the intent of the user with that prompt and write something that will likely please them, with added details. Your reply should contain the new prompt and nothing else.', + 'image_llm_variations_prompt': 'Write a variation of the image generation prompt above. Consider the intent of the user with that prompt and write something that will likely please them, with added details. Output only the new prompt. 
Do not add any explanations, prefixes, or additional text.', 'image_model_menu': 'None', 'image_dtype': 'bfloat16', 'image_attn_backend': 'sdpa', diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 28d4c627..c55d2438 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -701,7 +701,7 @@ def generate_prompt_variation(state): # Get the custom variation prompt or use default variation_instruction = state.get('image_llm_variations_prompt', '') if not variation_instruction: - variation_instruction = 'Write a variation of the image generation prompt above. Consider the intent of the user with that prompt and write something that will likely please them, with added details. Your reply should contain the new prompt and nothing else.' + variation_instruction = 'Write a variation of the image generation prompt above. Consider the intent of the user with that prompt and write something that will likely please them, with added details. Output only the new prompt. Do not add any explanations, prefixes, or additional text.' augmented_message = f"{prompt}\n\n=====\n\n{variation_instruction}" From 5848c7884d641ec3c76799585e6f44f379d8b5da Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 5 Dec 2025 10:24:51 -0800 Subject: [PATCH 39/61] Increase the height of the image output gallery --- css/main.css | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/css/main.css b/css/main.css index e7586960..dad53c30 100644 --- a/css/main.css +++ b/css/main.css @@ -1692,8 +1692,8 @@ button#swap-height-width { } #image-output-gallery, #image-output-gallery > :nth-child(2) { - height: calc(100vh - 83px); - max-height: calc(100vh - 83px); + height: calc(100vh - 66px); + max-height: calc(100vh - 66px); } #image-history-gallery, #image-history-gallery > :nth-child(2) { From eba8a594665cd9407f2657fb26a1c4bbac60baf0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 5 Dec 2025 12:10:41 -0800 Subject: [PATCH 40/61] docs: Improve the image generation tutorial --- docs/Image Generation Tutorial.md | 67 ++++++++++++++++++++++++++----- 1 file changed, 56 insertions(+), 11 deletions(-) diff --git a/docs/Image Generation Tutorial.md b/docs/Image Generation Tutorial.md index e7022c34..0c9eb848 100644 --- a/docs/Image Generation Tutorial.md +++ b/docs/Image Generation Tutorial.md @@ -1,20 +1,65 @@ # Image Generation Tutorial -This feature allows you to generate images using high-speed models like Z-Image-Turbo directly within the web UI. +This feature allows you to generate images using `diffusers` models like [Tongyi-MAI/Z-Image-Turbo](https://huggingface.co/Tongyi-MAI/Z-Image-Turbo) directly within the web UI. -## How to use +## Installation -1. Click on the **Image AI** tab at the top of the interface. -2. Select the **Model** sub-tab. -3. Copy and paste the following link into the **Download model** box: +1. Clone the repository with ``` -https://huggingface.co/Tongyi-MAI/Z-Image-Turbo +git clone https://github.com/oobabooga/text-generation-webui ``` -4. Click the **Download** button and wait for the confirmation message. -5. In the **Model** dropdown menu, select the model you just downloaded (if you don't see it, click the 🔄 refresh button). -6. Click **Load**. -7. Go to the **Generate** sub-tab, type a prompt, and click **GENERATE**. +or download it from [here](https://github.com/oobabooga/text-generation-webui/archive/refs/heads/main.zip) and unzip it. 
+ +2. Use the one-click installer. + +- Windows: Double click on `start_windows.bat` +- Linux: Run `./start_linux.sh` +- macOS: Run `./start_macos.sh` + +Note: Image generation does not work with the portable builds in `.zip` format in the [Releases page](https://github.com/oobabooga/text-generation-webui/releases). You need the "full" version of the web UI. + +## Downloading a model + +1. Once installation ends, browse to `http://127.0.0.1:7860/`. +2. Click on "Image AI" on the left. +3. Click on "Model" at the top. +4. In the "Download model" field, paste `https://huggingface.co/Tongyi-MAI/Z-Image-Turbo` and click "Download". +5. Wait for the download to finish (it's 31 GB). + +## Loading the model + +Select the quantization option in the "Quantization" menu and click "Load". + +The memory usage for `Z-Image-Turbo` for each option is: + +If you have less GPU memory than _, check the "CPU Offload" option. + +Note: The next time you launch the web UI, the model will get automatically loaded with your last settings when you try to generate an image. You do not need to go to the Model tab and click "Load" each time. + +## Generating images: + +1. While still in the "Image AI" page, go to the "Generate" tab. +2. Type your prompt and click on the Generate button. + +### LLM Prompt Variations + +To use this feature, you need to load an LLM in the main "Model" tab on the left. + +If you have no idea what to use, do this to get started: + +1. Download [Qwen3-4B-Q3_K_M.gguf](https://huggingface.co/unsloth/Qwen3-4B-GGUF/resolve/main/Qwen3-4B-Q3_K_M.gguf) to your `text-generation-webui/user_data/models` folder. +2. Select the model in the dropdown menu in the "Model" page. +3. Click Load. + +Then go back to the "Image AI" page and check "LLM Prompt Variations". + +After that, your prompts will be automatically updated by the LLM each time you generate an image. If you use sequential batch count value greater than 1, a new prompt will be created for each sequential batch. + +The improvement in creativity is striking: + +### Model-specific settings + +- For Z-Image-Turbo, make sure to keep CFG Scale at 0 and Steps at 9. Do not write a Negative Prompt as it will get ignored with this CFG Scale value. -> **Note for Z-Image-Turbo:** For the best results with this specific model, keep the **CFG Scale** slider at **0**. 
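The tutorial above covers image generation through the UI. For API users, the sketch below shows one way to call the OpenAI-compatible endpoint wired up in patches 30 and 32. The `/v1/images/generations` route and port 5000 are assumptions (the usual OpenAI-style layout), not something these diffs confirm; the `prompt`, `n`, `response_format`, `revised_prompt`, and `b64_json` fields do come from the diffs.

```
# Hedged sketch of a client for the image endpoint from patches 30 and 32.
# Assumptions: the API is enabled, listens on port 5000, and exposes the
# standard OpenAI images route; neither detail is shown in these diffs.
import base64
import requests

resp = requests.post(
    "http://127.0.0.1:5000/v1/images/generations",  # assumed route and port
    json={
        "prompt": "a lighthouse at dusk, dramatic sky",
        "n": 1,                          # alias for batch_size (OpenAI compatibility)
        "response_format": "b64_json",   # otherwise a base64 data URL is returned
    },
    timeout=300,
)
resp.raise_for_status()

for i, item in enumerate(resp.json()["data"]):
    print(item.get("revised_prompt"))   # echoes the prompt (patch 32)
    with open(f"out_{i}.png", "wb") as f:
        f.write(base64.b64decode(item["b64_json"]))
```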
From 11937de517e6b661c3a112faa29852f474a3b9bc Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 5 Dec 2025 12:13:24 -0800 Subject: [PATCH 41/61] Use flash attention for image generation by default --- modules/shared.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index 3f3742de..0a27f33d 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -58,7 +58,7 @@ group = parser.add_argument_group('Image model') group.add_argument('--image-model', type=str, help='Name of the image model to select on startup (overrides saved setting).') group.add_argument('--image-model-dir', type=str, default='user_data/image_models', help='Path to directory with all the image models.') group.add_argument('--image-dtype', type=str, default=None, choices=['bfloat16', 'float16'], help='Data type for image model.') -group.add_argument('--image-attn-backend', type=str, default=None, choices=['sdpa', 'flash_attention_2'], help='Attention backend for image model.') +group.add_argument('--image-attn-backend', type=str, default=None, choices=['flash_attention_2', 'sdpa'], help='Attention backend for image model.') group.add_argument('--image-cpu-offload', action='store_true', help='Enable CPU offloading for image model.') group.add_argument('--image-compile', action='store_true', help='Compile the image model for faster inference.') group.add_argument('--image-quant', type=str, default=None, @@ -325,7 +325,7 @@ settings = { 'image_llm_variations_prompt': 'Write a variation of the image generation prompt above. Consider the intent of the user with that prompt and write something that will likely please them, with added details. Output only the new prompt. Do not add any explanations, prefixes, or additional text.', 'image_model_menu': 'None', 'image_dtype': 'bfloat16', - 'image_attn_backend': 'sdpa', + 'image_attn_backend': 'flash_attention_2', 'image_cpu_offload': False, 'image_compile': False, 'image_quant': 'none', From 6ca99910ba82a8b2212562ccaaf2e584aa369642 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 5 Dec 2025 13:08:46 -0800 Subject: [PATCH 42/61] Image: Quantize the text encoder for lower VRAM --- modules/image_models.py | 35 ++++++++++++++++++++++------------- 1 file changed, 22 insertions(+), 13 deletions(-) diff --git a/modules/image_models.py b/modules/image_models.py index 28b2bb4f..290aaf19 100644 --- a/modules/image_models.py +++ b/modules/image_models.py @@ -8,17 +8,14 @@ from modules.utils import resolve_model_path def get_quantization_config(quant_method): """ Get the appropriate quantization config based on the selected method. - - Args: - quant_method: One of 'none', 'bnb-8bit', 'bnb-4bit', - 'torchao-int8wo', 'torchao-fp4', 'torchao-float8wo' - - Returns: - PipelineQuantizationConfig or None + Applies quantization to both the transformer and the text_encoder. 
""" import torch - from diffusers import BitsAndBytesConfig, TorchAoConfig + # Import BitsAndBytesConfig from BOTH libraries to be safe + from diffusers import BitsAndBytesConfig as DiffusersBnBConfig + from diffusers import TorchAoConfig from diffusers.quantizers import PipelineQuantizationConfig + from transformers import BitsAndBytesConfig as TransformersBnBConfig if quant_method == 'none' or not quant_method: return None @@ -27,7 +24,10 @@ def get_quantization_config(quant_method): elif quant_method == 'bnb-8bit': return PipelineQuantizationConfig( quant_mapping={ - "transformer": BitsAndBytesConfig( + "transformer": DiffusersBnBConfig( + load_in_8bit=True + ), + "text_encoder": TransformersBnBConfig( load_in_8bit=True ) } @@ -37,7 +37,13 @@ def get_quantization_config(quant_method): elif quant_method == 'bnb-4bit': return PipelineQuantizationConfig( quant_mapping={ - "transformer": BitsAndBytesConfig( + "transformer": DiffusersBnBConfig( + load_in_4bit=True, + bnb_4bit_quant_type="nf4", + bnb_4bit_compute_dtype=torch.bfloat16, + bnb_4bit_use_double_quant=True + ), + "text_encoder": TransformersBnBConfig( load_in_4bit=True, bnb_4bit_quant_type="nf4", bnb_4bit_compute_dtype=torch.bfloat16, @@ -50,7 +56,8 @@ def get_quantization_config(quant_method): elif quant_method == 'torchao-int8wo': return PipelineQuantizationConfig( quant_mapping={ - "transformer": TorchAoConfig("int8wo") + "transformer": TorchAoConfig("int8wo"), + "text_encoder": TorchAoConfig("int8wo") } ) @@ -58,7 +65,8 @@ def get_quantization_config(quant_method): elif quant_method == 'torchao-fp4': return PipelineQuantizationConfig( quant_mapping={ - "transformer": TorchAoConfig("fp4_e2m1") + "transformer": TorchAoConfig("fp4_e2m1"), + "text_encoder": TorchAoConfig("fp4_e2m1") } ) @@ -66,7 +74,8 @@ def get_quantization_config(quant_method): elif quant_method == 'torchao-float8wo': return PipelineQuantizationConfig( quant_mapping={ - "transformer": TorchAoConfig("float8wo") + "transformer": TorchAoConfig("float8wo"), + "text_encoder": TorchAoConfig("float8wo") } ) From e20b2d38ff38fbd6451c8ff53c9e12fc9a327a14 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 5 Dec 2025 14:12:08 -0800 Subject: [PATCH 43/61] docs: Add VRAM measurements for Z-Image-Turbo --- docs/Image Generation Tutorial.md | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/docs/Image Generation Tutorial.md b/docs/Image Generation Tutorial.md index 0c9eb848..a74a4ddd 100644 --- a/docs/Image Generation Tutorial.md +++ b/docs/Image Generation Tutorial.md @@ -34,7 +34,15 @@ Select the quantization option in the "Quantization" menu and click "Load". The memory usage for `Z-Image-Turbo` for each option is: -If you have less GPU memory than _, check the "CPU Offload" option. +| Quantization Method | VRAM Usage | +| :--- | :--- | +| **None (FP16/BF16)** | 25613 MiB | +| **bnb-8bit** | 16301 MiB | +| **bnb-8bit + CPU Offload** | 16235 MiB | +| **bnb-4bit** | 11533 MiB | +| **bnb-4bit + CPU Offload** | 7677 MiB | + +The `torchao` options support `torch.compile` for faster image generation, with `float8wo` specifically providing native hardware acceleration for RTX 40-series and newer GPUs. Note: The next time you launch the web UI, the model will get automatically loaded with your last settings when you try to generate an image. You do not need to go to the Model tab and click "Load" each time. 
From 17b12567d82e7459438cd54f361805993ca4ca59 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 5 Dec 2025 14:15:15 -0800 Subject: [PATCH 44/61] docs: Small changes --- docs/Image Generation Tutorial.md | 11 +++++------ 1 file changed, 5 insertions(+), 6 deletions(-) diff --git a/docs/Image Generation Tutorial.md b/docs/Image Generation Tutorial.md index a74a4ddd..85f891d5 100644 --- a/docs/Image Generation Tutorial.md +++ b/docs/Image Generation Tutorial.md @@ -51,6 +51,10 @@ Note: The next time you launch the web UI, the model will get automatically load 1. While still in the "Image AI" page, go to the "Generate" tab. 2. Type your prompt and click on the Generate button. +### Model-specific settings + +- For Z-Image-Turbo, make sure to keep CFG Scale at 0 and Steps at 9. Do not write a Negative Prompt as it will get ignored with this CFG Scale value. + ### LLM Prompt Variations To use this feature, you need to load an LLM in the main "Model" tab on the left. @@ -63,11 +67,6 @@ If you have no idea what to use, do this to get started: Then go back to the "Image AI" page and check "LLM Prompt Variations". -After that, your prompts will be automatically updated by the LLM each time you generate an image. If you use sequential batch count value greater than 1, a new prompt will be created for each sequential batch. +After that, your prompts will be automatically updated by the LLM each time you generate an image. If you use a "Sequential Count" value greater than 1, a new prompt will be created for each sequential batch. The improvement in creativity is striking: - -### Model-specific settings - -- For Z-Image-Turbo, make sure to keep CFG Scale at 0 and Steps at 9. Do not write a Negative Prompt as it will get ignored with this CFG Scale value. - From 1a9ed1fe98a2d51a860d3c4f747ba46c543d0bf3 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 6 Dec 2025 05:21:26 -0800 Subject: [PATCH 45/61] Fix the height of the image output gallery --- css/main.css | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/css/main.css b/css/main.css index dad53c30..e7586960 100644 --- a/css/main.css +++ b/css/main.css @@ -1692,8 +1692,8 @@ button#swap-height-width { } #image-output-gallery, #image-output-gallery > :nth-child(2) { - height: calc(100vh - 66px); - max-height: calc(100vh - 66px); + height: calc(100vh - 83px); + max-height: calc(100vh - 83px); } #image-history-gallery, #image-history-gallery > :nth-child(2) { From 455dc06db08fc347f1bbd09c4ac74134954fc641 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 6 Dec 2025 05:43:00 -0800 Subject: [PATCH 46/61] Serve the original PNG images in the UI instead of webp --- modules/ui_image_generation.py | 16 ++++++++++++---- 1 file changed, 12 insertions(+), 4 deletions(-) diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index c55d2438..2fb93fd8 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -133,9 +133,9 @@ def build_generation_metadata(state, actual_seed): def save_generated_images(images, state, actual_seed): - """Save images with generation metadata embedded in PNG.""" + """Save images with generation metadata embedded in PNG. 
Returns list of saved file paths.""" if shared.args.multi_user: - return + return [] date_str = datetime.now().strftime("%Y-%m-%d") folder_path = os.path.join("user_data", "image_outputs", date_str) @@ -144,6 +144,7 @@ def save_generated_images(images, state, actual_seed): metadata = build_generation_metadata(state, actual_seed) metadata_json = json.dumps(metadata, ensure_ascii=False) + saved_paths = [] for idx, img in enumerate(images): timestamp = datetime.now().strftime("%H-%M-%S") filename = f"TGW_{timestamp}_{actual_seed:010d}_{idx:03d}.png" @@ -155,6 +156,9 @@ def save_generated_images(images, state, actual_seed): # Save with metadata img.save(filepath, pnginfo=png_info) + saved_paths.append(filepath) + + return saved_paths def read_image_metadata(image_path): @@ -892,10 +896,14 @@ def generate(state, save_images=True): batch_seed = seed + batch_idx original_prompt = state['image_prompt'] state['image_prompt'] = gen_kwargs["prompt"] - save_generated_images(result_holder, state, batch_seed) + saved_paths = save_generated_images(result_holder, state, batch_seed) state['image_prompt'] = original_prompt + # Use file paths so gallery serves actual PNGs with metadata + all_images.extend(saved_paths) + else: + # Fallback to PIL objects if not saving + all_images.extend(result_holder) - all_images.extend(result_holder) yield all_images, progress_bar_html((batch_idx + 1) / batch_count, f"Batch {batch_idx + 1}/{batch_count} complete") t1 = time.time() From 6411142111db1736a4fffac72fd5ebc63ea5de11 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 6 Dec 2025 06:36:16 -0800 Subject: [PATCH 47/61] docs: Small changes --- docs/Image Generation Tutorial.md | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) diff --git a/docs/Image Generation Tutorial.md b/docs/Image Generation Tutorial.md index 85f891d5..875d1d44 100644 --- a/docs/Image Generation Tutorial.md +++ b/docs/Image Generation Tutorial.md @@ -2,6 +2,9 @@ This feature allows you to generate images using `diffusers` models like [Tongyi-MAI/Z-Image-Turbo](https://huggingface.co/Tongyi-MAI/Z-Image-Turbo) directly within the web UI. +print + + ## Installation 1. Clone the repository with @@ -36,11 +39,11 @@ The memory usage for `Z-Image-Turbo` for each option is: | Quantization Method | VRAM Usage | | :--- | :--- | -| **None (FP16/BF16)** | 25613 MiB | -| **bnb-8bit** | 16301 MiB | -| **bnb-8bit + CPU Offload** | 16235 MiB | -| **bnb-4bit** | 11533 MiB | -| **bnb-4bit + CPU Offload** | 7677 MiB | +| None (FP16/BF16) | 25613 MiB | +| bnb-8bit | 16301 MiB | +| bnb-8bit + CPU Offload | 16235 MiB | +| bnb-4bit | 11533 MiB | +| bnb-4bit + CPU Offload | 7677 MiB | The `torchao` options support `torch.compile` for faster image generation, with `float8wo` specifically providing native hardware acceleration for RTX 40-series and newer GPUs. @@ -70,3 +73,5 @@ Then go back to the "Image AI" page and check "LLM Prompt Variations". After that, your prompts will be automatically updated by the LLM each time you generate an image. If you use a "Sequential Count" value greater than 1, a new prompt will be created for each sequential batch. 
The improvement in creativity is striking: + +comparison_collage From 0100ad1bd7c0fbb068585902db78d0edce3d497d Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 6 Dec 2025 06:39:30 -0800 Subject: [PATCH 48/61] Add user_data/image_outputs to the Gradio allowed paths --- server.py | 7 ++++++- 1 file changed, 6 insertions(+), 1 deletion(-) diff --git a/server.py b/server.py index 0c5d14ce..d8fb2c22 100644 --- a/server.py +++ b/server.py @@ -101,6 +101,11 @@ def create_interface(): auth.extend(x.strip() for line in file for x in line.split(',') if x.strip()) auth = [tuple(cred.split(':')) for cred in auth] + # Allowed paths + allowed_paths = ["css", "js", "extensions", "user_data/cache"] + if not shared.args.multi_user: + allowed_paths.append("user_data/image_outputs") + # Import the extensions and execute their setup() functions if shared.args.extensions is not None and len(shared.args.extensions) > 0: extensions_module.load_extensions() @@ -237,7 +242,7 @@ def create_interface(): ssl_keyfile=shared.args.ssl_keyfile, ssl_certfile=shared.args.ssl_certfile, root_path=shared.args.subpath, - allowed_paths=["css", "js", "extensions", "user_data/cache"] + allowed_paths=allowed_paths, ) From 02518a96a9c9e75947a4d41e22a758fe9d83310c Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 6 Dec 2025 06:55:06 -0800 Subject: [PATCH 49/61] Lint --- modules/models_settings.py | 1 - 1 file changed, 1 deletion(-) diff --git a/modules/models_settings.py b/modules/models_settings.py index d333e269..e9f19a06 100644 --- a/modules/models_settings.py +++ b/modules/models_settings.py @@ -89,7 +89,6 @@ def get_model_metadata(model): else: bos_token = "" - shared.bos_token = bos_token shared.eos_token = eos_token From 1c36559e2b1b453a526b682b7db89486f9c3753c Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 6 Dec 2025 07:05:00 -0800 Subject: [PATCH 50/61] Add a News section to the README --- README.md | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/README.md b/README.md index ef4b2160..8065ca71 100644 --- a/README.md +++ b/README.md @@ -21,6 +21,10 @@ A Gradio web UI for Large Language Models. |:---:|:---:| |![Image1](https://github.com/oobabooga/screenshots/raw/main/DEFAULT-3.5.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/PARAMETERS-3.5.png) | +## 🔥 News + +- The project now supports **image generation**! Including Z-Image-Turbo, 4bit/8bit quantization, `torch.compile`, and LLM-generated prompt variations ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Image-Generation-Tutorial)). + ## Features - Supports multiple local text generation backends, including [llama.cpp](https://github.com/ggerganov/llama.cpp), [Transformers](https://github.com/huggingface/transformers), [ExLlamaV3](https://github.com/turboderp-org/exllamav3), [ExLlamaV2](https://github.com/turboderp-org/exllamav2), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) (the latter via its own [Dockerfile](https://github.com/oobabooga/text-generation-webui/blob/main/docker/TensorRT-LLM/Dockerfile)). 
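The News entry above advertises 4-bit/8-bit quantization; as a standalone illustration of the bnb-4bit row in the VRAM table from patch 43, here is a minimal sketch of the pipeline-level quantization mapping that patch 42 builds in `get_quantization_config()`, applied to both the transformer and the text encoder. It assumes a recent `diffusers` with `PipelineQuantizationConfig` support plus `bitsandbytes`; the model path is illustrative.

```
# Minimal sketch of the bnb-4bit mapping from patch 42, used outside the web UI.
# Assumes diffusers with pipeline-level quantization support and bitsandbytes.
import torch
from diffusers import BitsAndBytesConfig as DiffusersBnBConfig
from diffusers import DiffusionPipeline
from diffusers.quantizers import PipelineQuantizationConfig
from transformers import BitsAndBytesConfig as TransformersBnBConfig

bnb_4bit = dict(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.bfloat16,
    bnb_4bit_use_double_quant=True,
)

quant_config = PipelineQuantizationConfig(
    quant_mapping={
        "transformer": DiffusersBnBConfig(**bnb_4bit),      # diffusers-side config
        "text_encoder": TransformersBnBConfig(**bnb_4bit),  # transformers-side config
    }
)

pipe = DiffusionPipeline.from_pretrained(
    "user_data/image_models/Z-Image-Turbo",  # illustrative local path
    torch_dtype=torch.bfloat16,
    quantization_config=quant_config,
)
pipe.to("cuda")  # mirrors load_image_model() when CPU offload is disabled
```

The same kwargs feed both configs, matching the patch; swapping the mapping values for `TorchAoConfig("int8wo")` gives the torchao variant mentioned in the note under the VRAM table.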
From 194e4c285fe0358a5ed15109cda1aaad204ee023 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 6 Dec 2025 08:14:48 -0800 Subject: [PATCH 51/61] Update llama.cpp --- requirements/full/requirements.txt | 4 ++-- requirements/full/requirements_amd.txt | 4 ++-- requirements/full/requirements_amd_noavx2.txt | 4 ++-- requirements/full/requirements_apple_intel.txt | 4 ++-- requirements/full/requirements_apple_silicon.txt | 4 ++-- requirements/full/requirements_cpu_only.txt | 4 ++-- requirements/full/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/full/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements.txt | 4 ++-- requirements/portable/requirements_amd.txt | 4 ++-- requirements/portable/requirements_amd_noavx2.txt | 4 ++-- requirements/portable/requirements_apple_intel.txt | 4 ++-- requirements/portable/requirements_apple_silicon.txt | 4 ++-- requirements/portable/requirements_cpu_only.txt | 4 ++-- requirements/portable/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/portable/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements_vulkan.txt | 4 ++-- requirements/portable/requirements_vulkan_noavx2.txt | 4 ++-- 18 files changed, 36 insertions(+), 36 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index c1f87988..2f1c0f09 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -44,8 +44,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.16/exllamav3-0.0.16+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.16/exllamav3-0.0.16+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index 512231e0..8d9f8c43 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -42,7 +42,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" 
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index 1ccc507c..f69c2466 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -42,7 +42,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index 4d8aa771..fd2dad7f 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -42,5 +42,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index 5290aa71..d71dfbe5 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ 
b/requirements/full/requirements_apple_silicon.txt @@ -42,5 +42,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index d9c76a31..2d174713 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -42,5 +42,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index 1a796c21..2e047b42 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -42,5 +42,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index 8105abaa..4baca6c9 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -44,8 +44,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" 
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.16/exllamav3-0.0.16+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.16/exllamav3-0.0.16+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt index 0b944d48..21cef622 100644 --- a/requirements/portable/requirements.txt +++ b/requirements/portable/requirements.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_amd.txt b/requirements/portable/requirements_amd.txt index 9b43c901..bce5a9e0 100644 --- a/requirements/portable/requirements_amd.txt +++ b/requirements/portable/requirements_amd.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_amd_noavx2.txt b/requirements/portable/requirements_amd_noavx2.txt index 825e6dec..18257eea 100644 --- a/requirements/portable/requirements_amd_noavx2.txt +++ b/requirements/portable/requirements_amd_noavx2.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # AMD wheels 
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+rocm6.4.4avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+rocm6.4.4avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt index 864b6775..e97ff558 100644 --- a/requirements/portable/requirements_apple_intel.txt +++ b/requirements/portable/requirements_apple_intel.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt index 03e090cf..d113431c 100644 --- a/requirements/portable/requirements_apple_silicon.txt +++ b/requirements/portable/requirements_apple_silicon.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt index 7dd851e8..1553096c 100644 --- a/requirements/portable/requirements_cpu_only.txt +++ b/requirements/portable/requirements_cpu_only.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == 
"Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt index 63949d9f..ce259dcc 100644 --- a/requirements/portable/requirements_cpu_only_noavx2.txt +++ b/requirements/portable/requirements_cpu_only_noavx2.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt index 34652264..887a4542 100644 --- a/requirements/portable/requirements_noavx2.txt +++ b/requirements/portable/requirements_noavx2.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt index 28849ff7..d9eff1ac 100644 --- a/requirements/portable/requirements_vulkan.txt +++ b/requirements/portable/requirements_vulkan.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # Vulkan wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" 
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt index 365a798e..4a5ee8a3 100644 --- a/requirements/portable/requirements_vulkan_noavx2.txt +++ b/requirements/portable/requirements_vulkan_noavx2.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.64.0/llama_cpp_binaries-0.64.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" From c026dbaf647a1a6ead61f88848db4bb5f292b47a Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 6 Dec 2025 08:23:21 -0800 Subject: [PATCH 52/61] Fix API requests always returning the same 'created' time --- extensions/openai/typing.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/extensions/openai/typing.py b/extensions/openai/typing.py index 5ac9f6ef..5fea2324 100644 --- a/extensions/openai/typing.py +++ b/extensions/openai/typing.py @@ -130,7 +130,7 @@ class CompletionRequest(GenerationOptions, CompletionRequestParams): class CompletionResponse(BaseModel): id: str choices: List[dict] - created: int = int(time.time()) + created: int = Field(default_factory=lambda: int(time.time())) model: str object: str = "text_completion" usage: dict @@ -178,7 +178,7 @@ class ChatCompletionRequest(GenerationOptions, ChatCompletionRequestParams): class ChatCompletionResponse(BaseModel): id: str choices: List[dict] - created: int = int(time.time()) + created: int = Field(default_factory=lambda: int(time.time())) model: str object: str = "chat.completion" usage: dict @@ -296,7 +296,7 @@ class ImageGenerationRequest(BaseModel): class ImageGenerationResponse(BaseModel): - created: int = int(time.time()) + created: int = Field(default_factory=lambda: int(time.time())) data: List[dict] From f93cc4b5c37a929aa1a91c2648c571908d9a47cf Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 6 Dec 2025 08:33:06 -0800 Subject: [PATCH 53/61] Add an API example to the image generation tutorial --- docs/Image Generation Tutorial.md | 21 +++++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/docs/Image Generation Tutorial.md b/docs/Image Generation Tutorial.md index 875d1d44..0d562041 100644 --- a/docs/Image Generation Tutorial.md +++ b/docs/Image Generation Tutorial.md @@ -75,3 +75,24 @@ After that, your prompts will be automatically updated by the LLM each time you The improvement in creativity is striking: comparison_collage + +## Generating images over API + +It is possible to generate images usign the project's API. Just make sure to start the server with `--api`, either by + +1. 
Passing the `--api` flag to your `start` script, like `./start_linux.sh --api`, or +2. Writing `--api` to your `user_data/CMD_FLAGS.txt` file and relaunching the web UI. + +Here is an API call example: + +``` +curl http://127.0.0.1:5000/v1/images/generations \ + -H "Content-Type: application/json" \ + -d '{ + "prompt": "an orange tree", + "steps": 9, + "cfg_scale": 0, + "batch_size": 1, + "batch_count": 1 + }' +``` From 160a25165af5f5a0b759269657a241fe75f684d5 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 6 Dec 2025 08:41:12 -0800 Subject: [PATCH 54/61] docs: Small change --- docs/Image Generation Tutorial.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/docs/Image Generation Tutorial.md b/docs/Image Generation Tutorial.md index 0d562041..b285bbc1 100644 --- a/docs/Image Generation Tutorial.md +++ b/docs/Image Generation Tutorial.md @@ -72,7 +72,7 @@ Then go back to the "Image AI" page and check "LLM Prompt Variations". After that, your prompts will be automatically updated by the LLM each time you generate an image. If you use a "Sequential Count" value greater than 1, a new prompt will be created for each sequential batch. -The improvement in creativity is striking: +The improvement in creativity is striking (prompt: `Photo of a beautiful woman at night under moonlight`): comparison_collage From 1762312fb48e74cca45c916b92d616ed14d6ee9e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 6 Dec 2025 20:10:32 -0800 Subject: [PATCH 55/61] Use random instead of np.random for image seeds (makes it work on Windows) --- modules/ui_image_generation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 2fb93fd8..6b39c5b5 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -1,12 +1,12 @@ import json import os +import random import time import traceback from datetime import datetime from pathlib import Path import gradio as gr -import numpy as np from PIL.PngImagePlugin import PngInfo from modules import shared, ui, utils @@ -794,7 +794,7 @@ def generate(state, save_images=True): seed = state['image_seed'] if seed == -1: - seed = np.random.randint(0, 2**32 - 1) + seed = random.randint(0, 2**32 - 1) device = get_device() if device is None: From 85f2df92e9e736708cab5ba8ea9b7484036d7ded Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 7 Dec 2025 06:56:58 -0800 Subject: [PATCH 56/61] Use flash_attention_2 by default for Transformers models --- modules/shared.py | 2 +- modules/ui_model_menu.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index 0a27f33d..b2aeadc6 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -112,7 +112,7 @@ group.add_argument('--no-cache', action='store_true', help='Set use_cache to Fal group.add_argument('--trust-remote-code', action='store_true', help='Set trust_remote_code=True while loading the model. Necessary for some models.') group.add_argument('--force-safetensors', action='store_true', help='Set use_safetensors=True while loading the model. This prevents arbitrary code execution.') group.add_argument('--no_use_fast', action='store_true', help='Set use_fast=False while loading the tokenizer (it\'s True by default). 
Use this if you have any problems related to use_fast.') -group.add_argument('--attn-implementation', type=str, default='sdpa', metavar="IMPLEMENTATION", help='Attention implementation. Valid options: sdpa, eager, flash_attention_2.') +group.add_argument('--attn-implementation', type=str, default='flash_attention_2', metavar="IMPLEMENTATION", help='Attention implementation. Valid options: flash_attention_2, sdpa, eager.') # bitsandbytes 4-bit group = parser.add_argument_group('bitsandbytes 4-bit') diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index 86adc229..aab0fcaf 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -44,7 +44,7 @@ def create_ui(): shared.gradio['gpu_layers'] = gr.Slider(label="gpu-layers", minimum=0, maximum=get_initial_gpu_layers_max(), step=1, value=shared.args.gpu_layers, info='Must be greater than 0 for the GPU to be used. ⚠️ Lower this value if you can\'t load the model.') shared.gradio['ctx_size'] = gr.Slider(label='ctx-size', minimum=256, maximum=131072, step=256, value=shared.args.ctx_size, info='Context length. Common values: 4096, 8192, 16384, 32768, 65536, 131072.') shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7') - shared.gradio['attn_implementation'] = gr.Dropdown(label="attn-implementation", choices=['sdpa', 'eager', 'flash_attention_2'], value=shared.args.attn_implementation, info='Attention implementation.') + shared.gradio['attn_implementation'] = gr.Dropdown(label="attn-implementation", choices=['flash_attention_2', 'sdpa', 'eager'], value=shared.args.attn_implementation, info='Attention implementation.') shared.gradio['cache_type'] = gr.Dropdown(label="cache-type", choices=['fp16', 'q8_0', 'q4_0', 'fp8', 'q8', 'q7', 'q6', 'q5', 'q4', 'q3', 'q2'], value=shared.args.cache_type, allow_custom_value=True, info='Valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4; ExLlamaV3 - fp16, q2 to q8. For ExLlamaV3, you can type custom combinations for separate k/v bits (e.g. 
q4_q8).') shared.gradio['tp_backend'] = gr.Dropdown(label="tp-backend", choices=['native', 'nccl'], value=shared.args.tp_backend, info='The backend for tensor parallelism.') From 17bd8d10f080fc744080bc104dff0b2c47371d80 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 7 Dec 2025 09:37:18 -0800 Subject: [PATCH 57/61] Update exllamav3 to 0.0.17 --- requirements/full/requirements.txt | 4 ++-- requirements/full/requirements_noavx2.txt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index 2f1c0f09..b1dd9d83 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -46,8 +46,8 @@ tiktoken # CUDA wheels https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/turboderp-org/exllamav3/releases/download/v0.0.16/exllamav3-0.0.16+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/turboderp-org/exllamav3/releases/download/v0.0.16/exllamav3-0.0.16+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/turboderp-org/exllamav3/releases/download/v0.0.17/exllamav3-0.0.17+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/turboderp-org/exllamav3/releases/download/v0.0.17/exllamav3-0.0.17+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index 4baca6c9..072f65bb 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -46,8 +46,8 @@ tiktoken # CUDA wheels https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/turboderp-org/exllamav3/releases/download/v0.0.16/exllamav3-0.0.16+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/turboderp-org/exllamav3/releases/download/v0.0.16/exllamav3-0.0.16+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version 
== "3.11" +https://github.com/turboderp-org/exllamav3/releases/download/v0.0.17/exllamav3-0.0.17+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/turboderp-org/exllamav3/releases/download/v0.0.17/exllamav3-0.0.17+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" From 058e78411d97737011420d4c4341dc8d2d4643b9 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 7 Dec 2025 10:16:08 -0800 Subject: [PATCH 58/61] docs: Small changes --- docs/Image Generation Tutorial.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/docs/Image Generation Tutorial.md b/docs/Image Generation Tutorial.md index b285bbc1..43d6de7c 100644 --- a/docs/Image Generation Tutorial.md +++ b/docs/Image Generation Tutorial.md @@ -60,7 +60,7 @@ Note: The next time you launch the web UI, the model will get automatically load ### LLM Prompt Variations -To use this feature, you need to load an LLM in the main "Model" tab on the left. +To use this feature, you need to load an LLM in the main "Model" page on the left. If you have no idea what to use, do this to get started: @@ -78,7 +78,7 @@ The improvement in creativity is striking (prompt: `Photo of a beautiful woman a ## Generating images over API -It is possible to generate images usign the project's API. Just make sure to start the server with `--api`, either by +It is possible to generate images using the project's API. Just make sure to start the server with `--api`, either by 1. Passing the `--api` flag to your `start` script, like `./start_linux.sh --api`, or 2. Writing `--api` to your `user_data/CMD_FLAGS.txt` file and relaunching the web UI. 
From 3b8369a6798fbc504a980e50f579b7e042336762 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 7 Dec 2025 11:18:36 -0800 Subject: [PATCH 59/61] Update llama.cpp --- requirements/full/requirements.txt | 4 ++-- requirements/full/requirements_amd.txt | 4 ++-- requirements/full/requirements_amd_noavx2.txt | 4 ++-- requirements/full/requirements_apple_intel.txt | 4 ++-- requirements/full/requirements_apple_silicon.txt | 4 ++-- requirements/full/requirements_cpu_only.txt | 4 ++-- requirements/full/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/full/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements.txt | 4 ++-- requirements/portable/requirements_amd.txt | 4 ++-- requirements/portable/requirements_amd_noavx2.txt | 4 ++-- requirements/portable/requirements_apple_intel.txt | 4 ++-- requirements/portable/requirements_apple_silicon.txt | 4 ++-- requirements/portable/requirements_cpu_only.txt | 4 ++-- requirements/portable/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/portable/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements_vulkan.txt | 4 ++-- requirements/portable/requirements_vulkan_noavx2.txt | 4 ++-- 18 files changed, 36 insertions(+), 36 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index b1dd9d83..8bfa1318 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -44,8 +44,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.17/exllamav3-0.0.17+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.17/exllamav3-0.0.17+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index 8d9f8c43..088f7713 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -42,7 +42,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" 
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index f69c2466..e4e55e4d 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -42,7 +42,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index fd2dad7f..5903fe60 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -42,5 +42,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index d71dfbe5..47517578 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ 
b/requirements/full/requirements_apple_silicon.txt @@ -42,5 +42,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index 2d174713..1fee7121 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -42,5 +42,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index 2e047b42..204af32a 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -42,5 +42,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index 072f65bb..86208b9f 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -44,8 +44,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" 
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.17/exllamav3-0.0.17+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.17/exllamav3-0.0.17+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt index 21cef622..4a92fb8d 100644 --- a/requirements/portable/requirements.txt +++ b/requirements/portable/requirements.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_amd.txt b/requirements/portable/requirements_amd.txt index bce5a9e0..5ad1fe3a 100644 --- a/requirements/portable/requirements_amd.txt +++ b/requirements/portable/requirements_amd.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_amd_noavx2.txt b/requirements/portable/requirements_amd_noavx2.txt index 18257eea..9c3e2851 100644 --- a/requirements/portable/requirements_amd_noavx2.txt +++ b/requirements/portable/requirements_amd_noavx2.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # AMD wheels 
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+rocm6.4.4avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+rocm6.4.4avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt index e97ff558..8a618e35 100644 --- a/requirements/portable/requirements_apple_intel.txt +++ b/requirements/portable/requirements_apple_intel.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt index d113431c..c1c643e8 100644 --- a/requirements/portable/requirements_apple_silicon.txt +++ b/requirements/portable/requirements_apple_silicon.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt index 1553096c..f4bc8cab 100644 --- a/requirements/portable/requirements_cpu_only.txt +++ b/requirements/portable/requirements_cpu_only.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == 
"Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt index ce259dcc..81f9bf2e 100644 --- a/requirements/portable/requirements_cpu_only_noavx2.txt +++ b/requirements/portable/requirements_cpu_only_noavx2.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt index 887a4542..6cc87a4e 100644 --- a/requirements/portable/requirements_noavx2.txt +++ b/requirements/portable/requirements_noavx2.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt index d9eff1ac..25c3638f 100644 --- a/requirements/portable/requirements_vulkan.txt +++ b/requirements/portable/requirements_vulkan.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # Vulkan wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" 
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt index 4a5ee8a3..54d0cd1a 100644 --- a/requirements/portable/requirements_vulkan_noavx2.txt +++ b/requirements/portable/requirements_vulkan_noavx2.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.65.0/llama_cpp_binaries-0.65.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" From 1ec9f708e5cbb7a2d74fa35f56735d80991163c2 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 7 Dec 2025 11:49:22 -0800 Subject: [PATCH 60/61] Clear the torch cache between sequential image generations --- modules/ui_image_generation.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 6b39c5b5..bcc5bb2d 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -848,6 +848,9 @@ def generate(state, save_images=True): if shared.stop_everything: break + if batch_idx > 0: + clear_torch_cache() + generator.manual_seed(int(seed + batch_idx)) # Generate prompt variation if enabled From b758059e950c2442cf6750082a1914e1c20b9e10 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 7 Dec 2025 12:23:19 -0800 Subject: [PATCH 61/61] Revert "Clear the torch cache between sequential image generations" This reverts commit 1ec9f708e5cbb7a2d74fa35f56735d80991163c2. --- modules/ui_image_generation.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index bcc5bb2d..6b39c5b5 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -848,9 +848,6 @@ def generate(state, save_images=True): if shared.stop_everything: break - if batch_idx > 0: - clear_torch_cache() - generator.manual_seed(int(seed + batch_idx)) # Generate prompt variation if enabled
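On the seed handling visible in the last two hunks: PATCH 55/61 swapped `np.random.randint(0, 2**32 - 1)` for the standard library's `random.randint(0, 2**32 - 1)`, presumably because NumPy's legacy `randint` is bounded by the platform's default integer width, which is 32 bits on Windows, while the stdlib function accepts arbitrarily large Python integers. A small illustrative sketch of the resulting pattern, including the per-batch offset that mirrors the `generator.manual_seed(int(seed + batch_idx))` line above (variable names are illustrative):

```
import random

requested_seed = -1  # -1 means "pick a random seed", as in the UI
batch_count = 3      # illustrative value

seed = requested_seed
if seed == -1:
    # Works on Windows; np.random.randint rejects an upper bound this large there.
    seed = random.randint(0, 2**32 - 1)

for batch_idx in range(batch_count):
    # Each sequential batch reseeds with an offset derived from the single
    # recorded seed, matching the manual_seed(int(seed + batch_idx)) call above.
    batch_seed = int(seed + batch_idx)
    print(batch_idx, batch_seed)
```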