Compare commits

...

5 commits

Author SHA1 Message Date
dependabot[bot] a48999b971
Merge 3670ef56cf into 9448bf1caa 2025-12-03 01:01:56 +00:00
dependabot[bot] 3670ef56cf
Update safetensors requirement in /requirements/full
Updates the requirements on [safetensors](https://github.com/huggingface/safetensors) to permit the latest version.
- [Release notes](https://github.com/huggingface/safetensors/releases)
- [Changelog](https://github.com/huggingface/safetensors/blob/main/RELEASE.md)
- [Commits](https://github.com/huggingface/safetensors/compare/v0.6.0-rc0...v0.7.0)

---
updated-dependencies:
- dependency-name: safetensors
  dependency-version: 0.7.0
  dependency-type: direct:production
...

Signed-off-by: dependabot[bot] <support@github.com>
2025-12-03 01:01:53 +00:00
oobabooga 9448bf1caa Image generation: add torchao quantization (supports torch.compile) 2025-12-02 14:22:51 -08:00
oobabooga 97281ff831 UI: Fix an index error in the new image gallery 2025-12-02 11:20:52 -08:00
oobabooga 9d07d3a229 Make portable builds functional again after b3666e140d 2025-12-02 10:06:57 -08:00
13 changed files with 98 additions and 58 deletions

View file

@ -2,7 +2,6 @@ import time
import modules.shared as shared
from modules.logging_colors import logger
from modules.torch_utils import get_device
from modules.utils import resolve_model_path
@ -11,13 +10,14 @@ def get_quantization_config(quant_method):
Get the appropriate quantization config based on the selected method.
Args:
quant_method: One of 'none', 'bnb-8bit', 'bnb-4bit'
quant_method: One of 'none', 'bnb-8bit', 'bnb-4bit',
'torchao-int8wo', 'torchao-fp4', 'torchao-float8wo'
Returns:
PipelineQuantizationConfig or None
"""
import torch
from diffusers import BitsAndBytesConfig, QuantoConfig
from diffusers import BitsAndBytesConfig, TorchAoConfig
from diffusers.quantizers import PipelineQuantizationConfig
if quant_method == 'none' or not quant_method:
@ -46,6 +46,30 @@ def get_quantization_config(quant_method):
}
)
# torchao int8 weight-only
elif quant_method == 'torchao-int8wo':
return PipelineQuantizationConfig(
quant_mapping={
"transformer": TorchAoConfig("int8wo")
}
)
# torchao fp4 (e2m1)
elif quant_method == 'torchao-fp4':
return PipelineQuantizationConfig(
quant_mapping={
"transformer": TorchAoConfig("fp4_e2m1")
}
)
# torchao float8 weight-only
elif quant_method == 'torchao-float8wo':
return PipelineQuantizationConfig(
quant_mapping={
"transformer": TorchAoConfig("float8wo")
}
)
else:
logger.warning(f"Unknown quantization method: {quant_method}. Loading without quantization.")
return None
@ -77,11 +101,13 @@ def load_image_model(model_name, dtype='bfloat16', attn_backend='sdpa', cpu_offl
attn_backend: 'sdpa', 'flash_attention_2', or 'flash_attention_3'
cpu_offload: Enable CPU offloading for low VRAM
compile_model: Compile the model for faster inference (slow first run)
quant_method: Quantization method - 'none', 'bnb-8bit', 'bnb-4bit'
quant_method: Quantization method - 'none', 'bnb-8bit', 'bnb-4bit', 'torchao-int8wo', 'torchao-fp4', or 'torchao-float8wo'
"""
import torch
from diffusers import DiffusionPipeline
from modules.torch_utils import get_device
logger.info(f"Loading image model \"{model_name}\" with quantization: {quant_method}")
t0 = time.time()

View file

@ -60,7 +60,7 @@ group.add_argument('--image-attn-backend', type=str, default=None, choices=['sdp
group.add_argument('--image-cpu-offload', action='store_true', help='Enable CPU offloading for image model.')
group.add_argument('--image-compile', action='store_true', help='Compile the image model for faster inference.')
group.add_argument('--image-quant', type=str, default=None,
choices=['none', 'bnb-8bit', 'bnb-4bit'],
choices=['none', 'bnb-8bit', 'bnb-4bit', 'torchao-int8wo', 'torchao-fp4', 'torchao-float8wo'],
help='Quantization method for image model.')
# Model loader

View file

@ -280,25 +280,26 @@ def list_interface_input_elements():
'include_past_attachments',
]
# Image generation elements
elements += [
'image_prompt',
'image_neg_prompt',
'image_width',
'image_height',
'image_aspect_ratio',
'image_steps',
'image_cfg_scale',
'image_seed',
'image_batch_size',
'image_batch_count',
'image_model_menu',
'image_dtype',
'image_attn_backend',
'image_compile',
'image_cpu_offload',
'image_quant',
]
if not shared.args.portable:
# Image generation elements
elements += [
'image_prompt',
'image_neg_prompt',
'image_width',
'image_height',
'image_aspect_ratio',
'image_steps',
'image_cfg_scale',
'image_seed',
'image_batch_size',
'image_batch_count',
'image_model_menu',
'image_dtype',
'image_attn_backend',
'image_compile',
'image_cpu_offload',
'image_quant',
]
return elements
@ -531,25 +532,29 @@ def setup_auto_save():
'paste_to_attachment',
'include_past_attachments',
# Image generation tab (ui_image_generation.py)
'image_prompt',
'image_neg_prompt',
'image_width',
'image_height',
'image_aspect_ratio',
'image_steps',
'image_cfg_scale',
'image_seed',
'image_batch_size',
'image_batch_count',
'image_model_menu',
'image_dtype',
'image_attn_backend',
'image_compile',
'image_cpu_offload',
'image_quant',
]
if not shared.args.portable:
# Image generation tab (ui_image_generation.py)
change_elements += [
'image_prompt',
'image_neg_prompt',
'image_width',
'image_height',
'image_aspect_ratio',
'image_steps',
'image_cfg_scale',
'image_seed',
'image_batch_size',
'image_batch_count',
'image_model_menu',
'image_dtype',
'image_attn_backend',
'image_compile',
'image_cpu_offload',
'image_quant',
]
for element_name in change_elements:
if element_name in shared.gradio:
shared.gradio[element_name].change(

View file

@ -18,7 +18,6 @@ from modules.image_models import (
)
from modules.logging_colors import logger
from modules.text_generation import stop_everything_event
from modules.torch_utils import get_device
from modules.utils import gradio
ASPECT_RATIOS = {
@ -291,8 +290,11 @@ def on_gallery_select(evt: gr.SelectData, current_page):
if evt.index is None:
return "", "Select an image to view its settings"
if not _image_cache:
get_all_history_images()
# Get the current page's images to find the actual file path
all_images = get_all_history_images()
all_images = _image_cache
total_images = len(all_images)
# Calculate the actual index in the full list
@ -471,9 +473,9 @@ def create_ui():
with gr.Column():
shared.gradio['image_quant'] = gr.Dropdown(
label='Quantization',
choices=['none', 'bnb-8bit', 'bnb-4bit'],
choices=['none', 'bnb-8bit', 'bnb-4bit', 'torchao-int8wo', 'torchao-fp4', 'torchao-float8wo'],
value=shared.settings['image_quant'],
info='Quantization method for reduced VRAM usage. Quanto supports lower precisions (2-bit, 4-bit, 8-bit).'
info='BnB: bitsandbytes quantization. torchao: int8wo, fp4, float8wo.'
)
shared.gradio['image_dtype'] = gr.Dropdown(
@ -507,9 +509,7 @@ def create_ui():
info="Enter HuggingFace path. Use : for branch, e.g. user/model:main"
)
shared.gradio['image_download_btn'] = gr.Button("Download", variant='primary')
shared.gradio['image_model_status'] = gr.Markdown(
value=f"Model: **{shared.settings['image_model_menu']}** (not loaded)" if shared.settings['image_model_menu'] != 'None' else "No model selected"
)
shared.gradio['image_model_status'] = gr.Markdown(value="")
def create_event_handlers():
@ -667,7 +667,7 @@ def generate(state):
"""
import torch
from modules.torch_utils import clear_torch_cache
from modules.torch_utils import clear_torch_cache, get_device
clear_torch_cache()

View file

@ -21,10 +21,11 @@ python-docx==1.1.2
pyyaml
requests
rich
safetensors==0.6.*
safetensors==0.7.*
scipy
sentencepiece
tensorboard
torchao==0.14.*
transformers==4.57.*
triton-windows==3.5.1.post21; platform_system == "Windows"
tqdm

View file

@ -19,10 +19,11 @@ python-docx==1.1.2
pyyaml
requests
rich
safetensors==0.6.*
safetensors==0.7.*
scipy
sentencepiece
tensorboard
torchao==0.14.*
transformers==4.57.*
triton-windows==3.5.1.post21; platform_system == "Windows"
tqdm

View file

@ -19,10 +19,11 @@ python-docx==1.1.2
pyyaml
requests
rich
safetensors==0.6.*
safetensors==0.7.*
scipy
sentencepiece
tensorboard
torchao==0.14.*
transformers==4.57.*
triton-windows==3.5.1.post21; platform_system == "Windows"
tqdm

View file

@ -19,10 +19,11 @@ python-docx==1.1.2
pyyaml
requests
rich
safetensors==0.6.*
safetensors==0.7.*
scipy
sentencepiece
tensorboard
torchao==0.14.*
transformers==4.57.*
triton-windows==3.5.1.post21; platform_system == "Windows"
tqdm

View file

@ -19,10 +19,11 @@ python-docx==1.1.2
pyyaml
requests
rich
safetensors==0.6.*
safetensors==0.7.*
scipy
sentencepiece
tensorboard
torchao==0.14.*
transformers==4.57.*
triton-windows==3.5.1.post21; platform_system == "Windows"
tqdm

View file

@ -19,10 +19,11 @@ python-docx==1.1.2
pyyaml
requests
rich
safetensors==0.6.*
safetensors==0.7.*
scipy
sentencepiece
tensorboard
torchao==0.14.*
transformers==4.57.*
triton-windows==3.5.1.post21; platform_system == "Windows"
tqdm

View file

@ -19,10 +19,11 @@ python-docx==1.1.2
pyyaml
requests
rich
safetensors==0.6.*
safetensors==0.7.*
scipy
sentencepiece
tensorboard
torchao==0.14.*
transformers==4.57.*
triton-windows==3.5.1.post21; platform_system == "Windows"
tqdm

View file

@ -21,10 +21,11 @@ python-docx==1.1.2
pyyaml
requests
rich
safetensors==0.6.*
safetensors==0.7.*
scipy
sentencepiece
tensorboard
torchao==0.14.*
transformers==4.57.*
triton-windows==3.5.1.post21; platform_system == "Windows"
tqdm

View file

@ -19,10 +19,11 @@ python-docx==1.1.2
pyyaml
requests
rich
safetensors==0.6.*
safetensors==0.7.*
scipy
sentencepiece
tensorboard
torchao==0.14.*
transformers==4.57.*
triton-windows==3.5.1.post21; platform_system == "Windows"
tqdm