From 742db85de082b11779c5d51ca2aab87c1210c804 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 27 Nov 2025 18:23:26 -0800 Subject: [PATCH] Hardcode 8-bit quantization for now --- modules/image_models.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/modules/image_models.py b/modules/image_models.py index 21612f61..6a6c6547 100644 --- a/modules/image_models.py +++ b/modules/image_models.py @@ -19,7 +19,7 @@ def load_image_model(model_name, dtype='bfloat16', attn_backend='sdpa', cpu_offl cpu_offload: Enable CPU offloading for low VRAM compile_model: Compile the model for faster inference (slow first run) """ - from diffusers import ZImagePipeline + from diffusers import PipelineQuantizationConfig, ZImagePipeline logger.info(f"Loading image model \"{model_name}\"") t0 = time.time() @@ -30,10 +30,17 @@ def load_image_model(model_name, dtype='bfloat16', attn_backend='sdpa', cpu_offl model_path = resolve_model_path(model_name, image_model=True) try: + # Define quantization config for 8-bit + pipeline_quant_config = PipelineQuantizationConfig( + quant_backend="bitsandbytes_8bit", + quant_kwargs={"load_in_8bit": True}, + ) + pipe = ZImagePipeline.from_pretrained( str(model_path), + quantization_config=pipeline_quant_config, torch_dtype=target_dtype, - low_cpu_mem_usage=False, + low_cpu_mem_usage=True, ) if not cpu_offload: