From cecb172d2c7445f06dd90ee86aa6e9c0b437e1fe Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Thu, 27 Nov 2025 18:29:32 -0800
Subject: [PATCH] Add the code for 4-bit quantization

---
 modules/image_models.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/modules/image_models.py b/modules/image_models.py
index 6a6c6547..9e2075fd 100644
--- a/modules/image_models.py
+++ b/modules/image_models.py
@@ -36,6 +36,17 @@ def load_image_model(model_name, dtype='bfloat16', attn_backend='sdpa', cpu_offl
             quant_kwargs={"load_in_8bit": True},
         )
 
+    # Define quantization config for 4-bit
+    # pipeline_quant_config = PipelineQuantizationConfig(
+    #     quant_backend="bitsandbytes_4bit",
+    #     quant_kwargs={
+    #         "load_in_4bit": True,
+    #         "bnb_4bit_quant_type": "nf4",  # Or "fp4" for floating point
+    #         "bnb_4bit_compute_dtype": torch.bfloat16,  # For faster computation
+    #         "bnb_4bit_use_double_quant": True,  # Nested quantization for extra savings
+    #     },
+    # )
+
     pipe = ZImagePipeline.from_pretrained(
         str(model_path),
         quantization_config=pipeline_quant_config,
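
Note (not part of the patch): below is a minimal, self-contained sketch of the
commented-out 4-bit config in runnable form, assuming diffusers >= 0.33 (which
provides pipeline-level quantization via PipelineQuantizationConfig) and
bitsandbytes installed. DiffusionPipeline is used as a generic stand-in for the
patch's ZImagePipeline, and "/path/to/model" is a placeholder.

import torch
from diffusers import DiffusionPipeline
from diffusers.quantizers import PipelineQuantizationConfig

# Sketch only: mirrors the 4-bit config from the diff above.
pipeline_quant_config = PipelineQuantizationConfig(
    quant_backend="bitsandbytes_4bit",
    quant_kwargs={
        "load_in_4bit": True,
        "bnb_4bit_quant_type": "nf4",              # or "fp4" for floating point
        "bnb_4bit_compute_dtype": torch.bfloat16,  # compute in bf16 for speed
        "bnb_4bit_use_double_quant": True,         # nested quantization for extra savings
    },
)

# Stand-in for the patch's ZImagePipeline; the config is passed the same way.
pipe = DiffusionPipeline.from_pretrained(
    "/path/to/model",  # placeholder for str(model_path) in the patch
    quantization_config=pipeline_quant_config,
    torch_dtype=torch.bfloat16,
)

NF4 with double quantization roughly halves the weight memory of the existing
8-bit path at some quality cost, which is presumably why the patch keeps the
4-bit block commented out alongside the active 8-bit config.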