From cecb172d2c7445f06dd90ee86aa6e9c0b437e1fe Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Thu, 27 Nov 2025 18:29:32 -0800
Subject: [PATCH] Add the code for 4-bit quantization

---
 modules/image_models.py | 11 +++++++++++
 1 file changed, 11 insertions(+)

diff --git a/modules/image_models.py b/modules/image_models.py
index 6a6c6547..9e2075fd 100644
--- a/modules/image_models.py
+++ b/modules/image_models.py
@@ -36,6 +36,17 @@ def load_image_model(model_name, dtype='bfloat16', attn_backend='sdpa', cpu_offl
             quant_kwargs={"load_in_8bit": True},
         )
 
+    # Define quantization config for 4-bit
+    # pipeline_quant_config = PipelineQuantizationConfig(
+    #     quant_backend="bitsandbytes_4bit",
+    #     quant_kwargs={
+    #         "load_in_4bit": True,
+    #         "bnb_4bit_quant_type": "nf4",  # Or "fp4" for floating point
+    #         "bnb_4bit_compute_dtype": torch.bfloat16,  # For faster computation
+    #         "bnb_4bit_use_double_quant": True,  # Nested quantization for extra savings
+    #     },
+    # )
+
     pipe = ZImagePipeline.from_pretrained(
         str(model_path),
         quantization_config=pipeline_quant_config,
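
Note (not part of the patch): below is a minimal, self-contained sketch of the
commented-out 4-bit config in runnable form, assuming diffusers >= 0.33 (which
provides pipeline-level quantization via PipelineQuantizationConfig) and
bitsandbytes installed. DiffusionPipeline is used as a generic stand-in for the
patch's ZImagePipeline, and "/path/to/model" is a placeholder.

import torch
from diffusers import DiffusionPipeline
from diffusers.quantizers import PipelineQuantizationConfig

# Sketch only: mirrors the 4-bit config from the diff above.
pipeline_quant_config = PipelineQuantizationConfig(
    quant_backend="bitsandbytes_4bit",
    quant_kwargs={
        "load_in_4bit": True,
        "bnb_4bit_quant_type": "nf4",              # or "fp4" for floating point
        "bnb_4bit_compute_dtype": torch.bfloat16,  # compute in bf16 for speed
        "bnb_4bit_use_double_quant": True,         # nested quantization for extra savings
    },
)

# Stand-in for the patch's ZImagePipeline; the config is passed the same way.
pipe = DiffusionPipeline.from_pretrained(
    "/path/to/model",  # placeholder for str(model_path) in the patch
    quantization_config=pipeline_quant_config,
    torch_dtype=torch.bfloat16,
)

NF4 with double quantization roughly halves the weight memory of the existing
8-bit path at some quality cost, which is presumably why the patch keeps the
4-bit block commented out alongside the active 8-bit config.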