Add the code for 4-bit quantization

2025-12-06 07:12:10 +01:00 · 2025-11-27 18:29:32 -08:00 · 2025-11-27 18:29:32 -08:00 · cecb172d2c
parent 742db85de0
commit cecb172d2c
1 changed files with 11 additions and 0 deletions
--- a/modules/image_models.py
+++ b/modules/image_models.py
@ -36,6 +36,17 @@ def load_image_model(model_name, dtype='bfloat16', attn_backend='sdpa', cpu_offl
            quant_kwargs={"load_in_8bit": True},
        )

+        # Define quantization config for 4-bit
+        # pipeline_quant_config = PipelineQuantizationConfig(
+        #     quant_backend="bitsandbytes_4bit",
+        #     quant_kwargs={
+        #         "load_in_4bit": True,
+        #         "bnb_4bit_quant_type": "nf4",  # Or "fp4" for floating point
+        #         "bnb_4bit_compute_dtype": torch.bfloat16,  # For faster computation
+        #         "bnb_4bit_use_double_quant": True,  # Nested quantization for extra savings
+        #     },
+        # )
+
        pipe = ZImagePipeline.from_pretrained(
            str(model_path),
            quantization_config=pipeline_quant_config,