diff --git a/modules/image_models.py b/modules/image_models.py index e6f9a172..4d7cc5f9 100644 --- a/modules/image_models.py +++ b/modules/image_models.py @@ -11,7 +11,7 @@ def get_quantization_config(quant_method): Get the appropriate quantization config based on the selected method. Args: - quant_method: One of 'none', 'bnb-8bit', 'bnb-4bit', 'quanto-8bit', 'quanto-4bit', 'quanto-2bit' + quant_method: One of 'none', 'bnb-8bit', 'bnb-4bit' Returns: PipelineQuantizationConfig or None @@ -46,30 +46,6 @@ def get_quantization_config(quant_method): } ) - # Quanto 8-bit quantization - elif quant_method == 'quanto-8bit': - return PipelineQuantizationConfig( - quant_mapping={ - "transformer": QuantoConfig(weights_dtype="int8") - } - ) - - # Quanto 4-bit quantization - elif quant_method == 'quanto-4bit': - return PipelineQuantizationConfig( - quant_mapping={ - "transformer": QuantoConfig(weights_dtype="int4") - } - ) - - # Quanto 2-bit quantization - elif quant_method == 'quanto-2bit': - return PipelineQuantizationConfig( - quant_mapping={ - "transformer": QuantoConfig(weights_dtype="int2") - } - ) - else: logger.warning(f"Unknown quantization method: {quant_method}. 
Loading without quantization.") return None @@ -101,7 +77,7 @@ def load_image_model(model_name, dtype='bfloat16', attn_backend='sdpa', cpu_offl attn_backend: 'sdpa', 'flash_attention_2', or 'flash_attention_3' cpu_offload: Enable CPU offloading for low VRAM compile_model: Compile the model for faster inference (slow first run) - quant_method: Quantization method - 'none', 'bnb-8bit', 'bnb-4bit', 'quanto-8bit', 'quanto-4bit', 'quanto-2bit' + quant_method: Quantization method - 'none', 'bnb-8bit', 'bnb-4bit' """ import torch from diffusers import DiffusionPipeline diff --git a/modules/shared.py b/modules/shared.py index 316f7729..a96cd70c 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -60,7 +60,7 @@ group.add_argument('--image-attn-backend', type=str, default=None, choices=['sdp group.add_argument('--image-cpu-offload', action='store_true', help='Enable CPU offloading for image model.') group.add_argument('--image-compile', action='store_true', help='Compile the image model for faster inference.') group.add_argument('--image-quant', type=str, default=None, - choices=['none', 'bnb-8bit', 'bnb-4bit', 'quanto-8bit', 'quanto-4bit', 'quanto-2bit'], + choices=['none', 'bnb-8bit', 'bnb-4bit'], help='Quantization method for image model.') # Model loader diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 92c447c8..5ef66820 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -471,7 +471,7 @@ def create_ui(): with gr.Column(): shared.gradio['image_quant'] = gr.Dropdown( label='Quantization', - choices=['none', 'bnb-8bit', 'bnb-4bit', 'quanto-8bit', 'quanto-4bit', 'quanto-2bit'], + choices=['none', 'bnb-8bit', 'bnb-4bit'], value=shared.settings['image_quant'], - info='Quantization method for reduced VRAM usage. Quanto supports lower precisions (2-bit, 4-bit, 8-bit).' + info='Quantization method for reduced VRAM usage.' 
) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index 7076df4f..a031bbab 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -11,7 +11,6 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* -optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index d6264089..ee8d67ac 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -9,7 +9,6 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* -optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index 985511fa..7b36b151 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -9,7 +9,6 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* -optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index 1c75806d..4f72d5ac 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -9,7 +9,6 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* -optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index 0f536b6a..7942b9f0 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -9,7 +9,6 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* -optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index 0f8970d0..96013c35 100644 --- 
a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -9,7 +9,6 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* -optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index e862c1a1..0813f06c 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -9,7 +9,6 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* -optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index a734ce82..ea7edf6c 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -11,7 +11,6 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* -optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0 diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt index 8561462c..15247d72 100644 --- a/requirements/full/requirements_nowheels.txt +++ b/requirements/full/requirements_nowheels.txt @@ -9,7 +9,6 @@ huggingface-hub==0.36.0 jinja2==3.1.6 markdown numpy==2.2.* -optimum-quanto==0.2.7 pandas peft==0.18.* Pillow>=9.5.0