mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2025-12-06 07:12:10 +01:00
Add the code for 4-bit quantization
This commit is contained in:
parent
742db85de0
commit
cecb172d2c
|
|
@ -36,6 +36,17 @@ def load_image_model(model_name, dtype='bfloat16', attn_backend='sdpa', cpu_offl
|
|||
quant_kwargs={"load_in_8bit": True},
|
||||
)
|
||||
|
||||
# Define quantization config for 4-bit
|
||||
# pipeline_quant_config = PipelineQuantizationConfig(
|
||||
# quant_backend="bitsandbytes_4bit",
|
||||
# quant_kwargs={
|
||||
# "load_in_4bit": True,
|
||||
# "bnb_4bit_quant_type": "nf4", # Or "fp4" for floating point
|
||||
# "bnb_4bit_compute_dtype": torch.bfloat16, # For faster computation
|
||||
# "bnb_4bit_use_double_quant": True, # Nested quantization for extra savings
|
||||
# },
|
||||
# )
|
||||
|
||||
pipe = ZImagePipeline.from_pretrained(
|
||||
str(model_path),
|
||||
quantization_config=pipeline_quant_config,
|
||||
|
|
|
|||
Loading…
Reference in a new issue