diff --git a/modules/image_models.py b/modules/image_models.py index 2ef1e730..8363533b 100644 --- a/modules/image_models.py +++ b/modules/image_models.py @@ -98,7 +98,7 @@ def load_image_model(model_name, dtype='bfloat16', attn_backend='sdpa', cpu_offl Args: model_name: Name of the model directory dtype: 'bfloat16' or 'float16' - attn_backend: 'sdpa', 'flash_attention_2', or 'flash_attention_3' + attn_backend: 'sdpa' or 'flash_attention_2' cpu_offload: Enable CPU offloading for low VRAM compile_model: Compile the model for faster inference (slow first run) quant_method: 'none', 'bnb-8bit', 'bnb-4bit', or torchao options (int8wo, fp4, float8wo) @@ -145,8 +145,6 @@ def load_image_model(model_name, dtype='bfloat16', attn_backend='sdpa', cpu_offl if hasattr(pipe, 'transformer') and hasattr(pipe.transformer, 'set_attention_backend'): if attn_backend == 'flash_attention_2': pipe.transformer.set_attention_backend("flash") - elif attn_backend == 'flash_attention_3': - pipe.transformer.set_attention_backend("_flash_3") # sdpa is the default, no action needed if compile_model: diff --git a/modules/shared.py b/modules/shared.py index 4a0fd986..4e17497b 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -56,7 +56,7 @@ group = parser.add_argument_group('Image model') group.add_argument('--image-model', type=str, help='Name of the image model to select on startup (overrides saved setting).') group.add_argument('--image-model-dir', type=str, default='user_data/image_models', help='Path to directory with all the image models.') group.add_argument('--image-dtype', type=str, default=None, choices=['bfloat16', 'float16'], help='Data type for image model.') -group.add_argument('--image-attn-backend', type=str, default=None, choices=['sdpa', 'flash_attention_2', 'flash_attention_3'], help='Attention backend for image model.') +group.add_argument('--image-attn-backend', type=str, default=None, choices=['sdpa', 'flash_attention_2'], help='Attention backend for image model.') group.add_argument('--image-cpu-offload', action='store_true', help='Enable CPU offloading for image model.') group.add_argument('--image-compile', action='store_true', help='Compile the image model for faster inference.') group.add_argument('--image-quant', type=str, default=None, diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py index 08cf3f64..fdf1af86 100644 --- a/modules/ui_image_generation.py +++ b/modules/ui_image_generation.py @@ -485,7 +485,7 @@ def create_ui(): info='bfloat16 recommended for modern GPUs' ) shared.gradio['image_attn_backend'] = gr.Dropdown( - choices=['sdpa', 'flash_attention_2', 'flash_attention_3'], + choices=['sdpa', 'flash_attention_2'], value=shared.settings['image_attn_backend'], label='Attention Backend', info='SDPA is default. Flash Attention requires compatible GPU.'