Mirror of https://github.com/oobabooga/text-generation-webui.git (synced 2025-12-06 07:12:10 +01:00)
Image: Remove the flash_attention_3 option (no idea how to get it working)
This commit is contained in:
  parent c93d27add3
  commit c357eed4c7
@@ -98,7 +98,7 @@ def load_image_model(model_name, dtype='bfloat16', attn_backend='sdpa', cpu_offl
     Args:
         model_name: Name of the model directory
         dtype: 'bfloat16' or 'float16'
-        attn_backend: 'sdpa', 'flash_attention_2', or 'flash_attention_3'
+        attn_backend: 'sdpa' or 'flash_attention_2'
         cpu_offload: Enable CPU offloading for low VRAM
         compile_model: Compile the model for faster inference (slow first run)
         quant_method: 'none', 'bnb-8bit', 'bnb-4bit', or torchao options (int8wo, fp4, float8wo)
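For reference, a minimal sketch of calling the loader with the options that remain after this change; the import path and the argument values below are assumptions for illustration, not taken from the commit:

from modules.image_models import load_image_model  # assumed module path

pipe = load_image_model(
    'my-image-model',                  # example directory name under user_data/image_models
    dtype='bfloat16',
    attn_backend='flash_attention_2',  # 'sdpa' (default) or 'flash_attention_2'; 'flash_attention_3' is gone
    cpu_offload=False,
    compile_model=False,
    quant_method='none',
)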
@@ -145,8 +145,6 @@ def load_image_model(model_name, dtype='bfloat16', attn_backend='sdpa', cpu_offl
     if hasattr(pipe, 'transformer') and hasattr(pipe.transformer, 'set_attention_backend'):
         if attn_backend == 'flash_attention_2':
             pipe.transformer.set_attention_backend("flash")
-        elif attn_backend == 'flash_attention_3':
-            pipe.transformer.set_attention_backend("_flash_3")
         # sdpa is the default, no action needed

     if compile_model:
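With the flash_attention_3 branch removed, "flash" is the only non-default backend ever passed to set_attention_backend, and that call can still fail at load time if the flash-attn kernels are not installed. A possible guard, purely a sketch and not part of this commit:

# Hypothetical fallback, not in the commit: try Flash Attention 2 and
# fall back to the default SDPA path instead of failing the load.
if attn_backend == 'flash_attention_2':
    try:
        pipe.transformer.set_attention_backend("flash")
    except Exception as exc:
        print(f"flash_attention_2 unavailable ({exc}); using sdpa instead")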
@@ -56,7 +56,7 @@ group = parser.add_argument_group('Image model')
 group.add_argument('--image-model', type=str, help='Name of the image model to select on startup (overrides saved setting).')
 group.add_argument('--image-model-dir', type=str, default='user_data/image_models', help='Path to directory with all the image models.')
 group.add_argument('--image-dtype', type=str, default=None, choices=['bfloat16', 'float16'], help='Data type for image model.')
-group.add_argument('--image-attn-backend', type=str, default=None, choices=['sdpa', 'flash_attention_2', 'flash_attention_3'], help='Attention backend for image model.')
+group.add_argument('--image-attn-backend', type=str, default=None, choices=['sdpa', 'flash_attention_2'], help='Attention backend for image model.')
 group.add_argument('--image-cpu-offload', action='store_true', help='Enable CPU offloading for image model.')
 group.add_argument('--image-compile', action='store_true', help='Compile the image model for faster inference.')
 group.add_argument('--image-quant', type=str, default=None,
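A self-contained sketch (standalone argparse, not the webui's actual parser object) showing the practical effect: 'flash_attention_3' is no longer an accepted value for --image-attn-backend:

import argparse

parser = argparse.ArgumentParser()
group = parser.add_argument_group('Image model')
group.add_argument('--image-attn-backend', type=str, default=None,
                   choices=['sdpa', 'flash_attention_2'],
                   help='Attention backend for image model.')

args = parser.parse_args(['--image-attn-backend', 'flash_attention_2'])   # accepted
# parser.parse_args(['--image-attn-backend', 'flash_attention_3'])        # now exits with "invalid choice"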
@@ -485,7 +485,7 @@ def create_ui():
             info='bfloat16 recommended for modern GPUs'
         )
         shared.gradio['image_attn_backend'] = gr.Dropdown(
-            choices=['sdpa', 'flash_attention_2', 'flash_attention_3'],
+            choices=['sdpa', 'flash_attention_2'],
             value=shared.settings['image_attn_backend'],
             label='Attention Backend',
             info='SDPA is default. Flash Attention requires compatible GPU.'
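To isolate the UI side of the change, a standalone Gradio sketch with the trimmed choices list; the default value here is an assumption for the sketch, not the webui's saved setting:

import gradio as gr

with gr.Blocks() as demo:
    attn_backend = gr.Dropdown(
        choices=['sdpa', 'flash_attention_2'],   # 'flash_attention_3' removed
        value='sdpa',                            # assumed default for this sketch
        label='Attention Backend',
        info='SDPA is default. Flash Attention requires compatible GPU.'
    )

# demo.launch()  # uncomment to preview the dropdown locally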