Transformers loader: replace use_flash_attention_2/use_eager_attention with a unified attn_implementation

Closes #7107
oobabooga 2025-07-09 18:38:45 -07:00
parent 511bb31646
commit 6c2bdda0f0
6 changed files with 5 additions and 20 deletions


@@ -61,8 +61,7 @@ group.add_argument('--no-cache', action='store_true', help='Set use_cache to Fal
 group.add_argument('--trust-remote-code', action='store_true', help='Set trust_remote_code=True while loading the model. Necessary for some models.')
 group.add_argument('--force-safetensors', action='store_true', help='Set use_safetensors=True while loading the model. This prevents arbitrary code execution.')
 group.add_argument('--no_use_fast', action='store_true', help='Set use_fast=False while loading the tokenizer (it\'s True by default). Use this if you have any problems related to use_fast.')
-group.add_argument('--use_flash_attention_2', action='store_true', help='Set use_flash_attention_2=True while loading the model.')
-group.add_argument('--use_eager_attention', action='store_true', help='Set attn_implementation= eager while loading the model.')
+group.add_argument('--attn-implementation', type=str, default='sdpa', help='Attention implementation. Valid options: sdpa, eager, flash_attention_2.')
 group.add_argument('--torch-compile', action='store_true', help='Compile the model with torch.compile for improved performance.')
 # bitsandbytes 4-bit
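
For context, transformers accepts attn_implementation as a keyword argument to from_pretrained, which is what lets a single string flag replace the two old boolean switches. Below is a minimal sketch of how such a flag could be wired into a model load; the standalone parser and the placeholder model name are illustrative, not the repository's actual loader code.

# Minimal sketch (not the repository's actual loader): mapping a unified
# --attn-implementation flag onto transformers' from_pretrained.
import argparse

from transformers import AutoModelForCausalLM

parser = argparse.ArgumentParser()
parser.add_argument('--attn-implementation', type=str, default='sdpa',
                    help='Attention implementation. Valid options: sdpa, eager, flash_attention_2.')
args = parser.parse_args()

# transformers takes the string directly; note that flash_attention_2
# additionally requires the flash-attn package to be installed.
model = AutoModelForCausalLM.from_pretrained(
    'facebook/opt-125m',  # placeholder model name for illustration
    attn_implementation=args.attn_implementation,
)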