transformers: Add eager attention option to make Gemma-2 work properly (#6188)

commit 8a39f579d8
parent 19a56dd538
Author: GralchemOz
Date:   2024-07-01 23:08:08 +08:00 (committed via GitHub)
5 changed files with 7 additions and 0 deletions

@@ -106,6 +106,7 @@ group.add_argument('--trust-remote-code', action='store_true', help='Set trust_r
 group.add_argument('--force-safetensors', action='store_true', help='Set use_safetensors=True while loading the model. This prevents arbitrary code execution.')
 group.add_argument('--no_use_fast', action='store_true', help='Set use_fast=False while loading the tokenizer (it\'s True by default). Use this if you have any problems related to use_fast.')
 group.add_argument('--use_flash_attention_2', action='store_true', help='Set use_flash_attention_2=True while loading the model.')
+group.add_argument('--use_eager_attention', action='store_true', help='Set attn_implementation="eager" while loading the model.')
 # bitsandbytes 4-bit
 group = parser.add_argument_group('bitsandbytes 4-bit')
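
For context, here is a minimal sketch of how such a flag is typically consumed on the loader side. Only the argparse hunk is shown above, so the wiring below and the model name are illustrative assumptions, not the commit's actual loader code. The motivation is that Gemma-2 soft-caps its attention logits, a step the SDPA and FlashAttention kernels did not implement at the time, so the eager (pure PyTorch) attention path is required for correct output.

```python
# Sketch of consuming --use_eager_attention (assumed wiring, not the commit's code):
# when the flag is set, pass attn_implementation="eager" to from_pretrained().
import argparse

from transformers import AutoModelForCausalLM

parser = argparse.ArgumentParser()
parser.add_argument('--use_eager_attention', action='store_true',
                    help='Set attn_implementation="eager" while loading the model.')
args = parser.parse_args()

load_kwargs = {}
if args.use_eager_attention:
    # Gemma-2 soft-caps attention logits; the fused SDPA/FlashAttention kernels
    # skipped that step at the time, degrading output quality, so select the
    # eager attention implementation instead.
    load_kwargs['attn_implementation'] = 'eager'

# Model name is illustrative only.
model = AutoModelForCausalLM.from_pretrained('google/gemma-2-9b-it', **load_kwargs)
```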