Add --no_xformers and --no_sdpa flags for ExllamaV2

This commit is contained in:
oobabooga 2024-07-11 15:47:37 -07:00
parent 512b311137
commit e436d69e2b
6 changed files with 15 additions and 1 deletion

View file

@@ -48,6 +48,8 @@ class Exllamav2Model:
config.scale_pos_emb = shared.args.compress_pos_emb
config.scale_alpha_value = shared.args.alpha_value
config.no_flash_attn = shared.args.no_flash_attn
config.no_xformers = shared.args.no_xformers
config.no_sdpa = shared.args.no_sdpa
config.num_experts_per_token = int(shared.args.num_experts_per_token)
model = ExLlamaV2(config)