mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2026-04-04 14:17:28 +00:00
Transformers loader: replace use_flash_attention_2/use_eager_attention with a unified attn_implementation
Closes #7107
This commit is contained in:
parent
511bb31646
commit
6c2bdda0f0
6 changed files with 5 additions and 20 deletions
|
|
@@ -135,20 +135,15 @@ def load_model_HF(model_name):
|
|||
params = {
|
||||
'low_cpu_mem_usage': True,
|
||||
'torch_dtype': torch.bfloat16 if shared.args.bf16 else torch.float16,
|
||||
'attn_implementation': shared.args.attn_implementation,
|
||||
}
|
||||
|
||||
if shared.args.trust_remote_code:
|
||||
params['trust_remote_code'] = True
|
||||
|
||||
if shared.args.use_flash_attention_2:
|
||||
params['use_flash_attention_2'] = True
|
||||
|
||||
if shared.args.force_safetensors:
|
||||
params['force_safetensors'] = True
|
||||
|
||||
if shared.args.use_eager_attention:
|
||||
params['attn_implementation'] = 'eager'
|
||||
|
||||
config = AutoConfig.from_pretrained(path_to_model, trust_remote_code=shared.args.trust_remote_code)
|
||||
|
||||
if 'chatglm' in model_name.lower():
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue