Use ExLlamaV2 (instead of the HF one) for EXL2 models for now

It doesn't seem to have the "OverflowError" bug
This commit is contained in:
oobabooga 2025-04-17 05:47:40 -07:00
parent 38dc09dca5
commit 0ef1b8f8b4
3 changed files with 2 additions and 4 deletions

View file

@@ -172,11 +172,11 @@ def infer_loader(model_name, model_settings, hf_quant_method=None):
elif hf_quant_method == 'exl3':
loader = 'ExLlamav3_HF'
elif hf_quant_method in ['exl2', 'gptq']:
loader = 'ExLlamav2_HF'
loader = 'ExLlamav2'
elif re.match(r'.*exl3', model_name.lower()):
loader = 'ExLlamav3_HF'
elif re.match(r'.*exl2', model_name.lower()):
loader = 'ExLlamav2_HF'
loader = 'ExLlamav2'
elif re.match(r'.*-hqq', model_name.lower()):
return 'HQQ'
else: