Revert "Use ExLlamaV2 (instead of the HF one) for EXL2 models for now"

This reverts commit 0ef1b8f8b4.
This commit is contained in:
oobabooga 2025-04-17 21:31:32 -07:00
parent 0ef1b8f8b4
commit 2c2d453c8c
3 changed files with 4 additions and 2 deletions

View file

@@ -172,11 +172,11 @@ def infer_loader(model_name, model_settings, hf_quant_method=None):
elif hf_quant_method == 'exl3':
loader = 'ExLlamav3_HF'
elif hf_quant_method in ['exl2', 'gptq']:
loader = 'ExLlamav2'
loader = 'ExLlamav2_HF'
elif re.match(r'.*exl3', model_name.lower()):
loader = 'ExLlamav3_HF'
elif re.match(r'.*exl2', model_name.lower()):
loader = 'ExLlamav2'
loader = 'ExLlamav2_HF'
elif re.match(r'.*-hqq', model_name.lower()):
return 'HQQ'
else: