Use ExLlamav3 instead of ExLlamav3_HF by default for EXL3 models

This commit is contained in:
oobabooga 2025-08-09 07:26:59 -07:00
parent d9db8f63a7
commit fa9be444fa

View file

@@ -211,11 +211,11 @@ def infer_loader(model_name, model_settings, hf_quant_method=None):
elif re.match(r'.*\.gguf', model_name.lower()):
loader = 'llama.cpp'
elif hf_quant_method == 'exl3':
-loader = 'ExLlamav3_HF'
+loader = 'ExLlamav3'
elif hf_quant_method in ['exl2', 'gptq']:
loader = 'ExLlamav2_HF'
elif re.match(r'.*exl3', model_name.lower()):
-loader = 'ExLlamav3_HF'
+loader = 'ExLlamav3'
elif re.match(r'.*exl2', model_name.lower()):
loader = 'ExLlamav2_HF'
else: