Use ExLlamaV2 (instead of the HF one) for EXL2 models for now

It doesn't seem to have the "OverflowError" bug
This commit is contained in:
oobabooga 2025-04-17 05:47:40 -07:00
parent 38dc09dca5
commit 0ef1b8f8b4
3 changed files with 2 additions and 4 deletions

View file

@@ -110,7 +110,6 @@ loaders_and_params = OrderedDict({
'no_flash_attn',
'no_xformers',
'no_sdpa',
'exllamav2_info',
],
'HQQ': [
'hqq_backend',