Remove ExLlamaV2 backend

- archived upstream: 7dc12af3a8
- replaced by ExLlamaV3, which has much better quantization accuracy
This commit is contained in:
oobabooga 2026-03-05 13:57:21 -08:00
parent 134ac8fc29
commit 2f08dce7b0
19 changed files with 22 additions and 713 deletions

View file

@@ -141,7 +141,6 @@ def list_model_elements():
'compress_pos_emb',
'compute_dtype',
'quant_type',
'num_experts_per_token',
'load_in_8bit',
'load_in_4bit',
'attn_implementation',
@@ -154,12 +153,8 @@ def list_model_elements():
'numa',
'use_double_quant',
'bf16',
'autosplit',
'enable_tp',
'tp_backend',
'no_flash_attn',
'no_xformers',
'no_sdpa',
'cfg_cache',
'cpp_runner',
'no_use_fast',