Remove ExLlamaV2 backend

- archived upstream: 7dc12af3a8
- replaced by ExLlamaV3, which has much better quantization accuracy
2026-04-06 15:13:38 +00:00 · 2026-03-05 13:57:21 -08:00 · 2026-03-05 13:57:21 -08:00 · 2f08dce7b0
commit 2f08dce7b0
parent 134ac8fc29
19 changed files with 22 additions and 713 deletions
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -141,7 +141,6 @@ def list_model_elements():
        'compress_pos_emb',
        'compute_dtype',
        'quant_type',
-        'num_experts_per_token',
        'load_in_8bit',
        'load_in_4bit',
        'attn_implementation',
@@ -154,12 +153,8 @@ def list_model_elements():
        'numa',
        'use_double_quant',
        'bf16',
-        'autosplit',
        'enable_tp',
        'tp_backend',
-        'no_flash_attn',
-        'no_xformers',
-        'no_sdpa',
        'cfg_cache',
        'cpp_runner',
        'no_use_fast',