Remove ExLlamaV2 backend

- archived upstream: 7dc12af3a8
- replaced by ExLlamaV3, which has much better quantization accuracy
oobabooga 2026-03-05 13:57:21 -08:00
parent 134ac8fc29
commit 2f08dce7b0
19 changed files with 22 additions and 713 deletions


@@ -20,8 +20,6 @@ def load_model(model_name, loader=None):
         'Transformers': transformers_loader,
         'ExLlamav3_HF': ExLlamav3_HF_loader,
         'ExLlamav3': ExLlamav3_loader,
-        'ExLlamav2_HF': ExLlamav2_HF_loader,
-        'ExLlamav2': ExLlamav2_loader,
         'TensorRT-LLM': TensorRT_LLM_loader,
     }
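
The hunk above trims the loader map down to the remaining backends. As a minimal, self-contained sketch of the dispatch pattern (the stub loader and the error handling are illustrative assumptions, not the project's actual code):

# Minimal sketch of the loader dispatch implied by the hunk above. The stub
# loader and the ValueError are assumptions; the real loaders live in
# modules/ and return (model, tokenizer).
def _stub_loader(model_name):
    # Stand-in for ExLlamav3_loader and friends.
    return object(), object()


def load_model(model_name, loader=None):
    loaders = {
        'Transformers': _stub_loader,
        'ExLlamav3_HF': _stub_loader,
        'ExLlamav3': _stub_loader,
        'TensorRT-LLM': _stub_loader,
    }
    if loader not in loaders:
        raise ValueError(f"Unknown loader: {loader!r}")

    return loaders[loader](model_name)


model, tokenizer = load_model('my-model', loader='ExLlamav3')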
@@ -109,19 +107,6 @@ def ExLlamav3_loader(model_name):
     return model, tokenizer
-def ExLlamav2_HF_loader(model_name):
-    from modules.exllamav2_hf import Exllamav2HF
-    return Exllamav2HF.from_pretrained(model_name)
-def ExLlamav2_loader(model_name):
-    from modules.exllamav2 import Exllamav2Model
-    model, tokenizer = Exllamav2Model.from_pretrained(model_name)
-    return model, tokenizer
 def TensorRT_LLM_loader(model_name):
     try:
         from modules.tensorrt_llm import TensorRTLLMModel
@@ -141,8 +126,6 @@ def unload_model(keep_model_name=False):
     if model_class_name in ['Exllamav3Model', 'Exllamav3HF']:
         shared.model.unload()
-    elif model_class_name in ['Exllamav2Model', 'Exllamav2HF'] and hasattr(shared.model, 'unload'):
-        shared.model.unload()
     shared.model = shared.tokenizer = None
     shared.lora_names = []
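
With the ExLlamaV2 branch gone, only the ExLlamaV3 backends keep an explicit unload() hook in unload_model. A rough, runnable sketch of that teardown (the shared container and the model class are stand-ins so the snippet runs on its own, not the project's real modules):

# Rough sketch of the simplified teardown after this commit.
class Exllamav3Model:
    # Same class name as the real backend so the branch below fires.
    def unload(self):
        print("freeing ExLlamaV3 weights")


class shared:
    model = Exllamav3Model()
    tokenizer = object()
    lora_names = ['example-lora']


def unload_model(keep_model_name=False):
    model_class_name = shared.model.__class__.__name__
    # Only the ExLlamaV3 backends are checked now.
    if model_class_name in ['Exllamav3Model', 'Exllamav3HF']:
        shared.model.unload()

    shared.model = shared.tokenizer = None
    shared.lora_names = []


unload_model()
assert shared.model is None and shared.tokenizer is None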