Remove ExLlamaV2 backend

- archived upstream: 7dc12af3a8 - replaced by ExLlamaV3, which has much better quantization accuracy
2026-04-06 07:03:37 +00:00 · 2026-03-05 13:57:21 -08:00 · 2026-03-05 13:57:21 -08:00 · 2f08dce7b0
commit 2f08dce7b0
parent 134ac8fc29
19 changed files with 22 additions and 713 deletions
--- a/modules/evaluate.py
+++ b/modules/evaluate.py
@@ -46,10 +46,6 @@ def calculate_perplexity(models, input_dataset, stride, _max_length):
        logger.error("Perplexity evaluation is not implemented for the llama.cpp loader.")
        raise ValueError

-    if shared.args.loader == "ExLlamav2":
-        logger.error("ExLlamav2_HF is required for perplexity evaluation with EXL2 models. Please reload the model with ExLlamav2_HF instead of ExLlamav2.")
-        raise ValueError
-
    if not shared.args.no_use_fast:
        logger.warning("--no_use_fast is not set. If tokenizing the input dataset takes a long time, try reloading the model with that option set/checked.")