New llama.cpp loader (#6846)

2026-04-04 14:17:28 +00:00 · 2025-04-18 09:59:37 -03:00 · 2025-04-18 09:59:37 -03:00 · ae54d8faaa
commit ae54d8faaa
parent 5c2f8d828e
23 changed files with 471 additions and 999 deletions
--- a/modules/evaluate.py
+++ b/modules/evaluate.py
@@ -40,17 +40,13 @@ def calculate_perplexity(models, input_dataset, stride, _max_length):
    '''

    if shared.args.loader == "llama.cpp":
-        logger.error("llamacpp_HF is required for perplexity evaluation with GGUF models. Please reload the model with llamacpp_HF instead of llama.cpp.")
+        logger.error("Perplexity evaluation is not implemented for the llama.cpp loader.")
        raise ValueError

    if shared.args.loader == "ExLlamav2":
        logger.error("ExLlamav2_HF is required for perplexity evaluation with EXL2 models. Please reload the model with ExLlamav2_HF instead of ExLlamav2.")
        raise ValueError

-    if shared.args.loader == "llamacpp_HF" and not shared.args.logits_all:
-        logger.error("--logits_all is required for perplexity evaluation with GGUF models. Please reload the model with that option set/checked.")
-        raise ValueError
-
    if not shared.args.no_use_fast:
        logger.warning("--no_use_fast is not set. If tokenizing the input dataset takes a long time, try reloading the model with that option set/checked.")