New llama.cpp loader (#6846)

This commit is contained in:
oobabooga 2025-04-18 09:59:37 -03:00 committed by GitHub
parent 5c2f8d828e
commit ae54d8faaa
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
23 changed files with 471 additions and 999 deletions

View file

@@ -40,17 +40,13 @@ def calculate_perplexity(models, input_dataset, stride, _max_length):
'''
if shared.args.loader == "llama.cpp":
logger.error("llamacpp_HF is required for perplexity evaluation with GGUF models. Please reload the model with llamacpp_HF instead of llama.cpp.")
logger.error("Perplexity evaluation is not implemented for the llama.cpp loader.")
raise ValueError
if shared.args.loader == "ExLlamav2":
logger.error("ExLlamav2_HF is required for perplexity evaluation with EXL2 models. Please reload the model with ExLlamav2_HF instead of ExLlamav2.")
raise ValueError
if shared.args.loader == "llamacpp_HF" and not shared.args.logits_all:
logger.error("--logits_all is required for perplexity evaluation with GGUF models. Please reload the model with that option set/checked.")
raise ValueError
if not shared.args.no_use_fast:
logger.warning("--no_use_fast is not set. If tokenizing the input dataset takes a long time, try reloading the model with that option set/checked.")