diff --git a/modules/models.py b/modules/models.py
index ea85da84..60568063 100644
--- a/modules/models.py
+++ b/modules/models.py
@@ -257,7 +257,7 @@ def llamacpp_HF_loader(model_name):
     path = Path(f'{shared.args.model_dir}/{model_name}')
 
     # Check if a HF tokenizer is available for the model
-    if all((path / file).exists() for file in ['tokenizer.json', 'tokenizer_config.json']):
+    if all((path / file).exists() for file in ['tokenizer_config.json']):
         logger.info(f'Using tokenizer from: \"{path}\"')
     else:
         logger.error("Could not load the model because a tokenizer in Transformers format was not found.")
diff --git a/modules/models_settings.py b/modules/models_settings.py
index b4473275..659bc35d 100644
--- a/modules/models_settings.py
+++ b/modules/models_settings.py
@@ -153,6 +153,8 @@ def infer_loader(model_name, model_settings):
         loader = 'ExLlamav2_HF'
     elif (path_to_model / 'quant_config.json').exists() or re.match(r'.*-awq', model_name.lower()):
         loader = 'AutoAWQ'
+    elif len(list(path_to_model.glob('*.gguf'))) > 0 and path_to_model.is_dir() and (path_to_model / 'tokenizer_config.json').exists():
+        loader = 'llamacpp_HF'
     elif len(list(path_to_model.glob('*.gguf'))) > 0:
         loader = 'llama.cpp'
     elif re.match(r'.*\.gguf', model_name.lower()):
@@ -225,7 +227,7 @@ def apply_model_settings_to_state(model, state):
         loader = model_settings.pop('loader')
 
         # If the user is using an alternative loader for the same model type, let them keep using it
-        if not (loader == 'ExLlamav2_HF' and state['loader'] in ['GPTQ-for-LLaMa', 'ExLlamav2', 'AutoGPTQ']) and not (loader == 'llama.cpp' and state['loader'] in ['llamacpp_HF', 'ctransformers']):
+        if not (loader == 'ExLlamav2_HF' and state['loader'] in ['GPTQ-for-LLaMa', 'ExLlamav2', 'AutoGPTQ']) and not (loader == 'llama.cpp' and state['loader'] in ['ctransformers']):
             state['loader'] = loader
 
     for k in model_settings:
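
For reference, here is a minimal standalone sketch of the loader precedence that the `infer_loader` hunk above introduces: a model folder containing both `*.gguf` weights and a Transformers `tokenizer_config.json` is now matched to `llamacpp_HF` before the plain `llama.cpp` branch can claim it. This is not the project's code; the function name `pick_loader` and the final fallback value are illustrative only.

```python
from pathlib import Path


def pick_loader(path_to_model: Path) -> str:
    """Illustrative reduction of the GGUF-related branches in infer_loader()."""
    has_gguf = len(list(path_to_model.glob('*.gguf'))) > 0
    has_hf_tokenizer = (path_to_model / 'tokenizer_config.json').exists()

    if has_gguf and path_to_model.is_dir() and has_hf_tokenizer:
        # New branch from this diff: GGUF weights plus an HF tokenizer
        # config now default to the llamacpp_HF loader.
        return 'llamacpp_HF'
    elif has_gguf:
        # Unchanged fallback: GGUF weights alone still use llama.cpp.
        return 'llama.cpp'

    return 'unknown'  # stand-in for the remaining branches in infer_loader()
```

Note that branch order matters: the combined check must come before the bare `*.gguf` check, since any folder satisfying the new condition also satisfies the old one. This pairs with the `llamacpp_HF_loader` hunk, which now requires only `tokenizer_config.json` (not `tokenizer.json`) for the tokenizer to count as available.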