Add ExLlamaV3 support (#6832)

This commit is contained in:
oobabooga 2025-04-09 00:07:08 -03:00 committed by GitHub
parent 0b3503c91f
commit 8b8d39ec4e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 322 additions and 54 deletions

View file

@ -69,8 +69,9 @@ def load_model(model_name, loader=None):
'Transformers': huggingface_loader,
'llama.cpp': llamacpp_loader,
'llamacpp_HF': llamacpp_HF_loader,
'ExLlamav2': ExLlamav2_loader,
'ExLlamav3_HF': ExLlamav3_HF_loader,
'ExLlamav2_HF': ExLlamav2_HF_loader,
'ExLlamav2': ExLlamav2_loader,
'HQQ': HQQ_loader,
'TensorRT-LLM': TensorRT_LLM_loader,
}
@ -304,11 +305,10 @@ def llamacpp_HF_loader(model_name):
return model
def ExLlamav2_loader(model_name):
from modules.exllamav2 import Exllamav2Model
def ExLlamav3_HF_loader(model_name):
from modules.exllamav3_hf import Exllamav3HF
model, tokenizer = Exllamav2Model.from_pretrained(model_name)
return model, tokenizer
return Exllamav3HF.from_pretrained(model_name)
def ExLlamav2_HF_loader(model_name):
@ -317,6 +317,13 @@ def ExLlamav2_HF_loader(model_name):
return Exllamav2HF.from_pretrained(model_name)
def ExLlamav2_loader(model_name):
    """Load *model_name* with the ExLlamaV2 backend.

    Returns the ``(model, tokenizer)`` pair produced by
    ``Exllamav2Model.from_pretrained``.
    """
    # Local import: the ExLlamaV2 backend is only pulled in when this
    # loader is actually selected.
    from modules.exllamav2 import Exllamav2Model

    return Exllamav2Model.from_pretrained(model_name)
def HQQ_loader(model_name):
try:
from hqq.core.quantize import HQQBackend, HQQLinear