Mirror of https://github.com/oobabooga/text-generation-webui.git (synced 2026-04-04 14:17:28 +00:00)
Add ExLlamaV3 support (#6832)
This commit is contained in:
parent 0b3503c91f
commit 8b8d39ec4e
13 changed files with 322 additions and 54 deletions
|
|
@@ -69,8 +69,9 @@ def load_model(model_name, loader=None):
|
|||
'Transformers': huggingface_loader,
|
||||
'llama.cpp': llamacpp_loader,
|
||||
'llamacpp_HF': llamacpp_HF_loader,
|
||||
'ExLlamav2': ExLlamav2_loader,
|
||||
'ExLlamav3_HF': ExLlamav3_HF_loader,
|
||||
'ExLlamav2_HF': ExLlamav2_HF_loader,
|
||||
'ExLlamav2': ExLlamav2_loader,
|
||||
'HQQ': HQQ_loader,
|
||||
'TensorRT-LLM': TensorRT_LLM_loader,
|
||||
}
|
||||
|
|
@@ -304,11 +305,10 @@ def llamacpp_HF_loader(model_name):
|
|||
return model
|
||||
|
||||
|
||||
def ExLlamav2_loader(model_name):
|
||||
from modules.exllamav2 import Exllamav2Model
|
||||
def ExLlamav3_HF_loader(model_name):
|
||||
from modules.exllamav3_hf import Exllamav3HF
|
||||
|
||||
model, tokenizer = Exllamav2Model.from_pretrained(model_name)
|
||||
return model, tokenizer
|
||||
return Exllamav3HF.from_pretrained(model_name)
|
||||
|
||||
|
||||
def ExLlamav2_HF_loader(model_name):
|
||||
|
|
@@ -317,6 +317,13 @@ def ExLlamav2_HF_loader(model_name):
|
|||
return Exllamav2HF.from_pretrained(model_name)
|
||||
|
||||
|
||||
def ExLlamav2_loader(model_name):
|
||||
from modules.exllamav2 import Exllamav2Model
|
||||
|
||||
model, tokenizer = Exllamav2Model.from_pretrained(model_name)
|
||||
return model, tokenizer
|
||||
|
||||
|
||||
def HQQ_loader(model_name):
|
||||
try:
|
||||
from hqq.core.quantize import HQQBackend, HQQLinear
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue