mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2026-04-05 06:35:15 +00:00
Remove ExLlamaV2 backend
- archived upstream: 7dc12af3a8
- replaced by ExLlamaV3, which has much better quantization accuracy
This commit is contained in:
parent
134ac8fc29
commit
2f08dce7b0
19 changed files with 22 additions and 713 deletions
|
|
@ -20,8 +20,6 @@ def load_model(model_name, loader=None):
|
|||
'Transformers': transformers_loader,
|
||||
'ExLlamav3_HF': ExLlamav3_HF_loader,
|
||||
'ExLlamav3': ExLlamav3_loader,
|
||||
'ExLlamav2_HF': ExLlamav2_HF_loader,
|
||||
'ExLlamav2': ExLlamav2_loader,
|
||||
'TensorRT-LLM': TensorRT_LLM_loader,
|
||||
}
|
||||
|
||||
|
|
@ -109,19 +107,6 @@ def ExLlamav3_loader(model_name):
|
|||
return model, tokenizer
|
||||
|
||||
|
||||
def ExLlamav2_HF_loader(model_name):
    """Return an ``Exllamav2HF`` object built for *model_name*.

    The result is whatever ``Exllamav2HF.from_pretrained(model_name)``
    returns; it is handed back to the caller unchanged.
    """
    # Imported inside the function, so ``modules.exllamav2_hf`` is only
    # loaded when this loader is actually called.
    from modules.exllamav2_hf import Exllamav2HF

    hf_model = Exllamav2HF.from_pretrained(model_name)
    return hf_model
|
||||
|
||||
|
||||
def ExLlamav2_loader(model_name):
    """Return the ``(model, tokenizer)`` pair for *model_name*.

    ``Exllamav2Model.from_pretrained(model_name)`` is expected to yield
    exactly two items; they are unpacked and returned as a tuple.
    """
    # Imported inside the function, so ``modules.exllamav2`` is only
    # loaded when this loader is actually called.
    from modules.exllamav2 import Exllamav2Model

    loaded_model, loaded_tokenizer = Exllamav2Model.from_pretrained(model_name)
    return loaded_model, loaded_tokenizer
|
||||
|
||||
|
||||
def TensorRT_LLM_loader(model_name):
|
||||
try:
|
||||
from modules.tensorrt_llm import TensorRTLLMModel
|
||||
|
|
@ -141,8 +126,6 @@ def unload_model(keep_model_name=False):
|
|||
|
||||
if model_class_name in ['Exllamav3Model', 'Exllamav3HF']:
|
||||
shared.model.unload()
|
||||
elif model_class_name in ['Exllamav2Model', 'Exllamav2HF'] and hasattr(shared.model, 'unload'):
|
||||
shared.model.unload()
|
||||
|
||||
shared.model = shared.tokenizer = None
|
||||
shared.lora_names = []
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue