mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2026-04-07 23:53:40 +00:00
TensorRT-LLM: Migrate from ModelRunner to LLM API, add concurrent API request support
This commit is contained in:
parent
9824c82cb6
commit
f52d9336e5
7 changed files with 50 additions and 89 deletions
|
|
@@ -114,7 +114,7 @@ def TensorRT_LLM_loader(model_name):
|
|||
raise ModuleNotFoundError("Failed to import 'tensorrt_llm'. Please install it manually following the instructions in the TensorRT-LLM GitHub repository.")
|
||||
|
||||
model = TensorRTLLMModel.from_pretrained(model_name)
|
||||
return model
|
||||
return model, model.tokenizer
|
||||
|
||||
|
||||
def unload_model(keep_model_name=False):
|
||||
|
|
@@ -124,7 +124,7 @@ def unload_model(keep_model_name=False):
|
|||
model_class_name = shared.model.__class__.__name__
|
||||
is_llamacpp = (model_class_name == 'LlamaServer')
|
||||
|
||||
if model_class_name in ['Exllamav3Model', 'Exllamav3HF']:
|
||||
if model_class_name in ['Exllamav3Model', 'Exllamav3HF', 'TensorRTLLMModel']:
|
||||
shared.model.unload()
|
||||
|
||||
shared.model = shared.tokenizer = None
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue