TensorRT-LLM: Migrate from ModelRunner to LLM API, add concurrent API request support

This commit is contained in:
oobabooga 2026-03-05 18:09:45 -08:00
parent 9824c82cb6
commit f52d9336e5
7 changed files with 50 additions and 89 deletions

View file

@@ -157,7 +157,6 @@ def list_model_elements():
'enable_tp',
'tp_backend',
'cfg_cache',
'cpp_runner',
'no_use_fast',
'model_draft',
'draft_max',