API: Add parallel request support for llama.cpp and ExLlamaV3

This commit is contained in:
oobabooga 2026-03-05 16:49:58 -08:00
parent 2f08dce7b0
commit 9824c82cb6
10 changed files with 198 additions and 63 deletions

View file

@@ -151,6 +151,7 @@ def list_model_elements():
'no_mmap',
'mlock',
'numa',
'parallel',
'use_double_quant',
'bf16',
'enable_tp',