ExLlamaV3: Enable the --enable-tp option, add a --tp-backend option

This commit is contained in:
oobabooga 2025-08-17 13:19:11 -07:00
parent d771ca4a13
commit dbabe67e77
5 changed files with 16 additions and 2 deletions

View file

@@ -91,6 +91,11 @@ class Exllamav3Model:
split = [float(alloc) for alloc in shared.args.gpu_split.split(",")]
load_params['use_per_device'] = split
# Tensor-parallelism
if shared.args.enable_tp:
load_params['tensor_p'] = True
load_params['tp_backend'] = shared.args.tp_backend
model.load(**load_params)
tokenizer = Tokenizer.from_config(config)