Add the TP option to exllamav3_HF

This commit is contained in:
oobabooga 2025-08-19 06:48:22 -07:00
parent e0f5905a97
commit 1972479610
2 changed files with 7 additions and 0 deletions

View file

@@ -74,6 +74,11 @@ class Exllamav3HF(PreTrainedModel, GenerationMixin):
split = [float(alloc) for alloc in shared.args.gpu_split.split(",")]
load_params['use_per_device'] = split
# Tensor-parallelism
if shared.args.enable_tp:
load_params['tensor_p'] = True
load_params['tp_backend'] = shared.args.tp_backend
self.ex_model.load(**load_params)
self.past_seq = None
self.max_tokens = max_tokens

View file

@@ -56,6 +56,8 @@ loaders_and_params = OrderedDict({
'cfg_cache',
'trust_remote_code',
'no_use_fast',
'enable_tp',
'tp_backend',
],
'ExLlamav3': [
'ctx_size',