From 1972479610f4b1482912ff012469e8ab9cbaa908 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Tue, 19 Aug 2025 06:48:22 -0700
Subject: [PATCH] Add the TP option to exllamav3_HF

---
 modules/exllamav3_hf.py | 5 +++++
 modules/loaders.py      | 2 ++
 2 files changed, 7 insertions(+)

diff --git a/modules/exllamav3_hf.py b/modules/exllamav3_hf.py
index 1254ff5d..d9f4ed57 100644
--- a/modules/exllamav3_hf.py
+++ b/modules/exllamav3_hf.py
@@ -74,6 +74,11 @@ class Exllamav3HF(PreTrainedModel, GenerationMixin):
             split = [float(alloc) for alloc in shared.args.gpu_split.split(",")]
             load_params['use_per_device'] = split
 
+        # Tensor-parallelism
+        if shared.args.enable_tp:
+            load_params['tensor_p'] = True
+            load_params['tp_backend'] = shared.args.tp_backend
+
         self.ex_model.load(**load_params)
         self.past_seq = None
         self.max_tokens = max_tokens
diff --git a/modules/loaders.py b/modules/loaders.py
index 295db1e7..f88e976d 100644
--- a/modules/loaders.py
+++ b/modules/loaders.py
@@ -56,6 +56,8 @@ loaders_and_params = OrderedDict({
         'cfg_cache',
         'trust_remote_code',
         'no_use_fast',
+        'enable_tp',
+        'tp_backend',
     ],
     'ExLlamav3': [
         'ctx_size',