From f1b64df8ddd79833e685b22ed7447da86b5d7e46 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Thu, 24 Apr 2025 09:03:49 -0700
Subject: [PATCH] EXL2: add another torch.cuda.synchronize() call to prevent errors

---
 modules/text_generation.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/modules/text_generation.py b/modules/text_generation.py
index 40046eb2..4e3d1d7a 100644
--- a/modules/text_generation.py
+++ b/modules/text_generation.py
@@ -264,6 +264,11 @@ def apply_stopping_strings(reply, all_stop_strings):
 
 
 def get_reply_from_output_ids(output_ids, state=None, starting_from=0):
+    import torch
+
+    if torch.cuda.is_available():
+        torch.cuda.synchronize()
+
     reply = decode(output_ids[starting_from:], state['skip_special_tokens'] if state else True)
 
     # Handle tokenizers that do not add the leading space for the first token