EXL2: add another torch.cuda.synchronize() call to prevent errors

oobabooga 2025-04-24 09:03:49 -07:00
parent b313adf653
commit f1b64df8dd

@@ -264,6 +264,11 @@ def apply_stopping_strings(reply, all_stop_strings):
 
 
 def get_reply_from_output_ids(output_ids, state=None, starting_from=0):
+    import torch
+
+    if torch.cuda.is_available():
+        torch.cuda.synchronize()
+
     reply = decode(output_ids[starting_from:], state['skip_special_tokens'] if state else True)
 
     # Handle tokenizers that do not add the leading space for the first token
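The added guard makes decoding wait for any in-flight CUDA kernels before the generated token IDs are read on the host. A minimal sketch of that synchronize-before-read pattern, outside the repository's code: the helper name and its argument below are hypothetical, used only to illustrate the ordering the commit enforces.

import torch

def read_token_ids(output_ids):
    # Hypothetical helper, not part of text-generation-webui.
    # `output_ids` is assumed to be a CUDA tensor of generated token IDs.
    if torch.cuda.is_available():
        # Block the host until all queued kernels on the current device
        # have finished, so the device-to-host copy below sees final values.
        torch.cuda.synchronize()
    return output_ids.tolist()  # .tolist() performs an implicit device-to-host copy

In the patched function, the same check-then-synchronize pair runs just before decode(), which performs the equivalent host-side read of output_ids.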