Fix exllamav2 randomly generating EOS tokens after the previous fix

This commit is contained in:
oobabooga 2025-04-18 05:42:38 -07:00
parent 2fc58ad935
commit 5c2f8d828e
2 changed files with 3 additions and 3 deletions

View file

@ -157,6 +157,9 @@ class Exllamav2HF(PreTrainedModel):
else:
self.past_seq = seq_tensor
if torch.cuda.is_available():
torch.cuda.synchronize()
loss = None
if labels is not None:
# Shift so that tokens < n predict n

View file

@ -263,9 +263,6 @@ def apply_stopping_strings(reply, all_stop_strings):
def get_reply_from_output_ids(output_ids, state=None, starting_from=0):
if torch.cuda.is_available():
torch.cuda.synchronize()
reply = decode(output_ids[starting_from:], state['skip_special_tokens'] if state else True)
# Handle tokenizers that do not add the leading space for the first token