Attempt at fixing new exllamav3 loader undefined behavior when switching conversations

2026-03-07 14:13:49 +01:00 · 2025-08-09 14:10:41 -07:00 · 2025-08-09 14:10:41 -07:00 · d489eb589a
parent a6d6bee88c
commit d489eb589a
1 changed file with 1 addition and 3 deletions
--- a/modules/exllamav3.py
+++ b/modules/exllamav3.py
@@ -304,9 +304,7 @@ class Exllamav3Model:
                        response_text += chunk
                        yield response_text
        finally:
-            # No cleanup needed. MMEmbedding lifetime is managed by Python.
-            # Cache and page table resets are unnecessary and can cause token ID conflicts.
-            pass
+            self.generator.clear_queue()

    def generate(self, prompt, state):
        output = ""