Mirror of https://github.com/oobabooga/text-generation-webui.git (synced 2025-12-06 07:12:10 +01:00)
Fix exllamav3 token count
This commit is contained in:
parent
d489eb589a
commit
a289a92b94
@@ -267,9 +267,11 @@ class Exllamav3Model:
         input_ids = input_ids[:, -get_max_prompt_length(state):]
 
+        self._last_prompt_token_count = input_ids.shape[-1]
+
         # Determine max_new_tokens
         if state['auto_max_new_tokens']:
-            max_new_tokens = state['truncation_length'] - input_ids.shape[-1]
+            max_new_tokens = state['truncation_length'] - self._last_prompt_token_count
         else:
             max_new_tokens = state['max_new_tokens']
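As a sanity check on the budget arithmetic this hunk refactors, here is a minimal standalone sketch. The concrete numbers are hypothetical; in the real code the window size comes from state['truncation_length'] and the prompt clip length from get_max_prompt_length(state):

# Hypothetical values; in the webui these come from state['truncation_length']
# and get_max_prompt_length(state).
truncation_length = 8192    # total context window
max_prompt_length = 7680    # window minus the reserved output budget

input_ids = list(range(9000))               # stand-in for a (1, seq_len) token tensor
input_ids = input_ids[-max_prompt_length:]  # clip the prompt to the allowed window

# What this commit starts recording at generation time:
last_prompt_token_count = len(input_ids)

# auto_max_new_tokens: whatever the prompt did not consume becomes the output budget
max_new_tokens = truncation_length - last_prompt_token_count
print(last_prompt_token_count, max_new_tokens)  # 7680 512

Because the count is stored right after truncation, the subtraction always uses the prompt length that was actually sent to the model.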
@@ -323,8 +325,7 @@ class Exllamav3Model:
 
     @property
     def last_prompt_token_count(self):
-        # This would need to be tracked during generation
-        return 0
+        return getattr(self, '_last_prompt_token_count', 0)
 
     def unload(self):
         logger.info("Unloading ExLlamaV3 model components...")
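The property change follows a common lazy-attribute pattern: the private attribute only exists after the first generation, so getattr with a default keeps the getter safe instead of raising AttributeError. A minimal standalone sketch; the class and values here are illustrative, not the webui's actual API:

import numpy as np

class TokenCountTracker:
    # Illustrative stand-in for Exllamav3Model's bookkeeping.

    def generate(self, input_ids):
        # Record how many prompt tokens were actually fed to the model;
        # input_ids is assumed to be a (batch, seq_len) array.
        self._last_prompt_token_count = input_ids.shape[-1]

    @property
    def last_prompt_token_count(self):
        # Safe before the first generate() call, when the private
        # attribute has not been created yet.
        return getattr(self, '_last_prompt_token_count', 0)

m = TokenCountTracker()
print(m.last_prompt_token_count)              # 0 before any generation
m.generate(np.zeros((1, 42), dtype=np.int64))
print(m.last_prompt_token_count)              # 42

This is why the old code returned a hard-coded 0: nothing ever set the backing attribute, so the UI's token count was always wrong until this commit started recording it in generate().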