From a289a92b9408e2542632ffa600ef57c373200aec Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 9 Aug 2025 17:10:58 -0700
Subject: [PATCH] Fix exllamav3 token count

---
 modules/exllamav3.py | 7 ++++---
 1 file changed, 4 insertions(+), 3 deletions(-)

diff --git a/modules/exllamav3.py b/modules/exllamav3.py
index e3a2d95a..268a64ec 100644
--- a/modules/exllamav3.py
+++ b/modules/exllamav3.py
@@ -267,9 +267,11 @@ class Exllamav3Model:
 
         input_ids = input_ids[:, -get_max_prompt_length(state):]
 
+        self._last_prompt_token_count = input_ids.shape[-1]
+
         # Determine max_new_tokens
         if state['auto_max_new_tokens']:
-            max_new_tokens = state['truncation_length'] - input_ids.shape[-1]
+            max_new_tokens = state['truncation_length'] - self._last_prompt_token_count
         else:
             max_new_tokens = state['max_new_tokens']
 
@@ -323,8 +325,7 @@ class Exllamav3Model:
 
     @property
     def last_prompt_token_count(self):
-        # This would need to be tracked during generation
-        return 0
+        return getattr(self, '_last_prompt_token_count', 0)
 
     def unload(self):
         logger.info("Unloading ExLlamaV3 model components...")