From 9dd04b86ce407507bcaf0862b97aadc64b6e62a6 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 28 Mar 2026 06:17:57 -0700
Subject: [PATCH] Suppress EOS token at logit level for ExLlamav3 when
 ban_eos_token is set

---
 modules/exllamav3.py | 9 +++++++++
 1 file changed, 9 insertions(+)

diff --git a/modules/exllamav3.py b/modules/exllamav3.py
index 75c76c7c..f873503a 100644
--- a/modules/exllamav3.py
+++ b/modules/exllamav3.py
@@ -423,6 +423,15 @@ class Exllamav3Model:
         if logit_bias:
             filters.append(LogitBiasFilter(self.tokenizer, logit_bias))
 
+        # Suppress EOS tokens via logit bias so they are never sampled
+        if state['ban_eos_token']:
+            eos_bias = {}
+            for eos_id in self.config.eos_token_id_list:
+                if eos_id is not None:
+                    eos_bias[str(eos_id)] = float('-inf')
+            if eos_bias:
+                filters.append(LogitBiasFilter(self.tokenizer, eos_bias))
+
         # Logprobs support (OpenAI API)
         logprobs = state.get('logprobs', 0) or 0
         return_top_tokens = logprobs if logprobs > 0 else 0