Ignore add_bos_token in instruct prompts, let the jinja2 template decide

oobabooga 2025-07-10 07:04:47 -07:00
parent 0f3a88057c
commit 635e6efd18
3 changed files with 8 additions and 9 deletions
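
Background for the change (illustrative; not part of the commit): in instruct and chat-instruct modes the prompt is rendered through the model's jinja2 chat template, and many templates emit the BOS token themselves. Honoring a user-level add_bos_token on top of that produces a doubled BOS. A minimal sketch with a hypothetical Llama-2-style template:

    from jinja2 import Template

    # Hypothetical template; real ones ship in the model's tokenizer_config.json
    chat_template = Template("{{ bos_token }}[INST] {{ message }} [/INST]")
    prompt = chat_template.render(bos_token='<s>', message='Hello')
    print(prompt)  # <s>[INST] Hello [/INST] -- the template already supplies the BOS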

modules/chat.py

@@ -643,6 +643,10 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
     output = apply_extensions('history', output)
     state = apply_extensions('state', state)
 
+    # Let the jinja2 template handle the BOS token
+    if state['mode'] in ['instruct', 'chat-instruct']:
+        state['add_bos_token'] = False
+
     # Initialize metadata if not present
     if 'metadata' not in output:
         output['metadata'] = {}
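
Taken in isolation, the hunk above makes instruct-style sessions ignore the user's add_bos_token checkbox before tokenization, so any BOS comes solely from the chat template. A minimal sketch (the two-key dict stands in for the full webui state):

    # Stand-in for the webui state dict; only the relevant keys are shown.
    state = {'mode': 'instruct', 'add_bos_token': True}

    # Same override as in chatbot_wrapper()
    if state['mode'] in ['instruct', 'chat-instruct']:
        state['add_bos_token'] = False

    assert state['add_bos_token'] is False  # encode() will not prepend another BOS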

modules/text_generation.py

@@ -134,7 +134,6 @@ def encode(prompt, add_special_tokens=True, add_bos_token=True, truncation_lengt
         input_ids = np.array(input_ids).reshape(1, len(input_ids))
     else:
         input_ids = shared.tokenizer.encode(str(prompt), return_tensors='pt', add_special_tokens=add_special_tokens)
-
         if hasattr(shared.tokenizer, 'bos_token_id') and shared.tokenizer.bos_token_id is not None:
             if add_bos_token:
                 # Add BOS token if missing
@@ -142,13 +141,9 @@ def encode(prompt, add_special_tokens=True, add_bos_token=True, truncation_lengt
                     bos_tensor = torch.tensor([[shared.tokenizer.bos_token_id]])
                     input_ids = torch.cat((bos_tensor, input_ids), 1)
 
-                # Prevent double BOS tokens from jinja templates
-                while len(input_ids[0]) > 1 and input_ids[0][0] == shared.tokenizer.bos_token_id and input_ids[0][1] == shared.tokenizer.bos_token_id:
-                    input_ids = input_ids[:, 1:]
-            else:
-                # Remove BOS tokens when not wanted
-                while len(input_ids[0]) > 0 and input_ids[0][0] == shared.tokenizer.bos_token_id:
-                    input_ids = input_ids[:, 1:]
+            # Always prevent double BOS tokens (regardless of add_bos_token setting)
+            while len(input_ids[0]) > 1 and input_ids[0][0] == shared.tokenizer.bos_token_id and input_ids[0][1] == shared.tokenizer.bos_token_id:
+                input_ids = input_ids[:, 1:]
 
     if truncation_length is not None:
         input_ids = input_ids[:, -truncation_length:]
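
Two behavioral changes in encode(): the double-BOS guard now runs whether or not add_bos_token is set, and the old else branch that stripped every leading BOS when the setting was off is gone, so a single BOS supplied by the template survives. A sketch of the guard on made-up token ids (1 stands in for shared.tokenizer.bos_token_id):

    import torch

    bos_token_id = 1  # hypothetical; the real value comes from shared.tokenizer
    input_ids = torch.tensor([[1, 1, 15043, 3186]])  # template and tokenizer both added a BOS

    # Always prevent double BOS tokens (regardless of add_bos_token setting)
    while len(input_ids[0]) > 1 and input_ids[0][0] == bos_token_id and input_ids[0][1] == bos_token_id:
        input_ids = input_ids[:, 1:]

    print(input_ids)  # tensor([[    1, 15043,  3186]]) -- exactly one BOS is kept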

modules/ui_parameters.py

@@ -84,7 +84,7 @@ def create_ui():
                     shared.gradio['auto_max_new_tokens'] = gr.Checkbox(value=shared.settings['auto_max_new_tokens'], label='auto_max_new_tokens', info='Expand max_new_tokens to the available context length.')
                     shared.gradio['ban_eos_token'] = gr.Checkbox(value=shared.settings['ban_eos_token'], label='Ban the eos_token', info='Forces the model to never end the generation prematurely.')
-                    shared.gradio['add_bos_token'] = gr.Checkbox(value=shared.settings['add_bos_token'], label='Add the bos_token to the beginning of prompts', info='Disabling this can make the replies more creative.')
+                    shared.gradio['add_bos_token'] = gr.Checkbox(value=shared.settings['add_bos_token'], label='Add the bos_token to the beginning of prompts', info='Only applies to text completion (notebook). In chat mode, templates control BOS tokens.')
                     shared.gradio['skip_special_tokens'] = gr.Checkbox(value=shared.settings['skip_special_tokens'], label='Skip special tokens', info='Some specific models need this unset.')
                     shared.gradio['stream'] = gr.Checkbox(value=shared.settings['stream'], label='Activate text streaming')
                     shared.gradio['static_cache'] = gr.Checkbox(value=shared.settings['static_cache'], label='Static KV cache', info='Use a static cache for improved performance.')