Mirror of https://github.com/oobabooga/text-generation-webui.git, synced 2025-12-06 07:12:10 +01:00

Ignore add_bos_token in instruct prompts, let the jinja2 template decide
This commit is contained in:
parent 0f3a88057c
commit 635e6efd18
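
For context: many instruct chat templates already emit the BOS token themselves, so prepending it again in encode() would double it. A minimal sketch of that behavior, using jinja2 directly with a made-up Llama-style template (the template text and token strings below are illustrative, not taken from this repo):

from jinja2 import Template

# Hypothetical instruct template; real chat templates ship with the model's tokenizer.
template = Template(
    "{{ bos_token }}{% for m in messages %}"
    "[{{ m['role'] }}]: {{ m['content'] }}\n"
    "{% endfor %}"
)

prompt = template.render(
    bos_token='<s>',
    messages=[{'role': 'user', 'content': 'Hello'}],
)
print(prompt)
# <s>[user]: Hello
# The rendered prompt already starts with BOS, so encode() must not add another.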

@@ -643,6 +643,10 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
     output = apply_extensions('history', output)
     state = apply_extensions('state', state)

+    # Let the jinja2 template handle the BOS token
+    if state['mode'] in ['instruct', 'chat-instruct']:
+        state['add_bos_token'] = False
+
     # Initialize metadata if not present
     if 'metadata' not in output:
         output['metadata'] = {}
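
A standalone sketch of the override above (the state dict here is a stand-in for the webui's real state): in instruct-style modes the UI setting is forced off before encoding, so whether BOS appears is left entirely to the chat template.

# Stand-in for the webui state dict; only the two relevant keys are shown.
state = {'mode': 'instruct', 'add_bos_token': True}

# Mirror of the added check: instruct-style modes defer BOS handling to the template.
if state['mode'] in ['instruct', 'chat-instruct']:
    state['add_bos_token'] = False

print(state['add_bos_token'])  # False -> encode() will not prepend its own BOS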

@@ -134,7 +134,6 @@ def encode(prompt, add_special_tokens=True, add_bos_token=True, truncation_lengt
             input_ids = np.array(input_ids).reshape(1, len(input_ids))
     else:
         input_ids = shared.tokenizer.encode(str(prompt), return_tensors='pt', add_special_tokens=add_special_tokens)

     if hasattr(shared.tokenizer, 'bos_token_id') and shared.tokenizer.bos_token_id is not None:
         if add_bos_token:
             # Add BOS token if missing
@@ -142,13 +141,9 @@ def encode(prompt, add_special_tokens=True, add_bos_token=True, truncation_lengt
                 bos_tensor = torch.tensor([[shared.tokenizer.bos_token_id]])
                 input_ids = torch.cat((bos_tensor, input_ids), 1)

-            # Prevent double BOS tokens from jinja templates
-            while len(input_ids[0]) > 1 and input_ids[0][0] == shared.tokenizer.bos_token_id and input_ids[0][1] == shared.tokenizer.bos_token_id:
-                input_ids = input_ids[:, 1:]
-        else:
-            # Remove BOS tokens when not wanted
-            while len(input_ids[0]) > 0 and input_ids[0][0] == shared.tokenizer.bos_token_id:
-                input_ids = input_ids[:, 1:]
+        # Always prevent double BOS tokens (regardless of add_bos_token setting)
+        while len(input_ids[0]) > 1 and input_ids[0][0] == shared.tokenizer.bos_token_id and input_ids[0][1] == shared.tokenizer.bos_token_id:
+            input_ids = input_ids[:, 1:]

     if truncation_length is not None:
         input_ids = input_ids[:, -truncation_length:]
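
The dedup loop above can be exercised in isolation. A small sketch with an assumed BOS id of 1 (Llama-style) and made-up token ids:

import torch

bos_token_id = 1  # assumed BOS id; Llama tokenizers use 1 for <s>
# Doubled BOS: one from the chat template, one prepended by the tokenizer.
input_ids = torch.tensor([[1, 1, 15043, 3186]])

# Same loop as in encode(): strip leading BOS tokens until only one remains.
while len(input_ids[0]) > 1 and input_ids[0][0] == bos_token_id and input_ids[0][1] == bos_token_id:
    input_ids = input_ids[:, 1:]

print(input_ids)  # tensor([[    1, 15043,  3186]]) -- a single BOS is kept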

@@ -84,7 +84,7 @@ def create_ui():

     shared.gradio['auto_max_new_tokens'] = gr.Checkbox(value=shared.settings['auto_max_new_tokens'], label='auto_max_new_tokens', info='Expand max_new_tokens to the available context length.')
     shared.gradio['ban_eos_token'] = gr.Checkbox(value=shared.settings['ban_eos_token'], label='Ban the eos_token', info='Forces the model to never end the generation prematurely.')
-    shared.gradio['add_bos_token'] = gr.Checkbox(value=shared.settings['add_bos_token'], label='Add the bos_token to the beginning of prompts', info='Disabling this can make the replies more creative.')
+    shared.gradio['add_bos_token'] = gr.Checkbox(value=shared.settings['add_bos_token'], label='Add the bos_token to the beginning of prompts', info='Only applies to text completion (notebook). In chat mode, templates control BOS tokens.')
     shared.gradio['skip_special_tokens'] = gr.Checkbox(value=shared.settings['skip_special_tokens'], label='Skip special tokens', info='Some specific models need this unset.')
     shared.gradio['stream'] = gr.Checkbox(value=shared.settings['stream'], label='Activate text streaming')
     shared.gradio['static_cache'] = gr.Checkbox(value=shared.settings['static_cache'], label='Static KV cache', info='Use a static cache for improved performance.')