From 3e6bd1a310217c89fa5dff23c34e92ae43f1acb9 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 12 Mar 2026 14:30:51 -0700 Subject: [PATCH] UI: Prepend thinking tag when template appends it to prompt Makes Qwen models have a thinking block straight away during streaming. --- modules/chat.py | 14 ++++++++++++++ 1 file changed, 14 insertions(+) diff --git a/modules/chat.py b/modules/chat.py index d3cd0cae..e4d5dd30 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -28,6 +28,7 @@ from modules.html_generator import ( ) from modules.image_utils import open_image_safely from modules.logging_colors import logger +from modules.reasoning import THINKING_FORMATS from modules.text_generation import ( generate_reply, get_encoded_length, @@ -986,10 +987,23 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess # Add timestamp for assistant's response at the start of generation update_message_metadata(output['metadata'], "assistant", row_idx, timestamp=get_current_timestamp(), model_name=shared.model_name) + # Detect if the template appended a thinking start tag to the prompt + thinking_prefix = None + if not _continue: + stripped_prompt = prompt.rstrip('\n') + for start_tag, end_tag, content_tag in THINKING_FORMATS: + if start_tag is not None and stripped_prompt.endswith(start_tag): + thinking_prefix = start_tag + break + # Generate reply = None for j, reply in enumerate(generate_reply(prompt, state, stopping_strings=stopping_strings, is_chat=True, for_ui=for_ui)): + # Prepend thinking tag if the template appended it to the prompt + if thinking_prefix: + reply = thinking_prefix + reply + # Extract the reply if state['mode'] in ['chat', 'chat-instruct']: if not _continue: