From 3e6bd1a310217c89fa5dff23c34e92ae43f1acb9 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Thu, 12 Mar 2026 14:30:51 -0700
Subject: [PATCH] UI: Prepend thinking tag when template appends it to prompt

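When the chat template ends the prompt with a thinking start tag (as with
Qwen models), prepend that tag to the streamed reply so the UI shows a
thinking block straight away during streaming.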
---
 modules/chat.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/modules/chat.py b/modules/chat.py
index d3cd0cae..e4d5dd30 100644
--- a/modules/chat.py
+++ b/modules/chat.py
@@ -28,6 +28,7 @@ from modules.html_generator import (
 )
 from modules.image_utils import open_image_safely
 from modules.logging_colors import logger
+from modules.reasoning import THINKING_FORMATS
 from modules.text_generation import (
     generate_reply,
     get_encoded_length,
@@ -986,10 +987,23 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
     # Add timestamp for assistant's response at the start of generation
     update_message_metadata(output['metadata'], "assistant", row_idx, timestamp=get_current_timestamp(), model_name=shared.model_name)
 
+    # Detect if the template appended a thinking start tag to the prompt
+    thinking_prefix = None
+    if not _continue:
+        stripped_prompt = prompt.rstrip('\n')
+        for start_tag, end_tag, content_tag in THINKING_FORMATS:
+            if start_tag is not None and stripped_prompt.endswith(start_tag):
+                thinking_prefix = start_tag
+                break
+
     # Generate
     reply = None
     for j, reply in enumerate(generate_reply(prompt, state, stopping_strings=stopping_strings, is_chat=True, for_ui=for_ui)):
 
+        # Prepend thinking tag if the template appended it to the prompt
+        if thinking_prefix:
+            reply = thinking_prefix + reply
+
         # Extract the reply
         if state['mode'] in ['chat', 'chat-instruct']:
             if not _continue: