UI: Prepend thinking tag when template appends it to prompt

Makes models whose template does this, such as Qwen, show a thinking block from the very start of streaming.
2026-03-18 03:14:39 +01:00 · 2026-03-12 14:30:51 -07:00 · 2026-03-12 14:30:51 -07:00 · 3e6bd1a310
parent 9a7428b627
commit 3e6bd1a310
1 changed files with 14 additions and 0 deletions
--- a/modules/chat.py
+++ b/modules/chat.py
@@ -28,6 +28,7 @@ from modules.html_generator import (
 )
 from modules.image_utils import open_image_safely
 from modules.logging_colors import logger
+from modules.reasoning import THINKING_FORMATS
 from modules.text_generation import (
    generate_reply,
    get_encoded_length,
@@ -986,10 +987,23 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
    # Add timestamp for assistant's response at the start of generation
    update_message_metadata(output['metadata'], "assistant", row_idx, timestamp=get_current_timestamp(), model_name=shared.model_name)

+    # Detect if the template appended a thinking start tag to the prompt
+    thinking_prefix = None
+    if not _continue:
+        stripped_prompt = prompt.rstrip('\n')
+        for start_tag, end_tag, content_tag in THINKING_FORMATS:
+            if start_tag is not None and stripped_prompt.endswith(start_tag):
+                thinking_prefix = start_tag
+                break
+
    # Generate
    reply = None
    for j, reply in enumerate(generate_reply(prompt, state, stopping_strings=stopping_strings, is_chat=True, for_ui=for_ui)):

+        # Prepend thinking tag if the template appended it to the prompt
+        if thinking_prefix:
+            reply = thinking_prefix + reply
+
        # Extract the reply
        if state['mode'] in ['chat', 'chat-instruct']:
            if not _continue: