diff --git a/extensions/openai/completions.py b/extensions/openai/completions.py
index eb1702a8..0eb0cd27 100644
--- a/extensions/openai/completions.py
+++ b/extensions/openai/completions.py
@@ -13,6 +13,7 @@ from extensions.openai.errors import InvalidRequestError
from extensions.openai.typing import ToolDefinition
from extensions.openai.utils import debug_msg, getToolCallId, parseToolCall
from modules import shared
+from modules.reasoning import extract_reasoning
from modules.chat import (
generate_chat_prompt,
generate_chat_reply,
@@ -553,6 +554,14 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
else:
yield chunk
else:
+ reasoning, content = extract_reasoning(answer) if not tool_calls else (None, answer)
+ message = {
+ "role": "assistant",
+ "refusal": None,
+ "content": None if tool_calls else content,
+ **({"reasoning_content": reasoning} if reasoning else {}),
+ **({"tool_calls": tool_calls} if tool_calls else {}),
+ }
resp = {
"id": cmpl_id,
"object": object_type,
@@ -562,7 +571,7 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
resp_list: [{
"index": 0,
"finish_reason": stop_reason,
- "message": {"role": "assistant", "refusal": None, "content": None if tool_calls else answer, **({"tool_calls": tool_calls} if tool_calls else {})},
+ "message": message,
"logprobs": None,
}],
"usage": {
diff --git a/modules/html_generator.py b/modules/html_generator.py
index 472a9ea0..4d9904fb 100644
--- a/modules/html_generator.py
+++ b/modules/html_generator.py
@@ -10,6 +10,7 @@ import markdown
from PIL import Image, ImageOps
from modules import shared
+from modules.reasoning import extract_reasoning
from modules.sane_markdown_lists import SaneListExtension
from modules.utils import get_available_chat_styles
@@ -108,66 +109,9 @@ def replace_blockquote(m):
return m.group().replace('\n', '\n> ').replace('\\begin{blockquote}', '').replace('\\end{blockquote}', '')
-# Thinking block format definitions: (start_tag, end_tag, content_start_tag)
-# Use None for start_tag to match from beginning (end-only formats should be listed last)
-THINKING_FORMATS = [
- ('', '', None),
- ('<|channel|>analysis<|message|>', '<|end|>', '<|start|>assistant<|channel|>final<|message|>'),
- ('', '', None),
- ('<|think|>', '<|end|>', '<|content|>'), # Solar Open
- ('Thinking Process:', '', None), # Qwen3.5 verbose thinking outside tags
- (None, '', None), # End-only variant (e.g., Qwen3-next)
-]
-
-
def extract_thinking_block(string):
- """Extract thinking blocks from the beginning of a string."""
- if not string:
- return None, string
-
- for start_tag, end_tag, content_tag in THINKING_FORMATS:
- end_esc = html.escape(end_tag)
- content_esc = html.escape(content_tag) if content_tag else None
-
- if start_tag is None:
- # End-only format: require end tag, start from beginning
- end_pos = string.find(end_esc)
- if end_pos == -1:
- continue
- thought_start = 0
- else:
- # Normal format: require start tag
- start_esc = html.escape(start_tag)
- start_pos = string.find(start_esc)
- if start_pos == -1:
- continue
- thought_start = start_pos + len(start_esc)
- end_pos = string.find(end_esc, thought_start)
-
- if end_pos == -1:
- # End tag missing - check if content tag can serve as fallback
- if content_esc:
- content_pos = string.find(content_esc, thought_start)
- if content_pos != -1:
- thought_end = content_pos
- content_start = content_pos + len(content_esc)
- else:
- thought_end = len(string)
- content_start = len(string)
- else:
- thought_end = len(string)
- content_start = len(string)
- else:
- thought_end = end_pos
- if content_esc:
- content_pos = string.find(content_esc, end_pos)
- content_start = content_pos + len(content_esc) if content_pos != -1 else end_pos + len(end_esc)
- else:
- content_start = end_pos + len(end_esc)
-
- return string[thought_start:thought_end], string[content_start:]
-
- return None, string
+ """Return (thinking, remaining_content) for an HTML-escaped string via extract_reasoning; thinking is None if no block is found."""
+ return extract_reasoning(string, html_escaped=True)
def build_thinking_block(thinking_content, message_id, has_remaining_content):
diff --git a/modules/reasoning.py b/modules/reasoning.py
new file mode 100644
index 00000000..12f8553d
--- /dev/null
+++ b/modules/reasoning.py
@@ -0,0 +1,71 @@
+import html as html_module
+
+# Thinking block formats: (start_tag, end_tag, content_start_tag). Tags must be non-empty,
+# because '' is found in any text. start_tag=None means end-only; list such entries last.
+THINKING_FORMATS = [
+    ('<think>', '</think>', None),  # Plain think tags
+    ('<|channel|>analysis<|message|>', '<|end|>', '<|start|>assistant<|channel|>final<|message|>'),
+    ('<seed:think>', '</seed:think>', None),  # Seed-OSS
+    ('<|think|>', '<|end|>', '<|content|>'),  # Solar Open
+    ('Thinking Process:', '</think>', None),  # Qwen3.5 verbose thinking outside tags
+    (None, '</think>', None),  # End-only variant (e.g., Qwen3-next)
+]
+
+
+def extract_reasoning(text, html_escaped=False):
+    """Split a reasoning ("thinking") block off a model reply.
+
+    THINKING_FORMATS entries are tried in order; the first whose start tag
+    (or end tag, for end-only entries) occurs in `text` is used. Text that
+    precedes the start tag is dropped from both returned parts.
+
+    Args:
+        text: Reply to scan; a falsy value yields (None, text).
+        html_escaped: If True, tags are passed through html.escape() before
+            searching, for text that has already been HTML-escaped.
+
+    Returns:
+        (reasoning, remainder). `reasoning` is None when no format matches.
+        An unclosed block runs to the content tag if found, else to the end
+        of `text`, which leaves an empty remainder.
+    """
+    if not text:
+        return None, text
+
+    def searchable(tag):
+        return html_module.escape(tag) if html_escaped else tag
+
+    for opener, closer, answer_marker in THINKING_FORMATS:
+        closer = searchable(closer)
+        answer_marker = searchable(answer_marker) if answer_marker else None
+
+        if opener is None:
+            # End-only format: reasoning starts at offset 0, end tag required.
+            begin = 0
+            close_at = text.find(closer)
+            if close_at < 0:
+                continue
+        else:
+            opener = searchable(opener)
+            open_at = text.find(opener)
+            if open_at < 0:
+                continue
+            begin = open_at + len(opener)
+            close_at = text.find(closer, begin)
+
+        # The content tag is searched from the end tag, or from the reasoning start when unclosed.
+        marker_from = begin if close_at < 0 else close_at
+        marker_at = text.find(answer_marker, marker_from) if answer_marker else -1
+        if close_at >= 0:
+            finish = close_at
+        else:
+            finish = marker_at if marker_at >= 0 else len(text)
+        if marker_at >= 0:
+            resume = marker_at + len(answer_marker)
+        elif close_at >= 0:
+            resume = close_at + len(closer)
+        else:
+            resume = len(text)
+        return text[begin:finish], text[resume:]
+
+    return None, text