API: Add reasoning_content field to non-streaming chat completions

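Extract thinking/reasoning blocks (e.g. <think>...</think>) from the generated answer into a separate reasoning_content field on the assistant message, matching the convention used by DeepSeek, llama.cpp, and SGLang. The field is omitted when no reasoning is extracted, and extraction is skipped when the response contains tool calls. The extraction logic moves from modules/html_generator.py into a new shared modules/reasoning.py (extract_reasoning); extract_thinking_block now delegates to it with html_escaped=True.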
2026-04-20 22:13:43 +00:00 · 2026-03-12 16:29:46 -03:00 · 2026-03-12 16:29:46 -03:00 · 2d0cc7726e
commit 2d0cc7726e
parent d45c9b3c59
3 changed files with 84 additions and 60 deletions
--- a/extensions/openai/completions.py
+++ b/extensions/openai/completions.py
@ -13,6 +13,7 @@ from extensions.openai.errors import InvalidRequestError
 from extensions.openai.typing import ToolDefinition
 from extensions.openai.utils import debug_msg, getToolCallId, parseToolCall
 from modules import shared
+from modules.reasoning import extract_reasoning
 from modules.chat import (
    generate_chat_prompt,
    generate_chat_reply,
@ -553,6 +554,14 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
        else:
            yield chunk
    else:
+        reasoning, content = extract_reasoning(answer) if not tool_calls else (None, answer)
+        message = {
+            "role": "assistant",
+            "refusal": None,
+            "content": None if tool_calls else content,
+            **({"reasoning_content": reasoning} if reasoning else {}),
+            **({"tool_calls": tool_calls} if tool_calls else {}),
+        }
        resp = {
            "id": cmpl_id,
            "object": object_type,
@ -562,7 +571,7 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
            resp_list: [{
                "index": 0,
                "finish_reason": stop_reason,
-                "message": {"role": "assistant", "refusal": None, "content": None if tool_calls else answer, **({"tool_calls": tool_calls} if tool_calls else {})},
+                "message": message,
                "logprobs": None,
            }],
            "usage": {
--- a/modules/html_generator.py
+++ b/modules/html_generator.py
@ -10,6 +10,7 @@ import markdown
 from PIL import Image, ImageOps

 from modules import shared
+from modules.reasoning import extract_reasoning
 from modules.sane_markdown_lists import SaneListExtension
 from modules.utils import get_available_chat_styles

@ -108,66 +109,9 @@ def replace_blockquote(m):
    return m.group().replace('\n', '\n> ').replace('\\begin{blockquote}', '').replace('\\end{blockquote}', '')


-# Thinking block format definitions: (start_tag, end_tag, content_start_tag)
-# Use None for start_tag to match from beginning (end-only formats should be listed last)
-THINKING_FORMATS = [
-    ('<think>', '</think>', None),
-    ('<|channel|>analysis<|message|>', '<|end|>', '<|start|>assistant<|channel|>final<|message|>'),
-    ('<seed:think>', '</seed:think>', None),
-    ('<|think|>', '<|end|>', '<|content|>'),  # Solar Open
-    ('Thinking Process:', '</think>', None),  # Qwen3.5 verbose thinking outside tags
-    (None, '</think>', None),  # End-only variant (e.g., Qwen3-next)
-]
-
-
 def extract_thinking_block(string):
-    """Extract thinking blocks from the beginning of a string."""
-    if not string:
-        return None, string
-
-    for start_tag, end_tag, content_tag in THINKING_FORMATS:
-        end_esc = html.escape(end_tag)
-        content_esc = html.escape(content_tag) if content_tag else None
-
-        if start_tag is None:
-            # End-only format: require end tag, start from beginning
-            end_pos = string.find(end_esc)
-            if end_pos == -1:
-                continue
-            thought_start = 0
-        else:
-            # Normal format: require start tag
-            start_esc = html.escape(start_tag)
-            start_pos = string.find(start_esc)
-            if start_pos == -1:
-                continue
-            thought_start = start_pos + len(start_esc)
-            end_pos = string.find(end_esc, thought_start)
-
-        if end_pos == -1:
-            # End tag missing - check if content tag can serve as fallback
-            if content_esc:
-                content_pos = string.find(content_esc, thought_start)
-                if content_pos != -1:
-                    thought_end = content_pos
-                    content_start = content_pos + len(content_esc)
-                else:
-                    thought_end = len(string)
-                    content_start = len(string)
-            else:
-                thought_end = len(string)
-                content_start = len(string)
-        else:
-            thought_end = end_pos
-            if content_esc:
-                content_pos = string.find(content_esc, end_pos)
-                content_start = content_pos + len(content_esc) if content_pos != -1 else end_pos + len(end_esc)
-            else:
-                content_start = end_pos + len(end_esc)
-
-        return string[thought_start:thought_end], string[content_start:]
-
-    return None, string
+    """Extract thinking blocks from the beginning of an HTML-escaped string."""
+    return extract_reasoning(string, html_escaped=True)


 def build_thinking_block(thinking_content, message_id, has_remaining_content):
--- a/modules/reasoning.py
+++ b/modules/reasoning.py
@ -0,0 +1,71 @@
+import html as html_module
+
+# Thinking block format definitions: (start_tag, end_tag, content_start_tag)
+# Use None for start_tag to match from beginning (end-only formats should be listed last)
+THINKING_FORMATS = [
+    ('<think>', '</think>', None),
+    ('<|channel|>analysis<|message|>', '<|end|>', '<|start|>assistant<|channel|>final<|message|>'),
+    ('<seed:think>', '</seed:think>', None),
+    ('<|think|>', '<|end|>', '<|content|>'),  # Solar Open
+    ('Thinking Process:', '</think>', None),  # Qwen3.5 verbose thinking outside tags
+    (None, '</think>', None),  # End-only variant (e.g., Qwen3-next)
+]
+
+
+def extract_reasoning(text, html_escaped=False):
+    """Extract reasoning/thinking blocks from the beginning of a string.
+
+    When html_escaped=True, tags are HTML-escaped before searching
+    (for use on already-escaped UI strings).
+
+    Returns (reasoning_content, final_content) where reasoning_content is
+    None if no thinking block is found.
+    """
+    if not text:
+        return None, text
+
+    esc = html_module.escape if html_escaped else lambda s: s
+
+    for start_tag, end_tag, content_tag in THINKING_FORMATS:
+        end_esc = esc(end_tag)
+        content_esc = esc(content_tag) if content_tag else None
+
+        if start_tag is None:
+            # End-only format: require end tag, start from beginning
+            end_pos = text.find(end_esc)
+            if end_pos == -1:
+                continue
+            thought_start = 0
+        else:
+            # Normal format: require start tag
+            start_esc = esc(start_tag)
+            start_pos = text.find(start_esc)
+            if start_pos == -1:
+                continue
+            thought_start = start_pos + len(start_esc)
+            end_pos = text.find(end_esc, thought_start)
+
+        if end_pos == -1:
+            # End tag missing - check if content tag can serve as fallback
+            if content_esc:
+                content_pos = text.find(content_esc, thought_start)
+                if content_pos != -1:
+                    thought_end = content_pos
+                    content_start = content_pos + len(content_esc)
+                else:
+                    thought_end = len(text)
+                    content_start = len(text)
+            else:
+                thought_end = len(text)
+                content_start = len(text)
+        else:
+            thought_end = end_pos
+            if content_esc:
+                content_pos = text.find(content_esc, end_pos)
+                content_start = content_pos + len(content_esc) if content_pos != -1 else end_pos + len(end_esc)
+            else:
+                content_start = end_pos + len(end_esc)
+
+        return text[thought_start:thought_end], text[content_start:]
+
+    return None, text