From 2d0cc7726eae0d98e52352b740397d86cdd2973e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 12 Mar 2026 16:29:46 -0300 Subject: [PATCH] API: Add reasoning_content field to non-streaming chat completions Extract thinking/reasoning blocks (e.g. ...) into a separate reasoning_content field on the assistant message, matching the convention used by DeepSeek, llama.cpp, and SGLang. --- extensions/openai/completions.py | 11 ++++- modules/html_generator.py | 62 ++-------------------------- modules/reasoning.py | 71 ++++++++++++++++++++++++++++++++ 3 files changed, 84 insertions(+), 60 deletions(-) create mode 100644 modules/reasoning.py diff --git a/extensions/openai/completions.py b/extensions/openai/completions.py index eb1702a8..0eb0cd27 100644 --- a/extensions/openai/completions.py +++ b/extensions/openai/completions.py @@ -13,6 +13,7 @@ from extensions.openai.errors import InvalidRequestError from extensions.openai.typing import ToolDefinition from extensions.openai.utils import debug_msg, getToolCallId, parseToolCall from modules import shared +from modules.reasoning import extract_reasoning from modules.chat import ( generate_chat_prompt, generate_chat_reply, @@ -553,6 +554,14 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p else: yield chunk else: + reasoning, content = extract_reasoning(answer) if not tool_calls else (None, answer) + message = { + "role": "assistant", + "refusal": None, + "content": None if tool_calls else content, + **({"reasoning_content": reasoning} if reasoning else {}), + **({"tool_calls": tool_calls} if tool_calls else {}), + } resp = { "id": cmpl_id, "object": object_type, @@ -562,7 +571,7 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p resp_list: [{ "index": 0, "finish_reason": stop_reason, - "message": {"role": "assistant", "refusal": None, "content": None if tool_calls else answer, **({"tool_calls": tool_calls} if 
tool_calls else {})}, + "message": message, "logprobs": None, }], "usage": { diff --git a/modules/html_generator.py b/modules/html_generator.py index 472a9ea0..4d9904fb 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -10,6 +10,7 @@ import markdown from PIL import Image, ImageOps from modules import shared +from modules.reasoning import extract_reasoning from modules.sane_markdown_lists import SaneListExtension from modules.utils import get_available_chat_styles @@ -108,66 +109,9 @@ def replace_blockquote(m): return m.group().replace('\n', '\n> ').replace('\\begin{blockquote}', '').replace('\\end{blockquote}', '') -# Thinking block format definitions: (start_tag, end_tag, content_start_tag) -# Use None for start_tag to match from beginning (end-only formats should be listed last) -THINKING_FORMATS = [ - ('', '', None), - ('<|channel|>analysis<|message|>', '<|end|>', '<|start|>assistant<|channel|>final<|message|>'), - ('', '', None), - ('<|think|>', '<|end|>', '<|content|>'), # Solar Open - ('Thinking Process:', '', None), # Qwen3.5 verbose thinking outside tags - (None, '', None), # End-only variant (e.g., Qwen3-next) -] - - def extract_thinking_block(string): - """Extract thinking blocks from the beginning of a string.""" - if not string: - return None, string - - for start_tag, end_tag, content_tag in THINKING_FORMATS: - end_esc = html.escape(end_tag) - content_esc = html.escape(content_tag) if content_tag else None - - if start_tag is None: - # End-only format: require end tag, start from beginning - end_pos = string.find(end_esc) - if end_pos == -1: - continue - thought_start = 0 - else: - # Normal format: require start tag - start_esc = html.escape(start_tag) - start_pos = string.find(start_esc) - if start_pos == -1: - continue - thought_start = start_pos + len(start_esc) - end_pos = string.find(end_esc, thought_start) - - if end_pos == -1: - # End tag missing - check if content tag can serve as fallback - if content_esc: - content_pos 
import html as html_module

# Thinking block format definitions: (start_tag, end_tag, content_start_tag)
# Use None for start_tag to match from beginning (end-only formats should be listed last)
# Every tag must be a non-empty string: str.find('') succeeds on any input, so
# an empty tag makes its format match everything and shadows the formats below it.
THINKING_FORMATS = [
    ('<think>', '</think>', None),
    ('<|channel|>analysis<|message|>', '<|end|>', '<|start|>assistant<|channel|>final<|message|>'),
    ('<seed:think>', '</seed:think>', None),
    ('<|think|>', '<|end|>', '<|content|>'),  # Solar Open
    ('Thinking Process:', '</think>', None),  # Qwen3.5 verbose thinking outside tags
    (None, '</think>', None),  # End-only variant (e.g., Qwen3-next)
]


def extract_reasoning(text, html_escaped=False):
    """Split a model reply into its reasoning block and its final content.

    The formats in THINKING_FORMATS are tried in order and the first one
    whose start tag occurs in ``text`` (or, for end-only formats, whose end
    tag occurs) decides the split.

    Args:
        text: The raw reply. Falsy values (``None``, ``""``) are returned
            unchanged as the content.
        html_escaped: When True, every tag is passed through ``html.escape``
            before searching, for use on strings that were already
            HTML-escaped for the UI.

    Returns:
        A ``(reasoning_content, final_content)`` tuple. ``reasoning_content``
        is ``None`` when no format matches. Neither part is stripped of
        whitespace. Text located before a matched start tag is not included
        in either part.
    """
    if not text:
        return None, text

    # str() is the identity on str, so unescaped mode searches the raw tags.
    esc = html_module.escape if html_escaped else str

    for start_tag, end_tag, content_tag in THINKING_FORMATS:
        end_marker = esc(end_tag)
        content_marker = esc(content_tag) if content_tag else None

        if start_tag is None:
            # End-only format: the thought starts at the beginning of the text.
            thought_start = 0
        else:
            start_marker = esc(start_tag)
            start_pos = text.find(start_marker)
            if start_pos == -1:
                continue
            thought_start = start_pos + len(start_marker)

        end_pos = text.find(end_marker, thought_start)
        if start_tag is None and end_pos == -1:
            # An end-only format has nothing else to anchor on.
            continue

        if end_pos != -1:
            thought_end = end_pos
            content_start = end_pos + len(end_marker)
            search_from = end_pos
        else:
            # End tag missing: everything after the start tag is reasoning,
            # unless the content tag below can serve as the boundary.
            thought_end = content_start = len(text)
            search_from = thought_start

        if content_marker:
            content_pos = text.find(content_marker, search_from)
            if content_pos != -1:
                # The final answer begins right after the content tag.
                content_start = content_pos + len(content_marker)
                if end_pos == -1:
                    thought_end = content_pos

        return text[thought_start:thought_end], text[content_start:]

    return None, text


def extract_thinking_block(string):
    """Extract thinking blocks from the beginning of an HTML-escaped string.

    Thin wrapper (from modules/html_generator.py) that delegates to
    extract_reasoning with html_escaped=True.
    """
    return extract_reasoning(string, html_escaped=True)