mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2026-03-18 03:14:39 +01:00
API: Add reasoning_content field to non-streaming chat completions
Extract thinking/reasoning blocks (e.g. <think>...</think>) into a separate reasoning_content field on the assistant message, matching the convention used by DeepSeek, llama.cpp, and SGLang.
This commit is contained in:
parent
d45c9b3c59
commit
2d0cc7726e
|
|
@ -13,6 +13,7 @@ from extensions.openai.errors import InvalidRequestError
|
|||
from extensions.openai.typing import ToolDefinition
|
||||
from extensions.openai.utils import debug_msg, getToolCallId, parseToolCall
|
||||
from modules import shared
|
||||
from modules.reasoning import extract_reasoning
|
||||
from modules.chat import (
|
||||
generate_chat_prompt,
|
||||
generate_chat_reply,
|
||||
|
|
@ -553,6 +554,14 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
|
|||
else:
|
||||
yield chunk
|
||||
else:
|
||||
reasoning, content = extract_reasoning(answer) if not tool_calls else (None, answer)
|
||||
message = {
|
||||
"role": "assistant",
|
||||
"refusal": None,
|
||||
"content": None if tool_calls else content,
|
||||
**({"reasoning_content": reasoning} if reasoning else {}),
|
||||
**({"tool_calls": tool_calls} if tool_calls else {}),
|
||||
}
|
||||
resp = {
|
||||
"id": cmpl_id,
|
||||
"object": object_type,
|
||||
|
|
@ -562,7 +571,7 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
|
|||
resp_list: [{
|
||||
"index": 0,
|
||||
"finish_reason": stop_reason,
|
||||
"message": {"role": "assistant", "refusal": None, "content": None if tool_calls else answer, **({"tool_calls": tool_calls} if tool_calls else {})},
|
||||
"message": message,
|
||||
"logprobs": None,
|
||||
}],
|
||||
"usage": {
|
||||
|
|
|
|||
|
|
@ -10,6 +10,7 @@ import markdown
|
|||
from PIL import Image, ImageOps
|
||||
|
||||
from modules import shared
|
||||
from modules.reasoning import extract_reasoning
|
||||
from modules.sane_markdown_lists import SaneListExtension
|
||||
from modules.utils import get_available_chat_styles
|
||||
|
||||
|
|
@ -108,66 +109,9 @@ def replace_blockquote(m):
|
|||
return m.group().replace('\n', '\n> ').replace('\\begin{blockquote}', '').replace('\\end{blockquote}', '')
|
||||
|
||||
|
||||
# Known reasoning/thinking markup. Each entry is a 3-tuple:
#   (start_tag, end_tag, content_start_tag)
#   - start_tag:         opens the thinking block; None means "match from the
#                        beginning of the text" (end-only format)
#   - end_tag:           closes the thinking block
#   - content_start_tag: optional marker introducing the final answer; None
#                        when the answer follows end_tag directly
# Keep end-only entries (start_tag is None) at the bottom of the list.
THINKING_FORMATS = [
    ('<think>', '</think>', None),
    (
        '<|channel|>analysis<|message|>',
        '<|end|>',
        '<|start|>assistant<|channel|>final<|message|>',
    ),
    ('<seed:think>', '</seed:think>', None),
    ('<|think|>', '<|end|>', '<|content|>'),  # Solar Open
    ('Thinking Process:', '</think>', None),  # Qwen3.5 verbose thinking outside tags
    (None, '</think>', None),  # End-only variant (e.g., Qwen3-next)
]
|
||||
|
||||
|
||||
def extract_thinking_block(string):
    """Split an HTML-escaped string into its thinking block and the remainder.

    Thin wrapper around ``extract_reasoning`` (imported from
    ``modules.reasoning``) that asks it to HTML-escape the thinking tags
    before searching, because ``string`` has already been escaped for the UI.

    Returns whatever ``extract_reasoning(string, html_escaped=True)`` returns:
    a ``(thinking_content, remaining_content)`` pair, with
    ``thinking_content`` set to ``None`` when no thinking block is found.
    """
    # Delegate instead of keeping a second hand-written copy of the tag
    # scanning logic here; the shared helper performs the same search.
    return extract_reasoning(string, html_escaped=True)
|
||||
|
||||
|
||||
def build_thinking_block(thinking_content, message_id, has_remaining_content):
|
||||
|
|
|
|||
71
modules/reasoning.py
Normal file
71
modules/reasoning.py
Normal file
|
|
@ -0,0 +1,71 @@
|
|||
import html as html_module
|
||||
|
||||
# Known reasoning/thinking markup, tried in list order. Each entry is a
# 3-tuple:
#   (start_tag, end_tag, content_start_tag)
#   - start_tag:         opens the thinking block; None means "match from the
#                        beginning of the text" (end-only format)
#   - end_tag:           closes the thinking block
#   - content_start_tag: optional marker introducing the final answer; None
#                        when the answer follows end_tag directly
# Keep end-only entries (start_tag is None) at the bottom of the list.
THINKING_FORMATS = [
    ('<think>', '</think>', None),
    (
        '<|channel|>analysis<|message|>',
        '<|end|>',
        '<|start|>assistant<|channel|>final<|message|>',
    ),
    ('<seed:think>', '</seed:think>', None),
    ('<|think|>', '<|end|>', '<|content|>'),  # Solar Open
    ('Thinking Process:', '</think>', None),  # Qwen3.5 verbose thinking outside tags
    (None, '</think>', None),  # End-only variant (e.g., Qwen3-next)
]
|
||||
|
||||
|
||||
def extract_reasoning(text, html_escaped=False):
    """Split model output into its reasoning block and the final answer.

    The formats in THINKING_FORMATS are tried in order. The first one whose
    start tag occurs in ``text`` (or, for end-only formats, whose end tag
    occurs) decides the split; later formats are not consulted.

    Args:
        text: The generated text to inspect.
        html_escaped: When True, every tag is passed through ``html.escape``
            before searching, for use on already-escaped UI strings.

    Returns:
        A ``(reasoning_content, final_content)`` tuple. ``reasoning_content``
        is None (and ``final_content`` is ``text`` unchanged) when ``text`` is
        empty or no format matches. If the end tag is missing, the reasoning
        runs up to the content-start tag when one is present, otherwise to
        the end of ``text`` with an empty final content.

    Note:
        The start tag is located with ``str.find``, so it may appear anywhere
        in ``text``; anything before it is not part of either returned value.
    """
    if not text:
        return None, text

    def _tag(raw):
        # Tags are stored unescaped; escape them only for escaped input.
        return html_module.escape(raw) if html_escaped else raw

    for opener, closer, answer_marker in THINKING_FORMATS:
        closer_tag = _tag(closer)
        marker_tag = _tag(answer_marker) if answer_marker else None

        if opener is None:
            # End-only format: reasoning starts at the very beginning and the
            # closing tag is mandatory.
            reasoning_begin = 0
            closer_at = text.find(closer_tag)
            if closer_at == -1:
                continue
        else:
            # Regular format: the opening tag is mandatory, the closing tag
            # is optional.
            opener_tag = _tag(opener)
            opener_at = text.find(opener_tag)
            if opener_at == -1:
                continue
            reasoning_begin = opener_at + len(opener_tag)
            closer_at = text.find(closer_tag, reasoning_begin)

        closed = closer_at != -1
        if closed:
            # Defaults when the block is properly closed.
            reasoning_end = closer_at
            answer_begin = closer_at + len(closer_tag)
            scan_from = closer_at
        else:
            # Defaults when the block never closes: everything is reasoning.
            reasoning_end = answer_begin = len(text)
            scan_from = reasoning_begin

        if marker_tag:
            marker_at = text.find(marker_tag, scan_from)
            if marker_at != -1:
                # The answer starts right after the content-start marker.
                answer_begin = marker_at + len(marker_tag)
                if not closed:
                    # Without an end tag the marker also ends the reasoning.
                    reasoning_end = marker_at

        return text[reasoning_begin:reasoning_end], text[answer_begin:]

    return None, text
|
||||
Loading…
Reference in a new issue