From cb511928e2be4b7ee234582ecba96801fccf94fe Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Mon, 6 Apr 2026 12:06:28 -0700
Subject: [PATCH] Fix GPT-OSS tag leak during streaming between thinking and tool calls

---
 modules/reasoning.py    | 13 ++++++++++---
 modules/tool_parsing.py | 12 +++++++++---
 2 files changed, 19 insertions(+), 6 deletions(-)

diff --git a/modules/reasoning.py b/modules/reasoning.py
index 4a7cfa79..2b260818 100644
--- a/modules/reasoning.py
+++ b/modules/reasoning.py
@@ -73,9 +73,16 @@ def extract_reasoning(text, html_escaped=False):
         if content_pos != -1:
             content_start = content_pos + len(content_esc)
         else:
-            # Content tag not present — fall back to content after
-            # end_tag (e.g. GPT-OSS tool calls skip the final channel).
-            content_start = end_pos + len(end_esc)
+            # Content tag not present yet. In GPT-OSS the region
+            # between <|end|> and the content tag contains internal
+            # markup (<|start|>assistant…) that must not be shown.
+            # Suppress it to prevent tag leaks during streaming.
+            remainder = text[end_pos + len(end_esc):].lstrip()
+            framing_token = esc('<|start|>')
+            if not remainder or remainder.startswith(framing_token) or framing_token.startswith(remainder):
+                content_start = len(text)
+            else:
+                content_start = end_pos + len(end_esc)
     else:
         content_start = end_pos + len(end_esc)
 
diff --git a/modules/tool_parsing.py b/modules/tool_parsing.py
index 7fcf58b7..aa3e0e95 100644
--- a/modules/tool_parsing.py
+++ b/modules/tool_parsing.py
@@ -638,9 +638,15 @@ def parse_tool_call(answer: str, tool_names: list[str], return_prefix: bool = Fa
     # Strip thinking blocks so tool-call syntax inside is ignored.
     original_answer = answer
     _, answer = extract_reasoning(answer)
-    # Offset between original and stripped text, used to map start_pos
-    # back to the original string when returning a prefix.
-    reasoning_offset = len(original_answer) - len(answer)
+    # Reasoning extraction returns empty content when GPT-OSS internal
+    # markup (<|start|>assistant…) follows the thinking block without a
+    # content tag. Fall back to the full text so tool-call markers can
+    # be found.
+    if not answer.strip():
+        answer = original_answer
+        reasoning_offset = 0
+    else:
+        reasoning_offset = len(original_answer) - len(answer)
 
     matches = []
     start_pos = None