Fix GPT-OSS tag leak during streaming between thinking and tool calls

This commit is contained in:
oobabooga 2026-04-06 12:06:28 -07:00
parent 193424cc93
commit cb511928e2
2 changed files with 19 additions and 6 deletions

View file

@@ -73,9 +73,16 @@ def extract_reasoning(text, html_escaped=False):
if content_pos != -1:
content_start = content_pos + len(content_esc)
else:
# Content tag not present — fall back to content after
# end_tag (e.g. GPT-OSS tool calls skip the final channel).
content_start = end_pos + len(end_esc)
# Content tag not present yet. In GPT-OSS the region
# between <|end|> and the content tag contains internal
# markup (<|start|>assistant…) that must not be shown.
# Suppress it to prevent tag leaks during streaming.
remainder = text[end_pos + len(end_esc):].lstrip()
framing_token = esc('<|start|>')
if not remainder or remainder.startswith(framing_token) or framing_token.startswith(remainder):
content_start = len(text)
else:
content_start = end_pos + len(end_esc)
else:
content_start = end_pos + len(end_esc)