Fix GPT-OSS tag leak during streaming between thinking and tool calls

2026-04-09 00:23:38 +00:00 · 2026-04-06 12:06:28 -07:00 · 2026-04-06 12:06:28 -07:00 · cb511928e2
commit cb511928e2
parent 193424cc93
2 changed files with 19 additions and 6 deletions
--- a/modules/reasoning.py
+++ b/modules/reasoning.py
@@ -73,9 +73,16 @@ def extract_reasoning(text, html_escaped=False):
                if content_pos != -1:
                    content_start = content_pos + len(content_esc)
                else:
-                    # Content tag not present — fall back to content after
-                    # end_tag (e.g. GPT-OSS tool calls skip the final channel).
-                    content_start = end_pos + len(end_esc)
+                    # Content tag not present yet.  In GPT-OSS the region
+                    # between <|end|> and the content tag contains internal
+                    # markup (<|start|>assistant…) that must not be shown.
+                    # Suppress it to prevent tag leaks during streaming.
+                    remainder = text[end_pos + len(end_esc):].lstrip()
+                    framing_token = esc('<|start|>')
+                    if not remainder or remainder.startswith(framing_token) or framing_token.startswith(remainder):
+                        content_start = len(text)
+                    else:
+                        content_start = end_pos + len(end_esc)
            else:
                content_start = end_pos + len(end_esc)