mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2026-04-07 07:33:47 +00:00
Fix GPT-OSS tag leak during streaming between thinking and tool calls
This commit is contained in:
parent
193424cc93
commit
cb511928e2
2 changed files with 19 additions and 6 deletions
|
|
@@ -73,9 +73,16 @@ def extract_reasoning(text, html_escaped=False):
|
|||
if content_pos != -1:
|
||||
content_start = content_pos + len(content_esc)
|
||||
else:
|
||||
# Content tag not present — fall back to content after
|
||||
# end_tag (e.g. GPT-OSS tool calls skip the final channel).
|
||||
content_start = end_pos + len(end_esc)
|
||||
# Content tag not present yet. In GPT-OSS the region
|
||||
# between <|end|> and the content tag contains internal
|
||||
# markup (<|start|>assistant…) that must not be shown.
|
||||
# Suppress it to prevent tag leaks during streaming.
|
||||
remainder = text[end_pos + len(end_esc):].lstrip()
|
||||
framing_token = esc('<|start|>')
|
||||
if not remainder or remainder.startswith(framing_token) or framing_token.startswith(remainder):
|
||||
content_start = len(text)
|
||||
else:
|
||||
content_start = end_pos + len(end_esc)
|
||||
else:
|
||||
content_start = end_pos + len(end_esc)
|
||||
|
||||
|
|
|
|||
|
|
@@ -638,9 +638,15 @@ def parse_tool_call(answer: str, tool_names: list[str], return_prefix: bool = Fa
|
|||
# Strip thinking blocks so tool-call syntax inside <think> is ignored.
|
||||
original_answer = answer
|
||||
_, answer = extract_reasoning(answer)
|
||||
# Offset between original and stripped text, used to map start_pos
|
||||
# back to the original string when returning a prefix.
|
||||
reasoning_offset = len(original_answer) - len(answer)
|
||||
# Reasoning extraction returns empty content when GPT-OSS internal
|
||||
# markup (<|start|>assistant…) follows the thinking block without a
|
||||
# content tag. Fall back to the full text so tool-call markers can
|
||||
# be found.
|
||||
if not answer.strip():
|
||||
answer = original_answer
|
||||
reasoning_offset = 0
|
||||
else:
|
||||
reasoning_offset = len(original_answer) - len(answer)
|
||||
|
||||
matches = []
|
||||
start_pos = None
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue