diff --git a/extensions/openai/completions.py b/extensions/openai/completions.py
index 27defe42..51427050 100644
--- a/extensions/openai/completions.py
+++ b/extensions/openai/completions.py
@@ -417,7 +417,7 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
         logprob_proc.token_alternatives_history.clear()
     chat_logprobs_offset = [0]  # mutable for closure access in streaming
 
-    def chat_streaming_chunk(content=None, chunk_tool_calls=None, include_role=False):
+    def chat_streaming_chunk(content=None, chunk_tool_calls=None, include_role=False, reasoning_content=None):
         # begin streaming
         delta = {}
         if include_role:
@@ -425,6 +425,8 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
             delta['refusal'] = None
         if content is not None:
             delta['content'] = content
+        if reasoning_content is not None:
+            delta['reasoning_content'] = reasoning_content
         if chunk_tool_calls:
             delta['tool_calls'] = chunk_tool_calls
 
@@ -477,6 +479,7 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
 
     answer = ''
     seen_content = ''
+    seen_reasoning = ''
 
     tool_calls = []
     end_last_tool_call = 0
@@ -508,17 +511,31 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
             break
 
         if stream:
-            len_seen = len(seen_content)
-            new_content = answer[len_seen:]
+            # Split reasoning/thinking blocks out of the answer so that `content` deltas
+            # carry only the final reply; reasoning is sent as `reasoning_content` deltas.
+            reasoning, content = extract_reasoning(answer)
+            if reasoning is not None:
+                new_reasoning = reasoning[len(seen_reasoning):]
+                new_content = content[len(seen_content):]
+            else:
+                new_reasoning = None
+                new_content = answer[len(seen_content):]
 
-            if not new_content or chr(0xfffd) in new_content:  # partial unicode character, don't send it yet.
+            if (not new_content and not new_reasoning) or chr(0xfffd) in (new_content or '') + (new_reasoning or ''):
                 continue
 
-            chunk = chat_streaming_chunk(new_content)
+            chunk = chat_streaming_chunk(
+                content=new_content if new_content else None,
+                reasoning_content=new_reasoning if new_reasoning else None,
+            )
             if include_usage:
                 chunk['usage'] = None
 
-            seen_content = answer
+            if reasoning is not None:
+                seen_reasoning = reasoning
+                seen_content = content
+            else:
+                seen_content = answer
             yield chunk
 
     token_count = shared.model.last_prompt_token_count if hasattr(shared.model, 'last_prompt_token_count') else 0
diff --git a/modules/reasoning.py b/modules/reasoning.py
index 708ee55a..3a9ab546 100644
--- a/modules/reasoning.py
+++ b/modules/reasoning.py
@@ -8,7 +8,7 @@ THINKING_FORMATS = [
     ('<|channel|>commentary<|message|>', '<|end|>', '<|start|>assistant<|channel|>final<|message|>'),
     ('<seed:think>', '</seed:think>', None),
     ('<|think|>', '<|end|>', '<|content|>'),  # Solar Open
-    ('Thinking Process:', '</think>', None),  # Qwen3.5 verbose thinking outside tags
+    # NOTE: ('Thinking Process:', '</think>', None) (Qwen3.5 verbose thinking outside tags) was removed: too prone to false positives in streaming
     (None, '</think>', None),  # End-only variant (e.g., Qwen3-next)
 ]
 
@@ -42,6 +42,12 @@ def extract_reasoning(text, html_escaped=False):
             start_esc = esc(start_tag)
             start_pos = text.find(start_esc)
             if start_pos == -1:
+                # During streaming, the start tag may have arrived only partially.
+                # If the stripped text is a prefix of a start tag, return empty
+                # reasoning and content so the partial tag does not leak.
+                stripped = text.strip()
+                if stripped and start_esc.startswith(stripped):
+                    return '', ''
                 continue
             thought_start = start_pos + len(start_esc)
             end_pos = text.find(end_esc, thought_start)
@@ -63,7 +69,13 @@ def extract_reasoning(text, html_escaped=False):
             thought_end = end_pos
             if content_esc:
                 content_pos = text.find(content_esc, end_pos)
-                content_start = content_pos + len(content_esc) if content_pos != -1 else end_pos + len(end_esc)
+                if content_pos != -1:
+                    content_start = content_pos + len(content_esc)
+                else:
+                    # Content tag expected but not yet present (e.g. mid-stream):
+                    # start content at the end of the text so that anything
+                    # between end_tag and content_tag does not leak as content.
+                    content_start = len(text)
             else:
                 content_start = end_pos + len(end_esc)