diff --git a/extensions/openai/completions.py b/extensions/openai/completions.py
index 04e644d6..03c4b03e 100644
--- a/extensions/openai/completions.py
+++ b/extensions/openai/completions.py
@@ -310,28 +310,41 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
     requested_model = generate_params.pop('model')
     logprob_proc = generate_params.pop('logprob_proc', None)
 
-    def chat_streaming_chunk(content, chunk_tool_calls=None):
+    def chat_streaming_chunk(content=None, chunk_tool_calls=None, include_role=False):
         # begin streaming
+        delta = {}
+        if include_role:
+            delta['role'] = 'assistant'
+            delta['refusal'] = None
+        if content is not None:
+            delta['content'] = content
+        if chunk_tool_calls:
+            delta['tool_calls'] = chunk_tool_calls
+
         chunk = {
             "id": cmpl_id,
             "object": object_type,
             "created": created_time,
             "model": shared.model_name,
+            "system_fingerprint": None,
             resp_list: [{
                 "index": 0,
                 "finish_reason": None,
-                "delta": {'role': 'assistant', 'content': content, 'tool_calls': chunk_tool_calls},
+                "delta": delta,
+                "logprobs": None,
             }],
         }
 
         if logprob_proc:  # not official for chat yet
             top_logprobs = convert_logprobs_to_tiktoken(model=requested_model, logprobs=logprob_proc.token_alternatives)
             chunk[resp_list][0]["logprobs"] = {'top_logprobs': [top_logprobs]}
-        # else:
-        #    chunk[resp_list][0]["logprobs"] = None
 
         return chunk
 
+    # Check if usage should be included in streaming chunks per OpenAI spec
+    stream_options = body.get('stream_options')
+    include_usage = bool(stream_options) and bool(stream_options.get('include_usage') if isinstance(stream_options, dict) else getattr(stream_options, 'include_usage', False))
+
     # generate reply #######################################
     if prompt_only:
         prompt = generate_chat_prompt(user_input, generate_params, _continue=continue_)
@@ -339,7 +352,10 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
         return
 
     if stream:
-        yield chat_streaming_chunk('')
+        chunk = chat_streaming_chunk('', include_role=True)
+        if include_usage:
+            chunk['usage'] = None
+        yield chunk
 
     generator = generate_chat_reply(
         user_input, generate_params, regenerate=False, _continue=continue_, loading_message=False)
@@ -372,6 +388,8 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
                 continue
 
             chunk = chat_streaming_chunk(new_content)
+            if include_usage:
+                chunk['usage'] = None
 
             seen_content = answer
             yield chunk
@@ -389,25 +407,42 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
         stop_reason = "length"
 
     if stream:
-        chunk = chat_streaming_chunk('', tool_calls)
+        chunk = chat_streaming_chunk(chunk_tool_calls=tool_calls)
         chunk[resp_list][0]['finish_reason'] = stop_reason
-        chunk['usage'] = {
+        usage = {
             "prompt_tokens": token_count,
             "completion_tokens": completion_token_count,
             "total_tokens": token_count + completion_token_count
         }
 
-        yield chunk
+        if include_usage:
+            chunk['usage'] = None
+            yield chunk
+            # Separate usage-only chunk with choices: [] per OpenAI spec
+            yield {
+                "id": cmpl_id,
+                "object": object_type,
+                "created": created_time,
+                "model": shared.model_name,
+                "system_fingerprint": None,
+                resp_list: [],
+                "usage": usage
+            }
+        else:
+            chunk['usage'] = usage
+            yield chunk
     else:
         resp = {
             "id": cmpl_id,
             "object": object_type,
             "created": created_time,
             "model": shared.model_name,
+            "system_fingerprint": None,
             resp_list: [{
                 "index": 0,
                 "finish_reason": stop_reason,
-                "message": {"role": "assistant", "content": answer, **({"tool_calls": tool_calls} if tool_calls else {})},
+                "message": {"role": "assistant", "refusal": None, "content": answer, **({"tool_calls": tool_calls} if tool_calls else {})},
+                "logprobs": None,
             }],
             "usage": {
                 "prompt_tokens": token_count,
@@ -418,8 +453,6 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
         if logprob_proc:  # not official for chat yet
             top_logprobs = convert_logprobs_to_tiktoken(model=requested_model, logprobs=logprob_proc.token_alternatives)
             resp[resp_list][0]["logprobs"] = {'top_logprobs': [top_logprobs]}
-        # else:
-        #     resp[resp_list][0]["logprobs"] = None
 
         yield resp
 
@@ -427,7 +460,7 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
 def completions_common(body: dict, is_legacy: bool = False, stream=False, stop_event=None):
     object_type = 'text_completion'
     created_time = int(time.time())
-    cmpl_id = "conv-%d" % (int(time.time() * 1000000000))
+    cmpl_id = "cmpl-%d" % (int(time.time() * 1000000000))
     resp_list = 'data' if is_legacy else 'choices'
 
     prompt_str = 'context' if is_legacy else 'prompt'
@@ -548,6 +581,7 @@ def completions_common(body: dict, is_legacy: bool = False, stream=False, stop_e
             "object": object_type,
             "created": created_time,
             "model": shared.model_name,
+            "system_fingerprint": None,
             resp_list: resp_list_data,
             "usage": {
                 "prompt_tokens": total_prompt_token_count,
@@ -572,6 +606,10 @@ def completions_common(body: dict, is_legacy: bool = False, stream=False, stop_e
         prefix = prompt if echo else ''
         token_count = len(encode(prompt)[0])
 
+        # Check if usage should be included in streaming chunks per OpenAI spec
+        stream_options = body.get('stream_options')
+        include_usage = bool(stream_options) and bool(stream_options.get('include_usage') if isinstance(stream_options, dict) else getattr(stream_options, 'include_usage', False))
+
         def text_streaming_chunk(content):
             # begin streaming
             if logprob_proc:
@@ -587,6 +625,7 @@ def completions_common(body: dict, is_legacy: bool = False, stream=False, stop_e
                 "object": object_type,
                 "created": created_time,
                 "model": shared.model_name,
+                "system_fingerprint": None,
                 resp_list: [{
                     "index": 0,
                     "finish_reason": None,
@@ -597,7 +636,10 @@ def completions_common(body: dict, is_legacy: bool = False, stream=False, stop_e
 
             return chunk
 
-        yield text_streaming_chunk(prefix)
+        chunk = text_streaming_chunk(prefix)
+        if include_usage:
+            chunk['usage'] = None
+        yield chunk
 
         # generate reply #######################################
         debug_msg({'prompt': prompt, 'generate_params': generate_params})
@@ -617,6 +659,8 @@ def completions_common(body: dict, is_legacy: bool = False, stream=False, stop_e
 
             seen_content = answer
             chunk = text_streaming_chunk(new_content)
+            if include_usage:
+                chunk['usage'] = None
             yield chunk
 
         completion_token_count = len(encode(answer)[0])
@@ -626,13 +670,28 @@ def completions_common(body: dict, is_legacy: bool = False, stream=False, stop_e
 
         chunk = text_streaming_chunk(suffix)
         chunk[resp_list][0]["finish_reason"] = stop_reason
-        chunk["usage"] = {
+        usage = {
             "prompt_tokens": token_count,
             "completion_tokens": completion_token_count,
             "total_tokens": token_count + completion_token_count
         }
 
-        yield chunk
+        if include_usage:
+            chunk['usage'] = None
+            yield chunk
+            # Separate usage-only chunk with choices: [] per OpenAI spec
+            yield {
+                "id": cmpl_id,
+                "object": object_type,
+                "created": created_time,
+                "model": shared.model_name,
+                "system_fingerprint": None,
+                resp_list: [],
+                "usage": usage
+            }
+        else:
+            chunk["usage"] = usage
+            yield chunk
 
 
 def chat_completions(body: dict, is_legacy: bool = False, stop_event=None) -> dict:
diff --git a/extensions/openai/typing.py b/extensions/openai/typing.py
index 2156074b..078bd201 100644
--- a/extensions/openai/typing.py
+++ b/extensions/openai/typing.py
@@ -99,6 +99,10 @@ class ToolCall(BaseModel):
     function: FunctionCall
 
 
+class StreamOptions(BaseModel):
+    include_usage: bool | None = False
+
+
 class CompletionRequestParams(BaseModel):
     model: str | None = Field(default=None, description="Unused parameter. To change the model, use the /v1/internal/model/load endpoint.")
     prompt: str | List[str] | None = Field(default=None, description="Text prompt for completion. Can also use 'messages' format for multimodal.")
@@ -113,6 +117,7 @@ class CompletionRequestParams(BaseModel):
     presence_penalty: float | None = shared.args.presence_penalty
     stop: str | List[str] | None = None
     stream: bool | None = False
+    stream_options: StreamOptions | None = None
     suffix: str | None = None
     temperature: float | None = shared.args.temperature
     top_p: float | None = shared.args.top_p
@@ -151,6 +156,7 @@ class ChatCompletionRequestParams(BaseModel):
     presence_penalty: float | None = shared.args.presence_penalty
     stop: str | List[str] | None = None
     stream: bool | None = False
+    stream_options: StreamOptions | None = None
     temperature: float | None = shared.args.temperature
     top_p: float | None = shared.args.top_p
     user: str | None = Field(default=None, description="Unused parameter.")