From d0ac58ad3135776ec6e108f82a5c64a9662cc122 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 5 Mar 2026 21:25:03 -0800 Subject: [PATCH] API: Fix tool_calls placement and other response compatibility issues --- docs/12 - OpenAI API.md | 4 ++-- extensions/openai/completions.py | 5 ++--- extensions/openai/script.py | 4 ++-- 3 files changed, 6 insertions(+), 7 deletions(-) diff --git a/docs/12 - OpenAI API.md b/docs/12 - OpenAI API.md index ebe24797..637ccced 100644 --- a/docs/12 - OpenAI API.md +++ b/docs/12 - OpenAI API.md @@ -456,11 +456,11 @@ for _ in range(10): messages.append({ "role": "assistant", "content": choice["message"]["content"], - "tool_calls": choice["tool_calls"], + "tool_calls": choice["message"]["tool_calls"], }) # Execute each tool and add results to history - for tool_call in choice["tool_calls"]: + for tool_call in choice["message"]["tool_calls"]: name = tool_call["function"]["name"] arguments = json.loads(tool_call["function"]["arguments"]) result = execute_tool(name, arguments) diff --git a/extensions/openai/completions.py b/extensions/openai/completions.py index 5187343f..1538b87a 100644 --- a/extensions/openai/completions.py +++ b/extensions/openai/completions.py @@ -370,8 +370,7 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p resp_list: [{ "index": 0, "finish_reason": stop_reason, - "message": {"role": "assistant", "content": answer}, - "tool_calls": tool_calls + "message": {"role": "assistant", "content": answer, "tool_calls": tool_calls}, }], "usage": { "prompt_tokens": token_count, @@ -389,7 +388,7 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p def completions_common(body: dict, is_legacy: bool = False, stream=False, stop_event=None): - object_type = 'text_completion.chunk' if stream else 'text_completion' + object_type = 'text_completion' created_time = int(time.time()) cmpl_id = "conv-%d" % (int(time.time() * 1000000000)) resp_list = 'data' if is_legacy else 'choices' diff --git a/extensions/openai/script.py b/extensions/openai/script.py index 521d2cb4..bfb6fd54 100644 --- a/extensions/openai/script.py +++ b/extensions/openai/script.py @@ -359,7 +359,7 @@ async def handle_load_model(request_data: LoadModelRequest): return JSONResponse(content="OK") except Exception: traceback.print_exc() - return HTTPException(status_code=400, detail="Failed to load the model.") + raise HTTPException(status_code=400, detail="Failed to load the model.") @app.post("/v1/internal/model/unload", dependencies=check_admin_key) @@ -380,7 +380,7 @@ async def handle_load_loras(request_data: LoadLorasRequest): return JSONResponse(content="OK") except Exception: traceback.print_exc() - return HTTPException(status_code=400, detail="Failed to apply the LoRA(s).") + raise HTTPException(status_code=400, detail="Failed to apply the LoRA(s).") @app.post("/v1/internal/lora/unload", dependencies=check_admin_key)