From 02f18a1d65881cb3ed291050a191d8cf712b7115 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 23 Mar 2026 07:06:38 -0700 Subject: [PATCH] API: Add thinking block signature field, fix error codes, clean up logging --- modules/api/anthropic.py | 2 +- modules/api/embeddings.py | 4 ++-- modules/api/moderations.py | 2 -- modules/api/script.py | 9 +++++++-- 4 files changed, 10 insertions(+), 7 deletions(-) diff --git a/modules/api/anthropic.py b/modules/api/anthropic.py index 5fbf5caf..3fab09a6 100644 --- a/modules/api/anthropic.py +++ b/modules/api/anthropic.py @@ -326,7 +326,7 @@ class StreamConverter: "data": json.dumps({ "type": "content_block_start", "index": self.block_index, - "content_block": {"type": "thinking", "thinking": ""} + "content_block": {"type": "thinking", "thinking": "", "signature": ""} }) }) events.append({ diff --git a/modules/api/embeddings.py b/modules/api/embeddings.py index ad299c9d..16cf0482 100644 --- a/modules/api/embeddings.py +++ b/modules/api/embeddings.py @@ -39,14 +39,14 @@ def load_embedding_model(model: str): initialize_embedding_params() global embeddings_device, embeddings_model try: - print(f"Try embedding model: {model} on {embeddings_device}") + logger.info(f"Try embedding model: {model} on {embeddings_device}") if 'jina-embeddings' in model: embeddings_model = AutoModel.from_pretrained(model, trust_remote_code=True) # trust_remote_code is needed to use the encode method embeddings_model = embeddings_model.to(embeddings_device) else: embeddings_model = SentenceTransformer(model, device=embeddings_device) - print(f"Loaded embedding model: {model}") + logger.info(f"Loaded embedding model: {model}") except Exception as e: embeddings_model = None raise ServiceUnavailableError(f"Error: Failed to load embedding model: {model}", internal_message=repr(e)) diff --git a/modules/api/moderations.py b/modules/api/moderations.py index ac0539d6..a41763cf 100644 --- a/modules/api/moderations.py +++ b/modules/api/moderations.py @@ -64,6 +64,4 @@ def moderations(input): 'category_scores': category_scores, }]) - print(results) - return results diff --git a/modules/api/script.py b/modules/api/script.py index 5913c2c5..85f4974f 100644 --- a/modules/api/script.py +++ b/modules/api/script.py @@ -506,12 +506,17 @@ async def handle_load_model(request_data: LoadModelRequest): return JSONResponse(content="OK") except Exception: traceback.print_exc() - raise HTTPException(status_code=400, detail="Failed to load the model.") + raise HTTPException(status_code=500, detail="Failed to load the model.") @app.post("/v1/internal/model/unload", dependencies=check_admin_key) async def handle_unload_model(): - unload_model() + try: + unload_model() + return JSONResponse(content="OK") + except Exception: + traceback.print_exc() + raise HTTPException(status_code=500, detail="Failed to unload the model.") @app.get("/v1/internal/lora/list", response_model=LoraListResponse, dependencies=check_admin_key)