From 02f18a1d65881cb3ed291050a191d8cf712b7115 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Mon, 23 Mar 2026 07:06:38 -0700
Subject: [PATCH] API: Add thinking block signature field, fix error codes,
 clean up logging

---
 modules/api/anthropic.py   | 2 +-
 modules/api/embeddings.py  | 4 ++--
 modules/api/moderations.py | 2 --
 modules/api/script.py      | 9 +++++++--
 4 files changed, 10 insertions(+), 7 deletions(-)

diff --git a/modules/api/anthropic.py b/modules/api/anthropic.py
index 5fbf5caf..3fab09a6 100644
--- a/modules/api/anthropic.py
+++ b/modules/api/anthropic.py
@@ -326,7 +326,7 @@ class StreamConverter:
                     "data": json.dumps({
                         "type": "content_block_start",
                         "index": self.block_index,
-                        "content_block": {"type": "thinking", "thinking": ""}
+                        "content_block": {"type": "thinking", "thinking": "", "signature": ""}
                     })
                 })
             events.append({
diff --git a/modules/api/embeddings.py b/modules/api/embeddings.py
index ad299c9d..16cf0482 100644
--- a/modules/api/embeddings.py
+++ b/modules/api/embeddings.py
@@ -39,14 +39,14 @@ def load_embedding_model(model: str):
     initialize_embedding_params()
     global embeddings_device, embeddings_model
     try:
-        print(f"Try embedding model: {model} on {embeddings_device}")
+        logger.info(f"Try embedding model: {model} on {embeddings_device}")
         if 'jina-embeddings' in model:
             embeddings_model = AutoModel.from_pretrained(model, trust_remote_code=True)  # trust_remote_code is needed to use the encode method
             embeddings_model = embeddings_model.to(embeddings_device)
         else:
             embeddings_model = SentenceTransformer(model, device=embeddings_device)
 
-        print(f"Loaded embedding model: {model}")
+        logger.info(f"Loaded embedding model: {model}")
     except Exception as e:
         embeddings_model = None
         raise ServiceUnavailableError(f"Error: Failed to load embedding model: {model}", internal_message=repr(e))
diff --git a/modules/api/moderations.py b/modules/api/moderations.py
index ac0539d6..a41763cf 100644
--- a/modules/api/moderations.py
+++ b/modules/api/moderations.py
@@ -64,6 +64,4 @@ def moderations(input):
                 'category_scores': category_scores,
             }])
 
-    print(results)
-
     return results
diff --git a/modules/api/script.py b/modules/api/script.py
index 5913c2c5..85f4974f 100644
--- a/modules/api/script.py
+++ b/modules/api/script.py
@@ -506,12 +506,17 @@ async def handle_load_model(request_data: LoadModelRequest):
         return JSONResponse(content="OK")
     except Exception:
         traceback.print_exc()
-        raise HTTPException(status_code=400, detail="Failed to load the model.")
+        raise HTTPException(status_code=500, detail="Failed to load the model.")
 
 
 @app.post("/v1/internal/model/unload", dependencies=check_admin_key)
 async def handle_unload_model():
-    unload_model()
+    try:
+        unload_model()
+        return JSONResponse(content="OK")
+    except Exception:
+        traceback.print_exc()
+        raise HTTPException(status_code=500, detail="Failed to unload the model.")
 
 
 @app.get("/v1/internal/lora/list", response_model=LoraListResponse, dependencies=check_admin_key)