API: Add thinking block signature field, fix error codes, clean up logging

2026-03-26 15:24:39 +01:00 · 2026-03-23 07:06:38 -07:00 · 2026-03-23 07:06:38 -07:00 · 02f18a1d65
parent 307d0c92be
commit 02f18a1d65
4 changed files with 10 additions and 7 deletions
--- a/modules/api/anthropic.py
+++ b/modules/api/anthropic.py
@@ -326,7 +326,7 @@ class StreamConverter:
                    "data": json.dumps({
                        "type": "content_block_start",
                        "index": self.block_index,
-                        "content_block": {"type": "thinking", "thinking": ""}
+                        "content_block": {"type": "thinking", "thinking": "", "signature": ""}
                    })
                })
            events.append({
--- a/modules/api/embeddings.py
+++ b/modules/api/embeddings.py
@@ -39,14 +39,14 @@ def load_embedding_model(model: str):
    initialize_embedding_params()
    global embeddings_device, embeddings_model
    try:
-        print(f"Try embedding model: {model} on {embeddings_device}")
+        logger.info(f"Try embedding model: {model} on {embeddings_device}")
        if 'jina-embeddings' in model:
            embeddings_model = AutoModel.from_pretrained(model, trust_remote_code=True)  # trust_remote_code is needed to use the encode method
            embeddings_model = embeddings_model.to(embeddings_device)
        else:
            embeddings_model = SentenceTransformer(model, device=embeddings_device)

-        print(f"Loaded embedding model: {model}")
+        logger.info(f"Loaded embedding model: {model}")
    except Exception as e:
        embeddings_model = None
        raise ServiceUnavailableError(f"Error: Failed to load embedding model: {model}", internal_message=repr(e))
--- a/modules/api/moderations.py
+++ b/modules/api/moderations.py
@@ -64,6 +64,4 @@ def moderations(input):
                'category_scores': category_scores,
            }])

-    print(results)
-
    return results
--- a/modules/api/script.py
+++ b/modules/api/script.py
@@ -506,12 +506,17 @@ async def handle_load_model(request_data: LoadModelRequest):
        return JSONResponse(content="OK")
    except Exception:
        traceback.print_exc()
-        raise HTTPException(status_code=400, detail="Failed to load the model.")
+        raise HTTPException(status_code=500, detail="Failed to load the model.")


@app.post("/v1/internal/model/unload", dependencies=check_admin_key)
 async def handle_unload_model():
-    unload_model()
+    try:
+        unload_model()
+        return JSONResponse(content="OK")
+    except Exception:
+        traceback.print_exc()
+        raise HTTPException(status_code=500, detail="Failed to unload the model.")


@app.get("/v1/internal/lora/list", response_model=LoraListResponse, dependencies=check_admin_key)