diff --git a/modules/api/anthropic.py b/modules/api/anthropic.py index 5fbf5caf..3fab09a6 100644 --- a/modules/api/anthropic.py +++ b/modules/api/anthropic.py @@ -326,7 +326,7 @@ class StreamConverter: "data": json.dumps({ "type": "content_block_start", "index": self.block_index, - "content_block": {"type": "thinking", "thinking": ""} + "content_block": {"type": "thinking", "thinking": "", "signature": ""} }) }) events.append({ diff --git a/modules/api/embeddings.py b/modules/api/embeddings.py index ad299c9d..16cf0482 100644 --- a/modules/api/embeddings.py +++ b/modules/api/embeddings.py @@ -39,14 +39,14 @@ def load_embedding_model(model: str): initialize_embedding_params() global embeddings_device, embeddings_model try: - print(f"Try embedding model: {model} on {embeddings_device}") + logger.info(f"Try embedding model: {model} on {embeddings_device}") if 'jina-embeddings' in model: embeddings_model = AutoModel.from_pretrained(model, trust_remote_code=True) # trust_remote_code is needed to use the encode method embeddings_model = embeddings_model.to(embeddings_device) else: embeddings_model = SentenceTransformer(model, device=embeddings_device) - print(f"Loaded embedding model: {model}") + logger.info(f"Loaded embedding model: {model}") except Exception as e: embeddings_model = None raise ServiceUnavailableError(f"Error: Failed to load embedding model: {model}", internal_message=repr(e)) diff --git a/modules/api/moderations.py b/modules/api/moderations.py index ac0539d6..a41763cf 100644 --- a/modules/api/moderations.py +++ b/modules/api/moderations.py @@ -64,6 +64,4 @@ def moderations(input): 'category_scores': category_scores, }]) - print(results) - return results diff --git a/modules/api/script.py b/modules/api/script.py index 5913c2c5..85f4974f 100644 --- a/modules/api/script.py +++ b/modules/api/script.py @@ -506,12 +506,17 @@ async def handle_load_model(request_data: LoadModelRequest): return JSONResponse(content="OK") except Exception: traceback.print_exc() - raise HTTPException(status_code=400, detail="Failed to load the model.") + raise HTTPException(status_code=500, detail="Failed to load the model.") @app.post("/v1/internal/model/unload", dependencies=check_admin_key) async def handle_unload_model(): - unload_model() + try: + unload_model() + return JSONResponse(content="OK") + except Exception: + traceback.print_exc() + raise HTTPException(status_code=500, detail="Failed to unload the model.") @app.get("/v1/internal/lora/list", response_model=LoraListResponse, dependencies=check_admin_key)