API: Add thinking block signature field, fix error codes, clean up logging

This commit is contained in:
oobabooga 2026-03-23 07:06:38 -07:00
parent 307d0c92be
commit 02f18a1d65
4 changed files with 10 additions and 7 deletions

View file

@@ -326,7 +326,7 @@ class StreamConverter:
"data": json.dumps({
"type": "content_block_start",
"index": self.block_index,
"content_block": {"type": "thinking", "thinking": ""}
"content_block": {"type": "thinking", "thinking": "", "signature": ""}
})
})
events.append({

View file

@@ -39,14 +39,14 @@ def load_embedding_model(model: str):
initialize_embedding_params()
global embeddings_device, embeddings_model
try:
print(f"Try embedding model: {model} on {embeddings_device}")
logger.info(f"Try embedding model: {model} on {embeddings_device}")
if 'jina-embeddings' in model:
embeddings_model = AutoModel.from_pretrained(model, trust_remote_code=True) # trust_remote_code is needed to use the encode method
embeddings_model = embeddings_model.to(embeddings_device)
else:
embeddings_model = SentenceTransformer(model, device=embeddings_device)
print(f"Loaded embedding model: {model}")
logger.info(f"Loaded embedding model: {model}")
except Exception as e:
embeddings_model = None
raise ServiceUnavailableError(f"Error: Failed to load embedding model: {model}", internal_message=repr(e))

View file

@@ -64,6 +64,4 @@ def moderations(input):
'category_scores': category_scores,
}])
print(results)
return results

View file

@@ -506,12 +506,17 @@ async def handle_load_model(request_data: LoadModelRequest):
return JSONResponse(content="OK")
except Exception:
traceback.print_exc()
raise HTTPException(status_code=400, detail="Failed to load the model.")
raise HTTPException(status_code=500, detail="Failed to load the model.")
@app.post("/v1/internal/model/unload", dependencies=check_admin_key)
async def handle_unload_model():
unload_model()
try:
unload_model()
return JSONResponse(content="OK")
except Exception:
traceback.print_exc()
raise HTTPException(status_code=500, detail="Failed to unload the model.")
@app.get("/v1/internal/lora/list", response_model=LoraListResponse, dependencies=check_admin_key)