mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2026-03-26 15:24:39 +01:00
API: Add thinking block signature field, fix error codes, clean up logging
This commit is contained in:
parent
307d0c92be
commit
02f18a1d65
|
|
@ -326,7 +326,7 @@ class StreamConverter:
|
|||
"data": json.dumps({
|
||||
"type": "content_block_start",
|
||||
"index": self.block_index,
|
||||
"content_block": {"type": "thinking", "thinking": ""}
|
||||
"content_block": {"type": "thinking", "thinking": "", "signature": ""}
|
||||
})
|
||||
})
|
||||
events.append({
|
||||
|
|
|
|||
|
|
@ -39,14 +39,14 @@ def load_embedding_model(model: str):
|
|||
initialize_embedding_params()
|
||||
global embeddings_device, embeddings_model
|
||||
try:
|
||||
print(f"Try embedding model: {model} on {embeddings_device}")
|
||||
logger.info(f"Try embedding model: {model} on {embeddings_device}")
|
||||
if 'jina-embeddings' in model:
|
||||
embeddings_model = AutoModel.from_pretrained(model, trust_remote_code=True) # trust_remote_code is needed to use the encode method
|
||||
embeddings_model = embeddings_model.to(embeddings_device)
|
||||
else:
|
||||
embeddings_model = SentenceTransformer(model, device=embeddings_device)
|
||||
|
||||
print(f"Loaded embedding model: {model}")
|
||||
logger.info(f"Loaded embedding model: {model}")
|
||||
except Exception as e:
|
||||
embeddings_model = None
|
||||
raise ServiceUnavailableError(f"Error: Failed to load embedding model: {model}", internal_message=repr(e))
|
||||
|
|
|
|||
|
|
@ -64,6 +64,4 @@ def moderations(input):
|
|||
'category_scores': category_scores,
|
||||
}])
|
||||
|
||||
print(results)
|
||||
|
||||
return results
|
||||
|
|
|
|||
|
|
@ -506,12 +506,17 @@ async def handle_load_model(request_data: LoadModelRequest):
|
|||
return JSONResponse(content="OK")
|
||||
except Exception:
|
||||
traceback.print_exc()
|
||||
raise HTTPException(status_code=400, detail="Failed to load the model.")
|
||||
raise HTTPException(status_code=500, detail="Failed to load the model.")
|
||||
|
||||
|
||||
@app.post("/v1/internal/model/unload", dependencies=check_admin_key)
|
||||
async def handle_unload_model():
|
||||
unload_model()
|
||||
try:
|
||||
unload_model()
|
||||
return JSONResponse(content="OK")
|
||||
except Exception:
|
||||
traceback.print_exc()
|
||||
raise HTTPException(status_code=500, detail="Failed to unload the model.")
|
||||
|
||||
|
||||
@app.get("/v1/internal/lora/list", response_model=LoraListResponse, dependencies=check_admin_key)
|
||||
|
|
|
|||
Loading…
Reference in a new issue