mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2026-03-26 07:14:39 +01:00
API: Move OpenAI-compatible API from extensions/openai to modules/api
This commit is contained in:
parent
2e4232e02b
commit
bf6fbc019d
|
|
@ -106,7 +106,7 @@ jobs:
|
|||
cd "text-generation-webui-${VERSION_CLEAN}"
|
||||
|
||||
# Remove extensions that need additional requirements
|
||||
allowed=("character_bias" "gallery" "openai" "sd_api_pictures")
|
||||
allowed=("character_bias" "gallery" "sd_api_pictures")
|
||||
find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf
|
||||
|
||||
# Define common variables
|
||||
|
|
|
|||
|
|
@ -105,7 +105,7 @@ jobs:
|
|||
cd "text-generation-webui-${VERSION_CLEAN}"
|
||||
|
||||
# Remove extensions that need additional requirements
|
||||
allowed=("character_bias" "gallery" "openai" "sd_api_pictures")
|
||||
allowed=("character_bias" "gallery" "sd_api_pictures")
|
||||
find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf
|
||||
|
||||
# Define common variables
|
||||
|
|
|
|||
|
|
@ -105,7 +105,7 @@ jobs:
|
|||
cd "text-generation-webui-${VERSION_CLEAN}"
|
||||
|
||||
# Remove extensions that need additional requirements
|
||||
allowed=("character_bias" "gallery" "openai" "sd_api_pictures")
|
||||
allowed=("character_bias" "gallery" "sd_api_pictures")
|
||||
find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf
|
||||
|
||||
# Define common variables
|
||||
|
|
|
|||
2
.github/workflows/build-portable-release.yml
vendored
2
.github/workflows/build-portable-release.yml
vendored
|
|
@ -105,7 +105,7 @@ jobs:
|
|||
cd "text-generation-webui-${VERSION_CLEAN}"
|
||||
|
||||
# Remove extensions that need additional requirements
|
||||
allowed=("character_bias" "gallery" "openai" "sd_api_pictures")
|
||||
allowed=("character_bias" "gallery" "sd_api_pictures")
|
||||
find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf
|
||||
|
||||
# Define common variables
|
||||
|
|
|
|||
|
|
@ -20,7 +20,6 @@ If you create an extension, you are welcome to host it in a GitHub repository an
|
|||
|
||||
|Extension|Description|
|
||||
|---------|-----------|
|
||||
|[openai](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/openai)| Creates an API that mimics the OpenAI API and can be used as a drop-in replacement. |
|
||||
|[superboogav2](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/superboogav2)| Enhanced RAG extension with support for PDF, DOCX, and PPTX files. |
|
||||
|[send_pictures](https://github.com/oobabooga/text-generation-webui/blob/main/extensions/send_pictures/)| Creates an image upload field that can be used to send images to the bot in chat mode. Captions are automatically generated using BLIP. |
|
||||
|[coqui_tts](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/coqui_tts)| Text-to-speech extension using Coqui XTTS v2. |
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ Add `--api` to your command-line flags.
|
|||
|
||||
### Examples
|
||||
|
||||
For the documentation with all the endpoints, parameters and their types, consult `http://127.0.0.1:5000/docs` or the [typing.py](https://github.com/oobabooga/text-generation-webui/blob/main/extensions/openai/typing.py) file.
|
||||
For the documentation with all the endpoints, parameters and their types, consult `http://127.0.0.1:5000/docs` or the [typing.py](https://github.com/oobabooga/text-generation-webui/blob/main/modules/api/typing.py) file.
|
||||
|
||||
The official examples in the [OpenAI documentation](https://platform.openai.com/docs/api-reference) should also work, and the same parameters apply (although the API here has more optional parameters).
|
||||
|
||||
|
|
@ -490,16 +490,6 @@ The following environment variables can be used (they take precedence over every
|
|||
| `OPENEDAI_EMBEDDING_MODEL` | Embedding model (if applicable) | sentence-transformers/all-mpnet-base-v2 |
|
||||
| `OPENEDAI_EMBEDDING_DEVICE` | Embedding device (if applicable) | cuda |
|
||||
|
||||
#### Persistent settings with `settings.yaml`
|
||||
|
||||
You can also set the following variables in your `settings.yaml` file:
|
||||
|
||||
```
|
||||
openai-embedding_device: cuda
|
||||
openai-embedding_model: "sentence-transformers/all-mpnet-base-v2"
|
||||
openai-debug: 1
|
||||
```
|
||||
|
||||
### Third-party application setup
|
||||
|
||||
You can usually force an application that uses the OpenAI API to connect to the local API by using the following environment variables:
|
||||
|
|
|
|||
0
modules/api/__init__.py
Normal file
0
modules/api/__init__.py
Normal file
|
|
@ -9,9 +9,9 @@ import tiktoken
|
|||
import yaml
|
||||
from pydantic import ValidationError
|
||||
|
||||
from extensions.openai.errors import InvalidRequestError
|
||||
from extensions.openai.typing import ToolDefinition
|
||||
from extensions.openai.utils import debug_msg
|
||||
from .errors import InvalidRequestError
|
||||
from .typing import ToolDefinition
|
||||
from .utils import debug_msg
|
||||
from modules.tool_parsing import get_tool_call_id, parse_tool_call, detect_tool_call_format
|
||||
from modules import shared
|
||||
from modules.reasoning import extract_reasoning
|
||||
|
|
@ -3,8 +3,8 @@ import os
|
|||
import numpy as np
|
||||
from transformers import AutoModel
|
||||
|
||||
from extensions.openai.errors import ServiceUnavailableError
|
||||
from extensions.openai.utils import debug_msg, float_list_to_base64
|
||||
from .errors import ServiceUnavailableError
|
||||
from .utils import debug_msg, float_list_to_base64
|
||||
from modules.logging_colors import logger
|
||||
|
||||
embeddings_params_initialized = False
|
||||
|
|
@ -17,14 +17,12 @@ def initialize_embedding_params():
|
|||
'''
|
||||
global embeddings_params_initialized
|
||||
if not embeddings_params_initialized:
|
||||
from extensions.openai.script import params
|
||||
|
||||
global st_model, embeddings_model, embeddings_device
|
||||
|
||||
st_model = os.environ.get("OPENEDAI_EMBEDDING_MODEL", params.get('embedding_model', 'all-mpnet-base-v2'))
|
||||
st_model = os.environ.get("OPENEDAI_EMBEDDING_MODEL", 'sentence-transformers/all-mpnet-base-v2')
|
||||
embeddings_model = None
|
||||
# OPENEDAI_EMBEDDING_DEVICE: auto (best or cpu), cpu, cuda, ipu, xpu, mkldnn, opengl, opencl, ideep, hip, ve, fpga, ort, xla, lazy, vulkan, mps, meta, hpu, mtia, privateuseone
|
||||
embeddings_device = os.environ.get("OPENEDAI_EMBEDDING_DEVICE", params.get('embedding_device', 'cpu'))
|
||||
embeddings_device = os.environ.get("OPENEDAI_EMBEDDING_DEVICE", 'cpu')
|
||||
if embeddings_device.lower() == 'auto':
|
||||
embeddings_device = None
|
||||
|
||||
|
|
@ -6,7 +6,7 @@ import base64
|
|||
import io
|
||||
import time
|
||||
|
||||
from extensions.openai.errors import ServiceUnavailableError
|
||||
from .errors import ServiceUnavailableError
|
||||
from modules import shared
|
||||
|
||||
|
||||
|
|
@ -1,4 +1,4 @@
|
|||
from extensions.openai.completions import process_parameters
|
||||
from .completions import process_parameters
|
||||
from modules.logits import get_next_logits
|
||||
|
||||
|
||||
|
|
@ -3,7 +3,7 @@ import time
|
|||
import numpy as np
|
||||
from numpy.linalg import norm
|
||||
|
||||
from extensions.openai.embeddings import get_embeddings
|
||||
from .embeddings import get_embeddings
|
||||
|
||||
moderations_disabled = False # return 0/false
|
||||
category_embeddings = None
|
||||
|
|
@ -13,16 +13,15 @@ from fastapi import Depends, FastAPI, Header, HTTPException
|
|||
from fastapi.middleware.cors import CORSMiddleware
|
||||
from fastapi.requests import Request
|
||||
from fastapi.responses import JSONResponse
|
||||
from pydub import AudioSegment
|
||||
from sse_starlette import EventSourceResponse
|
||||
from starlette.concurrency import iterate_in_threadpool
|
||||
|
||||
import extensions.openai.completions as OAIcompletions
|
||||
import extensions.openai.logits as OAIlogits
|
||||
import extensions.openai.models as OAImodels
|
||||
from extensions.openai.tokens import token_count, token_decode, token_encode
|
||||
from extensions.openai.errors import OpenAIError
|
||||
from extensions.openai.utils import _start_cloudflared
|
||||
import modules.api.completions as OAIcompletions
|
||||
import modules.api.logits as OAIlogits
|
||||
import modules.api.models as OAImodels
|
||||
from .tokens import token_count, token_decode, token_encode
|
||||
from .errors import OpenAIError
|
||||
from .utils import _start_cloudflared
|
||||
from modules import shared
|
||||
from modules.logging_colors import logger
|
||||
from modules.models import unload_model
|
||||
|
|
@ -53,12 +52,6 @@ from .typing import (
|
|||
to_dict
|
||||
)
|
||||
|
||||
params = {
|
||||
'embedding_device': 'cpu',
|
||||
'embedding_model': 'sentence-transformers/all-mpnet-base-v2',
|
||||
'debug': 0
|
||||
}
|
||||
|
||||
|
||||
async def _wait_for_disconnect(request: Request, stop_event: threading.Event):
|
||||
"""Block until the client disconnects, then signal the stop_event."""
|
||||
|
|
@ -244,6 +237,7 @@ def handle_billing_usage():
|
|||
@app.post('/v1/audio/transcriptions', dependencies=check_key)
|
||||
async def handle_audio_transcription(request: Request):
|
||||
import speech_recognition as sr
|
||||
from pydub import AudioSegment
|
||||
|
||||
r = sr.Recognizer()
|
||||
|
||||
|
|
@ -275,7 +269,7 @@ async def handle_audio_transcription(request: Request):
|
|||
|
||||
@app.post('/v1/images/generations', response_model=ImageGenerationResponse, dependencies=check_key)
|
||||
async def handle_image_generation(request_data: ImageGenerationRequest):
|
||||
import extensions.openai.images as OAIimages
|
||||
import modules.api.images as OAIimages
|
||||
|
||||
response = await asyncio.to_thread(OAIimages.generations, request_data)
|
||||
return JSONResponse(response)
|
||||
|
|
@ -283,7 +277,7 @@ async def handle_image_generation(request_data: ImageGenerationRequest):
|
|||
|
||||
@app.post("/v1/embeddings", response_model=EmbeddingsResponse, dependencies=check_key)
|
||||
async def handle_embeddings(request: Request, request_data: EmbeddingsRequest):
|
||||
import extensions.openai.embeddings as OAIembeddings
|
||||
import modules.api.embeddings as OAIembeddings
|
||||
|
||||
input = request_data.input
|
||||
if not input:
|
||||
|
|
@ -298,7 +292,7 @@ async def handle_embeddings(request: Request, request_data: EmbeddingsRequest):
|
|||
|
||||
@app.post("/v1/moderations", dependencies=check_key)
|
||||
async def handle_moderations(request: Request):
|
||||
import extensions.openai.moderations as OAImoderations
|
||||
import modules.api.moderations as OAImoderations
|
||||
|
||||
body = await request.json()
|
||||
input = body["input"]
|
||||
|
|
@ -500,7 +494,15 @@ def run_server():
|
|||
uvicorn.run(app, host=server_addrs, port=port, ssl_certfile=ssl_certfile, ssl_keyfile=ssl_keyfile, access_log=False)
|
||||
|
||||
|
||||
_server_started = False
|
||||
|
||||
|
||||
def setup():
|
||||
global _server_started
|
||||
if _server_started:
|
||||
return
|
||||
|
||||
_server_started = True
|
||||
if shared.args.nowebui:
|
||||
run_server()
|
||||
else:
|
||||
|
|
@ -23,8 +23,7 @@ def float_list_to_base64(float_array: np.ndarray) -> str:
|
|||
|
||||
|
||||
def debug_msg(*args, **kwargs):
|
||||
from extensions.openai.script import params
|
||||
if os.environ.get("OPENEDAI_DEBUG", params.get('debug', 0)):
|
||||
if os.environ.get("OPENEDAI_DEBUG", 0):
|
||||
print(*args, **kwargs)
|
||||
|
||||
|
||||
|
|
@ -32,8 +32,7 @@ def load_extensions():
|
|||
if name not in available_extensions:
|
||||
continue
|
||||
|
||||
if name != 'api':
|
||||
logger.info(f'Loading the extension "{name}"')
|
||||
logger.info(f'Loading the extension "{name}"')
|
||||
|
||||
try:
|
||||
# Prefer user extension, fall back to system extension
|
||||
|
|
|
|||
|
|
@ -156,7 +156,7 @@ group.add_argument('--portable', action='store_true', help='Hide features not av
|
|||
|
||||
# API
|
||||
group = parser.add_argument_group('API')
|
||||
group.add_argument('--api', action='store_true', help='Enable the API extension.')
|
||||
group.add_argument('--api', action='store_true', help='Enable the API server.')
|
||||
group.add_argument('--public-api', action='store_true', help='Create a public URL for the API using Cloudflare.')
|
||||
group.add_argument('--public-api-id', type=str, help='Tunnel ID for named Cloudflare Tunnel. Use together with public-api option.', default=None)
|
||||
group.add_argument('--api-port', type=int, default=5000, help='The listening port for the API.')
|
||||
|
|
@ -435,16 +435,6 @@ def fix_loader_name(name):
|
|||
return 'TensorRT-LLM'
|
||||
|
||||
|
||||
def add_extension(name, last=False):
|
||||
if args.extensions is None:
|
||||
args.extensions = [name]
|
||||
elif last:
|
||||
args.extensions = [x for x in args.extensions if x != name]
|
||||
args.extensions.append(name)
|
||||
elif name not in args.extensions:
|
||||
args.extensions.append(name)
|
||||
|
||||
|
||||
def is_chat():
|
||||
return True
|
||||
|
||||
|
|
@ -464,10 +454,6 @@ def load_user_config():
|
|||
|
||||
args.loader = fix_loader_name(args.loader)
|
||||
|
||||
# Activate the API extension
|
||||
if args.api or args.public_api:
|
||||
add_extension('openai', last=True)
|
||||
|
||||
# Load model-specific settings
|
||||
p = Path(f'{args.model_dir}/config.yaml')
|
||||
if p.exists():
|
||||
|
|
|
|||
|
|
@ -95,8 +95,6 @@ def set_interface_arguments(extensions, bool_active):
|
|||
setattr(shared.args, k, False)
|
||||
for k in bool_active:
|
||||
setattr(shared.args, k, True)
|
||||
if k == 'api':
|
||||
shared.add_extension('openai', last=True)
|
||||
|
||||
shared.need_restart = True
|
||||
|
||||
|
|
|
|||
15
server.py
15
server.py
|
|
@ -106,6 +106,11 @@ def create_interface():
|
|||
if shared.args.extensions is not None and len(shared.args.extensions) > 0:
|
||||
extensions_module.load_extensions()
|
||||
|
||||
# Start the API server if enabled
|
||||
if shared.args.api or shared.args.public_api:
|
||||
from modules.api.script import setup as api_setup
|
||||
api_setup()
|
||||
|
||||
# Force some events to be triggered on page load
|
||||
shared.persistent_interface_state.update({
|
||||
'mode': shared.settings['mode'],
|
||||
|
|
@ -273,6 +278,12 @@ if __name__ == "__main__":
|
|||
# Activate the extensions listed on settings.yaml
|
||||
extensions_module.available_extensions = utils.get_available_extensions()
|
||||
for extension in shared.settings['default_extensions']:
|
||||
# The openai extension was moved to modules/api and is now
|
||||
# activated with --api. Treat it as an alias for backwards compat.
|
||||
if extension == 'openai':
|
||||
shared.args.api = True
|
||||
continue
|
||||
|
||||
shared.args.extensions = shared.args.extensions or []
|
||||
if extension not in shared.args.extensions:
|
||||
shared.args.extensions.append(extension)
|
||||
|
|
@ -337,6 +348,10 @@ if __name__ == "__main__":
|
|||
shared.args.extensions = [x for x in (shared.args.extensions or []) if x != 'gallery']
|
||||
if shared.args.extensions:
|
||||
extensions_module.load_extensions()
|
||||
|
||||
if shared.args.api or shared.args.public_api:
|
||||
from modules.api.script import setup as api_setup
|
||||
api_setup()
|
||||
else:
|
||||
# Launch the web UI
|
||||
create_interface()
|
||||
|
|
|
|||
Loading…
Reference in a new issue