API: Move OpenAI-compatible API from extensions/openai to modules/api

oobabooga 2026-03-20 14:46:00 -03:00
parent 2e4232e02b
commit bf6fbc019d
23 changed files with 51 additions and 65 deletions

View file

@@ -106,7 +106,7 @@ jobs:
cd "text-generation-webui-${VERSION_CLEAN}"
# Remove extensions that need additional requirements
allowed=("character_bias" "gallery" "openai" "sd_api_pictures")
allowed=("character_bias" "gallery" "sd_api_pictures")
find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf
# Define common variables
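For readers less used to the `find | grep -v | xargs` idiom, this step keeps only the allow-listed extension directories and deletes the rest; a rough Python equivalent (a sketch only, not part of the workflow) would be:
```
# Sketch: keep only the allow-listed extension directories, equivalent in
# spirit to the find/grep/xargs pipeline above.
import shutil
from pathlib import Path

allowed = {"character_bias", "gallery", "sd_api_pictures"}

for entry in Path("extensions").iterdir():
    if entry.is_dir() and entry.name not in allowed:
        shutil.rmtree(entry)
```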

View file

@@ -105,7 +105,7 @@ jobs:
cd "text-generation-webui-${VERSION_CLEAN}"
# Remove extensions that need additional requirements
allowed=("character_bias" "gallery" "openai" "sd_api_pictures")
allowed=("character_bias" "gallery" "sd_api_pictures")
find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf
# Define common variables

View file

@@ -105,7 +105,7 @@ jobs:
cd "text-generation-webui-${VERSION_CLEAN}"
# Remove extensions that need additional requirements
allowed=("character_bias" "gallery" "openai" "sd_api_pictures")
allowed=("character_bias" "gallery" "sd_api_pictures")
find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf
# Define common variables

View file

@@ -105,7 +105,7 @@ jobs:
cd "text-generation-webui-${VERSION_CLEAN}"
# Remove extensions that need additional requirements
allowed=("character_bias" "gallery" "openai" "sd_api_pictures")
allowed=("character_bias" "gallery" "sd_api_pictures")
find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf
# Define common variables

View file

@@ -20,7 +20,6 @@ If you create an extension, you are welcome to host it in a GitHub repository an
|Extension|Description|
|---------|-----------|
|[openai](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/openai)| Creates an API that mimics the OpenAI API and can be used as a drop-in replacement. |
|[superboogav2](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/superboogav2)| Enhanced RAG extension with support for PDF, DOCX, and PPTX files. |
|[send_pictures](https://github.com/oobabooga/text-generation-webui/blob/main/extensions/send_pictures/)| Creates an image upload field that can be used to send images to the bot in chat mode. Captions are automatically generated using BLIP. |
|[coqui_tts](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/coqui_tts)| Text-to-speech extension using Coqui XTTS v2. |

View file

@@ -19,7 +19,7 @@ Add `--api` to your command-line flags.
### Examples
For the documentation with all the endpoints, parameters and their types, consult `http://127.0.0.1:5000/docs` or the [typing.py](https://github.com/oobabooga/text-generation-webui/blob/main/extensions/openai/typing.py) file.
For the documentation with all the endpoints, parameters and their types, consult `http://127.0.0.1:5000/docs` or the [typing.py](https://github.com/oobabooga/text-generation-webui/blob/main/modules/api/typing.py) file.
The official examples in the [OpenAI documentation](https://platform.openai.com/docs/api-reference) should also work, and the same parameters apply (although the API here has more optional parameters).
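For example, a minimal request against the local chat completions endpoint could look like the snippet below (assuming the server was started with `--api` on the default port 5000; the full list of accepted generation parameters is in the docs referenced above):
```
# Minimal sketch: query the local OpenAI-compatible chat completions endpoint.
import requests

url = "http://127.0.0.1:5000/v1/chat/completions"
payload = {
    "messages": [{"role": "user", "content": "Hello! Who are you?"}],
    "max_tokens": 200,
}
response = requests.post(url, json=payload, timeout=120)
print(response.json()["choices"][0]["message"]["content"])
```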
@@ -490,16 +490,6 @@ The following environment variables can be used (they take precedence over every
| `OPENEDAI_EMBEDDING_MODEL` | Embedding model (if applicable) | sentence-transformers/all-mpnet-base-v2 |
| `OPENEDAI_EMBEDDING_DEVICE` | Embedding device (if applicable) | cuda |
#### Persistent settings with `settings.yaml`
You can also set the following variables in your `settings.yaml` file:
```
openai-embedding_device: cuda
openai-embedding_model: "sentence-transformers/all-mpnet-base-v2"
openai-debug: 1
```
### Third-party application setup
You can usually force an application that uses the OpenAI API to connect to the local API by using the following environment variables:
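The variable list itself is truncated in this hunk. As an illustration only, many OpenAI client libraries read the conventional variables below; the exact names here are an assumption, not quoted from the documentation:
```
# Illustrative sketch (assumed variable names): point an OpenAI client at the
# local server before the third-party application configures itself.
import os

os.environ["OPENAI_API_KEY"] = "sk-111111111111111111111111111111111111111111111111"  # any dummy key
os.environ["OPENAI_API_BASE"] = "http://127.0.0.1:5000/v1"
```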

modules/api/__init__.py Normal file
View file

View file

@@ -9,9 +9,9 @@ import tiktoken
import yaml
from pydantic import ValidationError
from extensions.openai.errors import InvalidRequestError
from extensions.openai.typing import ToolDefinition
from extensions.openai.utils import debug_msg
from .errors import InvalidRequestError
from .typing import ToolDefinition
from .utils import debug_msg
from modules.tool_parsing import get_tool_call_id, parse_tool_call, detect_tool_call_format
from modules import shared
from modules.reasoning import extract_reasoning

View file

@@ -3,8 +3,8 @@ import os
import numpy as np
from transformers import AutoModel
from extensions.openai.errors import ServiceUnavailableError
from extensions.openai.utils import debug_msg, float_list_to_base64
from .errors import ServiceUnavailableError
from .utils import debug_msg, float_list_to_base64
from modules.logging_colors import logger
embeddings_params_initialized = False
@@ -17,14 +17,12 @@ def initialize_embedding_params():
'''
global embeddings_params_initialized
if not embeddings_params_initialized:
from extensions.openai.script import params
global st_model, embeddings_model, embeddings_device
st_model = os.environ.get("OPENEDAI_EMBEDDING_MODEL", params.get('embedding_model', 'all-mpnet-base-v2'))
st_model = os.environ.get("OPENEDAI_EMBEDDING_MODEL", 'sentence-transformers/all-mpnet-base-v2')
embeddings_model = None
# OPENEDAI_EMBEDDING_DEVICE: auto (best or cpu), cpu, cuda, ipu, xpu, mkldnn, opengl, opencl, ideep, hip, ve, fpga, ort, xla, lazy, vulkan, mps, meta, hpu, mtia, privateuseone
embeddings_device = os.environ.get("OPENEDAI_EMBEDDING_DEVICE", params.get('embedding_device', 'cpu'))
embeddings_device = os.environ.get("OPENEDAI_EMBEDDING_DEVICE", 'cpu')
if embeddings_device.lower() == 'auto':
embeddings_device = None
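With the extension's `params` dict gone, the embedding settings now come from environment variables read once on first use. A self-contained sketch of the resulting lazy-initialization pattern (names simplified from the hunk above):
```
import os

_initialized = False
st_model = None
embeddings_device = None

def initialize_embedding_params():
    """Read the embedding configuration once, preferring environment variables."""
    global _initialized, st_model, embeddings_device
    if _initialized:
        return
    st_model = os.environ.get("OPENEDAI_EMBEDDING_MODEL",
                              "sentence-transformers/all-mpnet-base-v2")
    embeddings_device = os.environ.get("OPENEDAI_EMBEDDING_DEVICE", "cpu")
    if embeddings_device.lower() == "auto":
        embeddings_device = None  # let the backend pick a device
    _initialized = True
```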

View file

@@ -6,7 +6,7 @@ import base64
import io
import time
from extensions.openai.errors import ServiceUnavailableError
from .errors import ServiceUnavailableError
from modules import shared

View file

@@ -1,4 +1,4 @@
from extensions.openai.completions import process_parameters
from .completions import process_parameters
from modules.logits import get_next_logits

View file

@@ -3,7 +3,7 @@ import time
import numpy as np
from numpy.linalg import norm
from extensions.openai.embeddings import get_embeddings
from .embeddings import get_embeddings
moderations_disabled = False # return 0/false
category_embeddings = None
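The moderation endpoint scores the input's embedding against a reference embedding per category. A minimal sketch of the cosine-similarity comparison it relies on (the function names below are illustrative, not the module's actual API):
```
# Sketch: score an input embedding against per-category reference embeddings.
import numpy as np
from numpy.linalg import norm

def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    return float(np.dot(a, b) / (norm(a) * norm(b)))

def score_categories(text_embedding, category_embeddings):
    return {name: cosine_similarity(text_embedding, ref)
            for name, ref in category_embeddings.items()}
```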

View file

@@ -13,16 +13,15 @@ from fastapi import Depends, FastAPI, Header, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.requests import Request
from fastapi.responses import JSONResponse
from pydub import AudioSegment
from sse_starlette import EventSourceResponse
from starlette.concurrency import iterate_in_threadpool
import extensions.openai.completions as OAIcompletions
import extensions.openai.logits as OAIlogits
import extensions.openai.models as OAImodels
from extensions.openai.tokens import token_count, token_decode, token_encode
from extensions.openai.errors import OpenAIError
from extensions.openai.utils import _start_cloudflared
import modules.api.completions as OAIcompletions
import modules.api.logits as OAIlogits
import modules.api.models as OAImodels
from .tokens import token_count, token_decode, token_encode
from .errors import OpenAIError
from .utils import _start_cloudflared
from modules import shared
from modules.logging_colors import logger
from modules.models import unload_model
@@ -53,12 +52,6 @@ from .typing import (
to_dict
)
params = {
'embedding_device': 'cpu',
'embedding_model': 'sentence-transformers/all-mpnet-base-v2',
'debug': 0
}
async def _wait_for_disconnect(request: Request, stop_event: threading.Event):
"""Block until the client disconnects, then signal the stop_event."""
@@ -244,6 +237,7 @@ def handle_billing_usage():
@app.post('/v1/audio/transcriptions', dependencies=check_key)
async def handle_audio_transcription(request: Request):
import speech_recognition as sr
from pydub import AudioSegment
r = sr.Recognizer()
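Note that heavy optional dependencies such as `pydub` and `speech_recognition` are now imported inside the handler instead of at module import time, so the API can start without them installed. A minimal sketch of that deferred-import pattern (handler body elided):
```
# Sketch: defer optional heavy imports until the endpoint is actually called.
from fastapi import FastAPI, Request

app = FastAPI()

@app.post("/v1/audio/transcriptions")
async def handle_audio_transcription(request: Request):
    # Only needed when a transcription is requested; missing packages therefore
    # fail at request time rather than at server startup.
    import speech_recognition as sr
    from pydub import AudioSegment

    recognizer = sr.Recognizer()
    # ... decode the upload with AudioSegment, then run recognizer on it ...
    return {"text": ""}
```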
@@ -275,7 +269,7 @@ async def handle_audio_transcription(request: Request):
@app.post('/v1/images/generations', response_model=ImageGenerationResponse, dependencies=check_key)
async def handle_image_generation(request_data: ImageGenerationRequest):
import extensions.openai.images as OAIimages
import modules.api.images as OAIimages
response = await asyncio.to_thread(OAIimages.generations, request_data)
return JSONResponse(response)
@@ -283,7 +277,7 @@ async def handle_image_generation(request_data: ImageGenerationRequest):
@app.post("/v1/embeddings", response_model=EmbeddingsResponse, dependencies=check_key)
async def handle_embeddings(request: Request, request_data: EmbeddingsRequest):
import extensions.openai.embeddings as OAIembeddings
import modules.api.embeddings as OAIembeddings
input = request_data.input
if not input:
@@ -298,7 +292,7 @@ async def handle_embeddings(request: Request, request_data: EmbeddingsRequest):
@app.post("/v1/moderations", dependencies=check_key)
async def handle_moderations(request: Request):
import extensions.openai.moderations as OAImoderations
import modules.api.moderations as OAImoderations
body = await request.json()
input = body["input"]
@@ -500,7 +494,15 @@ def run_server():
uvicorn.run(app, host=server_addrs, port=port, ssl_certfile=ssl_certfile, ssl_keyfile=ssl_keyfile, access_log=False)
_server_started = False
def setup():
global _server_started
if _server_started:
return
_server_started = True
if shared.args.nowebui:
run_server()
else:
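Since `setup()` can now be reached from more than one call site, the new `_server_started` flag makes it idempotent. The tail of the hunk is cut off above; a sketch of the full pattern, assuming the non-blocking branch launches the server on a daemon thread:
```
# Sketch of an idempotent setup(): start the API server at most once.
import threading

_server_started = False

def run_server():
    ...  # uvicorn.run(...) as in the module above

def setup(nowebui: bool = False):
    global _server_started
    if _server_started:
        return
    _server_started = True
    if nowebui:
        run_server()  # no web UI: block on the server in the foreground
    else:
        # Assumption: run alongside the UI on a background daemon thread.
        threading.Thread(target=run_server, daemon=True).start()
```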

View file

@@ -23,8 +23,7 @@ def float_list_to_base64(float_array: np.ndarray) -> str:
def debug_msg(*args, **kwargs):
from extensions.openai.script import params
if os.environ.get("OPENEDAI_DEBUG", params.get('debug', 0)):
if os.environ.get("OPENEDAI_DEBUG", 0):
print(*args, **kwargs)

View file

@@ -32,8 +32,7 @@ def load_extensions():
if name not in available_extensions:
continue
if name != 'api':
logger.info(f'Loading the extension "{name}"')
logger.info(f'Loading the extension "{name}"')
try:
# Prefer user extension, fall back to system extension

View file

@@ -156,7 +156,7 @@ group.add_argument('--portable', action='store_true', help='Hide features not av
# API
group = parser.add_argument_group('API')
group.add_argument('--api', action='store_true', help='Enable the API extension.')
group.add_argument('--api', action='store_true', help='Enable the API server.')
group.add_argument('--public-api', action='store_true', help='Create a public URL for the API using Cloudflare.')
group.add_argument('--public-api-id', type=str, help='Tunnel ID for named Cloudflare Tunnel. Use together with public-api option.', default=None)
group.add_argument('--api-port', type=int, default=5000, help='The listening port for the API.')
@@ -435,16 +435,6 @@ def fix_loader_name(name):
return 'TensorRT-LLM'
def add_extension(name, last=False):
if args.extensions is None:
args.extensions = [name]
elif last:
args.extensions = [x for x in args.extensions if x != name]
args.extensions.append(name)
elif name not in args.extensions:
args.extensions.append(name)
def is_chat():
return True
@@ -464,10 +454,6 @@ def load_user_config():
args.loader = fix_loader_name(args.loader)
# Activate the API extension
if args.api or args.public_api:
add_extension('openai', last=True)
# Load model-specific settings
p = Path(f'{args.model_dir}/config.yaml')
if p.exists():

View file

@@ -95,8 +95,6 @@ def set_interface_arguments(extensions, bool_active):
setattr(shared.args, k, False)
for k in bool_active:
setattr(shared.args, k, True)
if k == 'api':
shared.add_extension('openai', last=True)
shared.need_restart = True

View file

@@ -106,6 +106,11 @@ def create_interface():
if shared.args.extensions is not None and len(shared.args.extensions) > 0:
extensions_module.load_extensions()
# Start the API server if enabled
if shared.args.api or shared.args.public_api:
from modules.api.script import setup as api_setup
api_setup()
# Force some events to be triggered on page load
shared.persistent_interface_state.update({
'mode': shared.settings['mode'],
@@ -273,6 +278,12 @@ if __name__ == "__main__":
# Activate the extensions listed on settings.yaml
extensions_module.available_extensions = utils.get_available_extensions()
for extension in shared.settings['default_extensions']:
# The openai extension was moved to modules/api and is now
# activated with --api. Treat it as an alias for backwards compat.
if extension == 'openai':
shared.args.api = True
continue
shared.args.extensions = shared.args.extensions or []
if extension not in shared.args.extensions:
shared.args.extensions.append(extension)
@@ -337,6 +348,10 @@
shared.args.extensions = [x for x in (shared.args.extensions or []) if x != 'gallery']
if shared.args.extensions:
extensions_module.load_extensions()
if shared.args.api or shared.args.public_api:
from modules.api.script import setup as api_setup
api_setup()
else:
# Launch the web UI
create_interface()