From bf6fbc019dbd9470efdeafa033818efa178d7735 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 20 Mar 2026 14:46:00 -0300 Subject: [PATCH] API: Move OpenAI-compatible API from extensions/openai to modules/api --- .../workflows/build-portable-release-cuda.yml | 2 +- .../workflows/build-portable-release-rocm.yml | 2 +- .../build-portable-release-vulkan.yml | 2 +- .github/workflows/build-portable-release.yml | 2 +- docs/07 - Extensions.md | 1 - docs/12 - OpenAI API.md | 12 +------ modules/api/__init__.py | 0 .../api}/cache_embedding_model.py | 0 .../openai => modules/api}/completions.py | 6 ++-- .../openai => modules/api}/embeddings.py | 10 +++--- {extensions/openai => modules/api}/errors.py | 0 {extensions/openai => modules/api}/images.py | 2 +- {extensions/openai => modules/api}/logits.py | 2 +- {extensions/openai => modules/api}/models.py | 0 .../openai => modules/api}/moderations.py | 2 +- {extensions/openai => modules/api}/script.py | 34 ++++++++++--------- {extensions/openai => modules/api}/tokens.py | 0 {extensions/openai => modules/api}/typing.py | 0 {extensions/openai => modules/api}/utils.py | 3 +- modules/extensions.py | 3 +- modules/shared.py | 16 +-------- modules/ui_session.py | 2 -- server.py | 15 ++++++++ 23 files changed, 51 insertions(+), 65 deletions(-) create mode 100644 modules/api/__init__.py rename {extensions/openai => modules/api}/cache_embedding_model.py (100%) rename {extensions/openai => modules/api}/completions.py (99%) rename {extensions/openai => modules/api}/embeddings.py (90%) rename {extensions/openai => modules/api}/errors.py (100%) rename {extensions/openai => modules/api}/images.py (96%) rename {extensions/openai => modules/api}/logits.py (84%) rename {extensions/openai => modules/api}/models.py (100%) rename {extensions/openai => modules/api}/moderations.py (97%) rename {extensions/openai => modules/api}/script.py (96%) rename {extensions/openai => modules/api}/tokens.py (100%) rename {extensions/openai => modules/api}/typing.py (100%) rename {extensions/openai => modules/api}/utils.py (93%) diff --git a/.github/workflows/build-portable-release-cuda.yml b/.github/workflows/build-portable-release-cuda.yml index 5d66bd77..f9eea58a 100644 --- a/.github/workflows/build-portable-release-cuda.yml +++ b/.github/workflows/build-portable-release-cuda.yml @@ -106,7 +106,7 @@ jobs: cd "text-generation-webui-${VERSION_CLEAN}" # Remove extensions that need additional requirements - allowed=("character_bias" "gallery" "openai" "sd_api_pictures") + allowed=("character_bias" "gallery" "sd_api_pictures") find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf # Define common variables diff --git a/.github/workflows/build-portable-release-rocm.yml b/.github/workflows/build-portable-release-rocm.yml index b9a10bac..db42b7dc 100644 --- a/.github/workflows/build-portable-release-rocm.yml +++ b/.github/workflows/build-portable-release-rocm.yml @@ -105,7 +105,7 @@ jobs: cd "text-generation-webui-${VERSION_CLEAN}" # Remove extensions that need additional requirements - allowed=("character_bias" "gallery" "openai" "sd_api_pictures") + allowed=("character_bias" "gallery" "sd_api_pictures") find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf # Define common variables diff --git a/.github/workflows/build-portable-release-vulkan.yml b/.github/workflows/build-portable-release-vulkan.yml index 9748d5b8..8f5aa7c8 100644 --- a/.github/workflows/build-portable-release-vulkan.yml +++ b/.github/workflows/build-portable-release-vulkan.yml @@ -105,7 +105,7 @@ jobs: cd "text-generation-webui-${VERSION_CLEAN}" # Remove extensions that need additional requirements - allowed=("character_bias" "gallery" "openai" "sd_api_pictures") + allowed=("character_bias" "gallery" "sd_api_pictures") find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf # Define common variables diff --git a/.github/workflows/build-portable-release.yml b/.github/workflows/build-portable-release.yml index e03116f6..9ace90f6 100644 --- a/.github/workflows/build-portable-release.yml +++ b/.github/workflows/build-portable-release.yml @@ -105,7 +105,7 @@ jobs: cd "text-generation-webui-${VERSION_CLEAN}" # Remove extensions that need additional requirements - allowed=("character_bias" "gallery" "openai" "sd_api_pictures") + allowed=("character_bias" "gallery" "sd_api_pictures") find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf # Define common variables diff --git a/docs/07 - Extensions.md b/docs/07 - Extensions.md index 48cd30ce..779b2a34 100644 --- a/docs/07 - Extensions.md +++ b/docs/07 - Extensions.md @@ -20,7 +20,6 @@ If you create an extension, you are welcome to host it in a GitHub repository an |Extension|Description| |---------|-----------| -|[openai](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/openai)| Creates an API that mimics the OpenAI API and can be used as a drop-in replacement. | |[superboogav2](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/superboogav2)| Enhanced RAG extension with support for PDF, DOCX, and PPTX files. | |[send_pictures](https://github.com/oobabooga/text-generation-webui/blob/main/extensions/send_pictures/)| Creates an image upload field that can be used to send images to the bot in chat mode. Captions are automatically generated using BLIP. | |[coqui_tts](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/coqui_tts)| Text-to-speech extension using Coqui XTTS v2. | diff --git a/docs/12 - OpenAI API.md b/docs/12 - OpenAI API.md index 637ccced..276a7e19 100644 --- a/docs/12 - OpenAI API.md +++ b/docs/12 - OpenAI API.md @@ -19,7 +19,7 @@ Add `--api` to your command-line flags. ### Examples -For the documentation with all the endpoints, parameters and their types, consult `http://127.0.0.1:5000/docs` or the [typing.py](https://github.com/oobabooga/text-generation-webui/blob/main/extensions/openai/typing.py) file. +For the documentation with all the endpoints, parameters and their types, consult `http://127.0.0.1:5000/docs` or the [typing.py](https://github.com/oobabooga/text-generation-webui/blob/main/modules/api/typing.py) file. The official examples in the [OpenAI documentation](https://platform.openai.com/docs/api-reference) should also work, and the same parameters apply (although the API here has more optional parameters). @@ -490,16 +490,6 @@ The following environment variables can be used (they take precedence over every | `OPENEDAI_EMBEDDING_MODEL` | Embedding model (if applicable) | sentence-transformers/all-mpnet-base-v2 | | `OPENEDAI_EMBEDDING_DEVICE` | Embedding device (if applicable) | cuda | -#### Persistent settings with `settings.yaml` - -You can also set the following variables in your `settings.yaml` file: - -``` -openai-embedding_device: cuda -openai-embedding_model: "sentence-transformers/all-mpnet-base-v2" -openai-debug: 1 -``` - ### Third-party application setup You can usually force an application that uses the OpenAI API to connect to the local API by using the following environment variables: diff --git a/modules/api/__init__.py b/modules/api/__init__.py new file mode 100644 index 00000000..e69de29b diff --git a/extensions/openai/cache_embedding_model.py b/modules/api/cache_embedding_model.py similarity index 100% rename from extensions/openai/cache_embedding_model.py rename to modules/api/cache_embedding_model.py diff --git a/extensions/openai/completions.py b/modules/api/completions.py similarity index 99% rename from extensions/openai/completions.py rename to modules/api/completions.py index d0cd9802..8948bb86 100644 --- a/extensions/openai/completions.py +++ b/modules/api/completions.py @@ -9,9 +9,9 @@ import tiktoken import yaml from pydantic import ValidationError -from extensions.openai.errors import InvalidRequestError -from extensions.openai.typing import ToolDefinition -from extensions.openai.utils import debug_msg +from .errors import InvalidRequestError +from .typing import ToolDefinition +from .utils import debug_msg from modules.tool_parsing import get_tool_call_id, parse_tool_call, detect_tool_call_format from modules import shared from modules.reasoning import extract_reasoning diff --git a/extensions/openai/embeddings.py b/modules/api/embeddings.py similarity index 90% rename from extensions/openai/embeddings.py rename to modules/api/embeddings.py index 1420879c..ad299c9d 100644 --- a/extensions/openai/embeddings.py +++ b/modules/api/embeddings.py @@ -3,8 +3,8 @@ import os import numpy as np from transformers import AutoModel -from extensions.openai.errors import ServiceUnavailableError -from extensions.openai.utils import debug_msg, float_list_to_base64 +from .errors import ServiceUnavailableError +from .utils import debug_msg, float_list_to_base64 from modules.logging_colors import logger embeddings_params_initialized = False @@ -17,14 +17,12 @@ def initialize_embedding_params(): ''' global embeddings_params_initialized if not embeddings_params_initialized: - from extensions.openai.script import params - global st_model, embeddings_model, embeddings_device - st_model = os.environ.get("OPENEDAI_EMBEDDING_MODEL", params.get('embedding_model', 'all-mpnet-base-v2')) + st_model = os.environ.get("OPENEDAI_EMBEDDING_MODEL", 'sentence-transformers/all-mpnet-base-v2') embeddings_model = None # OPENEDAI_EMBEDDING_DEVICE: auto (best or cpu), cpu, cuda, ipu, xpu, mkldnn, opengl, opencl, ideep, hip, ve, fpga, ort, xla, lazy, vulkan, mps, meta, hpu, mtia, privateuseone - embeddings_device = os.environ.get("OPENEDAI_EMBEDDING_DEVICE", params.get('embedding_device', 'cpu')) + embeddings_device = os.environ.get("OPENEDAI_EMBEDDING_DEVICE", 'cpu') if embeddings_device.lower() == 'auto': embeddings_device = None diff --git a/extensions/openai/errors.py b/modules/api/errors.py similarity index 100% rename from extensions/openai/errors.py rename to modules/api/errors.py diff --git a/extensions/openai/images.py b/modules/api/images.py similarity index 96% rename from extensions/openai/images.py rename to modules/api/images.py index f7be3d22..95704535 100644 --- a/extensions/openai/images.py +++ b/modules/api/images.py @@ -6,7 +6,7 @@ import base64 import io import time -from extensions.openai.errors import ServiceUnavailableError +from .errors import ServiceUnavailableError from modules import shared diff --git a/extensions/openai/logits.py b/modules/api/logits.py similarity index 84% rename from extensions/openai/logits.py rename to modules/api/logits.py index 280612db..e0c7ea0e 100644 --- a/extensions/openai/logits.py +++ b/modules/api/logits.py @@ -1,4 +1,4 @@ -from extensions.openai.completions import process_parameters +from .completions import process_parameters from modules.logits import get_next_logits diff --git a/extensions/openai/models.py b/modules/api/models.py similarity index 100% rename from extensions/openai/models.py rename to modules/api/models.py diff --git a/extensions/openai/moderations.py b/modules/api/moderations.py similarity index 97% rename from extensions/openai/moderations.py rename to modules/api/moderations.py index 1ca6b8ab..ac0539d6 100644 --- a/extensions/openai/moderations.py +++ b/modules/api/moderations.py @@ -3,7 +3,7 @@ import time import numpy as np from numpy.linalg import norm -from extensions.openai.embeddings import get_embeddings +from .embeddings import get_embeddings moderations_disabled = False # return 0/false category_embeddings = None diff --git a/extensions/openai/script.py b/modules/api/script.py similarity index 96% rename from extensions/openai/script.py rename to modules/api/script.py index a0d5deb8..356919e9 100644 --- a/extensions/openai/script.py +++ b/modules/api/script.py @@ -13,16 +13,15 @@ from fastapi import Depends, FastAPI, Header, HTTPException from fastapi.middleware.cors import CORSMiddleware from fastapi.requests import Request from fastapi.responses import JSONResponse -from pydub import AudioSegment from sse_starlette import EventSourceResponse from starlette.concurrency import iterate_in_threadpool -import extensions.openai.completions as OAIcompletions -import extensions.openai.logits as OAIlogits -import extensions.openai.models as OAImodels -from extensions.openai.tokens import token_count, token_decode, token_encode -from extensions.openai.errors import OpenAIError -from extensions.openai.utils import _start_cloudflared +import modules.api.completions as OAIcompletions +import modules.api.logits as OAIlogits +import modules.api.models as OAImodels +from .tokens import token_count, token_decode, token_encode +from .errors import OpenAIError +from .utils import _start_cloudflared from modules import shared from modules.logging_colors import logger from modules.models import unload_model @@ -53,12 +52,6 @@ from .typing import ( to_dict ) -params = { - 'embedding_device': 'cpu', - 'embedding_model': 'sentence-transformers/all-mpnet-base-v2', - 'debug': 0 -} - async def _wait_for_disconnect(request: Request, stop_event: threading.Event): """Block until the client disconnects, then signal the stop_event.""" @@ -244,6 +237,7 @@ def handle_billing_usage(): @app.post('/v1/audio/transcriptions', dependencies=check_key) async def handle_audio_transcription(request: Request): import speech_recognition as sr + from pydub import AudioSegment r = sr.Recognizer() @@ -275,7 +269,7 @@ async def handle_audio_transcription(request: Request): @app.post('/v1/images/generations', response_model=ImageGenerationResponse, dependencies=check_key) async def handle_image_generation(request_data: ImageGenerationRequest): - import extensions.openai.images as OAIimages + import modules.api.images as OAIimages response = await asyncio.to_thread(OAIimages.generations, request_data) return JSONResponse(response) @@ -283,7 +277,7 @@ async def handle_image_generation(request_data: ImageGenerationRequest): @app.post("/v1/embeddings", response_model=EmbeddingsResponse, dependencies=check_key) async def handle_embeddings(request: Request, request_data: EmbeddingsRequest): - import extensions.openai.embeddings as OAIembeddings + import modules.api.embeddings as OAIembeddings input = request_data.input if not input: @@ -298,7 +292,7 @@ async def handle_embeddings(request: Request, request_data: EmbeddingsRequest): @app.post("/v1/moderations", dependencies=check_key) async def handle_moderations(request: Request): - import extensions.openai.moderations as OAImoderations + import modules.api.moderations as OAImoderations body = await request.json() input = body["input"] @@ -500,7 +494,15 @@ def run_server(): uvicorn.run(app, host=server_addrs, port=port, ssl_certfile=ssl_certfile, ssl_keyfile=ssl_keyfile, access_log=False) +_server_started = False + + def setup(): + global _server_started + if _server_started: + return + + _server_started = True if shared.args.nowebui: run_server() else: diff --git a/extensions/openai/tokens.py b/modules/api/tokens.py similarity index 100% rename from extensions/openai/tokens.py rename to modules/api/tokens.py diff --git a/extensions/openai/typing.py b/modules/api/typing.py similarity index 100% rename from extensions/openai/typing.py rename to modules/api/typing.py diff --git a/extensions/openai/utils.py b/modules/api/utils.py similarity index 93% rename from extensions/openai/utils.py rename to modules/api/utils.py index 2b414769..fae181ff 100644 --- a/extensions/openai/utils.py +++ b/modules/api/utils.py @@ -23,8 +23,7 @@ def float_list_to_base64(float_array: np.ndarray) -> str: def debug_msg(*args, **kwargs): - from extensions.openai.script import params - if os.environ.get("OPENEDAI_DEBUG", params.get('debug', 0)): + if os.environ.get("OPENEDAI_DEBUG", 0): print(*args, **kwargs) diff --git a/modules/extensions.py b/modules/extensions.py index 4bb7b683..09db9f40 100644 --- a/modules/extensions.py +++ b/modules/extensions.py @@ -32,8 +32,7 @@ def load_extensions(): if name not in available_extensions: continue - if name != 'api': - logger.info(f'Loading the extension "{name}"') + logger.info(f'Loading the extension "{name}"') try: # Prefer user extension, fall back to system extension diff --git a/modules/shared.py b/modules/shared.py index 37bc5876..69e16960 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -156,7 +156,7 @@ group.add_argument('--portable', action='store_true', help='Hide features not av # API group = parser.add_argument_group('API') -group.add_argument('--api', action='store_true', help='Enable the API extension.') +group.add_argument('--api', action='store_true', help='Enable the API server.') group.add_argument('--public-api', action='store_true', help='Create a public URL for the API using Cloudflare.') group.add_argument('--public-api-id', type=str, help='Tunnel ID for named Cloudflare Tunnel. Use together with public-api option.', default=None) group.add_argument('--api-port', type=int, default=5000, help='The listening port for the API.') @@ -435,16 +435,6 @@ def fix_loader_name(name): return 'TensorRT-LLM' -def add_extension(name, last=False): - if args.extensions is None: - args.extensions = [name] - elif last: - args.extensions = [x for x in args.extensions if x != name] - args.extensions.append(name) - elif name not in args.extensions: - args.extensions.append(name) - - def is_chat(): return True @@ -464,10 +454,6 @@ def load_user_config(): args.loader = fix_loader_name(args.loader) -# Activate the API extension -if args.api or args.public_api: - add_extension('openai', last=True) - # Load model-specific settings p = Path(f'{args.model_dir}/config.yaml') if p.exists(): diff --git a/modules/ui_session.py b/modules/ui_session.py index 19026fbb..3f2c8a7b 100644 --- a/modules/ui_session.py +++ b/modules/ui_session.py @@ -95,8 +95,6 @@ def set_interface_arguments(extensions, bool_active): setattr(shared.args, k, False) for k in bool_active: setattr(shared.args, k, True) - if k == 'api': - shared.add_extension('openai', last=True) shared.need_restart = True diff --git a/server.py b/server.py index 1aa9fc04..cbdd2854 100644 --- a/server.py +++ b/server.py @@ -106,6 +106,11 @@ def create_interface(): if shared.args.extensions is not None and len(shared.args.extensions) > 0: extensions_module.load_extensions() + # Start the API server if enabled + if shared.args.api or shared.args.public_api: + from modules.api.script import setup as api_setup + api_setup() + # Force some events to be triggered on page load shared.persistent_interface_state.update({ 'mode': shared.settings['mode'], @@ -273,6 +278,12 @@ if __name__ == "__main__": # Activate the extensions listed on settings.yaml extensions_module.available_extensions = utils.get_available_extensions() for extension in shared.settings['default_extensions']: + # The openai extension was moved to modules/api and is now + # activated with --api. Treat it as an alias for backwards compat. + if extension == 'openai': + shared.args.api = True + continue + shared.args.extensions = shared.args.extensions or [] if extension not in shared.args.extensions: shared.args.extensions.append(extension) @@ -337,6 +348,10 @@ if __name__ == "__main__": shared.args.extensions = [x for x in (shared.args.extensions or []) if x != 'gallery'] if shared.args.extensions: extensions_module.load_extensions() + + if shared.args.api or shared.args.public_api: + from modules.api.script import setup as api_setup + api_setup() else: # Launch the web UI create_interface()