API: Move OpenAI-compatible API from extensions/openai to modules/api

oobabooga 2026-03-20 14:46:00 -03:00
parent 2e4232e02b
commit bf6fbc019d
23 changed files with 51 additions and 65 deletions

View file

@@ -106,7 +106,7 @@ jobs:
cd "text-generation-webui-${VERSION_CLEAN}"
# Remove extensions that need additional requirements
allowed=("character_bias" "gallery" "openai" "sd_api_pictures")
allowed=("character_bias" "gallery" "sd_api_pictures")
find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf
# Define common variables
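For readers less used to the `find | grep -v | xargs` idiom, this step keeps only the allow-listed extension directories and deletes the rest; a rough Python equivalent (a sketch only, not part of the workflow) would be:
```
# Sketch: keep only the allow-listed extension directories, equivalent in
# spirit to the find/grep/xargs pipeline above.
import shutil
from pathlib import Path

allowed = {"character_bias", "gallery", "sd_api_pictures"}

for entry in Path("extensions").iterdir():
    if entry.is_dir() and entry.name not in allowed:
        shutil.rmtree(entry)
```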

View file

@@ -105,7 +105,7 @@ jobs:
cd "text-generation-webui-${VERSION_CLEAN}"
# Remove extensions that need additional requirements
allowed=("character_bias" "gallery" "openai" "sd_api_pictures")
allowed=("character_bias" "gallery" "sd_api_pictures")
find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf
# Define common variables

View file

@@ -105,7 +105,7 @@ jobs:
cd "text-generation-webui-${VERSION_CLEAN}"
# Remove extensions that need additional requirements
allowed=("character_bias" "gallery" "openai" "sd_api_pictures")
allowed=("character_bias" "gallery" "sd_api_pictures")
find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf
# Define common variables

View file

@@ -105,7 +105,7 @@ jobs:
cd "text-generation-webui-${VERSION_CLEAN}"
# Remove extensions that need additional requirements
allowed=("character_bias" "gallery" "openai" "sd_api_pictures")
allowed=("character_bias" "gallery" "sd_api_pictures")
find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf
# Define common variables

View file

@@ -20,7 +20,6 @@ If you create an extension, you are welcome to host it in a GitHub repository an
|Extension|Description|
|---------|-----------|
|[openai](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/openai)| Creates an API that mimics the OpenAI API and can be used as a drop-in replacement. |
|[superboogav2](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/superboogav2)| Enhanced RAG extension with support for PDF, DOCX, and PPTX files. |
|[send_pictures](https://github.com/oobabooga/text-generation-webui/blob/main/extensions/send_pictures/)| Creates an image upload field that can be used to send images to the bot in chat mode. Captions are automatically generated using BLIP. |
|[coqui_tts](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/coqui_tts)| Text-to-speech extension using Coqui XTTS v2. |

View file

@@ -19,7 +19,7 @@ Add `--api` to your command-line flags.
### Examples
For the documentation with all the endpoints, parameters and their types, consult `http://127.0.0.1:5000/docs` or the [typing.py](https://github.com/oobabooga/text-generation-webui/blob/main/extensions/openai/typing.py) file.
For the documentation with all the endpoints, parameters and their types, consult `http://127.0.0.1:5000/docs` or the [typing.py](https://github.com/oobabooga/text-generation-webui/blob/main/modules/api/typing.py) file.
The official examples in the [OpenAI documentation](https://platform.openai.com/docs/api-reference) should also work, and the same parameters apply (although the API here has more optional parameters).
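For example, a minimal request against the local chat completions endpoint could look like the snippet below (assuming the server was started with `--api` on the default port 5000; the full list of accepted generation parameters is in the docs referenced above):
```
# Minimal sketch: query the local OpenAI-compatible chat completions endpoint.
import requests

url = "http://127.0.0.1:5000/v1/chat/completions"
payload = {
    "messages": [{"role": "user", "content": "Hello! Who are you?"}],
    "max_tokens": 200,
}
response = requests.post(url, json=payload, timeout=120)
print(response.json()["choices"][0]["message"]["content"])
```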
@@ -490,16 +490,6 @@ The following environment variables can be used (they take precedence over every
| `OPENEDAI_EMBEDDING_MODEL` | Embedding model (if applicable) | sentence-transformers/all-mpnet-base-v2 |
| `OPENEDAI_EMBEDDING_DEVICE` | Embedding device (if applicable) | cuda |
#### Persistent settings with `settings.yaml`
You can also set the following variables in your `settings.yaml` file:
```
openai-embedding_device: cuda
openai-embedding_model: "sentence-transformers/all-mpnet-base-v2"
openai-debug: 1
```
### Third-party application setup
You can usually force an application that uses the OpenAI API to connect to the local API by using the following environment variables:
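The variable list itself is truncated in this hunk. As an illustration only, many OpenAI client libraries read the conventional variables below; the exact names here are an assumption, not quoted from the documentation:
```
# Illustrative sketch (assumed variable names): point an OpenAI client at the
# local server before the third-party application configures itself.
import os

os.environ["OPENAI_API_KEY"] = "sk-111111111111111111111111111111111111111111111111"  # any dummy key
os.environ["OPENAI_API_BASE"] = "http://127.0.0.1:5000/v1"
```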

modules/api/__init__.py Normal file
View file

View file

@@ -9,9 +9,9 @@ import tiktoken
import yaml
from pydantic import ValidationError
from extensions.openai.errors import InvalidRequestError
from extensions.openai.typing import ToolDefinition
from extensions.openai.utils import debug_msg
from .errors import InvalidRequestError
from .typing import ToolDefinition
from .utils import debug_msg
from modules.tool_parsing import get_tool_call_id, parse_tool_call, detect_tool_call_format
from modules import shared
from modules.reasoning import extract_reasoning

View file

@@ -3,8 +3,8 @@ import os
import numpy as np
from transformers import AutoModel
from extensions.openai.errors import ServiceUnavailableError
from extensions.openai.utils import debug_msg, float_list_to_base64
from .errors import ServiceUnavailableError
from .utils import debug_msg, float_list_to_base64
from modules.logging_colors import logger
embeddings_params_initialized = False
@@ -17,14 +17,12 @@ def initialize_embedding_params():
'''
global embeddings_params_initialized
if not embeddings_params_initialized:
from extensions.openai.script import params
global st_model, embeddings_model, embeddings_device
st_model = os.environ.get("OPENEDAI_EMBEDDING_MODEL", params.get('embedding_model', 'all-mpnet-base-v2'))
st_model = os.environ.get("OPENEDAI_EMBEDDING_MODEL", 'sentence-transformers/all-mpnet-base-v2')
embeddings_model = None
# OPENEDAI_EMBEDDING_DEVICE: auto (best or cpu), cpu, cuda, ipu, xpu, mkldnn, opengl, opencl, ideep, hip, ve, fpga, ort, xla, lazy, vulkan, mps, meta, hpu, mtia, privateuseone
embeddings_device = os.environ.get("OPENEDAI_EMBEDDING_DEVICE", params.get('embedding_device', 'cpu'))
embeddings_device = os.environ.get("OPENEDAI_EMBEDDING_DEVICE", 'cpu')
if embeddings_device.lower() == 'auto':
embeddings_device = None
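With the extension's `params` dict gone, the embedding settings now come from environment variables read once on first use. A self-contained sketch of the resulting lazy-initialization pattern (names simplified from the hunk above):
```
import os

_initialized = False
st_model = None
embeddings_device = None

def initialize_embedding_params():
    """Read the embedding configuration once, preferring environment variables."""
    global _initialized, st_model, embeddings_device
    if _initialized:
        return
    st_model = os.environ.get("OPENEDAI_EMBEDDING_MODEL",
                              "sentence-transformers/all-mpnet-base-v2")
    embeddings_device = os.environ.get("OPENEDAI_EMBEDDING_DEVICE", "cpu")
    if embeddings_device.lower() == "auto":
        embeddings_device = None  # let the backend pick a device
    _initialized = True
```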

View file

@@ -6,7 +6,7 @@ import base64
import io
import time
from extensions.openai.errors import ServiceUnavailableError
from .errors import ServiceUnavailableError
from modules import shared

View file

@@ -1,4 +1,4 @@
from extensions.openai.completions import process_parameters
from .completions import process_parameters
from modules.logits import get_next_logits

View file

@@ -3,7 +3,7 @@ import time
import numpy as np
from numpy.linalg import norm
from extensions.openai.embeddings import get_embeddings
from .embeddings import get_embeddings
moderations_disabled = False # return 0/false
category_embeddings = None
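The moderation endpoint scores the input's embedding against a reference embedding per category. A minimal sketch of the cosine-similarity comparison it relies on (the function names below are illustrative, not the module's actual API):
```
# Sketch: score an input embedding against per-category reference embeddings.
import numpy as np
from numpy.linalg import norm

def cosine_similarity(a: np.ndarray, b: np.ndarray) -> float:
    return float(np.dot(a, b) / (norm(a) * norm(b)))

def score_categories(text_embedding, category_embeddings):
    return {name: cosine_similarity(text_embedding, ref)
            for name, ref in category_embeddings.items()}
```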

View file

@@ -13,16 +13,15 @@ from fastapi import Depends, FastAPI, Header, HTTPException
from fastapi.middleware.cors import CORSMiddleware
from fastapi.requests import Request
from fastapi.responses import JSONResponse
from pydub import AudioSegment
from sse_starlette import EventSourceResponse
from starlette.concurrency import iterate_in_threadpool
import extensions.openai.completions as OAIcompletions
import extensions.openai.logits as OAIlogits
import extensions.openai.models as OAImodels
from extensions.openai.tokens import token_count, token_decode, token_encode
from extensions.openai.errors import OpenAIError
from extensions.openai.utils import _start_cloudflared
import modules.api.completions as OAIcompletions
import modules.api.logits as OAIlogits
import modules.api.models as OAImodels
from .tokens import token_count, token_decode, token_encode
from .errors import OpenAIError
from .utils import _start_cloudflared
from modules import shared
from modules.logging_colors import logger
from modules.models import unload_model
@@ -53,12 +52,6 @@ from .typing import (
to_dict
)
params = {
'embedding_device': 'cpu',
'embedding_model': 'sentence-transformers/all-mpnet-base-v2',
'debug': 0
}
async def _wait_for_disconnect(request: Request, stop_event: threading.Event):
"""Block until the client disconnects, then signal the stop_event."""
@@ -244,6 +237,7 @@ def handle_billing_usage():
@app.post('/v1/audio/transcriptions', dependencies=check_key)
async def handle_audio_transcription(request: Request):
import speech_recognition as sr
from pydub import AudioSegment
r = sr.Recognizer()
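Note that heavy optional dependencies such as `pydub` and `speech_recognition` are now imported inside the handler instead of at module import time, so the API can start without them installed. A minimal sketch of that deferred-import pattern (handler body elided):
```
# Sketch: defer optional heavy imports until the endpoint is actually called.
from fastapi import FastAPI, Request

app = FastAPI()

@app.post("/v1/audio/transcriptions")
async def handle_audio_transcription(request: Request):
    # Only needed when a transcription is requested; missing packages therefore
    # fail at request time rather than at server startup.
    import speech_recognition as sr
    from pydub import AudioSegment

    recognizer = sr.Recognizer()
    # ... decode the upload with AudioSegment, then run recognizer on it ...
    return {"text": ""}
```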
@@ -275,7 +269,7 @@ async def handle_audio_transcription(request: Request):
@app.post('/v1/images/generations', response_model=ImageGenerationResponse, dependencies=check_key)
async def handle_image_generation(request_data: ImageGenerationRequest):
import extensions.openai.images as OAIimages
import modules.api.images as OAIimages
response = await asyncio.to_thread(OAIimages.generations, request_data)
return JSONResponse(response)
@@ -283,7 +277,7 @@ async def handle_image_generation(request_data: ImageGenerationRequest):
@app.post("/v1/embeddings", response_model=EmbeddingsResponse, dependencies=check_key)
async def handle_embeddings(request: Request, request_data: EmbeddingsRequest):
import extensions.openai.embeddings as OAIembeddings
import modules.api.embeddings as OAIembeddings
input = request_data.input
if not input:
@@ -298,7 +292,7 @@ async def handle_embeddings(request: Request, request_data: EmbeddingsRequest):
@app.post("/v1/moderations", dependencies=check_key)
async def handle_moderations(request: Request):
import extensions.openai.moderations as OAImoderations
import modules.api.moderations as OAImoderations
body = await request.json()
input = body["input"]
@@ -500,7 +494,15 @@ def run_server():
uvicorn.run(app, host=server_addrs, port=port, ssl_certfile=ssl_certfile, ssl_keyfile=ssl_keyfile, access_log=False)
_server_started = False
def setup():
global _server_started
if _server_started:
return
_server_started = True
if shared.args.nowebui:
run_server()
else:
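Since `setup()` can now be reached from more than one call site, the new `_server_started` flag makes it idempotent. The tail of the hunk is cut off above; a sketch of the full pattern, assuming the non-blocking branch launches the server on a daemon thread:
```
# Sketch of an idempotent setup(): start the API server at most once.
import threading

_server_started = False

def run_server():
    ...  # uvicorn.run(...) as in the module above

def setup(nowebui: bool = False):
    global _server_started
    if _server_started:
        return
    _server_started = True
    if nowebui:
        run_server()  # no web UI: block on the server in the foreground
    else:
        # Assumption: run alongside the UI on a background daemon thread.
        threading.Thread(target=run_server, daemon=True).start()
```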

View file

@@ -23,8 +23,7 @@ def float_list_to_base64(float_array: np.ndarray) -> str:
def debug_msg(*args, **kwargs):
from extensions.openai.script import params
if os.environ.get("OPENEDAI_DEBUG", params.get('debug', 0)):
if os.environ.get("OPENEDAI_DEBUG", 0):
print(*args, **kwargs)

View file

@@ -32,8 +32,7 @@ def load_extensions():
if name not in available_extensions:
continue
if name != 'api':
logger.info(f'Loading the extension "{name}"')
logger.info(f'Loading the extension "{name}"')
try:
# Prefer user extension, fall back to system extension

View file

@@ -156,7 +156,7 @@ group.add_argument('--portable', action='store_true', help='Hide features not av
# API
group = parser.add_argument_group('API')
group.add_argument('--api', action='store_true', help='Enable the API extension.')
group.add_argument('--api', action='store_true', help='Enable the API server.')
group.add_argument('--public-api', action='store_true', help='Create a public URL for the API using Cloudflare.')
group.add_argument('--public-api-id', type=str, help='Tunnel ID for named Cloudflare Tunnel. Use together with public-api option.', default=None)
group.add_argument('--api-port', type=int, default=5000, help='The listening port for the API.')
@@ -435,16 +435,6 @@ def fix_loader_name(name):
return 'TensorRT-LLM'
def add_extension(name, last=False):
if args.extensions is None:
args.extensions = [name]
elif last:
args.extensions = [x for x in args.extensions if x != name]
args.extensions.append(name)
elif name not in args.extensions:
args.extensions.append(name)
def is_chat():
return True
@@ -464,10 +454,6 @@ def load_user_config():
args.loader = fix_loader_name(args.loader)
# Activate the API extension
if args.api or args.public_api:
add_extension('openai', last=True)
# Load model-specific settings
p = Path(f'{args.model_dir}/config.yaml')
if p.exists():

View file

@@ -95,8 +95,6 @@ def set_interface_arguments(extensions, bool_active):
setattr(shared.args, k, False)
for k in bool_active:
setattr(shared.args, k, True)
if k == 'api':
shared.add_extension('openai', last=True)
shared.need_restart = True

View file

@@ -106,6 +106,11 @@ def create_interface():
if shared.args.extensions is not None and len(shared.args.extensions) > 0:
extensions_module.load_extensions()
# Start the API server if enabled
if shared.args.api or shared.args.public_api:
from modules.api.script import setup as api_setup
api_setup()
# Force some events to be triggered on page load
shared.persistent_interface_state.update({
'mode': shared.settings['mode'],
@@ -273,6 +278,12 @@ if __name__ == "__main__":
# Activate the extensions listed on settings.yaml
extensions_module.available_extensions = utils.get_available_extensions()
for extension in shared.settings['default_extensions']:
# The openai extension was moved to modules/api and is now
# activated with --api. Treat it as an alias for backwards compat.
if extension == 'openai':
shared.args.api = True
continue
shared.args.extensions = shared.args.extensions or []
if extension not in shared.args.extensions:
shared.args.extensions.append(extension)
@@ -337,6 +348,10 @@
shared.args.extensions = [x for x in (shared.args.extensions or []) if x != 'gallery']
if shared.args.extensions:
extensions_module.load_extensions()
if shared.args.api or shared.args.public_api:
from modules.api.script import setup as api_setup
api_setup()
else:
# Launch the web UI
create_interface()