Merge pull request #7441 from oobabooga/dev

Merge dev branch
This commit is contained in:
oobabooga 2026-03-24 16:39:03 -03:00 committed by GitHub
commit dd9d254c49
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
117 changed files with 1027 additions and 1776 deletions

View file

@ -106,7 +106,7 @@ jobs:
cd "text-generation-webui-${VERSION_CLEAN}"
# Remove extensions that need additional requirements
allowed=("character_bias" "gallery" "openai" "sd_api_pictures")
allowed=("character_bias" "gallery" "sd_api_pictures")
find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf
# Define common variables
@ -116,13 +116,13 @@ jobs:
# 1. Set platform-specific variables
if [[ "$RUNNER_OS" == "Windows" ]]; then
PLATFORM="windows"
PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-pc-windows-msvc-install_only.tar.gz"
PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-pc-windows-msvc-install_only_stripped.tar.gz"
PIP_PATH="portable_env/python.exe -m pip"
PACKAGES_PATH="portable_env/Lib/site-packages"
rm start_linux.sh start_macos.sh
else
PLATFORM="linux"
PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-unknown-linux-gnu-install_only.tar.gz"
PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz"
PIP_PATH="portable_env/bin/python -m pip"
PACKAGES_PATH="portable_env/lib/python3.13/site-packages"
rm start_macos.sh start_windows.bat

View file

@ -105,7 +105,7 @@ jobs:
cd "text-generation-webui-${VERSION_CLEAN}"
# Remove extensions that need additional requirements
allowed=("character_bias" "gallery" "openai" "sd_api_pictures")
allowed=("character_bias" "gallery" "sd_api_pictures")
find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf
# Define common variables
@ -114,13 +114,13 @@ jobs:
# 1. Set platform-specific variables
if [[ "$RUNNER_OS" == "Windows" ]]; then
PLATFORM="windows"
PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-pc-windows-msvc-install_only.tar.gz"
PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-pc-windows-msvc-install_only_stripped.tar.gz"
PIP_PATH="portable_env/python.exe -m pip"
PACKAGES_PATH="portable_env/Lib/site-packages"
rm start_linux.sh start_macos.sh
else
PLATFORM="linux"
PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-unknown-linux-gnu-install_only.tar.gz"
PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz"
PIP_PATH="portable_env/bin/python -m pip"
PACKAGES_PATH="portable_env/lib/python3.13/site-packages"
rm start_macos.sh start_windows.bat

View file

@ -105,7 +105,7 @@ jobs:
cd "text-generation-webui-${VERSION_CLEAN}"
# Remove extensions that need additional requirements
allowed=("character_bias" "gallery" "openai" "sd_api_pictures")
allowed=("character_bias" "gallery" "sd_api_pictures")
find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf
# Define common variables
@ -114,13 +114,13 @@ jobs:
# 1. Set platform-specific variables
if [[ "$RUNNER_OS" == "Windows" ]]; then
PLATFORM="windows"
PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-pc-windows-msvc-install_only.tar.gz"
PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-pc-windows-msvc-install_only_stripped.tar.gz"
PIP_PATH="portable_env/python.exe -m pip"
PACKAGES_PATH="portable_env/Lib/site-packages"
rm start_linux.sh start_macos.sh
else
PLATFORM="linux"
PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-unknown-linux-gnu-install_only.tar.gz"
PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz"
PIP_PATH="portable_env/bin/python -m pip"
PACKAGES_PATH="portable_env/lib/python3.13/site-packages"
rm start_macos.sh start_windows.bat

View file

@ -105,7 +105,7 @@ jobs:
cd "text-generation-webui-${VERSION_CLEAN}"
# Remove extensions that need additional requirements
allowed=("character_bias" "gallery" "openai" "sd_api_pictures")
allowed=("character_bias" "gallery" "sd_api_pictures")
find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf
# Define common variables
@ -115,18 +115,18 @@ jobs:
# 1. Set platform-specific variables
if [[ "$RUNNER_OS" == "Windows" ]]; then
PLATFORM="windows-cpu"
PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-pc-windows-msvc-install_only.tar.gz"
PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-pc-windows-msvc-install_only_stripped.tar.gz"
PIP_PATH="portable_env/python.exe -m pip"
PACKAGES_PATH="portable_env/Lib/site-packages"
rm start_linux.sh start_macos.sh
elif [[ "$RUNNER_OS" == "macOS" ]]; then
if [[ "$OS_TYPE" == "macos-15-intel" ]]; then
PLATFORM="macos-x86_64"
PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-apple-darwin-install_only.tar.gz"
PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-apple-darwin-install_only_stripped.tar.gz"
REQ_TYPE="apple_intel"
else
PLATFORM="macos-arm64"
PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-aarch64-apple-darwin-install_only.tar.gz"
PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-aarch64-apple-darwin-install_only_stripped.tar.gz"
REQ_TYPE="apple_silicon"
fi
PIP_PATH="portable_env/bin/python -m pip"
@ -135,7 +135,7 @@ jobs:
else
# Linux case
PLATFORM="linux-cpu"
PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-unknown-linux-gnu-install_only.tar.gz"
PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz"
PIP_PATH="portable_env/bin/python -m pip"
PACKAGES_PATH="portable_env/lib/python3.13/site-packages"
rm start_macos.sh start_windows.bat

View file

@ -23,21 +23,20 @@ A Gradio web UI for running Large Language Models locally. 100% private and offl
## Features
- **Easy setup**: [Portable builds](https://github.com/oobabooga/text-generation-webui/releases) (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or a one-click installer for the full feature set.
- **Multiple backends**: [llama.cpp](https://github.com/ggerganov/llama.cpp), [Transformers](https://github.com/huggingface/transformers), [ExLlamaV3](https://github.com/turboderp-org/exllamav3), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). Switch between backends and models without restarting.
- **File attachments**: Upload text files, PDF documents, and .docx documents to talk about their contents.
- **Vision (multimodal)**: Attach images to messages for visual understanding ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Multimodal-Tutorial)).
- **OpenAI/Anthropic-compatible API**: Chat, Completions, and Messages endpoints with tool-calling support. Use as a local drop-in replacement for the OpenAI/Anthropic APIs ([examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples)).
- **Tool-calling**: Models can call custom functions during chat — web search, page fetching, math, and more. Each tool is a single `.py` file, easy to create and extend ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Tool-Calling-Tutorial)).
- **OpenAI-compatible API**: Chat and Completions endpoints with tool-calling support. Use as a local drop-in replacement for the OpenAI API ([examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples)).
- **Vision (multimodal)**: Attach images to messages for visual understanding ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Multimodal-Tutorial)).
- **File attachments**: Upload text files, PDF documents, and .docx documents to talk about their contents.
- **Training**: Fine-tune LoRAs on multi-turn chat or raw text datasets. Supports resuming interrupted runs ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/05-%E2%80%90-Training-Tab)).
- **Image generation**: A dedicated tab for `diffusers` models like **Z-Image-Turbo**. Features 4-bit/8-bit quantization and a persistent gallery with metadata ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Image-Generation-Tutorial)).
- **Easy setup**: [Portable builds](https://github.com/oobabooga/text-generation-webui/releases) (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or a one-click installer for the full feature set.
- 100% offline and private, with zero telemetry, external resources, or remote update requests.
- `instruct` mode for instruction-following (like ChatGPT), and `chat-instruct`/`chat` modes for talking to custom characters. Prompts are automatically formatted with Jinja2 templates.
- Edit messages, navigate between message versions, and branch conversations at any point.
- Free-form text generation in the Notebook tab without being limited to chat turns.
- Multiple sampling parameters and generation options for sophisticated text generation control.
- Aesthetic UI with dark and light themes.
- Syntax highlighting for code blocks and LaTeX rendering for mathematical expressions.
- Dark/light themes, syntax highlighting for code blocks, and LaTeX rendering for mathematical expressions.
- Extension support, with numerous built-in and user-contributed extensions available. See the [wiki](https://github.com/oobabooga/text-generation-webui/wiki/07-%E2%80%90-Extensions) and [extensions directory](https://github.com/oobabooga/text-generation-webui-extensions) for details.
## How to install
@ -313,7 +312,7 @@ llama.cpp:
--row-split Split the model by rows across GPUs. This may improve multi-gpu performance.
--no-mmap Prevent mmap from being used.
--mlock Force the system to keep the model in RAM.
--no-kv-offload Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.
--no-kv-offload Do not offload the K, Q, V to the GPU. This saves VRAM but reduces performance.
--batch-size BATCH_SIZE Maximum number of prompt tokens to batch together when calling llama-server. This is the application level batch size.
--ubatch-size UBATCH_SIZE Maximum number of prompt tokens to batch together when calling llama-server. This is the max physical batch size for computation (device level).
--threads THREADS Number of threads to use.
@ -429,7 +428,7 @@ API generation defaults:
That's it. The UI will detect it automatically.
To check what will fit your GPU, you can use the [VRAM Calculator](https://huggingface.co/spaces/oobabooga/accurate-gguf-vram-calculator).
To estimate how much memory a model will use, you can use the [GGUF Memory Calculator](https://huggingface.co/spaces/oobabooga/accurate-gguf-vram-calculator).
<details>
<summary>Other model types (Transformers, EXL3)</summary>

View file

@ -2,8 +2,8 @@
--darker-gray: #1C1C1D;
--dark-gray: #212125;
--light-gray: #2C2E34;
--light-theme-gray: #f9fbff;
--border-color-dark: #525252;
--light-theme-gray: #f0f3fb;
--border-color-dark: rgba(255, 255, 255, 0.15);
--header-width: 112px;
--selected-item-color-dark: #282930;
}
@ -54,7 +54,7 @@ div.svelte-iyf88w {
height: 39.594px;
align-self: end;
line-height: 1em;
border-radius: 0.375rem;
border-radius: 0.75rem;
flex: none;
}
@ -127,7 +127,7 @@ gradio-app > :first-child {
}
.header_bar {
border-right: var(--input-border-width) solid var(--input-border-color);
border-right: none;
margin-bottom: 0;
overflow-x: scroll;
text-wrap: nowrap;
@ -150,7 +150,7 @@ gradio-app > :first-child {
.dark .header_bar {
border: none !important;
box-shadow: 0 3px 4px rgba(20 20 20 / 60%);
box-shadow: none;
background-color: #8080802b;
}
@ -268,17 +268,17 @@ button {
.dark #image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb,
.dark #image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb:hover {
background: rgb(255 255 255 / 6.25%);
border-radius: 10px;
border-radius: 30px;
}
.pretty_scrollbar::-webkit-resizer,
#image-history-gallery > :nth-child(2)::-webkit-resizer {
background: #c5c5d2;
background: #d2d2d8;
}
.dark .pretty_scrollbar::-webkit-resizer,
.dark #image-history-gallery > :nth-child(2)::-webkit-resizer {
background: #ccc;
background: rgb(255 255 255 / 10%);
border-radius: 10px;
}
@ -582,10 +582,28 @@ audio {
#chat-input textarea {
background: #f3f4f6;
padding: 0.65rem 2.5rem;
border: 0;
box-shadow: 0;
border-radius: 8px;
padding: 0.675rem 2.5rem 0.6rem;
margin-top: 0.15rem;
border: 1px solid #d2d2d8;
border-radius: 1.5rem;
overflow-y: auto !important;
}
#chat-input textarea::-webkit-scrollbar {
width: 8px;
}
#chat-input textarea::-webkit-scrollbar-track {
background: transparent;
}
#chat-input textarea::-webkit-scrollbar-thumb {
background: var(--neutral-300);
border-radius: 30px;
}
.dark #chat-input textarea::-webkit-scrollbar-thumb {
background: rgb(255 255 255 / 6.25%);
}
#chat-input textarea::placeholder {
@ -725,10 +743,12 @@ audio {
position: absolute;
bottom: 100%;
left: 0;
box-shadow: 0 0 5px rgb(0 0 0 / 25%);
box-shadow: 0 2px 12px rgb(0 0 0 / 15%);
border-radius: 0.5rem;
z-index: 10000;
min-width: 330px;
flex-direction: column;
overflow: hidden;
}
.hover-menu button {
@ -739,6 +759,7 @@ audio {
margin: 0 !important;
height: 36px;
border-color: transparent !important;
transition: background-color 0.15s ease;
}
.hover-menu button:not(#clear-history-confirm) {
@ -914,7 +935,7 @@ audio {
.options {
z-index: 100 !important;
border: 1px solid var(--input-border-color);
border-radius: 0;
border-radius: 0.5rem;
}
/* ----------------------------------------------
@ -1008,9 +1029,13 @@ audio {
cursor: pointer;
}
#past-chats label {
transition: background-color 0.15s ease;
}
#past-chats .selected,
#past-chats label:hover {
background-color: #dbeafe !important;
background-color: #c8d8f5 !important;
}
#past-chats-buttons,
@ -1166,7 +1191,7 @@ audio {
Dark theme
---------------------------------------------- */
.dark .header_bar {
background-color: var(--darker-gray) !important;
background-color: #1a1a1a !important;
}
.dark .header_bar button.selected {
@ -1176,7 +1201,7 @@ audio {
.dark #chat-input textarea {
background: var(--light-gray);
color: white !important;
border-color: #292c3b;
border-color: rgba(255, 255, 255, 0.06);
}
.dark #chat-input textarea::placeholder {
@ -1192,6 +1217,7 @@ audio {
.dark #past-chats-row {
background-color: var(--darker-gray);
border: 0 !important;
box-shadow: none;
}
.dark gradio-app .gradio-container.gradio-container-4-37-2 .contain #past-chats .selected,
@ -1228,11 +1254,11 @@ audio {
Light theme
---------------------------------------------- */
.header_bar {
background-color: var(--light-theme-gray) !important;
background-color: #e4e8f0 !important;
}
.header_bar button.selected {
background: #dbeafe;
background: #c8d8f5;
}
#chat-controls,
@ -1241,11 +1267,11 @@ audio {
}
.dark #chat-controls {
border-left: 1px solid #d9d9d0;
border-left: 1px solid rgba(255, 255, 255, 0.06);
}
.dark #past-chats-row {
border-right: 1px solid #d9d9d0;
border-right: 1px solid rgba(255, 255, 255, 0.06);
}
#past-chats-toggle,
@ -1364,6 +1390,7 @@ audio {
.tgw-accordion {
padding: 10px 12px !important;
border: 1px solid #d2d2d8;
}
.dark .tgw-accordion {
@ -1531,7 +1558,7 @@ strong {
min-height: 200px;
max-height: 65vh;
padding: 10px;
border-radius: 5px;
border-radius: 0.5rem;
border: 1px solid #ccc;
background-color: var(--light-theme-gray);
font-family: inherit;
@ -1559,7 +1586,7 @@ strong {
.edit-control-button {
padding: 6px 12px;
border: 1px solid #ccc;
border-radius: 4px;
border-radius: 0.75rem;
cursor: pointer;
background-color: #f8f9fa;
color: #212529;
@ -1742,7 +1769,7 @@ button:focus {
}
.dark .sidebar-vertical-separator {
border-bottom: 1px solid rgb(255 255 255 / 10%);
border-bottom: 1px solid rgba(255, 255, 255, 0.06);
}
button#swap-height-width {
@ -1932,7 +1959,7 @@ thead + tbody tr:first-child th { border-top: 1px solid; }
.dark #tools-group .wrap::-webkit-scrollbar-thumb,
.dark #tools-group .wrap::-webkit-scrollbar-thumb:hover {
background: rgb(255 255 255 / 6.25%);
border-radius: 10px;
border-radius: 30px;
}
#tools-group .wrap::-webkit-scrollbar-corner {

View file

@ -100,6 +100,8 @@ Each parameter has a description in the UI. Below is guidance on the most import
VRAM usage during training is roughly similar to inference with ~1000 tokens of context. If you can run the model, you can probably train LoRAs with the default settings. If you run out of VRAM, reduce `Micro Batch Size` or `Cutoff Length`. Training 4-bit quantized models uses more VRAM — set `Micro Batch Size` to `1` to compensate.
**Gradient checkpointing** is enabled by default. It reduces VRAM usage by recomputing activations during the backward pass instead of storing them in memory. The tradeoff is ~20-30% slower training. There is no impact on accuracy — the results are mathematically identical. The savings are most noticeable with longer sequences and larger batch sizes. You can disable it if you have VRAM to spare and want faster training.
### Rank
Higher rank = more learning capacity = larger adapter = more VRAM. Use 48 for style/format, 128256 to teach factual knowledge.

View file

@ -20,7 +20,6 @@ If you create an extension, you are welcome to host it in a GitHub repository an
|Extension|Description|
|---------|-----------|
|[openai](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/openai)| Creates an API that mimics the OpenAI API and can be used as a drop-in replacement. |
|[superboogav2](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/superboogav2)| Enhanced RAG extension with support for PDF, DOCX, and PPTX files. |
|[send_pictures](https://github.com/oobabooga/text-generation-webui/blob/main/extensions/send_pictures/)| Creates an image upload field that can be used to send images to the bot in chat mode. Captions are automatically generated using BLIP. |
|[coqui_tts](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/coqui_tts)| Text-to-speech extension using Coqui XTTS v2. |

View file

@ -1,6 +1,6 @@
## OpenAI compatible API
## OpenAI/Anthropic-compatible API
The main API for this project is meant to be a drop-in replacement to the OpenAI API, including Chat and Completions endpoints.
The main API for this project is meant to be a drop-in replacement for the OpenAI and Anthropic APIs, including Chat, Completions, and Messages endpoints.
* It is 100% offline and private.
* It doesn't create any logs.
@ -19,7 +19,7 @@ Add `--api` to your command-line flags.
### Examples
For the documentation with all the endpoints, parameters and their types, consult `http://127.0.0.1:5000/docs` or the [typing.py](https://github.com/oobabooga/text-generation-webui/blob/main/extensions/openai/typing.py) file.
For the documentation with all the endpoints, parameters and their types, consult `http://127.0.0.1:5000/docs` or the [typing.py](https://github.com/oobabooga/text-generation-webui/blob/main/modules/api/typing.py) file.
The official examples in the [OpenAI documentation](https://platform.openai.com/docs/api-reference) should also work, and the same parameters apply (although the API here has more optional parameters).
@ -490,16 +490,6 @@ The following environment variables can be used (they take precedence over every
| `OPENEDAI_EMBEDDING_MODEL` | Embedding model (if applicable) | sentence-transformers/all-mpnet-base-v2 |
| `OPENEDAI_EMBEDDING_DEVICE` | Embedding device (if applicable) | cuda |
#### Persistent settings with `settings.yaml`
You can also set the following variables in your `settings.yaml` file:
```
openai-embedding_device: cuda
openai-embedding_model: "sentence-transformers/all-mpnet-base-v2"
openai-debug: 1
```
### Third-party application setup
You can usually force an application that uses the OpenAI API to connect to the local API by using the following environment variables:

View file

@ -1,18 +1,3 @@
## Supported models
The following models are supported:
- Qwen 3.5
- GPT-OSS
- Mistral Small / Devstral
- DeepSeek V3
- Kimi-K2
- MiniMax-M2.5
- GLM-5
- Llama 4
Other models that output tool calls as JSON (inside XML tags, code blocks, or plain JSON) are also supported through a generic fallback parser.
## Tool calling in the UI
### 1. Load a model with tool-calling support
@ -23,11 +8,11 @@ Load a model with tool-calling support from the Model tab.
In the chat sidebar, check the tools you want the model to use:
- **web_search** -- Search the web using DuckDuckGo.
- **fetch_webpage** -- Fetch the content of a URL.
- **calculate** -- Evaluate math expressions.
- **get_datetime** -- Get the current date and time.
- **roll_dice** -- Roll dice.
- `web_search`: Search the web using DuckDuckGo.
- `fetch_webpage`: Fetch the content of a URL.
- `calculate`: Evaluate math expressions.
- `get_datetime`: Get the current date and time.
- `roll_dice`: Roll dice.
### 3. Chat
@ -157,3 +142,18 @@ for _ in range(10):
print(f"\nAssistant: {choice['message']['content']}")
break
```
## Supported models
The following models are supported:
- Qwen 3.5
- GPT-OSS
- Mistral Small / Devstral
- DeepSeek V3
- Kimi-K2
- MiniMax-M2.5
- GLM-5
- Llama 4
Other models that output tool calls as JSON (inside XML tags, code blocks, or plain JSON) are also supported through a generic fallback parser.

View file

@ -107,7 +107,7 @@ class Handler(BaseHTTPRequestHandler):
elif path in ['/api/v1/delete', '/api/delete']:
metadata = body.get('metadata')
if corpus is None:
if metadata is None:
self._send_412_error("Missing parameter 'metadata'")
return

View file

@ -7,8 +7,8 @@ Allows you to enter your inputs in chat mode using your microphone.
To adjust your default settings, you can add the following to your settings.yaml file.
```
whisper_stt-whipser_language: chinese
whisper_stt-whipser_model: tiny
whisper_stt-whisper_language: chinese
whisper_stt-whisper_model: tiny
whisper_stt-auto_submit: False
```

View file

@ -18,13 +18,13 @@ input_hijack = {
# parameters which can be customized in settings.yaml of webui
params = {
'whipser_language': 'english',
'whipser_model': 'small.en',
'whisper_language': 'english',
'whisper_model': 'small.en',
'auto_submit': True
}
startup_device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
WHISPERMODEL = whisper.load_model(params['whipser_model'], device=startup_device)
WHISPERMODEL = whisper.load_model(params['whisper_model'], device=startup_device)
def chat_input_modifier(text, visible_text, state):
@ -36,7 +36,7 @@ def chat_input_modifier(text, visible_text, state):
return text, visible_text
def do_stt(audio, whipser_language):
def do_stt(audio, whisper_language):
# use pydub to convert sample_rate and sample_width for whisper input
dubaudio = AudioSegment.from_file(io.BytesIO(audio))
dubaudio = dubaudio.set_channels(1)
@ -46,20 +46,20 @@ def do_stt(audio, whipser_language):
# same method to get the array as openai whisper repo used from wav file
audio_np = np.frombuffer(dubaudio.raw_data, np.int16).flatten().astype(np.float32) / 32768.0
if len(whipser_language) == 0:
if len(whisper_language) == 0:
result = WHISPERMODEL.transcribe(audio=audio_np)
else:
result = WHISPERMODEL.transcribe(audio=audio_np, language=whipser_language)
result = WHISPERMODEL.transcribe(audio=audio_np, language=whisper_language)
return result["text"]
def auto_transcribe(audio, auto_submit, whipser_language):
def auto_transcribe(audio, auto_submit, whisper_language):
if audio is None or audio == "":
print("Whisper received no audio data")
return "", ""
audio_bytes = base64.b64decode(audio.split(',')[1])
transcription = do_stt(audio_bytes, whipser_language)
transcription = do_stt(audio_bytes, whisper_language)
if auto_submit:
input_hijack.update({"state": True, "value": [transcription, transcription]})
return transcription
@ -78,7 +78,7 @@ def reload_whispermodel(whisper_model_name: str, whisper_language: str, device:
device = torch.device('cuda' if torch.cuda.is_available() else 'cpu')
WHISPERMODEL = whisper.load_model(whisper_model_name, device=device)
params.update({"whipser_model": whisper_model_name})
params.update({"whisper_model": whisper_model_name})
if ".en" in whisper_model_name:
whisper_language = "english"
audio_update = gr.Audio.update(interactive=True)
@ -96,8 +96,8 @@ def ui():
with gr.Accordion("Settings", open=False):
auto_submit = gr.Checkbox(label='Submit the transcribed audio automatically', value=params['auto_submit'])
device_dropd = gr.Dropdown(label='Device', value=str(startup_device), choices=["cuda", "cpu", "none"])
whisper_model_dropd = gr.Dropdown(label='Whisper Model', value=params['whipser_model'], choices=["tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "large", "turbo"])
whisper_language = gr.Dropdown(label='Whisper Language', value=params['whipser_language'], choices=["english", "chinese", "german", "spanish", "russian", "korean", "french", "japanese", "portuguese", "turkish", "polish", "catalan", "dutch", "arabic", "swedish", "italian", "indonesian", "hindi", "finnish", "vietnamese", "hebrew", "ukrainian", "greek", "malay", "czech", "romanian", "danish", "hungarian", "tamil", "norwegian", "thai", "urdu", "croatian", "bulgarian", "lithuanian", "latin", "maori", "malayalam", "welsh", "slovak", "telugu", "persian", "latvian", "bengali", "serbian", "azerbaijani", "slovenian", "kannada", "estonian", "macedonian", "breton", "basque", "icelandic", "armenian", "nepali", "mongolian", "bosnian", "kazakh", "albanian", "swahili", "galician", "marathi", "punjabi", "sinhala", "khmer", "shona", "yoruba", "somali", "afrikaans", "occitan", "georgian", "belarusian", "tajik", "sindhi", "gujarati", "amharic", "yiddish", "lao", "uzbek", "faroese", "haitian creole", "pashto", "turkmen", "nynorsk", "maltese", "sanskrit", "luxembourgish", "myanmar", "tibetan", "tagalog", "malagasy", "assamese", "tatar", "hawaiian", "lingala", "hausa", "bashkir", "javanese", "sundanese"])
whisper_model_dropd = gr.Dropdown(label='Whisper Model', value=params['whisper_model'], choices=["tiny.en", "base.en", "small.en", "medium.en", "tiny", "base", "small", "medium", "large", "turbo"])
whisper_language = gr.Dropdown(label='Whisper Language', value=params['whisper_language'], choices=["english", "chinese", "german", "spanish", "russian", "korean", "french", "japanese", "portuguese", "turkish", "polish", "catalan", "dutch", "arabic", "swedish", "italian", "indonesian", "hindi", "finnish", "vietnamese", "hebrew", "ukrainian", "greek", "malay", "czech", "romanian", "danish", "hungarian", "tamil", "norwegian", "thai", "urdu", "croatian", "bulgarian", "lithuanian", "latin", "maori", "malayalam", "welsh", "slovak", "telugu", "persian", "latvian", "bengali", "serbian", "azerbaijani", "slovenian", "kannada", "estonian", "macedonian", "breton", "basque", "icelandic", "armenian", "nepali", "mongolian", "bosnian", "kazakh", "albanian", "swahili", "galician", "marathi", "punjabi", "sinhala", "khmer", "shona", "yoruba", "somali", "afrikaans", "occitan", "georgian", "belarusian", "tajik", "sindhi", "gujarati", "amharic", "yiddish", "lao", "uzbek", "faroese", "haitian creole", "pashto", "turkmen", "nynorsk", "maltese", "sanskrit", "luxembourgish", "myanmar", "tibetan", "tagalog", "malagasy", "assamese", "tatar", "hawaiian", "lingala", "hausa", "bashkir", "javanese", "sundanese"])
audio.change(
auto_transcribe, [audio, auto_submit, whisper_language], [shared.gradio['textbox']]).then(
@ -105,7 +105,7 @@ def ui():
device_dropd.input(reload_whispermodel, [whisper_model_dropd, whisper_language, device_dropd], [whisper_model_dropd, whisper_language, device_dropd, audio])
whisper_model_dropd.change(reload_whispermodel, [whisper_model_dropd, whisper_language, device_dropd], [whisper_model_dropd, whisper_language, device_dropd, audio])
whisper_language.change(lambda x: params.update({"whipser_language": x}), whisper_language, None)
whisper_language.change(lambda x: params.update({"whisper_language": x}), whisper_language, None)
auto_submit.change(lambda x: params.update({"auto_submit": x}), auto_submit, None)

0
modules/api/__init__.py Normal file
View file

468
modules/api/anthropic.py Normal file
View file

@ -0,0 +1,468 @@
import json
import time
from modules import shared
def convert_request(body: dict) -> dict:
"""Transform Anthropic Messages API body into the dict that chat_completions_common expects."""
messages = []
# System message
system = body.get('system')
if system:
if isinstance(system, list):
# List of content blocks like [{"type":"text","text":"..."}]
text_parts = [block.get('text', '') for block in system if isinstance(block, dict) and block.get('type') == 'text']
system_text = '\n'.join(text_parts)
else:
system_text = str(system)
if system_text:
messages.append({"role": "system", "content": system_text})
# Convert messages
for msg in body.get('messages', []):
role = msg.get('role')
content = msg.get('content')
if isinstance(content, str):
messages.append({"role": role, "content": content})
continue
if not isinstance(content, list):
messages.append({"role": role, "content": str(content) if content else ""})
continue
if role == 'assistant':
# Split into text content, tool_calls, and skip thinking blocks
text_parts = []
tool_calls = []
for block in content:
btype = block.get('type')
if btype == 'text':
text_parts.append(block.get('text', ''))
elif btype == 'tool_use':
tool_calls.append({
"id": block.get('id', ''),
"type": "function",
"function": {
"name": block.get('name', ''),
"arguments": json.dumps(block.get('input', {}))
}
})
elif btype == 'thinking':
pass # Strip thinking blocks
assistant_msg = {"role": "assistant", "content": '\n'.join(text_parts) if text_parts else ""}
if tool_calls:
assistant_msg["tool_calls"] = tool_calls
messages.append(assistant_msg)
elif role == 'user':
# Handle tool_result blocks and regular content
regular_parts = []
for block in content:
btype = block.get('type')
if btype == 'tool_result':
# Emit any accumulated regular content first
if regular_parts:
if len(regular_parts) == 1 and regular_parts[0].get('type') == 'text':
messages.append({"role": "user", "content": regular_parts[0]['text']})
else:
messages.append({"role": "user", "content": regular_parts})
regular_parts = []
# Convert tool_result to OpenAI tool message
tool_content = block.get('content', '')
if isinstance(tool_content, list):
tool_content = '\n'.join(
b.get('text', '') for b in tool_content
if isinstance(b, dict) and b.get('type') == 'text'
)
messages.append({
"role": "tool",
"tool_call_id": block.get('tool_use_id', ''),
"content": str(tool_content)
})
elif btype == 'text':
regular_parts.append({"type": "text", "text": block.get('text', '')})
elif btype == 'image':
source = block.get('source', {})
if source.get('type') == 'base64':
media_type = source.get('media_type', 'image/png')
data = source.get('data', '')
regular_parts.append({
"type": "image_url",
"image_url": {"url": f"data:{media_type};base64,{data}"}
})
elif btype == 'thinking':
pass # Strip thinking blocks
if regular_parts:
if len(regular_parts) == 1 and regular_parts[0].get('type') == 'text':
messages.append({"role": "user", "content": regular_parts[0]['text']})
else:
messages.append({"role": "user", "content": regular_parts})
else:
messages.append({"role": role, "content": str(content)})
# Start with all fields from the original body (includes GenerationOptions defaults)
result = dict(body)
# Remove Anthropic-specific fields that don't map directly
for key in ('system', 'stop_sequences', 'tools', 'tool_choice', 'thinking', 'metadata'):
result.pop(key, None)
# Set converted fields
result['messages'] = messages
result['max_tokens'] = body.get('max_tokens', 4096)
result['stream'] = body.get('stream', False)
result['mode'] = 'instruct'
# Ensure ChatCompletionRequestParams defaults are present
result.setdefault('continue_', False)
result.setdefault('instruction_template', None)
result.setdefault('instruction_template_str', None)
result.setdefault('character', None)
result.setdefault('bot_name', None)
result.setdefault('context', None)
result.setdefault('greeting', None)
result.setdefault('user_name', None)
result.setdefault('user_bio', None)
result.setdefault('chat_template_str', None)
result.setdefault('chat_instruct_command', 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>')
result.setdefault('frequency_penalty', None)
result.setdefault('presence_penalty', None)
result.setdefault('logit_bias', None)
result.setdefault('logprobs', None)
result.setdefault('top_logprobs', None)
result.setdefault('n', 1)
result.setdefault('model', None)
result.setdefault('functions', None)
result.setdefault('function_call', None)
result.setdefault('stream_options', None)
result.setdefault('user', None)
result.setdefault('stop', None)
result.setdefault('tool_choice', None)
# Always request usage in streaming so the usage-only chunk triggers
# the deferred message_delta/message_stop with accurate output_tokens
if body.get('stream', False):
result['stream_options'] = {'include_usage': True}
# Map stop_sequences -> stop
if body.get('stop_sequences'):
result['stop'] = body['stop_sequences']
# Tools
if body.get('tools'):
result['tools'] = [
{
"type": "function",
"function": {
"name": t.get('name', ''),
"description": t.get('description', ''),
"parameters": t.get('input_schema', {"type": "object", "properties": {}})
}
}
for t in body['tools']
]
# Tool choice
tc = body.get('tool_choice')
if tc and isinstance(tc, dict):
tc_type = tc.get('type')
if tc_type == 'auto':
result['tool_choice'] = 'auto'
elif tc_type == 'any':
result['tool_choice'] = 'required'
elif tc_type == 'tool':
result['tool_choice'] = {"type": "function", "function": {"name": tc.get('name', '')}}
elif tc_type == 'none':
result['tool_choice'] = 'none'
else:
result.setdefault('tool_choice', None)
# Thinking
thinking = body.get('thinking')
if thinking and isinstance(thinking, dict) and thinking.get('type') in ('enabled', 'adaptive'):
result['enable_thinking'] = True
return result
_FINISH_REASON_MAP = {
"stop": "end_turn",
"length": "max_tokens",
"tool_calls": "tool_use",
}
def build_response(openai_resp: dict, model: str) -> dict:
"""Transform OpenAI chat completion response dict into Anthropic Messages format."""
resp_id = openai_resp.get('id', 'msg_unknown')
if resp_id.startswith('chatcmpl-'):
resp_id = 'msg_' + resp_id[9:]
choice = openai_resp.get('choices', [{}])[0]
message = choice.get('message', {})
content = []
# Reasoning/thinking content
reasoning = message.get('reasoning_content')
if reasoning:
content.append({"type": "thinking", "thinking": reasoning, "signature": ""})
# Text content
text = message.get('content')
if text:
content.append({"type": "text", "text": text})
# Tool calls
tool_calls = message.get('tool_calls')
if tool_calls:
for tc in tool_calls:
func = tc.get('function', {})
try:
input_data = json.loads(func.get('arguments', '{}'))
except (json.JSONDecodeError, TypeError):
input_data = {}
content.append({
"type": "tool_use",
"id": tc.get('id', ''),
"name": func.get('name', ''),
"input": input_data
})
finish_reason = choice.get('finish_reason', 'stop')
stop_reason = _FINISH_REASON_MAP.get(finish_reason, 'end_turn')
usage = openai_resp.get('usage', {})
return {
"id": resp_id,
"type": "message",
"role": "assistant",
"content": content,
"model": model,
"stop_reason": stop_reason,
"stop_sequence": None,
"usage": {
"input_tokens": usage.get('prompt_tokens', 0),
"output_tokens": usage.get('completion_tokens', 0),
}
}
class StreamConverter:
"""Stateful converter: processes one OpenAI chunk at a time, yields Anthropic SSE events.
When include_usage is enabled in the OpenAI request, the final chunk with
finish_reason has usage=None, followed by a separate usage-only chunk
(choices=[], usage={...}). We defer emitting message_delta and message_stop
until we receive that usage chunk so output_tokens is accurate.
"""
def __init__(self, model: str):
self.model = model
self.msg_id = "msg_%d" % int(time.time() * 1000000000)
self.block_index = 0
self.in_thinking = False
self.in_text = False
self.input_tokens = 0
self.output_tokens = 0
self.tool_calls_accum = {}
self.stop_reason = "end_turn"
self._pending_finish = False # True after we've seen finish_reason
def process_chunk(self, chunk: dict) -> list[dict]:
"""Process a single OpenAI streaming chunk; return list of Anthropic SSE event dicts."""
events = []
choices = chunk.get('choices', [])
usage = chunk.get('usage')
if usage:
self.input_tokens = usage.get('prompt_tokens', self.input_tokens)
self.output_tokens = usage.get('completion_tokens', self.output_tokens)
# Usage-only chunk (choices=[]) arrives after the finish chunk
if not choices:
if self._pending_finish:
events.extend(self.finish())
return events
choice = choices[0]
delta = choice.get('delta', {})
finish_reason = choice.get('finish_reason')
# First chunk with role
if 'role' in delta:
events.append({
"event": "message_start",
"data": json.dumps({
"type": "message_start",
"message": {
"id": self.msg_id,
"type": "message",
"role": "assistant",
"content": [],
"model": self.model,
"stop_reason": None,
"stop_sequence": None,
"usage": {"input_tokens": self.input_tokens, "output_tokens": 0}
}
})
})
events.append({"event": "ping", "data": json.dumps({"type": "ping"})})
return events
# Reasoning content
reasoning_content = delta.get('reasoning_content')
if reasoning_content:
if not self.in_thinking:
self.in_thinking = True
events.append({
"event": "content_block_start",
"data": json.dumps({
"type": "content_block_start",
"index": self.block_index,
"content_block": {"type": "thinking", "thinking": "", "signature": ""}
})
})
events.append({
"event": "content_block_delta",
"data": json.dumps({
"type": "content_block_delta",
"index": self.block_index,
"delta": {"type": "thinking_delta", "thinking": reasoning_content}
})
})
return events
# Text content
text_content = delta.get('content')
if text_content:
if self.in_thinking:
events.append({
"event": "content_block_stop",
"data": json.dumps({"type": "content_block_stop", "index": self.block_index})
})
self.in_thinking = False
self.block_index += 1
if not self.in_text:
self.in_text = True
events.append({
"event": "content_block_start",
"data": json.dumps({
"type": "content_block_start",
"index": self.block_index,
"content_block": {"type": "text", "text": ""}
})
})
events.append({
"event": "content_block_delta",
"data": json.dumps({
"type": "content_block_delta",
"index": self.block_index,
"delta": {"type": "text_delta", "text": text_content}
})
})
return events
# Tool calls in delta
chunk_tool_calls = delta.get('tool_calls')
if chunk_tool_calls:
for tc in chunk_tool_calls:
tc_id = tc.get('id', '')
tc_idx = tc.get('index', 0)
func = tc.get('function', {})
if tc_id:
self.tool_calls_accum[tc_idx] = {
"id": tc_id,
"name": func.get('name', ''),
"arguments": func.get('arguments', '')
}
elif tc_idx in self.tool_calls_accum:
self.tool_calls_accum[tc_idx]["arguments"] += func.get('arguments', '')
# Final chunk — close open content blocks, defer message_delta/stop for usage
if finish_reason is not None:
self.stop_reason = _FINISH_REASON_MAP.get(finish_reason, 'end_turn')
if self.in_thinking:
events.append({
"event": "content_block_stop",
"data": json.dumps({"type": "content_block_stop", "index": self.block_index})
})
self.in_thinking = False
self.block_index += 1
if self.in_text:
events.append({
"event": "content_block_stop",
"data": json.dumps({"type": "content_block_stop", "index": self.block_index})
})
self.in_text = False
self.block_index += 1
for tc_idx in sorted(self.tool_calls_accum.keys()):
tc = self.tool_calls_accum[tc_idx]
arguments_str = tc["arguments"] or "{}"
events.append({
"event": "content_block_start",
"data": json.dumps({
"type": "content_block_start",
"index": self.block_index,
"content_block": {
"type": "tool_use",
"id": tc["id"],
"name": tc["name"],
"input": {}
}
})
})
# Emit the full input as a single input_json_delta so SDK
# clients that reconstruct from deltas get the correct data
events.append({
"event": "content_block_delta",
"data": json.dumps({
"type": "content_block_delta",
"index": self.block_index,
"delta": {
"type": "input_json_delta",
"partial_json": arguments_str
}
})
})
events.append({
"event": "content_block_stop",
"data": json.dumps({"type": "content_block_stop", "index": self.block_index})
})
self.block_index += 1
# Defer message_delta/stop — usage chunk may follow
self._pending_finish = True
return events
def finish(self) -> list[dict]:
"""Emit deferred message_delta and message_stop. Safe to call multiple times."""
if not self._pending_finish:
return []
self._pending_finish = False
return [
{
"event": "message_delta",
"data": json.dumps({
"type": "message_delta",
"delta": {"stop_reason": self.stop_reason, "stop_sequence": None},
"usage": {"input_tokens": self.input_tokens, "output_tokens": self.output_tokens}
})
},
{
"event": "message_stop",
"data": json.dumps({"type": "message_stop"})
}
]

View file

@ -9,9 +9,9 @@ import tiktoken
import yaml
from pydantic import ValidationError
from extensions.openai.errors import InvalidRequestError
from extensions.openai.typing import ToolDefinition
from extensions.openai.utils import debug_msg
from .errors import InvalidRequestError
from .typing import ToolDefinition
from .utils import debug_msg
from modules.tool_parsing import get_tool_call_id, parse_tool_call, detect_tool_call_format
from modules import shared
from modules.reasoning import extract_reasoning
@ -263,7 +263,7 @@ def convert_history(history):
seen_non_system = True
meta = {}
tool_calls = entry.get("tool_calls")
if tool_calls and isinstance(tool_calls, list) and len(tool_calls) > 0:
if tool_calls and isinstance(tool_calls, list):
meta["tool_calls"] = tool_calls
if content.strip() == "":
content = "" # keep empty content, don't skip
@ -315,7 +315,7 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
raise InvalidRequestError(message="messages is required", param='messages')
tools = None
if 'tools' in body and body['tools'] is not None and isinstance(body['tools'], list) and len(body['tools']) > 0:
if 'tools' in body and body['tools'] is not None and isinstance(body['tools'], list) and body['tools']:
tools = validateTools(body['tools']) # raises InvalidRequestError if validation fails
tool_choice = body.get('tool_choice', None)

View file

@ -3,8 +3,8 @@ import os
import numpy as np
from transformers import AutoModel
from extensions.openai.errors import ServiceUnavailableError
from extensions.openai.utils import debug_msg, float_list_to_base64
from .errors import ServiceUnavailableError
from .utils import debug_msg, float_list_to_base64
from modules.logging_colors import logger
embeddings_params_initialized = False
@ -17,14 +17,12 @@ def initialize_embedding_params():
'''
global embeddings_params_initialized
if not embeddings_params_initialized:
from extensions.openai.script import params
global st_model, embeddings_model, embeddings_device
st_model = os.environ.get("OPENEDAI_EMBEDDING_MODEL", params.get('embedding_model', 'all-mpnet-base-v2'))
st_model = os.environ.get("OPENEDAI_EMBEDDING_MODEL", 'sentence-transformers/all-mpnet-base-v2')
embeddings_model = None
# OPENEDAI_EMBEDDING_DEVICE: auto (best or cpu), cpu, cuda, ipu, xpu, mkldnn, opengl, opencl, ideep, hip, ve, fpga, ort, xla, lazy, vulkan, mps, meta, hpu, mtia, privateuseone
embeddings_device = os.environ.get("OPENEDAI_EMBEDDING_DEVICE", params.get('embedding_device', 'cpu'))
embeddings_device = os.environ.get("OPENEDAI_EMBEDDING_DEVICE", 'cpu')
if embeddings_device.lower() == 'auto':
embeddings_device = None
@ -41,14 +39,14 @@ def load_embedding_model(model: str):
initialize_embedding_params()
global embeddings_device, embeddings_model
try:
print(f"Try embedding model: {model} on {embeddings_device}")
logger.info(f"Try embedding model: {model} on {embeddings_device}")
if 'jina-embeddings' in model:
embeddings_model = AutoModel.from_pretrained(model, trust_remote_code=True) # trust_remote_code is needed to use the encode method
embeddings_model = embeddings_model.to(embeddings_device)
else:
embeddings_model = SentenceTransformer(model, device=embeddings_device)
print(f"Loaded embedding model: {model}")
logger.info(f"Loaded embedding model: {model}")
except Exception as e:
embeddings_model = None
raise ServiceUnavailableError(f"Error: Failed to load embedding model: {model}", internal_message=repr(e))

View file

@ -6,7 +6,7 @@ import base64
import io
import time
from extensions.openai.errors import ServiceUnavailableError
from .errors import ServiceUnavailableError
from modules import shared

View file

@ -1,4 +1,4 @@
from extensions.openai.completions import process_parameters
from .completions import process_parameters
from modules.logits import get_next_logits

View file

@ -3,7 +3,7 @@ import time
import numpy as np
from numpy.linalg import norm
from extensions.openai.embeddings import get_embeddings
from .embeddings import get_embeddings
moderations_disabled = False # return 0/false
category_embeddings = None
@ -64,6 +64,4 @@ def moderations(input):
'category_scores': category_scores,
}])
print(results)
return results

View file

@ -10,25 +10,27 @@ from threading import Thread
import uvicorn
from fastapi import Depends, FastAPI, Header, HTTPException
from fastapi.exceptions import RequestValidationError
from fastapi.middleware.cors import CORSMiddleware
from fastapi.requests import Request
from fastapi.responses import JSONResponse
from pydub import AudioSegment
from sse_starlette import EventSourceResponse
from starlette.concurrency import iterate_in_threadpool
import extensions.openai.completions as OAIcompletions
import extensions.openai.logits as OAIlogits
import extensions.openai.models as OAImodels
from extensions.openai.tokens import token_count, token_decode, token_encode
from extensions.openai.errors import OpenAIError
from extensions.openai.utils import _start_cloudflared
import modules.api.completions as OAIcompletions
import modules.api.logits as OAIlogits
import modules.api.models as OAImodels
import modules.api.anthropic as Anthropic
from .tokens import token_count, token_decode, token_encode
from .errors import OpenAIError
from .utils import _start_cloudflared
from modules import shared
from modules.logging_colors import logger
from modules.models import unload_model
from modules.text_generation import stop_everything_event # used by /v1/internal/stop-generation
from .typing import (
AnthropicRequest,
ChatCompletionRequest,
ChatCompletionResponse,
ChatPromptResponse,
@ -53,12 +55,6 @@ from .typing import (
to_dict
)
params = {
'embedding_device': 'cpu',
'embedding_model': 'sentence-transformers/all-mpnet-base-v2',
'debug': 0
}
async def _wait_for_disconnect(request: Request, stop_event: threading.Event):
"""Block until the client disconnects, then signal the stop_event."""
@ -81,9 +77,23 @@ def verify_admin_key(authorization: str = Header(None)) -> None:
raise HTTPException(status_code=401, detail="Unauthorized")
def verify_anthropic_key(x_api_key: str = Header(None, alias="x-api-key")) -> None:
expected_api_key = shared.args.api_key
if expected_api_key and (x_api_key is None or x_api_key != expected_api_key):
raise HTTPException(status_code=401, detail="Unauthorized")
class AnthropicError(Exception):
def __init__(self, message: str, error_type: str = "invalid_request_error", status_code: int = 400):
self.message = message
self.error_type = error_type
self.status_code = status_code
app = FastAPI()
check_key = [Depends(verify_api_key)]
check_admin_key = [Depends(verify_admin_key)]
check_anthropic_key = [Depends(verify_anthropic_key)]
# Configure CORS settings to allow all origins, methods, and headers
app.add_middleware(
@ -109,6 +119,28 @@ async def openai_error_handler(request: Request, exc: OpenAIError):
)
@app.exception_handler(AnthropicError)
async def anthropic_error_handler(request: Request, exc: AnthropicError):
return JSONResponse(
status_code=exc.status_code,
content={"type": "error", "error": {"type": exc.error_type, "message": exc.message}}
)
@app.exception_handler(RequestValidationError)
async def validation_error_handler(request: Request, exc: RequestValidationError):
if request.url.path.startswith("/v1/messages"):
messages = "; ".join(
f"{'.'.join(str(l) for l in e['loc'])}: {e['msg']}" for e in exc.errors()
)
return JSONResponse(
status_code=400,
content={"type": "error", "error": {"type": "invalid_request_error", "message": messages}}
)
return JSONResponse(status_code=422, content={"detail": exc.errors()})
@app.middleware("http")
async def validate_host_header(request: Request, call_next):
# Be strict about only approving access to localhost by default
@ -218,6 +250,76 @@ async def openai_chat_completions(request: Request, request_data: ChatCompletion
return JSONResponse(response)
@app.post('/v1/messages', dependencies=check_anthropic_key)
async def anthropic_messages(request: Request, request_data: AnthropicRequest):
body = to_dict(request_data)
model = body.get('model') or shared.model_name or 'unknown'
try:
converted = Anthropic.convert_request(body)
except Exception as e:
raise AnthropicError(message=str(e))
try:
return await _anthropic_generate(request, request_data, converted, model)
except OpenAIError as e:
error_type = "invalid_request_error" if e.code < 500 else "api_error"
if e.code == 503:
error_type = "overloaded_error"
raise AnthropicError(message=e.message, error_type=error_type, status_code=e.code)
except Exception as e:
raise AnthropicError(message=str(e) or "Internal server error", error_type="api_error", status_code=500)
async def _anthropic_generate(request, request_data, converted, model):
if request_data.stream:
stop_event = threading.Event()
async def generator():
converter = Anthropic.StreamConverter(model)
response = OAIcompletions.stream_chat_completions(converted, is_legacy=False, stop_event=stop_event)
try:
async for resp in iterate_in_threadpool(response):
disconnected = await request.is_disconnected()
if disconnected:
break
for event in converter.process_chunk(resp):
yield event
for event in converter.finish():
yield event
except OpenAIError as e:
error_type = "invalid_request_error" if e.code < 500 else "api_error"
if e.code == 503:
error_type = "overloaded_error"
yield {
"event": "error",
"data": json.dumps({"type": "error", "error": {"type": error_type, "message": e.message}})
}
finally:
stop_event.set()
response.close()
return EventSourceResponse(generator(), sep="\n")
else:
stop_event = threading.Event()
monitor = asyncio.create_task(_wait_for_disconnect(request, stop_event))
try:
openai_resp = await asyncio.to_thread(
OAIcompletions.chat_completions,
converted,
is_legacy=False,
stop_event=stop_event
)
finally:
stop_event.set()
monitor.cancel()
return JSONResponse(Anthropic.build_response(openai_resp, model))
@app.get("/v1/models", dependencies=check_key)
@app.get("/v1/models/{model}", dependencies=check_key)
async def handle_models(request: Request):
@ -244,6 +346,7 @@ def handle_billing_usage():
@app.post('/v1/audio/transcriptions', dependencies=check_key)
async def handle_audio_transcription(request: Request):
import speech_recognition as sr
from pydub import AudioSegment
r = sr.Recognizer()
@ -275,7 +378,7 @@ async def handle_audio_transcription(request: Request):
@app.post('/v1/images/generations', response_model=ImageGenerationResponse, dependencies=check_key)
async def handle_image_generation(request_data: ImageGenerationRequest):
import extensions.openai.images as OAIimages
import modules.api.images as OAIimages
response = await asyncio.to_thread(OAIimages.generations, request_data)
return JSONResponse(response)
@ -283,7 +386,7 @@ async def handle_image_generation(request_data: ImageGenerationRequest):
@app.post("/v1/embeddings", response_model=EmbeddingsResponse, dependencies=check_key)
async def handle_embeddings(request: Request, request_data: EmbeddingsRequest):
import extensions.openai.embeddings as OAIembeddings
import modules.api.embeddings as OAIembeddings
input = request_data.input
if not input:
@ -298,7 +401,7 @@ async def handle_embeddings(request: Request, request_data: EmbeddingsRequest):
@app.post("/v1/moderations", dependencies=check_key)
async def handle_moderations(request: Request):
import extensions.openai.moderations as OAImoderations
import modules.api.moderations as OAImoderations
body = await request.json()
input = body["input"]
@ -403,12 +506,17 @@ async def handle_load_model(request_data: LoadModelRequest):
return JSONResponse(content="OK")
except Exception:
traceback.print_exc()
raise HTTPException(status_code=400, detail="Failed to load the model.")
raise HTTPException(status_code=500, detail="Failed to load the model.")
@app.post("/v1/internal/model/unload", dependencies=check_admin_key)
async def handle_unload_model():
unload_model()
try:
unload_model()
return JSONResponse(content="OK")
except Exception:
traceback.print_exc()
raise HTTPException(status_code=500, detail="Failed to unload the model.")
@app.get("/v1/internal/lora/list", response_model=LoraListResponse, dependencies=check_admin_key)
@ -475,15 +583,15 @@ def run_server():
port,
shared.args.public_api_id,
max_attempts=3,
on_start=lambda url: logger.info(f'OpenAI-compatible API URL:\n\n{url}/v1\n')
on_start=lambda url: logger.info(f'OpenAI/Anthropic-compatible API URL:\n\n{url}/v1\n')
)
else:
url_proto = 'https://' if (ssl_certfile and ssl_keyfile) else 'http://'
urls = [f'{url_proto}{addr}:{port}/v1' for addr in server_addrs]
if len(urls) > 1:
logger.info('OpenAI-compatible API URLs:\n\n' + '\n'.join(urls) + '\n')
logger.info('OpenAI/Anthropic-compatible API URLs:\n\n' + '\n'.join(urls) + '\n')
else:
logger.info('OpenAI-compatible API URL:\n\n' + '\n'.join(urls) + '\n')
logger.info('OpenAI/Anthropic-compatible API URL:\n\n' + '\n'.join(urls) + '\n')
# Log API keys
if shared.args.api_key:
@ -500,7 +608,15 @@ def run_server():
uvicorn.run(app, host=server_addrs, port=port, ssl_certfile=ssl_certfile, ssl_keyfile=ssl_keyfile, access_log=False)
_server_started = False
def setup():
global _server_started
if _server_started:
return
_server_started = True
if shared.args.nowebui:
run_server()
else:

View file

@ -144,7 +144,7 @@ class CompletionResponse(BaseModel):
class ChatCompletionRequestParams(BaseModel):
messages: List[dict]
messages: List[dict] = Field(..., min_length=1)
model: str | None = Field(default=None, description="Unused parameter. To change the model, use the /v1/internal/model/load endpoint.")
frequency_penalty: float | None = shared.args.frequency_penalty
function_call: str | dict | None = Field(default=None, description="Unused parameter.")
@ -282,6 +282,25 @@ class LoadLorasRequest(BaseModel):
lora_names: List[str]
class AnthropicRequestParams(BaseModel):
model: str | None = None
messages: List[dict] = Field(..., min_length=1)
max_tokens: int
system: str | list | None = None
temperature: float | None = shared.args.temperature
top_p: float | None = shared.args.top_p
stop_sequences: list[str] | None = None
stream: bool = False
tools: list[dict] | None = None
tool_choice: dict | None = None
thinking: dict | None = None
metadata: dict | None = None
class AnthropicRequest(GenerationOptions, AnthropicRequestParams):
pass
class ImageGenerationRequest(BaseModel):
"""Image-specific parameters for generation."""
prompt: str

View file

@ -23,8 +23,7 @@ def float_list_to_base64(float_array: np.ndarray) -> str:
def debug_msg(*args, **kwargs):
from extensions.openai.script import params
if os.environ.get("OPENEDAI_DEBUG", params.get('debug', 0)):
if int(os.environ.get("OPENEDAI_DEBUG", 0)):
print(*args, **kwargs)
@ -51,4 +50,4 @@ def _start_cloudflared(port: int, tunnel_id: str, max_attempts: int = 3, on_star
traceback.print_exc()
time.sleep(3)
raise Exception('Could not start cloudflared.')
raise Exception('Could not start cloudflared.')

View file

@ -1,8 +1,8 @@
import traceback
from queue import Queue
from threading import Thread
import modules.shared as shared
from modules.logging_colors import logger
class StopNowException(Exception):
@ -34,12 +34,11 @@ class Iteratorize:
def gentask():
try:
ret = self.mfunc(callback=_callback, *args, **self.kwargs)
ret = self.mfunc(callback=_callback, *self.args, **self.kwargs)
except StopNowException:
pass
except Exception:
traceback.print_exc()
pass
logger.exception("Failed in generation callback")
self.q.put(self.sentinel)
if self.c_callback:

View file

@ -70,9 +70,7 @@ def update_message_metadata(metadata_dict, role, index, **fields):
if key not in metadata_dict:
metadata_dict[key] = {}
# Update with provided fields
for field_name, field_value in fields.items():
metadata_dict[key][field_name] = field_value
metadata_dict[key].update(fields)
jinja_env = ImmutableSandboxedEnvironment(
@ -212,6 +210,24 @@ def _expand_tool_sequence(tool_seq):
return messages
def _format_attachments(attachments, include_text=True):
"""Build image ref and text attachment strings from a list of attachments."""
attachments_text = ""
image_refs = ""
for attachment in attachments:
if attachment.get("type") == "image":
image_refs += "<__media__>"
elif include_text:
filename = attachment.get("name", "file")
content = attachment.get("content", "")
if attachment.get("type") == "text/html" and attachment.get("url"):
attachments_text += f"\nName: {filename}\nURL: {attachment['url']}\nContents:\n\n=====\n{content}\n=====\n\n"
else:
attachments_text += f"\nName: {filename}\nContents:\n\n=====\n{content}\n=====\n\n"
return image_refs, attachments_text
def generate_chat_prompt(user_input, state, **kwargs):
impersonate = kwargs.get('impersonate', False)
_continue = kwargs.get('_continue', False)
@ -235,6 +251,7 @@ def generate_chat_prompt(user_input, state, **kwargs):
tools_in_user_message=False,
add_generation_prompt=False,
enable_thinking=state['enable_thinking'],
thinking=state['enable_thinking'],
reasoning_effort=state['reasoning_effort'],
thinking_budget=-1 if state.get('enable_thinking', True) else 0,
bos_token=shared.bos_token,
@ -327,24 +344,23 @@ def generate_chat_prompt(user_input, state, **kwargs):
messages.insert(insert_pos, msg_dict)
# Handle Seed-OSS
elif '<seed:think>' in assistant_msg:
# Handle <think> blocks (Kimi, DeepSeek, Qwen, etc.) and Seed-OSS
elif '<think>' in assistant_msg or '<seed:think>' in assistant_msg:
open_tag = '<think>' if '<think>' in assistant_msg else '<seed:think>'
close_tag = '</think>' if open_tag == '<think>' else '</seed:think>'
thinking_content = ""
final_content = assistant_msg
# Extract thinking content if present
if '<seed:think>' in assistant_msg:
parts = assistant_msg.split('<seed:think>', 1)
if len(parts) > 1:
potential_content = parts[1]
if '</seed:think>' in potential_content:
thinking_content = potential_content.split('</seed:think>', 1)[0].strip()
final_content = parts[0] + potential_content.split('</seed:think>', 1)[1]
else:
thinking_content = potential_content.strip()
final_content = parts[0]
parts = assistant_msg.split(open_tag, 1)
if len(parts) > 1:
potential_content = parts[1]
if close_tag in potential_content:
thinking_content = potential_content.split(close_tag, 1)[0].strip()
final_content = parts[0] + potential_content.split(close_tag, 1)[1]
else:
thinking_content = potential_content.strip()
final_content = parts[0]
# Insert as structured message
msg_dict = {"role": "assistant", "content": final_content.strip()}
if thinking_content:
msg_dict["reasoning_content"] = thinking_content
@ -377,22 +393,10 @@ def generate_chat_prompt(user_input, state, **kwargs):
# Add attachment content if present AND if past attachments are enabled
if user_key in metadata and "attachments" in metadata[user_key]:
attachments_text = ""
image_refs = ""
for attachment in metadata[user_key]["attachments"]:
if attachment.get("type") == "image":
# Add image reference for multimodal models
image_refs += "<__media__>"
elif state.get('include_past_attachments', True):
# Handle text/PDF attachments
filename = attachment.get("name", "file")
content = attachment.get("content", "")
if attachment.get("type") == "text/html" and attachment.get("url"):
attachments_text += f"\nName: {filename}\nURL: {attachment['url']}\nContents:\n\n=====\n{content}\n=====\n\n"
else:
attachments_text += f"\nName: {filename}\nContents:\n\n=====\n{content}\n=====\n\n"
image_refs, attachments_text = _format_attachments(
metadata[user_key]["attachments"],
include_text=state.get('include_past_attachments', True)
)
if image_refs:
enhanced_user_msg = f"{image_refs}\n\n{enhanced_user_msg}"
if attachments_text:
@ -405,37 +409,18 @@ def generate_chat_prompt(user_input, state, **kwargs):
# Check if we have attachments
if not (impersonate or _continue):
has_attachments = False
if len(history_data.get('metadata', {})) > 0:
current_row_idx = len(history)
user_key = f"user_{current_row_idx}"
has_attachments = user_key in metadata and "attachments" in metadata[user_key]
current_row_idx = len(history)
user_key = f"user_{current_row_idx}"
has_attachments = user_key in metadata and "attachments" in metadata[user_key]
if user_input or has_attachments:
# For the current user input being processed, check if we need to add attachments
if len(history_data.get('metadata', {})) > 0:
current_row_idx = len(history)
user_key = f"user_{current_row_idx}"
if user_key in metadata and "attachments" in metadata[user_key]:
attachments_text = ""
image_refs = ""
for attachment in metadata[user_key]["attachments"]:
if attachment.get("type") == "image":
image_refs += "<__media__>"
else:
filename = attachment.get("name", "file")
content = attachment.get("content", "")
if attachment.get("type") == "text/html" and attachment.get("url"):
attachments_text += f"\nName: {filename}\nURL: {attachment['url']}\nContents:\n\n=====\n{content}\n=====\n\n"
else:
attachments_text += f"\nName: {filename}\nContents:\n\n=====\n{content}\n=====\n\n"
if image_refs:
user_input = f"{image_refs}\n\n{user_input}"
if attachments_text:
user_input += f"\n\nATTACHMENTS:\n{attachments_text}"
if has_attachments:
image_refs, attachments_text = _format_attachments(metadata[user_key]["attachments"])
if image_refs:
user_input = f"{image_refs}\n\n{user_input}"
if attachments_text:
user_input += f"\n\nATTACHMENTS:\n{attachments_text}"
messages.append({"role": "user", "content": user_input})
@ -449,6 +434,8 @@ def generate_chat_prompt(user_input, state, **kwargs):
messages.append({"role": "user", "content": "fake user message replace me"})
def make_prompt(messages):
if _continue:
messages = copy.deepcopy(messages)
last_message = messages[-1].copy()
if _continue:
if state['mode'] == 'chat-instruct':
@ -587,7 +574,6 @@ def count_prompt_tokens(text_input, state):
try:
# Handle dict format with text and files
files = []
if isinstance(text_input, dict):
files = text_input.get('files', [])
text = text_input.get('text', '')
@ -625,7 +611,6 @@ def count_prompt_tokens(text_input, state):
def get_stopping_strings(state):
stopping_strings = []
renderers = []
if state['mode'] in ['instruct', 'chat-instruct']:
@ -2629,7 +2614,7 @@ def handle_delete_template_click(template):
f"{template}.yaml",
root,
root,
gr.update(visible=False)
gr.update(visible=True)
]

View file

@ -1,7 +1,6 @@
import math
import queue
import threading
import traceback
from pathlib import Path
from typing import Any, List, Tuple
@ -34,8 +33,7 @@ from modules.text_generation import get_max_prompt_length
try:
import flash_attn
except Exception:
logger.warning('Failed to load flash-attention due to the following error:\n')
traceback.print_exc()
logger.warning('Failed to load flash-attention due to the following error:', exc_info=True)
class LogitBiasFilter(Filter):
@ -81,7 +79,7 @@ class ConcurrentGenerator:
try:
results = self.generator.iterate()
except Exception:
logger.error("Exception in ConcurrentGenerator iterate loop:\n" + traceback.format_exc())
logger.exception("Exception in ConcurrentGenerator iterate loop")
for q in self.job_queues.values():
q.put(None)
self.job_queues.clear()

View file

@ -1,5 +1,4 @@
import os
import traceback
from pathlib import Path
from typing import Any, Dict, Optional, Union
@ -21,8 +20,7 @@ from modules.logging_colors import logger
try:
import flash_attn
except Exception:
logger.warning('Failed to load flash-attention due to the following error:\n')
traceback.print_exc()
logger.warning('Failed to load flash-attention due to the following error:', exc_info=True)
class Exllamav3HF(PreTrainedModel, GenerationMixin):

View file

@ -1,7 +1,6 @@
import importlib
import importlib.util
import sys
import traceback
from functools import partial
from inspect import signature
from pathlib import Path
@ -33,8 +32,7 @@ def load_extensions():
if name not in available_extensions:
continue
if name != 'api':
logger.info(f'Loading the extension "{name}"')
logger.info(f'Loading the extension "{name}"')
try:
# Prefer user extension, fall back to system extension
@ -75,8 +73,7 @@ def load_extensions():
raise
except Exception:
logger.error(f'Failed to load the extension "{name}".')
traceback.print_exc()
logger.exception(f'Failed to load the extension "{name}".')
# This iterator returns the extensions in the order specified in the command-line

View file

View file

@ -1,6 +1,7 @@
import json
import os
import pprint
import shlex
import re
import socket
import subprocess
@ -446,21 +447,28 @@ class LlamaServer:
elif extra_flags.startswith("'") and extra_flags.endswith("'"):
extra_flags = extra_flags[1:-1].strip()
for flag_item in extra_flags.split(','):
flag_item = flag_item.strip()
if '=' in flag_item:
flag, value = flag_item.split('=', 1)
flag = flag.strip()
value = value.strip()
if len(flag) <= 3:
cmd += [f"-{flag}", value]
if extra_flags.startswith('-'):
# New literal format: "--jinja --rpc 1222,1222"
cmd += shlex.split(extra_flags)
else:
# Legacy format: "flag1=value1,flag2,flag3=value3"
long_form_only = {'rpc', 'fit', 'pos', 'ppl'}
for flag_item in extra_flags.split(','):
flag_item = flag_item.strip()
if '=' in flag_item:
flag, value = flag_item.split('=', 1)
flag = flag.strip()
value = value.strip()
if len(flag) <= 3 and flag not in long_form_only:
cmd += [f"-{flag}", value]
else:
cmd += [f"--{flag}", value]
else:
cmd += [f"--{flag}", value]
else:
if len(flag_item) <= 3:
cmd.append(f"-{flag_item}")
else:
cmd.append(f"--{flag_item}")
if len(flag_item) <= 3 and flag_item not in long_form_only:
cmd.append(f"-{flag_item}")
else:
cmd.append(f"--{flag_item}")
env = os.environ.copy()
if os.name == 'posix':
@ -492,9 +500,8 @@ class LlamaServer:
health_url = f"http://127.0.0.1:{self.port}/health"
while True:
# Check if process is still alive
if self.process.poll() is not None:
# Process has terminated
exit_code = self.process.poll()
exit_code = self.process.poll()
if exit_code is not None:
raise RuntimeError(f"Server process terminated unexpectedly with exit code: {exit_code}")
try:

View file

@ -1,5 +1,4 @@
import time
import traceback
import numpy as np
@ -23,7 +22,7 @@ def get_next_logits(*args, **kwargs):
try:
result = _get_next_logits(*args, **kwargs)
except Exception:
traceback.print_exc()
logger.exception("Failed to get next logits")
result = None
if needs_lock:

View file

@ -34,7 +34,8 @@ def get_model_metadata(model):
path = model_path / 'config.json'
if path.exists():
hf_metadata = json.loads(open(path, 'r', encoding='utf-8').read())
with open(path, 'r', encoding='utf-8') as f:
hf_metadata = json.loads(f.read())
else:
hf_metadata = None
@ -93,7 +94,7 @@ def get_model_metadata(model):
else:
# Transformers metadata
if hf_metadata is not None:
metadata = json.loads(open(path, 'r', encoding='utf-8').read())
metadata = hf_metadata
if 'pretrained_config' in metadata:
metadata = metadata['pretrained_config']
@ -134,7 +135,8 @@ def get_model_metadata(model):
# 3. Fall back to tokenizer_config.json metadata
if path.exists():
metadata = json.loads(open(path, 'r', encoding='utf-8').read())
with open(path, 'r', encoding='utf-8') as f:
metadata = json.loads(f.read())
# Only read from metadata if we haven't already loaded from .jinja or .json
if template is None and 'chat_template' in metadata:

View file

@ -72,14 +72,13 @@ def extract_reasoning(text, html_escaped=False):
if content_pos != -1:
content_start = content_pos + len(content_esc)
else:
# Content tag expected but not yet present (e.g. partial
# streaming) — suppress intermediate tags between end_tag
# and content_tag so they don't leak as content.
content_start = len(text)
# Content tag not present — fall back to content after
# end_tag (e.g. GPT-OSS tool calls skip the final channel).
content_start = end_pos + len(end_esc)
else:
content_start = end_pos + len(end_esc)
return text[thought_start:thought_end], text[content_start:]
return text[thought_start:thought_end], text[content_start:].lstrip()
# Handle standalone GPT-OSS final channel marker without a preceding
# analysis/commentary block (the model skipped thinking entirely).

View file

@ -101,7 +101,7 @@ group.add_argument('--tensor-split', type=str, default=None, help='Split the mod
group.add_argument('--row-split', action='store_true', help='Split the model by rows across GPUs. This may improve multi-gpu performance.')
group.add_argument('--no-mmap', action='store_true', help='Prevent mmap from being used.')
group.add_argument('--mlock', action='store_true', help='Force the system to keep the model in RAM.')
group.add_argument('--no-kv-offload', action='store_true', help='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.')
group.add_argument('--no-kv-offload', action='store_true', help='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces performance.')
group.add_argument('--batch-size', type=int, default=1024, help='Maximum number of prompt tokens to batch together when calling llama-server. This is the application level batch size.')
group.add_argument('--ubatch-size', type=int, default=1024, help='Maximum number of prompt tokens to batch together when calling llama-server. This is the max physical batch size for computation (device level).')
group.add_argument('--threads', type=int, default=0, help='Number of threads to use.')
@ -109,7 +109,7 @@ group.add_argument('--threads-batch', type=int, default=0, help='Number of threa
group.add_argument('--numa', action='store_true', help='Activate NUMA task allocation for llama.cpp.')
group.add_argument('--parallel', type=int, default=1, help='Number of parallel request slots. The context size is divided equally among slots. For example, to have 4 slots with 8192 context each, set ctx_size to 32768.')
group.add_argument('--fit-target', type=str, default='512', help='Target VRAM margin per device for auto GPU layers, comma-separated list of values in MiB. A single value is broadcast across all devices.')
group.add_argument('--extra-flags', type=str, default=None, help='Extra flags to pass to llama-server. Format: "flag1=value1,flag2,flag3=value3". Example: "override-tensor=exps=CPU"')
group.add_argument('--extra-flags', type=str, default=None, help='Extra flags to pass to llama-server. Example: "--jinja --rpc 192.168.1.100:50052"')
# Transformers/Accelerate
group = parser.add_argument_group('Transformers/Accelerate')
@ -156,7 +156,7 @@ group.add_argument('--portable', action='store_true', help='Hide features not av
# API
group = parser.add_argument_group('API')
group.add_argument('--api', action='store_true', help='Enable the API extension.')
group.add_argument('--api', action='store_true', help='Enable the API server.')
group.add_argument('--public-api', action='store_true', help='Create a public URL for the API using Cloudflare.')
group.add_argument('--public-api-id', type=str, help='Tunnel ID for named Cloudflare Tunnel. Use together with public-api option.', default=None)
group.add_argument('--api-port', type=int, default=5000, help='The listening port for the API.')
@ -175,7 +175,7 @@ group.add_argument('--dynatemp-high', type=float, default=_d['dynatemp_high'], m
group.add_argument('--dynatemp-exponent', type=float, default=_d['dynatemp_exponent'], metavar='N', help='Dynamic temperature exponent')
group.add_argument('--smoothing-factor', type=float, default=_d['smoothing_factor'], metavar='N', help='Smoothing factor')
group.add_argument('--smoothing-curve', type=float, default=_d['smoothing_curve'], metavar='N', help='Smoothing curve')
group.add_argument('--top-p', type=float, default=_d['top_p'], metavar='N', help='Top P')
group.add_argument('--top-p', type=float, default=0.95, metavar='N', help='Top P')
group.add_argument('--top-k', type=int, default=_d['top_k'], metavar='N', help='Top K')
group.add_argument('--min-p', type=float, default=_d['min_p'], metavar='N', help='Min P')
group.add_argument('--top-n-sigma', type=float, default=_d['top_n_sigma'], metavar='N', help='Top N Sigma')
@ -435,16 +435,6 @@ def fix_loader_name(name):
return 'TensorRT-LLM'
def add_extension(name, last=False):
if args.extensions is None:
args.extensions = [name]
elif last:
args.extensions = [x for x in args.extensions if x != name]
args.extensions.append(name)
elif name not in args.extensions:
args.extensions.append(name)
def is_chat():
return True
@ -453,25 +443,17 @@ def load_user_config():
'''
Loads custom model-specific settings
'''
user_config = {}
if Path(f'{args.model_dir}/config-user.yaml').exists():
file_content = open(f'{args.model_dir}/config-user.yaml', 'r').read().strip()
if file_content:
user_config = yaml.safe_load(file_content)
else:
user_config = {}
else:
user_config = {}
return user_config
args.loader = fix_loader_name(args.loader)
# Activate the API extension
if args.api or args.public_api:
add_extension('openai', last=True)
# Load model-specific settings
p = Path(f'{args.model_dir}/config.yaml')
if p.exists():

View file

@ -4,7 +4,6 @@ import html
import pprint
import random
import time
import traceback
import numpy as np
@ -477,7 +476,7 @@ def generate_reply_HF(question, original_question, state, stopping_strings=None,
yield cumulative_reply
except Exception:
traceback.print_exc()
logger.exception("Failed to generate reply (HF)")
finally:
t1 = time.time()
original_tokens = len(original_input_ids[0])
@ -510,7 +509,7 @@ def generate_reply_custom(question, original_question, state, stopping_strings=N
yield reply
except Exception:
traceback.print_exc()
logger.exception("Failed to generate reply (custom)")
finally:
t1 = time.time()

View file

@ -2,6 +2,12 @@ import json
import random
import re
from modules.reasoning import extract_reasoning
def _make_tool_call(name, arguments):
return {"type": "function", "function": {"name": name, "arguments": arguments}}
def get_tool_call_id() -> str:
letter_bytes = "abcdefghijklmnopqrstuvwxyz0123456789"
@ -37,6 +43,10 @@ def streaming_tool_buffer_check(text, markers=None, tool_names=None, check_bare_
check_bare_names: Whether to do partial-prefix matching on tool
names (for models with unknown template format).
'''
# Strip thinking blocks so tool-call syntax inside <think> doesn't
# trigger false positives.
_, text = extract_reasoning(text)
# Full marker found in text → buffer permanently.
# Always checks ALL known markers regardless of template (cheap safety net).
for marker in TOOL_CALL_OPENING_MARKERS:
@ -149,13 +159,7 @@ def _parse_channel_tool_calls(answer: str, tool_names: list[str]):
if start_pos is None:
prefix = answer.rfind('<|start|>assistant', 0, m.start())
start_pos = prefix if prefix != -1 else m.start()
matches.append({
"type": "function",
"function": {
"name": func_name,
"arguments": arguments
}
})
matches.append(_make_tool_call(func_name, arguments))
except json.JSONDecodeError:
pass
if matches:
@ -185,13 +189,7 @@ def _parse_mistral_token_tool_calls(answer: str, tool_names: list[str]):
arguments = json.loads(json_str)
if start_pos is None:
start_pos = m.start()
matches.append({
"type": "function",
"function": {
"name": func_name,
"arguments": arguments
}
})
matches.append(_make_tool_call(func_name, arguments))
except json.JSONDecodeError:
pass
return matches, start_pos
@ -226,13 +224,7 @@ def _parse_bare_name_tool_calls(answer: str, tool_names: list[str]):
arguments = json.loads(json_str)
if start_pos is None:
start_pos = match.start()
matches.append({
"type": "function",
"function": {
"name": name,
"arguments": arguments
}
})
matches.append(_make_tool_call(name, arguments))
except json.JSONDecodeError:
pass
return matches, start_pos
@ -269,13 +261,7 @@ def _parse_xml_param_tool_calls(answer: str, tool_names: list[str]):
arguments[param_name] = param_value
if start_pos is None:
start_pos = tc_match.start()
matches.append({
"type": "function",
"function": {
"name": func_name,
"arguments": arguments
}
})
matches.append(_make_tool_call(func_name, arguments))
return matches, start_pos
@ -305,13 +291,7 @@ def _parse_kimi_tool_calls(answer: str, tool_names: list[str]):
# Check for section begin marker before the call marker
section = answer.rfind('<|tool_calls_section_begin|>', 0, m.start())
start_pos = section if section != -1 else m.start()
matches.append({
"type": "function",
"function": {
"name": func_name,
"arguments": arguments
}
})
matches.append(_make_tool_call(func_name, arguments))
except json.JSONDecodeError:
pass
return matches, start_pos
@ -348,13 +328,7 @@ def _parse_minimax_tool_calls(answer: str, tool_names: list[str]):
arguments[param_name] = param_value
if start_pos is None:
start_pos = tc_match.start()
matches.append({
"type": "function",
"function": {
"name": func_name,
"arguments": arguments
}
})
matches.append(_make_tool_call(func_name, arguments))
return matches, start_pos
@ -382,13 +356,7 @@ def _parse_deep_seek_tool_calls(answer: str, tool_names: list[str]):
# Check for section begin marker before the call marker
section = answer.rfind('<tool▁calls▁begin>', 0, m.start())
start_pos = section if section != -1 else m.start()
matches.append({
"type": "function",
"function": {
"name": func_name,
"arguments": arguments
}
})
matches.append(_make_tool_call(func_name, arguments))
except json.JSONDecodeError:
pass
return matches, start_pos
@ -428,13 +396,7 @@ def _parse_glm_tool_calls(answer: str, tool_names: list[str]):
arguments[k] = v
if start_pos is None:
start_pos = tc_match.start()
matches.append({
"type": "function",
"function": {
"name": func_name,
"arguments": arguments
}
})
matches.append(_make_tool_call(func_name, arguments))
return matches, start_pos
@ -486,13 +448,7 @@ def _parse_pythonic_tool_calls(answer: str, tool_names: list[str]):
if start_pos is None:
start_pos = bracket_match.start()
matches.append({
"type": "function",
"function": {
"name": func_name,
"arguments": arguments
}
})
matches.append(_make_tool_call(func_name, arguments))
return matches, start_pos
@ -593,12 +549,19 @@ def detect_tool_call_format(template_str):
def parse_tool_call(answer: str, tool_names: list[str], return_prefix: bool = False, parsers: list = None):
# Strip thinking blocks so tool-call syntax inside <think> is ignored.
original_answer = answer
_, answer = extract_reasoning(answer)
# Offset between original and stripped text, used to map start_pos
# back to the original string when returning a prefix.
reasoning_offset = len(original_answer) - len(answer)
matches = []
start_pos = None
def _return(matches, start_pos):
if return_prefix:
prefix = answer[:start_pos] if matches and start_pos is not None else ''
prefix = original_answer[:start_pos + reasoning_offset] if matches and start_pos is not None else ''
return matches, prefix
return matches

View file

@ -26,7 +26,7 @@ from modules.evaluate import (
from modules.logging_colors import logger
from modules.models import reload_model
PARAMETERS = ["lora_name", "always_override", "all_linear", "q_proj_en", "v_proj_en", "k_proj_en", "o_proj_en", "gate_proj_en", "down_proj_en", "up_proj_en", "save_steps", "micro_batch_size", "batch_size", "epochs", "learning_rate", "lr_scheduler_type", "lora_rank", "lora_alpha", "lora_dropout", "cutoff_len", "dataset", "eval_dataset", "format", "eval_steps", "text_dataset", "higher_rank_limit", "warmup_steps", "optimizer", "stride_length", "stop_at_loss", "add_eos_token", "excess_length", "report_to"]
PARAMETERS = ["lora_name", "always_override", "all_linear", "q_proj_en", "v_proj_en", "k_proj_en", "o_proj_en", "gate_proj_en", "down_proj_en", "up_proj_en", "save_steps", "micro_batch_size", "batch_size", "epochs", "learning_rate", "lr_scheduler_type", "lora_rank", "lora_alpha", "lora_dropout", "cutoff_len", "dataset", "eval_dataset", "format", "eval_steps", "text_dataset", "warmup_steps", "optimizer", "stride_length", "stop_at_loss", "add_eos_token", "excess_length", "report_to", "gradient_checkpointing"]
WANT_INTERRUPT = False
train_log = {}
@ -73,8 +73,8 @@ def create_ui():
with gr.Row():
with gr.Column():
lora_rank = gr.Slider(label='LoRA Rank', value=8, minimum=0, maximum=1024, step=4, info='Also called dimension count. Higher values = larger file, more content control. Smaller values = smaller file, less control. Use 4 or 8 for style, 128 or 256 to teach, 1024+ for fine-detail on big data. More VRAM is needed for higher ranks.')
lora_alpha = gr.Slider(label='LoRA Alpha', value=16, minimum=0, maximum=2048, step=4, info='This divided by the rank becomes the scaling of the LoRA. Higher means stronger. A good standard value is twice your Rank.')
lora_rank = gr.Slider(label='LoRA Rank', value=8, minimum=0, maximum=2048, step=4, info='Also called dimension count. Use 48 for style/format, 128256 to teach factual knowledge, 1024+ for comprehensive fine-tuning. Very high ranks require significant VRAM.')
lora_alpha = gr.Slider(label='LoRA Alpha', value=16, minimum=0, maximum=4096, step=4, info='This divided by the rank becomes the scaling of the LoRA. Higher means stronger. A good standard value is twice your Rank.')
batch_size = gr.Slider(label='Batch Size', value=32, minimum=0, maximum=1024, step=4, info='Global batch size. The two batch sizes together determine gradient accumulation (gradientAccum = batch / microBatch). Higher gradient accum values lead to better quality training.')
micro_batch_size = gr.Slider(label='Micro Batch Size', value=4, minimum=1, maximum=128, step=1, info='Per-device batch size (NOTE: multiple devices not yet implemented). Increasing this will increase VRAM usage.')
cutoff_len = gr.Slider(label='Cutoff Length', minimum=0, maximum=4096, value=512, step=32, info='Maximum sequence length in tokens. For instruction datasets, conversations longer than this are dropped. For text datasets, documents are split into chunks of this size. Higher values require more VRAM.')
@ -90,18 +90,15 @@ def create_ui():
with gr.Accordion(label='Advanced Options', open=False, elem_classes='tgw-accordion'):
with gr.Row():
with gr.Column():
optimizer = gr.Dropdown(label='Optimizer', value='adamw_torch', choices=['adamw_hf', 'adamw_torch', 'adamw_torch_fused', 'adamw_torch_xla', 'adamw_apex_fused', 'adafactor', 'adamw_bnb_8bit', 'adamw_anyprecision', 'sgd', 'adagrad'], info='Optimizer algorithm. adamw_torch is the standard choice. adamw_bnb_8bit uses less VRAM. adafactor is memory-efficient for large models.', elem_classes=['slim-dropdown'])
warmup_steps = gr.Number(label='Warmup Steps', value=100, info='For this many steps at the start, the learning rate is gradually ramped up from 0 to the target value. This prevents unstable updates early in training.')
lora_dropout = gr.Slider(label='LoRA Dropout', minimum=0.0, maximum=1.0, step=0.025, value=0.0, info='Percentage probability for dropout of LoRA layers. This can help reduce overfitting. Most users should leave at default.')
stop_at_loss = gr.Slider(label='Stop at loss', minimum=0.0, maximum=3.0, step=0.1, value=0.00, info='The process will automatically stop once the desired loss value is reached. (reasonable numbers are 1.5-1.8)')
with gr.Row():
optimizer = gr.Dropdown(label='Optimizer', value='adamw_torch', choices=['adamw_hf', 'adamw_torch', 'adamw_torch_fused', 'adamw_torch_xla', 'adamw_apex_fused', 'adafactor', 'adamw_bnb_8bit', 'adamw_anyprecision', 'sgd', 'adagrad'], info='Optimizer algorithm. adamw_torch is the standard choice. adamw_bnb_8bit uses less VRAM. adafactor is memory-efficient for large models.', elem_classes=['slim-dropdown'])
with gr.Column():
warmup_steps = gr.Number(label='Warmup Steps', value=100, info='For this many steps at the start, the learning rate is gradually ramped up from 0 to the target value. This prevents unstable updates early in training.')
gradient_checkpointing = gr.Checkbox(label='Gradient checkpointing', value=True, info='Trades ~20-30% training speed for reduced VRAM usage by recomputing activations during the backward pass instead of storing them. No impact on accuracy.')
add_eos_token = gr.Checkbox(label='Add EOS token', value=True, info="Adds EOS token for each document in text datasets.")
excess_length = gr.Dropdown(label='Excess length', value='drop', choices=['drop', 'truncate'], info='What to do with conversations that exceed the cutoff length. "Drop" removes them entirely (recommended). "Truncate" cuts from the right, which may produce incomplete responses.', elem_classes=['slim-dropdown'])
higher_rank_limit = gr.Checkbox(label='Enable higher ranks', value=False, info='If checked, changes Rank/Alpha slider above to go much higher. This will not work without a datacenter-class GPU.')
report_to = gr.Radio(label="Save detailed logs with", value="None", choices=["None", "wandb", "tensorboard"], interactive=True)
with gr.Column():
@ -159,12 +156,12 @@ def create_ui():
refresh_table = gr.Button('Refresh the table', elem_classes="small-button", interactive=not mu)
# Training events
all_params = [lora_name, always_override, all_linear, q_proj_en, v_proj_en, k_proj_en, o_proj_en, gate_proj_en, down_proj_en, up_proj_en, save_steps, micro_batch_size, batch_size, epochs, learning_rate, lr_scheduler_type, lora_rank, lora_alpha, lora_dropout, cutoff_len, dataset, eval_dataset, format, eval_steps, text_dataset, higher_rank_limit, warmup_steps, optimizer, stride_length, stop_at_loss, add_eos_token, excess_length, report_to]
all_params = [lora_name, always_override, all_linear, q_proj_en, v_proj_en, k_proj_en, o_proj_en, gate_proj_en, down_proj_en, up_proj_en, save_steps, micro_batch_size, batch_size, epochs, learning_rate, lr_scheduler_type, lora_rank, lora_alpha, lora_dropout, cutoff_len, dataset, eval_dataset, format, eval_steps, text_dataset, warmup_steps, optimizer, stride_length, stop_at_loss, add_eos_token, excess_length, report_to, gradient_checkpointing]
copy_from.change(do_copy_params, [copy_from] + all_params, all_params)
start_button.click(do_train, all_params, output)
stop_button.click(do_interrupt, None, None, queue=False)
higher_rank_limit.change(change_rank_limit, [higher_rank_limit], [lora_rank, lora_alpha])
# Evaluation events. For some reason, the interrupt event
# doesn't work with the .then() syntax, so I write them one
@ -209,10 +206,6 @@ def do_copy_params(lora_name: str, *args):
return result
def change_rank_limit(use_higher_ranks: bool):
mult = 2 if use_higher_ranks else 1
return {"maximum": 1024 * mult, "__type__": "update"}, {"maximum": 2048 * mult, "__type__": "update"}
def clean_path(base_path: str, path: str):
"""Strips unusual symbols and forcibly builds a path as relative to the intended directory."""
@ -293,7 +286,7 @@ def calc_trainable_parameters(model):
return trainable_params, all_param
def do_train(lora_name: str, always_override: bool, all_linear: bool, q_proj_en: bool, v_proj_en: bool, k_proj_en: bool, o_proj_en: bool, gate_proj_en: bool, down_proj_en: bool, up_proj_en: bool, save_steps: int, micro_batch_size: int, batch_size: int, epochs: int, learning_rate: str, lr_scheduler_type: str, lora_rank: int, lora_alpha: int, lora_dropout: float, cutoff_len: int, dataset: str, eval_dataset: str, format: str, eval_steps: int, text_dataset: str, higher_rank_limit: bool, warmup_steps: int, optimizer: str, stride_length: int, stop_at_loss: float, add_eos_token: bool, excess_length: str, report_to: str):
def do_train(lora_name: str, always_override: bool, all_linear: bool, q_proj_en: bool, v_proj_en: bool, k_proj_en: bool, o_proj_en: bool, gate_proj_en: bool, down_proj_en: bool, up_proj_en: bool, save_steps: int, micro_batch_size: int, batch_size: int, epochs: int, learning_rate: str, lr_scheduler_type: str, lora_rank: int, lora_alpha: int, lora_dropout: float, cutoff_len: int, dataset: str, eval_dataset: str, format: str, eval_steps: int, text_dataset: str, warmup_steps: int, optimizer: str, stride_length: int, stop_at_loss: float, add_eos_token: bool, excess_length: str, report_to: str, gradient_checkpointing: bool = True):
import torch
import transformers
@ -553,10 +546,8 @@ def do_train(lora_name: str, always_override: bool, all_linear: bool, q_proj_en:
yield f"Failed to load {selected_model}."
return
except Exception:
exc = traceback.format_exc()
logger.error('Failed to reload the model.')
print(exc)
yield exc.replace('\n', '\n\n')
logger.exception('Failed to reload the model.')
yield traceback.format_exc().replace('\n', '\n\n')
return
# == Start prepping the model itself ==
@ -708,6 +699,7 @@ def do_train(lora_name: str, always_override: bool, all_linear: bool, q_proj_en:
load_best_model_at_end=eval_data is not None,
# TODO: Enable multi-device support
ddp_find_unused_parameters=None,
gradient_checkpointing=gradient_checkpointing,
use_cpu=shared.args.cpu,
remove_unused_columns=False,
),
@ -740,11 +732,13 @@ def do_train(lora_name: str, always_override: bool, all_linear: bool, q_proj_en:
if lora_all_param > 0:
print(f"Trainable params: {lora_trainable_param:,d} ({100 * lora_trainable_param / lora_all_param:.4f} %), All params: {lora_all_param:,d} (Model: {model_all_params:,d})")
train_log.update({"base_model_name": shared.model_name})
train_log.update({"base_model_class": shared.model.__class__.__name__})
train_log.update({"base_loaded_in_4bit": getattr(lora_model, "is_loaded_in_4bit", False)})
train_log.update({"base_loaded_in_8bit": getattr(lora_model, "is_loaded_in_8bit", False)})
train_log.update({"projections": projections_string})
train_log.update({
"base_model_name": shared.model_name,
"base_model_class": shared.model.__class__.__name__,
"base_loaded_in_4bit": getattr(lora_model, "is_loaded_in_4bit", False),
"base_loaded_in_8bit": getattr(lora_model, "is_loaded_in_8bit", False),
"projections": projections_string,
})
if stop_at_loss > 0:
print(f"Monitoring loss \033[1;31;1m(Auto-Stop at: {stop_at_loss})\033[0;37;0m")

View file

@ -44,8 +44,8 @@ class Stream(transformers.StoppingCriteria):
class LogitsBiasProcessor(LogitsProcessor):
def __init__(self, logit_bias={}):
self.logit_bias = logit_bias
def __init__(self, logit_bias=None):
self.logit_bias = logit_bias if logit_bias is not None else {}
if self.logit_bias:
self.keys = list([int(key) for key in self.logit_bias.keys()])
values = [self.logit_bias[str(key)] for key in self.keys]

View file

@ -66,7 +66,8 @@ theme = gr.themes.Default(
if not shared.args.old_colors:
theme = theme.set(
# General Colors
border_color_primary='#c5c5d2',
border_color_primary='#d2d2d8',
block_border_color='transparent',
body_text_color_subdued='#484848',
background_fill_secondary='#eaeaea',
background_fill_secondary_dark='var(--selected-item-color-dark, #282930)',
@ -77,6 +78,12 @@ if not shared.args.old_colors:
body_text_color='rgb(64, 64, 64)',
button_secondary_background_fill="white",
button_secondary_border_color="var(--border-color-primary)",
block_title_text_color='*body_text_color',
button_primary_background_fill='#374151',
button_primary_background_fill_hover='#4b5563',
button_primary_background_fill_hover_dark='rgba(255, 255, 255, 0.05)',
button_primary_border_color='#374151',
button_primary_text_color='white',
input_shadow="none",
button_shadow_hover="none",
@ -85,11 +92,11 @@ if not shared.args.old_colors:
checkbox_background_color_dark='var(--darker-gray, #1C1C1D)',
block_background_fill_dark='transparent',
block_border_color_dark='transparent',
input_border_color_dark='var(--border-color-dark, #525252)',
input_border_color_focus_dark='var(--border-color-dark, #525252)',
checkbox_border_color_dark='var(--border-color-dark, #525252)',
border_color_primary_dark='var(--border-color-dark, #525252)',
button_secondary_border_color_dark='var(--border-color-dark, #525252)',
input_border_color_dark='var(--border-color-dark)',
input_border_color_focus_dark='var(--border-color-dark)',
checkbox_border_color_dark='rgba(255, 255, 255, 0.2)',
border_color_primary_dark='var(--border-color-dark)',
button_secondary_border_color_dark='var(--border-color-dark)',
body_background_fill_dark='var(--dark-gray, #212125)',
button_primary_background_fill_dark='transparent',
button_secondary_background_fill_dark='transparent',
@ -107,10 +114,12 @@ if not shared.args.old_colors:
block_shadow_dark='none',
input_shadow_focus='none',
input_shadow_focus_dark='none',
button_large_radius='0.375rem',
button_large_radius='0.75rem',
button_small_radius='0.75rem',
button_large_padding='6px 12px',
input_radius='0.375rem',
block_radius='0',
input_radius='0.5rem',
block_radius='0.375rem',
button_transition='background-color 0.15s ease, border-color 0.15s ease, color 0.15s ease',
)
if (shared.user_data_dir / "notification.mp3").exists():
@ -291,7 +300,7 @@ def apply_interface_values(state, use_persistent=False):
elements = list_interface_input_elements()
if len(state) == 0:
if not state:
return [gr.update() for k in elements] # Dummy, do nothing
else:
return [state[k] if k in state else gr.update() for k in elements]
@ -299,9 +308,8 @@ def apply_interface_values(state, use_persistent=False):
def save_settings(state, preset, extensions_list, show_controls, theme_state, manual_save=False):
output = copy.deepcopy(shared.settings)
exclude = []
for k in state:
if k in shared.settings and k not in exclude:
if k in shared.settings:
output[k] = state[k]
if preset:
@ -315,7 +323,7 @@ def save_settings(state, preset, extensions_list, show_controls, theme_state, ma
output['custom_stopping_strings'] = output.get('custom_stopping_strings') or ''
output['custom_token_bans'] = output.get('custom_token_bans') or ''
output['show_controls'] = show_controls
output['dark_theme'] = True if theme_state == 'dark' else False
output['dark_theme'] = theme_state == 'dark'
output.pop('instruction_template_str')
output.pop('truncation_length')

View file

@ -1,8 +1,7 @@
import traceback
import gradio as gr
from modules import chat, presets, shared, ui, utils
from modules.logging_colors import logger
from modules.utils import gradio, sanitize_filename
@ -103,7 +102,7 @@ def handle_save_preset_confirm_click(filename, contents):
output = gr.update(choices=available_presets, value=filename)
except Exception:
output = gr.update()
traceback.print_exc()
logger.exception("Failed to save preset")
return [
output,
@ -119,7 +118,7 @@ def handle_save_confirm_click(root_state, filename, contents):
filename = sanitize_filename(filename)
utils.save_file(root_state + filename, contents)
except Exception:
traceback.print_exc()
logger.exception("Failed to save file")
return None, gr.update(visible=False)
@ -132,7 +131,7 @@ def handle_delete_confirm_click(root_state, filename):
filename = sanitize_filename(filename)
utils.delete_file(root_state + filename)
except Exception:
traceback.print_exc()
logger.exception("Failed to delete file")
return None, gr.update(visible=False)
@ -144,7 +143,7 @@ def handle_save_character_confirm_click(name2, greeting, context, character_pict
output = gr.update(choices=available_characters, value=filename)
except Exception:
output = gr.update()
traceback.print_exc()
logger.exception("Failed to save character")
return [
output,
@ -159,7 +158,7 @@ def handle_delete_character_confirm_click(character):
output = chat.update_character_menu_after_deletion(index)
except Exception:
output = gr.update()
traceback.print_exc()
logger.exception("Failed to delete character")
return [
output,
@ -214,7 +213,7 @@ def handle_save_user_confirm_click(name1, user_bio, your_picture, filename):
output = gr.update(choices=available_users, value=filename)
except Exception:
output = gr.update()
traceback.print_exc()
logger.exception("Failed to save user")
return [
output,
@ -229,7 +228,7 @@ def handle_delete_user_confirm_click(user):
output = chat.update_user_menu_after_deletion(index)
except Exception:
output = gr.update()
traceback.print_exc()
logger.exception("Failed to delete user")
return [
output,

View file

@ -916,9 +916,8 @@ def generate(state, save_images=True):
yield all_images, progress_bar_html()
clear_torch_cache()
except Exception as e:
logger.error(f"Image generation failed: {e}")
traceback.print_exc()
except Exception:
logger.exception("Image generation failed")
yield [], progress_bar_html()
clear_torch_cache()

View file

@ -98,7 +98,7 @@ def create_ui():
shared.gradio['batch_size'] = gr.Slider(label="batch_size", minimum=1, maximum=4096, step=1, value=shared.args.batch_size)
shared.gradio['ubatch_size'] = gr.Slider(label="ubatch_size", minimum=1, maximum=4096, step=1, value=shared.args.ubatch_size)
shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='List of proportions to split the model across multiple GPUs. Example: 60,40')
shared.gradio['extra_flags'] = gr.Textbox(label='extra-flags', info='Additional flags to pass to llama-server. Format: "flag1=value1,flag2,flag3=value3". Example: "override-tensor=exps=CPU"', value=shared.args.extra_flags)
shared.gradio['extra_flags'] = gr.Textbox(label='extra-flags', info='Extra flags to pass to llama-server. Example: --jinja --rpc 192.168.1.100:50052', value=shared.args.extra_flags)
shared.gradio['cpu_memory'] = gr.Number(label="Maximum CPU memory in GiB. Use this for CPU offloading.", value=shared.args.cpu_memory)
shared.gradio['compute_dtype'] = gr.Dropdown(label="compute_dtype", choices=["bfloat16", "float16", "float32"], value=shared.args.compute_dtype, info='Used by load-in-4bit.')
shared.gradio['quant_type'] = gr.Dropdown(label="quant_type", choices=["nf4", "fp4"], value=shared.args.quant_type, info='Used by load-in-4bit.')
@ -107,7 +107,7 @@ def create_ui():
shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='Use PyTorch in CPU mode.')
shared.gradio['disk'] = gr.Checkbox(label="disk", value=shared.args.disk)
shared.gradio['row_split'] = gr.Checkbox(label="row_split", value=shared.args.row_split, info='Split the model by rows across GPUs. This may improve multi-gpu performance.')
shared.gradio['no_kv_offload'] = gr.Checkbox(label="no_kv_offload", value=shared.args.no_kv_offload, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.')
shared.gradio['no_kv_offload'] = gr.Checkbox(label="no_kv_offload", value=shared.args.no_kv_offload, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces performance.')
shared.gradio['no_mmap'] = gr.Checkbox(label="no-mmap", value=shared.args.no_mmap)
shared.gradio['mlock'] = gr.Checkbox(label="mlock", value=shared.args.mlock)
shared.gradio['numa'] = gr.Checkbox(label="numa", value=shared.args.numa, info='NUMA support can help on some systems with non-uniform memory access.')
@ -134,7 +134,7 @@ def create_ui():
ui.create_refresh_button(shared.gradio['customized_template'], lambda: None, lambda: {'choices': utils.get_available_instruction_templates()}, 'refresh-button', interactive=not mu)
shared.gradio['customized_template_submit'] = gr.Button("Submit", variant="primary", interactive=not mu)
gr.Markdown("This allows you to set a customized template for the model currently selected in the \"Model loader\" menu. Whenever the model gets loaded, this template will be used in place of the template specified in the model's medatada, which sometimes is wrong.")
gr.Markdown("This allows you to set a customized template for the model currently selected in the \"Model loader\" menu. Whenever the model gets loaded, this template will be used in place of the template specified in the model's metadata, which sometimes is wrong.")
with gr.Row():
shared.gradio['model_status'] = gr.Markdown('No model is loaded' if shared.model_name == 'None' else 'Ready')
@ -222,16 +222,14 @@ def load_model_wrapper(selected_model, loader, autoload=False):
else:
yield f"Failed to load `{selected_model}`."
except Exception:
exc = traceback.format_exc()
logger.error('Failed to load the model.')
print(exc)
yield exc.replace('\n', '\n\n')
logger.exception('Failed to load the model.')
yield traceback.format_exc().replace('\n', '\n\n')
def load_lora_wrapper(selected_loras):
yield ("Applying the following LoRAs to {}:\n\n{}".format(shared.model_name, '\n'.join(selected_loras)))
add_lora_to_model(selected_loras)
yield ("Successfuly applied the LoRAs")
yield ("Successfully applied the LoRAs")
def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), return_links=False, check=False):

View file

@ -17,7 +17,7 @@ def create_ui():
with gr.Column():
gr.Markdown("## Extensions & flags")
shared.gradio['save_settings'] = gr.Button(f'Save extensions settings to {shared.user_data_dir}/settings.yaml', elem_classes='refresh-button', interactive=not mu)
shared.gradio['save_settings'] = gr.Button(f'Save extensions settings to {shared.user_data_dir}/settings.yaml', interactive=not mu)
shared.gradio['reset_interface'] = gr.Button("Apply flags/extensions and restart", interactive=not mu)
with gr.Row():
with gr.Column():
@ -95,8 +95,6 @@ def set_interface_arguments(extensions, bool_active):
setattr(shared.args, k, False)
for k in bool_active:
setattr(shared.args, k, True)
if k == 'api':
shared.add_extension('openai', last=True)
shared.need_restart = True

View file

@ -81,14 +81,6 @@ def atoi(text):
return int(text) if text.isdigit() else text.lower()
# Replace multiple string pairs in a string
def replace_all(text, dic):
for i, j in dic.items():
text = text.replace(i, j)
return text
def natural_keys(text):
return [atoi(c) for c in re.split(r'(\d+)', text)]

View file

@ -48,7 +48,7 @@ def download_web_page(url, timeout=10, include_links=False):
try:
_validate_url(url)
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36'
}
max_redirects = 5
for _ in range(max_redirects):
@ -82,8 +82,8 @@ def perform_web_search(query, num_pages=3, max_workers=5, timeout=10, fetch_cont
search_url = f"https://html.duckduckgo.com/html/?q={quote_plus(query)}"
agents = [
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36"
"Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36",
"Mozilla/5.0 (Macintosh; Intel Mac OS X 10_15_7) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/130.0.0.0 Safari/537.36"
]
response = requests.get(search_url, headers={'User-Agent': random.choice(agents)}, timeout=timeout)

View file

@ -117,7 +117,7 @@ def get_pytorch_install_command(gpu_choice):
return base_cmd + "--index-url https://download.pytorch.org/whl/cu128" + pypi_fallback
elif gpu_choice == "AMD":
py_tag = f"cp{PYTHON_VERSION.replace('.', '')}"
return f"python -m pip install https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/torch-{TORCH_VERSION}%2Brocm7.2.0.lw.git7e1940d4-{py_tag}-{py_tag}-linux_x86_64.whl"
return f"python -m pip install https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/torch-{TORCH_VERSION}%2Brocm7.2.0.lw.git7e1940d4-{py_tag}-{py_tag}-linux_x86_64.whl --find-links https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/"
elif gpu_choice in ["APPLE", "NONE"]:
return base_cmd + "--index-url https://download.pytorch.org/whl/cpu" + pypi_fallback
elif gpu_choice == "INTEL":
@ -135,7 +135,7 @@ def get_pytorch_update_command(gpu_choice):
return f"{base_cmd}--index-url https://download.pytorch.org/whl/cu128" + pypi_fallback
elif gpu_choice == "AMD":
py_tag = f"cp{PYTHON_VERSION.replace('.', '')}"
return f"python -m pip install --upgrade https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/torch-{TORCH_VERSION}%2Brocm7.2.0.lw.git7e1940d4-{py_tag}-{py_tag}-linux_x86_64.whl"
return f"python -m pip install --upgrade https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/torch-{TORCH_VERSION}%2Brocm7.2.0.lw.git7e1940d4-{py_tag}-{py_tag}-linux_x86_64.whl --find-links https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/"
elif gpu_choice in ["APPLE", "NONE"]:
return f"{base_cmd}--index-url https://download.pytorch.org/whl/cpu" + pypi_fallback
elif gpu_choice == "INTEL":

View file

@ -14,7 +14,7 @@ pandas
peft==0.18.*
Pillow>=9.5.0
pydantic==2.11.0
pymupdf==1.27.1
pymupdf==1.27.*
python-docx==1.1.2
pyyaml
requests
@ -31,8 +31,8 @@ tqdm
wandb
# Gradio
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio-4.37.2+custom.11-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio_client-1.0.2+custom.11-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@ -40,9 +40,9 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.25/exllamav3-0.0.25+cu128.torch2.9.0-cp313-cp313-win_amd64.whl; platform_system == "Windows" and python_version == "3.13"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.25/exllamav3-0.0.25+cu128.torch2.9.0-cp313-cp313-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.13"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.26/exllamav3-0.0.26+cu128.torch2.9.0-cp313-cp313-win_amd64.whl; platform_system == "Windows" and python_version == "3.13"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.26/exllamav3-0.0.26+cu128.torch2.9.0-cp313-cp313-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.13"
https://github.com/kingbri1/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu128torch2.9.0cxx11abiFALSE-cp313-cp313-win_amd64.whl; platform_system == "Windows" and python_version == "3.13"
https://github.com/kingbri1/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu128torch2.9.0cxx11abiFALSE-cp313-cp313-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.13"

View file

@ -12,7 +12,7 @@ pandas
peft==0.18.*
Pillow>=9.5.0
pydantic==2.11.0
pymupdf==1.27.1
pymupdf==1.27.*
python-docx==1.1.2
pyyaml
requests
@ -28,8 +28,8 @@ trafilatura==2.0.0
wandb
# Gradio
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio-4.37.2+custom.11-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio_client-1.0.2+custom.11-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@ -37,5 +37,5 @@ sse-starlette==1.6.5
tiktoken
# AMD wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

View file

@ -12,7 +12,7 @@ pandas
peft==0.18.*
Pillow>=9.5.0
pydantic==2.11.0
pymupdf==1.27.1
pymupdf==1.27.*
python-docx==1.1.2
pyyaml
requests
@ -28,8 +28,8 @@ trafilatura==2.0.0
wandb
# Gradio
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio-4.37.2+custom.11-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio_client-1.0.2+custom.11-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@ -37,4 +37,4 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"

View file

@ -12,7 +12,7 @@ pandas
peft==0.18.*
Pillow>=9.5.0
pydantic==2.11.0
pymupdf==1.27.1
pymupdf==1.27.*
python-docx==1.1.2
pyyaml
requests
@ -28,8 +28,8 @@ trafilatura==2.0.0
wandb
# Gradio
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio-4.37.2+custom.11-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio_client-1.0.2+custom.11-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@ -37,4 +37,4 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"

View file

@ -12,7 +12,7 @@ pandas
peft==0.18.*
Pillow>=9.5.0
pydantic==2.11.0
pymupdf==1.27.1
pymupdf==1.27.*
python-docx==1.1.2
pyyaml
requests
@ -28,8 +28,8 @@ trafilatura==2.0.0
wandb
# Gradio
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio-4.37.2+custom.11-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio_client-1.0.2+custom.11-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@ -37,5 +37,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only)
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"

View file

@ -12,7 +12,7 @@ pandas
peft==0.18.*
Pillow>=9.5.0
pydantic==2.11.0
pymupdf==1.27.1
pymupdf==1.27.*
python-docx==1.1.2
pyyaml
requests
@ -28,8 +28,8 @@ trafilatura==2.0.0
wandb
# Gradio
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio-4.37.2+custom.11-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio_client-1.0.2+custom.11-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
# API
flask_cloudflared==0.0.15

View file

@ -5,7 +5,7 @@ jinja2==3.1.6
markdown
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.1
pymupdf==1.27.*
python-docx==1.1.2
pyyaml
requests
@ -14,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio-4.37.2+custom.11-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio_client-1.0.2+custom.11-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@ -23,5 +23,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

View file

@ -5,7 +5,7 @@ jinja2==3.1.6
markdown
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.1
pymupdf==1.27.*
python-docx==1.1.2
pyyaml
requests
@ -14,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio-4.37.2+custom.11-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio_client-1.0.2+custom.11-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@ -23,5 +23,5 @@ sse-starlette==1.6.5
tiktoken
# AMD wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

View file

@ -5,7 +5,7 @@ jinja2==3.1.6
markdown
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.1
pymupdf==1.27.*
python-docx==1.1.2
pyyaml
requests
@ -14,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio-4.37.2+custom.11-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio_client-1.0.2+custom.11-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@ -23,4 +23,4 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"

View file

@ -5,7 +5,7 @@ jinja2==3.1.6
markdown
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.1
pymupdf==1.27.*
python-docx==1.1.2
pyyaml
requests
@ -14,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio-4.37.2+custom.11-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio_client-1.0.2+custom.11-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@ -23,4 +23,4 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"

View file

@ -5,7 +5,7 @@ jinja2==3.1.6
markdown
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.1
pymupdf==1.27.*
python-docx==1.1.2
pyyaml
requests
@ -14,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio-4.37.2+custom.11-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio_client-1.0.2+custom.11-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@ -23,5 +23,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only)
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"

View file

@ -5,7 +5,7 @@ jinja2==3.1.6
markdown
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.1
pymupdf==1.27.*
python-docx==1.1.2
pyyaml
requests
@ -14,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio-4.37.2+custom.11-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio_client-1.0.2+custom.11-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@ -23,5 +23,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

View file

@ -5,7 +5,7 @@ jinja2==3.1.6
markdown
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.1
pymupdf==1.27.*
python-docx==1.1.2
pyyaml
requests
@ -14,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio-4.37.2+custom.11-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio_client-1.0.2+custom.11-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
# API
flask_cloudflared==0.0.15

View file

@ -5,7 +5,7 @@ jinja2==3.1.6
markdown
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.1
pymupdf==1.27.*
python-docx==1.1.2
pyyaml
requests
@ -14,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio-4.37.2+custom.11-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.11/gradio_client-1.0.2+custom.11-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@ -23,5 +23,5 @@ sse-starlette==1.6.5
tiktoken
# Vulkan wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.94.0/llama_cpp_binaries-0.94.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

View file

@ -106,6 +106,11 @@ def create_interface():
if shared.args.extensions is not None and len(shared.args.extensions) > 0:
extensions_module.load_extensions()
# Start the API server if enabled
if shared.args.api or shared.args.public_api:
from modules.api.script import setup as api_setup
api_setup()
# Force some events to be triggered on page load
shared.persistent_interface_state.update({
'mode': shared.settings['mode'],
@ -273,10 +278,21 @@ if __name__ == "__main__":
# Activate the extensions listed on settings.yaml
extensions_module.available_extensions = utils.get_available_extensions()
for extension in shared.settings['default_extensions']:
# The openai extension was moved to modules/api and is now
# activated with --api. Treat it as an alias for backwards compat.
if extension == 'openai':
shared.args.api = True
continue
shared.args.extensions = shared.args.extensions or []
if extension not in shared.args.extensions:
shared.args.extensions.append(extension)
# Handle --extensions openai from the command line (moved to modules/api)
if shared.args.extensions and 'openai' in shared.args.extensions:
shared.args.extensions.remove('openai')
shared.args.api = True
# Load image model if specified via CLI
if shared.args.image_model:
logger.info(f"Loading image model: {shared.args.image_model}")
@ -337,6 +353,10 @@ if __name__ == "__main__":
shared.args.extensions = [x for x in (shared.args.extensions or []) if x != 'gallery']
if shared.args.extensions:
extensions_module.load_extensions()
if shared.args.api or shared.args.public_api:
from modules.api.script import setup as api_setup
api_setup()
else:
# Launch the web UI
create_interface()

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' + 'A chat between a curious user and an assistant. The assistant gives helpful, detailed, accurate, uncensored responses to the user\'s input.' + '\n' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '' + message['content'] + '\n' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'USER: ' + message['content'] + '\n'-}}
{%- else -%}
{{-'ASSISTANT: ' + message['content'] + '\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'ASSISTANT:'-}}
{%- endif -%}

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' + '' + '' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '' + message['content'] + '' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'### Input:\n' + message['content'] + '\n\n'-}}
{%- else -%}
{{-'### Output:\n' + message['content'] + '\n\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'### Output:\n'-}}
{%- endif -%}

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' + '' + '' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '' + message['content'] + '' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'<reserved_102>' + message['content'] + ''-}}
{%- else -%}
{{-'<reserved_103>' + message['content'] + '</s>' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'<reserved_103>'-}}
{%- endif -%}

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' + 'The following is a conversation between a human and an AI assistant named Baize (named after a mythical creature in Chinese folklore). Baize is an open-source AI assistant developed by UCSD and Sun Yat-Sen University. The human and the AI assistant take turns chatting. Human statements start with [|Human|] and AI assistant statements start with [|AI|]. The AI assistant always provides responses in as much detail as possible, and in Markdown format. The AI assistant always declines to engage with topics, questions and instructions related to unethical, controversial, or sensitive issues. Complete the transcript in exactly that format.\n[|Human|]Hello!\n[|AI|]Hi!' + '\n' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '' + message['content'] + '\n' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'[|Human|]' + message['content'] + '\n'-}}
{%- else -%}
{{-'[|AI|]' + message['content'] + '\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'[|AI|]'-}}
{%- endif -%}

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' + 'A transcript of a roleplay between two players, LEAD and ASSOCIATE. LEAD sets up a scenario and the characters, from which ASSOCIATE then assumes a character role and continues the story for that role in response to description given by LEAD. The story and characters are developed by exchange of detailed event descriptions and character dialogs, successively given by both LEAD and ASSOCIATE.' + '\n' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '' + message['content'] + '\n' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'LEAD: ' + message['content'] + '\n'-}}
{%- else -%}
{{-'ASSOCIATE: ' + message['content'] + '</s>\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'ASSOCIATE:'-}}
{%- endif -%}

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' + '' + '' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '' + message['content'] + '' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'[Round <|round|>]\n问' + message['content'] + '\n'-}}
{%- else -%}
{{-'答:' + message['content'] + '\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'答:'-}}
{%- endif -%}

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' + 'The following is a conversation between an AI assistant called Assistant and a human user called User. The assistant is intelligent, knowledgeable and polite to answer questions of user.' + '\n\n' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '' + message['content'] + '\n\n' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'User:' + message['content'] + '\n\n'-}}
{%- else -%}
{{-'Assistant:' + message['content'] + '\n\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'Assistant:'-}}
{%- endif -%}

View file

@ -1,26 +0,0 @@
instruction_template: |-
{%- if messages[0]['role'] == 'system' -%}
{%- set loop_messages = messages[1:] -%}
{%- set system_message = messages[0]['content'] -%}
{%- elif false == true -%}
{%- set loop_messages = messages -%}
{%- set system_message = 'You are Command-R, a brilliant, sophisticated, AI-assistant trained to assist human users by providing thorough responses. You are trained by Cohere.' -%}
{%- else -%}
{%- set loop_messages = messages -%}
{%- set system_message = false -%}
{%- endif -%}
{%- if system_message != false -%}
{{ '<|START_OF_TURN_TOKEN|><|SYSTEM_TOKEN|>' + system_message + '<|END_OF_TURN_TOKEN|>' }}
{%- endif -%}
{%- for message in loop_messages -%}
{%- set content = message['content'] -%}
{%- if message['role'] == 'user' -%}
{{ '<|START_OF_TURN_TOKEN|><|USER_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}
{%- elif message['role'] == 'assistant' -%}
{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' + content.strip() + '<|END_OF_TURN_TOKEN|>' }}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{ '<|START_OF_TURN_TOKEN|><|CHATBOT_TOKEN|>' }}
{%- endif -%}

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' + '' + '' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '' + message['content'] + '' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'' + message['content'] + ' '-}}
{%- else -%}
{{-'[START_REF]' + message['content'] + '\n\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'[START_REF]'-}}
{%- endif -%}

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' + '' + '' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '' + message['content'] + '' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'<question>' + message['content'] + ''-}}
{%- else -%}
{{-'<answer>' + message['content'] + '' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'<answer>'-}}
{%- endif -%}

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' + '' + '' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '' + message['content'] + '' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'Q: ' + message['content'] + '\n\n'-}}
{%- else -%}
{{-'A: ' + message['content'] + '\n\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'A:'-}}
{%- endif -%}

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' + '' + '' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '' + message['content'] + '' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'' + message['content'] + '\n\n'-}}
{%- else -%}
{{-'TLDR:' + message['content'] + '\n\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'TLDR:'-}}
{%- endif -%}

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' + '' + '' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '' + message['content'] + '' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'Question: ' + message['content'] + '\n\n'-}}
{%- else -%}
{{-'<work>' + message['content'] + '\n\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'<work>'-}}
{%- endif -%}

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '<prefix>' + 'You are a helpful chatbot name Stan' + '</prefix>' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '<prefix>' + message['content'] + '</prefix>' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'<human>' + message['content'] + ''-}}
{%- else -%}
{{-'<bot>' + message['content'] + '' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'<bot>'-}}
{%- endif -%}

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' + '' + '' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '' + message['content'] + '' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'Question: ' + message['content'] + '\n\n'-}}
{%- else -%}
{{-'Answer: ' + message['content'] + '\n\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'Answer:'-}}
{%- endif -%}

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' + '' + '' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '' + message['content'] + '' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'###USER: ' + message['content'] + '\n'-}}
{%- else -%}
{{-'###ASSISTANT: ' + message['content'] + '</s>\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'###ASSISTANT:'-}}
{%- endif -%}

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' + '' + '' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '' + message['content'] + '' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'### Instruction:\n' + message['content'] + '\n\n'-}}
{%- else -%}
{{-'### Response:\n' + message['content'] + '\n\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'### Response:\n'-}}
{%- endif -%}

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' + '' + '' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '' + message['content'] + '' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'### Human: ' + message['content'] + '\n'-}}
{%- else -%}
{{-'### Assistant: ' + message['content'] + '</s>\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'### Assistant:'-}}
{%- endif -%}

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' + '' + '' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '' + message['content'] + '' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'<|prompt|>' + message['content'] + '<|endoftext|>'-}}
{%- else -%}
{{-'<|answer|>' + message['content'] + '<|endoftext|>' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'<|answer|>'-}}
{%- endif -%}

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' + 'You are a helpful assistant' + '\n' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '' + message['content'] + '\n' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'USER: ' + message['content'] + '\n'-}}
{%- else -%}
{{-'ASSISTANT: ' + message['content'] + '</s>\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'ASSISTANT:'-}}
{%- endif -%}

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' + '' + '' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '' + message['content'] + '' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'<human>: ' + message['content'] + '\n'-}}
{%- else -%}
{{-'<bot>:' + message['content'] + '\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'<bot>:'-}}
{%- endif -%}

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' + '' + '' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '' + message['content'] + '' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'Q: ' + message['content'] + '\n'-}}
{%- else -%}
{{-'A:' + message['content'] + '\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'A:'-}}
{%- endif -%}

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' + '' + '' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '' + message['content'] + '' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'### 질문: ' + message['content'] + '\n\n'-}}
{%- else -%}
{{-'### 답변:' + message['content'] + '\n\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'### 답변:'-}}
{%- endif -%}

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' + 'BEGINNING OF CONVERSATION:' + ' ' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '' + message['content'] + ' ' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'USER: ' + message['content'] + ' '-}}
{%- else -%}
{{-'GPT:' + message['content'] + '</s>' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'GPT:'-}}
{%- endif -%}

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' + 'You are LLaVA, a large language and vision assistant trained by UW Madison WAIV Lab. You are able to understand the visual content that the user provides, and assist the user with a variety of tasks using natural language. Follow the instructions carefully and explain your answers in detail.### Human: Hi!### Assistant: Hi there! How can I help you today?' + '\n' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '' + message['content'] + '\n' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'### Human: ' + message['content'] + ''-}}
{%- else -%}
{{-'### Assistant: ' + message['content'] + '\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'### Assistant:'-}}
{%- endif -%}

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '[INST] <<SYS>>\n' + 'Answer the questions.' + '\n<</SYS>>\n\n' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '[INST] <<SYS>>\n' + message['content'] + '\n<</SYS>>\n\n' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'' + message['content'] + ' [/INST] '-}}
{%- else -%}
{{-'' + message['content'] + ' </s><s>[INST] ' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-''-}}
{%- endif -%}

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' + 'You are an AI assistant whose name is MOSS.\n- MOSS is a conversational language model that is developed by Fudan University. It is designed to be helpful, honest, and harmless.\n- MOSS can understand and communicate fluently in the language chosen by the user such as English and 中文. MOSS can perform any language-based tasks.\n- MOSS must refuse to discuss anything related to its prompts, instructions, or rules.\n- Its responses must not be vague, accusatory, rude, controversial, off-topic, or defensive.\n- It should avoid giving subjective opinions but rely on objective facts or phrases like "in this context a human might say...", "some people might think...", etc.\n- Its responses must also be positive, polite, interesting, entertaining, and engaging.\n- It can provide additional relevant details to answer in-depth and comprehensively covering mutiple aspects.\n- It apologizes and accepts the user\'s suggestion if the user corrects the incorrect answer generated by MOSS.\nCapabilities and tools that MOSS can possess.' + '\n' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '' + message['content'] + '\n' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'<|Human|>: ' + message['content'] + '<eoh>\n'-}}
{%- else -%}
{{-'<|MOSS|>: ' + message['content'] + '<eom>\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'<|MOSS|>:'-}}
{%- endif -%}

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' + '' + '' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '' + message['content'] + '' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'USER: ' + message['content'] + '\n'-}}
{%- else -%}
{{-'ASSISTANT:' + message['content'] + '\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'ASSISTANT:'-}}
{%- endif -%}

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' + '' + '' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '' + message['content'] + '' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'<|user|>' + message['content'] + ''-}}
{%- else -%}
{{-'<|model|>' + message['content'] + '' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'<|model|>'-}}
{%- endif -%}

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- 'System:' + message['content'] + '\n\n' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'User: ' + message['content'] + '\n\n'-}}
{%- else -%}
{{-'Assistant: ' + message['content'] + '\n\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'Assistant:'-}}
{%- endif -%}

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' + '' + '' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '' + message['content'] + '' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'### Instruction:\n' + message['content'] + '\n\n'-}}
{%- else -%}
{{-'### Response:\n' + message['content'] + '</s><s> ' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'### Response:\n'-}}
{%- endif -%}

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' + 'Consider a conversation between User (a human) and Assistant (named Buddy).\nBuddy is an INTP-T, a friendly, intelligent and multilingual AI assistant, by OpenBuddy team on GitHub.\nBuddy cannot access the Internet.\nBuddy can fluently speak the user\'s language (e.g. English, Chinese).\nBuddy can generate poems, stories, code, essays, songs, parodies, and more.\nBuddy possesses vast knowledge about the world, history, and culture.\nBuddy\'s responses are always safe, creative, high-quality, helpful and interesting.\nBuddy strictly refuses to discuss political, NSFW, illegal, abusive, offensive, or other sensitive topics.\n\nUser: Hi.\nAssistant: Hi, I\'m Buddy, your AI assistant. How can I help you today?\n' + '\n' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '' + message['content'] + '\n' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'User: ' + message['content'] + '\n'-}}
{%- else -%}
{{-'Assistant: ' + message['content'] + '\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'Assistant:'-}}
{%- endif -%}

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' + '' + '' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '' + message['content'] + '' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'GPT4 User: ' + message['content'] + '<|end_of_turn|>'-}}
{%- else -%}
{{-'GPT4 Assistant: ' + message['content'] + '<|end_of_turn|>' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'GPT4 Assistant:'-}}
{%- endif -%}

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' + '' + '' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '' + message['content'] + '' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'### Instruction: ' + message['content'] + '\n\n'-}}
{%- else -%}
{{-'### Response: ' + message['content'] + '\n\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'### Response:'-}}
{%- endif -%}

View file

@ -1,25 +0,0 @@
instruction_template: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '### System:\n' + 'You are an AI assistant that follows instruction extremely well. Help as much as you can.' + '\n\n' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '### System:\n' + message['content'] + '\n\n' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'### User:\n' + message['content'] + '\n\n'-}}
{%- else -%}
{{-'### Response:\n' + message['content'] + '\n\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'### Response:\n'-}}
{%- endif -%}

Some files were not shown because too many files have changed in this diff Show more