Compare commits

..

No commits in common. "main" and "v3.16" have entirely different histories.
main ... v3.16

49 changed files with 242 additions and 2175 deletions

View file

@ -41,13 +41,6 @@ jobs:
version: ${{ inputs.version }}
config: 'os:ubuntu-22.04'
build_release_rocm_linux:
name: ROCm Linux
uses: ./.github/workflows/build-portable-release-rocm.yml
with:
version: ${{ inputs.version }}
config: 'os:ubuntu-22.04'
build_release_cpu_windows:
name: CPU Windows
uses: ./.github/workflows/build-portable-release.yml

View file

@ -60,7 +60,7 @@ jobs:
'os' = @('ubuntu-22.04', 'windows-2022')
'pyver' = @("3.11")
'avx' = @("AVX2")
'cuda' = @("12.4")
'cuda' = @("11.7", "12.4")
}
if ($env:CONFIGIN -ne 'Default') {$env:CONFIGIN.split(';').foreach({$matrix[$_.split(':')[0]] = $_.split(':')[1].split(',')})}
@ -147,13 +147,22 @@ jobs:
# Create CUDA-specific requirements file if needed
cd "text-generation-webui-${VERSION_CLEAN}"
REQ_FILE="$BASE_REQ_FILE"
if [[ "$CUDA_VERSION" == "11.7" ]]; then
echo "Creating CUDA 11.7 specific requirements file"
sed 's/cu124/cu117/g' "$BASE_REQ_FILE" > requirements_cuda_temp.txt
REQ_FILE="requirements_cuda_temp.txt"
else
REQ_FILE="$BASE_REQ_FILE"
fi
# 4. Install packages
echo "Installing Python packages from $REQ_FILE..."
$PIP_PATH install --target="./$PACKAGES_PATH" -r "$REQ_FILE"
# 5. Clean up
if [[ "$CUDA_VERSION" == "11.7" ]]; then
rm requirements_cuda_temp.txt
fi
rm -rf .git cmd* update_wizard* Colab-TextGen-GPU.ipynb docker setup.cfg .github .gitignore requirements/ one_click.py
# 6. Create ZIP file

View file

@ -1,165 +0,0 @@
# Builds the portable ROCm (AMD) Linux package of text-generation-webui for a
# given version tag and uploads the resulting zip to the matching GitHub release.
name: Build ROCm

on:
  workflow_dispatch:
    inputs:
      version:
        description: 'Version tag of text-generation-webui to build: v3.0'
        default: 'v3.0'
        required: true
        type: string
      config:
        description: 'Override configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
        default: 'Default'
        required: false
        type: string
      exclude:
        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
        default: 'None'
        required: false
        type: string
  workflow_call:
    inputs:
      version:
        description: 'Version tag of text-generation-webui to build: v3.0'
        default: 'v3.0'
        required: true
        type: string
      config:
        description: 'Configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
        default: 'Default'
        required: false
        type: string
      exclude:
        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
        default: 'None'
        required: false
        type: string

# Needed so the upload step can attach the zip to the release.
permissions:
  contents: write

jobs:
  # Turns the `config` / `exclude` inputs into the JSON matrix consumed by
  # build_wheels. `config` overrides whole matrix keys; `exclude` adds an
  # `exclude` list to the matrix.
  define_matrix:
    name: Define Build Matrix
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    defaults:
      run:
        shell: pwsh
    env:
      CONFIGIN: ${{ inputs.config }}
      EXCLUDEIN: ${{ inputs.exclude }}
    steps:
      - name: Define Job Output
        id: set-matrix
        run: |
          $matrix = @{
              'os' = @('ubuntu-22.04')
              'pyver' = @("3.11")
              'avx' = @("AVX2")
          }

          if ($env:CONFIGIN -ne 'Default') {$env:CONFIGIN.split(';').foreach({$matrix[$_.split(':')[0]] = $_.split(':')[1].split(',')})}

          if ($env:EXCLUDEIN -ne 'None') {
              $exclusions = @()
              $exclusions += $env:EXCLUDEIN.split(';').replace(':','=').replace(',',"`n") | ConvertFrom-StringData
              $matrix['exclude'] = $exclusions
          }

          $matrixOut = ConvertTo-Json $matrix -Compress
          Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT

  build_wheels:
    # Labelled ROCm (was "CPU", a leftover from the workflow this was copied from).
    name: ${{ matrix.os }} ${{ matrix.pyver }} ROCm ${{ matrix.avx }}
    needs: define_matrix
    runs-on: ${{ matrix.os }}
    strategy:
      matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
    defaults:
      run:
        shell: pwsh
    # The build script reads the version and AVX level from these variables
    # instead of having the expressions expanded into the script text, so a
    # crafted input value cannot inject shell commands.
    env:
      AVXVER: ${{ matrix.avx }}
      PCKGVER: ${{ inputs.version }}
    steps:
      - uses: actions/checkout@v4
        with:
          repository: 'oobabooga/text-generation-webui'
          ref: ${{ inputs.version }}
          submodules: 'recursive'

      - uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.pyver }}

      - name: Build Package
        shell: bash
        run: |
          # Version without the leading "v" (v3.0 -> 3.0)
          VERSION_CLEAN="${PCKGVER#v}"
          cd ..
          cp -r text-generation-webui "text-generation-webui-${VERSION_CLEAN}"
          cd "text-generation-webui-${VERSION_CLEAN}"

          # Remove extensions that need additional requirements
          allowed=("character_bias" "gallery" "openai" "sd_api_pictures")
          find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf

          # Define common variables
          AVX_SUPPORT="$AVXVER"

          # 1. Set platform-specific variables (Linux only for ROCm)
          PLATFORM="linux"
          PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20250409/cpython-3.11.12+20250409-x86_64-unknown-linux-gnu-install_only.tar.gz"
          PIP_PATH="portable_env/bin/python -m pip"
          PACKAGES_PATH="portable_env/lib/python3.11/site-packages"
          rm start_macos.sh start_windows.bat

          # 2. Download and extract Python
          cd ..
          echo "Downloading Python for $PLATFORM..."
          curl -L -o python-build.tar.gz "$PYTHON_URL"
          tar -xzf python-build.tar.gz
          mv python "text-generation-webui-${VERSION_CLEAN}/portable_env"

          # 3. Prepare requirements file based on AVX
          if [[ "$AVX_SUPPORT" == "AVX2" ]]; then
              BASE_REQ_FILE="requirements/portable/requirements_amd.txt"
          else
              BASE_REQ_FILE="requirements/portable/requirements_amd_noavx2.txt"
          fi
          REQ_FILE="$BASE_REQ_FILE"

          cd "text-generation-webui-${VERSION_CLEAN}"

          # 4. Install packages
          # PIP_PATH is intentionally unquoted: it holds a command plus arguments.
          echo "Installing Python packages from $REQ_FILE..."
          $PIP_PATH install --target="./$PACKAGES_PATH" -r "$REQ_FILE"

          # 5. Clean up
          rm -rf .git cmd* update_wizard* Colab-TextGen-GPU.ipynb docker setup.cfg .github .gitignore requirements/ one_click.py

          # 6. Create ZIP file
          cd ..
          ZIP_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}-rocm.zip"
          echo "Creating archive: $ZIP_NAME"
          zip -r "$ZIP_NAME" "text-generation-webui-${VERSION_CLEAN}"

      - name: Upload files to a GitHub release
        id: upload-release
        uses: svenstaro/upload-release-action@2.7.0
        # A failed upload does not fail the job.
        continue-on-error: true
        with:
          repo_token: ${{ secrets.GITHUB_TOKEN }}
          file: ../textgen-portable-*.zip
          tag: ${{ inputs.version }}
          file_glob: true
          make_latest: false
          overwrite: true

View file

@ -57,7 +57,7 @@ jobs:
id: set-matrix
run: |
$matrix = @{
'os' = @('ubuntu-22.04', 'windows-2022', 'macos-14')
'os' = @('ubuntu-22.04', 'windows-2022', 'macos-13', 'macos-14')
'pyver' = @("3.11")
'avx' = @("AVX2")
}

View file

@ -21,10 +21,6 @@ A Gradio web UI for Large Language Models.
|:---:|:---:|
|![Image1](https://github.com/oobabooga/screenshots/raw/main/DEFAULT-3.5.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/PARAMETERS-3.5.png) |
## 🔥 News
- The project now supports **image generation**! Including Z-Image-Turbo, 4bit/8bit quantization, `torch.compile`, and LLM-generated prompt variations ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Image-Generation-Tutorial)).
## Features
- Supports multiple local text generation backends, including [llama.cpp](https://github.com/ggerganov/llama.cpp), [Transformers](https://github.com/huggingface/transformers), [ExLlamaV3](https://github.com/turboderp-org/exllamav3), [ExLlamaV2](https://github.com/turboderp-org/exllamav2), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) (the latter via its own [Dockerfile](https://github.com/oobabooga/text-generation-webui/blob/main/docker/TensorRT-LLM/Dockerfile)).
@ -32,7 +28,6 @@ A Gradio web UI for Large Language Models.
- 100% offline and private, with zero telemetry, external resources, or remote update requests.
- **File attachments**: Upload text files, PDF documents, and .docx documents to talk about their contents.
- **Vision (multimodal models)**: Attach images to messages for visual understanding ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Multimodal-Tutorial)).
- **Image generation**: A dedicated tab for `diffusers` models like **Z-Image-Turbo**. Features 4-bit/8-bit quantization and a persistent gallery with metadata ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Image-Generation-Tutorial)).
- **Web search**: Optionally search the internet with LLM-generated queries to add context to the conversation.
- Aesthetic UI with dark and light themes.
- Syntax highlighting for code blocks and LaTeX rendering for mathematical expressions.
@ -437,7 +432,6 @@ https://colab.research.google.com/github/oobabooga/text-generation-webui/blob/ma
https://www.reddit.com/r/Oobabooga/
## Acknowledgments
## Acknowledgment
- In August 2023, [Andreessen Horowitz](https://a16z.com/) (a16z) provided a generous grant to encourage and support my independent work on this project. I am **extremely** grateful for their trust and recognition.
- This project was inspired by [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) and wouldn't exist without it.
In August 2023, [Andreessen Horowitz](https://a16z.com/) (a16z) provided a generous grant to encourage and support my independent work on this project. I am **extremely** grateful for their trust and recognition.

View file

@ -93,11 +93,11 @@ ol li p, ul li p {
display: inline-block;
}
#notebook-parent-tab, #chat-tab, #parameters, #chat-settings, #lora, #training-tab, #model-tab, #session-tab, #character-tab, #image-ai-tab {
#notebook-parent-tab, #chat-tab, #parameters, #chat-settings, #lora, #training-tab, #model-tab, #session-tab, #character-tab {
border: 0;
}
#notebook-parent-tab, #parameters, #chat-settings, #lora, #training-tab, #model-tab, #session-tab, #character-tab, #image-ai-tab {
#notebook-parent-tab, #parameters, #chat-settings, #lora, #training-tab, #model-tab, #session-tab, #character-tab {
padding: 1rem;
}
@ -244,46 +244,37 @@ button {
font-size: 100% !important;
}
.pretty_scrollbar::-webkit-scrollbar,
#image-history-gallery > :nth-child(2)::-webkit-scrollbar {
.pretty_scrollbar::-webkit-scrollbar {
width: 8px;
height: 8px;
}
.pretty_scrollbar::-webkit-scrollbar-track,
#image-history-gallery > :nth-child(2)::-webkit-scrollbar-track {
.pretty_scrollbar::-webkit-scrollbar-track {
background: transparent;
}
.pretty_scrollbar::-webkit-scrollbar-thumb,
.pretty_scrollbar::-webkit-scrollbar-thumb:hover,
#image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb,
#image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb:hover {
.pretty_scrollbar::-webkit-scrollbar-thumb:hover {
background: var(--neutral-300);
border-radius: 30px;
}
.dark .pretty_scrollbar::-webkit-scrollbar-thumb,
.dark .pretty_scrollbar::-webkit-scrollbar-thumb:hover,
.dark #image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb,
.dark #image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb:hover {
.dark .pretty_scrollbar::-webkit-scrollbar-thumb:hover {
background: rgb(255 255 255 / 6.25%);
border-radius: 10px;
}
.pretty_scrollbar::-webkit-resizer,
#image-history-gallery > :nth-child(2)::-webkit-resizer {
.pretty_scrollbar::-webkit-resizer {
background: #c5c5d2;
}
.dark .pretty_scrollbar::-webkit-resizer,
.dark #image-history-gallery > :nth-child(2)::-webkit-resizer {
.dark .pretty_scrollbar::-webkit-resizer {
background: #ccc;
border-radius: 10px;
}
.pretty_scrollbar::-webkit-scrollbar-corner,
#image-history-gallery > :nth-child(2)::-webkit-scrollbar-corner {
.pretty_scrollbar::-webkit-scrollbar-corner {
background: transparent;
}
@ -1683,117 +1674,3 @@ button:focus {
.dark .sidebar-vertical-separator {
border-bottom: 1px solid rgb(255 255 255 / 10%);
}
button#swap-height-width {
position: absolute;
top: -50px;
right: 0;
border: 0;
}
#image-output-gallery, #image-output-gallery > :nth-child(2) {
height: calc(100vh - 83px);
max-height: calc(100vh - 83px);
}
#image-history-gallery, #image-history-gallery > :nth-child(2) {
height: calc(100vh - 174px);
max-height: calc(100vh - 174px);
}
/* Additional CSS for the paginated image gallery */
/* Page info styling */
#image-page-info {
display: flex;
align-items: center;
justify-content: center;
min-width: 200px;
font-size: 0.9em;
color: var(--body-text-color-subdued);
}
/* Settings display panel */
#image-ai-tab .settings-display-panel {
background: var(--background-fill-secondary);
padding: 12px;
border-radius: 8px;
font-size: 0.9em;
max-height: 300px;
overflow-y: auto;
margin-top: 8px;
}
/* Gallery status message */
#image-ai-tab .gallery-status {
color: var(--color-accent);
font-size: 0.85em;
margin-top: 4px;
}
/* Pagination button row alignment */
#image-ai-tab .pagination-controls {
display: flex;
align-items: center;
gap: 8px;
flex-wrap: wrap;
}
/* Selected image preview container */
#image-ai-tab .selected-preview-container {
border: 1px solid var(--border-color-primary);
border-radius: 8px;
padding: 8px;
background: var(--background-fill-secondary);
}
/* Fix a gr.Markdown UI glitch when clicking Next in the
* Image AI > Gallery tab */
.min.svelte-1yrv54 {
min-height: 0;
}
/* Image Generation Progress Bar */
#image-progress .image-ai-separator {
height: 24px;
margin: 20px 0;
border-top: 1px solid var(--input-border-color);
}
#image-progress .image-ai-progress-wrapper {
height: 24px;
margin: 20px 0;
}
#image-progress .image-ai-progress-track {
background: #e5e7eb;
border-radius: 4px;
overflow: hidden;
height: 8px;
}
.dark #image-progress .image-ai-progress-track {
background: #333;
}
#image-progress .image-ai-progress-fill {
background: #4a9eff;
height: 100%;
}
#image-progress .image-ai-progress-text {
text-align: center;
font-size: 12px;
color: #666;
margin-top: 4px;
}
.dark #image-progress .image-ai-progress-text {
color: #888;
}
#llm-prompt-variations {
position: absolute;
top: 0;
left: calc(100% - 174px);
}

View file

@ -139,35 +139,6 @@ curl http://127.0.0.1:5000/v1/completions \
For base64-encoded images, just replace the inner "url" values with this format: `data:image/FORMAT;base64,BASE64_STRING` where FORMAT is the file type (png, jpeg, gif, etc.) and BASE64_STRING is your base64-encoded image data.
#### Image generation
```shell
curl http://127.0.0.1:5000/v1/images/generations \
-H "Content-Type: application/json" \
-d '{
"prompt": "an orange tree",
"steps": 9,
"cfg_scale": 0,
"batch_size": 1,
"batch_count": 1
}'
```
You need to load an image model first. You can do this via the UI, or by adding `--image-model your_model_name` when launching the server.
The output is a JSON object containing a `data` array. Each element has a `b64_json` field with the base64-encoded PNG image:
```json
{
"created": 1764791227,
"data": [
{
"b64_json": "iVBORw0KGgo..."
}
]
}
```
#### SSE streaming
```shell
@ -448,6 +419,7 @@ The following environment variables can be used (they take precedence over every
| `OPENEDAI_CERT_PATH` | SSL certificate file path | cert.pem |
| `OPENEDAI_KEY_PATH` | SSL key file path | key.pem |
| `OPENEDAI_DEBUG` | Enable debugging (set to 1) | 1 |
| `SD_WEBUI_URL` | Stable Diffusion WebUI URL (used by the `/v1/images/generations` endpoint) | http://127.0.0.1:7861 |
| `OPENEDAI_EMBEDDING_MODEL` | Embedding model (if applicable) | sentence-transformers/all-mpnet-base-v2 |
| `OPENEDAI_EMBEDDING_DEVICE` | Embedding device (if applicable) | cuda |
@ -458,6 +430,7 @@ You can also set the following variables in your `settings.yaml` file:
```
openai-embedding_device: cuda
openai-embedding_model: "sentence-transformers/all-mpnet-base-v2"
openai-sd_webui_url: http://127.0.0.1:7861
openai-debug: 1
```

View file

@ -1,98 +0,0 @@
# Image Generation Tutorial
This feature allows you to generate images using `diffusers` models like [Tongyi-MAI/Z-Image-Turbo](https://huggingface.co/Tongyi-MAI/Z-Image-Turbo) directly within the web UI.
<img alt="print" src="https://github.com/user-attachments/assets/5108de50-658b-4e93-b2ae-4656d076bc9d" />
## Installation
1. Clone the repository with
```
git clone https://github.com/oobabooga/text-generation-webui
```
or download it from [here](https://github.com/oobabooga/text-generation-webui/archive/refs/heads/main.zip) and unzip it.
2. Use the one-click installer.
- Windows: Double click on `start_windows.bat`
- Linux: Run `./start_linux.sh`
- macOS: Run `./start_macos.sh`
Note: Image generation does not work with the portable builds in `.zip` format in the [Releases page](https://github.com/oobabooga/text-generation-webui/releases). You need the "full" version of the web UI.
## Downloading a model
1. Once installation ends, browse to `http://127.0.0.1:7860/`.
2. Click on "Image AI" on the left.
3. Click on "Model" at the top.
4. In the "Download model" field, paste `https://huggingface.co/Tongyi-MAI/Z-Image-Turbo` and click "Download".
5. Wait for the download to finish (it's 31 GB).
## Loading the model
Select the quantization option in the "Quantization" menu and click "Load".
The memory usage for `Z-Image-Turbo` for each option is:
| Quantization Method | VRAM Usage |
| :--- | :--- |
| None (FP16/BF16) | 25613 MiB |
| bnb-8bit | 16301 MiB |
| bnb-8bit + CPU Offload | 16235 MiB |
| bnb-4bit | 11533 MiB |
| bnb-4bit + CPU Offload | 7677 MiB |
The `torchao` options support `torch.compile` for faster image generation, with `float8wo` specifically providing native hardware acceleration for RTX 40-series and newer GPUs.
Note: The next time you launch the web UI, the model will get automatically loaded with your last settings when you try to generate an image. You do not need to go to the Model tab and click "Load" each time.
## Generating images
1. While still in the "Image AI" page, go to the "Generate" tab.
2. Type your prompt and click on the Generate button.
### Model-specific settings
- For Z-Image-Turbo, make sure to keep CFG Scale at 0 and Steps at 9. Do not write a Negative Prompt as it will get ignored with this CFG Scale value.
### LLM Prompt Variations
To use this feature, you need to load an LLM in the main "Model" page on the left.
If you have no idea what to use, do this to get started:
1. Download [Qwen3-4B-Q3_K_M.gguf](https://huggingface.co/unsloth/Qwen3-4B-GGUF/resolve/main/Qwen3-4B-Q3_K_M.gguf) to your `text-generation-webui/user_data/models` folder.
2. Select the model in the dropdown menu in the "Model" page.
3. Click Load.
Then go back to the "Image AI" page and check "LLM Prompt Variations".
After that, your prompts will be automatically updated by the LLM each time you generate an image. If you use a "Sequential Count" value greater than 1, a new prompt will be created for each sequential batch.
The improvement in creativity is striking (prompt: `Photo of a beautiful woman at night under moonlight`):
<img alt="comparison_collage" src="https://github.com/user-attachments/assets/67884832-2800-41cb-a146-e88e25af89c4" />
## Generating images over API
It is possible to generate images using the project's API. Just make sure to start the server with `--api`, either by
1. Passing the `--api` flag to your `start` script, like `./start_linux.sh --api`, or
2. Writing `--api` to your `user_data/CMD_FLAGS.txt` file and relaunching the web UI.
Here is an API call example:
```shell
curl http://127.0.0.1:5000/v1/images/generations \
-H "Content-Type: application/json" \
-d '{
"prompt": "an orange tree",
"steps": 9,
"cfg_scale": 0,
"batch_size": 1,
"batch_count": 1
}'
```

View file

@ -823,7 +823,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
lora_model = get_peft_model(shared.model, config)
if not always_override and Path(f"{lora_file_path}/adapter_model.bin").is_file():
logger.info("Loading existing LoRA data...")
state_dict_peft = torch.load(f"{lora_file_path}/adapter_model.bin", weights_only=True)
state_dict_peft = torch.load(f"{lora_file_path}/adapter_model.bin")
set_peft_model_state_dict(lora_model, state_dict_peft)
print(f" + Continue Training on {RED}{lora_file_path}/adapter_model.bin{RESET}")

View file

@ -1 +1 @@
coqui-tts>=0.27.0
coqui-tts==0.25.1

View file

@ -1,69 +1,70 @@
"""
OpenAI-compatible image generation using local diffusion models.
"""
import base64
import io
import os
import time
import requests
from extensions.openai.errors import ServiceUnavailableError
from modules import shared
def generations(request):
"""
Generate images using the loaded diffusion model.
Returns dict with 'created' timestamp and 'data' list of images.
"""
from modules.ui_image_generation import generate
def generations(prompt: str, size: str, response_format: str, n: int):
# Stable Diffusion callout wrapper for txt2img
# Low effort implementation for compatibility. With only "prompt" being passed and assuming DALL-E
# the results will be limited and likely poor. SD has hundreds of models and dozens of settings.
# If you want high quality tailored results you should just use the Stable Diffusion API directly.
# it's too general an API to try and shape the result with specific tags like negative prompts
# or "masterpiece", etc. SD configuration is beyond the scope of this API.
# At this point I will not add the edits and variations endpoints (ie. img2img) because they
# require changing the form data handling to accept multipart form data, also to properly support
# url return types will require file management and a web serving files... Perhaps later!
base_model_size = 512 if 'SD_BASE_MODEL_SIZE' not in os.environ else int(os.environ.get('SD_BASE_MODEL_SIZE', 512))
sd_defaults = {
'sampler_name': 'DPM++ 2M Karras', # vast improvement
'steps': 30,
}
if shared.image_model is None:
raise ServiceUnavailableError("No image model loaded. Load a model via the UI first.")
width, height = [int(x) for x in size.split('x')] # ignore the restrictions on size
width, height = request.get_width_height()
# to hack on better generation, edit default payload.
payload = {
'prompt': prompt, # ignore prompt limit of 1000 characters
'width': width,
'height': height,
'batch_size': n,
}
payload.update(sd_defaults)
# Build state dict: GenerationOptions fields + image-specific keys
state = request.model_dump()
state.update({
'image_model_menu': shared.image_model_name,
'image_prompt': request.prompt,
'image_neg_prompt': request.negative_prompt,
'image_width': width,
'image_height': height,
'image_steps': request.steps,
'image_seed': request.image_seed,
'image_batch_size': request.batch_size,
'image_batch_count': request.batch_count,
'image_cfg_scale': request.cfg_scale,
'image_llm_variations': False,
})
scale = min(width, height) / base_model_size
if scale >= 1.2:
# for better performance with the default size (1024), and larger res.
scaler = {
'width': width // scale,
'height': height // scale,
'hr_scale': scale,
'enable_hr': True,
'hr_upscaler': 'Latent',
'denoising_strength': 0.68,
}
payload.update(scaler)
# Exhaust generator, keep final result
images = []
for images, _ in generate(state, save_images=False):
pass
resp = {
'created': int(time.time()),
'data': []
}
from extensions.openai.script import params
if not images:
raise ServiceUnavailableError("Image generation failed or produced no images.")
# TODO: support SD_WEBUI_AUTH username:password pair.
sd_url = f"{os.environ.get('SD_WEBUI_URL', params.get('sd_webui_url', ''))}/sdapi/v1/txt2img"
# Build response
resp = {'created': int(time.time()), 'data': []}
for img in images:
b64 = _image_to_base64(img)
image_obj = {'revised_prompt': request.prompt}
if request.response_format == 'b64_json':
image_obj['b64_json'] = b64
response = requests.post(url=sd_url, json=payload)
r = response.json()
if response.status_code != 200 or 'images' not in r:
print(r)
raise ServiceUnavailableError(r.get('error', 'Unknown error calling Stable Diffusion'), code=response.status_code, internal_message=r.get('errors', None))
# r['parameters']...
for b64_json in r['images']:
if response_format == 'b64_json':
resp['data'].extend([{'b64_json': b64_json}])
else:
image_obj['url'] = f'data:image/png;base64,{b64}'
resp['data'].append(image_obj)
resp['data'].extend([{'url': f'data:image/png;base64,{b64_json}'}]) # yeah it's lazy. requests.get() will not work with this
return resp
def _image_to_base64(image) -> str:
buffered = io.BytesIO()
image.save(buffered, format="PNG")
return base64.b64encode(buffered.getvalue()).decode('utf-8')

View file

@ -17,8 +17,10 @@ from sse_starlette import EventSourceResponse
from starlette.concurrency import iterate_in_threadpool
import extensions.openai.completions as OAIcompletions
import extensions.openai.images as OAIimages
import extensions.openai.logits as OAIlogits
import extensions.openai.models as OAImodels
from extensions.openai.errors import ServiceUnavailableError
from extensions.openai.tokens import token_count, token_decode, token_encode
from extensions.openai.utils import _start_cloudflared
from modules import shared
@ -38,8 +40,6 @@ from .typing import (
EmbeddingsResponse,
EncodeRequest,
EncodeResponse,
ImageGenerationRequest,
ImageGenerationResponse,
LoadLorasRequest,
LoadModelRequest,
LogitsRequest,
@ -54,12 +54,12 @@ from .typing import (
params = {
'embedding_device': 'cpu',
'embedding_model': 'sentence-transformers/all-mpnet-base-v2',
'sd_webui_url': '',
'debug': 0
}
streaming_semaphore = asyncio.Semaphore(1)
image_generation_semaphore = asyncio.Semaphore(1)
def verify_api_key(authorization: str = Header(None)) -> None:
@ -228,13 +228,20 @@ async def handle_audio_transcription(request: Request):
return JSONResponse(content=transcription)
@app.post('/v1/images/generations', response_model=ImageGenerationResponse, dependencies=check_key)
async def handle_image_generation(request_data: ImageGenerationRequest):
import extensions.openai.images as OAIimages
@app.post('/v1/images/generations', dependencies=check_key)
async def handle_image_generation(request: Request):
async with image_generation_semaphore:
response = await asyncio.to_thread(OAIimages.generations, request_data)
return JSONResponse(response)
if not os.environ.get('SD_WEBUI_URL', params.get('sd_webui_url', '')):
raise ServiceUnavailableError("Stable Diffusion not available. SD_WEBUI_URL not set.")
body = await request.json()
prompt = body['prompt']
size = body.get('size', '1024x1024')
response_format = body.get('response_format', 'url') # or b64_json
n = body.get('n', 1) # ignore the batch limits of max 10
response = await OAIimages.generations(prompt=prompt, size=size, response_format=response_format, n=n)
return JSONResponse(response)
@app.post("/v1/embeddings", response_model=EmbeddingsResponse, dependencies=check_key)

View file

@ -130,7 +130,7 @@ class CompletionRequest(GenerationOptions, CompletionRequestParams):
class CompletionResponse(BaseModel):
id: str
choices: List[dict]
created: int = Field(default_factory=lambda: int(time.time()))
created: int = int(time.time())
model: str
object: str = "text_completion"
usage: dict
@ -178,7 +178,7 @@ class ChatCompletionRequest(GenerationOptions, ChatCompletionRequestParams):
class ChatCompletionResponse(BaseModel):
id: str
choices: List[dict]
created: int = Field(default_factory=lambda: int(time.time()))
created: int = int(time.time())
model: str
object: str = "chat.completion"
usage: dict
@ -264,42 +264,6 @@ class LoadLorasRequest(BaseModel):
lora_names: List[str]
class ImageGenerationRequest(BaseModel):
    """Request body carrying the image-specific generation parameters."""

    prompt: str
    negative_prompt: str = ""
    size: str = Field(default="1024x1024", description="'WIDTHxHEIGHT'")
    steps: int = Field(default=9, ge=1)
    cfg_scale: float = Field(default=0.0, ge=0.0)
    image_seed: int = Field(default=-1, description="-1 for random")
    batch_size: int | None = Field(default=None, ge=1, description="Parallel batch size (VRAM heavy)")
    n: int = Field(default=1, ge=1, description="Alias for batch_size (OpenAI compatibility)")
    batch_count: int = Field(default=1, ge=1, description="Sequential batch count")

    # OpenAI compatibility (unused)
    model: str | None = None
    response_format: str = "b64_json"
    user: str | None = None

    @model_validator(mode='after')
    def resolve_batch_size(self):
        """Use ``n`` as the batch size when ``batch_size`` was left unset."""
        if self.batch_size is None:
            self.batch_size = self.n

        return self

    def get_width_height(self) -> tuple[int, int]:
        """Split ``size`` on 'x' into ``(width, height)``.

        Falls back to ``(1024, 1024)`` when either of the first two parts is
        missing or is not an integer.
        """
        try:
            dimensions = self.size.lower().split('x')
            width = int(dimensions[0])
            height = int(dimensions[1])
        except (ValueError, IndexError):
            return 1024, 1024

        return width, height
class ImageGenerationResponse(BaseModel):
    """Response body: a creation timestamp plus the list of image entries."""

    # Evaluated per instance, so each response gets its own timestamp.
    created: int = Field(default_factory=lambda: int(time.time()))
    data: List[dict]
def to_json(obj):
return json.dumps(obj.__dict__, indent=4)

View file

@ -36,17 +36,3 @@ function switch_to_character() {
document.getElementById("character-tab-button").click();
scrollToTop();
}
function switch_to_image_ai_generate() {
  // Click the first button inside #image-ai-tab whose trimmed label is
  // exactly "Generate", then call scrollToTop().
  const tabButtons = document.querySelector("#image-ai-tab").getElementsByTagName("button");
  const generateButton = Array.from(tabButtons).find(
    (button) => button.textContent.trim() === "Generate"
  );

  if (generateButton) {
    generateButton.click();
  }

  scrollToTop();
}

View file

@ -3,6 +3,7 @@ import copy
import functools
import html
import json
import os
import pprint
import re
import shutil
@ -25,7 +26,6 @@ from modules.html_generator import (
convert_to_markdown,
make_thumbnail
)
from modules.image_utils import open_image_safely
from modules.logging_colors import logger
from modules.text_generation import (
generate_reply,
@ -112,9 +112,7 @@ def generate_chat_prompt(user_input, state, **kwargs):
add_generation_prompt=False,
enable_thinking=state['enable_thinking'],
reasoning_effort=state['reasoning_effort'],
thinking_budget=-1 if state.get('enable_thinking', True) else 0,
bos_token=shared.bos_token,
eos_token=shared.eos_token,
thinking_budget=-1 if state.get('enable_thinking', True) else 0
)
chat_renderer = partial(
@ -477,7 +475,7 @@ def get_stopping_strings(state):
if state['mode'] in ['instruct', 'chat-instruct']:
template = jinja_env.from_string(state['instruction_template_str'])
renderer = partial(template.render, add_generation_prompt=False, bos_token=shared.bos_token, eos_token=shared.eos_token)
renderer = partial(template.render, add_generation_prompt=False)
renderers.append(renderer)
if state['mode'] in ['chat']:
@ -1518,6 +1516,20 @@ def load_instruction_template_memoized(template):
return load_instruction_template(template)
def open_image_safely(path):
    """Open the image at ``path``, returning ``None`` instead of raising.

    ``None`` is returned when ``path`` is not a string, does not exist, or is
    a symbolic link. It is also returned when ``Image.open`` raises, in which
    case the failure is logged as an error.
    """
    # isinstance() also rejects None, so no separate None check is needed.
    if not isinstance(path, str):
        return None

    if not Path(path).exists() or os.path.islink(path):
        return None

    try:
        image = Image.open(path)
    except Exception as e:
        logger.error(f"Failed to open image file: {path}. Reason: {e}")
        return None

    return image
def upload_character(file, img_path, tavern=False):
img = open_image_safely(img_path)
decoded_file = file if isinstance(file, str) else file.decode('utf-8')

View file

@ -196,45 +196,50 @@ def extract_thinking_block(string):
return None, string
def build_thinking_block(thinking_content, message_id, has_remaining_content):
    """Return the collapsible ``<details>`` HTML for a thinking section.

    Returns ``None`` when ``thinking_content`` is ``None``. The block is
    marked as streaming, and titled "Thinking...", while
    ``has_remaining_content`` is falsy; otherwise it is titled "Thought".
    """
    if thinking_content is None:
        return None

    # Render the thinking text through the same markdown pipeline as messages.
    rendered_thinking = process_markdown_content(thinking_content)

    # The block id is derived from the message id.
    details_id = f"thinking-{message_id}-0"

    # Nothing after the thinking section yet means it is still in progress.
    still_streaming = not has_remaining_content
    summary_title = "Thought"
    if still_streaming:
        summary_title = "Thinking..."
    streaming_flag = str(still_streaming).lower()

    return f'''
<details class="thinking-block" data-block-id="{details_id}" data-streaming="{streaming_flag}">
<summary class="thinking-header">
{info_svg_small}
<span class="thinking-title">{summary_title}</span>
</summary>
<div class="thinking-content pretty_scrollbar">{rendered_thinking}</div>
</details>
'''
def build_main_content_block(content):
"""Build HTML for the main content block."""
if not content:
@functools.lru_cache(maxsize=None)
def convert_to_markdown(string, message_id=None):
if not string:
return ""
return process_markdown_content(content)
# Use a default message ID if none provided
if message_id is None:
message_id = "unknown"
# Extract thinking block if present
thinking_content, remaining_content = extract_thinking_block(string)
# Process the main content
html_output = process_markdown_content(remaining_content)
# If thinking content was found, process it using the same function
if thinking_content is not None:
thinking_html = process_markdown_content(thinking_content)
# Generate unique ID for the thinking block
block_id = f"thinking-{message_id}-0"
# Check if thinking is complete or still in progress
is_streaming = not remaining_content
title_text = "Thinking..." if is_streaming else "Thought"
thinking_block = f'''
<details class="thinking-block" data-block-id="{block_id}" data-streaming="{str(is_streaming).lower()}">
<summary class="thinking-header">
{info_svg_small}
<span class="thinking-title">{title_text}</span>
</summary>
<div class="thinking-content pretty_scrollbar">{thinking_html}</div>
</details>
'''
# Prepend the thinking block to the message HTML
html_output = thinking_block + html_output
return html_output
def process_markdown_content(string):
"""
Process a string through the markdown conversion pipeline.
Uses robust manual parsing to ensure correct LaTeX and Code Block rendering.
"""
"""Process a string through the markdown conversion pipeline."""
if not string:
return ""
@ -275,7 +280,7 @@ def process_markdown_content(string):
pattern = re.compile(r'\\begin{blockquote}(.*?)\\end{blockquote}', re.DOTALL)
string = pattern.sub(replace_blockquote, string)
# Code block standardization
# Code
string = string.replace('\\begin{code}', '```')
string = string.replace('\\end{code}', '```')
string = string.replace('\\begin{align*}', '$$')
@ -296,7 +301,6 @@ def process_markdown_content(string):
is_code = False
is_latex = False
# Manual line iteration for robust structure parsing
for line in string.split('\n'):
stripped_line = line.strip()
@ -367,39 +371,6 @@ def process_markdown_content(string):
return html_output
@functools.lru_cache(maxsize=None)
def convert_to_markdown(string, message_id=None):
    """
    Render a raw message string as HTML assembled from ordered blocks.

    The thinking block (when one is produced) comes first, followed by the
    main content block. Blocks that render to an empty value are left out.
    Results are memoized per (string, message_id) pair.
    """
    if not string:
        return ""

    # Fall back to a placeholder ID when the caller did not supply one
    message_id = "unknown" if message_id is None else message_id

    # Split the message into its thinking part and everything after it
    thinking_content, remaining_content = extract_thinking_block(string)

    # Render each block in display order: thinking first, then main content
    thinking_html = build_thinking_block(thinking_content, message_id, bool(remaining_content))
    main_html = build_main_content_block(remaining_content)

    # Keep only the non-empty blocks and glue them together
    return ''.join(block for block in (thinking_html, main_html) if block)
def convert_to_markdown_wrapped(string, message_id=None, use_cache=True):
'''
Used to avoid caching convert_to_markdown calls during streaming.

View file

@ -1,200 +0,0 @@
import time
import modules.shared as shared
from modules.logging_colors import logger
from modules.utils import resolve_model_path
def get_quantization_config(quant_method):
    """
    Get the appropriate quantization config based on the selected method.
    Applies quantization to both the transformer and the text_encoder.

    Args:
        quant_method: 'none' (or any falsy value) to disable quantization,
            or one of 'bnb-8bit', 'bnb-4bit', 'torchao-int8wo',
            'torchao-fp4', 'torchao-float8wo'.

    Returns:
        A PipelineQuantizationConfig for a recognized method, or None when
        quantization is disabled or the method is not recognized (a warning
        is logged in the latter case).
    """
    # Decide whether quantization is wanted at all BEFORE importing the
    # quantization backends, so loading an unquantized model never depends
    # on those imports succeeding.
    if not quant_method or quant_method == 'none':
        return None

    # torchao method name -> quant type string passed to TorchAoConfig
    torchao_quant_types = {
        'torchao-int8wo': "int8wo",
        'torchao-fp4': "fp4_e2m1",
        'torchao-float8wo': "float8wo",
    }
    known_methods = ('bnb-8bit', 'bnb-4bit') + tuple(torchao_quant_types)
    if quant_method not in known_methods:
        logger.warning(f"Unknown quantization method: {quant_method}. Loading without quantization.")
        return None

    import torch
    # Import BitsAndBytesConfig from BOTH libraries: the transformer component
    # takes the diffusers class, the text encoder the transformers class.
    from diffusers import BitsAndBytesConfig as DiffusersBnBConfig
    from diffusers import TorchAoConfig
    from diffusers.quantizers import PipelineQuantizationConfig
    from transformers import BitsAndBytesConfig as TransformersBnBConfig

    # Bitsandbytes 8-bit quantization
    if quant_method == 'bnb-8bit':
        return PipelineQuantizationConfig(
            quant_mapping={
                "transformer": DiffusersBnBConfig(load_in_8bit=True),
                "text_encoder": TransformersBnBConfig(load_in_8bit=True),
            }
        )

    # Bitsandbytes 4-bit quantization (same options for both components)
    if quant_method == 'bnb-4bit':
        bnb_4bit_options = dict(
            load_in_4bit=True,
            bnb_4bit_quant_type="nf4",
            bnb_4bit_compute_dtype=torch.bfloat16,
            bnb_4bit_use_double_quant=True,
        )
        return PipelineQuantizationConfig(
            quant_mapping={
                "transformer": DiffusersBnBConfig(**bnb_4bit_options),
                "text_encoder": TransformersBnBConfig(**bnb_4bit_options),
            }
        )

    # torchao variants: int8 weight-only, fp4 (e2m1), float8 weight-only
    quant_type = torchao_quant_types[quant_method]
    return PipelineQuantizationConfig(
        quant_mapping={
            "transformer": TorchAoConfig(quant_type),
            "text_encoder": TorchAoConfig(quant_type),
        }
    )
def get_pipeline_type(pipe):
    """
    Map a loaded pipeline object to a short type tag by its class name.

    Returns:
        str: 'zimage', 'qwenimage', or 'unknown'
    """
    tags_by_class_name = {
        'ZImagePipeline': 'zimage',
        'QwenImagePipeline': 'qwenimage',
    }
    return tags_by_class_name.get(pipe.__class__.__name__, 'unknown')
def load_image_model(model_name, dtype='bfloat16', attn_backend='sdpa', cpu_offload=False, compile_model=False, quant_method='none'):
    """
    Load a diffusers image generation model and register it in `shared`.

    Args:
        model_name: Name of the model directory
        dtype: 'bfloat16' or 'float16' (anything else falls back to bfloat16)
        attn_backend: 'sdpa' or 'flash_attention_2'
        cpu_offload: Enable CPU offloading for low VRAM
        compile_model: Compile the model for faster inference (slow first run)
        quant_method: 'none', 'bnb-8bit', 'bnb-4bit', or torchao options (int8wo, fp4, float8wo)

    Returns:
        The loaded pipeline, or None if loading failed (the error is logged).
    """
    import torch
    from diffusers import DiffusionPipeline

    from modules.torch_utils import get_device

    logger.info(f"Loading image model \"{model_name}\" with quantization: {quant_method}")
    t0 = time.time()

    target_dtype = {"bfloat16": torch.bfloat16, "float16": torch.float16}.get(dtype, torch.bfloat16)
    model_path = resolve_model_path(model_name, image_model=True)

    try:
        # Base loading options; a quantization config is added only when one applies
        load_kwargs = {
            "torch_dtype": target_dtype,
            "low_cpu_mem_usage": True,
        }
        pipeline_quant_config = get_quantization_config(quant_method)
        if pipeline_quant_config is not None:
            load_kwargs["quantization_config"] = pipeline_quant_config

        # DiffusionPipeline picks the concrete pipeline class automatically
        # (handles both ZImagePipeline and QwenImagePipeline)
        pipe = DiffusionPipeline.from_pretrained(str(model_path), **load_kwargs)
        pipeline_type = get_pipeline_type(pipe)

        if not cpu_offload:
            pipe.to(get_device())

        def first_submodule_with(method_name):
            # First of pipe.transformer / pipe.unet that exposes `method_name`
            for attr_name in ("transformer", "unet"):
                candidate = getattr(pipe, attr_name, None)
                if hasattr(candidate, method_name):
                    return candidate
            return None

        # Set attention backend
        if attn_backend == 'flash_attention_2':
            attn_target = first_submodule_with("set_attention_backend")
            if attn_target is not None:
                attn_target.set_attention_backend("flash")

        # Compile model
        if compile_model:
            compile_target = first_submodule_with("compile")
            if compile_target is not None:
                logger.info("Compiling model (first run will be slow)...")
                compile_target.compile()

        if cpu_offload:
            pipe.enable_model_cpu_offload()

        # Publish the loaded pipeline for the rest of the application
        shared.image_model = pipe
        shared.image_model_name = model_name
        shared.image_pipeline_type = pipeline_type

        logger.info(f"Loaded image model \"{model_name}\" in {(time.time() - t0):.2f} seconds.")
        return pipe
    except Exception as e:
        logger.error(f"Failed to load image model: {str(e)}")
        return None
def unload_image_model():
    """Drop the currently loaded image model (if any) and release cached VRAM."""
    if shared.image_model is not None:
        # Remove the reference first, then reset the shared bookkeeping
        del shared.image_model
        shared.image_model = None
        shared.image_model_name = 'None'
        shared.image_pipeline_type = None

        from modules.torch_utils import clear_torch_cache
        clear_torch_cache()

        logger.info("Image model unloaded.")

View file

@ -1,7 +1,9 @@
"""
Shared image processing utilities for multimodal support.
Used by both ExLlamaV3 and llama.cpp implementations.
"""
import base64
import io
import os
from pathlib import Path
from typing import Any, List, Tuple
from PIL import Image
@ -9,20 +11,6 @@ from PIL import Image
from modules.logging_colors import logger
def open_image_safely(path):
    """
    Open an image file with PIL, refusing anything that is not a plain path.

    Returns None when `path` is not a string, does not exist, is a symbolic
    link, or cannot be opened (the failure is logged in that last case).
    """
    # Non-string values (including None) are rejected outright
    if not isinstance(path, str):
        return None

    # Missing files and symlinks are both refused
    if not Path(path).exists() or os.path.islink(path):
        return None

    try:
        opened = Image.open(path)
    except Exception as e:
        logger.error(f"Failed to open image file: {path}. Reason: {e}")
        return None

    return opened
def convert_pil_to_base64(image: Image.Image) -> str:
"""Converts a PIL Image to a base64 encoded string."""
buffered = io.BytesIO()

View file

@ -317,7 +317,6 @@ class LlamaServer:
"--ctx-size", str(shared.args.ctx_size),
"--gpu-layers", str(shared.args.gpu_layers),
"--batch-size", str(shared.args.batch_size),
"--ubatch-size", str(shared.args.ubatch_size),
"--port", str(self.port),
"--no-webui",
"--flash-attn", "on",
@ -327,8 +326,6 @@ class LlamaServer:
cmd += ["--threads", str(shared.args.threads)]
if shared.args.threads_batch > 0:
cmd += ["--threads-batch", str(shared.args.threads_batch)]
if shared.args.cpu_moe:
cmd.append("--cpu-moe")
if shared.args.no_mmap:
cmd.append("--no-mmap")
if shared.args.mlock:

View file

@ -6,11 +6,9 @@ import gradio as gr
loaders_and_params = OrderedDict({
'llama.cpp': [
'gpu_layers',
'cpu_moe',
'threads',
'threads_batch',
'batch_size',
'ubatch_size',
'ctx_size',
'cache_type',
'tensor_split',

View file

@ -89,8 +89,8 @@ def get_model_metadata(model):
else:
bos_token = ""
shared.bos_token = bos_token
shared.eos_token = eos_token
template = template.replace('eos_token', "'{}'".format(eos_token))
template = template.replace('bos_token', "'{}'".format(bos_token))
template = re.sub(r"\{\{-?\s*raise_exception\(.*?\)\s*-?\}\}", "", template, flags=re.DOTALL)
template = re.sub(r'raise_exception\([^)]*\)', "''", template)
@ -160,16 +160,13 @@ def get_model_metadata(model):
# 4. If a template was found from any source, process it
if template:
shared.bos_token = '<s>'
shared.eos_token = '</s>'
for k in ['eos_token', 'bos_token']:
if k in metadata:
value = metadata[k]
if isinstance(value, dict):
value = value['content']
setattr(shared, k, value)
template = template.replace(k, "'{}'".format(value))
template = re.sub(r"\{\{-?\s*raise_exception\(.*?\)\s*-?\}\}", "", template, flags=re.DOTALL)
template = re.sub(r'raise_exception\([^)]*\)', "''", template)

View file

@ -11,7 +11,7 @@ import yaml
from modules.logging_colors import logger
from modules.presets import default_preset
# Text model variables
# Model variables
model = None
tokenizer = None
model_name = 'None'
@ -19,13 +19,6 @@ is_seq2seq = False
is_multimodal = False
model_dirty_from_training = False
lora_names = []
bos_token = '<s>'
eos_token = '</s>'
# Image model variables
image_model = None
image_model_name = 'None'
image_pipeline_type = None
# Generation variables
stop_everything = False
@ -53,18 +46,6 @@ group.add_argument('--extensions', type=str, nargs='+', help='The list of extens
group.add_argument('--verbose', action='store_true', help='Print the prompts to the terminal.')
group.add_argument('--idle-timeout', type=int, default=0, help='Unload model after this many minutes of inactivity. It will be automatically reloaded when you try to use it again.')
# Image generation
group = parser.add_argument_group('Image model')
group.add_argument('--image-model', type=str, help='Name of the image model to select on startup (overrides saved setting).')
group.add_argument('--image-model-dir', type=str, default='user_data/image_models', help='Path to directory with all the image models.')
group.add_argument('--image-dtype', type=str, default=None, choices=['bfloat16', 'float16'], help='Data type for image model.')
group.add_argument('--image-attn-backend', type=str, default=None, choices=['flash_attention_2', 'sdpa'], help='Attention backend for image model.')
group.add_argument('--image-cpu-offload', action='store_true', help='Enable CPU offloading for image model.')
group.add_argument('--image-compile', action='store_true', help='Compile the image model for faster inference.')
group.add_argument('--image-quant', type=str, default=None,
choices=['none', 'bnb-8bit', 'bnb-4bit', 'torchao-int8wo', 'torchao-fp4', 'torchao-float8wo'],
help='Quantization method for image model.')
# Model loader
group = parser.add_argument_group('Model loader')
group.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, ExLlamav3_HF, ExLlamav2_HF, ExLlamav2, TensorRT-LLM.')
@ -85,7 +66,6 @@ group.add_argument('--ctx-size-draft', type=int, default=0, help='Size of the pr
# llama.cpp
group = parser.add_argument_group('llama.cpp')
group.add_argument('--gpu-layers', '--n-gpu-layers', type=int, default=256, metavar='N', help='Number of layers to offload to the GPU.')
group.add_argument('--cpu-moe', action='store_true', help='Move the experts to the CPU (for MoE models).')
group.add_argument('--mmproj', type=str, default=None, help='Path to the mmproj file for vision models.')
group.add_argument('--streaming-llm', action='store_true', help='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
group.add_argument('--tensor-split', type=str, default=None, help='Split the model across multiple GPUs. Comma-separated list of proportions. Example: 60,40.')
@ -93,8 +73,7 @@ group.add_argument('--row-split', action='store_true', help='Split the model by
group.add_argument('--no-mmap', action='store_true', help='Prevent mmap from being used.')
group.add_argument('--mlock', action='store_true', help='Force the system to keep the model in RAM.')
group.add_argument('--no-kv-offload', action='store_true', help='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.')
group.add_argument('--batch-size', type=int, default=1024, help='Maximum number of prompt tokens to batch together when calling llama-server. This is the application level batch size.')
group.add_argument('--ubatch-size', type=int, default=1024, help='Maximum number of prompt tokens to batch together when calling llama-server. This is the max physical batch size for computation (device level).')
group.add_argument('--batch-size', type=int, default=256, help='Maximum number of prompt tokens to batch together when calling llama_eval.')
group.add_argument('--threads', type=int, default=0, help='Number of threads to use.')
group.add_argument('--threads-batch', type=int, default=0, help='Number of threads to use for batches/prompt processing.')
group.add_argument('--numa', action='store_true', help='Activate NUMA task allocation for llama.cpp.')
@ -309,26 +288,6 @@ settings = {
# Extensions
'default_extensions': [],
# Image generation settings
'image_prompt': '',
'image_neg_prompt': '',
'image_width': 1024,
'image_height': 1024,
'image_aspect_ratio': '1:1 Square',
'image_steps': 9,
'image_cfg_scale': 0.0,
'image_seed': -1,
'image_batch_size': 1,
'image_batch_count': 1,
'image_llm_variations': False,
'image_llm_variations_prompt': 'Write a variation of the image generation prompt above. Consider the intent of the user with that prompt and write something that will likely please them, with added details. Output only the new prompt. Do not add any explanations, prefixes, or additional text.',
'image_model_menu': 'None',
'image_dtype': 'bfloat16',
'image_attn_backend': 'flash_attention_2',
'image_cpu_offload': False,
'image_compile': False,
'image_quant': 'none',
}
default_settings = copy.deepcopy(settings)
@ -353,22 +312,6 @@ def do_cmd_flags_warnings():
logger.warning('\nThe multi-user mode is highly experimental and should not be shared publicly.')
def apply_image_model_cli_overrides():
    """Copy image-model command-line options into `settings` when they were given."""
    # (settings key, args attribute, is_flag) in the order they are applied.
    # Flags only ever switch a setting on; valued options override the saved
    # setting whenever they are not None.
    override_plan = (
        ('image_model_menu', 'image_model', False),
        ('image_dtype', 'image_dtype', False),
        ('image_attn_backend', 'image_attn_backend', False),
        ('image_cpu_offload', 'image_cpu_offload', True),
        ('image_compile', 'image_compile', True),
        ('image_quant', 'image_quant', False),
    )

    for setting_key, arg_name, is_flag in override_plan:
        cli_value = getattr(args, arg_name)
        if is_flag:
            if cli_value:
                settings[setting_key] = True
        elif cli_value is not None:
            settings[setting_key] = cli_value
def fix_loader_name(name):
if not name:
return name

View file

@ -611,7 +611,7 @@ def do_train(lora_name: str, always_override: bool, q_proj_en: bool, v_proj_en:
bf16=shared.args.bf16,
optim=optimizer,
logging_steps=2 if stop_at_loss > 0 else 5,
eval_strategy="steps" if eval_data is not None else "no",
evaluation_strategy="steps" if eval_data is not None else "no",
eval_steps=math.ceil(eval_steps / gradient_accumulation_steps) if eval_data is not None else None,
save_strategy="steps" if eval_data is not None else "no",
output_dir=lora_file_path,
@ -620,7 +620,7 @@ def do_train(lora_name: str, always_override: bool, q_proj_en: bool, v_proj_en:
# TODO: Enable multi-device support
ddp_find_unused_parameters=None,
no_cuda=shared.args.cpu,
# use_ipex=True if is_torch_xpu_available() and not shared.args.cpu else False
use_ipex=True if is_torch_xpu_available() and not shared.args.cpu else False
),
data_collator=transformers.DataCollatorForLanguageModeling(shared.tokenizer, mlm=False),
callbacks=list([Callbacks()])

View file

@ -125,11 +125,9 @@ def list_model_elements():
'loader',
'cpu_memory',
'gpu_layers',
'cpu_moe',
'threads',
'threads_batch',
'batch_size',
'ubatch_size',
'ctx_size',
'cache_type',
'tensor_split',
@ -280,29 +278,6 @@ def list_interface_input_elements():
'include_past_attachments',
]
if not shared.args.portable:
# Image generation elements
elements += [
'image_prompt',
'image_neg_prompt',
'image_width',
'image_height',
'image_aspect_ratio',
'image_steps',
'image_cfg_scale',
'image_seed',
'image_batch_size',
'image_batch_count',
'image_llm_variations',
'image_llm_variations_prompt',
'image_model_menu',
'image_dtype',
'image_attn_backend',
'image_compile',
'image_cpu_offload',
'image_quant',
]
return elements
@ -532,33 +507,9 @@ def setup_auto_save():
'theme_state',
'show_two_notebook_columns',
'paste_to_attachment',
'include_past_attachments',
'include_past_attachments'
]
if not shared.args.portable:
# Image generation tab (ui_image_generation.py)
change_elements += [
'image_prompt',
'image_neg_prompt',
'image_width',
'image_height',
'image_aspect_ratio',
'image_steps',
'image_cfg_scale',
'image_seed',
'image_batch_size',
'image_batch_count',
'image_llm_variations',
'image_llm_variations_prompt',
'image_model_menu',
'image_dtype',
'image_attn_backend',
'image_compile',
'image_cpu_offload',
'image_quant',
]
for element_name in change_elements:
if element_name in shared.gradio:
shared.gradio[element_name].change(

View file

@ -1,993 +0,0 @@
import json
import os
import random
import time
import traceback
from datetime import datetime
from pathlib import Path
import gradio as gr
from PIL.PngImagePlugin import PngInfo
from modules import shared, ui, utils
from modules.image_models import (
get_pipeline_type,
load_image_model,
unload_image_model
)
from modules.image_utils import open_image_safely
from modules.logging_colors import logger
from modules.text_generation import stop_everything_event
from modules.utils import check_model_loaded, gradio
# Selectable aspect ratios as (width units, height units).
# "Custom" carries no ratio and leaves the dimensions unconstrained.
ASPECT_RATIOS = {
    "1:1 Square": (1, 1),
    "16:9 Cinema": (16, 9),
    "9:16 Mobile": (9, 16),
    "4:3 Photo": (4, 3),
    "Custom": None,
}

# Granularity of the width/height sliders, in pixels
STEP = 16

# Number of images shown on one gallery page
IMAGES_PER_PAGE = 32

# Settings keys to save in PNG metadata (Generate tab only)
METADATA_SETTINGS_KEYS = [
    'image_prompt', 'image_neg_prompt',
    'image_width', 'image_height', 'image_aspect_ratio',
    'image_steps', 'image_seed', 'image_cfg_scale',
]

# Cached listing of all history image paths, and when it was last rebuilt
_image_cache = []
_cache_timestamp = 0
def round_to_step(value, step=STEP):
    """Snap `value` to the nearest multiple of `step`."""
    multiples = round(value / step)
    return multiples * step
def clamp(value, min_val, max_val):
    """Limit `value` to the range [min_val, max_val]."""
    # Apply the upper bound first, then the lower bound
    capped = value if value < max_val else max_val
    return capped if capped > min_val else min_val
def apply_aspect_ratio(aspect_ratio, current_width, current_height):
    """
    Recompute (width, height) so they follow the selected aspect ratio.

    "Custom" and unrecognized ratios return the current dimensions untouched.
    Square ratios use the smaller current side; portrait ratios keep the
    width and derive the height; landscape ratios keep the height and derive
    the width. Both results are limited to 256..2048 and returned as ints.
    """
    ratio = ASPECT_RATIOS.get(aspect_ratio)
    if ratio is None:
        # Covers both "Custom" (stored as None) and unknown names
        return current_width, current_height

    w_ratio, h_ratio = ratio
    if w_ratio > h_ratio:
        # Landscape: height is the reference side
        width, height = round_to_step(current_height * w_ratio / h_ratio), current_height
    elif w_ratio < h_ratio:
        # Portrait: width is the reference side
        width, height = current_width, round_to_step(current_width * h_ratio / w_ratio)
    else:
        # Square: shrink to the smaller side
        width = height = min(current_width, current_height)

    return int(clamp(width, 256, 2048)), int(clamp(height, 256, 2048))
def update_height_from_width(width, aspect_ratio):
    """Derive the height matching `width` for the selected ratio (no-op update for Custom/unknown)."""
    ratio = ASPECT_RATIOS.get(aspect_ratio)
    if ratio is None:
        # "Custom" or an unrecognized ratio: leave the height slider alone
        return gr.update()

    w_ratio, h_ratio = ratio
    derived_height = round_to_step(width * h_ratio / w_ratio)
    return int(clamp(derived_height, 256, 2048))
def update_width_from_height(height, aspect_ratio):
    """Derive the width matching `height` for the selected ratio (no-op update for Custom/unknown)."""
    ratio = ASPECT_RATIOS.get(aspect_ratio)
    if ratio is None:
        # "Custom" or an unrecognized ratio: leave the width slider alone
        return gr.update()

    w_ratio, h_ratio = ratio
    derived_width = round_to_step(height * w_ratio / h_ratio)
    return int(clamp(derived_width, 256, 2048))
def swap_dimensions_and_update_ratio(width, height, aspect_ratio):
    """
    Swap width and height and pick the aspect-ratio label that now fits.

    The first named ratio whose expected height is within STEP pixels of the
    swapped height wins; if none fits, the label becomes "Custom".
    """
    new_width, new_height = height, width

    matching_names = (
        name
        for name, ratios in ASPECT_RATIOS.items()
        if ratios is not None and abs(new_width * ratios[1] / ratios[0] - new_height) < STEP
    )
    new_ratio = next(matching_names, "Custom")

    return new_width, new_height, new_ratio
def build_generation_metadata(state, actual_seed):
    """Collect the generation settings from `state` into a metadata dict."""
    # Copy only the whitelisted settings that are present in the state
    metadata = {key: state[key] for key in METADATA_SETTINGS_KEYS if key in state}

    # Record the seed that was really used (never the -1 "random" marker),
    # plus when and with which model the image was generated
    metadata['image_seed'] = actual_seed
    metadata['generated_at'] = datetime.now().isoformat()
    metadata['model'] = shared.image_model_name
    return metadata
def save_generated_images(images, state, actual_seed):
    """
    Write images as PNG files with the generation settings embedded.

    Files go to user_data/image_outputs/<YYYY-MM-DD>/. Nothing is saved in
    multi-user mode. Returns the list of file paths that were written.
    """
    if shared.args.multi_user:
        return []

    # One folder per calendar day
    folder_path = os.path.join("user_data", "image_outputs", datetime.now().strftime("%Y-%m-%d"))
    os.makedirs(folder_path, exist_ok=True)

    # The same settings JSON is attached to every image of this call
    metadata_json = json.dumps(build_generation_metadata(state, actual_seed), ensure_ascii=False)

    saved_paths = []
    for idx, img in enumerate(images):
        # File name: time of saving, zero-padded seed, index within the batch
        timestamp = datetime.now().strftime("%H-%M-%S")
        filepath = os.path.join(folder_path, f"TGW_{timestamp}_{actual_seed:010d}_{idx:03d}.png")

        # Embed the settings as a PNG text chunk and save
        png_info = PngInfo()
        png_info.add_text("image_gen_settings", metadata_json)
        img.save(filepath, pnginfo=png_info)

        saved_paths.append(filepath)

    return saved_paths
def read_image_metadata(image_path):
    """
    Return the generation settings stored in an image, or None.

    None is returned when the image cannot be opened, carries no
    'image_gen_settings' text entry, or reading it fails (logged at debug level).
    """
    try:
        img = open_image_safely(image_path)
        if img is not None:
            try:
                if hasattr(img, 'text') and 'image_gen_settings' in img.text:
                    return json.loads(img.text['image_gen_settings'])
            finally:
                # Always release the file handle, whether or not settings were found
                img.close()
    except Exception as e:
        logger.debug(f"Could not read metadata from {image_path}: {e}")

    return None
def format_metadata_for_display(metadata):
    """
    Render a metadata dict as Markdown, one "**Label:** value" paragraph per field.

    Only known fields are shown, in a fixed order. Prompts longer than 200
    characters are cut to 200 characters followed by "...".
    """
    if not metadata:
        return "No generation settings found in this image."

    prompt_keys = ('image_prompt', 'image_neg_prompt')

    # (metadata key, label) pairs in the order they are displayed
    display_order = (
        ('image_prompt', 'Prompt'),
        ('image_neg_prompt', 'Negative Prompt'),
        ('image_width', 'Width'),
        ('image_height', 'Height'),
        ('image_aspect_ratio', 'Aspect Ratio'),
        ('image_steps', 'Steps'),
        ('image_cfg_scale', 'CFG Scale'),
        ('image_seed', 'Seed'),
        ('model', 'Model'),
        ('generated_at', 'Generated At'),
    )

    def shown_value(key):
        # Shorten long, non-empty prompts; every other value is shown as-is
        value = metadata[key]
        if key in prompt_keys and value and len(str(value)) > 200:
            return str(value)[:200] + "..."
        return value

    paragraphs = [f"**{label}:** {shown_value(key)}" for key, label in display_order if key in metadata]
    return "\n\n".join(paragraphs)
def get_all_history_images(force_refresh=False):
    """
    Get all history images sorted by modification time (newest first).

    The listing is cached in the module-level `_image_cache` and reused for
    up to 2 seconds unless `force_refresh` is set.

    Args:
        force_refresh: When True, rescan the output directory even if the
            cached listing is still fresh.

    Returns:
        list[str]: Paths of the .png/.jpg/.jpeg files found under
        user_data/image_outputs, newest first. Empty when that directory
        does not exist.
    """
    global _image_cache, _cache_timestamp

    output_dir = os.path.join("user_data", "image_outputs")
    if not os.path.exists(output_dir):
        return []

    # Check if we need to refresh cache
    current_time = time.time()
    if not force_refresh and _image_cache and (current_time - _cache_timestamp) < 2:
        return _image_cache

    image_files = []
    for root, _, files in os.walk(output_dir):
        for file in files:
            if not file.endswith((".png", ".jpg", ".jpeg")):
                continue

            full_path = os.path.join(root, file)
            try:
                modified_at = os.path.getmtime(full_path)
            except OSError:
                # The file disappeared between the directory walk and the
                # stat call (or is a dangling symlink): leave it out instead
                # of failing the whole listing.
                continue

            image_files.append((full_path, modified_at))

    image_files.sort(key=lambda x: x[1], reverse=True)
    _image_cache = [x[0] for x in image_files]
    _cache_timestamp = current_time

    return _image_cache
def get_paginated_images(page=0, force_refresh=False):
    """
    Return one page of history images.

    Returns:
        tuple: (images on the page, page index after clamping,
        total number of pages, total number of images)
    """
    all_images = get_all_history_images(force_refresh)
    total_images = len(all_images)

    # Ceiling division, with at least one (possibly empty) page
    total_pages = max(1, (total_images + IMAGES_PER_PAGE - 1) // IMAGES_PER_PAGE)

    # Keep the requested page inside [0, total_pages - 1]
    page = max(0, min(page, total_pages - 1))

    first = page * IMAGES_PER_PAGE
    last = min(first + IMAGES_PER_PAGE, total_images)
    return all_images[first:last], page, total_pages, total_images
def get_initial_page_info():
    """Build the page summary text shown when the gallery first loads."""
    first_page = get_paginated_images(0)
    page, total_pages, total_images = first_page[1], first_page[2], first_page[3]
    return f"Page {page + 1} of {total_pages} ({total_images} total images)"
def refresh_gallery(current_page=0):
    """Rescan the image history and return (images, page index, page summary) for the current page."""
    images, page, total_pages, total_images = get_paginated_images(current_page, force_refresh=True)
    return images, page, f"Page {page + 1} of {total_pages} ({total_images} total images)"
def go_to_page(page_num, current_page):
    """Jump to the page typed by the user (1-indexed); stay on the current page if it is not a number."""
    try:
        target_page = int(page_num) - 1  # Convert to 0-indexed
    except (ValueError, TypeError):
        target_page = current_page

    images, page, total_pages, total_images = get_paginated_images(target_page)
    return images, page, f"Page {page + 1} of {total_pages} ({total_images} total images)"
def next_page(current_page):
    """Advance one page and return (images, page index, page summary)."""
    images, page, total_pages, total_images = get_paginated_images(current_page + 1)
    return images, page, f"Page {page + 1} of {total_pages} ({total_images} total images)"
def prev_page(current_page):
    """Step back one page and return (images, page index, page summary)."""
    images, page, total_pages, total_images = get_paginated_images(current_page - 1)
    return images, page, f"Page {page + 1} of {total_pages} ({total_images} total images)"
def on_gallery_select(evt: gr.SelectData, current_page):
    """
    Resolve a gallery click to an image path and its formatted settings.

    Returns:
        tuple: (image path, Markdown text). The path is "" when nothing is
        selected or the index falls outside the cached image list.
    """
    if evt.index is None:
        return "", "Select an image to view its settings"

    # Fill the cache on first use
    if not _image_cache:
        get_all_history_images()

    history = _image_cache

    # Position within the full list = page offset + position on the page
    absolute_index = current_page * IMAGES_PER_PAGE + evt.index
    if absolute_index >= len(history):
        return "", "Image not found"

    image_path = history[absolute_index]
    return image_path, format_metadata_for_display(read_image_metadata(image_path))
def send_to_generate(selected_image_path):
    """
    Turn the settings stored in the selected image into Generate-tab updates.

    Returns a list of 8 component updates (prompt, negative prompt, width,
    height, aspect ratio, steps, seed, CFG scale) followed by a status string.
    When there is nothing to load, the 8 updates are no-ops.
    """
    def nothing_to_load(message):
        # Eight no-op updates plus the explanation
        return [gr.update()] * 8 + [message]

    if not selected_image_path or not os.path.exists(selected_image_path):
        return nothing_to_load("No image selected")

    metadata = read_image_metadata(selected_image_path)
    if not metadata:
        return nothing_to_load("No settings found in this image")

    # (metadata key, fallback value), in the order of the output components
    field_defaults = (
        ('image_prompt', ''),
        ('image_neg_prompt', ''),
        ('image_width', 1024),
        ('image_height', 1024),
        ('image_aspect_ratio', '1:1 Square'),
        ('image_steps', 9),
        ('image_seed', -1),
        ('image_cfg_scale', 0.0),
    )
    updates = [gr.update(value=metadata.get(key, fallback)) for key, fallback in field_defaults]

    status = f"✓ Settings loaded from image (seed: {metadata.get('image_seed', 'unknown')})"
    return updates + [status]
def read_dropped_image_metadata(image_path):
    """Return the formatted generation settings of a dropped/uploaded image, or a hint when none is given."""
    if image_path:
        return format_metadata_for_display(read_image_metadata(image_path))

    return "Drop an image to view its generation settings."
def create_ui():
# Builds the "Image AI" tab with three sub-tabs (Generate, Gallery, Model).
# Every component is stored in shared.gradio under its settings/element name,
# and initial values are read from shared.settings.
# If a model is saved in the settings, record it as the current image model name.
if shared.settings['image_model_menu'] != 'None':
shared.image_model_name = shared.settings['image_model_menu']
with gr.Tab("Image AI", elem_id="image-ai-tab"):
with gr.Tabs():
# TAB 1: GENERATE
with gr.TabItem("Generate"):
with gr.Row():
with gr.Column(scale=4, min_width=350):
# Prompt inputs
shared.gradio['image_prompt'] = gr.Textbox(
label="Prompt",
placeholder="Describe your imagination...",
lines=3,
autofocus=True,
value=shared.settings['image_prompt']
)
shared.gradio['image_neg_prompt'] = gr.Textbox(
label="Negative Prompt",
placeholder="Low quality...",
lines=3,
value=shared.settings['image_neg_prompt']
)
# Optional LLM-written prompt variations; the instruction box starts
# visible only when the checkbox setting is enabled.
shared.gradio['image_llm_variations'] = gr.Checkbox(
value=shared.settings['image_llm_variations'],
label='LLM Prompt Variations',
elem_id="llm-prompt-variations",
)
shared.gradio['image_llm_variations_prompt'] = gr.Textbox(
value=shared.settings['image_llm_variations_prompt'],
label='Variation Prompt',
lines=3,
placeholder='Instructions for generating prompt variations...',
visible=shared.settings['image_llm_variations'],
info='Use the loaded LLM to generate creative prompt variations for each sequential batch.'
)
# Generate/Stop buttons (Stop starts hidden) and the progress display
shared.gradio['image_generate_btn'] = gr.Button("Generate", variant="primary", size="lg")
shared.gradio['image_stop_btn'] = gr.Button("Stop", size="lg", visible=False)
shared.gradio['image_progress'] = gr.HTML(
value=progress_bar_html(),
elem_id="image-progress"
)
gr.Markdown("### Dimensions")
# Width/height sliders share the 256..2048 range and the STEP granularity
with gr.Row():
with gr.Column():
shared.gradio['image_width'] = gr.Slider(256, 2048, value=shared.settings['image_width'], step=STEP, label="Width")
with gr.Column():
shared.gradio['image_height'] = gr.Slider(256, 2048, value=shared.settings['image_height'], step=STEP, label="Height")
shared.gradio['image_swap_btn'] = gr.Button("⇄ Swap", elem_classes='refresh-button', scale=0, min_width=80, elem_id="swap-height-width")
with gr.Row():
shared.gradio['image_aspect_ratio'] = gr.Radio(
choices=["1:1 Square", "16:9 Cinema", "9:16 Mobile", "4:3 Photo", "Custom"],
value=shared.settings['image_aspect_ratio'],
label="Aspect Ratio",
interactive=True
)
gr.Markdown("### Config")
# Sampling settings on one side, batch settings on the other
with gr.Row():
with gr.Column():
shared.gradio['image_steps'] = gr.Slider(1, 100, value=shared.settings['image_steps'], step=1, label="Steps")
shared.gradio['image_cfg_scale'] = gr.Slider(
0.0, 10.0,
value=shared.settings['image_cfg_scale'],
step=0.1,
label="CFG Scale",
info="Z-Image Turbo: 0.0 | Qwen: 4.0"
)
shared.gradio['image_seed'] = gr.Number(label="Seed", value=shared.settings['image_seed'], precision=0, info="-1 = Random")
with gr.Column():
shared.gradio['image_batch_size'] = gr.Slider(1, 32, value=shared.settings['image_batch_size'], step=1, label="Batch Size (VRAM Heavy)", info="Generates N images at once.")
shared.gradio['image_batch_count'] = gr.Slider(1, 128, value=shared.settings['image_batch_count'], step=1, label="Sequential Count (Loop)", info="Repeats the generation N times.")
# Output gallery for the generated images
with gr.Column(scale=6, min_width=500):
with gr.Column(elem_classes=["viewport-container"]):
shared.gradio['image_output_gallery'] = gr.Gallery(label="Output", show_label=False, columns=2, rows=2, height="80vh", object_fit="contain", preview=True, elem_id="image-output-gallery")
# TAB 2: GALLERY (with pagination)
with gr.TabItem("Gallery"):
with gr.Row():
with gr.Column(scale=3):
# Pagination controls
with gr.Row():
shared.gradio['image_refresh_history'] = gr.Button("🔄 Refresh", elem_classes="refresh-button")
shared.gradio['image_prev_page'] = gr.Button("◀ Prev Page", elem_classes="refresh-button")
# The callable is passed itself (not its result) as the initial value
shared.gradio['image_page_info'] = gr.Markdown(value=get_initial_page_info, elem_id="image-page-info")
shared.gradio['image_next_page'] = gr.Button("Next Page ▶", elem_classes="refresh-button")
shared.gradio['image_page_input'] = gr.Number(value=1, label="Page", precision=0, minimum=1, scale=0, min_width=80)
shared.gradio['image_go_to_page'] = gr.Button("Go", elem_classes="refresh-button", scale=0, min_width=50)
# State for current page and selected image path
shared.gradio['image_current_page'] = gr.State(value=0)
shared.gradio['image_selected_path'] = gr.State(value="")
# Paginated gallery using gr.Gallery
shared.gradio['image_history_gallery'] = gr.Gallery(
value=lambda: get_paginated_images(0)[0],
label="Image History",
show_label=False,
columns=6,
object_fit="cover",
height="auto",
allow_preview=True,
elem_id="image-history-gallery"
)
# Side panel: settings of the selected image and an import drop zone
with gr.Column(scale=1):
gr.Markdown("### Generation Settings")
shared.gradio['image_settings_display'] = gr.Markdown("Select an image to view its settings")
shared.gradio['image_send_to_generate'] = gr.Button("Send to Generate", variant="primary")
shared.gradio['image_gallery_status'] = gr.Markdown("")
gr.Markdown("### Import Image")
shared.gradio['image_drop_upload'] = gr.Image(
label="Drop image here to view settings",
type="filepath",
height=150
)
# TAB 3: MODEL
with gr.TabItem("Model"):
with gr.Row():
with gr.Column():
# Model selection with refresh/load/unload buttons
with gr.Row():
shared.gradio['image_model_menu'] = gr.Dropdown(
choices=utils.get_available_image_models(),
value=shared.settings['image_model_menu'],
label='Model',
elem_classes='slim-dropdown'
)
shared.gradio['image_refresh_models'] = gr.Button("🔄", elem_classes='refresh-button', scale=0, min_width=40)
shared.gradio['image_load_model'] = gr.Button("Load", variant='primary', elem_classes='refresh-button')
shared.gradio['image_unload_model'] = gr.Button("Unload", elem_classes='refresh-button')
gr.Markdown("## Settings")
# Loading options: quantization, dtype, attention backend, compile, CPU offload
with gr.Row():
with gr.Column():
shared.gradio['image_quant'] = gr.Dropdown(
label='Quantization',
choices=['none', 'bnb-8bit', 'bnb-4bit', 'torchao-int8wo', 'torchao-fp4', 'torchao-float8wo'],
value=shared.settings['image_quant'],
info='BnB: bitsandbytes quantization. torchao: int8wo, fp4, float8wo.'
)
shared.gradio['image_dtype'] = gr.Dropdown(
choices=['bfloat16', 'float16'],
value=shared.settings['image_dtype'],
label='Data Type',
info='bfloat16 recommended for modern GPUs'
)
shared.gradio['image_attn_backend'] = gr.Dropdown(
choices=['sdpa', 'flash_attention_2'],
value=shared.settings['image_attn_backend'],
label='Attention Backend',
info='SDPA is default. Flash Attention requires compatible GPU.'
)
with gr.Column():
shared.gradio['image_compile'] = gr.Checkbox(
value=shared.settings['image_compile'],
label='Compile Model',
info='Faster inference after first run. First run will be slow.'
)
shared.gradio['image_cpu_offload'] = gr.Checkbox(
value=shared.settings['image_cpu_offload'],
label='CPU Offload',
info='Enable for low VRAM GPUs. Slower but uses less memory.'
)
# Model download controls and a status line
with gr.Column():
shared.gradio['image_download_path'] = gr.Textbox(
label="Download model",
placeholder="Tongyi-MAI/Z-Image-Turbo",
info="Enter HuggingFace path. Use : for branch, e.g. user/model:main"
)
shared.gradio['image_download_btn'] = gr.Button("Download", variant='primary')
shared.gradio['image_model_status'] = gr.Markdown(value="")
def create_event_handlers():
    """Register the Image AI tab's event listeners on the components stored in `shared.gradio`.

    Listeners are attached in this order: dimension controls, generation
    triggers, stop button, model management, gallery pagination, gallery
    selection, "send to generate", dropped-image metadata and the LLM
    variations visibility toggle.
    """
    widgets = shared.gradio

    # Dimension controls
    widgets['image_aspect_ratio'].change(
        apply_aspect_ratio,
        inputs=gradio('image_aspect_ratio', 'image_width', 'image_height'),
        outputs=gradio('image_width', 'image_height'),
        show_progress=False
    )
    widgets['image_width'].release(
        update_height_from_width,
        inputs=gradio('image_width', 'image_aspect_ratio'),
        outputs=gradio('image_height'),
        show_progress=False
    )
    widgets['image_height'].release(
        update_width_from_height,
        inputs=gradio('image_height', 'image_aspect_ratio'),
        outputs=gradio('image_width'),
        show_progress=False
    )
    widgets['image_swap_btn'].click(
        swap_dimensions_and_update_ratio,
        inputs=gradio('image_width', 'image_height', 'image_aspect_ratio'),
        outputs=gradio('image_width', 'image_height', 'image_aspect_ratio'),
        show_progress=False
    )

    # Generation: the button click and both prompt boxes' submit events run
    # the same four-step chain (gather state -> show Stop -> generate -> show Generate).
    generation_triggers = (
        widgets['image_generate_btn'].click,
        widgets['image_prompt'].submit,
        widgets['image_neg_prompt'].submit,
    )
    for trigger in generation_triggers:
        gathered = trigger(
            ui.gather_interface_values,
            gradio(shared.input_elements),
            gradio('interface_state')
        )
        running = gathered.then(
            lambda: [gr.update(visible=True), gr.update(visible=False)],
            None,
            gradio('image_stop_btn', 'image_generate_btn')
        )
        finished = running.then(
            generate,
            gradio('interface_state'),
            gradio('image_output_gallery', 'image_progress'),
            show_progress=False
        )
        finished.then(
            lambda: [gr.update(visible=False), gr.update(visible=True)],
            None,
            gradio('image_stop_btn', 'image_generate_btn')
        )

    # Stop button
    widgets['image_stop_btn'].click(stop_everything_event, None, None, show_progress=False)

    # Model management
    widgets['image_refresh_models'].click(
        lambda: gr.update(choices=utils.get_available_image_models()),
        inputs=None,
        outputs=gradio('image_model_menu'),
        show_progress=False
    )
    widgets['image_load_model'].click(
        load_image_model_wrapper,
        inputs=gradio('image_model_menu', 'image_dtype', 'image_attn_backend', 'image_cpu_offload', 'image_compile', 'image_quant'),
        outputs=gradio('image_model_status'),
        show_progress=True
    )
    widgets['image_unload_model'].click(
        unload_image_model_wrapper,
        inputs=None,
        outputs=gradio('image_model_status'),
        show_progress=False
    )
    widgets['image_download_btn'].click(
        download_image_model_wrapper,
        inputs=gradio('image_download_path'),
        outputs=gradio('image_model_status', 'image_model_menu'),
        show_progress=True
    )

    # Gallery pagination handlers: all of them refresh the same three outputs
    page_outputs = ('image_history_gallery', 'image_current_page', 'image_page_info')
    paging_buttons = (
        ('image_refresh_history', refresh_gallery),
        ('image_next_page', next_page),
        ('image_prev_page', prev_page),
    )
    for button_key, handler in paging_buttons:
        widgets[button_key].click(
            handler,
            inputs=gradio('image_current_page'),
            outputs=gradio(*page_outputs),
            show_progress=False
        )
    widgets['image_go_to_page'].click(
        go_to_page,
        inputs=gradio('image_page_input', 'image_current_page'),
        outputs=gradio(*page_outputs),
        show_progress=False
    )

    # Image selection from gallery
    widgets['image_history_gallery'].select(
        on_gallery_select,
        inputs=gradio('image_current_page'),
        outputs=gradio('image_selected_path', 'image_settings_display'),
        show_progress=False
    )

    # Send to Generate: copies the selected image's settings into the
    # generation controls, and runs the tab-switch JS snippet in the browser.
    send_targets = (
        'image_prompt',
        'image_neg_prompt',
        'image_width',
        'image_height',
        'image_aspect_ratio',
        'image_steps',
        'image_seed',
        'image_cfg_scale',
        'image_gallery_status',
    )
    widgets['image_send_to_generate'].click(
        send_to_generate,
        inputs=gradio('image_selected_path'),
        outputs=gradio(*send_targets),
        js=f'() => {{{ui.switch_tabs_js}; switch_to_image_ai_generate()}}',
        show_progress=False
    )

    widgets['image_drop_upload'].change(
        read_dropped_image_metadata,
        inputs=gradio('image_drop_upload'),
        outputs=gradio('image_settings_display'),
        show_progress=False
    )

    # LLM Variations visibility toggle: the custom-instruction box is shown
    # only while the checkbox value is truthy.
    widgets['image_llm_variations'].change(
        lambda x: gr.update(visible=x),
        inputs=gradio('image_llm_variations'),
        outputs=gradio('image_llm_variations_prompt'),
        show_progress=False
    )
def generate_prompt_variation(state):
    """Ask the loaded LLM for a rewritten version of `state['image_prompt']`.

    Args:
        state: interface state dict. Reads 'image_prompt' and, optionally,
            'image_llm_variations_prompt' (custom instruction text).

    Returns:
        The cleaned-up LLM reply, or the original prompt when no LLM is
        loaded or the reply is empty after cleanup.
    """
    from modules.chat import generate_chat_prompt
    from modules.text_generation import generate_reply

    original_prompt = state['image_prompt']

    # Without a loaded LLM there is nothing to ask; fall back to the input.
    llm_ready, _ = check_model_loaded()
    if not llm_ready:
        logger.warning("No LLM loaded for prompt variation. Using original prompt.")
        return original_prompt

    # Custom instruction if the user provided one, otherwise the built-in default.
    instruction = state.get('image_llm_variations_prompt', '') or 'Write a variation of the image generation prompt above. Consider the intent of the user with that prompt and write something that will likely please them, with added details. Output only the new prompt. Do not add any explanations, prefixes, or additional text.'
    llm_message = f"{original_prompt}\n\n=====\n\n{instruction}"

    # Work on a copy with an empty chat history and thinking disabled, so the
    # caller's state is left untouched.
    llm_state = state.copy()
    llm_state.update({
        'history': {'internal': [], 'visible': [], 'metadata': {}},
        'auto_max_new_tokens': True,
        'enable_thinking': False,
        'reasoning_effort': 'low',
        'start_with': "",
    })

    chat_prompt = generate_chat_prompt(llm_message, llm_state)

    # Only the last value yielded by the reply generator is kept.
    text = ""
    for chunk in generate_reply(chat_prompt, llm_state, stopping_strings=[], is_chat=True):
        text = chunk

    # Strip thinking blocks if present: keep what follows the last occurrence
    # of the first marker (in this order) found in the reply.
    thinking_end_markers = (
        "</think>",
        "<|start|>assistant<|channel|>final<|message|>",
        "</seed:think>",
    )
    for marker in thinking_end_markers:
        if marker in text:
            text = text.rsplit(marker, 1)[1]
            break

    text = text.strip()

    # Drop one pair of wrapping double quotes.
    wrapped_in_quotes = len(text) >= 2 and text.startswith('"') and text.endswith('"')
    if wrapped_in_quotes:
        text = text[1:-1]

    if not text:
        return original_prompt

    logger.info("Prompt variation:")
    print(text)
    return text
def progress_bar_html(progress=0, text=""):
    """Generate HTML for progress bar. Empty div when progress <= 0.

    Args:
        progress: completed fraction; values above 1 are drawn as a full bar.
        text: caption placed under the bar. It is inserted as-is (not
            HTML-escaped), so callers should pass plain text.

    Returns:
        An HTML string: a separator div when `progress <= 0`, otherwise the
        progress wrapper with the fill width set as a percentage.
    """
    if progress <= 0:
        return '<div class="image-ai-separator"></div>'

    # Cap the fill at 100% so a fraction slightly above 1 (e.g. more step
    # callbacks than the requested step count) cannot overflow the track.
    width_pct = min(progress, 1) * 100

    return f'''<div class="image-ai-progress-wrapper">
<div class="image-ai-progress-track">
<div class="image-ai-progress-fill" style="width: {width_pct:.1f}%;"></div>
</div>
<div class="image-ai-progress-text">{text}</div>
</div>'''
def generate(state, save_images=True):
    """
    Generate images using the loaded model.
    Automatically adjusts parameters based on pipeline type.

    This is a generator: every yield is a `(images, progress_html)` pair for
    the output gallery and the progress area.

    Args:
        state: interface state dict (reads the `image_*` keys used below).
        save_images: when true, each batch is passed to `save_generated_images`
            and the gallery receives what that returns; otherwise the raw
            pipeline images are used.

    Errors are logged and reported by yielding a final pair with an empty
    progress bar; they are not re-raised to the caller.
    """
    import queue
    import threading

    import torch

    from modules.torch_utils import clear_torch_cache, get_device

    # Created before the try block so the error handler can still show the
    # batches that finished before a failure.
    all_images = []

    try:
        model_name = state['image_model_menu']
        if not model_name or model_name == 'None':
            logger.error("No image model selected. Go to the Model tab and select a model.")
            yield [], progress_bar_html()
            return

        # Load the selected model on demand if nothing is loaded yet
        if shared.image_model is None:
            result = load_image_model(
                model_name,
                dtype=state['image_dtype'],
                attn_backend=state['image_attn_backend'],
                cpu_offload=state['image_cpu_offload'],
                compile_model=state['image_compile'],
                quant_method=state['image_quant']
            )
            if result is None:
                logger.error(f"Failed to load model `{model_name}`.")
                yield [], progress_bar_html()
                return

            shared.image_model_name = model_name

        # -1 means "pick a random seed"
        seed = state['image_seed']
        if seed == -1:
            seed = random.randint(0, 2**32 - 1)

        device = get_device()
        if device is None:
            device = "cpu"

        generator = torch.Generator(device)

        # Get pipeline type for parameter adjustment
        pipeline_type = getattr(shared, 'image_pipeline_type', None)
        if pipeline_type is None:
            pipeline_type = get_pipeline_type(shared.image_model)

        prompt = state['image_prompt']
        shared.stop_everything = False

        batch_count = int(state['image_batch_count'])
        steps_per_batch = int(state['image_steps'])
        total_steps = steps_per_batch * batch_count

        # Queue for progress updates from callback
        progress_queue = queue.Queue()

        def interrupt_callback(pipe, step_index, timestep, callback_kwargs):
            # Runs on the worker thread after each step: flags the pipeline
            # for interruption when a stop was requested, then reports the
            # 1-based step number to the polling loop below.
            if shared.stop_everything:
                pipe._interrupt = True

            progress_queue.put(step_index + 1)
            return callback_kwargs

        gen_kwargs = {
            "prompt": prompt,
            "negative_prompt": state['image_neg_prompt'],
            "height": int(state['image_height']),
            "width": int(state['image_width']),
            "num_inference_steps": steps_per_batch,
            "num_images_per_prompt": int(state['image_batch_size']),
            "generator": generator,
            "callback_on_step_end": interrupt_callback,
        }

        # The guidance value goes under a different keyword for qwenimage
        cfg_val = state.get('image_cfg_scale', 0.0)
        if pipeline_type == 'qwenimage':
            gen_kwargs["true_cfg_scale"] = cfg_val
        else:
            gen_kwargs["guidance_scale"] = cfg_val

        batches_done = 0
        t0 = time.time()

        for batch_idx in range(batch_count):
            if shared.stop_everything:
                break

            # Each batch gets its own seed: base seed + batch index
            generator.manual_seed(int(seed + batch_idx))

            # Generate prompt variation if enabled
            if state['image_llm_variations']:
                gen_kwargs["prompt"] = generate_prompt_variation(state)

            # Run generation in thread so we can yield progress
            result_holder = []
            error_holder = []

            def run_batch():
                try:
                    # Apply magic suffix only at generation time for qwenimage
                    clean_prompt = gen_kwargs["prompt"]
                    if pipeline_type == 'qwenimage':
                        magic_suffix = ", Ultra HD, 4K, cinematic composition"
                        if magic_suffix.strip(", ") not in clean_prompt:
                            gen_kwargs["prompt"] = clean_prompt + magic_suffix

                    result_holder.extend(shared.image_model(**gen_kwargs).images)
                    gen_kwargs["prompt"] = clean_prompt  # restore
                except Exception as e:
                    # Handed back to the generator thread and re-raised there
                    error_holder.append(e)

            thread = threading.Thread(target=run_batch)
            thread.start()

            # Yield progress updates while generation runs
            while thread.is_alive():
                try:
                    step = progress_queue.get(timeout=0.1)
                    absolute_step = batch_idx * steps_per_batch + step
                    pct = absolute_step / total_steps
                    text = f"Batch {batch_idx + 1}/{batch_count} — Step {step}/{steps_per_batch}"
                    yield all_images, progress_bar_html(pct, text)
                except queue.Empty:
                    pass

            thread.join()

            # Discard step notifications that arrived after the last poll so
            # they are not misread as progress of the next batch.
            while True:
                try:
                    progress_queue.get_nowait()
                except queue.Empty:
                    break

            if error_holder:
                raise error_holder[0]

            # Save this batch's images with the actual prompt and seed used
            if save_images:
                batch_seed = seed + batch_idx
                original_prompt = state['image_prompt']
                state['image_prompt'] = gen_kwargs["prompt"]
                try:
                    saved_paths = save_generated_images(result_holder, state, batch_seed)
                finally:
                    # Always put the user's prompt back, even if saving fails
                    state['image_prompt'] = original_prompt

                # Use file paths so gallery serves actual PNGs with metadata
                all_images.extend(saved_paths)
            else:
                # Fallback to PIL objects if not saving
                all_images.extend(result_holder)

            batches_done += 1
            yield all_images, progress_bar_html((batch_idx + 1) / batch_count, f"Batch {batch_idx + 1}/{batch_count} complete")

        elapsed = time.time() - t0

        # Report what was actually produced; the run may have been stopped
        # before every batch ran. An interrupted batch is counted as a full
        # batch of steps, so the rate is approximate in that case.
        total_images = len(all_images)
        steps_done = batches_done * steps_per_batch
        steps_per_second = steps_done / elapsed if elapsed > 0 else 0.0
        logger.info(f'Generated {total_images} {"image" if total_images == 1 else "images"} in {elapsed:.2f} seconds ({steps_per_second:.2f} steps/s, seed {seed})')

        yield all_images, progress_bar_html()
        clear_torch_cache()

    except Exception as e:
        logger.error(f"Image generation failed: {e}")
        traceback.print_exc()
        # Keep already finished batches visible instead of clearing the gallery
        yield all_images, progress_bar_html()
        clear_torch_cache()
def load_image_model_wrapper(model_name, dtype, attn_backend, cpu_offload, compile_model, quant_method):
    """Load an image model for the UI, yielding status strings as it goes.

    Yields "No model selected" when `model_name` is empty or the string
    'None'. Otherwise yields a "Loading" notice, unloads the current model,
    loads the requested one and yields a success or failure line. Any
    exception is reported as a formatted traceback instead of being raised.
    """
    nothing_selected = (not model_name) or model_name == 'None'
    if nothing_selected:
        yield "No model selected"
        return

    try:
        yield f"Loading `{model_name}`..."

        # Drop whatever is currently loaded before loading the new model
        unload_image_model()

        load_options = {
            'dtype': dtype,
            'attn_backend': attn_backend,
            'cpu_offload': cpu_offload,
            'compile_model': compile_model,
            'quant_method': quant_method,
        }
        loaded = load_image_model(model_name, **load_options)

        if loaded is None:
            yield f"✗ Failed to load `{model_name}`"
        else:
            # Record the name only after a successful load
            shared.image_model_name = model_name
            yield f"✓ Loaded **{model_name}** (quantization: {quant_method})"
    except Exception:
        yield f"Error:\n```\n{traceback.format_exc()}\n```"
def unload_image_model_wrapper():
    """Unload the current image model and return a status line for the UI.

    The name stored in `shared.image_model_name` is read before unloading so
    it can be reported. The string 'None' is treated as "nothing was loaded".
    """
    name_before = shared.image_model_name
    unload_image_model()

    if name_before == 'None':
        return "No model loaded"

    return f"Model: **{name_before}** (unloaded)"
def download_image_model_wrapper(model_path):
    """Download a model repository from Hugging Face into the image model folder.

    Generator yielding `(status_markdown, model_menu_update)` pairs.

    Args:
        model_path: `user/model`, optionally followed by `:branch`. A leading
            `https://huggingface.co/` or `huggingface.co/` is removed, as are
            surrounding whitespace and slashes. A missing or empty branch
            means `main`.

    The target folder is `shared.args.image_model_dir/<model id with '/'
    replaced by '_'>`. On success the model dropdown is refreshed and the new
    folder selected. Any exception, including a failed `huggingface_hub`
    import, is reported as a formatted traceback instead of being raised.
    """
    # Normalise first so None and whitespace-only input are rejected here
    # instead of reaching the downloader as an empty repo id.
    model_path = (model_path or '').strip()
    if not model_path:
        yield "No model specified", gr.update()
        return

    try:
        # Imported inside the try so an import failure shows up in the status
        # box like any other error.
        from huggingface_hub import snapshot_download

        for url_prefix in ('https://huggingface.co/', 'huggingface.co/'):
            if model_path.startswith(url_prefix):
                model_path = model_path[len(url_prefix):]
                break

        if ':' in model_path:
            model_id, branch = model_path.rsplit(':', 1)
        else:
            model_id, branch = model_path, 'main'

        # A pasted URL often ends with '/', which is not part of the repo id;
        # "user/model:" (empty branch) falls back to the default branch.
        model_id = model_id.strip().strip('/')
        branch = branch.strip() or 'main'

        if not model_id:
            yield "No model specified", gr.update()
            return

        folder_name = model_id.replace('/', '_')
        output_folder = Path(shared.args.image_model_dir) / folder_name

        yield f"Downloading `{model_id}` (branch: {branch})...", gr.update()

        snapshot_download(
            repo_id=model_id,
            revision=branch,
            local_dir=output_folder,
            local_dir_use_symlinks=False,
        )

        new_choices = utils.get_available_image_models()
        yield f"✓ Downloaded to `{output_folder}`", gr.update(choices=new_choices, value=folder_name)
    except Exception:
        yield f"Error:\n```\n{traceback.format_exc()}\n```", gr.update()

View file

@ -50,7 +50,6 @@ def create_ui():
with gr.Column():
shared.gradio['vram_info'] = gr.HTML(value=get_initial_vram_info())
shared.gradio['cpu_moe'] = gr.Checkbox(label="cpu-moe", value=shared.args.cpu_moe, info='Move the experts to the CPU. Saves VRAM on MoE models.')
shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming-llm", value=shared.args.streaming_llm, info='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
shared.gradio['load_in_8bit'] = gr.Checkbox(label="load-in-8bit", value=shared.args.load_in_8bit)
shared.gradio['load_in_4bit'] = gr.Checkbox(label="load-in-4bit", value=shared.args.load_in_4bit)
@ -84,7 +83,6 @@ def create_ui():
shared.gradio['threads'] = gr.Slider(label="threads", minimum=0, step=1, maximum=256, value=shared.args.threads)
shared.gradio['threads_batch'] = gr.Slider(label="threads_batch", minimum=0, step=1, maximum=256, value=shared.args.threads_batch)
shared.gradio['batch_size'] = gr.Slider(label="batch_size", minimum=1, maximum=4096, step=1, value=shared.args.batch_size)
shared.gradio['ubatch_size'] = gr.Slider(label="ubatch_size", minimum=1, maximum=4096, step=1, value=shared.args.ubatch_size)
shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='List of proportions to split the model across multiple GPUs. Example: 60,40')
shared.gradio['extra_flags'] = gr.Textbox(label='extra-flags', info='Additional flags to pass to llama-server. Format: "flag1=value1,flag2,flag3=value3". Example: "override-tensor=exps=CPU"', value=shared.args.extra_flags)
shared.gradio['cpu_memory'] = gr.Number(label="Maximum CPU memory in GiB. Use this for CPU offloading.", value=shared.args.cpu_memory)
@ -96,7 +94,7 @@ def create_ui():
shared.gradio['num_experts_per_token'] = gr.Number(label="Number of experts per token", value=shared.args.num_experts_per_token, info='Only applies to MoE models like Mixtral.')
with gr.Column():
shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='Use PyTorch in CPU mode.')
shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='llama.cpp: Use llama-cpp-python compiled without GPU acceleration. Transformers: use PyTorch in CPU mode.')
shared.gradio['disk'] = gr.Checkbox(label="disk", value=shared.args.disk)
shared.gradio['row_split'] = gr.Checkbox(label="row_split", value=shared.args.row_split, info='Split the model by rows across GPUs. This may improve multi-gpu performance.')
shared.gradio['no_kv_offload'] = gr.Checkbox(label="no_kv_offload", value=shared.args.no_kv_offload, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.')

View file

@ -86,7 +86,7 @@ def check_model_loaded():
return True, None
def resolve_model_path(model_name_or_path, image_model=False):
def resolve_model_path(model_name_or_path):
"""
Resolves a model path, checking for a direct path
before the default models directory.
@ -95,8 +95,6 @@ def resolve_model_path(model_name_or_path, image_model=False):
path_candidate = Path(model_name_or_path)
if path_candidate.exists():
return path_candidate
elif image_model:
return Path(f'{shared.args.image_model_dir}/{model_name_or_path}')
else:
return Path(f'{shared.args.model_dir}/{model_name_or_path}')
@ -155,24 +153,6 @@ def get_available_models():
return filtered_gguf_files + model_dirs
def get_available_image_models():
model_dir = Path(shared.args.image_model_dir)
model_dir.mkdir(parents=True, exist_ok=True)
# Find valid model directories
model_dirs = []
for item in os.listdir(model_dir):
item_path = model_dir / item
if not item_path.is_dir():
continue
model_dirs.append(item)
model_dirs = sorted(model_dirs, key=natural_keys)
return model_dirs
def get_available_ggufs():
model_list = []
model_dir = Path(shared.args.model_dir)

View file

@ -3,17 +3,15 @@ audioop-lts<1.0; python_version >= "3.13"
bitsandbytes==0.48.*
colorama
datasets
diffusers==0.36.*
einops
fastapi==0.112.4
flash-linear-attention==0.4.0
flash-linear-attention==0.3.2
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
markdown
numpy==2.2.*
pandas
peft==0.18.*
peft==0.17.*
Pillow>=9.5.0
psutil
pydantic==2.11.0
@ -22,13 +20,12 @@ python-docx==1.1.2
pyyaml
requests
rich
safetensors==0.7.*
safetensors==0.6.*
scipy
sentencepiece
tensorboard
torchao==0.14.*
transformers==4.57.*
triton-windows==3.5.1.post22; platform_system == "Windows"
triton-windows==3.5.0.post21; platform_system == "Windows"
tqdm
wandb
@ -42,10 +39,10 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.18/exllamav3-0.0.18+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.18/exllamav3-0.0.18+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.11/exllamav3-0.0.11+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.11/exllamav3-0.0.11+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"

View file

@ -2,16 +2,14 @@ accelerate==1.8.*
audioop-lts<1.0; python_version >= "3.13"
colorama
datasets
diffusers==0.36.*
einops
fastapi==0.112.4
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
markdown
numpy==2.2.*
pandas
peft==0.18.*
peft==0.17.*
Pillow>=9.5.0
psutil
pydantic==2.11.0
@ -20,13 +18,12 @@ python-docx==1.1.2
pyyaml
requests
rich
safetensors==0.7.*
safetensors==0.6.*
scipy
sentencepiece
tensorboard
torchao==0.14.*
transformers==4.57.*
triton-windows==3.5.1.post22; platform_system == "Windows"
triton-windows==3.5.0.post21; platform_system == "Windows"
tqdm
wandb
@ -40,7 +37,7 @@ sse-starlette==1.6.5
tiktoken
# AMD wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"

View file

@ -2,16 +2,14 @@ accelerate==1.8.*
audioop-lts<1.0; python_version >= "3.13"
colorama
datasets
diffusers==0.36.*
einops
fastapi==0.112.4
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
markdown
numpy==2.2.*
pandas
peft==0.18.*
peft==0.17.*
Pillow>=9.5.0
psutil
pydantic==2.11.0
@ -20,13 +18,12 @@ python-docx==1.1.2
pyyaml
requests
rich
safetensors==0.7.*
safetensors==0.6.*
scipy
sentencepiece
tensorboard
torchao==0.14.*
transformers==4.57.*
triton-windows==3.5.1.post22; platform_system == "Windows"
triton-windows==3.5.0.post21; platform_system == "Windows"
tqdm
wandb
@ -40,7 +37,7 @@ sse-starlette==1.6.5
tiktoken
# AMD wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"

View file

@ -2,16 +2,14 @@ accelerate==1.8.*
audioop-lts<1.0; python_version >= "3.13"
colorama
datasets
diffusers==0.36.*
einops
fastapi==0.112.4
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
markdown
numpy==2.2.*
pandas
peft==0.18.*
peft==0.17.*
Pillow>=9.5.0
psutil
pydantic==2.11.0
@ -20,13 +18,12 @@ python-docx==1.1.2
pyyaml
requests
rich
safetensors==0.7.*
safetensors==0.6.*
scipy
sentencepiece
tensorboard
torchao==0.14.*
transformers==4.57.*
triton-windows==3.5.1.post22; platform_system == "Windows"
triton-windows==3.5.0.post21; platform_system == "Windows"
tqdm
wandb
@ -40,5 +37,5 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"

View file

@ -2,16 +2,14 @@ accelerate==1.8.*
audioop-lts<1.0; python_version >= "3.13"
colorama
datasets
diffusers==0.36.*
einops
fastapi==0.112.4
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
markdown
numpy==2.2.*
pandas
peft==0.18.*
peft==0.17.*
Pillow>=9.5.0
psutil
pydantic==2.11.0
@ -20,13 +18,12 @@ python-docx==1.1.2
pyyaml
requests
rich
safetensors==0.7.*
safetensors==0.6.*
scipy
sentencepiece
tensorboard
torchao==0.14.*
transformers==4.57.*
triton-windows==3.5.1.post22; platform_system == "Windows"
triton-windows==3.5.0.post21; platform_system == "Windows"
tqdm
wandb
@ -40,5 +37,6 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"

View file

@ -2,16 +2,14 @@ accelerate==1.8.*
audioop-lts<1.0; python_version >= "3.13"
colorama
datasets
diffusers==0.36.*
einops
fastapi==0.112.4
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
markdown
numpy==2.2.*
pandas
peft==0.18.*
peft==0.17.*
Pillow>=9.5.0
psutil
pydantic==2.11.0
@ -20,13 +18,12 @@ python-docx==1.1.2
pyyaml
requests
rich
safetensors==0.7.*
safetensors==0.6.*
scipy
sentencepiece
tensorboard
torchao==0.14.*
transformers==4.57.*
triton-windows==3.5.1.post22; platform_system == "Windows"
triton-windows==3.5.0.post21; platform_system == "Windows"
tqdm
wandb
@ -40,5 +37,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only, AVX2)
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"

View file

@ -2,16 +2,14 @@ accelerate==1.8.*
audioop-lts<1.0; python_version >= "3.13"
colorama
datasets
diffusers==0.36.*
einops
fastapi==0.112.4
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
markdown
numpy==2.2.*
pandas
peft==0.18.*
peft==0.17.*
Pillow>=9.5.0
psutil
pydantic==2.11.0
@ -20,13 +18,12 @@ python-docx==1.1.2
pyyaml
requests
rich
safetensors==0.7.*
safetensors==0.6.*
scipy
sentencepiece
tensorboard
torchao==0.14.*
transformers==4.57.*
triton-windows==3.5.1.post22; platform_system == "Windows"
triton-windows==3.5.0.post21; platform_system == "Windows"
tqdm
wandb
@ -40,5 +37,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only, no AVX2)
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"

View file

@ -3,17 +3,15 @@ audioop-lts<1.0; python_version >= "3.13"
bitsandbytes==0.48.*
colorama
datasets
diffusers==0.36.*
einops
fastapi==0.112.4
flash-linear-attention==0.4.0
flash-linear-attention==0.3.2
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
markdown
numpy==2.2.*
pandas
peft==0.18.*
peft==0.17.*
Pillow>=9.5.0
psutil
pydantic==2.11.0
@ -22,13 +20,12 @@ python-docx==1.1.2
pyyaml
requests
rich
safetensors==0.7.*
safetensors==0.6.*
scipy
sentencepiece
tensorboard
torchao==0.14.*
transformers==4.57.*
triton-windows==3.5.1.post22; platform_system == "Windows"
triton-windows==3.5.0.post21; platform_system == "Windows"
tqdm
wandb
@ -42,10 +39,10 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.18/exllamav3-0.0.18+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.18/exllamav3-0.0.18+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.11/exllamav3-0.0.11+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.11/exllamav3-0.0.11+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"

View file

@ -2,16 +2,14 @@ accelerate==1.8.*
audioop-lts<1.0; python_version >= "3.13"
colorama
datasets
diffusers==0.36.*
einops
fastapi==0.112.4
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
markdown
numpy==2.2.*
pandas
peft==0.18.*
peft==0.17.*
Pillow>=9.5.0
psutil
pydantic==2.11.0
@ -20,13 +18,12 @@ python-docx==1.1.2
pyyaml
requests
rich
safetensors==0.7.*
safetensors==0.6.*
scipy
sentencepiece
tensorboard
torchao==0.14.*
transformers==4.57.*
triton-windows==3.5.1.post22; platform_system == "Windows"
triton-windows==3.5.0.post21; platform_system == "Windows"
tqdm
wandb

View file

@ -1,7 +1,6 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
markdown
numpy==2.2.*
@ -23,5 +22,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

View file

@ -1,27 +0,0 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
markdown
numpy==2.2.*
pydantic==2.11.0
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
tqdm
# Gradio
gradio==4.37.*
https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl
# API
flask_cloudflared==0.0.14
sse-starlette==1.6.5
tiktoken
# AMD wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

View file

@ -1,27 +0,0 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
markdown
numpy==2.2.*
pydantic==2.11.0
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
tqdm
# Gradio
gradio==4.37.*
https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl
# API
flask_cloudflared==0.0.14
sse-starlette==1.6.5
tiktoken
# AMD wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+rocm6.4.4avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

View file

@ -1,7 +1,6 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
markdown
numpy==2.2.*
@ -23,5 +22,6 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0"

View file

@ -1,7 +1,6 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
markdown
numpy==2.2.*
@ -23,5 +22,6 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0"

View file

@ -1,7 +1,6 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
markdown
numpy==2.2.*
@ -23,5 +22,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only, AVX2)
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"

View file

@ -1,7 +1,6 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
markdown
numpy==2.2.*
@ -23,5 +22,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only, no AVX2)
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"

View file

@ -1,7 +1,6 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
markdown
numpy==2.2.*
@ -23,5 +22,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

View file

@ -1,7 +1,6 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
markdown
numpy==2.2.*

View file

@ -1,7 +1,6 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
markdown
numpy==2.2.*
@ -22,6 +21,6 @@ flask_cloudflared==0.0.14
sse-starlette==1.6.5
tiktoken
# Vulkan wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
# Vulkan wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

View file

@ -1,7 +1,6 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
markdown
numpy==2.2.*
@ -23,5 +22,5 @@ sse-starlette==1.6.5
tiktoken
# Vulkan wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

View file

@ -5,7 +5,6 @@ from pathlib import Path
from modules import shared
from modules.block_requests import OpenMonkeyPatch, RequestBlocker
from modules.image_models import load_image_model
from modules.logging_colors import logger
from modules.prompts import load_prompt
@ -51,7 +50,6 @@ from modules import (
ui_chat,
ui_default,
ui_file_saving,
ui_image_generation,
ui_model_menu,
ui_notebook,
ui_parameters,
@ -101,11 +99,6 @@ def create_interface():
auth.extend(x.strip() for line in file for x in line.split(',') if x.strip())
auth = [tuple(cred.split(':')) for cred in auth]
# Allowed paths
allowed_paths = ["css", "js", "extensions", "user_data/cache"]
if not shared.args.multi_user:
allowed_paths.append("user_data/image_outputs")
# Import the extensions and execute their setup() functions
if shared.args.extensions is not None and len(shared.args.extensions) > 0:
extensions_module.load_extensions()
@ -170,7 +163,6 @@ def create_interface():
ui_chat.create_character_settings_ui() # Character tab
ui_model_menu.create_ui() # Model tab
if not shared.args.portable:
ui_image_generation.create_ui() # Image generation tab
training.create_ui() # Training tab
ui_session.create_ui() # Session tab
@ -178,8 +170,6 @@ def create_interface():
ui_chat.create_event_handlers()
ui_default.create_event_handlers()
ui_notebook.create_event_handlers()
if not shared.args.portable:
ui_image_generation.create_event_handlers()
# Other events
ui_file_saving.create_event_handlers()
@ -242,7 +232,7 @@ def create_interface():
ssl_keyfile=shared.args.ssl_keyfile,
ssl_certfile=shared.args.ssl_certfile,
root_path=shared.args.subpath,
allowed_paths=allowed_paths,
allowed_paths=["css", "js", "extensions", "user_data/cache"]
)
@ -266,9 +256,6 @@ if __name__ == "__main__":
if new_settings:
shared.settings.update(new_settings)
# Apply CLI overrides for image model settings (CLI flags take precedence over saved settings)
shared.apply_image_model_cli_overrides()
# Fallback settings for models
shared.model_config['.*'] = get_fallback_settings()
shared.model_config.move_to_end('.*', last=False) # Move to the beginning
@ -280,22 +267,6 @@ if __name__ == "__main__":
if extension not in shared.args.extensions:
shared.args.extensions.append(extension)
# Load image model if specified via CLI
if shared.args.image_model:
logger.info(f"Loading image model: {shared.args.image_model}")
result = load_image_model(
shared.args.image_model,
dtype=shared.settings.get('image_dtype', 'bfloat16'),
attn_backend=shared.settings.get('image_attn_backend', 'sdpa'),
cpu_offload=shared.settings.get('image_cpu_offload', False),
compile_model=shared.settings.get('image_compile', False),
quant_method=shared.settings.get('image_quant', 'none')
)
if result is not None:
shared.image_model_name = shared.args.image_model
else:
logger.error(f"Failed to load image model: {shared.args.image_model}")
available_models = utils.get_available_models()
# Model defined through --model