Compare commits

..

No commits in common. "main" and "v1.7" have entirely different histories.
main ... v1.7

501 changed files with 14516 additions and 28285 deletions

1
.github/FUNDING.yml vendored Normal file
View file

@ -0,0 +1 @@
ko_fi: oobabooga

View file

@ -46,7 +46,7 @@ body:
id: system-info id: system-info
attributes: attributes:
label: System Info label: System Info
description: "Please share your operating system and GPU type (NVIDIA/AMD/Intel/Apple). If you are using a Google Colab notebook, mention that instead." description: "Please share your system info with us: operating system, GPU brand, and GPU model. If you are using a Google Colab notebook, mention that instead."
render: shell render: shell
placeholder: placeholder:
validations: validations:

View file

@ -5,10 +5,7 @@
version: 2 version: 2
updates: updates:
- package-ecosystem: "pip" - package-ecosystem: "pip" # See documentation for possible values
directories: directory: "/" # Location of package manifests
- "/requirements/full/"
- "/requirements/portable/"
target-branch: "dev"
schedule: schedule:
interval: "weekly" interval: "weekly"

View file

@ -1,70 +0,0 @@
# Fan-out workflow: triggers every portable build (CUDA, Vulkan, ROCm, CPU,
# macOS) for a single version tag by calling the reusable build workflows in
# this directory. Each job pins the target runner through the `config` input.
name: Build Everything TGW

on:
  workflow_dispatch:
    inputs:
      version:
        description: 'Version tag of text-generation-webui to build: v3.0'
        default: 'v3.0'
        required: true
        type: string

# The called workflows upload archives to a release, which needs write access.
permissions:
  contents: write

jobs:
  # --- CUDA ---
  build_release_cuda_windows:
    name: CUDA Windows
    uses: ./.github/workflows/build-portable-release-cuda.yml
    with:
      version: ${{ inputs.version }}
      config: 'os:windows-2022'

  build_release_cuda_linux:
    name: CUDA Linux
    uses: ./.github/workflows/build-portable-release-cuda.yml
    with:
      version: ${{ inputs.version }}
      config: 'os:ubuntu-22.04'

  # --- Vulkan ---
  build_release_vulkan_windows:
    name: Vulkan Windows
    uses: ./.github/workflows/build-portable-release-vulkan.yml
    with:
      version: ${{ inputs.version }}
      config: 'os:windows-2022'

  build_release_vulkan_linux:
    name: Vulkan Linux
    uses: ./.github/workflows/build-portable-release-vulkan.yml
    with:
      version: ${{ inputs.version }}
      config: 'os:ubuntu-22.04'

  # --- ROCm (only a Linux job is defined here) ---
  build_release_rocm_linux:
    name: ROCm Linux
    uses: ./.github/workflows/build-portable-release-rocm.yml
    with:
      version: ${{ inputs.version }}
      config: 'os:ubuntu-22.04'

  # --- CPU-only and macOS (share one reusable workflow) ---
  build_release_cpu_windows:
    name: CPU Windows
    uses: ./.github/workflows/build-portable-release.yml
    with:
      version: ${{ inputs.version }}
      config: 'os:windows-2022'

  build_release_cpu_linux:
    name: CPU Linux
    uses: ./.github/workflows/build-portable-release.yml
    with:
      version: ${{ inputs.version }}
      config: 'os:ubuntu-22.04'

  build_release_macos:
    name: macOS
    uses: ./.github/workflows/build-portable-release.yml
    with:
      version: ${{ inputs.version }}
      # Two runner labels: one Intel image and one macos-14 image.
      config: 'os:macos-15-intel,macos-14'

View file

@ -1,175 +0,0 @@
# Builds portable CUDA archives of text-generation-webui for one version tag
# and uploads them to the GitHub release carrying that tag.
#
# Can be started manually (workflow_dispatch) or called from another workflow
# (workflow_call); both entry points accept the same three inputs.
name: Build CUDA

on:
  workflow_dispatch:
    inputs:
      version:
        description: 'Version tag of text-generation-webui to build: v3.0'
        default: 'v3.0'
        required: true
        type: string
      config:
        description: 'Override configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
        default: 'Default'
        required: false
        type: string
      exclude:
        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
        default: 'None'
        required: false
        type: string
  workflow_call:
    inputs:
      version:
        description: 'Version tag of text-generation-webui to build: v3.0'
        default: 'v3.0'
        required: true
        type: string
      config:
        description: 'Configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
        default: 'Default'
        required: false
        type: string
      exclude:
        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
        default: 'None'
        required: false
        type: string

permissions:
  contents: write

jobs:
  # Turns the `config` / `exclude` input strings into the JSON build matrix
  # consumed by build_wheels.
  define_matrix:
    name: Define Build Matrix
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    defaults:
      run:
        shell: pwsh
    env:
      CONFIGIN: ${{ inputs.config }}
      EXCLUDEIN: ${{ inputs.exclude }}
    steps:
      - name: Define Job Output
        id: set-matrix
        run: |
          # Default matrix; any key can be replaced through the `config` input.
          $matrix = @{
              'os' = @('ubuntu-22.04', 'windows-2022')
              'pyver' = @("3.13")
              'cuda' = @("12.4", "13.1")
          }

          # `config` format: key:item,item;key:item,item - each key overwrites the default list.
          if ($env:CONFIGIN -ne 'Default') {$env:CONFIGIN.split(';').foreach({$matrix[$_.split(':')[0]] = $_.split(':')[1].split(',')})}

          # `exclude` format: key:item,key:item;... - each ';'-separated group becomes one exclude entry.
          if ($env:EXCLUDEIN -ne 'None') {
              $exclusions = @()
              $exclusions += $env:EXCLUDEIN.split(';').replace(':','=').replace(',',"`n") | ConvertFrom-StringData
              $matrix['exclude'] = $exclusions
          }

          $matrixOut = ConvertTo-Json $matrix -Compress
          Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT

  # Assembles one portable archive per matrix entry and uploads it.
  build_wheels:
    name: ${{ matrix.os }} ${{ matrix.pyver }} CUDA ${{ matrix.cuda }}
    needs: define_matrix
    runs-on: ${{ matrix.os }}
    strategy:
      matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
    defaults:
      run:
        shell: pwsh
    env:
      PCKGVER: ${{ inputs.version }}
    steps:
      - uses: actions/checkout@v6
        with:
          repository: 'oobabooga/text-generation-webui'
          ref: ${{ inputs.version }}
          submodules: 'recursive'

      - uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.pyver }}

      - name: Build Package
        shell: bash
        env:
          # Passed through the environment instead of being interpolated into
          # the script text, so the value is never parsed as shell code.
          CUDA_VERSION: ${{ matrix.cuda }}
        run: |
          # PCKGVER (job-level env) holds the requested tag; strip a leading "v".
          VERSION_CLEAN="${PCKGVER#v}"

          cd ..
          cp -r text-generation-webui "text-generation-webui-${VERSION_CLEAN}"
          cd "text-generation-webui-${VERSION_CLEAN}"

          # Remove extensions that need additional requirements.
          # Directory names are matched exactly, and find does the deletion
          # itself, so an empty result does not fail the step.
          allowed=("character_bias" "gallery" "openai" "sd_api_pictures")
          keep_filter=()
          for ext in "${allowed[@]}"; do
            keep_filter+=('!' -name "$ext")
          done
          find extensions/ -mindepth 1 -maxdepth 1 -type d "${keep_filter[@]}" -exec rm -rf {} +

          # 1. Set platform-specific variables
          if [[ "$RUNNER_OS" == "Windows" ]]; then
            PLATFORM="windows"
            PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-pc-windows-msvc-install_only.tar.gz"
            PIP_PATH="portable_env/python.exe -m pip"
            PACKAGES_PATH="portable_env/Lib/site-packages"
            rm start_linux.sh start_macos.sh
          else
            PLATFORM="linux"
            PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-unknown-linux-gnu-install_only.tar.gz"
            PIP_PATH="portable_env/bin/python -m pip"
            PACKAGES_PATH="portable_env/lib/python3.13/site-packages"
            rm start_macos.sh start_windows.bat
          fi

          # 2. Download and extract Python
          cd ..
          echo "Downloading Python for $PLATFORM..."
          # -f: fail on HTTP errors instead of saving the error page as the tarball.
          curl -fL --retry 3 -o python-build.tar.gz "$PYTHON_URL"
          tar -xzf python-build.tar.gz
          mv python "text-generation-webui-${VERSION_CLEAN}/portable_env"

          # 3. Prepare requirements file based on CUDA version
          cd "text-generation-webui-${VERSION_CLEAN}"
          if [[ "$CUDA_VERSION" == "13.1" ]]; then
            REQ_FILE="requirements/portable/requirements_cuda131.txt"
          else
            REQ_FILE="requirements/portable/requirements.txt"
          fi

          # 4. Install packages
          echo "Installing Python packages from $REQ_FILE..."
          # PIP_PATH is intentionally unquoted: it expands to "<python> -m pip".
          $PIP_PATH install --target="./$PACKAGES_PATH" -r "$REQ_FILE"

          # 5. Clean up
          rm -rf .git cmd* update_wizard* Colab-TextGen-GPU.ipynb docker setup.cfg .github .gitignore requirements/ one_click.py

          # 6. Create archive
          cd ..
          if [[ "$RUNNER_OS" == "Windows" ]]; then
            ARCHIVE_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}-cuda${CUDA_VERSION}.zip"
            echo "Creating archive: $ARCHIVE_NAME"
            powershell -Command "Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
          else
            ARCHIVE_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}-cuda${CUDA_VERSION}.tar.gz"
            echo "Creating archive: $ARCHIVE_NAME"
            tar czf "$ARCHIVE_NAME" "text-generation-webui-${VERSION_CLEAN}"
          fi

      - name: Upload files to a GitHub release
        id: upload-release
        uses: svenstaro/upload-release-action@2.7.0
        # Best effort: a failed upload does not fail the job.
        continue-on-error: true
        with:
          repo_token: ${{ secrets.GITHUB_TOKEN }}
          file: ../textgen-portable-*
          tag: ${{ inputs.version }}
          file_glob: true
          make_latest: false
          overwrite: true

View file

@ -1,170 +0,0 @@
# Builds portable ROCm archives of text-generation-webui for one version tag
# and uploads them to the GitHub release carrying that tag.
#
# Can be started manually (workflow_dispatch) or called from another workflow
# (workflow_call); both entry points accept the same three inputs.
name: Build ROCm

on:
  workflow_dispatch:
    inputs:
      version:
        description: 'Version tag of text-generation-webui to build: v3.0'
        default: 'v3.0'
        required: true
        type: string
      config:
        description: 'Override configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
        default: 'Default'
        required: false
        type: string
      exclude:
        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
        default: 'None'
        required: false
        type: string
  workflow_call:
    inputs:
      version:
        description: 'Version tag of text-generation-webui to build: v3.0'
        default: 'v3.0'
        required: true
        type: string
      config:
        description: 'Configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
        default: 'Default'
        required: false
        type: string
      exclude:
        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
        default: 'None'
        required: false
        type: string

permissions:
  contents: write

jobs:
  # Turns the `config` / `exclude` input strings into the JSON build matrix
  # consumed by build_wheels.
  define_matrix:
    name: Define Build Matrix
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    defaults:
      run:
        shell: pwsh
    env:
      CONFIGIN: ${{ inputs.config }}
      EXCLUDEIN: ${{ inputs.exclude }}
    steps:
      - name: Define Job Output
        id: set-matrix
        run: |
          # Default matrix; any key can be replaced through the `config` input.
          $matrix = @{
              'os' = @('ubuntu-22.04', 'windows-2022')
              'pyver' = @("3.13")
          }

          # `config` format: key:item,item;key:item,item - each key overwrites the default list.
          if ($env:CONFIGIN -ne 'Default') {$env:CONFIGIN.split(';').foreach({$matrix[$_.split(':')[0]] = $_.split(':')[1].split(',')})}

          # `exclude` format: key:item,key:item;... - each ';'-separated group becomes one exclude entry.
          if ($env:EXCLUDEIN -ne 'None') {
              $exclusions = @()
              $exclusions += $env:EXCLUDEIN.split(';').replace(':','=').replace(',',"`n") | ConvertFrom-StringData
              $matrix['exclude'] = $exclusions
          }

          $matrixOut = ConvertTo-Json $matrix -Compress
          Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT

  # Assembles one portable archive per matrix entry and uploads it.
  build_wheels:
    name: ${{ matrix.os }} ${{ matrix.pyver }}
    needs: define_matrix
    runs-on: ${{ matrix.os }}
    strategy:
      matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
    defaults:
      run:
        shell: pwsh
    env:
      PCKGVER: ${{ inputs.version }}
    steps:
      - uses: actions/checkout@v6
        with:
          repository: 'oobabooga/text-generation-webui'
          ref: ${{ inputs.version }}
          submodules: 'recursive'

      - uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.pyver }}

      - name: Build Package
        shell: bash
        run: |
          # PCKGVER (job-level env) holds the requested tag; reading it from the
          # environment keeps the input out of the script text. Strip a leading "v".
          VERSION_CLEAN="${PCKGVER#v}"

          cd ..
          cp -r text-generation-webui "text-generation-webui-${VERSION_CLEAN}"
          cd "text-generation-webui-${VERSION_CLEAN}"

          # Remove extensions that need additional requirements.
          # Directory names are matched exactly, and find does the deletion
          # itself, so an empty result does not fail the step.
          allowed=("character_bias" "gallery" "openai" "sd_api_pictures")
          keep_filter=()
          for ext in "${allowed[@]}"; do
            keep_filter+=('!' -name "$ext")
          done
          find extensions/ -mindepth 1 -maxdepth 1 -type d "${keep_filter[@]}" -exec rm -rf {} +

          # 1. Set platform-specific variables
          if [[ "$RUNNER_OS" == "Windows" ]]; then
            PLATFORM="windows"
            PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-pc-windows-msvc-install_only.tar.gz"
            PIP_PATH="portable_env/python.exe -m pip"
            PACKAGES_PATH="portable_env/Lib/site-packages"
            rm start_linux.sh start_macos.sh
          else
            PLATFORM="linux"
            PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-unknown-linux-gnu-install_only.tar.gz"
            PIP_PATH="portable_env/bin/python -m pip"
            PACKAGES_PATH="portable_env/lib/python3.13/site-packages"
            rm start_macos.sh start_windows.bat
          fi

          # 2. Download and extract Python
          cd ..
          echo "Downloading Python for $PLATFORM..."
          # -f: fail on HTTP errors instead of saving the error page as the tarball.
          curl -fL --retry 3 -o python-build.tar.gz "$PYTHON_URL"
          tar -xzf python-build.tar.gz
          mv python "text-generation-webui-${VERSION_CLEAN}/portable_env"

          # 3. Prepare requirements file
          REQ_FILE="requirements/portable/requirements_amd.txt"
          cd "text-generation-webui-${VERSION_CLEAN}"

          # 4. Install packages
          echo "Installing Python packages from $REQ_FILE..."
          # PIP_PATH is intentionally unquoted: it expands to "<python> -m pip".
          $PIP_PATH install --target="./$PACKAGES_PATH" -r "$REQ_FILE"

          # 5. Clean up
          rm -rf .git cmd* update_wizard* Colab-TextGen-GPU.ipynb docker setup.cfg .github .gitignore requirements/ one_click.py

          # 6. Create archive
          cd ..
          if [[ "$RUNNER_OS" == "Windows" ]]; then
            ARCHIVE_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}-rocm7.2.zip"
            echo "Creating archive: $ARCHIVE_NAME"
            powershell -Command "Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
          else
            ARCHIVE_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}-rocm7.2.tar.gz"
            echo "Creating archive: $ARCHIVE_NAME"
            tar czf "$ARCHIVE_NAME" "text-generation-webui-${VERSION_CLEAN}"
          fi

      - name: Upload files to a GitHub release
        id: upload-release
        uses: svenstaro/upload-release-action@2.7.0
        # Best effort: a failed upload does not fail the job.
        continue-on-error: true
        with:
          repo_token: ${{ secrets.GITHUB_TOKEN }}
          file: ../textgen-portable-*
          tag: ${{ inputs.version }}
          file_glob: true
          make_latest: false
          overwrite: true

View file

@ -1,170 +0,0 @@
# Builds portable Vulkan archives of text-generation-webui for one version tag
# and uploads them to the GitHub release carrying that tag.
#
# Can be started manually (workflow_dispatch) or called from another workflow
# (workflow_call); both entry points accept the same three inputs.
name: Build Vulkan

on:
  workflow_dispatch:
    inputs:
      version:
        description: 'Version tag of text-generation-webui to build: v3.0'
        default: 'v3.0'
        required: true
        type: string
      config:
        description: 'Override configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
        default: 'Default'
        required: false
        type: string
      exclude:
        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
        default: 'None'
        required: false
        type: string
  workflow_call:
    inputs:
      version:
        description: 'Version tag of text-generation-webui to build: v3.0'
        default: 'v3.0'
        required: true
        type: string
      config:
        description: 'Configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
        default: 'Default'
        required: false
        type: string
      exclude:
        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
        default: 'None'
        required: false
        type: string

permissions:
  contents: write

jobs:
  # Turns the `config` / `exclude` input strings into the JSON build matrix
  # consumed by build_wheels.
  define_matrix:
    name: Define Build Matrix
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    defaults:
      run:
        shell: pwsh
    env:
      CONFIGIN: ${{ inputs.config }}
      EXCLUDEIN: ${{ inputs.exclude }}
    steps:
      - name: Define Job Output
        id: set-matrix
        run: |
          # Default matrix; any key can be replaced through the `config` input.
          $matrix = @{
              'os' = @('ubuntu-22.04', 'windows-2022')
              'pyver' = @("3.13")
          }

          # `config` format: key:item,item;key:item,item - each key overwrites the default list.
          if ($env:CONFIGIN -ne 'Default') {$env:CONFIGIN.split(';').foreach({$matrix[$_.split(':')[0]] = $_.split(':')[1].split(',')})}

          # `exclude` format: key:item,key:item;... - each ';'-separated group becomes one exclude entry.
          if ($env:EXCLUDEIN -ne 'None') {
              $exclusions = @()
              $exclusions += $env:EXCLUDEIN.split(';').replace(':','=').replace(',',"`n") | ConvertFrom-StringData
              $matrix['exclude'] = $exclusions
          }

          $matrixOut = ConvertTo-Json $matrix -Compress
          Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT

  # Assembles one portable archive per matrix entry and uploads it.
  build_wheels:
    name: ${{ matrix.os }} ${{ matrix.pyver }}
    needs: define_matrix
    runs-on: ${{ matrix.os }}
    strategy:
      matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
    defaults:
      run:
        shell: pwsh
    env:
      PCKGVER: ${{ inputs.version }}
    steps:
      - uses: actions/checkout@v6
        with:
          repository: 'oobabooga/text-generation-webui'
          ref: ${{ inputs.version }}
          submodules: 'recursive'

      - uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.pyver }}

      - name: Build Package
        shell: bash
        run: |
          # PCKGVER (job-level env) holds the requested tag; reading it from the
          # environment keeps the input out of the script text. Strip a leading "v".
          VERSION_CLEAN="${PCKGVER#v}"

          cd ..
          cp -r text-generation-webui "text-generation-webui-${VERSION_CLEAN}"
          cd "text-generation-webui-${VERSION_CLEAN}"

          # Remove extensions that need additional requirements.
          # Directory names are matched exactly, and find does the deletion
          # itself, so an empty result does not fail the step.
          allowed=("character_bias" "gallery" "openai" "sd_api_pictures")
          keep_filter=()
          for ext in "${allowed[@]}"; do
            keep_filter+=('!' -name "$ext")
          done
          find extensions/ -mindepth 1 -maxdepth 1 -type d "${keep_filter[@]}" -exec rm -rf {} +

          # 1. Set platform-specific variables
          if [[ "$RUNNER_OS" == "Windows" ]]; then
            PLATFORM="windows"
            PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-pc-windows-msvc-install_only.tar.gz"
            PIP_PATH="portable_env/python.exe -m pip"
            PACKAGES_PATH="portable_env/Lib/site-packages"
            rm start_linux.sh start_macos.sh
          else
            PLATFORM="linux"
            PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-unknown-linux-gnu-install_only.tar.gz"
            PIP_PATH="portable_env/bin/python -m pip"
            PACKAGES_PATH="portable_env/lib/python3.13/site-packages"
            rm start_macos.sh start_windows.bat
          fi

          # 2. Download and extract Python
          cd ..
          echo "Downloading Python for $PLATFORM..."
          # -f: fail on HTTP errors instead of saving the error page as the tarball.
          curl -fL --retry 3 -o python-build.tar.gz "$PYTHON_URL"
          tar -xzf python-build.tar.gz
          mv python "text-generation-webui-${VERSION_CLEAN}/portable_env"

          # 3. Prepare requirements file
          REQ_FILE="requirements/portable/requirements_vulkan.txt"
          cd "text-generation-webui-${VERSION_CLEAN}"

          # 4. Install packages
          echo "Installing Python packages from $REQ_FILE..."
          # PIP_PATH is intentionally unquoted: it expands to "<python> -m pip".
          $PIP_PATH install --target="./$PACKAGES_PATH" -r "$REQ_FILE"

          # 5. Clean up
          rm -rf .git cmd* update_wizard* Colab-TextGen-GPU.ipynb docker setup.cfg .github .gitignore requirements/ one_click.py

          # 6. Create archive
          cd ..
          if [[ "$RUNNER_OS" == "Windows" ]]; then
            ARCHIVE_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}-vulkan.zip"
            echo "Creating archive: $ARCHIVE_NAME"
            powershell -Command "Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
          else
            ARCHIVE_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}-vulkan.tar.gz"
            echo "Creating archive: $ARCHIVE_NAME"
            tar czf "$ARCHIVE_NAME" "text-generation-webui-${VERSION_CLEAN}"
          fi

      - name: Upload files to a GitHub release
        id: upload-release
        uses: svenstaro/upload-release-action@2.7.0
        # Best effort: a failed upload does not fail the job.
        continue-on-error: true
        with:
          repo_token: ${{ secrets.GITHUB_TOKEN }}
          file: ../textgen-portable-*
          tag: ${{ inputs.version }}
          file_glob: true
          make_latest: false
          overwrite: true

View file

@ -1,196 +0,0 @@
# Builds portable CPU-only (Linux/Windows) and macOS archives of
# text-generation-webui for one version tag and uploads them to the GitHub
# release carrying that tag.
#
# Can be started manually (workflow_dispatch) or called from another workflow
# (workflow_call); both entry points accept the same three inputs.
name: Build CPU and macOS

on:
  workflow_dispatch:
    inputs:
      version:
        description: 'Version tag of text-generation-webui to build: v3.0'
        default: 'v3.0'
        required: true
        type: string
      config:
        description: 'Override configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
        default: 'Default'
        required: false
        type: string
      exclude:
        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
        default: 'None'
        required: false
        type: string
  workflow_call:
    inputs:
      version:
        description: 'Version tag of text-generation-webui to build: v3.0'
        default: 'v3.0'
        required: true
        type: string
      config:
        description: 'Configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
        default: 'Default'
        required: false
        type: string
      exclude:
        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
        default: 'None'
        required: false
        type: string

permissions:
  contents: write

jobs:
  # Turns the `config` / `exclude` input strings into the JSON build matrix
  # consumed by build_wheels.
  define_matrix:
    name: Define Build Matrix
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    defaults:
      run:
        shell: pwsh
    env:
      CONFIGIN: ${{ inputs.config }}
      EXCLUDEIN: ${{ inputs.exclude }}
    steps:
      - name: Define Job Output
        id: set-matrix
        run: |
          # Default matrix; any key can be replaced through the `config` input.
          $matrix = @{
              'os' = @('ubuntu-22.04', 'windows-2022', 'macos-14')
              'pyver' = @("3.13")
          }

          # `config` format: key:item,item;key:item,item - each key overwrites the default list.
          if ($env:CONFIGIN -ne 'Default') {$env:CONFIGIN.split(';').foreach({$matrix[$_.split(':')[0]] = $_.split(':')[1].split(',')})}

          # `exclude` format: key:item,key:item;... - each ';'-separated group becomes one exclude entry.
          if ($env:EXCLUDEIN -ne 'None') {
              $exclusions = @()
              $exclusions += $env:EXCLUDEIN.split(';').replace(':','=').replace(',',"`n") | ConvertFrom-StringData
              $matrix['exclude'] = $exclusions
          }

          $matrixOut = ConvertTo-Json $matrix -Compress
          Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT

  # Assembles one portable archive per matrix entry and uploads it.
  build_wheels:
    name: ${{ matrix.os }} ${{ matrix.pyver }}
    needs: define_matrix
    runs-on: ${{ matrix.os }}
    strategy:
      matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
    defaults:
      run:
        shell: pwsh
    env:
      PCKGVER: ${{ inputs.version }}
    steps:
      - uses: actions/checkout@v6
        with:
          repository: 'oobabooga/text-generation-webui'
          ref: ${{ inputs.version }}
          submodules: 'recursive'

      - uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.pyver }}

      - name: Build Package
        shell: bash
        env:
          # Passed through the environment instead of being interpolated into
          # the script text, so the value is never parsed as shell code.
          OS_TYPE: ${{ matrix.os }}
        run: |
          # PCKGVER (job-level env) holds the requested tag; strip a leading "v".
          VERSION_CLEAN="${PCKGVER#v}"

          cd ..
          cp -r text-generation-webui "text-generation-webui-${VERSION_CLEAN}"
          cd "text-generation-webui-${VERSION_CLEAN}"

          # Remove extensions that need additional requirements.
          # Directory names are matched exactly, and find does the deletion
          # itself, so an empty result does not fail the step.
          allowed=("character_bias" "gallery" "openai" "sd_api_pictures")
          keep_filter=()
          for ext in "${allowed[@]}"; do
            keep_filter+=('!' -name "$ext")
          done
          find extensions/ -mindepth 1 -maxdepth 1 -type d "${keep_filter[@]}" -exec rm -rf {} +

          # 1. Set platform-specific variables
          if [[ "$RUNNER_OS" == "Windows" ]]; then
            PLATFORM="windows-cpu"
            PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-pc-windows-msvc-install_only.tar.gz"
            PIP_PATH="portable_env/python.exe -m pip"
            PACKAGES_PATH="portable_env/Lib/site-packages"
            rm start_linux.sh start_macos.sh
          elif [[ "$RUNNER_OS" == "macOS" ]]; then
            # The runner label decides between the Intel and Apple Silicon builds.
            if [[ "$OS_TYPE" == "macos-15-intel" ]]; then
              PLATFORM="macos-x86_64"
              PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-apple-darwin-install_only.tar.gz"
            else
              PLATFORM="macos-arm64"
              PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-aarch64-apple-darwin-install_only.tar.gz"
            fi
            PIP_PATH="portable_env/bin/python -m pip"
            PACKAGES_PATH="portable_env/lib/python3.13/site-packages"
            rm start_linux.sh start_windows.bat
          else
            # Linux case
            PLATFORM="linux-cpu"
            PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-unknown-linux-gnu-install_only.tar.gz"
            PIP_PATH="portable_env/bin/python -m pip"
            PACKAGES_PATH="portable_env/lib/python3.13/site-packages"
            rm start_macos.sh start_windows.bat
          fi

          # 2. Download and extract Python
          echo "Downloading Python for $PLATFORM..."
          cd ..
          # -f: fail on HTTP errors instead of saving the error page as the tarball.
          curl -fL --retry 3 -o python-build.tar.gz "$PYTHON_URL"
          tar -xzf python-build.tar.gz
          mv python "text-generation-webui-${VERSION_CLEAN}/portable_env"

          # 3. Prepare requirements file based on platform
          cd "text-generation-webui-${VERSION_CLEAN}"
          if [[ "$RUNNER_OS" == "macOS" ]]; then
            if [[ "$OS_TYPE" == "macos-15-intel" ]]; then
              REQ_FILE="requirements/portable/requirements_apple_intel.txt"
            else
              REQ_FILE="requirements/portable/requirements_apple_silicon.txt"
            fi
          else
            REQ_FILE="requirements/portable/requirements_cpu_only.txt"
          fi
          echo "Using requirements file: $REQ_FILE"

          # 4. Install packages
          echo "Installing Python packages from $REQ_FILE..."
          # PIP_PATH is intentionally unquoted: it expands to "<python> -m pip".
          $PIP_PATH install --target="./$PACKAGES_PATH" -r "$REQ_FILE"

          # 5. Clean up
          rm -rf .git cmd* update_wizard* Colab-TextGen-GPU.ipynb docker setup.cfg .github .gitignore requirements/ one_click.py

          # 6. Create archive
          cd ..
          if [[ "$RUNNER_OS" == "Windows" ]]; then
            ARCHIVE_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}.zip"
            echo "Creating archive: $ARCHIVE_NAME"
            powershell -Command "Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
          else
            ARCHIVE_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}.tar.gz"
            echo "Creating archive: $ARCHIVE_NAME"
            tar czf "$ARCHIVE_NAME" "text-generation-webui-${VERSION_CLEAN}"
          fi

      - name: Upload files to a GitHub release
        id: upload-release
        uses: svenstaro/upload-release-action@2.7.0
        # Best effort: a failed upload does not fail the job.
        continue-on-error: true
        with:
          repo_token: ${{ secrets.GITHUB_TOKEN }}
          file: ../textgen-portable-*
          tag: ${{ inputs.version }}
          file_glob: true
          make_latest: false
          overwrite: true

22
.github/workflows/stale.yml vendored Normal file
View file

@ -0,0 +1,22 @@
# Scheduled housekeeping: labels inactive issues as "stale" and closes them.
name: Close inactive issues

on:
  schedule:
    # Once a day, at minute 10 of hour 23.
    - cron: "10 23 * * *"

jobs:
  close-issues:
    runs-on: ubuntu-latest
    permissions:
      issues: write
      pull-requests: write
    steps:
      - uses: actions/stale@v5
        with:
          repo-token: ${{ secrets.GITHUB_TOKEN }}

          # Issues: stale after 42 days (6 weeks), closed with 0 further days of grace.
          days-before-issue-stale: 42
          days-before-issue-close: 0
          stale-issue-label: "stale"
          stale-issue-message: ""
          close-issue-message: "This issue has been closed due to inactivity for 6 weeks. If you believe it is still relevant, please leave a comment below. You can tag a developer in your comment."

          # Pull requests: -1 presumably disables stale handling for PRs;
          # confirm against the actions/stale documentation.
          days-before-pr-stale: -1
          days-before-pr-close: -1

61
.gitignore vendored
View file

@ -1,33 +1,38 @@
/css cache
/extensions characters
/installer_files training/datasets
/repositories extensions/silero_tts/outputs
/user_data extensions/elevenlabs_tts/outputs
extensions/sd_api_pictures/outputs
.chroma extensions/multimodal/pipelines
.DS_Store logs
.eslintrc.js loras
.idea models
.installer_state.json presets
.venv repositories
venv softprompts
.envrc torch-dumps
.direnv *pycache*
.vs */*pycache*
*/*/pycache*
venv/
.venv/
.vscode .vscode
.idea/
*.bak *.bak
*.ipynb *.ipynb
*.log *.log
*pycache*
cert.pem
key.pem
package.json
package-lock.json
Thumbs.db
wandb
# ignore user docker config and top level links to docker files settings.json
/docker-compose.yaml settings.yaml
/docker-compose.yml notification.mp3
/Dockerfile img_bot*
.env img_me*
prompts/[0-9]*
models/config-user.yaml
.DS_Store
Thumbs.db
.chroma
installer_files
/CMD_FLAGS.txt

3
CMD_FLAGS.txt Normal file
View file

@ -0,0 +1,3 @@
# Only used by the one-click installer.
# Example:
# --listen --api

View file

@ -1,119 +0,0 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"private_outputs": true,
"provenance": [],
"gpuType": "T4"
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# oobabooga/text-generation-webui\n",
"\n",
"After running both cells, a public gradio URL will appear at the bottom in around 10 minutes. You can optionally generate an API link.\n",
"\n",
"* Project page: https://github.com/oobabooga/text-generation-webui\n",
"* Gradio server status: https://status.gradio.app/"
],
"metadata": {
"id": "MFQl6-FjSYtY"
}
},
{
"cell_type": "code",
"source": [
"#@title 1. Keep this tab alive to prevent Colab from disconnecting you { display-mode: \"form\" }\n",
"\n",
"#@markdown Press play on the music player that will appear below:\n",
"%%html\n",
"<audio src=\"https://oobabooga.github.io/silence.m4a\" controls>"
],
"metadata": {
"id": "f7TVVj_z4flw"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"#@title 2. Launch the web UI\n",
"\n",
"#@markdown You can provide a direct GGUF link or a Hugging Face model URL.\n",
"\n",
"import os\n",
"from pathlib import Path\n",
"\n",
"os.environ.pop('PYTHONPATH', None)\n",
"os.environ.pop('MPLBACKEND', None)\n",
"\n",
"if Path.cwd().name != 'text-generation-webui':\n",
" print(\"\\033[1;32;1m\\n --> Installing the web UI. This will take a while, but after the initial setup, you can download and test as many models as you like.\\033[0;37;0m\\n\")\n",
"\n",
" !git clone https://github.com/oobabooga/text-generation-webui\n",
" %cd text-generation-webui\n",
"\n",
" # Install the project in an isolated environment\n",
" !GPU_CHOICE=A \\\n",
" LAUNCH_AFTER_INSTALL=FALSE \\\n",
" INSTALL_EXTENSIONS=FALSE \\\n",
" ./start_linux.sh\n",
"\n",
"# Parameters\n",
"model_url = \"https://huggingface.co/unsloth/Qwen3.5-9B-GGUF/resolve/main/Qwen3.5-9B-Q4_K_M.gguf\" #@param {type:\"string\"}\n",
"branch = \"\" #@param {type:\"string\"}\n",
"command_line_flags = \"--load-in-4bit --use_double_quant\" #@param {type:\"string\"}\n",
"api = False #@param {type:\"boolean\"}\n",
"\n",
"if api:\n",
" for param in ['--api', '--public-api']:\n",
" if param not in command_line_flags:\n",
" command_line_flags += f\" {param}\"\n",
"\n",
"model_url = model_url.strip()\n",
"model_name = \"\"\n",
"if model_url != \"\":\n",
" if not model_url.startswith('http'):\n",
" model_url = 'https://huggingface.co/' + model_url\n",
"\n",
" branch = branch.strip()\n",
" if '/resolve/' in model_url:\n",
" model_name = model_url.split('?')[0].split('/')[-1]\n",
" !python download-model.py {model_url}\n",
" else:\n",
" url_parts = model_url.strip('/').split('/')\n",
" model_name = f\"{url_parts[-2]}_{url_parts[-1]}\"\n",
" if branch not in ['', 'main']:\n",
" model_name += f\"_{branch}\"\n",
" !python download-model.py {model_url} --branch {branch}\n",
" else:\n",
" !python download-model.py {model_url}\n",
"\n",
"# Start the web UI\n",
"cmd = f\"./start_linux.sh {command_line_flags} --share\"\n",
"if model_name != \"\":\n",
" cmd += f\" --model {model_name}\"\n",
"\n",
"!$cmd"
],
"metadata": {
"id": "LGQ8BiMuXMDG",
"cellView": "form"
},
"execution_count": null,
"outputs": []
}
]
}

696
README.md
View file

@ -1,142 +1,89 @@
<div align="center" markdown="1"> **Breaking change: WebUI now uses PyTorch 2.1.**
<sup>Special thanks to:</sup>
<br>
<br>
<a href="https://go.warp.dev/text-generation-webui">
<img alt="Warp sponsorship" width="400" src="https://raw.githubusercontent.com/warpdotdev/brand-assets/refs/heads/main/Github/Sponsor/Warp-Github-LG-02.png">
</a>
### [Warp, built for coding with multiple AI agents](https://go.warp.dev/text-generation-webui) * For one-click installer users: If you encounter problems after updating, rerun the update script. If issues persist, delete the `installer_files` folder and use the start script to reinstall requirements.
[Available for macOS, Linux, & Windows](https://go.warp.dev/text-generation-webui)<br> * For manual installations, update PyTorch with the [provided command](https://github.com/oobabooga/text-generation-webui/#2-install-pytorch).
</div>
<hr>
# Text Generation Web UI # Text generation web UI
A Gradio web UI for running Large Language Models locally. 100% private and offline. Supports text generation, vision, tool-calling, training, image generation, and more. A Gradio web UI for Large Language Models.
[Try the Deep Reason extension](https://oobabooga.gumroad.com/l/deep_reason) Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) of text generation.
|![Image1](https://github.com/oobabooga/screenshots/raw/main/INSTRUCT-3.5.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/CHAT-3.5.png) | |![Image1](https://github.com/oobabooga/screenshots/raw/main/print_instruct.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/print_chat.png) |
|:---:|:---:| |:---:|:---:|
|![Image1](https://github.com/oobabooga/screenshots/raw/main/DEFAULT-3.5.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/PARAMETERS-3.5.png) | |![Image1](https://github.com/oobabooga/screenshots/raw/main/print_default.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/print_parameters.png) |
## Features ## Features
- **Multiple backends**: [llama.cpp](https://github.com/ggerganov/llama.cpp), [Transformers](https://github.com/huggingface/transformers), [ExLlamaV3](https://github.com/turboderp-org/exllamav3), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). Switch between backends and models without restarting. * 3 interface modes: default (two columns), notebook, and chat
- **File attachments**: Upload text files, PDF documents, and .docx documents to talk about their contents. * Multiple model backends: [transformers](https://github.com/huggingface/transformers), [llama.cpp](https://github.com/ggerganov/llama.cpp), [ExLlama](https://github.com/turboderp/exllama), [ExLlamaV2](https://github.com/turboderp/exllamav2), [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), [GPTQ-for-LLaMa](https://github.com/qwopqwop200/GPTQ-for-LLaMa), [CTransformers](https://github.com/marella/ctransformers), [AutoAWQ](https://github.com/casper-hansen/AutoAWQ)
- **Vision (multimodal)**: Attach images to messages for visual understanding ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Multimodal-Tutorial)). * Dropdown menu for quickly switching between different models
- **Tool-calling**: Models can call custom functions during chat — web search, page fetching, math, and more. Each tool is a single `.py` file, easy to create and extend ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Tool-Calling-Tutorial)). * LoRA: load and unload LoRAs on the fly, train a new LoRA using QLoRA
- **OpenAI-compatible API**: Chat and Completions endpoints with tool-calling support. Use as a local drop-in replacement for the OpenAI API ([examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples)). * Precise instruction templates for chat mode, including Llama-2-chat, Alpaca, Vicuna, WizardLM, StableLM, and many others
- **Training**: Fine-tune LoRAs on multi-turn chat or raw text datasets. Supports resuming interrupted runs ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/05-%E2%80%90-Training-Tab)). * 4-bit, 8-bit, and CPU inference through the transformers library
- **Image generation**: A dedicated tab for `diffusers` models like **Z-Image-Turbo**. Features 4-bit/8-bit quantization and a persistent gallery with metadata ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Image-Generation-Tutorial)). * Use llama.cpp models with transformers samplers (`llamacpp_HF` loader)
- **Easy setup**: [Portable builds](https://github.com/oobabooga/text-generation-webui/releases) (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or a one-click installer for the full feature set. * [Multimodal pipelines, including LLaVA and MiniGPT-4](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/multimodal)
- 100% offline and private, with zero telemetry, external resources, or remote update requests. * [Extensions framework](docs/Extensions.md)
- `instruct` mode for instruction-following (like ChatGPT), and `chat-instruct`/`chat` modes for talking to custom characters. Prompts are automatically formatted with Jinja2 templates. * [Custom chat characters](docs/Chat-mode.md)
- Edit messages, navigate between message versions, and branch conversations at any point. * Very efficient text streaming
- Free-form text generation in the Notebook tab without being limited to chat turns. * Markdown output with LaTeX rendering, to use for instance with [GALACTICA](https://github.com/paperswithcode/galai)
- Multiple sampling parameters and generation options for sophisticated text generation control. * API, including endpoints for websocket streaming ([see the examples](https://github.com/oobabooga/text-generation-webui/blob/main/api-examples))
- Aesthetic UI with dark and light themes.
- Syntax highlighting for code blocks and LaTeX rendering for mathematical expressions.
- Extension support, with numerous built-in and user-contributed extensions available. See the [wiki](https://github.com/oobabooga/text-generation-webui/wiki/07-%E2%80%90-Extensions) and [extensions directory](https://github.com/oobabooga/text-generation-webui-extensions) for details.
## How to install To learn how to use the various features, check out the Documentation: https://github.com/oobabooga/text-generation-webui/tree/main/docs
#### ✅ Option 1: Portable builds (get started in 1 minute) ## Installation
No installation needed — just download, unzip, and run. All dependencies included. ### One-click installers
Download from here: **https://github.com/oobabooga/text-generation-webui/releases** 1) Clone or download the repository.
2) Run the `start_linux.sh`, `start_windows.bat`, `start_macos.sh`, or `start_wsl.bat` script depending on your OS.
3) Select your GPU vendor when asked.
4) Have fun!
- Builds are provided for Linux, Windows, and macOS, with options for CUDA, Vulkan, ROCm, and CPU-only. #### How it works
- Compatible with GGUF (llama.cpp) models.
#### Option 2: Manual portable install with venv The script creates a folder called `installer_files` where it sets up a Conda environment using Miniconda. The installation is self-contained: if you want to reinstall, just delete `installer_files` and run the start script again.
Very fast setup that should work on any Python 3.9+: To launch the webui in the future after it is already installed, run the same `start` script.
```bash #### Getting updates
# Clone repository
git clone https://github.com/oobabooga/text-generation-webui
cd text-generation-webui
# Create virtual environment Run `update_linux.sh`, `update_windows.bat`, `update_macos.sh`, or `update_wsl.bat`.
python -m venv venv
# Activate virtual environment #### Running commands
# On Windows:
venv\Scripts\activate
# On macOS/Linux:
source venv/bin/activate
# Install dependencies (choose appropriate file under requirements/portable for your hardware) If you ever need to install something manually in the `installer_files` environment, you can launch an interactive shell using the cmd script: `cmd_linux.sh`, `cmd_windows.bat`, `cmd_macos.sh`, or `cmd_wsl.bat`.
pip install -r requirements/portable/requirements.txt --upgrade
# Launch server (basic command) #### Defining command-line flags
python server.py --portable --api --auto-launch
# When done working, deactivate To define persistent command-line flags like `--listen` or `--api`, edit the `CMD_FLAGS.txt` file with a text editor and add them there. Flags can also be provided directly to the start scripts, for instance, `./start_linux.sh --listen`.
deactivate
```
#### Option 3: One-click installer #### Other info
For users who need additional backends (ExLlamaV3, Transformers), training, image generation, or extensions (TTS, voice input, translation, etc.). Requires ~10GB disk space and downloads PyTorch. * There is no need to run any of those scripts as admin/root.
* For additional instructions about AMD setup, WSL setup, and nvcc installation, consult [this page](https://github.com/oobabooga/text-generation-webui/blob/main/docs/One-Click-Installers.md).
* The installer has been tested mostly on NVIDIA GPUs. If you can find a way to improve it for your AMD/Intel Arc/Mac Metal GPU, you are highly encouraged to submit a PR to this repository. The main file to be edited is `one_click.py`.
* For automated installation, you can use the `GPU_CHOICE`, `LAUNCH_AFTER_INSTALL`, and `INSTALL_EXTENSIONS` environment variables. For instance: `GPU_CHOICE=A LAUNCH_AFTER_INSTALL=False INSTALL_EXTENSIONS=False ./start_linux.sh`.
1. Clone the repository, or [download its source code](https://github.com/oobabooga/text-generation-webui/archive/refs/heads/main.zip) and extract it. ### Manual installation using Conda
2. Run the startup script for your OS: `start_windows.bat`, `start_linux.sh`, or `start_macos.sh`.
3. When prompted, select your GPU vendor.
4. After installation, open `http://127.0.0.1:7860` in your browser.
To restart the web UI later, run the same `start_` script. Recommended if you have some experience with the command-line.
You can pass command-line flags directly (e.g., `./start_linux.sh --help`), or add them to `user_data/CMD_FLAGS.txt` (e.g., `--api` to enable the API).
To update, run the update script for your OS: `update_wizard_windows.bat`, `update_wizard_linux.sh`, or `update_wizard_macos.sh`.
To reinstall with a fresh Python environment, delete the `installer_files` folder and run the `start_` script again.
<details>
<summary>
One-click installer details
</summary>
### One-click-installer
The script uses Miniforge to set up a Conda environment in the `installer_files` folder.
If you ever need to install something manually in the `installer_files` environment, you can launch an interactive shell using the cmd script: `cmd_linux.sh`, `cmd_windows.bat`, or `cmd_macos.sh`.
* There is no need to run any of those scripts (`start_`, `update_wizard_`, or `cmd_`) as admin/root.
* To install requirements for extensions, it is recommended to use the update wizard script with the "Install/update extensions requirements" option. At the end, this script will install the main requirements for the project to make sure that they take precedence in case of version conflicts.
* For automated installation, you can use the `GPU_CHOICE`, `LAUNCH_AFTER_INSTALL`, and `INSTALL_EXTENSIONS` environment variables. For instance: `GPU_CHOICE=A LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh`.
</details>
<details>
<summary>
Manual full installation with conda or docker
</summary>
### Full installation with Conda
#### 0. Install Conda #### 0. Install Conda
https://github.com/conda-forge/miniforge https://docs.conda.io/en/latest/miniconda.html
On Linux or WSL, Miniforge can be automatically installed with these two commands: On Linux or WSL, it can be automatically installed with these two commands ([source](https://educe-ubc.github.io/conda.html)):
``` ```
curl -sL "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh" > "Miniforge3.sh" curl -sL "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh" > "Miniconda3.sh"
bash Miniforge3.sh bash Miniconda3.sh
``` ```
For other platforms, download from: https://github.com/conda-forge/miniforge/releases/latest
#### 1. Create a new conda environment #### 1. Create a new conda environment
``` ```
conda create -n textgen python=3.13 conda create -n textgen python=3.10
conda activate textgen conda activate textgen
``` ```
@ -144,323 +91,330 @@ conda activate textgen
| System | GPU | Command | | System | GPU | Command |
|--------|---------|---------| |--------|---------|---------|
| Linux/WSL | NVIDIA | `pip3 install torch==2.9.1 --index-url https://download.pytorch.org/whl/cu128` | | Linux/WSL | NVIDIA | `pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118` |
| Linux/WSL | CPU only | `pip3 install torch==2.9.1 --index-url https://download.pytorch.org/whl/cpu` | | Linux/WSL | CPU only | `pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cpu` |
| Linux | AMD | `pip3 install https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/torch-2.9.1%2Brocm7.2.0.lw.git7e1940d4-cp313-cp313-linux_x86_64.whl` | | Linux | AMD | `pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/rocm5.6` |
| MacOS + MPS | Any | `pip3 install torch==2.9.1` | | MacOS + MPS | Any | `pip3 install torch torchvision torchaudio` |
| Windows | NVIDIA | `pip3 install torch==2.9.1 --index-url https://download.pytorch.org/whl/cu128` | | Windows | NVIDIA | `pip3 install torch torchvision torchaudio --index-url https://download.pytorch.org/whl/cu118` |
| Windows | CPU only | `pip3 install torch==2.9.1` | | Windows | CPU only | `pip3 install torch torchvision torchaudio` |
The up-to-date commands can be found here: https://pytorch.org/get-started/locally/. The up-to-date commands can be found here: https://pytorch.org/get-started/locally/.
If you need `nvcc` to compile some library manually, you will additionally need to install this:
```
conda install -y -c "nvidia/label/cuda-12.8.1" cuda
```
#### 3. Install the web UI #### 3. Install the web UI
``` ```
git clone https://github.com/oobabooga/text-generation-webui git clone https://github.com/oobabooga/text-generation-webui
cd text-generation-webui cd text-generation-webui
pip install -r requirements/full/<requirements file according to table below> pip install -r requirements.txt
``` ```
Requirements file to use: #### AMD, Metal, Intel Arc, and CPUs without AVX2
| GPU | requirements file to use | 1) Replace the last command above with
|--------|---------|
| NVIDIA | `requirements.txt` | ```
| AMD | `requirements_amd.txt` | pip install -r requirements_nowheels.txt
| CPU only | `requirements_cpu_only.txt` | ```
| Apple Intel | `requirements_apple_intel.txt` |
| Apple Silicon | `requirements_apple_silicon.txt` | 2) Manually install llama-cpp-python using the appropriate command for your hardware: [Installation from PyPI](https://github.com/abetlen/llama-cpp-python#installation-from-pypi).
### Start the web UI 3) Do the same for CTransformers: [Installation](https://github.com/marella/ctransformers#installation).
4) AMD: Manually install AutoGPTQ: [Installation](https://github.com/PanQiWei/AutoGPTQ#installation).
5) AMD: Manually install [ExLlama](https://github.com/turboderp/exllama) by simply cloning it into the `repositories` folder (it will be automatically compiled at runtime after that):
``` ```
conda activate textgen
cd text-generation-webui cd text-generation-webui
python server.py git clone https://github.com/turboderp/exllama repositories/exllama
``` ```
Then browse to #### bitsandbytes on older NVIDIA GPUs
`http://127.0.0.1:7860` bitsandbytes >= 0.39 may not work. In that case, to use `--load-in-8bit`, you may have to downgrade like this:
#### Manual install * Linux: `pip install bitsandbytes==0.38.1`
* Windows: `pip install https://github.com/jllllll/bitsandbytes-windows-webui/raw/main/bitsandbytes-0.38.1-py3-none-any.whl`
The `requirements*.txt` above contain various wheels precompiled through GitHub Actions. If you wish to compile things manually, or if you need to because no suitable wheels are available for your hardware, you can use `requirements_nowheels.txt` and then install your desired loaders manually.
### Alternative: Docker ### Alternative: Docker
``` ```
For NVIDIA GPU: ln -s docker/{Dockerfile,docker-compose.yml,.dockerignore} .
ln -s docker/{nvidia/Dockerfile,nvidia/docker-compose.yml,.dockerignore} .
For AMD GPU:
ln -s docker/{amd/Dockerfile,amd/docker-compose.yml,.dockerignore} .
For Intel GPU:
ln -s docker/{intel/Dockerfile,intel/docker-compose.yml,.dockerignore} .
For CPU only:
ln -s docker/{cpu/Dockerfile,cpu/docker-compose.yml,.dockerignore} .
cp docker/.env.example .env cp docker/.env.example .env
# Create logs/cache dir: # Edit .env and set TORCH_CUDA_ARCH_LIST based on your GPU model
mkdir -p user_data/logs user_data/cache
# Edit .env and set:
# TORCH_CUDA_ARCH_LIST based on your GPU model
# APP_RUNTIME_GID your host user's group id (run `id -g` in a terminal)
# BUILD_EXTENSIONS optionally add comma-separated list of extensions to build
# Edit user_data/CMD_FLAGS.txt and add in it the options you want to execute (like --listen --cpu)
#
docker compose up --build docker compose up --build
``` ```
* You need to have Docker Compose v2.17 or higher installed. See [this guide](https://github.com/oobabooga/text-generation-webui/wiki/09-%E2%80%90-Docker) for instructions. * You need to have docker compose v2.17 or higher installed. See [this guide](https://github.com/oobabooga/text-generation-webui/blob/main/docs/Docker.md) for instructions.
* For additional docker files, check out [this repository](https://github.com/Atinoda/text-generation-webui-docker). * For additional docker files, check out [this repository](https://github.com/Atinoda/text-generation-webui-docker).
### Updating the requirements ### Updating the requirements
From time to time, the `requirements*.txt` change. To update, use these commands: From time to time, the `requirements.txt` changes. To update, use these commands:
``` ```
conda activate textgen conda activate textgen
cd text-generation-webui cd text-generation-webui
pip install -r <requirements file that you have used> --upgrade pip install -r requirements.txt --upgrade
``` ```
</details>
<details>
<summary>
List of command-line flags
</summary>
```txt
usage: server.py [-h] [--user-data-dir USER_DATA_DIR] [--multi-user] [--model MODEL] [--lora LORA [LORA ...]] [--model-dir MODEL_DIR] [--lora-dir LORA_DIR] [--model-menu] [--settings SETTINGS]
[--extensions EXTENSIONS [EXTENSIONS ...]] [--verbose] [--idle-timeout IDLE_TIMEOUT] [--image-model IMAGE_MODEL] [--image-model-dir IMAGE_MODEL_DIR] [--image-dtype {bfloat16,float16}]
[--image-attn-backend {flash_attention_2,sdpa}] [--image-cpu-offload] [--image-compile] [--image-quant {none,bnb-8bit,bnb-4bit,torchao-int8wo,torchao-fp4,torchao-float8wo}]
[--loader LOADER] [--ctx-size N] [--cache-type N] [--model-draft MODEL_DRAFT] [--draft-max DRAFT_MAX] [--gpu-layers-draft GPU_LAYERS_DRAFT] [--device-draft DEVICE_DRAFT]
[--ctx-size-draft CTX_SIZE_DRAFT] [--spec-type {none,ngram-mod,ngram-simple,ngram-map-k,ngram-map-k4v,ngram-cache}] [--spec-ngram-size-n SPEC_NGRAM_SIZE_N]
[--spec-ngram-size-m SPEC_NGRAM_SIZE_M] [--spec-ngram-min-hits SPEC_NGRAM_MIN_HITS] [--gpu-layers N] [--cpu-moe] [--mmproj MMPROJ] [--streaming-llm] [--tensor-split TENSOR_SPLIT]
[--row-split] [--no-mmap] [--mlock] [--no-kv-offload] [--batch-size BATCH_SIZE] [--ubatch-size UBATCH_SIZE] [--threads THREADS] [--threads-batch THREADS_BATCH] [--numa]
[--parallel PARALLEL] [--fit-target FIT_TARGET] [--extra-flags EXTRA_FLAGS] [--cpu] [--cpu-memory CPU_MEMORY] [--disk] [--disk-cache-dir DISK_CACHE_DIR] [--load-in-8bit] [--bf16]
[--no-cache] [--trust-remote-code] [--force-safetensors] [--no_use_fast] [--attn-implementation IMPLEMENTATION] [--load-in-4bit] [--use_double_quant] [--compute_dtype COMPUTE_DTYPE]
[--quant_type QUANT_TYPE] [--gpu-split GPU_SPLIT] [--enable-tp] [--tp-backend TP_BACKEND] [--cfg-cache] [--listen] [--listen-port LISTEN_PORT] [--listen-host LISTEN_HOST] [--share]
[--auto-launch] [--gradio-auth GRADIO_AUTH] [--gradio-auth-path GRADIO_AUTH_PATH] [--ssl-keyfile SSL_KEYFILE] [--ssl-certfile SSL_CERTFILE] [--subpath SUBPATH] [--old-colors]
[--portable] [--api] [--public-api] [--public-api-id PUBLIC_API_ID] [--api-port API_PORT] [--api-key API_KEY] [--admin-key ADMIN_KEY] [--api-enable-ipv6] [--api-disable-ipv4]
[--nowebui] [--temperature N] [--dynatemp-low N] [--dynatemp-high N] [--dynatemp-exponent N] [--smoothing-factor N] [--smoothing-curve N] [--min-p N] [--top-p N] [--top-k N]
[--typical-p N] [--xtc-threshold N] [--xtc-probability N] [--epsilon-cutoff N] [--eta-cutoff N] [--tfs N] [--top-a N] [--top-n-sigma N] [--adaptive-target N] [--adaptive-decay N]
[--dry-multiplier N] [--dry-allowed-length N] [--dry-base N] [--repetition-penalty N] [--frequency-penalty N] [--presence-penalty N] [--encoder-repetition-penalty N]
[--no-repeat-ngram-size N] [--repetition-penalty-range N] [--penalty-alpha N] [--guidance-scale N] [--mirostat-mode N] [--mirostat-tau N] [--mirostat-eta N]
[--do-sample | --no-do-sample] [--dynamic-temperature | --no-dynamic-temperature] [--temperature-last | --no-temperature-last] [--sampler-priority N] [--dry-sequence-breakers N]
[--enable-thinking | --no-enable-thinking] [--reasoning-effort N] [--chat-template-file CHAT_TEMPLATE_FILE]
Text Generation Web UI
options:
-h, --help show this help message and exit
Basic settings:
--user-data-dir USER_DATA_DIR Path to the user data directory. Default: auto-detected.
--multi-user Multi-user mode. Chat histories are not saved or automatically loaded. Best suited for small trusted teams.
--model MODEL Name of the model to load by default.
--lora LORA [LORA ...] The list of LoRAs to load. If you want to load more than one LoRA, write the names separated by spaces.
--model-dir MODEL_DIR Path to directory with all the models.
--lora-dir LORA_DIR Path to directory with all the loras.
--model-menu Show a model menu in the terminal when the web UI is first launched.
--settings SETTINGS Load the default interface settings from this yaml file. See user_data/settings-template.yaml for an example. If you create a file called
user_data/settings.yaml, this file will be loaded by default without the need to use the --settings flag.
--extensions EXTENSIONS [EXTENSIONS ...] The list of extensions to load. If you want to load more than one extension, write the names separated by spaces.
--verbose Print the prompts to the terminal.
--idle-timeout IDLE_TIMEOUT Unload model after this many minutes of inactivity. It will be automatically reloaded when you try to use it again.
Image model:
--image-model IMAGE_MODEL Name of the image model to select on startup (overrides saved setting).
--image-model-dir IMAGE_MODEL_DIR Path to directory with all the image models.
--image-dtype {bfloat16,float16} Data type for image model.
--image-attn-backend {flash_attention_2,sdpa} Attention backend for image model.
--image-cpu-offload Enable CPU offloading for image model.
--image-compile Compile the image model for faster inference.
--image-quant {none,bnb-8bit,bnb-4bit,torchao-int8wo,torchao-fp4,torchao-float8wo}
Quantization method for image model.
Model loader:
--loader LOADER Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, ExLlamav3_HF, ExLlamav3, TensorRT-
LLM.
Context and cache:
--ctx-size, --n_ctx, --max_seq_len N Context size in tokens. 0 = auto for llama.cpp (requires gpu-layers=-1), 8192 for other loaders.
--cache-type, --cache_type N KV cache type; valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV3 - fp16, q2 to q8 (can specify k_bits and v_bits separately, e.g. q4_q8).
Speculative decoding:
--model-draft MODEL_DRAFT Path to the draft model for speculative decoding.
--draft-max DRAFT_MAX Number of tokens to draft for speculative decoding.
--gpu-layers-draft GPU_LAYERS_DRAFT Number of layers to offload to the GPU for the draft model.
--device-draft DEVICE_DRAFT Comma-separated list of devices to use for offloading the draft model. Example: CUDA0,CUDA1
--ctx-size-draft CTX_SIZE_DRAFT Size of the prompt context for the draft model. If 0, uses the same as the main model.
--spec-type {none,ngram-mod,ngram-simple,ngram-map-k,ngram-map-k4v,ngram-cache}
Draftless speculative decoding type. Recommended: ngram-mod.
--spec-ngram-size-n SPEC_NGRAM_SIZE_N N-gram lookup size for ngram speculative decoding.
--spec-ngram-size-m SPEC_NGRAM_SIZE_M Draft n-gram size for ngram speculative decoding.
--spec-ngram-min-hits SPEC_NGRAM_MIN_HITS Minimum n-gram hits for ngram-map speculative decoding.
llama.cpp:
--gpu-layers, --n-gpu-layers N Number of layers to offload to the GPU. -1 = auto.
--cpu-moe Move the experts to the CPU (for MoE models).
--mmproj MMPROJ Path to the mmproj file for vision models.
--streaming-llm Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.
--tensor-split TENSOR_SPLIT Split the model across multiple GPUs. Comma-separated list of proportions. Example: 60,40.
--row-split Split the model by rows across GPUs. This may improve multi-gpu performance.
--no-mmap Prevent mmap from being used.
--mlock Force the system to keep the model in RAM.
--no-kv-offload Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.
--batch-size BATCH_SIZE Maximum number of prompt tokens to batch together when calling llama-server. This is the application level batch size.
--ubatch-size UBATCH_SIZE Maximum number of prompt tokens to batch together when calling llama-server. This is the max physical batch size for computation (device level).
--threads THREADS Number of threads to use.
--threads-batch THREADS_BATCH Number of threads to use for batches/prompt processing.
--numa Activate NUMA task allocation for llama.cpp.
--parallel PARALLEL Number of parallel request slots. The context size is divided equally among slots. For example, to have 4 slots with 8192 context each, set
ctx_size to 32768.
--fit-target FIT_TARGET Target VRAM margin per device for auto GPU layers, comma-separated list of values in MiB. A single value is broadcast across all devices.
Default: 1024.
--extra-flags EXTRA_FLAGS Extra flags to pass to llama-server. Format: "flag1=value1,flag2,flag3=value3". Example: "override-tensor=exps=CPU"
Transformers/Accelerate:
--cpu Use the CPU to generate text. Warning: Training on CPU is extremely slow.
--cpu-memory CPU_MEMORY Maximum CPU memory in GiB. Use this for CPU offloading.
--disk If the model is too large for your GPU(s) and CPU combined, send the remaining layers to the disk.
--disk-cache-dir DISK_CACHE_DIR Directory to save the disk cache to.
--load-in-8bit Load the model with 8-bit precision (using bitsandbytes).
--bf16 Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU.
--no-cache Set use_cache to False while generating text. This reduces VRAM usage slightly, but it comes at a performance cost.
--trust-remote-code Set trust_remote_code=True while loading the model. Necessary for some models.
--force-safetensors Set use_safetensors=True while loading the model. This prevents arbitrary code execution.
--no_use_fast Set use_fast=False while loading the tokenizer (it's True by default). Use this if you have any problems related to use_fast.
--attn-implementation IMPLEMENTATION Attention implementation. Valid options: sdpa, eager, flash_attention_2.
bitsandbytes 4-bit:
--load-in-4bit Load the model with 4-bit precision (using bitsandbytes).
--use_double_quant use_double_quant for 4-bit.
--compute_dtype COMPUTE_DTYPE compute dtype for 4-bit. Valid options: bfloat16, float16, float32.
--quant_type QUANT_TYPE quant_type for 4-bit. Valid options: nf4, fp4.
ExLlamaV3:
--gpu-split GPU_SPLIT Comma-separated list of VRAM (in GB) to use per GPU device for model layers. Example: 20,7,7.
--enable-tp, --enable_tp Enable Tensor Parallelism (TP) to split the model across GPUs.
--tp-backend TP_BACKEND The backend for tensor parallelism. Valid options: native, nccl. Default: native.
--cfg-cache Create an additional cache for CFG negative prompts. Necessary to use CFG with that loader.
Gradio:
--listen Make the web UI reachable from your local network.
--listen-port LISTEN_PORT The listening port that the server will use.
--listen-host LISTEN_HOST The hostname that the server will use.
--share Create a public URL. This is useful for running the web UI on Google Colab or similar.
--auto-launch Open the web UI in the default browser upon launch.
--gradio-auth GRADIO_AUTH Set Gradio authentication password in the format "username:password". Multiple credentials can also be supplied with "u1:p1,u2:p2,u3:p3".
--gradio-auth-path GRADIO_AUTH_PATH Set the Gradio authentication file path. The file should contain one or more user:password pairs in the same format as above.
--ssl-keyfile SSL_KEYFILE The path to the SSL certificate key file.
--ssl-certfile SSL_CERTFILE The path to the SSL certificate cert file.
--subpath SUBPATH Customize the subpath for gradio, use with reverse proxy
--old-colors Use the legacy Gradio colors, before the December/2024 update.
--portable Hide features not available in portable mode like training.
API:
--api Enable the API extension.
--public-api Create a public URL for the API using Cloudflare.
--public-api-id PUBLIC_API_ID Tunnel ID for named Cloudflare Tunnel. Use together with public-api option.
--api-port API_PORT The listening port for the API.
--api-key API_KEY API authentication key.
--admin-key ADMIN_KEY API authentication key for admin tasks like loading and unloading models. If not set, will be the same as --api-key.
--api-enable-ipv6 Enable IPv6 for the API
--api-disable-ipv4 Disable IPv4 for the API
--nowebui Do not launch the Gradio UI. Useful for launching the API in standalone mode.
API generation defaults:
--temperature N Temperature
--dynatemp-low N Dynamic temperature low
--dynatemp-high N Dynamic temperature high
--dynatemp-exponent N Dynamic temperature exponent
--smoothing-factor N Smoothing factor
--smoothing-curve N Smoothing curve
--min-p N Min P
--top-p N Top P
--top-k N Top K
--typical-p N Typical P
--xtc-threshold N XTC threshold
--xtc-probability N XTC probability
--epsilon-cutoff N Epsilon cutoff
--eta-cutoff N Eta cutoff
--tfs N TFS
--top-a N Top A
--top-n-sigma N Top N Sigma
--adaptive-target N Adaptive target
--adaptive-decay N Adaptive decay
--dry-multiplier N DRY multiplier
--dry-allowed-length N DRY allowed length
--dry-base N DRY base
--repetition-penalty N Repetition penalty
--frequency-penalty N Frequency penalty
--presence-penalty N Presence penalty
--encoder-repetition-penalty N Encoder repetition penalty
--no-repeat-ngram-size N No repeat ngram size
--repetition-penalty-range N Repetition penalty range
--penalty-alpha N Penalty alpha
--guidance-scale N Guidance scale
--mirostat-mode N Mirostat mode
--mirostat-tau N Mirostat tau
--mirostat-eta N Mirostat eta
--do-sample, --no-do-sample Do sample
--dynamic-temperature, --no-dynamic-temperature Dynamic temperature
--temperature-last, --no-temperature-last Temperature last
--sampler-priority N Sampler priority
--dry-sequence-breakers N DRY sequence breakers
--enable-thinking, --no-enable-thinking Enable thinking
--reasoning-effort N Reasoning effort
--chat-template-file CHAT_TEMPLATE_FILE Path to a chat template file (.jinja, .jinja2, or .yaml) to use as the default instruction template for API requests. Overrides the model's
built-in template.
```
</details>
## Downloading models ## Downloading models
1. Download a GGUF model file from [Hugging Face](https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads&search=gguf). Models should be placed in the `text-generation-webui/models` folder. They are usually downloaded from [Hugging Face](https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads).
2. Place it in the `user_data/models` folder.
That's it. The UI will detect it automatically. * Transformers or GPTQ models are made of several files and must be placed in a subfolder. Example:
To check what will fit your GPU, you can use the [VRAM Calculator](https://huggingface.co/spaces/oobabooga/accurate-gguf-vram-calculator).
<details>
<summary>Other model types (Transformers, EXL3)</summary>
Models that consist of multiple files (like 16-bit Transformers models and EXL3 models) should be placed in a subfolder inside `user_data/models`:
``` ```
text-generation-webui text-generation-webui
└── user_data ├── models
└── models │   ├── lmsys_vicuna-33b-v1.3
└── Qwen_Qwen3-8B │   │   ├── config.json
├── config.json │   │   ├── generation_config.json
├── generation_config.json │   │   ├── pytorch_model-00001-of-00007.bin
├── model-00001-of-00004.safetensors │   │   ├── pytorch_model-00002-of-00007.bin
├── ... │   │   ├── pytorch_model-00003-of-00007.bin
├── tokenizer_config.json │   │   ├── pytorch_model-00004-of-00007.bin
└── tokenizer.json │   │   ├── pytorch_model-00005-of-00007.bin
│   │   ├── pytorch_model-00006-of-00007.bin
│   │   ├── pytorch_model-00007-of-00007.bin
│   │   ├── pytorch_model.bin.index.json
│   │   ├── special_tokens_map.json
│   │   ├── tokenizer_config.json
│   │   └── tokenizer.model
``` ```
These formats require the one-click installer (not the portable build). * GGUF models are a single file and should be placed directly into `models`. Example:
```
text-generation-webui
├── models
│   ├── llama-2-13b-chat.Q4_K_M.gguf
```
In both cases, you can use the "Model" tab of the UI to download the model from Hugging Face automatically. It is also possible to download via the command-line with `python download-model.py organization/model` (use `--help` to see all the options).
#### GPT-4chan
<details>
<summary>
Instructions
</summary>
[GPT-4chan](https://huggingface.co/ykilcher/gpt-4chan) has been removed from Hugging Face, so you need to download it elsewhere. You have two options:
* Torrent: [16-bit](https://archive.org/details/gpt4chan_model_float16) / [32-bit](https://archive.org/details/gpt4chan_model)
* Direct download: [16-bit](https://theswissbay.ch/pdf/_notpdf_/gpt4chan_model_float16/) / [32-bit](https://theswissbay.ch/pdf/_notpdf_/gpt4chan_model/)
The 32-bit version is only relevant if you intend to run the model in CPU mode. Otherwise, you should use the 16-bit version.
After downloading the model, follow these steps:
1. Place the files under `models/gpt4chan_model_float16` or `models/gpt4chan_model`.
2. Place GPT-J 6B's config.json file in that same folder: [config.json](https://huggingface.co/EleutherAI/gpt-j-6B/raw/main/config.json).
3. Download GPT-J 6B's tokenizer files (they will be automatically detected when you attempt to load GPT-4chan):
```
python download-model.py EleutherAI/gpt-j-6B --text-only
```
When you load this model in default or notebook modes, the "HTML" tab will show the generated text in 4chan format:
![Image3](https://github.com/oobabooga/screenshots/raw/main/gpt4chan.png)
</details> </details>
## Documentation ## Starting the web UI
https://github.com/oobabooga/text-generation-webui/wiki conda activate textgen
cd text-generation-webui
python server.py
Then browse to
`http://localhost:7860/?__theme=dark`
Optionally, you can use the following command-line flags:
#### Basic settings
| Flag | Description |
|--------------------------------------------|-------------|
| `-h`, `--help` | Show this help message and exit. |
| `--multi-user` | Multi-user mode. Chat histories are not saved or automatically loaded. WARNING: this is highly experimental. |
| `--character CHARACTER` | The name of the character to load in chat mode by default. |
| `--model MODEL` | Name of the model to load by default. |
| `--lora LORA [LORA ...]` | The list of LoRAs to load. If you want to load more than one LoRA, write the names separated by spaces. |
| `--model-dir MODEL_DIR` | Path to directory with all the models. |
| `--lora-dir LORA_DIR` | Path to directory with all the loras. |
| `--model-menu` | Show a model menu in the terminal when the web UI is first launched. |
| `--settings SETTINGS_FILE` | Load the default interface settings from this yaml file. See `settings-template.yaml` for an example. If you create a file called `settings.yaml`, this file will be loaded by default without the need to use the `--settings` flag. |
| `--extensions EXTENSIONS [EXTENSIONS ...]` | The list of extensions to load. If you want to load more than one extension, write the names separated by spaces. |
| `--verbose` | Print the prompts to the terminal. |
| `--chat-buttons` | Show buttons on chat tab instead of hover menu. |
#### Model loader
| Flag | Description |
|--------------------------------------------|-------------|
| `--loader LOADER` | Choose the model loader manually, otherwise, it will get autodetected. Valid options: transformers, autogptq, gptq-for-llama, exllama, exllama_hf, llamacpp, rwkv, ctransformers |
#### Accelerate/transformers
| Flag | Description |
|---------------------------------------------|-------------|
| `--cpu` | Use the CPU to generate text. Warning: Training on CPU is extremely slow.|
| `--auto-devices` | Automatically split the model across the available GPU(s) and CPU. |
| `--gpu-memory GPU_MEMORY [GPU_MEMORY ...]` | Maximum GPU memory in GiB to be allocated per GPU. Example: `--gpu-memory 10` for a single GPU, `--gpu-memory 10 5` for two GPUs. You can also set values in MiB like `--gpu-memory 3500MiB`. |
| `--cpu-memory CPU_MEMORY` | Maximum CPU memory in GiB to allocate for offloaded weights. Same as above.|
| `--disk` | If the model is too large for your GPU(s) and CPU combined, send the remaining layers to the disk. |
| `--disk-cache-dir DISK_CACHE_DIR` | Directory to save the disk cache to. Defaults to `cache/`. |
| `--load-in-8bit` | Load the model with 8-bit precision (using bitsandbytes).|
| `--bf16` | Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU. |
| `--no-cache` | Set `use_cache` to False while generating text. This reduces the VRAM usage a bit with a performance cost. |
| `--xformers` | Use xformers' memory-efficient attention. This should increase your tokens/s. |
| `--sdp-attention` | Use torch 2.0's sdp attention. |
| `--trust-remote-code` | Set trust_remote_code=True while loading a model. Necessary for ChatGLM and Falcon. |
| `--use_fast` | Set use_fast=True while loading a tokenizer. |
#### Accelerate 4-bit
⚠️ Requires minimum compute of 7.0 on Windows at the moment.
| Flag | Description |
|---------------------------------------------|-------------|
| `--load-in-4bit` | Load the model with 4-bit precision (using bitsandbytes). |
| `--compute_dtype COMPUTE_DTYPE` | compute dtype for 4-bit. Valid options: bfloat16, float16, float32. |
| `--quant_type QUANT_TYPE` | quant_type for 4-bit. Valid options: nf4, fp4. |
| `--use_double_quant` | use_double_quant for 4-bit. |
#### GGUF (for llama.cpp and ctransformers)
| Flag | Description |
|-------------|-------------|
| `--threads` | Number of threads to use. |
| `--threads-batch THREADS_BATCH` | Number of threads to use for batches/prompt processing. |
| `--n_batch` | Maximum number of prompt tokens to batch together when calling llama_eval. |
| `--n-gpu-layers N_GPU_LAYERS` | Number of layers to offload to the GPU. Only works if llama-cpp-python was compiled with BLAS. Set this to 1000000000 to offload all layers to the GPU. |
| `--n_ctx N_CTX` | Size of the prompt context. |
#### llama.cpp
| Flag | Description |
|---------------|---------------|
| `--mul_mat_q` | Activate new mulmat kernels. |
| `--tensor_split TENSOR_SPLIT` | Split the model across multiple GPUs, comma-separated list of proportions, e.g. 18,17 |
| `--llama_cpp_seed SEED` | Seed for llama-cpp models. Default 0 (random). |
| `--cache-capacity CACHE_CAPACITY` | Maximum cache capacity. Examples: 2000MiB, 2GiB. When provided without units, bytes will be assumed. |
|`--cfg-cache` | llamacpp_HF: Create an additional cache for CFG negative prompts. |
| `--no-mmap` | Prevent mmap from being used. |
| `--mlock` | Force the system to keep the model in RAM. |
| `--numa` | Activate NUMA task allocation for llama.cpp |
| `--cpu` | Use the CPU version of llama-cpp-python instead of the GPU-accelerated version. |
#### ctransformers
| Flag | Description |
|-------------|-------------|
| `--model_type MODEL_TYPE` | Model type of pre-quantized model. Currently gpt2, gptj, gptneox, falcon, llama, mpt, starcoder (gptbigcode), dollyv2, and replit are supported. |
#### AutoGPTQ
| Flag | Description |
|------------------|-------------|
| `--triton` | Use triton. |
| `--no_inject_fused_attention` | Disable the use of fused attention, which will use less VRAM at the cost of slower inference. |
| `--no_inject_fused_mlp` | Triton mode only: disable the use of fused MLP, which will use less VRAM at the cost of slower inference. |
| `--no_use_cuda_fp16` | This can make models faster on some systems. |
| `--desc_act` | For models that don't have a quantize_config.json, this parameter is used to define whether to set desc_act or not in BaseQuantizeConfig. |
| `--disable_exllama` | Disable ExLlama kernel, which can improve inference speed on some systems. |
#### ExLlama
| Flag | Description |
|------------------|-------------|
|`--gpu-split` | Comma-separated list of VRAM (in GB) to use per GPU device for model layers, e.g. `20,7,7` |
|`--max_seq_len MAX_SEQ_LEN` | Maximum sequence length. |
|`--cfg-cache` | ExLlama_HF: Create an additional cache for CFG negative prompts. Necessary to use CFG with that loader, but not necessary for CFG with base ExLlama. |
#### GPTQ-for-LLaMa
| Flag | Description |
|---------------------------|-------------|
| `--wbits WBITS` | Load a pre-quantized model with specified precision in bits. 2, 3, 4 and 8 are supported. |
| `--model_type MODEL_TYPE` | Model type of pre-quantized model. Currently LLaMA, OPT, and GPT-J are supported. |
| `--groupsize GROUPSIZE` | Group size. |
| `--pre_layer PRE_LAYER [PRE_LAYER ...]` | The number of layers to allocate to the GPU. Setting this parameter enables CPU offloading for 4-bit models. For multi-gpu, write the numbers separated by spaces, e.g. `--pre_layer 30 60`. |
| `--checkpoint CHECKPOINT` | The path to the quantized checkpoint file. If not specified, it will be automatically detected. |
| `--monkey-patch` | Apply the monkey patch for using LoRAs with quantized models. |
#### DeepSpeed
| Flag | Description |
|---------------------------------------|-------------|
| `--deepspeed` | Enable the use of DeepSpeed ZeRO-3 for inference via the Transformers integration. |
| `--nvme-offload-dir NVME_OFFLOAD_DIR` | DeepSpeed: Directory to use for ZeRO-3 NVME offloading. |
| `--local_rank LOCAL_RANK` | DeepSpeed: Optional argument for distributed setups. |
#### RWKV
| Flag | Description |
|---------------------------------|-------------|
| `--rwkv-strategy RWKV_STRATEGY` | RWKV: The strategy to use while loading the model. Examples: "cpu fp32", "cuda fp16", "cuda fp16i8". |
| `--rwkv-cuda-on` | RWKV: Compile the CUDA kernel for better performance. |
#### RoPE (for llama.cpp, ExLlama, ExLlamaV2, and transformers)
| Flag | Description |
|------------------|-------------|
| `--alpha_value ALPHA_VALUE` | Positional embeddings alpha factor for NTK RoPE scaling. Use either this or compress_pos_emb, not both. |
| `--rope_freq_base ROPE_FREQ_BASE` | If greater than 0, will be used instead of alpha_value. Those two are related by rope_freq_base = 10000 * alpha_value ^ (64 / 63). |
| `--compress_pos_emb COMPRESS_POS_EMB` | Positional embeddings compression factor. Should be set to (context length) / (model's original context length). Equal to 1/rope_freq_scale. |
#### Gradio
| Flag | Description |
|---------------------------------------|-------------|
| `--listen` | Make the web UI reachable from your local network. |
| `--listen-host LISTEN_HOST` | The hostname that the server will use. |
| `--listen-port LISTEN_PORT` | The listening port that the server will use. |
| `--share` | Create a public URL. This is useful for running the web UI on Google Colab or similar. |
| `--auto-launch` | Open the web UI in the default browser upon launch. |
| `--gradio-auth USER:PWD` | Set the gradio authentication like "username:password"; or comma-delimit multiple like "u1:p1,u2:p2,u3:p3". |
| `--gradio-auth-path GRADIO_AUTH_PATH` | Set the gradio authentication file path. The file should contain one or more user:password pairs in this format: "u1:p1,u2:p2,u3:p3" |
| `--ssl-keyfile SSL_KEYFILE` | The path to the SSL certificate key file. |
| `--ssl-certfile SSL_CERTFILE` | The path to the SSL certificate cert file. |
#### API
| Flag | Description |
|---------------------------------------|-------------|
| `--api` | Enable the API extension. |
| `--public-api` | Create a public URL for the API using Cloudflare. |
| `--public-api-id PUBLIC_API_ID` | Tunnel ID for named Cloudflare Tunnel. Use together with public-api option. |
| `--api-blocking-port BLOCKING_PORT` | The listening port for the blocking API. |
| `--api-streaming-port STREAMING_PORT` | The listening port for the streaming API. |
#### Multimodal
| Flag | Description |
|---------------------------------------|-------------|
| `--multimodal-pipeline PIPELINE` | The multimodal pipeline to use. Examples: `llava-7b`, `llava-13b`. |
## Presets
Inference settings presets can be created under `presets/` as yaml files. These files are detected automatically at startup.
The presets that are included by default are the result of a contest that received 7215 votes. More details can be found [here](https://github.com/oobabooga/oobabooga.github.io/blob/main/arena/results.md).
## Contributing
If you would like to contribute to the project, check out the [Contributing guidelines](https://github.com/oobabooga/text-generation-webui/wiki/Contributing-guidelines).
## Community ## Community
https://www.reddit.com/r/Oobabooga/ * Subreddit: https://www.reddit.com/r/oobabooga/
* Discord: https://discord.gg/jwZCF2dPQN
## Acknowledgments ## Acknowledgment
- In August 2023, [Andreessen Horowitz](https://a16z.com/) (a16z) provided a generous grant to encourage and support my independent work on this project. I am **extremely** grateful for their trust and recognition. In August 2023, [Andreessen Horowitz](https://a16z.com/) (a16z) provided a generous grant to encourage and support my independent work on this project. I am **extremely** grateful for their trust and recognition, which will allow me to dedicate more time towards realizing the full potential of text-generation-webui.
- This project was inspired by [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) and wouldn't exist without it.

View file

@ -0,0 +1,112 @@
import asyncio
import html
import json
import sys
try:
    import websockets
except ImportError:
    # Nothing below can work without the websockets client, so stop right
    # away instead of continuing and failing later with a NameError.
    print("Websockets package not found. Make sure it's installed.")
    raise SystemExit(1)
# For local streaming, the websockets are hosted without ssl - ws://
HOST = 'localhost:5005'
URI = f'ws://{HOST}/api/v1/chat-stream'
# For reverse-proxied streaming, the remote will likely host with ssl - wss://
# (same chat-stream endpoint as above; only the scheme and host change)
# URI = 'wss://your-uri-here.trycloudflare.com/api/v1/chat-stream'
async def run(user_input, history):
    """Stream a chat reply from the websocket API.

    Sends a single JSON request built from ``user_input`` and ``history`` to
    ``URI``, then yields the ``history`` field of every ``text_stream`` event
    until a ``stream_end`` event is received.
    """
    # Note: the selected defaults change from time to time.
    chat_fields = {
        'user_input': user_input,
        'max_new_tokens': 250,
        'auto_max_new_tokens': False,
        'max_tokens_second': 0,
        'history': history,
        'mode': 'instruct',  # Valid options: 'chat', 'chat-instruct', 'instruct'
        'character': 'Example',
        'instruction_template': 'Vicuna-v1.1',  # Will get autodetected if unset
        'your_name': 'You',
        # 'name1': 'name of user', # Optional
        # 'name2': 'name of character', # Optional
        # 'context': 'character context', # Optional
        # 'greeting': 'greeting', # Optional
        # 'name1_instruct': 'You', # Optional
        # 'name2_instruct': 'Assistant', # Optional
        # 'context_instruct': 'context_instruct', # Optional
        # 'turn_template': 'turn_template', # Optional
        'regenerate': False,
        '_continue': False,
        'chat_instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
    }

    # Generation params. If 'preset' is set to different than 'None', the values
    # in presets/preset-name.yaml are used instead of the individual numbers.
    generation_fields = {
        'preset': 'None',
        'do_sample': True,
        'temperature': 0.7,
        'top_p': 0.1,
        'typical_p': 1,
        'epsilon_cutoff': 0,  # In units of 1e-4
        'eta_cutoff': 0,  # In units of 1e-4
        'tfs': 1,
        'top_a': 0,
        'repetition_penalty': 1.18,
        'repetition_penalty_range': 0,
        'top_k': 40,
        'min_length': 0,
        'no_repeat_ngram_size': 0,
        'num_beams': 1,
        'penalty_alpha': 0,
        'length_penalty': 1,
        'early_stopping': False,
        'mirostat_mode': 0,
        'mirostat_tau': 5,
        'mirostat_eta': 0.1,
        'grammar_string': '',
        'guidance_scale': 1,
        'negative_prompt': '',
        'seed': -1,
        'add_bos_token': True,
        'truncation_length': 2048,
        'ban_eos_token': False,
        'custom_token_bans': '',
        'skip_special_tokens': True,
        'stopping_strings': [],
    }

    # Chat fields first, generation fields second: same key order as one literal.
    payload = {**chat_fields, **generation_fields}

    async with websockets.connect(URI, ping_interval=None) as websocket:
        await websocket.send(json.dumps(payload))

        while True:
            message = json.loads(await websocket.recv())
            event = message['event']
            if event == 'stream_end':
                return
            if event == 'text_stream':
                yield message['history']
async def print_response_stream(user_input, history):
    """Print the streamed reply piece by piece.

    Every item produced by ``run`` is a complete history; only the part of the
    last visible reply that has not been printed yet is written to stdout.
    """
    printed = 0
    async for snapshot in run(user_input, history):
        reply_so_far = snapshot['visible'][-1][1]
        fresh_text = reply_so_far[printed:]
        printed += len(fresh_text)
        # flush=True: if we don't flush, we won't see tokens in realtime.
        print(html.unescape(fresh_text), end='', flush=True)
if __name__ == '__main__':
    user_input = "Please give me a step-by-step guide on how to plant a tree in my backyard."

    # Basic example: start from an empty conversation.
    history = dict(internal=[], visible=[])

    # "Continue" example. Make sure to set '_continue' to True above
    # arr = [user_input, 'Surely, here is']
    # history = {'internal': [arr], 'visible': [arr]}

    asyncio.run(print_response_stream(user_input, history))

View file

@ -0,0 +1,92 @@
import html
import json
import requests
# For a local server, the API is hosted without ssl - http://
HOST = 'localhost:5000'
URI = f'http://{HOST}/api/v1/chat'
# For a reverse-proxied server, the remote will likely host with ssl - https://
# URI = 'https://your-uri-here.trycloudflare.com/api/v1/chat'
def run(user_input, history):
    """POST one chat request to ``URI`` and print the outcome.

    On an HTTP 200 response this prints the returned history as indented
    JSON, a blank line, and the HTML-unescaped last visible reply. Any other
    status code produces no output.
    """
    chat_fields = {
        'user_input': user_input,
        'max_new_tokens': 250,
        'auto_max_new_tokens': False,
        'max_tokens_second': 0,
        'history': history,
        'mode': 'instruct',  # Valid options: 'chat', 'chat-instruct', 'instruct'
        'character': 'Example',
        'instruction_template': 'Vicuna-v1.1',  # Will get autodetected if unset
        'your_name': 'You',
        # 'name1': 'name of user', # Optional
        # 'name2': 'name of character', # Optional
        # 'context': 'character context', # Optional
        # 'greeting': 'greeting', # Optional
        # 'name1_instruct': 'You', # Optional
        # 'name2_instruct': 'Assistant', # Optional
        # 'context_instruct': 'context_instruct', # Optional
        # 'turn_template': 'turn_template', # Optional
        'regenerate': False,
        '_continue': False,
        'chat_instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
    }

    # Generation params. If 'preset' is set to different than 'None', the values
    # in presets/preset-name.yaml are used instead of the individual numbers.
    generation_fields = {
        'preset': 'None',
        'do_sample': True,
        'temperature': 0.7,
        'top_p': 0.1,
        'typical_p': 1,
        'epsilon_cutoff': 0,  # In units of 1e-4
        'eta_cutoff': 0,  # In units of 1e-4
        'tfs': 1,
        'top_a': 0,
        'repetition_penalty': 1.18,
        'repetition_penalty_range': 0,
        'top_k': 40,
        'min_length': 0,
        'no_repeat_ngram_size': 0,
        'num_beams': 1,
        'penalty_alpha': 0,
        'length_penalty': 1,
        'early_stopping': False,
        'mirostat_mode': 0,
        'mirostat_tau': 5,
        'mirostat_eta': 0.1,
        'grammar_string': '',
        'guidance_scale': 1,
        'negative_prompt': '',
        'seed': -1,
        'add_bos_token': True,
        'truncation_length': 2048,
        'ban_eos_token': False,
        'custom_token_bans': '',
        'skip_special_tokens': True,
        'stopping_strings': [],
    }

    payload = {**chat_fields, **generation_fields}
    response = requests.post(URI, json=payload)

    # Only a successful reply is reported; anything else is skipped.
    if response.status_code != 200:
        return

    result = response.json()['results'][0]['history']
    print(json.dumps(result, indent=4))
    print()
    print(html.unescape(result['visible'][-1][1]))
if __name__ == '__main__':
    user_input = "Please give me a step-by-step guide on how to plant a tree in my backyard."

    # Basic example: start from an empty conversation.
    history = dict(internal=[], visible=[])

    # "Continue" example. Make sure to set '_continue' to True above
    # arr = [user_input, 'Surely, here is']
    # history = {'internal': [arr], 'visible': [arr]}

    run(user_input, history)

View file

@ -0,0 +1,176 @@
#!/usr/bin/env python3
import requests
# Address of the running API server. '0.0.0.0' is a bind-all address for
# servers and is not a portable destination for a client, so connect through
# localhost instead.
HOST = 'localhost:5000'
def generate(prompt, tokens=200):
    """Request a completion for ``prompt`` from the generate endpoint.

    ``tokens`` is sent as ``max_new_tokens``. Returns the generated text on an
    HTTP 200 response and ``None`` for any other status code.
    """
    payload = {'prompt': prompt, 'max_new_tokens': tokens}
    reply = requests.post(f'http://{HOST}/api/v1/generate', json=payload)
    if reply.status_code != 200:
        return None
    return reply.json()['results'][0]['text']
def model_api(request):
    """POST ``request`` to the model endpoint and return the decoded JSON reply."""
    return requests.post(f'http://{HOST}/api/v1/model', json=request).json()
# print some common settings
def print_basic_model_info(response):
    """Print the model name, its LoRAs and two basic settings from ``response``.

    ``response`` is expected to carry a ``'result'`` mapping with the keys
    ``'model_name'``, ``'lora_names'`` and ``'shared.settings'``.
    """
    info = response['result']
    print("Model: ", info['model_name'])
    print("Lora(s): ", info['lora_names'])
    for key in ('truncation_length', 'instruction_template'):
        print(key, "=", info['shared.settings'][key])
# model info
def model_info():
    """Query the model endpoint with the 'info' action and print the basics."""
    print_basic_model_info(model_api({'action': 'info'}))
# simple loader
def model_load(model_name):
    """Ask the model endpoint to load ``model_name`` and return its reply."""
    load_request = {'action': 'load', 'model_name': model_name}
    return model_api(load_request)
# complex loader
def complex_model_load(model):
    """Load ``model`` through the model API, guessing loader args from its name.

    The request always carries the name exactly as given; a lowercased copy is
    only used for the substring checks that pick quantization, offloading and
    RWKV settings.

    Args:
        model: Name of the model, as reported by the 'list' action.

    Returns:
        Whatever ``model_api`` returns for the 'load' request.
    """

    def guess_groupsize(model_name):
        """Return the GPTQ group size hinted at by the name, or -1 if none."""
        if '1024g' in model_name:
            return 1024
        elif '128g' in model_name:
            return 128
        elif '32g' in model_name:
            return 32
        else:
            return -1

    req = {
        'action': 'load',
        'model_name': model,
        'args': {
            'loader': 'AutoGPTQ',

            'bf16': False,
            'load_in_8bit': False,
            'groupsize': 0,
            'wbits': 0,

            # llama.cpp
            'threads': 0,
            'n_batch': 512,
            'no_mmap': False,
            'mlock': False,
            'cache_capacity': None,
            'n_gpu_layers': 0,
            'n_ctx': 2048,

            # RWKV
            'rwkv_strategy': None,
            'rwkv_cuda_on': False,

            # b&b 4-bit
            # 'load_in_4bit': False,
            # 'compute_dtype': 'float16',
            # 'quant_type': 'nf4',
            # 'use_double_quant': False,

            # "cpu": false,
            # "auto_devices": false,
            # "gpu_memory": null,
            # "cpu_memory": null,
            # "disk": false,
            # "disk_cache_dir": "cache",
        },
    }

    # From here on only the lowercased name is inspected; req['model_name']
    # above keeps the original spelling.
    model = model.lower()

    if '4bit' in model or 'gptq' in model or 'int4' in model:
        req['args']['wbits'] = 4
        req['args']['groupsize'] = guess_groupsize(model)
    elif '3bit' in model:
        req['args']['wbits'] = 3
        req['args']['groupsize'] = guess_groupsize(model)
    else:
        req['args']['gptq_for_llama'] = False

    if '8bit' in model:
        req['args']['load_in_8bit'] = True
    elif '-hf' in model or 'fp16' in model:
        if '7b' in model:
            req['args']['bf16'] = True  # for 24GB
        elif '13b' in model:
            req['args']['load_in_8bit'] = True  # for 24GB
    elif 'gguf' in model:
        # req['args']['threads'] = 16
        if '7b' in model:
            req['args']['n_gpu_layers'] = 100
        elif '13b' in model:
            req['args']['n_gpu_layers'] = 100
        elif '30b' in model or '33b' in model:
            req['args']['n_gpu_layers'] = 59  # 24GB
        elif '65b' in model:
            req['args']['n_gpu_layers'] = 42  # 24GB
    elif 'rwkv' in model:
        req['args']['rwkv_cuda_on'] = True
        # RWKV strategy dtypes are spelled 'fp16' / 'fp16i8' (not 'f16').
        if '14b' in model:
            req['args']['rwkv_strategy'] = 'cuda fp16i8'  # 24GB
        else:
            req['args']['rwkv_strategy'] = 'cuda fp16'  # 24GB

    return model_api(req)
if __name__ == '__main__':
    # Try to load every model the server lists and run a tiny sanity prompt.
    for model in model_api({'action': 'list'})['result']:
        try:
            resp = complex_model_load(model)

            if 'error' in resp:
                print(f"{model} FAIL Error: {resp['error']['message']}")
                continue

            print_basic_model_info(resp)

            ans = generate("0,1,1,2,3,5,8,13,", tokens=2)
            verdict = 'PASS' if '21' in ans else 'FAIL'
            print(f"{model} {verdict} ({ans})")

        except Exception as e:
            print(f"{model} FAIL Exception: {repr(e)}")
# 0,1,1,2,3,5,8,13, is the fibonacci sequence, the next number is 21.
# Some results below.
""" $ ./model-api-example.py
Model: 4bit_gpt4-x-alpaca-13b-native-4bit-128g-cuda
Lora(s): []
truncation_length = 2048
instruction_template = Alpaca
4bit_gpt4-x-alpaca-13b-native-4bit-128g-cuda PASS (21)
Model: 4bit_WizardLM-13B-Uncensored-4bit-128g
Lora(s): []
truncation_length = 2048
instruction_template = WizardLM
4bit_WizardLM-13B-Uncensored-4bit-128g PASS (21)
Model: Aeala_VicUnlocked-alpaca-30b-4bit
Lora(s): []
truncation_length = 2048
instruction_template = Alpaca
Aeala_VicUnlocked-alpaca-30b-4bit PASS (21)
Model: alpaca-30b-4bit
Lora(s): []
truncation_length = 2048
instruction_template = Alpaca
alpaca-30b-4bit PASS (21)
"""

View file

@ -0,0 +1,86 @@
import asyncio
import json
import sys
try:
    import websockets
except ImportError:
    # Nothing below can work without the websockets client, so stop right
    # away instead of continuing and failing later with a NameError.
    print("Websockets package not found. Make sure it's installed.")
    raise SystemExit(1)
# For local streaming, the websockets are hosted without ssl - ws://
HOST = 'localhost:5005'
# Websocket endpoint that the request is sent to.
URI = f'ws://{HOST}/api/v1/stream'
# For reverse-proxied streaming, the remote will likely host with ssl - wss://
# URI = 'wss://your-uri-here.trycloudflare.com/api/v1/stream'
async def run(context):
    """Stream a completion for ``context`` from the websocket API.

    Yields ``context`` itself first, then the ``text`` field of every
    ``text_stream`` event until a ``stream_end`` event is received.
    """
    # Generation params. If 'preset' is set to different than 'None', the values
    # in presets/preset-name.yaml are used instead of the individual numbers.
    generation_fields = {
        'preset': 'None',
        'do_sample': True,
        'temperature': 0.7,
        'top_p': 0.1,
        'typical_p': 1,
        'epsilon_cutoff': 0,  # In units of 1e-4
        'eta_cutoff': 0,  # In units of 1e-4
        'tfs': 1,
        'top_a': 0,
        'repetition_penalty': 1.18,
        'repetition_penalty_range': 0,
        'top_k': 40,
        'min_length': 0,
        'no_repeat_ngram_size': 0,
        'num_beams': 1,
        'penalty_alpha': 0,
        'length_penalty': 1,
        'early_stopping': False,
        'mirostat_mode': 0,
        'mirostat_tau': 5,
        'mirostat_eta': 0.1,
        'grammar_string': '',
        'guidance_scale': 1,
        'negative_prompt': '',
        'seed': -1,
        'add_bos_token': True,
        'truncation_length': 2048,
        'ban_eos_token': False,
        'custom_token_bans': '',
        'skip_special_tokens': True,
        'stopping_strings': [],
    }

    # Note: the selected defaults change from time to time.
    payload = {
        'prompt': context,
        'max_new_tokens': 250,
        'auto_max_new_tokens': False,
        'max_tokens_second': 0,
        **generation_fields,
    }

    async with websockets.connect(URI, ping_interval=None) as websocket:
        await websocket.send(json.dumps(payload))

        yield context  # Remove this if you just want to see the reply

        while True:
            message = json.loads(await websocket.recv())
            event = message['event']
            if event == 'stream_end':
                return
            if event == 'text_stream':
                yield message['text']
async def print_response_stream(prompt):
    """Print every chunk produced by ``run(prompt)`` as soon as it arrives."""
    async for chunk in run(prompt):
        # flush=True: if we don't flush, we won't see tokens in realtime.
        print(chunk, end='', flush=True)
if __name__ == '__main__':
    # Example prompt; the reply is streamed to stdout.
    prompt = "In order to make homemade bread, follow these steps:\n1)"
    stream_job = print_response_stream(prompt)
    asyncio.run(stream_job)

View file

@ -0,0 +1,63 @@
import requests
# For a local server, the API is hosted without ssl - http://
HOST = 'localhost:5000'
URI = f'http://{HOST}/api/v1/generate'
# For a reverse-proxied server, the remote will likely host with ssl - https://
# URI = 'https://your-uri-here.trycloudflare.com/api/v1/generate'
def run(prompt):
    """POST ``prompt`` to ``URI`` and print the prompt followed by the reply.

    Nothing is printed when the response status code is not 200.
    """
    # Generation params. If 'preset' is set to different than 'None', the values
    # in presets/preset-name.yaml are used instead of the individual numbers.
    generation_fields = {
        'preset': 'None',
        'do_sample': True,
        'temperature': 0.7,
        'top_p': 0.1,
        'typical_p': 1,
        'epsilon_cutoff': 0,  # In units of 1e-4
        'eta_cutoff': 0,  # In units of 1e-4
        'tfs': 1,
        'top_a': 0,
        'repetition_penalty': 1.18,
        'repetition_penalty_range': 0,
        'top_k': 40,
        'min_length': 0,
        'no_repeat_ngram_size': 0,
        'num_beams': 1,
        'penalty_alpha': 0,
        'length_penalty': 1,
        'early_stopping': False,
        'mirostat_mode': 0,
        'mirostat_tau': 5,
        'mirostat_eta': 0.1,
        'grammar_string': '',
        'guidance_scale': 1,
        'negative_prompt': '',
        'seed': -1,
        'add_bos_token': True,
        'truncation_length': 2048,
        'ban_eos_token': False,
        'custom_token_bans': '',
        'skip_special_tokens': True,
        'stopping_strings': [],
    }

    payload = {
        'prompt': prompt,
        'max_new_tokens': 250,
        'auto_max_new_tokens': False,
        'max_tokens_second': 0,
        **generation_fields,
    }

    response = requests.post(URI, json=payload)

    # Only a successful reply is reported; anything else is skipped.
    if response.status_code != 200:
        return

    result = response.json()['results'][0]['text']
    print(prompt + result)
if __name__ == '__main__':
    # Example prompt; the completed text is printed by run().
    prompt = "In order to make homemade bread, follow these steps:\n1)"
    run(prompt=prompt)

View file

Before

Width:  |  Height:  |  Size: 206 KiB

After

Width:  |  Height:  |  Size: 206 KiB

View file

@ -1,8 +1,8 @@
#!/usr/bin/env bash #!/bin/bash
cd "$(dirname "${BASH_SOURCE[0]}")" cd "$(dirname "${BASH_SOURCE[0]}")"
if [[ "$(pwd)" =~ " " ]]; then echo This script relies on Miniforge which can not be silently installed under a path with spaces. && exit; fi if [[ "$(pwd)" =~ " " ]]; then echo This script relies on Miniconda which can not be silently installed under a path with spaces. && exit; fi
# deactivate existing conda envs as needed to avoid conflicts # deactivate existing conda envs as needed to avoid conflicts
{ conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null { conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null

View file

@ -2,7 +2,7 @@
cd "$(dirname "${BASH_SOURCE[0]}")" cd "$(dirname "${BASH_SOURCE[0]}")"
if [[ "$(pwd)" =~ " " ]]; then echo This script relies on Miniforge which can not be silently installed under a path with spaces. && exit; fi if [[ "$(pwd)" =~ " " ]]; then echo This script relies on Miniconda which can not be silently installed under a path with spaces. && exit; fi
# deactivate existing conda envs as needed to avoid conflicts # deactivate existing conda envs as needed to avoid conflicts
{ conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null { conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null

View file

@ -4,7 +4,7 @@ cd /D "%~dp0"
set PATH=%PATH%;%SystemRoot%\system32 set PATH=%PATH%;%SystemRoot%\system32
echo "%CD%"| findstr /C:" " >nul && echo This script relies on Miniforge which can not be silently installed under a path with spaces. && goto end echo "%CD%"| findstr /C:" " >nul && echo This script relies on Miniconda which can not be silently installed under a path with spaces. && goto end
@rem fix failed install when installing to a separate drive @rem fix failed install when installing to a separate drive
set TMP=%cd%\installer_files set TMP=%cd%\installer_files
@ -21,12 +21,11 @@ set INSTALL_ENV_DIR=%cd%\installer_files\env
set PYTHONNOUSERSITE=1 set PYTHONNOUSERSITE=1
set PYTHONPATH= set PYTHONPATH=
set PYTHONHOME= set PYTHONHOME=
set PYTHONUTF8=1
set "CUDA_PATH=%INSTALL_ENV_DIR%" set "CUDA_PATH=%INSTALL_ENV_DIR%"
set "CUDA_HOME=%CUDA_PATH%" set "CUDA_HOME=%CUDA_PATH%"
@rem activate installer env @rem activate installer env
call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%" || ( echo. && echo Miniforge hook not found. && goto end ) call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%" || ( echo. && echo Miniconda hook not found. && goto end )
@rem enter commands @rem enter commands
cmd /k "%*" cmd /k "%*"

11
cmd_wsl.bat Executable file
View file

@ -0,0 +1,11 @@
@echo off
@rem switch to the drive and directory that contain this script
cd /D "%~dp0"
@rem append the system32 directory to PATH for this session
set PATH=%PATH%;%SystemRoot%\system32
@rem sed -i 's/\x0D$//' ./wsl.sh converts newlines to unix format in the wsl script
@rem afterwards wsl.sh is sourced with the "cmd" argument inside a login, interactive bash
call wsl -e bash -lic "sed -i 's/\x0D$//' ./wsl.sh; source ./wsl.sh cmd"
:end
@rem keep the window open until a key is pressed
pause

38
convert-to-safetensors.py Normal file
View file

@ -0,0 +1,38 @@
'''
Converts a transformers model to safetensors format and shards it.
This makes it faster to load (because of safetensors) and lowers its RAM usage
while loading (because of sharding).
Based on the original script by 81300:
https://gist.github.com/81300/fe5b08bff1cba45296a829b9d6b0f303
'''
import argparse
from pathlib import Path
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer
# Command-line interface. Arguments are parsed at module level, as in the
# original script, so `args` is available as a module global.
parser = argparse.ArgumentParser(formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=54))
parser.add_argument('MODEL', type=str, default=None, nargs='?', help="Path to the input model.")
parser.add_argument('--output', type=str, default=None, help='Path to the output folder (default: models/{model_name}_safetensors).')
parser.add_argument("--max-shard-size", type=str, default="2GB", help="Maximum size of a shard in GB or MB (default: %(default)s).")
parser.add_argument('--bf16', action='store_true', help='Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU.')
args = parser.parse_args()

if __name__ == '__main__':
    # MODEL is declared with nargs='?', so it is None when omitted. Fail with
    # a usage message instead of an opaque TypeError raised by Path(None).
    if args.MODEL is None:
        parser.error("the MODEL argument is required (path to the input model).")

    path = Path(args.MODEL)

    # Path('.').name is an empty string; fall back to the resolved directory
    # name so the default output folder is not "models/_safetensors".
    model_name = path.name or path.resolve().name

    print(f"Loading {model_name}...")
    # bfloat16 only when explicitly requested, float16 otherwise.
    model = AutoModelForCausalLM.from_pretrained(path, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16 if args.bf16 else torch.float16)
    tokenizer = AutoTokenizer.from_pretrained(path)

    # An explicit --output wins; otherwise derive the folder from the model name.
    out_folder = args.output or Path(f"models/{model_name}_safetensors")
    print(f"Saving the converted model to {out_folder} with a maximum shard size of {args.max_shard_size}...")
    # safe_serialization=True writes safetensors shards no larger than max_shard_size.
    model.save_pretrained(out_folder, max_shard_size=args.max_shard_size, safe_serialization=True)
    tokenizer.save_pretrained(out_folder)

View file

@ -1,129 +0,0 @@
/* One chat row: fixed avatar column followed by a flexible text column. */
.message {
  display: grid;
  align-items: start;
  grid-template-columns: 60px minmax(0, 1fr);
  width: min(100%, calc(724px + 60px));
  padding-bottom: 22px;
  padding-top: 6px;
  font-size: 18px;
  font-family: Roboto, Arial, sans-serif; /* Modern font */
  line-height: 1.5;
}

/* Avatar frames: round, dark, blue outline, drop shadow. */
.circle-you,
.circle-bot {
  width: 64px;
  height: 64px;
  background-color: #2b2b2b;
  border-radius: 50%;
  border: 1px solid #4a90e2;
  box-shadow: 0 4px 8px rgb(0 0 0 / 50%);
}

/* Avatar pictures fill their frame and are cropped to a circle. */
.circle-you img,
.circle-bot img {
  width: 100%;
  height: 100%;
  object-fit: cover;
  border-radius: 50%;
}

/* Text column next to the avatar. */
.text {
  padding-left: 12px;
  color: #f0f0f0;
}

.text p {
  margin-top: 2px;
}

/* Speaker name; turns blue while hovered. */
.username {
  padding-left: 10px;
  font-size: 20px;
  font-weight: bold;
  color: #e0e0e0;
  transition: color 0.3s ease;
}

.username:hover {
  color: #4a90e2;
}

/* Message bubble; the background lightens slightly on hover. */
.message-body {
  position: relative;
  border: 1px solid rgb(255 255 255 / 10%);
  border-radius: 8px;
  padding: 15px;
  background: #1e1e1e;
  box-shadow: 0 4px 10px rgb(0 0 0 / 30%);
  transition: background 0.3s ease;
}

.message-body:hover {
  background: #252525;
}

/* Faint hairlines drawn 4px inside the top and bottom edges of the bubble. */
.message-body::before,
.message-body::after {
  content: "";
  position: absolute;
  left: 10px;
  right: 10px;
  height: 1px;
  background-color: rgb(255 255 255 / 5%);
}

.message-body::before {
  top: 4px;
}

.message-body::after {
  bottom: 4px;
}

/* Inline images are capped at 300x300 with rounded corners. */
.message-body img {
  max-width: 300px;
  max-height: 300px;
  border-radius: 10px;
}

.message-body p {
  color: #e0e0e0 !important;
}

.message-body p em {
  color: #a6a6a6 !important;
}

/* Narrow viewports: only the values that differ from the rules above. */
@media screen and (width <= 688px) {
  .message {
    padding-bottom: 25px;
    font-size: 15px;
  }

  .circle-you,
  .circle-bot {
    width: 40px;
    height: 40px;
  }

  .text {
    padding-left: 10px;
  }

  .message-body p {
    font-size: 14px !important;
  }

  .username {
    font-size: 18px;
  }
}

View file

@ -2,11 +2,8 @@
.message { .message {
display: grid; display: grid;
align-items: start;
grid-template-columns: 60px minmax(0, 1fr); grid-template-columns: 60px minmax(0, 1fr);
width: min(100%, calc(724px + 60px + 90px)); padding-bottom: 28px;
padding-bottom: 21px;
padding-top: 7px;
font-size: 18px; font-size: 18px;
font-family: 'Noto Sans', Arial, sans-serif; font-family: 'Noto Sans', Arial, sans-serif;
line-height: 1.428571429; line-height: 1.428571429;
@ -28,15 +25,15 @@
} }
.circle-you, .circle-bot { .circle-you, .circle-bot {
/* You can set the size of the profile images here, but if you do, you have to also adjust the .text{padding-left: 90px} to a different number according to the width of the image which is right below here */ /*You can set the size of the profile images here, but if you do, you have to also adjust the .text{padding-left: 90px} to a different number according to the width of the image which is right below here*/
width: 135px; width: 135px;
height: 175px; height: 175px;
} }
.text { .text {
/* Change this to move the message box further left or right depending on the size of your profile pic */ /*Change this to move the message box further left or right depending on the size of your profile pic*/
padding-left: 90px; padding-left: 90px;
text-shadow: 2px 2px 2px rgb(0 0 0 / 40%); text-shadow: 2px 2px 2px rgb(0, 0, 0, 0.4);
} }
.text p { .text p {
@ -47,37 +44,37 @@
padding-left: 10px; padding-left: 10px;
font-size: 22px; font-size: 22px;
font-weight: bold; font-weight: bold;
border-top: 1px solid rgb(51 64 90); border-top: 1px solid rgb(51, 64, 90);
padding: 3px; padding: 3px;
} }
.message-body { .message-body {
position: relative; position: relative;
border: 1px solid rgb(255 255 255 / 45.9%); border-radius: 1rem;
border: 1px solid rgba(255, 255, 255, 0.459);
border-radius: 10px; border-radius: 10px;
padding: 10px; padding: 10px;
padding-top: 5px; padding-top: 5px;
/*Message gradient background color - remove the line below if you don't want a background color or gradient*/
/* Message gradient background color - remove the line below if you don't want a background color or gradient */
background: linear-gradient(to bottom, #171730, #1b263f); background: linear-gradient(to bottom, #171730, #1b263f);
} }
/* Adds 2 extra lines at the top and bottom of the message */ /*Adds 2 extra lines at the top and bottom of the message*/
.message-body::before, .message-body:before,
.message-body::after { .message-body:after {
content: ""; content: "";
position: absolute; position: absolute;
left: 10px; left: 10px;
right: 10px; right: 10px;
height: 1px; height: 1px;
background-color: rgb(255 255 255 / 13%); background-color: rgba(255, 255, 255, 0.13);
} }
.message-body::before { .message-body:before {
top: 6px; top: 6px;
} }
.message-body::after { .message-body:after {
bottom: 6px; bottom: 6px;
} }
@ -87,21 +84,21 @@
border-radius: 20px; border-radius: 20px;
} }
.message-body p, .message-body li { .message-body p {
margin-bottom: 0 !important;
font-size: 18px !important; font-size: 18px !important;
color: rgb(243 244 246) !important; line-height: 1.428571429 !important;
text-shadow: 2px 2px 2px rgb(0 0 0); color: rgb(243, 244, 246) !important;
font-weight: 500; text-shadow: 2px 2px 2px rgb(0, 0, 0);
} }
.message-body p em { .message-body p em {
color: rgb(138 138 138) !important; color: rgb(138, 138, 138) !important;
} }
@media screen and (width <= 688px) { @media screen and (max-width: 688px) {
.message { .message {
display: grid; display: grid;
align-items: start;
grid-template-columns: 60px minmax(0, 1fr); grid-template-columns: 60px minmax(0, 1fr);
padding-bottom: 25px; padding-bottom: 25px;
font-size: 15px; font-size: 15px;
@ -123,10 +120,10 @@
} }
.text { .text {
padding-left: 0; padding-left: 0px;
} }
.message-body p, .message-body li { .message-body p {
font-size: 16px !important; font-size: 16px !important;
} }

View file

@ -1,23 +1,21 @@
@import url("file/css/chat_style-cai-chat.css"); @import url("file/css/chat_style-cai-chat.css");
.circle-bot, .circle-you { .circle-bot, .circle-you {
height: 90px; height: 90px;
width: 60px; width: 60px;
border-radius: 10px; border-radius: 10px;
background-color: #656565; background-color: #656565;
} }
.circle-bot img, .circle-you img { .circle-bot img, .circle-you img {
border-radius: 8.333px; border-radius: 8.333px;
} }
.circle-you { .circle-you {
background-color: #656565; background-color: #656565;
} }
.message { .message {
padding-bottom: 1.5em; padding-bottom: 30px;
padding-top: 0.5em; grid-template-columns: 70px minmax(0, 1fr);
grid-template-columns: 70px minmax(0, 1fr);
width: min(100%, calc(724px + 70px));
} }

View file

@ -1,31 +1,23 @@
.message { .message {
display: grid; display: grid;
align-items: start;
grid-template-columns: 60px minmax(0, 1fr); grid-template-columns: 60px minmax(0, 1fr);
width: min(100%, calc(724px + 60px)); padding-bottom: 25px;
padding-bottom: 1.5em;
padding-top: 0.5em;
font-size: 15px; font-size: 15px;
font-family: 'Noto Sans', Helvetica, Arial, sans-serif; font-family: 'Noto Sans', Helvetica, Arial, sans-serif;
line-height: 22.5px !important; line-height: 23px !important;
}
.message-body {
margin-top: 3px;
font-size: 15px !important;
} }
.circle-you { .circle-you {
width: 50px; width: 50px;
height: 50px; height: 50px;
background-color: rgb(238 78 59); background-color: rgb(238, 78, 59);
border-radius: 50%; border-radius: 50%;
} }
.circle-bot { .circle-bot {
width: 50px; width: 50px;
height: 50px; height: 50px;
background-color: rgb(59 78 244); background-color: rgb(59, 78, 244);
border-radius: 50%; border-radius: 50%;
} }
@ -37,6 +29,10 @@
object-fit: cover; object-fit: cover;
} }
.text p {
margin-top: 5px;
}
.username { .username {
font-weight: bold; font-weight: bold;
} }
@ -47,15 +43,17 @@
border-radius: 20px; border-radius: 20px;
} }
.message-body p, .message-body li { .message-body p {
font-weight: 500; margin-bottom: 0 !important;
font-size: 15px !important;
line-height: 23px !important;
} }
.dark .message-body p em { .dark .message-body p em {
color: rgb(138 138 138) !important; color: rgb(138, 138, 138) !important;
} }
.message-body p em { .message-body p em {
color: rgb(110 110 110) !important; color: rgb(110, 110, 110) !important;
font-weight: 500; font-weight: 500;
} }

View file

@ -1,7 +1,5 @@
.message { .message {
width: min(100%, calc(724px + 60px)); padding-bottom: 25px;
padding-bottom: 22px;
padding-top: 3px;
font-size: 15px; font-size: 15px;
font-family: 'Noto Sans', Helvetica, Arial, sans-serif; font-family: 'Noto Sans', Helvetica, Arial, sans-serif;
line-height: 1.428571429; line-height: 1.428571429;
@ -10,14 +8,14 @@
.circle-you { .circle-you {
width: 50px; width: 50px;
height: 50px; height: 50px;
background-color: rgb(238 78 59); background-color: rgb(238, 78, 59);
border-radius: 50%; border-radius: 50%;
} }
.circle-bot { .circle-bot {
width: 50px; width: 50px;
height: 50px; height: 50px;
background-color: rgb(59 78 244); background-color: rgb(59, 78, 244);
border-radius: 50%; border-radius: 50%;
float: left; float: left;
margin-right: 10px; margin-right: 10px;
@ -49,7 +47,7 @@
.circle-you + .text { .circle-you + .text {
float: right; float: right;
background-color: rgb(0 132 255); background-color: rgb(0, 132, 255);
margin-right: 10px; margin-right: 10px;
} }
@ -61,10 +59,8 @@
text-align: right; text-align: right;
} }
.dark .circle-bot + .text div, .dark .circle-bot + .text *, .dark .circle-bot + .text div, .dark .circle-bot + .text * {
.dark .chat .message .circle-bot + .text .message-body :is(h1, h2, h3, h4, h5, h6), color: #000;
.dark .chat .message .circle-bot + .text .message-body a {
color: #000 !important;
} }
.text { .text {
@ -79,29 +75,25 @@
font-weight: bold; font-weight: bold;
} }
.message-body {
}
.message-body img { .message-body img {
max-width: 300px; max-width: 300px;
max-height: 300px; max-height: 300px;
border-radius: 20px; border-radius: 20px;
} }
.message-body p, .message-body li { .message-body p {
margin-bottom: 0 !important;
font-size: 15px !important; font-size: 15px !important;
font-weight: 500; line-height: 1.428571429 !important;
} }
.dark .message-body p em { .dark .message-body p em {
color: rgb(138 138 138) !important; color: rgb(138, 138, 138) !important;
} }
.message-body p em { .message-body p em {
color: rgb(110 110 110) !important; color: rgb(110, 110, 110) !important;
}
.editing-textarea {
width: max(30rem) !important;
}
.circle-you + .text .edit-control-button, .circle-you + .text .editing-textarea {
color: #000 !important;
} }

View file

@ -1,97 +1,55 @@
.message { .message {
display: block; padding-bottom: 25px;
width: min(100%, 724px);
padding-top: 0;
padding-bottom: 21px;
font-size: 15px; font-size: 15px;
font-family: 'Noto Sans', Helvetica, Arial, sans-serif; font-family: 'Noto Sans', Helvetica, Arial, sans-serif;
line-height: 1.428571429; line-height: 1.428571429;
grid-template-columns: none;
} }
.circle-you, .circle-bot { .text-you {
display: none;
}
.text {
max-width: 65%;
border-radius: 18px;
padding: 12px 16px;
margin-bottom: 8px;
clear: both;
box-shadow: 0 1px 2px rgb(0 0 0 / 10%);
}
.username {
font-weight: 600;
margin-bottom: 8px;
opacity: 0.65;
padding-left: 0;
}
/* User messages - right aligned, WhatsApp green */
.circle-you + .text {
background-color: #d9fdd3; background-color: #d9fdd3;
border-radius: 15px;
padding: 10px;
padding-top: 5px;
float: right; float: right;
margin-left: auto;
margin-right: 8px;
} }
.circle-you + .text .username { .text-bot {
display: none; background-color: #f2f2f2;
border-radius: 15px;
padding: 10px;
padding-top: 5px;
} }
/* Bot messages - left aligned, white */ .dark .text-you {
.circle-bot + .text { background-color: #005c4b;
background-color: #fff; color: #111b21;
float: left;
margin-right: auto;
margin-left: 8px;
border: 1px solid #e5e5e5;
} }
.circle-bot + .text .message-actions { .dark .text-bot {
bottom: -25px !important; background-color: #1f2937;
color: #111b21;
} }
/* Dark theme colors */ .text-bot p, .text-you p {
.dark .circle-you + .text { margin-top: 5px;
background-color: #144d37;
color: #e4e6ea;
box-shadow: 0 1px 2px rgb(0 0 0 / 30%);
}
.dark .circle-bot + .text {
background-color: #202c33;
color: #e4e6ea;
border: 1px solid #3c4043;
box-shadow: 0 1px 2px rgb(0 0 0 / 30%);
}
.dark .username {
opacity: 0.7;
} }
.message-body img { .message-body img {
max-width: 300px; max-width: 300px;
max-height: 300px; max-height: 300px;
border-radius: 12px; border-radius: 20px;
} }
.message-body p, .message-body li { .message-body p {
margin-bottom: 0 !important;
font-size: 15px !important; font-size: 15px !important;
line-height: 1.428571429 !important;
} }
.dark .message-body p em { .dark .message-body p em {
color: rgb(170 170 170) !important; color: rgb(138, 138, 138) !important;
} }
.message-body p em { .message-body p em {
color: rgb(100 100 100) !important; color: rgb(110, 110, 110) !important;
} }
/* Message actions positioning */
.message-actions {
margin-top: 8px;
}

View file

@ -1,111 +0,0 @@
html body gradio-app .gradio-container pre code.hljs {
display: block;
overflow-x: auto;
padding: 1em
}
html body gradio-app .gradio-container code.hljs {
padding: 3px 5px
}
/*!
Theme: GitHub Dark
Description: Dark theme as seen on github.com
Author: github.com
Maintainer: @Hirse
Updated: 2021-05-15
Outdated base version: https://github.com/primer/github-syntax-dark
Current colors taken from GitHub's CSS
*/
html body gradio-app .gradio-container .hljs {
color: #c9d1d9;
background: #0d1117
}
html body gradio-app .gradio-container .hljs-doctag,
html body gradio-app .gradio-container .hljs-keyword,
html body gradio-app .gradio-container .hljs-meta .hljs-keyword,
html body gradio-app .gradio-container .hljs-template-tag,
html body gradio-app .gradio-container .hljs-template-variable,
html body gradio-app .gradio-container .hljs-type,
html body gradio-app .gradio-container .hljs-variable.language_ {
color: #ff7b72
}
html body gradio-app .gradio-container .hljs-title,
html body gradio-app .gradio-container .hljs-title.class_,
html body gradio-app .gradio-container .hljs-title.class_.inherited__,
html body gradio-app .gradio-container .hljs-title.function_ {
color: #d2a8ff
}
html body gradio-app .gradio-container .hljs-attr,
html body gradio-app .gradio-container .hljs-attribute,
html body gradio-app .gradio-container .hljs-literal,
html body gradio-app .gradio-container .hljs-meta,
html body gradio-app .gradio-container .hljs-number,
html body gradio-app .gradio-container .hljs-operator,
html body gradio-app .gradio-container .hljs-selector-attr,
html body gradio-app .gradio-container .hljs-selector-class,
html body gradio-app .gradio-container .hljs-selector-id,
html body gradio-app .gradio-container .hljs-variable {
color: #79c0ff
}
html body gradio-app .gradio-container .hljs-meta .hljs-string,
html body gradio-app .gradio-container .hljs-regexp,
html body gradio-app .gradio-container .hljs-string {
color: #a5d6ff
}
html body gradio-app .gradio-container .hljs-built_in,
html body gradio-app .gradio-container .hljs-symbol {
color: #ffa657
}
html body gradio-app .gradio-container .hljs-code,
html body gradio-app .gradio-container .hljs-comment,
html body gradio-app .gradio-container .hljs-formula {
color: #8b949e
}
html body gradio-app .gradio-container .hljs-name,
html body gradio-app .gradio-container .hljs-quote,
html body gradio-app .gradio-container .hljs-selector-pseudo,
html body gradio-app .gradio-container .hljs-selector-tag {
color: #7ee787
}
html body gradio-app .gradio-container .hljs-subst {
color: #c9d1d9
}
html body gradio-app .gradio-container .hljs-section {
color: #1f6feb;
font-weight: 700
}
html body gradio-app .gradio-container .hljs-bullet {
color: #f2cc60
}
html body gradio-app .gradio-container .hljs-emphasis {
color: #c9d1d9;
font-style: italic
}
html body gradio-app .gradio-container .hljs-strong {
color: #c9d1d9;
font-weight: 700
}
html body gradio-app .gradio-container .hljs-addition {
color: #aff5b4;
background-color: #033a16
}
html body gradio-app .gradio-container .hljs-deletion {
color: #ffdcd7;
background-color: #67060c
}

View file

@ -1,111 +0,0 @@
html body gradio-app .gradio-container pre code.hljs {
display: block;
overflow-x: auto;
padding: 1em
}
html body gradio-app .gradio-container code.hljs {
padding: 3px 5px
}
/*!
Theme: GitHub
Description: Light theme as seen on github.com
Author: github.com
Maintainer: @Hirse
Updated: 2021-05-15
Outdated base version: https://github.com/primer/github-syntax-light
Current colors taken from GitHub's CSS
*/
html body gradio-app .gradio-container .hljs {
color: #24292e;
background: #fff
}
html body gradio-app .gradio-container .hljs-doctag,
html body gradio-app .gradio-container .hljs-keyword,
html body gradio-app .gradio-container .hljs-meta .hljs-keyword,
html body gradio-app .gradio-container .hljs-template-tag,
html body gradio-app .gradio-container .hljs-template-variable,
html body gradio-app .gradio-container .hljs-type,
html body gradio-app .gradio-container .hljs-variable.language_ {
color: #d73a49
}
html body gradio-app .gradio-container .hljs-title,
html body gradio-app .gradio-container .hljs-title.class_,
html body gradio-app .gradio-container .hljs-title.class_.inherited__,
html body gradio-app .gradio-container .hljs-title.function_ {
color: #6f42c1
}
html body gradio-app .gradio-container .hljs-attr,
html body gradio-app .gradio-container .hljs-attribute,
html body gradio-app .gradio-container .hljs-literal,
html body gradio-app .gradio-container .hljs-meta,
html body gradio-app .gradio-container .hljs-number,
html body gradio-app .gradio-container .hljs-operator,
html body gradio-app .gradio-container .hljs-selector-attr,
html body gradio-app .gradio-container .hljs-selector-class,
html body gradio-app .gradio-container .hljs-selector-id,
html body gradio-app .gradio-container .hljs-variable {
color: #005cc5
}
html body gradio-app .gradio-container .hljs-meta .hljs-string,
html body gradio-app .gradio-container .hljs-regexp,
html body gradio-app .gradio-container .hljs-string {
color: #032f62
}
html body gradio-app .gradio-container .hljs-built_in,
html body gradio-app .gradio-container .hljs-symbol {
color: #e36209
}
html body gradio-app .gradio-container .hljs-code,
html body gradio-app .gradio-container .hljs-comment,
html body gradio-app .gradio-container .hljs-formula {
color: #6a737d
}
html body gradio-app .gradio-container .hljs-name,
html body gradio-app .gradio-container .hljs-quote,
html body gradio-app .gradio-container .hljs-selector-pseudo,
html body gradio-app .gradio-container .hljs-selector-tag {
color: #22863a
}
html body gradio-app .gradio-container .hljs-subst {
color: #24292e
}
html body gradio-app .gradio-container .hljs-section {
color: #005cc5;
font-weight: 700
}
html body gradio-app .gradio-container .hljs-bullet {
color: #735c0f
}
html body gradio-app .gradio-container .hljs-emphasis {
color: #24292e;
font-style: italic
}
html body gradio-app .gradio-container .hljs-strong {
color: #24292e;
font-weight: 700
}
html body gradio-app .gradio-container .hljs-addition {
color: #22863a;
background-color: #f0fff4
}
html body gradio-app .gradio-container .hljs-deletion {
color: #b31d28;
background-color: #ffeef0
}

View file

@ -1 +0,0 @@
.hljs-copy-wrapper{position:relative;overflow:hidden}.hljs-copy-wrapper:hover .hljs-copy-button,.hljs-copy-button:focus{transform:translateX(0)}.hljs-copy-button{position:absolute;transform:translateX(calc(100% + 1.125em));top:1em;right:1em;width:2rem;height:2rem;text-indent:-9999px;color:#fff;border-radius:.25rem;border:1px solid #ffffff22;background-color:#2d2b57;background-color:var(--hljs-theme-background);background-image:url('data:image/svg+xml;utf-8,<svg width="16" height="16" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg"><path fill-rule="evenodd" clip-rule="evenodd" d="M6 5C5.73478 5 5.48043 5.10536 5.29289 5.29289C5.10536 5.48043 5 5.73478 5 6V20C5 20.2652 5.10536 20.5196 5.29289 20.7071C5.48043 20.8946 5.73478 21 6 21H18C18.2652 21 18.5196 20.8946 18.7071 20.7071C18.8946 20.5196 19 20.2652 19 20V6C19 5.73478 18.8946 5.48043 18.7071 5.29289C18.5196 5.10536 18.2652 5 18 5H16C15.4477 5 15 4.55228 15 4C15 3.44772 15.4477 3 16 3H18C18.7956 3 19.5587 3.31607 20.1213 3.87868C20.6839 4.44129 21 5.20435 21 6V20C21 20.7957 20.6839 21.5587 20.1213 22.1213C19.5587 22.6839 18.7957 23 18 23H6C5.20435 23 4.44129 22.6839 3.87868 22.1213C3.31607 21.5587 3 20.7957 3 20V6C3 5.20435 3.31607 4.44129 3.87868 3.87868C4.44129 3.31607 5.20435 3 6 3H8C8.55228 3 9 3.44772 9 4C9 4.55228 8.55228 5 8 5H6Z" fill="white"/><path fill-rule="evenodd" clip-rule="evenodd" d="M7 3C7 1.89543 7.89543 1 9 1H15C16.1046 1 17 1.89543 17 3V5C17 6.10457 16.1046 7 15 7H9C7.89543 7 7 6.10457 7 5V3ZM15 3H9V5H15V3Z" fill="white"/></svg>');background-repeat:no-repeat;background-position:center;transition:background-color 200ms ease,transform 200ms ease-out}.hljs-copy-button:hover{border-color:#ffffff44}.hljs-copy-button:active{border-color:#ffffff66}.hljs-copy-button[data-copied="true"]{text-indent:0;width:auto;background-image:none}@media(prefers-reduced-motion){.hljs-copy-button{transition:none}}.hljs-copy-alert{clip:rect(0 0 0 
0);clip-path:inset(50%);height:1px;overflow:hidden;position:absolute;white-space:nowrap;width:1px}

104
css/html_4chan_style.css Normal file
View file

@ -0,0 +1,104 @@
/* Outer wrapper: pale blue background with padding around the thread. */
#parent #container {
  background-color: #eef2ff;
  padding: 17px;
}

/*
 * Reply box. Borders are drawn on the right and bottom edges only; the
 * left and top border styles are `none`. The fully expanded longhand
 * properties are kept exactly as they were.
 */
#parent #container .reply {
  background-color: rgb(214, 218, 240);
  border-bottom-color: rgb(183, 197, 217);
  border-bottom-style: solid;
  border-bottom-width: 1px;
  border-image-outset: 0;
  border-image-repeat: stretch;
  border-image-slice: 100%;
  border-image-source: none;
  border-image-width: 1;
  border-left-color: rgb(0, 0, 0);
  border-left-style: none;
  border-left-width: 0px;
  border-right-color: rgb(183, 197, 217);
  border-right-style: solid;
  border-right-width: 1px;
  border-top-color: rgb(0, 0, 0);
  border-top-style: none;
  border-top-width: 0px;
  color: rgb(0, 0, 0);
  display: table;
  font-family: arial, helvetica, sans-serif;
  font-size: 13.3333px;
  margin-bottom: 4px;
  margin-left: 0px;
  margin-right: 0px;
  margin-top: 4px;
  overflow-x: hidden;
  overflow-y: hidden;
  padding-bottom: 4px;
  padding-left: 2px;
  padding-right: 2px;
  padding-top: 4px;
}

/* Elements with the .number class: fixed width, small gap on the right. */
#parent #container .number {
  color: rgb(0, 0, 0);
  font-family: arial, helvetica, sans-serif;
  font-size: 13.3333px;
  width: 342.65px;
  margin-right: 7px;
}

/* Elements with the .op class: same typography as replies, no box styling. */
#parent #container .op {
  color: rgb(0, 0, 0);
  font-family: arial, helvetica, sans-serif;
  font-size: 13.3333px;
  margin-bottom: 8px;
  margin-left: 0px;
  margin-right: 0px;
  margin-top: 4px;
  overflow-x: hidden;
  overflow-y: hidden;
}

/* More specific than the generic blockquote rule below, so the left margin is 0 here. */
#parent #container .op blockquote {
  margin-left: 0px !important;
}

/* Bold green name text. */
#parent #container .name {
  color: rgb(17, 119, 67);
  font-family: arial, helvetica, sans-serif;
  font-size: 13.3333px;
  font-weight: 700;
  margin-left: 7px;
}

/* Red, underlined quote text. */
#parent #container .quote {
  color: rgb(221, 0, 0);
  font-family: arial, helvetica, sans-serif;
  font-size: 13.3333px;
  text-decoration-color: rgb(221, 0, 0);
  text-decoration-line: underline;
  text-decoration-style: solid;
  text-decoration-thickness: auto;
}

/* Green-coloured text. */
#parent #container .greentext {
  color: rgb(120, 153, 34);
  font-family: arial, helvetica, sans-serif;
  font-size: 13.3333px;
}

/*
 * Blockquote margins: 13.33px top/bottom, 40px left/right.
 * This single shorthand replaces a `margin: 0px !important` and four
 * non-important logical margins, all of which were overridden by the
 * `!important` physical longhands declared after them.
 */
#parent #container blockquote {
  margin: 13.33px 40px !important;
}

/* Elements with the .message_4chan class: black text, no border. */
#parent #container .message_4chan {
  color: black;
  border: none;
}

View file

@ -1,97 +1,64 @@
.chat { .message {
background: transparent; display: grid;
padding: 0; grid-template-columns: 60px 1fr;
padding-top: 0; padding-bottom: 25px;
} font-size: 15px;
font-family: 'Noto Sans', Helvetica, Arial, sans-serif;
.chat > .messages:first-child { line-height: 22px;
padding-top: 0 !important;
}
.chat .message-body p, .chat .message-body li {
font-size: 1rem !important;
line-height: 28px !important;
}
.dark .chat .message-body :is(p,li,h1,h2,h3,h4,h5,h6),
.dark .chat .message-body em:not(:is(h1,h2,h3,h4,h5,h6,b,strong) em),
.dark .chat .message-body q:not(:is(h1,h2,h3,h4,h5,h6,b,strong) q) {
color: #d1d5db !important;
}
.chat .message-body :is(th, td),
.prose hr {
border-color: #40404096 !important;
}
.dark .chat .message-body :is(th, td),
.dark .prose hr {
border-color: rgb(255 255 255 / 30%) !important;
}
.chat .message-body :is(p, ul, ol) {
margin: 1.25em 0 !important;
}
.chat .message-body :is(p, ul, ol):first-child {
margin-top: 0 !important;
}
.chat .message-body :is(p, ul, ol):last-child {
margin-bottom: 0 !important;
}
.user-message, .assistant-message {
font-family: Inter, Helvetica, Arial, sans-serif;
}
.message:first-child {
padding-top: 0;
} }
.username { .username {
display: none; display: none;
} }
.chat .user-message { .message-body p {
background: #f3f4f6; font-size: 15px !important;
padding: 1.5rem 1rem; line-height: 22px !important;
padding-bottom: 2rem; margin-bottom: 1.25em !important;
border-radius: 0;
border-bottom-right-radius: 0;
} }
.chat .assistant-message { .chat .message-body ul, .chat .message-body ol {
padding: 1.5rem 1rem; margin-bottom: 1.25em !important;
padding-bottom: 2rem;
border-radius: 0;
border: 0;
} }
.dark .chat .user-message { .dark .message-body p em {
background: var(--light-gray); color: rgb(198, 202, 214) !important;
}
.message-body p em {
color: rgb(110, 110, 110) !important;
}
.gradio-container .chat .assistant-message {
padding: 15px;
border-radius: 20px;
background-color: #0000000f;
margin-top: 9px !important;
margin-bottom: 18px !important;
}
.gradio-container .chat .user-message {
padding: 15px;
border-radius: 20px;
margin-bottom: 9px !important;
}
.gradio-container .chat .assistant-message:last-child, .gradio-container .chat .user-message:last-child {
margin-bottom: 0px !important;
} }
.dark .chat .assistant-message { .dark .chat .assistant-message {
background: transparent; background-color: #1f2937;
} }
.chat .user-message .text, .dark .chat .user-message {
.chat .assistant-message .text { background-color: transparent;
max-width: 724px;
margin-left: auto;
margin-right: auto;
} }
/* Create space between two assistant messages in a row */ code {
.assistant-message + .assistant-message { background-color: white !important;
margin-top: 1.5rem;
} }
pre > code { .dark code {
background-color: #f3f4f6 !important; background-color: #0e1321 !important;
} }
.dark pre > code {
background-color: #1f2937 !important;
}

View file

@ -1,33 +1,33 @@
.readable-container { .container {
max-width: 600px; max-width: 600px;
margin-left: auto; margin-left: auto;
margin-right: auto; margin-right: auto;
background-color: rgb(31 41 55); background-color: rgb(31, 41, 55);
padding: 3em; padding: 3em;
word-break: break-word; word-break: break-word;
overflow-wrap: anywhere; overflow-wrap: anywhere;
color: #efefef !important; color: #efefef !important;
} }
.readable-container p, .readable-container li { .container p, .container li {
font-size: 16px !important; font-size: 16px !important;
color: #efefef !important; color: #efefef !important;
margin-bottom: 22px; margin-bottom: 22px;
line-height: 1.4 !important; line-height: 1.4 !important;
} }
.readable-container li > p { .container li > p {
display: inline !important; display: inline !important;
} }
.readable-container code { .container code {
overflow-x: auto; overflow-x: auto;
} }
.readable-container :not(pre) > code { .container :not(pre) > code {
white-space: normal !important; white-space: normal !important;
} }
.readable-container .hoverable { .container .hoverable {
font-size: 14px; font-size: 14px;
} }

Binary file not shown.

Binary file not shown.

Some files were not shown because too many files have changed in this diff Show more