Compare commits


2564 commits
v1.7 ... main

Author SHA1 Message Date
oobabooga 256431f258 Security: server-side file save roots, image URL SSRF protection, extension allowlist 2026-03-17 22:31:20 -07:00
oobabooga 88a318894c
Merge pull request #7425 from oobabooga/dev
Merge dev branch
2026-03-16 12:51:33 -03:00
oobabooga 44810751de Update llama.cpp 2026-03-16 06:21:14 -07:00
oobabooga 6c05a964a7 docs: Mention supported tool-calling models 2026-03-16 06:00:16 -07:00
oobabooga 737ded6959 Web search: Fix SSRF validation to block all non-global IPs 2026-03-16 05:37:46 -07:00
oobabooga 50685c93f2 Update README 2026-03-16 05:29:27 -07:00
oobabooga 9d9f5d9860 Update README 2026-03-15 20:27:44 -07:00
oobabooga 5cfe9fe295 Update README 2026-03-15 20:12:22 -07:00
oobabooga b76a289e04 API: Respect --listen-host for the OpenAI API server
Closes #7429
2026-03-15 18:04:34 -07:00
oobabooga c0de1d176c UI: Add an incognito chat option 2026-03-15 17:57:31 -07:00
oobabooga 4f80b20859 UI: Follow-up to beab346f (fix scroll deadlock on chat-parent) 2026-03-15 16:38:54 -07:00
oobabooga f8ff7cf99e Update the custom gradio wheels 2026-03-15 14:12:59 -07:00
oobabooga 92d376e420 web_search: Return all results and improve URL extraction 2026-03-15 13:14:53 -07:00
oobabooga f6a749a151 API: Fix /v1/models to only list the currently loaded model 2026-03-15 10:17:31 -07:00
oobabooga 1a2b840938 UI: Fix scroll jump when toggling thinking blocks during streaming 2026-03-15 09:52:31 -07:00
oobabooga bfea49b197 Move top_p and top_k higher up in the UI and CLI help 2026-03-15 09:34:17 -07:00
oobabooga 80d0c03bab llama.cpp: Change the default --fit-target from 1024 to 512 2026-03-15 09:29:25 -07:00
oobabooga 9119ce0680 llama.cpp: Use --fit-ctx 8192 when --fit on is used
This sets the minimum acceptable context length, which by default is 4096.
2026-03-15 09:24:14 -07:00
oobabooga 5763cab3c4 Fix a crash loading the MiniMax-M2.5 jinja template 2026-03-15 07:13:26 -07:00
oobabooga f0c16813ef Remove the rope scaling parameters
Models now ship with 131k+ context lengths. The parameters can still be
passed to llama.cpp through --extra-flags.
2026-03-14 19:43:25 -07:00
oobabooga 2d3a3794c9 Add a Top-P preset, make it the new default, clean up the built-in presets 2026-03-14 19:22:12 -07:00
oobabooga 9955e54a1f UI: Fix autoscroll not engaging when regenerating short chats 2026-03-14 18:51:12 -07:00
oobabooga d1aba08561 UI: Set chat widths to 724px 2026-03-14 18:35:44 -07:00
oobabooga c126530061 UI: Minor color change 2026-03-14 18:22:41 -07:00
oobabooga b9bdbd638e Fix after 4ae2bd86e2 2026-03-14 18:18:33 -07:00
oobabooga 9eacd4a207 UI: Minor morphdom optimizations 2026-03-14 16:07:16 -07:00
oobabooga e11425d5f8 Fix relative redirect handling in web page fetcher 2026-03-14 15:46:21 -07:00
oobabooga 4ae2bd86e2 Change the default ctx-size to 0 (auto) for llama.cpp 2026-03-14 15:30:01 -07:00
oobabooga 9f657d3976 UI: Fix a minor glitch 2026-03-14 14:19:12 -07:00
oobabooga c09a367c64 UI: Fix dark theme using light theme syntax highlighting 2026-03-14 14:08:03 -07:00
oobabooga beab346f48 UI: Fix a minor glitch 2026-03-14 12:45:37 -07:00
oobabooga 573617157a Optimize tool call detection
Skips templates that don't contain a required keyword
2026-03-14 12:09:41 -07:00
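
A minimal sketch of the optimization described above, with illustrative names (the real parsers differ): a cheap substring check rules a tool-call format out before any expensive regex or JSON parsing is attempted.

    import json
    import re

    def parse_qwen(text):
        m = re.search(r"<tool_call>(.*?)</tool_call>", text, re.S)
        if not m:
            return None
        try:
            return json.loads(m.group(1))
        except json.JSONDecodeError:
            return None

    # (keyword, parser) pairs; the actual code covers more formats
    TOOL_FORMATS = [("<tool_call>", parse_qwen)]

    def detect_tool_call(text):
        for keyword, parser in TOOL_FORMATS:
            if keyword not in text:  # the optimization: cheap check first
                continue
            result = parser(text)
            if result is not None:
                return result
        return None
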
oobabooga d0a4993cf4 UI: Increase ctx-size slider maximum to 1M and step to 1024 2026-03-14 09:53:12 -07:00
oobabooga c7953fb923 Add ROCm version to portable package filenames 2026-03-14 09:44:37 -07:00
oobabooga c908ac00d7 Replace html2text with trafilatura for better web content extraction
After this change, a lot of boilerplate is removed from web pages, saving tokens in agentic loops.
2026-03-14 09:29:17 -07:00
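
For reference, the basic trafilatura calls look like this (the URL is a placeholder):

    import trafilatura

    downloaded = trafilatura.fetch_url("https://example.com/")
    text = trafilatura.extract(downloaded)  # main content only; boilerplate stripped
    print(text)
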
oobabooga 8bff331893 UI: Fix tool call markup flashing before accordion appears during streaming 2026-03-14 09:26:20 -07:00
oobabooga cb08ba63dc Fix GPT-OSS channel markup leaking into UI when model skips analysis block 2026-03-14 09:08:05 -07:00
oobabooga 09a6549816 API: Stream reasoning_content separately from content in OpenAI-compatible responses 2026-03-14 06:52:40 -07:00
oobabooga accb2ef661 UI/API: Prevent tool call markup from leaking into streamed UI output (closes #7427) 2026-03-14 06:26:47 -07:00
oobabooga 998b9bfb2a UI: Make all chat styles better match instruct style 2026-03-13 21:07:40 -07:00
oobabooga 5f1707af35 UI: Increase the width of non-instruct chat styles 2026-03-13 20:38:40 -07:00
oobabooga 16636c04b8 UI: Minor fix/optimization 2026-03-13 19:06:04 -07:00
oobabooga e8d1c66303 Clean up tool calling code 2026-03-13 18:27:01 -07:00
oobabooga cb88066d15 Update llama.cpp 2026-03-13 13:17:41 -07:00
oobabooga 0cd245bcbb UI: Make autoscroll more robust after the optimizations 2026-03-13 12:58:56 -07:00
oobabooga 24e7e77b55 Clean up 2026-03-13 12:37:10 -07:00
oobabooga cabb95f0d6 UI: Increase the instruct width to 768px 2026-03-13 12:24:48 -07:00
oobabooga 5362bbb413 Make web_search not download the page contents, use fetch_webpage instead 2026-03-13 12:09:08 -07:00
oobabooga d4c22ced83 UI: Optimize syntax highlighting and autoscroll by moving from MutationObserver to morphdom updates 2026-03-13 15:47:14 -03:00
oobabooga aab2596d29 UI: Fix multiple thinking blocks rendering as raw text in HTML generator 2026-03-13 15:47:11 -03:00
oobabooga e0a38da9f3 Improve tool call parsing for Devstral/GPT-OSS and preserve thinking across tool turns 2026-03-13 11:04:06 -03:00
oobabooga e50b823eee Update llama.cpp 2026-03-13 06:22:28 -07:00
oobabooga b7670cc762 Add a tool calling tutorial 2026-03-13 04:35:42 -07:00
oobabooga d0b72c73c0 Update diffusers to 0.37 2026-03-13 03:43:02 -07:00
oobabooga c39c187f47 UI: Improve the style of table scrollbars 2026-03-13 03:21:47 -07:00
oobabooga 4628825651 Better solution to fef95b9e56 2026-03-13 03:17:36 -07:00
oobabooga fef95b9e56 UI: Fix an autoscroll race condition during chat streaming 2026-03-13 03:05:09 -07:00
oobabooga 5833d94d7f UI: Prevent word breaks in tables 2026-03-13 02:56:49 -07:00
oobabooga a4bef860b6 UI: Optimize chat streaming by batching morphdom to one update per animation frame
The monitor physically cannot paint faster than its refresh rate, so
intermediate morphdom calls between frames do redundant parsing, diffing,
and patching work that is never displayed.
2026-03-13 06:45:47 -03:00
oobabooga 5ddc1002d2 Update ExLlamaV3 to 0.0.25 2026-03-13 02:40:17 -07:00
oobabooga c094bc943c UI: Skip output extensions on intermediate tool-calling turns 2026-03-12 21:45:38 -07:00
oobabooga 85ec85e569 UI: Fix Continue while in a tool-calling loop, remove the upper limit on number of tool calls 2026-03-12 20:22:35 -07:00
oobabooga 04213dff14 Address copilot feedback 2026-03-12 19:55:20 -07:00
oobabooga 24fdcc52b3 Merge branch 'main' into dev 2026-03-12 19:33:03 -07:00
oobabooga 58f26a4cc7 UI: Skip redundant work in chat loop when no tools are selected 2026-03-12 19:18:55 -07:00
oobabooga 0e35421593 API: Always extract reasoning_content, even with tool calls 2026-03-12 18:52:41 -07:00
oobabooga 1ed56aee85 Add a calculate tool 2026-03-12 18:45:19 -07:00
oobabooga 286ae475f6 UI: Clean up tool calling code 2026-03-12 22:39:38 -03:00
oobabooga 4c7a56c18d Add num_pages and max_tokens kwargs to web search tools 2026-03-12 22:17:23 -03:00
oobabooga a09f21b9de UI: Fix tool calling for GPT-OSS and Continue 2026-03-12 22:17:20 -03:00
oobabooga 1b7e6c5705 Add the fetch_webpage tool source 2026-03-12 17:11:05 -07:00
oobabooga f8936ec47c Truncate web_search and fetch_webpage tools to 8192 tokens 2026-03-12 17:10:41 -07:00
oobabooga 5c02b7f603 Allow the fetch_webpage tool to return links 2026-03-12 17:08:30 -07:00
oobabooga 09d5e049d6 UI: Improve the Tools checkbox list style 2026-03-12 16:53:49 -07:00
oobabooga fdd8e5b1fd Make repeated Ctrl+C force a shutdown 2026-03-12 15:48:50 -07:00
oobabooga 4f82b71ef3 UI: Bump the ctx-size max from 131072 to 262144 (256K) 2026-03-12 14:56:35 -07:00
oobabooga bbd43d9463 UI: Correctly propagate truncation_length when ctx_size is auto 2026-03-12 14:54:05 -07:00
oobabooga 3e6bd1a310 UI: Prepend thinking tag when template appends it to prompt
Makes Qwen models have a thinking block straight away during streaming.
2026-03-12 14:30:51 -07:00
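
A hedged sketch of the mechanism (names are illustrative, not the actual code): when the rendered prompt already ends with the opening tag, the UI re-attaches it to the streamed reply.

    def displayed_reply(prompt, reply, tag="<think>"):
        # if the template already appended the opening tag to the prompt,
        # re-attach it so the UI renders a thinking block from the first token
        if prompt.rstrip().endswith(tag):
            return tag + "\n" + reply
        return reply
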
oobabooga 9a7428b627 UI: Add collapsible accordions for tool calling steps 2026-03-12 14:16:04 -07:00
oobabooga 2d0cc7726e API: Add reasoning_content field to non-streaming chat completions
Extract thinking/reasoning blocks (e.g. <think>...</think>) into a
separate reasoning_content field on the assistant message, matching
the convention used by DeepSeek, llama.cpp, and SGLang.
2026-03-12 16:30:46 -03:00
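
A usage sketch, assuming the default local OpenAI-compatible endpoint on port 5000:

    import requests

    url = "http://127.0.0.1:5000/v1/chat/completions"  # assumed default address
    payload = {
        "messages": [{"role": "user", "content": "What is 17 * 24?"}],
        "max_tokens": 512,
    }
    msg = requests.post(url, json=payload).json()["choices"][0]["message"]
    print(msg.get("reasoning_content"))  # extracted <think>...</think> text, if any
    print(msg["content"])                # the final answer, thinking block removed
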
oobabooga d45c9b3c59 API: Minor logprobs fixes 2026-03-12 16:09:49 -03:00
oobabooga 2466305f76 Add tool examples 2026-03-12 16:03:57 -03:00
oobabooga a916fb0e5c API: Preserve mid-conversation system message positions 2026-03-12 14:27:24 -03:00
oobabooga fb1b3b6ddf API: Rewrite logprobs for OpenAI spec compliance across all backends
- Rewrite logprobs output format to match the OpenAI specification for
  both chat completions and completions endpoints
- Fix top_logprobs count being ignored for llama.cpp and ExLlamav3
  backends in chat completions (always returned 1 instead of requested N)
- Fix non-streaming responses only returning logprobs for the last token
  instead of all generated tokens (affects all HF-based loaders)
- Fix logprobs returning null for non-streaming chat requests on HF loaders
- Fix off-by-one returning one extra top alternative on HF loaders
2026-03-12 14:17:32 -03:00
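
After the rewrite, responses should follow the OpenAI logprobs shape; a sketch of reading it, assuming the same local endpoint:

    import requests

    payload = {
        "messages": [{"role": "user", "content": "Say hi"}],
        "max_tokens": 5,
        "logprobs": True,
        "top_logprobs": 3,  # N alternatives per token, no longer ignored
    }
    r = requests.post("http://127.0.0.1:5000/v1/chat/completions", json=payload).json()
    for entry in r["choices"][0]["logprobs"]["content"]:
        alts = {t["token"]: round(t["logprob"], 3) for t in entry["top_logprobs"]}
        print(entry["token"], alts)
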
oobabooga 5a017aa338 API: Several OpenAI spec compliance fixes
- Return proper OpenAI error format ({"error": {...}}) instead of HTTP 500 for validation errors
- Send data: [DONE] at the end of SSE streams
- Fix finish_reason so "tool_calls" takes priority over "length"
- Stop including usage in streaming chunks when include_usage is not set
- Handle "developer" role in messages (treated same as "system")
- Add logprobs and top_logprobs parameters for chat completions
- Fix chat completions logprobs not working with llama.cpp and ExLlamav3 backends
- Add max_completion_tokens as an alias for max_tokens in chat completions
2026-03-12 13:30:38 -03:00
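
A minimal streaming client that honors the data: [DONE] sentinel, again assuming the default local endpoint:

    import json
    import requests

    payload = {
        "messages": [{"role": "user", "content": "Tell me a joke"}],
        "stream": True,
    }
    with requests.post("http://127.0.0.1:5000/v1/chat/completions",
                       json=payload, stream=True) as r:
        for line in r.iter_lines():
            if not line or not line.startswith(b"data: "):
                continue
            data = line[len(b"data: "):]
            if data == b"[DONE]":  # end-of-stream sentinel
                break
            delta = json.loads(data)["choices"][0]["delta"]
            print(delta.get("content", ""), end="", flush=True)
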
oobabooga 4b6c9db1c9 UI: Fix stale tool_sequence after edit and chat-instruct tool rendering 2026-03-12 13:12:18 -03:00
oobabooga 09723c9988 API: Include /v1 in the printed API URL for easier integration 2026-03-12 12:43:15 -03:00
oobabooga 2549f7c33b API: Add tool_choice support and fix tool_calls spec compliance 2026-03-12 10:29:23 -03:00
oobabooga b5cac2e3b2 Fix swipes and edit for tool calling in the UI 2026-03-12 01:53:37 -03:00
oobabooga 0d62038710 Add tools refresh button and _tool_turn comment 2026-03-12 01:36:07 -03:00
oobabooga cf9ad8eafe Initial tool-calling support in the UI 2026-03-12 01:16:19 -03:00
oobabooga 980a9d1657 UI: Minor defensive changes to autosave 2026-03-11 15:50:16 -07:00
oobabooga bb00d96dc3 Use a new gr.DragDrop element for Sampler priority + update gradio 2026-03-11 19:35:12 -03:00
oobabooga 66c976e995 Update README with ROCm 7.2 torch install URL 2026-03-11 19:35:12 -03:00
oobabooga 24977846fb Update AMD ROCm from 6.4 to 7.2 2026-03-11 13:14:26 -07:00
oobabooga 7a63a56043 Update llama.cpp 2026-03-11 12:53:19 -07:00
oobabooga f1cfeae372 API: Improve OpenAI spec compliance in streaming and non-streaming responses 2026-03-10 20:55:49 -07:00
oobabooga 3304b57bdf Add native logit_bias and logprobs support for ExLlamav3 2026-03-10 11:03:25 -03:00
oobabooga 8aeaa76365 Forward logit_bias, logprobs, and n to llama.cpp backend
- Forward logit_bias and logprobs natively to llama.cpp
- Support n>1 completions with seed increment for diversity
- Fix logprobs returning empty dict when not requested
2026-03-10 10:41:45 -03:00
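
A hypothetical request exercising the newly forwarded parameters; the token ID in logit_bias is a placeholder, since real IDs are model-specific:

    import requests

    payload = {
        "prompt": "The capital of France is",
        "max_tokens": 8,
        "n": 2,                        # two completions; seed incremented for diversity
        "seed": 42,
        "logit_bias": {"1234": -100},  # suppress token id 1234 (placeholder id)
    }
    r = requests.post("http://127.0.0.1:5000/v1/completions", json=payload).json()
    for choice in r["choices"]:
        print(choice["text"])
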
oobabooga 6ec4ca8b10 Add missing custom_token_bans to llama.cpp and reasoning_effort to ExLlamav3 2026-03-10 09:58:00 -03:00
oobabooga 307c085d1b Minor warning change 2026-03-09 21:44:53 -07:00
oobabooga c604ca66de Update the --multi-user warning 2026-03-09 21:36:04 -07:00
oobabooga 15792c3cb8 Update ExLlamaV3 to 0.0.24 2026-03-09 20:31:05 -07:00
oobabooga 3b71932658 Update README 2026-03-09 20:18:09 -07:00
oobabooga 83b7e47d77 Update README 2026-03-09 20:12:54 -07:00
oobabooga 7f485274eb Fix ExLlamaV3 EOS handling, load order, and perplexity evaluation
- Use config.eos_token_id_list for all EOS tokens as stop conditions
  (fixes models like Llama-3 that define multiple EOS token IDs)
- Load vision/draft models before main model so autosplit accounts
  for their VRAM usage
- Fix loss computation in ExLlamav3_HF: use cache across chunks so
  sequences longer than 2048 tokens get correct perplexity values
2026-03-09 23:56:38 -03:00
oobabooga 39e6c997cc Refactor to not import gradio in --nowebui mode 2026-03-09 19:29:24 -07:00
oobabooga 970055ca00 Update Intel GPU support to use native PyTorch XPU wheels
PyTorch 2.9+ includes native XPU support, making
intel-extension-for-pytorch and the separate oneAPI conda
install unnecessary.

Closes #7308
2026-03-09 17:08:59 -03:00
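
With native XPU wheels, device selection reduces to the stock PyTorch API:

    import torch

    # PyTorch 2.9+ ships native XPU support; no intel-extension-for-pytorch needed
    device = "xpu" if torch.xpu.is_available() else "cpu"
    x = torch.ones(2, 2, device=device)
    print(x.device)
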
oobabooga d6643bb4bc One-click installer: Optimize wheel downloads to only re-download changed wheels 2026-03-09 12:30:43 -07:00
oobabooga 9753b2342b Fix crash on non-UTF-8 Windows locales (e.g. Chinese GBK)
Closes #7416
2026-03-09 16:22:37 -03:00
oobabooga eb4a20137a Update README 2026-03-08 20:38:50 -07:00
oobabooga 634609acca Fix pip installing to system Miniconda on Windows, revert 0132966d 2026-03-08 20:35:41 -07:00
oobabooga 40f1837b42 README: Minor updates 2026-03-08 08:38:29 -07:00
oobabooga f6ffecfff2 Add guard against training with llama.cpp loader 2026-03-08 10:47:59 -03:00
oobabooga 5a91b8462f Remove ctx_size_draft from ExLlamav3 loader 2026-03-08 09:53:48 -03:00
oobabooga 7a8ca9f2b0 Fix passing adaptive-p to llama-server 2026-03-08 04:09:40 -07:00
oobabooga 7170a16b91 Fix passing adaptive-p to llama-server 2026-03-08 04:09:18 -07:00
oobabooga b3705d87bf Add PyPI fallback for PyTorch install commands 2026-03-07 18:07:09 -08:00
oobabooga 0132966d09 Add PyPI fallback for PyTorch install commands 2026-03-07 23:06:15 -03:00
oobabooga baf4e13ff1 ExLlamav3: fix draft cache size to match main cache 2026-03-07 22:34:48 -03:00
oobabooga 6ff111d18e ExLlamav3: handle exceptions in ConcurrentGenerator iterate loop 2026-03-07 22:05:31 -03:00
oobabooga aeeff41cc0
Merge pull request #7412 from oobabooga/dev
Merge dev branch
2026-03-07 12:02:24 -03:00
oobabooga 0cecc0a041 Use tar.gz for Linux/macOS portable builds to preserve symlinks 2026-03-07 06:59:48 -08:00
oobabooga e1bf0b866f Update the macos workflow 2026-03-07 06:46:46 -08:00
oobabooga 3b7cf44406
Merge pull request #7411 from oobabooga/dev
Merge dev branch
2026-03-07 11:15:38 -03:00
oobabooga b686193fe2 Reapply "Update Miniforge from 25.3.0 to 26.1.0"
This reverts commit 085c4ef5d7.
2026-03-07 06:10:05 -08:00
oobabooga 328215b0c7 API: Stop generation on client disconnect for non-streaming requests 2026-03-07 06:06:13 -08:00
oobabooga 304510eb3d ExLlamav3: route all generation through ConcurrentGenerator 2026-03-07 05:54:14 -08:00
oobabooga 085c4ef5d7 Revert "Update Miniforge from 25.3.0 to 26.1.0"
This reverts commit 9576c5a5f4.
2026-03-07 05:09:49 -08:00
oobabooga aa634c77c0 Update llama.cpp 2026-03-06 21:00:36 -08:00
oobabooga abc699db9b Minor UI change 2026-03-06 19:03:38 -08:00
oobabooga f2fe001cc4 Fix message copy buttons not working over HTTP 2026-03-06 19:01:38 -08:00
oobabooga 7ea5513263 Handle Qwen 3.5 thinking blocks 2026-03-06 19:01:28 -08:00
oobabooga 5fa709a3f4 llama.cpp server: use port+5 offset and suppress "No parser definition detected" logs 2026-03-06 18:52:34 -08:00
oobabooga e8e0d02406 Remove outdated ROCm environment variable overrides from one_click.py 2026-03-06 18:15:05 -08:00
oobabooga 1eead661c3 Portable mode: always use ../user_data if it exists 2026-03-06 18:04:48 -08:00
oobabooga d48b53422f Training: Optimize _peek_json_keys to avoid loading entire file into memory 2026-03-06 15:39:08 -08:00
oobabooga 2beaa4b971 Update llama.cpp 2026-03-06 14:39:35 -08:00
oobabooga 5f6754c267 Fix stop button being ignored when token throttling is off 2026-03-06 17:12:34 -03:00
oobabooga b8b4471ab5 Security: restrict file writes to user_data_dir, block extra_flags from API 2026-03-06 16:58:11 -03:00
oobabooga d03923924a Several small fixes
- Stop llama-server subprocess on model unload instead of relying on GC
- Fix tool_calls[].index being string instead of int in API responses
- Omit tool_calls key from API response when empty per OpenAI spec
- Prevent division by zero when micro_batch_size > batch_size in training
- Copy sampler_priority list before mutating in ExLlamaV3
- Normalize presence/frequency_penalty names for ExLlamaV3 sampler sorting
- Restore original chat_template after training instead of leaving it mutated
2026-03-06 16:52:13 -03:00
oobabooga 044566d42d API: Add tool call parsing for DeepSeek, GLM, MiniMax, and Kimi models 2026-03-06 15:06:56 -03:00
oobabooga f5acf55207 Add --chat-template-file flag to override the default instruction template for API requests
Matches llama.cpp's flag name. Supports .jinja, .jinja2, and .yaml files.
Priority: per-request params > --chat-template-file > model's built-in template.
2026-03-06 14:04:16 -03:00
oobabooga 3531069824 API: Support Llama 4 tool calling and fix tool calling edge cases 2026-03-06 13:12:14 -03:00
oobabooga 160f7ad6b4 Handle SIGTERM to stop llama-server on pkill 2026-03-06 12:56:33 -03:00
oobabooga 8e24a20873 Installer: Fix libstdcxx-ng version pin causing conda solver to hang on Python 3.13 2026-03-06 07:39:50 -08:00
oobabooga 3bab7fbfd4 Update Colab notebook: new default model, direct GGUF URL support 2026-03-06 06:52:49 -08:00
oobabooga e7e0df0101 Fix hover menu shifting down when chat input grows 2026-03-06 11:52:16 -03:00
oobabooga 3323dedd08 Update llama.cpp 2026-03-06 06:30:01 -08:00
oobabooga 36dbc4ccce Remove unused colorama and psutil requirements 2026-03-06 06:28:35 -08:00
oobabooga 86d59b4404 Installer: Fix edge case in wheel re-download caching 2026-03-06 06:16:57 -08:00
oobabooga 0e0e3ceb97 Update the custom gradio wheels 2026-03-06 05:46:08 -08:00
oobabooga 6d7018069c Installer: Use absolute Python path in Windows batch scripts 2026-03-05 21:56:01 -08:00
oobabooga f9ed8820de API: Make tool function description and parameters optional 2026-03-05 21:43:33 -08:00
oobabooga 3880c1a406 API: Accept content:null and complex tool definitions in tool calling requests 2026-03-06 02:41:38 -03:00
oobabooga 93ebfa2b7e Fix llama-server output filter for new log format 2026-03-06 02:38:13 -03:00
oobabooga d0ac58ad31 API: Fix tool_calls placement and other response compatibility issues 2026-03-05 21:25:03 -08:00
oobabooga f06583b2b9 API: Use \n instead of \r\n as the SSE separator to match OpenAI 2026-03-05 21:16:37 -08:00
oobabooga 8be444a559 Update the custom gradio wheels 2026-03-05 21:05:15 -08:00
oobabooga 1729fb07b9 Update llama.cpp 2026-03-05 21:04:24 -08:00
oobabooga eba262d47a Security: prevent path traversal in character/user/file save and delete 2026-03-06 02:00:10 -03:00
oobabooga 521ddbb722 Security: restrict API model loading args to UI-exposed parameters
The /v1/internal/model/load endpoint previously allowed setting any
shared.args attribute, including security-sensitive flags like
trust_remote_code. Now only keys from list_model_elements() are accepted.
2026-03-06 01:57:02 -03:00
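
The allowlist pattern in sketch form; the key names are illustrative, and the real list comes from list_model_elements():

    ALLOWED_KEYS = {"ctx_size", "gpu_layers", "cache_type"}  # illustrative subset

    def sanitize_load_args(request_args: dict) -> dict:
        # silently drop anything not exposed in the UI, e.g. trust_remote_code
        return {k: v for k, v in request_args.items() if k in ALLOWED_KEYS}
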
oobabooga 66fb79fe15 llama.cpp: Add --fit-target param 2026-03-06 01:55:48 -03:00
oobabooga e81a47f708 Improve the API generation defaults --help message 2026-03-05 20:41:45 -08:00
oobabooga 27bcc45c18 API: Add command-line flags to override default generation parameters 2026-03-06 01:36:45 -03:00
oobabooga 8a9afcbec6 Allow extensions to skip output post-processing 2026-03-06 01:19:46 -03:00
oobabooga 2e7e966ef2 Docs: Better Tool/Function calling examples 2026-03-05 20:06:34 -08:00
oobabooga ddcad3cc51 Follow-up to e2548f69: add missing paths module, fix gallery extension 2026-03-06 00:58:03 -03:00
oobabooga 8d43123f73 API: Fix function calling for Qwen, Mistral, GPT-OSS, and other models
The tool call response parser only handled JSON-based formats, causing
tool_calls to always be empty for models that use non-JSON formats.

Add parsers for three additional tool call formats:
- Qwen3.5: <tool_call><function=name><parameter=key>value</parameter>
- Mistral/Devstral: functionName{"arg": "value"}
- GPT-OSS: <|channel|>commentary to=functions.name<|message|>{...}

Also fix multi-turn tool conversations crashing with Jinja2
UndefinedError on tool_call_id by preserving tool_calls and
tool_call_id metadata through the chat history conversion.
2026-03-06 00:55:33 -03:00
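
A simplified sketch of a parser for the Mistral/Devstral shape; the actual implementation handles more edge cases:

    import json
    import re

    CALL_RE = re.compile(r"(\w+)\s*(\{.*\})", re.S)  # functionName{"arg": "value"}

    def parse_mistral_call(text):
        m = CALL_RE.search(text)
        if not m:
            return None
        try:
            return {"name": m.group(1), "arguments": json.loads(m.group(2))}
        except json.JSONDecodeError:
            return None
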
oobabooga e2548f69a9 Make user_data configurable: add --user-data-dir flag, auto-detect ../user_data
If --user-data-dir is not set, auto-detect: use ../user_data when
./user_data doesn't exist, making it easy to share user data across
portable builds by placing it one folder up.
2026-03-05 19:31:10 -08:00
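
The resolution order, as a minimal sketch:

    from pathlib import Path

    def resolve_user_data(cli_value=None):
        if cli_value:                  # --user-data-dir always wins
            return Path(cli_value)
        local, parent = Path("user_data"), Path("../user_data")
        # portable builds can share one folder by placing it one level up
        return parent if (not local.exists() and parent.exists()) else local
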
oobabooga 4c406e024f API: Speed up chat completions by ~85ms per request 2026-03-05 18:36:07 -08:00
oobabooga 249bd6eea2 UI: Update the parallel info message 2026-03-05 18:11:55 -08:00
oobabooga f52d9336e5 TensorRT-LLM: Migrate from ModelRunner to LLM API, add concurrent API request support 2026-03-05 18:09:45 -08:00
oobabooga 9824c82cb6 API: Add parallel request support for llama.cpp and ExLlamaV3 2026-03-05 16:49:58 -08:00
oobabooga 2f08dce7b0 Remove ExLlamaV2 backend
- archived upstream: 7dc12af3a8
- replaced by ExLlamaV3, which has much better quantization accuracy
2026-03-05 14:02:13 -08:00
oobabooga 134ac8fc29 Update README 2026-03-05 12:30:28 -08:00
oobabooga 409db3df1e Training: Docs improvements 2026-03-05 11:30:57 -08:00
oobabooga 86d8291e58 Training: UI cleanup and better defaults 2026-03-05 11:20:55 -08:00
oobabooga 33ff3773a0 Clean up LoRA loading parameter handling 2026-03-05 16:00:13 -03:00
oobabooga 7a1fa8c9ea Training: fix checkpoint resume and surface training errors to UI 2026-03-05 15:50:39 -03:00
oobabooga 275810c843 Training: wire up HF Trainer checkpoint resumption for full state recovery 2026-03-05 15:32:49 -03:00
oobabooga 438e59498e Update ExLlamaV3 to v0.0.23 2026-03-05 10:24:31 -08:00
oobabooga 63f28cb4a2 Training: align defaults with peft/axolotl (rank 8, alpha 16, dropout 0, cutoff 512, eos on) 2026-03-05 15:12:32 -03:00
oobabooga 33a38d7ece Training: drop conversations exceeding cutoff length instead of truncating 2026-03-05 14:56:27 -03:00
oobabooga c2e494963f Training: fix silent error on model reload failure, minor cleanups 2026-03-05 14:41:44 -03:00
oobabooga 5b18be8582 Training: unify instruction training through apply_chat_template()
Instead of two separate paths (format files vs Chat Template), all
instruction training now uses apply_chat_template() with assistant-only
label masking. Users pick a Jinja2 template from the dropdown or use the
model's built-in chat template — both work identically.
2026-03-05 14:39:37 -03:00
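
A minimal sketch of the unified path; the model name is a stand-in for any chat model:

    from transformers import AutoTokenizer

    tok = AutoTokenizer.from_pretrained("Qwen/Qwen2.5-0.5B-Instruct")  # placeholder
    messages = [
        {"role": "user", "content": "What is 2 + 2?"},
        {"role": "assistant", "content": "4"},
    ]
    ids = tok.apply_chat_template(messages, tokenize=True)
    # assistant-only masking: labels are -100 everywhere except the tokens
    # belonging to the assistant turn, so only those contribute to the loss
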
oobabooga d337ba0390 Training: fix apply_chat_template returning BatchEncoding instead of list 2026-03-05 13:45:28 -03:00
oobabooga 5be68cc073 Remove Training_PRO extension
The built-in training tab now covers its essential functionality
with a more modern and correct implementation (apply_chat_template,
dynamic padding, JSONL datasets, stride overlap).
2026-03-05 12:55:07 -03:00
oobabooga 1ffe540c97 Full documentation update to match current codebase 2026-03-05 12:46:54 -03:00
oobabooga 1c2548fd89 Training: use dynamic padding (pad to batch max instead of cutoff_len)
- Remove pre-padding from tokenize() and tokenize_conversation()
- Collate function now right-pads each batch to the longest sequence
- Set tokenizer padding_side to "right" (standard for training)
- Remove dead natural_keys import
- Reduces wasted compute on batches with short sequences
- Aligns with axolotl/unsloth approach
2026-03-05 12:45:32 -03:00
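
A sketch of dynamic right-padding in a collate function, under the assumptions above:

    import torch

    def collate_fn(batch, pad_token_id):
        # right-pad every sequence to the longest one in this batch
        max_len = max(len(x["input_ids"]) for x in batch)
        input_ids, labels = [], []
        for x in batch:
            pad = max_len - len(x["input_ids"])
            input_ids.append(x["input_ids"] + [pad_token_id] * pad)
            labels.append(x["labels"] + [-100] * pad)  # -100 is ignored by the loss
        return {
            "input_ids": torch.tensor(input_ids),
            "labels": torch.tensor(labels),
        }
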
oobabooga da2d4f1a6a Training: replace raw text file with JSONL text dataset, re-add stride overlap
- Replace "Raw text file" tab with "Text Dataset" tab using JSONL format with "text" key per row
- Re-add stride overlap for chunking (configurable Stride Length slider, 0-2048 tokens)
- Pad remainder chunks instead of dropping them
- Remove hard_cut_string, min_chars, raw_text_file parameters
- Remove .txt file and directory loading support
2026-03-05 12:33:12 -03:00
oobabooga d278bb46a2 Add apply_chat_template() support for LoRA training
- Support multi-turn conversations (OpenAI messages + ShareGPT formats)
- Automatic assistant-only label masking via incremental tokenization
- Use tokenizer.apply_chat_template() for proper special token handling
- Add "Chat Template" option to the Data Format dropdown
- Also accept instruction/output datasets (auto-converted to messages)
- Validate chat template availability and dataset format upfront
- Fix after_tokens[-1] IndexError when train_only_after is at end of prompt
- Update docs
2026-03-05 11:47:25 -03:00
oobabooga b16a1a874a Update TensorRT-LLM Dockerfile for v1.1.0 2026-03-05 06:23:56 -08:00
oobabooga 45188eccef Overhaul LoRA training tab
- Use peft's "all-linear" for target modules instead of the old
  model_to_lora_modules mapping (only knew ~39 model types)
- Add "Target all linear layers" checkbox, on by default
- Fix labels in tokenize() — were [1]s instead of actual token IDs
- Replace DataCollatorForLanguageModeling with custom collate_fn
- Raw text: concatenate-and-split instead of overlapping chunks
- Adapter backup/loading: check safetensors before bin
- Fix report_to=None crash on transformers 5.x
- Fix no_cuda deprecation for transformers 5.x (use use_cpu)
- Move torch.compile before Trainer init
- Add remove_unused_columns=False (torch.compile breaks column detection)
- Guard against no target modules selected
- Set tracked.did_save so we don't always save twice
- pad_token_id: fall back to eos_token_id instead of hardcoding 0
- Drop MODEL_CLASSES, split_chunks, cut_chunk_for_newline
- Update docs
2026-03-05 10:52:59 -03:00
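
A hedged sketch of the peft configuration this implies, using the defaults the training tab settled on (rank 8, alpha 16, dropout 0; see the defaults commit above):

    from peft import LoraConfig

    config = LoraConfig(
        r=8,                          # rank
        lora_alpha=16,
        lora_dropout=0.0,
        target_modules="all-linear",  # every linear layer; no per-model mapping
    )
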
oobabooga 268cc3f100 Update TensorRT-LLM to v1.1.0 2026-03-05 09:32:28 -03:00
oobabooga 69fa4dd0b1 llama.cpp: allow ctx_size=0 for auto context via --fit 2026-03-04 19:33:20 -08:00
oobabooga fbfcd59fe0 llama.cpp: Use -1 instead of 0 for auto gpu_layers 2026-03-04 19:21:45 -08:00
oobabooga d45aa6606a Fix blank prompt dropdown in Notebook/Default tabs on first startup 2026-03-04 19:07:55 -08:00
oobabooga 0804296f4d Revert "UI: Remove unnecessary server round-trips from button click chains"
This reverts commit ff48956cb0.
2026-03-04 18:41:30 -08:00
oobabooga 6a08e79fa5 Update the custom gradio wheels 2026-03-04 18:22:50 -08:00
oobabooga ff48956cb0 UI: Remove unnecessary server round-trips from button click chains 2026-03-04 18:19:56 -08:00
oobabooga 5a22970ba8 Docker: fix and clean up configs, update docs 2026-03-04 23:13:47 -03:00
oobabooga 387cf9d8df Remove obsolete DeepSpeed inference code (2023 relic) 2026-03-04 17:20:34 -08:00
oobabooga 942ff8fcb4 Remove obsolete stuff after custom gradio updates 2026-03-04 16:43:32 -08:00
oobabooga da3010c3ed tiny improvements to llama_cpp_server.py 2026-03-04 15:54:37 -08:00
oobabooga 83cc207ef7 Update the custom gradio wheels 2026-03-04 14:31:18 -08:00
thecaptain789 2ac4eb33c8
fix: correct typo 'occured' to 'occurred' (#7389) 2026-03-04 18:09:28 -03:00
Sense_wang 7bf15ad933
fix: replace bare except clauses with except Exception (#7400) 2026-03-04 18:06:17 -03:00
mamei16 1d1f4dfc88
Disable uncommonly used indented codeblocks (#7401) 2026-03-04 17:51:00 -03:00
mamei16 abb7cc02e9
Re-introduce inline LaTeX rendering with more robust exception handling (#7402) 2026-03-04 17:44:19 -03:00
mamei16 68109bc5da
Improve process_markdown_content (#7403) 2026-03-04 17:26:13 -03:00
weiguang li 952e2c404a
Bump sentence-transformers from 2.2.2 to 3.3.1 in superbooga (#7406) 2026-03-04 17:08:08 -03:00
oobabooga cdf0e392e6 llama.cpp: Reorganize speculative decoding UI and use recommended ngram-mod defaults 2026-03-04 12:05:08 -08:00
oobabooga eb90daf098 ExLlamaV2: Don't expose unused seed parameter 2026-03-04 11:14:50 -08:00
oobabooga 0ffb75de7c Update Transformers to 5.3.0 2026-03-04 11:12:54 -08:00
oobabooga d8af0505a8 ExLlamav3_HF: Optimize prefill and fix CFG cache initialization 2026-03-04 11:09:58 -08:00
oobabooga 9b916f02cd ExLlamaV3: Attach AdaptiveP, fix speculative decoding parameter, add seed 2026-03-04 10:51:15 -08:00
oobabooga 5d93f4e800 Fix requires_grad warning in logits API 2026-03-04 10:43:23 -08:00
oobabooga 64eb77e782 Fix the logits API endpoint with transformers 2026-03-04 10:41:47 -08:00
oobabooga 22141679e3 Update the custom gradio wheels 2026-03-04 10:01:31 -08:00
oobabooga 65de4c30c8 Add adaptive-p sampler and n-gram speculative decoding support 2026-03-04 09:41:29 -08:00
oobabooga f010aa1612 Replace PyPDF2 with pymupdf for PDF text extraction
pymupdf produces cleaner text (e.g. no concatenated words in headers),
handles encrypted and malformed PDFs that PyPDF2 failed on, and
supports non-Latin scripts.
2026-03-04 06:43:37 -08:00
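
Basic PyMuPDF extraction, for reference (the filename is a placeholder):

    import pymupdf  # PyMuPDF; older code imports the same package as "fitz"

    with pymupdf.open("document.pdf") as doc:
        text = "\n".join(page.get_text() for page in doc)
    print(text[:500])
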
oobabooga f4d787ab8d Delegate GPU layer allocation to llama.cpp's --fit 2026-03-04 06:37:50 -08:00
oobabooga 8a3d866401 Fix temperature_last having no effect in llama.cpp server sampler order 2026-03-04 06:10:51 -08:00
oobabooga 11dc6fdfce Update the custom gradio wheels 2026-03-04 06:04:33 -08:00
oobabooga 7d42b6900e Update the custom gradio wheels 2026-03-04 05:47:59 -08:00
oobabooga 8cbb7661a8 Remove no longer needed dark theme localstorage code 2026-03-03 18:51:24 -08:00
oobabooga 866c48e55b Simplify dark theme handling using gradio fork's new dark_theme parameter 2026-03-03 18:41:47 -08:00
oobabooga b3fd0d16e0 Use a new gr.Headless component for efficient chat streaming 2026-03-03 18:12:03 -08:00
oobabooga d584ede72e Avoid a circular import 2026-03-03 17:59:47 -08:00
oobabooga c0bff831e3 Update custom gradio wheels 2026-03-03 17:21:18 -08:00
oobabooga 2260e530c9 Remove gradio monkey-patches (moved to gradio fork) 2026-03-03 17:17:36 -08:00
oobabooga e9f22813e4 Replace gradio with my gradio 4.37.2 fork 2026-03-03 16:51:27 -08:00
dependabot[bot] 3519890c8e
Bump flask-cloudflared from 0.0.14 to 0.0.15 in /requirements/full (#7380) 2026-03-03 21:41:51 -03:00
dependabot[bot] 9c604628a0
Bump flask-cloudflared from 0.0.14 to 0.0.15 in /requirements/portable (#7382) 2026-03-03 21:41:46 -03:00
oobabooga fbd2acfa19 Remove triton-windows from non-CUDA requirements 2026-03-03 16:16:55 -08:00
oobabooga 5fd79b23d1 Add CUDA 13.1 portable builds 2026-03-03 15:36:41 -08:00
oobabooga b8fcc8ea32 Update llama.cpp, remove noavx2 builds, add ROCm Windows portable builds 2026-03-03 15:27:19 -08:00
Pádraic Slattery d7dd533b99
chore: Update outdated GitHub Actions versions (#7384) 2026-03-03 17:54:12 -03:00
oobabooga 9576c5a5f4 Update Miniforge from 25.3.0 to 26.1.0 2026-03-03 12:33:20 -08:00
oobabooga 9814d3d0ae Patch gradio 4.x for huggingface-hub 1.x compatibility 2026-03-03 12:20:37 -08:00
oobabooga 38d0eeefc0 Update dependencies: torch 2.9.1, transformers 5.2, exllamav3 0.0.22, accelerate 1.12, huggingface-hub 1.5 2026-03-03 12:01:02 -08:00
oobabooga ddd74324fe Update PyTorch to 2.9.1 and ROCm to 6.4 2026-03-03 11:38:52 -08:00
oobabooga efc72d5c32 Update Python from 3.11 to 3.13 2026-03-03 11:03:26 -08:00
oobabooga aecbc5a8ac Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2026-01-28 08:30:28 -08:00
oobabooga c54e8a2b3d Try to spawn llama.cpp on port 5001 instead of random port 2026-01-28 08:23:55 -08:00
oobabooga dc2bbf1861 Refactor thinking block detection and add Solar Open support 2026-01-28 08:21:34 -08:00
dependabot[bot] cae1fef42d
Bump triton-windows in /requirements/full (#7368) 2026-01-14 21:30:59 -03:00
q5sys (JT) 7493fe7841
feat: Add a dropdown to save/load user personas (#7367) 2026-01-14 20:35:08 -03:00
jakubartur 21b979c02a
Fix code block copy button on HTTP (Clipboard API fallback) (#7358) 2026-01-14 19:34:21 -03:00
oobabooga a731861127 Update README 2026-01-13 15:38:32 -08:00
oobabooga 910456ba31
Merge pull request #7366 from oobabooga/dev
Merge dev branch
2026-01-08 17:54:12 -03:00
oobabooga d79cdc614c Update llama.cpp 2026-01-08 11:24:15 -08:00
oobabooga 332fd40653 Update llama.cpp 2026-01-07 19:06:23 -08:00
dependabot[bot] 50a35b483c
Update bitsandbytes requirement in /requirements/full (#7353) 2026-01-06 15:27:23 -03:00
dependabot[bot] 45fbec0320
Update torchao requirement in /requirements/full (#7356) 2026-01-06 15:27:10 -03:00
oobabooga b0968ed8b4 Update flash-linear-attention 2026-01-06 10:26:43 -08:00
oobabooga 36747cf99c Lint 2026-01-06 10:24:34 -08:00
oobabooga 2fcbadec67 Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2026-01-06 10:24:07 -08:00
oobabooga bb3b7bc197 Update llama.cpp 2026-01-06 10:23:58 -08:00
Sergey 'Jin' Bostandzhyan 6e2c4e9c23
Fix loading models which have their eos token disabled (#7363) 2026-01-06 11:31:10 -03:00
oobabooga a2ed640aa6
UI: Improved border color for tables + hr 2025-12-21 15:38:48 -03:00
oobabooga 1066fe8c21
UI: Improve table styles (more minimalistic) 2025-12-21 15:32:02 -03:00
oobabooga 9530d3a6d8
UI: Improve hr (horizontal separator) style 2025-12-21 15:30:54 -03:00
oobabooga a0b5599e9b
Merge pull request #7355 from oobabooga/dev
Merge dev branch
2025-12-20 02:18:31 -03:00
oobabooga 09d88f91e8 Update llama.cpp 2025-12-19 21:00:13 -08:00
oobabooga 34804f9354
Merge pull request #7352 from oobabooga/dev
Merge dev branch
2025-12-14 22:59:34 -03:00
oobabooga 6e8fb0e7b1 Update llama.cpp 2025-12-14 13:32:14 -08:00
oobabooga 9fe40ff90f Update exllamav3 to 0.0.18 2025-12-10 05:37:33 -08:00
oobabooga 8e762e04b4 Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2025-12-09 05:27:43 -08:00
oobabooga aa16266c38 Update llama.cpp 2025-12-09 03:19:23 -08:00
dependabot[bot] 85269d7fbb
Update safetensors requirement in /requirements/full (#7323) 2025-12-08 17:58:27 -03:00
dependabot[bot] c4ebab9b29
Bump triton-windows in /requirements/full (#7346) 2025-12-08 17:56:07 -03:00
oobabooga bb004bacb1
Merge pull request #7345 from oobabooga/dev
Merge dev branch
2025-12-08 10:14:49 -03:00
oobabooga 502f59d39b Update diffusers to 0.36 2025-12-08 05:08:54 -08:00
oobabooga 4d94f66832
Merge pull request #7343 from oobabooga/dev
Merge dev branch
2025-12-07 23:49:19 -03:00
oobabooga e7c8b51fec Revert "Use flash_attention_2 by default for Transformers models"
This reverts commit 85f2df92e9.
2025-12-07 18:48:41 -08:00
oobabooga 652d13c003
Merge pull request #7339 from oobabooga/dev
Merge dev branch
2025-12-07 17:58:00 -03:00
oobabooga b758059e95 Revert "Clear the torch cache between sequential image generations"
This reverts commit 1ec9f708e5.
2025-12-07 12:23:19 -08:00
oobabooga 1ec9f708e5 Clear the torch cache between sequential image generations 2025-12-07 11:49:22 -08:00
oobabooga 3b8369a679 Update llama.cpp 2025-12-07 11:18:36 -08:00
oobabooga 058e78411d docs: Small changes 2025-12-07 10:16:08 -08:00
oobabooga 17bd8d10f0 Update exllamav3 to 0.0.17 2025-12-07 09:37:18 -08:00
oobabooga 85f2df92e9 Use flash_attention_2 by default for Transformers models 2025-12-07 06:56:58 -08:00
oobabooga 1762312fb4 Use random instead of np.random for image seeds (makes it work on Windows) 2025-12-06 20:10:32 -08:00
oobabooga 160a25165a docs: Small change 2025-12-06 08:41:12 -08:00
oobabooga f93cc4b5c3 Add an API example to the image generation tutorial 2025-12-06 08:33:06 -08:00
oobabooga c026dbaf64 Fix API requests always returning the same 'created' time 2025-12-06 08:23:21 -08:00
oobabooga 194e4c285f Update llama.cpp 2025-12-06 08:14:48 -08:00
oobabooga 1c36559e2b Add a News section to the README 2025-12-06 07:05:00 -08:00
oobabooga 02518a96a9 Lint 2025-12-06 06:55:06 -08:00
oobabooga 0100ad1bd7 Add user_data/image_outputs to the Gradio allowed paths 2025-12-06 06:39:30 -08:00
oobabooga 6411142111 docs: Small changes 2025-12-06 06:36:16 -08:00
oobabooga 455dc06db0 Serve the original PNG images in the UI instead of webp 2025-12-06 05:43:00 -08:00
oobabooga 1a9ed1fe98 Fix the height of the image output gallery 2025-12-06 05:21:26 -08:00
oobabooga 17b12567d8 docs: Small changes 2025-12-05 14:15:15 -08:00
oobabooga e20b2d38ff docs: Add VRAM measurements for Z-Image-Turbo 2025-12-05 14:12:08 -08:00
oobabooga 6ca99910ba Image: Quantize the text encoder for lower VRAM 2025-12-05 13:08:46 -08:00
oobabooga 11937de517 Use flash attention for image generation by default 2025-12-05 12:13:24 -08:00
oobabooga eba8a59466 docs: Improve the image generation tutorial 2025-12-05 12:10:41 -08:00
oobabooga 5848c7884d Increase the height of the image output gallery 2025-12-05 10:24:51 -08:00
oobabooga c11c14590a Image: Better LLM variation default prompt 2025-12-05 08:08:11 -08:00
oobabooga 0dd468245c Image: Add back the gallery cache (for performance) 2025-12-05 07:11:38 -08:00
oobabooga b63d57158d Image: Add TGW as a prefix to output images 2025-12-05 05:59:54 -08:00
oobabooga afa29b9554 Image: Several fixes 2025-12-05 05:58:57 -08:00
oobabooga 8eac99599a Image: Better LLM variation default prompt 2025-12-04 19:58:06 -08:00
oobabooga b4f06a50b0 fix: Pass bos_token and eos_token from metadata to jinja2
Fixes loading Seed-Instruct-36B
2025-12-04 19:11:31 -08:00
oobabooga 15c6e43597 Image: Add a revised_prompt field to API results for OpenAI compatibility 2025-12-04 17:41:09 -08:00
oobabooga 56f2a9512f Revert "Image: Add the LLM-generated prompt to the API result"
This reverts commit c7ad28a4cd.
2025-12-04 17:34:27 -08:00
oobabooga 3ef428efaa Image: Remove llm_variations from the API 2025-12-04 17:34:17 -08:00
oobabooga c7ad28a4cd Image: Add the LLM-generated prompt to the API result 2025-12-04 17:22:08 -08:00
oobabooga b451bac082 Image: Improve a log message 2025-12-04 16:33:46 -08:00
oobabooga 47a0fcd614 Image: PNG metadata improvements 2025-12-04 16:25:48 -08:00
oobabooga ac31a7c008 Image: Organize the UI 2025-12-04 15:45:04 -08:00
oobabooga a90739f498 Image: Better LLM variation default prompt 2025-12-04 10:50:40 -08:00
oobabooga ffef3c7b1d Image: Make the LLM Variations prompt configurable 2025-12-04 10:44:35 -08:00
oobabooga 5763947c37 Image: Simplify the API code, add the llm_variations option 2025-12-04 10:23:00 -08:00
oobabooga 2793153717 Image: Add LLM-generated prompt variations 2025-12-04 08:10:24 -08:00
oobabooga 7fb9f19bd8 Progress bar style improvements 2025-12-04 06:20:45 -08:00
oobabooga a838223d18 Image: Add a progress bar during generation 2025-12-04 05:49:57 -08:00
oobabooga 14dbc3488e Image: Clear the torch cache after generation, not before 2025-12-04 05:32:58 -08:00
oobabooga 235b94f097 Image: Add placeholder file for user_data/image_models 2025-12-03 18:43:30 -08:00
oobabooga c357eed4c7 Image: Remove the flash_attention_3 option (no idea how to get it working) 2025-12-03 18:40:34 -08:00
oobabooga c93d27add3 Update llama.cpp 2025-12-03 18:29:43 -08:00
oobabooga fbca54957e Image generation: Yield partial results for batch count > 1 2025-12-03 16:13:07 -08:00
oobabooga 49c60882bf Image generation: Safer image uploading 2025-12-03 16:07:51 -08:00
oobabooga 59285d501d Image generation: Small UI improvements 2025-12-03 16:03:31 -08:00
oobabooga 373baa5c9c UI: Minor image gallery improvements 2025-12-03 14:45:02 -08:00
oobabooga 906dc54969 Load --image-model before --model 2025-12-03 12:15:38 -08:00
oobabooga 4468c49439 Add semaphore to image generation API endpoint 2025-12-03 12:02:47 -08:00
oobabooga 5ad174fad2 docs: Add an image generation API example 2025-12-03 11:58:54 -08:00
oobabooga 5433ef3333 Add an API endpoint for generating images 2025-12-03 11:50:56 -08:00
oobabooga 9448bf1caa Image generation: add torchao quantization (supports torch.compile) 2025-12-02 14:22:51 -08:00
oobabooga 97281ff831 UI: Fix an index error in the new image gallery 2025-12-02 11:20:52 -08:00
oobabooga 9d07d3a229 Make portable builds functional again after b3666e140d 2025-12-02 10:06:57 -08:00
oobabooga 6291e72129 Remove quanto for now (requires messy compilation) 2025-12-02 09:57:18 -08:00
oobabooga b3666e140d
Add image generation support (#7328) 2025-12-02 14:55:38 -03:00
oobabooga a83821e941 Revert "UI: Optimize typing in all textareas"
This reverts commit e24ba92ef2.
2025-12-01 10:34:23 -08:00
oobabooga 24fd963c38 Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2025-12-01 08:06:08 -08:00
oobabooga e24ba92ef2 UI: Optimize typing in all textareas 2025-12-01 08:05:21 -08:00
oobabooga bd9f2de73a
Merge pull request #7331 from oobabooga/dev
Merge dev branch
2025-11-28 23:00:01 -03:00
aidevtime 661e42d2b7
fix(deps): upgrade coqui-tts to >=0.27.0 for transformers 4.55 compatibility (#7329) 2025-11-28 22:59:36 -03:00
oobabooga 5327bc9397
Update modules/shared.py
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-11-28 22:48:05 -03:00
oobabooga 78b315344a Update exllamav3 2025-11-28 06:45:05 -08:00
oobabooga 3cad0cd4c1 Update llama.cpp 2025-11-28 03:52:37 -08:00
GodEmperor785 400bb0694b
Add slider for --ubatch-size for llama.cpp loader, change defaults for better MoE performance (#7316) 2025-11-21 16:56:02 -03:00
oobabooga 8f0048663d More modular HTML generator 2025-11-21 07:09:16 -08:00
oobabooga b0baf7518b Remove macos x86-64 portable builds (macos-13 runner deprecated by GitHub) 2025-11-19 06:07:15 -08:00
oobabooga 1afe0827ba
Merge pull request #7317 from oobabooga/dev
Merge dev branch
2025-11-19 11:04:02 -03:00
oobabooga 0d4eff284c Add a --cpu-moe option for llama.cpp 2025-11-19 05:23:43 -08:00
oobabooga d6f39e1fef Add ROCm portable builds 2025-11-18 16:32:20 -08:00
oobabooga 327a234d23 Add ROCm requirements.txt files 2025-11-18 16:24:56 -08:00
oobabooga 4e4abd0841 Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2025-11-18 14:07:05 -08:00
oobabooga c45f35ccc2 Remove the macos 13 wheels (deprecated by GitHub) 2025-11-18 14:06:42 -08:00
oobabooga d85b95bb15 Update llama.cpp 2025-11-18 14:06:04 -08:00
dependabot[bot] 4a36b7be5b
Bump triton-windows in /requirements/full (#7311) 2025-11-18 18:51:26 -03:00
dependabot[bot] 3d7e9856a2
Update peft requirement from ==0.17.* to ==0.18.* in /requirements/full (#7310) 2025-11-18 18:51:15 -03:00
oobabooga a26e28bdea Update exllamav3 to 0.0.15 2025-11-18 11:24:16 -08:00
oobabooga 6a3bf1de92 Update exllamav3 to 0.0.14 2025-11-09 19:43:53 -08:00
oobabooga 9ad9afad7d
Merge pull request #7296 from oobabooga/dev
Merge dev branch
2025-11-06 00:38:25 -03:00
oobabooga e7534a90d8 Update llama.cpp 2025-11-05 18:46:01 -08:00
oobabooga 6be1bfcc87 Remove the CUDA 11.7 portable builds 2025-11-05 05:45:10 -08:00
oobabooga 92d9cd36a6 Update llama.cpp 2025-11-05 05:43:34 -08:00
oobabooga 67f9288891 Pin huggingface-hub to 0.36.0 (solves #7284 and #7289) 2025-11-02 14:01:00 -08:00
oobabooga 16f77b74c4 Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2025-11-01 19:58:53 -07:00
oobabooga cd645f80f8 Update exllamav3 to 0.0.12 2025-11-01 19:58:18 -07:00
Trenten Miller 6871484398
fix: Rename 'evaluation_strategy' to 'eval_strategy' in training 2025-10-28 16:48:04 -03:00
oobabooga 338ae36f73 Add weights_only=True to torch.load in Training_PRO 2025-10-28 12:43:16 -07:00
dependabot[bot] c8cd840b24
Bump flash-linear-attention from 0.3.2 to 0.4.0 in /requirements/full (#7285)
Bumps [flash-linear-attention](https://github.com/fla-org/flash-linear-attention) from 0.3.2 to 0.4.0.
- [Release notes](https://github.com/fla-org/flash-linear-attention/releases)
- [Commits](https://github.com/fla-org/flash-linear-attention/compare/v0.3.2...v0.4.0)

---
updated-dependencies:
- dependency-name: flash-linear-attention
  dependency-version: 0.4.0
  dependency-type: direct:production
  update-type: version-update:semver-minor
...

Signed-off-by: dependabot[bot] <support@github.com>
Co-authored-by: dependabot[bot] <49699333+dependabot[bot]@users.noreply.github.com>
2025-10-28 10:07:03 -03:00
oobabooga fc67e5e692
Merge pull request #7279 from oobabooga/dev
Merge dev branch
2025-10-23 12:50:31 -03:00
oobabooga f4c9e67155 Update llama.cpp 2025-10-23 08:19:32 -07:00
Immanuel 9a84a828fc
Fixed python requirements for apple devices with macos tahoe (#7273) 2025-10-22 14:59:27 -03:00
reksarka 138cc654c4
Make it possible to run a portable Web UI build via a symlink (#7277) 2025-10-22 14:55:17 -03:00
oobabooga 24fd2b4dec Update exllamav3 to 0.0.11 2025-10-21 07:26:38 -07:00
oobabooga be81f050a7 Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2025-10-20 19:43:36 -07:00
oobabooga 9476123ee6 Update llama.cpp 2025-10-20 19:43:26 -07:00
dependabot[bot] 0d85744205
Bump triton-windows in /requirements/full (#7274) 2025-10-20 20:36:55 -03:00
oobabooga 771130532c
Merge pull request #7267 from oobabooga/dev
Merge dev branch
2025-10-15 17:15:28 -03:00
oobabooga a156ebbf76 Lint 2025-10-15 13:15:01 -07:00
oobabooga c871d9cdbd Revert "Same as 7f06aec3a1 but for exllamav3_hf"
This reverts commit deb37b821b.
2025-10-15 13:05:41 -07:00
oobabooga 163d863443 Update llama.cpp 2025-10-15 11:23:10 -07:00
oobabooga c93d567f97 Update exllamav3 to 0.0.10 2025-10-15 06:41:09 -07:00
oobabooga b5a6904c4a Make --trust-remote-code immutable from the UI/API 2025-10-14 20:47:01 -07:00
oobabooga efaf2aef3d Update exllamav3 to 0.0.9 2025-10-13 15:32:25 -07:00
oobabooga 047855c591 Update llama.cpp 2025-10-13 15:32:03 -07:00
mamei16 308e726e11
log error when llama-server request exceeds context size (#7263) 2025-10-12 23:00:11 -03:00
oobabooga 611399e089 Update README 2025-10-11 17:22:48 -07:00
oobabooga 968c79db06 Minor README fix (closes #7251) 2025-10-11 17:20:49 -07:00
oobabooga 655c3e86e3 Fix "continue" missing an initial space in chat-instruct/chat modes 2025-10-11 17:00:25 -07:00
oobabooga c7dd920dc8 Fix metadata leaking into branched chats 2025-10-11 14:12:05 -07:00
oobabooga 1831b3fb51 Use my custom gradio_client build (small changes to work with pydantic 2.11) 2025-10-10 18:01:21 -07:00
oobabooga dd0b003493 Bump pydantic to 2.11.0 2025-10-10 17:52:16 -07:00
oobabooga a74596374d Reapply "Update exllamav3 to 0.0.8"
This reverts commit 748007f6ee.
2025-10-10 17:51:31 -07:00
oobabooga 78ff21d512 Organize the --help message 2025-10-10 15:21:08 -07:00
oobabooga 5d734cc7ca Remove unused CSS 2025-10-10 12:54:54 -07:00
oobabooga 25360387ec Downloader: Fix resuming downloads after HF moved to Xet 2025-10-10 08:27:40 -07:00
oobabooga 7833650aa1
Merge pull request #7260 from oobabooga/dev
Merge dev branch
2025-10-10 10:46:34 -03:00
oobabooga bf5d85c922 Revert "Downloader: Gracefully handle '416 Range Not Satisfiable' when continuing downloads"
This reverts commit 1aa2b924d2.
2025-10-09 17:22:41 -07:00
oobabooga 0d03813e98
Update modules/chat.py
Co-authored-by: Copilot <175728472+Copilot@users.noreply.github.com>
2025-10-09 21:01:13 -03:00
oobabooga 748007f6ee Revert "Update exllamav3 to 0.0.8"
This reverts commit 977ffbaa04.
2025-10-09 16:50:00 -07:00
dependabot[bot] af3c70651c
Update bitsandbytes requirement in /requirements/full (#7255) 2025-10-09 19:53:34 -03:00
oobabooga 977ffbaa04 Update exllamav3 to 0.0.8 2025-10-09 15:53:14 -07:00
oobabooga e0f0fae59d Exllamav3: Add fla to requirements for qwen3-next 2025-10-09 13:03:48 -07:00
oobabooga deb37b821b Same as 7f06aec3a1 but for exllamav3_hf 2025-10-09 13:02:38 -07:00
oobabooga 7f06aec3a1 exllamav3: Implement the logits function for /v1/internal/logits 2025-10-09 11:24:25 -07:00
oobabooga 218dc01b51 Add fallbacks after 93aa7b3ed3 2025-10-09 10:59:34 -07:00
oobabooga 1aa2b924d2 Downloader: Gracefully handle '416 Range Not Satisfiable' when continuing downloads 2025-10-09 10:52:31 -07:00
oobabooga 0f3793d608 Update llama.cpp 2025-10-09 09:38:22 -07:00
oobabooga 282aa19189 Safer profile picture uploading 2025-10-09 09:26:35 -07:00
oobabooga 93aa7b3ed3 Better handle multigpu setups with transformers + bitsandbytes 2025-10-09 08:49:44 -07:00
Ionoclast Laboratories d229dfe991
Fix portable apple intel requirement for llama binaries (issue #7238) (#7239) 2025-10-08 12:40:53 -03:00
oobabooga 292c91abbb Update llama.cpp 2025-10-08 08:31:34 -07:00
oobabooga f660e0836b Merge branch 'main' into dev 2025-10-08 05:38:33 -07:00
oobabooga 898a3ed2fe
Add sponsor (Warp) to README <3 2025-10-07 18:33:28 -03:00
oobabooga 22997c134e Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2025-10-05 20:34:49 -07:00
Remowylliams 38a7fd685d
chat.py: Fix Instruct mode history 2025-10-05 11:34:47 -03:00
oobabooga 64829071e0 Update llama.cpp 2025-10-05 07:32:41 -07:00
oobabooga 0eb8543d74 Update transformers 2025-10-05 07:30:33 -07:00
oobabooga b7effb22e0 Update exllamav3 2025-10-05 07:29:57 -07:00
oobabooga 042b828c73
Merge pull request #7231 from oobabooga/dev
Merge dev branch
2025-09-21 01:18:56 -03:00
oobabooga 8c9df34696 Update llama.cpp 2025-09-20 20:57:15 -07:00
oobabooga 1e863a7113 Fix exllamav3 ignoring the stop button 2025-09-19 16:12:50 -07:00
oobabooga 005fcf3f98 Formatting 2025-09-17 21:58:37 -07:00
oobabooga e4412f0634 Slightly more robust syntax highlighting 2025-09-17 21:57:17 -07:00
stevenxdavis dd6d2223a5
Changing transformers_loader.py to Match User Expectations for --bf16 and Flash Attention 2 (#7217) 2025-09-17 16:39:04 -03:00
oobabooga 9e9ab39892 Make exllamav3_hf and exllamav2_hf functional again 2025-09-17 12:29:22 -07:00
oobabooga 9c0a833a0a Revert "Update bitsandbytes requirement in /requirements/full (#7193)"
This reverts commit fe15b67160.
2025-09-17 11:58:54 -07:00
oobabooga 8087a57fd8 Bump transformers to 4.56 2025-09-17 08:19:18 -07:00
dependabot[bot] 7131a478b9
Update safetensors requirement in /requirements/full (#7192) 2025-09-17 12:18:13 -03:00
dependabot[bot] fe15b67160
Update bitsandbytes requirement in /requirements/full (#7193) 2025-09-17 12:17:58 -03:00
dependabot[bot] 8f731a566c
Update peft requirement from ==0.16.* to ==0.17.* in /requirements/full (#7172) 2025-09-17 12:17:16 -03:00
oobabooga 483927a5be Update llama.cpp 2025-09-17 05:09:12 -07:00
oobabooga 557b78d31e Update llama.cpp 2025-09-03 16:50:03 -07:00
oobabooga ba62783b72 UI: Don't use $ $ for LaTeX, only $$ $$ 2025-09-02 14:22:22 -07:00
oobabooga d3a7710c62
Merge pull request #7215 from oobabooga/dev
Merge dev branch
2025-09-02 16:51:50 -03:00
oobabooga f3829b268a llama.cpp: Always pass --flash-attn on 2025-09-02 12:12:17 -07:00
oobabooga 2395c647d4 Fix the instruct message height on mobile 2025-09-02 12:11:15 -07:00
oobabooga c6ea67bbdb Lint 2025-09-02 10:22:03 -07:00
oobabooga 00ed878b05 Slightly more robust model loading 2025-09-02 10:16:26 -07:00
oobabooga d843afcf66 Update llama.cpp 2025-09-02 05:43:33 -07:00
oobabooga 00ebb295d3 Update llama.cpp 2025-08-31 16:27:23 -07:00
oobabooga 387e249dec Change an info message 2025-08-31 16:27:10 -07:00
oobabooga 8028d88541 Lint 2025-08-30 21:29:20 -07:00
oobabooga 13876a1ee8 llama.cpp: Remove the --flash-attn flag (it's always on now) 2025-08-30 20:28:26 -07:00
oobabooga 7b80e9a2ad Update llama.cpp 2025-08-30 20:22:11 -07:00
oobabooga 5631d4e3d6 Minor change after 21d790f87e 2025-08-30 15:34:49 -07:00
oobabooga 5920ad8834 UI: Give streaming instruct messages more vertical space 2025-08-30 15:22:50 -07:00
oobabooga 21d790f87e Optimize LaTeX rendering during streaming for long replies 2025-08-30 14:52:22 -07:00
oobabooga 3a3e247f3c Even better way to handle continue for thinking blocks 2025-08-30 12:36:35 -07:00
oobabooga cf1aad2a68 Fix "continue" for Byte-OSS for partial thinking blocks 2025-08-30 12:16:45 -07:00
oobabooga 96136ea760 Fix LaTeX rendering for equations with asterisks 2025-08-30 10:13:32 -07:00
oobabooga a3eb67e466 Fix the UI failing to launch if the Notebook prompt is too long 2025-08-30 08:42:26 -07:00
oobabooga 08f90f4b64 Lint 2025-08-29 14:09:04 -07:00
oobabooga 07a2e226c1 UI: Minor font color fixes in instruct mode 2025-08-29 14:08:38 -07:00
oobabooga a2b37adb26 UI: Preload the correct fonts for chat mode 2025-08-29 09:25:44 -07:00
oobabooga 084675cf75 UI: Improve thinking blocks in chat-instruct mode 2025-08-29 09:11:10 -07:00
oobabooga d78b7d0fad Lint 2025-08-28 20:22:07 -07:00
oobabooga fc2eb48664 Style fixes after 73442a2b6d 2025-08-28 20:21:55 -07:00
oobabooga 2720955478 Fix a bug after d9eec31886 2025-08-28 19:48:16 -07:00
oobabooga d9eec31886 UI: Suppress "Attempted to select a non-interactive or hidden tab" warnings 2025-08-28 17:46:29 -07:00
oobabooga cb8780a4ce Safer check for is_multimodal when loading models
Avoids an unrelated multimodal error when a model fails to load due
to lack of memory.
2025-08-28 11:13:19 -07:00
oobabooga cfc83745ec UI: Improve right sidebar borders in light mode 2025-08-28 08:34:48 -07:00
oobabooga a336a8bbeb UI: Fix italic and quote color in headings 2025-08-28 08:26:40 -07:00
oobabooga ba6041251d UI: Minor change 2025-08-28 06:20:00 -07:00
oobabooga a92758a144 llama.cpp: Fix obtaining the maximum sequence length for GPT-OSS 2025-08-27 16:15:40 -07:00
oobabooga 030ba7bfeb UI: Mention that Seed-OSS uses enable_thinking 2025-08-27 07:44:35 -07:00
oobabooga 0b4518e61c "Text generation web UI" -> "Text Generation Web UI" 2025-08-27 05:53:09 -07:00
oobabooga 73442a2b6d UI: Better handle the chat input position with CSS
This also solves scrolling issues with the main chat content
when the height of the textarea increases.
2025-08-27 05:43:13 -07:00
oobabooga 8042f76399 Make portable installs functional with Python 3.13 2025-08-27 05:37:01 -07:00
oobabooga ccc8a2229d Revert "UI: Preserve chat scroll position on textarea resize"
This reverts commit 750adf793d.
2025-08-26 13:59:54 -07:00
oobabooga 750adf793d UI: Preserve chat scroll position on textarea resize 2025-08-26 12:19:23 -07:00
oobabooga 02ca96fa44 Multiple fixes 2025-08-25 22:17:22 -07:00
oobabooga 6a7166fffa Add support for the Seed-OSS template 2025-08-25 19:46:48 -07:00
oobabooga 8fcb4b3102 Make bot_prefix extensions functional again 2025-08-25 19:10:46 -07:00
oobabooga 8f660aefe3 Fix chat-instruct replies leaking the bot name sometimes 2025-08-25 18:50:16 -07:00
oobabooga a531328f7e Fix the GPT-OSS stopping string 2025-08-25 18:41:58 -07:00
oobabooga 6c165d2e55 Fix the chat template 2025-08-25 18:28:43 -07:00
oobabooga b657be7381 Obtain stopping strings in chat mode 2025-08-25 18:22:08 -07:00
oobabooga ded6c41cf8 Fix impersonate for chat-instruct 2025-08-25 18:16:17 -07:00
oobabooga c1aa4590ea Code simplifications, fix impersonate 2025-08-25 18:05:40 -07:00
oobabooga b330ec3517 Simplifications 2025-08-25 17:54:15 -07:00
oobabooga 3ad5970374 Make the llama.cpp --verbose output less verbose 2025-08-25 17:43:21 -07:00
oobabooga adeca8a658 Remove changes to the jinja2 templates 2025-08-25 17:36:01 -07:00
oobabooga aad0104c1b Remove a function 2025-08-25 17:33:13 -07:00
oobabooga f919cdf881 chat.py code simplifications 2025-08-25 17:20:51 -07:00
oobabooga d08800c359 chat.py improvements 2025-08-25 17:03:37 -07:00
oobabooga 3bc48014a5 chat.py code simplifications 2025-08-25 16:48:21 -07:00
oobabooga 1f77427088 Update llama.cpp 2025-08-24 19:56:22 -07:00
oobabooga 2478294c06 UI: Preload the instruct and chat fonts 2025-08-24 12:37:41 -07:00
oobabooga 8be798e15f llama.cpp: Fix stderr deadlock while loading some multimodal models 2025-08-24 12:20:05 -07:00
oobabooga 7fe8da8944 Minor simplification after f247c2ae62 2025-08-22 14:42:56 -07:00
oobabooga f247c2ae62 Make --model work with absolute paths, eg --model /tmp/gemma-3-270m-it-IQ4_NL.gguf 2025-08-22 11:47:33 -07:00
oobabooga fd41f2fafc Update llama.cpp 2025-08-22 11:18:56 -07:00
oobabooga cb00db15c9
Merge pull request #7205 from oobabooga/dev
Merge dev branch
2025-08-19 11:51:06 -03:00
oobabooga 9e7b326e34 Lint 2025-08-19 06:50:40 -07:00
oobabooga 1972479610 Add the TP option to exllamav3_HF 2025-08-19 06:48:22 -07:00
oobabooga e0f5905a97 Code formatting 2025-08-19 06:34:05 -07:00
oobabooga 5b06284a8a UI: Keep ExLlamav3_HF selected if already selected for EXL3 models 2025-08-19 06:23:21 -07:00
oobabooga cbba58bef9 UI: Fix code blocks having an extra empty line 2025-08-18 15:50:09 -07:00
oobabooga 8805a50d24 Update llama.cpp 2025-08-18 15:31:01 -07:00
oobabooga 7d23a55901 Fix model unloading when switching loaders (closes #7203) 2025-08-18 09:05:47 -07:00
oobabooga 08594e5263 Installer: Slight improvement 2025-08-18 05:59:46 -07:00
oobabooga 15f99b1b71 Installer: Fix a requirement file 2025-08-18 05:51:46 -07:00
oobabooga 6b1b2e2373 Update README 2025-08-17 22:19:20 -07:00
oobabooga 8a14aa62ff Update README 2025-08-17 22:06:59 -07:00
oobabooga 8cdb911a6e Update README 2025-08-17 22:06:12 -07:00
oobabooga 6bf31479d9 Update README 2025-08-17 22:00:21 -07:00
oobabooga 320f7339cd Update README 2025-08-17 21:56:35 -07:00
oobabooga 3dec47eaf8 Small one-click installer changes 2025-08-17 21:43:46 -07:00
oobabooga 35707c2dd8 Update README 2025-08-17 21:39:57 -07:00
oobabooga 58797a9eb5 Minor change after 9651b5c873 2025-08-17 14:18:23 -07:00
oobabooga 64eba9576c mtmd: Fix a bug when "include past attachments" is unchecked 2025-08-17 14:08:40 -07:00
oobabooga 3a91ca2dd1 Update flash attention 2025-08-17 13:57:23 -07:00
oobabooga 9651b5c873 Make CUDA 12.8 the default CUDA option, remove the CUDA 12.4 option
Exllamav3 doesn't compile with torch 2.6 anymore, and torch 2.7
requires newer CUDA.
2025-08-17 13:26:09 -07:00
oobabooga a633793a00 Bump exllamav3 to 0.0.6 2025-08-17 13:19:42 -07:00
oobabooga dbabe67e77 ExLlamaV3: Enable the --enable-tp option, add a --tp-backend option 2025-08-17 13:19:11 -07:00
oobabooga d771ca4a13 Fix web search (attempt) 2025-08-14 12:05:14 -07:00
oobabooga 73a8a737b2 docs: Improve the multimodal examples slightly 2025-08-13 18:23:18 -07:00
altoiddealer 57f6e9af5a
Set multimodal status during Model Loading (#7199) 2025-08-13 16:47:27 -03:00
oobabooga 45e2935e87
Merge pull request #7198 from oobabooga/dev
Merge dev branch
2025-08-13 10:50:09 -03:00
oobabooga 725a8bcf60 Small docs change 2025-08-13 06:49:28 -07:00
oobabooga 331eab81f7 mtmd: Explain base64 inputs in the API docs 2025-08-13 06:46:10 -07:00
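For context on the base64 docs change above, a minimal sketch of such a request, assuming the OpenAI-compatible API on the default local port 5000; the payload shape follows the OpenAI multimodal convention and is not quoted from the docs commit itself:

```python
# Sketch: send a base64-encoded image to /v1/chat/completions.
# Host, port, and payload shape are assumptions based on the OpenAI
# multimodal convention, not a verbatim excerpt of the API docs.
import base64

import requests

with open("photo.png", "rb") as f:
    image_b64 = base64.b64encode(f.read()).decode("utf-8")

payload = {
    "messages": [{
        "role": "user",
        "content": [
            {"type": "text", "text": "What is in this image?"},
            {"type": "image_url",
             "image_url": {"url": f"data:image/png;base64,{image_b64}"}},
        ],
    }],
    "max_tokens": 512,
}

r = requests.post("http://127.0.0.1:5000/v1/chat/completions", json=payload)
print(r.json()["choices"][0]["message"]["content"])
```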
oobabooga 8c9a7e1334
Merge pull request #7195 from oobabooga/dev
Merge dev branch
2025-08-12 18:20:24 -03:00
oobabooga bd05fb899e Update README 2025-08-12 14:19:18 -07:00
oobabooga 6c2fdfdbda
Merge pull request #7190 from oobabooga/dev
Merge dev branch
2025-08-12 18:14:53 -03:00
oobabooga 41b95e9ec3 Lint 2025-08-12 13:37:37 -07:00
oobabooga 2f979ce294 docs: Add a multimodal tutorial 2025-08-12 13:33:49 -07:00
oobabooga 7301452b41 UI: Minor info message change 2025-08-12 13:23:24 -07:00
oobabooga 8d7b88106a Revert "mtmd: Fail early if images are provided but the model doesn't support them (llama.cpp)"
This reverts commit d8fcc71616.
2025-08-12 13:20:16 -07:00
oobabooga 2f6a629393 UI: Minor improvement after 0e88a621fd 2025-08-12 08:51:01 -07:00
oobabooga 2238302b49 ExLlamaV3: Add speculative decoding 2025-08-12 08:50:45 -07:00
oobabooga 0882970a94 Update llama.cpp 2025-08-12 07:00:24 -07:00
oobabooga d8fcc71616 mtmd: Fail early if images are provided but the model doesn't support them (llama.cpp) 2025-08-11 18:02:33 -07:00
oobabooga e6447cd24a mtmd: Update the llama-server request 2025-08-11 17:42:35 -07:00
oobabooga c47e6deda2 Update README 2025-08-11 16:20:20 -07:00
oobabooga 0e3def449a llama.cpp: Pass --swa-full to llama-server when streaming-llm is checked 2025-08-11 15:17:25 -07:00
oobabooga 0e88a621fd UI: Better organize the right sidebar 2025-08-11 15:16:03 -07:00
oobabooga 1e3c4e8bdb Update llama.cpp 2025-08-11 14:40:59 -07:00
oobabooga 765af1ba17 API: Improve a validation 2025-08-11 12:39:48 -07:00
oobabooga a78ca6ffcd Remove a comment 2025-08-11 12:33:38 -07:00
oobabooga dfd9c60d80 Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2025-08-11 12:33:27 -07:00
oobabooga 999471256c Lint 2025-08-11 12:32:17 -07:00
Mykeehu 1ba1211ca0
Fix edit window and buttons in Messenger theme (#7100) 2025-08-11 16:13:56 -03:00
oobabooga b10d525bf7 UI: Update a tooltip 2025-08-11 12:05:22 -07:00
oobabooga b62c8845f3 mtmd: Fix /chat/completions for llama.cpp 2025-08-11 12:01:59 -07:00
oobabooga 38c0b4a1ad Default ctx-size to 8192 when not found in the metadata 2025-08-11 07:39:53 -07:00
oobabooga 52d1cbbbe9 Fix an import 2025-08-11 07:38:39 -07:00
oobabooga 1cb800d392 Docs: small change 2025-08-11 07:37:10 -07:00
oobabooga 4809ddfeb8 Exllamav3: small sampler fixes 2025-08-11 07:35:22 -07:00
oobabooga 4d8dbbab64 API: Fix sampler_priority usage for ExLlamaV3 2025-08-11 07:26:11 -07:00
oobabooga c5340533c0 mtmd: Add another API example 2025-08-10 20:39:04 -07:00
oobabooga 9ec310d858 UI: Fix the color of italic text 2025-08-10 07:54:21 -07:00
oobabooga cc964ee579 mtmd: Increase the size of the UI image preview 2025-08-10 07:44:38 -07:00
oobabooga 6fbf162d71 Default max_tokens to 512 in the API instead of 16 2025-08-10 07:21:55 -07:00
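A short illustration of the max_tokens default change: clients that silently relied on the old implicit 16 now get 512, so passing the value explicitly keeps behavior stable across versions. Endpoint and port here are assumptions:

```python
# Hypothetical illustration: max_tokens now defaults to 512 server-side.
# Passing it explicitly avoids depending on either default.
import requests

payload = {
    "prompt": "Once upon a time",
    "max_tokens": 200,  # explicit value; omitting it now yields 512, not 16
}
r = requests.post("http://127.0.0.1:5000/v1/completions", json=payload)
print(r.json()["choices"][0]["text"])
```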
oobabooga 1fb5807859 mtmd: Fix API text completion when no images are sent 2025-08-10 06:54:44 -07:00
oobabooga 0ea62d88f6 mtmd: Fix "continue" when an image is present 2025-08-09 21:47:02 -07:00
oobabooga 4663b1a56e Update docs 2025-08-09 21:45:50 -07:00
oobabooga 2f90ac9880 Move the new image_utils.py file to modules/ 2025-08-09 21:41:38 -07:00
oobabooga c6b4d1e87f Fix the exllamav2 loader ignoring add_bos 2025-08-09 21:34:35 -07:00
oobabooga d86b0ec010
Add multimodal support (llama.cpp) (#7027) 2025-08-10 01:27:25 -03:00
oobabooga eb16f64017 Update llama.cpp 2025-08-09 17:12:16 -07:00
oobabooga a289a92b94 Fix exllamav3 token count 2025-08-09 17:10:58 -07:00
oobabooga d489eb589a Attempt at fixing new exllamav3 loader undefined behavior when switching conversations 2025-08-09 14:11:31 -07:00
oobabooga a6d6bee88c Change a comment 2025-08-09 07:51:03 -07:00
oobabooga 2fe79a93cc mtmd: Handle another case after 3f5ec9644f 2025-08-09 07:50:24 -07:00
oobabooga 59c6138e98 Remove a log message 2025-08-09 07:32:15 -07:00
oobabooga f396b82a4f mtmd: Better way to detect if an EXL3 model is multimodal 2025-08-09 07:31:36 -07:00
oobabooga fa9be444fa Use ExLlamav3 instead of ExLlamav3_HF by default for EXL3 models 2025-08-09 07:26:59 -07:00
oobabooga d9db8f63a7 mtmd: Simplifications 2025-08-09 07:25:42 -07:00
oobabooga 3f5ec9644f mtmd: Place the image <__media__> at the top of the prompt 2025-08-09 07:06:07 -07:00
oobabooga 1168004067 Minor change 2025-08-09 07:01:55 -07:00
oobabooga 9e260332cc Remove some unnecessary code 2025-08-08 21:22:47 -07:00
oobabooga 544c3a7c9f Polish the new exllamav3 loader 2025-08-08 21:15:53 -07:00
oobabooga 8fcadff8d3 mtmd: Use the base64 attachment for the UI preview instead of the file 2025-08-08 20:13:54 -07:00
oobabooga 6e9de75727 Support loading chat templates from chat_template.json files 2025-08-08 19:35:09 -07:00
Katehuuh 88127f46c1
Add multimodal support (ExLlamaV3) (#7174) 2025-08-08 23:31:16 -03:00
oobabooga b391ac8eb1 Fix getting the ctx-size for EXL3/EXL2/Transformers models 2025-08-08 18:11:45 -07:00
oobabooga 88ba4b1ebf
Merge pull request #7181 from oobabooga/dev
Merge dev branch
2025-08-07 00:30:46 -03:00
oobabooga f1147c9926 Update llama.cpp 2025-08-06 19:32:36 -07:00
oobabooga 3e24f455c8 Fix continue for GPT-OSS (hopefully the final fix) 2025-08-06 10:18:42 -07:00
oobabooga 0c1403f2c7 Handle GPT-OSS as a special case when continuing 2025-08-06 08:05:37 -07:00
oobabooga 6ce4b353c4 Fix the GPT-OSS template 2025-08-06 07:12:39 -07:00
oobabooga fefdb20f69
Merge pull request #7180 from oobabooga/dev
Merge dev branch
2025-08-05 23:54:32 -03:00
oobabooga 7c82d65a9d Handle GPT-OSS as a special template case 2025-08-05 18:05:09 -07:00
oobabooga fbea21a1f1 Only use enable_thinking if the template supports it 2025-08-05 17:33:27 -07:00
oobabooga bfbbfc2361 Ignore add_generation_prompt in GPT-OSS 2025-08-05 17:33:01 -07:00
oobabooga 20adc3c967 Start over new template handling (to avoid overcomplicating) 2025-08-05 16:58:45 -07:00
oobabooga 80f6abb07e Begin fixing 'Continue' with GPT-OSS 2025-08-05 16:01:19 -07:00
oobabooga e5b8d4d072 Fix a typo 2025-08-05 15:52:56 -07:00
oobabooga 701048cf33 Try to avoid breaking jinja2 parsing for older models 2025-08-05 15:51:24 -07:00
oobabooga 7d98ca6195 Make web search functional with thinking models 2025-08-05 15:44:33 -07:00
oobabooga 0e42575c57 Fix thinking block parsing for GPT-OSS under llama.cpp 2025-08-05 15:36:20 -07:00
oobabooga 498778b8ac Add a new 'Reasoning effort' UI element 2025-08-05 15:19:11 -07:00
oobabooga 6bb8212731 Fix thinking block rendering for GPT-OSS 2025-08-05 15:06:22 -07:00
oobabooga 42e3a7a5ae Update llama.cpp 2025-08-05 14:56:12 -07:00
oobabooga 5c5a4dfc14 Fix impersonate 2025-08-05 13:04:10 -07:00
oobabooga ecd16d6bf9 Automatically set skip_special_tokens to False for channel-based templates 2025-08-05 12:57:49 -07:00
oobabooga 178c3e75cc Handle templates with channels separately 2025-08-05 12:52:17 -07:00
oobabooga 9f28f53cfc Better parsing of the gpt-oss template 2025-08-05 11:56:00 -07:00
oobabooga 3b28dc1821 Don't pass torch_dtype to transformers loader, let it be autodetected 2025-08-05 11:35:53 -07:00
oobabooga 3039aeffeb Fix parsing the gpt-oss-20b template 2025-08-05 11:35:17 -07:00
oobabooga 5989043537 Transformers: Support standalone .jinja chat templates (for GPT-OSS) 2025-08-05 11:22:18 -07:00
oobabooga 02a3420a50 Bump transformers to 4.55 (adds gpt-oss support) 2025-08-05 10:09:30 -07:00
oobabooga 74230f559a Bump transformers to 4.54 2025-08-01 11:03:15 -07:00
oobabooga f08bb9a201 Handle edge case in chat history loading (closes #7155) 2025-07-24 10:34:59 -07:00
oobabooga d746484521 Handle both int and str types in grammar char processing 2025-07-23 11:52:51 -07:00
oobabooga 714f745713
Merge pull request #7141 from oobabooga/dev
Merge dev branch
2025-07-19 17:54:06 -03:00
oobabooga 0c667de7a7 UI: Add a None option for the speculative decoding model (closes #7145) 2025-07-19 12:14:41 -07:00
oobabooga ccf5e3e3a7 Update exllamav3 2025-07-19 12:07:38 -07:00
oobabooga a00983b2ba Update llama.cpp 2025-07-19 12:07:20 -07:00
oobabooga 9371867238 Update exllamav2 2025-07-15 07:38:03 -07:00
oobabooga 03fb85e49a Update llama.cpp 2025-07-15 07:37:13 -07:00
oobabooga 845432b9b4 Remove the obsolete modules/relative_imports.py file 2025-07-14 21:03:18 -07:00
oobabooga 1d1b20bd77 Remove the --torch-compile option (it doesn't do anything currently) 2025-07-11 10:51:23 -07:00
oobabooga 5a8a9c22e8 Update llama.cpp 2025-07-11 09:20:27 -07:00
oobabooga 273888f218 Revert "Use eager attention by default instead of sdpa"
This reverts commit bd4881c4dc.
2025-07-10 18:56:46 -07:00
oobabooga caf69d871a Revert "Standardize margins and paddings across all chat styles"
This reverts commit 86cb5e0587.
2025-07-10 18:43:01 -07:00
oobabooga 188c7c8f2b Revert "CSS simplifications"
This reverts commit c6c1b725e9.
2025-07-10 18:42:52 -07:00
oobabooga 635e6efd18 Ignore add_bos_token in instruct prompts, let the jinja2 template decide 2025-07-10 07:14:01 -07:00
oobabooga 0f3a88057c Don't downgrade triton-windows on CUDA 12.8 2025-07-10 05:39:04 -07:00
oobabooga e523f25b9f Downgrade triton-windows to 3.2.0.post19
https://github.com/oobabooga/text-generation-webui/issues/7107#issuecomment-3057250374
2025-07-10 05:35:57 -07:00
oobabooga a7a3a0c700 Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2025-07-09 21:07:42 -07:00
oobabooga 21e0e9f32b Add the triton-windows requirement on Windows to make transformers functional 2025-07-09 21:05:17 -07:00
dependabot[bot] d1f4622a96
Update peft requirement from ==0.15.* to ==0.16.* in /requirements/full (#7127) 2025-07-10 00:15:50 -03:00
oobabooga e015355e4a Update README 2025-07-09 20:03:53 -07:00
oobabooga bd4881c4dc Use eager attention by default instead of sdpa 2025-07-09 19:57:37 -07:00
oobabooga b69f435311 Fix latest transformers being super slow 2025-07-09 19:56:50 -07:00
oobabooga 8b3c7aa795 Bump bitsandbytes to 0.46 2025-07-09 19:46:55 -07:00
oobabooga f045b72826 Bump accelerate to 1.8 2025-07-09 19:46:26 -07:00
oobabooga c357601c01 Bump transformers to 4.53 2025-07-09 18:48:04 -07:00
oobabooga 6c2bdda0f0 Transformers loader: replace use_flash_attention_2/use_eager_attention with a unified attn_implementation
Closes #7107
2025-07-09 18:39:37 -07:00
oobabooga 6338dc0051
Merge pull request #7129 from oobabooga/dev
Merge dev branch
2025-07-09 00:10:16 -03:00
oobabooga 511bb31646 Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2025-07-08 20:04:37 -07:00
oobabooga d1e9301a43 Remove fragile js from 9a58964834 2025-07-08 19:57:46 -07:00
Cats cd5d867b62
docs: Add Mirostat Explanation (#7128) 2025-07-08 17:54:38 -03:00
oobabooga 3e24a127c7 Remove more unnecessary files from portable builds 2025-07-08 09:13:11 -07:00
oobabooga 2f544fe199 Update the keyboard shortcuts documentation 2025-07-08 09:02:42 -07:00
oobabooga 93e08c0d4a Update README 2025-07-08 08:59:29 -07:00
oobabooga 42191a36ab Keep navigation icons visible when switching versions 2025-07-08 07:10:04 -07:00
oobabooga c6c1b725e9 CSS simplifications 2025-07-07 21:11:13 -07:00
oobabooga 86cb5e0587 Standardize margins and paddings across all chat styles 2025-07-07 21:02:19 -07:00
oobabooga b7d5982944
Merge pull request #7125 from oobabooga/dev
Merge dev branch
2025-07-07 18:19:58 -03:00
oobabooga e8266b0356 Use windows-2022 in workflows 2025-07-07 14:19:20 -07:00
oobabooga e1034fc79e
Merge pull request #7124 from oobabooga/dev
Merge dev branch
2025-07-07 18:13:30 -03:00
oobabooga 74d98186fc Slightly more robust autoscroll 2025-07-07 13:23:23 -07:00
oobabooga ca226a54c6 Disable the message version navigation hover effects during streaming 2025-07-07 11:29:37 -07:00
oobabooga 07e6f004c5 Rename a button in the Session tab for clarity 2025-07-07 11:28:47 -07:00
oobabooga 426e7a4cec Update the extensions documentation 2025-07-07 08:43:01 -07:00
oobabooga e52bc0acb2 Update llama.cpp 2025-07-06 20:28:35 -07:00
oobabooga cbef2720ce Revert "Fix: use embedded Python in start_windows.bat to avoid system interpreter conflicts (#7120)"
This reverts commit 8df1127ce2.
2025-07-06 20:14:02 -07:00
Alidr79 e5767d4fc5
Update ui_model_menu.py to block --multi-user access in the backend (#7098) 2025-07-06 21:48:53 -03:00
oobabooga 60123a67ac Better log message when extension requirements are not found 2025-07-06 17:44:41 -07:00
oobabooga e6bc7742fb Support installing user extensions in user_data/extensions/ 2025-07-06 17:30:23 -07:00
Philipp Claßen 959d4ddb91
Fix for chat sidebars toggle buttons disappearing (#7106) 2025-07-06 20:51:42 -03:00
Ali 8df1127ce2
Fix: use embedded Python in start_windows.bat to avoid system interpreter conflicts (#7120) 2025-07-06 20:42:34 -03:00
oobabooga de4ccffff8 Fix the duckduckgo search 2025-07-06 16:24:57 -07:00
oobabooga 0f258774d3 Minor README changes 2025-07-05 14:25:59 -07:00
oobabooga 4583924ce7 Remove torchvision/torchaudio mentions from the README 2025-07-05 14:24:15 -07:00
oobabooga c4d738f39f Update llama.cpp 2025-07-05 14:09:29 -07:00
oobabooga c4d5331c03 Fix autoscroll after fonts load 2025-07-04 13:21:52 -07:00
oobabooga 92ec8dda03 Fix chat history getting lost if the UI is inactive for a long time (closes #7109) 2025-07-04 06:04:04 -07:00
oobabooga 23bb94a5fb Update llama.cpp 2025-07-03 20:36:54 -07:00
zombiegreedo 877c651c04
Handle either missing <think> start or </think> end tags (#7102) 2025-07-03 23:05:46 -03:00
oobabooga cbba88f565 Fix scrolling during streaming when thinking blocks are present 2025-07-03 18:16:29 -07:00
oobabooga 13373391df Rename miniconda -> miniforge everywhere 2025-07-03 14:13:22 -07:00
oobabooga ab162f976c Use miniforge instead of miniconda to avoid anaconda licensing issues 2025-07-03 11:31:52 -07:00
oobabooga 9a58964834 Keep the last message visible when the input height changes 2025-06-22 20:44:04 -07:00
oobabooga c3faecfd27 Minor change 2025-06-22 17:51:09 -07:00
oobabooga 1b19dd77a4 Move 'Enable thinking' to the Chat tab 2025-06-22 17:29:17 -07:00
oobabooga 02f604479d Remove the pre-jinja2 custom stopping string handling (closes #7094) 2025-06-21 14:03:35 -07:00
oobabooga 58282f7107 Replace 'Generate' with 'Send' in the Chat tab 2025-06-20 06:59:48 -07:00
oobabooga bb97ca1b22 Fix a small issue with the chat input 2025-06-19 21:41:41 -07:00
oobabooga f154aeafea Optimize chat scrolling for the 40th time, hopefully the last one 2025-06-19 21:23:10 -07:00
oobabooga 17f9c188bd
Merge pull request #7092 from oobabooga/dev
Merge dev branch
2025-06-19 19:42:16 -03:00
oobabooga acd57b6a85 Minor UI change 2025-06-19 15:39:43 -07:00
oobabooga f08db63fbc Change some comments 2025-06-19 15:26:45 -07:00
oobabooga 2517ea9c9e Lint 2025-06-19 15:23:06 -07:00
oobabooga 90f42f311a Update README 2025-06-19 12:43:05 -07:00
oobabooga ee945517ff Update README 2025-06-19 12:39:53 -07:00
oobabooga a1b606a6ac Fix obtaining the maximum number of GPU layers for DeepSeek-R1-0528-GGUF 2025-06-19 12:30:57 -07:00
oobabooga 3344510553 Force dark theme on the Gradio login page 2025-06-19 12:11:34 -07:00
oobabooga 645463b9f0 Add fallback values for theme colors 2025-06-19 11:28:12 -07:00
oobabooga 09cd1cb4e2 Update README 2025-06-19 10:51:45 -07:00
oobabooga c4029914e8 Update README 2025-06-19 10:48:33 -07:00
oobabooga 84617abdeb Properly fix the /v1/models endpoint 2025-06-19 10:25:55 -07:00
oobabooga 93cd47c948 Bump numpy to 2.2 (closes #7090) 2025-06-19 08:00:30 -07:00
oobabooga dcdc42fa06 Fix the /v1/models output format (closes #7089) 2025-06-19 07:57:17 -07:00
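A sketch of querying the corrected endpoint; the response shape noted in the comment is the standard OpenAI list format, assumed rather than quoted from the fix:

```python
# Sketch: list models from the (now OpenAI-conformant) endpoint.
# Expected OpenAI-style shape: {"object": "list", "data": [{"id": ...}, ...]}
# — an assumption based on the OpenAI convention.
import requests

r = requests.get("http://127.0.0.1:5000/v1/models")
for model in r.json()["data"]:
    print(model["id"])
```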
oobabooga 9c6913ad61 Show file sizes on "Get file list" 2025-06-18 21:35:07 -07:00
oobabooga 9bd114b5d7 Merge branch 'main' into dev 2025-06-18 21:03:52 -07:00
oobabooga 76a722dc90 Remove .github and .gitignore folders from portable builds 2025-06-18 21:03:45 -07:00
oobabooga 4e0dfbdde3 Remove .github and .gitignore folders from portable builds 2025-06-18 21:02:57 -07:00
oobabooga 92547becff
Merge pull request #7085 from oobabooga/dev
Merge dev branch
2025-06-18 22:43:07 -03:00
oobabooga 0cb82483ef Lint 2025-06-18 18:26:59 -07:00
oobabooga e33921a629 Fix jittering while typing on Firefox (closes #7086) 2025-06-18 17:54:34 -07:00
oobabooga 6af3598cfa API: Remove obsolete list_dummy_models function 2025-06-18 16:15:42 -07:00
NoxWorld2660 0b26650f47
Expose real model list via /v1/models endpoint (#7088) 2025-06-18 20:14:24 -03:00
oobabooga 6cc7bbf009 Better autosave behavior for notebook tab when there are 2 columns 2025-06-18 15:54:32 -07:00
oobabooga 197b327374 Minor log message change 2025-06-18 13:36:54 -07:00
oobabooga 2f45d75309 Increase the area of the notebook textbox 2025-06-18 13:22:06 -07:00
oobabooga 7cb2b1bfdb Fix some events 2025-06-18 10:27:38 -07:00
oobabooga 8b7eb5c87c Code simplification 2025-06-18 10:22:36 -07:00
oobabooga 22cc9e0115 Remove 'Send to Default' 2025-06-18 10:21:48 -07:00
oobabooga 678f40297b Clear the default tab output when switching prompts 2025-06-17 17:40:48 -07:00
oobabooga a2cdd06afc Revert "Workaround for jittering while typing on firefox"
This reverts commit b4edfce993.
2025-06-17 15:29:40 -07:00
oobabooga 2d37602382 Small improvements to wpp style 2025-06-17 15:26:59 -07:00
oobabooga da148232eb Better filenames for new prompts in the Notebook tab 2025-06-17 15:10:44 -07:00
oobabooga fc23345c6d Send the default input to the notebook textbox when switching 2 columns to 1 (instead of the output) 2025-06-17 15:03:14 -07:00
oobabooga 75217d3713 Change issue template 2025-06-17 09:37:24 -07:00
oobabooga b4edfce993 Workaround for jittering while typing on firefox 2025-06-17 09:30:03 -07:00
oobabooga 01ef4c61bd Only open/close both sidebars at the same time on desktop 2025-06-17 08:45:11 -07:00
oobabooga 315e06f695 Update llama.cpp 2025-06-17 07:51:16 -07:00
oobabooga 73138a29fa Small change 2025-06-17 07:49:24 -07:00
oobabooga 87ae09ecd6 Improve the basic API examples 2025-06-17 07:46:58 -07:00
oobabooga aa44e542cb Revert "Safer usage of mkdir across the project"
This reverts commit 0d1597616f.
2025-06-17 07:11:59 -07:00
oobabooga 0d1597616f Safer usage of mkdir across the project 2025-06-17 07:09:33 -07:00
oobabooga 8689d7ecea Update README 2025-06-16 21:21:39 -07:00
oobabooga 8f49e6144e Update README 2025-06-16 21:09:45 -07:00
oobabooga 66e991841a Fix the character pfp not appearing when switching from instruct to chat modes 2025-06-16 18:45:44 -07:00
oobabooga be3d371290 Close the big profile picture when switching to instruct mode 2025-06-16 18:42:17 -07:00
oobabooga 26eda537f0 Add auto-save for notebook textbox while typing 2025-06-16 17:48:23 -07:00
oobabooga 88c0204357 Disable start_with when generating the websearch query 2025-06-16 14:53:05 -07:00
oobabooga 97a539cab6 Minor style change 2025-06-16 13:55:45 -07:00
oobabooga faae4dc1b0
Autosave generated text in the Notebook tab (#7079) 2025-06-16 17:36:05 -03:00
oobabooga d0befe0729 Add a comment 2025-06-16 09:22:22 -07:00
oobabooga de24b3bb31
Merge the Default and Notebook tabs into a single Notebook tab (#7078) 2025-06-16 13:19:29 -03:00
oobabooga db67d69ddc Lint 2025-06-16 07:28:14 -07:00
oobabooga cac225b589 Small style improvements 2025-06-16 07:26:39 -07:00
oobabooga 7ba3d4425f Remove the 'Send to negative prompt' button 2025-06-16 07:23:09 -07:00
oobabooga 34bf93ef47 Move 'Custom system message' to the Parameters tab 2025-06-16 07:22:14 -07:00
oobabooga c9c3b716fb Move character settings to a new 'Character' main tab 2025-06-16 07:21:25 -07:00
oobabooga f77f1504f5 Improve the style of the Character and User tabs 2025-06-16 06:12:37 -07:00
oobabooga 949b7ec9cf Further optimize scrolling in the chat tab 2025-06-15 18:50:21 -07:00
oobabooga d347b056e3 Always close/open the two sidebars at the same time 2025-06-15 18:12:11 -07:00
oobabooga 9bcef8a648 Fix "show controls" conflicting with manually hiding the sidebars 2025-06-15 17:57:41 -07:00
oobabooga bc2b0f54e9 Only save extensions settings on manual save 2025-06-15 15:53:16 -07:00
oobabooga cc757f6226 Small style improvements to the chat tab 2025-06-15 08:32:06 -07:00
oobabooga b279460a81 Improve the wpp style 2025-06-15 08:25:07 -07:00
oobabooga e8dc7b0ee9 Bump exllamav3 to 0.0.4 2025-06-15 08:15:29 -07:00
oobabooga 4fc254c1dd Optimize syntax highlighting on long conversations 2025-06-15 08:13:13 -07:00
oobabooga 609c3ac893 Optimize the end of generation with llama.cpp 2025-06-15 08:03:27 -07:00
oobabooga db7d717df7 Remove images and links from websearch results
This reduces noise a lot
2025-06-14 20:00:25 -07:00
oobabooga e263dbf852 Improve user input truncation 2025-06-14 19:43:51 -07:00
oobabooga 09606a38d3 Truncate web search results to at most 8192 tokens 2025-06-14 19:37:32 -07:00
oobabooga ad0be25c46 Update llama.cpp 2025-06-14 15:00:14 -07:00
oobabooga 7c0225931a Merge branch 'main' into dev 2025-06-14 14:59:37 -07:00
oobabooga 1c1cf09a59 Update workflows 2025-06-14 14:52:49 -07:00
oobabooga 58c3b549ba Merge branch 'main' into dev 2025-06-14 10:16:13 -07:00
oobabooga 8e9c0287aa UI: Fix edge case where gpu-layers slider maximum is incorrectly limited 2025-06-14 10:12:11 -07:00
oobabooga 8e0ef5b419 Hide the header bar on Ctrl+S 2025-06-14 09:09:46 -07:00
oobabooga 1d23159837 Increase the size of the enlarged character profile picture 2025-06-14 08:45:59 -07:00
oobabooga d2da40b0e4 Remember the last selected chat for each mode/character 2025-06-14 08:25:00 -07:00
oobabooga 879fa3d8c4 Improve the wpp style & simplify the code 2025-06-14 07:14:22 -07:00
oobabooga 09eb326486 Merge README.md changes from dev branch 2025-06-13 07:46:43 -07:00
oobabooga dfab11f0b5 Update README 2025-06-13 07:45:42 -07:00
oobabooga 9a2353f97b Better log message when the user input gets truncated 2025-06-13 05:44:02 -07:00
oobabooga 322cd28e24 Update README 2025-06-13 01:27:33 -07:00
oobabooga 7cb650237c Update the README 2025-06-13 01:12:52 -07:00
oobabooga aab28398ef Update README 2025-06-13 01:06:44 -07:00
oobabooga 5ba52967ac Update README 2025-06-13 01:04:41 -07:00
oobabooga b58e80cb99 Update README 2025-06-13 01:02:11 -07:00
Miriam f4f621b215
Ensure estimated VRAM is updated when switching between different models (#7071) 2025-06-13 02:56:33 -03:00
oobabooga f337767f36 Add error handling for non-llama.cpp models in portable mode 2025-06-12 22:17:39 -07:00
oobabooga a25a1fc8d0 Disable message action icons during streaming for better performance 2025-06-12 22:01:02 -07:00
oobabooga 2dee3a66ff Add an option to include/exclude attachments from previous messages in the chat prompt 2025-06-12 21:37:18 -07:00
oobabooga 2cfb77d16f
Merge pull request #7070 from oobabooga/dev
Merge dev branch
2025-06-12 12:38:47 -03:00
oobabooga b4d2a00e20 Update README 2025-06-12 08:35:33 -07:00
oobabooga 9ff5961853
Merge pull request #7067 from oobabooga/dev
Merge dev branch
2025-06-11 11:58:52 -03:00
oobabooga 9d6a7f1bcf Minor changes 2025-06-11 07:55:35 -07:00
oobabooga 004fd8316c Minor changes 2025-06-11 07:49:51 -07:00
oobabooga 570d5b8936 Only save extensions on manual save 2025-06-11 07:39:49 -07:00
oobabooga 27140f3563 Revert "Don't save active extensions through the UI"
This reverts commit df98f4b331.
2025-06-11 07:25:27 -07:00
oobabooga 2ebc8ff252
Merge pull request #7065 from oobabooga/dev
Merge dev branch
2025-06-11 01:09:06 -03:00
oobabooga 13a5288d01 Fix an error when upgrading from cuda 12.4 to cuda 12.8 2025-06-10 21:08:18 -07:00
oobabooga 801db438b0 Undo changes to portable builds 2025-06-10 19:55:40 -07:00
oobabooga 00fbbd6f57 Undo changes to portable builds 2025-06-10 19:54:42 -07:00
oobabooga e8041069e2
Merge pull request #7064 from oobabooga/dev
Merge dev branch
2025-06-10 23:43:10 -03:00
oobabooga fe0685a742 New attempt 2025-06-10 19:42:22 -07:00
oobabooga 036976aeb8
Merge pull request #7063 from oobabooga/dev
Merge dev branch
2025-06-10 23:35:22 -03:00
oobabooga 43fc170224 Fix the Windows workflow 2025-06-10 19:34:41 -07:00
oobabooga e9a433832e
Merge pull request #7062 from oobabooga/dev
Merge dev branch
2025-06-10 23:26:21 -03:00
oobabooga a86a5a026e Fix the GitHub Actions workflows 2025-06-10 19:25:22 -07:00
oobabooga 1e96dcf369
Merge pull request #7057 from oobabooga/dev
Merge dev branch
2025-06-10 23:08:44 -03:00
oobabooga 552cb09f09 Do not bump Transformers to 4.52 on CUDA 12.8
Performance is slow, and the older version works fine with torch 2.7.
2025-06-10 18:45:42 -07:00
LawnMauer bc921c66e5
Load js and css sources in UTF-8 (#7059) 2025-06-10 22:16:50 -03:00
oobabooga 4cf39120fc Fix chat area sometimes not scrolling up to edit message 2025-06-10 18:03:00 -07:00
oobabooga 75da90190f Fix character dropdown sometimes disappearing in the Parameters tab 2025-06-10 17:34:54 -07:00
oobabooga 1c1fd3be46 Remove some log messages 2025-06-10 14:29:28 -07:00
oobabooga 3f9eb3aad1 Fix the preset dropdown when the default preset file is not present 2025-06-10 14:22:37 -07:00
oobabooga 18bd78f1f0 Make the llama.cpp prompt processing messages shorter 2025-06-10 14:03:25 -07:00
oobabooga 889153952f Lint 2025-06-10 09:02:52 -07:00
oobabooga 2dabdbc7da Update llama.cpp 2025-06-10 05:25:23 -07:00
oobabooga c92eba0b0a Reorganize the Parameters tab (left: preset parameters, right: everything else) 2025-06-09 22:05:20 -07:00
oobabooga efd9c9707b Fix random seeds being saved to settings.yaml 2025-06-09 20:57:25 -07:00
oobabooga df98f4b331 Don't save active extensions through the UI
Prevents command-line activated extensions from becoming permanently active due to autosave.
2025-06-09 20:28:16 -07:00
Mykeehu ec73121020
Fix 'Continue'/'Start reply with' when using translation extensions (#6944)
---------

Co-authored-by: oobabooga <oobabooga4@gmail.com>
2025-06-10 00:17:05 -03:00
Miriam 331d03c33f
fix failure when --nowebui called without --api (#7055) 2025-06-09 23:25:39 -03:00
Miriam 1443612e72
Check .attention.head_count if .attention.head_count_kv doesn't exist (#7048) 2025-06-09 23:22:01 -03:00
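The fallback above can be pictured with a small sketch; the metadata key prefix ("llama.") varies by model architecture and is assumed here:

```python
# Illustrative sketch of the fallback described in the commit above:
# when the grouped-query key is absent, fall back to the plain head count.
def get_kv_head_count(metadata: dict) -> int | None:
    return metadata.get(
        "llama.attention.head_count_kv",
        metadata.get("llama.attention.head_count"),
    )

# Example with metadata lacking the _kv key:
print(get_kv_head_count({"llama.attention.head_count": 32}))  # -> 32
```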
oobabooga d085dc6a93 Minor optimization after e976a5ddc7 2025-06-09 18:40:54 -07:00
oobabooga 263b5d5557 Use html2text to extract the text of web searches without losing formatting 2025-06-09 17:55:26 -07:00
oobabooga f5a5d0c0cb Add the URL of web attachments to the prompt 2025-06-09 17:32:25 -07:00
oobabooga 747a4a0e56 Reposition the ... typing dots 2025-06-09 13:41:29 -07:00
oobabooga 14efd42084 Improve scroll performance by disabling hover effects during scroll 2025-06-09 11:43:15 -07:00
oobabooga 1602ac1c8f Improve the style of thinking blocks in dark mode 2025-06-09 09:03:39 -07:00
oobabooga eefbf96f6a Don't save truncation_length to user_data/settings.yaml 2025-06-08 22:14:56 -07:00
oobabooga 80637cae28 Add version to portable build folder names 2025-06-08 21:55:49 -07:00
oobabooga f9a007c6a8 Properly filter out failed web search downloads from attachments 2025-06-08 19:25:23 -07:00
oobabooga f3388c2ab4 Fix selecting next chat when deleting with active search 2025-06-08 18:53:04 -07:00
oobabooga 4a369e070a Add buttons for easily deleting past chats 2025-06-08 18:47:48 -07:00
oobabooga 0b8d2d65a2 Minor style improvement 2025-06-08 18:11:27 -07:00
oobabooga 06dfb7e772 Improve the style of the hover menu 2025-06-08 18:03:07 -07:00
oobabooga b5e021fc49 Make the dark theme darker 2025-06-08 17:44:04 -07:00
oobabooga e976a5ddc7 Re-highlight code blocks when switching light/dark themes 2025-06-08 17:35:36 -07:00
oobabooga 7ed1926ce7 Small change after previous commit 2025-06-08 15:38:40 -07:00
oobabooga ff01bcb870 Use user_data/cache/gradio for Gradio temp files 2025-06-08 15:33:05 -07:00
oobabooga f81b1540ca Small style improvements 2025-06-08 15:19:25 -07:00
oobabooga eb0ab9db1d Fix light/dark theme persistence across page reloads 2025-06-08 15:04:05 -07:00
oobabooga 78899244d5 Remove settings-template.yaml 2025-06-08 09:40:09 -07:00
oobabooga 1f1435997a Don't show the new 'Restore character' button in the Chat tab 2025-06-08 09:37:54 -07:00
oobabooga 84f66484c5 Make it optional to paste long pasted content to an attachment 2025-06-08 09:31:38 -07:00
oobabooga 42e7864d62 Reorganize the Session tab 2025-06-08 09:21:23 -07:00
oobabooga af6bb7513a Add back the "Save UI defaults" button
It's useful for saving extensions settings.
2025-06-08 09:09:36 -07:00
oobabooga 1cab149c1a Remove the contrastive search preset 2025-06-07 22:26:13 -07:00
oobabooga ae150fa24f Remove the null preset 2025-06-07 22:25:46 -07:00
oobabooga 1bdf11b511 Use the Qwen3 - Thinking preset by default 2025-06-07 22:23:09 -07:00
oobabooga 0dbc4cbc71 Add Qwen3 presets 2025-06-07 22:20:58 -07:00
oobabooga fe955cac1f Small UI changes 2025-06-07 22:15:19 -07:00
oobabooga caf9fca5f3 Avoid some code repetition 2025-06-07 22:11:35 -07:00
oobabooga 3650a6fd1f Small UI changes 2025-06-07 22:02:34 -07:00
oobabooga 6436bf1920
More UI persistence: presets and characters (#7051) 2025-06-08 01:58:02 -03:00
oobabooga 35ed55d18f
UI persistence (#7050) 2025-06-07 22:46:52 -03:00
rakha abadi susilo db847eed4c
Add RTX 50XX NVIDIA Blackwell support (ExLlamaV2/V3 and Transformers) (#7011)
---------

Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
2025-06-07 21:44:15 -03:00
oobabooga 2d263f227d Fix the chat input reappearing when the page is reloaded 2025-06-06 22:38:20 -07:00
oobabooga 379dd01ca7 Filter out failed web search downloads from attachments 2025-06-06 22:32:07 -07:00
oobabooga f8f23b5489 Simplify the llama.cpp stderr filter code 2025-06-06 22:25:13 -07:00
oobabooga 45f823ddf6 Print \n after the llama.cpp progress bar reaches 1.0 2025-06-06 22:23:34 -07:00
oobabooga d47c8eb956 Remove quotes from LLM-generated websearch query (closes #7045).
Fix by @Quiet-Joker
2025-06-05 06:57:59 -07:00
oobabooga 977ec801b7 Improve table colors in instruct mode 2025-06-05 06:33:45 -07:00
Hanusz Leszek 3829507d0f
Stop model during graceful shutdown (#7042) 2025-06-04 15:13:36 -03:00
oobabooga 3d676cd50f Optimize syntax highlighting 2025-06-04 11:02:04 -07:00
oobabooga 66a75c899a Improve the scrollbars in code blocks 2025-06-04 10:59:43 -07:00
oobabooga 9bd7359ffa Scroll the textarea into view when editing a message 2025-06-04 10:47:14 -07:00
oobabooga 93b3752cdf Revert "Remove the "Is typing..." yield by default"
This reverts commit b30a73016d.
2025-06-04 09:40:30 -07:00
oobabooga b38ec0ec38 Update llama.cpp 2025-06-02 11:33:17 -07:00
oobabooga b30a73016d Remove the "Is typing..." yield by default 2025-06-02 07:49:22 -07:00
oobabooga 7278548cd1
Simplify the one-click installer (#7039) 2025-06-02 09:57:55 -03:00
oobabooga bb409c926e
Update only the last message during streaming + add back dynamic UI update speed (#7038) 2025-06-02 09:50:17 -03:00
oobabooga 45c9ae312c Use the flash-attention wheels in https://github.com/kingbri1/flash-attention 2025-06-01 22:17:22 -07:00
oobabooga 2db7745cbd Show llama.cpp prompt processing on one line instead of many lines 2025-06-01 22:12:24 -07:00
oobabooga ad6d0218ae Fix after 219f0a7731 2025-06-01 19:27:14 -07:00
oobabooga 92adceb7b5 UI: Fix the model downloader progress bar 2025-06-01 19:22:21 -07:00
oobabooga 7a81beb0c1 Turn long pasted text into an attachment automatically 2025-06-01 18:26:14 -07:00
oobabooga bf42b2c3a1 Fix thinking blocks sometimes showing a white outline 2025-06-01 11:02:04 -07:00
oobabooga 83849336d8 Improve how Show controls looks in the hover menu 2025-06-01 10:58:49 -07:00
oobabooga 3e3746283c Improve the typing dots position 2025-06-01 10:55:31 -07:00
oobabooga 88ff3e6ad8 CSS fixes after 98a7508a99 2025-06-01 08:04:35 -07:00
oobabooga 9e80193008 Add the model name to each message's metadata 2025-05-31 22:41:35 -07:00
oobabooga 0816ecedb7 Lint 2025-05-31 22:25:09 -07:00
oobabooga 98a7508a99 UI: Move 'Show controls' inside the hover menu 2025-05-31 22:22:13 -07:00
oobabooga 85f2f01a3a UI: Fix extra gaps on the right sidebar 2025-05-31 21:29:57 -07:00
oobabooga f8d220c1e6 Add a tooltip to the web search checkbox 2025-05-31 21:22:36 -07:00
oobabooga 4a2727b71d Add a tooltip to the file upload button 2025-05-31 20:24:31 -07:00
oobabooga 1d88456659 Add support for .docx attachments 2025-05-31 20:15:07 -07:00
oobabooga dc8ed6dbe7 Bump exllamav3 to 0.0.3 2025-05-31 14:27:33 -07:00
oobabooga c55d3c61c6 Bump exllamav2 to 0.3.1 2025-05-31 14:21:42 -07:00
oobabooga ae61c1a0f4
Merge pull request #7034 from oobabooga/dev
Merge dev branch
2025-05-30 23:07:56 -03:00
oobabooga 15f466ca3f Update README 2025-05-30 15:49:57 -07:00
oobabooga 219f0a7731 Fix exllamav3_hf models failing to unload (closes #7031) 2025-05-30 12:05:49 -07:00
oobabooga 298d4719c6 Multiple small style improvements 2025-05-30 11:32:24 -07:00
oobabooga 7c29879e79 Fix 'Start reply with' (closes #7033) 2025-05-30 11:17:47 -07:00
oobabooga af1eef1b08
Merge pull request #7028 from oobabooga/dev
Merge dev branch
2025-05-29 19:07:56 -03:00
oobabooga 28e6bd4fcd Revert "Update transformers requirement in /requirements/full (#7017)"
This reverts commit cc9b7253c1.
2025-05-29 14:49:07 -07:00
oobabooga d1bfb08e8d Improve the style of message editing 2025-05-29 14:27:47 -07:00
oobabooga acbcc12e7b Clean up 2025-05-29 14:11:21 -07:00
oobabooga dce02732a4 Fix timestamp issues when editing/swiping messages 2025-05-29 14:08:48 -07:00
oobabooga 8078c41ec6 Revert "Bump llama.cpp"
This reverts commit a8d02dec8f.
2025-05-29 13:32:19 -07:00
oobabooga a45a652130 CSS fix 2025-05-29 13:28:51 -07:00
oobabooga f59998d268 Don't limit the number of prompt characters printed with --verbose 2025-05-29 13:08:48 -07:00
oobabooga aff41f3482 Update README 2025-05-29 12:53:41 -07:00
oobabooga e7129f9dbe Prevent footer buttons below last assistant message from always appearing 2025-05-29 12:47:07 -07:00
oobabooga 724147ffab Better detect when no model is available 2025-05-29 10:49:29 -07:00
oobabooga faa5c82c64 Fix message version count not updating during regeneration streaming 2025-05-29 09:16:26 -07:00
oobabooga 3f37a2e915 Update README 2025-05-29 08:49:31 -07:00
oobabooga c970c5f166 Make scrollbars darker in dark theme 2025-05-29 08:15:13 -07:00
oobabooga 81794692ab UI: Make the dark theme darker 2025-05-29 08:07:14 -07:00
oobabooga 36bc276005 Update README 2025-05-29 05:39:26 -07:00
oobabooga 0986d075fb Update README 2025-05-29 05:03:59 -07:00
oobabooga 9a94d7b4f6 Update README 2025-05-29 05:02:52 -07:00
oobabooga 2a9699033d Update README 2025-05-29 04:55:59 -07:00
oobabooga f2ee917d4f Update README 2025-05-29 04:55:05 -07:00
oobabooga 685cfe2540 Lint 2025-05-29 04:26:43 -07:00
oobabooga a8d02dec8f Bump llama.cpp 2025-05-29 04:24:21 -07:00
Underscore 63234b9b6f
UI: Fix impersonate (#7025) 2025-05-29 08:22:03 -03:00
oobabooga 75d6cfd14d Download fetched web search results in parallel 2025-05-28 20:36:24 -07:00
oobabooga 7080a02252 Reduce the timeout for downloading web pages 2025-05-28 18:15:21 -07:00
oobabooga 3eb0b77427 Improve the web search query generation 2025-05-28 18:14:51 -07:00
oobabooga 27641ac182 UI: Make message editing work the same for user and assistant messages 2025-05-28 17:23:46 -07:00
oobabooga 6c3590ba9a Make web search attachments clickable 2025-05-28 05:28:15 -07:00
oobabooga 0aedb89921 UI: Small style improvement to attachments 2025-05-28 00:35:20 -07:00
oobabooga 75c6ae8502 UI: Don't edit messages on double click 2025-05-28 00:29:17 -07:00
oobabooga 077bbc6b10
Add web search support (#7023) 2025-05-28 04:27:28 -03:00
oobabooga 1b0e2d8750 UI: Add a token counter to the chat tab (counts input + history) 2025-05-27 22:36:24 -07:00
oobabooga f6ca0ee072 Fix regenerate sometimes not creating a new message version 2025-05-27 21:20:51 -07:00
oobabooga 2db36da979 UI: Make scrollbars more discrete in dark mode 2025-05-27 21:00:11 -07:00
Underscore 5028480eba
UI: Add footer buttons for editing messages (#7019)
---------

Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
2025-05-28 00:55:27 -03:00
Underscore 355b5f6c8b
UI: Add message version navigation (#6947)
---------

Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
2025-05-27 22:54:18 -03:00
dependabot[bot] cc9b7253c1
Update transformers requirement in /requirements/full (#7017) 2025-05-26 23:13:10 -03:00
Underscore 8531100109
Fix textbox text usage in methods (#7009) 2025-05-26 22:40:09 -03:00
djholtby 73bfc936a0
Close response generator when stopping API generation (#7014) 2025-05-26 22:39:03 -03:00
oobabooga bae1aa34aa Fix loading Llama-3_3-Nemotron-Super-49B-v1 and similar models (closes #7012) 2025-05-25 17:19:26 -07:00
oobabooga 7f6579ab20 Minor style change 2025-05-20 21:49:44 -07:00
oobabooga 0d3f854778 Improve the style of thinking blocks 2025-05-20 21:40:42 -07:00
oobabooga 8620d6ffe7 Make it possible to upload multiple text files/pdfs at once 2025-05-20 21:34:07 -07:00
oobabooga cc8a4fdcb1 Minor improvement to attachments prompt format 2025-05-20 21:31:18 -07:00
oobabooga 409a48d6bd
Add attachments support (text files, PDF documents) (#7005) 2025-05-21 00:36:20 -03:00
oobabooga 5d00574a56 Minor UI fixes 2025-05-20 16:20:49 -07:00
oobabooga 51c50b265d Update llama.cpp to b7a17463ec 2025-05-20 11:16:12 -07:00
oobabooga 616ea6966d
Store previous reply versions on regenerate (#7004) 2025-05-20 12:51:28 -03:00
Daniel Dengler c25a381540
Add a "Branch here" footer button to chat messages (#6967) 2025-05-20 11:07:40 -03:00
oobabooga 8e10f9894a
Add a metadata field to the chat history & add date/time to chat messages (#7003) 2025-05-20 10:48:46 -03:00
oobabooga 9ec46b8c44 Remove the HQQ loader (HQQ models can be loaded through Transformers) 2025-05-19 09:23:24 -07:00
oobabooga 0c7237e4b7 Update README 2025-05-18 20:01:29 -07:00
oobabooga bad1da99db Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2025-05-18 14:09:08 -07:00
oobabooga 0c1bc6d1d0 Bump llama.cpp 2025-05-18 14:08:54 -07:00
Tiago Silva 9cd6ea6c0b
Fix Dockerfile in AMD and Intel (#6995) 2025-05-18 18:07:16 -03:00
oobabooga 83bfd5c64b Fix API issues 2025-05-18 12:45:01 -07:00
oobabooga 126b3a768f Revert "Dynamic Chat Message UI Update Speed (#6952)" (for now)
This reverts commit 8137eb8ef4.
2025-05-18 12:38:36 -07:00
oobabooga 9d7a36356d Remove unnecessary js that was causing scrolling issues 2025-05-18 10:56:16 -07:00
oobabooga 2faaf18f1f Add back the "Common values" to the ctx-size slider 2025-05-18 09:06:20 -07:00
oobabooga f1ec6c8662 Minor label changes 2025-05-18 09:04:51 -07:00
oobabooga bd13a8f255 UI: Light theme improvement 2025-05-17 22:31:55 -07:00
oobabooga 076aa67963 Fix API issues 2025-05-17 22:22:18 -07:00
oobabooga 366de4b561 UI: Fix the chat area height when "Show controls" is unchecked 2025-05-17 17:11:38 -07:00
oobabooga e8595730b4
Merge pull request #6992 from oobabooga/dev
Merge dev branch
2025-05-17 11:58:46 -03:00
oobabooga 61276f6a37 Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2025-05-17 07:22:51 -07:00
oobabooga 4800d1d522 More robust VRAM calculation 2025-05-17 07:20:38 -07:00
mamei16 052c82b664
Fix KeyError: 'gpu_layers' when loading existing model settings (#6991) 2025-05-17 11:19:13 -03:00
oobabooga 0f77ff9670 UI: Use total VRAM (not free) for layers calculation when a model is loaded 2025-05-16 19:19:22 -07:00
oobabooga 17c29fa0a2
Merge pull request #6987 from oobabooga/dev
Merge dev branch
2025-05-16 22:23:59 -03:00
oobabooga 4bf763e1d9 Multiple small CSS fixes 2025-05-16 18:22:43 -07:00
oobabooga c0e295dd1d Remove the 'None' option from the model menu 2025-05-16 17:53:20 -07:00
oobabooga e3bba510d4 UI: Only add a blank space to streaming messages in instruct mode 2025-05-16 17:49:17 -07:00
oobabooga 71fa046c17 Minor changes after 1c549d176b 2025-05-16 17:38:08 -07:00
oobabooga d99fb0a22a Add backward compatibility with saved n_gpu_layers values 2025-05-16 17:29:18 -07:00
oobabooga 1c549d176b Fix GPU layers slider: honor saved settings and show true maximum 2025-05-16 17:26:13 -07:00
oobabooga dc3094549e
Merge pull request #6984 from oobabooga/dev
Merge dev branch
2025-05-16 17:13:26 -03:00
oobabooga e4d3f4449d API: Fix a regression 2025-05-16 13:02:27 -07:00
oobabooga 470c822f44 API: Hide the uvicorn access logs from the terminal 2025-05-16 12:54:39 -07:00
oobabooga adb975a380 Prevent fractional gpu-layers in the UI 2025-05-16 12:52:43 -07:00
oobabooga fc483650b5 Set the maximum gpu_layers value automatically when the model is loaded with --model 2025-05-16 11:58:17 -07:00
oobabooga 38c50087fe Prevent a crash on systems without an NVIDIA GPU 2025-05-16 11:55:30 -07:00
oobabooga 253e85a519 Only compute VRAM/GPU layers for llama.cpp models 2025-05-16 10:02:30 -07:00
oobabooga 9ec9b1bf83 Auto-adjust GPU layers after model unload to utilize freed VRAM 2025-05-16 09:56:23 -07:00
oobabooga ee7b3028ac Always cache GGUF metadata calls 2025-05-16 09:12:36 -07:00
oobabooga 4925c307cf Auto-adjust GPU layers on context size and cache type changes + many fixes 2025-05-16 09:07:38 -07:00
oobabooga 93e1850a2c Only show the VRAM info for llama.cpp 2025-05-15 21:42:15 -07:00
oobabooga cbf4daf1c8 Hide the LoRA menu in portable mode 2025-05-15 21:21:54 -07:00
oobabooga fd61297933 Lint 2025-05-15 21:19:19 -07:00
oobabooga 8cb73b78e1 Update ExLlamaV3 2025-05-15 20:10:34 -07:00
oobabooga 041248cc9f Update llama.cpp 2025-05-15 20:10:02 -07:00
oobabooga 5534d01da0
Estimate the VRAM for GGUF models + autoset gpu-layers (#6980) 2025-05-16 00:07:37 -03:00
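As a rough illustration of the idea behind the estimator (not its actual formula), a back-of-envelope sketch where every constant is an assumption:

```python
# Back-of-envelope sketch only: weight VRAM scales with the fraction of
# layers offloaded, plus a KV-cache term that grows with context size.
# All constants here are illustrative assumptions, not the real estimator.
def estimate_vram_gb(file_size_gb: float, gpu_layers: int, total_layers: int,
                     ctx_size: int, kv_bytes_per_token: int = 128 * 1024) -> float:
    frac = gpu_layers / total_layers
    weights = file_size_gb * frac
    kv_cache = ctx_size * kv_bytes_per_token * frac / 1e9
    return weights + kv_cache

# Roughly 9.07 for an 8 GB model fully offloaded at 8k context:
print(round(estimate_vram_gb(8.0, 32, 32, 8192), 2))
```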
oobabooga c4a715fd1e UI: Move the LoRA menu under "Other options" 2025-05-13 20:14:09 -07:00
oobabooga 035cd3e2a9 UI: Hide the extension install menu in portable builds 2025-05-13 20:09:22 -07:00
oobabooga 2826c60044 Use logger for "Output generated in ..." messages 2025-05-13 14:45:46 -07:00
oobabooga 3fa1a899ae UI: Fix gpu-layers being ignored (closes #6973) 2025-05-13 12:07:59 -07:00
oobabooga c375b69413 API: Fix llama.cpp generating after disconnect, improve disconnect detection, fix deadlock on simultaneous requests 2025-05-13 11:23:33 -07:00
oobabooga 62c774bf24 Revert "New attempt"
This reverts commit e7ac06c169.
2025-05-13 06:42:25 -07:00
oobabooga e7ac06c169 New attempt 2025-05-10 19:20:04 -07:00
oobabooga 0c5fa3728e Revert "Fix API failing to cancel streams (attempt), closes #6966"
This reverts commit 006a866079.
2025-05-10 19:12:40 -07:00
oobabooga 006a866079 Fix API failing to cancel streams (attempt), closes #6966 2025-05-10 17:55:48 -07:00
oobabooga 47d4758509 Fix #6970 2025-05-10 17:46:00 -07:00
oobabooga 4920981b14 UI: Remove the typing cursor 2025-05-09 20:35:38 -07:00
oobabooga 8984e95c67 UI: More friendly message when no model is loaded 2025-05-09 07:21:05 -07:00
oobabooga 2bde625d57 Update README 2025-05-09 00:19:25 -07:00
oobabooga 512bc2d0e0 UI: Update some labels 2025-05-08 23:43:55 -07:00
oobabooga f8ef6e09af UI: Make ctx-size a slider 2025-05-08 18:19:04 -07:00
oobabooga bf7e4a4597 Docs: Add a tool/function calling example (from https://github.com/oobabooga/text-generation-webui/pull/6827#issuecomment-2854716960) 2025-05-08 16:12:07 -07:00
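A hedged sketch of what such a tool-calling request looks like in the OpenAI style; the function name and schema are invented for illustration:

```python
# Hypothetical tool-calling request in the OpenAI style. The get_weather
# function and its schema are made up for this example.
import requests

payload = {
    "messages": [{"role": "user", "content": "What's the weather in Paris?"}],
    "tools": [{
        "type": "function",
        "function": {
            "name": "get_weather",
            "description": "Get the current weather for a city",
            "parameters": {
                "type": "object",
                "properties": {"city": {"type": "string"}},
                "required": ["city"],
            },
        },
    }],
}
r = requests.post("http://127.0.0.1:5000/v1/chat/completions", json=payload)
print(r.json()["choices"][0]["message"].get("tool_calls"))
```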
oobabooga 9ea2a69210 llama.cpp: Add --no-webui to the llama-server command 2025-05-08 10:41:25 -07:00
oobabooga 3bc2ec2b11 Fix #6965 2025-05-08 10:34:09 -07:00
oobabooga 1c7209a725 Save the chat history periodically during streaming 2025-05-08 09:46:43 -07:00
oobabooga a1b3307b66 Bump llama.cpp 2025-05-08 08:58:43 -07:00
Jonas fa960496d5
Tools support for OpenAI compatible API (#6827) 2025-05-08 12:30:27 -03:00
Scott Z ed6e16191d
Docker fix for NVIDIA (#6964) 2025-05-08 12:21:52 -03:00
oobabooga 13a434f351 Bump exllamav3 2025-05-08 08:06:07 -07:00
oobabooga a2ab42d390 UI: Remove the exllamav2 info message 2025-05-08 08:00:38 -07:00
oobabooga 348d4860c2 UI: Create a "Main options" section in the Model tab 2025-05-08 07:58:59 -07:00
oobabooga d2bae7694c UI: Change the ctx-size description 2025-05-08 07:26:23 -07:00
oobabooga b28fa86db6 Default --gpu-layers to 256 2025-05-06 17:51:55 -07:00
oobabooga 760b4dd115 Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2025-05-06 14:02:57 -07:00
oobabooga e4fb2475d2 UI: Multiple small style improvements (light/dark themes) 2025-05-06 14:02:15 -07:00
Downtown-Case 5ef564a22e
Fix model config loading in shared.py for Python 3.13 (#6961) 2025-05-06 17:03:33 -03:00
oobabooga c4f36db0d8 llama.cpp: remove tfs (it doesn't get used) 2025-05-06 08:41:13 -07:00
oobabooga 05115e42ee Set top_n_sigma before temperature by default 2025-05-06 08:27:21 -07:00
oobabooga 1927afe894 Fix top_n_sigma not showing for llama.cpp 2025-05-06 08:18:49 -07:00
oobabooga 605cc9ab14 Update exllamav3 2025-05-06 06:43:35 -07:00
oobabooga 89590adc14 Update llama.cpp 2025-05-06 06:41:17 -07:00
oobabooga d1c0154d66 llama.cpp: Add top_n_sigma, fix typical_p in sampler priority 2025-05-06 06:38:39 -07:00
oobabooga cbef35054c UI: CSS fix 2025-05-05 17:46:09 -07:00
Evgenii Novikov 4e8f628d3c
docker: App uid typo in other docker composes (#6958) 2025-05-05 20:05:15 -03:00
oobabooga 530223bf0b UI: Fix the hover menu colors 2025-05-05 16:03:43 -07:00
oobabooga 76f947e3cf UI: Minor style change 2025-05-05 15:58:29 -07:00
Alireza Ghasemi 99bd66445f
SuperboogaV2: minor update to avoid JSON serialization errors (#6945) 2025-05-05 19:04:06 -03:00
Evgenii Novikov 987505ead3
docker: Fix app uid typo in cpu docker compose (#6957) 2025-05-05 19:03:33 -03:00
oobabooga 941e0663da Update README 2025-05-05 14:18:16 -07:00
oobabooga f82667f0b4 Remove more multimodal extension references 2025-05-05 14:17:00 -07:00
oobabooga 85bf2e15b9 API: Remove obsolete multimodal extension handling
Multimodal support will be added back once it's implemented in llama-server.
2025-05-05 14:14:48 -07:00
mamei16 8137eb8ef4
Dynamic Chat Message UI Update Speed (#6952) 2025-05-05 18:05:23 -03:00
oobabooga 53d8e46502 Ensure environment isolation in portable installs 2025-05-05 12:28:17 -07:00
oobabooga bf5290bc0f Fix the hover menu in light theme 2025-05-05 08:04:12 -07:00
oobabooga 967b70327e Light theme improvement 2025-05-05 07:59:02 -07:00
oobabooga 6001d279c6 Light theme improvement 2025-05-05 07:42:13 -07:00
oobabooga 475e012ee8 UI: Improve the light theme colors 2025-05-05 06:16:29 -07:00
oobabooga b817bb33fd Minor fix after df7bb0db1f 2025-05-05 05:00:20 -07:00
oobabooga f3da45f65d ExLlamaV3_HF: Change max_chunk_size to 256 2025-05-04 20:37:15 -07:00
oobabooga df7bb0db1f Rename --n-gpu-layers to --gpu-layers 2025-05-04 20:03:55 -07:00
oobabooga d0211afb3c Save the chat history right after sending a message 2025-05-04 18:52:01 -07:00
oobabooga 2da197bba4 Refinement after previous commit 2025-05-04 18:29:05 -07:00
oobabooga 690d693913 UI: Add padding to only show the last message/reply after sending a message
To avoid scrolling
2025-05-04 18:13:29 -07:00
oobabooga d9da16edba UI: Remove the chat input textarea border 2025-05-04 16:53:52 -07:00
oobabooga 84ab1f95be UI: Increase the chat area a bit 2025-05-04 15:21:52 -07:00
oobabooga d186621926 UI: Fixes after previous commit 2025-05-04 15:19:46 -07:00
oobabooga 7853fb1c8d
Optimize the Chat tab (#6948) 2025-05-04 18:58:37 -03:00
oobabooga b7a5c7db8d llama.cpp: Handle short arguments in --extra-flags 2025-05-04 07:14:42 -07:00
oobabooga 5f5569e9ac Update README 2025-05-04 06:20:36 -07:00
oobabooga 4c2e3b168b llama.cpp: Add a retry mechanism when getting the logits (sometimes it fails) 2025-05-03 06:51:20 -07:00
oobabooga ea60f14674 UI: Show the list of files if the user tries to download a GGUF repository 2025-05-03 06:06:50 -07:00
oobabooga b71ef50e9d UI: Add a min-height to prevent constant scrolling during chat streaming 2025-05-02 23:45:58 -07:00
oobabooga b21bd8bb1e UI: Invert user/assistant message colors in instruct mode
The goal is to make assistant messages more readable.
2025-05-02 22:43:33 -07:00
oobabooga d08acb4af9 UI: Rename enable_thinking -> Enable thinking 2025-05-02 20:50:52 -07:00
oobabooga 3526b7923c Remove extensions with requirements from portable builds 2025-05-02 17:40:53 -07:00
oobabooga 4cea720da8 UI: Remove the "Autoload the model" feature 2025-05-02 16:38:28 -07:00
oobabooga 905afced1c Add a --portable flag to hide things in portable mode 2025-05-02 16:34:29 -07:00
oobabooga 3f26b0408b Fix after 9e3867dc83 2025-05-02 16:17:22 -07:00
oobabooga 9e3867dc83 llama.cpp: Fix manual random seeds 2025-05-02 09:36:15 -07:00
oobabooga d5c407cf35 Use Vulkan instead of ROCm for llama.cpp on AMD 2025-05-01 20:05:36 -07:00
oobabooga f8aaf3c23a Use ROCm 6.2.4 on AMD 2025-05-01 19:50:46 -07:00
oobabooga c12a53c998 Use turboderp's exllamav2 wheels 2025-05-01 19:46:56 -07:00
oobabooga ace8afb825
Merge dev branch 2025-05-01 12:25:04 -03:00
oobabooga 89090d9a61 Update README 2025-05-01 08:22:54 -07:00
oobabooga a41da1ec95
Merge pull request #6939 from oobabooga/dev
Merge dev branch
2025-05-01 00:15:11 -03:00
oobabooga b950a0c6db Lint 2025-04-30 20:02:10 -07:00
oobabooga 307d13b540 UI: Minor label change 2025-04-30 18:58:14 -07:00
oobabooga 55283bb8f1 Fix CFG with ExLlamaV2_HF (closes #6937) 2025-04-30 18:43:45 -07:00
oobabooga ec2e641749 Update settings-template.yaml 2025-04-30 15:25:26 -07:00
oobabooga a6c3ec2299 llama.cpp: Explicitly send cache_prompt = True 2025-04-30 15:24:07 -07:00
oobabooga 195a45c6e1 UI: Make thinking blocks closed by default 2025-04-30 15:12:46 -07:00
oobabooga cd5c32dc19 UI: Fix max_updates_second not working 2025-04-30 14:54:05 -07:00
oobabooga b46ca01340 UI: Set max_updates_second to 12 by default
When the tokens/second are at ~50 and the model is a thinking model,
the markdown rendering for the streaming message becomes a CPU
bottleneck.
2025-04-30 14:53:15 -07:00
oobabooga a4bf339724 Bump llama.cpp 2025-04-30 11:13:14 -07:00
oobabooga e9569c3984 Fixes after c5fe92d152 2025-04-30 06:57:23 -07:00
oobabooga 771d3d8ed6 Fix getting the llama.cpp logprobs for Qwen3-30B-A3B 2025-04-30 06:48:32 -07:00
oobabooga 7f49e3c3ce Bump ExLlamaV3 2025-04-30 05:25:09 -07:00
oobabooga c5fe92d152 Bump llama.cpp 2025-04-30 05:24:58 -07:00
oobabooga 1dd4aedbe1 Fix the streaming_llm UI checkbox not being interactive 2025-04-29 05:28:46 -07:00
oobabooga c5fb51e5d1 Update README 2025-04-28 22:40:26 -07:00
oobabooga d10bded7f8 UI: Add an enable_thinking option to enable/disable Qwen3 thinking 2025-04-28 22:37:01 -07:00
oobabooga 1ee0acc852 llama.cpp: Make --verbose print the llama-server command 2025-04-28 15:56:25 -07:00
oobabooga 15a29e99f8 Lint 2025-04-27 21:41:34 -07:00
oobabooga be13f5199b UI: Add an info message about how to use Speculative Decoding 2025-04-27 21:40:38 -07:00
oobabooga c6c2855c80 llama.cpp: Remove the timeout while loading models (closes #6907) 2025-04-27 21:22:21 -07:00
oobabooga bbcaec75b4 API: Find a new port if the default one is taken (closes #6918) 2025-04-27 21:13:16 -07:00
oobabooga ee0592473c Fix ExLlamaV3_HF leaking memory (attempt) 2025-04-27 21:04:02 -07:00
oobabooga 6e6f9971a2
Merge pull request #6919 from oobabooga/dev
Merge dev branch
2025-04-27 11:35:19 -03:00
oobabooga 965ca7948f Update README 2025-04-27 07:33:08 -07:00
oobabooga 1180bb0d80
Merge pull request #6913 from oobabooga/dev
Merge dev branch
2025-04-27 00:12:16 -03:00
oobabooga f5b59d2b0b Fix the vulkan workflow 2025-04-26 20:11:24 -07:00
oobabooga 9bb9ce079e
Merge pull request #6912 from oobabooga/dev
Merge dev branch
2025-04-27 00:03:16 -03:00
oobabooga 765fea5e36 UI: minor style change 2025-04-26 19:33:46 -07:00
oobabooga 70952553c7 Lint 2025-04-26 19:29:08 -07:00
oobabooga 363b632a0d Lint 2025-04-26 19:22:36 -07:00
oobabooga fa861de05b Fix portable builds with Python 3.12 2025-04-26 18:52:44 -07:00
oobabooga 7b80acd524 Fix parsing --extra-flags 2025-04-26 18:40:03 -07:00
oobabooga 943451284f Fix the Notebook tab not loading its default prompt 2025-04-26 18:25:06 -07:00
oobabooga 511eb6aa94 Fix saving settings to settings.yaml 2025-04-26 18:20:00 -07:00
oobabooga 8b83e6f843 Prevent Gradio from saying 'Thank you for being a Gradio user!' 2025-04-26 18:14:57 -07:00
oobabooga 4a32e1f80c UI: show draft_max for ExLlamaV2 2025-04-26 18:01:44 -07:00
oobabooga 0fe3b033d0 Fix parsing of --n_ctx and --max_seq_len (2nd attempt) 2025-04-26 17:52:21 -07:00
oobabooga c4afc0421d Fix parsing of --n_ctx and --max_seq_len 2025-04-26 17:43:53 -07:00
oobabooga 234aba1c50 llama.cpp: Simplify the prompt processing progress indicator
The progress bar was unreliable
2025-04-26 17:33:47 -07:00
oobabooga 4ff91b6588 Better default settings for Speculative Decoding 2025-04-26 17:24:40 -07:00
oobabooga bf2aa19b21 Bump llama.cpp 2025-04-26 16:39:22 -07:00
oobabooga 029aab6404 Revert "Add -noavx2 portable builds"
This reverts commit 0dd71e78c9.
2025-04-26 16:38:13 -07:00
oobabooga 35717a088c API: Add an /v1/internal/health endpoint 2025-04-26 15:42:27 -07:00
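A minimal liveness probe against the new endpoint; only the path comes from the commit message, the response format is assumed:

```python
# Sketch of a health check. The /v1/internal/health path is taken from the
# commit message above; the response body/shape is an assumption.
import requests

r = requests.get("http://127.0.0.1:5000/v1/internal/health", timeout=5)
print("healthy" if r.status_code == 200 else f"unhealthy ({r.status_code})")
```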
oobabooga bc55feaf3e Improve host header validation in local mode 2025-04-26 15:42:17 -07:00
oobabooga a317450dfa Update README 2025-04-26 14:59:29 -07:00
oobabooga d1e7d9c5d5 Update CMD_FLAGS.txt 2025-04-26 09:00:56 -07:00
oobabooga 3a207e7a57 Improve the --help formatting a bit 2025-04-26 07:31:04 -07:00
oobabooga 6acb0e1bee Change a UI description 2025-04-26 05:13:08 -07:00
oobabooga cbd4d967cc Update a --help message 2025-04-26 05:09:52 -07:00
oobabooga 19c8dced67 Move settings-template.yaml into user_data 2025-04-26 05:03:23 -07:00
oobabooga b976112539 Remove the WSL installation scripts
They were useful in 2023 but now everything runs natively on Windows.
2025-04-26 05:02:17 -07:00
oobabooga 763a7011c0 Remove an ancient/obsolete migration check 2025-04-26 04:59:05 -07:00
oobabooga d9de14d1f7
Restructure the repository (#6904) 2025-04-26 08:56:54 -03:00
oobabooga d4017fbb6d
ExLlamaV3: Add kv cache quantization (#6903) 2025-04-25 21:32:00 -03:00
oobabooga d4b1e31c49 Use --ctx-size to specify the context size for all loaders
Old flags are still recognized as alternatives.
2025-04-25 16:59:03 -07:00
oobabooga faababc4ea llama.cpp: Add a prompt processing progress bar 2025-04-25 16:42:30 -07:00
oobabooga 877cf44c08 llama.cpp: Add StreamingLLM (--streaming-llm) 2025-04-25 16:21:41 -07:00
oobabooga d35818f4e1
UI: Add a collapsible thinking block to messages with <think> steps (#6902) 2025-04-25 18:02:02 -03:00
oobabooga 0dd71e78c9 Add -noavx2 portable builds 2025-04-25 09:07:14 -07:00
oobabooga 98f4c694b9 llama.cpp: Add --extra-flags parameter for passing additional flags to llama-server 2025-04-25 07:32:51 -07:00
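A sketch of the flag-expansion idea behind --extra-flags; the exact comma-separated syntax accepted by the webui is an assumption (the short single-letter form is the case handled by the 2025-05-04 "Handle short arguments" commit above):

```python
# Illustrative parser for a comma-separated --extra-flags string, e.g.
# "flash-attn,ctx-size=4096,v". The real syntax is an assumption; this only
# sketches the expansion into llama-server arguments, including the short
# single-letter form.
def expand_extra_flags(extra_flags: str) -> list[str]:
    args = []
    for item in filter(None, (s.strip() for s in extra_flags.split(","))):
        key, _, value = item.partition("=")
        prefix = "-" if len(key) == 1 else "--"  # short vs. long flag
        args.append(prefix + key)
        if value:
            args.append(value)
    return args

print(expand_extra_flags("flash-attn,ctx-size=4096,v"))
# -> ['--flash-attn', '--ctx-size', '4096', '-v']
```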
oobabooga b6fffbd216 UI: minor style change 2025-04-25 05:37:44 -07:00
oobabooga 2c7ff86015 Bump exllamav3 to de83084184 2025-04-25 05:28:22 -07:00
oobabooga 5993ebeb1b Bump exllamav2 to 0.2.9 2025-04-25 05:27:59 -07:00
oobabooga 23399aff3c UI: minor style change 2025-04-24 20:39:00 -07:00
oobabooga 5861013e68 Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2025-04-24 20:36:20 -07:00
oobabooga a90df27ff5 UI: Add a greeting when the chat history is empty 2025-04-24 20:33:40 -07:00
oobabooga ae1fe87365
ExLlamaV2: Add speculative decoding (#6899) 2025-04-25 00:11:04 -03:00
Matthew Jenkins 8f2493cc60
Prevent llama.cpp defaults from locking up consumer hardware (#6870) 2025-04-24 23:38:57 -03:00
oobabooga 370fe7b7cf Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2025-04-24 09:33:17 -07:00
oobabooga 8ebe868916 Fix typos in b313adf653 2025-04-24 09:32:17 -07:00
oobabooga 93fd4ad25d llama.cpp: Document the --device-draft syntax 2025-04-24 09:20:11 -07:00
oobabooga f1b64df8dd EXL2: add another torch.cuda.synchronize() call to prevent errors 2025-04-24 09:03:49 -07:00
Ziya 60ac495d59
extensions/superboogav2: existing embedding check bug fix (#6898) 2025-04-24 12:42:05 -03:00
oobabooga b313adf653 Bump llama.cpp, make the wheels work with any Python >= 3.7 2025-04-24 08:26:12 -07:00
oobabooga c71a2af5ab Handle CMD_FLAGS.txt in the main code (closes #6896) 2025-04-24 08:21:06 -07:00
oobabooga bfbde73409 Make 'instruct' the default chat mode 2025-04-24 07:08:49 -07:00
oobabooga e99c20bcb0
llama.cpp: Add speculative decoding (#6891) 2025-04-23 20:10:16 -03:00
oobabooga 9424ba17c8 UI: show only part 00001 of multipart GGUF models in the model menu 2025-04-22 19:56:42 -07:00
oobabooga 1aa76b3beb
Merge pull request #6885 from oobabooga/dev
Merge dev branch
2025-04-22 22:38:24 -03:00
oobabooga bce1b68ca9 Minor fix after previous commit 2025-04-22 18:37:36 -07:00
oobabooga 812d878812 Make the dependabot less spammy 2025-04-22 18:35:22 -07:00
oobabooga 1df2b0d3ae
Merge pull request #6884 from oobabooga/dev
Merge dev branch
2025-04-22 22:02:30 -03:00
oobabooga 8228822a6c Revert "Temporary change"
This reverts commit 765de6f678.
2025-04-22 18:01:47 -07:00
oobabooga 62455b415c
Merge pull request #6883 from oobabooga/dev
Merge dev branch
2025-04-22 21:54:34 -03:00
oobabooga 765de6f678 Temporary change 2025-04-22 17:53:56 -07:00
oobabooga 89ec4c9ba6 Add vulkan workflow 2025-04-22 17:51:08 -07:00
oobabooga 06619e5f03 Add vulkan requirements.txt files 2025-04-22 17:46:54 -07:00
oobabooga 022664f2bd
Merge pull request #6881 from oobabooga/dev
Merge dev branch
2025-04-22 12:15:34 -03:00
oobabooga 4335a24ff8 Fix the workflow 2025-04-22 08:14:13 -07:00
oobabooga a778270536
Merge pull request #6869 from oobabooga/dev
Merge dev branch
2025-04-22 12:09:20 -03:00
oobabooga 25cf3600aa Lint 2025-04-22 08:04:02 -07:00
oobabooga 39cbb5fee0 Lint 2025-04-22 08:03:25 -07:00
oobabooga da1919baae Update the README 2025-04-22 08:03:22 -07:00
oobabooga a3031795a3 Update the zip filename 2025-04-22 08:03:16 -07:00
oobabooga 008c6dd682 Lint 2025-04-22 08:02:37 -07:00
oobabooga ee09e44c85
Portable version (#6868) 2025-04-22 09:25:57 -03:00
oobabooga 78aeabca89 Fix the transformers loader 2025-04-21 18:33:14 -07:00
oobabooga 8320190184 Fix the exllamav2_HF and exllamav3_HF loaders 2025-04-21 18:32:23 -07:00
oobabooga 15989c2ed8 Make llama.cpp the default loader 2025-04-21 16:36:35 -07:00
oobabooga 86c3ed3218 Small change to the unload_model() function 2025-04-20 20:00:56 -07:00
oobabooga c178ea02fe Revert "Move the requirements*.txt to a requirements folder"
This reverts commit 6117ef7d64.
2025-04-20 19:27:38 -07:00
oobabooga 6117ef7d64 Move the requirements*.txt to a requirements folder 2025-04-20 19:12:04 -07:00
oobabooga fe8e80e04a Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2025-04-20 19:09:27 -07:00
oobabooga ff1c00bdd9 llama.cpp: set the random seed manually 2025-04-20 19:08:44 -07:00
Matthew Jenkins d3e7c655e5
Add support for llama-cpp builds from https://github.com/ggml-org/llama.cpp (#6862) 2025-04-20 23:06:24 -03:00
oobabooga 99588be576 Organize one_click.py 2025-04-20 18:57:26 -07:00
oobabooga e243424ba1 Fix an import 2025-04-20 17:51:28 -07:00
oobabooga 8cfd7f976b Revert "Remove the old --model-menu flag"
This reverts commit 109de34e3b.
2025-04-20 13:35:42 -07:00
oobabooga d5e1bccef9 Remove the SpeechRecognition requirement 2025-04-20 11:47:28 -07:00
oobabooga b3bf7a885d Fix ExLlamaV2_HF and ExLlamaV3_HF after ae02ffc605 2025-04-20 11:32:48 -07:00
oobabooga 9c59acf820 Remove the numba requirement (it's no longer used) 2025-04-20 10:02:40 -07:00
oobabooga ae02ffc605
Refactor the transformers loader (#6859) 2025-04-20 13:33:47 -03:00
oobabooga c19b995b8e
Merge pull request #6857 from oobabooga/dev
Merge dev branch
2025-04-19 21:45:55 -03:00
oobabooga 6ba0164c70 Lint 2025-04-19 17:45:21 -07:00
oobabooga 5ab069786b llama.cpp: add back the two encode calls (they are harmless now) 2025-04-19 17:38:36 -07:00
oobabooga b9da5c7e3a Use 127.0.0.1 instead of localhost for faster llama.cpp on Windows 2025-04-19 17:36:04 -07:00
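Editor's note: the speedup comes from skipping name resolution. On Windows, 'localhost' can resolve to ::1 first and stall before falling back to IPv4, while the literal 127.0.0.1 connects directly. The difference is visible with getaddrinfo (port number is arbitrary):

```python
import socket
import time

for host in ('localhost', '127.0.0.1'):
    start = time.perf_counter()
    info = socket.getaddrinfo(host, 8080, proto=socket.IPPROTO_TCP)
    elapsed = (time.perf_counter() - start) * 1000
    families = {a[0].name for a in info}
    print(f'{host}: {len(info)} candidates {families} in {elapsed:.2f} ms')
```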
oobabooga 9c9df2063f llama.cpp: fix unicode decoding (closes #6856) 2025-04-19 16:38:15 -07:00
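Editor's note: streamed responses can split a multi-byte UTF-8 character across chunk boundaries, so decoding each chunk independently garbles it. The standard fix is an incremental decoder that buffers incomplete sequences, roughly:

```python
import codecs

decoder = codecs.getincrementaldecoder('utf-8')()

def decode_chunk(chunk: bytes) -> str:
    """Decode a streamed chunk; bytes of a split multi-byte character
    are buffered until the rest arrives instead of raising/garbling."""
    return decoder.decode(chunk)

# '€' is 3 bytes (e2 82 ac); feed it split across two chunks.
print(repr(decode_chunk(b'ok \xe2\x82')))  # 'ok ' (incomplete char held back)
print(repr(decode_chunk(b'\xac!')))        # '€!'
```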
oobabooga ba976d1390 llama.cpp: avoid two 'encode' calls 2025-04-19 16:35:01 -07:00
oobabooga ed42154c78 Revert "llama.cpp: close the connection immediately on 'Stop'"
This reverts commit 5fdebc554b.
2025-04-19 05:32:36 -07:00
oobabooga 5fdebc554b llama.cpp: close the connection immediately on 'Stop' 2025-04-19 04:59:24 -07:00
oobabooga b1495d52e5
Merge pull request #6855 from oobabooga/dev
Merge dev branch
2025-04-19 01:53:11 -03:00
oobabooga 6589ebeca8 Revert "llama.cpp: new optimization attempt"
This reverts commit e2e73ed22f.
2025-04-18 21:16:21 -07:00
oobabooga e2e73ed22f llama.cpp: new optimization attempt 2025-04-18 21:05:08 -07:00
oobabooga e2e90af6cd llama.cpp: don't include --rope-freq-base in the launch command if null 2025-04-18 20:51:18 -07:00
oobabooga 44a6d8a761
Merge pull request #6854 from oobabooga/dev
Merge dev branch
2025-04-18 23:41:56 -03:00
oobabooga 9f07a1f5d7 llama.cpp: new attempt at optimizing the llama-server connection 2025-04-18 19:30:53 -07:00
oobabooga f727b4a2cc llama.cpp: close the connection properly when generation is cancelled 2025-04-18 19:01:39 -07:00
oobabooga b3342b8dd8 llama.cpp: optimize the llama-server connection 2025-04-18 18:46:36 -07:00
oobabooga 4fa52a1302
Merge pull request #6852 from oobabooga/dev
Merge dev branch
2025-04-18 22:15:40 -03:00
oobabooga 2002590536 Revert "Attempt at making the llama-server streaming more efficient."
This reverts commit 5ad080ff25.
2025-04-18 18:13:54 -07:00
oobabooga 71ae05e0a4 llama.cpp: Fix the sampler priority handling 2025-04-18 18:06:36 -07:00
oobabooga 5ad080ff25 Attempt at making the llama-server streaming more efficient. 2025-04-18 18:04:49 -07:00
oobabooga 4fabd729c9 Fix the API without streaming or without 'sampler_priority' (closes #6851) 2025-04-18 17:25:22 -07:00
oobabooga 5135523429 Fix the new llama.cpp loader failing to unload models 2025-04-18 17:10:26 -07:00
oobabooga 4eecb6611f
Merge pull request #6850 from oobabooga/dev
Merge dev branch
2025-04-18 15:33:32 -03:00
oobabooga 8d481ef9d5 Update README 2025-04-18 11:31:22 -07:00
oobabooga caa6afc88b Only show 'GENERATE_PARAMS=...' in the logits endpoint if use_logits is True 2025-04-18 09:57:57 -07:00
oobabooga c5e54c0b37
Merge pull request #6848 from oobabooga/dev
Merge dev branch
2025-04-18 13:36:06 -03:00
oobabooga e52f62d3ff Update README 2025-04-18 09:29:57 -07:00
oobabooga 85c4486d4a Update the colab notebook 2025-04-18 08:53:44 -07:00
oobabooga d00d713ace Rename get_max_context_length to get_vocabulary_size in the new llama.cpp loader 2025-04-18 08:14:15 -07:00
oobabooga c1cc65e82e Lint 2025-04-18 08:06:51 -07:00
oobabooga d68f0fbdf7 Remove obsolete references to llamacpp_HF 2025-04-18 07:46:04 -07:00
oobabooga a0abf93425 Connect --rope-freq-base to the new llama.cpp loader 2025-04-18 06:53:51 -07:00
oobabooga ef9910c767 Fix a bug after c6901aba9f 2025-04-18 06:51:28 -07:00
oobabooga 1c4a2c9a71 Make exllamav3 safer as well 2025-04-18 06:17:58 -07:00
oobabooga 03544d4fb6 Bump llama.cpp and exllamav3 to the latest commits 2025-04-18 06:14:13 -07:00
oobabooga c6901aba9f Remove deprecation warning code 2025-04-18 06:05:47 -07:00
oobabooga 170ad3d3ec Update the README 2025-04-18 06:03:35 -07:00
oobabooga 8144e1031e Remove deprecated command-line flags 2025-04-18 06:02:28 -07:00
oobabooga ae54d8faaa
New llama.cpp loader (#6846) 2025-04-18 09:59:37 -03:00
oobabooga 5c2f8d828e Fix exllamav2 generating eos randomly after previous fix 2025-04-18 05:42:38 -07:00
oobabooga 2fc58ad935 Consider files with .pt extension in the new model menu function 2025-04-17 23:10:43 -07:00
Googolplexed d78abe480b
Allow for model subfolder organization for GGUF files (#6686)
---------

Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
2025-04-18 02:53:59 -03:00
oobabooga ce9e2d94b1 Revert "Attempt at solving the ExLlamaV2 issue"
This reverts commit c9b3c9dfbf.
2025-04-17 22:03:21 -07:00
oobabooga 5dfab7d363 New attempt at solving the exl2 issue 2025-04-17 22:03:11 -07:00
oobabooga c9b3c9dfbf Attempt at solving the ExLlamaV2 issue 2025-04-17 21:45:15 -07:00
oobabooga 2c2d453c8c Revert "Use ExLlamaV2 (instead of the HF one) for EXL2 models for now"
This reverts commit 0ef1b8f8b4.
2025-04-17 21:31:32 -07:00
oobabooga 0ef1b8f8b4 Use ExLlamaV2 (instead of the HF one) for EXL2 models for now
It doesn't seem to have the "OverflowError" bug
2025-04-17 05:47:40 -07:00
oobabooga 38dc09dca5 Bump exllamav3 to the latest commit 2025-04-15 09:50:36 -07:00
oobabooga 038a012581 Installer: Remove .installer_state.json on reinstalling 2025-04-11 21:12:32 -07:00
oobabooga 682c78ea42 Add back detection of GPTQ models (closes #6841) 2025-04-11 21:00:42 -07:00
oobabooga 454366f93e Change the ExLlamaV3 wheel version to 0.0.1a1 2025-04-10 18:33:29 -07:00
oobabooga d7b336d37e Update the README 2025-04-09 20:12:14 -07:00
oobabooga 4ed0da74a8 Remove the obsolete 'multimodal' extension 2025-04-09 20:09:48 -07:00
oobabooga 598568b1ed Revert "UI: remove the streaming cursor"
This reverts commit 6ea0206207.
2025-04-09 16:03:14 -07:00
oobabooga 297a406e05 UI: smoother chat streaming
This removes the throttling associated with gr.Textbox that made words appear in chunks rather than one at a time.
2025-04-09 16:02:37 -07:00
oobabooga 6ea0206207 UI: remove the streaming cursor 2025-04-09 14:59:34 -07:00
oobabooga 14e6baeb48
Merge pull request #6838 from oobabooga/dev
Merge dev branch
2025-04-09 14:48:37 -03:00
oobabooga 9025848df5 Small change to installer 2025-04-09 10:25:47 -07:00
oobabooga d337ea31fa Revert "Reapply "Update transformers requirement from ==4.50.* to ==4.51.* (#6834)""
This reverts commit 8229736ec4.
2025-04-09 10:16:47 -07:00
oobabooga 8229736ec4 Reapply "Update transformers requirement from ==4.50.* to ==4.51.* (#6834)"
This reverts commit 0b3503c91f.
2025-04-09 08:38:06 -07:00
oobabooga 89f40cdcf7 Update libstdcxx-ng for GLIBCXX_3.4.30 support on Linux 2025-04-09 08:28:44 -07:00
oobabooga ad1ada6574 Change one message in the installer 2025-04-09 05:17:10 -07:00
oobabooga d8aad6da94 Fix an update bug 2025-04-08 20:20:24 -07:00
oobabooga 8b8d39ec4e
Add ExLlamaV3 support (#6832) 2025-04-09 00:07:08 -03:00
oobabooga 0b3503c91f Revert "Update transformers requirement from ==4.50.* to ==4.51.* (#6834)"
This reverts commit f1f32386b4.
2025-04-08 12:26:03 -07:00
oobabooga 649ee729c1 Remove Python 3.10 support 2025-04-08 09:22:06 -07:00
oobabooga bf48ec8c44 Remove an unnecessary UI message 2025-04-07 17:43:41 -07:00
oobabooga a5855c345c
Set context lengths to at most 8192 by default (to prevent out of memory errors) (#6835) 2025-04-07 21:42:33 -03:00
dependabot[bot] f1f32386b4
Update transformers requirement from ==4.50.* to ==4.51.* (#6834) 2025-04-07 19:29:39 -03:00
oobabooga 204db28362 Update the dockerfiles 2025-04-06 18:48:31 -07:00
oobabooga eef90a4964 Update some intel arc installation commands 2025-04-06 17:44:07 -07:00
oobabooga a8a64b6c1c Update the README 2025-04-06 17:40:18 -07:00
oobabooga c010cea7be Remove CUDA 11.8 support 2025-04-06 17:17:25 -07:00
Shixian Sheng cbffcf67ef
Fix links in the ngrok extension README (#6826) 2025-04-02 14:28:29 -03:00
dependabot[bot] 77a73cc561
Update peft requirement from ==0.12.* to ==0.15.* (#6820) 2025-03-31 21:01:27 -03:00
oobabooga 109de34e3b Remove the old --model-menu flag 2025-03-31 09:24:03 -07:00
oobabooga bb1905ebc5 Fix the colab notebook 2025-03-29 19:17:36 -07:00
oobabooga 1981327285 Fix the colab notebook 2025-03-29 19:17:14 -07:00
oobabooga 79a26d7a5c Lint 2025-03-29 18:49:48 -07:00
oobabooga 1bd208c219
Add a new chat style: Dark (#6817) 2025-03-29 22:47:10 -03:00
oobabooga 9b80d1d6c2 Remove the stalebot 2025-03-29 13:44:37 -07:00
oobabooga 525b1e0207 Remove the stalebot 2025-03-29 13:43:16 -07:00
dependabot[bot] 2bfaf44df0
Update accelerate requirement from ==1.4.* to ==1.5.* (#6802) 2025-03-26 10:03:21 -03:00
oobabooga 01e42a00ff Bump transformers to 4.50 2025-03-26 06:01:57 -07:00
oobabooga 80cdbe4e09
Merge pull request #6797 from oobabooga/dev
Merge dev branch
2025-03-15 00:11:25 -03:00
oobabooga 758c3f15a5 Lint 2025-03-14 20:04:43 -07:00
SeanScripts 60d67994d9
Perplexity colors extension updates (#6764) 2025-03-14 16:45:53 -03:00
oobabooga 5bcd2d7ad0
Add the top N-sigma sampler (#6796) 2025-03-14 16:45:11 -03:00
oobabooga 677d74a6a0 Revert "UI: improved scrollbar styles", add just a small change instead 2025-03-14 12:10:48 -07:00
oobabooga 6ab04698f6 UI: improve the light mode left sidebar color 2025-03-14 12:03:49 -07:00
oobabooga 26317a4c7e Fix jinja2 error while loading c4ai-command-a-03-2025 2025-03-14 10:59:05 -07:00
oobabooga f04a37adc2 UI: improved scrollbar styles 2025-03-14 05:20:15 -07:00
oobabooga 0261338910 Bump llama-cpp-python to 0.3.8 2025-03-12 17:55:25 -07:00
oobabooga 39fded487a Bump ExllamaV2 to 0.2.8 2025-03-12 17:54:30 -07:00
dependabot[bot] a12e05d9c0
Bump jinja2 from 3.1.5 to 3.1.6 (#6786) 2025-03-12 16:11:03 -03:00
Kelvie Wong 769eee1ff3 Fix OpenAI API with new param (show_after), closes #6747 (#6749)
---------

Co-authored-by: oobabooga <oobabooga4@gmail.com>
2025-02-18 07:02:19 -08:00
Kelvie Wong 16fa9215c4
Fix OpenAI API with new param (show_after), closes #6747 (#6749)
---------

Co-authored-by: oobabooga <oobabooga4@gmail.com>
2025-02-18 12:01:30 -03:00
SeanScripts b131f86584
Perplexity colors extension v2 (#6756) 2025-02-18 11:56:28 -03:00
Alireza Ghasemi 01f20d2d9f
Improve SuperboogaV2 with Date/Time Embeddings, GPU Support, and Multiple File Formats (#6748) 2025-02-17 22:38:15 -03:00
dependabot[bot] 12f6f7ba9f
Update accelerate requirement from ==1.3.* to ==1.4.* (#6753) 2025-02-17 22:35:38 -03:00
oobabooga dba17c40fc Make transformers 4.49 functional 2025-02-17 17:31:11 -08:00
oobabooga 16f4f1a1c3 Bump transformers to 4.49 2025-02-17 17:20:10 -08:00
oobabooga 7c883ef2f0
Merge pull request #6746 from oobabooga/dev
Merge dev branch
2025-02-14 23:25:31 -03:00
oobabooga cf9676c4d5 Update README 2025-02-14 18:05:36 -08:00
Manuel Schmid b54bf359bf
sd_api_pictures model reload fix (#6720) 2025-02-03 00:11:49 -03:00
oobabooga edbe0af647 Minor fixes after 0360f54ae8 2025-02-02 17:04:56 -08:00
oobabooga 6724d2bfa4 Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2025-02-02 16:59:03 -08:00
oobabooga 44e569c3a2 Remove obsolete convert-to-safetensors.py from the repository 2025-02-02 16:15:33 -08:00
SamAcctX f28f39792d
update deprecated deepspeed import for transformers 4.46+ (#6725) 2025-02-02 20:41:36 -03:00
oobabooga f074ffc31b UI: minor light theme improvement 2025-02-02 15:39:36 -08:00
oobabooga c6f2c2fd7e UI: style improvements 2025-02-02 15:34:03 -08:00
oobabooga 0360f54ae8 UI: add a "Show after" parameter (to use with DeepSeek </think>) 2025-02-02 15:30:09 -08:00
oobabooga 01c46f8b56 Merge branch 'main' into dev 2025-01-30 09:49:30 -08:00
oobabooga 32cdaa540f Update README 2025-01-30 09:49:25 -08:00
oobabooga 461d1fdb76 Update README 2025-01-30 09:48:52 -08:00
SpyTech Labs fea98f82c5
DOCS FIX: WSL Port Forwarding Loop. (#6519) 2025-01-30 14:34:23 -03:00
oobabooga 9ac4d81c8b
Merge pull request #6713 from oobabooga/dev
Merge dev branch
2025-01-29 19:12:56 -03:00
oobabooga b614ea6596 Installer: small fixes 2025-01-29 14:05:39 -08:00
oobabooga f01cc079b9 Lint 2025-01-29 14:00:59 -08:00
oobabooga b7c17727b0 Update .gitignore 2025-01-29 13:57:56 -08:00
oobabooga 9ddcc91a91 Bump llama-cpp-python to 0.3.7 2025-01-29 13:56:46 -08:00
oobabooga e3fd4a0ea7 Merge branch 'main' into dev 2025-01-28 12:54:57 -08:00
oobabooga a1c353a4b3 Update README 2025-01-28 12:54:25 -08:00
oobabooga 3936589755 Update README 2025-01-28 12:53:55 -08:00
oobabooga 0b9ab1438d Clean up 2025-01-27 10:28:59 -08:00
oobabooga bac652bb1d Another fix 2025-01-27 10:25:26 -08:00
oobabooga 340022d4b0 Fix after previous commit 2025-01-27 10:02:21 -08:00
oobabooga 053911b629 Installer: don't ignore .whl requirements if the commit has changed
e.g. when the user manually switches branches or runs git pull.
2025-01-27 09:24:44 -08:00
oobabooga 1c9dfa871b Revert "Installer: change a message"
This reverts commit c49251e95d.
2025-01-26 18:17:31 -08:00
oobabooga 87de91dd65 Docs: fix an API example 2025-01-25 18:29:11 -08:00
oobabooga c49251e95d Installer: change a message 2025-01-25 15:03:09 -08:00
oobabooga 75ff3f3815 UI: Mention common context length values 2025-01-25 08:22:23 -08:00
oobabooga 3d4f3e423c Downloader: Make progress bars not jump around
Adapted from: https://gist.github.com/NiklasBeierl/13096bfdd8b2084da8c1163dd06f91d3
2025-01-25 07:44:24 -08:00
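Editor's note: with several files downloading in parallel, unmanaged tqdm bars fight over the same terminal row and "jump". The usual remedy, in the spirit of the linked gist, is to pin each bar to a fixed row via tqdm's position argument. The slot bookkeeping below is illustrative, not the gist's code; read_chunks is an assumed callable yielding byte chunks:

```python
import threading
from tqdm import tqdm

_slots = list(range(4))          # one screen row per concurrent download
_slots_lock = threading.Lock()

def download_with_bar(name, total_bytes, read_chunks):
    with _slots_lock:
        pos = _slots.pop(0)      # pin this file's bar to a fixed row
    try:
        with tqdm(total=total_bytes, desc=name, position=pos, leave=False,
                  unit='B', unit_scale=True) as bar:
            for chunk in read_chunks():
                bar.update(len(chunk))
    finally:
        with _slots_lock:
            _slots.append(pos)   # free the row for the next file
```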
FP HAM 71a551a622
Add strftime_now to Jinja to satisfy Llama 3.1 and 3.2 (and Granite) (#6692) 2025-01-24 11:37:20 -03:00
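Editor's note: Llama 3.1+ chat templates call a strftime_now() helper to embed the current date, which stock Jinja2 does not define. Exposing it is a one-liner on the environment; a minimal sketch:

```python
from datetime import datetime
from jinja2 import Environment

def strftime_now(fmt):
    """Llama 3.x templates use strftime_now("%d %b %Y") for the date."""
    return datetime.now().strftime(fmt)

env = Environment()
env.globals['strftime_now'] = strftime_now
template = env.from_string('Today Date: {{ strftime_now("%d %b %Y") }}')
print(template.render())
```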
FP HAM 5d6f3e6f92
Training Pro: removed monkeypatch references (#6695) 2025-01-24 11:23:44 -03:00
oobabooga 0485ff20e8 Workaround for convert_to_markdown bug 2025-01-23 06:21:40 -08:00
oobabooga 7f8c1c1f07 Docs: update the API examples 2025-01-22 08:48:02 -08:00
Shay Molcho b76b7f6bf5
Minor README change (#6687) 2025-01-22 12:02:43 -03:00
FP HAM 4bd260c60d
Give SillyTavern a bit of leeway in the way they do OpenAI (#6685) 2025-01-22 12:01:44 -03:00
oobabooga b56eb0b9cd Merge branch 'main' into dev 2025-01-22 06:44:22 -08:00
oobabooga 39799adc47 Add a helpful error message when llama.cpp fails to load the model 2025-01-21 12:49:12 -08:00
oobabooga 079ace63ec Installer: minor change 2025-01-21 10:14:05 -08:00
oobabooga 41f4fee085 Lint 2025-01-21 10:01:52 -08:00
oobabooga ff250dd800 Installer: simplify the script 2025-01-21 09:58:13 -08:00
oobabooga 2bf8788c30 Installer: Fix a bug after ecb5d3c485 2025-01-21 09:35:22 -08:00
oobabooga 5e99dded4e UI: add "Continue" and "Remove" buttons below the last chat message 2025-01-21 09:05:44 -08:00
oobabooga ecb5d3c485 Installer: do not redownload wheels for each update 2025-01-21 08:45:13 -08:00
dependabot[bot] f8a5b0bc43
Update accelerate requirement from ==1.2.* to ==1.3.* (#6683) 2025-01-20 17:41:03 -03:00
oobabooga 096272f49e Update README 2025-01-17 09:47:45 -08:00
oobabooga c32f06d62f Update README 2025-01-17 07:03:22 -08:00
oobabooga 878f378e9f
Merge pull request #6670 from oobabooga/dev
Merge dev branch
2025-01-16 10:22:49 -03:00
oobabooga 0258a6f877 Fix the Google Colab notebook 2025-01-16 05:21:18 -08:00
oobabooga fe96678692 Update some comments in the requirements 2025-01-14 19:28:48 -08:00
oobabooga ddb0f71741
Merge pull request #6666 from oobabooga/dev
Merge dev branch
2025-01-14 22:24:39 -03:00
oobabooga 2344366c9b Remove a debug message 2025-01-14 17:23:44 -08:00
oobabooga 7e80266ae9
Merge pull request #6665 from oobabooga/dev
Merge dev branch
2025-01-14 22:01:08 -03:00
oobabooga 5d25739767 Make the update wizards nice 2025-01-14 16:59:36 -08:00
oobabooga 1ef748fb20 Lint 2025-01-14 16:44:15 -08:00
oobabooga f843cb475b UI: update a help message 2025-01-14 08:12:51 -08:00
oobabooga c832953ff7 UI: Activate auto_max_new_tokens by default 2025-01-14 05:59:55 -08:00
Underscore 53b838d6c5
HTML: Fix quote pair RegEx matching for all quote types (#6661) 2025-01-13 18:01:50 -03:00
oobabooga c85e5e58d0 UI: move the new morphdom code to a .js file 2025-01-13 06:20:42 -08:00
oobabooga facb4155d4 Fix morphdom leaving ghost elements behind 2025-01-11 20:57:28 -08:00
Lounger ed16374ece
Fix the gallery extension (#6656) 2025-01-11 23:35:22 -03:00
oobabooga a0492ce325
Optimize syntax highlighting during chat streaming (#6655) 2025-01-11 21:14:10 -03:00
mamei16 f1797f4323
Unescape backslashes in html_output (#6648) 2025-01-11 18:39:44 -03:00
oobabooga 1b9121e5b8 Add a "refresh" button below the last message, add a missing file 2025-01-11 12:42:25 -08:00
oobabooga a5d64b586d
Add a "copy" button below each message (#6654) 2025-01-11 16:59:21 -03:00
oobabooga 58342740a5 Bump flash-attn to 2.7.3 2025-01-11 07:59:49 -08:00
oobabooga 3a722a36c8
Use morphdom to make chat streaming 1902381098231% faster (#6653) 2025-01-11 12:55:19 -03:00
oobabooga 02db4b0d06 Bump transformers to 4.48 2025-01-10 15:05:08 -08:00
oobabooga d2f6c0f65f Update README 2025-01-10 13:25:40 -08:00
oobabooga c393f7650d Update settings-template.yaml, organize modules/shared.py 2025-01-10 13:22:18 -08:00
oobabooga 83c426e96b
Organize internals (#6646) 2025-01-10 18:04:32 -03:00
oobabooga 17aa97248f Installer: make the hashsum verification more robust on Windows 2025-01-10 07:22:25 -08:00
oobabooga 7fe46764fb Improve the --help message about --tensorcores as well 2025-01-10 07:07:41 -08:00
oobabooga da6d868f58 Remove old deprecated flags (~6 months or more) 2025-01-09 16:11:46 -08:00
oobabooga 15bfe36619 Installer: update miniconda to 24.11.1 (experimental) 2025-01-09 15:58:14 -08:00
oobabooga e6eda6a3bb
Merge pull request #6645 from oobabooga/dev
Merge dev branch
2025-01-09 18:46:28 -03:00
oobabooga f3c0f964a2 Lint 2025-01-09 13:18:23 -08:00
oobabooga 0e94d7075e UI: minor style fix on Windows 2025-01-09 13:12:30 -08:00
oobabooga 3020f2e5ec UI: improve the info message about --tensorcores 2025-01-09 12:44:03 -08:00
oobabooga c08d87b78d Make the huggingface loader more readable 2025-01-09 12:23:38 -08:00
oobabooga 03b4067f31 Installer: ask 1 question for NVIDIA users instead of 2 2025-01-09 12:03:49 -08:00
BPplays 619265b32c
add ipv6 support to the API (#6559) 2025-01-09 10:23:44 -03:00
oobabooga 5c89068168 UI: add an info message for the new Static KV cache option 2025-01-08 17:36:30 -08:00
oobabooga 4ffc9ffc7a UI: fix a list style 2025-01-08 17:24:38 -08:00
oobabooga e6796c3859 Bump llama-cpp-python to 0.3.6, add macOS 14 and 15 wheels 2025-01-08 17:24:21 -08:00
nclok1405 b9e2ded6d4
Added UnicodeDecodeError workaround for modules/llamacpp_model.py (#6040)
---------

Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
2025-01-08 21:17:31 -03:00
oobabooga 91a8a87887 Remove obsolete code 2025-01-08 15:07:21 -08:00
oobabooga ad118056b8 Update README 2025-01-08 14:29:46 -08:00
oobabooga 7157257c3f
Remove the AutoGPTQ loader (#6641) 2025-01-08 19:28:56 -03:00
Jack Cloudman d3adcbf64b
Add --exclude-pattern flag to download-model.py script (#6542) 2025-01-08 17:30:21 -03:00
dependabot[bot] 1f86722977
Update safetensors requirement from ==0.4.* to ==0.5.* (#6634) 2025-01-08 16:56:55 -03:00
FP HAM 03a0f236a4
Training_PRO fix: add if 'quantization_config' in shared.model.config.to_dict() 2025-01-08 16:54:09 -03:00
oobabooga c0f600c887 Add a --torch-compile flag for transformers 2025-01-05 05:47:00 -08:00
oobabooga 11af199aff Add a "Static KV cache" option for transformers 2025-01-04 17:52:57 -08:00
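Editor's note: the two transformers options above (static KV cache and --torch-compile) pair naturally: a static cache preallocates fixed-shape key/value tensors, which lets torch.compile trace generation once instead of recompiling as the cache grows. A hedged sketch, not the repository's code; the model id is caller-supplied, and static-cache support is per-architecture (Llama-family models, for example):

```python
import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

def fast_generate(model_id, prompt, max_new_tokens=64):
    """Sketch: static KV cache + torch.compile for faster generation."""
    tok = AutoTokenizer.from_pretrained(model_id)
    model = AutoModelForCausalLM.from_pretrained(
        model_id, torch_dtype=torch.float16,
        device_map='auto')  # device_map requires the accelerate package
    # Compile the forward pass; the static cache keeps tensor shapes
    # fixed so the compiled graph is reused across decoding steps.
    model.forward = torch.compile(model.forward, mode='reduce-overhead')
    inputs = tok(prompt, return_tensors='pt').to(model.device)
    out = model.generate(**inputs, max_new_tokens=max_new_tokens,
                         cache_implementation='static')
    return tok.decode(out[0], skip_special_tokens=True)
```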
oobabooga 3967520e71 Connect XTC, DRY, smoothing_factor, and dynatemp to ExLlamaV2 loader (non-HF) 2025-01-04 16:25:06 -08:00
oobabooga d56b500568 UI: add padding to file saving dialog 2025-01-04 16:22:40 -08:00
oobabooga 049297fa66 UI: reduce the size of CSS sent to the UI during streaming 2025-01-04 14:09:36 -08:00
oobabooga 0e673a7a42 UI: reduce the size of HTML sent to the UI during streaming 2025-01-04 11:40:24 -08:00
mamei16 9f24885bd2
Sane handling of markdown lists (#6626) 2025-01-04 15:41:31 -03:00
oobabooga 3815f46838 UI: minor style improvements to chat tab 2025-01-03 04:35:29 -08:00
oobabooga e2702200e1 UI: fix the font size of lists in chat mode 2025-01-02 19:26:50 -08:00
oobabooga 4b3e1b3757 UI: add a "Search chats" input field 2025-01-02 18:46:40 -08:00
oobabooga b8fc9010fa UI: fix orjson.JSONDecodeError error on page reload 2025-01-02 16:57:04 -08:00
oobabooga 973255cb0b UI: fix codeblocks overflowing on mobile 2025-01-02 16:48:49 -08:00
oobabooga 75f1b5ccde UI: add a "Branch chat" button 2025-01-02 16:24:18 -08:00
Petr Korolev 13c033c745
Fix CUDA error on MPS backend during API request (#6572)
---------

Co-authored-by: oobabooga <oobabooga4@gmail.com>
2025-01-02 00:06:11 -03:00
oobabooga 979e1f1bd6 Fix a bug after 9163951f3a 2025-01-01 17:57:09 -08:00
oobabooga f011787a83 UI: make codeblocks scroll horizontally on overflow 2025-01-01 17:55:18 -08:00
oobabooga 9163951f3a UI: reduce the CPU usage during text streaming 2025-01-01 17:49:57 -08:00
oobabooga 725639118a UI: Use a tab length of 2 for lists (rather than 4) 2025-01-01 13:53:50 -08:00
oobabooga 7b88724711
Make responses start faster by removing unnecessary cleanup calls (#6625) 2025-01-01 18:33:38 -03:00
oobabooga 88a6331abf
Merge pull request #6623 from oobabooga/dev
Merge dev branch
2024-12-31 20:47:48 -03:00
oobabooga 64853f8509 Reapply a necessary change that I removed from #6599 (thanks @mamei16!) 2024-12-31 14:43:22 -08:00
mamei16 e953af85cd
Fix newlines in the markdown renderer (#6599)
---------

Co-authored-by: oobabooga <oobabooga4@gmail.com>
2024-12-31 01:04:02 -03:00
dependabot[bot] d24b83132b
Bump jinja2 from 3.1.4 to 3.1.5 (#6601) 2024-12-30 09:35:20 -03:00
mamei16 cca4ac56fa
Fix interface loading with dark theme even when 'dark_theme' is set to false (#6614) 2024-12-30 09:34:19 -03:00
oobabooga 292cd489e9 Bump ExLlamaV2 to 0.2.7 2024-12-30 04:31:10 -08:00
oobabooga 4ce9d13dbe
Preset cleanup (#6619) 2024-12-29 12:25:26 -03:00
oobabooga 39a5c9a49c
UI organization (#6618) 2024-12-29 11:16:17 -03:00
oobabooga 0490ee620a UI: increase the threshold for a <li> to be considered long (some more) 2024-12-19 16:51:34 -08:00
oobabooga ee3a533e5c UI: improve the message width in instruct mode 2024-12-19 16:11:29 -08:00
oobabooga 89888bef56 UI: increase the threshold for a <li> to be considered long 2024-12-19 14:38:36 -08:00
oobabooga 2acec386fc UI: improve the streaming cursor 2024-12-19 14:08:56 -08:00
oobabooga e2fb86e5df UI: further improve the style of lists and headings 2024-12-19 13:59:24 -08:00
oobabooga c8ddb86c22 UI: improve some light mode colors 2024-12-19 12:24:04 -08:00
oobabooga 24a4c98d42 UI: improve the style of links in messages 2024-12-19 12:23:03 -08:00
oobabooga 836a868abc UI: improve the heading fonts 2024-12-19 12:21:28 -08:00
oobabooga 4d466d5c80
Merge pull request #6585 from oobabooga/dev
Merge dev branch
2024-12-18 23:24:55 -03:00
oobabooga fee23df1a5 Update README.md 2024-12-18 18:13:01 -08:00
oobabooga 9fd12605ac Update README.md 2024-12-18 17:58:53 -08:00
oobabooga 228caf0f3c UI: add a scrollbar to the right sidebar 2024-12-18 15:33:05 -08:00
oobabooga d01dd2e1c8 UI: fix a margin 2024-12-18 13:35:40 -08:00
Aluísio Pires 2bea4dfa96
Fix an issue caused during the installation of tts (#6496) 2024-12-18 18:16:56 -03:00
oobabooga 0a15cff6a0 UI: close sidebars by clicking outside their areas on mobile 2024-12-18 12:27:06 -08:00
oobabooga 636a6621cc UI: fix sidebars closing when typing on mobile 2024-12-18 12:16:59 -08:00
oobabooga 0c069e5b3f UI: remove obsolete js event 2024-12-18 12:16:26 -08:00
oobabooga c48e4622e8 UI: update a link 2024-12-18 06:28:14 -08:00
oobabooga b27f6f8915 Lint 2024-12-17 20:13:32 -08:00
oobabooga e83235a0cc UI: fix a font color 2024-12-17 20:11:51 -08:00
oobabooga ac0f60eb1a UI: make dropdown menus more readable 2024-12-17 20:02:04 -08:00
oobabooga b051e2c161 UI: improve a margin for readability 2024-12-17 19:58:21 -08:00
oobabooga 60c93e0c66 UI: Set cache_type to fp16 by default 2024-12-17 19:44:20 -08:00
oobabooga ddccc0d657 UI: minor change to log messages 2024-12-17 19:39:00 -08:00
oobabooga 3030c79e8c UI: show progress while loading a model 2024-12-17 19:37:43 -08:00
Diner Burger addad3c63e
Allow more granular KV cache settings (#6561) 2024-12-17 17:43:48 -03:00
oobabooga c43ee5db11 UI: very minor color change 2024-12-17 07:59:55 -08:00
oobabooga 517fcc1f23 Better centralize the chat tab 2024-12-16 20:12:16 -08:00
oobabooga d769618591
Improved UI (#6575) 2024-12-17 00:47:41 -03:00
dependabot[bot] dc56fcff12
Update bitsandbytes requirement from ==0.44.* to ==0.45.* (#6584) 2024-12-16 19:48:51 -03:00
dependabot[bot] 25c640ec0c
Update accelerate requirement from ==1.1.* to ==1.2.* (#6583) 2024-12-16 18:58:50 -03:00
oobabooga 97f5615661 Bump llama-cpp-python to 0.3.5, remove macos 12 wheels (workflow is failing) 2024-12-11 07:14:59 -08:00
oobabooga 27398428f6 Bump flash-attention to v2.7.2.post1 2024-12-09 10:17:17 -08:00
oobabooga baa566b0c6 Bump exllamav2 to 0.2.6 2024-12-09 10:16:33 -08:00
oobabooga f7836c4bd8 Bump transformers to 4.47 2024-12-09 07:00:15 -08:00
oobabooga aa629e2809 Bump exllamav2 to 0.2.5 2024-12-01 12:00:28 -08:00
oobabooga 350758f81c UI: Fix the history upload event 2024-11-19 20:34:53 -08:00
oobabooga d01293861b Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2024-11-18 10:15:36 -08:00
oobabooga 3d19746a5d UI: improve HTML rendering for lists with sub-lists 2024-11-18 10:14:09 -08:00
mefich 1c937dad72
Filter whitespaces in downloader fields in model tab (#6518) 2024-11-18 12:01:40 -03:00
dependabot[bot] f93196e306
Update accelerate requirement from ==1.0.* to ==1.1.* (#6515) 2024-11-18 12:00:24 -03:00
hronoas 9b3a3d8f12
openai extension fix: Handle Multiple Content Items in Messages (#6528) 2024-11-18 11:59:52 -03:00
oobabooga 5fa9336dab Bump flash-attention to 2.7.0.post2 2024-11-18 06:55:29 -08:00
oobabooga 0c48ecf359 Bump exllamav2 to 0.2.4 2024-11-18 06:51:56 -08:00
oobabooga 8d5cf7b134 Bump llama-cpp-python to 0.3.2 2024-11-18 06:51:06 -08:00
oobabooga cc8c7ed209
Merge pull request #6491 from oobabooga/dev
Merge dev branch
2024-10-25 01:10:23 -03:00
oobabooga 3a92fa517b Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2024-10-24 11:26:21 -07:00
oobabooga 8deea2936d Remove lm_eval from requirements 2024-10-24 11:25:42 -07:00
PIRI e1061ba7e3
Make token bans work again on HF loaders (#6488) 2024-10-24 15:24:02 -03:00
oobabooga b50dc3bf57 Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2024-10-24 11:22:54 -07:00
oobabooga 386c0d8289 Bump transformers to 4.46 2024-10-24 11:09:09 -07:00
Paul Richardson 6a0837451e
Minor Documentation update - query cuda compute for docker .env (#6469) 2024-10-15 10:39:00 -03:00
Molly Sophia 18f836b280
Add RWKV-World instruction template (#6456) 2024-10-14 17:51:20 -03:00
dependabot[bot] e784938654
Update accelerate requirement from ==0.33.* to ==1.0.* (#6441) 2024-10-14 17:32:53 -03:00
oobabooga f1a8eae04d Remove optimum from requirements 2024-10-14 13:30:45 -07:00
oobabooga 2468cfd8bb Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2024-10-14 13:25:27 -07:00
oobabooga bb62e796eb Fix locally compiled llama-cpp-python failing to import 2024-10-14 13:24:13 -07:00
oobabooga c9a9f63d1b Fix llama.cpp loader not being random (thanks @reydeljuego12345) 2024-10-14 13:07:07 -07:00
PIRI 03a2e70054
Fix temperature_last when temperature not in sampler priority (#6439) 2024-10-09 11:25:14 -03:00
Grzegorz Lippe 9d8b1c5fd9
Fix intel bug described in #6253 (#6433) 2024-10-05 11:58:17 -03:00
Luana 22baa5378f
Fix for systems that have bash in a non-standard directory (#6428) 2024-10-03 00:35:13 -03:00
SeanScripts e1338a1804
Add whisper turbo (#6423) 2024-10-01 17:49:35 -03:00
oobabooga d1af7a41ad
Merge pull request #6422 from oobabooga/dev
Merge dev branch
2024-10-01 15:21:53 -03:00
oobabooga 49dfa0adaf Fix the "save preset" event 2024-10-01 11:20:48 -07:00
oobabooga 93c250b9b6 Add a UI element for enable_tp 2024-10-01 11:16:15 -07:00
oobabooga 3b06cb4523
Merge pull request #6421 from oobabooga/dev
Merge dev branch
2024-10-01 14:48:41 -03:00
oobabooga d364aa0a3c Lint 2024-10-01 10:22:57 -07:00
oobabooga cca9d6e22d Lint 2024-10-01 10:21:06 -07:00
oobabooga c6b50f88da Lint 2024-10-01 10:19:28 -07:00
oobabooga 7cb98351da
Merge branch 'main' into dev 2024-10-01 14:18:32 -03:00
oobabooga 617cd7b705 Revert "Update accelerate requirement from ==0.33.* to ==0.34.* (#6416)"
This reverts commit 6063a66414.
2024-10-01 09:06:25 -07:00
dependabot[bot] 6063a66414
Update accelerate requirement from ==0.33.* to ==0.34.* (#6416) 2024-09-30 18:50:38 -03:00
oobabooga 4d9ce586d3 Update llama_cpp_python_hijack.py, fix llamacpp_hf 2024-09-30 14:49:21 -07:00
oobabooga 9ca0cd7749 Bump llama-cpp-python to 0.3.1 2024-09-29 20:47:04 -07:00
oobabooga bbdeed3cf4 Make sampler priority high if unspecified 2024-09-29 20:45:27 -07:00
oobabooga 01362681f2 Bump exllamav2 to 0.2.4 2024-09-29 07:42:44 -07:00
Hanusz Leszek e4b0467f9f
Add beforeunload event to add confirmation dialog when leaving page (#6279) 2024-09-29 01:14:19 -03:00
Manuel Schmid 0f90a1b50f
Do not set value for histories in chat when --multi-user is used (#6317) 2024-09-29 01:08:55 -03:00
oobabooga 055f3f5632 Fix after #6386 (thanks @Touch-Night) 2024-09-28 20:55:26 -07:00
oobabooga 57160cd6fa Update README 2024-09-28 20:50:41 -07:00
oobabooga 3f0571b62b Update README 2024-09-28 20:48:30 -07:00
oobabooga 3fb02f43f6 Update README 2024-09-28 20:38:43 -07:00
oobabooga 3b99532e02 Remove HQQ and AQLM from requirements 2024-09-28 20:34:59 -07:00
oobabooga c61b29b9ce Simplify the warning when flash-attn fails to import 2024-09-28 20:33:17 -07:00
oobabooga b92d7fd43e Add warnings for when AutoGPTQ, TensorRT-LLM, or HQQ are missing 2024-09-28 20:30:24 -07:00
oobabooga 65e5864084 Update README 2024-09-28 20:25:26 -07:00
oobabooga 1a870b3ea7 Remove AutoAWQ and AutoGPTQ from requirements (no wheels available) 2024-09-28 19:38:56 -07:00
oobabooga 85994e3ef0 Bump pytorch to 2.4.1 2024-09-28 09:44:08 -07:00
oobabooga ca5a2dba72 Bump rocm to 6.1.2 2024-09-28 09:39:53 -07:00
oobabooga 7276dca933 Fix a typo 2024-09-27 20:28:17 -07:00
RandoInternetPreson 46996f6519
ExllamaV2 tensor parallelism to increase multi gpu inference speeds (#6356) 2024-09-28 00:26:03 -03:00
Philipp Emanuel Weidmann 301375834e
Exclude Top Choices (XTC): A sampler that boosts creativity, breaks writing clichés, and inhibits non-verbatim repetition (#6335) 2024-09-27 22:50:12 -03:00
oobabooga 3492e33fd5 Bump bitsandbytes to 0.44 2024-09-27 16:59:30 -07:00
Thireus ☠ 626b0a0437
Force /bin/bash shell for conda (#6386) 2024-09-27 19:47:04 -03:00
oobabooga 5c918c5b2d Make it possible to sort DRY 2024-09-27 15:40:48 -07:00
oobabooga 78b8705400 Bump llama-cpp-python to 0.3.0 (except for AMD) 2024-09-27 15:06:31 -07:00
oobabooga c5f048e912 Bump ExLlamaV2 to 0.2.2 2024-09-27 15:04:08 -07:00
oobabooga 7424f789bf
Fix the sampling monkey patch (and add more options to sampler_priority) (#6411) 2024-09-27 19:03:25 -03:00
oobabooga c497a32372 Bump transformers to 4.45 2024-09-26 11:55:51 -07:00
oobabooga f98431c744 Apply the change to all requirements (oops) 2024-09-06 18:48:13 -07:00
oobabooga a50477ec85 Apply the change to all requirements (oops) 2024-09-06 18:47:25 -07:00
oobabooga ac30b004ef Pin fastapi/pydantic requirement versions 2024-09-06 18:45:15 -07:00
oobabooga e86ab37aaf Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2024-09-06 18:44:43 -07:00
oobabooga 27797a92d0 Pin fastapi/pydantic requirement versions 2024-09-06 18:38:57 -07:00
Jean-Sylvain Boige 4924ee2901
typo in OpenAI response format (#6365) 2024-09-05 21:42:23 -03:00
oobabooga bba5b36d33 Don't import PEFT unless necessary 2024-09-03 19:40:53 -07:00
oobabooga c5b40eb555 llama.cpp: prevent prompt evaluation progress bar with just 1 step 2024-09-03 17:37:06 -07:00
oobabooga 2cb8d4c96e Bump llama-cpp-python to 0.2.90 2024-09-03 05:53:18 -07:00
oobabooga 64919e0d69 Bump flash-attention to 2.6.3 2024-09-03 05:51:46 -07:00
oobabooga 68d52c60f3 Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2024-09-02 21:16:39 -07:00
oobabooga d1168afa76 Bump ExLlamaV2 to 0.2.0 2024-09-02 21:15:51 -07:00
Stefan Merettig 9a150c3368
API: Relax multimodal format, fixes HuggingFace Chat UI (#6353) 2024-09-02 23:03:15 -03:00
GralchemOz 4c74c7a116
Fix UnicodeDecodeError for BPE-based Models (especially GLM-4) (#6357) 2024-09-02 23:00:59 -03:00
FartyPants (FP HAM) 41a8eb4eeb
Training Pro: update script.py (#6359) 2024-09-02 23:00:15 -03:00
oobabooga 1f288b4072 Bump ExLlamaV2 to 0.1.9 2024-08-22 12:40:15 -07:00
joachimchauvet c24966c591
update API documentation with examples to list/load models (#5902) 2024-08-21 15:33:45 -03:00
oobabooga 5522584992
Merge pull request #6339 from oobabooga/dev
Merge dev branch
2024-08-20 11:20:52 -03:00
oobabooga 1124f71cf3
Update README.md 2024-08-20 11:19:46 -03:00
oobabooga 1b62cd8508
Merge pull request #6337 from oobabooga/dev
Merge dev branch
2024-08-20 01:54:47 -03:00
oobabooga d9a031fcad
Update README.md 2024-08-20 01:52:30 -03:00
oobabooga 073694bf15
Merge pull request #6336 from oobabooga/dev
Merge dev branch
2024-08-20 01:27:58 -03:00
oobabooga 9d99156ca3
Update README.md 2024-08-20 01:27:02 -03:00
oobabooga 406995f722 Update README 2024-08-19 21:24:01 -07:00
oobabooga 1b1518aa6a
Update README.md 2024-08-20 00:36:18 -03:00
oobabooga 5058269143 Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2024-08-19 19:55:45 -07:00
oobabooga fd9cb26619 UI: update the DRY parameters descriptions/order 2024-08-19 19:40:17 -07:00
dependabot[bot] 64e16e9a46
Update accelerate requirement from ==0.32.* to ==0.33.* (#6291) 2024-08-19 23:34:10 -03:00
dependabot[bot] 68f928b5e0
Update peft requirement from ==0.8.* to ==0.12.* (#6292) 2024-08-19 23:33:56 -03:00
oobabooga 8bac1a9382
Update README.md 2024-08-19 23:10:04 -03:00
oobabooga bb987ffe66
Update README.md 2024-08-19 23:06:52 -03:00
oobabooga 4d8c1801c2 Bump llama-cpp-python to 0.2.89 2024-08-19 17:45:01 -07:00
oobabooga bf8187124d Bump llama-cpp-python to 0.2.88 2024-08-13 12:40:18 -07:00
oobabooga 089d5a9415 Bump llama-cpp-python to 0.2.87 2024-08-07 20:36:28 -07:00
oobabooga 81773f7f36 Bump transformers to 4.44 2024-08-06 20:07:05 -07:00
oobabooga e926c03b3d Add a --tokenizer-dir command-line flag for llamacpp_HF 2024-08-06 19:41:18 -07:00
oobabooga f106e780ba downloader: use 1 session for all files for better speed 2024-08-06 19:41:12 -07:00
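Editor's note: reusing one requests.Session across all files keeps TCP/TLS connections alive between downloads instead of re-handshaking per file. A minimal sketch (pool sizes are illustrative):

```python
import requests
from requests.adapters import HTTPAdapter

# One session shared by every file download: connections are reused
# instead of being re-established per file.
session = requests.Session()
session.mount('https://', HTTPAdapter(pool_connections=4, pool_maxsize=4))

def fetch(url, dest):
    with session.get(url, stream=True, timeout=30) as r:
        r.raise_for_status()
        with open(dest, 'wb') as f:
            for chunk in r.iter_content(chunk_size=1 << 20):
                f.write(chunk)
```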
oobabooga d011040f43
Merge pull request #6300 from oobabooga/dev
Merge dev branch
2024-08-01 02:26:12 -03:00
oobabooga 608545d282 Bump llama-cpp-python to 0.2.85 2024-07-31 18:44:46 -07:00
oobabooga 30b4d8c8b2 Fix Llama 3.1 template including lengthy "tools" headers 2024-07-29 11:52:17 -07:00
oobabooga f4d95f33b8 downloader: better progress bar 2024-07-28 22:21:56 -07:00
oobabooga 9dcff21da9 Remove unnecessary shared.previous_model_name variable 2024-07-28 18:35:11 -07:00
oobabooga addcb52c56 Make --idle-timeout work for API requests 2024-07-28 18:31:40 -07:00
oobabooga 514fb2e451 Fix UI error caused by --idle-timeout 2024-07-28 18:30:06 -07:00
oobabooga 3aa646c1d0 UI: improve the style of headers in chat messages 2024-07-28 15:26:15 -07:00
oobabooga 92ab3a9a6a Bump llama-cpp-python to 0.2.84 2024-07-28 15:13:06 -07:00
oobabooga 5223c009fe Minor change after previous commit 2024-07-27 23:13:34 -07:00
oobabooga 7050bb880e UI: make n_ctx/max_seq_len/truncation_length numbers rather than sliders 2024-07-27 23:11:53 -07:00
Harry 078e8c8969
Make compress_pos_emb float (#6276) 2024-07-28 03:03:19 -03:00
oobabooga ffc713f72b UI: fix multiline LaTeX equations 2024-07-27 15:36:10 -07:00
oobabooga 493f8c3242 UI: remove animation after clicking on "Stop" in the Chat tab 2024-07-27 15:22:34 -07:00
oobabooga e4d411b841 UI: fix rendering LaTeX enclosed between \[ and \] 2024-07-27 15:21:44 -07:00
oobabooga 6bab4c2faa UI: add back single $ for equations 2024-07-26 23:03:53 -07:00
oobabooga f32d26240d UI: Fix the chat "stop" event 2024-07-26 23:03:05 -07:00
oobabooga 9e82f8c394 UI: Fix chat sometimes not scrolling down after sending a message 2024-07-26 22:35:30 -07:00
oobabooga c5814db173 UI: fix double quotes in instruct mode 2024-07-25 20:22:07 -07:00
oobabooga 498fec2c7c UI: fix saving characters 2024-07-25 15:11:27 -07:00
oobabooga b80d5906c2 UI: fix saving characters 2024-07-25 15:09:31 -07:00
oobabooga dd97a83534
Merge pull request #6271 from oobabooga/dev
Merge dev branch
2024-07-25 12:12:04 -03:00
oobabooga e4624fbc68
Merge branch 'main' into dev 2024-07-25 12:03:45 -03:00
oobabooga 42e80108f5 UI: clear the markdown LRU cache when using the default/notebook tabs 2024-07-25 08:01:42 -07:00
oobabooga a34273755b Revert "Updater: don't reinstall requirements if no updates after git pull"
This reverts commit ac30e7fe9c.
2024-07-25 07:34:01 -07:00
oobabooga d581334a41 Don't install AutoAWQ on CUDA 11.8 2024-07-25 05:38:52 -07:00
oobabooga 14584fda36 UI: don't change the color of italics in instruct mode 2024-07-24 20:55:18 -07:00
oobabooga b85ae6bc96 Fix after previous commit 2024-07-24 19:10:17 -07:00
oobabooga b6830bcdae Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2024-07-24 19:04:38 -07:00
oobabooga ac30e7fe9c Updater: don't reinstall requirements if no updates after git pull 2024-07-24 19:03:34 -07:00
oobabooga 1f101ee3e5 UI: improve the quote colors 2024-07-24 18:56:54 -07:00
Luana 3170b6efc9
Fixes Linux shebangs (#6110) 2024-07-24 22:23:29 -03:00
oobabooga 7e2851e505 UI: fix "Command for chat-instruct mode" not appearing by default 2024-07-24 15:04:12 -07:00
oobabooga 947016d010 UI: make the markdown LRU cache infinite (for really long conversations) 2024-07-24 11:54:26 -07:00
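Editor's note: functools.lru_cache with maxsize=None gives an unbounded cache, so in a very long conversation each message is converted to HTML once and re-served on every streaming update. A toy sketch; the conversion body is a stand-in for the real markdown renderer:

```python
from functools import lru_cache

@lru_cache(maxsize=None)   # maxsize=None makes the cache unbounded
def convert_to_markdown(text: str) -> str:
    """Stand-in for the expensive markdown-to-HTML conversion."""
    return '<p>%s</p>' % text  # placeholder conversion

convert_to_markdown('hello')
convert_to_markdown('hello')             # second call is a cache hit
print(convert_to_markdown.cache_info())  # hits=1, misses=1
```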
oobabooga 3b2c23dfb5 Add AutoAWQ 0.2.6 wheels for PyTorch 2.2.2 2024-07-24 11:15:00 -07:00
oobabooga 8a5f110c14 Bump ExLlamaV2 to 0.1.8 2024-07-24 09:22:48 -07:00
oobabooga e637b702ff UI: make text between quotes colored in chat mode 2024-07-23 21:30:32 -07:00
oobabooga 98ed6d3a66 Don't use flash attention on Google Colab 2024-07-23 19:50:56 -07:00
oobabooga af839d20ac Remove the AutoAWQ requirement 2024-07-23 19:38:39 -07:00
oobabooga 9d5513fda0 Remove the AutoAWQ requirement 2024-07-23 19:38:04 -07:00
oobabooga 8b52b93e85 Make the Google Colab notebook functional again (attempt) 2024-07-23 19:35:00 -07:00
oobabooga e777b73349 UI: prevent LaTeX from being rendered for inline "$" 2024-07-23 19:04:19 -07:00
oobabooga 1815877061 UI: fix the default character not loading correctly on startup 2024-07-23 18:48:10 -07:00
oobabooga e6181e834a Remove AutoAWQ as a standalone loader
(it works better through transformers)
2024-07-23 15:31:17 -07:00
oobabooga f66ab63d64 Bump transformers to 4.43 2024-07-23 14:06:34 -07:00
oobabooga 6b4d762120
Merge pull request #6261 from oobabooga/dev
Merge dev branch
2024-07-23 03:11:02 -03:00
oobabooga 95b3e98c36 UI: Fix code syntax highlighting 2024-07-22 23:08:48 -07:00
oobabooga d1115f18b9
Merge pull request #6260 from oobabooga/dev
Merge dev branch
2024-07-23 02:30:35 -03:00
oobabooga 3ee682208c Revert "Bump hqq from 0.1.7.post3 to 0.1.8 (#6238)"
This reverts commit 1c3671699c.
2024-07-22 19:53:56 -07:00
oobabooga 5e7f4ee88a UI: simplify the interface load events 2024-07-22 19:11:55 -07:00
oobabooga 5c5e7264ec Update README 2024-07-22 18:20:01 -07:00
oobabooga 7e73058943 UI: fix h1/h2/h3/h4 color in light mode 2024-07-22 18:18:02 -07:00
oobabooga f18c947a86 Update the tensorcores description 2024-07-22 18:06:41 -07:00
oobabooga aa809e420e Bump llama-cpp-python to 0.2.83, add back tensorcore wheels
Also add back the progress bar patch
2024-07-22 18:05:11 -07:00
oobabooga 11bbf71aa5
Bump back llama-cpp-python (#6257) 2024-07-22 16:19:41 -03:00
oobabooga 0f53a736c1 Revert the llama-cpp-python update 2024-07-22 12:02:25 -07:00
oobabooga a687f950ba Remove the tensorcores llama.cpp wheels
They are not faster than the default wheels anymore and they use a lot of space.
2024-07-22 11:54:35 -07:00
oobabooga 017d2332ea Remove no longer necessary llama-cpp-python patch 2024-07-22 11:50:36 -07:00
oobabooga 7d2449f8b0 Bump llama-cpp-python to 0.2.82.3 (unofficial build) 2024-07-22 11:49:20 -07:00
oobabooga f2d802e707 UI: make Default/Notebook contents persist on page reload 2024-07-22 11:07:10 -07:00
oobabooga 8768b69a2d Lint 2024-07-21 22:08:14 -07:00
oobabooga 79e8dbe45f UI: minor optimization 2024-07-21 22:06:49 -07:00
oobabooga e1085180cf UI: better handle scrolling when the input area grows 2024-07-21 21:20:22 -07:00
oobabooga 7ef2414357 UI: Make the file saving dialogs more robust 2024-07-21 15:38:20 -07:00
oobabooga 423372d6e7 Organize ui_file_saving.py 2024-07-21 13:23:18 -07:00
oobabooga af99e0697e UI: increase the font weight of chat messages 2024-07-21 10:45:27 -07:00
oobabooga 17df2d7bdf UI: don't export the instruction template on "Save UI defaults to settings.yaml" 2024-07-21 10:45:01 -07:00
oobabooga d05846eae5 UI: refresh the pfp cache on handle_your_picture_change 2024-07-21 10:17:22 -07:00
oobabooga 58a1581b96 Add missing dark_theme.js (oops) 2024-07-21 09:47:55 -07:00
oobabooga e9d4bff7d0 Update the --tensor_split description 2024-07-20 22:04:48 -07:00
oobabooga 916d1d8283 UI: improve the style of code blocks in light theme 2024-07-20 20:32:57 -07:00
Patrick Leiser 9b205f94a4
Fix for issue #6024, don't auto-hide the chat contents (#6247) 2024-07-21 00:05:28 -03:00
oobabooga 564d8c8c0d Make alpha_value a float number 2024-07-20 20:02:54 -07:00
oobabooga 79c4d3da3d
Optimize the UI (#6251) 2024-07-21 00:01:42 -03:00
Alberto Cano a14c510afb
Customize the subpath for gradio, use with reverse proxy (#5106) 2024-07-20 19:10:39 -03:00
FartyPants (FP HAM) 6ab477f375
training: Added ChatML-format.json format example (#5899) 2024-07-20 19:05:09 -03:00
Vhallo a9a6d72d8c
Use gr.Number for RoPE scaling parameters (#6233)
---------

Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
2024-07-20 18:57:09 -03:00
dependabot[bot] 1c3671699c
Bump hqq from 0.1.7.post3 to 0.1.8 (#6238) 2024-07-20 18:20:26 -03:00
oobabooga aa7c14a463 Use chat-instruct mode by default 2024-07-19 21:43:52 -07:00
oobabooga 0315122cf0
Merge pull request #6232 from oobabooga/dev
Merge dev branch
2024-07-13 14:52:34 -03:00
oobabooga b19d239a60 Bump flash-attention to 2.6.1 2024-07-12 20:16:11 -07:00
InvectorGator 4148a9201f
Fix for macOS users encountering model load errors (#6227)
---------

Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
Co-authored-by: Invectorgator <Kudzu12gaming@outlook.com>
2024-07-13 00:04:19 -03:00
oobabooga d01c68f2a3
Merge pull request #6224 from oobabooga/dev
Merge dev branch
2024-07-11 20:42:46 -03:00
oobabooga 05676caf70 Update README 2024-07-11 16:25:52 -07:00
oobabooga f5599656b4 Update README 2024-07-11 16:22:00 -07:00
oobabooga d4eac58f2d Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2024-07-11 16:21:16 -07:00
oobabooga a30ec2e7db Update README 2024-07-11 16:20:44 -07:00
dependabot[bot] 063d2047dd
Update accelerate requirement from ==0.31.* to ==0.32.* (#6217) 2024-07-11 19:56:42 -03:00
oobabooga e436d69e2b Add --no_xformers and --no_sdpa flags for ExllamaV2 2024-07-11 15:47:37 -07:00
oobabooga 512b311137 Improve the llama-cpp-python exception messages 2024-07-11 13:00:29 -07:00
oobabooga 01e4721da7 Bump ExLlamaV2 to 0.1.7 2024-07-11 12:33:46 -07:00
oobabooga fa075e41f4 Bump llama-cpp-python to 0.2.82 2024-07-10 06:03:24 -07:00
oobabooga f957b17d18 UI: update an obsolete message 2024-07-10 06:01:36 -07:00
oobabooga c176244327 UI: Move cache_8bit/cache_4bit further up 2024-07-05 12:16:21 -07:00
oobabooga e813b322cf
Merge pull request #6203 from oobabooga/dev
Merge dev branch
2024-07-05 07:37:19 -03:00
oobabooga aa653e3b5a Prevent llama.cpp from being monkey patched more than once (closes #6201) 2024-07-05 03:34:15 -07:00
oobabooga a210e61df1 UI: Fix broken chat histories not showing (closes #6196) 2024-07-04 20:31:25 -07:00
oobabooga 3315d00651
Merge pull request #6200 from oobabooga/dev
Merge dev branch
2024-07-05 00:22:24 -03:00
oobabooga e79e7b90dc UI: Move the cache_8bit and cache_4bit elements up 2024-07-04 20:21:28 -07:00
oobabooga 363efe54f4
Merge pull request #6199 from oobabooga/dev
Merge dev branch
2024-07-05 00:17:14 -03:00
oobabooga 8b44d7b12a Lint 2024-07-04 20:16:44 -07:00
oobabooga a47de06088 Force only 1 llama-cpp-python version at a time for now 2024-07-04 19:43:34 -07:00
oobabooga f243b4ca9c Make llama-cpp-python not crash immediately 2024-07-04 19:16:00 -07:00
oobabooga f77cf159ba UI: fix a glitch when switching tabs with "show controls" unchecked 2024-07-02 20:57:03 -07:00
oobabooga 7e22eaa36c Bump llama-cpp-python to 0.2.81 2024-07-02 20:29:35 -07:00
oobabooga 907137a13d Automatically set bf16 & use_eager_attention for Gemma-2 2024-07-01 21:46:35 -07:00
TimStrauven 8074fba18d
Whisper STT: JS overhaul (#6194)
---------

Co-authored-by: RandoInternetPreson <aaronalai1@gmail.com>
2024-07-01 23:27:18 -03:00
GralchemOz 8a39f579d8
transformers: Add eager attention option to make Gemma-2 work properly (#6188) 2024-07-01 12:08:08 -03:00
oobabooga 19a56dd538 UI: Minor CSS improvement to chat mode 2024-06-30 21:09:54 -07:00
oobabooga 1ea3826333 UI: improve the chat area width on mobile devices 2024-06-30 17:08:23 -07:00
oobabooga ed01322763 Obtain the EOT token from the jinja template (attempt)
To use as a stopping string.
2024-06-30 15:09:22 -07:00
oobabooga 3e3f8637d6 Fix the AUTOMATIC1111 request in sd-api-pictures (closes #5993) 2024-06-29 11:43:57 -07:00
oobabooga 4ea260098f llama.cpp: add 4-bit/8-bit kv cache options 2024-06-29 09:10:33 -07:00
oobabooga 220c1797fc UI: do not show the "save character" button in the Chat tab 2024-06-28 22:11:31 -07:00
oobabooga f62aad3d59 Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2024-06-28 21:42:03 -07:00
oobabooga 8803ae1845 UI: decrease the number of lines for "Command for chat-instruct mode" 2024-06-28 21:41:30 -07:00
mamei16 cc825dd1f4
Addressing Whisper STT issues (#5929) 2024-06-29 01:32:54 -03:00
oobabooga 5c6b9c610d
UI: allow the character dropdown to coexist in the Chat tab and the Parameters tab (#6177) 2024-06-29 01:20:27 -03:00
oobabooga de69a62004 Revert "UI: move "Character" dropdown to the main Chat tab"
This reverts commit 83534798b2.
2024-06-28 15:38:11 -07:00
oobabooga 38d58764db UI: remove unused gr.State variable from the Default tab 2024-06-28 15:17:44 -07:00
oobabooga 04cb197ed6 Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2024-06-27 21:25:23 -07:00
oobabooga da196707cf UI: improve the light theme a bit 2024-06-27 21:05:38 -07:00
dependabot[bot] 9660f6f10e
Bump aqlm[cpu,gpu] from 1.1.5 to 1.1.6 (#6157) 2024-06-27 21:13:02 -03:00
dependabot[bot] a5df8f4e3c
Bump jinja2 from 3.1.2 to 3.1.4 (#6172) 2024-06-27 21:12:39 -03:00
dependabot[bot] c6cec0588c
Update accelerate requirement from ==0.30.* to ==0.31.* (#6156) 2024-06-27 21:12:02 -03:00
oobabooga 2f71515cb0 Make dependabot target the dev branch 2024-06-27 17:08:59 -07:00
oobabooga 1da47f2ae6 Make dependabot target the dev branch 2024-06-27 17:07:04 -07:00
oobabooga 9dbcb1aeea Small fix to make transformers 4.42 functional 2024-06-27 17:05:29 -07:00
oobabooga 66090758df Bump transformers to 4.42 (for gemma support) 2024-06-27 11:26:02 -07:00
oobabooga 6915c5077a
Merge pull request #6166 from oobabooga/dev
Merge dev branch
2024-06-26 23:33:09 -03:00
oobabooga 8ec8bc0b85 UI: handle another edge case while streaming lists 2024-06-26 18:40:43 -07:00
oobabooga 0e138e4be1 Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2024-06-26 18:30:08 -07:00
mefich a85749dcbe
Update models_settings.py: add default alpha_value, add proper compress_pos_emb for newer GGUFs (#6111) 2024-06-26 22:17:56 -03:00
oobabooga 5fe532a5ce UI: remove DRY info text
It was visible for loaders without DRY.
2024-06-26 15:33:11 -07:00
oobabooga b1187fc9a5 UI: prevent flickering while streaming lists / bullet points 2024-06-25 19:19:45 -07:00
oobabooga 3691451d00
Add back the "Rename chat" feature (#6161) 2024-06-25 22:28:58 -03:00
oobabooga 53fbd2f245 Add TensorRT-LLM to the README 2024-06-25 14:45:37 -07:00
oobabooga ac3f92d36a UI: store chat history in the browser 2024-06-25 14:18:07 -07:00
oobabooga 46ca15cb79 Minor bug fixes after e7e1f5901e 2024-06-25 11:49:33 -07:00
oobabooga 83534798b2 UI: move "Character" dropdown to the main Chat tab 2024-06-25 11:25:57 -07:00
oobabooga 279cba607f UI: don't show an animation when updating the "past chats" menu 2024-06-25 11:10:17 -07:00
oobabooga 3290edfad9 Bug fix: force chat history to be loaded on launch 2024-06-25 11:06:05 -07:00
oobabooga e7e1f5901e
Prompts in the "past chats" menu (#6160) 2024-06-25 15:01:43 -03:00
oobabooga 602b455507 Bump llama-cpp-python to 0.2.79 2024-06-24 20:26:38 -07:00
oobabooga a43c210617
Improved past chats menu (#6158) 2024-06-25 00:07:22 -03:00
oobabooga 96ba53d916 Handle another fix after 57119c1b30 2024-06-24 15:51:12 -07:00
oobabooga 7db8b3b532 Bump ExLlamaV2 to 0.1.6 2024-06-24 05:38:11 -07:00
oobabooga 35f32d08bc GitHub: Increase the stalebot time to 6 months 2024-06-23 22:34:18 -07:00
oobabooga 564a3e1553 Remove the awkward "Tab" keyboard shortcut 2024-06-23 22:31:07 -07:00
oobabooga 577a8cd3ee
Add TensorRT-LLM support (#5715) 2024-06-24 02:30:03 -03:00
oobabooga 536f8d58d4 Do not expose alpha_value to llama.cpp & rope_freq_base to transformers
To avoid confusion
2024-06-23 22:09:24 -07:00
oobabooga b48ab482f8 Remove obsolete "gptq_for_llama_info" message 2024-06-23 22:05:19 -07:00
oobabooga 5e8dc56f8a Fix after previous commit 2024-06-23 21:58:28 -07:00
Louis Del Valle 57119c1b30
Update block_requests.py to resolve unexpected type error (500 error) (#5976) 2024-06-24 01:56:51 -03:00
oobabooga 125bb7b03b Revert "Bump llama-cpp-python to 0.2.78"
This reverts commit b6eaf7923e.
2024-06-23 19:54:28 -07:00
CharlesCNorton 5993904acf
Fix several typos in the codebase (#6151) 2024-06-22 21:40:25 -03:00
GodEmperor785 2c5a9eb597
Change limits of RoPE scaling sliders in UI (#6142) 2024-06-19 21:42:17 -03:00
oobabooga 5904142777 Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2024-06-19 17:41:09 -07:00
oobabooga b10d735176 Minor CSS linting 2024-06-19 17:40:33 -07:00
Guanghua Lu 229d89ccfb
Make logs more readable, no more \u7f16\u7801 (#6127) 2024-06-15 23:00:13 -03:00
oobabooga fd7c3c5bb0 Don't git pull on installation (to make past releases installable) 2024-06-15 06:38:05 -07:00
oobabooga b6eaf7923e Bump llama-cpp-python to 0.2.78 2024-06-14 21:22:09 -07:00
oobabooga 9420973b62
Downgrade PyTorch to 2.2.2 (#6124) 2024-06-14 16:42:03 -03:00
Forkoz 1576227f16
Fix GGUFs with no BOS token present, mainly qwen2 models. (#6119)
---------

Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
2024-06-14 13:51:01 -03:00
dependabot[bot] fdd8fab9cf
Bump hqq from 0.1.7.post2 to 0.1.7.post3 (#6090) 2024-06-14 13:46:35 -03:00
oobabooga 10601850d9 Fix after previous commit 2024-06-13 19:54:12 -07:00
oobabooga 0f3a423de1 Alternative solution to "get next logits" deadlock (#6106) 2024-06-13 19:34:16 -07:00
oobabooga 9aef01551d Revert "Use reentrant generation lock (#6107)"
This reverts commit b675151f25.
2024-06-13 17:53:07 -07:00
oobabooga 8930bfc5f4
Bump PyTorch, ExLlamaV2, flash-attention (#6122) 2024-06-13 20:38:31 -03:00
oobabooga 386500aa37 Avoid unnecessary calls UI -> backend, to make it faster 2024-06-12 20:52:42 -07:00
oobabooga 4820ae9aef
Merge pull request #6118 from oobabooga/dev
Merge dev branch
2024-06-13 00:38:03 -03:00
Forkoz 1d79aa67cf
Fix flash-attn UI parameter to actually store true. (#6076) 2024-06-13 00:34:54 -03:00
Belladore 3abafee696
DRY sampler improvements (#6053) 2024-06-12 23:39:11 -03:00
theo77186 b675151f25
Use reentrant generation lock (#6107) 2024-06-12 23:25:05 -03:00
oobabooga a36fa73071 Lint 2024-06-12 19:00:21 -07:00
oobabooga 2d196ed2fe Remove obsolete pre_layer parameter 2024-06-12 18:56:44 -07:00
Belladore 46174a2d33
Fix error when bos_token_id is None. (#6061) 2024-06-12 22:52:27 -03:00
Belladore a363cdfca1
Fix missing bos token for some models (including Llama-3) (#6050) 2024-05-27 09:21:30 -03:00
oobabooga 8df68b05e9 Remove MinPLogitsWarper (it's now a transformers built-in) 2024-05-27 05:03:30 -07:00
oobabooga 4f1e96b9e3 Downloader: Add --model-dir argument, respect --model-dir in the UI 2024-05-23 20:42:46 -07:00
oobabooga ad54d524f7 Revert "Fix stopping strings for llama-3 and phi (#6043)"
This reverts commit 5499bc9bc8.
2024-05-22 17:18:08 -07:00
oobabooga 5499bc9bc8
Fix stopping strings for llama-3 and phi (#6043) 2024-05-22 13:53:59 -03:00
rohitanshu 8aaa0a6f4e
Fixed minor typo in docs - Training Tab.md (#6038) 2024-05-21 14:52:22 -03:00
oobabooga 9e189947d1 Minor fix after bd7cc4234d (thanks @belladoreai) 2024-05-21 10:37:30 -07:00
oobabooga ae86292159 Fix getting Phi-3-small-128k-instruct logits 2024-05-21 10:35:00 -07:00
oobabooga bd7cc4234d
Backend cleanup (#6025) 2024-05-21 13:32:02 -03:00
oobabooga 6a1682aa95 README: update command-line flags with raw --help output
This helps me keep this up-to-date more easily.
2024-05-19 20:28:46 -07:00
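Editor's note: one way to keep a README flag list in sync with argparse is to splice the raw --help output between marker comments. A sketch under that assumption; the marker strings are hypothetical, not ones the repository uses:

```python
import re
import subprocess

START, END = '<!-- flags start -->', '<!-- flags end -->'  # hypothetical markers

def update_readme_flags(readme_path='README.md'):
    """Splice the raw `--help` output between two marker comments so the
    README's flag list never drifts from the argparse definitions."""
    help_text = subprocess.run(
        ['python', 'server.py', '--help'],
        capture_output=True, text=True, check=True).stdout
    block = f'{START}\n\n{help_text}\n{END}'
    with open(readme_path) as f:
        readme = f.read()
    pattern = f'{re.escape(START)}.*?{re.escape(END)}'
    # A lambda replacement avoids backslash escapes inside help_text
    # being interpreted by re.sub.
    with open(readme_path, 'w') as f:
        f.write(re.sub(pattern, lambda m: block, readme, flags=re.DOTALL))
```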
Philipp Emanuel Weidmann 852c943769
DRY: A modern repetition penalty that reliably prevents looping (#5677) 2024-05-19 23:53:47 -03:00
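Editor's note: the idea behind DRY is that if the current context ends with a sequence that already occurred earlier, the token that previously followed that sequence gets an exponential penalty, so verbatim loops become increasingly unlikely. A simplified single-sequence sketch using the sampler's multiplier/base/allowed_length parameters, not the PR's actual implementation:

```python
def dry_penalty(input_ids, logits, multiplier=0.8, base=1.75, allowed_length=2):
    """Simplified DRY: a candidate that would stretch an earlier-seen
    repeat to `length` tokens loses
    multiplier * base**(length - allowed_length) from its logit."""
    n = len(input_ids)
    last = input_ids[-1]
    best = {}  # candidate token -> longest repeated suffix it would extend
    for i in range(n - 1):
        if input_ids[i] != last:
            continue
        length = 1  # tokens matched between the suffix and the earlier occurrence
        while i - length >= 0 and input_ids[i - length] == input_ids[n - 1 - length]:
            length += 1
        cand = input_ids[i + 1]  # token that followed the earlier match
        best[cand] = max(best.get(cand, 0), length)
    for token, length in best.items():
        if length >= allowed_length:
            logits[token] -= multiplier * base ** (length - allowed_length)
    return logits

# 'A B C A B' -> token C (id 2) would recreate 'A B C', so it is penalized.
logits = [0.0] * 5
print(dry_penalty([0, 1, 2, 0, 1], logits))  # [0.0, 0.0, -0.8, 0.0, 0.0]
```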
oobabooga 9f77ed1b98
--idle-timeout flag to unload the model if unused for N minutes (#6026) 2024-05-19 23:29:39 -03:00
altoiddealer 818b4e0354
Let grammar escape backslashes (#5865) 2024-05-19 20:26:09 -03:00
Tisjwlf 907702c204
Fix gguf multipart file loading (#5857) 2024-05-19 20:22:09 -03:00
Guanghua Lu d7bd3da35e
Add Llama 3 instruction template (#5891) 2024-05-19 20:17:26 -03:00
A0nameless0man 5cb59707f3
fix: grammar not supporting UTF-8 (#5900) 2024-05-19 20:10:39 -03:00
Jari Van Melckebeke 8456d13349
[docs] small docker changes (#5917) 2024-05-19 20:09:37 -03:00
Samuel Wein b63dc4e325
UI: Warn user if they are trying to load a model from no path (#6006) 2024-05-19 20:05:17 -03:00
dependabot[bot] 2de586f586
Update accelerate requirement from ==0.27.* to ==0.30.* (#5989) 2024-05-19 20:03:18 -03:00
chr 6b546a2c8b
llama.cpp: increase the max threads from 32 to 256 (#5889) 2024-05-19 20:02:19 -03:00
oobabooga abe5ddc883
Merge pull request #6027 from oobabooga/dev
Merge dev branch
2024-05-19 19:01:11 -03:00
oobabooga a38a37b3b3 llama.cpp: default n_gpu_layers to the maximum value for the model automatically 2024-05-19 10:57:42 -07:00
oobabooga a4611232b7 Make --verbose output less spammy 2024-05-18 09:57:00 -07:00
oobabooga 0d90b3a25c Bump llama-cpp-python to 0.2.75 2024-05-18 05:26:26 -07:00
oobabooga e225b0b995 downloader: fix downloading 01-ai/Yi-1.5-34B-Chat 2024-05-12 10:43:50 -07:00
oobabooga 9557f49f2f Bump llama-cpp-python to 0.2.73 2024-05-11 10:53:19 -07:00
oobabooga 9ac528715c
Merge pull request #5996 from oobabooga/dev
Merge dev branch
2024-05-08 16:37:26 -03:00
oobabooga 7a728a38eb Update README 2024-05-07 02:59:36 -07:00
oobabooga d5bde7babc UI: improve the performance of code syntax highlighting 2024-05-06 17:45:03 -07:00
oobabooga 0b193b8553 Downloader: handle one more retry case after 5770e06c48 2024-05-04 19:25:22 -07:00
oobabooga cb31998605 Add a template for NVIDIA ChatQA models 2024-05-03 08:19:04 -07:00
oobabooga e9c9483171 Improve the logging messages while loading models 2024-05-03 08:10:44 -07:00
oobabooga e61055253c Bump llama-cpp-python to 0.2.69, add --flash-attn option 2024-05-03 04:31:22 -07:00
oobabooga 0476f9fe70 Bump ExLlamaV2 to 0.0.20 2024-05-01 16:20:50 -07:00
oobabooga ae0f28530c Bump llama-cpp-python to 0.2.68 2024-05-01 08:40:50 -07:00
oobabooga 8f12fb028d
Merge pull request #5970 from oobabooga/dev
Merge dev branch
2024-05-01 09:56:23 -03:00
oobabooga 1eba888af6 Update FUNDING.yml 2024-05-01 05:54:21 -07:00
oobabooga 51fb766bea
Add back my llama-cpp-python wheels, bump to 0.2.65 (#5964) 2024-04-30 09:11:31 -03:00
oobabooga 81f603d09f
Merge pull request #5959 from oobabooga/dev
Merge dev branch
2024-04-29 15:45:48 -03:00
oobabooga 5770e06c48
Add a retry mechanism to the model downloader (#5943) 2024-04-27 12:25:28 -03:00
oobabooga dfdb6fee22 Set llm_int8_enable_fp32_cpu_offload=True for --load-in-4bit
To allow for 32-bit CPU offloading (it's very slow).
2024-04-26 09:39:27 -07:00
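For reference, this maps to the transformers quantization config flag of the same name; a minimal sketch (the model name is only illustrative):

```python
from transformers import AutoModelForCausalLM, BitsAndBytesConfig

# llm_int8_enable_fp32_cpu_offload lets modules that don't fit on the GPU
# stay on the CPU in fp32 instead of failing to load -- at a large speed cost.
config = BitsAndBytesConfig(
    load_in_4bit=True,
    llm_int8_enable_fp32_cpu_offload=True,
)
model = AutoModelForCausalLM.from_pretrained(
    "facebook/opt-1.3b",  # illustrative model
    quantization_config=config,
    device_map="auto",
)
```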
oobabooga 70845c76fb
Add back the max_updates_second parameter (#5937) 2024-04-26 10:14:51 -03:00
oobabooga 6761b5e7c6
Improved instruct style (with syntax highlighting & LaTeX rendering) (#5936) 2024-04-26 10:13:11 -03:00
oobabooga 9c04365f54 Detect the airoboros-3_1-yi-34b-200k template 2024-04-25 16:50:54 -07:00
oobabooga 8b1dee3ec8 Detect platypus-yi-34b, CausalLM-RP-34B, 34b-beta instruction templates 2024-04-24 21:47:43 -07:00
oobabooga 4aa481282b Detect the xwin-lm-70b-v0.1 instruction template 2024-04-24 17:02:20 -07:00
oobabooga ad122361ea
Merge pull request #5927 from oobabooga/dev
Merge dev branch
2024-04-24 13:58:53 -03:00
oobabooga c9b0df16ee Lint 2024-04-24 09:55:00 -07:00
oobabooga 4094813f8d Lint 2024-04-24 09:53:41 -07:00
oobabooga 64e2a9a0a7 Fix the Phi-3 template when used in the UI 2024-04-24 01:34:11 -07:00
oobabooga f0538efb99 Remove obsolete --tensorcores references 2024-04-24 00:31:28 -07:00
Colin f3c9103e04
Revert walrus operator for params['max_memory'] (#5878) 2024-04-24 01:09:14 -03:00
Jari Van Melckebeke c725d97368
nvidia docker: make sure gradio listens on 0.0.0.0 (#5918) 2024-04-23 23:17:55 -03:00
oobabooga 9b623b8a78
Bump llama-cpp-python to 0.2.64, use official wheels (#5921) 2024-04-23 23:17:05 -03:00
Ashley Kleynhans 0877741b03
Bumped ExLlamaV2 to version 0.0.19 to resolve #5851 (#5880) 2024-04-19 19:04:40 -03:00
oobabooga a4b732c30b
Merge pull request #5887 from oobabooga/dev
Merge dev branch
2024-04-19 12:34:50 -03:00
oobabooga f27e1ba302
Add a /v1/internal/chat-prompt endpoint (#5879) 2024-04-19 00:24:46 -03:00
oobabooga b30bce3b2f Bump transformers to 4.40 2024-04-18 16:19:31 -07:00
Philipp Emanuel Weidmann a0c69749e6
Revert sse-starlette version bump because it breaks API request cancellation (#5873) 2024-04-18 15:05:00 -03:00
mamei16 8985a8538b
Fix whisper STT (#5856) 2024-04-14 10:55:58 -03:00
oobabooga 26d822f64f
Merge pull request #5848 from oobabooga/dev
Merge dev branch
2024-04-12 12:46:25 -03:00
dependabot[bot] 597556cb77
Bump sse-starlette from 1.6.5 to 2.1.0 (#5831) 2024-04-11 18:54:05 -03:00
oobabooga e158299fb4 Fix loading sharded GGUF models through llamacpp_HF 2024-04-11 14:50:05 -07:00
wangshuai09 fd4e46bce2
Add Ascend NPU support (basic) (#5541) 2024-04-11 18:42:20 -03:00
zaypen a90509d82e
Model downloader: Take HF_ENDPOINT into consideration (#5571) 2024-04-11 18:28:10 -03:00
Ashley Kleynhans 70c637bf90
Fix saving of UI defaults to settings.yaml - Fixes #5592 (#5794) 2024-04-11 18:19:16 -03:00
oobabooga 3e3a7c4250 Bump llama-cpp-python to 0.2.61 & fix the crash 2024-04-11 14:15:34 -07:00
oobabooga 5f5ceaf025 Revert "Bump llama-cpp-python to 0.2.61"
This reverts commit 3ae61c0338.
2024-04-11 13:24:57 -07:00
dependabot[bot] bd71a504b8
Update gradio requirement from ==4.25.* to ==4.26.* (#5832) 2024-04-11 02:24:53 -03:00
Victorivus c423d51a83
Fix issue #5783 for character images with transparency (#5827) 2024-04-11 02:23:43 -03:00
Alex O'Connell b94cd6754e
UI: Respect model and lora directory settings when downloading files (#5842) 2024-04-11 01:55:02 -03:00
oobabooga 17c4319e2d Fix loading command-r context length metadata 2024-04-10 21:39:59 -07:00
oobabooga 3ae61c0338 Bump llama-cpp-python to 0.2.61 2024-04-10 21:39:46 -07:00
oobabooga cbd65ba767
Add a simple min_p preset, make it the default (#5836) 2024-04-09 12:50:16 -03:00
oobabooga ed4001e324 Bump ExLlamaV2 to 0.0.18 2024-04-08 18:05:16 -07:00
oobabooga 91a7370a65
Merge pull request #5823 from oobabooga/dev
Merge dev branch
2024-04-07 11:01:08 -03:00
oobabooga f6828de3f2 Downgrade llama-cpp-python to 0.2.56 2024-04-07 07:00:12 -07:00
Jared Van Bortel 39ff9c9dcf
requirements: add psutil (#5819) 2024-04-06 23:02:20 -03:00
oobabooga 65099dc192
Merge pull request #5822 from oobabooga/dev
Merge dev branch
2024-04-06 22:58:06 -03:00
oobabooga d02744282b Minor logging change 2024-04-06 18:56:58 -07:00
oobabooga dfb01f9a63 Bump llama-cpp-python to 0.2.60 2024-04-06 18:32:36 -07:00
oobabooga 096f75a432 Documentation: remove obsolete RWKV docs 2024-04-06 14:06:39 -07:00
oobabooga dd6e4ac55f Prevent double <BOS_TOKEN> with Command R+ 2024-04-06 13:14:32 -07:00
oobabooga 1bdceea2d4 UI: Focus on the chat input after starting a new chat 2024-04-06 12:57:57 -07:00
oobabooga 168a0f4f67 UI: do not load the "gallery" extension by default 2024-04-06 12:43:21 -07:00
oobabooga 64a76856bd Metadata: Fix loading Command R+ template with multiple options 2024-04-06 07:32:17 -07:00
oobabooga 1b87844928 Minor fix 2024-04-05 18:43:43 -07:00
oobabooga 6b7f7555fc Logging message to make transformers loader a bit more transparent 2024-04-05 18:40:02 -07:00
oobabooga 4e739dc211 Add an instruction template for Command R 2024-04-05 18:22:25 -07:00
oobabooga 8a8dbf2f16 Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2024-04-05 12:42:23 -07:00
oobabooga 0f536dd97d UI: Fix the "Show controls" action 2024-04-05 12:18:33 -07:00
dependabot[bot] a4c67e1974
Bump aqlm[cpu,gpu] from 1.1.2 to 1.1.3 (#5790) 2024-04-05 13:26:49 -03:00
oobabooga 14f6194211 Bump Gradio to 4.25 2024-04-05 09:22:44 -07:00
oobabooga 5b91dbb73b
Merge pull request #5810 from oobabooga/dev
Merge dev branch
2024-04-05 10:55:16 -03:00
oobabooga 308452b783 Bitsandbytes: load preconverted 4bit models without additional flags 2024-04-04 18:10:24 -07:00
oobabooga d423021a48
Remove CTransformers support (#5807) 2024-04-04 20:23:58 -03:00
oobabooga 13fe38eb27 Remove specialized code for gpt-4chan 2024-04-04 16:11:47 -07:00
oobabooga 3952560da8 Bump llama-cpp-python to 0.2.59 2024-04-04 11:20:48 -07:00
oobabooga 9ab7365b56 Read rope_theta for DBRX model (thanks turboderp) 2024-04-01 20:25:31 -07:00
oobabooga db5f6cd1d8 Fix ExLlamaV2 loaders using unnecessary "bits" metadata 2024-03-30 21:51:39 -07:00
oobabooga 624faa1438 Fix ExLlamaV2 context length setting (closes #5750) 2024-03-30 21:33:16 -07:00
oobabooga 70c58b5fc2 Bump ExLlamaV2 to 0.0.17 2024-03-30 21:08:26 -07:00
oobabooga 1a7c027386
Merge pull request #5772 from oobabooga/dev
Merge dev branch
2024-03-29 15:09:53 -03:00
oobabooga c37f792afa Better way to handle user_bio default in the API (alternative to bdcf31035f) 2024-03-29 10:54:01 -07:00
oobabooga 9653a9176c Minor improvements to Parameters tab 2024-03-29 10:41:24 -07:00
oobabooga 3ce0d9221b Bump transformers to 4.39 2024-03-28 19:40:31 -07:00
oobabooga e0e28ecb0b Set the gradio 4 allowed_paths 2024-03-28 15:10:54 -07:00
oobabooga 723f912c16 Fix the "typing dots" position in latest Gradio version 2024-03-28 12:57:35 -07:00
oobabooga 35da6b989d
Organize the parameters tab (#5767) 2024-03-28 16:45:03 -03:00
dependabot[bot] 3609ea69e4
Bump aqlm[cpu,gpu] from 1.1.0 to 1.1.2 (#5728) 2024-03-26 16:36:16 -03:00
Bartowski 9ad116a6e2
Add config for hyperion and hercules models to use chatml (#5742) 2024-03-26 16:35:29 -03:00
wldhx 7cbafc0540
docker: Remove obsolete CLI_ARGS variable (#5726) 2024-03-26 16:34:53 -03:00
Yiximail bdcf31035f
Set a default empty string for user_bio to fix #5717 issue (#5722) 2024-03-26 16:34:03 -03:00
Yiximail 8c9aca239a
Fix prompt incorrectly set to empty when suffix is empty string (#5757) 2024-03-26 16:33:09 -03:00
oobabooga 2a92a842ce
Bump gradio to 4.23 (#5758) 2024-03-26 16:32:20 -03:00
oobabooga 7cf1402bde
Merge pull request #5716 from oobabooga/dev
Merge dev branch
2024-03-17 12:34:53 -03:00
oobabooga 49b111e2dd Lint 2024-03-17 08:33:23 -07:00
oobabooga d890c99b53 Fix StreamingLLM when content is removed from the beginning of the prompt 2024-03-14 09:18:54 -07:00
oobabooga d828844a6f Small fix: don't save truncation_length to settings.yaml
It should derive from model metadata or from a command-line flag.
2024-03-14 08:56:28 -07:00
oobabooga 2ef5490a36 UI: make light theme less blinding 2024-03-13 08:23:16 -07:00
oobabooga 40a60e0297 Convert attention_sink_size to int (closes #5696) 2024-03-13 08:15:49 -07:00
oobabooga edec3bf3b0 UI: avoid caching convert_to_markdown calls during streaming 2024-03-13 08:14:34 -07:00
oobabooga 8152152dd6 Small fix after 28076928ac 2024-03-11 19:56:35 -07:00
oobabooga 28076928ac
UI: Add a new "User description" field for user personality/biography (#5691) 2024-03-11 23:41:57 -03:00
oobabooga 63701f59cf UI: mention that n_gpu_layers > 0 is necessary for the GPU to be used 2024-03-11 18:54:15 -07:00
oobabooga 46031407b5 Increase the cache size of convert_to_markdown to 4096 2024-03-11 18:43:04 -07:00
oobabooga 9eca197409 Minor logging change 2024-03-11 16:31:13 -07:00
oobabooga afadc787d7 Optimize the UI by caching convert_to_markdown calls 2024-03-10 20:10:07 -07:00
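The optimization here is plain memoization of the message-to-HTML conversion; a minimal sketch with a stand-in body (not the project's renderer), using the 4096-entry limit from the later bump above:

```python
from functools import lru_cache
import html

@lru_cache(maxsize=4096)  # repeated renders of the same message become free
def convert_to_markdown(text: str) -> str:
    # Stand-in for the real markdown-to-HTML pipeline.
    return "<p>" + html.escape(text) + "</p>"
```

The streaming exception above follows from the same picture: a partial message changes on every update, so caching it would only churn the cache.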
oobabooga 1934cb61ef
Merge pull request #5680 from oobabooga/dev
Merge dev branch
2024-03-10 23:39:20 -03:00
oobabooga 056717923f Document StreamingLLM 2024-03-10 19:15:23 -07:00
oobabooga 15d90d9bd5 Minor logging change 2024-03-10 18:20:50 -07:00
oobabooga abcdd0ad5b API: don't use settings.yaml for default values 2024-03-10 16:15:52 -07:00
oobabooga a102c704f5 Add numba to requirements.txt 2024-03-10 16:13:29 -07:00
oobabooga b3ade5832b Keep AQLM only for Linux (fails to install on Windows) 2024-03-10 09:41:17 -07:00
oobabooga 67b24b0b88 Bump llama-cpp-python to 0.2.56 2024-03-10 09:07:27 -07:00
oobabooga 763f9beb7e Bump bitsandbytes to 0.43, add official Windows wheel 2024-03-10 08:30:53 -07:00
oobabooga 52a34921ef Installer: validate the checksum for the miniconda installer on Windows 2024-03-09 16:33:12 -08:00
oobabooga cf0697936a Optimize StreamingLLM by over 10x 2024-03-08 21:48:28 -08:00
oobabooga afb51bd5d6
Add StreamingLLM for llamacpp & llamacpp_HF (2nd attempt) (#5669) 2024-03-09 00:25:33 -03:00
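StreamingLLM keeps a handful of initial "attention sink" tokens plus the most recent window when the context overflows, instead of dropping the oldest tokens wholesale. A conceptual sketch (not the project's implementation; the sink size of 4 follows the paper's default):

```python
def trim_context(tokens: list, max_len: int, sink: int = 4) -> list:
    """Keep the first `sink` tokens and the most recent tokens."""
    if len(tokens) <= max_len:
        return tokens
    return tokens[:sink] + tokens[-(max_len - sink):]
```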
oobabooga 9271e80914 Add back AutoAWQ for Windows
https://github.com/casper-hansen/AutoAWQ/issues/377#issuecomment-1986440695
2024-03-08 14:54:56 -08:00
oobabooga 549bb88975 Increase height of "Custom stopping strings" UI field 2024-03-08 12:54:30 -08:00
oobabooga 238f69accc Move "Command for chat-instruct mode" to the main chat tab (closes #5634) 2024-03-08 12:52:52 -08:00
oobabooga d0663bae31
Bump AutoAWQ to 0.2.3 (Linux only) (#5658) 2024-03-08 17:36:28 -03:00
oobabooga 0e6eb7c27a
Add AQLM support (transformers loader) (#5466) 2024-03-08 17:30:36 -03:00
oobabooga 2681f6f640
Make superbooga & superboogav2 functional again (#5656) 2024-03-07 15:03:18 -03:00
oobabooga bae14c8f13 Right-truncate long chat completion prompts instead of left-truncating
Instructions are usually at the beginning of the prompt.
2024-03-07 08:50:24 -08:00
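That is, when a chat-completion prompt exceeds the context window, the overflow is now dropped from the tail rather than the head. A one-function sketch of the distinction:

```python
def truncate(tokens: list, max_len: int) -> list:
    # Right-truncate: keep the beginning of the prompt, where the system
    # message and instructions live, and drop the overflow at the end.
    return tokens[:max_len]
    # The old behavior, tokens[-max_len:], kept the end instead and could
    # silently discard the instructions.
```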
oobabooga aa0da07af0
Merge pull request #5655 from oobabooga/dev
Merge dev branch
2024-03-07 13:13:10 -03:00
Bartowski 104573f7d4
Update cache_4bit documentation (#5649)
---------

Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
2024-03-07 13:08:21 -03:00
oobabooga bef08129bc Small fix for cuda 11.8 in the one-click installer 2024-03-06 21:43:36 -08:00
oobabooga 303433001f Fix a check in the installer 2024-03-06 21:13:54 -08:00
oobabooga bde7f00cae Change the exllamav2 version number 2024-03-06 21:08:29 -08:00
oobabooga 2ec1d96c91
Add cache_4bit option for ExLlamaV2 (#5645) 2024-03-06 23:02:25 -03:00
oobabooga fa0e68cefd Installer: add back INSTALL_EXTENSIONS environment variable (for docker) 2024-03-06 11:31:06 -08:00
oobabooga 992affefef
Merge pull request #5641 from oobabooga/dev
Merge dev branch
2024-03-06 12:40:10 -03:00
oobabooga fcc92caa30 Installer: add option to install requirements for just one extension 2024-03-06 07:36:23 -08:00
oobabooga 2174958362
Revert gradio to 3.50.2 (#5640) 2024-03-06 11:52:46 -03:00
oobabooga 7eee9e9470 Add -k to curl command to download miniconda on windows (closes #5628) 2024-03-06 06:46:50 -08:00
oobabooga 03f03af535 Revert "Update peft requirement from ==0.8.* to ==0.9.* (#5626)"
This reverts commit 72a498ddd4.
2024-03-05 02:56:37 -08:00
oobabooga d61e31e182
Save the extensions after Gradio 4 (#5632) 2024-03-05 07:54:34 -03:00
oobabooga ae12d045ea Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2024-03-05 02:35:04 -08:00
dependabot[bot] 72a498ddd4
Update peft requirement from ==0.8.* to ==0.9.* (#5626) 2024-03-05 07:34:32 -03:00
oobabooga 1437f757a1 Bump HQQ to 0.1.5 2024-03-05 02:33:51 -08:00
oobabooga 63a1d4afc8
Bump gradio to 4.19 (#5522) 2024-03-05 07:32:28 -03:00
oobabooga 164ff2440d Use the correct PyTorch in the Colab notebook 2024-03-05 01:05:19 -08:00
oobabooga 3cfcab63a5 Update an installation message 2024-03-04 20:37:44 -08:00
oobabooga 907bda0d56 Move update_wizard_wsl.sh to update_wizard_wsl.bat 2024-03-04 19:57:49 -08:00
oobabooga f697cb4609 Move update_wizard_windows.sh to update_wizard_windows.bat (oops) 2024-03-04 19:26:24 -08:00
oobabooga 2d74660733 Don't git pull on "Install/update extensions requirements" 2024-03-04 12:37:10 -08:00
oobabooga fbe83854ca Minor message change 2024-03-04 11:10:37 -08:00
oobabooga 90ab022856 Minor message change 2024-03-04 10:54:16 -08:00
oobabooga 97dc3602fc
Create an update wizard (#5623) 2024-03-04 15:52:24 -03:00
oobabooga 6adf222599 One-click installer: change an info message 2024-03-04 08:20:04 -08:00
oobabooga 4bb79c57ac One-click installer: change an info message 2024-03-04 08:11:55 -08:00
oobabooga 74564fe8d0 One-click installer: delete the Miniconda installer after completion 2024-03-04 08:11:03 -08:00
oobabooga dc2dd5b9d8 One-click installer: add an info message before git pull 2024-03-04 08:00:39 -08:00
oobabooga 527ba98105
Do not install extensions requirements by default (#5621) 2024-03-04 04:46:39 -03:00
oobabooga fa4ce0eee8 One-click installer: minor change to CMD_FLAGS.txt in CPU mode 2024-03-03 17:42:59 -08:00
oobabooga 8bd4960d05
Update PyTorch to 2.2 (also update flash-attn to 2.5.6) (#5618) 2024-03-03 19:40:32 -03:00
oobabooga 70047a5c57 Bump bitsandbytes to 0.42.0 on Windows 2024-03-03 13:19:27 -08:00
oobabooga 24e86bb21b Bump llama-cpp-python to 0.2.55 2024-03-03 12:14:48 -08:00
oobabooga 60f3d87309
Merge pull request #5617 from oobabooga/dev
Merge dev branch
2024-03-03 15:50:40 -03:00
oobabooga 314e42fd98 Fix transformers requirement 2024-03-03 10:49:28 -08:00
oobabooga 71b1617c1b Remove bitsandbytes from incompatible requirements.txt files 2024-03-03 08:24:54 -08:00
kalomaze cfb25c9b3f
Cubic sampling w/ curve param (#5551)
---------

Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
2024-03-03 13:22:21 -03:00
jeffbiocode 3168644152
Training: Update llama2-chat-format.json (#5593) 2024-03-03 12:42:14 -03:00
oobabooga 71dc5b4dee Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2024-02-28 19:59:20 -08:00
oobabooga 09b13acfb2 Perplexity evaluation: print to terminal after calculation is finished 2024-02-28 19:58:21 -08:00
dependabot[bot] dfdf6eb5b4
Bump hqq from 0.1.3 to 0.1.3.post1 (#5582) 2024-02-26 20:51:39 -03:00
oobabooga 332957ffec Bump llama-cpp-python to 0.2.52 2024-02-26 15:05:53 -08:00
oobabooga b64770805b Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2024-02-26 08:51:31 -08:00
oobabooga 830168d3d4 Revert "Replace hashlib.sha256 with hashlib.file_digest so we don't need to load entire files into ram before hashing them. (#4383)"
This reverts commit 0ced78fdfa.
2024-02-26 05:54:33 -08:00
Bartowski 21acf504ce
Bump transformers to 4.38 for gemma compatibility (#5575) 2024-02-25 20:15:13 -03:00
oobabooga 4164e29416 Block the "To create a public link, set share=True" gradio message 2024-02-25 15:06:08 -08:00
oobabooga ba852716fd
Merge pull request #5574 from oobabooga/dev
Merge dev branch
2024-02-25 14:29:35 -03:00
oobabooga d34126255d Fix loading extensions with "-" in the name (closes #5557) 2024-02-25 09:24:52 -08:00
Lounger 0f68c6fb5b
Big picture fixes (#5565) 2024-02-25 14:10:16 -03:00
jeffbiocode 45c4cd01c5
Add llama 2 chat format for lora training (#5553) 2024-02-25 02:36:36 -03:00
Devin Roark e0fc808980
fix: ngrok logging does not use the shared logger module (#5570) 2024-02-25 02:35:59 -03:00
oobabooga 32ee5504ed
Remove -k from curl command to download miniconda (#5535) 2024-02-25 02:35:23 -03:00
oobabooga c07dc56736 Bump llama-cpp-python to 0.2.50 2024-02-24 21:34:11 -08:00
oobabooga 98580cad8e Bump exllamav2 to 0.0.14 2024-02-24 18:35:42 -08:00
oobabooga 527f2652af Bump llama-cpp-python to 0.2.47 2024-02-22 19:48:49 -08:00
oobabooga 3f42e3292a Revert "Bump autoawq from 0.1.8 to 0.2.2 (#5547)"
This reverts commit d04fef6a07.
2024-02-22 19:48:04 -08:00
oobabooga 10aedc329f Logging: more readable messages when renaming chat histories 2024-02-22 07:57:06 -08:00
oobabooga faf3bf2503 Perplexity evaluation: make UI events more robust (attempt) 2024-02-22 07:13:22 -08:00
oobabooga ac5a7a26ea Perplexity evaluation: add some informative error messages 2024-02-21 20:20:52 -08:00
oobabooga 59032140b5 Fix CFG with llamacpp_HF (2nd attempt) 2024-02-19 18:35:42 -08:00
oobabooga c203c57c18 Fix CFG with llamacpp_HF 2024-02-19 18:09:49 -08:00
dependabot[bot] 5f7dbf454a
Update optimum requirement from ==1.16.* to ==1.17.* (#5548) 2024-02-19 19:15:21 -03:00
dependabot[bot] d04fef6a07
Bump autoawq from 0.1.8 to 0.2.2 (#5547) 2024-02-19 19:14:55 -03:00
dependabot[bot] ed6ff49431
Update accelerate requirement from ==0.25.* to ==0.27.* (#5546) 2024-02-19 19:14:04 -03:00
oobabooga d6bb6e7390
Merge pull request #5549 from oobabooga/dev
Merge dev branch
2024-02-19 18:53:25 -03:00
Kevin Pham 10df23efb7
Remove message.content from openai streaming API (#5503) 2024-02-19 18:50:27 -03:00
oobabooga 0b2279d031 Bump llama-cpp-python to 0.2.44 2024-02-19 13:42:31 -08:00
oobabooga ae05d9830f Replace {{char}}, {{user}} in the chat template itself 2024-02-18 19:57:54 -08:00
oobabooga 717c3494e8 Minor width change after daa140447e 2024-02-18 15:23:45 -08:00
oobabooga 1f27bef71b
Move chat UI elements to the right on desktop (#5538) 2024-02-18 14:32:05 -03:00
oobabooga d8064c00e8 UI: hide chat scrollbar on desktop when not hovered 2024-02-17 20:47:14 -08:00
oobabooga 36c29084bb UI: fix instruct style background for multiline inputs 2024-02-17 20:09:47 -08:00
oobabooga 904867a139 UI: fix scroll down after sending a multiline message 2024-02-17 19:27:13 -08:00
oobabooga 7838075990
Merge pull request #5534 from oobabooga/dev
Merge dev branch
2024-02-17 18:09:40 -03:00
oobabooga d6bd71db7f ExLlamaV2: fix loading when autosplit is not set 2024-02-17 12:54:37 -08:00
oobabooga dd46229487
Merge pull request #5530 from oobabooga/dev
Merge dev branch
2024-02-17 14:02:39 -03:00
oobabooga af0bbf5b13 Lint 2024-02-17 09:01:04 -08:00
fschuh fa1019e8fe
Removed extra spaces from Mistral instruction template that were causing Mistral to misbehave (#5517) 2024-02-16 21:40:51 -03:00
oobabooga c375c753d6 Bump bitsandbytes to 0.42 (Linux only) 2024-02-16 10:47:57 -08:00
oobabooga a6730f88f7
Add --autosplit flag for ExLlamaV2 (#5524) 2024-02-16 15:26:10 -03:00
oobabooga 4039999be5 Autodetect llamacpp_HF loader when tokenizer exists 2024-02-16 09:29:26 -08:00
oobabooga 76d28eaa9e
Add a menu for customizing the instruction template for the model (#5521) 2024-02-16 14:21:17 -03:00
oobabooga 0e1d8d5601 Instruction template: make "Send to default/notebook" work without a tokenizer 2024-02-16 08:01:07 -08:00
oobabooga f465b7b486
Downloader: start one session per file (#5520) 2024-02-16 12:55:27 -03:00
oobabooga 44018c2f69
Add a "llamacpp_HF creator" menu (#5519) 2024-02-16 12:43:24 -03:00
oobabooga b2b74c83a6 Fix Qwen1.5 in llamacpp_HF 2024-02-15 19:04:19 -08:00
oobabooga 080f7132c0
Revert gradio to 3.50.2 (#5513) 2024-02-15 20:40:23 -03:00
oobabooga ea0e1feee7 Bump llama-cpp-python to 0.2.43 2024-02-14 21:58:24 -08:00
oobabooga 549f106879 Bump ExLlamaV2 to v0.0.13.2 2024-02-14 21:57:48 -08:00
oobabooga 7123ac3f77
Remove "Maximum UI updates/second" parameter (#5507) 2024-02-14 23:34:30 -03:00
DominikKowalczyk 33c4ce0720
Bump gradio to 4.19 (#5419)
---------

Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
2024-02-14 23:28:26 -03:00
oobabooga 771c59290a
Merge pull request #5502 from oobabooga/dev
Merge dev branch
2024-02-14 11:32:58 -03:00
oobabooga 04d8bdf929 Fix ExLlamaV2 requirement on Windows 2024-02-14 06:31:20 -08:00
oobabooga b16958575f Minor bug fix 2024-02-13 19:48:32 -08:00
oobabooga d47182d9d1
llamacpp_HF: do not use oobabooga/llama-tokenizer (#5499) 2024-02-14 00:28:51 -03:00
oobabooga 3a9ce3cfa6 Update stalebot message 2024-02-13 19:06:32 -08:00
oobabooga 93dd31fc0f Increase stalebot timeout 2024-02-13 16:07:33 -08:00
oobabooga dc6adefd87
Merge pull request #5496 from oobabooga/dev
Merge dev branch
2024-02-13 21:06:16 -03:00
oobabooga 069ed7c6ef Lint 2024-02-13 16:05:41 -08:00
oobabooga 193548edce Minor fix to ExLlamaV2 requirements 2024-02-13 16:00:06 -08:00
oobabooga 25b655faeb Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2024-02-13 15:49:53 -08:00
oobabooga f99f1fc68e Bump llama-cpp-python to 0.2.42 2024-02-13 15:49:20 -08:00
dependabot[bot] d8081e85ec
Update peft requirement from ==0.7.* to ==0.8.* (#5446) 2024-02-13 16:27:18 -03:00
dependabot[bot] 653b195b1e
Update numpy requirement from ==1.24.* to ==1.26.* (#5490) 2024-02-13 16:26:35 -03:00
dependabot[bot] 147b4cf3e0
Bump hqq from 0.1.2.post1 to 0.1.3 (#5489) 2024-02-13 16:25:02 -03:00
Steven K 512933fa44
Update main.css to allow scrolling in code blocks (#5495) 2024-02-13 16:24:30 -03:00
oobabooga e9fea353c5 Bump llama-cpp-python to 0.2.40 2024-02-13 11:22:34 -08:00
oobabooga 7342afaf19 Update the PyTorch installation instructions 2024-02-08 20:36:11 -08:00
oobabooga 86c320ab5a llama.cpp: add a progress bar for prompt evaluation 2024-02-07 21:56:10 -08:00
oobabooga acea6a6669 Add more exllamav2 wheels 2024-02-07 08:24:29 -08:00
oobabooga 35537ad3d1
Bump exllamav2 to 0.0.13.1 (#5463) 2024-02-07 13:17:04 -03:00
oobabooga b8e25e8678 Bump llama-cpp-python to 0.2.39 2024-02-07 06:50:47 -08:00
oobabooga c55b8ce932 Improved random preset generation 2024-02-06 08:51:52 -08:00
oobabooga 4e34ae0587 Minor logging improvements 2024-02-06 08:22:08 -08:00
oobabooga 3add2376cd Better warpers logging 2024-02-06 07:09:21 -08:00
oobabooga 494cc3c5b0 Handle empty sampler priority field, use default values 2024-02-06 07:05:32 -08:00
oobabooga 0f134bf744
Merge pull request #5453 from oobabooga/dev
Merge dev branch
2024-02-06 11:50:21 -03:00
oobabooga 775902c1f2 Sampler priority: better logging, always save to presets 2024-02-06 06:49:22 -08:00
oobabooga a329db062e
Merge pull request #5452 from oobabooga/dev
Merge dev branch
2024-02-06 11:36:00 -03:00
oobabooga acfbe6b3b3 Minor doc changes 2024-02-06 06:35:01 -08:00
oobabooga 8ee3cea7cb Improve some log messages 2024-02-06 06:31:27 -08:00
oobabooga 8a6d9abb41 Small fixes 2024-02-06 06:26:27 -08:00
oobabooga 2a1063eff5 Revert "Remove non-HF ExLlamaV2 loader (#5431)"
This reverts commit cde000d478.
2024-02-06 06:21:36 -08:00
oobabooga 8c35fefb3b
Add custom sampler order support (#5443) 2024-02-06 11:20:10 -03:00
oobabooga 7301c7618f Minor change to Models tab 2024-02-04 21:49:58 -08:00
oobabooga f234fbe83f Improve a log message after previous commit 2024-02-04 21:44:53 -08:00
oobabooga 7073665a10
Truncate long chat completions inputs (#5439) 2024-02-05 02:31:24 -03:00
oobabooga 9033fa5eee Organize the Model tab 2024-02-04 19:30:22 -08:00
oobabooga cd4ffd3dd4 Update docs 2024-02-04 18:48:04 -08:00
oobabooga 92d0617bce Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2024-02-04 18:40:46 -08:00
oobabooga a210999255 Bump safetensors version 2024-02-04 18:40:25 -08:00
Badis Ghoubali 9fdee65cf5
Improve ChatML template (#5411) 2024-02-04 23:39:15 -03:00
Forkoz 2a45620c85
Split by rows instead of layers for llama.cpp multi-gpu (#5435) 2024-02-04 23:36:40 -03:00
Badis Ghoubali 3df7e151f7
fix the n_batch slider (#5436) 2024-02-04 18:15:30 -03:00
oobabooga 4e188eeb80 Lint 2024-02-03 20:40:10 -08:00
oobabooga cde000d478
Remove non-HF ExLlamaV2 loader (#5431) 2024-02-04 01:15:51 -03:00
kalomaze b6077b02e4
Quadratic sampling (#5403)
---------

Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
2024-02-04 00:20:02 -03:00
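Quadratic ("smooth") sampling reshapes each logit as a quadratic function of its distance from the top logit, controlled by a smoothing factor. A hedged sketch of the general transformation as I understand the PR, not its exact code:

```python
import torch

def quadratic_smoothing(logits: torch.Tensor, smoothing_factor: float) -> torch.Tensor:
    # Tokens near the top logit are pulled toward it while the far tail is
    # pushed further away, sharpening the distribution without a hard cutoff.
    max_logit = logits.max()
    return max_logit - smoothing_factor * (max_logit - logits) ** 2
```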
oobabooga e98d1086f5
Bump llama-cpp-python to 0.2.38 (#5420) 2024-02-01 20:09:30 -03:00
oobabooga 4f3fdf1b5f
Merge pull request #5404 from oobabooga/dev
Merge dev branch
2024-01-30 14:17:08 -03:00
oobabooga 167ee72d4e Lint 2024-01-30 09:16:23 -08:00
oobabooga ee65f4f014 Downloader: don't assume that huggingface_hub is installed 2024-01-30 09:14:11 -08:00
oobabooga 89f6036e98
Bump llama-cpp-python, remove python 3.8/3.9, cuda 11.7 (#5397) 2024-01-30 13:19:20 -03:00
Forkoz 528318b700
API: Remove tiktoken from logit bias (#5391) 2024-01-28 21:42:03 -03:00
Badis Ghoubali 40c7977f9b
Add roleplay.gbnf grammar (#5368) 2024-01-28 21:41:28 -03:00
smCloudInTheSky b1463df0a1
docker: add options for CPU only, Intel GPU, AMD GPU (#5380) 2024-01-28 11:18:14 -03:00
oobabooga d921f80322 one-click: minor fix after 5e87678fea 2024-01-28 06:14:15 -08:00
Evgenii 26c3ab367e
one-click: use f-strings to improve readability and unify with the rest of the code (#5068) 2024-01-27 17:31:22 -03:00
Andrew C. Dvorak 5e87678fea
Support running as a git submodule. (#5227) 2024-01-27 17:18:50 -03:00
Hubert Kasperek 69622930c7
Ability to run the Coqui TTS extension on the CPU (#5365) 2024-01-27 17:15:34 -03:00
Anthony Guijarro 828be63f2c
Downloader: use HF get_token function (#5381) 2024-01-27 17:13:09 -03:00
oobabooga e7a760e6b3
Merge pull request #5379 from oobabooga/dev
Merge dev branch
2024-01-26 11:18:45 -03:00
oobabooga de387069da Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2024-01-26 06:12:19 -08:00
sam-ngu c0bdcee646
added trust_remote_code to deepspeed init loaderClass (#5237) 2024-01-26 11:10:57 -03:00
dependabot[bot] bfe2326a24
Bump hqq from 0.1.2 to 0.1.2.post1 (#5349) 2024-01-26 11:10:18 -03:00
oobabooga 70648e75e6 Docs: minor change 2024-01-26 06:00:26 -08:00
oobabooga c1470870bb Update README 2024-01-26 05:58:40 -08:00
oobabooga 87dc421ee8
Bump exllamav2 to 0.0.12 (#5352) 2024-01-22 22:40:12 -03:00
oobabooga 837bd888e4
Merge pull request #5348 from oobabooga/dev
Merge dev branch
2024-01-22 11:18:46 -03:00
oobabooga 1343aa3d33
Merge pull request #5347 from oobabooga/dev
Merge dev branch
2024-01-22 09:44:53 -03:00
oobabooga aa575119e6 API: minor fix 2024-01-22 04:38:43 -08:00
oobabooga 821dd65fb3 API: add a comment 2024-01-22 04:15:51 -08:00
oobabooga 6247eafcc5 API: better handle temperature = 0 2024-01-22 04:12:23 -08:00
oobabooga 817866c9cf Lint 2024-01-22 04:07:25 -08:00
oobabooga b9d1873301 Bump transformers to 4.37 2024-01-22 04:07:12 -08:00
oobabooga aad73667af Lint 2024-01-22 03:25:55 -08:00
oobabooga 6ada77cf5a Update README.md 2024-01-22 03:17:15 -08:00
oobabooga 8b5495ebf8 Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2024-01-22 03:15:29 -08:00
oobabooga cc6505df14 Update README.md 2024-01-22 03:14:56 -08:00
Cohee fbf8ae39f8
API: Allow content arrays for multimodal OpenAI requests (#5277) 2024-01-22 08:10:26 -03:00
Ercan 166fdf09f3
API: Properly handle Images with RGBA color format (#5332) 2024-01-22 08:08:51 -03:00
lmg-anon db1da9f98d
Fix logprobs tokens in OpenAI API (#5339) 2024-01-22 08:07:42 -03:00
oobabooga b5cabb6e9d
Bump llama-cpp-python to 0.2.31 (#5345) 2024-01-22 08:05:59 -03:00
oobabooga 8962bb173e
Bump llama-cpp-python to 0.2.29 (#5307) 2024-01-18 14:24:17 -03:00
Stefan Daniel Schwarz 232c07bf1f
API: set do_sample=false when temperature=0 (#5275) 2024-01-17 23:58:11 -03:00
Yiximail 3fef37cda8
UI: Update position of show-controls label to avoid line breaks due to font size (#5256) 2024-01-17 23:56:48 -03:00
oobabooga 7916cf863b Bump transformers (necessary for e055967974) 2024-01-17 12:37:31 -08:00
Forkoz 5c5ef4cef7
UI: change n_gpu_layers maximum to 256 for larger models. (#5262) 2024-01-17 17:13:16 -03:00
ilya sheprut 4d14eb8b82
LoRA: Fix error "Attempting to unscale FP16 gradients" when training (#5268) 2024-01-17 17:11:49 -03:00
Katehuuh 535ea9928a
Fixed whisper README Typo Hyperlinks (#5281) 2024-01-17 17:10:45 -03:00
oobabooga e055967974
Add prompt_lookup_num_tokens parameter (#5296) 2024-01-17 17:09:36 -03:00
oobabooga d8c3a5bee8
Merge pull request #5266 from oobabooga/dev
Merge dev branch (#5257)
2024-01-14 13:31:40 -03:00
Samuel Weinhardt 952a05a7c8
Correct field alias types for OpenAI extension (#5257) 2024-01-14 13:30:36 -03:00
oobabooga 61e4bfe305
Merge pull request #5253 from oobabooga/dev
Merge dev branch
2024-01-13 21:49:32 -03:00
Rimmy J d80b191b1c
Add requirement jinja2==3.1.* to fix error as described in issue #5240 (#5249)
---------

Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
Co-authored-by: Rim <anonymous@mail.com>
2024-01-13 21:47:13 -03:00
oobabooga e1dd5ee2de UI: focus on the chat input when switching to the chat tab 2024-01-10 05:12:49 -08:00
oobabooga ec2da5adef Docs: document keyboard shortcuts 2024-01-10 03:58:39 -08:00
oobabooga b3fc2cd887 UI: Do not save unchanged extension settings to settings.yaml 2024-01-10 03:48:30 -08:00
oobabooga bb2c4707c4 API: fix bug after previous commit 2024-01-09 19:08:02 -08:00
oobabooga 4332e24740 API: Make user_name/bot_name the official and name1/name2 the alias 2024-01-09 19:06:11 -08:00
oobabooga a4c51b5a05 API: add "user_name" and "bot_name" aliases for name1 and name2 2024-01-09 19:02:45 -08:00
oobabooga 53dc1d8197 UI: Do not save unchanged settings to settings.yaml 2024-01-09 18:59:04 -08:00
oobabooga 2dc8db8aa4
Merge pull request #5220 from oobabooga/dev
Merge dev branch
2024-01-09 21:38:35 -03:00
oobabooga 038b4fc8af Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2024-01-09 16:28:14 -08:00
oobabooga 89e7e107fc Lint 2024-01-09 16:27:50 -08:00
Badis Ghoubali c44836c4d7
Fix spaces in Mistral/Mixtral instruct prompt (#5214) 2024-01-09 21:12:54 -03:00
mamei16 bec4e0a1ce
Fix update event in refresh buttons (#5197) 2024-01-09 14:49:37 -03:00
oobabooga 4333d82b9d Minor bug fix 2024-01-09 06:55:18 -08:00
oobabooga fbce30b09f
Reduce the number of built-in presets (#5217) 2024-01-09 11:50:10 -03:00
oobabooga 953343cced Improve the file saving/deletion menus 2024-01-09 06:33:47 -08:00
oobabooga 123f27a3c5 Load the nearest character after deleting a character
Instead of the first.
2024-01-09 06:24:27 -08:00
oobabooga ba87b9993d Change a label in the gallery extension 2024-01-09 06:06:57 -08:00
oobabooga b908ed318d Revert "Rename past chats -> chat history"
This reverts commit aac93a1fd6.
2024-01-09 05:26:07 -08:00
oobabooga 4ca82a4df9 Save light/dark theme on "Save UI defaults to settings.yaml" 2024-01-09 04:20:10 -08:00
oobabooga 7af50ede94 Reorder some buttons 2024-01-09 04:11:50 -08:00
oobabooga a9f49a7574 Confirm the chat history rename with enter 2024-01-09 04:00:53 -08:00
oobabooga 4d730a759a Focus on the rename text area when it becomes visible 2024-01-09 04:00:47 -08:00
oobabooga 6e9d814095 Change a padding after 4f7e1eeafd 2024-01-09 03:41:31 -08:00
oobabooga 7bdd2118a2 Change some log messages when deleting files 2024-01-09 03:32:01 -08:00
oobabooga aac93a1fd6 Rename past chats -> chat history 2024-01-09 03:14:30 -08:00
oobabooga 615fa11af8 Move new chat button, improve history deletion handling 2024-01-08 21:22:37 -08:00
oobabooga 4f7e1eeafd
Past chat histories in a side bar on desktop (#5098)
Lots of room for improvement, but that's a start.
2024-01-09 01:57:29 -03:00
oobabooga 372ef5e2d8 Fix dynatemp parameters always visible 2024-01-08 19:42:31 -08:00
oobabooga 29c2693ea0
dynatemp_low, dynatemp_high, dynatemp_exponent parameters (#5209) 2024-01-08 23:28:35 -03:00
oobabooga dc1df22a2b
Press Tab to switch between current tab and Parameters tab (#5210) 2024-01-08 23:23:55 -03:00
dependabot[bot] 32cdc66cf1
Bump hqq from 0.1.1.post1 to 0.1.2 (#5204) 2024-01-08 22:51:44 -03:00
oobabooga c4e005efec Fix dropdown menus sometimes failing to refresh 2024-01-08 17:49:54 -08:00
oobabooga 9cd2106303 Revert "Add dynamic temperature to the random preset button"
This reverts commit 4365fb890f.
2024-01-08 16:46:24 -08:00
oobabooga 4365fb890f Add dynamic temperature to the random preset button 2024-01-07 13:08:15 -08:00
oobabooga ad1ff53034
Merge pull request #5199 from oobabooga/dev
Merge dev branch
2024-01-07 17:06:02 -03:00
oobabooga 0d07b3a6a1
Add dynamic_temperature_low parameter (#5198) 2024-01-07 17:03:47 -03:00
oobabooga b8a0b3f925 Don't print torch tensors with --verbose 2024-01-07 10:35:55 -08:00
oobabooga e169993b7a
Merge pull request #5195 from oobabooga/dev
Merge dev branch
2024-01-07 15:12:27 -03:00
oobabooga cf820c69c5 Print generation parameters with --verbose (HF only) 2024-01-07 10:06:23 -08:00
oobabooga c4c7fc4ab3 Lint 2024-01-07 09:36:56 -08:00
Yilong Guo d93db3b486
Refine ipex setup (#5191) 2024-01-07 10:40:30 -03:00
kalomaze 48327cc5c4
Dynamic Temperature HF loader support (#5174)
---------

Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
2024-01-07 10:36:26 -03:00
Philipp Claßen 3eca20c015
Typo fixed in variable names (#5184) 2024-01-06 03:05:03 -03:00
oobabooga 8ea3f31601
Merge pull request #5181 from oobabooga/dev
Merge dev branch
2024-01-05 18:42:30 -03:00
oobabooga 91c2b8e11c Improvements to character_bias extension 2024-01-04 20:48:26 -08:00
oobabooga 248742df1c Save extension fields to settings.yaml on "Save UI defaults" 2024-01-04 20:33:42 -08:00
oobabooga 9e86bea8e9 Use requirements_cpu.txt for intel 2024-01-04 18:52:14 -08:00
oobabooga 3d854ee516
Pin PyTorch version to 2.1 (#5056) 2024-01-04 23:50:23 -03:00
Matthew Raaff c9c31f71b8
Various one-click installer improvements (#4994)
---------

Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
2024-01-04 23:41:54 -03:00
oobabooga c9d814592e Increase maximum temperature value to 5 2024-01-04 17:28:15 -08:00
Guanghua Lu 3bb4b0504e
Close the menu on second click. (#5110) 2024-01-04 13:52:11 -03:00
oobabooga e4d724eb3f Fix cache_folder bug introduced in 37eff915d6 2024-01-04 07:49:40 -08:00
Alberto Cano 37eff915d6
Use --disk-cache-dir for all caches 2024-01-04 00:27:26 -03:00
oobabooga c54d1daaaa
Merge pull request #5163 from oobabooga/dev
Merge dev branch
2024-01-03 22:57:00 -03:00
Lounger 7965f6045e
Fix loading latest history for file names with dots (#5162) 2024-01-03 22:39:41 -03:00
Adam Florizone 894e1a0700
Docker: added build args for non AVX2 CPU (#5154) 2024-01-03 20:43:02 -03:00
AstrisCantCode b80e6365d0
Fix various bugs for LoRA training (#5161) 2024-01-03 20:42:20 -03:00
oobabooga f6a204d7c9 Bump llama-cpp-python to 0.2.26 2024-01-03 11:06:36 -08:00
oobabooga 3a6cba9021 Add top_k=1 to Debug-deterministic preset
Makes it work with llama.cpp
2024-01-02 15:54:56 -08:00
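The reasoning: llama.cpp has no do_sample switch, so greedy decoding must be expressed through the samplers themselves, and top_k=1 does exactly that. A hypothetical rendering of the preset as generation parameters:

```python
# Hypothetical parameter set for a "Debug-deterministic" preset.
debug_deterministic = {
    "do_sample": False,  # greedy decoding for the HF-based loaders
    "top_k": 1,          # equivalent greedy behavior for llama.cpp
}
```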
oobabooga 3f28925a8d
Merge pull request #5152 from oobabooga/dev
Merge dev branch
2024-01-02 13:22:14 -03:00
oobabooga 7cce88c403 Remove an unnecessary exception 2024-01-02 07:20:59 -08:00
oobabooga 90c7e84b01 UI: improve chat style margin for last bot message 2024-01-01 19:50:13 -08:00
oobabooga a4b4708560 Decrease "Show controls" button opacity 2024-01-01 19:08:30 -08:00
oobabooga 94afa0f9cf Minor style changes 2024-01-01 16:00:22 -08:00
oobabooga 3e3a66e721
Merge pull request #5132 from oobabooga/dev
Merge dev branch
2023-12-31 02:32:25 -03:00
oobabooga cbf6f9e695 Update some UI messages 2023-12-30 21:31:17 -08:00
oobabooga 2aad91f3c9
Remove deprecated command-line flags (#5131) 2023-12-31 02:07:48 -03:00
TheInvisibleMage 485b85ee76
Superboogav2 Quick Fixes (#5089) 2023-12-31 02:03:23 -03:00
oobabooga 2734ce3e4c
Remove RWKV loader (#5130) 2023-12-31 02:01:40 -03:00
oobabooga 0e54a09bcb
Remove exllamav1 loaders (#5128) 2023-12-31 01:57:06 -03:00
oobabooga 8e397915c9
Remove --sdp-attention, --xformers flags (#5126) 2023-12-31 01:36:51 -03:00
B611 b7dd1f9542
Specify utf-8 encoding for model metadata file open (#5125) 2023-12-31 01:34:32 -03:00
oobabooga 20a2eaaf95 Add .vs to .gitignore 2023-12-27 12:58:07 -08:00
oobabooga a4079e879e CSS: don't change --chat-height when outside the chat tab 2023-12-27 11:51:55 -08:00
oobabooga c419206ce1 Lint the JS/CSS 2023-12-27 09:59:23 -08:00
oobabooga 3fd7073808
Merge pull request #5100 from oobabooga/dev
Merge dev branch
2023-12-27 13:23:28 -03:00
oobabooga 648c2d1cc2 Update settings-template.yaml 2023-12-25 15:25:16 -08:00
oobabooga c21e3d6300
Merge pull request #5044 from TheLounger/style_improvements
Improve chat styles
2023-12-25 20:00:50 -03:00
oobabooga 2ad6c526b8 Check if extensions block exists before changing it 2023-12-25 14:43:12 -08:00
oobabooga 63553b41ed Improve some paddings 2023-12-25 14:25:31 -08:00
oobabooga abd227594c Fix a border radius 2023-12-25 14:17:00 -08:00
oobabooga 8d0359a6d8 Rename some CSS variables 2023-12-25 14:10:07 -08:00
oobabooga 5466ae59a7 Prevent input/chat area overlap with new --my-delta variable 2023-12-25 14:07:31 -08:00
oobabooga 19d13743a6
Merge pull request #5078 from oobabooga/dev
Merge dev branch
2023-12-25 17:23:01 -03:00
oobabooga 02d063fb9f Fix extra space after 18ca35faaa 2023-12-25 08:38:17 -08:00
oobabooga ae927950a8 Remove instruct style border radius 2023-12-25 08:35:33 -08:00
oobabooga 18ca35faaa Space between chat tab and extensions block 2023-12-25 08:34:02 -08:00
oobabooga 73ba7a8921 Change height -> min-height for .chat 2023-12-25 08:32:02 -08:00
oobabooga 29b0f14d5a
Bump llama-cpp-python to 0.2.25 (#5077) 2023-12-25 12:36:32 -03:00
oobabooga af876095e2
Merge pull request #5073 from oobabooga/dev
Merge dev branch
2023-12-25 02:58:45 -03:00
oobabooga c06f630bcc Increase max_updates_second maximum value 2023-12-24 13:29:47 -08:00
Casper 92d5e64a82
Bump AutoAWQ to 0.1.8 (#5061) 2023-12-24 14:27:34 -03:00
oobabooga 4aeebfc571 Merge branch 'dev' into TheLounger-style_improvements 2023-12-24 09:24:55 -08:00
oobabooga d76b00c211 Pin lm_eval package version 2023-12-24 09:22:31 -08:00
oobabooga 8c60495878 UI: add "Maximum UI updates/second" parameter 2023-12-24 09:17:40 -08:00
zhangningboo 1b8b61b928
Fix output_ids decoding for Qwen/Qwen-7B-Chat (#5045) 2023-12-22 23:11:02 -03:00
kabachuha dbe438564e
Support for sending images into OpenAI chat API (#4827) 2023-12-22 22:45:53 -03:00
Stefan Daniel Schwarz 8956f3ebe2
Synthia instruction templates (#5041) 2023-12-22 22:19:43 -03:00
Yiximail afc91edcb2
Reset the model_name after unloading the model (#5051) 2023-12-22 22:18:24 -03:00
Lounger 554a8f910b Attempt at shrinking chat area when input box grows 2023-12-22 04:51:20 +01:00
oobabooga 4b25acf58f
Merge pull request #5039 from oobabooga/dev
Merge dev branch
2023-12-21 20:22:48 -03:00
Lounger 588b37c032 Add slight padding to top of message container 2023-12-21 22:04:41 +01:00
Lounger 568541aa31 Remove bottom padding on chat tab 2023-12-21 21:48:34 +01:00
oobabooga c1b99f45cb Make --help output instant 2023-12-21 09:32:20 -08:00
Lounger 0dd759c44f Claim more vertical space 2023-12-21 05:42:06 +01:00
Lounger 6fbd64db72 Set borders for all chat styles 2023-12-21 05:00:56 +01:00
oobabooga 2706149c65
Organize the CMD arguments by group (#5027) 2023-12-21 00:33:55 -03:00
oobabooga c727a70572 Remove redundancy from modules/loaders.py 2023-12-20 19:18:07 -08:00
Lounger e3e053ab99 UI: Expand chat vertically and handle header wrapping 2023-12-21 03:42:23 +01:00
Lounger a098c7eee3 Merge branch 'dev' into style_improvements 2023-12-20 23:09:15 +01:00
oobabooga 11288d11d4
Merge pull request #5022 from oobabooga/dev
Merge dev branch
2023-12-20 15:56:04 -03:00
luna 6efbe3009f
let exllama v1 models load safetensor loras (#4854) 2023-12-20 13:29:19 -03:00
oobabooga bcba200790 Fix EOS being ignored in ExLlamav2 after previous commit 2023-12-20 07:54:06 -08:00
oobabooga f0f6d9bdf9 Add HQQ back & update version
This reverts commit 2289e9031e.
2023-12-20 07:46:09 -08:00
oobabooga b15f510154 Optimize ExLlamav2 (non-HF) loader 2023-12-20 07:31:42 -08:00
oobabooga 489f4a23bf
Merge pull request #5012 from oobabooga/dev
Merge dev branch
2023-12-20 02:59:30 -03:00
oobabooga 258c695ead Add rich requirement 2023-12-19 21:58:36 -08:00
oobabooga c1f78dbd0f
Merge pull request #5011 from oobabooga/dev
Merge dev branch
2023-12-20 02:38:25 -03:00
oobabooga fadb295d4d Lint 2023-12-19 21:36:57 -08:00
oobabooga 2289e9031e Remove HQQ from requirements (after https://github.com/oobabooga/text-generation-webui/issues/4993) 2023-12-19 21:33:49 -08:00
oobabooga fb8ee9f7ff Add a specific error if HQQ is missing 2023-12-19 21:32:58 -08:00
oobabooga 366c93a008 Hide a warning 2023-12-19 21:03:20 -08:00
oobabooga 9992f7d8c0 Improve several log messages 2023-12-19 20:54:32 -08:00
oobabooga 23818dc098 Better logger
Credits: vladmandic/automatic
2023-12-19 20:38:33 -08:00
oobabooga 95600073bc Add an informative error when extension requirements are missing 2023-12-19 20:20:45 -08:00
Lounger f9accd38e0 UI: Update chat instruct styles 2023-12-20 02:54:08 +01:00
oobabooga d8279dc710 Replace character name placeholders in chat context (closes #5007) 2023-12-19 17:31:46 -08:00
Lounger ff3e845b04 UI: Header box is dropping shadows 2023-12-20 01:24:34 +01:00
Lounger 40d5bf6c35 Set margin on other tabs too 2023-12-19 23:42:13 +01:00
Lounger f42074b6c1 UI: Remove header margin on chat tab 2023-12-19 23:27:11 +01:00
oobabooga 5b791cae4a
Merge pull request #5005 from oobabooga/dev
Merge dev branch
2023-12-19 18:21:09 -03:00
oobabooga e83e6cedbe Organize the model menu 2023-12-19 13:18:26 -08:00
oobabooga f4ae0075e8 Fix conversion from old template format to jinja2 2023-12-19 13:16:52 -08:00
oobabooga de138b8ba6
Add llama-cpp-python wheels with tensor cores support (#5003) 2023-12-19 17:30:53 -03:00
oobabooga 71eb744b1c
Merge pull request #5002 from oobabooga/dev
Merge dev branch
2023-12-19 15:24:40 -03:00
oobabooga 0a299d5959
Bump llama-cpp-python to 0.2.24 (#5001) 2023-12-19 15:22:21 -03:00
oobabooga 83cf1a6b67 Fix Yi space issue (closes #4996) 2023-12-19 07:54:19 -08:00
oobabooga 781367bdc3
Merge pull request #4988 from oobabooga/dev
Merge dev branch
2023-12-18 23:42:16 -03:00
oobabooga 9847809a7a Add a warning about ppl evaluation without --no_use_fast 2023-12-18 18:09:24 -08:00
oobabooga f6d701624c UI: mention that QuIP# does not work on Windows 2023-12-18 18:05:02 -08:00
oobabooga a23a004434 Update the example template 2023-12-18 17:47:35 -08:00
oobabooga 3d10c574e7 Fix custom system messages in instruction templates 2023-12-18 17:45:06 -08:00
dependabot[bot] 9e48e50428
Update optimum requirement from ==1.15.* to ==1.16.* (#4986) 2023-12-18 21:43:29 -03:00
俞航 9fa3883630
Add ROCm wheels for exllamav2 (#4973)
---------

Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
2023-12-18 21:40:38 -03:00
Water 674be9a09a
Add HQQ quant loader (#4888)
---------

Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
2023-12-18 21:23:16 -03:00
oobabooga b28020a9e4
Merge pull request #4980 from oobabooga/dev
Merge dev branch
2023-12-18 10:11:32 -03:00
oobabooga 64a57d9dc2 Remove duplicate instruction templates 2023-12-17 21:39:47 -08:00
oobabooga 1f9e25e76a UI: update "Saved instruction templates" dropdown after loading template 2023-12-17 21:19:06 -08:00
oobabooga da1c8d77ea Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2023-12-17 21:05:10 -08:00
oobabooga cac89df97b Instruction templates: better handle unwanted bos tokens 2023-12-17 21:04:30 -08:00
oobabooga f0d6ead877
llama.cpp: read instruction template from GGUF metadata (#4975) 2023-12-18 01:51:58 -03:00
oobabooga 3f3cd4fbe4 UI: improve list style in chat modes 2023-12-17 20:26:57 -08:00
oobabooga 306c479d3a Minor fix to Vigogne-Chat template 2023-12-17 19:15:54 -08:00
Hirose 3f973e1fbf
Add detection for Eric Hartford's Dolphin models in models/config.yaml (#4966) 2023-12-17 23:56:34 -03:00
Eve 7c6f39382b
Add Orca-Vicuna instruction template (#4971) 2023-12-17 23:55:23 -03:00
FartyPants (FP HAM) 59da429cbd
Update Training PRO (#4972)
- rolling back safetensors to bi, until it is fixed correctly
- removing the ugly checkpoint detour
2023-12-17 23:54:06 -03:00
oobabooga 7be09836fc
Merge pull request #4961 from oobabooga/dev
Merge dev branch
2023-12-17 12:11:13 -03:00
oobabooga f1f2c4c3f4
Add --num_experts_per_token parameter (ExLlamav2) (#4955) 2023-12-17 12:08:33 -03:00
oobabooga 12690d3ffc
Better HF grammar implementation (#4953) 2023-12-17 02:01:23 -03:00
oobabooga aa200f8723 UI: remove no longer necessary js in Default/Notebook tabs 2023-12-16 19:39:00 -08:00
oobabooga 7a84d7b2da
Instruct style improvements (#4951) 2023-12-16 22:16:26 -03:00
oobabooga 41424907b1 Update README 2023-12-16 16:35:36 -08:00
oobabooga d2ed0a06bf Bump ExLlamav2 to 0.0.11 (adds Mixtral support) 2023-12-16 16:34:15 -08:00
oobabooga 0087dca286 Update README 2023-12-16 12:28:51 -08:00
oobabooga f8079d067d UI: save the sent chat message on "no model is loaded" error 2023-12-16 10:52:41 -08:00
oobabooga 443be391f2
Merge pull request #4937 from oobabooga/dev
Merge dev branch
2023-12-15 12:03:22 -03:00
oobabooga a060908d6c Mixtral Instruct: detect prompt format for llama.cpp loader
Workaround until the tokenizer.chat_template kv field gets implemented
2023-12-15 06:59:15 -08:00
oobabooga 3bbf6c601d AutoGPTQ: Add --disable_exllamav2 flag (Mixtral CPU offloading needs this) 2023-12-15 06:46:13 -08:00
oobabooga 7de10f4c8e Bump AutoGPTQ to 0.6.0 (adds Mixtral support) 2023-12-15 06:18:49 -08:00
oobabooga d0677caf2c Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2023-12-15 04:51:41 -08:00
oobabooga 69ba3cb0d9 Bump openai-whisper requirement (closes #4848) 2023-12-15 04:48:04 -08:00
Song Fuchang 127c71a22a
Update IPEX to 2.1.10+xpu (#4931)
* This will require Intel oneAPI Toolkit 2024.0
2023-12-15 03:19:01 -03:00
oobabooga 85816898f9
Bump llama-cpp-python to 0.2.23 (including Linux ROCm and MacOS >= 12) (#4930) 2023-12-15 01:58:08 -03:00
oobabooga 2cb5b68ad9
Bug fix: when generation fails, save the sent message (#4915) 2023-12-15 01:01:45 -03:00
Felipe Ferreira 11f082e417
[OpenAI Extension] Add more types to Embeddings Endpoint (#4895) 2023-12-15 00:26:16 -03:00
Kim Jaewon e53f99faa0
[OpenAI Extension] Add 'max_logits' parameter in logits endpoint (#4916) 2023-12-15 00:22:43 -03:00
oobabooga eaa1fe67f3
Remove elevenlabs extension (#4928) 2023-12-15 00:00:07 -03:00
oobabooga c3e0fcfc52
Merge pull request #4927 from oobabooga/dev
Merge dev branch
2023-12-14 22:39:08 -03:00
oobabooga f336f8a811 Merge branch 'main' into dev 2023-12-14 17:38:16 -08:00
oobabooga dde7921057 One-click installer: minor message change 2023-12-14 17:27:32 -08:00
oobabooga fd1449de20 One-click installer: fix minor bug introduced in previous commit 2023-12-14 16:52:44 -08:00
oobabooga 4ae2dcebf5 One-click installer: more friendly progress messages 2023-12-14 16:48:00 -08:00
oobabooga 8acecf3aee Bump llama-cpp-python to 0.2.23 (NVIDIA & CPU-only, no AMD, no Metal) (#4924) 2023-12-14 09:41:36 -08:00
oobabooga 8835ea3704
Bump llama-cpp-python to 0.2.23 (NVIDIA & CPU-only, no AMD, no Metal) (#4924) 2023-12-14 14:39:43 -03:00
oobabooga bf68d4499e
Merge pull request #4923 from oobabooga/dev
Merge dev branch
2023-12-14 13:01:05 -03:00
oobabooga 623c92792a Update README 2023-12-14 07:56:48 -08:00
oobabooga 3580bed041 Update README 2023-12-14 07:54:20 -08:00
oobabooga e91c09b8af
Merge pull request #4920 from oobabooga/dev
Merge dev branch
2023-12-14 11:24:00 -03:00
oobabooga d5ec3c3444 Update README 2023-12-14 06:20:52 -08:00
oobabooga 5b283fff22 Update README 2023-12-14 06:15:14 -08:00
oobabooga 958799221f Update README 2023-12-14 06:09:03 -08:00
oobabooga e7fa17740a Update README 2023-12-13 22:49:42 -08:00
oobabooga 03babe7d81 Update README 2023-12-13 22:47:08 -08:00
oobabooga aad14174e4 Update README 2023-12-13 22:46:18 -08:00
oobabooga 783947a2aa Update README 2023-12-13 22:44:25 -08:00
oobabooga 7fef16950f Update README 2023-12-13 22:42:54 -08:00
oobabooga d36e7f1762 Update README 2023-12-13 22:35:22 -08:00
oobabooga 9695db0ee4 Update README 2023-12-13 22:30:31 -08:00
oobabooga d354f5009c Update README 2023-12-13 22:21:29 -08:00
oobabooga 0a4fad2d46 Update README 2023-12-13 22:20:37 -08:00
oobabooga fade6abfe9 Update README 2023-12-13 22:18:40 -08:00
oobabooga aafd15109d Update README 2023-12-13 22:15:58 -08:00
oobabooga 634518a412 Update README 2023-12-13 22:08:41 -08:00
oobabooga 0d5ca05ab9 Update README 2023-12-13 22:06:04 -08:00
oobabooga d241de86c4 Update README 2023-12-13 22:02:26 -08:00
Lounger 5754f0c357
Fix deleting chat logs (#4914) 2023-12-13 21:54:43 -03:00
Bartowski f51156705d
Allow symlinked folder within root directory (#4863) 2023-12-13 18:08:21 -03:00
oobabooga 36e850fe89
Update README.md 2023-12-13 17:55:41 -03:00
oobabooga 3e0c11a758
Merge pull request #4912 from oobabooga/dev
Merge dev branch
2023-12-13 15:49:36 -03:00
oobabooga 1bfee1d12e Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2023-12-13 10:48:34 -08:00
oobabooga d14d4cad4a Lint 2023-12-13 10:48:15 -08:00
Ixion 3f3960dbfb
Fixed invalid Jinja2 syntax in instruction templates (#4911) 2023-12-13 15:46:23 -03:00
oobabooga 4eeac70af7 Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2023-12-13 10:40:41 -08:00
oobabooga fcf5512364 Jinja templates: fix a potential small bug 2023-12-13 10:19:39 -08:00
missionfloyd bdcc769e6f
Bypass coqui TTS EULA check (#4905) 2023-12-13 02:26:46 -03:00
oobabooga 7f1a6a70e3 Update the llamacpp_HF comment 2023-12-12 21:04:20 -08:00
oobabooga 314a095c74
Merge pull request #4903 from oobabooga/dev
Merge dev branch
2023-12-12 23:10:45 -03:00
oobabooga c2802bc3ac Lint 2023-12-12 18:05:10 -08:00
oobabooga b2cae6cac6 Docs: minor update 2023-12-12 14:11:13 -08:00
oobabooga 21a5bfc67f Relax optimum requirement 2023-12-12 14:05:58 -08:00
oobabooga 12f58e2cac Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2023-12-12 13:28:24 -08:00
oobabooga 1c531a3713 Minor cleanup 2023-12-12 13:25:21 -08:00
Penagwin 85a1d8965c
Updated Docker Docs (#4900) 2023-12-12 18:03:50 -03:00
oobabooga 8513028968 Fix lag in the chat tab during streaming 2023-12-12 13:01:25 -08:00
oobabooga 736fe4aa3e Fix server refusing to close on Ctrl+C 2023-12-12 12:27:40 -08:00
oobabooga 39d2fe1ed9
Jinja templates for Instruct and Chat (#4874) 2023-12-12 17:23:14 -03:00
oobabooga aab0dd962d Revert "Update callbacks.py to show tracebacks on ValueError (#4892)"
This reverts commit 993ca51a65.
2023-12-12 11:47:11 -08:00
dependabot[bot] 7a987417bb
Bump optimum from 1.14.0 to 1.15.0 (#4885) 2023-12-12 02:32:19 -03:00
dependabot[bot] a17750db91
Update peft requirement from ==0.6.* to ==0.7.* (#4886) 2023-12-12 02:31:30 -03:00
dependabot[bot] a8a92c6c87
Update transformers requirement from ==4.35.* to ==4.36.* (#4882) 2023-12-12 02:30:25 -03:00
Nehereus 993ca51a65
Update callbacks.py to show tracebacks on ValueError (#4892) 2023-12-12 02:29:27 -03:00
Morgan Schweers 602b8c6210
Make new browser reloads recognize current model. (#4865) 2023-12-11 02:51:01 -03:00
oobabooga 8c8825b777 Add QuIP# to README 2023-12-08 08:40:42 -08:00
oobabooga 2a335b8aa7 Cleanup: set shared.model_name only once 2023-12-08 06:35:23 -08:00
oobabooga 62d59a516f Add trust_remote_code to all HF loaders 2023-12-08 06:29:26 -08:00
oobabooga 705f04a0c9
Merge pull request #4851 from oobabooga/dev
Merge dev branch
2023-12-08 10:25:57 -03:00
oobabooga 181743fd97 Fix missing spaces tokenizer issue (closes #4834) 2023-12-08 05:16:46 -08:00
oobabooga 884871c107
Merge pull request #4849 from oobabooga/dev
Merge dev branch
2023-12-08 10:05:02 -03:00
oobabooga 00aedf9209 Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2023-12-08 05:02:25 -08:00
oobabooga 7bbe7e803a Minor fix 2023-12-08 05:01:25 -08:00
Yiximail 1c74b3ab45
Fix partial unicode characters issue (#4837) 2023-12-08 09:50:53 -03:00
oobabooga 2c5a1e67f9
Parameters: change max_new_tokens & repetition_penalty_range defaults (#4842) 2023-12-07 20:04:52 -03:00
Song Fuchang e16e5997ef
Update IPEX install URL. (#4825)
* The old pip URL no longer works; use the latest URL from
  https://intel.github.io/intel-extension-for-pytorch/index.html#installation
2023-12-06 21:07:01 -03:00
oobabooga d516815c9c Model downloader: download only fp16 if both fp16 and GGUF are present 2023-12-05 21:09:12 -08:00
oobabooga 98361af4d5
Add QuIP# support (#4803)
It has to be installed manually for now.
2023-12-06 00:01:01 -03:00
oobabooga 6430acadde Minor bug fix after https://github.com/oobabooga/text-generation-webui/pull/4814 2023-12-05 10:08:11 -08:00
oobabooga c21a9668a5 Lint 2023-12-04 21:17:05 -08:00
erew123 f786aa3caa
Clean-up Ctrl+C Shutdown (#4802) 2023-12-05 02:16:16 -03:00
oobabooga 2694ef45a3 Do not limit API updates/second 2023-12-04 20:46:18 -08:00
oobabooga 0f828ea441 Do not limit API updates/second 2023-12-04 20:45:43 -08:00
oobabooga af261e5dd4
Merge pull request #4815 from oobabooga/dev
Merge dev branch
2023-12-05 01:30:57 -03:00
oobabooga 9edb193def
Optimize HF text generation (#4814) 2023-12-05 00:00:40 -03:00
oobabooga 1ccbcb967e
Merge pull request #4811 from oobabooga/dev
Merge dev branch
2023-12-04 21:29:45 -03:00
俞航 ac9f154bcc
Bump exllamav2 from 0.0.8 to 0.0.10 & Fix code change (#4782) 2023-12-04 21:15:05 -03:00
oobabooga 131a5212ce UI: update context upper limit to 200000 2023-12-04 15:48:34 -08:00
oobabooga f7145544f9 Update README 2023-12-04 15:44:44 -08:00
oobabooga 8e1f86a866 Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2023-12-04 15:41:56 -08:00
oobabooga be88b072e9 Update --loader flag description 2023-12-04 15:41:25 -08:00
dependabot[bot] 801ba87c68
Update accelerate requirement from ==0.24.* to ==0.25.* (#4810) 2023-12-04 20:36:01 -03:00
oobabooga 7fc9033b2e Recommend ExLlama_HF and ExLlamav2_HF 2023-12-04 15:28:46 -08:00
oobabooga e4e35f357b
Merge pull request #4807 from oobabooga/dev
Merge dev branch
2023-12-04 12:28:34 -03:00
oobabooga 3f993280e4 Minor changes 2023-12-04 07:27:44 -08:00
oobabooga 0931ed501b Minor changes 2023-12-04 07:25:18 -08:00
oobabooga 427a165597 Bump TTS version in coqui_tts 2023-12-04 07:21:56 -08:00
Song Fuchang 0bfd5090be
Import accelerate very early to make Intel GPU happy (#4704) 2023-12-03 22:51:18 -03:00
dependabot[bot] 2e83844f35
Bump safetensors from 0.4.0 to 0.4.1 (#4750) 2023-12-03 22:50:10 -03:00
Ikko Eltociear Ashimine 06cc9a85f7
README: minor typo fix (#4793) 2023-12-03 22:46:34 -03:00
Lounger 7c0a17962d
Gallery improvements (#4789) 2023-12-03 22:45:50 -03:00
oobabooga 96df4f10b9
Merge pull request #4777 from oobabooga/dev
Merge dev branch
2023-12-01 00:00:17 -03:00
oobabooga 77d6ccf12b Add a LOADER debug message while loading models 2023-11-30 12:00:32 -08:00
oobabooga 1c90e02243 Update Colab-TextGen-GPU.ipynb 2023-11-30 11:55:18 -08:00
oobabooga 092a2c3516 Fix a bug in llama.cpp get_logits() function 2023-11-30 11:21:40 -08:00
oobabooga 6d3a9b8689
Merge pull request #4773 from oobabooga/dev
Merge dev branch
2023-11-30 02:31:37 -03:00
oobabooga 000b77a17d Minor docker changes 2023-11-29 21:27:23 -08:00
Callum 88620c6b39
feature/docker_improvements (#4768) 2023-11-30 02:20:23 -03:00
oobabooga 2698d7c9fd Fix llama.cpp model unloading 2023-11-29 15:19:48 -08:00
oobabooga fa89d305e3 Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2023-11-29 15:13:17 -08:00
oobabooga 9940ed9c77 Sort the loaders 2023-11-29 15:13:03 -08:00
Manu Kashyap 78fd7f6aa8
Fixed naming for sentence-transformers library (#4764) 2023-11-29 12:15:03 -03:00
oobabooga a7670c31ca Sort 2023-11-28 18:43:33 -08:00
oobabooga 6e51bae2e0 Sort the loaders menu 2023-11-28 18:41:11 -08:00
oobabooga f4b956b47c Detect yi instruction template 2023-11-27 10:45:47 -08:00
oobabooga 68059d7c23 llama.cpp: minor log change & lint 2023-11-27 10:44:55 -08:00
Denis Iskandarov 1b05832f9a
Add direnv artifacts to gitignore (#4737) 2023-11-27 15:43:42 -03:00
xr4dsh b5b3d18773
reasonable CLI args for docker container (#4727) 2023-11-27 15:43:01 -03:00
tsukanov-as 9f7ae6bb2e
fix detection of stopping strings when HTML escaping is used (#4728) 2023-11-27 15:42:08 -03:00
Eve d06ce7b75c
add openhermes mistral support (#4730) 2023-11-27 15:41:06 -03:00
oobabooga b6d16a35b1 Minor API fix 2023-11-21 17:56:28 -08:00
oobabooga 51add248c8
Merge pull request #4702 from oobabooga/dev
Merge dev branch
2023-11-21 21:18:27 -03:00
oobabooga cb0dbffccc Merge branch 'main' into dev 2023-11-21 16:12:45 -08:00
oobabooga 8d811a4d58 one-click: move on instead of crashing if extension fails to install 2023-11-21 16:09:44 -08:00
oobabooga 0589ff5b12
Bump llama-cpp-python to 0.2.19 & add min_p and typical_p parameters to llama.cpp loader (#4701) 2023-11-21 20:59:39 -03:00
oobabooga 2769a1fa25 Hide deprecated args from Session tab 2023-11-21 15:15:16 -08:00
oobabooga 0047d9f5e0 Do not install coqui_tts requirements by default
It breaks the one-click installer on Windows.
2023-11-21 15:13:42 -08:00
oobabooga fb124ab6e2 Bump to flash-attention 2.3.4 + switch to Github Actions wheels on Windows (#4700) 2023-11-21 15:07:17 -08:00
oobabooga e9cdaa2ada
Bump to flash-attention 2.3.4 + switch to Github Actions wheels on Windows (#4700) 2023-11-21 20:06:56 -03:00
oobabooga b81d6ad8a4
Detect Orca 2 template (#4697) 2023-11-21 15:26:42 -03:00
oobabooga 360eeb9ff1
Merge pull request #4686 from oobabooga/dev
Merge dev branch
2023-11-21 08:38:50 -03:00
oobabooga 54a4eb60a3
Remove --no-dependencies from TTS installation command 2023-11-21 08:30:50 -03:00
oobabooga efdd99623c
Merge pull request #4683 from oobabooga/dev
Merge dev branch
2023-11-21 00:36:58 -03:00
oobabooga b02dc4dc0d Add --no-dependencies to TTS installation command 2023-11-20 19:02:12 -08:00
oobabooga 55f2a3643b Update multimodal API example 2023-11-20 18:41:09 -08:00
oobabooga 829c6d4f78 Add "remove_trailing_dots" option to XTTSv2 2023-11-20 18:33:29 -08:00
kanttouchthis 8dc9ec3491
add XTTSv2 (coqui_tts extension) (#4673)
---------

Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
2023-11-20 22:37:52 -03:00
oobabooga ff24648510 Credit llama-cpp-python in the README 2023-11-20 12:13:15 -08:00
oobabooga be78d79811 Revert accidental noavx2 changes 2023-11-20 11:48:04 -08:00
oobabooga 4b84e45116 Use +cpuavx2 instead of +cpuavx 2023-11-20 11:46:38 -08:00
oobabooga d7f1bc102b
Fix "Illegal instruction" bug in llama.cpp CPU only version (#4677) 2023-11-20 16:36:38 -03:00
drew9781 5e70263e25
docker: install xformers with specific cuda version, matching the docker image. (#4670) 2023-11-19 21:43:15 -03:00
oobabooga f11092ac2a
Merge pull request #4664 from oobabooga/dev
Merge dev branch
2023-11-19 15:12:55 -03:00
oobabooga f0d66cf817 Add missing file 2023-11-19 10:12:13 -08:00
oobabooga 22e7a22d1e
Merge pull request #4662 from oobabooga/dev
Merge dev branch
2023-11-19 14:23:19 -03:00
oobabooga a2e6d00128 Use convert_ids_to_tokens instead of decode in logits endpoint
This preserves the llama tokenizer spaces.
2023-11-19 09:22:08 -08:00
oobabooga d1bba48a83
Merge pull request #4660 from oobabooga/dev
Merge dev branch
2023-11-19 13:32:08 -03:00
oobabooga 8cf05c1b31 Fix disappearing character gallery 2023-11-19 08:31:01 -08:00
oobabooga 9da7bb203d Minor LoRA bug fix 2023-11-19 07:59:29 -08:00
oobabooga 78af3b0a00 Update docs/What Works.md 2023-11-19 07:57:16 -08:00
oobabooga a6f1e1bcc5 Fix PEFT LoRA unloading 2023-11-19 07:55:25 -08:00
oobabooga a290d17386 Add hover cursor to bot pfp 2023-11-19 06:56:42 -08:00
oobabooga ab94f0d9bf Minor style change 2023-11-18 21:11:04 -08:00
oobabooga 5fcee696ea
New feature: enlarge character pictures on click (#4654) 2023-11-19 02:05:17 -03:00
Jordan Tucker cb836dd49c
fix: use shared chat-instruct_command with api (#4653) 2023-11-19 01:19:10 -03:00
oobabooga 771e62e476
Add /v1/internal/lora endpoints (#4652) 2023-11-19 00:35:22 -03:00
oobabooga ef6feedeb2
Add --nowebui flag for pure API mode (#4651) 2023-11-18 23:38:39 -03:00
oobabooga 0fa1af296c
Add /v1/internal/logits endpoint (#4650) 2023-11-18 23:19:31 -03:00
oobabooga 8f4f4daf8b
Add --admin-key flag for API (#4649) 2023-11-18 22:33:27 -03:00
wizd af76fbedb8
Openai embedding fix to support jina-embeddings-v2 (#4642) 2023-11-18 20:24:29 -03:00
Jordan Tucker baab894759
fix: use system message in chat-instruct mode (#4648) 2023-11-18 20:20:13 -03:00
oobabooga 47d9e2618b Refresh the Preset menu after saving a preset 2023-11-18 14:03:42 -08:00
oobabooga 83b64e7fc1
New feature: "random preset" button (#4647) 2023-11-18 18:31:41 -03:00
oobabooga d1a58da52f Update ancient Docker instructions 2023-11-17 19:52:53 -08:00
oobabooga e0ca49ed9c
Bump llama-cpp-python to 0.2.18 (2nd attempt) (#4637)
* Update requirements*.txt

* Add back seed
2023-11-18 00:31:27 -03:00
oobabooga 3146124ec0
Merge pull request #4632 from oobabooga/dev
Merge dev branch
2023-11-17 10:18:31 -03:00
oobabooga 9d6f79db74 Revert "Bump llama-cpp-python to 0.2.18 (#4611)"
This reverts commit 923c8e25fb.
2023-11-17 05:14:25 -08:00
oobabooga e0a7cc5e0f Simplify CORS code 2023-11-16 20:11:55 -08:00
oobabooga 13dc3b61da Update README 2023-11-16 19:57:55 -08:00
oobabooga 8b66d83aa9 Set use_fast=True by default, create --no_use_fast flag
This increases tokens/second for HF loaders.
2023-11-16 19:55:28 -08:00
oobabooga f889302d24
Merge pull request #4628 from oobabooga/dev
Merge dev branch
2023-11-16 23:47:07 -03:00
oobabooga b2ce8dc7ee Update a message 2023-11-16 18:46:26 -08:00
oobabooga 0ee8d2b66b
Merge pull request #4627 from oobabooga/dev
Merge dev branch
2023-11-16 23:41:18 -03:00
oobabooga 780b00e1cf Minor bug fix 2023-11-16 18:39:39 -08:00
oobabooga c0233bb9d3 Minor message change 2023-11-16 18:36:57 -08:00
oobabooga 94b7177174 Update docs/07 - Extensions 2023-11-16 18:24:46 -08:00
oobabooga 6525707a7f Fix "send instruction template to..." buttons (closes #4625) 2023-11-16 18:16:42 -08:00
oobabooga 510a01ef46 Lint 2023-11-16 18:03:06 -08:00
oobabooga 923c8e25fb
Bump llama-cpp-python to 0.2.18 (#4611) 2023-11-16 22:55:14 -03:00
Casper 61f429563e
Bump AutoAWQ to 0.1.7 (#4620) 2023-11-16 17:08:08 -03:00
oobabooga e7d460d932 Make sure that API requirements are installed 2023-11-16 10:08:41 -08:00
oobabooga cbf2b47476 Strip trailing "\" characters in CMD_FLAGS.txt 2023-11-16 09:33:36 -08:00
oobabooga 58c6001be9 Add missing exllamav2 samplers 2023-11-16 07:09:40 -08:00
oobabooga cd41f8912b Warn users about n_ctx / max_seq_len 2023-11-15 18:56:42 -08:00
oobabooga a475aa7816 Improve API documentation 2023-11-15 18:39:08 -08:00
oobabooga 9be48e83a9 Start API when "api" checkbox is checked 2023-11-15 16:35:47 -08:00
oobabooga a85ce5f055 Add more info messages for truncation / instruction template 2023-11-15 16:20:31 -08:00
oobabooga 883701bc40 Alternative solution to 025da386a0
Fixes an error.
2023-11-15 16:04:02 -08:00
oobabooga 8ac942813c Revert "Fix CPU memory limit error (issue #3763) (#4597)"
This reverts commit 025da386a0.
2023-11-15 16:01:54 -08:00
oobabooga e6f44d6d19 Print context length / instruction template to terminal when loading models 2023-11-15 16:00:51 -08:00
oobabooga e05d8fd441 Style changes 2023-11-15 15:51:37 -08:00
oobabooga be125e2708 Add /v1/internal/model/unload endpoint 2023-11-15 15:48:33 -08:00
David Nielson 564d0cde82
Use standard hyphens in filenames (#4576) 2023-11-15 20:29:00 -03:00
Andy Bao 025da386a0
Fix CPU memory limit error (issue #3763) (#4597)
get_max_memory_dict() was not properly formatting shared.args.cpu_memory

Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
2023-11-15 20:27:20 -03:00
Anton Rogozin 8a9d5a0cea
update AutoGPTQ to higher version for lora applying error fixing (#4604) 2023-11-15 20:23:22 -03:00
oobabooga 8a2af87d3a
Merge pull request #4608 from oobabooga/dev
Merge dev branch
2023-11-15 13:19:15 -03:00
oobabooga 072cfe19e9 Minor Colab fix 2023-11-15 08:18:32 -08:00
oobabooga 2337aebe4d
Merge pull request #4606 from oobabooga/dev
Merge dev branch
2023-11-15 13:16:44 -03:00
oobabooga 3d861a459d Minor Colab fix 2023-11-15 08:15:43 -08:00
oobabooga dea90c7b67 Bump exllamav2 to 0.0.8 2023-11-13 10:34:10 -08:00
oobabooga 454fcf39a9
Merge pull request #4579 from oobabooga/dev
Merge dev branch
2023-11-13 11:39:08 -03:00
oobabooga 4f9bc63edf Installer: update a message for clarity 2023-11-10 09:43:02 -08:00
oobabooga 74fee4f312 Update Colab-TextGen-GPU.ipynb 2023-11-10 09:18:25 -08:00
oobabooga 52758f15da Remove sentence-transformers requirement (for #1575) 2023-11-10 07:35:29 -08:00
oobabooga c5be3f7acb Make /v1/embeddings functional, add request/response types 2023-11-10 07:34:27 -08:00
oobabooga 7ed2143cd6
Update 12 - OpenAI API.md 2023-11-10 11:56:04 -03:00
oobabooga 0777b0d3c7 Add system_message parameter, document model (unused) parameter 2023-11-10 06:47:18 -08:00
oobabooga 4aabff3728 Remove old API, launch OpenAI API with --api 2023-11-10 06:39:08 -08:00
GuizzyQC 6a7cd01ebf
Fix bug with /internal/model/load (#4549)
Update shared.model_name after loading model through API call
2023-11-10 00:16:38 -03:00
oobabooga 2af7e382b1 Revert "Bump llama-cpp-python to 0.2.14"
This reverts commit 5c3eb22ce6.

The new version has issues:

https://github.com/oobabooga/text-generation-webui/issues/4540
https://github.com/abetlen/llama-cpp-python/issues/893
2023-11-09 10:02:13 -08:00
oobabooga 07d66e45b4
Merge pull request #4541 from oobabooga/dev
Merge dev branch
2023-11-09 14:53:34 -03:00
Ashley Kleynhans 372d712921
Fix deprecated API (#4539) 2023-11-09 14:51:50 -03:00
oobabooga d86f1fd2c3 OpenAI API: stop streaming on client disconnect (closes #4521) 2023-11-09 06:37:32 -08:00
oobabooga f7534b2f4b
Merge pull request #4532 from oobabooga/dev
Merge dev branch
2023-11-09 09:33:55 -03:00
oobabooga effb3aef42 Prevent deadlocks in OpenAI API with simultaneous requests 2023-11-08 20:55:39 -08:00
oobabooga 4da00b6032
Merge pull request #4522 from oobabooga/dev
Merge dev branch
2023-11-08 22:57:08 -03:00
oobabooga 21ed9a260e Document the new "Custom system message" field 2023-11-08 17:54:10 -08:00
oobabooga 678fd73aef Document /v1/internal/model/load and fix a bug 2023-11-08 17:41:12 -08:00
MrMojoR 1754a3761b
Include trust remote code usage in openai api's embedder (#4513) 2023-11-08 11:25:43 -03:00
hronoas 6c7aad11f3
openai extension: wrong frequency_penalty type (#4512) 2023-11-08 11:23:51 -03:00
oobabooga 881e8a6e70
Small bug fix in /v1/internal/model/load 2023-11-08 02:34:13 -03:00
oobabooga 050ff36bd6 Revert "Add a comment to /v1/models"
This reverts commit 38b07493a0.
2023-11-07 21:09:47 -08:00
oobabooga 38b07493a0 Add a comment to /v1/models 2023-11-07 21:07:12 -08:00
oobabooga 2358706453 Add /v1/internal/model/load endpoint (tentative) 2023-11-07 20:58:06 -08:00
oobabooga 43c53a7820 Refactor the /v1/models endpoint 2023-11-07 19:59:27 -08:00
oobabooga 1b69694fe9 Add types to the encode/decode/token-count endpoints 2023-11-07 19:32:14 -08:00
oobabooga f6ca9cfcdc Add /v1/internal/model-info endpoint 2023-11-07 18:59:02 -08:00
oobabooga 6e2e0317af
Separate context and system message in instruction formats (#4499) 2023-11-07 20:02:58 -03:00
oobabooga 322c170566 Document logits_all 2023-11-07 14:45:11 -08:00
oobabooga 5c0559da69 Training: fix .txt files not showing in dropdowns 2023-11-07 14:41:11 -08:00
oobabooga af3d25a503 Disable logits_all in llamacpp_HF (makes processing 3x faster) 2023-11-07 14:35:48 -08:00
oobabooga 5c3eb22ce6 Bump llama-cpp-python to 0.2.14 2023-11-07 14:20:43 -08:00
oobabooga 3fc505dc0f Document unused parameters 2023-11-07 08:56:09 -08:00
oobabooga 3d59346871 Implement echo/suffix parameters 2023-11-07 08:43:45 -08:00
oobabooga cee099f131 Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2023-11-07 08:25:22 -08:00
oobabooga 48c9c31440 Document the "preset" option in the API 2023-11-07 08:23:17 -08:00
oobabooga d59f1ad89a
Update README.md 2023-11-07 13:05:06 -03:00
oobabooga 0c440877de
Update 12 - OpenAI API.md 2023-11-07 12:59:40 -03:00
oobabooga 55dc9845cb
Update 12 - OpenAI API.md 2023-11-07 12:51:41 -03:00
oobabooga b0b999dd68 Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2023-11-07 07:46:08 -08:00
oobabooga 2bda1a9c9b Mention --api-key 2023-11-07 07:45:55 -08:00
oobabooga cc04abda49
Update 12 - OpenAI API.md 2023-11-07 12:40:52 -03:00
oobabooga ddca6948b2
Update 12 - OpenAI API.md 2023-11-07 12:39:59 -03:00
oobabooga 40e73aafce
Update 12 - OpenAI API.md 2023-11-07 12:38:39 -03:00
oobabooga 6ec997f195
Update 12 - OpenAI API.md 2023-11-07 12:36:52 -03:00
oobabooga 15d4ea180d Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2023-11-07 07:35:36 -08:00
oobabooga b2afdda4e8 Add more API examples 2023-11-07 07:35:04 -08:00
Morgan Cheng 349604458b
Update 12 - OpenAI API.md (#4501)
Fix the typo in argument. It should be `--api-port` instead of `--port`.

Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
2023-11-07 11:22:17 -03:00
dependabot[bot] fd893baba1
Bump optimum from 1.13.1 to 1.14.0 (#4492) 2023-11-07 00:13:41 -03:00
dependabot[bot] 18739c8b3a
Update peft requirement from ==0.5.* to ==0.6.* (#4494) 2023-11-07 00:12:59 -03:00
oobabooga 79b3f5a546
Add /v1/internal/stop-generation to OpenAI API (#4498) 2023-11-07 00:10:42 -03:00
oobabooga 97c21e5667 Don't strip leading spaces in OpenAI API 2023-11-06 19:09:41 -08:00
oobabooga 4a45dc4041 Reorder the parameters in the FastAPI documentation 2023-11-06 09:55:36 -08:00
oobabooga 1fba6db69f
Merge pull request #4488 from oobabooga/dev
Merge dev branch
2023-11-06 12:18:55 -03:00
oobabooga 0ed6a17ed4 Update warning 2023-11-06 07:17:49 -08:00
oobabooga 0db81355bc Reorder a parameter 2023-11-06 07:11:49 -08:00
oobabooga b87c6213ae Remove obsolete endpoint 2023-11-06 05:45:45 -08:00
oobabooga fcc9114b58 Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2023-11-06 05:38:47 -08:00
oobabooga ceb8c92dfc
Update 12 - OpenAI API.md 2023-11-06 10:38:22 -03:00
oobabooga 28fd535f9c Make chat API more robust 2023-11-06 05:22:01 -08:00
oobabooga 5b5ef57049 Remove file 2023-11-05 21:39:59 -08:00
oobabooga ec17a5d2b7
Make OpenAI API the default API (#4430) 2023-11-06 02:38:29 -03:00
俞航 84d957ba62
[Fix] fix openai embedding_model loading as str (#4147) 2023-11-05 20:42:45 -03:00
kabachuha e18a0460d4
fix openai extension not working because of absent new defaults (#4477) 2023-11-04 16:12:51 -03:00
oobabooga b7a409ef57
Merge pull request #4476 from oobabooga/dev
Merge dev branch
2023-11-04 15:04:43 -03:00
oobabooga fb3bd0203d Update docs 2023-11-04 11:02:24 -07:00
oobabooga 1d8c7c1fc4 Update docs 2023-11-04 11:01:15 -07:00
oobabooga b5c53041b8
Merge pull request #4475 from oobabooga/dev
Merge dev branch
2023-11-04 14:19:55 -03:00
oobabooga 40f7f37009 Update requirements 2023-11-04 10:12:06 -07:00
Orang 2081f43ac2
Bump transformers to 4.35.* (#4474) 2023-11-04 14:00:24 -03:00
feng lui 4766a57352
transformers: add use_flash_attention_2 option (#4373) 2023-11-04 13:59:33 -03:00
wouter van der plas add359379e
fixed two links in the ui (#4452) 2023-11-04 13:41:42 -03:00
Casper cfbd108826
Bump AWQ to 0.1.6 (#4470) 2023-11-04 13:09:41 -03:00
oobabooga aa5d671579
Add temperature_last parameter (#4472) 2023-11-04 13:09:07 -03:00
oobabooga 1ab8700d94 Change frequency/presence penalty ranges 2023-11-03 17:38:19 -07:00
oobabooga 45fcb60e7a Make truncation_length_max apply to max_seq_len/n_ctx 2023-11-03 11:29:31 -07:00
oobabooga 7f9c1cbb30 Change min_p default to 0.0 2023-11-03 08:25:22 -07:00
oobabooga 4537853e2c Change min_p default to 1.0 2023-11-03 08:13:50 -07:00
kalomaze 367e5e6e43
Implement Min P as a sampler option in HF loaders (#4449) 2023-11-02 16:32:51 -03:00
oobabooga fcb7017b7a Remove a checkbox 2023-11-02 12:24:09 -07:00
Julien Chaumond fdcaa955e3
transformers: Add a flag to force load from safetensors (#4450) 2023-11-02 16:20:54 -03:00
oobabooga c0655475ae Add cache_8bit option 2023-11-02 11:23:04 -07:00
oobabooga 42f816312d Merge remote-tracking branch 'refs/remotes/origin/dev' into dev 2023-11-02 11:09:26 -07:00
oobabooga 77abd9b69b Add no_flash_attn option 2023-11-02 11:08:53 -07:00
Julien Chaumond a56ef2a942
make torch.load a bit safer (#4448) 2023-11-02 14:07:08 -03:00
deevis deba039c03
(fix): OpenOrca-Platypus2 models should use correct instruction_template and custom_stopping_strings (#4435) 2023-11-01 01:51:00 -03:00
Mehran Ziadloo aaf726dbfb
Updating the shared settings object when loading a model (#4425) 2023-11-01 01:29:57 -03:00
oobabooga 9bd0724d85 Change frequency/presence penalty ranges 2023-10-31 20:57:56 -07:00
Orang 6b7fa45cc3
Update exllamav2 version (#4417) 2023-10-31 19:12:14 -03:00
Casper 41e159e88f
Bump AutoAWQ to v0.1.5 (#4410) 2023-10-31 19:11:22 -03:00
Meheret 0707ed7677
updated wiki link (#4415) 2023-10-31 19:09:05 -03:00
oobabooga 262f8ae5bb Use default gr.Dataframe for evaluation table 2023-10-27 06:49:14 -07:00
James Braza f481ce3dd8
Adding platform_system to autoawq (#4390) 2023-10-27 01:02:28 -03:00
dependabot[bot] af98587580
Update accelerate requirement from ==0.23.* to ==0.24.* (#4400) 2023-10-27 00:46:16 -03:00
oobabooga 839a87bac8 Fix is_ccl_available & is_xpu_available imports 2023-10-26 20:27:04 -07:00
Abhilash Majumder 778a010df8
Intel Gpu support initialization (#4340) 2023-10-26 23:39:51 -03:00
GuizzyQC 317e2c857e
sd_api_pictures: fix Gradio warning message regarding custom value (#4391) 2023-10-26 23:03:21 -03:00
oobabooga 92b2f57095 Minor metadata bug fix (second attempt) 2023-10-26 18:57:32 -07:00
oobabooga 2d97897a25 Don't install flash-attention on windows + cuda 11 2023-10-25 11:21:18 -07:00
LightningDragon 0ced78fdfa
Replace hashlib.sha256 with hashlib.file_digest so we don't need to load entire files into ram before hashing them. (#4383) 2023-10-25 12:15:34 -03:00
tdrussell 72f6fc6923
Rename additive_repetition_penalty to presence_penalty, add frequency_penalty (#4376) 2023-10-25 12:10:28 -03:00
oobabooga ef1489cd4d Remove unused parameter in AutoAWQ 2023-10-23 20:45:43 -07:00
oobabooga 1edf321362 Lint 2023-10-23 13:09:03 -07:00
oobabooga 280ae720d7 Organize 2023-10-23 13:07:17 -07:00
oobabooga 49e5eecce4 Merge remote-tracking branch 'refs/remotes/origin/main' 2023-10-23 12:54:05 -07:00
oobabooga 82c11be067 Update 04 - Model Tab.md 2023-10-23 12:49:07 -07:00
oobabooga 306d764ff6 Minor metadata bug fix 2023-10-23 12:46:24 -07:00
adrianfiedler 4bc411332f
Fix broken links (#4367)
---------

Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
2023-10-23 14:09:57 -03:00
oobabooga 92691ee626 Disable trust_remote_code by default 2023-10-23 09:57:44 -07:00
tdrussell 4440f87722
Add additive_repetition_penalty sampler setting. (#3627) 2023-10-23 02:28:07 -03:00
oobabooga 6086768309 Bump gradio to 3.50.* 2023-10-22 21:21:26 -07:00
oobabooga b8183148cf
Update 04 ‐ Model Tab.md 2023-10-22 17:15:55 -03:00
oobabooga cea7fc2435 Update html_instruct_style.css 2023-10-22 12:28:23 -07:00
oobabooga df90d03e0b Replace --mul_mat_q with --no_mul_mat_q 2023-10-22 12:23:03 -07:00
Googulator d0c3b407b3
transformers loader: multi-LoRAs support (#3120) 2023-10-22 16:06:22 -03:00
omo 4405513ca5
Option to select/target additional linear modules/layers in LORA training (#4178) 2023-10-22 15:57:19 -03:00
oobabooga 7a3f885ea8
Update 03 ‐ Parameters Tab.md 2023-10-22 14:52:23 -03:00
oobabooga 63688004dc Add default cmd flags to colab 2023-10-22 09:56:43 -07:00
oobabooga 613feca23b Make colab functional for llama.cpp
- Download only Q4_K_M for GGUF repositories by default
- Use maximum n-gpu-layers by default
2023-10-22 09:08:25 -07:00
oobabooga 994502d41b Colab fixes 2023-10-22 08:57:16 -07:00
Jiashu Xu c544f5cc51
Support LLaVA v1.5 7B (#4348) 2023-10-22 12:49:04 -03:00
oobabooga 05741821a5 Minor colab changes 2023-10-22 08:44:35 -07:00
FartyPants (FP HAM) 6a61158adf
Training PRO a month worth of updates (#4345) 2023-10-22 12:38:09 -03:00
mongolu c18504f369
Fix USE_CUDA118 from ENV remaining null in one_click.py + cuda-toolkit (#4352) 2023-10-22 12:37:24 -03:00
oobabooga cd45635f53 tqdm improvement for colab 2023-10-21 22:00:29 -07:00
oobabooga ae79c510cc Merge remote-tracking branch 'refs/remotes/origin/main' 2023-10-21 21:46:15 -07:00
oobabooga 2d1b3332e4 Ignore warnings on Colab 2023-10-21 21:45:25 -07:00
oobabooga caf6db07ad
Update README.md 2023-10-22 01:22:17 -03:00
oobabooga 1a34927314 Make API URLs more visible 2023-10-21 21:11:07 -07:00
oobabooga 09f807af83 Use ExLlama_HF for GPTQ models by default 2023-10-21 20:45:38 -07:00
oobabooga 619093483e Add Colab notebook 2023-10-21 20:27:52 -07:00
oobabooga 506d05aede Organize command-line arguments 2023-10-21 18:52:59 -07:00
oobabooga b1f33b55fd
Update 01 ‐ Chat Tab.md 2023-10-21 20:17:56 -03:00
oobabooga ac6d5d50b7
Update README.md 2023-10-21 20:03:43 -03:00
oobabooga 6efb990b60
Add a proper documentation (#3885) 2023-10-21 19:15:54 -03:00
Adam White 5a5bc135e9
Docker: Remove explicit CUDA 11.8 Reference (#4343) 2023-10-21 15:09:34 -03:00
oobabooga b98fbe0afc Add download link 2023-10-20 23:58:05 -07:00
oobabooga fbac6d21ca Add missing exception 2023-10-20 23:53:24 -07:00
Brian Dashore 3345da2ea4
Add flash-attention 2 for windows (#4235) 2023-10-21 03:46:23 -03:00
oobabooga 258d046218 More robust way of initializing empty .git folder 2023-10-20 23:13:09 -07:00
Johan 1d5a015ce7
Enable special token support for exllamav2 (#4314) 2023-10-21 01:54:06 -03:00
mjbogusz 8f6405d2fa
Python 3.11, 3.9, 3.8 support (#4233)
---------

Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
2023-10-20 21:13:33 -03:00
oobabooga 9be74fb57c Change 2 margins 2023-10-20 14:04:14 -07:00
oobabooga e208128d68 Lint the CSS files 2023-10-20 13:02:18 -07:00
oobabooga dedbdb46c2 Chat CSS improvements 2023-10-20 12:49:36 -07:00
Haotian Liu 32984ea2f0
Support LLaVA v1.5 (#4305) 2023-10-20 02:28:14 -03:00
oobabooga bb71272903 Detect WizardCoder-Python-34B & Phind-CodeLlama-34B 2023-10-19 14:35:56 -07:00
oobabooga eda7126b25 Organize the .gitignore 2023-10-19 14:33:44 -07:00
turboderp ae8cd449ae
ExLlamav2_HF: Convert logits to FP32 (#4310) 2023-10-18 23:16:05 -03:00
missionfloyd c0ffb77fd8
More silero languages (#3950) 2023-10-16 17:12:32 -03:00
hronoas db7ecdd274
openai: fix empty models list on query present in url (#4139) 2023-10-16 17:02:47 -03:00
oobabooga f17f7a6913 Increase the evaluation table height 2023-10-16 12:55:35 -07:00
oobabooga 8ea554bc19 Check for torch.xpu.is_available() 2023-10-16 12:53:40 -07:00
oobabooga 188d20e9e5 Reduce the evaluation table height 2023-10-16 10:53:42 -07:00
oobabooga 2d44adbb76 Clear the torch cache while evaluating 2023-10-16 10:52:50 -07:00
oobabooga 388d1864a6 Merge remote-tracking branch 'refs/remotes/origin/main' 2023-10-15 21:58:16 -07:00
oobabooga 71cac7a1b2 Increase the height of the evaluation table 2023-10-15 21:56:40 -07:00
oobabooga e14bde4946 Minor improvements to evaluation logs 2023-10-15 20:51:43 -07:00
oobabooga b88b2b74a6 Experimental Intel Arc transformers support (untested) 2023-10-15 20:51:11 -07:00
Sam d331501ebc
Fix for using Torch with CUDA 11.8 (#4298) 2023-10-15 19:27:19 -03:00
oobabooga 3bb4046fad
Update auto-release.yml 2023-10-15 17:27:16 -03:00
oobabooga 45fa803943
Create auto-release.yml 2023-10-15 17:25:29 -03:00
Johan 2706394bfe
Relax numpy version requirements (#4291) 2023-10-15 12:05:06 -03:00
Forkoz 8cce1f1126
Exllamav2 lora support (#4229)
---------

Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
2023-10-14 16:12:41 -03:00
jllllll 1f5a2c5597
Use Pytorch 2.1 exllama wheels (#4285) 2023-10-14 15:27:59 -03:00
oobabooga cd1cad1b47 Bump exllamav2 2023-10-14 11:23:07 -07:00
Eve 6e2dec82f1
add chatml support + mistral-openorca (#4275) 2023-10-13 11:49:17 -03:00
Jesus Alvarez ed66ca3cdf
Add HTTPS support to APIs (openai and default) (#4270)
---------

Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
2023-10-13 01:31:13 -03:00
oobabooga 43be1be598 Manually install CUDA runtime libraries 2023-10-12 21:02:44 -07:00
oobabooga faf5c4dd58 Fix code blocks in instruct mode 2023-10-11 12:18:46 -07:00
oobabooga 773c17faec Fix a warning 2023-10-10 20:53:38 -07:00
oobabooga f63361568c Fix safetensors kwarg usage in AutoAWQ 2023-10-10 19:03:09 -07:00
oobabooga 39f16ff83d Fix default/notebook tabs css 2023-10-10 18:45:12 -07:00
oobabooga fae8062d39
Bump to latest gradio (3.47) (#4258) 2023-10-10 22:20:49 -03:00
Haotian Liu 2b75d725e6
Initial support for LLaVA-LLaMA-2. (#3377) 2023-10-10 18:40:52 -03:00
oobabooga 9fab9a1ca6 Minor fix 2023-10-10 14:08:11 -07:00
oobabooga a49cc69a4a Ignore rope_freq_base if value is 10000 2023-10-10 13:57:40 -07:00
oobabooga 3a9d90c3a1 Download models with 4 threads by default 2023-10-10 13:52:10 -07:00
dependabot[bot] 520cbb2ab1
Bump safetensors from 0.3.2 to 0.4.0 (#4249) 2023-10-10 17:41:09 -03:00
Forkoz 35695e18c7
Remove import. (#4247)
For real this time.
2023-10-09 18:06:11 -03:00
501 changed files with 28296 additions and 14527 deletions

.github/FUNDING.yml

@@ -1 +0,0 @@
-ko_fi: oobabooga


@@ -46,7 +46,7 @@ body:
     id: system-info
     attributes:
       label: System Info
-      description: "Please share your system info with us: operating system, GPU brand, and GPU model. If you are using a Google Colab notebook, mention that instead."
+      description: "Please share your operating system and GPU type (NVIDIA/AMD/Intel/Apple). If you are using a Google Colab notebook, mention that instead."
       render: shell
       placeholder:
     validations:

.github/dependabot.yml

@@ -5,7 +5,10 @@
 version: 2
 updates:
-  - package-ecosystem: "pip" # See documentation for possible values
-    directory: "/" # Location of package manifests
+  - package-ecosystem: "pip"
+    directories:
+      - "/requirements/full/"
+      - "/requirements/portable/"
+    target-branch: "dev"
     schedule:
       interval: "weekly"


@@ -0,0 +1,70 @@
name: Build Everything TGW

on:
  workflow_dispatch:
    inputs:
      version:
        description: 'Version tag of text-generation-webui to build: v3.0'
        default: 'v3.0'
        required: true
        type: string

permissions:
  contents: write

jobs:
  build_release_cuda_windows:
    name: CUDA Windows
    uses: ./.github/workflows/build-portable-release-cuda.yml
    with:
      version: ${{ inputs.version }}
      config: 'os:windows-2022'

  build_release_cuda_linux:
    name: CUDA Linux
    uses: ./.github/workflows/build-portable-release-cuda.yml
    with:
      version: ${{ inputs.version }}
      config: 'os:ubuntu-22.04'

  build_release_vulkan_windows:
    name: Vulkan Windows
    uses: ./.github/workflows/build-portable-release-vulkan.yml
    with:
      version: ${{ inputs.version }}
      config: 'os:windows-2022'

  build_release_vulkan_linux:
    name: Vulkan Linux
    uses: ./.github/workflows/build-portable-release-vulkan.yml
    with:
      version: ${{ inputs.version }}
      config: 'os:ubuntu-22.04'

  build_release_rocm_linux:
    name: ROCm Linux
    uses: ./.github/workflows/build-portable-release-rocm.yml
    with:
      version: ${{ inputs.version }}
      config: 'os:ubuntu-22.04'

  build_release_cpu_windows:
    name: CPU Windows
    uses: ./.github/workflows/build-portable-release.yml
    with:
      version: ${{ inputs.version }}
      config: 'os:windows-2022'

  build_release_cpu_linux:
    name: CPU Linux
    uses: ./.github/workflows/build-portable-release.yml
    with:
      version: ${{ inputs.version }}
      config: 'os:ubuntu-22.04'

  build_release_macos:
    name: macOS
    uses: ./.github/workflows/build-portable-release.yml
    with:
      version: ${{ inputs.version }}
      config: 'os:macos-15-intel,macos-14'
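The aggregate workflow above only fans out to the per-backend builders via `workflow_call`, so a release build is kicked off manually with a single `version` input. As a rough sketch, a dispatch from the GitHub CLI could look like this (the `gh` invocation is illustrative and not part of the diff; it assumes the workflow file is present on the default branch):

```bash
# Illustrative dispatch of the aggregate build workflow via the GitHub CLI.
# "Build Everything TGW" is the workflow's name field; -f sets the
# workflow_dispatch input declared above.
gh workflow run "Build Everything TGW" \
  --repo oobabooga/text-generation-webui \
  -f version=v3.0
```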


@@ -0,0 +1,175 @@
name: Build CUDA

on:
  workflow_dispatch:
    inputs:
      version:
        description: 'Version tag of text-generation-webui to build: v3.0'
        default: 'v3.0'
        required: true
        type: string
      config:
        description: 'Override configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
        default: 'Default'
        required: false
        type: string
      exclude:
        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
        default: 'None'
        required: false
        type: string
  workflow_call:
    inputs:
      version:
        description: 'Version tag of text-generation-webui to build: v3.0'
        default: 'v3.0'
        required: true
        type: string
      config:
        description: 'Configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
        default: 'Default'
        required: false
        type: string
      exclude:
        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
        default: 'None'
        required: false
        type: string

permissions:
  contents: write

jobs:
  define_matrix:
    name: Define Build Matrix
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    defaults:
      run:
        shell: pwsh
    env:
      CONFIGIN: ${{ inputs.config }}
      EXCLUDEIN: ${{ inputs.exclude }}
    steps:
      - name: Define Job Output
        id: set-matrix
        run: |
          $matrix = @{
              'os' = @('ubuntu-22.04', 'windows-2022')
              'pyver' = @("3.13")
              'cuda' = @("12.4", "13.1")
          }

          if ($env:CONFIGIN -ne 'Default') {$env:CONFIGIN.split(';').foreach({$matrix[$_.split(':')[0]] = $_.split(':')[1].split(',')})}

          if ($env:EXCLUDEIN -ne 'None') {
              $exclusions = @()
              $exclusions += $env:EXCLUDEIN.split(';').replace(':','=').replace(',',"`n") | ConvertFrom-StringData
              $matrix['exclude'] = $exclusions
          }

          $matrixOut = ConvertTo-Json $matrix -Compress
          Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT

  build_wheels:
    name: ${{ matrix.os }} ${{ matrix.pyver }} CUDA ${{ matrix.cuda }}
    needs: define_matrix
    runs-on: ${{ matrix.os }}
    strategy:
      matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
    defaults:
      run:
        shell: pwsh
    env:
      PCKGVER: ${{ inputs.version }}
    steps:
      - uses: actions/checkout@v6
        with:
          repository: 'oobabooga/text-generation-webui'
          ref: ${{ inputs.version }}
          submodules: 'recursive'

      - uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.pyver }}

      - name: Build Package
        shell: bash
        run: |
          VERSION_CLEAN="${{ inputs.version }}"
          VERSION_CLEAN="${VERSION_CLEAN#v}"

          cd ..
          cp -r text-generation-webui "text-generation-webui-${VERSION_CLEAN}"
          cd "text-generation-webui-${VERSION_CLEAN}"

          # Remove extensions that need additional requirements
          allowed=("character_bias" "gallery" "openai" "sd_api_pictures")
          find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf

          # Define common variables
          CUDA_VERSION="${{ matrix.cuda }}"
          VERSION="${{ inputs.version }}"

          # 1. Set platform-specific variables
          if [[ "$RUNNER_OS" == "Windows" ]]; then
            PLATFORM="windows"
            PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-pc-windows-msvc-install_only.tar.gz"
            PIP_PATH="portable_env/python.exe -m pip"
            PACKAGES_PATH="portable_env/Lib/site-packages"
            rm start_linux.sh start_macos.sh
          else
            PLATFORM="linux"
            PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-unknown-linux-gnu-install_only.tar.gz"
            PIP_PATH="portable_env/bin/python -m pip"
            PACKAGES_PATH="portable_env/lib/python3.13/site-packages"
            rm start_macos.sh start_windows.bat
          fi

          # 2. Download and extract Python
          cd ..
          echo "Downloading Python for $PLATFORM..."
          curl -L -o python-build.tar.gz "$PYTHON_URL"
          tar -xzf python-build.tar.gz
          mv python "text-generation-webui-${VERSION_CLEAN}/portable_env"

          # 3. Prepare requirements file based on CUDA version
          cd "text-generation-webui-${VERSION_CLEAN}"
          if [[ "$CUDA_VERSION" == "13.1" ]]; then
            REQ_FILE="requirements/portable/requirements_cuda131.txt"
          else
            REQ_FILE="requirements/portable/requirements.txt"
          fi

          # 4. Install packages
          echo "Installing Python packages from $REQ_FILE..."
          $PIP_PATH install --target="./$PACKAGES_PATH" -r "$REQ_FILE"

          # 5. Clean up
          rm -rf .git cmd* update_wizard* Colab-TextGen-GPU.ipynb docker setup.cfg .github .gitignore requirements/ one_click.py

          # 6. Create archive
          cd ..
          if [[ "$RUNNER_OS" == "Windows" ]]; then
            ARCHIVE_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}-cuda${CUDA_VERSION}.zip"
            echo "Creating archive: $ARCHIVE_NAME"
            powershell -Command "Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
          else
            ARCHIVE_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}-cuda${CUDA_VERSION}.tar.gz"
            echo "Creating archive: $ARCHIVE_NAME"
            tar czf "$ARCHIVE_NAME" "text-generation-webui-${VERSION_CLEAN}"
          fi

      - name: Upload files to a GitHub release
        id: upload-release
        uses: svenstaro/upload-release-action@2.7.0
        continue-on-error: true
        with:
          repo_token: ${{ secrets.GITHUB_TOKEN }}
          file: ../textgen-portable-*
          tag: ${{ inputs.version }}
          file_glob: true
          make_latest: false
          overwrite: true
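The `config` and `exclude` inputs use a small `key1:item1-1,item1-2;key2:...` grammar: the PowerShell step above splits `config` entries on `;` and `:` to overwrite whole matrix axes, and turns `exclude` entries into matrix exclusions. A hedged example of narrowing a manual dispatch to a single combination (values are illustrative):

```bash
# Illustrative: build only the Linux + CUDA 13.1 combination by overriding
# the 'os' and 'cuda' matrix axes through the config input.
gh workflow run "Build CUDA" \
  --repo oobabooga/text-generation-webui \
  -f version=v3.0 \
  -f config='os:ubuntu-22.04;cuda:13.1'
```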


@@ -0,0 +1,170 @@
name: Build ROCm

on:
  workflow_dispatch:
    inputs:
      version:
        description: 'Version tag of text-generation-webui to build: v3.0'
        default: 'v3.0'
        required: true
        type: string
      config:
        description: 'Override configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
        default: 'Default'
        required: false
        type: string
      exclude:
        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
        default: 'None'
        required: false
        type: string
  workflow_call:
    inputs:
      version:
        description: 'Version tag of text-generation-webui to build: v3.0'
        default: 'v3.0'
        required: true
        type: string
      config:
        description: 'Configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
        default: 'Default'
        required: false
        type: string
      exclude:
        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
        default: 'None'
        required: false
        type: string

permissions:
  contents: write

jobs:
  define_matrix:
    name: Define Build Matrix
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    defaults:
      run:
        shell: pwsh
    env:
      CONFIGIN: ${{ inputs.config }}
      EXCLUDEIN: ${{ inputs.exclude }}
    steps:
      - name: Define Job Output
        id: set-matrix
        run: |
          $matrix = @{
              'os' = @('ubuntu-22.04', 'windows-2022')
              'pyver' = @("3.13")
          }

          if ($env:CONFIGIN -ne 'Default') {$env:CONFIGIN.split(';').foreach({$matrix[$_.split(':')[0]] = $_.split(':')[1].split(',')})}

          if ($env:EXCLUDEIN -ne 'None') {
              $exclusions = @()
              $exclusions += $env:EXCLUDEIN.split(';').replace(':','=').replace(',',"`n") | ConvertFrom-StringData
              $matrix['exclude'] = $exclusions
          }

          $matrixOut = ConvertTo-Json $matrix -Compress
          Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT

  build_wheels:
    name: ${{ matrix.os }} ${{ matrix.pyver }}
    needs: define_matrix
    runs-on: ${{ matrix.os }}
    strategy:
      matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
    defaults:
      run:
        shell: pwsh
    env:
      PCKGVER: ${{ inputs.version }}
    steps:
      - uses: actions/checkout@v6
        with:
          repository: 'oobabooga/text-generation-webui'
          ref: ${{ inputs.version }}
          submodules: 'recursive'

      - uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.pyver }}

      - name: Build Package
        shell: bash
        run: |
          VERSION_CLEAN="${{ inputs.version }}"
          VERSION_CLEAN="${VERSION_CLEAN#v}"

          cd ..
          cp -r text-generation-webui "text-generation-webui-${VERSION_CLEAN}"
          cd "text-generation-webui-${VERSION_CLEAN}"

          # Remove extensions that need additional requirements
          allowed=("character_bias" "gallery" "openai" "sd_api_pictures")
          find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf

          # Define common variables
          VERSION="${{ inputs.version }}"

          # 1. Set platform-specific variables
          if [[ "$RUNNER_OS" == "Windows" ]]; then
            PLATFORM="windows"
            PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-pc-windows-msvc-install_only.tar.gz"
            PIP_PATH="portable_env/python.exe -m pip"
            PACKAGES_PATH="portable_env/Lib/site-packages"
            rm start_linux.sh start_macos.sh
          else
            PLATFORM="linux"
            PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-unknown-linux-gnu-install_only.tar.gz"
            PIP_PATH="portable_env/bin/python -m pip"
            PACKAGES_PATH="portable_env/lib/python3.13/site-packages"
            rm start_macos.sh start_windows.bat
          fi

          # 2. Download and extract Python
          cd ..
          echo "Downloading Python for $PLATFORM..."
          curl -L -o python-build.tar.gz "$PYTHON_URL"
          tar -xzf python-build.tar.gz
          mv python "text-generation-webui-${VERSION_CLEAN}/portable_env"

          # 3. Prepare requirements file
          REQ_FILE="requirements/portable/requirements_amd.txt"
          cd "text-generation-webui-${VERSION_CLEAN}"

          # 4. Install packages
          echo "Installing Python packages from $REQ_FILE..."
          $PIP_PATH install --target="./$PACKAGES_PATH" -r "$REQ_FILE"

          # 5. Clean up
          rm -rf .git cmd* update_wizard* Colab-TextGen-GPU.ipynb docker setup.cfg .github .gitignore requirements/ one_click.py

          # 6. Create archive
          cd ..
          if [[ "$RUNNER_OS" == "Windows" ]]; then
            ARCHIVE_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}-rocm7.2.zip"
            echo "Creating archive: $ARCHIVE_NAME"
            powershell -Command "Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
          else
            ARCHIVE_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}-rocm7.2.tar.gz"
            echo "Creating archive: $ARCHIVE_NAME"
            tar czf "$ARCHIVE_NAME" "text-generation-webui-${VERSION_CLEAN}"
          fi

      - name: Upload files to a GitHub release
        id: upload-release
        uses: svenstaro/upload-release-action@2.7.0
        continue-on-error: true
        with:
          repo_token: ${{ secrets.GITHUB_TOKEN }}
          file: ../textgen-portable-*
          tag: ${{ inputs.version }}
          file_glob: true
          make_latest: false
          overwrite: true


@@ -0,0 +1,170 @@
name: Build Vulkan

on:
  workflow_dispatch:
    inputs:
      version:
        description: 'Version tag of text-generation-webui to build: v3.0'
        default: 'v3.0'
        required: true
        type: string
      config:
        description: 'Override configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
        default: 'Default'
        required: false
        type: string
      exclude:
        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
        default: 'None'
        required: false
        type: string
  workflow_call:
    inputs:
      version:
        description: 'Version tag of text-generation-webui to build: v3.0'
        default: 'v3.0'
        required: true
        type: string
      config:
        description: 'Configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
        default: 'Default'
        required: false
        type: string
      exclude:
        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
        default: 'None'
        required: false
        type: string

permissions:
  contents: write

jobs:
  define_matrix:
    name: Define Build Matrix
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    defaults:
      run:
        shell: pwsh
    env:
      CONFIGIN: ${{ inputs.config }}
      EXCLUDEIN: ${{ inputs.exclude }}
    steps:
      - name: Define Job Output
        id: set-matrix
        run: |
          $matrix = @{
              'os' = @('ubuntu-22.04', 'windows-2022')
              'pyver' = @("3.13")
          }

          if ($env:CONFIGIN -ne 'Default') {$env:CONFIGIN.split(';').foreach({$matrix[$_.split(':')[0]] = $_.split(':')[1].split(',')})}

          if ($env:EXCLUDEIN -ne 'None') {
              $exclusions = @()
              $exclusions += $env:EXCLUDEIN.split(';').replace(':','=').replace(',',"`n") | ConvertFrom-StringData
              $matrix['exclude'] = $exclusions
          }

          $matrixOut = ConvertTo-Json $matrix -Compress
          Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT

  build_wheels:
    name: ${{ matrix.os }} ${{ matrix.pyver }}
    needs: define_matrix
    runs-on: ${{ matrix.os }}
    strategy:
      matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
    defaults:
      run:
        shell: pwsh
    env:
      PCKGVER: ${{ inputs.version }}
    steps:
      - uses: actions/checkout@v6
        with:
          repository: 'oobabooga/text-generation-webui'
          ref: ${{ inputs.version }}
          submodules: 'recursive'

      - uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.pyver }}

      - name: Build Package
        shell: bash
        run: |
          VERSION_CLEAN="${{ inputs.version }}"
          VERSION_CLEAN="${VERSION_CLEAN#v}"

          cd ..
          cp -r text-generation-webui "text-generation-webui-${VERSION_CLEAN}"
          cd "text-generation-webui-${VERSION_CLEAN}"

          # Remove extensions that need additional requirements
          allowed=("character_bias" "gallery" "openai" "sd_api_pictures")
          find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf

          # Define common variables
          VERSION="${{ inputs.version }}"

          # 1. Set platform-specific variables
          if [[ "$RUNNER_OS" == "Windows" ]]; then
            PLATFORM="windows"
            PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-pc-windows-msvc-install_only.tar.gz"
            PIP_PATH="portable_env/python.exe -m pip"
            PACKAGES_PATH="portable_env/Lib/site-packages"
            rm start_linux.sh start_macos.sh
          else
            PLATFORM="linux"
            PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-unknown-linux-gnu-install_only.tar.gz"
            PIP_PATH="portable_env/bin/python -m pip"
            PACKAGES_PATH="portable_env/lib/python3.13/site-packages"
            rm start_macos.sh start_windows.bat
          fi

          # 2. Download and extract Python
          cd ..
          echo "Downloading Python for $PLATFORM..."
          curl -L -o python-build.tar.gz "$PYTHON_URL"
          tar -xzf python-build.tar.gz
          mv python "text-generation-webui-${VERSION_CLEAN}/portable_env"

          # 3. Prepare requirements file
          REQ_FILE="requirements/portable/requirements_vulkan.txt"
          cd "text-generation-webui-${VERSION_CLEAN}"

          # 4. Install packages
          echo "Installing Python packages from $REQ_FILE..."
          $PIP_PATH install --target="./$PACKAGES_PATH" -r "$REQ_FILE"

          # 5. Clean up
          rm -rf .git cmd* update_wizard* Colab-TextGen-GPU.ipynb docker setup.cfg .github .gitignore requirements/ one_click.py

          # 6. Create archive
          cd ..
          if [[ "$RUNNER_OS" == "Windows" ]]; then
            ARCHIVE_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}-vulkan.zip"
            echo "Creating archive: $ARCHIVE_NAME"
            powershell -Command "Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
          else
            ARCHIVE_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}-vulkan.tar.gz"
            echo "Creating archive: $ARCHIVE_NAME"
            tar czf "$ARCHIVE_NAME" "text-generation-webui-${VERSION_CLEAN}"
          fi

      - name: Upload files to a GitHub release
        id: upload-release
        uses: svenstaro/upload-release-action@2.7.0
        continue-on-error: true
        with:
          repo_token: ${{ secrets.GITHUB_TOKEN }}
          file: ../textgen-portable-*
          tag: ${{ inputs.version }}
          file_glob: true
          make_latest: false
          overwrite: true


@@ -0,0 +1,196 @@
name: Build CPU and macOS

on:
  workflow_dispatch:
    inputs:
      version:
        description: 'Version tag of text-generation-webui to build: v3.0'
        default: 'v3.0'
        required: true
        type: string
      config:
        description: 'Override configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
        default: 'Default'
        required: false
        type: string
      exclude:
        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
        default: 'None'
        required: false
        type: string
  workflow_call:
    inputs:
      version:
        description: 'Version tag of text-generation-webui to build: v3.0'
        default: 'v3.0'
        required: true
        type: string
      config:
        description: 'Configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
        default: 'Default'
        required: false
        type: string
      exclude:
        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
        default: 'None'
        required: false
        type: string

permissions:
  contents: write

jobs:
  define_matrix:
    name: Define Build Matrix
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    defaults:
      run:
        shell: pwsh
    env:
      CONFIGIN: ${{ inputs.config }}
      EXCLUDEIN: ${{ inputs.exclude }}
    steps:
      - name: Define Job Output
        id: set-matrix
        run: |
          $matrix = @{
              'os' = @('ubuntu-22.04', 'windows-2022', 'macos-14')
              'pyver' = @("3.13")
          }

          if ($env:CONFIGIN -ne 'Default') {$env:CONFIGIN.split(';').foreach({$matrix[$_.split(':')[0]] = $_.split(':')[1].split(',')})}

          if ($env:EXCLUDEIN -ne 'None') {
              $exclusions = @()
              $exclusions += $env:EXCLUDEIN.split(';').replace(':','=').replace(',',"`n") | ConvertFrom-StringData
              $matrix['exclude'] = $exclusions
          }

          $matrixOut = ConvertTo-Json $matrix -Compress
          Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT

  build_wheels:
    name: ${{ matrix.os }} ${{ matrix.pyver }}
    needs: define_matrix
    runs-on: ${{ matrix.os }}
    strategy:
      matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
    defaults:
      run:
        shell: pwsh
    env:
      PCKGVER: ${{ inputs.version }}
    steps:
      - uses: actions/checkout@v6
        with:
          repository: 'oobabooga/text-generation-webui'
          ref: ${{ inputs.version }}
          submodules: 'recursive'

      - uses: actions/setup-python@v6
        with:
          python-version: ${{ matrix.pyver }}

      - name: Build Package
        shell: bash
        run: |
          VERSION_CLEAN="${{ inputs.version }}"
          VERSION_CLEAN="${VERSION_CLEAN#v}"

          cd ..
          cp -r text-generation-webui "text-generation-webui-${VERSION_CLEAN}"
          cd "text-generation-webui-${VERSION_CLEAN}"

          # Remove extensions that need additional requirements
          allowed=("character_bias" "gallery" "openai" "sd_api_pictures")
          find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf

          # Define common variables
          VERSION="${{ inputs.version }}"
          OS_TYPE="${{ matrix.os }}"

          # 1. Set platform-specific variables
          if [[ "$RUNNER_OS" == "Windows" ]]; then
            PLATFORM="windows-cpu"
            PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-pc-windows-msvc-install_only.tar.gz"
            PIP_PATH="portable_env/python.exe -m pip"
            PACKAGES_PATH="portable_env/Lib/site-packages"
            rm start_linux.sh start_macos.sh
          elif [[ "$RUNNER_OS" == "macOS" ]]; then
            if [[ "$OS_TYPE" == "macos-15-intel" ]]; then
              PLATFORM="macos-x86_64"
              PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-apple-darwin-install_only.tar.gz"
              REQ_TYPE="apple_intel"
            else
              PLATFORM="macos-arm64"
              PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-aarch64-apple-darwin-install_only.tar.gz"
              REQ_TYPE="apple_silicon"
            fi
            PIP_PATH="portable_env/bin/python -m pip"
            PACKAGES_PATH="portable_env/lib/python3.13/site-packages"
            rm start_linux.sh start_windows.bat
          else
            # Linux case
            PLATFORM="linux-cpu"
            PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-unknown-linux-gnu-install_only.tar.gz"
            PIP_PATH="portable_env/bin/python -m pip"
            PACKAGES_PATH="portable_env/lib/python3.13/site-packages"
            rm start_macos.sh start_windows.bat
          fi

          # 2. Download and extract Python
          echo "Downloading Python for $PLATFORM..."
          cd ..
          curl -L -o python-build.tar.gz "$PYTHON_URL"
          tar -xzf python-build.tar.gz
          mv python "text-generation-webui-${VERSION_CLEAN}/portable_env"

          # 3. Prepare requirements file based on platform
          cd "text-generation-webui-${VERSION_CLEAN}"

          # Select requirements file based on platform
          if [[ "$RUNNER_OS" == "macOS" ]]; then
            if [[ "$OS_TYPE" == "macos-15-intel" ]]; then
              REQ_FILE="requirements/portable/requirements_apple_intel.txt"
            else
              REQ_FILE="requirements/portable/requirements_apple_silicon.txt"
            fi
          else
            REQ_FILE="requirements/portable/requirements_cpu_only.txt"
          fi

          echo "Using requirements file: $REQ_FILE"

          # 4. Install packages
          echo "Installing Python packages from $REQ_FILE..."
          $PIP_PATH install --target="./$PACKAGES_PATH" -r "$REQ_FILE"

          # 5. Clean up
          rm -rf .git cmd* update_wizard* Colab-TextGen-GPU.ipynb docker setup.cfg .github .gitignore requirements/ one_click.py

          # 6. Create archive
          cd ..
          if [[ "$RUNNER_OS" == "Windows" ]]; then
            ARCHIVE_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}.zip"
            echo "Creating archive: $ARCHIVE_NAME"
            powershell -Command "Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
          else
            ARCHIVE_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}.tar.gz"
            echo "Creating archive: $ARCHIVE_NAME"
            tar czf "$ARCHIVE_NAME" "text-generation-webui-${VERSION_CLEAN}"
          fi

      - name: Upload files to a GitHub release
        id: upload-release
        uses: svenstaro/upload-release-action@2.7.0
        continue-on-error: true
        with:
          repo_token: ${{ secrets.GITHUB_TOKEN }}
          file: ../textgen-portable-*
          tag: ${{ inputs.version }}
          file_glob: true
          make_latest: false
          overwrite: true


@@ -1,22 +0,0 @@
-name: Close inactive issues
-on:
-  schedule:
-    - cron: "10 23 * * *"
-
-jobs:
-  close-issues:
-    runs-on: ubuntu-latest
-    permissions:
-      issues: write
-      pull-requests: write
-    steps:
-      - uses: actions/stale@v5
-        with:
-          stale-issue-message: ""
-          close-issue-message: "This issue has been closed due to inactivity for 6 weeks. If you believe it is still relevant, please leave a comment below. You can tag a developer in your comment."
-          days-before-issue-stale: 42
-          days-before-issue-close: 0
-          stale-issue-label: "stale"
-          days-before-pr-stale: -1
-          days-before-pr-close: -1
-          repo-token: ${{ secrets.GITHUB_TOKEN }}

.gitignore

@@ -1,38 +1,33 @@
-cache
-characters
-training/datasets
-extensions/silero_tts/outputs
-extensions/elevenlabs_tts/outputs
-extensions/sd_api_pictures/outputs
-extensions/multimodal/pipelines
-logs
-loras
-models
-presets
-repositories
-softprompts
-torch-dumps
-*pycache*
-*/*pycache*
-*/*/pycache*
-venv/
-.venv/
+/css
+/extensions
+/installer_files
+/repositories
+/user_data
+
+.chroma
+.DS_Store
+.eslintrc.js
+.idea
+.installer_state.json
+.venv
+venv
+.envrc
+.direnv
+.vs
 .vscode
-.idea/
 *.bak
 *.ipynb
 *.log
-settings.json
-settings.yaml
-notification.mp3
-img_bot*
-img_me*
-prompts/[0-9]*
-models/config-user.yaml
-.DS_Store
+*pycache*
+cert.pem
+key.pem
+package.json
+package-lock.json
 Thumbs.db
-.chroma
-installer_files
-/CMD_FLAGS.txt
+wandb
+
+# ignore user docker config and top level links to docker files
+/docker-compose.yaml
+/docker-compose.yml
+/Dockerfile
+.env
CMD_FLAGS.txt

@@ -1,3 +0,0 @@
-# Only used by the one-click installer.
-# Example:
-# --listen --api

Colab-TextGen-GPU.ipynb

@@ -0,0 +1,119 @@
{
"nbformat": 4,
"nbformat_minor": 0,
"metadata": {
"colab": {
"private_outputs": true,
"provenance": [],
"gpuType": "T4"
},
"kernelspec": {
"name": "python3",
"display_name": "Python 3"
},
"language_info": {
"name": "python"
},
"accelerator": "GPU"
},
"cells": [
{
"cell_type": "markdown",
"source": [
"# oobabooga/text-generation-webui\n",
"\n",
"After running both cells, a public gradio URL will appear at the bottom in around 10 minutes. You can optionally generate an API link.\n",
"\n",
"* Project page: https://github.com/oobabooga/text-generation-webui\n",
"* Gradio server status: https://status.gradio.app/"
],
"metadata": {
"id": "MFQl6-FjSYtY"
}
},
{
"cell_type": "code",
"source": [
"#@title 1. Keep this tab alive to prevent Colab from disconnecting you { display-mode: \"form\" }\n",
"\n",
"#@markdown Press play on the music player that will appear below:\n",
"%%html\n",
"<audio src=\"https://oobabooga.github.io/silence.m4a\" controls>"
],
"metadata": {
"id": "f7TVVj_z4flw"
},
"execution_count": null,
"outputs": []
},
{
"cell_type": "code",
"source": [
"#@title 2. Launch the web UI\n",
"\n",
"#@markdown You can provide a direct GGUF link or a Hugging Face model URL.\n",
"\n",
"import os\n",
"from pathlib import Path\n",
"\n",
"os.environ.pop('PYTHONPATH', None)\n",
"os.environ.pop('MPLBACKEND', None)\n",
"\n",
"if Path.cwd().name != 'text-generation-webui':\n",
" print(\"\\033[1;32;1m\\n --> Installing the web UI. This will take a while, but after the initial setup, you can download and test as many models as you like.\\033[0;37;0m\\n\")\n",
"\n",
" !git clone https://github.com/oobabooga/text-generation-webui\n",
" %cd text-generation-webui\n",
"\n",
" # Install the project in an isolated environment\n",
" !GPU_CHOICE=A \\\n",
" LAUNCH_AFTER_INSTALL=FALSE \\\n",
" INSTALL_EXTENSIONS=FALSE \\\n",
" ./start_linux.sh\n",
"\n",
"# Parameters\n",
"model_url = \"https://huggingface.co/unsloth/Qwen3.5-9B-GGUF/resolve/main/Qwen3.5-9B-Q4_K_M.gguf\" #@param {type:\"string\"}\n",
"branch = \"\" #@param {type:\"string\"}\n",
"command_line_flags = \"--load-in-4bit --use_double_quant\" #@param {type:\"string\"}\n",
"api = False #@param {type:\"boolean\"}\n",
"\n",
"if api:\n",
" for param in ['--api', '--public-api']:\n",
" if param not in command_line_flags:\n",
" command_line_flags += f\" {param}\"\n",
"\n",
"model_url = model_url.strip()\n",
"model_name = \"\"\n",
"if model_url != \"\":\n",
" if not model_url.startswith('http'):\n",
" model_url = 'https://huggingface.co/' + model_url\n",
"\n",
" branch = branch.strip()\n",
" if '/resolve/' in model_url:\n",
" model_name = model_url.split('?')[0].split('/')[-1]\n",
" !python download-model.py {model_url}\n",
" else:\n",
" url_parts = model_url.strip('/').split('/')\n",
" model_name = f\"{url_parts[-2]}_{url_parts[-1]}\"\n",
" if branch not in ['', 'main']:\n",
" model_name += f\"_{branch}\"\n",
" !python download-model.py {model_url} --branch {branch}\n",
" else:\n",
" !python download-model.py {model_url}\n",
"\n",
"# Start the web UI\n",
"cmd = f\"./start_linux.sh {command_line_flags} --share\"\n",
"if model_name != \"\":\n",
" cmd += f\" --model {model_name}\"\n",
"\n",
"!$cmd"
],
"metadata": {
"id": "LGQ8BiMuXMDG",
"cellView": "form"
},
"execution_count": null,
"outputs": []
}
]
}
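Outside Colab, the command that the notebook's second cell assembles reduces to an ordinary launch of the start script. A minimal local equivalent, assuming the GGUF file from the default `model_url` has already been downloaded (the model file name is illustrative):

```bash
# Local equivalent of the notebook's final assembled command.
# --share is only needed for a public Gradio URL, as on Colab;
# the model name below is illustrative.
./start_linux.sh --api --share --model Qwen3.5-9B-Q4_K_M.gguf
```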

README.md

@ -1,89 +1,142 @@
**Breaking change: WebUI now uses PyTorch 2.1.** <div align="center" markdown="1">
<sup>Special thanks to:</sup>
<br>
<br>
<a href="https://go.warp.dev/text-generation-webui">
<img alt="Warp sponsorship" width="400" src="https://raw.githubusercontent.com/warpdotdev/brand-assets/refs/heads/main/Github/Sponsor/Warp-Github-LG-02.png">
</a>
* For one-click installer users: If you encounter problems after updating, rerun the update script. If issues persist, delete the `installer_files` folder and use the start script to reinstall requirements. ### [Warp, built for coding with multiple AI agents](https://go.warp.dev/text-generation-webui)
* For manual installations, update PyTorch with the [provided command](https://github.com/oobabooga/text-generation-webui/#2-install-pytorch). [Available for macOS, Linux, & Windows](https://go.warp.dev/text-generation-webui)<br>
</div>
<hr>
# Text generation web UI # Text Generation Web UI
A Gradio web UI for Large Language Models. A Gradio web UI for running Large Language Models locally. 100% private and offline. Supports text generation, vision, tool-calling, training, image generation, and more.
Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) of text generation. [Try the Deep Reason extension](https://oobabooga.gumroad.com/l/deep_reason)
|![Image1](https://github.com/oobabooga/screenshots/raw/main/print_instruct.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/print_chat.png) | |![Image1](https://github.com/oobabooga/screenshots/raw/main/INSTRUCT-3.5.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/CHAT-3.5.png) |
|:---:|:---:| |:---:|:---:|
|![Image1](https://github.com/oobabooga/screenshots/raw/main/print_default.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/print_parameters.png) | |![Image1](https://github.com/oobabooga/screenshots/raw/main/DEFAULT-3.5.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/PARAMETERS-3.5.png) |
## Features ## Features
- **Multiple backends**: [llama.cpp](https://github.com/ggerganov/llama.cpp), [Transformers](https://github.com/huggingface/transformers), [ExLlamaV3](https://github.com/turboderp-org/exllamav3), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). Switch between backends and models without restarting.
- **File attachments**: Upload text files, PDF documents, and .docx documents to talk about their contents.
- **Vision (multimodal)**: Attach images to messages for visual understanding ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Multimodal-Tutorial)).
- **Tool-calling**: Models can call custom functions during chat: web search, page fetching, math, and more. Each tool is a single `.py` file, easy to create and extend ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Tool-Calling-Tutorial)).
- **OpenAI-compatible API**: Chat and Completions endpoints with tool-calling support. Use as a local drop-in replacement for the OpenAI API ([examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples)); a minimal client sketch appears after this list.
- **Training**: Fine-tune LoRAs on multi-turn chat or raw text datasets. Supports resuming interrupted runs ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/05-%E2%80%90-Training-Tab)).
- **Image generation**: A dedicated tab for `diffusers` models like **Z-Image-Turbo**. Features 4-bit/8-bit quantization and a persistent gallery with metadata ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Image-Generation-Tutorial)).
- **Easy setup**: [Portable builds](https://github.com/oobabooga/text-generation-webui/releases) (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or a one-click installer for the full feature set.
- 100% offline and private, with zero telemetry, external resources, or remote update requests.
- `instruct` mode for instruction-following (like ChatGPT), and `chat-instruct`/`chat` modes for talking to custom characters. Prompts are automatically formatted with Jinja2 templates.
- Edit messages, navigate between message versions, and branch conversations at any point.
- Free-form text generation in the Notebook tab without being limited to chat turns.
- Multiple sampling parameters and generation options for sophisticated text generation control.
- Aesthetic UI with dark and light themes.
- Syntax highlighting for code blocks and LaTeX rendering for mathematical expressions.
- Extension support, with numerous built-in and user-contributed extensions available. See the [wiki](https://github.com/oobabooga/text-generation-webui/wiki/07-%E2%80%90-Extensions) and [extensions directory](https://github.com/oobabooga/text-generation-webui-extensions) for details.
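
The OpenAI-compatible API can be exercised with any plain HTTP client. The sketch below is illustrative rather than an official example: it assumes the server was started with `--api`, that the API listens on its default port 5000, and that a model is already loaded (adjust the URL if you used `--api-port` or `--listen`).

```python
# Minimal chat request against the local OpenAI-compatible API.
# Assumptions: server started with --api, default port 5000, model loaded.
import requests

url = "http://127.0.0.1:5000/v1/chat/completions"
payload = {
    "messages": [{"role": "user", "content": "Write a haiku about local LLMs."}],
    "max_tokens": 200,
    "temperature": 0.7,
}

response = requests.post(url, json=payload, timeout=120)
response.raise_for_status()
print(response.json()["choices"][0]["message"]["content"])
```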
## How to install
#### ✅ Option 1: Portable builds (get started in 1 minute)
No installation needed: just download, unzip, and run. All dependencies are included.
Download from here: **https://github.com/oobabooga/text-generation-webui/releases**
- Builds are provided for Linux, Windows, and macOS, with options for CUDA, Vulkan, ROCm, and CPU-only.
- Compatible with GGUF (llama.cpp) models.
#### Option 2: Manual portable install with venv

A very fast setup that should work on any system with Python 3.9+:
```bash
# Clone the repository
git clone https://github.com/oobabooga/text-generation-webui
cd text-generation-webui

# Create a virtual environment
python -m venv venv

# Activate the virtual environment
# On Windows:
venv\Scripts\activate
# On macOS/Linux:
source venv/bin/activate

# Install dependencies (choose the appropriate file under requirements/portable for your hardware)
pip install -r requirements/portable/requirements.txt --upgrade

# Launch the server (basic command)
python server.py --portable --api --auto-launch

# When done working, deactivate
deactivate
```
#### Option 3: One-click installer
For users who need additional backends (ExLlamaV3, Transformers), training, image generation, or extensions (TTS, voice input, translation, etc.). Requires ~10 GB of disk space and downloads PyTorch.
1. Clone the repository, or [download its source code](https://github.com/oobabooga/text-generation-webui/archive/refs/heads/main.zip) and extract it.
2. Run the startup script for your OS: `start_windows.bat`, `start_linux.sh`, or `start_macos.sh`.
3. When prompted, select your GPU vendor.
4. After installation, open `http://127.0.0.1:7860` in your browser.
To restart the web UI later, run the same `start_` script.
You can pass command-line flags directly (e.g., `./start_linux.sh --help`), or add them to `user_data/CMD_FLAGS.txt` (e.g., `--api` to enable the API).
To update, run the update script for your OS: `update_wizard_windows.bat`, `update_wizard_linux.sh`, or `update_wizard_macos.sh`.
To reinstall with a fresh Python environment, delete the `installer_files` folder and run the `start_` script again.
<details>
<summary>
One-click installer details
</summary>
### One-click-installer
The script uses Miniforge to set up a Conda environment in the `installer_files` folder.
If you ever need to install something manually in the `installer_files` environment, you can launch an interactive shell using the cmd script: `cmd_linux.sh`, `cmd_windows.bat`, or `cmd_macos.sh`.
* There is no need to run any of those scripts (`start_`, `update_wizard_`, or `cmd_`) as admin/root.
* To install requirements for extensions, it is recommended to use the update wizard script with the "Install/update extensions requirements" option. At the end, this script will install the main requirements for the project to make sure that they take precedence in case of version conflicts.
* For automated installation, you can use the `GPU_CHOICE`, `LAUNCH_AFTER_INSTALL`, and `INSTALL_EXTENSIONS` environment variables. For instance: `GPU_CHOICE=A LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh`.
</details>
<details>
<summary>
Manual full installation with conda or docker
</summary>
### Full installation with Conda
#### 0. Install Conda

https://github.com/conda-forge/miniforge

On Linux or WSL, Miniforge can be automatically installed with these two commands:
```
curl -sL "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh" > "Miniforge3.sh"
bash Miniforge3.sh
```
For other platforms, download from: https://github.com/conda-forge/miniforge/releases/latest
#### 1. Create a new conda environment

```
conda create -n textgen python=3.13
conda activate textgen
```
@@ -91,330 +144,323 @@ conda activate textgen
| System | GPU | Command |
|--------|---------|---------|
| Linux/WSL | NVIDIA | `pip3 install torch==2.9.1 --index-url https://download.pytorch.org/whl/cu128` |
| Linux/WSL | CPU only | `pip3 install torch==2.9.1 --index-url https://download.pytorch.org/whl/cpu` |
| Linux | AMD | `pip3 install https://repo.radeon.com/rocm/manylinux/rocm-rel-7.2/torch-2.9.1%2Brocm7.2.0.lw.git7e1940d4-cp313-cp313-linux_x86_64.whl` |
| MacOS + MPS | Any | `pip3 install torch==2.9.1` |
| Windows | NVIDIA | `pip3 install torch==2.9.1 --index-url https://download.pytorch.org/whl/cu128` |
| Windows | CPU only | `pip3 install torch==2.9.1` |

The up-to-date commands can be found here: https://pytorch.org/get-started/locally/.
If you need `nvcc` to compile some library manually, you will additionally need to install this:
```
conda install -y -c "nvidia/label/cuda-12.8.1" cuda
```
#### 3. Install the web UI

```
git clone https://github.com/oobabooga/text-generation-webui
cd text-generation-webui
pip install -r requirements/full/<requirements file according to table below>
```
Requirements file to use:

| GPU | requirements file to use |
|--------|---------|
| NVIDIA | `requirements.txt` |
| AMD | `requirements_amd.txt` |
| CPU only | `requirements_cpu_only.txt` |
| Apple Intel | `requirements_apple_intel.txt` |
| Apple Silicon | `requirements_apple_silicon.txt` |
### Start the web UI

```
conda activate textgen
cd text-generation-webui
python server.py
```
Then browse to

`http://127.0.0.1:7860`
#### Manual install

The `requirements*.txt` above contain various wheels precompiled through GitHub Actions. If you wish to compile things manually, or if no suitable wheels exist for your hardware, use `requirements_nowheels.txt` and then install your desired loaders manually.
### Alternative: Docker
```
For NVIDIA GPU:
ln -s docker/{nvidia/Dockerfile,nvidia/docker-compose.yml,.dockerignore} .
For AMD GPU:
ln -s docker/{amd/Dockerfile,amd/docker-compose.yml,.dockerignore} .
For Intel GPU:
ln -s docker/{intel/Dockerfile,intel/docker-compose.yml,.dockerignore} .
For CPU only:
ln -s docker/{cpu/Dockerfile,cpu/docker-compose.yml,.dockerignore} .

cp docker/.env.example .env

# Create logs/cache dirs:
mkdir -p user_data/logs user_data/cache

# Edit .env and set:
#   TORCH_CUDA_ARCH_LIST based on your GPU model
#   APP_RUNTIME_GID      your host user's group id (run `id -g` in a terminal)
#   BUILD_EXTENSIONS     optionally add a comma-separated list of extensions to build
# Edit user_data/CMD_FLAGS.txt and add in it the options you want to execute (like --listen --cpu)

docker compose up --build
```
* You need to have Docker Compose v2.17 or higher installed. See [this guide](https://github.com/oobabooga/text-generation-webui/wiki/09-%E2%80%90-Docker) for instructions.
* For additional docker files, check out [this repository](https://github.com/Atinoda/text-generation-webui-docker).
### Updating the requirements

From time to time, the `requirements*.txt` change. To update, use these commands:
```
conda activate textgen
cd text-generation-webui
pip install -r <requirements file that you have used> --upgrade
```
</details>
<details>
<summary>
List of command-line flags
</summary>
```txt
usage: server.py [-h] [--user-data-dir USER_DATA_DIR] [--multi-user] [--model MODEL] [--lora LORA [LORA ...]] [--model-dir MODEL_DIR] [--lora-dir LORA_DIR] [--model-menu] [--settings SETTINGS]
[--extensions EXTENSIONS [EXTENSIONS ...]] [--verbose] [--idle-timeout IDLE_TIMEOUT] [--image-model IMAGE_MODEL] [--image-model-dir IMAGE_MODEL_DIR] [--image-dtype {bfloat16,float16}]
[--image-attn-backend {flash_attention_2,sdpa}] [--image-cpu-offload] [--image-compile] [--image-quant {none,bnb-8bit,bnb-4bit,torchao-int8wo,torchao-fp4,torchao-float8wo}]
[--loader LOADER] [--ctx-size N] [--cache-type N] [--model-draft MODEL_DRAFT] [--draft-max DRAFT_MAX] [--gpu-layers-draft GPU_LAYERS_DRAFT] [--device-draft DEVICE_DRAFT]
[--ctx-size-draft CTX_SIZE_DRAFT] [--spec-type {none,ngram-mod,ngram-simple,ngram-map-k,ngram-map-k4v,ngram-cache}] [--spec-ngram-size-n SPEC_NGRAM_SIZE_N]
[--spec-ngram-size-m SPEC_NGRAM_SIZE_M] [--spec-ngram-min-hits SPEC_NGRAM_MIN_HITS] [--gpu-layers N] [--cpu-moe] [--mmproj MMPROJ] [--streaming-llm] [--tensor-split TENSOR_SPLIT]
[--row-split] [--no-mmap] [--mlock] [--no-kv-offload] [--batch-size BATCH_SIZE] [--ubatch-size UBATCH_SIZE] [--threads THREADS] [--threads-batch THREADS_BATCH] [--numa]
[--parallel PARALLEL] [--fit-target FIT_TARGET] [--extra-flags EXTRA_FLAGS] [--cpu] [--cpu-memory CPU_MEMORY] [--disk] [--disk-cache-dir DISK_CACHE_DIR] [--load-in-8bit] [--bf16]
[--no-cache] [--trust-remote-code] [--force-safetensors] [--no_use_fast] [--attn-implementation IMPLEMENTATION] [--load-in-4bit] [--use_double_quant] [--compute_dtype COMPUTE_DTYPE]
[--quant_type QUANT_TYPE] [--gpu-split GPU_SPLIT] [--enable-tp] [--tp-backend TP_BACKEND] [--cfg-cache] [--listen] [--listen-port LISTEN_PORT] [--listen-host LISTEN_HOST] [--share]
[--auto-launch] [--gradio-auth GRADIO_AUTH] [--gradio-auth-path GRADIO_AUTH_PATH] [--ssl-keyfile SSL_KEYFILE] [--ssl-certfile SSL_CERTFILE] [--subpath SUBPATH] [--old-colors]
[--portable] [--api] [--public-api] [--public-api-id PUBLIC_API_ID] [--api-port API_PORT] [--api-key API_KEY] [--admin-key ADMIN_KEY] [--api-enable-ipv6] [--api-disable-ipv4]
[--nowebui] [--temperature N] [--dynatemp-low N] [--dynatemp-high N] [--dynatemp-exponent N] [--smoothing-factor N] [--smoothing-curve N] [--min-p N] [--top-p N] [--top-k N]
[--typical-p N] [--xtc-threshold N] [--xtc-probability N] [--epsilon-cutoff N] [--eta-cutoff N] [--tfs N] [--top-a N] [--top-n-sigma N] [--adaptive-target N] [--adaptive-decay N]
[--dry-multiplier N] [--dry-allowed-length N] [--dry-base N] [--repetition-penalty N] [--frequency-penalty N] [--presence-penalty N] [--encoder-repetition-penalty N]
[--no-repeat-ngram-size N] [--repetition-penalty-range N] [--penalty-alpha N] [--guidance-scale N] [--mirostat-mode N] [--mirostat-tau N] [--mirostat-eta N]
[--do-sample | --no-do-sample] [--dynamic-temperature | --no-dynamic-temperature] [--temperature-last | --no-temperature-last] [--sampler-priority N] [--dry-sequence-breakers N]
[--enable-thinking | --no-enable-thinking] [--reasoning-effort N] [--chat-template-file CHAT_TEMPLATE_FILE]
Text Generation Web UI
options:
-h, --help show this help message and exit
Basic settings:
--user-data-dir USER_DATA_DIR Path to the user data directory. Default: auto-detected.
--multi-user Multi-user mode. Chat histories are not saved or automatically loaded. Best suited for small trusted teams.
--model MODEL Name of the model to load by default.
--lora LORA [LORA ...] The list of LoRAs to load. If you want to load more than one LoRA, write the names separated by spaces.
--model-dir MODEL_DIR Path to directory with all the models.
--lora-dir LORA_DIR Path to directory with all the loras.
--model-menu Show a model menu in the terminal when the web UI is first launched.
--settings SETTINGS Load the default interface settings from this yaml file. See user_data/settings-template.yaml for an example. If you create a file called
user_data/settings.yaml, this file will be loaded by default without the need to use the --settings flag.
--extensions EXTENSIONS [EXTENSIONS ...] The list of extensions to load. If you want to load more than one extension, write the names separated by spaces.
--verbose Print the prompts to the terminal.
--idle-timeout IDLE_TIMEOUT Unload model after this many minutes of inactivity. It will be automatically reloaded when you try to use it again.
Image model:
--image-model IMAGE_MODEL Name of the image model to select on startup (overrides saved setting).
--image-model-dir IMAGE_MODEL_DIR Path to directory with all the image models.
--image-dtype {bfloat16,float16} Data type for image model.
--image-attn-backend {flash_attention_2,sdpa} Attention backend for image model.
--image-cpu-offload Enable CPU offloading for image model.
--image-compile Compile the image model for faster inference.
--image-quant {none,bnb-8bit,bnb-4bit,torchao-int8wo,torchao-fp4,torchao-float8wo}
Quantization method for image model.
Model loader:
--loader LOADER Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, ExLlamav3_HF, ExLlamav3, TensorRT-LLM.
Context and cache:
--ctx-size, --n_ctx, --max_seq_len N Context size in tokens. 0 = auto for llama.cpp (requires gpu-layers=-1), 8192 for other loaders.
--cache-type, --cache_type N KV cache type; valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV3 - fp16, q2 to q8 (can specify k_bits and v_bits separately, e.g. q4_q8).
Speculative decoding:
--model-draft MODEL_DRAFT Path to the draft model for speculative decoding.
--draft-max DRAFT_MAX Number of tokens to draft for speculative decoding.
--gpu-layers-draft GPU_LAYERS_DRAFT Number of layers to offload to the GPU for the draft model.
--device-draft DEVICE_DRAFT Comma-separated list of devices to use for offloading the draft model. Example: CUDA0,CUDA1
--ctx-size-draft CTX_SIZE_DRAFT Size of the prompt context for the draft model. If 0, uses the same as the main model.
--spec-type {none,ngram-mod,ngram-simple,ngram-map-k,ngram-map-k4v,ngram-cache}
Draftless speculative decoding type. Recommended: ngram-mod.
--spec-ngram-size-n SPEC_NGRAM_SIZE_N N-gram lookup size for ngram speculative decoding.
--spec-ngram-size-m SPEC_NGRAM_SIZE_M Draft n-gram size for ngram speculative decoding.
--spec-ngram-min-hits SPEC_NGRAM_MIN_HITS Minimum n-gram hits for ngram-map speculative decoding.
llama.cpp:
--gpu-layers, --n-gpu-layers N Number of layers to offload to the GPU. -1 = auto.
--cpu-moe Move the experts to the CPU (for MoE models).
--mmproj MMPROJ Path to the mmproj file for vision models.
--streaming-llm Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.
--tensor-split TENSOR_SPLIT Split the model across multiple GPUs. Comma-separated list of proportions. Example: 60,40.
--row-split Split the model by rows across GPUs. This may improve multi-gpu performance.
--no-mmap Prevent mmap from being used.
--mlock Force the system to keep the model in RAM.
--no-kv-offload Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.
--batch-size BATCH_SIZE Maximum number of prompt tokens to batch together when calling llama-server. This is the application level batch size.
--ubatch-size UBATCH_SIZE Maximum number of prompt tokens to batch together when calling llama-server. This is the max physical batch size for computation (device level).
--threads THREADS Number of threads to use.
--threads-batch THREADS_BATCH Number of threads to use for batches/prompt processing.
--numa Activate NUMA task allocation for llama.cpp.
--parallel PARALLEL Number of parallel request slots. The context size is divided equally among slots. For example, to have 4 slots with 8192 context each, set
ctx_size to 32768.
--fit-target FIT_TARGET Target VRAM margin per device for auto GPU layers, comma-separated list of values in MiB. A single value is broadcast across all devices.
Default: 1024.
--extra-flags EXTRA_FLAGS Extra flags to pass to llama-server. Format: "flag1=value1,flag2,flag3=value3". Example: "override-tensor=exps=CPU"
Transformers/Accelerate:
--cpu Use the CPU to generate text. Warning: Training on CPU is extremely slow.
--cpu-memory CPU_MEMORY Maximum CPU memory in GiB. Use this for CPU offloading.
--disk If the model is too large for your GPU(s) and CPU combined, send the remaining layers to the disk.
--disk-cache-dir DISK_CACHE_DIR Directory to save the disk cache to.
--load-in-8bit Load the model with 8-bit precision (using bitsandbytes).
--bf16 Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU.
--no-cache Set use_cache to False while generating text. This reduces VRAM usage slightly, but it comes at a performance cost.
--trust-remote-code Set trust_remote_code=True while loading the model. Necessary for some models.
--force-safetensors Set use_safetensors=True while loading the model. This prevents arbitrary code execution.
--no_use_fast Set use_fast=False while loading the tokenizer (it's True by default). Use this if you have any problems related to use_fast.
--attn-implementation IMPLEMENTATION Attention implementation. Valid options: sdpa, eager, flash_attention_2.
bitsandbytes 4-bit:
--load-in-4bit Load the model with 4-bit precision (using bitsandbytes).
--use_double_quant use_double_quant for 4-bit.
--compute_dtype COMPUTE_DTYPE compute dtype for 4-bit. Valid options: bfloat16, float16, float32.
--quant_type QUANT_TYPE quant_type for 4-bit. Valid options: nf4, fp4.
ExLlamaV3:
--gpu-split GPU_SPLIT Comma-separated list of VRAM (in GB) to use per GPU device for model layers. Example: 20,7,7.
--enable-tp, --enable_tp Enable Tensor Parallelism (TP) to split the model across GPUs.
--tp-backend TP_BACKEND The backend for tensor parallelism. Valid options: native, nccl. Default: native.
--cfg-cache Create an additional cache for CFG negative prompts. Necessary to use CFG with that loader.
Gradio:
--listen Make the web UI reachable from your local network.
--listen-port LISTEN_PORT The listening port that the server will use.
--listen-host LISTEN_HOST The hostname that the server will use.
--share Create a public URL. This is useful for running the web UI on Google Colab or similar.
--auto-launch Open the web UI in the default browser upon launch.
--gradio-auth GRADIO_AUTH Set Gradio authentication password in the format "username:password". Multiple credentials can also be supplied with "u1:p1,u2:p2,u3:p3".
--gradio-auth-path GRADIO_AUTH_PATH Set the Gradio authentication file path. The file should contain one or more user:password pairs in the same format as above.
--ssl-keyfile SSL_KEYFILE The path to the SSL certificate key file.
--ssl-certfile SSL_CERTFILE The path to the SSL certificate cert file.
--subpath SUBPATH Customize the subpath for gradio, use with reverse proxy
--old-colors Use the legacy Gradio colors, before the December/2024 update.
--portable Hide features not available in portable mode like training.
API:
--api Enable the API extension.
--public-api Create a public URL for the API using Cloudflare.
--public-api-id PUBLIC_API_ID Tunnel ID for named Cloudflare Tunnel. Use together with public-api option.
--api-port API_PORT The listening port for the API.
--api-key API_KEY API authentication key.
--admin-key ADMIN_KEY API authentication key for admin tasks like loading and unloading models. If not set, will be the same as --api-key.
--api-enable-ipv6 Enable IPv6 for the API
--api-disable-ipv4 Disable IPv4 for the API
--nowebui Do not launch the Gradio UI. Useful for launching the API in standalone mode.
API generation defaults:
--temperature N Temperature
--dynatemp-low N Dynamic temperature low
--dynatemp-high N Dynamic temperature high
--dynatemp-exponent N Dynamic temperature exponent
--smoothing-factor N Smoothing factor
--smoothing-curve N Smoothing curve
--min-p N Min P
--top-p N Top P
--top-k N Top K
--typical-p N Typical P
--xtc-threshold N XTC threshold
--xtc-probability N XTC probability
--epsilon-cutoff N Epsilon cutoff
--eta-cutoff N Eta cutoff
--tfs N TFS
--top-a N Top A
--top-n-sigma N Top N Sigma
--adaptive-target N Adaptive target
--adaptive-decay N Adaptive decay
--dry-multiplier N DRY multiplier
--dry-allowed-length N DRY allowed length
--dry-base N DRY base
--repetition-penalty N Repetition penalty
--frequency-penalty N Frequency penalty
--presence-penalty N Presence penalty
--encoder-repetition-penalty N Encoder repetition penalty
--no-repeat-ngram-size N No repeat ngram size
--repetition-penalty-range N Repetition penalty range
--penalty-alpha N Penalty alpha
--guidance-scale N Guidance scale
--mirostat-mode N Mirostat mode
--mirostat-tau N Mirostat tau
--mirostat-eta N Mirostat eta
--do-sample, --no-do-sample Do sample
--dynamic-temperature, --no-dynamic-temperature Dynamic temperature
--temperature-last, --no-temperature-last Temperature last
--sampler-priority N Sampler priority
--dry-sequence-breakers N DRY sequence breakers
--enable-thinking, --no-enable-thinking Enable thinking
--reasoning-effort N Reasoning effort
--chat-template-file CHAT_TEMPLATE_FILE Path to a chat template file (.jinja, .jinja2, or .yaml) to use as the default instruction template for API requests. Overrides the model's
built-in template.
```
</details>
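
As a complement to the flags above: with `--api --nowebui` the server runs headless, and responses can be streamed. The snippet below is a rough sketch under the assumption that the Chat Completions endpoint follows the OpenAI server-sent-events format (`data:` chunks terminated by `[DONE]`) on the default API port 5000, with a model loaded.

```python
# Streaming sketch: print tokens as OpenAI-style SSE chunks arrive.
# Assumptions: python server.py --api --nowebui, default port 5000, model loaded.
import json

import requests

url = "http://127.0.0.1:5000/v1/chat/completions"
payload = {
    "messages": [{"role": "user", "content": "Explain speculative decoding in one paragraph."}],
    "stream": True,
}

with requests.post(url, json=payload, stream=True, timeout=300) as response:
    response.raise_for_status()
    for line in response.iter_lines():
        if not line.startswith(b"data: "):
            continue  # skip keep-alives and blank lines
        data = line[len(b"data: "):]
        if data == b"[DONE]":
            break
        delta = json.loads(data)["choices"][0]["delta"]
        print(delta.get("content", ""), end="", flush=True)
print()
```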
## Downloading models
1. Download a GGUF model file from [Hugging Face](https://huggingface.co/models?pipeline_tag=text-generation&sort=downloads&search=gguf).
2. Place it in the `user_data/models` folder.
That's it. The UI will detect the file automatically. You can also download models from the command line with `python download-model.py organization/model` (use `--help` to see all the options).
To check what will fit your GPU, you can use the [VRAM Calculator](https://huggingface.co/spaces/oobabooga/accurate-gguf-vram-calculator).
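
If you prefer to script the download instead, one option is `huggingface_hub` (an assumption on my part, not the project's built-in mechanism; the repo id and filename below are just examples):

```python
# Sketch: fetch a GGUF file straight into user_data/models with huggingface_hub.
from huggingface_hub import hf_hub_download

hf_hub_download(
    repo_id="TheBloke/Llama-2-13B-chat-GGUF",  # example repository, pick any GGUF repo
    filename="llama-2-13b-chat.Q4_K_M.gguf",   # example quantized file within that repo
    local_dir="user_data/models",              # folder where the UI looks for GGUF models
)
```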
<details>
<summary>Other model types (Transformers, EXL3)</summary>
Models that consist of multiple files (like 16-bit Transformers models and EXL3 models) should be placed in a subfolder inside `user_data/models`:
```
text-generation-webui
└── user_data
    └── models
        └── Qwen_Qwen3-8B
            ├── config.json
            ├── generation_config.json
            ├── model-00001-of-00004.safetensors
            ├── ...
            ├── tokenizer_config.json
            └── tokenizer.json
```
These formats require the one-click installer (not the portable build).
</details>
## Documentation

https://github.com/oobabooga/text-generation-webui/wiki
## Contributing
If you would like to contribute to the project, check out the [Contributing guidelines](https://github.com/oobabooga/text-generation-webui/wiki/Contributing-guidelines).
## Community

https://www.reddit.com/r/Oobabooga/
## Acknowledgments

- In August 2023, [Andreessen Horowitz](https://a16z.com/) (a16z) provided a generous grant to encourage and support my independent work on this project. I am **extremely** grateful for their trust and recognition.
- This project was inspired by [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) and wouldn't exist without it.

@@ -1,112 +0,0 @@
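# Deleted file (filename not shown in this view): a legacy websocket streaming chat client for the old /api/v1/chat-stream endpoint.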
import asyncio
import html
import json
import sys
try:
import websockets
except ImportError:
print("Websockets package not found. Make sure it's installed.")
# For local streaming, the websockets are hosted without ssl - ws://
HOST = 'localhost:5005'
URI = f'ws://{HOST}/api/v1/chat-stream'
# For reverse-proxied streaming, the remote will likely host with ssl - wss://
# URI = 'wss://your-uri-here.trycloudflare.com/api/v1/stream'
async def run(user_input, history):
# Note: the selected defaults change from time to time.
request = {
'user_input': user_input,
'max_new_tokens': 250,
'auto_max_new_tokens': False,
'max_tokens_second': 0,
'history': history,
'mode': 'instruct', # Valid options: 'chat', 'chat-instruct', 'instruct'
'character': 'Example',
'instruction_template': 'Vicuna-v1.1', # Will get autodetected if unset
'your_name': 'You',
# 'name1': 'name of user', # Optional
# 'name2': 'name of character', # Optional
# 'context': 'character context', # Optional
# 'greeting': 'greeting', # Optional
# 'name1_instruct': 'You', # Optional
# 'name2_instruct': 'Assistant', # Optional
# 'context_instruct': 'context_instruct', # Optional
# 'turn_template': 'turn_template', # Optional
'regenerate': False,
'_continue': False,
'chat_instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
# Generation params. If 'preset' is set to different than 'None', the values
# in presets/preset-name.yaml are used instead of the individual numbers.
'preset': 'None',
'do_sample': True,
'temperature': 0.7,
'top_p': 0.1,
'typical_p': 1,
'epsilon_cutoff': 0, # In units of 1e-4
'eta_cutoff': 0, # In units of 1e-4
'tfs': 1,
'top_a': 0,
'repetition_penalty': 1.18,
'repetition_penalty_range': 0,
'top_k': 40,
'min_length': 0,
'no_repeat_ngram_size': 0,
'num_beams': 1,
'penalty_alpha': 0,
'length_penalty': 1,
'early_stopping': False,
'mirostat_mode': 0,
'mirostat_tau': 5,
'mirostat_eta': 0.1,
'grammar_string': '',
'guidance_scale': 1,
'negative_prompt': '',
'seed': -1,
'add_bos_token': True,
'truncation_length': 2048,
'ban_eos_token': False,
'custom_token_bans': '',
'skip_special_tokens': True,
'stopping_strings': []
}
async with websockets.connect(URI, ping_interval=None) as websocket:
await websocket.send(json.dumps(request))
while True:
incoming_data = await websocket.recv()
incoming_data = json.loads(incoming_data)
match incoming_data['event']:
case 'text_stream':
yield incoming_data['history']
case 'stream_end':
return
async def print_response_stream(user_input, history):
cur_len = 0
async for new_history in run(user_input, history):
cur_message = new_history['visible'][-1][1][cur_len:]
cur_len += len(cur_message)
print(html.unescape(cur_message), end='')
sys.stdout.flush() # If we don't flush, we won't see tokens in realtime.
if __name__ == '__main__':
user_input = "Please give me a step-by-step guide on how to plant a tree in my backyard."
# Basic example
history = {'internal': [], 'visible': []}
# "Continue" example. Make sure to set '_continue' to True above
# arr = [user_input, 'Surely, here is']
# history = {'internal': [arr], 'visible': [arr]}
asyncio.run(print_response_stream(user_input, history))

@@ -1,92 +0,0 @@
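# Deleted file (filename not shown in this view): a legacy blocking chat client for the old /api/v1/chat endpoint.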
import html
import json
import requests
# For local streaming, the websockets are hosted without ssl - http://
HOST = 'localhost:5000'
URI = f'http://{HOST}/api/v1/chat'
# For reverse-proxied streaming, the remote will likely host with ssl - https://
# URI = 'https://your-uri-here.trycloudflare.com/api/v1/chat'
def run(user_input, history):
request = {
'user_input': user_input,
'max_new_tokens': 250,
'auto_max_new_tokens': False,
'max_tokens_second': 0,
'history': history,
'mode': 'instruct', # Valid options: 'chat', 'chat-instruct', 'instruct'
'character': 'Example',
'instruction_template': 'Vicuna-v1.1', # Will get autodetected if unset
'your_name': 'You',
# 'name1': 'name of user', # Optional
# 'name2': 'name of character', # Optional
# 'context': 'character context', # Optional
# 'greeting': 'greeting', # Optional
# 'name1_instruct': 'You', # Optional
# 'name2_instruct': 'Assistant', # Optional
# 'context_instruct': 'context_instruct', # Optional
# 'turn_template': 'turn_template', # Optional
'regenerate': False,
'_continue': False,
'chat_instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
# Generation params. If 'preset' is set to different than 'None', the values
# in presets/preset-name.yaml are used instead of the individual numbers.
'preset': 'None',
'do_sample': True,
'temperature': 0.7,
'top_p': 0.1,
'typical_p': 1,
'epsilon_cutoff': 0, # In units of 1e-4
'eta_cutoff': 0, # In units of 1e-4
'tfs': 1,
'top_a': 0,
'repetition_penalty': 1.18,
'repetition_penalty_range': 0,
'top_k': 40,
'min_length': 0,
'no_repeat_ngram_size': 0,
'num_beams': 1,
'penalty_alpha': 0,
'length_penalty': 1,
'early_stopping': False,
'mirostat_mode': 0,
'mirostat_tau': 5,
'mirostat_eta': 0.1,
'grammar_string': '',
'guidance_scale': 1,
'negative_prompt': '',
'seed': -1,
'add_bos_token': True,
'truncation_length': 2048,
'ban_eos_token': False,
'custom_token_bans': '',
'skip_special_tokens': True,
'stopping_strings': []
}
response = requests.post(URI, json=request)
if response.status_code == 200:
result = response.json()['results'][0]['history']
print(json.dumps(result, indent=4))
print()
print(html.unescape(result['visible'][-1][1]))
if __name__ == '__main__':
user_input = "Please give me a step-by-step guide on how to plant a tree in my backyard."
# Basic example
history = {'internal': [], 'visible': []}
# "Continue" example. Make sure to set '_continue' to True above
# arr = [user_input, 'Surely, here is']
# history = {'internal': [arr], 'visible': [arr]}
run(user_input, history)

@@ -1,176 +0,0 @@
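# Deleted file (filename not shown in this view): a legacy test script that loads each available model via the old /api/v1/model endpoint and smoke-tests generation.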
#!/usr/bin/env python3
import requests
HOST = '0.0.0.0:5000'
def generate(prompt, tokens=200):
request = {'prompt': prompt, 'max_new_tokens': tokens}
response = requests.post(f'http://{HOST}/api/v1/generate', json=request)
if response.status_code == 200:
return response.json()['results'][0]['text']
def model_api(request):
response = requests.post(f'http://{HOST}/api/v1/model', json=request)
return response.json()
# print some common settings
def print_basic_model_info(response):
basic_settings = ['truncation_length', 'instruction_template']
print("Model: ", response['result']['model_name'])
print("Lora(s): ", response['result']['lora_names'])
for setting in basic_settings:
print(setting, "=", response['result']['shared.settings'][setting])
# model info
def model_info():
response = model_api({'action': 'info'})
print_basic_model_info(response)
# simple loader
def model_load(model_name):
return model_api({'action': 'load', 'model_name': model_name})
# complex loader
def complex_model_load(model):
def guess_groupsize(model_name):
if '1024g' in model_name:
return 1024
elif '128g' in model_name:
return 128
elif '32g' in model_name:
return 32
else:
return -1
req = {
'action': 'load',
'model_name': model,
'args': {
'loader': 'AutoGPTQ',
'bf16': False,
'load_in_8bit': False,
'groupsize': 0,
'wbits': 0,
# llama.cpp
'threads': 0,
'n_batch': 512,
'no_mmap': False,
'mlock': False,
'cache_capacity': None,
'n_gpu_layers': 0,
'n_ctx': 2048,
# RWKV
'rwkv_strategy': None,
'rwkv_cuda_on': False,
# b&b 4-bit
# 'load_in_4bit': False,
# 'compute_dtype': 'float16',
# 'quant_type': 'nf4',
# 'use_double_quant': False,
# "cpu": false,
# "auto_devices": false,
# "gpu_memory": null,
# "cpu_memory": null,
# "disk": false,
# "disk_cache_dir": "cache",
},
}
model = model.lower()
if '4bit' in model or 'gptq' in model or 'int4' in model:
req['args']['wbits'] = 4
req['args']['groupsize'] = guess_groupsize(model)
elif '3bit' in model:
req['args']['wbits'] = 3
req['args']['groupsize'] = guess_groupsize(model)
else:
req['args']['gptq_for_llama'] = False
if '8bit' in model:
req['args']['load_in_8bit'] = True
elif '-hf' in model or 'fp16' in model:
if '7b' in model:
req['args']['bf16'] = True # for 24GB
elif '13b' in model:
req['args']['load_in_8bit'] = True # for 24GB
elif 'gguf' in model:
# req['args']['threads'] = 16
if '7b' in model:
req['args']['n_gpu_layers'] = 100
elif '13b' in model:
req['args']['n_gpu_layers'] = 100
elif '30b' in model or '33b' in model:
req['args']['n_gpu_layers'] = 59 # 24GB
elif '65b' in model:
req['args']['n_gpu_layers'] = 42 # 24GB
elif 'rwkv' in model:
req['args']['rwkv_cuda_on'] = True
if '14b' in model:
req['args']['rwkv_strategy'] = 'cuda f16i8' # 24GB
else:
req['args']['rwkv_strategy'] = 'cuda f16' # 24GB
return model_api(req)
if __name__ == '__main__':
for model in model_api({'action': 'list'})['result']:
try:
resp = complex_model_load(model)
if 'error' in resp:
print(f"{model} FAIL Error: {resp['error']['message']}")
continue
else:
print_basic_model_info(resp)
ans = generate("0,1,1,2,3,5,8,13,", tokens=2)
if '21' in ans:
print(f"{model} PASS ({ans})")
else:
print(f"{model} FAIL ({ans})")
except Exception as e:
print(f"{model} FAIL Exception: {repr(e)}")
# 0,1,1,2,3,5,8,13, is the fibonacci sequence, the next number is 21.
# Some results below.
""" $ ./model-api-example.py
Model: 4bit_gpt4-x-alpaca-13b-native-4bit-128g-cuda
Lora(s): []
truncation_length = 2048
instruction_template = Alpaca
4bit_gpt4-x-alpaca-13b-native-4bit-128g-cuda PASS (21)
Model: 4bit_WizardLM-13B-Uncensored-4bit-128g
Lora(s): []
truncation_length = 2048
instruction_template = WizardLM
4bit_WizardLM-13B-Uncensored-4bit-128g PASS (21)
Model: Aeala_VicUnlocked-alpaca-30b-4bit
Lora(s): []
truncation_length = 2048
instruction_template = Alpaca
Aeala_VicUnlocked-alpaca-30b-4bit PASS (21)
Model: alpaca-30b-4bit
Lora(s): []
truncation_length = 2048
instruction_template = Alpaca
alpaca-30b-4bit PASS (21)
"""

@@ -1,86 +0,0 @@
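# Deleted file (filename not shown in this view): a legacy websocket streaming completion client for the old /api/v1/stream endpoint.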
import asyncio
import json
import sys
try:
import websockets
except ImportError:
print("Websockets package not found. Make sure it's installed.")
# For local streaming, the websockets are hosted without ssl - ws://
HOST = 'localhost:5005'
URI = f'ws://{HOST}/api/v1/stream'
# For reverse-proxied streaming, the remote will likely host with ssl - wss://
# URI = 'wss://your-uri-here.trycloudflare.com/api/v1/stream'
async def run(context):
# Note: the selected defaults change from time to time.
request = {
'prompt': context,
'max_new_tokens': 250,
'auto_max_new_tokens': False,
'max_tokens_second': 0,
# Generation params. If 'preset' is set to different than 'None', the values
# in presets/preset-name.yaml are used instead of the individual numbers.
'preset': 'None',
'do_sample': True,
'temperature': 0.7,
'top_p': 0.1,
'typical_p': 1,
'epsilon_cutoff': 0, # In units of 1e-4
'eta_cutoff': 0, # In units of 1e-4
'tfs': 1,
'top_a': 0,
'repetition_penalty': 1.18,
'repetition_penalty_range': 0,
'top_k': 40,
'min_length': 0,
'no_repeat_ngram_size': 0,
'num_beams': 1,
'penalty_alpha': 0,
'length_penalty': 1,
'early_stopping': False,
'mirostat_mode': 0,
'mirostat_tau': 5,
'mirostat_eta': 0.1,
'grammar_string': '',
'guidance_scale': 1,
'negative_prompt': '',
'seed': -1,
'add_bos_token': True,
'truncation_length': 2048,
'ban_eos_token': False,
'custom_token_bans': '',
'skip_special_tokens': True,
'stopping_strings': []
}
async with websockets.connect(URI, ping_interval=None) as websocket:
await websocket.send(json.dumps(request))
yield context # Remove this if you just want to see the reply
while True:
incoming_data = await websocket.recv()
incoming_data = json.loads(incoming_data)
match incoming_data['event']:
case 'text_stream':
yield incoming_data['text']
case 'stream_end':
return
async def print_response_stream(prompt):
async for response in run(prompt):
print(response, end='')
sys.stdout.flush() # If we don't flush, we won't see tokens in realtime.
if __name__ == '__main__':
prompt = "In order to make homemade bread, follow these steps:\n1)"
asyncio.run(print_response_stream(prompt))

@@ -1,63 +0,0 @@
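# Deleted file (filename not shown in this view): a legacy blocking completion client for the old /api/v1/generate endpoint.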
import requests
# For local streaming, the websockets are hosted without ssl - http://
HOST = 'localhost:5000'
URI = f'http://{HOST}/api/v1/generate'
# For reverse-proxied streaming, the remote will likely host with ssl - https://
# URI = 'https://your-uri-here.trycloudflare.com/api/v1/generate'
def run(prompt):
request = {
'prompt': prompt,
'max_new_tokens': 250,
'auto_max_new_tokens': False,
'max_tokens_second': 0,
# Generation params. If 'preset' is set to different than 'None', the values
# in presets/preset-name.yaml are used instead of the individual numbers.
'preset': 'None',
'do_sample': True,
'temperature': 0.7,
'top_p': 0.1,
'typical_p': 1,
'epsilon_cutoff': 0, # In units of 1e-4
'eta_cutoff': 0, # In units of 1e-4
'tfs': 1,
'top_a': 0,
'repetition_penalty': 1.18,
'repetition_penalty_range': 0,
'top_k': 40,
'min_length': 0,
'no_repeat_ngram_size': 0,
'num_beams': 1,
'penalty_alpha': 0,
'length_penalty': 1,
'early_stopping': False,
'mirostat_mode': 0,
'mirostat_tau': 5,
'mirostat_eta': 0.1,
'grammar_string': '',
'guidance_scale': 1,
'negative_prompt': '',
'seed': -1,
'add_bos_token': True,
'truncation_length': 2048,
'ban_eos_token': False,
'custom_token_bans': '',
'skip_special_tokens': True,
'stopping_strings': []
}
response = requests.post(URI, json=request)
if response.status_code == 200:
result = response.json()['results'][0]['text']
print(prompt + result)
if __name__ == '__main__':
prompt = "In order to make homemade bread, follow these steps:\n1)"
run(prompt)

@@ -1,8 +1,8 @@
#!/usr/bin/env bash
cd "$(dirname "${BASH_SOURCE[0]}")"
if [[ "$(pwd)" =~ " " ]]; then echo This script relies on Miniforge which can not be silently installed under a path with spaces. && exit; fi
# deactivate existing conda envs as needed to avoid conflicts
{ conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null


@@ -2,7 +2,7 @@
 
 cd "$(dirname "${BASH_SOURCE[0]}")"
 
-if [[ "$(pwd)" =~ " " ]]; then echo This script relies on Miniconda which can not be silently installed under a path with spaces. && exit; fi
+if [[ "$(pwd)" =~ " " ]]; then echo This script relies on Miniforge which can not be silently installed under a path with spaces. && exit; fi
 
 # deactivate existing conda envs as needed to avoid conflicts
 { conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null


@@ -4,7 +4,7 @@ cd /D "%~dp0"
 set PATH=%PATH%;%SystemRoot%\system32
 
-echo "%CD%"| findstr /C:" " >nul && echo This script relies on Miniconda which can not be silently installed under a path with spaces. && goto end
+echo "%CD%"| findstr /C:" " >nul && echo This script relies on Miniforge which can not be silently installed under a path with spaces. && goto end
 
 @rem fix failed install when installing to a separate drive
 set TMP=%cd%\installer_files
@@ -21,11 +21,12 @@ set INSTALL_ENV_DIR=%cd%\installer_files\env
 set PYTHONNOUSERSITE=1
 set PYTHONPATH=
 set PYTHONHOME=
+set PYTHONUTF8=1
 set "CUDA_PATH=%INSTALL_ENV_DIR%"
 set "CUDA_HOME=%CUDA_PATH%"
 
 @rem activate installer env
-call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%" || ( echo. && echo Miniconda hook not found. && goto end )
+call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%" || ( echo. && echo Miniforge hook not found. && goto end )
 
 @rem enter commands
 cmd /k "%*"


@@ -1,11 +0,0 @@
@echo off
cd /D "%~dp0"
set PATH=%PATH%;%SystemRoot%\system32
@rem sed -i 's/\x0D$//' ./wsl.sh converts newlines to unix format in the wsl script
call wsl -e bash -lic "sed -i 's/\x0D$//' ./wsl.sh; source ./wsl.sh cmd"
:end
pause


@@ -1,38 +0,0 @@
'''
Converts a transformers model to safetensors format and shards it.

This makes it faster to load (because of safetensors) and lowers its RAM usage
while loading (because of sharding).

Based on the original script by 81300:

https://gist.github.com/81300/fe5b08bff1cba45296a829b9d6b0f303
'''

import argparse
from pathlib import Path

import torch
from transformers import AutoModelForCausalLM, AutoTokenizer

parser = argparse.ArgumentParser(formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=54))
parser.add_argument('MODEL', type=str, default=None, nargs='?', help="Path to the input model.")
parser.add_argument('--output', type=str, default=None, help='Path to the output folder (default: models/{model_name}_safetensors).')
parser.add_argument("--max-shard-size", type=str, default="2GB", help="Maximum size of a shard in GB or MB (default: %(default)s).")
parser.add_argument('--bf16', action='store_true', help='Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU.')
args = parser.parse_args()

if __name__ == '__main__':
    path = Path(args.MODEL)
    model_name = path.name

    print(f"Loading {model_name}...")
    model = AutoModelForCausalLM.from_pretrained(path, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16 if args.bf16 else torch.float16)
    tokenizer = AutoTokenizer.from_pretrained(path)

    out_folder = args.output or Path(f"models/{model_name}_safetensors")

    print(f"Saving the converted model to {out_folder} with a maximum shard size of {args.max_shard_size}...")
    model.save_pretrained(out_folder, max_shard_size=args.max_shard_size, safe_serialization=True)
    tokenizer.save_pretrained(out_folder)
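For reference, this deleted converter was invoked from the repository root, for example: python convert-to-safetensors.py models/my-model --max-shard-size 2GB --bf16 (the model path is a placeholder). Without --output, the converted copy lands in models/my-model_safetensors, per the defaults visible in the argparse setup above.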

2 binary files changed (contents not shown).

css/chat_style-Dark.css (new file, 129 lines)

@@ -0,0 +1,129 @@
.message {
  display: grid;
  align-items: start;
  grid-template-columns: 60px minmax(0, 1fr);
  width: min(100%, calc(724px + 60px));
  padding-bottom: 22px;
  padding-top: 6px;
  font-size: 18px;
  font-family: Roboto, Arial, sans-serif; /* Modern font */
  line-height: 1.5;
}

.circle-you,
.circle-bot {
  background-color: #2b2b2b; /* Darker background for circles */
  border-radius: 50%; /* Perfect circle */
  border: 1px solid #4a90e2; /* Soft blue border */
  box-shadow: 0 4px 8px rgb(0 0 0 / 50%); /* Soft shadow for depth */
}

.circle-bot img,
.circle-you img {
  border-radius: 50%; /* Make images circular */
  width: 100%;
  height: 100%;
  object-fit: cover;
}

.circle-you, .circle-bot {
  width: 64px; /* Smaller size for modern look */
  height: 64px;
}

.text {
  padding-left: 12px; /* Reduced padding for a cleaner layout */
  color: #f0f0f0; /* Light text color for readability */
}

.text p {
  margin-top: 2px;
}

.username {
  padding-left: 10px;
  font-size: 20px;
  font-weight: bold;
  color: #e0e0e0; /* Light gray text */
  transition: color 0.3s ease; /* Smooth color transition */
}

.username:hover {
  color: #4a90e2; /* Blue color on hover */
}

.message-body {
  position: relative;
  border: 1px solid rgb(255 255 255 / 10%); /* Soft white border */
  border-radius: 8px; /* Slightly rounded corners */
  padding: 15px;
  background: #1e1e1e; /* Dark background */
  box-shadow: 0 4px 10px rgb(0 0 0 / 30%); /* Subtle shadow for depth */
  transition: background 0.3s ease; /* Smooth transition for background */
}

.message-body:hover {
  background: #252525; /* Slightly lighter on hover */
}

/* Adds 2 extra lines at the top and bottom of the message */
.message-body::before,
.message-body::after {
  content: "";
  position: absolute;
  left: 10px;
  right: 10px;
  height: 1px;
  background-color: rgb(255 255 255 / 5%); /* Faded lines for subtle separation */
}

.message-body::before {
  top: 4px;
}

.message-body::after {
  bottom: 4px;
}

.message-body img {
  max-width: 300px;
  max-height: 300px;
  border-radius: 10px; /* Rounded corners for images */
}

.message-body p {
  color: #e0e0e0 !important; /* Light color for text */
}

.message-body p em {
  color: #a6a6a6 !important; /* Softer gray for emphasized text */
}

@media screen and (width <= 688px) {
  .message {
    display: grid;
    align-items: start;
    grid-template-columns: 60px minmax(0, 1fr);
    padding-bottom: 25px;
    font-size: 15px;
    font-family: Roboto, Arial, sans-serif; /* Modern font */
    line-height: 1.5;
  }

  .circle-you, .circle-bot {
    width: 40px; /* Smaller size for mobile */
    height: 40px;
  }

  .text {
    padding-left: 10px; /* Reduced padding for mobile */
  }

  .message-body p {
    font-size: 14px !important;
  }

  .username {
    font-size: 18px; /* Smaller username for mobile */
  }
}


@@ -2,8 +2,11 @@
 .message {
   display: grid;
+  align-items: start;
   grid-template-columns: 60px minmax(0, 1fr);
-  padding-bottom: 28px;
+  width: min(100%, calc(724px + 60px + 90px));
+  padding-bottom: 21px;
+  padding-top: 7px;
   font-size: 18px;
   font-family: 'Noto Sans', Arial, sans-serif;
   line-height: 1.428571429;
@@ -25,15 +28,15 @@
 }
 
 .circle-you, .circle-bot {
-  /*You can set the size of the profile images here, but if you do, you have to also adjust the .text{padding-left: 90px} to a different number according to the width of the image which is right below here*/
+  /* You can set the size of the profile images here, but if you do, you have to also adjust the .text{padding-left: 90px} to a different number according to the width of the image which is right below here */
   width: 135px;
   height: 175px;
 }
 
 .text {
-  /*Change this to move the message box further left or right depending on the size of your profile pic*/
+  /* Change this to move the message box further left or right depending on the size of your profile pic */
   padding-left: 90px;
-  text-shadow: 2px 2px 2px rgb(0, 0, 0, 0.4);
+  text-shadow: 2px 2px 2px rgb(0 0 0 / 40%);
 }
 
 .text p {
@@ -44,37 +47,37 @@
   padding-left: 10px;
   font-size: 22px;
   font-weight: bold;
-  border-top: 1px solid rgb(51, 64, 90);
+  border-top: 1px solid rgb(51 64 90);
   padding: 3px;
 }
 
 .message-body {
   position: relative;
-  border-radius: 1rem;
-  border: 1px solid rgba(255, 255, 255, 0.459);
+  border: 1px solid rgb(255 255 255 / 45.9%);
   border-radius: 10px;
   padding: 10px;
   padding-top: 5px;
-  /*Message gradient background color - remove the line bellow if you don't want a background color or gradient*/
+  /* Message gradient background color - remove the line bellow if you don't want a background color or gradient */
   background: linear-gradient(to bottom, #171730, #1b263f);
 }
 
-/*Adds 2 extra lines at the top and bottom of the message*/
-.message-body:before,
-.message-body:after {
+/* Adds 2 extra lines at the top and bottom of the message */
+.message-body::before,
+.message-body::after {
   content: "";
   position: absolute;
   left: 10px;
   right: 10px;
   height: 1px;
-  background-color: rgba(255, 255, 255, 0.13);
+  background-color: rgb(255 255 255 / 13%);
 }
 
-.message-body:before {
+.message-body::before {
   top: 6px;
 }
 
-.message-body:after {
+.message-body::after {
   bottom: 6px;
 }
@@ -84,21 +87,21 @@
   border-radius: 20px;
 }
 
-.message-body p {
-  margin-bottom: 0 !important;
+.message-body p, .message-body li {
   font-size: 18px !important;
-  line-height: 1.428571429 !important;
-  color: rgb(243, 244, 246) !important;
-  text-shadow: 2px 2px 2px rgb(0, 0, 0);
+  color: rgb(243 244 246) !important;
+  text-shadow: 2px 2px 2px rgb(0 0 0);
+  font-weight: 500;
 }
 
 .message-body p em {
-  color: rgb(138, 138, 138) !important;
+  color: rgb(138 138 138) !important;
 }
 
-@media screen and (max-width: 688px) {
+@media screen and (width <= 688px) {
   .message {
     display: grid;
+    align-items: start;
     grid-template-columns: 60px minmax(0, 1fr);
     padding-bottom: 25px;
     font-size: 15px;
@@ -120,10 +123,10 @@
   }
 
   .text {
-    padding-left: 0px;
+    padding-left: 0;
   }
 
-  .message-body p {
+  .message-body p, .message-body li {
     font-size: 16px !important;
   }


@@ -16,6 +16,8 @@
 }
 
 .message {
-  padding-bottom: 30px;
+  padding-bottom: 1.5em;
+  padding-top: 0.5em;
   grid-template-columns: 70px minmax(0, 1fr);
+  width: min(100%, calc(724px + 70px));
 }


@@ -1,23 +1,31 @@
 .message {
   display: grid;
+  align-items: start;
   grid-template-columns: 60px minmax(0, 1fr);
-  padding-bottom: 25px;
+  width: min(100%, calc(724px + 60px));
+  padding-bottom: 1.5em;
+  padding-top: 0.5em;
   font-size: 15px;
   font-family: 'Noto Sans', Helvetica, Arial, sans-serif;
-  line-height: 23px !important;
+  line-height: 22.5px !important;
+}
+
+.message-body {
+  margin-top: 3px;
+  font-size: 15px !important;
 }
 
 .circle-you {
   width: 50px;
   height: 50px;
-  background-color: rgb(238, 78, 59);
+  background-color: rgb(238 78 59);
   border-radius: 50%;
 }
 
 .circle-bot {
   width: 50px;
   height: 50px;
-  background-color: rgb(59, 78, 244);
+  background-color: rgb(59 78 244);
   border-radius: 50%;
 }
@@ -29,10 +37,6 @@
   object-fit: cover;
 }
 
-.text p {
-  margin-top: 5px;
-}
-
 .username {
   font-weight: bold;
 }
@@ -43,17 +47,15 @@
   border-radius: 20px;
 }
 
-.message-body p {
-  margin-bottom: 0 !important;
-  font-size: 15px !important;
-  line-height: 23px !important;
+.message-body p, .message-body li {
+  font-weight: 500;
 }
 
 .dark .message-body p em {
-  color: rgb(138, 138, 138) !important;
+  color: rgb(138 138 138) !important;
 }
 
 .message-body p em {
-  color: rgb(110, 110, 110) !important;
+  color: rgb(110 110 110) !important;
   font-weight: 500;
 }


@@ -1,5 +1,7 @@
 .message {
-  padding-bottom: 25px;
+  width: min(100%, calc(724px + 60px));
+  padding-bottom: 22px;
+  padding-top: 3px;
   font-size: 15px;
   font-family: 'Noto Sans', Helvetica, Arial, sans-serif;
   line-height: 1.428571429;
@@ -8,14 +10,14 @@
 .circle-you {
   width: 50px;
   height: 50px;
-  background-color: rgb(238, 78, 59);
+  background-color: rgb(238 78 59);
   border-radius: 50%;
 }
 
 .circle-bot {
   width: 50px;
   height: 50px;
-  background-color: rgb(59, 78, 244);
+  background-color: rgb(59 78 244);
   border-radius: 50%;
   float: left;
   margin-right: 10px;
@@ -47,7 +49,7 @@
 .circle-you + .text {
   float: right;
-  background-color: rgb(0, 132, 255);
+  background-color: rgb(0 132 255);
   margin-right: 10px;
 }
@@ -59,8 +61,10 @@
   text-align: right;
 }
 
-.dark .circle-bot + .text div, .dark .circle-bot + .text * {
-  color: #000;
+.dark .circle-bot + .text div, .dark .circle-bot + .text *,
+.dark .chat .message .circle-bot + .text .message-body :is(h1, h2, h3, h4, h5, h6),
+.dark .chat .message .circle-bot + .text .message-body a {
+  color: #000 !important;
 }
 
 .text {
@@ -75,25 +79,29 @@
   font-weight: bold;
 }
 
-.message-body {
-}
-
 .message-body img {
   max-width: 300px;
   max-height: 300px;
   border-radius: 20px;
 }
 
-.message-body p {
-  margin-bottom: 0 !important;
+.message-body p, .message-body li {
   font-size: 15px !important;
-  line-height: 1.428571429 !important;
+  font-weight: 500;
 }
 
 .dark .message-body p em {
-  color: rgb(138, 138, 138) !important;
+  color: rgb(138 138 138) !important;
 }
 
 .message-body p em {
-  color: rgb(110, 110, 110) !important;
+  color: rgb(110 110 110) !important;
+}
+
+.editing-textarea {
+  width: max(30rem) !important;
+}
+
+.circle-you + .text .edit-control-button, .circle-you + .text .editing-textarea {
+  color: #000 !important;
 }


@@ -1,55 +1,97 @@
 .message {
-  padding-bottom: 25px;
+  display: block;
+  width: min(100%, 724px);
+  padding-top: 0;
+  padding-bottom: 21px;
   font-size: 15px;
   font-family: 'Noto Sans', Helvetica, Arial, sans-serif;
   line-height: 1.428571429;
+  grid-template-columns: none;
 }
 
-.text-you {
-  background-color: #d9fdd3;
-  border-radius: 15px;
-  padding: 10px;
-  padding-top: 5px;
-  float: right;
+.circle-you, .circle-bot {
+  display: none;
 }
 
-.text-bot {
-  background-color: #f2f2f2;
-  border-radius: 15px;
-  padding: 10px;
-  padding-top: 5px;
+.text {
+  max-width: 65%;
+  border-radius: 18px;
+  padding: 12px 16px;
+  margin-bottom: 8px;
+  clear: both;
+  box-shadow: 0 1px 2px rgb(0 0 0 / 10%);
 }
 
-.dark .text-you {
-  background-color: #005c4b;
-  color: #111b21;
+.username {
+  font-weight: 600;
+  margin-bottom: 8px;
+  opacity: 0.65;
+  padding-left: 0;
 }
 
-.dark .text-bot {
-  background-color: #1f2937;
-  color: #111b21;
+/* User messages - right aligned, WhatsApp green */
+.circle-you + .text {
+  background-color: #d9fdd3;
+  float: right;
+  margin-left: auto;
+  margin-right: 8px;
 }
 
-.text-bot p, .text-you p {
-  margin-top: 5px;
+.circle-you + .text .username {
+  display: none;
+}
+
+/* Bot messages - left aligned, white */
+.circle-bot + .text {
+  background-color: #fff;
+  float: left;
+  margin-right: auto;
+  margin-left: 8px;
+  border: 1px solid #e5e5e5;
+}
+
+.circle-bot + .text .message-actions {
+  bottom: -25px !important;
+}
+
+/* Dark theme colors */
+.dark .circle-you + .text {
+  background-color: #144d37;
+  color: #e4e6ea;
+  box-shadow: 0 1px 2px rgb(0 0 0 / 30%);
+}
+
+.dark .circle-bot + .text {
+  background-color: #202c33;
+  color: #e4e6ea;
+  border: 1px solid #3c4043;
+  box-shadow: 0 1px 2px rgb(0 0 0 / 30%);
+}
+
+.dark .username {
+  opacity: 0.7;
 }
 
 .message-body img {
   max-width: 300px;
   max-height: 300px;
-  border-radius: 20px;
+  border-radius: 12px;
 }
 
-.message-body p {
-  margin-bottom: 0 !important;
+.message-body p, .message-body li {
   font-size: 15px !important;
-  line-height: 1.428571429 !important;
 }
 
 .dark .message-body p em {
-  color: rgb(138, 138, 138) !important;
+  color: rgb(170 170 170) !important;
 }
 
 .message-body p em {
-  color: rgb(110, 110, 110) !important;
+  color: rgb(100 100 100) !important;
 }
+
+/* Message actions positioning */
+.message-actions {
+  margin-top: 8px;
+}

css/highlightjs/github-dark.min.css (new vendored file, 111 lines)

@@ -0,0 +1,111 @@
html body gradio-app .gradio-container pre code.hljs {
display: block;
overflow-x: auto;
padding: 1em
}
html body gradio-app .gradio-container code.hljs {
padding: 3px 5px
}
/*!
Theme: GitHub Dark
Description: Dark theme as seen on github.com
Author: github.com
Maintainer: @Hirse
Updated: 2021-05-15
Outdated base version: https://github.com/primer/github-syntax-dark
Current colors taken from GitHub's CSS
*/
html body gradio-app .gradio-container .hljs {
color: #c9d1d9;
background: #0d1117
}
html body gradio-app .gradio-container .hljs-doctag,
html body gradio-app .gradio-container .hljs-keyword,
html body gradio-app .gradio-container .hljs-meta .hljs-keyword,
html body gradio-app .gradio-container .hljs-template-tag,
html body gradio-app .gradio-container .hljs-template-variable,
html body gradio-app .gradio-container .hljs-type,
html body gradio-app .gradio-container .hljs-variable.language_ {
color: #ff7b72
}
html body gradio-app .gradio-container .hljs-title,
html body gradio-app .gradio-container .hljs-title.class_,
html body gradio-app .gradio-container .hljs-title.class_.inherited__,
html body gradio-app .gradio-container .hljs-title.function_ {
color: #d2a8ff
}
html body gradio-app .gradio-container .hljs-attr,
html body gradio-app .gradio-container .hljs-attribute,
html body gradio-app .gradio-container .hljs-literal,
html body gradio-app .gradio-container .hljs-meta,
html body gradio-app .gradio-container .hljs-number,
html body gradio-app .gradio-container .hljs-operator,
html body gradio-app .gradio-container .hljs-selector-attr,
html body gradio-app .gradio-container .hljs-selector-class,
html body gradio-app .gradio-container .hljs-selector-id,
html body gradio-app .gradio-container .hljs-variable {
color: #79c0ff
}
html body gradio-app .gradio-container .hljs-meta .hljs-string,
html body gradio-app .gradio-container .hljs-regexp,
html body gradio-app .gradio-container .hljs-string {
color: #a5d6ff
}
html body gradio-app .gradio-container .hljs-built_in,
html body gradio-app .gradio-container .hljs-symbol {
color: #ffa657
}
html body gradio-app .gradio-container .hljs-code,
html body gradio-app .gradio-container .hljs-comment,
html body gradio-app .gradio-container .hljs-formula {
color: #8b949e
}
html body gradio-app .gradio-container .hljs-name,
html body gradio-app .gradio-container .hljs-quote,
html body gradio-app .gradio-container .hljs-selector-pseudo,
html body gradio-app .gradio-container .hljs-selector-tag {
color: #7ee787
}
html body gradio-app .gradio-container .hljs-subst {
color: #c9d1d9
}
html body gradio-app .gradio-container .hljs-section {
color: #1f6feb;
font-weight: 700
}
html body gradio-app .gradio-container .hljs-bullet {
color: #f2cc60
}
html body gradio-app .gradio-container .hljs-emphasis {
color: #c9d1d9;
font-style: italic
}
html body gradio-app .gradio-container .hljs-strong {
color: #c9d1d9;
font-weight: 700
}
html body gradio-app .gradio-container .hljs-addition {
color: #aff5b4;
background-color: #033a16
}
html body gradio-app .gradio-container .hljs-deletion {
color: #ffdcd7;
background-color: #67060c
}

css/highlightjs/github.min.css (new vendored file, 111 lines)

@@ -0,0 +1,111 @@
html body gradio-app .gradio-container pre code.hljs {
display: block;
overflow-x: auto;
padding: 1em
}
html body gradio-app .gradio-container code.hljs {
padding: 3px 5px
}
/*!
Theme: GitHub
Description: Light theme as seen on github.com
Author: github.com
Maintainer: @Hirse
Updated: 2021-05-15
Outdated base version: https://github.com/primer/github-syntax-light
Current colors taken from GitHub's CSS
*/
html body gradio-app .gradio-container .hljs {
color: #24292e;
background: #fff
}
html body gradio-app .gradio-container .hljs-doctag,
html body gradio-app .gradio-container .hljs-keyword,
html body gradio-app .gradio-container .hljs-meta .hljs-keyword,
html body gradio-app .gradio-container .hljs-template-tag,
html body gradio-app .gradio-container .hljs-template-variable,
html body gradio-app .gradio-container .hljs-type,
html body gradio-app .gradio-container .hljs-variable.language_ {
color: #d73a49
}
html body gradio-app .gradio-container .hljs-title,
html body gradio-app .gradio-container .hljs-title.class_,
html body gradio-app .gradio-container .hljs-title.class_.inherited__,
html body gradio-app .gradio-container .hljs-title.function_ {
color: #6f42c1
}
html body gradio-app .gradio-container .hljs-attr,
html body gradio-app .gradio-container .hljs-attribute,
html body gradio-app .gradio-container .hljs-literal,
html body gradio-app .gradio-container .hljs-meta,
html body gradio-app .gradio-container .hljs-number,
html body gradio-app .gradio-container .hljs-operator,
html body gradio-app .gradio-container .hljs-selector-attr,
html body gradio-app .gradio-container .hljs-selector-class,
html body gradio-app .gradio-container .hljs-selector-id,
html body gradio-app .gradio-container .hljs-variable {
color: #005cc5
}
html body gradio-app .gradio-container .hljs-meta .hljs-string,
html body gradio-app .gradio-container .hljs-regexp,
html body gradio-app .gradio-container .hljs-string {
color: #032f62
}
html body gradio-app .gradio-container .hljs-built_in,
html body gradio-app .gradio-container .hljs-symbol {
color: #e36209
}
html body gradio-app .gradio-container .hljs-code,
html body gradio-app .gradio-container .hljs-comment,
html body gradio-app .gradio-container .hljs-formula {
color: #6a737d
}
html body gradio-app .gradio-container .hljs-name,
html body gradio-app .gradio-container .hljs-quote,
html body gradio-app .gradio-container .hljs-selector-pseudo,
html body gradio-app .gradio-container .hljs-selector-tag {
color: #22863a
}
html body gradio-app .gradio-container .hljs-subst {
color: #24292e
}
html body gradio-app .gradio-container .hljs-section {
color: #005cc5;
font-weight: 700
}
html body gradio-app .gradio-container .hljs-bullet {
color: #735c0f
}
html body gradio-app .gradio-container .hljs-emphasis {
color: #24292e;
font-style: italic
}
html body gradio-app .gradio-container .hljs-strong {
color: #24292e;
font-weight: 700
}
html body gradio-app .gradio-container .hljs-addition {
color: #22863a;
background-color: #f0fff4
}
html body gradio-app .gradio-container .hljs-deletion {
color: #b31d28;
background-color: #ffeef0
}


@@ -0,0 +1 @@
.hljs-copy-wrapper{position:relative;overflow:hidden}.hljs-copy-wrapper:hover .hljs-copy-button,.hljs-copy-button:focus{transform:translateX(0)}.hljs-copy-button{position:absolute;transform:translateX(calc(100% + 1.125em));top:1em;right:1em;width:2rem;height:2rem;text-indent:-9999px;color:#fff;border-radius:.25rem;border:1px solid #ffffff22;background-color:#2d2b57;background-color:var(--hljs-theme-background);background-image:url('data:image/svg+xml;utf-8,<svg width="16" height="16" viewBox="0 0 24 24" fill="none" xmlns="http://www.w3.org/2000/svg"><path fill-rule="evenodd" clip-rule="evenodd" d="M6 5C5.73478 5 5.48043 5.10536 5.29289 5.29289C5.10536 5.48043 5 5.73478 5 6V20C5 20.2652 5.10536 20.5196 5.29289 20.7071C5.48043 20.8946 5.73478 21 6 21H18C18.2652 21 18.5196 20.8946 18.7071 20.7071C18.8946 20.5196 19 20.2652 19 20V6C19 5.73478 18.8946 5.48043 18.7071 5.29289C18.5196 5.10536 18.2652 5 18 5H16C15.4477 5 15 4.55228 15 4C15 3.44772 15.4477 3 16 3H18C18.7956 3 19.5587 3.31607 20.1213 3.87868C20.6839 4.44129 21 5.20435 21 6V20C21 20.7957 20.6839 21.5587 20.1213 22.1213C19.5587 22.6839 18.7957 23 18 23H6C5.20435 23 4.44129 22.6839 3.87868 22.1213C3.31607 21.5587 3 20.7957 3 20V6C3 5.20435 3.31607 4.44129 3.87868 3.87868C4.44129 3.31607 5.20435 3 6 3H8C8.55228 3 9 3.44772 9 4C9 4.55228 8.55228 5 8 5H6Z" fill="white"/><path fill-rule="evenodd" clip-rule="evenodd" d="M7 3C7 1.89543 7.89543 1 9 1H15C16.1046 1 17 1.89543 17 3V5C17 6.10457 16.1046 7 15 7H9C7.89543 7 7 6.10457 7 5V3ZM15 3H9V5H15V3Z" fill="white"/></svg>');background-repeat:no-repeat;background-position:center;transition:background-color 200ms ease,transform 200ms ease-out}.hljs-copy-button:hover{border-color:#ffffff44}.hljs-copy-button:active{border-color:#ffffff66}.hljs-copy-button[data-copied="true"]{text-indent:0;width:auto;background-image:none}@media(prefers-reduced-motion){.hljs-copy-button{transition:none}}.hljs-copy-alert{clip:rect(0 0 0 0);clip-path:inset(50%);height:1px;overflow:hidden;position:absolute;white-space:nowrap;width:1px}


@@ -1,104 +0,0 @@
#parent #container {
  background-color: #eef2ff;
  padding: 17px;
}

#parent #container .reply {
  background-color: rgb(214, 218, 240);
  border-bottom-color: rgb(183, 197, 217);
  border-bottom-style: solid;
  border-bottom-width: 1px;
  border-image-outset: 0;
  border-image-repeat: stretch;
  border-image-slice: 100%;
  border-image-source: none;
  border-image-width: 1;
  border-left-color: rgb(0, 0, 0);
  border-left-style: none;
  border-left-width: 0px;
  border-right-color: rgb(183, 197, 217);
  border-right-style: solid;
  border-right-width: 1px;
  border-top-color: rgb(0, 0, 0);
  border-top-style: none;
  border-top-width: 0px;
  color: rgb(0, 0, 0);
  display: table;
  font-family: arial, helvetica, sans-serif;
  font-size: 13.3333px;
  margin-bottom: 4px;
  margin-left: 0px;
  margin-right: 0px;
  margin-top: 4px;
  overflow-x: hidden;
  overflow-y: hidden;
  padding-bottom: 4px;
  padding-left: 2px;
  padding-right: 2px;
  padding-top: 4px;
}

#parent #container .number {
  color: rgb(0, 0, 0);
  font-family: arial, helvetica, sans-serif;
  font-size: 13.3333px;
  width: 342.65px;
  margin-right: 7px;
}

#parent #container .op {
  color: rgb(0, 0, 0);
  font-family: arial, helvetica, sans-serif;
  font-size: 13.3333px;
  margin-bottom: 8px;
  margin-left: 0px;
  margin-right: 0px;
  margin-top: 4px;
  overflow-x: hidden;
  overflow-y: hidden;
}

#parent #container .op blockquote {
  margin-left: 0px !important;
}

#parent #container .name {
  color: rgb(17, 119, 67);
  font-family: arial, helvetica, sans-serif;
  font-size: 13.3333px;
  font-weight: 700;
  margin-left: 7px;
}

#parent #container .quote {
  color: rgb(221, 0, 0);
  font-family: arial, helvetica, sans-serif;
  font-size: 13.3333px;
  text-decoration-color: rgb(221, 0, 0);
  text-decoration-line: underline;
  text-decoration-style: solid;
  text-decoration-thickness: auto;
}

#parent #container .greentext {
  color: rgb(120, 153, 34);
  font-family: arial, helvetica, sans-serif;
  font-size: 13.3333px;
}

#parent #container blockquote {
  margin: 0px !important;
  margin-block-start: 1em;
  margin-block-end: 1em;
  margin-inline-start: 40px;
  margin-inline-end: 40px;
  margin-top: 13.33px !important;
  margin-bottom: 13.33px !important;
  margin-left: 40px !important;
  margin-right: 40px !important;
}

#parent #container .message_4chan {
  color: black;
  border: none;
}


@@ -1,64 +1,97 @@
-.message {
-  display: grid;
-  grid-template-columns: 60px 1fr;
-  padding-bottom: 25px;
-  font-size: 15px;
-  font-family: 'Noto Sans', Helvetica, Arial, sans-serif;
-  line-height: 22px;
+.chat {
+  background: transparent;
+  padding: 0;
+  padding-top: 0;
+}
+
+.chat > .messages:first-child {
+  padding-top: 0 !important;
+}
+
+.chat .message-body p, .chat .message-body li {
+  font-size: 1rem !important;
+  line-height: 28px !important;
+}
+
+.dark .chat .message-body :is(p,li,h1,h2,h3,h4,h5,h6),
+.dark .chat .message-body em:not(:is(h1,h2,h3,h4,h5,h6,b,strong) em),
+.dark .chat .message-body q:not(:is(h1,h2,h3,h4,h5,h6,b,strong) q) {
+  color: #d1d5db !important;
+}
+
+.chat .message-body :is(th, td),
+.prose hr {
+  border-color: #40404096 !important;
+}
+
+.dark .chat .message-body :is(th, td),
+.dark .prose hr {
+  border-color: rgb(255 255 255 / 30%) !important;
+}
+
+.chat .message-body :is(p, ul, ol) {
+  margin: 1.25em 0 !important;
+}
+
+.chat .message-body :is(p, ul, ol):first-child {
+  margin-top: 0 !important;
+}
+
+.chat .message-body :is(p, ul, ol):last-child {
+  margin-bottom: 0 !important;
+}
+
+.user-message, .assistant-message {
+  font-family: Inter, Helvetica, Arial, sans-serif;
+}
+
+.message:first-child {
+  padding-top: 0;
 }
 
 .username {
   display: none;
 }
 
-.message-body p {
-  font-size: 15px !important;
-  line-height: 22px !important;
-  margin-bottom: 1.25em !important;
+.chat .user-message {
+  background: #f3f4f6;
+  padding: 1.5rem 1rem;
+  padding-bottom: 2rem;
+  border-radius: 0;
+  border-bottom-right-radius: 0;
 }
 
-.chat .message-body ul, .chat .message-body ol {
-  margin-bottom: 1.25em !important;
-}
-
-.dark .message-body p em {
-  color: rgb(198, 202, 214) !important;
-}
-
-.message-body p em {
-  color: rgb(110, 110, 110) !important;
-}
-
-.gradio-container .chat .assistant-message {
-  padding: 15px;
-  border-radius: 20px;
-  background-color: #0000000f;
-  margin-top: 9px !important;
-  margin-bottom: 18px !important;
-}
-
-.gradio-container .chat .user-message {
-  padding: 15px;
-  border-radius: 20px;
-  margin-bottom: 9px !important;
-}
-
-.gradio-container .chat .assistant-message:last-child, .gradio-container .chat .user-message:last-child {
-  margin-bottom: 0px !important;
-}
-
-.dark .chat .assistant-message {
-  background-color: #1f2937;
+.chat .assistant-message {
+  padding: 1.5rem 1rem;
+  padding-bottom: 2rem;
+  border-radius: 0;
+  border: 0;
 }
 
 .dark .chat .user-message {
-  background-color: transparent;
+  background: var(--light-gray);
 }
 
-code {
-  background-color: white !important;
+.dark .chat .assistant-message {
+  background: transparent;
 }
 
-.dark code {
-  background-color: #0e1321 !important;
+.chat .user-message .text,
+.chat .assistant-message .text {
+  max-width: 724px;
+  margin-left: auto;
+  margin-right: auto;
+}
+
+/* Create space between two assistant messages in a row */
+.assistant-message + .assistant-message {
+  margin-top: 1.5rem;
+}
+
+pre > code {
+  background-color: #f3f4f6 !important;
+}
+
+.dark pre > code {
+  background-color: #1f2937 !important;
 }


@@ -1,33 +1,33 @@
-.container {
+.readable-container {
   max-width: 600px;
   margin-left: auto;
   margin-right: auto;
-  background-color: rgb(31, 41, 55);
+  background-color: rgb(31 41 55);
   padding: 3em;
   word-break: break-word;
   overflow-wrap: anywhere;
   color: #efefef !important;
 }
 
-.container p, .container li {
+.readable-container p, .readable-container li {
   font-size: 16px !important;
   color: #efefef !important;
   margin-bottom: 22px;
   line-height: 1.4 !important;
 }
 
-.container li > p {
+.readable-container li > p {
   display: inline !important;
 }
 
-.container code {
+.readable-container code {
   overflow-x: auto;
 }
 
-.container :not(pre) > code {
+.readable-container :not(pre) > code {
   white-space: normal !important;
 }
 
-.container .hoverable {
+.readable-container .hoverable {
   font-size: 14px;
 }

60 binary files changed (contents not shown).

css/katex/katex.min.css (new vendored file; diff suppressed because one or more lines are too long)

File diff suppressed because it is too large.


@@ -1,9 +1,3 @@
 .env
 Dockerfile
-/characters
-/loras
-/models
-/presets
-/prompts
-/softprompts
-/training
+/user_data

Some files were not shown because too many files have changed in this diff.