mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2026-04-07 15:43:49 +00:00
Compare commits
83 commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
9dcf574160 | ||
|
|
e18f32cba7 | ||
|
|
778e1c4d52 | ||
|
|
775c913de2 | ||
|
|
cb511928e2 | ||
|
|
193424cc93 | ||
|
|
c26ffdd24c | ||
|
|
4d6230a944 | ||
|
|
7b2f15e34a | ||
|
|
05e4842033 | ||
|
|
b1d06dcf96 | ||
|
|
abc3487f4d | ||
|
|
223dd4b801 | ||
|
|
f8db23b362 | ||
|
|
d78fc46114 | ||
|
|
422f42ca7f | ||
|
|
544fcb0b7f | ||
|
|
c63a79ee48 | ||
|
|
9805ddcde9 | ||
|
|
91f9b01516 | ||
|
|
1f49a64e1a | ||
|
|
e8b31c063a | ||
|
|
ee917cd5ed | ||
|
|
dfd8ec9c49 | ||
|
|
0c033caf0e | ||
|
|
1b403a4ffa | ||
|
|
8cb7fe9c47 | ||
|
|
41bce3f4de | ||
|
|
ffea8f282e | ||
|
|
7fed60f90a | ||
|
|
2eef90a323 | ||
|
|
9183dc444e | ||
|
|
e0ad4e60df | ||
|
|
16af11f868 | ||
|
|
54b2f39c78 | ||
|
|
b5afecc63b | ||
|
|
2fbaee58cd | ||
|
|
62e67adb55 | ||
|
|
fc35acab9b | ||
|
|
8ecdb41078 | ||
|
|
5fb8c4fbd6 | ||
|
|
0050a33f37 | ||
|
|
6b66da84d2 | ||
|
|
8e8e1ba898 | ||
|
|
131a9a0140 | ||
|
|
95d6c53e13 | ||
|
|
8bba9ecc3f | ||
|
|
66d1a22c73 | ||
|
|
000d776967 | ||
|
|
a1cb5b5dc0 | ||
|
|
b11379f328 | ||
|
|
42dfcdfc5b | ||
|
|
6e2b70bde6 | ||
|
|
9374a4e70d | ||
|
|
b108c55353 | ||
|
|
ae699ac570 | ||
|
|
7aab2fdf9a | ||
|
|
d84157403a | ||
|
|
a61bde509f | ||
|
|
091037ec20 | ||
|
|
f6f8f14c8d | ||
|
|
80e81a54ca | ||
|
|
468cb5cb87 | ||
|
|
6a1f720c7b | ||
|
|
8f8b57a029 | ||
|
|
c50e17bdbe | ||
|
|
ea1f8c71f2 | ||
|
|
c10c6e87ae | ||
|
|
a32ce254f2 | ||
|
|
4073164be0 | ||
|
|
328534b762 | ||
|
|
71c1a52afe | ||
|
|
6382fbef83 | ||
|
|
0466b6e271 | ||
|
|
be6fc0663a | ||
|
|
4979e87e48 | ||
|
|
9dd04b86ce | ||
|
|
bda95172bd | ||
|
|
4cbea02ed4 | ||
|
|
e154140021 | ||
|
|
368f37335f | ||
|
|
d6f1485dd1 | ||
|
|
807be11832 |
70 changed files with 1836 additions and 893 deletions
35
.github/workflows/build-everything-tgw.yml
vendored
35
.github/workflows/build-everything-tgw.yml
vendored
|
|
@ -41,6 +41,13 @@ jobs:
|
||||||
version: ${{ inputs.version }}
|
version: ${{ inputs.version }}
|
||||||
config: 'os:ubuntu-22.04'
|
config: 'os:ubuntu-22.04'
|
||||||
|
|
||||||
|
build_release_rocm_windows:
|
||||||
|
name: ROCm Windows
|
||||||
|
uses: ./.github/workflows/build-portable-release-rocm.yml
|
||||||
|
with:
|
||||||
|
version: ${{ inputs.version }}
|
||||||
|
config: 'os:windows-2022'
|
||||||
|
|
||||||
build_release_rocm_linux:
|
build_release_rocm_linux:
|
||||||
name: ROCm Linux
|
name: ROCm Linux
|
||||||
uses: ./.github/workflows/build-portable-release-rocm.yml
|
uses: ./.github/workflows/build-portable-release-rocm.yml
|
||||||
|
|
@ -68,3 +75,31 @@ jobs:
|
||||||
with:
|
with:
|
||||||
version: ${{ inputs.version }}
|
version: ${{ inputs.version }}
|
||||||
config: 'os:macos-15-intel,macos-14'
|
config: 'os:macos-15-intel,macos-14'
|
||||||
|
|
||||||
|
build_release_ik_cuda_windows:
|
||||||
|
name: ik CUDA Windows
|
||||||
|
uses: ./.github/workflows/build-portable-release-ik-cuda.yml
|
||||||
|
with:
|
||||||
|
version: ${{ inputs.version }}
|
||||||
|
config: 'os:windows-2022'
|
||||||
|
|
||||||
|
build_release_ik_cuda_linux:
|
||||||
|
name: ik CUDA Linux
|
||||||
|
uses: ./.github/workflows/build-portable-release-ik-cuda.yml
|
||||||
|
with:
|
||||||
|
version: ${{ inputs.version }}
|
||||||
|
config: 'os:ubuntu-22.04'
|
||||||
|
|
||||||
|
build_release_ik_cpu_windows:
|
||||||
|
name: ik CPU Windows
|
||||||
|
uses: ./.github/workflows/build-portable-release-ik.yml
|
||||||
|
with:
|
||||||
|
version: ${{ inputs.version }}
|
||||||
|
config: 'os:windows-2022'
|
||||||
|
|
||||||
|
build_release_ik_cpu_linux:
|
||||||
|
name: ik CPU Linux
|
||||||
|
uses: ./.github/workflows/build-portable-release-ik.yml
|
||||||
|
with:
|
||||||
|
version: ${{ inputs.version }}
|
||||||
|
config: 'os:ubuntu-22.04'
|
||||||
|
|
|
||||||
178
.github/workflows/build-portable-release-ik-cuda.yml
vendored
Normal file
178
.github/workflows/build-portable-release-ik-cuda.yml
vendored
Normal file
|
|
@ -0,0 +1,178 @@
|
||||||
|
name: Build ik CUDA
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch:
|
||||||
|
inputs:
|
||||||
|
version:
|
||||||
|
description: 'Version tag of text-generation-webui to build: v3.0'
|
||||||
|
default: 'v3.0'
|
||||||
|
required: true
|
||||||
|
type: string
|
||||||
|
config:
|
||||||
|
description: 'Override configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
|
||||||
|
default: 'Default'
|
||||||
|
required: false
|
||||||
|
type: string
|
||||||
|
exclude:
|
||||||
|
description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
|
||||||
|
default: 'None'
|
||||||
|
required: false
|
||||||
|
type: string
|
||||||
|
workflow_call:
|
||||||
|
inputs:
|
||||||
|
version:
|
||||||
|
description: 'Version tag of text-generation-webui to build: v3.0'
|
||||||
|
default: 'v3.0'
|
||||||
|
required: true
|
||||||
|
type: string
|
||||||
|
config:
|
||||||
|
description: 'Configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
|
||||||
|
default: 'Default'
|
||||||
|
required: false
|
||||||
|
type: string
|
||||||
|
exclude:
|
||||||
|
description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
|
||||||
|
default: 'None'
|
||||||
|
required: false
|
||||||
|
type: string
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: write
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
define_matrix:
|
||||||
|
name: Define Build Matrix
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
outputs:
|
||||||
|
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||||
|
defaults:
|
||||||
|
run:
|
||||||
|
shell: pwsh
|
||||||
|
env:
|
||||||
|
CONFIGIN: ${{ inputs.config }}
|
||||||
|
EXCLUDEIN: ${{ inputs.exclude }}
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Define Job Output
|
||||||
|
id: set-matrix
|
||||||
|
run: |
|
||||||
|
$matrix = @{
|
||||||
|
'os' = @('ubuntu-22.04', 'windows-2022')
|
||||||
|
'pyver' = @("3.13")
|
||||||
|
'cuda' = @("12.4", "13.1")
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($env:CONFIGIN -ne 'Default') {$env:CONFIGIN.split(';').foreach({$matrix[$_.split(':')[0]] = $_.split(':')[1].split(',')})}
|
||||||
|
|
||||||
|
if ($env:EXCLUDEIN -ne 'None') {
|
||||||
|
$exclusions = @()
|
||||||
|
$exclusions += $env:EXCLUDEIN.split(';').replace(':','=').replace(',',"`n") | ConvertFrom-StringData
|
||||||
|
$matrix['exclude'] = $exclusions
|
||||||
|
}
|
||||||
|
|
||||||
|
$matrixOut = ConvertTo-Json $matrix -Compress
|
||||||
|
Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT
|
||||||
|
|
||||||
|
build_wheels:
|
||||||
|
name: ${{ matrix.os }} ${{ matrix.pyver }} CUDA ${{ matrix.cuda }}
|
||||||
|
needs: define_matrix
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
strategy:
|
||||||
|
matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
|
||||||
|
defaults:
|
||||||
|
run:
|
||||||
|
shell: pwsh
|
||||||
|
env:
|
||||||
|
PCKGVER: ${{ inputs.version }}
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v6
|
||||||
|
with:
|
||||||
|
repository: 'oobabooga/text-generation-webui'
|
||||||
|
ref: ${{ inputs.version }}
|
||||||
|
submodules: 'recursive'
|
||||||
|
|
||||||
|
- uses: actions/setup-python@v6
|
||||||
|
with:
|
||||||
|
python-version: ${{ matrix.pyver }}
|
||||||
|
|
||||||
|
- name: Build Package
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
VERSION_CLEAN="${{ inputs.version }}"
|
||||||
|
VERSION_CLEAN="${VERSION_CLEAN#v}"
|
||||||
|
cd ..
|
||||||
|
cp -r text-generation-webui "text-generation-webui-ik-${VERSION_CLEAN}"
|
||||||
|
cd "text-generation-webui-ik-${VERSION_CLEAN}"
|
||||||
|
|
||||||
|
# Remove extensions that need additional requirements
|
||||||
|
allowed=("character_bias" "gallery" "sd_api_pictures")
|
||||||
|
find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf
|
||||||
|
|
||||||
|
# Define common variables
|
||||||
|
CUDA_VERSION="${{ matrix.cuda }}"
|
||||||
|
VERSION="${{ inputs.version }}"
|
||||||
|
|
||||||
|
# 1. Set platform-specific variables
|
||||||
|
if [[ "$RUNNER_OS" == "Windows" ]]; then
|
||||||
|
PLATFORM="windows"
|
||||||
|
PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-pc-windows-msvc-install_only_stripped.tar.gz"
|
||||||
|
PIP_PATH="portable_env/python.exe -m pip"
|
||||||
|
PACKAGES_PATH="portable_env/Lib/site-packages"
|
||||||
|
rm start_linux.sh start_macos.sh
|
||||||
|
else
|
||||||
|
PLATFORM="linux"
|
||||||
|
PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz"
|
||||||
|
PIP_PATH="portable_env/bin/python -m pip"
|
||||||
|
PACKAGES_PATH="portable_env/lib/python3.13/site-packages"
|
||||||
|
rm start_macos.sh start_windows.bat
|
||||||
|
fi
|
||||||
|
|
||||||
|
# 2. Download and extract Python
|
||||||
|
cd ..
|
||||||
|
echo "Downloading Python for $PLATFORM..."
|
||||||
|
curl -L -o python-build.tar.gz "$PYTHON_URL"
|
||||||
|
tar -xzf python-build.tar.gz
|
||||||
|
mv python "text-generation-webui-ik-${VERSION_CLEAN}/portable_env"
|
||||||
|
|
||||||
|
# 3. Prepare requirements file based on CUDA version
|
||||||
|
cd "text-generation-webui-ik-${VERSION_CLEAN}"
|
||||||
|
if [[ "$CUDA_VERSION" == "13.1" ]]; then
|
||||||
|
REQ_FILE="requirements/portable/requirements_ik_cuda131.txt"
|
||||||
|
else
|
||||||
|
REQ_FILE="requirements/portable/requirements_ik.txt"
|
||||||
|
fi
|
||||||
|
|
||||||
|
# 4. Inject --ik into start scripts
|
||||||
|
sed -i 's/--portable/--portable --ik/g' start_linux.sh start_windows.bat 2>/dev/null || true
|
||||||
|
|
||||||
|
# 5. Install packages
|
||||||
|
echo "Installing Python packages from $REQ_FILE..."
|
||||||
|
$PIP_PATH install --target="./$PACKAGES_PATH" -r "$REQ_FILE"
|
||||||
|
|
||||||
|
# 6. Clean up
|
||||||
|
rm -rf .git cmd* update_wizard* Colab-TextGen-GPU.ipynb docker setup.cfg .github .gitignore requirements/ one_click.py
|
||||||
|
|
||||||
|
# 7. Create archive
|
||||||
|
cd ..
|
||||||
|
if [[ "$RUNNER_OS" == "Windows" ]]; then
|
||||||
|
ARCHIVE_NAME="textgen-portable-ik-${VERSION_CLEAN}-${PLATFORM}-cuda${CUDA_VERSION}.zip"
|
||||||
|
echo "Creating archive: $ARCHIVE_NAME"
|
||||||
|
powershell -Command "Compress-Archive -Path text-generation-webui-ik-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
|
||||||
|
else
|
||||||
|
ARCHIVE_NAME="textgen-portable-ik-${VERSION_CLEAN}-${PLATFORM}-cuda${CUDA_VERSION}.tar.gz"
|
||||||
|
echo "Creating archive: $ARCHIVE_NAME"
|
||||||
|
tar czf "$ARCHIVE_NAME" "text-generation-webui-ik-${VERSION_CLEAN}"
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Upload files to a GitHub release
|
||||||
|
id: upload-release
|
||||||
|
uses: svenstaro/upload-release-action@2.7.0
|
||||||
|
continue-on-error: true
|
||||||
|
with:
|
||||||
|
repo_token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
file: ../textgen-portable-ik-*
|
||||||
|
tag: ${{ inputs.version }}
|
||||||
|
file_glob: true
|
||||||
|
make_latest: false
|
||||||
|
overwrite: true
|
||||||
173
.github/workflows/build-portable-release-ik.yml
vendored
Normal file
173
.github/workflows/build-portable-release-ik.yml
vendored
Normal file
|
|
@ -0,0 +1,173 @@
|
||||||
|
name: Build ik CPU
|
||||||
|
|
||||||
|
on:
|
||||||
|
workflow_dispatch:
|
||||||
|
inputs:
|
||||||
|
version:
|
||||||
|
description: 'Version tag of text-generation-webui to build: v3.0'
|
||||||
|
default: 'v3.0'
|
||||||
|
required: true
|
||||||
|
type: string
|
||||||
|
config:
|
||||||
|
description: 'Override configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
|
||||||
|
default: 'Default'
|
||||||
|
required: false
|
||||||
|
type: string
|
||||||
|
exclude:
|
||||||
|
description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
|
||||||
|
default: 'None'
|
||||||
|
required: false
|
||||||
|
type: string
|
||||||
|
workflow_call:
|
||||||
|
inputs:
|
||||||
|
version:
|
||||||
|
description: 'Version tag of text-generation-webui to build: v3.0'
|
||||||
|
default: 'v3.0'
|
||||||
|
required: true
|
||||||
|
type: string
|
||||||
|
config:
|
||||||
|
description: 'Configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
|
||||||
|
default: 'Default'
|
||||||
|
required: false
|
||||||
|
type: string
|
||||||
|
exclude:
|
||||||
|
description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
|
||||||
|
default: 'None'
|
||||||
|
required: false
|
||||||
|
type: string
|
||||||
|
|
||||||
|
permissions:
|
||||||
|
contents: write
|
||||||
|
|
||||||
|
jobs:
|
||||||
|
define_matrix:
|
||||||
|
name: Define Build Matrix
|
||||||
|
runs-on: ubuntu-latest
|
||||||
|
outputs:
|
||||||
|
matrix: ${{ steps.set-matrix.outputs.matrix }}
|
||||||
|
defaults:
|
||||||
|
run:
|
||||||
|
shell: pwsh
|
||||||
|
env:
|
||||||
|
CONFIGIN: ${{ inputs.config }}
|
||||||
|
EXCLUDEIN: ${{ inputs.exclude }}
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- name: Define Job Output
|
||||||
|
id: set-matrix
|
||||||
|
run: |
|
||||||
|
$matrix = @{
|
||||||
|
'os' = @('ubuntu-22.04', 'windows-2022')
|
||||||
|
'pyver' = @("3.13")
|
||||||
|
}
|
||||||
|
|
||||||
|
if ($env:CONFIGIN -ne 'Default') {$env:CONFIGIN.split(';').foreach({$matrix[$_.split(':')[0]] = $_.split(':')[1].split(',')})}
|
||||||
|
|
||||||
|
if ($env:EXCLUDEIN -ne 'None') {
|
||||||
|
$exclusions = @()
|
||||||
|
$exclusions += $env:EXCLUDEIN.split(';').replace(':','=').replace(',',"`n") | ConvertFrom-StringData
|
||||||
|
$matrix['exclude'] = $exclusions
|
||||||
|
}
|
||||||
|
|
||||||
|
$matrixOut = ConvertTo-Json $matrix -Compress
|
||||||
|
Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT
|
||||||
|
|
||||||
|
build_wheels:
|
||||||
|
name: ${{ matrix.os }} ${{ matrix.pyver }}
|
||||||
|
needs: define_matrix
|
||||||
|
runs-on: ${{ matrix.os }}
|
||||||
|
strategy:
|
||||||
|
matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
|
||||||
|
defaults:
|
||||||
|
run:
|
||||||
|
shell: pwsh
|
||||||
|
env:
|
||||||
|
PCKGVER: ${{ inputs.version }}
|
||||||
|
|
||||||
|
steps:
|
||||||
|
- uses: actions/checkout@v6
|
||||||
|
with:
|
||||||
|
repository: 'oobabooga/text-generation-webui'
|
||||||
|
ref: ${{ inputs.version }}
|
||||||
|
submodules: 'recursive'
|
||||||
|
|
||||||
|
- uses: actions/setup-python@v6
|
||||||
|
with:
|
||||||
|
python-version: ${{ matrix.pyver }}
|
||||||
|
|
||||||
|
- name: Build Package
|
||||||
|
shell: bash
|
||||||
|
run: |
|
||||||
|
VERSION_CLEAN="${{ inputs.version }}"
|
||||||
|
VERSION_CLEAN="${VERSION_CLEAN#v}"
|
||||||
|
cd ..
|
||||||
|
cp -r text-generation-webui "text-generation-webui-ik-${VERSION_CLEAN}"
|
||||||
|
cd "text-generation-webui-ik-${VERSION_CLEAN}"
|
||||||
|
|
||||||
|
# Remove extensions that need additional requirements
|
||||||
|
allowed=("character_bias" "gallery" "sd_api_pictures")
|
||||||
|
find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf
|
||||||
|
|
||||||
|
# Define common variables
|
||||||
|
VERSION="${{ inputs.version }}"
|
||||||
|
|
||||||
|
# 1. Set platform-specific variables
|
||||||
|
if [[ "$RUNNER_OS" == "Windows" ]]; then
|
||||||
|
PLATFORM="windows-cpu"
|
||||||
|
PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-pc-windows-msvc-install_only_stripped.tar.gz"
|
||||||
|
PIP_PATH="portable_env/python.exe -m pip"
|
||||||
|
PACKAGES_PATH="portable_env/Lib/site-packages"
|
||||||
|
rm start_linux.sh start_macos.sh
|
||||||
|
else
|
||||||
|
PLATFORM="linux-cpu"
|
||||||
|
PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz"
|
||||||
|
PIP_PATH="portable_env/bin/python -m pip"
|
||||||
|
PACKAGES_PATH="portable_env/lib/python3.13/site-packages"
|
||||||
|
rm start_macos.sh start_windows.bat
|
||||||
|
fi
|
||||||
|
|
||||||
|
# 2. Download and extract Python
|
||||||
|
echo "Downloading Python for $PLATFORM..."
|
||||||
|
cd ..
|
||||||
|
curl -L -o python-build.tar.gz "$PYTHON_URL"
|
||||||
|
tar -xzf python-build.tar.gz
|
||||||
|
mv python "text-generation-webui-ik-${VERSION_CLEAN}/portable_env"
|
||||||
|
|
||||||
|
# 3. Prepare requirements file
|
||||||
|
cd "text-generation-webui-ik-${VERSION_CLEAN}"
|
||||||
|
REQ_FILE="requirements/portable/requirements_ik_cpu_only.txt"
|
||||||
|
echo "Using requirements file: $REQ_FILE"
|
||||||
|
|
||||||
|
# 4. Inject --ik into start scripts
|
||||||
|
sed -i 's/--portable/--portable --ik/g' start_linux.sh start_windows.bat 2>/dev/null || true
|
||||||
|
|
||||||
|
# 5. Install packages
|
||||||
|
echo "Installing Python packages from $REQ_FILE..."
|
||||||
|
$PIP_PATH install --target="./$PACKAGES_PATH" -r "$REQ_FILE"
|
||||||
|
|
||||||
|
# 6. Clean up
|
||||||
|
rm -rf .git cmd* update_wizard* Colab-TextGen-GPU.ipynb docker setup.cfg .github .gitignore requirements/ one_click.py
|
||||||
|
|
||||||
|
# 7. Create archive
|
||||||
|
cd ..
|
||||||
|
if [[ "$RUNNER_OS" == "Windows" ]]; then
|
||||||
|
ARCHIVE_NAME="textgen-portable-ik-${VERSION_CLEAN}-${PLATFORM}.zip"
|
||||||
|
echo "Creating archive: $ARCHIVE_NAME"
|
||||||
|
powershell -Command "Compress-Archive -Path text-generation-webui-ik-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
|
||||||
|
else
|
||||||
|
ARCHIVE_NAME="textgen-portable-ik-${VERSION_CLEAN}-${PLATFORM}.tar.gz"
|
||||||
|
echo "Creating archive: $ARCHIVE_NAME"
|
||||||
|
tar czf "$ARCHIVE_NAME" "text-generation-webui-ik-${VERSION_CLEAN}"
|
||||||
|
fi
|
||||||
|
|
||||||
|
- name: Upload files to a GitHub release
|
||||||
|
id: upload-release
|
||||||
|
uses: svenstaro/upload-release-action@2.7.0
|
||||||
|
continue-on-error: true
|
||||||
|
with:
|
||||||
|
repo_token: ${{ secrets.GITHUB_TOKEN }}
|
||||||
|
file: ../textgen-portable-ik-*
|
||||||
|
tag: ${{ inputs.version }}
|
||||||
|
file_glob: true
|
||||||
|
make_latest: false
|
||||||
|
overwrite: true
|
||||||
|
|
@ -24,9 +24,9 @@ A Gradio web UI for running Large Language Models locally. 100% private and offl
|
||||||
## Features
|
## Features
|
||||||
|
|
||||||
- **Easy setup**: [Portable builds](https://github.com/oobabooga/text-generation-webui/releases) (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or a one-click installer for the full feature set.
|
- **Easy setup**: [Portable builds](https://github.com/oobabooga/text-generation-webui/releases) (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or a one-click installer for the full feature set.
|
||||||
- **Multiple backends**: [llama.cpp](https://github.com/ggerganov/llama.cpp), [Transformers](https://github.com/huggingface/transformers), [ExLlamaV3](https://github.com/turboderp-org/exllamav3), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). Switch between backends and models without restarting.
|
- **Multiple backends**: [llama.cpp](https://github.com/ggerganov/llama.cpp), [ik_llama.cpp](https://github.com/ikawrakow/ik_llama.cpp), [Transformers](https://github.com/huggingface/transformers), [ExLlamaV3](https://github.com/turboderp-org/exllamav3), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). Switch between backends and models without restarting.
|
||||||
- **OpenAI/Anthropic-compatible API**: Chat, Completions, and Messages endpoints with tool-calling support. Use as a local drop-in replacement for the OpenAI/Anthropic APIs ([examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples)).
|
- **OpenAI/Anthropic-compatible API**: Chat, Completions, and Messages endpoints with tool-calling support. Use as a local drop-in replacement for the OpenAI/Anthropic APIs ([examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples)).
|
||||||
- **Tool-calling**: Models can call custom functions during chat — web search, page fetching, math, and more. Each tool is a single `.py` file, easy to create and extend ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Tool-Calling-Tutorial)).
|
- **Tool-calling**: Models can call custom functions during chat — web search, page fetching, math, and more. Each tool is a single `.py` file. MCP servers are also supported ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Tool-Calling-Tutorial)).
|
||||||
- **Vision (multimodal)**: Attach images to messages for visual understanding ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Multimodal-Tutorial)).
|
- **Vision (multimodal)**: Attach images to messages for visual understanding ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Multimodal-Tutorial)).
|
||||||
- **File attachments**: Upload text files, PDF documents, and .docx documents to talk about their contents.
|
- **File attachments**: Upload text files, PDF documents, and .docx documents to talk about their contents.
|
||||||
- **Training**: Fine-tune LoRAs on multi-turn chat or raw text datasets. Supports resuming interrupted runs ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/05-%E2%80%90-Training-Tab)).
|
- **Training**: Fine-tune LoRAs on multi-turn chat or raw text datasets. Supports resuming interrupted runs ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/05-%E2%80%90-Training-Tab)).
|
||||||
|
|
|
||||||
|
|
@ -13,21 +13,12 @@
|
||||||
line-height: 28px !important;
|
line-height: 28px !important;
|
||||||
}
|
}
|
||||||
|
|
||||||
.dark .chat .message-body :is(p,li,h1,h2,h3,h4,h5,h6),
|
.dark .chat .message-body :is(p,li),
|
||||||
.dark .chat .message-body em:not(:is(h1,h2,h3,h4,h5,h6,b,strong) em),
|
.dark .chat .message-body em:not(:is(h1,h2,h3,h4,h5,h6,b,strong) em),
|
||||||
.dark .chat .message-body q:not(:is(h1,h2,h3,h4,h5,h6,b,strong) q) {
|
.dark .chat .message-body q:not(:is(h1,h2,h3,h4,h5,h6,b,strong) q) {
|
||||||
color: #d1d5db !important;
|
color: #d1d5db !important;
|
||||||
}
|
}
|
||||||
|
|
||||||
.chat .message-body :is(th, td),
|
|
||||||
.prose hr {
|
|
||||||
border-color: #40404096 !important;
|
|
||||||
}
|
|
||||||
|
|
||||||
.dark .chat .message-body :is(th, td),
|
|
||||||
.dark .prose hr {
|
|
||||||
border-color: rgb(255 255 255 / 30%) !important;
|
|
||||||
}
|
|
||||||
|
|
||||||
.chat .message-body :is(p, ul, ol) {
|
.chat .message-body :is(p, ul, ol) {
|
||||||
margin: 1.25em 0 !important;
|
margin: 1.25em 0 !important;
|
||||||
|
|
|
||||||
177
css/main.css
177
css/main.css
|
|
@ -22,6 +22,17 @@
|
||||||
font-style: italic;
|
font-style: italic;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
/* Hide spin buttons on number inputs (look bad on Windows) */
|
||||||
|
input[type="number"]::-webkit-outer-spin-button,
|
||||||
|
input[type="number"]::-webkit-inner-spin-button {
|
||||||
|
-webkit-appearance: none;
|
||||||
|
margin: 0;
|
||||||
|
}
|
||||||
|
|
||||||
|
input[type="number"] {
|
||||||
|
-moz-appearance: textfield;
|
||||||
|
}
|
||||||
|
|
||||||
.padded.svelte-12cmxck {
|
.padded.svelte-12cmxck {
|
||||||
padding: 3px 0;
|
padding: 3px 0;
|
||||||
}
|
}
|
||||||
|
|
@ -246,8 +257,8 @@ button {
|
||||||
|
|
||||||
.pretty_scrollbar::-webkit-scrollbar,
|
.pretty_scrollbar::-webkit-scrollbar,
|
||||||
#image-history-gallery > :nth-child(2)::-webkit-scrollbar {
|
#image-history-gallery > :nth-child(2)::-webkit-scrollbar {
|
||||||
width: 8px;
|
width: 7px;
|
||||||
height: 8px;
|
height: 7px;
|
||||||
}
|
}
|
||||||
|
|
||||||
.pretty_scrollbar::-webkit-scrollbar-track,
|
.pretty_scrollbar::-webkit-scrollbar-track,
|
||||||
|
|
@ -260,7 +271,7 @@ button {
|
||||||
#image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb,
|
#image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb,
|
||||||
#image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb:hover {
|
#image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb:hover {
|
||||||
background: var(--neutral-300);
|
background: var(--neutral-300);
|
||||||
border-radius: 30px;
|
border-radius: 9999px;
|
||||||
}
|
}
|
||||||
|
|
||||||
.dark .pretty_scrollbar::-webkit-scrollbar-thumb,
|
.dark .pretty_scrollbar::-webkit-scrollbar-thumb,
|
||||||
|
|
@ -268,18 +279,17 @@ button {
|
||||||
.dark #image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb,
|
.dark #image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb,
|
||||||
.dark #image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb:hover {
|
.dark #image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb:hover {
|
||||||
background: rgb(255 255 255 / 6.25%);
|
background: rgb(255 255 255 / 6.25%);
|
||||||
border-radius: 30px;
|
border-radius: 9999px;
|
||||||
}
|
}
|
||||||
|
|
||||||
.pretty_scrollbar::-webkit-resizer,
|
.pretty_scrollbar::-webkit-resizer,
|
||||||
#image-history-gallery > :nth-child(2)::-webkit-resizer {
|
#image-history-gallery > :nth-child(2)::-webkit-resizer {
|
||||||
background: #d2d2d8;
|
background: transparent;
|
||||||
}
|
}
|
||||||
|
|
||||||
.dark .pretty_scrollbar::-webkit-resizer,
|
.dark .pretty_scrollbar::-webkit-resizer,
|
||||||
.dark #image-history-gallery > :nth-child(2)::-webkit-resizer {
|
.dark #image-history-gallery > :nth-child(2)::-webkit-resizer {
|
||||||
background: rgb(255 255 255 / 10%);
|
background: transparent;
|
||||||
border-radius: 10px;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
.pretty_scrollbar::-webkit-scrollbar-corner,
|
.pretty_scrollbar::-webkit-scrollbar-corner,
|
||||||
|
|
@ -436,15 +446,25 @@ audio {
|
||||||
.dark .message-body h4,
|
.dark .message-body h4,
|
||||||
.dark .message-body h5,
|
.dark .message-body h5,
|
||||||
.dark .message-body h6 {
|
.dark .message-body h6 {
|
||||||
color: white !important;
|
color: #e8e8e8 !important;
|
||||||
}
|
}
|
||||||
|
|
||||||
.dark .message-body blockquote {
|
.message-body blockquote {
|
||||||
border-left-color: rgb(255 255 255 / 30%);
|
border-left-width: 4px;
|
||||||
|
border-left-color: var(--border-color-primary);
|
||||||
|
}
|
||||||
|
|
||||||
|
.message-body h1,
|
||||||
|
.message-body h2,
|
||||||
|
.message-body h3,
|
||||||
|
.message-body h4,
|
||||||
|
.message-body h5,
|
||||||
|
.message-body h6 {
|
||||||
|
color: #1a1a1a;
|
||||||
}
|
}
|
||||||
|
|
||||||
.message-body h1 {
|
.message-body h1 {
|
||||||
font-weight: 800;
|
font-weight: 700;
|
||||||
font-size: 2.25em;
|
font-size: 2.25em;
|
||||||
margin-top: 0;
|
margin-top: 0;
|
||||||
margin-bottom: 0.8888889em;
|
margin-bottom: 0.8888889em;
|
||||||
|
|
@ -476,13 +496,13 @@ audio {
|
||||||
}
|
}
|
||||||
|
|
||||||
.message-body h5 {
|
.message-body h5 {
|
||||||
font-weight: normal;
|
font-weight: 600;
|
||||||
font-size: 1em;
|
font-size: 1em;
|
||||||
margin: 0;
|
margin: 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
.message-body h6 {
|
.message-body h6 {
|
||||||
font-weight: normal;
|
font-weight: 600;
|
||||||
font-size: 1em;
|
font-size: 1em;
|
||||||
margin: 0;
|
margin: 0;
|
||||||
}
|
}
|
||||||
|
|
@ -590,7 +610,7 @@ audio {
|
||||||
}
|
}
|
||||||
|
|
||||||
#chat-input textarea::-webkit-scrollbar {
|
#chat-input textarea::-webkit-scrollbar {
|
||||||
width: 8px;
|
width: 7px;
|
||||||
}
|
}
|
||||||
|
|
||||||
#chat-input textarea::-webkit-scrollbar-track {
|
#chat-input textarea::-webkit-scrollbar-track {
|
||||||
|
|
@ -599,7 +619,7 @@ audio {
|
||||||
|
|
||||||
#chat-input textarea::-webkit-scrollbar-thumb {
|
#chat-input textarea::-webkit-scrollbar-thumb {
|
||||||
background: var(--neutral-300);
|
background: var(--neutral-300);
|
||||||
border-radius: 30px;
|
border-radius: 9999px;
|
||||||
}
|
}
|
||||||
|
|
||||||
.dark #chat-input textarea::-webkit-scrollbar-thumb {
|
.dark #chat-input textarea::-webkit-scrollbar-thumb {
|
||||||
|
|
@ -633,6 +653,10 @@ audio {
|
||||||
background: transparent;
|
background: transparent;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#chat-input .thumbnails {
|
||||||
|
padding-top: 3px;
|
||||||
|
}
|
||||||
|
|
||||||
.chat-input-positioned {
|
.chat-input-positioned {
|
||||||
max-width: 54rem;
|
max-width: 54rem;
|
||||||
left: 50%;
|
left: 50%;
|
||||||
|
|
@ -735,7 +759,30 @@ audio {
|
||||||
|
|
||||||
.hover-element {
|
.hover-element {
|
||||||
position: relative;
|
position: relative;
|
||||||
font-size: 24px;
|
padding-top: 4px;
|
||||||
|
}
|
||||||
|
|
||||||
|
#hover-element-button {
|
||||||
|
display: flex;
|
||||||
|
align-items: center;
|
||||||
|
justify-content: center;
|
||||||
|
width: 32px;
|
||||||
|
height: 32px;
|
||||||
|
border-radius: 0.5rem;
|
||||||
|
cursor: pointer;
|
||||||
|
color: gray;
|
||||||
|
}
|
||||||
|
|
||||||
|
#hover-element-button:hover {
|
||||||
|
background-color: var(--background-fill-secondary);
|
||||||
|
}
|
||||||
|
|
||||||
|
#hover-element-button svg {
|
||||||
|
color: inherit;
|
||||||
|
}
|
||||||
|
|
||||||
|
.dark #hover-element-button:hover {
|
||||||
|
background-color: var(--selected-item-color-dark);
|
||||||
}
|
}
|
||||||
|
|
||||||
.hover-menu {
|
.hover-menu {
|
||||||
|
|
@ -743,27 +790,40 @@ audio {
|
||||||
position: absolute;
|
position: absolute;
|
||||||
bottom: 100%;
|
bottom: 100%;
|
||||||
left: 0;
|
left: 0;
|
||||||
box-shadow: 0 2px 12px rgb(0 0 0 / 15%);
|
background: white;
|
||||||
border-radius: 0.5rem;
|
border: 1px solid rgba(0, 0, 0, 0.1);
|
||||||
|
box-shadow: 0 4px 16px rgb(0 0 0 / 12%), 0 1px 3px rgb(0 0 0 / 8%);
|
||||||
|
border-radius: 0.75rem;
|
||||||
z-index: 10000;
|
z-index: 10000;
|
||||||
min-width: 330px;
|
min-width: 330px;
|
||||||
flex-direction: column;
|
flex-direction: column;
|
||||||
overflow: hidden;
|
padding: 4px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.hover-menu::before {
|
||||||
|
content: '';
|
||||||
|
position: absolute;
|
||||||
|
top: 100%;
|
||||||
|
left: 0;
|
||||||
|
width: 100%;
|
||||||
|
height: 8px;
|
||||||
|
}
|
||||||
|
|
||||||
|
.hover-menu > * {
|
||||||
|
border: none !important;
|
||||||
|
box-shadow: none !important;
|
||||||
}
|
}
|
||||||
|
|
||||||
.hover-menu button {
|
.hover-menu button {
|
||||||
width: 100%;
|
width: 100%;
|
||||||
background: white !important;
|
background: transparent !important;
|
||||||
border-radius: 0 !important;
|
border: none !important;
|
||||||
|
border-radius: 0.5rem !important;
|
||||||
justify-content: space-between;
|
justify-content: space-between;
|
||||||
margin: 0 !important;
|
margin: 0 !important;
|
||||||
height: 36px;
|
height: 36px;
|
||||||
border-color: transparent !important;
|
font-weight: 500;
|
||||||
transition: background-color 0.15s ease;
|
box-shadow: none !important;
|
||||||
}
|
|
||||||
|
|
||||||
.hover-menu button:not(#clear-history-confirm) {
|
|
||||||
border-bottom: 0 !important;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
.hover-menu button:hover {
|
.hover-menu button:hover {
|
||||||
|
|
@ -775,19 +835,26 @@ audio {
|
||||||
}
|
}
|
||||||
|
|
||||||
#show-controls {
|
#show-controls {
|
||||||
background-color: white;
|
background-color: transparent;
|
||||||
border-color: transparent !important;
|
border: none !important;
|
||||||
height: 36px;
|
height: 36px;
|
||||||
border-radius: 0;
|
border-radius: 0.5rem;
|
||||||
border-bottom: 0 !important;
|
|
||||||
padding-top: 3px;
|
padding-top: 3px;
|
||||||
padding-left: 4px;
|
padding-left: 4px;
|
||||||
display: flex;
|
display: flex;
|
||||||
font-weight: normal;
|
font-weight: normal;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#show-controls:hover {
|
||||||
|
background-color: #dbeafe;
|
||||||
|
}
|
||||||
|
|
||||||
.dark #show-controls {
|
.dark #show-controls {
|
||||||
background-color: var(--darker-gray);
|
background-color: transparent;
|
||||||
|
}
|
||||||
|
|
||||||
|
.dark #show-controls:hover {
|
||||||
|
background-color: var(--selected-item-color-dark);
|
||||||
}
|
}
|
||||||
|
|
||||||
#show-controls label {
|
#show-controls label {
|
||||||
|
|
@ -797,12 +864,12 @@ audio {
|
||||||
width: 100%;
|
width: 100%;
|
||||||
padding-right: 12px;
|
padding-right: 12px;
|
||||||
gap: 10px;
|
gap: 10px;
|
||||||
font-weight: 600;
|
font-weight: 500;
|
||||||
color: var(--button-secondary-text-color);
|
color: var(--button-secondary-text-color);
|
||||||
}
|
}
|
||||||
|
|
||||||
#show-controls label input {
|
#show-controls label input {
|
||||||
margin-top: 4px;
|
margin-top: 5px;
|
||||||
}
|
}
|
||||||
|
|
||||||
.transparent-substring {
|
.transparent-substring {
|
||||||
|
|
@ -842,7 +909,7 @@ audio {
|
||||||
}
|
}
|
||||||
|
|
||||||
#chat-input-row {
|
#chat-input-row {
|
||||||
padding: 1rem;
|
padding: 0.5rem 1rem 1rem;
|
||||||
}
|
}
|
||||||
|
|
||||||
#chat-col {
|
#chat-col {
|
||||||
|
|
@ -1208,9 +1275,14 @@ audio {
|
||||||
color: #9ca3af;
|
color: #9ca3af;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.dark .hover-menu {
|
||||||
|
background: var(--darker-gray);
|
||||||
|
border-color: transparent;
|
||||||
|
box-shadow: 0 4px 16px rgb(0 0 0 / 40%);
|
||||||
|
}
|
||||||
|
|
||||||
.dark .hover-menu button {
|
.dark .hover-menu button {
|
||||||
border-color: var(--border-color-primary);
|
background-color: transparent !important;
|
||||||
background-color: var(--darker-gray) !important;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
.dark #chat-controls,
|
.dark #chat-controls,
|
||||||
|
|
@ -1372,8 +1444,7 @@ audio {
|
||||||
}
|
}
|
||||||
|
|
||||||
.footer-button svg {
|
.footer-button svg {
|
||||||
stroke: rgb(156 163 175);
|
stroke: rgb(140 140 148);
|
||||||
transition: stroke 0.2s;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
.footer-button:hover svg {
|
.footer-button:hover svg {
|
||||||
|
|
@ -1388,12 +1459,12 @@ audio {
|
||||||
stroke: rgb(209 213 219);
|
stroke: rgb(209 213 219);
|
||||||
}
|
}
|
||||||
|
|
||||||
.tgw-accordion {
|
.block:has(> .label-wrap) {
|
||||||
padding: 10px 12px !important;
|
padding: 10px 12px !important;
|
||||||
border: 1px solid #d2d2d8;
|
border: 1px solid #d2d2d8;
|
||||||
}
|
}
|
||||||
|
|
||||||
.dark .tgw-accordion {
|
.dark .block:has(> .label-wrap) {
|
||||||
border: 1px solid var(--border-color-dark);
|
border: 1px solid var(--border-color-dark);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -1903,14 +1974,24 @@ table, tr, td, th, thead {
|
||||||
border: 0;
|
border: 0;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.prose hr {
|
||||||
|
border-color: var(--border-color-primary);
|
||||||
|
}
|
||||||
|
|
||||||
td + td,
|
td + td,
|
||||||
th + th { border-left: 1px solid; }
|
th + th {
|
||||||
|
border-left: 1px solid var(--border-color-primary) !important;
|
||||||
|
}
|
||||||
|
|
||||||
tr + tr td,
|
tr + tr td,
|
||||||
tr + tr th { border-top: 1px solid; }
|
tr + tr th {
|
||||||
|
border-top: 1px solid var(--border-color-primary) !important;
|
||||||
|
}
|
||||||
|
|
||||||
thead + tbody tr:first-child td,
|
thead + tbody tr:first-child td,
|
||||||
thead + tbody tr:first-child th { border-top: 1px solid; }
|
thead + tbody tr:first-child th {
|
||||||
|
border-top: 1px solid var(--border-color-primary) !important;
|
||||||
|
}
|
||||||
|
|
||||||
/* ------------------------------------------------
|
/* ------------------------------------------------
|
||||||
Tools CheckboxGroup - vertical DragDrop-like style
|
Tools CheckboxGroup - vertical DragDrop-like style
|
||||||
|
|
@ -1942,8 +2023,8 @@ thead + tbody tr:first-child th { border-top: 1px solid; }
|
||||||
|
|
||||||
/* Pretty scrollbar for the tools list */
|
/* Pretty scrollbar for the tools list */
|
||||||
#tools-group .wrap::-webkit-scrollbar {
|
#tools-group .wrap::-webkit-scrollbar {
|
||||||
width: 8px;
|
width: 7px;
|
||||||
height: 8px;
|
height: 7px;
|
||||||
}
|
}
|
||||||
|
|
||||||
#tools-group .wrap::-webkit-scrollbar-track {
|
#tools-group .wrap::-webkit-scrollbar-track {
|
||||||
|
|
@ -1953,13 +2034,13 @@ thead + tbody tr:first-child th { border-top: 1px solid; }
|
||||||
#tools-group .wrap::-webkit-scrollbar-thumb,
|
#tools-group .wrap::-webkit-scrollbar-thumb,
|
||||||
#tools-group .wrap::-webkit-scrollbar-thumb:hover {
|
#tools-group .wrap::-webkit-scrollbar-thumb:hover {
|
||||||
background: var(--neutral-300);
|
background: var(--neutral-300);
|
||||||
border-radius: 30px;
|
border-radius: 9999px;
|
||||||
}
|
}
|
||||||
|
|
||||||
.dark #tools-group .wrap::-webkit-scrollbar-thumb,
|
.dark #tools-group .wrap::-webkit-scrollbar-thumb,
|
||||||
.dark #tools-group .wrap::-webkit-scrollbar-thumb:hover {
|
.dark #tools-group .wrap::-webkit-scrollbar-thumb:hover {
|
||||||
background: rgb(255 255 255 / 6.25%);
|
background: rgb(255 255 255 / 6.25%);
|
||||||
border-radius: 30px;
|
border-radius: 9999px;
|
||||||
}
|
}
|
||||||
|
|
||||||
#tools-group .wrap::-webkit-scrollbar-corner {
|
#tools-group .wrap::-webkit-scrollbar-corner {
|
||||||
|
|
|
||||||
|
|
@ -112,7 +112,7 @@ Used for talking to an instruction-following model using the prompt format defin
|
||||||
|
|
||||||
The prompt format is defined by the **Instruction template** parameter in "Parameters" > "Instruction template", which represents a Jinja2 template.
|
The prompt format is defined by the **Instruction template** parameter in "Parameters" > "Instruction template", which represents a Jinja2 template.
|
||||||
|
|
||||||
Note that when you load a model in the "Model" tab, the web UI will try to automatically detect its instruction template (if any), and will update the values under "Parameters" > "Instruction template" accordingly. This is done using a set of regular expressions defined in `user_data/models/config.yaml`. This detection is not guaranteed to be accurate. You should check the model card on Hugging Face to see if you are using the correct prompt format.
|
Note that when you load a model in the "Model" tab, the web UI will try to automatically detect its instruction template (if any) from the model metadata (e.g. `tokenizer_config.json` or GGUF metadata), and will update the values under "Parameters" > "Instruction template" accordingly. You should check the model card on Hugging Face to see if you are using the correct prompt format.
|
||||||
|
|
||||||
### Chat-instruct
|
### Chat-instruct
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -39,7 +39,7 @@ curl http://127.0.0.1:5000/v1/completions \
|
||||||
|
|
||||||
#### Chat completions
|
#### Chat completions
|
||||||
|
|
||||||
Works best with instruction-following models. If the "instruction_template" variable is not provided, it will be guessed automatically based on the model name using the regex patterns in `user_data/models/config.yaml`.
|
Works best with instruction-following models. If the "instruction_template" variable is not provided, it will be detected automatically from the model metadata.
|
||||||
|
|
||||||
```shell
|
```shell
|
||||||
curl http://127.0.0.1:5000/v1/chat/completions \
|
curl http://127.0.0.1:5000/v1/chat/completions \
|
||||||
|
|
@ -232,6 +232,17 @@ curl -k http://127.0.0.1:5000/v1/internal/model/load \
|
||||||
}'
|
}'
|
||||||
```
|
```
|
||||||
|
|
||||||
|
You can also set a default instruction template for all subsequent API requests by passing `instruction_template` (a template name from `user_data/instruction-templates/`) or `instruction_template_str` (a raw Jinja2 string):
|
||||||
|
|
||||||
|
```shell
|
||||||
|
curl -k http://127.0.0.1:5000/v1/internal/model/load \
|
||||||
|
-H "Content-Type: application/json" \
|
||||||
|
-d '{
|
||||||
|
"model_name": "Qwen_Qwen3-0.6B-Q4_K_M.gguf",
|
||||||
|
"instruction_template": "Alpaca"
|
||||||
|
}'
|
||||||
|
```
|
||||||
|
|
||||||
#### Python chat example
|
#### Python chat example
|
||||||
|
|
||||||
```python
|
```python
|
||||||
|
|
|
||||||
|
|
@ -80,6 +80,19 @@ def execute(arguments):
|
||||||
|
|
||||||
You can open the built-in tools in `user_data/tools/` for more examples.
|
You can open the built-in tools in `user_data/tools/` for more examples.
|
||||||
|
|
||||||
|
## MCP servers
|
||||||
|
|
||||||
|
You can connect to remote [MCP (Model Context Protocol)](https://modelcontextprotocol.io/) servers to use their tools alongside local ones.
|
||||||
|
|
||||||
|
In the chat sidebar, open the **MCP servers** accordion and enter one server URL per line. For servers that require authentication, append headers after the URL separated by commas:
|
||||||
|
|
||||||
|
```
|
||||||
|
https://example.com/mcp
|
||||||
|
https://other.com/mcp,Authorization: Bearer sk-xxx
|
||||||
|
```
|
||||||
|
|
||||||
|
All tools from the configured servers are automatically discovered and made available to the model during generation. If an MCP tool has the same name as a selected local tool, the local tool takes priority.
|
||||||
|
|
||||||
## Tool calling over the API
|
## Tool calling over the API
|
||||||
|
|
||||||
Tool calling over the API follows the [OpenAI API](https://platform.openai.com/docs/guides/function-calling) convention. Define your tools, send them with your messages, and handle tool calls in a loop until the model gives a final answer.
|
Tool calling over the API follows the [OpenAI API](https://platform.openai.com/docs/guides/function-calling) convention. Define your tools, send them with your messages, and handle tool calls in a loop until the model gives a final answer.
|
||||||
|
|
|
||||||
|
|
@ -158,28 +158,21 @@ class ModelDownloader:
|
||||||
# Also if GGUF and safetensors are available, download only safetensors
|
# Also if GGUF and safetensors are available, download only safetensors
|
||||||
if (has_pytorch or has_pt or has_gguf) and has_safetensors:
|
if (has_pytorch or has_pt or has_gguf) and has_safetensors:
|
||||||
has_gguf = False
|
has_gguf = False
|
||||||
for i in range(len(classifications) - 1, -1, -1):
|
keep = [i for i, c in enumerate(classifications) if c not in ['pytorch', 'pt', 'gguf']]
|
||||||
if classifications[i] in ['pytorch', 'pt', 'gguf']:
|
links = [links[i] for i in keep]
|
||||||
links.pop(i)
|
file_sizes = [file_sizes[i] for i in keep]
|
||||||
file_sizes.pop(i)
|
|
||||||
|
|
||||||
# For GGUF, try to download only the Q4_K_M if no specific file is specified.
|
# For GGUF, try to download only the Q4_K_M if no specific file is specified.
|
||||||
if has_gguf and specific_file is None:
|
if has_gguf and specific_file is None:
|
||||||
has_q4km = False
|
has_q4km = any('q4_k_m' in link.lower() for link in links)
|
||||||
for i in range(len(classifications) - 1, -1, -1):
|
|
||||||
if 'q4_k_m' in links[i].lower():
|
|
||||||
has_q4km = True
|
|
||||||
|
|
||||||
if has_q4km:
|
if has_q4km:
|
||||||
for i in range(len(classifications) - 1, -1, -1):
|
keep = [i for i, link in enumerate(links) if 'q4_k_m' in link.lower()]
|
||||||
if 'q4_k_m' not in links[i].lower():
|
|
||||||
links.pop(i)
|
|
||||||
file_sizes.pop(i)
|
|
||||||
else:
|
else:
|
||||||
for i in range(len(classifications) - 1, -1, -1):
|
keep = [i for i, link in enumerate(links) if not link.lower().endswith('.gguf')]
|
||||||
if links[i].lower().endswith('.gguf'):
|
|
||||||
links.pop(i)
|
links = [links[i] for i in keep]
|
||||||
file_sizes.pop(i)
|
file_sizes = [file_sizes[i] for i in keep]
|
||||||
|
|
||||||
is_llamacpp = has_gguf and specific_file is not None
|
is_llamacpp = has_gguf and specific_file is not None
|
||||||
return links, sha256, is_lora, is_llamacpp, file_sizes
|
return links, sha256, is_lora, is_llamacpp, file_sizes
|
||||||
|
|
|
||||||
|
|
@ -2,8 +2,11 @@ import concurrent.futures
|
||||||
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
|
from modules.web_search import _validate_url
|
||||||
|
|
||||||
|
|
||||||
def download_single(url):
|
def download_single(url):
|
||||||
|
_validate_url(url)
|
||||||
headers = {
|
headers = {
|
||||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
|
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -5,12 +5,14 @@ import requests
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
import extensions.superboogav2.parameters as parameters
|
import extensions.superboogav2.parameters as parameters
|
||||||
|
from modules.web_search import _validate_url
|
||||||
|
|
||||||
from .data_processor import process_and_add_to_collector
|
from .data_processor import process_and_add_to_collector
|
||||||
from .utils import create_metadata_source
|
from .utils import create_metadata_source
|
||||||
|
|
||||||
|
|
||||||
def _download_single(url):
|
def _download_single(url):
|
||||||
|
_validate_url(url)
|
||||||
response = requests.get(url, timeout=5)
|
response = requests.get(url, timeout=5)
|
||||||
if response.status_code == 200:
|
if response.status_code == 200:
|
||||||
return response.content
|
return response.content
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,6 @@
|
||||||
function toggleDarkMode() {
|
function toggleDarkMode() {
|
||||||
document.body.classList.toggle("dark");
|
document.body.classList.toggle("dark");
|
||||||
var currentCSS = document.getElementById("highlight-css");
|
const currentCSS = document.getElementById("highlight-css");
|
||||||
if (currentCSS.getAttribute("href") === "file/css/highlightjs/github-dark.min.css") {
|
if (currentCSS.getAttribute("href") === "file/css/highlightjs/github-dark.min.css") {
|
||||||
currentCSS.setAttribute("href", "file/css/highlightjs/github.min.css");
|
currentCSS.setAttribute("href", "file/css/highlightjs/github.min.css");
|
||||||
} else {
|
} else {
|
||||||
|
|
@ -9,12 +9,10 @@ function toggleDarkMode() {
|
||||||
|
|
||||||
// Re-highlight all code blocks once stylesheet loads
|
// Re-highlight all code blocks once stylesheet loads
|
||||||
currentCSS.onload = function() {
|
currentCSS.onload = function() {
|
||||||
const messageBodies = document.getElementById("chat").querySelectorAll(".message-body");
|
// Clear data-highlighted so hljs will re-process with the new theme
|
||||||
messageBodies.forEach((messageBody) => {
|
document.querySelectorAll("#chat .message-body pre code[data-highlighted]").forEach((codeBlock) => {
|
||||||
const codeBlocks = messageBody.querySelectorAll("pre code");
|
delete codeBlock.dataset.highlighted;
|
||||||
codeBlocks.forEach((codeBlock) => {
|
|
||||||
hljs.highlightElement(codeBlock);
|
|
||||||
});
|
|
||||||
});
|
});
|
||||||
|
doSyntaxHighlighting();
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,11 +1,35 @@
|
||||||
|
// -------------------------------------------------
|
||||||
|
// Shared helpers
|
||||||
|
// -------------------------------------------------
|
||||||
|
|
||||||
|
function getProfilePictureUrl() {
|
||||||
|
return "/file/user_data/cache/pfp_character.png?time=" + Date.now();
|
||||||
|
}
|
||||||
|
|
||||||
|
const MESSAGE_SELECTOR = ".message, .user-message, .assistant-message";
|
||||||
|
|
||||||
|
function getMessageElement(element) {
|
||||||
|
if (!element) return null;
|
||||||
|
return element.closest(MESSAGE_SELECTOR);
|
||||||
|
}
|
||||||
|
|
||||||
|
function isUserRole(messageElement) {
|
||||||
|
return messageElement.classList.contains("user-message") ||
|
||||||
|
messageElement.querySelector(".text-you") !== null ||
|
||||||
|
messageElement.querySelector(".circle-you") !== null;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Trigger a synthetic 'input' event so Gradio picks up programmatic value changes
|
||||||
|
function dispatchGradioInput(element) {
|
||||||
|
element.dispatchEvent(new Event("input", { bubbles: true }));
|
||||||
|
}
|
||||||
|
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
// Event handlers
|
// Event handlers
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
|
|
||||||
function copyToClipboard(element) {
|
function copyToClipboard(element) {
|
||||||
if (!element) return;
|
const messageElement = getMessageElement(element);
|
||||||
|
|
||||||
const messageElement = element.closest(".message, .user-message, .assistant-message");
|
|
||||||
if (!messageElement) return;
|
if (!messageElement) return;
|
||||||
|
|
||||||
const rawText = messageElement.getAttribute("data-raw");
|
const rawText = messageElement.getAttribute("data-raw");
|
||||||
|
|
@ -48,9 +72,7 @@ function fallbackCopyToClipboard(text) {
|
||||||
}
|
}
|
||||||
|
|
||||||
function branchHere(element) {
|
function branchHere(element) {
|
||||||
if (!element) return;
|
const messageElement = getMessageElement(element);
|
||||||
|
|
||||||
const messageElement = element.closest(".message, .user-message, .assistant-message");
|
|
||||||
if (!messageElement) return;
|
if (!messageElement) return;
|
||||||
|
|
||||||
const index = messageElement.getAttribute("data-index");
|
const index = messageElement.getAttribute("data-index");
|
||||||
|
|
@ -69,11 +91,7 @@ function branchHere(element) {
|
||||||
}
|
}
|
||||||
|
|
||||||
branchIndexInput.value = index;
|
branchIndexInput.value = index;
|
||||||
|
dispatchGradioInput(branchIndexInput);
|
||||||
// Trigger any 'change' or 'input' events Gradio might be listening for
|
|
||||||
const event = new Event("input", { bubbles: true });
|
|
||||||
branchIndexInput.dispatchEvent(event);
|
|
||||||
|
|
||||||
branchButton.click();
|
branchButton.click();
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -82,9 +100,7 @@ function branchHere(element) {
|
||||||
// -------------------------------------------------
|
// -------------------------------------------------
|
||||||
|
|
||||||
function editHere(buttonElement) {
|
function editHere(buttonElement) {
|
||||||
if (!buttonElement) return;
|
const messageElement = getMessageElement(buttonElement);
|
||||||
|
|
||||||
const messageElement = buttonElement.closest(".message, .user-message, .assistant-message");
|
|
||||||
if (!messageElement) return;
|
if (!messageElement) return;
|
||||||
|
|
||||||
const messageBody = messageElement.querySelector(".message-body");
|
const messageBody = messageElement.querySelector(".message-body");
|
||||||
|
|
@ -97,12 +113,7 @@ function editHere(buttonElement) {
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Determine role based on message element - handle different chat modes
|
startEditing(messageElement, messageBody, isUserRole(messageElement));
|
||||||
const isUserMessage = messageElement.classList.contains("user-message") ||
|
|
||||||
messageElement.querySelector(".text-you") !== null ||
|
|
||||||
messageElement.querySelector(".circle-you") !== null;
|
|
||||||
|
|
||||||
startEditing(messageElement, messageBody, isUserMessage);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function startEditing(messageElement, messageBody, isUserMessage) {
|
function startEditing(messageElement, messageBody, isUserMessage) {
|
||||||
|
|
@ -209,30 +220,22 @@ function submitMessageEdit(index, newText, isUserMessage) {
|
||||||
editTextInput.value = newText;
|
editTextInput.value = newText;
|
||||||
editRoleInput.value = isUserMessage ? "user" : "assistant";
|
editRoleInput.value = isUserMessage ? "user" : "assistant";
|
||||||
|
|
||||||
editIndexInput.dispatchEvent(new Event("input", { bubbles: true }));
|
dispatchGradioInput(editIndexInput);
|
||||||
editTextInput.dispatchEvent(new Event("input", { bubbles: true }));
|
dispatchGradioInput(editTextInput);
|
||||||
editRoleInput.dispatchEvent(new Event("input", { bubbles: true }));
|
dispatchGradioInput(editRoleInput);
|
||||||
|
|
||||||
editButton.click();
|
editButton.click();
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
function navigateVersion(element, direction) {
|
function navigateVersion(element, direction) {
|
||||||
if (!element) return;
|
const messageElement = getMessageElement(element);
|
||||||
|
|
||||||
const messageElement = element.closest(".message, .user-message, .assistant-message");
|
|
||||||
if (!messageElement) return;
|
if (!messageElement) return;
|
||||||
|
|
||||||
const index = messageElement.getAttribute("data-index");
|
const index = messageElement.getAttribute("data-index");
|
||||||
if (!index) return;
|
if (!index) return;
|
||||||
|
|
||||||
// Determine role based on message element classes
|
const role = isUserRole(messageElement) ? "user" : "assistant";
|
||||||
let role = "assistant"; // Default role
|
|
||||||
if (messageElement.classList.contains("user-message") ||
|
|
||||||
messageElement.querySelector(".text-you") ||
|
|
||||||
messageElement.querySelector(".circle-you")) {
|
|
||||||
role = "user";
|
|
||||||
}
|
|
||||||
|
|
||||||
const indexInput = document.getElementById("Navigate-message-index")?.querySelector("input");
|
const indexInput = document.getElementById("Navigate-message-index")?.querySelector("input");
|
||||||
const directionInput = document.getElementById("Navigate-direction")?.querySelector("textarea");
|
const directionInput = document.getElementById("Navigate-direction")?.querySelector("textarea");
|
||||||
|
|
@ -248,11 +251,9 @@ function navigateVersion(element, direction) {
|
||||||
directionInput.value = direction;
|
directionInput.value = direction;
|
||||||
roleInput.value = role;
|
roleInput.value = role;
|
||||||
|
|
||||||
// Trigger 'input' events for Gradio to pick up changes
|
dispatchGradioInput(indexInput);
|
||||||
const event = new Event("input", { bubbles: true });
|
dispatchGradioInput(directionInput);
|
||||||
indexInput.dispatchEvent(event);
|
dispatchGradioInput(roleInput);
|
||||||
directionInput.dispatchEvent(event);
|
|
||||||
roleInput.dispatchEvent(event);
|
|
||||||
|
|
||||||
navigateButton.click();
|
navigateButton.click();
|
||||||
}
|
}
|
||||||
|
|
@ -313,7 +314,7 @@ function handleMorphdomUpdate(data) {
|
||||||
|
|
||||||
function applyMorphdomUpdate(data) {
|
function applyMorphdomUpdate(data) {
|
||||||
// Determine target element and use it as query scope
|
// Determine target element and use it as query scope
|
||||||
var target_element, target_html;
|
let target_element, target_html;
|
||||||
if (data.last_message_only) {
|
if (data.last_message_only) {
|
||||||
const childNodes = document.getElementsByClassName("messages")[0].childNodes;
|
const childNodes = document.getElementsByClassName("messages")[0].childNodes;
|
||||||
target_element = childNodes[childNodes.length - 1];
|
target_element = childNodes[childNodes.length - 1];
|
||||||
|
|
|
||||||
201
js/main.js
201
js/main.js
|
|
@ -4,8 +4,9 @@
|
||||||
|
|
||||||
// Sync highlight.js theme with the actual Gradio theme
|
// Sync highlight.js theme with the actual Gradio theme
|
||||||
var defined_hljs_css = document.body.classList.contains("dark") ? "file/css/highlightjs/github-dark.min.css" : "file/css/highlightjs/github.min.css";
|
var defined_hljs_css = document.body.classList.contains("dark") ? "file/css/highlightjs/github-dark.min.css" : "file/css/highlightjs/github.min.css";
|
||||||
if (document.getElementById("highlight-css").getAttribute("href") !== defined_hljs_css) {
|
var hljsCssElement = document.getElementById("highlight-css");
|
||||||
document.getElementById("highlight-css").setAttribute("href", defined_hljs_css);
|
if (hljsCssElement.getAttribute("href") !== defined_hljs_css) {
|
||||||
|
hljsCssElement.setAttribute("href", defined_hljs_css);
|
||||||
}
|
}
|
||||||
|
|
||||||
let main_parent = document.getElementById("chat-tab").parentNode;
|
let main_parent = document.getElementById("chat-tab").parentNode;
|
||||||
|
|
@ -49,21 +50,18 @@ document.querySelector(".header_bar").addEventListener("click", function(event)
|
||||||
//------------------------------------------------
|
//------------------------------------------------
|
||||||
|
|
||||||
// --- Helper functions --- //
|
// --- Helper functions --- //
|
||||||
function isModifiedKeyboardEvent() {
|
function isModifiedKeyboardEvent(event) {
|
||||||
return (event instanceof KeyboardEvent &&
|
return event instanceof KeyboardEvent &&
|
||||||
event.shiftKey ||
|
(event.shiftKey || event.ctrlKey || event.altKey || event.metaKey);
|
||||||
event.ctrlKey ||
|
|
||||||
event.altKey ||
|
|
||||||
event.metaKey);
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function isFocusedOnEditableTextbox() {
|
function isFocusedOnEditableTextbox(event) {
|
||||||
if (event.target.tagName === "INPUT" || event.target.tagName === "TEXTAREA") {
|
if (event.target.tagName === "INPUT" || event.target.tagName === "TEXTAREA") {
|
||||||
return !!event.target.value;
|
return !!event.target.value;
|
||||||
}
|
}
|
||||||
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
let previousTabId = "chat-tab-button";
|
|
||||||
document.addEventListener("keydown", function(event) {
|
document.addEventListener("keydown", function(event) {
|
||||||
// Stop generation on Esc pressed
|
// Stop generation on Esc pressed
|
||||||
if (event.key === "Escape") {
|
if (event.key === "Escape") {
|
||||||
|
|
@ -117,14 +115,14 @@ document.addEventListener("keydown", function(event) {
|
||||||
}
|
}
|
||||||
|
|
||||||
// --- Simple version navigation --- //
|
// --- Simple version navigation --- //
|
||||||
if (!isFocusedOnEditableTextbox()) {
|
if (!isFocusedOnEditableTextbox(event)) {
|
||||||
// Version navigation on Arrow keys (horizontal)
|
// Version navigation on Arrow keys (horizontal)
|
||||||
if (!isModifiedKeyboardEvent() && event.key === "ArrowLeft") {
|
if (!isModifiedKeyboardEvent(event) && event.key === "ArrowLeft") {
|
||||||
event.preventDefault();
|
event.preventDefault();
|
||||||
navigateLastAssistantMessage("left");
|
navigateLastAssistantMessage("left");
|
||||||
}
|
}
|
||||||
|
|
||||||
else if (!isModifiedKeyboardEvent() && event.key === "ArrowRight") {
|
else if (!isModifiedKeyboardEvent(event) && event.key === "ArrowRight") {
|
||||||
event.preventDefault();
|
event.preventDefault();
|
||||||
if (!navigateLastAssistantMessage("right")) {
|
if (!navigateLastAssistantMessage("right")) {
|
||||||
// If can't navigate right (last version), regenerate
|
// If can't navigate right (last version), regenerate
|
||||||
|
|
@ -159,9 +157,8 @@ targetElement.addEventListener("scroll", function() {
|
||||||
let diff = targetElement.scrollHeight - targetElement.clientHeight;
|
let diff = targetElement.scrollHeight - targetElement.clientHeight;
|
||||||
let isAtBottomNow = Math.abs(targetElement.scrollTop - diff) <= 10 || diff <= 0;
|
let isAtBottomNow = Math.abs(targetElement.scrollTop - diff) <= 10 || diff <= 0;
|
||||||
|
|
||||||
// Add scrolling class to disable hover effects
|
|
||||||
if (window.isScrolled || !isAtBottomNow) {
|
if (window.isScrolled || !isAtBottomNow) {
|
||||||
targetElement.classList.add("scrolling");
|
targetElement.classList.add("scrolling"); // Disables hover effects during scroll
|
||||||
}
|
}
|
||||||
|
|
||||||
if(isAtBottomNow) {
|
if(isAtBottomNow) {
|
||||||
|
|
@ -202,12 +199,8 @@ const observer = new MutationObserver(function() {
|
||||||
});
|
});
|
||||||
|
|
||||||
// Only watch for attribute changes on targetElement (e.g. _generating class)
|
// Only watch for attribute changes on targetElement (e.g. _generating class)
|
||||||
const config = {
|
|
||||||
attributes: true
|
|
||||||
};
|
|
||||||
|
|
||||||
// Start observing the target element
|
// Start observing the target element
|
||||||
observer.observe(targetElement, config);
|
observer.observe(targetElement, { attributes: true });
|
||||||
|
|
||||||
//------------------------------------------------
|
//------------------------------------------------
|
||||||
// Handle syntax highlighting / LaTeX
|
// Handle syntax highlighting / LaTeX
|
||||||
|
|
@ -228,7 +221,7 @@ window.doSyntaxHighlighting = function() {
|
||||||
if (messageBodies.length > 0) {
|
if (messageBodies.length > 0) {
|
||||||
let hasSeenVisible = false;
|
let hasSeenVisible = false;
|
||||||
|
|
||||||
// Go from last message to first
|
// Go from last message to first so we can early-exit once past visible area
|
||||||
for (let i = messageBodies.length - 1; i >= 0; i--) {
|
for (let i = messageBodies.length - 1; i >= 0; i--) {
|
||||||
const messageBody = messageBodies[i];
|
const messageBody = messageBodies[i];
|
||||||
|
|
||||||
|
|
@ -243,8 +236,8 @@ window.doSyntaxHighlighting = function() {
|
||||||
codeBlock.classList.add("pretty_scrollbar");
|
codeBlock.classList.add("pretty_scrollbar");
|
||||||
});
|
});
|
||||||
|
|
||||||
// Only render math in visible elements
|
|
||||||
const mathContainers = messageBody.querySelectorAll("p, span, li, td, th, h1, h2, h3, h4, h5, h6, blockquote, figcaption, caption, dd, dt");
|
const mathContainers = messageBody.querySelectorAll("p, span, li, td, th, h1, h2, h3, h4, h5, h6, blockquote, figcaption, caption, dd, dt");
|
||||||
|
// Only render math in individually visible containers (the outer check is on the message body)
|
||||||
mathContainers.forEach(container => {
|
mathContainers.forEach(container => {
|
||||||
if (isElementVisibleOnScreen(container)) {
|
if (isElementVisibleOnScreen(container)) {
|
||||||
renderMathInElement(container, {
|
renderMathInElement(container, {
|
||||||
|
|
@ -271,7 +264,7 @@ const doSyntaxHighlighting = window.doSyntaxHighlighting;
|
||||||
// Add some scrollbars
|
// Add some scrollbars
|
||||||
//------------------------------------------------
|
//------------------------------------------------
|
||||||
const scrollbarElements = document.querySelectorAll(".add_scrollbar textarea, .add_scrollbar .drag-drop-list");
|
const scrollbarElements = document.querySelectorAll(".add_scrollbar textarea, .add_scrollbar .drag-drop-list");
|
||||||
for(i = 0; i < scrollbarElements.length; i++) {
|
for(let i = 0; i < scrollbarElements.length; i++) {
|
||||||
scrollbarElements[i].classList.remove("scroll-hide");
|
scrollbarElements[i].classList.remove("scroll-hide");
|
||||||
scrollbarElements[i].classList.add("pretty_scrollbar");
|
scrollbarElements[i].classList.add("pretty_scrollbar");
|
||||||
scrollbarElements[i].style.resize = "none";
|
scrollbarElements[i].style.resize = "none";
|
||||||
|
|
@ -298,13 +291,13 @@ if (toolsInfo) {
|
||||||
// Remove some backgrounds
|
// Remove some backgrounds
|
||||||
//------------------------------------------------
|
//------------------------------------------------
|
||||||
const noBackgroundelements = document.querySelectorAll(".no-background");
|
const noBackgroundelements = document.querySelectorAll(".no-background");
|
||||||
for(i = 0; i < noBackgroundelements.length; i++) {
|
for(let i = 0; i < noBackgroundelements.length; i++) {
|
||||||
noBackgroundelements[i].parentNode.style.border = "none";
|
noBackgroundelements[i].parentNode.style.border = "none";
|
||||||
noBackgroundelements[i].parentNode.parentNode.parentNode.style.alignItems = "center";
|
noBackgroundelements[i].parentNode.parentNode.parentNode.style.alignItems = "center";
|
||||||
}
|
}
|
||||||
|
|
||||||
const slimDropdownElements = document.querySelectorAll(".slim-dropdown");
|
const slimDropdownElements = document.querySelectorAll(".slim-dropdown");
|
||||||
for (i = 0; i < slimDropdownElements.length; i++) {
|
for (let i = 0; i < slimDropdownElements.length; i++) {
|
||||||
const parentNode = slimDropdownElements[i].parentNode;
|
const parentNode = slimDropdownElements[i].parentNode;
|
||||||
parentNode.style.background = "transparent";
|
parentNode.style.background = "transparent";
|
||||||
parentNode.style.border = "0";
|
parentNode.style.border = "0";
|
||||||
|
|
@ -316,18 +309,19 @@ for (i = 0; i < slimDropdownElements.length; i++) {
|
||||||
// https://github.com/SillyTavern/SillyTavern/blob/6c8bd06308c69d51e2eb174541792a870a83d2d6/public/script.js
|
// https://github.com/SillyTavern/SillyTavern/blob/6c8bd06308c69d51e2eb174541792a870a83d2d6/public/script.js
|
||||||
//------------------------------------------------
|
//------------------------------------------------
|
||||||
var buttonsInChat = document.querySelectorAll("#chat-tab #chat-buttons button, #chat-tab #chat-buttons #show-controls");
|
var buttonsInChat = document.querySelectorAll("#chat-tab #chat-buttons button, #chat-tab #chat-buttons #show-controls");
|
||||||
|
var hoverContainer = document.getElementById("gr-hover-container");
|
||||||
var button = document.getElementById("hover-element-button");
|
var button = document.getElementById("hover-element-button");
|
||||||
var menu = document.getElementById("hover-menu");
|
var menu = document.getElementById("hover-menu");
|
||||||
var istouchscreen = (navigator.maxTouchPoints > 0) || "ontouchstart" in document.documentElement;
|
var istouchscreen = (navigator.maxTouchPoints > 0) || "ontouchstart" in document.documentElement;
|
||||||
|
|
||||||
function showMenu() {
|
function showMenu() {
|
||||||
menu.style.display = "flex"; // Show the menu
|
menu.style.display = "flex";
|
||||||
}
|
}
|
||||||
|
|
||||||
function hideMenu() {
|
function hideMenu() {
|
||||||
menu.style.display = "none"; // Hide the menu
|
menu.style.display = "none";
|
||||||
if (!istouchscreen) {
|
if (!istouchscreen) {
|
||||||
document.querySelector("#chat-input textarea").focus(); // Focus on the chat input
|
document.querySelector("#chat-input textarea").focus();
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -336,7 +330,6 @@ if (buttonsInChat.length > 0) {
|
||||||
const thisButton = buttonsInChat[i];
|
const thisButton = buttonsInChat[i];
|
||||||
menu.appendChild(thisButton);
|
menu.appendChild(thisButton);
|
||||||
|
|
||||||
// Only apply transformations to button elements
|
|
||||||
if (thisButton.tagName.toLowerCase() === "button") {
|
if (thisButton.tagName.toLowerCase() === "button") {
|
||||||
thisButton.addEventListener("click", () => {
|
thisButton.addEventListener("click", () => {
|
||||||
hideMenu();
|
hideMenu();
|
||||||
|
|
@ -346,7 +339,6 @@ if (buttonsInChat.length > 0) {
|
||||||
const matches = buttonText.match(/(\(.*?\))/);
|
const matches = buttonText.match(/(\(.*?\))/);
|
||||||
|
|
||||||
if (matches && matches.length > 1) {
|
if (matches && matches.length > 1) {
|
||||||
// Apply the transparent-substring class to the matched substring
|
|
||||||
const substring = matches[1];
|
const substring = matches[1];
|
||||||
const newText = buttonText.replace(substring, ` <span class="transparent-substring">${substring.slice(1, -1)}</span>`);
|
const newText = buttonText.replace(substring, ` <span class="transparent-substring">${substring.slice(1, -1)}</span>`);
|
||||||
thisButton.innerHTML = newText;
|
thisButton.innerHTML = newText;
|
||||||
|
|
@ -355,16 +347,19 @@ if (buttonsInChat.length > 0) {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function isMouseOverButtonOrMenu() {
|
var menuInteracting = false;
|
||||||
return menu.matches(":hover") || button.matches(":hover");
|
|
||||||
}
|
|
||||||
|
|
||||||
button.addEventListener("mouseenter", function () {
|
hoverContainer.addEventListener("mouseenter", function () {
|
||||||
if (!istouchscreen) {
|
if (!istouchscreen) {
|
||||||
showMenu();
|
showMenu();
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
|
hoverContainer.addEventListener("mousedown", function () {
|
||||||
|
menuInteracting = true;
|
||||||
|
setTimeout(function () { menuInteracting = false; }, 300);
|
||||||
|
});
|
||||||
|
|
||||||
button.addEventListener("click", function () {
|
button.addEventListener("click", function () {
|
||||||
if (menu.style.display === "flex") {
|
if (menu.style.display === "flex") {
|
||||||
hideMenu();
|
hideMenu();
|
||||||
|
|
@ -374,49 +369,39 @@ button.addEventListener("click", function () {
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
|
|
||||||
// Add event listener for mouseleave on the button
|
hoverContainer.addEventListener("mouseleave", function () {
|
||||||
button.addEventListener("mouseleave", function () {
|
if (!istouchscreen) {
|
||||||
// Delay to prevent menu hiding when the mouse leaves the button into the menu
|
setTimeout(function () {
|
||||||
setTimeout(function () {
|
if (!hoverContainer.matches(":hover") && !menu.matches(":hover")) {
|
||||||
if (!isMouseOverButtonOrMenu()) {
|
hideMenu();
|
||||||
hideMenu();
|
}
|
||||||
}
|
}, 50);
|
||||||
}, 100);
|
}
|
||||||
});
|
|
||||||
|
|
||||||
// Add event listener for mouseleave on the menu
|
|
||||||
menu.addEventListener("mouseleave", function () {
|
|
||||||
// Delay to prevent menu hide when the mouse leaves the menu into the button
|
|
||||||
setTimeout(function () {
|
|
||||||
if (!isMouseOverButtonOrMenu()) {
|
|
||||||
hideMenu();
|
|
||||||
}
|
|
||||||
}, 100);
|
|
||||||
});
|
});
|
||||||
|
|
||||||
// Add event listener for click anywhere in the document
|
// Add event listener for click anywhere in the document
|
||||||
document.addEventListener("click", function (event) {
|
document.addEventListener("click", function (event) {
|
||||||
const target = event.target;
|
|
||||||
|
|
||||||
// Check if the click is outside the button/menu and the menu is visible
|
// Check if the click is outside the button/menu and the menu is visible
|
||||||
if (!isMouseOverButtonOrMenu() && menu.style.display === "flex") {
|
if (!menuInteracting && !event.target.closest("#gr-hover-container") && menu.style.display === "flex") {
|
||||||
hideMenu();
|
hideMenu();
|
||||||
}
|
}
|
||||||
|
|
||||||
if (event.target.classList.contains("pfp_character")) {
|
const target = event.target;
|
||||||
|
|
||||||
|
if (target.classList.contains("pfp_character")) {
|
||||||
toggleBigPicture();
|
toggleBigPicture();
|
||||||
}
|
}
|
||||||
|
|
||||||
// Handle sidebar clicks on mobile
|
// Handle sidebar clicks on mobile
|
||||||
if (isMobile()) {
|
if (isMobile()) {
|
||||||
// Check if the click did NOT originate from any of the specified toggle buttons or elements
|
// Check if the click did NOT originate from any of the specified toggle buttons or elements
|
||||||
if (
|
if (
|
||||||
target.closest("#navigation-toggle") !== navigationToggle &&
|
target.closest("#navigation-toggle") !== navigationToggle &&
|
||||||
target.closest("#past-chats-toggle") !== pastChatsToggle &&
|
target.closest("#past-chats-toggle") !== pastChatsToggle &&
|
||||||
target.closest("#chat-controls-toggle") !== chatControlsToggle &&
|
target.closest("#chat-controls-toggle") !== chatControlsToggle &&
|
||||||
target.closest(".header_bar") !== headerBar &&
|
target.closest(".header_bar") !== headerBar &&
|
||||||
target.closest("#past-chats-row") !== pastChatsRow &&
|
target.closest("#past-chats-row") !== pastChatsRow &&
|
||||||
target.closest("#chat-controls") !== chatControlsRow
|
target.closest("#chat-controls") !== chatControlsRow
|
||||||
) {
|
) {
|
||||||
handleIndividualSidebarClose(event);
|
handleIndividualSidebarClose(event);
|
||||||
}
|
}
|
||||||
|
|
@ -433,27 +418,19 @@ document.getElementById("chat-input-row").classList.add("chat-input-positioned")
|
||||||
//------------------------------------------------
|
//------------------------------------------------
|
||||||
const chatTextArea = document.getElementById("chat-input").querySelector("textarea");
|
const chatTextArea = document.getElementById("chat-input").querySelector("textarea");
|
||||||
|
|
||||||
function respondToChatInputVisibility(element, callback) {
|
function focusOnVisible(element) {
|
||||||
var options = {
|
var observer = new IntersectionObserver((entries) => {
|
||||||
root: document.documentElement,
|
|
||||||
};
|
|
||||||
|
|
||||||
var observer = new IntersectionObserver((entries, observer) => {
|
|
||||||
entries.forEach(entry => {
|
entries.forEach(entry => {
|
||||||
callback(entry.intersectionRatio > 0);
|
if (entry.intersectionRatio > 0) {
|
||||||
|
element.focus();
|
||||||
|
}
|
||||||
});
|
});
|
||||||
}, options);
|
}, { root: document.documentElement });
|
||||||
|
|
||||||
observer.observe(element);
|
observer.observe(element);
|
||||||
}
|
}
|
||||||
|
|
||||||
function handleChatInputVisibilityChange(isVisible) {
|
focusOnVisible(chatTextArea);
|
||||||
if (isVisible) {
|
|
||||||
chatTextArea.focus();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
respondToChatInputVisibility(chatTextArea, handleChatInputVisibilityChange);
|
|
||||||
|
|
||||||
//------------------------------------------------
|
//------------------------------------------------
|
||||||
// Show enlarged character picture when the profile
|
// Show enlarged character picture when the profile
|
||||||
|
|
@ -463,8 +440,7 @@ let bigPictureVisible = false;
|
||||||
|
|
||||||
function addBigPicture() {
|
function addBigPicture() {
|
||||||
var imgElement = document.createElement("img");
|
var imgElement = document.createElement("img");
|
||||||
var timestamp = new Date().getTime();
|
imgElement.src = getProfilePictureUrl();
|
||||||
imgElement.src = "/file/user_data/cache/pfp_character.png?time=" + timestamp;
|
|
||||||
imgElement.classList.add("bigProfilePicture");
|
imgElement.classList.add("bigProfilePicture");
|
||||||
imgElement.addEventListener("load", function () {
|
imgElement.addEventListener("load", function () {
|
||||||
this.style.visibility = "visible";
|
this.style.visibility = "visible";
|
||||||
|
|
@ -478,9 +454,8 @@ function addBigPicture() {
|
||||||
}
|
}
|
||||||
|
|
||||||
function deleteBigPicture() {
|
function deleteBigPicture() {
|
||||||
var bigProfilePictures = document.querySelectorAll(".bigProfilePicture");
|
document.querySelectorAll(".bigProfilePicture").forEach(function (element) {
|
||||||
bigProfilePictures.forEach(function (element) {
|
element.remove();
|
||||||
element.parentNode.removeChild(element);
|
|
||||||
});
|
});
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
@ -494,44 +469,11 @@ function toggleBigPicture() {
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
//------------------------------------------------
|
|
||||||
// Handle the chat input box growth
|
|
||||||
//------------------------------------------------
|
|
||||||
|
|
||||||
// Cache DOM elements
|
|
||||||
const chatContainer = document.getElementById("chat").parentNode.parentNode.parentNode;
|
|
||||||
const chatInput = document.querySelector("#chat-input textarea");
|
|
||||||
|
|
||||||
// Variables to store current dimensions
|
|
||||||
let currentChatInputHeight = chatInput.clientHeight;
|
|
||||||
|
|
||||||
//------------------------------------------------
|
//------------------------------------------------
|
||||||
// Focus on the rename text area when it becomes visible
|
// Focus on the rename text area when it becomes visible
|
||||||
//------------------------------------------------
|
//------------------------------------------------
|
||||||
const renameTextArea = document.getElementById("rename-row").querySelector("textarea");
|
const renameTextArea = document.getElementById("rename-row").querySelector("textarea");
|
||||||
|
focusOnVisible(renameTextArea);
|
||||||
function respondToRenameVisibility(element, callback) {
|
|
||||||
var options = {
|
|
||||||
root: document.documentElement,
|
|
||||||
};
|
|
||||||
|
|
||||||
var observer = new IntersectionObserver((entries, observer) => {
|
|
||||||
entries.forEach(entry => {
|
|
||||||
callback(entry.intersectionRatio > 0);
|
|
||||||
});
|
|
||||||
}, options);
|
|
||||||
|
|
||||||
observer.observe(element);
|
|
||||||
}
|
|
||||||
|
|
||||||
|
|
||||||
function handleVisibilityChange(isVisible) {
|
|
||||||
if (isVisible) {
|
|
||||||
renameTextArea.focus();
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
respondToRenameVisibility(renameTextArea, handleVisibilityChange);
|
|
||||||
|
|
||||||
//------------------------------------------------
|
//------------------------------------------------
|
||||||
// Adjust the chat tab margin if no extension UI
|
// Adjust the chat tab margin if no extension UI
|
||||||
|
|
@ -737,21 +679,21 @@ function handleIndividualSidebarClose(event) {
|
||||||
|
|
||||||
// Close navigation bar if click is outside and it is open
|
// Close navigation bar if click is outside and it is open
|
||||||
if (!headerBar.contains(target) && !headerBar.classList.contains("sidebar-hidden")) {
|
if (!headerBar.contains(target) && !headerBar.classList.contains("sidebar-hidden")) {
|
||||||
toggleSidebar(headerBar, navigationToggle, true);
|
toggleSidebar(headerBar, navigationToggle);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Close past chats row if click is outside and it is open
|
// Close past chats row if click is outside and it is open
|
||||||
if (!pastChatsRow.contains(target) && !pastChatsRow.classList.contains("sidebar-hidden")) {
|
if (!pastChatsRow.contains(target) && !pastChatsRow.classList.contains("sidebar-hidden")) {
|
||||||
toggleSidebar(pastChatsRow, pastChatsToggle, true);
|
toggleSidebar(pastChatsRow, pastChatsToggle);
|
||||||
}
|
}
|
||||||
|
|
||||||
// Close chat controls row if click is outside and it is open
|
// Close chat controls row if click is outside and it is open
|
||||||
if (!chatControlsRow.contains(target) && !chatControlsRow.classList.contains("sidebar-hidden")) {
|
if (!chatControlsRow.contains(target) && !chatControlsRow.classList.contains("sidebar-hidden")) {
|
||||||
toggleSidebar(chatControlsRow, chatControlsToggle, true);
|
toggleSidebar(chatControlsRow, chatControlsToggle);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
function toggleSidebar(sidebar, toggle, forceClose = false) {
|
function toggleSidebar(sidebar, toggle) {
|
||||||
const isCurrentlyHidden = sidebar.classList.contains("sidebar-hidden");
|
const isCurrentlyHidden = sidebar.classList.contains("sidebar-hidden");
|
||||||
const shouldClose = !isCurrentlyHidden;
|
const shouldClose = !isCurrentlyHidden;
|
||||||
|
|
||||||
|
|
@ -776,11 +718,6 @@ function toggleSidebar(sidebar, toggle, forceClose = false) {
|
||||||
toggle.classList.toggle("chat-controls-open", !shouldClose);
|
toggle.classList.toggle("chat-controls-open", !shouldClose);
|
||||||
toggle.innerHTML = shouldClose ? leftArrowSVG : rightArrowSVG;
|
toggle.innerHTML = shouldClose ? leftArrowSVG : rightArrowSVG;
|
||||||
}
|
}
|
||||||
|
|
||||||
// Mobile handling
|
|
||||||
if (isMobile()) {
|
|
||||||
sidebar.classList.toggle("sidebar-shown", !shouldClose);
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Function to check if the device is mobile
|
// Function to check if the device is mobile
|
||||||
|
|
@ -840,17 +777,17 @@ pastChatsToggle.addEventListener("click", () => {
|
||||||
const isCurrentlyOpen = !pastChatsRow.classList.contains("sidebar-hidden");
|
const isCurrentlyOpen = !pastChatsRow.classList.contains("sidebar-hidden");
|
||||||
toggleSidebar(pastChatsRow, pastChatsToggle);
|
toggleSidebar(pastChatsRow, pastChatsToggle);
|
||||||
|
|
||||||
// On desktop, open/close both sidebars at the same time
|
// On desktop, sync both sidebars together
|
||||||
if (!isMobile()) {
|
if (!isMobile()) {
|
||||||
if (isCurrentlyOpen) {
|
if (isCurrentlyOpen) {
|
||||||
// If we just closed the left sidebar, also close the right sidebar
|
// If we just closed the left sidebar, also close the right sidebar
|
||||||
if (!chatControlsRow.classList.contains("sidebar-hidden")) {
|
if (!chatControlsRow.classList.contains("sidebar-hidden")) {
|
||||||
toggleSidebar(chatControlsRow, chatControlsToggle, true);
|
toggleSidebar(chatControlsRow, chatControlsToggle);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// If we just opened the left sidebar, also open the right sidebar
|
// If we just opened the left sidebar, also open the right sidebar
|
||||||
if (chatControlsRow.classList.contains("sidebar-hidden")) {
|
if (chatControlsRow.classList.contains("sidebar-hidden")) {
|
||||||
toggleSidebar(chatControlsRow, chatControlsToggle, false);
|
toggleSidebar(chatControlsRow, chatControlsToggle);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -860,17 +797,17 @@ chatControlsToggle.addEventListener("click", () => {
|
||||||
const isCurrentlyOpen = !chatControlsRow.classList.contains("sidebar-hidden");
|
const isCurrentlyOpen = !chatControlsRow.classList.contains("sidebar-hidden");
|
||||||
toggleSidebar(chatControlsRow, chatControlsToggle);
|
toggleSidebar(chatControlsRow, chatControlsToggle);
|
||||||
|
|
||||||
// On desktop, open/close both sidebars at the same time
|
// On desktop, sync both sidebars together
|
||||||
if (!isMobile()) {
|
if (!isMobile()) {
|
||||||
if (isCurrentlyOpen) {
|
if (isCurrentlyOpen) {
|
||||||
// If we just closed the right sidebar, also close the left sidebar
|
// If we just closed the right sidebar, also close the left sidebar
|
||||||
if (!pastChatsRow.classList.contains("sidebar-hidden")) {
|
if (!pastChatsRow.classList.contains("sidebar-hidden")) {
|
||||||
toggleSidebar(pastChatsRow, pastChatsToggle, true);
|
toggleSidebar(pastChatsRow, pastChatsToggle);
|
||||||
}
|
}
|
||||||
} else {
|
} else {
|
||||||
// If we just opened the right sidebar, also open the left sidebar
|
// If we just opened the right sidebar, also open the left sidebar
|
||||||
if (pastChatsRow.classList.contains("sidebar-hidden")) {
|
if (pastChatsRow.classList.contains("sidebar-hidden")) {
|
||||||
toggleSidebar(pastChatsRow, pastChatsToggle, false);
|
toggleSidebar(pastChatsRow, pastChatsToggle);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
@ -890,7 +827,7 @@ if (isMobile()) {
|
||||||
const textarea = document.querySelector("#chat-input textarea");
|
const textarea = document.querySelector("#chat-input textarea");
|
||||||
|
|
||||||
if (textarea) {
|
if (textarea) {
|
||||||
// Simulate adding and removing a newline
|
// Force textarea height recalculation by simulating content change
|
||||||
textarea.value += "\n";
|
textarea.value += "\n";
|
||||||
textarea.dispatchEvent(new Event("input", { bubbles: true }));
|
textarea.dispatchEvent(new Event("input", { bubbles: true }));
|
||||||
textarea.value = textarea.value.slice(0, -1);
|
textarea.value = textarea.value.slice(0, -1);
|
||||||
|
|
|
||||||
|
|
@ -1,10 +1,9 @@
|
||||||
// Functions for downloading JSON files
|
// Functions for downloading JSON files
|
||||||
function getCurrentTimestamp() {
|
function getCurrentTimestamp() {
|
||||||
const now = new Date();
|
const now = new Date();
|
||||||
const timezoneOffset = now.getTimezoneOffset() * 60000; // Convert to milliseconds
|
const timezoneOffset = now.getTimezoneOffset() * 60000; // Convert minutes to milliseconds
|
||||||
const localTime = new Date(now.getTime() - timezoneOffset);
|
const localTime = new Date(now.getTime() - timezoneOffset);
|
||||||
const formattedTimestamp = localTime.toISOString().replace(/[-:]/g, "").slice(0, 15);
|
return localTime.toISOString().replace(/[-:]/g, "").slice(0, 15);
|
||||||
return formattedTimestamp;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function saveFile(contents, filename) {
|
function saveFile(contents, filename) {
|
||||||
|
|
@ -18,23 +17,18 @@ function saveFile(contents, filename) {
|
||||||
}
|
}
|
||||||
|
|
||||||
function saveHistory(history, character, mode) {
|
function saveHistory(history, character, mode) {
|
||||||
let path = null;
|
let path;
|
||||||
|
|
||||||
if (["chat", "chat-instruct"].includes(mode) && character && character.trim() !== "") {
|
if (["chat", "chat-instruct"].includes(mode) && character && character.trim() !== "") {
|
||||||
path = `history_${character}_${getCurrentTimestamp()}.json`;
|
path = `history_${character}_${getCurrentTimestamp()}.json`;
|
||||||
} else {
|
} else {
|
||||||
try {
|
path = `history_${mode || "unknown"}_${getCurrentTimestamp()}.json`;
|
||||||
path = `history_${mode}_${getCurrentTimestamp()}.json`;
|
|
||||||
} catch (error) {
|
|
||||||
path = `history_${getCurrentTimestamp()}.json`;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
saveFile(history, path);
|
saveFile(history, path);
|
||||||
}
|
}
|
||||||
|
|
||||||
function saveSession(session) {
|
function saveSession(session) {
|
||||||
let path = null;
|
const path = `session_${getCurrentTimestamp()}.json`;
|
||||||
|
|
||||||
path = `session_${getCurrentTimestamp()}.json`;
|
|
||||||
saveFile(session, path);
|
saveFile(session, path);
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -1,13 +1,11 @@
|
||||||
const chatParent = document.querySelector(".chat-parent");
|
|
||||||
|
|
||||||
function toggle_controls(value) {
|
function toggle_controls(value) {
|
||||||
|
const navToggle = document.getElementById("navigation-toggle");
|
||||||
|
const pastChatsToggle = document.getElementById("past-chats-toggle");
|
||||||
const extensions = document.querySelector("#extensions");
|
const extensions = document.querySelector("#extensions");
|
||||||
|
const galleryExtension = document.getElementById("gallery-extension");
|
||||||
|
|
||||||
if (value) {
|
if (value) {
|
||||||
// SHOW MODE: Click toggles to show hidden sidebars
|
// SHOW MODE: Click toggles to show hidden sidebars
|
||||||
const navToggle = document.getElementById("navigation-toggle");
|
|
||||||
const pastChatsToggle = document.getElementById("past-chats-toggle");
|
|
||||||
|
|
||||||
if (navToggle && document.querySelector(".header_bar")?.classList.contains("sidebar-hidden")) {
|
if (navToggle && document.querySelector(".header_bar")?.classList.contains("sidebar-hidden")) {
|
||||||
navToggle.click();
|
navToggle.click();
|
||||||
}
|
}
|
||||||
|
|
@ -19,17 +17,11 @@ function toggle_controls(value) {
|
||||||
if (extensions) {
|
if (extensions) {
|
||||||
extensions.style.display = "inherit";
|
extensions.style.display = "inherit";
|
||||||
}
|
}
|
||||||
|
if (galleryExtension) {
|
||||||
let gallery_element = document.getElementById("gallery-extension");
|
galleryExtension.style.display = "block";
|
||||||
if (gallery_element) {
|
|
||||||
gallery_element.style.display = "block";
|
|
||||||
}
|
}
|
||||||
|
|
||||||
} else {
|
} else {
|
||||||
// HIDE MODE: Click toggles to hide visible sidebars
|
// HIDE MODE: Click toggles to hide visible sidebars
|
||||||
const navToggle = document.getElementById("navigation-toggle");
|
|
||||||
const pastChatsToggle = document.getElementById("past-chats-toggle");
|
|
||||||
|
|
||||||
if (navToggle && !document.querySelector(".header_bar")?.classList.contains("sidebar-hidden")) {
|
if (navToggle && !document.querySelector(".header_bar")?.classList.contains("sidebar-hidden")) {
|
||||||
navToggle.click();
|
navToggle.click();
|
||||||
}
|
}
|
||||||
|
|
@ -41,5 +33,8 @@ function toggle_controls(value) {
|
||||||
if (extensions) {
|
if (extensions) {
|
||||||
extensions.style.display = "none";
|
extensions.style.display = "none";
|
||||||
}
|
}
|
||||||
|
if (galleryExtension) {
|
||||||
|
galleryExtension.style.display = "none";
|
||||||
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -2,17 +2,9 @@ function scrollToTop() {
|
||||||
window.scrollTo({ top: 0 });
|
window.scrollTo({ top: 0 });
|
||||||
}
|
}
|
||||||
|
|
||||||
function findButtonsByText(buttonText) {
|
function findButtonsByText(buttonText, container = document) {
|
||||||
const buttons = document.getElementsByTagName("button");
|
return Array.from(container.getElementsByTagName("button"))
|
||||||
const matchingButtons = [];
|
.filter(btn => btn.textContent.trim() === buttonText);
|
||||||
|
|
||||||
for (let i = 0; i < buttons.length; i++) {
|
|
||||||
if (buttons[i].textContent.trim() === buttonText) {
|
|
||||||
matchingButtons.push(buttons[i]);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return matchingButtons;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
function switch_to_chat() {
|
function switch_to_chat() {
|
||||||
|
|
@ -39,13 +31,9 @@ function switch_to_character() {
|
||||||
|
|
||||||
function switch_to_image_ai_generate() {
|
function switch_to_image_ai_generate() {
|
||||||
const container = document.querySelector("#image-ai-tab");
|
const container = document.querySelector("#image-ai-tab");
|
||||||
const buttons = container.getElementsByTagName("button");
|
const generateBtn = findButtonsByText("Generate", container)[0];
|
||||||
|
if (generateBtn) {
|
||||||
for (let i = 0; i < buttons.length; i++) {
|
generateBtn.click();
|
||||||
if (buttons[i].textContent.trim() === "Generate") {
|
|
||||||
buttons[i].click();
|
|
||||||
break;
|
|
||||||
}
|
|
||||||
}
|
}
|
||||||
|
|
||||||
scrollToTop();
|
scrollToTop();
|
||||||
|
|
|
||||||
|
|
@ -1,7 +1,6 @@
|
||||||
function updateBigPicture() {
|
function updateBigPicture() {
|
||||||
var existingElement = document.querySelector(".bigProfilePicture");
|
var existingElement = document.querySelector(".bigProfilePicture");
|
||||||
if (existingElement) {
|
if (existingElement) {
|
||||||
var timestamp = new Date().getTime();
|
existingElement.src = getProfilePictureUrl();
|
||||||
existingElement.src = "/file/user_data/cache/pfp_character.png?time=" + timestamp;
|
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
|
||||||
|
|
@ -39,6 +39,146 @@ def load_chat_template_file(filepath):
|
||||||
return text
|
return text
|
||||||
|
|
||||||
|
|
||||||
|
def _first_token_display_str(token_id, prompt, tokenizer):
|
||||||
|
"""Return the display string for the first prompt token.
|
||||||
|
|
||||||
|
Returns empty string for BOS or tokens that don't appear at the start
|
||||||
|
of the prompt text, so they don't shift text_offset for subsequent tokens.
|
||||||
|
"""
|
||||||
|
token_id = int(token_id)
|
||||||
|
bos_id = getattr(tokenizer, 'bos_token_id', None)
|
||||||
|
if bos_id is not None and token_id == bos_id:
|
||||||
|
return ""
|
||||||
|
|
||||||
|
import torch
|
||||||
|
tok = tokenizer.decode(torch.tensor([token_id]))
|
||||||
|
if not prompt.startswith(tok):
|
||||||
|
return ""
|
||||||
|
|
||||||
|
return tok
|
||||||
|
|
||||||
|
|
||||||
|
def _compute_prompt_logprob_entries(prompt, logprobs_count, input_ids=None):
|
||||||
|
"""Compute logprob entries for prompt tokens via a forward pass.
|
||||||
|
|
||||||
|
Returns a list of logprob entries in the standard format.
|
||||||
|
The first token gets a null entry (no conditioning context).
|
||||||
|
|
||||||
|
Supported for HF-compatible loaders (Transformers, ExLlamav3_HF, etc.)
|
||||||
|
via a single forward pass, and for llama.cpp via the server's
|
||||||
|
prompt_logprobs parameter. Returns [] for unsupported loaders.
|
||||||
|
"""
|
||||||
|
if input_ids is None:
|
||||||
|
input_ids = encode(prompt) # (1, seq_len) tensor or array
|
||||||
|
|
||||||
|
token_ids = input_ids[0]
|
||||||
|
n_tokens = len(token_ids)
|
||||||
|
|
||||||
|
if n_tokens == 0:
|
||||||
|
return []
|
||||||
|
|
||||||
|
loader = shared.args.loader
|
||||||
|
model = shared.model
|
||||||
|
|
||||||
|
if loader == 'llama.cpp':
|
||||||
|
return model.get_prompt_logprob_entries(token_ids, max(logprobs_count, 1), prompt=prompt)
|
||||||
|
|
||||||
|
first_token_str = _first_token_display_str(token_ids[0], prompt, shared.tokenizer)
|
||||||
|
|
||||||
|
if n_tokens <= 1:
|
||||||
|
return [{"token": first_token_str, "null_logprob": True}]
|
||||||
|
|
||||||
|
import torch
|
||||||
|
from modules.torch_utils import clear_torch_cache
|
||||||
|
|
||||||
|
if hasattr(model, 'get_prompt_logits'):
|
||||||
|
logits = model.get_prompt_logits(input_ids)
|
||||||
|
|
||||||
|
elif hasattr(model, 'forward'):
|
||||||
|
# HF-compatible loaders (Transformers, ExLlamav3_HF, etc.)
|
||||||
|
input_ids_tensor = input_ids if isinstance(input_ids, torch.Tensor) else torch.tensor(input_ids, dtype=torch.long)
|
||||||
|
if hasattr(model, 'device'):
|
||||||
|
input_ids_tensor = input_ids_tensor.to(model.device)
|
||||||
|
with torch.no_grad():
|
||||||
|
# Pass labels to ensure logits are returned for ALL positions,
|
||||||
|
# not just the last token (some HF wrappers like ExLlamav3_HF
|
||||||
|
# only compute the last-token logits when labels are absent).
|
||||||
|
outputs = model(input_ids=input_ids_tensor, labels=input_ids_tensor)
|
||||||
|
logits = outputs.logits # keep on GPU, (1, seq_len, vocab) in model dtype
|
||||||
|
del outputs
|
||||||
|
|
||||||
|
else:
|
||||||
|
return []
|
||||||
|
|
||||||
|
entries = [{"token": first_token_str, "null_logprob": True}]
|
||||||
|
|
||||||
|
logprobs_count = max(logprobs_count, 1)
|
||||||
|
k = min(logprobs_count, logits.shape[-1])
|
||||||
|
chunk_size = 2048
|
||||||
|
unique_ids = set(int(tid) for tid in token_ids[1:])
|
||||||
|
|
||||||
|
# Process logits in chunks on GPU, only move top-K results to CPU
|
||||||
|
all_top_log_probs_list = []
|
||||||
|
all_top_indices_list = []
|
||||||
|
all_actual_lps = []
|
||||||
|
|
||||||
|
for start in range(0, n_tokens - 1, chunk_size):
|
||||||
|
end = min(start + chunk_size, n_tokens - 1)
|
||||||
|
chunk_logits = logits[0, start:end].float() # (chunk, vocab) on GPU
|
||||||
|
chunk_lse = torch.logsumexp(chunk_logits, dim=-1)
|
||||||
|
chunk_top_values, chunk_top_indices = torch.topk(chunk_logits, k=k, dim=-1)
|
||||||
|
chunk_top_log_probs = chunk_top_values - chunk_lse.unsqueeze(-1)
|
||||||
|
|
||||||
|
# Compute logprob for actual next tokens in this chunk
|
||||||
|
chunk_top_sets = [set(chunk_top_indices[j].tolist()) for j in range(end - start)]
|
||||||
|
for j in range(end - start):
|
||||||
|
actual_tid = int(token_ids[start + j + 1])
|
||||||
|
if actual_tid not in chunk_top_sets[j]:
|
||||||
|
all_actual_lps.append((chunk_logits[j, actual_tid] - chunk_lse[j]).item())
|
||||||
|
else:
|
||||||
|
all_actual_lps.append(None) # will use top_log_probs
|
||||||
|
|
||||||
|
all_top_log_probs_list.append(chunk_top_log_probs.cpu())
|
||||||
|
all_top_indices_list.append(chunk_top_indices.cpu())
|
||||||
|
unique_ids.update(int(tid) for tid in chunk_top_indices.flatten().tolist())
|
||||||
|
del chunk_logits, chunk_lse, chunk_top_values
|
||||||
|
|
||||||
|
del logits
|
||||||
|
clear_torch_cache()
|
||||||
|
|
||||||
|
all_top_log_probs = torch.cat(all_top_log_probs_list, dim=0)
|
||||||
|
all_top_indices = torch.cat(all_top_indices_list, dim=0)
|
||||||
|
|
||||||
|
unique_ids_list = sorted(unique_ids)
|
||||||
|
decoded_list = shared.tokenizer.batch_decode([[tid] for tid in unique_ids_list]) if hasattr(shared.tokenizer, 'batch_decode') else [shared.tokenizer.decode(torch.tensor([tid])) for tid in unique_ids_list]
|
||||||
|
decoded_strs = dict(zip(unique_ids_list, decoded_list))
|
||||||
|
|
||||||
|
for i in range(1, n_tokens):
|
||||||
|
token_id = int(token_ids[i])
|
||||||
|
idx = i - 1
|
||||||
|
top_log_probs = all_top_log_probs[idx]
|
||||||
|
top_ids = all_top_indices[idx].tolist()
|
||||||
|
actual_token_str = decoded_strs[token_id]
|
||||||
|
|
||||||
|
if token_id in top_ids:
|
||||||
|
actual_lp = top_log_probs[top_ids.index(token_id)].item()
|
||||||
|
alternatives = [
|
||||||
|
{"token": decoded_strs[top_ids[j]], "token_id": top_ids[j], "logprob": top_log_probs[j].item()}
|
||||||
|
for j in range(k) if top_ids[j] != token_id
|
||||||
|
]
|
||||||
|
else:
|
||||||
|
actual_lp = all_actual_lps[idx]
|
||||||
|
alternatives = [
|
||||||
|
{"token": decoded_strs[top_ids[j]], "token_id": top_ids[j], "logprob": top_log_probs[j].item()}
|
||||||
|
for j in range(k - 1)
|
||||||
|
]
|
||||||
|
|
||||||
|
entry = {"top_logprobs": [{"token": actual_token_str, "token_id": token_id, "logprob": actual_lp}] + alternatives}
|
||||||
|
entries.append(entry)
|
||||||
|
|
||||||
|
return entries
|
||||||
|
|
||||||
|
|
||||||
def _get_raw_logprob_entries(offset=0):
|
def _get_raw_logprob_entries(offset=0):
|
||||||
"""Get raw logprob entries from llama.cpp/ExLlamav3 backend, starting from offset.
|
"""Get raw logprob entries from llama.cpp/ExLlamav3 backend, starting from offset.
|
||||||
|
|
||||||
|
|
@ -65,6 +205,21 @@ def _parse_entry_top(entry):
|
||||||
return entry.get('top_logprobs', entry.get('top_probs', []))
|
return entry.get('top_logprobs', entry.get('top_probs', []))
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_sampled_token(entry, top):
|
||||||
|
"""Get the actually sampled token and its logprob from a logprob entry.
|
||||||
|
|
||||||
|
Uses the entry-level token/logprob when available (the actually sampled
|
||||||
|
token), falling back to top[0] (highest-probability alternative) which
|
||||||
|
may differ with non-greedy sampling.
|
||||||
|
"""
|
||||||
|
if 'token' in entry:
|
||||||
|
return entry['token'], entry.get('logprob', entry.get('prob', 0))
|
||||||
|
|
||||||
|
token_str = top[0].get('token', '')
|
||||||
|
token_logprob = top[0].get('logprob', top[0].get('prob', 0))
|
||||||
|
return token_str, token_logprob
|
||||||
|
|
||||||
|
|
||||||
def format_chat_logprobs(entries):
|
def format_chat_logprobs(entries):
|
||||||
"""Format logprob entries into OpenAI chat completions logprobs format.
|
"""Format logprob entries into OpenAI chat completions logprobs format.
|
||||||
|
|
||||||
|
|
@ -79,9 +234,7 @@ def format_chat_logprobs(entries):
|
||||||
if not top:
|
if not top:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
chosen = top[0]
|
token_str, token_logprob = _extract_sampled_token(entry, top)
|
||||||
token_str = chosen.get('token', '')
|
|
||||||
token_logprob = chosen.get('logprob', chosen.get('prob', 0))
|
|
||||||
|
|
||||||
top_list = []
|
top_list = []
|
||||||
for item in top:
|
for item in top:
|
||||||
|
|
@ -106,7 +259,7 @@ def format_chat_logprobs(entries):
|
||||||
def format_completion_logprobs(entries):
|
def format_completion_logprobs(entries):
|
||||||
"""Format logprob entries into OpenAI completions logprobs format.
|
"""Format logprob entries into OpenAI completions logprobs format.
|
||||||
|
|
||||||
Output: {"tokens", "token_logprobs", "top_logprobs": [{token: prob}], "text_offset"}
|
Output: {"tokens", "token_logprobs", "top_logprobs": [{token: prob}], "top_logprobs_ids": [{token_id: prob}], "text_offset"}
|
||||||
"""
|
"""
|
||||||
if not entries:
|
if not entries:
|
||||||
return None
|
return None
|
||||||
|
|
@ -114,17 +267,27 @@ def format_completion_logprobs(entries):
|
||||||
tokens = []
|
tokens = []
|
||||||
token_logprobs = []
|
token_logprobs = []
|
||||||
top_logprobs = []
|
top_logprobs = []
|
||||||
|
top_logprobs_ids = []
|
||||||
text_offset = []
|
text_offset = []
|
||||||
offset = 0
|
offset = 0
|
||||||
|
|
||||||
for entry in entries:
|
for entry in entries:
|
||||||
|
# Handle null logprob entries (first prompt token with echo)
|
||||||
|
if entry.get("null_logprob"):
|
||||||
|
token_str = entry.get("token", "")
|
||||||
|
tokens.append(token_str)
|
||||||
|
token_logprobs.append(None)
|
||||||
|
top_logprobs.append(None)
|
||||||
|
top_logprobs_ids.append(None)
|
||||||
|
text_offset.append(offset)
|
||||||
|
offset += len(token_str)
|
||||||
|
continue
|
||||||
|
|
||||||
top = _parse_entry_top(entry)
|
top = _parse_entry_top(entry)
|
||||||
if not top:
|
if not top:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
chosen = top[0]
|
token_str, token_logprob = _extract_sampled_token(entry, top)
|
||||||
token_str = chosen.get('token', '')
|
|
||||||
token_logprob = chosen.get('logprob', chosen.get('prob', 0))
|
|
||||||
|
|
||||||
tokens.append(token_str)
|
tokens.append(token_str)
|
||||||
token_logprobs.append(token_logprob)
|
token_logprobs.append(token_logprob)
|
||||||
|
|
@ -132,21 +295,29 @@ def format_completion_logprobs(entries):
|
||||||
offset += len(token_str)
|
offset += len(token_str)
|
||||||
|
|
||||||
top_dict = {}
|
top_dict = {}
|
||||||
|
top_dict_ids = {}
|
||||||
for item in top:
|
for item in top:
|
||||||
t = item.get('token', '')
|
t = item.get('token', '')
|
||||||
lp = item.get('logprob', item.get('prob', 0))
|
lp = item.get('logprob', item.get('prob', 0))
|
||||||
top_dict[t] = lp
|
top_dict[t] = lp
|
||||||
|
tid = item.get('token_id', item.get('id'))
|
||||||
|
if tid is not None:
|
||||||
|
top_dict_ids[tid] = lp
|
||||||
top_logprobs.append(top_dict)
|
top_logprobs.append(top_dict)
|
||||||
|
top_logprobs_ids.append(top_dict_ids if top_dict_ids else None)
|
||||||
|
|
||||||
if not tokens:
|
if not tokens:
|
||||||
return None
|
return None
|
||||||
|
|
||||||
return {
|
result = {
|
||||||
"tokens": tokens,
|
"tokens": tokens,
|
||||||
"token_logprobs": token_logprobs,
|
"token_logprobs": token_logprobs,
|
||||||
"top_logprobs": top_logprobs,
|
"top_logprobs": top_logprobs,
|
||||||
"text_offset": text_offset
|
"text_offset": text_offset
|
||||||
}
|
}
|
||||||
|
if any(x is not None for x in top_logprobs_ids):
|
||||||
|
result["top_logprobs_ids"] = top_logprobs_ids
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
def process_parameters(body, is_legacy=False):
|
def process_parameters(body, is_legacy=False):
|
||||||
|
|
@ -407,7 +578,10 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
|
||||||
})
|
})
|
||||||
|
|
||||||
max_tokens = generate_params['max_new_tokens']
|
max_tokens = generate_params['max_new_tokens']
|
||||||
if max_tokens in [None, 0]:
|
if max_tokens is not None and max_tokens <= 0:
|
||||||
|
raise InvalidRequestError(message="max_tokens must be greater than 0.", param="max_tokens")
|
||||||
|
|
||||||
|
if max_tokens is None:
|
||||||
generate_params['max_new_tokens'] = 512
|
generate_params['max_new_tokens'] = 512
|
||||||
generate_params['auto_max_new_tokens'] = True
|
generate_params['auto_max_new_tokens'] = True
|
||||||
|
|
||||||
|
|
@ -652,6 +826,15 @@ def completions_common(body: dict, is_legacy: bool = False, stream=False, stop_e
|
||||||
# common params
|
# common params
|
||||||
generate_params = process_parameters(body, is_legacy=is_legacy)
|
generate_params = process_parameters(body, is_legacy=is_legacy)
|
||||||
max_tokens = generate_params['max_new_tokens']
|
max_tokens = generate_params['max_new_tokens']
|
||||||
|
if max_tokens is None:
|
||||||
|
generate_params['max_new_tokens'] = 512
|
||||||
|
generate_params['auto_max_new_tokens'] = True
|
||||||
|
max_tokens = 512
|
||||||
|
elif max_tokens < 0:
|
||||||
|
raise InvalidRequestError(message="max_tokens must be greater than or equal to 0.", param="max_tokens")
|
||||||
|
elif max_tokens == 0 and body.get('logprobs') is None:
|
||||||
|
raise InvalidRequestError(message="max_tokens is 0 but no logprobs parameter was specified.", param="max_tokens")
|
||||||
|
|
||||||
generate_params['stream'] = stream
|
generate_params['stream'] = stream
|
||||||
if stop_event is not None:
|
if stop_event is not None:
|
||||||
generate_params['stop_event'] = stop_event
|
generate_params['stop_event'] = stop_event
|
||||||
|
|
@ -700,9 +883,17 @@ def completions_common(body: dict, is_legacy: bool = False, stream=False, stop_e
|
||||||
prompt = decode(prompt)[0]
|
prompt = decode(prompt)[0]
|
||||||
|
|
||||||
prefix = prompt if echo else ''
|
prefix = prompt if echo else ''
|
||||||
token_count = len(encode(prompt)[0])
|
prompt_input_ids = encode(prompt)
|
||||||
|
token_count = len(prompt_input_ids[0])
|
||||||
total_prompt_token_count += token_count
|
total_prompt_token_count += token_count
|
||||||
|
|
||||||
|
# Compute prompt logprobs once per prompt (shared across n_completions)
|
||||||
|
logprobs_val = body.get('logprobs', None)
|
||||||
|
if echo and logprobs_val is not None and logprobs_val >= 0:
|
||||||
|
prompt_entries = _compute_prompt_logprob_entries(prompt, logprobs_val, input_ids=prompt_input_ids)
|
||||||
|
else:
|
||||||
|
prompt_entries = None
|
||||||
|
|
||||||
original_seed = generate_params.get('seed', -1)
|
original_seed = generate_params.get('seed', -1)
|
||||||
for _n in range(n_completions):
|
for _n in range(n_completions):
|
||||||
# Increment seed for each completion to ensure diversity (matches llama.cpp native behavior)
|
# Increment seed for each completion to ensure diversity (matches llama.cpp native behavior)
|
||||||
|
|
@ -713,29 +904,41 @@ def completions_common(body: dict, is_legacy: bool = False, stream=False, stop_e
|
||||||
logprob_proc.token_alternatives_history.clear()
|
logprob_proc.token_alternatives_history.clear()
|
||||||
|
|
||||||
# generate reply #######################################
|
# generate reply #######################################
|
||||||
debug_msg({'prompt': prompt, 'generate_params': generate_params})
|
if max_tokens == 0:
|
||||||
generator = generate_reply(prompt, generate_params, is_chat=False)
|
answer = ''
|
||||||
answer = ''
|
completion_token_count = 0
|
||||||
|
stop_reason = "stop"
|
||||||
for a in generator:
|
|
||||||
answer = a
|
|
||||||
|
|
||||||
completion_token_count = len(encode(answer)[0])
|
|
||||||
total_completion_token_count += completion_token_count
|
|
||||||
stop_reason = "stop"
|
|
||||||
if token_count + completion_token_count >= generate_params['truncation_length'] or completion_token_count >= max_tokens:
|
|
||||||
stop_reason = "length"
|
|
||||||
|
|
||||||
if logprob_proc:
|
|
||||||
all_entries = []
|
|
||||||
for alt in logprob_proc.token_alternatives_history:
|
|
||||||
all_entries.extend(_dict_to_logprob_entries(alt))
|
|
||||||
completion_logprobs = format_completion_logprobs(all_entries)
|
|
||||||
elif shared.args.loader in ('llama.cpp', 'ExLlamav3'):
|
|
||||||
raw = getattr(shared.model, 'last_completion_probabilities', None)
|
|
||||||
completion_logprobs = format_completion_logprobs(raw)
|
|
||||||
else:
|
else:
|
||||||
completion_logprobs = None
|
debug_msg({'prompt': prompt, 'generate_params': generate_params})
|
||||||
|
generator = generate_reply(prompt, generate_params, is_chat=False)
|
||||||
|
answer = ''
|
||||||
|
|
||||||
|
for a in generator:
|
||||||
|
answer = a
|
||||||
|
|
||||||
|
completion_token_count = len(encode(answer)[0])
|
||||||
|
stop_reason = "stop"
|
||||||
|
if token_count + completion_token_count >= generate_params['truncation_length'] or completion_token_count >= max_tokens:
|
||||||
|
stop_reason = "length"
|
||||||
|
|
||||||
|
total_completion_token_count += completion_token_count
|
||||||
|
|
||||||
|
if max_tokens == 0:
|
||||||
|
all_entries = []
|
||||||
|
else:
|
||||||
|
if logprob_proc:
|
||||||
|
all_entries = []
|
||||||
|
for alt in logprob_proc.token_alternatives_history:
|
||||||
|
all_entries.extend(_dict_to_logprob_entries(alt))
|
||||||
|
elif shared.args.loader in ('llama.cpp', 'ExLlamav3'):
|
||||||
|
all_entries = getattr(shared.model, 'last_completion_probabilities', None) or []
|
||||||
|
else:
|
||||||
|
all_entries = []
|
||||||
|
|
||||||
|
if prompt_entries:
|
||||||
|
all_entries = prompt_entries + all_entries
|
||||||
|
|
||||||
|
completion_logprobs = format_completion_logprobs(all_entries) if all_entries else None
|
||||||
|
|
||||||
respi = {
|
respi = {
|
||||||
"index": choice_index,
|
"index": choice_index,
|
||||||
|
|
@ -775,7 +978,8 @@ def completions_common(body: dict, is_legacy: bool = False, stream=False, stop_e
|
||||||
raise InvalidRequestError(message="API Batched generation not yet supported.", param=prompt_str)
|
raise InvalidRequestError(message="API Batched generation not yet supported.", param=prompt_str)
|
||||||
|
|
||||||
prefix = prompt if echo else ''
|
prefix = prompt if echo else ''
|
||||||
token_count = len(encode(prompt)[0])
|
prompt_input_ids = encode(prompt)
|
||||||
|
token_count = len(prompt_input_ids[0])
|
||||||
|
|
||||||
# Check if usage should be included in streaming chunks per OpenAI spec
|
# Check if usage should be included in streaming chunks per OpenAI spec
|
||||||
stream_options = body.get('stream_options')
|
stream_options = body.get('stream_options')
|
||||||
|
|
@ -808,37 +1012,57 @@ def completions_common(body: dict, is_legacy: bool = False, stream=False, stop_e
|
||||||
|
|
||||||
return chunk
|
return chunk
|
||||||
|
|
||||||
|
logprobs_val = body.get('logprobs', None)
|
||||||
|
if echo and logprobs_val is not None and logprobs_val >= 0:
|
||||||
|
prompt_entries = _compute_prompt_logprob_entries(prompt, logprobs_val, input_ids=prompt_input_ids)
|
||||||
|
prompt_logprobs_formatted = format_completion_logprobs(prompt_entries) if prompt_entries else None
|
||||||
|
else:
|
||||||
|
prompt_logprobs_formatted = None
|
||||||
|
|
||||||
|
# Clear stale logprobs from any previous request before building the
|
||||||
|
# first chunk, so text_streaming_chunk doesn't pick up old data.
|
||||||
|
if hasattr(shared.model, 'last_completion_probabilities'):
|
||||||
|
shared.model.last_completion_probabilities = []
|
||||||
|
cmpl_logprobs_offset[0] = 0
|
||||||
|
|
||||||
chunk = text_streaming_chunk(prefix)
|
chunk = text_streaming_chunk(prefix)
|
||||||
|
if prompt_logprobs_formatted is not None:
|
||||||
|
chunk[resp_list][0]["logprobs"] = prompt_logprobs_formatted
|
||||||
if include_usage:
|
if include_usage:
|
||||||
chunk['usage'] = None
|
chunk['usage'] = None
|
||||||
yield chunk
|
yield chunk
|
||||||
|
|
||||||
# generate reply #######################################
|
# generate reply #######################################
|
||||||
debug_msg({'prompt': prompt, 'generate_params': generate_params})
|
if max_tokens == 0:
|
||||||
generator = generate_reply(prompt, generate_params, is_chat=False)
|
answer = ''
|
||||||
answer = ''
|
completion_token_count = 0
|
||||||
seen_content = ''
|
stop_reason = "stop"
|
||||||
completion_token_count = 0
|
else:
|
||||||
|
debug_msg({'prompt': prompt, 'generate_params': generate_params})
|
||||||
|
generator = generate_reply(prompt, generate_params, is_chat=False)
|
||||||
|
answer = ''
|
||||||
|
seen_content = ''
|
||||||
|
completion_token_count = 0
|
||||||
|
|
||||||
for a in generator:
|
for a in generator:
|
||||||
answer = a
|
answer = a
|
||||||
|
|
||||||
len_seen = len(seen_content)
|
len_seen = len(seen_content)
|
||||||
new_content = answer[len_seen:]
|
new_content = answer[len_seen:]
|
||||||
|
|
||||||
if not new_content or chr(0xfffd) in new_content: # partial unicode character, don't send it yet.
|
if not new_content or chr(0xfffd) in new_content: # partial unicode character, don't send it yet.
|
||||||
continue
|
continue
|
||||||
|
|
||||||
seen_content = answer
|
seen_content = answer
|
||||||
chunk = text_streaming_chunk(new_content)
|
chunk = text_streaming_chunk(new_content)
|
||||||
if include_usage:
|
if include_usage:
|
||||||
chunk['usage'] = None
|
chunk['usage'] = None
|
||||||
yield chunk
|
yield chunk
|
||||||
|
|
||||||
completion_token_count = len(encode(answer)[0])
|
completion_token_count = len(encode(answer)[0])
|
||||||
stop_reason = "stop"
|
stop_reason = "stop"
|
||||||
if token_count + completion_token_count >= generate_params['truncation_length'] or completion_token_count >= max_tokens:
|
if token_count + completion_token_count >= generate_params['truncation_length'] or completion_token_count >= max_tokens:
|
||||||
stop_reason = "length"
|
stop_reason = "length"
|
||||||
|
|
||||||
chunk = text_streaming_chunk(suffix)
|
chunk = text_streaming_chunk(suffix)
|
||||||
chunk[resp_list][0]["finish_reason"] = stop_reason
|
chunk[resp_list][0]["finish_reason"] = stop_reason
|
||||||
|
|
|
||||||
|
|
@ -6,6 +6,7 @@ from transformers import AutoModel
|
||||||
from .errors import ServiceUnavailableError
|
from .errors import ServiceUnavailableError
|
||||||
from .utils import debug_msg, float_list_to_base64
|
from .utils import debug_msg, float_list_to_base64
|
||||||
from modules.logging_colors import logger
|
from modules.logging_colors import logger
|
||||||
|
from modules import shared
|
||||||
|
|
||||||
embeddings_params_initialized = False
|
embeddings_params_initialized = False
|
||||||
|
|
||||||
|
|
@ -41,7 +42,7 @@ def load_embedding_model(model: str):
|
||||||
try:
|
try:
|
||||||
logger.info(f"Try embedding model: {model} on {embeddings_device}")
|
logger.info(f"Try embedding model: {model} on {embeddings_device}")
|
||||||
if 'jina-embeddings' in model:
|
if 'jina-embeddings' in model:
|
||||||
embeddings_model = AutoModel.from_pretrained(model, trust_remote_code=True) # trust_remote_code is needed to use the encode method
|
embeddings_model = AutoModel.from_pretrained(model, trust_remote_code=shared.args.trust_remote_code)
|
||||||
embeddings_model = embeddings_model.to(embeddings_device)
|
embeddings_model = embeddings_model.to(embeddings_device)
|
||||||
else:
|
else:
|
||||||
embeddings_model = SentenceTransformer(model, device=embeddings_device)
|
embeddings_model = SentenceTransformer(model, device=embeddings_device)
|
||||||
|
|
|
||||||
|
|
@ -4,8 +4,11 @@ OpenAI-compatible image generation using local diffusion models.
|
||||||
|
|
||||||
import base64
|
import base64
|
||||||
import io
|
import io
|
||||||
|
import json
|
||||||
import time
|
import time
|
||||||
|
|
||||||
|
from PIL.PngImagePlugin import PngInfo
|
||||||
|
|
||||||
from .errors import ServiceUnavailableError
|
from .errors import ServiceUnavailableError
|
||||||
from modules import shared
|
from modules import shared
|
||||||
|
|
||||||
|
|
@ -15,7 +18,7 @@ def generations(request):
|
||||||
Generate images using the loaded diffusion model.
|
Generate images using the loaded diffusion model.
|
||||||
Returns dict with 'created' timestamp and 'data' list of images.
|
Returns dict with 'created' timestamp and 'data' list of images.
|
||||||
"""
|
"""
|
||||||
from modules.ui_image_generation import generate
|
from modules.ui_image_generation import build_generation_metadata, generate
|
||||||
|
|
||||||
if shared.image_model is None:
|
if shared.image_model is None:
|
||||||
raise ServiceUnavailableError("No image model loaded. Load a model via the UI first.")
|
raise ServiceUnavailableError("No image model loaded. Load a model via the UI first.")
|
||||||
|
|
@ -46,10 +49,18 @@ def generations(request):
|
||||||
if not images:
|
if not images:
|
||||||
raise ServiceUnavailableError("Image generation failed or produced no images.")
|
raise ServiceUnavailableError("Image generation failed or produced no images.")
|
||||||
|
|
||||||
# Build response
|
# Build response with per-batch metadata (seed increments per batch)
|
||||||
|
base_seed = state.get('image_seed_resolved', state['image_seed'])
|
||||||
|
batch_size = int(state['image_batch_size'])
|
||||||
|
|
||||||
resp = {'created': int(time.time()), 'data': []}
|
resp = {'created': int(time.time()), 'data': []}
|
||||||
for img in images:
|
for idx, img in enumerate(images):
|
||||||
b64 = _image_to_base64(img)
|
batch_seed = base_seed + idx // batch_size
|
||||||
|
metadata = build_generation_metadata(state, batch_seed)
|
||||||
|
metadata_json = json.dumps(metadata, ensure_ascii=False)
|
||||||
|
png_info = PngInfo()
|
||||||
|
png_info.add_text("image_gen_settings", metadata_json)
|
||||||
|
b64 = _image_to_base64(img, png_info)
|
||||||
|
|
||||||
image_obj = {'revised_prompt': request.prompt}
|
image_obj = {'revised_prompt': request.prompt}
|
||||||
|
|
||||||
|
|
@ -63,7 +74,7 @@ def generations(request):
|
||||||
return resp
|
return resp
|
||||||
|
|
||||||
|
|
||||||
def _image_to_base64(image) -> str:
|
def _image_to_base64(image, png_info=None) -> str:
|
||||||
buffered = io.BytesIO()
|
buffered = io.BytesIO()
|
||||||
image.save(buffered, format="PNG")
|
image.save(buffered, format="PNG", pnginfo=png_info)
|
||||||
return base64.b64encode(buffered.getvalue()).decode('utf-8')
|
return base64.b64encode(buffered.getvalue()).decode('utf-8')
|
||||||
|
|
|
||||||
|
|
@ -2,7 +2,7 @@ from modules import loaders, shared
|
||||||
from modules.logging_colors import logger
|
from modules.logging_colors import logger
|
||||||
from modules.LoRA import add_lora_to_model
|
from modules.LoRA import add_lora_to_model
|
||||||
from modules.models import load_model, unload_model
|
from modules.models import load_model, unload_model
|
||||||
from modules.models_settings import get_model_metadata, update_model_parameters
|
from modules.models_settings import get_model_metadata, load_instruction_template, update_model_parameters
|
||||||
from modules.utils import get_available_loras, get_available_models
|
from modules.utils import get_available_loras, get_available_models
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -42,12 +42,10 @@ def model_info_dict(model_name: str) -> dict:
|
||||||
|
|
||||||
def _load_model(data):
|
def _load_model(data):
|
||||||
model_name = data["model_name"]
|
model_name = data["model_name"]
|
||||||
args = data["args"]
|
args = data.get("args")
|
||||||
settings = data["settings"]
|
|
||||||
|
|
||||||
unload_model()
|
unload_model()
|
||||||
model_settings = get_model_metadata(model_name)
|
model_settings = get_model_metadata(model_name)
|
||||||
update_model_parameters(model_settings)
|
|
||||||
|
|
||||||
# Update shared.args with custom model loading settings
|
# Update shared.args with custom model loading settings
|
||||||
# Security: only allow keys that correspond to model loading
|
# Security: only allow keys that correspond to model loading
|
||||||
|
|
@ -55,6 +53,16 @@ def _load_model(data):
|
||||||
# flags like trust_remote_code or extra_flags to be set via the API.
|
# flags like trust_remote_code or extra_flags to be set via the API.
|
||||||
blocked_keys = {'extra_flags'}
|
blocked_keys = {'extra_flags'}
|
||||||
allowed_keys = set(loaders.list_model_elements()) - blocked_keys
|
allowed_keys = set(loaders.list_model_elements()) - blocked_keys
|
||||||
|
|
||||||
|
# Reset all loader args to their startup values before applying new ones,
|
||||||
|
# so settings from a previous API load don't leak into this one.
|
||||||
|
# Include blocked keys in the reset (safe: restores startup value, not API-controlled).
|
||||||
|
for k in allowed_keys | blocked_keys:
|
||||||
|
if hasattr(shared.args, k) and hasattr(shared.original_args, k):
|
||||||
|
setattr(shared.args, k, getattr(shared.original_args, k))
|
||||||
|
|
||||||
|
update_model_parameters(model_settings)
|
||||||
|
|
||||||
if args:
|
if args:
|
||||||
for k in args:
|
for k in args:
|
||||||
if k in allowed_keys and hasattr(shared.args, k):
|
if k in allowed_keys and hasattr(shared.args, k):
|
||||||
|
|
@ -62,15 +70,12 @@ def _load_model(data):
|
||||||
|
|
||||||
shared.model, shared.tokenizer = load_model(model_name)
|
shared.model, shared.tokenizer = load_model(model_name)
|
||||||
|
|
||||||
# Update shared.settings with custom generation defaults
|
if data.get("instruction_template_str") is not None:
|
||||||
if settings:
|
shared.settings['instruction_template_str'] = data["instruction_template_str"]
|
||||||
for k in settings:
|
logger.info("INSTRUCTION TEMPLATE: set to custom Jinja2 string")
|
||||||
if k in shared.settings:
|
elif data.get("instruction_template") is not None:
|
||||||
shared.settings[k] = settings[k]
|
shared.settings['instruction_template_str'] = load_instruction_template(data["instruction_template"])
|
||||||
if k == 'truncation_length':
|
logger.info(f"INSTRUCTION TEMPLATE: {data['instruction_template']}")
|
||||||
logger.info(f"TRUNCATION LENGTH (UPDATED): {shared.settings['truncation_length']}")
|
|
||||||
elif k == 'instruction_template':
|
|
||||||
logger.info(f"INSTRUCTION TEMPLATE (UPDATED): {shared.settings['instruction_template']}")
|
|
||||||
|
|
||||||
|
|
||||||
def list_loras():
|
def list_loras():
|
||||||
|
|
|
||||||
|
|
@ -475,10 +475,8 @@ async def handle_list_models():
|
||||||
@app.post("/v1/internal/model/load", dependencies=check_admin_key)
|
@app.post("/v1/internal/model/load", dependencies=check_admin_key)
|
||||||
async def handle_load_model(request_data: LoadModelRequest):
|
async def handle_load_model(request_data: LoadModelRequest):
|
||||||
'''
|
'''
|
||||||
This endpoint is experimental and may change in the future.
|
The "args" parameter can be used to modify loader flags before loading
|
||||||
|
a model. Example:
|
||||||
The "args" parameter can be used to modify flags like "--load-in-4bit"
|
|
||||||
or "--n-gpu-layers" before loading a model. Example:
|
|
||||||
|
|
||||||
```
|
```
|
||||||
"args": {
|
"args": {
|
||||||
|
|
@ -487,18 +485,13 @@ async def handle_load_model(request_data: LoadModelRequest):
|
||||||
}
|
}
|
||||||
```
|
```
|
||||||
|
|
||||||
Note that those settings will remain after loading the model. So you
|
Loader args are reset to their startup defaults between loads, so
|
||||||
may need to change them back to load a second model.
|
settings from a previous load do not leak into the next one.
|
||||||
|
|
||||||
The "settings" parameter is also a dict but with keys for the
|
The "instruction_template" parameter sets the default instruction
|
||||||
shared.settings object. It can be used to modify the default instruction
|
template by name (from user_data/instruction-templates/). The
|
||||||
template like this:
|
"instruction_template_str" parameter sets it as a raw Jinja2 string
|
||||||
|
and takes precedence over "instruction_template".
|
||||||
```
|
|
||||||
"settings": {
|
|
||||||
"instruction_template": "Alpaca"
|
|
||||||
}
|
|
||||||
```
|
|
||||||
'''
|
'''
|
||||||
|
|
||||||
try:
|
try:
|
||||||
|
|
@ -544,8 +537,8 @@ async def handle_unload_loras():
|
||||||
def find_available_port(starting_port):
|
def find_available_port(starting_port):
|
||||||
"""Try the starting port, then find an available one if it's taken."""
|
"""Try the starting port, then find an available one if it's taken."""
|
||||||
try:
|
try:
|
||||||
# Try to create a socket with the starting port
|
|
||||||
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
||||||
|
s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
||||||
s.bind(('', starting_port))
|
s.bind(('', starting_port))
|
||||||
return starting_port
|
return starting_port
|
||||||
except OSError:
|
except OSError:
|
||||||
|
|
@ -570,7 +563,7 @@ def run_server():
|
||||||
server_addrs.append(shared.args.listen_host)
|
server_addrs.append(shared.args.listen_host)
|
||||||
else:
|
else:
|
||||||
if os.environ.get('OPENEDAI_ENABLE_IPV6', shared.args.api_enable_ipv6):
|
if os.environ.get('OPENEDAI_ENABLE_IPV6', shared.args.api_enable_ipv6):
|
||||||
server_addrs.append('[::]' if shared.args.listen else '[::1]')
|
server_addrs.append('::' if shared.args.listen else '::1')
|
||||||
if not os.environ.get('OPENEDAI_DISABLE_IPV4', shared.args.api_disable_ipv4):
|
if not os.environ.get('OPENEDAI_DISABLE_IPV4', shared.args.api_disable_ipv4):
|
||||||
server_addrs.append('0.0.0.0' if shared.args.listen else '127.0.0.1')
|
server_addrs.append('0.0.0.0' if shared.args.listen else '127.0.0.1')
|
||||||
|
|
||||||
|
|
@ -587,7 +580,7 @@ def run_server():
|
||||||
)
|
)
|
||||||
else:
|
else:
|
||||||
url_proto = 'https://' if (ssl_certfile and ssl_keyfile) else 'http://'
|
url_proto = 'https://' if (ssl_certfile and ssl_keyfile) else 'http://'
|
||||||
urls = [f'{url_proto}{addr}:{port}/v1' for addr in server_addrs]
|
urls = [f'{url_proto}[{addr}]:{port}/v1' if ':' in addr else f'{url_proto}{addr}:{port}/v1' for addr in server_addrs]
|
||||||
if len(urls) > 1:
|
if len(urls) > 1:
|
||||||
logger.info('OpenAI/Anthropic-compatible API URLs:\n\n' + '\n'.join(urls) + '\n')
|
logger.info('OpenAI/Anthropic-compatible API URLs:\n\n' + '\n'.join(urls) + '\n')
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
|
|
@ -271,7 +271,8 @@ class ModelListResponse(BaseModel):
|
||||||
class LoadModelRequest(BaseModel):
|
class LoadModelRequest(BaseModel):
|
||||||
model_name: str
|
model_name: str
|
||||||
args: dict | None = None
|
args: dict | None = None
|
||||||
settings: dict | None = None
|
instruction_template: str | None = Field(default=None, description="An instruction template defined under text-generation-webui/user_data/instruction-templates. Sets the default template for all subsequent API requests.")
|
||||||
|
instruction_template_str: str | None = Field(default=None, description="A Jinja2 instruction template string. If set, takes precedence over instruction_template.")
|
||||||
|
|
||||||
|
|
||||||
class LoraListResponse(BaseModel):
|
class LoraListResponse(BaseModel):
|
||||||
|
|
|
||||||
151
modules/chat.py
151
modules/chat.py
|
|
@ -210,6 +210,57 @@ def _expand_tool_sequence(tool_seq):
|
||||||
return messages
|
return messages
|
||||||
|
|
||||||
|
|
||||||
|
def _convert_to_tool_responses(messages):
|
||||||
|
"""Convert role:'tool' messages to tool_responses format.
|
||||||
|
|
||||||
|
Templates like Gemma 4 expect tool results as a ``tool_responses``
|
||||||
|
attribute on a message rather than separate ``role: 'tool'`` messages.
|
||||||
|
This function groups consecutive tool messages and rewrites them.
|
||||||
|
"""
|
||||||
|
result = []
|
||||||
|
tc_id_to_name = {}
|
||||||
|
|
||||||
|
i = 0
|
||||||
|
while i < len(messages):
|
||||||
|
msg = messages[i]
|
||||||
|
|
||||||
|
if msg.get('tool_calls'):
|
||||||
|
for tc in msg['tool_calls']:
|
||||||
|
tc_id = tc.get('id', '')
|
||||||
|
func_name = tc.get('function', {}).get('name', 'unknown')
|
||||||
|
if tc_id:
|
||||||
|
tc_id_to_name[tc_id] = func_name
|
||||||
|
|
||||||
|
if msg.get('role') == 'tool':
|
||||||
|
tool_responses = []
|
||||||
|
while i < len(messages) and messages[i].get('role') == 'tool':
|
||||||
|
tool_msg = messages[i]
|
||||||
|
tc_id = tool_msg.get('tool_call_id', '')
|
||||||
|
func_name = tc_id_to_name.get(tc_id, 'unknown')
|
||||||
|
|
||||||
|
content = tool_msg.get('content', '')
|
||||||
|
try:
|
||||||
|
response = json.loads(content)
|
||||||
|
except (json.JSONDecodeError, ValueError, TypeError):
|
||||||
|
response = content
|
||||||
|
|
||||||
|
tool_responses.append({
|
||||||
|
'name': func_name,
|
||||||
|
'response': response,
|
||||||
|
})
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
result.append({
|
||||||
|
'role': 'tool',
|
||||||
|
'tool_responses': tool_responses,
|
||||||
|
})
|
||||||
|
else:
|
||||||
|
result.append(msg)
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
return result
|
||||||
|
|
||||||
|
|
||||||
def _format_attachments(attachments, include_text=True):
|
def _format_attachments(attachments, include_text=True):
|
||||||
"""Build image ref and text attachment strings from a list of attachments."""
|
"""Build image ref and text attachment strings from a list of attachments."""
|
||||||
attachments_text = ""
|
attachments_text = ""
|
||||||
|
|
@ -267,6 +318,9 @@ def generate_chat_prompt(user_input, state, **kwargs):
|
||||||
tools=state['tools'] if 'tools' in state else None,
|
tools=state['tools'] if 'tools' in state else None,
|
||||||
)
|
)
|
||||||
|
|
||||||
|
active_template_str = state['instruction_template_str'] if state['mode'] == 'instruct' else chat_template_str
|
||||||
|
uses_tool_responses = 'tool_responses' in active_template_str
|
||||||
|
|
||||||
messages = []
|
messages = []
|
||||||
|
|
||||||
if state['mode'] == 'instruct':
|
if state['mode'] == 'instruct':
|
||||||
|
|
@ -503,6 +557,9 @@ def generate_chat_prompt(user_input, state, **kwargs):
|
||||||
|
|
||||||
return prompt
|
return prompt
|
||||||
|
|
||||||
|
if uses_tool_responses:
|
||||||
|
messages = _convert_to_tool_responses(messages)
|
||||||
|
|
||||||
prompt = make_prompt(messages)
|
prompt = make_prompt(messages)
|
||||||
|
|
||||||
# Handle truncation
|
# Handle truncation
|
||||||
|
|
@ -511,13 +568,24 @@ def generate_chat_prompt(user_input, state, **kwargs):
|
||||||
encoded_length = get_encoded_length(prompt)
|
encoded_length = get_encoded_length(prompt)
|
||||||
while len(messages) > 0 and encoded_length > max_length:
|
while len(messages) > 0 and encoded_length > max_length:
|
||||||
|
|
||||||
# Remove old message, save system message
|
|
||||||
if len(messages) > 2 and messages[0]['role'] == 'system':
|
if len(messages) > 2 and messages[0]['role'] == 'system':
|
||||||
messages.pop(1)
|
pop_idx = 1
|
||||||
|
|
||||||
# Remove old message when no system message is present
|
|
||||||
elif len(messages) > 1 and messages[0]['role'] != 'system':
|
elif len(messages) > 1 and messages[0]['role'] != 'system':
|
||||||
messages.pop(0)
|
pop_idx = 0
|
||||||
|
else:
|
||||||
|
pop_idx = None
|
||||||
|
|
||||||
|
if pop_idx is not None:
|
||||||
|
messages.pop(pop_idx)
|
||||||
|
|
||||||
|
# Remove orphaned tool-call/tool-result messages that
|
||||||
|
# would be invalid without their partner.
|
||||||
|
while pop_idx < len(messages):
|
||||||
|
msg = messages[pop_idx]
|
||||||
|
if msg.get('role') == 'tool' or (msg.get('role') == 'assistant' and msg.get('tool_calls')):
|
||||||
|
messages.pop(pop_idx)
|
||||||
|
else:
|
||||||
|
break
|
||||||
|
|
||||||
# Resort to truncating the user input
|
# Resort to truncating the user input
|
||||||
else:
|
else:
|
||||||
|
|
@ -637,7 +705,7 @@ def get_stopping_strings(state):
|
||||||
# Find positions of each message content
|
# Find positions of each message content
|
||||||
first_user_end = prompt.find("first user message") + len("first user message")
|
first_user_end = prompt.find("first user message") + len("first user message")
|
||||||
first_assistant_start = prompt.find("first assistant message")
|
first_assistant_start = prompt.find("first assistant message")
|
||||||
first_assistant_end = prompt.find("first assistant message") + len("first assistant message")
|
first_assistant_end = first_assistant_start + len("first assistant message")
|
||||||
second_user_start = prompt.find("second user message")
|
second_user_start = prompt.find("second user message")
|
||||||
second_assistant_end = prompt.find("second assistant message") + len("second assistant message")
|
second_assistant_end = prompt.find("second assistant message") + len("second assistant message")
|
||||||
|
|
||||||
|
|
@ -671,7 +739,10 @@ def get_stopping_strings(state):
|
||||||
# Handle GPT-OSS as a special case
|
# Handle GPT-OSS as a special case
|
||||||
if '<|channel|>final<|message|>' in state['instruction_template_str'] and "<|end|>" in result:
|
if '<|channel|>final<|message|>' in state['instruction_template_str'] and "<|end|>" in result:
|
||||||
result.remove("<|end|>")
|
result.remove("<|end|>")
|
||||||
result.append("<|result|>")
|
if '<|result|>' in state['instruction_template_str']:
|
||||||
|
result.append("<|result|>")
|
||||||
|
elif '<|return|>' in state['instruction_template_str']:
|
||||||
|
result.append("<|return|>")
|
||||||
result = list(set(result))
|
result = list(set(result))
|
||||||
|
|
||||||
if shared.args.verbose:
|
if shared.args.verbose:
|
||||||
|
|
@ -1123,7 +1194,7 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
|
||||||
# visible text from before buffering started so raw markup doesn't flash
|
# visible text from before buffering started so raw markup doesn't flash
|
||||||
# in the UI. The internal text is left intact so the caller can still
|
# in the UI. The internal text is left intact so the caller can still
|
||||||
# parse tool calls from it.
|
# parse tool calls from it.
|
||||||
if is_stream and _check_tool_markers and streaming_tool_buffer_check(output['internal'][-1][1], markers=_streaming_markers, tool_names=_tool_names, check_bare_names=_check_bare_names):
|
if is_stream and _check_tool_markers and streaming_tool_buffer_check(output['internal'][-1][1], markers=_streaming_markers, tool_names=_tool_names, check_bare_names=_check_bare_names, partial_match=False):
|
||||||
output['visible'][-1][1] = _last_visible_before_tool_buffer or ''
|
output['visible'][-1][1] = _last_visible_before_tool_buffer or ''
|
||||||
|
|
||||||
yield output
|
yield output
|
||||||
|
|
@ -1204,14 +1275,23 @@ def generate_chat_reply_wrapper(text, state, regenerate=False, _continue=False):
|
||||||
|
|
||||||
# Load tools if any are selected
|
# Load tools if any are selected
|
||||||
selected = state.get('selected_tools', [])
|
selected = state.get('selected_tools', [])
|
||||||
|
mcp_servers = state.get('mcp_servers', '')
|
||||||
parse_tool_call = None
|
parse_tool_call = None
|
||||||
_tool_parsers = None
|
_tool_parsers = None
|
||||||
if selected:
|
if selected or mcp_servers:
|
||||||
from modules.tool_use import load_tools, execute_tool
|
from modules.tool_use import load_tools, load_mcp_tools, execute_tool
|
||||||
from modules.tool_parsing import parse_tool_call, get_tool_call_id, detect_tool_call_format
|
from modules.tool_parsing import parse_tool_call, get_tool_call_id, detect_tool_call_format
|
||||||
|
|
||||||
if selected:
|
|
||||||
tool_defs, tool_executors = load_tools(selected)
|
tool_defs, tool_executors = load_tools(selected)
|
||||||
|
if mcp_servers:
|
||||||
|
mcp_defs, mcp_executors = load_mcp_tools(mcp_servers)
|
||||||
|
for td in mcp_defs:
|
||||||
|
fn = td['function']['name']
|
||||||
|
if fn in tool_executors:
|
||||||
|
logger.warning(f'MCP tool "{fn}" conflicts with a local tool. Skipping.')
|
||||||
|
continue
|
||||||
|
tool_defs.append(td)
|
||||||
|
tool_executors[fn] = mcp_executors[fn]
|
||||||
state['tools'] = tool_defs
|
state['tools'] = tool_defs
|
||||||
tool_func_names = [t['function']['name'] for t in tool_defs]
|
tool_func_names = [t['function']['name'] for t in tool_defs]
|
||||||
_template_str = state.get('instruction_template_str', '') if state.get('mode') == 'instruct' else state.get('chat_template_str', '')
|
_template_str = state.get('instruction_template_str', '') if state.get('mode') == 'instruct' else state.get('chat_template_str', '')
|
||||||
|
|
@ -1759,7 +1839,8 @@ def load_history(unique_id, character, mode):
|
||||||
if not p.exists():
|
if not p.exists():
|
||||||
return {'internal': [], 'visible': [], 'metadata': {}}
|
return {'internal': [], 'visible': [], 'metadata': {}}
|
||||||
|
|
||||||
f = json.loads(open(p, 'rb').read())
|
with open(p, 'rb') as fh:
|
||||||
|
f = json.loads(fh.read())
|
||||||
if 'internal' in f and 'visible' in f:
|
if 'internal' in f and 'visible' in f:
|
||||||
history = f
|
history = f
|
||||||
else:
|
else:
|
||||||
|
|
@ -1823,19 +1904,17 @@ def generate_pfp_cache(character):
|
||||||
if not cache_folder.exists():
|
if not cache_folder.exists():
|
||||||
cache_folder.mkdir()
|
cache_folder.mkdir()
|
||||||
|
|
||||||
for path in [shared.user_data_dir / 'characters' / f"{character}.{extension}" for extension in ['png', 'jpg', 'jpeg']]:
|
for extension in ['png', 'jpg', 'jpeg']:
|
||||||
|
path = shared.user_data_dir / 'characters' / f"{character}.{extension}"
|
||||||
if path.exists():
|
if path.exists():
|
||||||
original_img = Image.open(path)
|
original_img = Image.open(path)
|
||||||
# Define file paths
|
pfp_path = cache_folder / 'pfp_character.png'
|
||||||
pfp_path = Path(f'{cache_folder}/pfp_character.png')
|
thumb_path = cache_folder / 'pfp_character_thumb.png'
|
||||||
thumb_path = Path(f'{cache_folder}/pfp_character_thumb.png')
|
|
||||||
|
|
||||||
# Save main picture and thumbnail
|
|
||||||
original_img.save(pfp_path, format='PNG')
|
original_img.save(pfp_path, format='PNG')
|
||||||
thumb = make_thumbnail(original_img)
|
thumb = make_thumbnail(original_img)
|
||||||
thumb.save(thumb_path, format='PNG')
|
thumb.save(thumb_path, format='PNG')
|
||||||
|
|
||||||
# Return the path to the thumbnail, not the in-memory PIL Image object.
|
|
||||||
return str(thumb_path)
|
return str(thumb_path)
|
||||||
|
|
||||||
return None
|
return None
|
||||||
|
|
@ -1856,13 +1935,13 @@ def load_character(character, name1, name2):
|
||||||
logger.error(f"Could not find the character \"{character}\" inside {shared.user_data_dir}/characters. No character has been loaded.")
|
logger.error(f"Could not find the character \"{character}\" inside {shared.user_data_dir}/characters. No character has been loaded.")
|
||||||
raise ValueError
|
raise ValueError
|
||||||
|
|
||||||
file_contents = open(filepath, 'r', encoding='utf-8').read()
|
with open(filepath, 'r', encoding='utf-8') as fh:
|
||||||
|
file_contents = fh.read()
|
||||||
data = json.loads(file_contents) if extension == "json" else yaml.safe_load(file_contents)
|
data = json.loads(file_contents) if extension == "json" else yaml.safe_load(file_contents)
|
||||||
cache_folder = Path(shared.args.disk_cache_dir)
|
cache_folder = Path(shared.args.disk_cache_dir)
|
||||||
|
|
||||||
for path in [Path(f"{cache_folder}/pfp_character.png"), Path(f"{cache_folder}/pfp_character_thumb.png")]:
|
for path in [cache_folder / "pfp_character.png", cache_folder / "pfp_character_thumb.png"]:
|
||||||
if path.exists():
|
path.unlink(missing_ok=True)
|
||||||
path.unlink()
|
|
||||||
|
|
||||||
picture = generate_pfp_cache(character)
|
picture = generate_pfp_cache(character)
|
||||||
|
|
||||||
|
|
@ -1918,9 +1997,7 @@ def clear_character_for_ui(state):
|
||||||
# Clear the cache files
|
# Clear the cache files
|
||||||
cache_folder = Path(shared.args.disk_cache_dir)
|
cache_folder = Path(shared.args.disk_cache_dir)
|
||||||
for cache_file in ['pfp_character.png', 'pfp_character_thumb.png']:
|
for cache_file in ['pfp_character.png', 'pfp_character_thumb.png']:
|
||||||
cache_path = Path(f'{cache_folder}/{cache_file}')
|
(cache_folder / cache_file).unlink(missing_ok=True)
|
||||||
if cache_path.exists():
|
|
||||||
cache_path.unlink()
|
|
||||||
|
|
||||||
return state, state['name2'], state['context'], state['greeting'], None
|
return state, state['name2'], state['context'], state['greeting'], None
|
||||||
|
|
||||||
|
|
@ -2015,11 +2092,10 @@ def upload_your_profile_picture(img_path):
|
||||||
cache_folder.mkdir()
|
cache_folder.mkdir()
|
||||||
|
|
||||||
if img is None:
|
if img is None:
|
||||||
if Path(f"{cache_folder}/pfp_me.png").exists():
|
(cache_folder / "pfp_me.png").unlink(missing_ok=True)
|
||||||
Path(f"{cache_folder}/pfp_me.png").unlink()
|
|
||||||
else:
|
else:
|
||||||
img = make_thumbnail(img)
|
img = make_thumbnail(img)
|
||||||
img.save(Path(f'{cache_folder}/pfp_me.png'))
|
img.save(cache_folder / 'pfp_me.png')
|
||||||
logger.info(f'Profile picture saved to "{cache_folder}/pfp_me.png"')
|
logger.info(f'Profile picture saved to "{cache_folder}/pfp_me.png"')
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -2075,13 +2151,12 @@ def generate_user_pfp_cache(user):
|
||||||
if not cache_folder.exists():
|
if not cache_folder.exists():
|
||||||
cache_folder.mkdir()
|
cache_folder.mkdir()
|
||||||
|
|
||||||
for path in [shared.user_data_dir / 'users' / f"{user}.{extension}" for extension in ['png', 'jpg', 'jpeg']]:
|
for extension in ['png', 'jpg', 'jpeg']:
|
||||||
|
path = shared.user_data_dir / 'users' / f"{user}.{extension}"
|
||||||
if path.exists():
|
if path.exists():
|
||||||
original_img = Image.open(path)
|
original_img = Image.open(path)
|
||||||
# Define file paths
|
pfp_path = cache_folder / 'pfp_me.png'
|
||||||
pfp_path = Path(f'{cache_folder}/pfp_me.png')
|
|
||||||
|
|
||||||
# Save thumbnail
|
|
||||||
thumb = make_thumbnail(original_img)
|
thumb = make_thumbnail(original_img)
|
||||||
thumb.save(pfp_path, format='PNG')
|
thumb.save(pfp_path, format='PNG')
|
||||||
logger.info(f'User profile picture cached to "{pfp_path}"')
|
logger.info(f'User profile picture cached to "{pfp_path}"')
|
||||||
|
|
@ -2113,9 +2188,7 @@ def load_user(user_name, name1, user_bio):
|
||||||
|
|
||||||
# Clear existing user picture cache
|
# Clear existing user picture cache
|
||||||
cache_folder = Path(shared.args.disk_cache_dir)
|
cache_folder = Path(shared.args.disk_cache_dir)
|
||||||
pfp_path = Path(f"{cache_folder}/pfp_me.png")
|
(cache_folder / "pfp_me.png").unlink(missing_ok=True)
|
||||||
if pfp_path.exists():
|
|
||||||
pfp_path.unlink()
|
|
||||||
|
|
||||||
# Generate new picture cache
|
# Generate new picture cache
|
||||||
picture = generate_user_pfp_cache(user_name)
|
picture = generate_user_pfp_cache(user_name)
|
||||||
|
|
@ -2539,15 +2612,13 @@ def handle_character_picture_change(picture_path):
|
||||||
|
|
||||||
if picture is not None:
|
if picture is not None:
|
||||||
# Save to cache
|
# Save to cache
|
||||||
picture.save(Path(f'{cache_folder}/pfp_character.png'), format='PNG')
|
picture.save(cache_folder / 'pfp_character.png', format='PNG')
|
||||||
thumb = make_thumbnail(picture)
|
thumb = make_thumbnail(picture)
|
||||||
thumb.save(Path(f'{cache_folder}/pfp_character_thumb.png'), format='PNG')
|
thumb.save(cache_folder / 'pfp_character_thumb.png', format='PNG')
|
||||||
else:
|
else:
|
||||||
# Remove cache files when picture is cleared
|
# Remove cache files when picture is cleared
|
||||||
for cache_file in ['pfp_character.png', 'pfp_character_thumb.png']:
|
for cache_file in ['pfp_character.png', 'pfp_character_thumb.png']:
|
||||||
cache_path = Path(f'{cache_folder}/{cache_file}')
|
(cache_folder / cache_file).unlink(missing_ok=True)
|
||||||
if cache_path.exists():
|
|
||||||
cache_path.unlink()
|
|
||||||
|
|
||||||
|
|
||||||
def handle_mode_change(state):
|
def handle_mode_change(state):
|
||||||
|
|
|
||||||
|
|
@ -423,6 +423,15 @@ class Exllamav3Model:
|
||||||
if logit_bias:
|
if logit_bias:
|
||||||
filters.append(LogitBiasFilter(self.tokenizer, logit_bias))
|
filters.append(LogitBiasFilter(self.tokenizer, logit_bias))
|
||||||
|
|
||||||
|
# Suppress EOS tokens via logit bias so they are never sampled
|
||||||
|
if state['ban_eos_token']:
|
||||||
|
eos_bias = {}
|
||||||
|
for eos_id in self.config.eos_token_id_list:
|
||||||
|
if eos_id is not None:
|
||||||
|
eos_bias[str(eos_id)] = float('-inf')
|
||||||
|
if eos_bias:
|
||||||
|
filters.append(LogitBiasFilter(self.tokenizer, eos_bias))
|
||||||
|
|
||||||
# Logprobs support (OpenAI API)
|
# Logprobs support (OpenAI API)
|
||||||
logprobs = state.get('logprobs', 0) or 0
|
logprobs = state.get('logprobs', 0) or 0
|
||||||
return_top_tokens = logprobs if logprobs > 0 else 0
|
return_top_tokens = logprobs if logprobs > 0 else 0
|
||||||
|
|
@ -480,15 +489,35 @@ class Exllamav3Model:
|
||||||
return
|
return
|
||||||
|
|
||||||
id_to_piece = self.tokenizer.get_id_to_piece_list(True)
|
id_to_piece = self.tokenizer.get_id_to_piece_list(True)
|
||||||
|
sampled_ids = result.get("token_ids") # (batch, seq_len) - actually sampled tokens
|
||||||
|
sampled_probs = result.get("token_probs") # (batch, seq_len) - their probabilities
|
||||||
|
|
||||||
|
def _piece(tid):
|
||||||
|
s = id_to_piece[tid] if tid < len(id_to_piece) else f"<{tid}>"
|
||||||
|
return s.replace('\u2581', ' ')
|
||||||
|
|
||||||
|
def _logprob(prob):
|
||||||
|
return math.log(prob) if prob > 0 else float("-inf")
|
||||||
|
|
||||||
# top_k_tokens shape: (batch, seq_len, k), top_k_probs same
|
# top_k_tokens shape: (batch, seq_len, k), top_k_probs same
|
||||||
for seq_idx in range(top_k_tokens.shape[1]):
|
for seq_idx in range(top_k_tokens.shape[1]):
|
||||||
entry = {"top_logprobs": []}
|
entry = {"top_logprobs": []}
|
||||||
for k_idx in range(top_k_tokens.shape[2]):
|
for k_idx in range(top_k_tokens.shape[2]):
|
||||||
token_id = top_k_tokens[0, seq_idx, k_idx].item()
|
token_id = top_k_tokens[0, seq_idx, k_idx].item()
|
||||||
prob = top_k_probs[0, seq_idx, k_idx].item()
|
prob = top_k_probs[0, seq_idx, k_idx].item()
|
||||||
token_str = id_to_piece[token_id] if token_id < len(id_to_piece) else f"<{token_id}>"
|
entry["top_logprobs"].append({"token": _piece(token_id), "logprob": _logprob(prob)})
|
||||||
logprob = math.log(prob) if prob > 0 else float("-inf")
|
|
||||||
entry["top_logprobs"].append({"token": token_str, "logprob": logprob})
|
# Record the actually sampled token at the entry level so
|
||||||
|
# format_completion_logprobs uses it instead of top_logprobs[0]
|
||||||
|
# (they differ with non-greedy sampling).
|
||||||
|
if sampled_ids is not None:
|
||||||
|
sid = sampled_ids[0, seq_idx].item()
|
||||||
|
entry["token"] = _piece(sid)
|
||||||
|
if sampled_probs is not None:
|
||||||
|
entry["logprob"] = _logprob(sampled_probs[0, seq_idx].item())
|
||||||
|
else:
|
||||||
|
entry["logprob"] = None
|
||||||
|
|
||||||
self.last_completion_probabilities.append(entry)
|
self.last_completion_probabilities.append(entry)
|
||||||
|
|
||||||
def generate(self, prompt, state):
|
def generate(self, prompt, state):
|
||||||
|
|
@ -498,42 +527,31 @@ class Exllamav3Model:
|
||||||
|
|
||||||
return output
|
return output
|
||||||
|
|
||||||
|
def get_prompt_logits(self, input_ids):
|
||||||
|
"""Return logits for all positions via a single no-cache forward pass.
|
||||||
|
|
||||||
|
Used by prompt logprobs computation. Returns (1, seq_len, vocab) on CPU in float32.
|
||||||
|
"""
|
||||||
|
import torch
|
||||||
|
input_ids_tensor = input_ids if isinstance(input_ids, torch.Tensor) else torch.tensor(input_ids, dtype=torch.long)
|
||||||
|
input_ids_tensor = input_ids_tensor.view(1, -1).cpu()
|
||||||
|
with torch.no_grad():
|
||||||
|
return self.model.forward(
|
||||||
|
input_ids=input_ids_tensor,
|
||||||
|
params={"attn_mode": "flash_attn_nc"}
|
||||||
|
).cpu().float()
|
||||||
|
|
||||||
def get_logits(self, token_ids, **kwargs):
|
def get_logits(self, token_ids, **kwargs):
|
||||||
"""
|
"""
|
||||||
Process a batch of token_ids and return the logits for the last token.
|
Process a batch of token_ids and return the logits for the last token.
|
||||||
This will reset and overwrite the model's cache.
|
Uses flash_attn_nc (no cache) for correct results with recurrent models.
|
||||||
"""
|
"""
|
||||||
# Initialize a single params dictionary that will be updated in-place
|
|
||||||
params = {
|
|
||||||
"cache": self.cache,
|
|
||||||
"reconstruct": False,
|
|
||||||
"attn_mode": "flash_attn",
|
|
||||||
"batch_shape": (1, self.max_tokens),
|
|
||||||
"past_len": 0
|
|
||||||
}
|
|
||||||
params.update(kwargs)
|
|
||||||
|
|
||||||
# Process prefix tokens to fill the cache and generate recurrent state
|
|
||||||
if token_ids.shape[-1] > 1:
|
|
||||||
prefix_ids = token_ids[:, :-1]
|
|
||||||
|
|
||||||
# This forward call updates the 'params' dict with the recurrent state
|
|
||||||
self.model.forward(
|
|
||||||
input_ids=prefix_ids,
|
|
||||||
params=params
|
|
||||||
)
|
|
||||||
|
|
||||||
# Update past_len for the next call
|
|
||||||
params["past_len"] = prefix_ids.shape[-1]
|
|
||||||
|
|
||||||
# Process the last token, now using the state-filled 'params' dict
|
|
||||||
last_token_ids = token_ids[:, -1:]
|
|
||||||
logits = self.model.forward(
|
logits = self.model.forward(
|
||||||
input_ids=last_token_ids,
|
input_ids=token_ids,
|
||||||
params=params
|
params={"attn_mode": "flash_attn_nc"}
|
||||||
)
|
)
|
||||||
|
|
||||||
return logits.float().cpu()
|
return logits[:, -1:, :].float().cpu()
|
||||||
|
|
||||||
def encode(self, string, **kwargs):
|
def encode(self, string, **kwargs):
|
||||||
add_bos = kwargs.pop('add_bos', True)
|
add_bos = kwargs.pop('add_bos', True)
|
||||||
|
|
|
||||||
|
|
@ -26,6 +26,9 @@ except Exception:
|
||||||
class Exllamav3HF(PreTrainedModel, GenerationMixin):
|
class Exllamav3HF(PreTrainedModel, GenerationMixin):
|
||||||
def __init__(self, model_dir):
|
def __init__(self, model_dir):
|
||||||
hf_config = PretrainedConfig.from_pretrained(model_dir)
|
hf_config = PretrainedConfig.from_pretrained(model_dir)
|
||||||
|
# Ensure text_config is a proper object, not a dict (fixes qwen3_5_moe + transformers compat)
|
||||||
|
if isinstance(getattr(hf_config, 'text_config', None), dict):
|
||||||
|
hf_config.text_config = PretrainedConfig(**hf_config.text_config)
|
||||||
super().__init__(hf_config)
|
super().__init__(hf_config)
|
||||||
|
|
||||||
exl3_config = Config.from_directory(model_dir)
|
exl3_config = Config.from_directory(model_dir)
|
||||||
|
|
@ -199,30 +202,11 @@ class Exllamav3HF(PreTrainedModel, GenerationMixin):
|
||||||
}
|
}
|
||||||
).to(input_ids.device).float()
|
).to(input_ids.device).float()
|
||||||
else:
|
else:
|
||||||
# Labels path: use cache for cross-chunk attention.
|
# Labels path: single pass without cache for correct logits
|
||||||
tokens_to_process = seq_tensor
|
logits = self.ex_model.forward(
|
||||||
all_logits = None
|
input_ids=seq_tensor.view(1, -1),
|
||||||
current_len = 0
|
params={"attn_mode": "flash_attn_nc"}
|
||||||
|
).float().cpu()
|
||||||
for i in range(0, tokens_to_process.shape[0], max_chunk_size):
|
|
||||||
chunk = tokens_to_process[i:i + max_chunk_size]
|
|
||||||
chunk_logits = self.ex_model.forward(
|
|
||||||
input_ids=chunk.view(1, -1),
|
|
||||||
params={
|
|
||||||
"attn_mode": "flash_attn",
|
|
||||||
"cache": ex_cache,
|
|
||||||
"past_len": current_len,
|
|
||||||
"batch_shape": (1, self.max_tokens),
|
|
||||||
}
|
|
||||||
).float()
|
|
||||||
current_len += chunk.shape[0]
|
|
||||||
|
|
||||||
if all_logits is None:
|
|
||||||
all_logits = chunk_logits
|
|
||||||
else:
|
|
||||||
all_logits = torch.cat([all_logits, chunk_logits], dim=1)
|
|
||||||
|
|
||||||
logits = all_logits
|
|
||||||
|
|
||||||
if is_negative:
|
if is_negative:
|
||||||
self.past_seq_negative = seq_tensor
|
self.past_seq_negative = seq_tensor
|
||||||
|
|
|
||||||
|
|
@ -191,21 +191,19 @@ def _apply_custom_generate_reply():
|
||||||
|
|
||||||
|
|
||||||
def _apply_custom_css():
|
def _apply_custom_css():
|
||||||
all_css = ''
|
return ''.join(
|
||||||
for extension, _ in iterator():
|
getattr(extension, 'custom_css')()
|
||||||
if hasattr(extension, 'custom_css'):
|
for extension, _ in iterator()
|
||||||
all_css += getattr(extension, 'custom_css')()
|
if hasattr(extension, 'custom_css')
|
||||||
|
)
|
||||||
return all_css
|
|
||||||
|
|
||||||
|
|
||||||
def _apply_custom_js():
|
def _apply_custom_js():
|
||||||
all_js = ''
|
return ''.join(
|
||||||
for extension, _ in iterator():
|
getattr(extension, 'custom_js')()
|
||||||
if hasattr(extension, 'custom_js'):
|
for extension, _ in iterator()
|
||||||
all_js += getattr(extension, 'custom_js')()
|
if hasattr(extension, 'custom_js')
|
||||||
|
)
|
||||||
return all_js
|
|
||||||
|
|
||||||
|
|
||||||
def create_extensions_block():
|
def create_extensions_block():
|
||||||
|
|
|
||||||
|
|
@ -14,6 +14,13 @@ from modules.reasoning import extract_reasoning
|
||||||
from modules.sane_markdown_lists import SaneListExtension
|
from modules.sane_markdown_lists import SaneListExtension
|
||||||
from modules.utils import get_available_chat_styles
|
from modules.utils import get_available_chat_styles
|
||||||
|
|
||||||
|
# Pre-compiled regex for protecting markdown-sensitive characters inside LaTeX.
|
||||||
|
# Covers $$...$$, \[...\], \(...\), and inline $...$ (when content contains \\).
|
||||||
|
_LATEX_PATTERN = re.compile(
|
||||||
|
r'((?:^|[\r\n\s])\$\$[^`]*?\$\$)|\\\[(.*?)\\\]|\\\((.*?)\\\)|(?<!\$)\$(?!\$)([^\$\n]*\\\\[^\$\n]*?)\$(?!\$)',
|
||||||
|
re.DOTALL
|
||||||
|
)
|
||||||
|
|
||||||
# This is to store the paths to the thumbnails of the profile pictures
|
# This is to store the paths to the thumbnails of the profile pictures
|
||||||
image_cache = {}
|
image_cache = {}
|
||||||
|
|
||||||
|
|
@ -185,28 +192,29 @@ def process_markdown_content(string):
|
||||||
if not string:
|
if not string:
|
||||||
return ""
|
return ""
|
||||||
|
|
||||||
# Define unique placeholders for LaTeX asterisks and underscores
|
# Define unique placeholders for LaTeX characters that conflict with markdown
|
||||||
LATEX_ASTERISK_PLACEHOLDER = "LATEXASTERISKPLACEHOLDER"
|
LATEX_ASTERISK_PLACEHOLDER = "LATEXASTERISKPLACEHOLDER"
|
||||||
LATEX_UNDERSCORE_PLACEHOLDER = "LATEXUNDERSCOREPLACEHOLDER"
|
LATEX_UNDERSCORE_PLACEHOLDER = "LATEXUNDERSCOREPLACEHOLDER"
|
||||||
|
LATEX_PIPE_PLACEHOLDER = "LATEXPIPEPLACEHOLDER"
|
||||||
|
|
||||||
|
def protect_latex_content(content):
|
||||||
|
"""Protect markdown-sensitive characters inside LaTeX."""
|
||||||
|
content = content.replace('*', LATEX_ASTERISK_PLACEHOLDER)
|
||||||
|
content = content.replace('_', LATEX_UNDERSCORE_PLACEHOLDER)
|
||||||
|
content = content.replace('|', LATEX_PIPE_PLACEHOLDER)
|
||||||
|
return content
|
||||||
|
|
||||||
def protect_asterisks_underscores_in_latex(match):
|
def protect_asterisks_underscores_in_latex(match):
|
||||||
"""A replacer function for re.sub to protect asterisks and underscores in multiple LaTeX formats."""
|
"""A replacer function for re.sub to protect markdown-sensitive characters in multiple LaTeX formats."""
|
||||||
# Check which delimiter group was captured
|
# Check which delimiter group was captured
|
||||||
if match.group(1) is not None: # Content from $$...$$
|
if match.group(1) is not None: # Content from $$...$$
|
||||||
content = match.group(1)
|
return protect_latex_content(match.group(1))
|
||||||
modified_content = content.replace('*', LATEX_ASTERISK_PLACEHOLDER)
|
|
||||||
modified_content = modified_content.replace('_', LATEX_UNDERSCORE_PLACEHOLDER)
|
|
||||||
return f'{modified_content}'
|
|
||||||
elif match.group(2) is not None: # Content from \[...\]
|
elif match.group(2) is not None: # Content from \[...\]
|
||||||
content = match.group(2)
|
return f'\\[{protect_latex_content(match.group(2))}\\]'
|
||||||
modified_content = content.replace('*', LATEX_ASTERISK_PLACEHOLDER)
|
|
||||||
modified_content = modified_content.replace('_', LATEX_UNDERSCORE_PLACEHOLDER)
|
|
||||||
return f'\\[{modified_content}\\]'
|
|
||||||
elif match.group(3) is not None: # Content from \(...\)
|
elif match.group(3) is not None: # Content from \(...\)
|
||||||
content = match.group(3)
|
return f'\\({protect_latex_content(match.group(3))}\\)'
|
||||||
modified_content = content.replace('*', LATEX_ASTERISK_PLACEHOLDER)
|
elif match.group(4) is not None: # Content from $...$
|
||||||
modified_content = modified_content.replace('_', LATEX_UNDERSCORE_PLACEHOLDER)
|
return f'${protect_latex_content(match.group(4).strip())}$'
|
||||||
return f'\\({modified_content}\\)'
|
|
||||||
|
|
||||||
return match.group(0) # Fallback
|
return match.group(0) # Fallback
|
||||||
|
|
||||||
|
|
@ -240,9 +248,7 @@ def process_markdown_content(string):
|
||||||
string = re.sub(r"(.)```", r"\1\n```", string)
|
string = re.sub(r"(.)```", r"\1\n```", string)
|
||||||
|
|
||||||
# Protect asterisks and underscores within all LaTeX blocks before markdown conversion
|
# Protect asterisks and underscores within all LaTeX blocks before markdown conversion
|
||||||
latex_pattern = re.compile(r'((?:^|[\r\n\s])\$\$[^`]*?\$\$)|\\\[(.*?)\\\]|\\\((.*?)\\\)',
|
string = _LATEX_PATTERN.sub(protect_asterisks_underscores_in_latex, string)
|
||||||
re.DOTALL)
|
|
||||||
string = latex_pattern.sub(protect_asterisks_underscores_in_latex, string)
|
|
||||||
|
|
||||||
result = ''
|
result = ''
|
||||||
is_code = False
|
is_code = False
|
||||||
|
|
@ -306,6 +312,7 @@ def process_markdown_content(string):
|
||||||
# Restore the LaTeX asterisks and underscores after markdown conversion
|
# Restore the LaTeX asterisks and underscores after markdown conversion
|
||||||
html_output = html_output.replace(LATEX_ASTERISK_PLACEHOLDER, '*')
|
html_output = html_output.replace(LATEX_ASTERISK_PLACEHOLDER, '*')
|
||||||
html_output = html_output.replace(LATEX_UNDERSCORE_PLACEHOLDER, '_')
|
html_output = html_output.replace(LATEX_UNDERSCORE_PLACEHOLDER, '_')
|
||||||
|
html_output = html_output.replace(LATEX_PIPE_PLACEHOLDER, '|')
|
||||||
|
|
||||||
# Remove extra newlines before </code>
|
# Remove extra newlines before </code>
|
||||||
html_output = re.sub(r'\s*</code>', '</code>', html_output)
|
html_output = re.sub(r'\s*</code>', '</code>', html_output)
|
||||||
|
|
|
||||||
|
|
@ -10,72 +10,49 @@ def get_quantization_config(quant_method):
|
||||||
Get the appropriate quantization config based on the selected method.
|
Get the appropriate quantization config based on the selected method.
|
||||||
Applies quantization to both the transformer and the text_encoder.
|
Applies quantization to both the transformer and the text_encoder.
|
||||||
"""
|
"""
|
||||||
|
if quant_method == 'none' or not quant_method:
|
||||||
|
return None
|
||||||
|
|
||||||
import torch
|
import torch
|
||||||
# Import BitsAndBytesConfig from BOTH libraries to be safe
|
|
||||||
from diffusers import BitsAndBytesConfig as DiffusersBnBConfig
|
from diffusers import BitsAndBytesConfig as DiffusersBnBConfig
|
||||||
from diffusers import TorchAoConfig
|
from diffusers import TorchAoConfig
|
||||||
from diffusers.quantizers import PipelineQuantizationConfig
|
from diffusers.quantizers import PipelineQuantizationConfig
|
||||||
from transformers import BitsAndBytesConfig as TransformersBnBConfig
|
from transformers import BitsAndBytesConfig as TransformersBnBConfig
|
||||||
|
|
||||||
if quant_method == 'none' or not quant_method:
|
torchao_methods = {
|
||||||
return None
|
'torchao-int8wo': 'int8wo',
|
||||||
|
'torchao-fp4': 'fp4_e2m1',
|
||||||
|
'torchao-float8wo': 'float8wo',
|
||||||
|
}
|
||||||
|
|
||||||
# Bitsandbytes 8-bit quantization
|
if quant_method == 'bnb-8bit':
|
||||||
elif quant_method == 'bnb-8bit':
|
|
||||||
return PipelineQuantizationConfig(
|
return PipelineQuantizationConfig(
|
||||||
quant_mapping={
|
quant_mapping={
|
||||||
"transformer": DiffusersBnBConfig(
|
"transformer": DiffusersBnBConfig(load_in_8bit=True),
|
||||||
load_in_8bit=True
|
"text_encoder": TransformersBnBConfig(load_in_8bit=True)
|
||||||
),
|
|
||||||
"text_encoder": TransformersBnBConfig(
|
|
||||||
load_in_8bit=True
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
# Bitsandbytes 4-bit quantization
|
|
||||||
elif quant_method == 'bnb-4bit':
|
elif quant_method == 'bnb-4bit':
|
||||||
|
bnb_4bit_kwargs = dict(
|
||||||
|
load_in_4bit=True,
|
||||||
|
bnb_4bit_quant_type="nf4",
|
||||||
|
bnb_4bit_compute_dtype=torch.bfloat16,
|
||||||
|
bnb_4bit_use_double_quant=True
|
||||||
|
)
|
||||||
return PipelineQuantizationConfig(
|
return PipelineQuantizationConfig(
|
||||||
quant_mapping={
|
quant_mapping={
|
||||||
"transformer": DiffusersBnBConfig(
|
"transformer": DiffusersBnBConfig(**bnb_4bit_kwargs),
|
||||||
load_in_4bit=True,
|
"text_encoder": TransformersBnBConfig(**bnb_4bit_kwargs)
|
||||||
bnb_4bit_quant_type="nf4",
|
|
||||||
bnb_4bit_compute_dtype=torch.bfloat16,
|
|
||||||
bnb_4bit_use_double_quant=True
|
|
||||||
),
|
|
||||||
"text_encoder": TransformersBnBConfig(
|
|
||||||
load_in_4bit=True,
|
|
||||||
bnb_4bit_quant_type="nf4",
|
|
||||||
bnb_4bit_compute_dtype=torch.bfloat16,
|
|
||||||
bnb_4bit_use_double_quant=True
|
|
||||||
)
|
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
# torchao int8 weight-only
|
elif quant_method in torchao_methods:
|
||||||
elif quant_method == 'torchao-int8wo':
|
ao_type = torchao_methods[quant_method]
|
||||||
return PipelineQuantizationConfig(
|
return PipelineQuantizationConfig(
|
||||||
quant_mapping={
|
quant_mapping={
|
||||||
"transformer": TorchAoConfig("int8wo"),
|
"transformer": TorchAoConfig(ao_type),
|
||||||
"text_encoder": TorchAoConfig("int8wo")
|
"text_encoder": TorchAoConfig(ao_type)
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
# torchao fp4 (e2m1)
|
|
||||||
elif quant_method == 'torchao-fp4':
|
|
||||||
return PipelineQuantizationConfig(
|
|
||||||
quant_mapping={
|
|
||||||
"transformer": TorchAoConfig("fp4_e2m1"),
|
|
||||||
"text_encoder": TorchAoConfig("fp4_e2m1")
|
|
||||||
}
|
|
||||||
)
|
|
||||||
|
|
||||||
# torchao float8 weight-only
|
|
||||||
elif quant_method == 'torchao-float8wo':
|
|
||||||
return PipelineQuantizationConfig(
|
|
||||||
quant_mapping={
|
|
||||||
"transformer": TorchAoConfig("float8wo"),
|
|
||||||
"text_encoder": TorchAoConfig("float8wo")
|
|
||||||
}
|
}
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
@ -152,7 +129,7 @@ def load_image_model(model_name, dtype='bfloat16', attn_backend='sdpa', cpu_offl
|
||||||
|
|
||||||
modules = ["transformer", "unet"]
|
modules = ["transformer", "unet"]
|
||||||
|
|
||||||
# Set attention backend
|
# Set attention backend (diffusers defaults to native/SDPA)
|
||||||
if attn_backend == 'flash_attention_2':
|
if attn_backend == 'flash_attention_2':
|
||||||
for name in modules:
|
for name in modules:
|
||||||
mod = getattr(pipe, name, None)
|
mod = getattr(pipe, name, None)
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,6 @@ import time
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Any, List
|
from typing import Any, List
|
||||||
|
|
||||||
import llama_cpp_binaries
|
|
||||||
import requests
|
import requests
|
||||||
|
|
||||||
from modules import shared
|
from modules import shared
|
||||||
|
|
@ -311,8 +310,45 @@ class LlamaServer:
|
||||||
else:
|
else:
|
||||||
raise Exception(f"Unexpected response format: 'completion_probabilities' not found in {result}")
|
raise Exception(f"Unexpected response format: 'completion_probabilities' not found in {result}")
|
||||||
|
|
||||||
|
def get_prompt_logprob_entries(self, token_ids, n_probs=5, prompt=""):
|
||||||
|
"""Get logprob entries for prompt tokens via a single n_predict=0 request.
|
||||||
|
|
||||||
|
Requires llama.cpp server with prompt_logprobs support.
|
||||||
|
Returns entries in the standard format for format_completion_logprobs().
|
||||||
|
"""
|
||||||
|
token_ids_list = token_ids.tolist() if hasattr(token_ids, 'tolist') else list(token_ids)
|
||||||
|
|
||||||
|
url = f"http://127.0.0.1:{self.port}/completion"
|
||||||
|
payload = {
|
||||||
|
"prompt": token_ids_list,
|
||||||
|
"n_predict": 0,
|
||||||
|
"n_probs": n_probs,
|
||||||
|
"prompt_logprobs": True,
|
||||||
|
"stream": False,
|
||||||
|
"cache_prompt": False,
|
||||||
|
}
|
||||||
|
|
||||||
|
response = self.session.post(url, json=payload)
|
||||||
|
result = response.json()
|
||||||
|
|
||||||
|
prompt_probs = result.get("prompt_probabilities", [])
|
||||||
|
if not prompt_probs:
|
||||||
|
return []
|
||||||
|
|
||||||
|
# Null first token (no conditioning context); use empty string for BOS
|
||||||
|
# or tokens that don't appear at the start of the prompt text.
|
||||||
|
first_token_str = self.decode([token_ids_list[0]])
|
||||||
|
if self.bos_token and first_token_str == self.bos_token:
|
||||||
|
first_token_str = ""
|
||||||
|
elif not prompt.startswith(first_token_str):
|
||||||
|
first_token_str = ""
|
||||||
|
|
||||||
|
entries = [{"token": first_token_str, "null_logprob": True}]
|
||||||
|
entries.extend(prompt_probs)
|
||||||
|
return entries
|
||||||
|
|
||||||
def _get_vocabulary_size(self):
|
def _get_vocabulary_size(self):
|
||||||
"""Get and store the model's maximum context length."""
|
"""Get and store the model's vocabulary size."""
|
||||||
url = f"http://127.0.0.1:{self.port}/v1/models"
|
url = f"http://127.0.0.1:{self.port}/v1/models"
|
||||||
response = self.session.get(url).json()
|
response = self.session.get(url).json()
|
||||||
|
|
||||||
|
|
@ -337,6 +373,7 @@ class LlamaServer:
|
||||||
"""Check if a port is available for use."""
|
"""Check if a port is available for use."""
|
||||||
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
||||||
try:
|
try:
|
||||||
|
s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
||||||
s.bind(('', port))
|
s.bind(('', port))
|
||||||
return True
|
return True
|
||||||
except OSError:
|
except OSError:
|
||||||
|
|
@ -357,7 +394,16 @@ class LlamaServer:
|
||||||
"""Start the llama.cpp server and wait until it's ready."""
|
"""Start the llama.cpp server and wait until it's ready."""
|
||||||
# Determine the server path
|
# Determine the server path
|
||||||
if self.server_path is None:
|
if self.server_path is None:
|
||||||
self.server_path = llama_cpp_binaries.get_binary_path()
|
if shared.args.ik:
|
||||||
|
try:
|
||||||
|
import ik_llama_cpp_binaries
|
||||||
|
except ImportError:
|
||||||
|
raise ImportError("--ik requires the ik_llama_cpp_binaries package. Install it with: pip install <ik_llama_cpp_binaries wheel URL>")
|
||||||
|
|
||||||
|
self.server_path = ik_llama_cpp_binaries.get_binary_path()
|
||||||
|
else:
|
||||||
|
import llama_cpp_binaries
|
||||||
|
self.server_path = llama_cpp_binaries.get_binary_path()
|
||||||
|
|
||||||
# Build the command
|
# Build the command
|
||||||
cmd = [
|
cmd = [
|
||||||
|
|
@ -470,6 +516,10 @@ class LlamaServer:
|
||||||
else:
|
else:
|
||||||
cmd.append(f"--{flag_item}")
|
cmd.append(f"--{flag_item}")
|
||||||
|
|
||||||
|
# Patch flags for ik_llama.cpp compatibility
|
||||||
|
if shared.args.ik:
|
||||||
|
cmd = _patch_cmd_for_ik(cmd)
|
||||||
|
|
||||||
env = os.environ.copy()
|
env = os.environ.copy()
|
||||||
if os.name == 'posix':
|
if os.name == 'posix':
|
||||||
current_path = env.get('LD_LIBRARY_PATH', '')
|
current_path = env.get('LD_LIBRARY_PATH', '')
|
||||||
|
|
@ -607,3 +657,49 @@ def filter_stderr_with_progress(process_stderr):
|
||||||
process_stderr.close()
|
process_stderr.close()
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
|
||||||
|
def _patch_cmd_for_ik(cmd):
|
||||||
|
"""
|
||||||
|
Rewrite upstream llama.cpp flags to ik_llama.cpp equivalents:
|
||||||
|
--no-webui → --webui none
|
||||||
|
--fit off → (removed)
|
||||||
|
--fit on / --fit-ctx → --fit (bare flag)
|
||||||
|
--fit-target → --fit-margin
|
||||||
|
--cache-reuse → (removed, unsupported)
|
||||||
|
--swa-full → (removed, unsupported)
|
||||||
|
"""
|
||||||
|
# Add Hadamard KV cache rotation when using quantized cache types.
|
||||||
|
# This significantly improves quantized cache quality (especially q4_0)
|
||||||
|
# and is a no-op for MLA models like DeepSeek.
|
||||||
|
if shared.args.cache_type in ("q8_0", "q4_0"):
|
||||||
|
cmd += ["-khad", "-vhad"]
|
||||||
|
|
||||||
|
patched = []
|
||||||
|
i = 0
|
||||||
|
while i < len(cmd):
|
||||||
|
arg = cmd[i]
|
||||||
|
|
||||||
|
if arg == "--no-webui":
|
||||||
|
patched += ["--webui", "none"]
|
||||||
|
elif arg == "--fit" and i + 1 < len(cmd) and cmd[i + 1] in ("on", "off"):
|
||||||
|
val = cmd[i + 1]
|
||||||
|
i += 1
|
||||||
|
if val == "on":
|
||||||
|
patched.append("--fit")
|
||||||
|
# "off" → drop entirely
|
||||||
|
elif arg == "--fit-ctx":
|
||||||
|
patched.append("--fit")
|
||||||
|
i += 1 # skip the value
|
||||||
|
elif arg == "--fit-target":
|
||||||
|
patched.append("--fit-margin")
|
||||||
|
elif arg == "--cache-reuse":
|
||||||
|
i += 1 # skip the value
|
||||||
|
elif arg == "--swa-full":
|
||||||
|
pass # bare flag, just drop it
|
||||||
|
else:
|
||||||
|
patched.append(arg)
|
||||||
|
|
||||||
|
i += 1
|
||||||
|
|
||||||
|
return patched
|
||||||
|
|
|
||||||
|
|
@ -20,6 +20,7 @@ loaders_and_params = OrderedDict({
|
||||||
'no_mmap',
|
'no_mmap',
|
||||||
'mlock',
|
'mlock',
|
||||||
'numa',
|
'numa',
|
||||||
|
'ik',
|
||||||
'parallel',
|
'parallel',
|
||||||
'model_draft',
|
'model_draft',
|
||||||
'draft_max',
|
'draft_max',
|
||||||
|
|
@ -290,16 +291,21 @@ def blacklist_samplers(loader, dynamic_temperature):
|
||||||
|
|
||||||
@functools.cache
|
@functools.cache
|
||||||
def get_all_params():
|
def get_all_params():
|
||||||
|
from modules import shared
|
||||||
all_params = set()
|
all_params = set()
|
||||||
for k in loaders_and_params:
|
for k in loaders_and_params:
|
||||||
for el in loaders_and_params[k]:
|
for el in loaders_and_params[k]:
|
||||||
all_params.add(el)
|
all_params.add(el)
|
||||||
|
|
||||||
|
if shared.args.portable:
|
||||||
|
all_params.discard('ik')
|
||||||
|
|
||||||
return sorted(all_params)
|
return sorted(all_params)
|
||||||
|
|
||||||
|
|
||||||
|
@functools.cache
|
||||||
def list_model_elements():
|
def list_model_elements():
|
||||||
return [
|
elements = [
|
||||||
'filter_by_loader',
|
'filter_by_loader',
|
||||||
'loader',
|
'loader',
|
||||||
'cpu_memory',
|
'cpu_memory',
|
||||||
|
|
@ -347,6 +353,12 @@ def list_model_elements():
|
||||||
'mmproj',
|
'mmproj',
|
||||||
]
|
]
|
||||||
|
|
||||||
|
from modules import shared
|
||||||
|
if not shared.args.portable:
|
||||||
|
elements.append('ik')
|
||||||
|
|
||||||
|
return elements
|
||||||
|
|
||||||
|
|
||||||
def make_loader_params_visible(loader):
|
def make_loader_params_visible(loader):
|
||||||
import gradio as gr
|
import gradio as gr
|
||||||
|
|
|
||||||
|
|
@ -4,7 +4,6 @@ import numpy as np
|
||||||
|
|
||||||
from modules import models, shared
|
from modules import models, shared
|
||||||
from modules.logging_colors import logger
|
from modules.logging_colors import logger
|
||||||
from modules.models import load_model
|
|
||||||
from modules.text_generation import generate_reply
|
from modules.text_generation import generate_reply
|
||||||
from modules.utils import check_model_loaded
|
from modules.utils import check_model_loaded
|
||||||
|
|
||||||
|
|
@ -12,8 +11,7 @@ global_scores = None
|
||||||
|
|
||||||
|
|
||||||
def get_next_logits(*args, **kwargs):
|
def get_next_logits(*args, **kwargs):
|
||||||
if shared.args.idle_timeout > 0 and shared.model is None and shared.model_name not in [None, 'None']:
|
models.load_model_if_idle_unloaded()
|
||||||
shared.model, shared.tokenizer = load_model(shared.model_name)
|
|
||||||
|
|
||||||
needs_lock = not args[2] # use_samplers
|
needs_lock = not args[2] # use_samplers
|
||||||
if needs_lock:
|
if needs_lock:
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,5 @@
|
||||||
import sys
|
import sys
|
||||||
|
import threading
|
||||||
import time
|
import time
|
||||||
|
|
||||||
import modules.shared as shared
|
import modules.shared as shared
|
||||||
|
|
@ -7,6 +8,15 @@ from modules.models_settings import get_model_metadata
|
||||||
from modules.utils import resolve_model_path
|
from modules.utils import resolve_model_path
|
||||||
|
|
||||||
last_generation_time = time.time()
|
last_generation_time = time.time()
|
||||||
|
active_generation_count = 0
|
||||||
|
_generation_count_lock = threading.Lock()
|
||||||
|
|
||||||
|
|
||||||
|
def load_model_if_idle_unloaded():
|
||||||
|
global last_generation_time
|
||||||
|
if shared.args.idle_timeout > 0 and shared.model is None and shared.model_name not in [None, 'None']:
|
||||||
|
shared.model, shared.tokenizer = load_model(shared.model_name)
|
||||||
|
last_generation_time = time.time()
|
||||||
|
|
||||||
|
|
||||||
def load_model(model_name, loader=None):
|
def load_model(model_name, loader=None):
|
||||||
|
|
@ -66,8 +76,7 @@ def load_model(model_name, loader=None):
|
||||||
|
|
||||||
logger.info(f"Loaded \"{model_name}\" in {(time.time()-t0):.2f} seconds.")
|
logger.info(f"Loaded \"{model_name}\" in {(time.time()-t0):.2f} seconds.")
|
||||||
logger.info(f"LOADER: \"{loader}\"")
|
logger.info(f"LOADER: \"{loader}\"")
|
||||||
logger.info(f"TRUNCATION LENGTH: {shared.settings['truncation_length']}")
|
logger.info(f"CONTEXT LENGTH: {shared.settings['truncation_length']}")
|
||||||
logger.info(f"INSTRUCTION TEMPLATE: \"{metadata['instruction_template']}\"")
|
|
||||||
return model, tokenizer
|
return model, tokenizer
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -159,7 +168,10 @@ def unload_model_if_idle():
|
||||||
while True:
|
while True:
|
||||||
shared.generation_lock.acquire()
|
shared.generation_lock.acquire()
|
||||||
try:
|
try:
|
||||||
if time.time() - last_generation_time > shared.args.idle_timeout * 60:
|
with _generation_count_lock:
|
||||||
|
is_active = active_generation_count > 0
|
||||||
|
|
||||||
|
if not is_active and time.time() - last_generation_time > shared.args.idle_timeout * 60:
|
||||||
if shared.model is not None:
|
if shared.model is not None:
|
||||||
logger.info("Unloading the model for inactivity.")
|
logger.info("Unloading the model for inactivity.")
|
||||||
unload_model(keep_model_name=True)
|
unload_model(keep_model_name=True)
|
||||||
|
|
|
||||||
|
|
@ -23,14 +23,9 @@ def get_fallback_settings():
|
||||||
|
|
||||||
def get_model_metadata(model):
|
def get_model_metadata(model):
|
||||||
model_path = resolve_model_path(model)
|
model_path = resolve_model_path(model)
|
||||||
model_settings = {}
|
|
||||||
|
|
||||||
# Get settings from user_data/models/config.yaml and user_data/models/config-user.yaml
|
# Fallback settings
|
||||||
settings = shared.model_config
|
model_settings = get_fallback_settings()
|
||||||
for pat in settings:
|
|
||||||
if re.match(pat.lower(), Path(model).name.lower()):
|
|
||||||
for k in settings[pat]:
|
|
||||||
model_settings[k] = settings[pat][k]
|
|
||||||
|
|
||||||
path = model_path / 'config.json'
|
path = model_path / 'config.json'
|
||||||
if path.exists():
|
if path.exists():
|
||||||
|
|
@ -405,14 +400,19 @@ def load_instruction_template(template):
|
||||||
if template == 'None':
|
if template == 'None':
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
for filepath in [shared.user_data_dir / 'instruction-templates' / f'{template}.yaml', shared.user_data_dir / 'instruction-templates' / 'Alpaca.yaml']:
|
for name in (template, 'Alpaca'):
|
||||||
if filepath.exists():
|
path = shared.user_data_dir / 'instruction-templates' / f'{name}.yaml'
|
||||||
break
|
try:
|
||||||
|
with open(path, 'r', encoding='utf-8') as f:
|
||||||
|
file_contents = f.read()
|
||||||
|
except FileNotFoundError:
|
||||||
|
if name == template:
|
||||||
|
logger.warning(f"Instruction template '{template}' not found, falling back to Alpaca")
|
||||||
|
continue
|
||||||
|
|
||||||
|
break
|
||||||
else:
|
else:
|
||||||
return ''
|
return ''
|
||||||
|
|
||||||
with open(filepath, 'r', encoding='utf-8') as f:
|
|
||||||
file_contents = f.read()
|
|
||||||
data = yaml.safe_load(file_contents)
|
data = yaml.safe_load(file_contents)
|
||||||
if 'instruction_template' in data:
|
if 'instruction_template' in data:
|
||||||
return data['instruction_template']
|
return data['instruction_template']
|
||||||
|
|
|
||||||
|
|
@ -1,6 +1,7 @@
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
from modules import shared, utils
|
from modules import shared, utils
|
||||||
|
from modules.utils import sanitize_filename
|
||||||
from modules.text_generation import get_encoded_length
|
from modules.text_generation import get_encoded_length
|
||||||
|
|
||||||
|
|
||||||
|
|
@ -18,6 +19,7 @@ def load_prompt(fname):
|
||||||
|
|
||||||
return initial_content
|
return initial_content
|
||||||
|
|
||||||
|
fname = sanitize_filename(fname)
|
||||||
file_path = shared.user_data_dir / 'logs' / 'notebook' / f'{fname}.txt'
|
file_path = shared.user_data_dir / 'logs' / 'notebook' / f'{fname}.txt'
|
||||||
if file_path.exists():
|
if file_path.exists():
|
||||||
with open(file_path, 'r', encoding='utf-8') as f:
|
with open(file_path, 'r', encoding='utf-8') as f:
|
||||||
|
|
|
||||||
|
|
@ -7,6 +7,7 @@ THINKING_FORMATS = [
|
||||||
('<|channel|>analysis<|message|>', '<|end|>', '<|channel|>final<|message|>'),
|
('<|channel|>analysis<|message|>', '<|end|>', '<|channel|>final<|message|>'),
|
||||||
('<|channel|>commentary<|message|>', '<|end|>', '<|channel|>final<|message|>'),
|
('<|channel|>commentary<|message|>', '<|end|>', '<|channel|>final<|message|>'),
|
||||||
('<seed:think>', '</seed:think>', None),
|
('<seed:think>', '</seed:think>', None),
|
||||||
|
('<|channel>thought', '<channel|>', None), # Gemma 4
|
||||||
('<|think|>', '<|end|>', '<|content|>'), # Solar Open
|
('<|think|>', '<|end|>', '<|content|>'), # Solar Open
|
||||||
# ('Thinking Process:', '</think>', None), # Qwen3.5 verbose thinking outside tags -- removed: too prone to false positives in streaming
|
# ('Thinking Process:', '</think>', None), # Qwen3.5 verbose thinking outside tags -- removed: too prone to false positives in streaming
|
||||||
(None, '</think>', None), # End-only variant (e.g., Qwen3-next)
|
(None, '</think>', None), # End-only variant (e.g., Qwen3-next)
|
||||||
|
|
@ -72,9 +73,16 @@ def extract_reasoning(text, html_escaped=False):
|
||||||
if content_pos != -1:
|
if content_pos != -1:
|
||||||
content_start = content_pos + len(content_esc)
|
content_start = content_pos + len(content_esc)
|
||||||
else:
|
else:
|
||||||
# Content tag not present — fall back to content after
|
# Content tag not present yet. In GPT-OSS the region
|
||||||
# end_tag (e.g. GPT-OSS tool calls skip the final channel).
|
# between <|end|> and the content tag contains internal
|
||||||
content_start = end_pos + len(end_esc)
|
# markup (<|start|>assistant…) that must not be shown.
|
||||||
|
# Suppress it to prevent tag leaks during streaming.
|
||||||
|
remainder = text[end_pos + len(end_esc):].lstrip()
|
||||||
|
framing_token = esc('<|start|>')
|
||||||
|
if not remainder or remainder.startswith(framing_token) or framing_token.startswith(remainder):
|
||||||
|
content_start = len(text)
|
||||||
|
else:
|
||||||
|
content_start = end_pos + len(end_esc)
|
||||||
else:
|
else:
|
||||||
content_start = end_pos + len(end_esc)
|
content_start = end_pos + len(end_esc)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -110,6 +110,7 @@ group.add_argument('--numa', action='store_true', help='Activate NUMA task alloc
|
||||||
group.add_argument('--parallel', type=int, default=1, help='Number of parallel request slots. The context size is divided equally among slots. For example, to have 4 slots with 8192 context each, set ctx_size to 32768.')
|
group.add_argument('--parallel', type=int, default=1, help='Number of parallel request slots. The context size is divided equally among slots. For example, to have 4 slots with 8192 context each, set ctx_size to 32768.')
|
||||||
group.add_argument('--fit-target', type=str, default='512', help='Target VRAM margin per device for auto GPU layers, comma-separated list of values in MiB. A single value is broadcast across all devices.')
|
group.add_argument('--fit-target', type=str, default='512', help='Target VRAM margin per device for auto GPU layers, comma-separated list of values in MiB. A single value is broadcast across all devices.')
|
||||||
group.add_argument('--extra-flags', type=str, default=None, help='Extra flags to pass to llama-server. Example: "--jinja --rpc 192.168.1.100:50052"')
|
group.add_argument('--extra-flags', type=str, default=None, help='Extra flags to pass to llama-server. Example: "--jinja --rpc 192.168.1.100:50052"')
|
||||||
|
group.add_argument('--ik', action='store_true', help='Use ik_llama.cpp instead of upstream llama.cpp. Requires the ik_llama_cpp_binaries package to be installed.')
|
||||||
|
|
||||||
# Transformers/Accelerate
|
# Transformers/Accelerate
|
||||||
group = parser.add_argument_group('Transformers/Accelerate')
|
group = parser.add_argument_group('Transformers/Accelerate')
|
||||||
|
|
@ -258,6 +259,7 @@ settings = {
|
||||||
'enable_web_search': False,
|
'enable_web_search': False,
|
||||||
'web_search_pages': 3,
|
'web_search_pages': 3,
|
||||||
'selected_tools': [],
|
'selected_tools': [],
|
||||||
|
'mcp_servers': '',
|
||||||
'prompt-notebook': '',
|
'prompt-notebook': '',
|
||||||
'preset': 'Top-P' if (user_data_dir / 'presets/Top-P.yaml').exists() else None,
|
'preset': 'Top-P' if (user_data_dir / 'presets/Top-P.yaml').exists() else None,
|
||||||
'max_new_tokens': 512,
|
'max_new_tokens': 512,
|
||||||
|
|
@ -362,7 +364,7 @@ settings = {
|
||||||
'image_llm_variations_prompt': 'Write a variation of the image generation prompt above. Consider the intent of the user with that prompt and write something that will likely please them, with added details. Output only the new prompt. Do not add any explanations, prefixes, or additional text.',
|
'image_llm_variations_prompt': 'Write a variation of the image generation prompt above. Consider the intent of the user with that prompt and write something that will likely please them, with added details. Output only the new prompt. Do not add any explanations, prefixes, or additional text.',
|
||||||
'image_model_menu': 'None',
|
'image_model_menu': 'None',
|
||||||
'image_dtype': 'bfloat16',
|
'image_dtype': 'bfloat16',
|
||||||
'image_attn_backend': 'flash_attention_2',
|
'image_attn_backend': 'sdpa',
|
||||||
'image_cpu_offload': False,
|
'image_cpu_offload': False,
|
||||||
'image_compile': False,
|
'image_compile': False,
|
||||||
'image_quant': 'none',
|
'image_quant': 'none',
|
||||||
|
|
@ -454,17 +456,7 @@ def load_user_config():
|
||||||
|
|
||||||
args.loader = fix_loader_name(args.loader)
|
args.loader = fix_loader_name(args.loader)
|
||||||
|
|
||||||
# Load model-specific settings
|
|
||||||
p = Path(f'{args.model_dir}/config.yaml')
|
|
||||||
if p.exists():
|
|
||||||
model_config = yaml.safe_load(open(p, 'r').read())
|
|
||||||
else:
|
|
||||||
model_config = {}
|
|
||||||
del p
|
|
||||||
|
|
||||||
|
|
||||||
# Load custom model-specific settings
|
# Load custom model-specific settings
|
||||||
user_config = load_user_config()
|
user_config = load_user_config()
|
||||||
|
|
||||||
model_config = OrderedDict(model_config)
|
|
||||||
user_config = OrderedDict(user_config)
|
user_config = OrderedDict(user_config)
|
||||||
|
|
|
||||||
|
|
@ -17,9 +17,7 @@ from modules.utils import check_model_loaded
|
||||||
|
|
||||||
|
|
||||||
def generate_reply(*args, **kwargs):
|
def generate_reply(*args, **kwargs):
|
||||||
if shared.args.idle_timeout > 0 and shared.model is None and shared.model_name not in [None, 'None']:
|
models.load_model_if_idle_unloaded()
|
||||||
from modules.models import load_model
|
|
||||||
shared.model, shared.tokenizer = load_model(shared.model_name)
|
|
||||||
|
|
||||||
state = args[1] if len(args) > 1 else kwargs.get('state', {})
|
state = args[1] if len(args) > 1 else kwargs.get('state', {})
|
||||||
use_parallel = (
|
use_parallel = (
|
||||||
|
|
@ -31,10 +29,16 @@ def generate_reply(*args, **kwargs):
|
||||||
if not use_parallel:
|
if not use_parallel:
|
||||||
shared.generation_lock.acquire()
|
shared.generation_lock.acquire()
|
||||||
|
|
||||||
|
with models._generation_count_lock:
|
||||||
|
models.active_generation_count += 1
|
||||||
|
|
||||||
try:
|
try:
|
||||||
for result in _generate_reply(*args, **kwargs):
|
for result in _generate_reply(*args, **kwargs):
|
||||||
yield result
|
yield result
|
||||||
finally:
|
finally:
|
||||||
|
with models._generation_count_lock:
|
||||||
|
models.active_generation_count -= 1
|
||||||
|
|
||||||
models.last_generation_time = time.time()
|
models.last_generation_time = time.time()
|
||||||
if not use_parallel:
|
if not use_parallel:
|
||||||
shared.generation_lock.release()
|
shared.generation_lock.release()
|
||||||
|
|
@ -126,7 +130,9 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False, escap
|
||||||
|
|
||||||
def encode(prompt, add_special_tokens=True, add_bos_token=True, truncation_length=None):
|
def encode(prompt, add_special_tokens=True, add_bos_token=True, truncation_length=None):
|
||||||
if shared.tokenizer is None:
|
if shared.tokenizer is None:
|
||||||
raise ValueError('No tokenizer is loaded')
|
models.load_model_if_idle_unloaded()
|
||||||
|
if shared.tokenizer is None:
|
||||||
|
raise ValueError('No tokenizer is loaded')
|
||||||
|
|
||||||
# llama.cpp case
|
# llama.cpp case
|
||||||
if shared.model.__class__.__name__ == 'LlamaServer':
|
if shared.model.__class__.__name__ == 'LlamaServer':
|
||||||
|
|
@ -176,7 +182,9 @@ def encode(prompt, add_special_tokens=True, add_bos_token=True, truncation_lengt
|
||||||
|
|
||||||
def decode(output_ids, skip_special_tokens=True):
|
def decode(output_ids, skip_special_tokens=True):
|
||||||
if shared.tokenizer is None:
|
if shared.tokenizer is None:
|
||||||
raise ValueError('No tokenizer is loaded')
|
models.load_model_if_idle_unloaded()
|
||||||
|
if shared.tokenizer is None:
|
||||||
|
raise ValueError('No tokenizer is loaded')
|
||||||
|
|
||||||
return shared.tokenizer.decode(output_ids, skip_special_tokens=skip_special_tokens)
|
return shared.tokenizer.decode(output_ids, skip_special_tokens=skip_special_tokens)
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -27,10 +27,11 @@ TOOL_CALL_OPENING_MARKERS = [
|
||||||
'[TOOL_CALLS]',
|
'[TOOL_CALLS]',
|
||||||
'to=functions.',
|
'to=functions.',
|
||||||
'<|channel|>commentary',
|
'<|channel|>commentary',
|
||||||
|
'<|tool_call>call:',
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|
||||||
def streaming_tool_buffer_check(text, markers=None, tool_names=None, check_bare_names=False):
|
def streaming_tool_buffer_check(text, markers=None, tool_names=None, check_bare_names=False, partial_match=True):
|
||||||
'''
|
'''
|
||||||
Check whether streaming output should be withheld because it may
|
Check whether streaming output should be withheld because it may
|
||||||
contain tool-call markup.
|
contain tool-call markup.
|
||||||
|
|
@ -42,6 +43,10 @@ def streaming_tool_buffer_check(text, markers=None, tool_names=None, check_bare_
|
||||||
tool_names: List of tool function names.
|
tool_names: List of tool function names.
|
||||||
check_bare_names: Whether to do partial-prefix matching on tool
|
check_bare_names: Whether to do partial-prefix matching on tool
|
||||||
names (for models with unknown template format).
|
names (for models with unknown template format).
|
||||||
|
partial_match: Whether to check partial prefixes of markers/names.
|
||||||
|
Set to False for end-of-generation checks where a
|
||||||
|
partial prefix is just normal text, not an incomplete
|
||||||
|
tool call.
|
||||||
'''
|
'''
|
||||||
# Strip thinking blocks so tool-call syntax inside <think> doesn't
|
# Strip thinking blocks so tool-call syntax inside <think> doesn't
|
||||||
# trigger false positives.
|
# trigger false positives.
|
||||||
|
|
@ -59,6 +64,9 @@ def streaming_tool_buffer_check(text, markers=None, tool_names=None, check_bare_
|
||||||
if name + '{' in text or name + ' {' in text:
|
if name + '{' in text or name + ' {' in text:
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
if not partial_match:
|
||||||
|
return False
|
||||||
|
|
||||||
# Partial-prefix matching: only for template-specific markers.
|
# Partial-prefix matching: only for template-specific markers.
|
||||||
for marker in (markers if markers is not None else TOOL_CALL_OPENING_MARKERS):
|
for marker in (markers if markers is not None else TOOL_CALL_OPENING_MARKERS):
|
||||||
for prefix_len in range(min(len(marker) - 1, len(text)), 0, -1):
|
for prefix_len in range(min(len(marker) - 1, len(text)), 0, -1):
|
||||||
|
|
@ -400,6 +408,78 @@ def _parse_glm_tool_calls(answer: str, tool_names: list[str]):
|
||||||
return matches, start_pos
|
return matches, start_pos
|
||||||
|
|
||||||
|
|
||||||
|
def _extract_gemma4_balanced(text, start):
|
||||||
|
"""Extract balanced braces from Gemma 4 format, using <|"|> as string delimiters."""
|
||||||
|
if start >= len(text) or text[start] != '{':
|
||||||
|
return None
|
||||||
|
depth = 0
|
||||||
|
in_string = False
|
||||||
|
quote_token = '<|"|>'
|
||||||
|
quote_len = len(quote_token)
|
||||||
|
i = start
|
||||||
|
while i < len(text):
|
||||||
|
if text[i:i + quote_len] == quote_token:
|
||||||
|
in_string = not in_string
|
||||||
|
i += quote_len
|
||||||
|
continue
|
||||||
|
if in_string:
|
||||||
|
i += 1
|
||||||
|
continue
|
||||||
|
c = text[i]
|
||||||
|
if c == '{':
|
||||||
|
depth += 1
|
||||||
|
elif c == '}':
|
||||||
|
depth -= 1
|
||||||
|
if depth == 0:
|
||||||
|
return text[start:i + 1]
|
||||||
|
i += 1
|
||||||
|
return None
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_gemma4_tool_calls(answer: str, tool_names: list[str]):
|
||||||
|
"""Parse Gemma 4-style tool calls.
|
||||||
|
|
||||||
|
Format:
|
||||||
|
<|tool_call>call:func_name{key:<|"|>value<|"|>,...}<tool_call|>
|
||||||
|
|
||||||
|
Values use <|"|> tokens instead of standard JSON quotes, and keys are
|
||||||
|
bare identifiers.
|
||||||
|
"""
|
||||||
|
matches = []
|
||||||
|
start_pos = None
|
||||||
|
|
||||||
|
for m in re.finditer(r'<\|tool_call>call:([^\s{]+)\s*', answer):
|
||||||
|
func_name = m.group(1).strip()
|
||||||
|
if func_name not in tool_names:
|
||||||
|
continue
|
||||||
|
|
||||||
|
brace_start = m.end()
|
||||||
|
if brace_start >= len(answer) or answer[brace_start] != '{':
|
||||||
|
continue
|
||||||
|
|
||||||
|
content = _extract_gemma4_balanced(answer, brace_start)
|
||||||
|
if content is None:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# Convert to JSON: split on <|"|> tokens so that key quoting
|
||||||
|
# only applies outside string values (even-indexed parts),
|
||||||
|
# then rejoin with real quotes.
|
||||||
|
parts = content.split('<|"|>')
|
||||||
|
for idx in range(0, len(parts), 2):
|
||||||
|
parts[idx] = re.sub(r'(^|[{,\[])\s*(\w+)\s*:', r'\1"\2":', parts[idx])
|
||||||
|
json_str = '"'.join(parts)
|
||||||
|
|
||||||
|
try:
|
||||||
|
arguments = json.loads(json_str)
|
||||||
|
if start_pos is None:
|
||||||
|
start_pos = m.start()
|
||||||
|
matches.append(_make_tool_call(func_name, arguments))
|
||||||
|
except (json.JSONDecodeError, ValueError):
|
||||||
|
pass
|
||||||
|
|
||||||
|
return matches, start_pos
|
||||||
|
|
||||||
|
|
||||||
def _parse_pythonic_tool_calls(answer: str, tool_names: list[str]):
|
def _parse_pythonic_tool_calls(answer: str, tool_names: list[str]):
|
||||||
"""Parse pythonic-style tool calls used by Llama 4 and similar models.
|
"""Parse pythonic-style tool calls used by Llama 4 and similar models.
|
||||||
|
|
||||||
|
|
@ -472,6 +552,11 @@ TOOL_CALL_FORMATS = [
|
||||||
'parser': _parse_channel_tool_calls,
|
'parser': _parse_channel_tool_calls,
|
||||||
'markers': ['to=functions.', '<|channel|>commentary'],
|
'markers': ['to=functions.', '<|channel|>commentary'],
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
'template_hints': ['<|tool_call>call:'],
|
||||||
|
'parser': _parse_gemma4_tool_calls,
|
||||||
|
'markers': ['<|tool_call>call:'],
|
||||||
|
},
|
||||||
{
|
{
|
||||||
'template_hints': ['minimax:tool_call'],
|
'template_hints': ['minimax:tool_call'],
|
||||||
'parser': _parse_minimax_tool_calls,
|
'parser': _parse_minimax_tool_calls,
|
||||||
|
|
@ -504,6 +589,7 @@ ALL_PARSERS = [
|
||||||
_parse_deep_seek_tool_calls,
|
_parse_deep_seek_tool_calls,
|
||||||
_parse_kimi_tool_calls,
|
_parse_kimi_tool_calls,
|
||||||
_parse_channel_tool_calls,
|
_parse_channel_tool_calls,
|
||||||
|
_parse_gemma4_tool_calls,
|
||||||
_parse_minimax_tool_calls,
|
_parse_minimax_tool_calls,
|
||||||
_parse_glm_tool_calls,
|
_parse_glm_tool_calls,
|
||||||
_parse_xml_param_tool_calls,
|
_parse_xml_param_tool_calls,
|
||||||
|
|
@ -552,9 +638,15 @@ def parse_tool_call(answer: str, tool_names: list[str], return_prefix: bool = Fa
|
||||||
# Strip thinking blocks so tool-call syntax inside <think> is ignored.
|
# Strip thinking blocks so tool-call syntax inside <think> is ignored.
|
||||||
original_answer = answer
|
original_answer = answer
|
||||||
_, answer = extract_reasoning(answer)
|
_, answer = extract_reasoning(answer)
|
||||||
# Offset between original and stripped text, used to map start_pos
|
# Reasoning extraction returns empty content when GPT-OSS internal
|
||||||
# back to the original string when returning a prefix.
|
# markup (<|start|>assistant…) follows the thinking block without a
|
||||||
reasoning_offset = len(original_answer) - len(answer)
|
# content tag. Fall back to the full text so tool-call markers can
|
||||||
|
# be found.
|
||||||
|
if not answer.strip():
|
||||||
|
answer = original_answer
|
||||||
|
reasoning_offset = 0
|
||||||
|
else:
|
||||||
|
reasoning_offset = len(original_answer) - len(answer)
|
||||||
|
|
||||||
matches = []
|
matches = []
|
||||||
start_pos = None
|
start_pos = None
|
||||||
|
|
@ -620,6 +712,8 @@ def parse_tool_call(answer: str, tool_names: list[str], return_prefix: bool = Fa
|
||||||
if not isinstance(candidates, list):
|
if not isinstance(candidates, list):
|
||||||
candidates = [candidates]
|
candidates = [candidates]
|
||||||
for candidate_dict in candidates:
|
for candidate_dict in candidates:
|
||||||
|
if not isinstance(candidate_dict, dict):
|
||||||
|
continue
|
||||||
checked_candidate = check_and_sanitize_tool_call_candidate(candidate_dict, tool_names)
|
checked_candidate = check_and_sanitize_tool_call_candidate(candidate_dict, tool_names)
|
||||||
if checked_candidate is not None:
|
if checked_candidate is not None:
|
||||||
matches.append(checked_candidate)
|
matches.append(checked_candidate)
|
||||||
|
|
|
||||||
|
|
@ -1,3 +1,4 @@
|
||||||
|
import asyncio
|
||||||
import importlib.util
|
import importlib.util
|
||||||
import json
|
import json
|
||||||
|
|
||||||
|
|
@ -55,6 +56,119 @@ def load_tools(selected_names):
|
||||||
return tool_defs, executors
|
return tool_defs, executors
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_mcp_servers(servers_str):
|
||||||
|
"""Parse MCP servers textbox: one server per line, format 'url' or 'url,Header: value,Header2: value2'."""
|
||||||
|
servers = []
|
||||||
|
for line in servers_str.strip().splitlines():
|
||||||
|
line = line.strip()
|
||||||
|
if not line:
|
||||||
|
continue
|
||||||
|
parts = line.split(',')
|
||||||
|
url = parts[0].strip()
|
||||||
|
headers = {}
|
||||||
|
for part in parts[1:]:
|
||||||
|
part = part.strip()
|
||||||
|
if ':' in part:
|
||||||
|
key, val = part.split(':', 1)
|
||||||
|
headers[key.strip()] = val.strip()
|
||||||
|
servers.append((url, headers))
|
||||||
|
return servers
|
||||||
|
|
||||||
|
|
||||||
|
def _mcp_tool_to_openai(tool):
|
||||||
|
"""Convert an MCP Tool object to OpenAI-format tool dict."""
|
||||||
|
return {
|
||||||
|
"type": "function",
|
||||||
|
"function": {
|
||||||
|
"name": tool.name,
|
||||||
|
"description": tool.description or "",
|
||||||
|
"parameters": tool.inputSchema or {"type": "object", "properties": {}}
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
|
||||||
|
async def _mcp_session(url, headers, callback):
|
||||||
|
"""Open an MCP session and pass it to the callback."""
|
||||||
|
from mcp.client.streamable_http import streamablehttp_client
|
||||||
|
from mcp import ClientSession
|
||||||
|
|
||||||
|
async with streamablehttp_client(url, headers=headers or None) as (read_stream, write_stream, _):
|
||||||
|
async with ClientSession(read_stream, write_stream) as session:
|
||||||
|
await session.initialize()
|
||||||
|
return await callback(session)
|
||||||
|
|
||||||
|
|
||||||
|
def _make_mcp_executor(name, url, headers):
|
||||||
|
def executor(arguments):
|
||||||
|
return asyncio.run(_call_mcp_tool(name, arguments, url, headers))
|
||||||
|
return executor
|
||||||
|
|
||||||
|
|
||||||
|
async def _connect_mcp_server(url, headers):
|
||||||
|
"""Connect to one MCP server and return (tool_defs, executors)."""
|
||||||
|
|
||||||
|
async def _discover(session):
|
||||||
|
result = await session.list_tools()
|
||||||
|
tool_defs = []
|
||||||
|
executors = {}
|
||||||
|
for tool in result.tools:
|
||||||
|
tool_defs.append(_mcp_tool_to_openai(tool))
|
||||||
|
executors[tool.name] = _make_mcp_executor(tool.name, url, headers)
|
||||||
|
return tool_defs, executors
|
||||||
|
|
||||||
|
return await _mcp_session(url, headers, _discover)
|
||||||
|
|
||||||
|
|
||||||
|
async def _call_mcp_tool(name, arguments, url, headers):
|
||||||
|
"""Connect to an MCP server and call a single tool."""
|
||||||
|
|
||||||
|
async def _invoke(session):
|
||||||
|
result = await session.call_tool(name, arguments)
|
||||||
|
parts = []
|
||||||
|
for content in result.content:
|
||||||
|
if hasattr(content, 'text'):
|
||||||
|
parts.append(content.text)
|
||||||
|
else:
|
||||||
|
parts.append(str(content))
|
||||||
|
return '\n'.join(parts) if parts else ''
|
||||||
|
|
||||||
|
return await _mcp_session(url, headers, _invoke)
|
||||||
|
|
||||||
|
|
||||||
|
async def _connect_all_mcp_servers(servers):
|
||||||
|
"""Connect to all MCP servers concurrently."""
|
||||||
|
results = await asyncio.gather(
|
||||||
|
*(_connect_mcp_server(url, headers) for url, headers in servers),
|
||||||
|
return_exceptions=True
|
||||||
|
)
|
||||||
|
all_defs = []
|
||||||
|
all_executors = {}
|
||||||
|
for (url, _), result in zip(servers, results):
|
||||||
|
if isinstance(result, Exception):
|
||||||
|
logger.exception(f'Failed to connect to MCP server "{url}"', exc_info=result)
|
||||||
|
continue
|
||||||
|
defs, execs = result
|
||||||
|
for td, (fn, ex) in zip(defs, execs.items()):
|
||||||
|
if fn in all_executors:
|
||||||
|
logger.warning(f'MCP tool "{fn}" from {url} conflicts with an already loaded tool. Skipping.')
|
||||||
|
continue
|
||||||
|
all_defs.append(td)
|
||||||
|
all_executors[fn] = ex
|
||||||
|
return all_defs, all_executors
|
||||||
|
|
||||||
|
|
||||||
|
def load_mcp_tools(servers_str):
|
||||||
|
"""
|
||||||
|
Parse MCP servers string and discover tools from each server.
|
||||||
|
Returns (tool_defs, executors) in the same format as load_tools.
|
||||||
|
"""
|
||||||
|
servers = _parse_mcp_servers(servers_str)
|
||||||
|
if not servers:
|
||||||
|
return [], {}
|
||||||
|
|
||||||
|
return asyncio.run(_connect_all_mcp_servers(servers))
|
||||||
|
|
||||||
|
|
||||||
def execute_tool(func_name, arguments, executors):
|
def execute_tool(func_name, arguments, executors):
|
||||||
"""Execute a tool by function name. Returns result as a JSON string."""
|
"""Execute a tool by function name. Returns result as a JSON string."""
|
||||||
fn = executors.get(func_name)
|
fn = executors.get(func_name)
|
||||||
|
|
|
||||||
|
|
@ -52,7 +52,7 @@ def create_ui():
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
always_override = gr.Checkbox(label='Override Existing Files', value=False, info='If the name is the same, checking will replace the existing file, and unchecking will load and continue from it (the rank must be the same).', elem_classes=['no-background'])
|
always_override = gr.Checkbox(label='Override Existing Files', value=False, info='If the name is the same, checking will replace the existing file, and unchecking will load and continue from it (the rank must be the same).', elem_classes=['no-background'])
|
||||||
|
|
||||||
with gr.Accordion(label='Target Modules', open=False, elem_classes='tgw-accordion'):
|
with gr.Accordion(label='Target Modules', open=False):
|
||||||
gr.Markdown("Selects which modules to target in training. Targeting more modules is closer to a full fine-tune at the cost of increased VRAM and adapter size.")
|
gr.Markdown("Selects which modules to target in training. Targeting more modules is closer to a full fine-tune at the cost of increased VRAM and adapter size.")
|
||||||
all_linear = gr.Checkbox(label='Target all linear layers', value=True, info='Targets every nn.Linear layer except lm_head. Works for any model architecture. When checked, the individual module checkboxes below are ignored.', elem_classes=['no-background'])
|
all_linear = gr.Checkbox(label='Target all linear layers', value=True, info='Targets every nn.Linear layer except lm_head. Works for any model architecture. When checked, the individual module checkboxes below are ignored.', elem_classes=['no-background'])
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
|
|
@ -87,7 +87,7 @@ def create_ui():
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
lr_scheduler_type = gr.Dropdown(label='LR Scheduler', value='cosine', choices=['linear', 'constant', 'constant_with_warmup', 'cosine', 'cosine_with_restarts', 'polynomial', 'inverse_sqrt'], info='Learning rate scheduler - defines how the learning rate changes over time. "Constant" means never change, "linear" means to go in a straight line from the learning rate down to 0, cosine follows a curve, etc.', elem_classes=['slim-dropdown'])
|
lr_scheduler_type = gr.Dropdown(label='LR Scheduler', value='cosine', choices=['linear', 'constant', 'constant_with_warmup', 'cosine', 'cosine_with_restarts', 'polynomial', 'inverse_sqrt'], info='Learning rate scheduler - defines how the learning rate changes over time. "Constant" means never change, "linear" means to go in a straight line from the learning rate down to 0, cosine follows a curve, etc.', elem_classes=['slim-dropdown'])
|
||||||
|
|
||||||
with gr.Accordion(label='Advanced Options', open=False, elem_classes='tgw-accordion'):
|
with gr.Accordion(label='Advanced Options', open=False):
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
optimizer = gr.Dropdown(label='Optimizer', value='adamw_torch', choices=['adamw_hf', 'adamw_torch', 'adamw_torch_fused', 'adamw_torch_xla', 'adamw_apex_fused', 'adafactor', 'adamw_bnb_8bit', 'adamw_anyprecision', 'sgd', 'adagrad'], info='Optimizer algorithm. adamw_torch is the standard choice. adamw_bnb_8bit uses less VRAM. adafactor is memory-efficient for large models.', elem_classes=['slim-dropdown'])
|
optimizer = gr.Dropdown(label='Optimizer', value='adamw_torch', choices=['adamw_hf', 'adamw_torch', 'adamw_torch_fused', 'adamw_torch_xla', 'adamw_apex_fused', 'adafactor', 'adamw_bnb_8bit', 'adamw_anyprecision', 'sgd', 'adagrad'], info='Optimizer algorithm. adamw_torch is the standard choice. adamw_bnb_8bit uses less VRAM. adafactor is memory-efficient for large models.', elem_classes=['slim-dropdown'])
|
||||||
|
|
|
||||||
|
|
@ -109,7 +109,6 @@ def load_model_HF(model_name):
|
||||||
params = {
|
params = {
|
||||||
'low_cpu_mem_usage': True,
|
'low_cpu_mem_usage': True,
|
||||||
'attn_implementation': shared.args.attn_implementation,
|
'attn_implementation': shared.args.attn_implementation,
|
||||||
'torch_dtype': torch.bfloat16 if shared.args.bf16 else torch.float16,
|
|
||||||
}
|
}
|
||||||
|
|
||||||
if shared.original_args.trust_remote_code:
|
if shared.original_args.trust_remote_code:
|
||||||
|
|
@ -120,6 +119,17 @@ def load_model_HF(model_name):
|
||||||
|
|
||||||
config = AutoConfig.from_pretrained(path_to_model, trust_remote_code=shared.original_args.trust_remote_code)
|
config = AutoConfig.from_pretrained(path_to_model, trust_remote_code=shared.original_args.trust_remote_code)
|
||||||
|
|
||||||
|
# Determine torch_dtype: respect --bf16 flag, otherwise autodetect
|
||||||
|
# from model config, but never allow float32.
|
||||||
|
if shared.args.bf16:
|
||||||
|
params['torch_dtype'] = torch.bfloat16
|
||||||
|
else:
|
||||||
|
dtype = getattr(config, 'torch_dtype', None) or getattr(getattr(config, 'text_config', None), 'torch_dtype', None)
|
||||||
|
if dtype in (torch.float16, torch.bfloat16):
|
||||||
|
params['torch_dtype'] = dtype
|
||||||
|
else:
|
||||||
|
params['torch_dtype'] = torch.float16
|
||||||
|
|
||||||
if 'chatglm' in model_name.lower():
|
if 'chatglm' in model_name.lower():
|
||||||
LoaderClass = AutoModel
|
LoaderClass = AutoModel
|
||||||
else:
|
else:
|
||||||
|
|
|
||||||
|
|
@ -75,7 +75,7 @@ if not shared.args.old_colors:
|
||||||
background_fill_primary_dark='var(--darker-gray, #1C1C1D)',
|
background_fill_primary_dark='var(--darker-gray, #1C1C1D)',
|
||||||
body_background_fill="white",
|
body_background_fill="white",
|
||||||
block_background_fill="transparent",
|
block_background_fill="transparent",
|
||||||
body_text_color='rgb(64, 64, 64)',
|
body_text_color='#1a1a1a',
|
||||||
button_secondary_background_fill="white",
|
button_secondary_background_fill="white",
|
||||||
button_secondary_border_color="var(--border-color-primary)",
|
button_secondary_border_color="var(--border-color-primary)",
|
||||||
block_title_text_color='*body_text_color',
|
block_title_text_color='*body_text_color',
|
||||||
|
|
@ -209,6 +209,7 @@ def list_interface_input_elements():
|
||||||
'textbox',
|
'textbox',
|
||||||
'start_with',
|
'start_with',
|
||||||
'selected_tools',
|
'selected_tools',
|
||||||
|
'mcp_servers',
|
||||||
'mode',
|
'mode',
|
||||||
'chat_style',
|
'chat_style',
|
||||||
'chat-instruct_command',
|
'chat-instruct_command',
|
||||||
|
|
@ -434,6 +435,7 @@ def setup_auto_save():
|
||||||
'custom_system_message',
|
'custom_system_message',
|
||||||
'chat_template_str',
|
'chat_template_str',
|
||||||
'selected_tools',
|
'selected_tools',
|
||||||
|
'mcp_servers',
|
||||||
|
|
||||||
# Parameters tab (ui_parameters.py) - Generation parameters
|
# Parameters tab (ui_parameters.py) - Generation parameters
|
||||||
'preset_menu',
|
'preset_menu',
|
||||||
|
|
|
||||||
|
|
@ -52,7 +52,7 @@ def create_ui():
|
||||||
shared.gradio['html_display'] = gr.HTML(value=chat_html_wrapper({'internal': [], 'visible': [], 'metadata': {}}, '', '', 'chat', 'cai-chat', '')['html'], visible=True)
|
shared.gradio['html_display'] = gr.HTML(value=chat_html_wrapper({'internal': [], 'visible': [], 'metadata': {}}, '', '', 'chat', 'cai-chat', '')['html'], visible=True)
|
||||||
with gr.Row(elem_id="chat-input-row"):
|
with gr.Row(elem_id="chat-input-row"):
|
||||||
with gr.Column(scale=1, elem_id='gr-hover-container'):
|
with gr.Column(scale=1, elem_id='gr-hover-container'):
|
||||||
gr.HTML(value='<div class="hover-element" onclick="void(0)"><span style="width: 100px; display: block" id="hover-element-button">☰</span><div class="hover-menu" id="hover-menu"></div>', elem_id='gr-hover')
|
gr.HTML(value='<div class="hover-element" onclick="void(0)"><span id="hover-element-button"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><line x1="4" y1="6" x2="20" y2="6"></line><line x1="4" y1="12" x2="20" y2="12"></line><line x1="4" y1="18" x2="20" y2="18"></line></svg></span><div class="hover-menu" id="hover-menu"></div></div>', elem_id='gr-hover')
|
||||||
|
|
||||||
with gr.Column(scale=10, elem_id='chat-input-container'):
|
with gr.Column(scale=10, elem_id='chat-input-container'):
|
||||||
shared.gradio['textbox'] = gr.MultimodalTextbox(label='', placeholder='Send a message', file_types=['text', '.pdf', 'image'], file_count="multiple", elem_id='chat-input', elem_classes=['add_scrollbar'])
|
shared.gradio['textbox'] = gr.MultimodalTextbox(label='', placeholder='Send a message', file_types=['text', '.pdf', 'image'], file_count="multiple", elem_id='chat-input', elem_classes=['add_scrollbar'])
|
||||||
|
|
@ -82,7 +82,7 @@ def create_ui():
|
||||||
gr.HTML("<div class='sidebar-vertical-separator'></div>")
|
gr.HTML("<div class='sidebar-vertical-separator'></div>")
|
||||||
|
|
||||||
shared.gradio['reasoning_effort'] = gr.Dropdown(value=shared.settings['reasoning_effort'], choices=['low', 'medium', 'high'], label='Reasoning effort', info='Used by GPT-OSS.')
|
shared.gradio['reasoning_effort'] = gr.Dropdown(value=shared.settings['reasoning_effort'], choices=['low', 'medium', 'high'], label='Reasoning effort', info='Used by GPT-OSS.')
|
||||||
shared.gradio['enable_thinking'] = gr.Checkbox(value=shared.settings['enable_thinking'], label='Enable thinking', info='Used by Seed-OSS and pre-2507 Qwen3.')
|
shared.gradio['enable_thinking'] = gr.Checkbox(value=shared.settings['enable_thinking'], label='Enable thinking', info='For models with thinking support.')
|
||||||
|
|
||||||
gr.HTML("<div class='sidebar-vertical-separator'></div>")
|
gr.HTML("<div class='sidebar-vertical-separator'></div>")
|
||||||
|
|
||||||
|
|
@ -105,6 +105,9 @@ def create_ui():
|
||||||
|
|
||||||
shared.gradio['selected_tools'].change(fn=sync_web_tools, inputs=[shared.gradio['selected_tools']], outputs=[shared.gradio['selected_tools']], show_progress=False)
|
shared.gradio['selected_tools'].change(fn=sync_web_tools, inputs=[shared.gradio['selected_tools']], outputs=[shared.gradio['selected_tools']], show_progress=False)
|
||||||
|
|
||||||
|
with gr.Accordion('MCP servers', open=False):
|
||||||
|
shared.gradio['mcp_servers'] = gr.Textbox(value=shared.settings.get('mcp_servers', ''), lines=3, max_lines=3, label='', info='One url per line. For headers, write url,Header: value,Header2: value2', elem_classes=['add_scrollbar'])
|
||||||
|
|
||||||
gr.HTML("<div class='sidebar-vertical-separator'></div>")
|
gr.HTML("<div class='sidebar-vertical-separator'></div>")
|
||||||
|
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
|
|
|
||||||
|
|
@ -10,7 +10,7 @@ from modules.text_generation import (
|
||||||
stop_everything_event
|
stop_everything_event
|
||||||
)
|
)
|
||||||
from modules.ui_notebook import store_notebook_state_and_debounce
|
from modules.ui_notebook import store_notebook_state_and_debounce
|
||||||
from modules.utils import gradio
|
from modules.utils import gradio, sanitize_filename
|
||||||
|
|
||||||
inputs = ('textbox-default', 'interface_state')
|
inputs = ('textbox-default', 'interface_state')
|
||||||
outputs = ('output_textbox', 'html-default')
|
outputs = ('output_textbox', 'html-default')
|
||||||
|
|
@ -167,6 +167,7 @@ def handle_new_prompt():
|
||||||
|
|
||||||
|
|
||||||
def handle_delete_prompt_confirm_default(prompt_name):
|
def handle_delete_prompt_confirm_default(prompt_name):
|
||||||
|
prompt_name = sanitize_filename(prompt_name)
|
||||||
available_prompts = utils.get_available_prompts()
|
available_prompts = utils.get_available_prompts()
|
||||||
current_index = available_prompts.index(prompt_name) if prompt_name in available_prompts else 0
|
current_index = available_prompts.index(prompt_name) if prompt_name in available_prompts else 0
|
||||||
|
|
||||||
|
|
@ -199,6 +200,8 @@ def handle_rename_prompt_click_default(current_name):
|
||||||
|
|
||||||
|
|
||||||
def handle_rename_prompt_confirm_default(new_name, current_name):
|
def handle_rename_prompt_confirm_default(new_name, current_name):
|
||||||
|
new_name = sanitize_filename(new_name)
|
||||||
|
current_name = sanitize_filename(current_name)
|
||||||
old_path = shared.user_data_dir / "logs" / "notebook" / f"{current_name}.txt"
|
old_path = shared.user_data_dir / "logs" / "notebook" / f"{current_name}.txt"
|
||||||
new_path = shared.user_data_dir / "logs" / "notebook" / f"{new_name}.txt"
|
new_path = shared.user_data_dir / "logs" / "notebook" / f"{new_name}.txt"
|
||||||
|
|
||||||
|
|
|
||||||
|
|
@ -798,6 +798,9 @@ def generate(state, save_images=True):
|
||||||
if seed == -1:
|
if seed == -1:
|
||||||
seed = random.randint(0, 2**32 - 1)
|
seed = random.randint(0, 2**32 - 1)
|
||||||
|
|
||||||
|
# Store resolved seed back so callers (e.g. API) can access it
|
||||||
|
state['image_seed_resolved'] = seed
|
||||||
|
|
||||||
device = get_device()
|
device = get_device()
|
||||||
if device is None:
|
if device is None:
|
||||||
device = "cpu"
|
device = "cpu"
|
||||||
|
|
|
||||||
|
|
@ -51,7 +51,9 @@ def create_ui():
|
||||||
|
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
shared.gradio['vram_info'] = gr.HTML(value=get_initial_vram_info())
|
shared.gradio['vram_info'] = gr.HTML(value=get_initial_vram_info())
|
||||||
shared.gradio['cpu_moe'] = gr.Checkbox(label="cpu-moe", value=shared.args.cpu_moe, info='Move the experts to the CPU. Saves VRAM on MoE models.')
|
if not shared.args.portable:
|
||||||
|
shared.gradio['ik'] = gr.Checkbox(label="ik", value=shared.args.ik, info='Use ik_llama.cpp instead of upstream llama.cpp.')
|
||||||
|
|
||||||
shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming-llm", value=shared.args.streaming_llm, info='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
|
shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming-llm", value=shared.args.streaming_llm, info='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
|
||||||
shared.gradio['load_in_8bit'] = gr.Checkbox(label="load-in-8bit", value=shared.args.load_in_8bit)
|
shared.gradio['load_in_8bit'] = gr.Checkbox(label="load-in-8bit", value=shared.args.load_in_8bit)
|
||||||
shared.gradio['load_in_4bit'] = gr.Checkbox(label="load-in-4bit", value=shared.args.load_in_4bit)
|
shared.gradio['load_in_4bit'] = gr.Checkbox(label="load-in-4bit", value=shared.args.load_in_4bit)
|
||||||
|
|
@ -64,13 +66,13 @@ def create_ui():
|
||||||
)
|
)
|
||||||
|
|
||||||
# Multimodal
|
# Multimodal
|
||||||
with gr.Accordion("Multimodal (vision)", open=False, elem_classes='tgw-accordion') as shared.gradio['mmproj_accordion']:
|
with gr.Accordion("Multimodal (vision)", open=False) as shared.gradio['mmproj_accordion']:
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
shared.gradio['mmproj'] = gr.Dropdown(label="mmproj file", choices=utils.get_available_mmproj(), value=lambda: shared.args.mmproj or 'None', elem_classes='slim-dropdown', info=f'Select a file that matches your model. Must be placed in {shared.user_data_dir}/mmproj/', interactive=not mu)
|
shared.gradio['mmproj'] = gr.Dropdown(label="mmproj file", choices=utils.get_available_mmproj(), value=lambda: shared.args.mmproj or 'None', elem_classes='slim-dropdown', info=f'Select a file that matches your model. Must be placed in {shared.user_data_dir}/mmproj/', interactive=not mu)
|
||||||
ui.create_refresh_button(shared.gradio['mmproj'], lambda: None, lambda: {'choices': utils.get_available_mmproj()}, 'refresh-button', interactive=not mu)
|
ui.create_refresh_button(shared.gradio['mmproj'], lambda: None, lambda: {'choices': utils.get_available_mmproj()}, 'refresh-button', interactive=not mu)
|
||||||
|
|
||||||
# Speculative decoding
|
# Speculative decoding
|
||||||
with gr.Accordion("Speculative decoding", open=False, elem_classes='tgw-accordion') as shared.gradio['speculative_decoding_accordion']:
|
with gr.Accordion("Speculative decoding", open=False) as shared.gradio['speculative_decoding_accordion']:
|
||||||
shared.gradio['draft_max'] = gr.Number(label="draft-max", precision=0, step=1, value=shared.args.draft_max, info='Maximum number of tokens to draft for speculative decoding. Recommended: 4 for draft model, 64 for n-gram.')
|
shared.gradio['draft_max'] = gr.Number(label="draft-max", precision=0, step=1, value=shared.args.draft_max, info='Maximum number of tokens to draft for speculative decoding. Recommended: 4 for draft model, 64 for n-gram.')
|
||||||
|
|
||||||
gr.Markdown('#### Draft model')
|
gr.Markdown('#### Draft model')
|
||||||
|
|
@ -89,7 +91,7 @@ def create_ui():
|
||||||
shared.gradio['spec_ngram_min_hits'] = gr.Number(label="spec-ngram-min-hits", precision=0, step=1, value=shared.args.spec_ngram_min_hits, info='Minimum n-gram hits for ngram-map speculative decoding.', visible=shared.args.spec_type != 'none')
|
shared.gradio['spec_ngram_min_hits'] = gr.Number(label="spec-ngram-min-hits", precision=0, step=1, value=shared.args.spec_ngram_min_hits, info='Minimum n-gram hits for ngram-map speculative decoding.', visible=shared.args.spec_type != 'none')
|
||||||
|
|
||||||
gr.Markdown("## Other options")
|
gr.Markdown("## Other options")
|
||||||
with gr.Accordion("See more options", open=False, elem_classes='tgw-accordion'):
|
with gr.Accordion("See more options", open=False):
|
||||||
with gr.Row():
|
with gr.Row():
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
shared.gradio['parallel'] = gr.Slider(label="parallel", minimum=1, step=1, maximum=64, value=shared.args.parallel, info='Number of parallel request slots for the API. The context size is divided equally among slots. For example, to have 4 slots with 8192 context each, set ctx_size to 32768.')
|
shared.gradio['parallel'] = gr.Slider(label="parallel", minimum=1, step=1, maximum=64, value=shared.args.parallel, info='Number of parallel request slots for the API. The context size is divided equally among slots. For example, to have 4 slots with 8192 context each, set ctx_size to 32768.')
|
||||||
|
|
@ -106,6 +108,7 @@ def create_ui():
|
||||||
with gr.Column():
|
with gr.Column():
|
||||||
shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='Use PyTorch in CPU mode.')
|
shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='Use PyTorch in CPU mode.')
|
||||||
shared.gradio['disk'] = gr.Checkbox(label="disk", value=shared.args.disk)
|
shared.gradio['disk'] = gr.Checkbox(label="disk", value=shared.args.disk)
|
||||||
|
shared.gradio['cpu_moe'] = gr.Checkbox(label="cpu-moe", value=shared.args.cpu_moe, info='Move the experts to the CPU. Saves VRAM on MoE models.')
|
||||||
shared.gradio['row_split'] = gr.Checkbox(label="row_split", value=shared.args.row_split, info='Split the model by rows across GPUs. This may improve multi-gpu performance.')
|
shared.gradio['row_split'] = gr.Checkbox(label="row_split", value=shared.args.row_split, info='Split the model by rows across GPUs. This may improve multi-gpu performance.')
|
||||||
shared.gradio['no_kv_offload'] = gr.Checkbox(label="no_kv_offload", value=shared.args.no_kv_offload, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces performance.')
|
shared.gradio['no_kv_offload'] = gr.Checkbox(label="no_kv_offload", value=shared.args.no_kv_offload, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces performance.')
|
||||||
shared.gradio['no_mmap'] = gr.Checkbox(label="no-mmap", value=shared.args.no_mmap)
|
shared.gradio['no_mmap'] = gr.Checkbox(label="no-mmap", value=shared.args.no_mmap)
|
||||||
|
|
|
||||||
|
|
@ -11,7 +11,7 @@ from modules.text_generation import (
|
||||||
get_token_ids,
|
get_token_ids,
|
||||||
stop_everything_event
|
stop_everything_event
|
||||||
)
|
)
|
||||||
from modules.utils import gradio
|
from modules.utils import gradio, sanitize_filename
|
||||||
|
|
||||||
_notebook_file_lock = threading.Lock()
|
_notebook_file_lock = threading.Lock()
|
||||||
_notebook_auto_save_timer = None
|
_notebook_auto_save_timer = None
|
||||||
|
|
@ -202,6 +202,7 @@ def handle_new_prompt():
|
||||||
|
|
||||||
|
|
||||||
def handle_delete_prompt_confirm_notebook(prompt_name):
|
def handle_delete_prompt_confirm_notebook(prompt_name):
|
||||||
|
prompt_name = sanitize_filename(prompt_name)
|
||||||
available_prompts = utils.get_available_prompts()
|
available_prompts = utils.get_available_prompts()
|
||||||
current_index = available_prompts.index(prompt_name) if prompt_name in available_prompts else 0
|
current_index = available_prompts.index(prompt_name) if prompt_name in available_prompts else 0
|
||||||
|
|
||||||
|
|
@ -233,6 +234,8 @@ def handle_rename_prompt_click_notebook(current_name):
|
||||||
|
|
||||||
|
|
||||||
def handle_rename_prompt_confirm_notebook(new_name, current_name):
|
def handle_rename_prompt_confirm_notebook(new_name, current_name):
|
||||||
|
new_name = sanitize_filename(new_name)
|
||||||
|
current_name = sanitize_filename(current_name)
|
||||||
old_path = shared.user_data_dir / "logs" / "notebook" / f"{current_name}.txt"
|
old_path = shared.user_data_dir / "logs" / "notebook" / f"{current_name}.txt"
|
||||||
new_path = shared.user_data_dir / "logs" / "notebook" / f"{new_name}.txt"
|
new_path = shared.user_data_dir / "logs" / "notebook" / f"{new_name}.txt"
|
||||||
|
|
||||||
|
|
@ -249,6 +252,7 @@ def handle_rename_prompt_confirm_notebook(new_name, current_name):
|
||||||
|
|
||||||
def autosave_prompt(text, prompt_name):
|
def autosave_prompt(text, prompt_name):
|
||||||
"""Automatically save the text to the selected prompt file"""
|
"""Automatically save the text to the selected prompt file"""
|
||||||
|
prompt_name = sanitize_filename(prompt_name)
|
||||||
if prompt_name and text.strip():
|
if prompt_name and text.strip():
|
||||||
prompt_path = shared.user_data_dir / "logs" / "notebook" / f"{prompt_name}.txt"
|
prompt_path = shared.user_data_dir / "logs" / "notebook" / f"{prompt_name}.txt"
|
||||||
prompt_path.parent.mkdir(parents=True, exist_ok=True)
|
prompt_path.parent.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
|
||||||
|
|
@ -105,6 +105,9 @@ def resolve_model_path(model_name_or_path, image_model=False):
|
||||||
before the default models directory.
|
before the default models directory.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
if model_name_or_path is None:
|
||||||
|
raise FileNotFoundError("No model specified.")
|
||||||
|
|
||||||
path_candidate = Path(model_name_or_path)
|
path_candidate = Path(model_name_or_path)
|
||||||
if path_candidate.exists():
|
if path_candidate.exists():
|
||||||
return path_candidate
|
return path_candidate
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
accelerate==1.12.*
|
accelerate==1.13.*
|
||||||
audioop-lts<1.0; python_version >= "3.13"
|
audioop-lts<1.0; python_version >= "3.13"
|
||||||
bitsandbytes==0.49.*
|
bitsandbytes==0.49.*
|
||||||
datasets
|
datasets
|
||||||
|
|
@ -9,6 +9,7 @@ flash-linear-attention==0.4.*
|
||||||
huggingface-hub==1.5.*
|
huggingface-hub==1.5.*
|
||||||
jinja2==3.1.6
|
jinja2==3.1.6
|
||||||
markdown
|
markdown
|
||||||
|
mcp==1.27.0
|
||||||
numpy==2.2.*
|
numpy==2.2.*
|
||||||
pandas
|
pandas
|
||||||
peft==0.18.*
|
peft==0.18.*
|
||||||
|
|
@ -25,14 +26,14 @@ sentencepiece
|
||||||
tensorboard
|
tensorboard
|
||||||
torchao==0.15.*
|
torchao==0.15.*
|
||||||
trafilatura==2.0.0
|
trafilatura==2.0.0
|
||||||
transformers==5.3.*
|
transformers==5.5.*
|
||||||
triton-windows==3.5.1.post24; platform_system == "Windows"
|
triton-windows==3.5.1.post24; platform_system == "Windows"
|
||||||
tqdm
|
tqdm
|
||||||
wandb
|
wandb
|
||||||
|
|
||||||
# Gradio
|
# Gradio
|
||||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||||
|
|
||||||
# API
|
# API
|
||||||
flask_cloudflared==0.0.15
|
flask_cloudflared==0.0.15
|
||||||
|
|
@ -40,9 +41,11 @@ sse-starlette==1.6.5
|
||||||
tiktoken
|
tiktoken
|
||||||
|
|
||||||
# CUDA wheels
|
# CUDA wheels
|
||||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
|
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.26/exllamav3-0.0.26+cu128.torch2.9.0-cp313-cp313-win_amd64.whl; platform_system == "Windows" and python_version == "3.13"
|
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||||
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.26/exllamav3-0.0.26+cu128.torch2.9.0-cp313-cp313-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.13"
|
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
|
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.28/exllamav3-0.0.28+cu128.torch2.9.0-cp313-cp313-win_amd64.whl; platform_system == "Windows" and python_version == "3.13"
|
||||||
|
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.28/exllamav3-0.0.28+cu128.torch2.9.0-cp313-cp313-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.13"
|
||||||
https://github.com/kingbri1/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu128torch2.9.0cxx11abiFALSE-cp313-cp313-win_amd64.whl; platform_system == "Windows" and python_version == "3.13"
|
https://github.com/kingbri1/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu128torch2.9.0cxx11abiFALSE-cp313-cp313-win_amd64.whl; platform_system == "Windows" and python_version == "3.13"
|
||||||
https://github.com/kingbri1/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu128torch2.9.0cxx11abiFALSE-cp313-cp313-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.13"
|
https://github.com/kingbri1/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu128torch2.9.0cxx11abiFALSE-cp313-cp313-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.13"
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
accelerate==1.12.*
|
accelerate==1.13.*
|
||||||
audioop-lts<1.0; python_version >= "3.13"
|
audioop-lts<1.0; python_version >= "3.13"
|
||||||
datasets
|
datasets
|
||||||
diffusers==0.37.*
|
diffusers==0.37.*
|
||||||
|
|
@ -7,6 +7,7 @@ fastapi==0.112.4
|
||||||
huggingface-hub==1.5.*
|
huggingface-hub==1.5.*
|
||||||
jinja2==3.1.6
|
jinja2==3.1.6
|
||||||
markdown
|
markdown
|
||||||
|
mcp==1.27.0
|
||||||
numpy==2.2.*
|
numpy==2.2.*
|
||||||
pandas
|
pandas
|
||||||
peft==0.18.*
|
peft==0.18.*
|
||||||
|
|
@ -22,14 +23,14 @@ scipy
|
||||||
sentencepiece
|
sentencepiece
|
||||||
tensorboard
|
tensorboard
|
||||||
torchao==0.15.*
|
torchao==0.15.*
|
||||||
transformers==5.3.*
|
transformers==5.5.*
|
||||||
tqdm
|
tqdm
|
||||||
trafilatura==2.0.0
|
trafilatura==2.0.0
|
||||||
wandb
|
wandb
|
||||||
|
|
||||||
# Gradio
|
# Gradio
|
||||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||||
|
|
||||||
# API
|
# API
|
||||||
flask_cloudflared==0.0.15
|
flask_cloudflared==0.0.15
|
||||||
|
|
@ -37,5 +38,5 @@ sse-starlette==1.6.5
|
||||||
tiktoken
|
tiktoken
|
||||||
|
|
||||||
# AMD wheels
|
# AMD wheels
|
||||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
|
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
accelerate==1.12.*
|
accelerate==1.13.*
|
||||||
audioop-lts<1.0; python_version >= "3.13"
|
audioop-lts<1.0; python_version >= "3.13"
|
||||||
datasets
|
datasets
|
||||||
diffusers==0.37.*
|
diffusers==0.37.*
|
||||||
|
|
@ -7,6 +7,7 @@ fastapi==0.112.4
|
||||||
huggingface-hub==1.5.*
|
huggingface-hub==1.5.*
|
||||||
jinja2==3.1.6
|
jinja2==3.1.6
|
||||||
markdown
|
markdown
|
||||||
|
mcp==1.27.0
|
||||||
numpy==2.2.*
|
numpy==2.2.*
|
||||||
pandas
|
pandas
|
||||||
peft==0.18.*
|
peft==0.18.*
|
||||||
|
|
@ -22,14 +23,14 @@ scipy
|
||||||
sentencepiece
|
sentencepiece
|
||||||
tensorboard
|
tensorboard
|
||||||
torchao==0.15.*
|
torchao==0.15.*
|
||||||
transformers==5.3.*
|
transformers==5.5.*
|
||||||
tqdm
|
tqdm
|
||||||
trafilatura==2.0.0
|
trafilatura==2.0.0
|
||||||
wandb
|
wandb
|
||||||
|
|
||||||
# Gradio
|
# Gradio
|
||||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||||
|
|
||||||
# API
|
# API
|
||||||
flask_cloudflared==0.0.15
|
flask_cloudflared==0.0.15
|
||||||
|
|
@ -37,4 +38,4 @@ sse-starlette==1.6.5
|
||||||
tiktoken
|
tiktoken
|
||||||
|
|
||||||
# Mac wheels
|
# Mac wheels
|
||||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
|
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
accelerate==1.12.*
|
accelerate==1.13.*
|
||||||
audioop-lts<1.0; python_version >= "3.13"
|
audioop-lts<1.0; python_version >= "3.13"
|
||||||
datasets
|
datasets
|
||||||
diffusers==0.37.*
|
diffusers==0.37.*
|
||||||
|
|
@ -7,6 +7,7 @@ fastapi==0.112.4
|
||||||
huggingface-hub==1.5.*
|
huggingface-hub==1.5.*
|
||||||
jinja2==3.1.6
|
jinja2==3.1.6
|
||||||
markdown
|
markdown
|
||||||
|
mcp==1.27.0
|
||||||
numpy==2.2.*
|
numpy==2.2.*
|
||||||
pandas
|
pandas
|
||||||
peft==0.18.*
|
peft==0.18.*
|
||||||
|
|
@ -22,14 +23,14 @@ scipy
|
||||||
sentencepiece
|
sentencepiece
|
||||||
tensorboard
|
tensorboard
|
||||||
torchao==0.15.*
|
torchao==0.15.*
|
||||||
transformers==5.3.*
|
transformers==5.5.*
|
||||||
tqdm
|
tqdm
|
||||||
trafilatura==2.0.0
|
trafilatura==2.0.0
|
||||||
wandb
|
wandb
|
||||||
|
|
||||||
# Gradio
|
# Gradio
|
||||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||||
|
|
||||||
# API
|
# API
|
||||||
flask_cloudflared==0.0.15
|
flask_cloudflared==0.0.15
|
||||||
|
|
@ -37,4 +38,4 @@ sse-starlette==1.6.5
|
||||||
tiktoken
|
tiktoken
|
||||||
|
|
||||||
# Mac wheels
|
# Mac wheels
|
||||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
|
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
accelerate==1.12.*
|
accelerate==1.13.*
|
||||||
audioop-lts<1.0; python_version >= "3.13"
|
audioop-lts<1.0; python_version >= "3.13"
|
||||||
datasets
|
datasets
|
||||||
diffusers==0.37.*
|
diffusers==0.37.*
|
||||||
|
|
@ -7,6 +7,7 @@ fastapi==0.112.4
|
||||||
huggingface-hub==1.5.*
|
huggingface-hub==1.5.*
|
||||||
jinja2==3.1.6
|
jinja2==3.1.6
|
||||||
markdown
|
markdown
|
||||||
|
mcp==1.27.0
|
||||||
numpy==2.2.*
|
numpy==2.2.*
|
||||||
pandas
|
pandas
|
||||||
peft==0.18.*
|
peft==0.18.*
|
||||||
|
|
@ -22,14 +23,14 @@ scipy
|
||||||
sentencepiece
|
sentencepiece
|
||||||
tensorboard
|
tensorboard
|
||||||
torchao==0.15.*
|
torchao==0.15.*
|
||||||
transformers==5.3.*
|
transformers==5.5.*
|
||||||
tqdm
|
tqdm
|
||||||
trafilatura==2.0.0
|
trafilatura==2.0.0
|
||||||
wandb
|
wandb
|
||||||
|
|
||||||
# Gradio
|
# Gradio
|
||||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||||
|
|
||||||
# API
|
# API
|
||||||
flask_cloudflared==0.0.15
|
flask_cloudflared==0.0.15
|
||||||
|
|
@ -37,5 +38,7 @@ sse-starlette==1.6.5
|
||||||
tiktoken
|
tiktoken
|
||||||
|
|
||||||
# llama.cpp (CPU only)
|
# llama.cpp (CPU only)
|
||||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
|
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||||
|
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
|
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||||
|
|
|
||||||
|
|
@ -1,4 +1,4 @@
|
||||||
accelerate==1.12.*
|
accelerate==1.13.*
|
||||||
audioop-lts<1.0; python_version >= "3.13"
|
audioop-lts<1.0; python_version >= "3.13"
|
||||||
datasets
|
datasets
|
||||||
diffusers==0.37.*
|
diffusers==0.37.*
|
||||||
|
|
@ -7,6 +7,7 @@ fastapi==0.112.4
|
||||||
huggingface-hub==1.5.*
|
huggingface-hub==1.5.*
|
||||||
jinja2==3.1.6
|
jinja2==3.1.6
|
||||||
markdown
|
markdown
|
||||||
|
mcp==1.27.0
|
||||||
numpy==2.2.*
|
numpy==2.2.*
|
||||||
pandas
|
pandas
|
||||||
peft==0.18.*
|
peft==0.18.*
|
||||||
|
|
@ -22,14 +23,14 @@ scipy
|
||||||
sentencepiece
|
sentencepiece
|
||||||
tensorboard
|
tensorboard
|
||||||
torchao==0.15.*
|
torchao==0.15.*
|
||||||
transformers==5.3.*
|
transformers==5.5.*
|
||||||
tqdm
|
tqdm
|
||||||
trafilatura==2.0.0
|
trafilatura==2.0.0
|
||||||
wandb
|
wandb
|
||||||
|
|
||||||
# Gradio
|
# Gradio
|
||||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||||
|
|
||||||
# API
|
# API
|
||||||
flask_cloudflared==0.0.15
|
flask_cloudflared==0.0.15
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ fastapi==0.112.4
|
||||||
huggingface-hub==1.5.*
|
huggingface-hub==1.5.*
|
||||||
jinja2==3.1.6
|
jinja2==3.1.6
|
||||||
markdown
|
markdown
|
||||||
|
mcp==1.27.0
|
||||||
numpy==2.2.*
|
numpy==2.2.*
|
||||||
pydantic==2.11.0
|
pydantic==2.11.0
|
||||||
pymupdf==1.27.*
|
pymupdf==1.27.*
|
||||||
|
|
@ -14,8 +15,8 @@ trafilatura==2.0.0
|
||||||
tqdm
|
tqdm
|
||||||
|
|
||||||
# Gradio
|
# Gradio
|
||||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||||
|
|
||||||
# API
|
# API
|
||||||
flask_cloudflared==0.0.15
|
flask_cloudflared==0.0.15
|
||||||
|
|
@ -23,5 +24,5 @@ sse-starlette==1.6.5
|
||||||
tiktoken
|
tiktoken
|
||||||
|
|
||||||
# CUDA wheels
|
# CUDA wheels
|
||||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
|
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ fastapi==0.112.4
|
||||||
huggingface-hub==1.5.*
|
huggingface-hub==1.5.*
|
||||||
jinja2==3.1.6
|
jinja2==3.1.6
|
||||||
markdown
|
markdown
|
||||||
|
mcp==1.27.0
|
||||||
numpy==2.2.*
|
numpy==2.2.*
|
||||||
pydantic==2.11.0
|
pydantic==2.11.0
|
||||||
pymupdf==1.27.*
|
pymupdf==1.27.*
|
||||||
|
|
@ -14,8 +15,8 @@ trafilatura==2.0.0
|
||||||
tqdm
|
tqdm
|
||||||
|
|
||||||
# Gradio
|
# Gradio
|
||||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||||
|
|
||||||
# API
|
# API
|
||||||
flask_cloudflared==0.0.15
|
flask_cloudflared==0.0.15
|
||||||
|
|
@ -23,5 +24,5 @@ sse-starlette==1.6.5
|
||||||
tiktoken
|
tiktoken
|
||||||
|
|
||||||
# AMD wheels
|
# AMD wheels
|
||||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
|
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ fastapi==0.112.4
|
||||||
huggingface-hub==1.5.*
|
huggingface-hub==1.5.*
|
||||||
jinja2==3.1.6
|
jinja2==3.1.6
|
||||||
markdown
|
markdown
|
||||||
|
mcp==1.27.0
|
||||||
numpy==2.2.*
|
numpy==2.2.*
|
||||||
pydantic==2.11.0
|
pydantic==2.11.0
|
||||||
pymupdf==1.27.*
|
pymupdf==1.27.*
|
||||||
|
|
@ -14,8 +15,8 @@ trafilatura==2.0.0
|
||||||
tqdm
|
tqdm
|
||||||
|
|
||||||
# Gradio
|
# Gradio
|
||||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||||
|
|
||||||
# API
|
# API
|
||||||
flask_cloudflared==0.0.15
|
flask_cloudflared==0.0.15
|
||||||
|
|
@ -23,4 +24,4 @@ sse-starlette==1.6.5
|
||||||
tiktoken
|
tiktoken
|
||||||
|
|
||||||
# Mac wheels
|
# Mac wheels
|
||||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
|
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ fastapi==0.112.4
|
||||||
huggingface-hub==1.5.*
|
huggingface-hub==1.5.*
|
||||||
jinja2==3.1.6
|
jinja2==3.1.6
|
||||||
markdown
|
markdown
|
||||||
|
mcp==1.27.0
|
||||||
numpy==2.2.*
|
numpy==2.2.*
|
||||||
pydantic==2.11.0
|
pydantic==2.11.0
|
||||||
pymupdf==1.27.*
|
pymupdf==1.27.*
|
||||||
|
|
@ -14,8 +15,8 @@ trafilatura==2.0.0
|
||||||
tqdm
|
tqdm
|
||||||
|
|
||||||
# Gradio
|
# Gradio
|
||||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||||
|
|
||||||
# API
|
# API
|
||||||
flask_cloudflared==0.0.15
|
flask_cloudflared==0.0.15
|
||||||
|
|
@ -23,4 +24,4 @@ sse-starlette==1.6.5
|
||||||
tiktoken
|
tiktoken
|
||||||
|
|
||||||
# Mac wheels
|
# Mac wheels
|
||||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
|
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ fastapi==0.112.4
|
||||||
huggingface-hub==1.5.*
|
huggingface-hub==1.5.*
|
||||||
jinja2==3.1.6
|
jinja2==3.1.6
|
||||||
markdown
|
markdown
|
||||||
|
mcp==1.27.0
|
||||||
numpy==2.2.*
|
numpy==2.2.*
|
||||||
pydantic==2.11.0
|
pydantic==2.11.0
|
||||||
pymupdf==1.27.*
|
pymupdf==1.27.*
|
||||||
|
|
@ -14,8 +15,8 @@ trafilatura==2.0.0
|
||||||
tqdm
|
tqdm
|
||||||
|
|
||||||
# Gradio
|
# Gradio
|
||||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||||
|
|
||||||
# API
|
# API
|
||||||
flask_cloudflared==0.0.15
|
flask_cloudflared==0.0.15
|
||||||
|
|
@ -23,5 +24,5 @@ sse-starlette==1.6.5
|
||||||
tiktoken
|
tiktoken
|
||||||
|
|
||||||
# llama.cpp (CPU only)
|
# llama.cpp (CPU only)
|
||||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
|
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ fastapi==0.112.4
|
||||||
huggingface-hub==1.5.*
|
huggingface-hub==1.5.*
|
||||||
jinja2==3.1.6
|
jinja2==3.1.6
|
||||||
markdown
|
markdown
|
||||||
|
mcp==1.27.0
|
||||||
numpy==2.2.*
|
numpy==2.2.*
|
||||||
pydantic==2.11.0
|
pydantic==2.11.0
|
||||||
pymupdf==1.27.*
|
pymupdf==1.27.*
|
||||||
|
|
@ -14,8 +15,8 @@ trafilatura==2.0.0
|
||||||
tqdm
|
tqdm
|
||||||
|
|
||||||
# Gradio
|
# Gradio
|
||||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||||
|
|
||||||
# API
|
# API
|
||||||
flask_cloudflared==0.0.15
|
flask_cloudflared==0.0.15
|
||||||
|
|
@ -23,5 +24,5 @@ sse-starlette==1.6.5
|
||||||
tiktoken
|
tiktoken
|
||||||
|
|
||||||
# CUDA wheels
|
# CUDA wheels
|
||||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
|
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
|
|
|
||||||
28
requirements/portable/requirements_ik.txt
Normal file
28
requirements/portable/requirements_ik.txt
Normal file
|
|
@ -0,0 +1,28 @@
|
||||||
|
audioop-lts<1.0; python_version >= "3.13"
|
||||||
|
fastapi==0.112.4
|
||||||
|
huggingface-hub==1.5.*
|
||||||
|
jinja2==3.1.6
|
||||||
|
markdown
|
||||||
|
mcp==1.27.0
|
||||||
|
numpy==2.2.*
|
||||||
|
pydantic==2.11.0
|
||||||
|
pymupdf==1.27.*
|
||||||
|
python-docx==1.1.2
|
||||||
|
pyyaml
|
||||||
|
requests
|
||||||
|
rich
|
||||||
|
trafilatura==2.0.0
|
||||||
|
tqdm
|
||||||
|
|
||||||
|
# Gradio
|
||||||
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||||
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||||
|
|
||||||
|
# API
|
||||||
|
flask_cloudflared==0.0.15
|
||||||
|
sse-starlette==1.6.5
|
||||||
|
tiktoken
|
||||||
|
|
||||||
|
# CUDA wheels
|
||||||
|
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||||
|
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
28
requirements/portable/requirements_ik_cpu_only.txt
Normal file
28
requirements/portable/requirements_ik_cpu_only.txt
Normal file
|
|
@ -0,0 +1,28 @@
|
||||||
|
audioop-lts<1.0; python_version >= "3.13"
|
||||||
|
fastapi==0.112.4
|
||||||
|
huggingface-hub==1.5.*
|
||||||
|
jinja2==3.1.6
|
||||||
|
markdown
|
||||||
|
mcp==1.27.0
|
||||||
|
numpy==2.2.*
|
||||||
|
pydantic==2.11.0
|
||||||
|
pymupdf==1.27.*
|
||||||
|
python-docx==1.1.2
|
||||||
|
pyyaml
|
||||||
|
requests
|
||||||
|
rich
|
||||||
|
trafilatura==2.0.0
|
||||||
|
tqdm
|
||||||
|
|
||||||
|
# Gradio
|
||||||
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||||
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||||
|
|
||||||
|
# API
|
||||||
|
flask_cloudflared==0.0.15
|
||||||
|
sse-starlette==1.6.5
|
||||||
|
tiktoken
|
||||||
|
|
||||||
|
# ik_llama.cpp (CPU only)
|
||||||
|
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
|
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||||
28
requirements/portable/requirements_ik_cuda131.txt
Normal file
28
requirements/portable/requirements_ik_cuda131.txt
Normal file
|
|
@ -0,0 +1,28 @@
|
||||||
|
audioop-lts<1.0; python_version >= "3.13"
|
||||||
|
fastapi==0.112.4
|
||||||
|
huggingface-hub==1.5.*
|
||||||
|
jinja2==3.1.6
|
||||||
|
markdown
|
||||||
|
mcp==1.27.0
|
||||||
|
numpy==2.2.*
|
||||||
|
pydantic==2.11.0
|
||||||
|
pymupdf==1.27.*
|
||||||
|
python-docx==1.1.2
|
||||||
|
pyyaml
|
||||||
|
requests
|
||||||
|
rich
|
||||||
|
trafilatura==2.0.0
|
||||||
|
tqdm
|
||||||
|
|
||||||
|
# Gradio
|
||||||
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||||
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||||
|
|
||||||
|
# API
|
||||||
|
flask_cloudflared==0.0.15
|
||||||
|
sse-starlette==1.6.5
|
||||||
|
tiktoken
|
||||||
|
|
||||||
|
# CUDA wheels
|
||||||
|
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||||
|
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
|
|
@ -3,6 +3,7 @@ fastapi==0.112.4
|
||||||
huggingface-hub==1.5.*
|
huggingface-hub==1.5.*
|
||||||
jinja2==3.1.6
|
jinja2==3.1.6
|
||||||
markdown
|
markdown
|
||||||
|
mcp==1.27.0
|
||||||
numpy==2.2.*
|
numpy==2.2.*
|
||||||
pydantic==2.11.0
|
pydantic==2.11.0
|
||||||
pymupdf==1.27.*
|
pymupdf==1.27.*
|
||||||
|
|
@ -14,8 +15,8 @@ trafilatura==2.0.0
|
||||||
tqdm
|
tqdm
|
||||||
|
|
||||||
# Gradio
|
# Gradio
|
||||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||||
|
|
||||||
# API
|
# API
|
||||||
flask_cloudflared==0.0.15
|
flask_cloudflared==0.0.15
|
||||||
|
|
|
||||||
|
|
@ -3,6 +3,7 @@ fastapi==0.112.4
|
||||||
huggingface-hub==1.5.*
|
huggingface-hub==1.5.*
|
||||||
jinja2==3.1.6
|
jinja2==3.1.6
|
||||||
markdown
|
markdown
|
||||||
|
mcp==1.27.0
|
||||||
numpy==2.2.*
|
numpy==2.2.*
|
||||||
pydantic==2.11.0
|
pydantic==2.11.0
|
||||||
pymupdf==1.27.*
|
pymupdf==1.27.*
|
||||||
|
|
@ -14,8 +15,8 @@ trafilatura==2.0.0
|
||||||
tqdm
|
tqdm
|
||||||
|
|
||||||
# Gradio
|
# Gradio
|
||||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
|
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||||
|
|
||||||
# API
|
# API
|
||||||
flask_cloudflared==0.0.15
|
flask_cloudflared==0.0.15
|
||||||
|
|
@ -23,5 +24,5 @@ sse-starlette==1.6.5
|
||||||
tiktoken
|
tiktoken
|
||||||
|
|
||||||
# Vulkan wheels
|
# Vulkan wheels
|
||||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
|
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
|
|
|
||||||
|
|
@ -18,7 +18,6 @@ import modules.extensions as extensions_module
|
||||||
from modules.LoRA import add_lora_to_model
|
from modules.LoRA import add_lora_to_model
|
||||||
from modules.models import load_model, unload_model_if_idle
|
from modules.models import load_model, unload_model_if_idle
|
||||||
from modules.models_settings import (
|
from modules.models_settings import (
|
||||||
get_fallback_settings,
|
|
||||||
get_model_metadata,
|
get_model_metadata,
|
||||||
update_model_parameters
|
update_model_parameters
|
||||||
)
|
)
|
||||||
|
|
@ -271,10 +270,6 @@ if __name__ == "__main__":
|
||||||
# Apply CLI overrides for image model settings (CLI flags take precedence over saved settings)
|
# Apply CLI overrides for image model settings (CLI flags take precedence over saved settings)
|
||||||
shared.apply_image_model_cli_overrides()
|
shared.apply_image_model_cli_overrides()
|
||||||
|
|
||||||
# Fallback settings for models
|
|
||||||
shared.model_config['.*'] = get_fallback_settings()
|
|
||||||
shared.model_config.move_to_end('.*', last=False) # Move to the beginning
|
|
||||||
|
|
||||||
# Activate the extensions listed on settings.yaml
|
# Activate the extensions listed on settings.yaml
|
||||||
extensions_module.available_extensions = utils.get_available_extensions()
|
extensions_module.available_extensions = utils.get_available_extensions()
|
||||||
for extension in shared.settings['default_extensions']:
|
for extension in shared.settings['default_extensions']:
|
||||||
|
|
|
||||||
|
|
@ -1,203 +0,0 @@
|
||||||
.*(llama|alpac|vicuna|guanaco|koala|llava|wizardlm|metharme|pygmalion-7b|pygmalion-2|mythalion|wizard-mega|openbuddy|vigogne|h2ogpt-research|manticore):
|
|
||||||
model_type: 'llama'
|
|
||||||
.*(opt-|opt_|opt1|opt3|optfor|galactica|galpaca|pygmalion-350m):
|
|
||||||
model_type: 'opt'
|
|
||||||
.*(gpt-j|gptj|gpt4all-j|malion-6b|pygway|pygmalion-6b|dolly-v1):
|
|
||||||
model_type: 'gptj'
|
|
||||||
.*(gpt-neox|koalpaca-polyglot|polyglot.*koalpaca|polyglot-ko|polyglot_ko|pythia|stablelm|incite|dolly-v2|polycoder|h2ogpt-oig|h2ogpt-oasst1|h2ogpt-gm):
|
|
||||||
model_type: 'gptneox'
|
|
||||||
.*bloom:
|
|
||||||
model_type: 'bloom'
|
|
||||||
.*gpt2:
|
|
||||||
model_type: 'gpt2'
|
|
||||||
.*falcon:
|
|
||||||
model_type: 'falcon'
|
|
||||||
.*mpt:
|
|
||||||
model_type: 'mpt'
|
|
||||||
.*(starcoder|starchat):
|
|
||||||
model_type: 'starcoder'
|
|
||||||
.*dolly-v2:
|
|
||||||
model_type: 'dollyv2'
|
|
||||||
.*replit:
|
|
||||||
model_type: 'replit'
|
|
||||||
.*(oasst|openassistant-|stablelm-7b-sft-v7-epoch-3):
|
|
||||||
instruction_template: 'Open Assistant'
|
|
||||||
skip_special_tokens: false
|
|
||||||
(?!.*galactica)(?!.*reward).*openassistant:
|
|
||||||
instruction_template: 'Open Assistant'
|
|
||||||
skip_special_tokens: false
|
|
||||||
.*galactica:
|
|
||||||
skip_special_tokens: false
|
|
||||||
.*dolly-v[0-9]-[0-9]*b:
|
|
||||||
instruction_template: 'Alpaca'
|
|
||||||
skip_special_tokens: false
|
|
||||||
.*alpaca-native-4bit:
|
|
||||||
instruction_template: 'Alpaca'
|
|
||||||
.*llava:
|
|
||||||
instruction_template: 'LLaVA'
|
|
||||||
.*llava.*1.5:
|
|
||||||
instruction_template: 'Vicuna-v1.1'
|
|
||||||
.*wizard.*mega:
|
|
||||||
instruction_template: 'Wizard-Mega'
|
|
||||||
.*starchat-beta:
|
|
||||||
instruction_template: 'Starchat-Beta'
|
|
||||||
(?!.*v0)(?!.*1.1)(?!.*1_1)(?!.*stable)(?!.*chinese).*vicuna:
|
|
||||||
instruction_template: 'Vicuna-v0'
|
|
||||||
.*vicuna.*v0:
|
|
||||||
instruction_template: 'Vicuna-v0'
|
|
||||||
.*vicuna.*(1.1|1_1|1.3|1_3):
|
|
||||||
instruction_template: 'Vicuna-v1.1'
|
|
||||||
.*vicuna.*(1.5|1_5):
|
|
||||||
instruction_template: 'Vicuna-v1.1'
|
|
||||||
.*stable.*vicuna:
|
|
||||||
instruction_template: 'StableVicuna'
|
|
||||||
(?!.*chat).*chinese-vicuna:
|
|
||||||
instruction_template: 'Alpaca'
|
|
||||||
.*chinese-vicuna.*chat:
|
|
||||||
instruction_template: 'Chinese-Vicuna-Chat'
|
|
||||||
.*alpaca:
|
|
||||||
instruction_template: 'Alpaca'
|
|
||||||
.*koala:
|
|
||||||
instruction_template: 'Koala'
|
|
||||||
.*chatglm:
|
|
||||||
instruction_template: 'ChatGLM'
|
|
||||||
.*(metharme|pygmalion|mythalion):
|
|
||||||
instruction_template: 'Metharme'
|
|
||||||
.*raven:
|
|
||||||
instruction_template: 'RWKV-Raven'
|
|
||||||
.*moss-moon.*sft:
|
|
||||||
instruction_template: 'MOSS'
|
|
||||||
.*stablelm-tuned:
|
|
||||||
instruction_template: 'StableLM'
|
|
||||||
.*galactica.*finetuned:
|
|
||||||
instruction_template: 'Galactica Finetuned'
|
|
||||||
.*galactica.*-v2:
|
|
||||||
instruction_template: 'Galactica v2'
|
|
||||||
(?!.*finetuned)(?!.*-v2).*galactica:
|
|
||||||
instruction_template: 'Galactica'
|
|
||||||
.*guanaco:
|
|
||||||
instruction_template: 'Guanaco non-chat'
|
|
||||||
.*baize:
|
|
||||||
instruction_template: 'Baize'
|
|
||||||
.*mpt-.*instruct:
|
|
||||||
instruction_template: 'Alpaca'
|
|
||||||
.*mpt-.*chat:
|
|
||||||
instruction_template: 'ChatML'
|
|
||||||
(?!.*-flan-)(?!.*-t5-).*lamini-:
|
|
||||||
instruction_template: 'Alpaca'
|
|
||||||
.*incite.*chat:
|
|
||||||
instruction_template: 'INCITE-Chat'
|
|
||||||
.*incite.*instruct:
|
|
||||||
instruction_template: 'INCITE-Instruct'
|
|
||||||
.*ziya-:
|
|
||||||
instruction_template: 'Ziya'
|
|
||||||
.*koalpaca:
|
|
||||||
instruction_template: 'KoAlpaca'
|
|
||||||
.*openbuddy:
|
|
||||||
instruction_template: 'OpenBuddy'
|
|
||||||
(?!.*chat).*vigogne:
|
|
||||||
instruction_template: 'Vigogne-Instruct'
|
|
||||||
.*vigogne.*chat:
|
|
||||||
instruction_template: 'Vigogne-Chat'
|
|
||||||
.*(llama-deus|supercot|llama-natural-instructions|open-llama-0.3t-7b-instruct-dolly-hhrlhf|open-llama-0.3t-7b-open-instruct):
|
|
||||||
instruction_template: 'Alpaca'
|
|
||||||
.*bactrian:
|
|
||||||
instruction_template: 'Bactrian'
|
|
||||||
.*(h2ogpt-oig-|h2ogpt-oasst1-|h2ogpt-research-oasst1-):
|
|
||||||
instruction_template: 'INCITE-Chat'
|
|
||||||
.*h2ogpt-gm-:
|
|
||||||
instruction_template: 'H2O-prompt_answer'
|
|
||||||
.*manticore:
|
|
||||||
instruction_template: 'Manticore Chat'
|
|
||||||
.*bluemoonrp-(30|13)b:
|
|
||||||
instruction_template: 'Bluemoon'
|
|
||||||
.*Nous-Hermes-13b:
|
|
||||||
instruction_template: 'Alpaca'
|
|
||||||
.*airoboros:
|
|
||||||
instruction_template: 'Vicuna-v1.1'
|
|
||||||
.*airoboros.*1.2:
|
|
||||||
instruction_template: 'Airoboros-v1.2'
|
|
||||||
.*alpa(cino|sta):
|
|
||||||
instruction_template: 'Alpaca'
|
|
||||||
.*hippogriff:
|
|
||||||
instruction_template: 'Hippogriff'
|
|
||||||
.*lazarus:
|
|
||||||
instruction_template: 'Alpaca'
|
|
||||||
.*guanaco-.*(7|13|33|65)b:
|
|
||||||
instruction_template: 'Vicuna-v0'
|
|
||||||
.*hypermantis:
|
|
||||||
instruction_template: 'Alpaca'
|
|
||||||
.*open-llama-.*-open-instruct:
|
|
||||||
instruction_template: 'Alpaca'
|
|
||||||
.*starcoder-gpteacher-code-instruct:
|
|
||||||
instruction_template: 'Alpaca'
|
|
||||||
.*tulu:
|
|
||||||
instruction_template: 'Tulu'
|
|
||||||
.*chronos:
|
|
||||||
instruction_template: 'Alpaca'
|
|
||||||
.*samantha:
|
|
||||||
instruction_template: 'Samantha'
|
|
||||||
.*wizardcoder:
|
|
||||||
instruction_template: 'Alpaca'
|
|
||||||
.*minotaur:
|
|
||||||
instruction_template: 'Manticore Chat'
|
|
||||||
.*orca_mini:
|
|
||||||
instruction_template: 'Orca Mini'
|
|
||||||
.*(platypus|gplatty|superplatty):
|
|
||||||
instruction_template: 'Alpaca'
|
|
||||||
.*(openorca-platypus2):
|
|
||||||
instruction_template: 'OpenOrca-Platypus2'
|
|
||||||
.*longchat:
|
|
||||||
instruction_template: 'Vicuna-v1.1'
|
|
||||||
.*vicuna-33b:
|
|
||||||
instruction_template: 'Vicuna-v1.1'
|
|
||||||
.*redmond-hermes-coder:
|
|
||||||
instruction_template: 'Alpaca'
|
|
||||||
.*wizardcoder-15b:
|
|
||||||
instruction_template: 'Alpaca'
|
|
||||||
.*wizardlm:
|
|
||||||
instruction_template: 'Vicuna-v1.1'
|
|
||||||
.*godzilla:
|
|
||||||
instruction_template: 'Alpaca'
|
|
||||||
.*llama(-?)(2|v2).*chat:
|
|
||||||
instruction_template: 'Llama-v2'
|
|
||||||
.*newhope:
|
|
||||||
instruction_template: 'NewHope'
|
|
||||||
.*stablebeluga2:
|
|
||||||
instruction_template: 'StableBeluga2'
|
|
||||||
.*openchat:
|
|
||||||
instruction_template: 'OpenChat'
|
|
||||||
.*codellama.*instruct:
|
|
||||||
instruction_template: 'Llama-v2'
|
|
||||||
.*(mistral|mixtral).*instruct:
|
|
||||||
instruction_template: 'Mistral'
|
|
||||||
.*mistral.*openorca:
|
|
||||||
instruction_template: 'ChatML'
|
|
||||||
.*(WizardCoder-Python-34B-V1.0|Phind-CodeLlama-34B-v2|CodeBooga-34B-v0.1):
|
|
||||||
instruction_template: 'Alpaca'
|
|
||||||
.*orca-2-(13|7)b:
|
|
||||||
instruction_template: 'ChatML'
|
|
||||||
.*openhermes.*mistral:
|
|
||||||
instruction_template: 'ChatML'
|
|
||||||
.*Yi-34B-Chat:
|
|
||||||
instruction_template: 'ChatML'
|
|
||||||
(dolphin).*:
|
|
||||||
instruction_template: 'ChatML'
|
|
||||||
.*synthia:
|
|
||||||
instruction_template: 'Synthia'
|
|
||||||
.*(hercules|hyperion):
|
|
||||||
instruction_template: 'ChatML'
|
|
||||||
.*command-r:
|
|
||||||
instruction_template: 'Command-R'
|
|
||||||
.*xwin-lm-70b-v0.1:
|
|
||||||
instruction_template: 'Vicuna-v1.1'
|
|
||||||
.*platypus-yi-34b:
|
|
||||||
instruction_template: 'Vicuna-v1.1'
|
|
||||||
.*CausalLM-RP-34B:
|
|
||||||
instruction_template: 'ChatML'
|
|
||||||
34b-beta:
|
|
||||||
instruction_template: 'ChatML'
|
|
||||||
.*airoboros-3_1-yi-34b-200k:
|
|
||||||
instruction_template: 'Llama-v2'
|
|
||||||
.*chatqa:
|
|
||||||
instruction_template: 'NVIDIA-ChatQA'
|
|
||||||
Loading…
Add table
Add a link
Reference in a new issue