mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2026-04-20 22:13:43 +00:00
Compare commits
92 commits
| Author | SHA1 | Date | |
|---|---|---|---|
|
|
841aded786 | ||
|
|
57be34d7a6 | ||
|
|
7bae526ce4 | ||
|
|
a56e281204 | ||
|
|
dd77820a3a | ||
|
|
e4168ee8b8 | ||
|
|
5992fa9044 | ||
|
|
eeb328803f | ||
|
|
f66a01bd06 | ||
|
|
90559376c3 | ||
|
|
734d3c38a0 | ||
|
|
b06b36a6a6 | ||
|
|
4e978dd037 | ||
|
|
4bce4a38fc | ||
|
|
2aee3a1898 | ||
|
|
160a5e1a61 | ||
|
|
9e323425d8 | ||
|
|
a8a0f8dc82 | ||
|
|
ed61081a25 | ||
|
|
c7ca2f2326 | ||
|
|
75bf2feb59 | ||
|
|
fbd95bd5e6 | ||
|
|
533b43640b | ||
|
|
3dcb30b970 | ||
|
|
5f16b86db0 | ||
|
|
65cef2c731 | ||
|
|
61bfc2ffd5 | ||
|
|
a9afc48ba7 | ||
|
|
1ae749ac76 | ||
|
|
fae17dc5b8 | ||
|
|
7e4c8fa209 | ||
|
|
add91613db | ||
|
|
e9d7feb151 | ||
|
|
456afad585 | ||
|
|
326867e799 | ||
|
|
9bc3f9f3dd | ||
|
|
24cc0e2236 | ||
|
|
0c3720ac49 | ||
|
|
5ad199e9bb | ||
|
|
9dcf574160 | ||
|
|
e18f32cba7 | ||
|
|
778e1c4d52 | ||
|
|
775c913de2 | ||
|
|
cb511928e2 | ||
|
|
193424cc93 | ||
|
|
c26ffdd24c | ||
|
|
4d6230a944 | ||
|
|
7b2f15e34a | ||
|
|
05e4842033 | ||
|
|
b1d06dcf96 | ||
|
|
abc3487f4d | ||
|
|
223dd4b801 | ||
|
|
f8db23b362 | ||
|
|
d78fc46114 | ||
|
|
422f42ca7f | ||
|
|
544fcb0b7f | ||
|
|
c63a79ee48 | ||
|
|
9805ddcde9 | ||
|
|
91f9b01516 | ||
|
|
1f49a64e1a | ||
|
|
e8b31c063a | ||
|
|
ee917cd5ed | ||
|
|
dfd8ec9c49 | ||
|
|
0c033caf0e | ||
|
|
1b403a4ffa | ||
|
|
8cb7fe9c47 | ||
|
|
41bce3f4de | ||
|
|
ffea8f282e | ||
|
|
7fed60f90a | ||
|
|
2eef90a323 | ||
|
|
9183dc444e | ||
|
|
e0ad4e60df | ||
|
|
16af11f868 | ||
|
|
54b2f39c78 | ||
|
|
b5afecc63b | ||
|
|
2fbaee58cd | ||
|
|
62e67adb55 | ||
|
|
fc35acab9b | ||
|
|
8ecdb41078 | ||
|
|
5fb8c4fbd6 | ||
|
|
0050a33f37 | ||
|
|
6b66da84d2 | ||
|
|
8e8e1ba898 | ||
|
|
131a9a0140 | ||
|
|
95d6c53e13 | ||
|
|
8bba9ecc3f | ||
|
|
66d1a22c73 | ||
|
|
000d776967 | ||
|
|
a1cb5b5dc0 | ||
|
|
b11379f328 | ||
|
|
42dfcdfc5b | ||
|
|
6e2b70bde6 |
86 changed files with 1044 additions and 568 deletions
2
.github/pull_request_template.md
vendored
2
.github/pull_request_template.md
vendored
|
|
@ -1,3 +1,3 @@
|
|||
## Checklist:
|
||||
|
||||
- [ ] I have read the [Contributing guidelines](https://github.com/oobabooga/text-generation-webui/wiki/Contributing-guidelines).
|
||||
- [ ] I have read the [Contributing guidelines](https://github.com/oobabooga/textgen/wiki/Contributing-guidelines).
|
||||
|
|
|
|||
9
.github/workflows/build-everything-tgw.yml
vendored
9
.github/workflows/build-everything-tgw.yml
vendored
|
|
@ -4,7 +4,7 @@ on:
|
|||
workflow_dispatch:
|
||||
inputs:
|
||||
version:
|
||||
description: 'Version tag of text-generation-webui to build: v3.0'
|
||||
description: 'Version tag of textgen to build: v3.0'
|
||||
default: 'v3.0'
|
||||
required: true
|
||||
type: string
|
||||
|
|
@ -41,6 +41,13 @@ jobs:
|
|||
version: ${{ inputs.version }}
|
||||
config: 'os:ubuntu-22.04'
|
||||
|
||||
build_release_rocm_windows:
|
||||
name: ROCm Windows
|
||||
uses: ./.github/workflows/build-portable-release-rocm.yml
|
||||
with:
|
||||
version: ${{ inputs.version }}
|
||||
config: 'os:windows-2022'
|
||||
|
||||
build_release_rocm_linux:
|
||||
name: ROCm Linux
|
||||
uses: ./.github/workflows/build-portable-release-rocm.yml
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ on:
|
|||
workflow_dispatch:
|
||||
inputs:
|
||||
version:
|
||||
description: 'Version tag of text-generation-webui to build: v3.0'
|
||||
description: 'Version tag of textgen to build: v3.0'
|
||||
default: 'v3.0'
|
||||
required: true
|
||||
type: string
|
||||
|
|
@ -21,7 +21,7 @@ on:
|
|||
workflow_call:
|
||||
inputs:
|
||||
version:
|
||||
description: 'Version tag of text-generation-webui to build: v3.0'
|
||||
description: 'Version tag of textgen to build: v3.0'
|
||||
default: 'v3.0'
|
||||
required: true
|
||||
type: string
|
||||
|
|
@ -88,7 +88,7 @@ jobs:
|
|||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
repository: 'oobabooga/text-generation-webui'
|
||||
repository: 'oobabooga/textgen'
|
||||
ref: ${{ inputs.version }}
|
||||
submodules: 'recursive'
|
||||
|
||||
|
|
@ -102,8 +102,8 @@ jobs:
|
|||
VERSION_CLEAN="${{ inputs.version }}"
|
||||
VERSION_CLEAN="${VERSION_CLEAN#v}"
|
||||
cd ..
|
||||
cp -r text-generation-webui "text-generation-webui-${VERSION_CLEAN}"
|
||||
cd "text-generation-webui-${VERSION_CLEAN}"
|
||||
cp -r textgen "textgen-${VERSION_CLEAN}"
|
||||
cd "textgen-${VERSION_CLEAN}"
|
||||
|
||||
# Remove extensions that need additional requirements
|
||||
allowed=("character_bias" "gallery" "sd_api_pictures")
|
||||
|
|
@ -133,10 +133,10 @@ jobs:
|
|||
echo "Downloading Python for $PLATFORM..."
|
||||
curl -L -o python-build.tar.gz "$PYTHON_URL"
|
||||
tar -xzf python-build.tar.gz
|
||||
mv python "text-generation-webui-${VERSION_CLEAN}/portable_env"
|
||||
mv python "textgen-${VERSION_CLEAN}/portable_env"
|
||||
|
||||
# 3. Prepare requirements file based on CUDA version
|
||||
cd "text-generation-webui-${VERSION_CLEAN}"
|
||||
cd "textgen-${VERSION_CLEAN}"
|
||||
if [[ "$CUDA_VERSION" == "13.1" ]]; then
|
||||
REQ_FILE="requirements/portable/requirements_cuda131.txt"
|
||||
else
|
||||
|
|
@ -155,11 +155,11 @@ jobs:
|
|||
if [[ "$RUNNER_OS" == "Windows" ]]; then
|
||||
ARCHIVE_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}-cuda${CUDA_VERSION}.zip"
|
||||
echo "Creating archive: $ARCHIVE_NAME"
|
||||
powershell -Command "Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
|
||||
powershell -Command "Compress-Archive -Path textgen-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
|
||||
else
|
||||
ARCHIVE_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}-cuda${CUDA_VERSION}.tar.gz"
|
||||
echo "Creating archive: $ARCHIVE_NAME"
|
||||
tar czf "$ARCHIVE_NAME" "text-generation-webui-${VERSION_CLEAN}"
|
||||
tar czf "$ARCHIVE_NAME" "textgen-${VERSION_CLEAN}"
|
||||
fi
|
||||
|
||||
- name: Upload files to a GitHub release
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ on:
|
|||
workflow_dispatch:
|
||||
inputs:
|
||||
version:
|
||||
description: 'Version tag of text-generation-webui to build: v3.0'
|
||||
description: 'Version tag of textgen to build: v3.0'
|
||||
default: 'v3.0'
|
||||
required: true
|
||||
type: string
|
||||
|
|
@ -21,7 +21,7 @@ on:
|
|||
workflow_call:
|
||||
inputs:
|
||||
version:
|
||||
description: 'Version tag of text-generation-webui to build: v3.0'
|
||||
description: 'Version tag of textgen to build: v3.0'
|
||||
default: 'v3.0'
|
||||
required: true
|
||||
type: string
|
||||
|
|
@ -88,7 +88,7 @@ jobs:
|
|||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
repository: 'oobabooga/text-generation-webui'
|
||||
repository: 'oobabooga/textgen'
|
||||
ref: ${{ inputs.version }}
|
||||
submodules: 'recursive'
|
||||
|
||||
|
|
@ -102,8 +102,8 @@ jobs:
|
|||
VERSION_CLEAN="${{ inputs.version }}"
|
||||
VERSION_CLEAN="${VERSION_CLEAN#v}"
|
||||
cd ..
|
||||
cp -r text-generation-webui "text-generation-webui-${VERSION_CLEAN}"
|
||||
cd "text-generation-webui-${VERSION_CLEAN}"
|
||||
cp -r textgen "textgen-ik-${VERSION_CLEAN}"
|
||||
cd "textgen-ik-${VERSION_CLEAN}"
|
||||
|
||||
# Remove extensions that need additional requirements
|
||||
allowed=("character_bias" "gallery" "sd_api_pictures")
|
||||
|
|
@ -133,10 +133,10 @@ jobs:
|
|||
echo "Downloading Python for $PLATFORM..."
|
||||
curl -L -o python-build.tar.gz "$PYTHON_URL"
|
||||
tar -xzf python-build.tar.gz
|
||||
mv python "text-generation-webui-${VERSION_CLEAN}/portable_env"
|
||||
mv python "textgen-ik-${VERSION_CLEAN}/portable_env"
|
||||
|
||||
# 3. Prepare requirements file based on CUDA version
|
||||
cd "text-generation-webui-${VERSION_CLEAN}"
|
||||
cd "textgen-ik-${VERSION_CLEAN}"
|
||||
if [[ "$CUDA_VERSION" == "13.1" ]]; then
|
||||
REQ_FILE="requirements/portable/requirements_ik_cuda131.txt"
|
||||
else
|
||||
|
|
@ -158,11 +158,11 @@ jobs:
|
|||
if [[ "$RUNNER_OS" == "Windows" ]]; then
|
||||
ARCHIVE_NAME="textgen-portable-ik-${VERSION_CLEAN}-${PLATFORM}-cuda${CUDA_VERSION}.zip"
|
||||
echo "Creating archive: $ARCHIVE_NAME"
|
||||
powershell -Command "Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
|
||||
powershell -Command "Compress-Archive -Path textgen-ik-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
|
||||
else
|
||||
ARCHIVE_NAME="textgen-portable-ik-${VERSION_CLEAN}-${PLATFORM}-cuda${CUDA_VERSION}.tar.gz"
|
||||
echo "Creating archive: $ARCHIVE_NAME"
|
||||
tar czf "$ARCHIVE_NAME" "text-generation-webui-${VERSION_CLEAN}"
|
||||
tar czf "$ARCHIVE_NAME" "textgen-ik-${VERSION_CLEAN}"
|
||||
fi
|
||||
|
||||
- name: Upload files to a GitHub release
|
||||
|
|
|
|||
18
.github/workflows/build-portable-release-ik.yml
vendored
18
.github/workflows/build-portable-release-ik.yml
vendored
|
|
@ -4,7 +4,7 @@ on:
|
|||
workflow_dispatch:
|
||||
inputs:
|
||||
version:
|
||||
description: 'Version tag of text-generation-webui to build: v3.0'
|
||||
description: 'Version tag of textgen to build: v3.0'
|
||||
default: 'v3.0'
|
||||
required: true
|
||||
type: string
|
||||
|
|
@ -21,7 +21,7 @@ on:
|
|||
workflow_call:
|
||||
inputs:
|
||||
version:
|
||||
description: 'Version tag of text-generation-webui to build: v3.0'
|
||||
description: 'Version tag of textgen to build: v3.0'
|
||||
default: 'v3.0'
|
||||
required: true
|
||||
type: string
|
||||
|
|
@ -87,7 +87,7 @@ jobs:
|
|||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
repository: 'oobabooga/text-generation-webui'
|
||||
repository: 'oobabooga/textgen'
|
||||
ref: ${{ inputs.version }}
|
||||
submodules: 'recursive'
|
||||
|
||||
|
|
@ -101,8 +101,8 @@ jobs:
|
|||
VERSION_CLEAN="${{ inputs.version }}"
|
||||
VERSION_CLEAN="${VERSION_CLEAN#v}"
|
||||
cd ..
|
||||
cp -r text-generation-webui "text-generation-webui-${VERSION_CLEAN}"
|
||||
cd "text-generation-webui-${VERSION_CLEAN}"
|
||||
cp -r textgen "textgen-ik-${VERSION_CLEAN}"
|
||||
cd "textgen-ik-${VERSION_CLEAN}"
|
||||
|
||||
# Remove extensions that need additional requirements
|
||||
allowed=("character_bias" "gallery" "sd_api_pictures")
|
||||
|
|
@ -131,10 +131,10 @@ jobs:
|
|||
cd ..
|
||||
curl -L -o python-build.tar.gz "$PYTHON_URL"
|
||||
tar -xzf python-build.tar.gz
|
||||
mv python "text-generation-webui-${VERSION_CLEAN}/portable_env"
|
||||
mv python "textgen-ik-${VERSION_CLEAN}/portable_env"
|
||||
|
||||
# 3. Prepare requirements file
|
||||
cd "text-generation-webui-${VERSION_CLEAN}"
|
||||
cd "textgen-ik-${VERSION_CLEAN}"
|
||||
REQ_FILE="requirements/portable/requirements_ik_cpu_only.txt"
|
||||
echo "Using requirements file: $REQ_FILE"
|
||||
|
||||
|
|
@ -153,11 +153,11 @@ jobs:
|
|||
if [[ "$RUNNER_OS" == "Windows" ]]; then
|
||||
ARCHIVE_NAME="textgen-portable-ik-${VERSION_CLEAN}-${PLATFORM}.zip"
|
||||
echo "Creating archive: $ARCHIVE_NAME"
|
||||
powershell -Command "Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
|
||||
powershell -Command "Compress-Archive -Path textgen-ik-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
|
||||
else
|
||||
ARCHIVE_NAME="textgen-portable-ik-${VERSION_CLEAN}-${PLATFORM}.tar.gz"
|
||||
echo "Creating archive: $ARCHIVE_NAME"
|
||||
tar czf "$ARCHIVE_NAME" "text-generation-webui-${VERSION_CLEAN}"
|
||||
tar czf "$ARCHIVE_NAME" "textgen-ik-${VERSION_CLEAN}"
|
||||
fi
|
||||
|
||||
- name: Upload files to a GitHub release
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ on:
|
|||
workflow_dispatch:
|
||||
inputs:
|
||||
version:
|
||||
description: 'Version tag of text-generation-webui to build: v3.0'
|
||||
description: 'Version tag of textgen to build: v3.0'
|
||||
default: 'v3.0'
|
||||
required: true
|
||||
type: string
|
||||
|
|
@ -21,7 +21,7 @@ on:
|
|||
workflow_call:
|
||||
inputs:
|
||||
version:
|
||||
description: 'Version tag of text-generation-webui to build: v3.0'
|
||||
description: 'Version tag of textgen to build: v3.0'
|
||||
default: 'v3.0'
|
||||
required: true
|
||||
type: string
|
||||
|
|
@ -87,7 +87,7 @@ jobs:
|
|||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
repository: 'oobabooga/text-generation-webui'
|
||||
repository: 'oobabooga/textgen'
|
||||
ref: ${{ inputs.version }}
|
||||
submodules: 'recursive'
|
||||
|
||||
|
|
@ -101,8 +101,8 @@ jobs:
|
|||
VERSION_CLEAN="${{ inputs.version }}"
|
||||
VERSION_CLEAN="${VERSION_CLEAN#v}"
|
||||
cd ..
|
||||
cp -r text-generation-webui "text-generation-webui-${VERSION_CLEAN}"
|
||||
cd "text-generation-webui-${VERSION_CLEAN}"
|
||||
cp -r textgen "textgen-${VERSION_CLEAN}"
|
||||
cd "textgen-${VERSION_CLEAN}"
|
||||
|
||||
# Remove extensions that need additional requirements
|
||||
allowed=("character_bias" "gallery" "sd_api_pictures")
|
||||
|
|
@ -131,12 +131,12 @@ jobs:
|
|||
echo "Downloading Python for $PLATFORM..."
|
||||
curl -L -o python-build.tar.gz "$PYTHON_URL"
|
||||
tar -xzf python-build.tar.gz
|
||||
mv python "text-generation-webui-${VERSION_CLEAN}/portable_env"
|
||||
mv python "textgen-${VERSION_CLEAN}/portable_env"
|
||||
|
||||
# 3. Prepare requirements file
|
||||
REQ_FILE="requirements/portable/requirements_amd.txt"
|
||||
|
||||
cd "text-generation-webui-${VERSION_CLEAN}"
|
||||
cd "textgen-${VERSION_CLEAN}"
|
||||
|
||||
# 4. Install packages
|
||||
echo "Installing Python packages from $REQ_FILE..."
|
||||
|
|
@ -150,11 +150,11 @@ jobs:
|
|||
if [[ "$RUNNER_OS" == "Windows" ]]; then
|
||||
ARCHIVE_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}-rocm7.2.zip"
|
||||
echo "Creating archive: $ARCHIVE_NAME"
|
||||
powershell -Command "Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
|
||||
powershell -Command "Compress-Archive -Path textgen-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
|
||||
else
|
||||
ARCHIVE_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}-rocm7.2.tar.gz"
|
||||
echo "Creating archive: $ARCHIVE_NAME"
|
||||
tar czf "$ARCHIVE_NAME" "text-generation-webui-${VERSION_CLEAN}"
|
||||
tar czf "$ARCHIVE_NAME" "textgen-${VERSION_CLEAN}"
|
||||
fi
|
||||
|
||||
- name: Upload files to a GitHub release
|
||||
|
|
|
|||
|
|
@ -4,7 +4,7 @@ on:
|
|||
workflow_dispatch:
|
||||
inputs:
|
||||
version:
|
||||
description: 'Version tag of text-generation-webui to build: v3.0'
|
||||
description: 'Version tag of textgen to build: v3.0'
|
||||
default: 'v3.0'
|
||||
required: true
|
||||
type: string
|
||||
|
|
@ -21,7 +21,7 @@ on:
|
|||
workflow_call:
|
||||
inputs:
|
||||
version:
|
||||
description: 'Version tag of text-generation-webui to build: v3.0'
|
||||
description: 'Version tag of textgen to build: v3.0'
|
||||
default: 'v3.0'
|
||||
required: true
|
||||
type: string
|
||||
|
|
@ -87,7 +87,7 @@ jobs:
|
|||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
repository: 'oobabooga/text-generation-webui'
|
||||
repository: 'oobabooga/textgen'
|
||||
ref: ${{ inputs.version }}
|
||||
submodules: 'recursive'
|
||||
|
||||
|
|
@ -101,8 +101,8 @@ jobs:
|
|||
VERSION_CLEAN="${{ inputs.version }}"
|
||||
VERSION_CLEAN="${VERSION_CLEAN#v}"
|
||||
cd ..
|
||||
cp -r text-generation-webui "text-generation-webui-${VERSION_CLEAN}"
|
||||
cd "text-generation-webui-${VERSION_CLEAN}"
|
||||
cp -r textgen "textgen-${VERSION_CLEAN}"
|
||||
cd "textgen-${VERSION_CLEAN}"
|
||||
|
||||
# Remove extensions that need additional requirements
|
||||
allowed=("character_bias" "gallery" "sd_api_pictures")
|
||||
|
|
@ -131,12 +131,12 @@ jobs:
|
|||
echo "Downloading Python for $PLATFORM..."
|
||||
curl -L -o python-build.tar.gz "$PYTHON_URL"
|
||||
tar -xzf python-build.tar.gz
|
||||
mv python "text-generation-webui-${VERSION_CLEAN}/portable_env"
|
||||
mv python "textgen-${VERSION_CLEAN}/portable_env"
|
||||
|
||||
# 3. Prepare requirements file
|
||||
REQ_FILE="requirements/portable/requirements_vulkan.txt"
|
||||
|
||||
cd "text-generation-webui-${VERSION_CLEAN}"
|
||||
cd "textgen-${VERSION_CLEAN}"
|
||||
|
||||
# 4. Install packages
|
||||
echo "Installing Python packages from $REQ_FILE..."
|
||||
|
|
@ -150,11 +150,11 @@ jobs:
|
|||
if [[ "$RUNNER_OS" == "Windows" ]]; then
|
||||
ARCHIVE_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}-vulkan.zip"
|
||||
echo "Creating archive: $ARCHIVE_NAME"
|
||||
powershell -Command "Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
|
||||
powershell -Command "Compress-Archive -Path textgen-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
|
||||
else
|
||||
ARCHIVE_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}-vulkan.tar.gz"
|
||||
echo "Creating archive: $ARCHIVE_NAME"
|
||||
tar czf "$ARCHIVE_NAME" "text-generation-webui-${VERSION_CLEAN}"
|
||||
tar czf "$ARCHIVE_NAME" "textgen-${VERSION_CLEAN}"
|
||||
fi
|
||||
|
||||
- name: Upload files to a GitHub release
|
||||
|
|
|
|||
18
.github/workflows/build-portable-release.yml
vendored
18
.github/workflows/build-portable-release.yml
vendored
|
|
@ -4,7 +4,7 @@ on:
|
|||
workflow_dispatch:
|
||||
inputs:
|
||||
version:
|
||||
description: 'Version tag of text-generation-webui to build: v3.0'
|
||||
description: 'Version tag of textgen to build: v3.0'
|
||||
default: 'v3.0'
|
||||
required: true
|
||||
type: string
|
||||
|
|
@ -21,7 +21,7 @@ on:
|
|||
workflow_call:
|
||||
inputs:
|
||||
version:
|
||||
description: 'Version tag of text-generation-webui to build: v3.0'
|
||||
description: 'Version tag of textgen to build: v3.0'
|
||||
default: 'v3.0'
|
||||
required: true
|
||||
type: string
|
||||
|
|
@ -87,7 +87,7 @@ jobs:
|
|||
steps:
|
||||
- uses: actions/checkout@v6
|
||||
with:
|
||||
repository: 'oobabooga/text-generation-webui'
|
||||
repository: 'oobabooga/textgen'
|
||||
ref: ${{ inputs.version }}
|
||||
submodules: 'recursive'
|
||||
|
||||
|
|
@ -101,8 +101,8 @@ jobs:
|
|||
VERSION_CLEAN="${{ inputs.version }}"
|
||||
VERSION_CLEAN="${VERSION_CLEAN#v}"
|
||||
cd ..
|
||||
cp -r text-generation-webui "text-generation-webui-${VERSION_CLEAN}"
|
||||
cd "text-generation-webui-${VERSION_CLEAN}"
|
||||
cp -r textgen "textgen-${VERSION_CLEAN}"
|
||||
cd "textgen-${VERSION_CLEAN}"
|
||||
|
||||
# Remove extensions that need additional requirements
|
||||
allowed=("character_bias" "gallery" "sd_api_pictures")
|
||||
|
|
@ -146,10 +146,10 @@ jobs:
|
|||
cd ..
|
||||
curl -L -o python-build.tar.gz "$PYTHON_URL"
|
||||
tar -xzf python-build.tar.gz
|
||||
mv python "text-generation-webui-${VERSION_CLEAN}/portable_env"
|
||||
mv python "textgen-${VERSION_CLEAN}/portable_env"
|
||||
|
||||
# 3. Prepare requirements file based on platform
|
||||
cd "text-generation-webui-${VERSION_CLEAN}"
|
||||
cd "textgen-${VERSION_CLEAN}"
|
||||
|
||||
# Select requirements file based on platform
|
||||
if [[ "$RUNNER_OS" == "macOS" ]]; then
|
||||
|
|
@ -176,11 +176,11 @@ jobs:
|
|||
if [[ "$RUNNER_OS" == "Windows" ]]; then
|
||||
ARCHIVE_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}.zip"
|
||||
echo "Creating archive: $ARCHIVE_NAME"
|
||||
powershell -Command "Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
|
||||
powershell -Command "Compress-Archive -Path textgen-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
|
||||
else
|
||||
ARCHIVE_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}.tar.gz"
|
||||
echo "Creating archive: $ARCHIVE_NAME"
|
||||
tar czf "$ARCHIVE_NAME" "text-generation-webui-${VERSION_CLEAN}"
|
||||
tar czf "$ARCHIVE_NAME" "textgen-${VERSION_CLEAN}"
|
||||
fi
|
||||
|
||||
- name: Upload files to a GitHub release
|
||||
|
|
|
|||
|
|
@ -20,11 +20,11 @@
|
|||
{
|
||||
"cell_type": "markdown",
|
||||
"source": [
|
||||
"# oobabooga/text-generation-webui\n",
|
||||
"# oobabooga/textgen\n",
|
||||
"\n",
|
||||
"After running both cells, a public gradio URL will appear at the bottom in around 10 minutes. You can optionally generate an API link.\n",
|
||||
"\n",
|
||||
"* Project page: https://github.com/oobabooga/text-generation-webui\n",
|
||||
"* Project page: https://github.com/oobabooga/textgen\n",
|
||||
"* Gradio server status: https://status.gradio.app/"
|
||||
],
|
||||
"metadata": {
|
||||
|
|
@ -59,11 +59,11 @@
|
|||
"os.environ.pop('PYTHONPATH', None)\n",
|
||||
"os.environ.pop('MPLBACKEND', None)\n",
|
||||
"\n",
|
||||
"if Path.cwd().name != 'text-generation-webui':\n",
|
||||
"if Path.cwd().name != 'textgen':\n",
|
||||
" print(\"\\033[1;32;1m\\n --> Installing the web UI. This will take a while, but after the initial setup, you can download and test as many models as you like.\\033[0;37;0m\\n\")\n",
|
||||
"\n",
|
||||
" !git clone https://github.com/oobabooga/text-generation-webui\n",
|
||||
" %cd text-generation-webui\n",
|
||||
" !git clone https://github.com/oobabooga/textgen\n",
|
||||
" %cd textgen\n",
|
||||
"\n",
|
||||
" # Install the project in an isolated environment\n",
|
||||
" !GPU_CHOICE=A \\\n",
|
||||
|
|
|
|||
49
README.md
49
README.md
|
|
@ -11,11 +11,11 @@
|
|||
</div>
|
||||
<hr>
|
||||
|
||||
# Text Generation Web UI
|
||||
# TextGen
|
||||
|
||||
A Gradio web UI for running Large Language Models locally. 100% private and offline. Supports text generation, vision, tool-calling, training, image generation, and more.
|
||||
**The original local LLM interface.** Text, vision, tool-calling, training, image generation. UI + API, 100% offline and private.
|
||||
|
||||
[Try the Deep Reason extension](https://oobabooga.gumroad.com/l/deep_reason)
|
||||
For recommended GGUF quants, check out my new project: [LocalBench](https://localbench.substack.com).
|
||||
|
||||
| |  |
|
||||
|:---:|:---:|
|
||||
|
|
@ -23,21 +23,20 @@ A Gradio web UI for running Large Language Models locally. 100% private and offl
|
|||
|
||||
## Features
|
||||
|
||||
- **Easy setup**: [Portable builds](https://github.com/oobabooga/text-generation-webui/releases) (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or a one-click installer for the full feature set.
|
||||
- **Multiple backends**: [llama.cpp](https://github.com/ggerganov/llama.cpp), [Transformers](https://github.com/huggingface/transformers), [ExLlamaV3](https://github.com/turboderp-org/exllamav3), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). Switch between backends and models without restarting.
|
||||
- **OpenAI/Anthropic-compatible API**: Chat, Completions, and Messages endpoints with tool-calling support. Use as a local drop-in replacement for the OpenAI/Anthropic APIs ([examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples)).
|
||||
- **Tool-calling**: Models can call custom functions during chat — web search, page fetching, math, and more. Each tool is a single `.py` file, easy to create and extend ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Tool-Calling-Tutorial)).
|
||||
- **Vision (multimodal)**: Attach images to messages for visual understanding ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Multimodal-Tutorial)).
|
||||
- **Easy setup**: [Portable builds](https://github.com/oobabooga/textgen/releases) (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or a one-click installer for the full feature set.
|
||||
- **Multiple backends**: [llama.cpp](https://github.com/ggerganov/llama.cpp), [ik_llama.cpp](https://github.com/ikawrakow/ik_llama.cpp), [Transformers](https://github.com/huggingface/transformers), [ExLlamaV3](https://github.com/turboderp-org/exllamav3), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). Switch between backends and models without restarting.
|
||||
- **OpenAI/Anthropic-compatible API**: Chat, Completions, and Messages endpoints with tool-calling support. Use as a local drop-in replacement for the OpenAI/Anthropic APIs ([examples](https://github.com/oobabooga/textgen/wiki/12-%E2%80%90-OpenAI-API#examples)).
|
||||
- **Tool-calling**: Models can call custom functions during chat, including web search, page fetching, and math. Each tool is a single `.py` file. MCP servers are also supported ([tutorial](https://github.com/oobabooga/textgen/wiki/Tool-Calling-Tutorial)).
|
||||
- **Vision (multimodal)**: Attach images to messages for visual understanding ([tutorial](https://github.com/oobabooga/textgen/wiki/Multimodal-Tutorial)).
|
||||
- **File attachments**: Upload text files, PDF documents, and .docx documents to talk about their contents.
|
||||
- **Training**: Fine-tune LoRAs on multi-turn chat or raw text datasets. Supports resuming interrupted runs ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/05-%E2%80%90-Training-Tab)).
|
||||
- **Image generation**: A dedicated tab for `diffusers` models like **Z-Image-Turbo**. Features 4-bit/8-bit quantization and a persistent gallery with metadata ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Image-Generation-Tutorial)).
|
||||
- **Training**: Fine-tune LoRAs on multi-turn chat or raw text datasets. Supports resuming interrupted runs ([tutorial](https://github.com/oobabooga/textgen/wiki/05-%E2%80%90-Training-Tab)).
|
||||
- **Image generation**: A dedicated tab for `diffusers` models like **Z-Image-Turbo**. Features 4-bit/8-bit quantization and a persistent gallery with metadata ([tutorial](https://github.com/oobabooga/textgen/wiki/Image-Generation-Tutorial)).
|
||||
- 100% offline and private, with zero telemetry, external resources, or remote update requests.
|
||||
- `instruct` mode for instruction-following (like ChatGPT), and `chat-instruct`/`chat` modes for talking to custom characters. Prompts are automatically formatted with Jinja2 templates.
|
||||
- Edit messages, navigate between message versions, and branch conversations at any point.
|
||||
- Free-form text generation in the Notebook tab without being limited to chat turns.
|
||||
- Multiple sampling parameters and generation options for sophisticated text generation control.
|
||||
- Dark/light themes, syntax highlighting for code blocks, and LaTeX rendering for mathematical expressions.
|
||||
- Extension support, with numerous built-in and user-contributed extensions available. See the [wiki](https://github.com/oobabooga/text-generation-webui/wiki/07-%E2%80%90-Extensions) and [extensions directory](https://github.com/oobabooga/text-generation-webui-extensions) for details.
|
||||
- Extension support, with built-in and user-contributed extensions available. See the [wiki](https://github.com/oobabooga/textgen/wiki/07-%E2%80%90-Extensions) and [extensions directory](https://github.com/oobabooga/textgen-extensions) for details.
|
||||
|
||||
## How to install
|
||||
|
||||
|
|
@ -45,19 +44,19 @@ A Gradio web UI for running Large Language Models locally. 100% private and offl
|
|||
|
||||
No installation needed – just download, unzip and run. All dependencies included.
|
||||
|
||||
Download from here: **https://github.com/oobabooga/text-generation-webui/releases**
|
||||
Download from here: **https://github.com/oobabooga/textgen/releases**
|
||||
|
||||
- Builds are provided for Linux, Windows, and macOS, with options for CUDA, Vulkan, ROCm, and CPU-only.
|
||||
- Compatible with GGUF (llama.cpp) models.
|
||||
|
||||
#### Option 2: Manual portable install with venv
|
||||
|
||||
Very fast setup that should work on any Python 3.9+:
|
||||
Fast setup on any Python 3.9+:
|
||||
|
||||
```bash
|
||||
# Clone repository
|
||||
git clone https://github.com/oobabooga/text-generation-webui
|
||||
cd text-generation-webui
|
||||
git clone https://github.com/oobabooga/textgen
|
||||
cd textgen
|
||||
|
||||
# Create virtual environment
|
||||
python -m venv venv
|
||||
|
|
@ -82,7 +81,7 @@ deactivate
|
|||
|
||||
For users who need additional backends (ExLlamaV3, Transformers), training, image generation, or extensions (TTS, voice input, translation, etc). Requires ~10GB disk space and downloads PyTorch.
|
||||
|
||||
1. Clone the repository, or [download its source code](https://github.com/oobabooga/text-generation-webui/archive/refs/heads/main.zip) and extract it.
|
||||
1. Clone the repository, or [download its source code](https://github.com/oobabooga/textgen/archive/refs/heads/main.zip) and extract it.
|
||||
2. Run the startup script for your OS: `start_windows.bat`, `start_linux.sh`, or `start_macos.sh`.
|
||||
3. When prompted, select your GPU vendor.
|
||||
4. After installation, open `http://127.0.0.1:7860` in your browser.
|
||||
|
|
@ -161,8 +160,8 @@ conda install -y -c "nvidia/label/cuda-12.8.1" cuda
|
|||
#### 3. Install the web UI
|
||||
|
||||
```
|
||||
git clone https://github.com/oobabooga/text-generation-webui
|
||||
cd text-generation-webui
|
||||
git clone https://github.com/oobabooga/textgen
|
||||
cd textgen
|
||||
pip install -r requirements/full/<requirements file according to table below>
|
||||
```
|
||||
|
||||
|
|
@ -180,7 +179,7 @@ Requirements file to use:
|
|||
|
||||
```
|
||||
conda activate textgen
|
||||
cd text-generation-webui
|
||||
cd textgen
|
||||
python server.py
|
||||
```
|
||||
|
||||
|
|
@ -215,7 +214,7 @@ mkdir -p user_data/logs user_data/cache
|
|||
docker compose up --build
|
||||
```
|
||||
|
||||
* You need to have Docker Compose v2.17 or higher installed. See [this guide](https://github.com/oobabooga/text-generation-webui/wiki/09-%E2%80%90-Docker) for instructions.
|
||||
* You need to have Docker Compose v2.17 or higher installed. See [this guide](https://github.com/oobabooga/textgen/wiki/09-%E2%80%90-Docker) for instructions.
|
||||
* For additional docker files, check out [this repository](https://github.com/Atinoda/text-generation-webui-docker).
|
||||
|
||||
### Updating the requirements
|
||||
|
|
@ -224,7 +223,7 @@ From time to time, the `requirements*.txt` change. To update, use these commands
|
|||
|
||||
```
|
||||
conda activate textgen
|
||||
cd text-generation-webui
|
||||
cd textgen
|
||||
pip install -r <requirements file that you have used> --upgrade
|
||||
```
|
||||
</details>
|
||||
|
|
@ -254,7 +253,7 @@ usage: server.py [-h] [--user-data-dir USER_DATA_DIR] [--multi-user] [--model MO
|
|||
[--do-sample | --no-do-sample] [--dynamic-temperature | --no-dynamic-temperature] [--temperature-last | --no-temperature-last] [--sampler-priority N] [--dry-sequence-breakers N]
|
||||
[--enable-thinking | --no-enable-thinking] [--reasoning-effort N] [--chat-template-file CHAT_TEMPLATE_FILE]
|
||||
|
||||
Text Generation Web UI
|
||||
TextGen
|
||||
|
||||
options:
|
||||
-h, --help show this help message and exit
|
||||
|
|
@ -436,7 +435,7 @@ To estimate how much memory a model will use, you can use the [GGUF Memory Calcu
|
|||
Models that consist of multiple files (like 16-bit Transformers models and EXL3 models) should be placed in a subfolder inside `user_data/models`:
|
||||
|
||||
```
|
||||
text-generation-webui
|
||||
textgen
|
||||
└── user_data
|
||||
└── models
|
||||
└── Qwen_Qwen3-8B
|
||||
|
|
@ -453,7 +452,7 @@ These formats require the one-click installer (not the portable build).
|
|||
|
||||
## Documentation
|
||||
|
||||
https://github.com/oobabooga/text-generation-webui/wiki
|
||||
https://github.com/oobabooga/textgen/wiki
|
||||
|
||||
## Community
|
||||
|
||||
|
|
|
|||
|
|
@ -13,21 +13,12 @@
|
|||
line-height: 28px !important;
|
||||
}
|
||||
|
||||
.dark .chat .message-body :is(p,li,h1,h2,h3,h4,h5,h6),
|
||||
.dark .chat .message-body :is(p,li),
|
||||
.dark .chat .message-body em:not(:is(h1,h2,h3,h4,h5,h6,b,strong) em),
|
||||
.dark .chat .message-body q:not(:is(h1,h2,h3,h4,h5,h6,b,strong) q) {
|
||||
color: #d1d5db !important;
|
||||
}
|
||||
|
||||
.chat .message-body :is(th, td),
|
||||
.prose hr {
|
||||
border-color: #40404096 !important;
|
||||
}
|
||||
|
||||
.dark .chat .message-body :is(th, td),
|
||||
.dark .prose hr {
|
||||
border-color: rgb(255 255 255 / 30%) !important;
|
||||
}
|
||||
|
||||
.chat .message-body :is(p, ul, ol) {
|
||||
margin: 1.25em 0 !important;
|
||||
|
|
|
|||
200
css/main.css
200
css/main.css
|
|
@ -22,6 +22,17 @@
|
|||
font-style: italic;
|
||||
}
|
||||
|
||||
/* Hide spin buttons on number inputs (look bad on Windows) */
|
||||
input[type="number"]::-webkit-outer-spin-button,
|
||||
input[type="number"]::-webkit-inner-spin-button {
|
||||
-webkit-appearance: none;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
input[type="number"] {
|
||||
-moz-appearance: textfield;
|
||||
}
|
||||
|
||||
.padded.svelte-12cmxck {
|
||||
padding: 3px 0;
|
||||
}
|
||||
|
|
@ -246,8 +257,8 @@ button {
|
|||
|
||||
.pretty_scrollbar::-webkit-scrollbar,
|
||||
#image-history-gallery > :nth-child(2)::-webkit-scrollbar {
|
||||
width: 8px;
|
||||
height: 8px;
|
||||
width: 7px;
|
||||
height: 7px;
|
||||
}
|
||||
|
||||
.pretty_scrollbar::-webkit-scrollbar-track,
|
||||
|
|
@ -260,7 +271,7 @@ button {
|
|||
#image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb,
|
||||
#image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb:hover {
|
||||
background: var(--neutral-300);
|
||||
border-radius: 30px;
|
||||
border-radius: 9999px;
|
||||
}
|
||||
|
||||
.dark .pretty_scrollbar::-webkit-scrollbar-thumb,
|
||||
|
|
@ -268,18 +279,17 @@ button {
|
|||
.dark #image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb,
|
||||
.dark #image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb:hover {
|
||||
background: rgb(255 255 255 / 6.25%);
|
||||
border-radius: 30px;
|
||||
border-radius: 9999px;
|
||||
}
|
||||
|
||||
.pretty_scrollbar::-webkit-resizer,
|
||||
#image-history-gallery > :nth-child(2)::-webkit-resizer {
|
||||
background: #d2d2d8;
|
||||
background: transparent;
|
||||
}
|
||||
|
||||
.dark .pretty_scrollbar::-webkit-resizer,
|
||||
.dark #image-history-gallery > :nth-child(2)::-webkit-resizer {
|
||||
background: rgb(255 255 255 / 10%);
|
||||
border-radius: 10px;
|
||||
background: transparent;
|
||||
}
|
||||
|
||||
.pretty_scrollbar::-webkit-scrollbar-corner,
|
||||
|
|
@ -405,7 +415,13 @@ audio {
|
|||
text-align: start;
|
||||
padding-left: 1rem;
|
||||
padding-right: 1rem;
|
||||
contain: layout;
|
||||
contain: layout paint;
|
||||
}
|
||||
|
||||
.message,
|
||||
.user-message,
|
||||
.assistant-message {
|
||||
contain: layout paint;
|
||||
}
|
||||
|
||||
.chat .message .timestamp {
|
||||
|
|
@ -436,15 +452,25 @@ audio {
|
|||
.dark .message-body h4,
|
||||
.dark .message-body h5,
|
||||
.dark .message-body h6 {
|
||||
color: white !important;
|
||||
color: #e8e8e8 !important;
|
||||
}
|
||||
|
||||
.dark .message-body blockquote {
|
||||
border-left-color: rgb(255 255 255 / 30%);
|
||||
.message-body blockquote {
|
||||
border-left-width: 4px;
|
||||
border-left-color: var(--border-color-primary);
|
||||
}
|
||||
|
||||
.message-body h1,
|
||||
.message-body h2,
|
||||
.message-body h3,
|
||||
.message-body h4,
|
||||
.message-body h5,
|
||||
.message-body h6 {
|
||||
color: #1a1a1a;
|
||||
}
|
||||
|
||||
.message-body h1 {
|
||||
font-weight: 800;
|
||||
font-weight: 700;
|
||||
font-size: 2.25em;
|
||||
margin-top: 0;
|
||||
margin-bottom: 0.8888889em;
|
||||
|
|
@ -476,13 +502,13 @@ audio {
|
|||
}
|
||||
|
||||
.message-body h5 {
|
||||
font-weight: normal;
|
||||
font-weight: 600;
|
||||
font-size: 1em;
|
||||
margin: 0;
|
||||
}
|
||||
|
||||
.message-body h6 {
|
||||
font-weight: normal;
|
||||
font-weight: 600;
|
||||
font-size: 1em;
|
||||
margin: 0;
|
||||
}
|
||||
|
|
@ -491,6 +517,10 @@ audio {
|
|||
color: #f5b031;
|
||||
}
|
||||
|
||||
.message q {
|
||||
color: #3480be;
|
||||
}
|
||||
|
||||
.message-body q::before, .message-body q::after {
|
||||
content: "";
|
||||
}
|
||||
|
|
@ -590,7 +620,7 @@ audio {
|
|||
}
|
||||
|
||||
#chat-input textarea::-webkit-scrollbar {
|
||||
width: 8px;
|
||||
width: 7px;
|
||||
}
|
||||
|
||||
#chat-input textarea::-webkit-scrollbar-track {
|
||||
|
|
@ -599,7 +629,7 @@ audio {
|
|||
|
||||
#chat-input textarea::-webkit-scrollbar-thumb {
|
||||
background: var(--neutral-300);
|
||||
border-radius: 30px;
|
||||
border-radius: 9999px;
|
||||
}
|
||||
|
||||
.dark #chat-input textarea::-webkit-scrollbar-thumb {
|
||||
|
|
@ -633,6 +663,10 @@ audio {
|
|||
background: transparent;
|
||||
}
|
||||
|
||||
#chat-input .thumbnails {
|
||||
padding-top: 3px;
|
||||
}
|
||||
|
||||
.chat-input-positioned {
|
||||
max-width: 54rem;
|
||||
left: 50%;
|
||||
|
|
@ -735,7 +769,30 @@ audio {
|
|||
|
||||
.hover-element {
|
||||
position: relative;
|
||||
font-size: 24px;
|
||||
padding-top: 4px;
|
||||
}
|
||||
|
||||
#hover-element-button {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
justify-content: center;
|
||||
width: 32px;
|
||||
height: 32px;
|
||||
border-radius: 0.5rem;
|
||||
cursor: pointer;
|
||||
color: gray;
|
||||
}
|
||||
|
||||
#hover-element-button:hover {
|
||||
background-color: var(--background-fill-secondary);
|
||||
}
|
||||
|
||||
#hover-element-button svg {
|
||||
color: inherit;
|
||||
}
|
||||
|
||||
.dark #hover-element-button:hover {
|
||||
background-color: var(--selected-item-color-dark);
|
||||
}
|
||||
|
||||
.hover-menu {
|
||||
|
|
@ -743,27 +800,40 @@ audio {
|
|||
position: absolute;
|
||||
bottom: 100%;
|
||||
left: 0;
|
||||
box-shadow: 0 2px 12px rgb(0 0 0 / 15%);
|
||||
border-radius: 0.5rem;
|
||||
background: white;
|
||||
border: 1px solid rgba(0, 0, 0, 0.1);
|
||||
box-shadow: 0 4px 16px rgb(0 0 0 / 12%), 0 1px 3px rgb(0 0 0 / 8%);
|
||||
border-radius: 0.75rem;
|
||||
z-index: 10000;
|
||||
min-width: 330px;
|
||||
flex-direction: column;
|
||||
overflow: hidden;
|
||||
padding: 4px;
|
||||
}
|
||||
|
||||
.hover-menu::before {
|
||||
content: '';
|
||||
position: absolute;
|
||||
top: 100%;
|
||||
left: 0;
|
||||
width: 100%;
|
||||
height: 8px;
|
||||
}
|
||||
|
||||
.hover-menu > * {
|
||||
border: none !important;
|
||||
box-shadow: none !important;
|
||||
}
|
||||
|
||||
.hover-menu button {
|
||||
width: 100%;
|
||||
background: white !important;
|
||||
border-radius: 0 !important;
|
||||
background: transparent !important;
|
||||
border: none !important;
|
||||
border-radius: 0.5rem !important;
|
||||
justify-content: space-between;
|
||||
margin: 0 !important;
|
||||
height: 36px;
|
||||
border-color: transparent !important;
|
||||
transition: background-color 0.15s ease;
|
||||
}
|
||||
|
||||
.hover-menu button:not(#clear-history-confirm) {
|
||||
border-bottom: 0 !important;
|
||||
font-weight: 500;
|
||||
box-shadow: none !important;
|
||||
}
|
||||
|
||||
.hover-menu button:hover {
|
||||
|
|
@ -775,19 +845,26 @@ audio {
|
|||
}
|
||||
|
||||
#show-controls {
|
||||
background-color: white;
|
||||
border-color: transparent !important;
|
||||
background-color: transparent;
|
||||
border: none !important;
|
||||
height: 36px;
|
||||
border-radius: 0;
|
||||
border-bottom: 0 !important;
|
||||
border-radius: 0.5rem;
|
||||
padding-top: 3px;
|
||||
padding-left: 4px;
|
||||
display: flex;
|
||||
font-weight: normal;
|
||||
}
|
||||
|
||||
#show-controls:hover {
|
||||
background-color: #dbeafe;
|
||||
}
|
||||
|
||||
.dark #show-controls {
|
||||
background-color: var(--darker-gray);
|
||||
background-color: transparent;
|
||||
}
|
||||
|
||||
.dark #show-controls:hover {
|
||||
background-color: var(--selected-item-color-dark);
|
||||
}
|
||||
|
||||
#show-controls label {
|
||||
|
|
@ -797,12 +874,12 @@ audio {
|
|||
width: 100%;
|
||||
padding-right: 12px;
|
||||
gap: 10px;
|
||||
font-weight: 600;
|
||||
font-weight: 500;
|
||||
color: var(--button-secondary-text-color);
|
||||
}
|
||||
|
||||
#show-controls label input {
|
||||
margin-top: 4px;
|
||||
margin-top: 5px;
|
||||
}
|
||||
|
||||
.transparent-substring {
|
||||
|
|
@ -842,7 +919,7 @@ audio {
|
|||
}
|
||||
|
||||
#chat-input-row {
|
||||
padding: 1rem;
|
||||
padding: 0.5rem 1rem 1rem;
|
||||
}
|
||||
|
||||
#chat-col {
|
||||
|
|
@ -1208,9 +1285,14 @@ audio {
|
|||
color: #9ca3af;
|
||||
}
|
||||
|
||||
.dark .hover-menu {
|
||||
background: var(--darker-gray);
|
||||
border-color: transparent;
|
||||
box-shadow: 0 4px 16px rgb(0 0 0 / 40%);
|
||||
}
|
||||
|
||||
.dark .hover-menu button {
|
||||
border-color: var(--border-color-primary);
|
||||
background-color: var(--darker-gray) !important;
|
||||
background-color: transparent !important;
|
||||
}
|
||||
|
||||
.dark #chat-controls,
|
||||
|
|
@ -1372,8 +1454,7 @@ audio {
|
|||
}
|
||||
|
||||
.footer-button svg {
|
||||
stroke: rgb(156 163 175);
|
||||
transition: stroke 0.2s;
|
||||
stroke: rgb(140 140 148);
|
||||
}
|
||||
|
||||
.footer-button:hover svg {
|
||||
|
|
@ -1388,12 +1469,12 @@ audio {
|
|||
stroke: rgb(209 213 219);
|
||||
}
|
||||
|
||||
.tgw-accordion {
|
||||
.block:has(> .label-wrap) {
|
||||
padding: 10px 12px !important;
|
||||
border: 1px solid #d2d2d8;
|
||||
}
|
||||
|
||||
.dark .tgw-accordion {
|
||||
.dark .block:has(> .label-wrap) {
|
||||
border: 1px solid var(--border-color-dark);
|
||||
}
|
||||
|
||||
|
|
@ -1447,6 +1528,10 @@ audio {
|
|||
.thinking-icon {
|
||||
margin-right: 8px;
|
||||
color: rgb(0 0 0 / 50%);
|
||||
|
||||
/* Prevents the SVG from shrinking
|
||||
* when tool call arguments are long */
|
||||
flex-shrink: 0;
|
||||
}
|
||||
|
||||
.thinking-title {
|
||||
|
|
@ -1748,14 +1833,9 @@ button:focus {
|
|||
}
|
||||
|
||||
.chat-parent {
|
||||
/* Optimize for scrolling performance */
|
||||
will-change: scroll-position;
|
||||
contain: style paint;
|
||||
|
||||
/* Ensure GPU acceleration */
|
||||
contain: style;
|
||||
transform: translateZ(0);
|
||||
|
||||
/* Prevent layout shifts */
|
||||
overflow-anchor: none;
|
||||
}
|
||||
|
||||
|
|
@ -1903,14 +1983,24 @@ table, tr, td, th, thead {
|
|||
border: 0;
|
||||
}
|
||||
|
||||
.prose hr {
|
||||
border-color: var(--border-color-primary);
|
||||
}
|
||||
|
||||
td + td,
|
||||
th + th { border-left: 1px solid; }
|
||||
th + th {
|
||||
border-left: 1px solid var(--border-color-primary) !important;
|
||||
}
|
||||
|
||||
tr + tr td,
|
||||
tr + tr th { border-top: 1px solid; }
|
||||
tr + tr th {
|
||||
border-top: 1px solid var(--border-color-primary) !important;
|
||||
}
|
||||
|
||||
thead + tbody tr:first-child td,
|
||||
thead + tbody tr:first-child th { border-top: 1px solid; }
|
||||
thead + tbody tr:first-child th {
|
||||
border-top: 1px solid var(--border-color-primary) !important;
|
||||
}
|
||||
|
||||
/* ------------------------------------------------
|
||||
Tools CheckboxGroup - vertical DragDrop-like style
|
||||
|
|
@ -1942,8 +2032,8 @@ thead + tbody tr:first-child th { border-top: 1px solid; }
|
|||
|
||||
/* Pretty scrollbar for the tools list */
|
||||
#tools-group .wrap::-webkit-scrollbar {
|
||||
width: 8px;
|
||||
height: 8px;
|
||||
width: 7px;
|
||||
height: 7px;
|
||||
}
|
||||
|
||||
#tools-group .wrap::-webkit-scrollbar-track {
|
||||
|
|
@ -1953,13 +2043,13 @@ thead + tbody tr:first-child th { border-top: 1px solid; }
|
|||
#tools-group .wrap::-webkit-scrollbar-thumb,
|
||||
#tools-group .wrap::-webkit-scrollbar-thumb:hover {
|
||||
background: var(--neutral-300);
|
||||
border-radius: 30px;
|
||||
border-radius: 9999px;
|
||||
}
|
||||
|
||||
.dark #tools-group .wrap::-webkit-scrollbar-thumb,
|
||||
.dark #tools-group .wrap::-webkit-scrollbar-thumb:hover {
|
||||
background: rgb(255 255 255 / 6.25%);
|
||||
border-radius: 30px;
|
||||
border-radius: 9999px;
|
||||
}
|
||||
|
||||
#tools-group .wrap::-webkit-scrollbar-corner {
|
||||
|
|
|
|||
|
|
@ -9,9 +9,9 @@ WORKDIR /app
|
|||
# This is needed to avoid an error about "Failed to build mpi4py" in the next command
|
||||
ENV LD_LIBRARY_PATH=/usr/lib/x86_64-linux-gnu:$LD_LIBRARY_PATH
|
||||
|
||||
# Install text-generation-webui
|
||||
RUN git clone https://github.com/oobabooga/text-generation-webui
|
||||
WORKDIR /app/text-generation-webui
|
||||
# Install textgen
|
||||
RUN git clone https://github.com/oobabooga/textgen
|
||||
WORKDIR /app/textgen
|
||||
RUN pip install --break-system-packages -r requirements/full/requirements.txt
|
||||
|
||||
# Install TensorRT-LLM
|
||||
|
|
|
|||
|
|
@ -10,10 +10,10 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \
|
|||
apt install --no-install-recommends -y git vim build-essential python3-dev pip bash curl && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
WORKDIR /home/app/
|
||||
RUN git clone https://github.com/oobabooga/text-generation-webui.git
|
||||
WORKDIR /home/app/text-generation-webui
|
||||
RUN git clone https://github.com/oobabooga/textgen.git
|
||||
WORKDIR /home/app/textgen
|
||||
RUN GPU_CHOICE=B LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose
|
||||
EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000}
|
||||
WORKDIR /home/app/text-generation-webui
|
||||
WORKDIR /home/app/textgen
|
||||
# set umask to ensure group read / write at runtime
|
||||
CMD umask 0002 && export HOME=/home/app/text-generation-webui && ./start_linux.sh --listen
|
||||
CMD umask 0002 && export HOME=/home/app/textgen && ./start_linux.sh --listen
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
version: "3.3"
|
||||
services:
|
||||
text-generation-webui:
|
||||
textgen:
|
||||
build:
|
||||
context: .
|
||||
args:
|
||||
|
|
@ -25,4 +25,4 @@ services:
|
|||
security_opt:
|
||||
- seccomp=unconfined
|
||||
volumes:
|
||||
- ./user_data:/home/app/text-generation-webui/user_data
|
||||
- ./user_data:/home/app/textgen/user_data
|
||||
|
|
|
|||
|
|
@ -10,10 +10,10 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \
|
|||
apt install --no-install-recommends -y git vim build-essential python3-dev pip bash curl && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
WORKDIR /home/app/
|
||||
RUN git clone https://github.com/oobabooga/text-generation-webui.git
|
||||
WORKDIR /home/app/text-generation-webui
|
||||
RUN git clone https://github.com/oobabooga/textgen.git
|
||||
WORKDIR /home/app/textgen
|
||||
RUN GPU_CHOICE=N LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose
|
||||
EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000}
|
||||
# set umask to ensure group read / write at runtime
|
||||
WORKDIR /home/app/text-generation-webui
|
||||
CMD umask 0002 && export HOME=/home/app/text-generation-webui && ./start_linux.sh --listen
|
||||
WORKDIR /home/app/textgen
|
||||
CMD umask 0002 && export HOME=/home/app/textgen && ./start_linux.sh --listen
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
version: "3.3"
|
||||
services:
|
||||
text-generation-webui:
|
||||
textgen:
|
||||
build:
|
||||
context: .
|
||||
args:
|
||||
|
|
@ -15,4 +15,4 @@ services:
|
|||
stdin_open: true
|
||||
tty: true
|
||||
volumes:
|
||||
- ./user_data:/home/app/text-generation-webui/user_data
|
||||
- ./user_data:/home/app/textgen/user_data
|
||||
|
|
|
|||
|
|
@ -10,10 +10,10 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \
|
|||
apt install --no-install-recommends -y git vim build-essential python3-dev pip bash curl && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
WORKDIR /home/app/
|
||||
RUN git clone https://github.com/oobabooga/text-generation-webui.git
|
||||
WORKDIR /home/app/text-generation-webui
|
||||
RUN git clone https://github.com/oobabooga/textgen.git
|
||||
WORKDIR /home/app/textgen
|
||||
RUN GPU_CHOICE=D LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose
|
||||
EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000}
|
||||
# set umask to ensure group read / write at runtime
|
||||
WORKDIR /home/app/text-generation-webui
|
||||
CMD umask 0002 && export HOME=/home/app/text-generation-webui && ./start_linux.sh --listen
|
||||
WORKDIR /home/app/textgen
|
||||
CMD umask 0002 && export HOME=/home/app/textgen && ./start_linux.sh --listen
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
version: "3.3"
|
||||
services:
|
||||
text-generation-webui:
|
||||
textgen:
|
||||
build:
|
||||
context: .
|
||||
args:
|
||||
|
|
@ -25,4 +25,4 @@ services:
|
|||
security_opt:
|
||||
- seccomp=unconfined
|
||||
volumes:
|
||||
- ./user_data:/home/app/text-generation-webui/user_data
|
||||
- ./user_data:/home/app/textgen/user_data
|
||||
|
|
|
|||
|
|
@ -11,10 +11,10 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \
|
|||
apt install --no-install-recommends -y git vim build-essential python3-dev pip bash curl && \
|
||||
rm -rf /var/lib/apt/lists/*
|
||||
WORKDIR /home/app/
|
||||
RUN git clone https://github.com/oobabooga/text-generation-webui.git
|
||||
WORKDIR /home/app/text-generation-webui
|
||||
RUN git clone https://github.com/oobabooga/textgen.git
|
||||
WORKDIR /home/app/textgen
|
||||
RUN GPU_CHOICE=A LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose
|
||||
EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000}
|
||||
WORKDIR /home/app/text-generation-webui
|
||||
WORKDIR /home/app/textgen
|
||||
# set umask to ensure group read / write at runtime
|
||||
CMD umask 0002 && export HOME=/home/app/text-generation-webui && ./start_linux.sh --listen
|
||||
CMD umask 0002 && export HOME=/home/app/textgen && ./start_linux.sh --listen
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
version: "3.3"
|
||||
services:
|
||||
text-generation-webui:
|
||||
textgen:
|
||||
build:
|
||||
context: .
|
||||
args:
|
||||
|
|
@ -17,7 +17,7 @@ services:
|
|||
stdin_open: true
|
||||
tty: true
|
||||
volumes:
|
||||
- ./user_data:/home/app/text-generation-webui/user_data
|
||||
- ./user_data:/home/app/textgen/user_data
|
||||
deploy:
|
||||
resources:
|
||||
reservations:
|
||||
|
|
|
|||
|
|
@ -146,14 +146,14 @@ Note that you can get creative: instead of writing something trivial like "Write
|
|||
|
||||
And it works:
|
||||
|
||||

|
||||

|
||||
|
||||
## Chat style
|
||||
|
||||
This defines the visual style of the chat UI. Each option is a CSS file defined under `text-generation-webui/css/chat_style-name.css`, where "name" is how this style is called in the dropdown menu. You can add new styles by simply copying `chat_style-cai-chat.css` to `chat_style-myNewStyle.css` and editing the contents of this new file. If you end up with a style that you like, you are highly encouraged to submit it to the repository.
|
||||
This defines the visual style of the chat UI. Each option is a CSS file defined under `textgen/css/chat_style-name.css`, where "name" is how this style is called in the dropdown menu. You can add new styles by simply copying `chat_style-cai-chat.css` to `chat_style-myNewStyle.css` and editing the contents of this new file. If you end up with a style that you like, you are highly encouraged to submit it to the repository.
|
||||
|
||||
The styles are only applied to chat and chat-instruct modes. Instruct mode has its separate style defined in `text-generation-webui/css/html_instruct_style.css`.
|
||||
The styles are only applied to chat and chat-instruct modes. Instruct mode has its separate style defined in `textgen/css/html_instruct_style.css`.
|
||||
|
||||
## Character gallery
|
||||
|
||||
This menu is a built-in extension defined under `text-generation-webui/extensions/gallery`. It displays a gallery with your characters, and if you click on a character, it will be automatically selected in the Character tab.
|
||||
This menu is a built-in extension defined under `textgen/extensions/gallery`. It displays a gallery with your characters, and if you click on a character, it will be automatically selected in the Character tab.
|
||||
|
|
|
|||
|
|
@ -22,7 +22,7 @@ Five tabs can be found:
|
|||
|
||||
* **Raw**: where the raw text generated by the model appears.
|
||||
* **Markdown**: it contains a "Render" button. You can click on it at any time to render the current output as markdown. This is particularly useful for models that generate LaTeX equations like GALACTICA.
|
||||
* **HTML**: displays the output in an HTML style that is meant to be easier to read. Its style is defined under `text-generation-webui/css/html_readable_style.css`.
|
||||
* **HTML**: displays the output in an HTML style that is meant to be easier to read. Its style is defined under `textgen/css/html_readable_style.css`.
|
||||
* **Logits**: when you click on "Get next token probabilities", this tab displays the 50 most likely next tokens and their probabilities based on your current input. If "Use samplers" is checked, the probabilities will be the ones after the sampling parameters in the "Parameters" > "Generation" tab are applied. Otherwise, they will be the raw probabilities generated by the model.
|
||||
* **Tokens**: allows you to tokenize your prompt and see the ID numbers for the individual tokens.
|
||||
|
||||
|
|
|
|||
|
|
@ -92,7 +92,7 @@ If the **Autoload the model** checkbox is selected, the model will be loaded as
|
|||
|
||||
## LoRA dropdown
|
||||
|
||||
Used to apply LoRAs to the model. Note that LoRA support is not implemented for all loaders. Check the [What Works](https://github.com/oobabooga/text-generation-webui/wiki/What-Works) page for details.
|
||||
Used to apply LoRAs to the model. Note that LoRA support is not implemented for all loaders. Check the [What Works](https://github.com/oobabooga/textgen/wiki/What-Works) page for details.
|
||||
|
||||
## Download model or LoRA
|
||||
|
||||
|
|
|
|||
|
|
@ -9,14 +9,14 @@ Here you can restart the UI with new settings.
|
|||
|
||||
## Extensions & flags
|
||||
|
||||
* **Available extensions**: shows a list of extensions available under `text-generation-webui/extensions` and `text-generation-webui/user_data/extensions`. Note that some of these extensions may require manually installing Python requirements through the command: `pip install -r extensions/extension_name/requirements.txt`.
|
||||
* **Available extensions**: shows a list of extensions available under `textgen/extensions` and `textgen/user_data/extensions`. Note that some of these extensions may require manually installing Python requirements through the command: `pip install -r extensions/extension_name/requirements.txt`.
|
||||
* **Boolean command-line flags**: shows command-line flags of bool (true/false) type.
|
||||
|
||||
After selecting your desired flags and extensions, you can restart the UI by clicking on **Apply flags/extensions and restart**.
|
||||
|
||||
## Install or update an extension
|
||||
|
||||
In this field, you can enter the GitHub URL for an extension and press enter to either install it (i.e. cloning it into `text-generation-webui/extensions`) or update it with `git pull` in case it is already cloned.
|
||||
In this field, you can enter the GitHub URL for an extension and press enter to either install it (i.e. cloning it into `textgen/extensions`) or update it with `git pull` in case it is already cloned.
|
||||
|
||||
Note that some extensions may include additional Python requirements. In this case, to install those you have to run the command
|
||||
|
||||
|
|
|
|||
|
|
@ -1,8 +1,8 @@
|
|||
# Extensions
|
||||
|
||||
Extensions are defined by files named `script.py` inside subfolders of either:
|
||||
- `text-generation-webui/extensions`
|
||||
- `text-generation-webui/user_data/extensions`
|
||||
- `textgen/extensions`
|
||||
- `textgen/user_data/extensions`
|
||||
|
||||
They are loaded at startup if the folder name is specified after the `--extensions` flag.
|
||||
|
||||
|
|
@ -10,7 +10,7 @@ For instance, `extensions/silero_tts/script.py` or `user_data/extensions/silero_
|
|||
|
||||
**Note:** Extensions in `user_data/extensions/` take priority over those in `extensions/` when both exist with the same name.
|
||||
|
||||
## [text-generation-webui-extensions](https://github.com/oobabooga/text-generation-webui-extensions)
|
||||
## [textgen-extensions](https://github.com/oobabooga/textgen-extensions)
|
||||
|
||||
The repository above contains a directory of user extensions.
|
||||
|
||||
|
|
@ -20,19 +20,19 @@ If you create an extension, you are welcome to host it in a GitHub repository an
|
|||
|
||||
|Extension|Description|
|
||||
|---------|-----------|
|
||||
|[superboogav2](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/superboogav2)| Enhanced RAG extension with support for PDF, DOCX, and PPTX files. |
|
||||
|[send_pictures](https://github.com/oobabooga/text-generation-webui/blob/main/extensions/send_pictures/)| Creates an image upload field that can be used to send images to the bot in chat mode. Captions are automatically generated using BLIP. |
|
||||
|[coqui_tts](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/coqui_tts)| Text-to-speech extension using Coqui XTTS v2. |
|
||||
|[silero_tts](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/silero_tts)| Text-to-speech extension using [Silero](https://github.com/snakers4/silero-models). When used in chat mode, responses are replaced with an audio widget. |
|
||||
|[whisper_stt](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/whisper_stt)| Allows you to enter your inputs in chat mode using your microphone. |
|
||||
|[perplexity_colors](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/perplexity_colors)| Colors each token in the output text by its associated probability, as derived from the model logits. |
|
||||
|[google_translate](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/google_translate)| Automatically translates inputs and outputs using Google Translate.|
|
||||
|[gallery](https://github.com/oobabooga/text-generation-webui/blob/main/extensions/gallery/)| Creates a gallery with the chat characters and their pictures. |
|
||||
|[sd_api_pictures](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/sd_api_pictures)| Allows you to request pictures from the bot in chat mode, which will be generated using the AUTOMATIC1111 Stable Diffusion API. See examples [here](https://github.com/oobabooga/text-generation-webui/pull/309). |
|
||||
|[long_replies](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/long_replies)| Forces longer replies by suppressing early newlines in the model output. |
|
||||
|[ngrok](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/ngrok)| Allows you to access the web UI remotely using the ngrok reverse tunnel service (free). It's an alternative to the built-in Gradio `--share` feature. |
|
||||
|[superbooga](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/superbooga)| An extension that uses ChromaDB to create an arbitrarily large pseudocontext, taking as input text files, URLs, or pasted text. Based on https://github.com/kaiokendev/superbig. |
|
||||
|[character_bias](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/character_bias)| Just a very simple example that adds a hidden string at the beginning of the bot's reply in chat mode. |
|
||||
|[superboogav2](https://github.com/oobabooga/textgen/tree/main/extensions/superboogav2)| Enhanced RAG extension with support for PDF, DOCX, and PPTX files. |
|
||||
|[send_pictures](https://github.com/oobabooga/textgen/blob/main/extensions/send_pictures/)| Creates an image upload field that can be used to send images to the bot in chat mode. Captions are automatically generated using BLIP. |
|
||||
|[coqui_tts](https://github.com/oobabooga/textgen/tree/main/extensions/coqui_tts)| Text-to-speech extension using Coqui XTTS v2. |
|
||||
|[silero_tts](https://github.com/oobabooga/textgen/tree/main/extensions/silero_tts)| Text-to-speech extension using [Silero](https://github.com/snakers4/silero-models). When used in chat mode, responses are replaced with an audio widget. |
|
||||
|[whisper_stt](https://github.com/oobabooga/textgen/tree/main/extensions/whisper_stt)| Allows you to enter your inputs in chat mode using your microphone. |
|
||||
|[perplexity_colors](https://github.com/oobabooga/textgen/tree/main/extensions/perplexity_colors)| Colors each token in the output text by its associated probability, as derived from the model logits. |
|
||||
|[google_translate](https://github.com/oobabooga/textgen/tree/main/extensions/google_translate)| Automatically translates inputs and outputs using Google Translate.|
|
||||
|[gallery](https://github.com/oobabooga/textgen/blob/main/extensions/gallery/)| Creates a gallery with the chat characters and their pictures. |
|
||||
|[sd_api_pictures](https://github.com/oobabooga/textgen/tree/main/extensions/sd_api_pictures)| Allows you to request pictures from the bot in chat mode, which will be generated using the AUTOMATIC1111 Stable Diffusion API. See examples [here](https://github.com/oobabooga/textgen/pull/309). |
|
||||
|[long_replies](https://github.com/oobabooga/textgen/tree/main/extensions/long_replies)| Forces longer replies by suppressing early newlines in the model output. |
|
||||
|[ngrok](https://github.com/oobabooga/textgen/tree/main/extensions/ngrok)| Allows you to access the web UI remotely using the ngrok reverse tunnel service (free). It's an alternative to the built-in Gradio `--share` feature. |
|
||||
|[superbooga](https://github.com/oobabooga/textgen/tree/main/extensions/superbooga)| An extension that uses ChromaDB to create an arbitrarily large pseudocontext, taking as input text files, URLs, or pasted text. Based on https://github.com/kaiokendev/superbig. |
|
||||
|[character_bias](https://github.com/oobabooga/textgen/tree/main/extensions/character_bias)| Just a very simple example that adds a hidden string at the beginning of the bot's reply in chat mode. |
|
||||
|
||||
## How to write an extension
|
||||
|
||||
|
|
@ -104,7 +104,7 @@ only the first declaration encountered will be used and the rest will be ignored
|
|||
|
||||
## A full example
|
||||
|
||||
The source code below can be found at [extensions/example/script.py](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/example/script.py).
|
||||
The source code below can be found at [extensions/example/script.py](https://github.com/oobabooga/textgen/tree/main/extensions/example/script.py).
|
||||
|
||||
```python
|
||||
"""
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
## Audio notification
|
||||
|
||||
If your computer takes a long time to generate each response for the model that you are using, you can enable an audio notification for when the response is completed. This feature was kindly contributed by HappyWorldGames in [#1277](https://github.com/oobabooga/text-generation-webui/pull/1277).
|
||||
If your computer takes a long time to generate each response for the model that you are using, you can enable an audio notification for when the response is completed. This feature was kindly contributed by HappyWorldGames in [#1277](https://github.com/oobabooga/textgen/pull/1277).
|
||||
|
||||
### Installation
|
||||
|
||||
|
|
|
|||
|
|
@ -27,7 +27,7 @@ There are four Docker variants available under `docker/`:
|
|||
To launch (using NVIDIA as an example):
|
||||
|
||||
```bash
|
||||
cd text-generation-webui/docker/nvidia
|
||||
cd textgen/docker/nvidia
|
||||
cp ../.env.example .env
|
||||
# Optionally edit .env to customize ports, TORCH_CUDA_ARCH_LIST, etc.
|
||||
docker compose up --build
|
||||
|
|
|
|||
|
|
@ -19,7 +19,7 @@ Add `--api` to your command-line flags.
|
|||
|
||||
### Examples
|
||||
|
||||
For the documentation with all the endpoints, parameters and their types, consult `http://127.0.0.1:5000/docs` or the [typing.py](https://github.com/oobabooga/text-generation-webui/blob/main/modules/api/typing.py) file.
|
||||
For the documentation with all the endpoints, parameters and their types, consult `http://127.0.0.1:5000/docs` or the [typing.py](https://github.com/oobabooga/textgen/blob/main/modules/api/typing.py) file.
|
||||
|
||||
The official examples in the [OpenAI documentation](https://platform.openai.com/docs/api-reference) should also work, and the same parameters apply (although the API here has more optional parameters).
|
||||
|
||||
|
|
@ -232,6 +232,17 @@ curl -k http://127.0.0.1:5000/v1/internal/model/load \
|
|||
}'
|
||||
```
|
||||
|
||||
You can also set a default instruction template for all subsequent API requests by passing `instruction_template` (a template name from `user_data/instruction-templates/`) or `instruction_template_str` (a raw Jinja2 string):
|
||||
|
||||
```shell
|
||||
curl -k http://127.0.0.1:5000/v1/internal/model/load \
|
||||
-H "Content-Type: application/json" \
|
||||
-d '{
|
||||
"model_name": "Qwen_Qwen3-0.6B-Q4_K_M.gguf",
|
||||
"instruction_template": "Alpaca"
|
||||
}'
|
||||
```
|
||||
|
||||
#### Python chat example
|
||||
|
||||
```python
|
||||
|
|
|
|||
|
|
@ -10,10 +10,10 @@ This feature allows you to generate images using `diffusers` models like [Tongyi
|
|||
1. Clone the repository with
|
||||
|
||||
```
|
||||
git clone https://github.com/oobabooga/text-generation-webui
|
||||
git clone https://github.com/oobabooga/textgen
|
||||
```
|
||||
|
||||
or download it from [here](https://github.com/oobabooga/text-generation-webui/archive/refs/heads/main.zip) and unzip it.
|
||||
or download it from [here](https://github.com/oobabooga/textgen/archive/refs/heads/main.zip) and unzip it.
|
||||
|
||||
2. Use the one-click installer.
|
||||
|
||||
|
|
@ -21,7 +21,7 @@ or download it from [here](https://github.com/oobabooga/text-generation-webui/ar
|
|||
- Linux: Run `./start_linux.sh`
|
||||
- macOS: Run `./start_macos.sh`
|
||||
|
||||
Note: Image generation does not work with the portable builds in `.zip` format in the [Releases page](https://github.com/oobabooga/text-generation-webui/releases). You need the "full" version of the web UI.
|
||||
Note: Image generation does not work with the portable builds in `.zip` format in the [Releases page](https://github.com/oobabooga/textgen/releases). You need the "full" version of the web UI.
|
||||
|
||||
## Downloading a model
|
||||
|
||||
|
|
@ -64,7 +64,7 @@ To use this feature, you need to load an LLM in the main "Model" page on the lef
|
|||
|
||||
If you have no idea what to use, do this to get started:
|
||||
|
||||
1. Download [Qwen3-4B-Q3_K_M.gguf](https://huggingface.co/unsloth/Qwen3-4B-GGUF/resolve/main/Qwen3-4B-Q3_K_M.gguf) to your `text-generation-webui/user_data/models` folder.
|
||||
1. Download [Qwen3-4B-Q3_K_M.gguf](https://huggingface.co/unsloth/Qwen3-4B-GGUF/resolve/main/Qwen3-4B-Q3_K_M.gguf) to your `textgen/user_data/models` folder.
|
||||
2. Select the model in the dropdown menu in the "Model" page.
|
||||
3. Click Load.
|
||||
|
||||
|
|
|
|||
|
|
@ -14,7 +14,7 @@ As an example, download
|
|||
|
||||
https://huggingface.co/unsloth/gemma-3-4b-it-GGUF/resolve/main/gemma-3-4b-it-Q4_K_S.gguf?download=true
|
||||
|
||||
to your `text-generation-webui/user_data/models` folder.
|
||||
to your `textgen/user_data/models` folder.
|
||||
|
||||
### 3. Download the associated mmproj file to `user_data/mmproj`
|
||||
|
||||
|
|
@ -22,7 +22,7 @@ Then download
|
|||
|
||||
https://huggingface.co/unsloth/gemma-3-4b-it-GGUF/resolve/main/mmproj-F16.gguf?download=true
|
||||
|
||||
to your `text-generation-webui/user_data/mmproj` folder. Name it `mmproj-gemma-3-4b-it-F16.gguf` to give it a recognizable name.
|
||||
to your `textgen/user_data/mmproj` folder. Name it `mmproj-gemma-3-4b-it-F16.gguf` to give it a recognizable name.
|
||||
|
||||
### 4. Load the model
|
||||
|
||||
|
|
@ -63,4 +63,4 @@ Examples of models that you can use:
|
|||
|
||||
In the page below you can find some ready-to-use examples:
|
||||
|
||||
[Multimodal/vision (llama.cpp and ExLlamaV3)](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#multimodalvision-llamacpp-and-exllamav3)
|
||||
[Multimodal/vision (llama.cpp and ExLlamaV3)](https://github.com/oobabooga/textgen/wiki/12-%E2%80%90-OpenAI-API#multimodalvision-llamacpp-and-exllamav3)
|
||||
|
|
|
|||
|
|
@ -1,5 +1,5 @@
|
|||
These files are a mirror of the documentation at:
|
||||
|
||||
# https://github.com/oobabooga/text-generation-webui/wiki
|
||||
# https://github.com/oobabooga/textgen/wiki
|
||||
|
||||
It is recommended to browse it there. Contributions can be sent here and will later be synced with the wiki.
|
||||
|
|
|
|||
|
|
@ -80,6 +80,19 @@ def execute(arguments):
|
|||
|
||||
You can open the built-in tools in `user_data/tools/` for more examples.
|
||||
|
||||
## MCP servers
|
||||
|
||||
You can connect to remote [MCP (Model Context Protocol)](https://modelcontextprotocol.io/) servers to use their tools alongside local ones.
|
||||
|
||||
In the chat sidebar, open the **MCP servers** accordion and enter one server URL per line. For servers that require authentication, append headers after the URL separated by commas:
|
||||
|
||||
```
|
||||
https://example.com/mcp
|
||||
https://other.com/mcp,Authorization: Bearer sk-xxx
|
||||
```
|
||||
|
||||
All tools from the configured servers are automatically discovered and made available to the model during generation. If an MCP tool has the same name as a selected local tool, the local tool takes priority.
|
||||
|
||||
## Tool calling over the API
|
||||
|
||||
Tool calling over the API follows the [OpenAI API](https://platform.openai.com/docs/guides/function-calling) convention. Define your tools, send them with your messages, and handle tool calls in a loop until the model gives a final answer.
|
||||
|
|
|
|||
|
|
@ -18,7 +18,7 @@ port = shared.args.listen_port if shared.args.listen_port else '7860'
|
|||
options = {
|
||||
'addr': f"{host}:{port}",
|
||||
'authtoken_from_env': True,
|
||||
'session_metadata': 'text-generation-webui',
|
||||
'session_metadata': 'textgen',
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@
|
|||
TL;DR: Lets the bot answer you with a picture!
|
||||
|
||||
Stable Diffusion API pictures for TextGen, v.1.2.0
|
||||
An extension to [oobabooga's textgen-webui](https://github.com/oobabooga/text-generation-webui) allowing you to receive pics generated by [Automatic1111's SD-WebUI API](https://github.com/AUTOMATIC1111/stable-diffusion-webui)
|
||||
An extension to [oobabooga's TextGen](https://github.com/oobabooga/textgen) allowing you to receive pics generated by [Automatic1111's SD-WebUI API](https://github.com/AUTOMATIC1111/stable-diffusion-webui)
|
||||
|
||||
<details>
|
||||
<summary>Interface overview</summary>
|
||||
|
|
@ -17,7 +17,7 @@ Load it in the `--chat` mode with `--extension sd_api_pictures` alongside `send_
|
|||
|
||||
## History
|
||||
|
||||
Consider the version included with [oobabooga's repository](https://github.com/oobabooga/text-generation-webui/tree/main/extensions/sd_api_pictures) to be STABLE, experimental developments and untested features are pushed in [Brawlence/SD_api_pics](https://github.com/Brawlence/SD_api_pics)
|
||||
Consider the version included with [oobabooga's repository](https://github.com/oobabooga/textgen/tree/main/extensions/sd_api_pictures) to be STABLE, experimental developments and untested features are pushed in [Brawlence/SD_api_pics](https://github.com/Brawlence/SD_api_pics)
|
||||
|
||||
Latest change:
|
||||
1.1.0 → 1.1.1 Fixed not having Auto1111's metadata in received images
|
||||
|
|
@ -48,7 +48,7 @@ Green mark confirms the ability to communicate with Auto1111's API on this addre
|
|||
|
||||
### Persistent settings
|
||||
|
||||
Create or modify the `settings.json` in the `text-generation-webui` root directory to override the defaults
|
||||
Create or modify the `settings.json` in the `textgen` root directory to override the defaults
|
||||
present in script.py, ex:
|
||||
|
||||
```json
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ Enhance your LLM with additional information from text, URLs, and files for more
|
|||
|
||||
## Installation and Activation
|
||||
|
||||
1. Start the conda environment by running `cmd_windows.bat` or the equivalent for your system in the root directory of `text-generation-webui`.
|
||||
1. Start the conda environment by running `cmd_windows.bat` or the equivalent for your system in the root directory of `textgen`.
|
||||
2. Install the necessary packages:
|
||||
```
|
||||
pip install -r extensions/superboogav2/requirements.txt
|
||||
|
|
@ -38,4 +38,4 @@ SuperboogaV2 utilizes MuPDF, pandas, python-docx, and python-pptx to extract tex
|
|||
|
||||
SuperboogaV2 processes your data into context-aware chunks, applies cleaning techniques, and stores them as embeddings to minimize redundant computations. Relevance is determined using distance calculations and prioritization of recent information.
|
||||
|
||||
For a detailed description and more information, refer to the comments in this pull request: [https://github.com/oobabooga/text-generation-webui/pull/3272](https://github.com/oobabooga/text-generation-webui/pull/3272)
|
||||
For a detailed description and more information, refer to the comments in this pull request: [https://github.com/oobabooga/textgen/pull/3272](https://github.com/oobabooga/textgen/pull/3272)
|
||||
|
|
|
|||
|
|
@ -270,16 +270,23 @@ function removeLastClick() {
|
|||
document.getElementById("Remove-last").click();
|
||||
}
|
||||
|
||||
let _scrollPending = false;
|
||||
|
||||
function autoScrollToBottom() {
|
||||
if (!window.isScrolled) {
|
||||
const chatParent = document.getElementById("chat")?.parentNode?.parentNode?.parentNode;
|
||||
if (chatParent) {
|
||||
const maxScroll = chatParent.scrollHeight - chatParent.clientHeight;
|
||||
if (maxScroll > 0 && chatParent.scrollTop < maxScroll - 1) {
|
||||
chatParent.scrollTop = maxScroll;
|
||||
if (_scrollPending) return;
|
||||
_scrollPending = true;
|
||||
queueMicrotask(() => {
|
||||
_scrollPending = false;
|
||||
if (!window.isScrolled) {
|
||||
const chatParent = document.getElementById("chat")?.parentNode?.parentNode?.parentNode;
|
||||
if (chatParent) {
|
||||
const maxScroll = chatParent.scrollHeight - chatParent.clientHeight;
|
||||
if (maxScroll > 0 && chatParent.scrollTop < maxScroll - 1) {
|
||||
chatParent.scrollTop = maxScroll;
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
});
|
||||
}
|
||||
|
||||
function updateInstructPadding() {
|
||||
|
|
|
|||
45
js/main.js
45
js/main.js
|
|
@ -309,18 +309,19 @@ for (let i = 0; i < slimDropdownElements.length; i++) {
|
|||
// https://github.com/SillyTavern/SillyTavern/blob/6c8bd06308c69d51e2eb174541792a870a83d2d6/public/script.js
|
||||
//------------------------------------------------
|
||||
var buttonsInChat = document.querySelectorAll("#chat-tab #chat-buttons button, #chat-tab #chat-buttons #show-controls");
|
||||
var hoverContainer = document.getElementById("gr-hover-container");
|
||||
var button = document.getElementById("hover-element-button");
|
||||
var menu = document.getElementById("hover-menu");
|
||||
var istouchscreen = (navigator.maxTouchPoints > 0) || "ontouchstart" in document.documentElement;
|
||||
|
||||
function showMenu() {
|
||||
menu.style.display = "flex"; // Show the menu
|
||||
menu.style.display = "flex";
|
||||
}
|
||||
|
||||
function hideMenu() {
|
||||
menu.style.display = "none"; // Hide the menu
|
||||
menu.style.display = "none";
|
||||
if (!istouchscreen) {
|
||||
document.querySelector("#chat-input textarea").focus(); // Focus on the chat input
|
||||
document.querySelector("#chat-input textarea").focus();
|
||||
}
|
||||
}
|
||||
|
||||
|
|
@ -329,7 +330,6 @@ if (buttonsInChat.length > 0) {
|
|||
const thisButton = buttonsInChat[i];
|
||||
menu.appendChild(thisButton);
|
||||
|
||||
// Only apply transformations to button elements
|
||||
if (thisButton.tagName.toLowerCase() === "button") {
|
||||
thisButton.addEventListener("click", () => {
|
||||
hideMenu();
|
||||
|
|
@ -339,7 +339,6 @@ if (buttonsInChat.length > 0) {
|
|||
const matches = buttonText.match(/(\(.*?\))/);
|
||||
|
||||
if (matches && matches.length > 1) {
|
||||
// Apply the transparent-substring class to the matched substring
|
||||
const substring = matches[1];
|
||||
const newText = buttonText.replace(substring, ` <span class="transparent-substring">${substring.slice(1, -1)}</span>`);
|
||||
thisButton.innerHTML = newText;
|
||||
|
|
@ -348,16 +347,19 @@ if (buttonsInChat.length > 0) {
|
|||
}
|
||||
}
|
||||
|
||||
function isMouseOverButtonOrMenu() {
|
||||
return menu.matches(":hover") || button.matches(":hover");
|
||||
}
|
||||
var menuInteracting = false;
|
||||
|
||||
button.addEventListener("mouseenter", function () {
|
||||
hoverContainer.addEventListener("mouseenter", function () {
|
||||
if (!istouchscreen) {
|
||||
showMenu();
|
||||
}
|
||||
});
|
||||
|
||||
hoverContainer.addEventListener("mousedown", function () {
|
||||
menuInteracting = true;
|
||||
setTimeout(function () { menuInteracting = false; }, 300);
|
||||
});
|
||||
|
||||
button.addEventListener("click", function () {
|
||||
if (menu.style.display === "flex") {
|
||||
hideMenu();
|
||||
|
|
@ -367,24 +369,20 @@ button.addEventListener("click", function () {
|
|||
}
|
||||
});
|
||||
|
||||
// Delay to prevent menu hiding when the mouse leaves the button or menu
|
||||
function delayedHideMenu() {
|
||||
setTimeout(function () {
|
||||
if (!isMouseOverButtonOrMenu()) {
|
||||
hideMenu();
|
||||
}
|
||||
}, 100);
|
||||
}
|
||||
|
||||
// Add event listener for mouseleave on the button
|
||||
button.addEventListener("mouseleave", delayedHideMenu);
|
||||
// Add event listener for mouseleave on the menu
|
||||
menu.addEventListener("mouseleave", delayedHideMenu);
|
||||
hoverContainer.addEventListener("mouseleave", function () {
|
||||
if (!istouchscreen) {
|
||||
setTimeout(function () {
|
||||
if (!hoverContainer.matches(":hover") && !menu.matches(":hover")) {
|
||||
hideMenu();
|
||||
}
|
||||
}, 50);
|
||||
}
|
||||
});
|
||||
|
||||
// Add event listener for click anywhere in the document
|
||||
document.addEventListener("click", function (event) {
|
||||
// Check if the click is outside the button/menu and the menu is visible
|
||||
if (!isMouseOverButtonOrMenu() && menu.style.display === "flex") {
|
||||
if (!menuInteracting && !event.target.closest("#gr-hover-container") && menu.style.display === "flex") {
|
||||
hideMenu();
|
||||
}
|
||||
|
||||
|
|
@ -1006,6 +1004,7 @@ function addMiniDeletes() {
|
|||
|
||||
confirmBtn.onclick = (e) => {
|
||||
e.stopPropagation();
|
||||
label.querySelector("input").click();
|
||||
document.querySelector("#delete_chat-confirm").click();
|
||||
resetButtons();
|
||||
};
|
||||
|
|
|
|||
|
|
@ -95,16 +95,15 @@ def _compute_prompt_logprob_entries(prompt, logprobs_count, input_ids=None):
|
|||
logits = model.get_prompt_logits(input_ids)
|
||||
|
||||
elif hasattr(model, 'forward'):
|
||||
# HF-compatible loaders (Transformers, ExLlamav3_HF, etc.)
|
||||
# HF-compatible loaders (Transformers, etc.). Loaders that need a
|
||||
# custom path (e.g. wrappers that only compute last-token logits in
|
||||
# __call__) should expose get_prompt_logits() above.
|
||||
input_ids_tensor = input_ids if isinstance(input_ids, torch.Tensor) else torch.tensor(input_ids, dtype=torch.long)
|
||||
if hasattr(model, 'device'):
|
||||
input_ids_tensor = input_ids_tensor.to(model.device)
|
||||
with torch.no_grad():
|
||||
# Pass labels to ensure logits are returned for ALL positions,
|
||||
# not just the last token (some HF wrappers like ExLlamav3_HF
|
||||
# only compute the last-token logits when labels are absent).
|
||||
outputs = model(input_ids=input_ids_tensor, labels=input_ids_tensor)
|
||||
logits = outputs.logits # keep on GPU, (1, seq_len, vocab) in model dtype
|
||||
with torch.inference_mode():
|
||||
outputs = model(input_ids=input_ids_tensor)
|
||||
logits = outputs.logits # keep on device, (1, seq_len, vocab) in model dtype
|
||||
del outputs
|
||||
|
||||
else:
|
||||
|
|
@ -117,14 +116,14 @@ def _compute_prompt_logprob_entries(prompt, logprobs_count, input_ids=None):
|
|||
chunk_size = 2048
|
||||
unique_ids = set(int(tid) for tid in token_ids[1:])
|
||||
|
||||
# Process logits in chunks on GPU, only move top-K results to CPU
|
||||
# Process logits in chunks, only move top-K results to CPU
|
||||
all_top_log_probs_list = []
|
||||
all_top_indices_list = []
|
||||
all_actual_lps = []
|
||||
|
||||
for start in range(0, n_tokens - 1, chunk_size):
|
||||
end = min(start + chunk_size, n_tokens - 1)
|
||||
chunk_logits = logits[0, start:end].float() # (chunk, vocab) on GPU
|
||||
chunk_logits = logits[0, start:end].float() # (chunk, vocab) on logits.device
|
||||
chunk_lse = torch.logsumexp(chunk_logits, dim=-1)
|
||||
chunk_top_values, chunk_top_indices = torch.topk(chunk_logits, k=k, dim=-1)
|
||||
chunk_top_log_probs = chunk_top_values - chunk_lse.unsqueeze(-1)
|
||||
|
|
|
|||
|
|
@ -6,6 +6,7 @@ from transformers import AutoModel
|
|||
from .errors import ServiceUnavailableError
|
||||
from .utils import debug_msg, float_list_to_base64
|
||||
from modules.logging_colors import logger
|
||||
from modules import shared
|
||||
|
||||
embeddings_params_initialized = False
|
||||
|
||||
|
|
@ -41,7 +42,7 @@ def load_embedding_model(model: str):
|
|||
try:
|
||||
logger.info(f"Try embedding model: {model} on {embeddings_device}")
|
||||
if 'jina-embeddings' in model:
|
||||
embeddings_model = AutoModel.from_pretrained(model, trust_remote_code=True) # trust_remote_code is needed to use the encode method
|
||||
embeddings_model = AutoModel.from_pretrained(model, trust_remote_code=shared.args.trust_remote_code)
|
||||
embeddings_model = embeddings_model.to(embeddings_device)
|
||||
else:
|
||||
embeddings_model = SentenceTransformer(model, device=embeddings_device)
|
||||
|
|
|
|||
|
|
@ -4,8 +4,11 @@ OpenAI-compatible image generation using local diffusion models.
|
|||
|
||||
import base64
|
||||
import io
|
||||
import json
|
||||
import time
|
||||
|
||||
from PIL.PngImagePlugin import PngInfo
|
||||
|
||||
from .errors import ServiceUnavailableError
|
||||
from modules import shared
|
||||
|
||||
|
|
@ -15,7 +18,7 @@ def generations(request):
|
|||
Generate images using the loaded diffusion model.
|
||||
Returns dict with 'created' timestamp and 'data' list of images.
|
||||
"""
|
||||
from modules.ui_image_generation import generate
|
||||
from modules.ui_image_generation import build_generation_metadata, generate
|
||||
|
||||
if shared.image_model is None:
|
||||
raise ServiceUnavailableError("No image model loaded. Load a model via the UI first.")
|
||||
|
|
@ -46,10 +49,18 @@ def generations(request):
|
|||
if not images:
|
||||
raise ServiceUnavailableError("Image generation failed or produced no images.")
|
||||
|
||||
# Build response
|
||||
# Build response with per-batch metadata (seed increments per batch)
|
||||
base_seed = state.get('image_seed_resolved', state['image_seed'])
|
||||
batch_size = int(state['image_batch_size'])
|
||||
|
||||
resp = {'created': int(time.time()), 'data': []}
|
||||
for img in images:
|
||||
b64 = _image_to_base64(img)
|
||||
for idx, img in enumerate(images):
|
||||
batch_seed = base_seed + idx // batch_size
|
||||
metadata = build_generation_metadata(state, batch_seed)
|
||||
metadata_json = json.dumps(metadata, ensure_ascii=False)
|
||||
png_info = PngInfo()
|
||||
png_info.add_text("image_gen_settings", metadata_json)
|
||||
b64 = _image_to_base64(img, png_info)
|
||||
|
||||
image_obj = {'revised_prompt': request.prompt}
|
||||
|
||||
|
|
@ -63,7 +74,7 @@ def generations(request):
|
|||
return resp
|
||||
|
||||
|
||||
def _image_to_base64(image) -> str:
|
||||
def _image_to_base64(image, png_info=None) -> str:
|
||||
buffered = io.BytesIO()
|
||||
image.save(buffered, format="PNG")
|
||||
image.save(buffered, format="PNG", pnginfo=png_info)
|
||||
return base64.b64encode(buffered.getvalue()).decode('utf-8')
|
||||
|
|
|
|||
|
|
@ -2,7 +2,7 @@ from modules import loaders, shared
|
|||
from modules.logging_colors import logger
|
||||
from modules.LoRA import add_lora_to_model
|
||||
from modules.models import load_model, unload_model
|
||||
from modules.models_settings import get_model_metadata, update_model_parameters
|
||||
from modules.models_settings import get_model_metadata, load_instruction_template, update_model_parameters
|
||||
from modules.utils import get_available_loras, get_available_models
|
||||
|
||||
|
||||
|
|
@ -42,12 +42,10 @@ def model_info_dict(model_name: str) -> dict:
|
|||
|
||||
def _load_model(data):
|
||||
model_name = data["model_name"]
|
||||
args = data["args"]
|
||||
settings = data["settings"]
|
||||
args = data.get("args")
|
||||
|
||||
unload_model()
|
||||
model_settings = get_model_metadata(model_name)
|
||||
update_model_parameters(model_settings)
|
||||
|
||||
# Update shared.args with custom model loading settings
|
||||
# Security: only allow keys that correspond to model loading
|
||||
|
|
@ -55,6 +53,16 @@ def _load_model(data):
|
|||
# flags like trust_remote_code or extra_flags to be set via the API.
|
||||
blocked_keys = {'extra_flags'}
|
||||
allowed_keys = set(loaders.list_model_elements()) - blocked_keys
|
||||
|
||||
# Reset all loader args to their startup values before applying new ones,
|
||||
# so settings from a previous API load don't leak into this one.
|
||||
# Include blocked keys in the reset (safe: restores startup value, not API-controlled).
|
||||
for k in allowed_keys | blocked_keys:
|
||||
if hasattr(shared.args, k) and hasattr(shared.original_args, k):
|
||||
setattr(shared.args, k, getattr(shared.original_args, k))
|
||||
|
||||
update_model_parameters(model_settings)
|
||||
|
||||
if args:
|
||||
for k in args:
|
||||
if k in allowed_keys and hasattr(shared.args, k):
|
||||
|
|
@ -62,15 +70,12 @@ def _load_model(data):
|
|||
|
||||
shared.model, shared.tokenizer = load_model(model_name)
|
||||
|
||||
# Update shared.settings with custom generation defaults
|
||||
if settings:
|
||||
for k in settings:
|
||||
if k in shared.settings:
|
||||
shared.settings[k] = settings[k]
|
||||
if k == 'truncation_length':
|
||||
logger.info(f"CONTEXT LENGTH (UPDATED): {shared.settings['truncation_length']}")
|
||||
elif k == 'instruction_template':
|
||||
logger.info(f"INSTRUCTION TEMPLATE (UPDATED): {shared.settings['instruction_template']}")
|
||||
if data.get("instruction_template_str") is not None:
|
||||
shared.settings['instruction_template_str'] = data["instruction_template_str"]
|
||||
logger.info("INSTRUCTION TEMPLATE: set to custom Jinja2 string")
|
||||
elif data.get("instruction_template") is not None:
|
||||
shared.settings['instruction_template_str'] = load_instruction_template(data["instruction_template"])
|
||||
logger.info(f"INSTRUCTION TEMPLATE: {data['instruction_template']}")
|
||||
|
||||
|
||||
def list_loras():
|
||||
|
|
|
|||
|
|
@ -475,10 +475,8 @@ async def handle_list_models():
|
|||
@app.post("/v1/internal/model/load", dependencies=check_admin_key)
|
||||
async def handle_load_model(request_data: LoadModelRequest):
|
||||
'''
|
||||
This endpoint is experimental and may change in the future.
|
||||
|
||||
The "args" parameter can be used to modify flags like "--load-in-4bit"
|
||||
or "--n-gpu-layers" before loading a model. Example:
|
||||
The "args" parameter can be used to modify loader flags before loading
|
||||
a model. Example:
|
||||
|
||||
```
|
||||
"args": {
|
||||
|
|
@ -487,18 +485,13 @@ async def handle_load_model(request_data: LoadModelRequest):
|
|||
}
|
||||
```
|
||||
|
||||
Note that those settings will remain after loading the model. So you
|
||||
may need to change them back to load a second model.
|
||||
Loader args are reset to their startup defaults between loads, so
|
||||
settings from a previous load do not leak into the next one.
|
||||
|
||||
The "settings" parameter is also a dict but with keys for the
|
||||
shared.settings object. It can be used to modify the default instruction
|
||||
template like this:
|
||||
|
||||
```
|
||||
"settings": {
|
||||
"instruction_template": "Alpaca"
|
||||
}
|
||||
```
|
||||
The "instruction_template" parameter sets the default instruction
|
||||
template by name (from user_data/instruction-templates/). The
|
||||
"instruction_template_str" parameter sets it as a raw Jinja2 string
|
||||
and takes precedence over "instruction_template".
|
||||
'''
|
||||
|
||||
try:
|
||||
|
|
@ -544,8 +537,8 @@ async def handle_unload_loras():
|
|||
def find_available_port(starting_port):
|
||||
"""Try the starting port, then find an available one if it's taken."""
|
||||
try:
|
||||
# Try to create a socket with the starting port
|
||||
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
||||
s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
||||
s.bind(('', starting_port))
|
||||
return starting_port
|
||||
except OSError:
|
||||
|
|
@ -570,7 +563,7 @@ def run_server():
|
|||
server_addrs.append(shared.args.listen_host)
|
||||
else:
|
||||
if os.environ.get('OPENEDAI_ENABLE_IPV6', shared.args.api_enable_ipv6):
|
||||
server_addrs.append('[::]' if shared.args.listen else '[::1]')
|
||||
server_addrs.append('::' if shared.args.listen else '::1')
|
||||
if not os.environ.get('OPENEDAI_DISABLE_IPV4', shared.args.api_disable_ipv4):
|
||||
server_addrs.append('0.0.0.0' if shared.args.listen else '127.0.0.1')
|
||||
|
||||
|
|
@ -587,7 +580,7 @@ def run_server():
|
|||
)
|
||||
else:
|
||||
url_proto = 'https://' if (ssl_certfile and ssl_keyfile) else 'http://'
|
||||
urls = [f'{url_proto}{addr}:{port}/v1' for addr in server_addrs]
|
||||
urls = [f'{url_proto}[{addr}]:{port}/v1' if ':' in addr else f'{url_proto}{addr}:{port}/v1' for addr in server_addrs]
|
||||
if len(urls) > 1:
|
||||
logger.info('OpenAI/Anthropic-compatible API URLs:\n\n' + '\n'.join(urls) + '\n')
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -8,7 +8,7 @@ from modules import shared
|
|||
|
||||
|
||||
class GenerationOptions(BaseModel):
|
||||
preset: str | None = Field(default=None, description="The name of a file under text-generation-webui/user_data/presets (without the .yaml extension). The sampling parameters that get overwritten by this option are the keys in the default_preset() function in modules/presets.py.")
|
||||
preset: str | None = Field(default=None, description="The name of a file under textgen/user_data/presets (without the .yaml extension). The sampling parameters that get overwritten by this option are the keys in the default_preset() function in modules/presets.py.")
|
||||
dynatemp_low: float = shared.args.dynatemp_low
|
||||
dynatemp_high: float = shared.args.dynatemp_high
|
||||
dynatemp_exponent: float = shared.args.dynatemp_exponent
|
||||
|
|
@ -173,10 +173,10 @@ class ChatCompletionRequestParams(BaseModel):
|
|||
|
||||
mode: str = Field(default='instruct', description="Valid options: instruct, chat, chat-instruct.")
|
||||
|
||||
instruction_template: str | None = Field(default=None, description="An instruction template defined under text-generation-webui/user_data/instruction-templates. If not set, the correct template will be automatically obtained from the model metadata.")
|
||||
instruction_template: str | None = Field(default=None, description="An instruction template defined under textgen/user_data/instruction-templates. If not set, the correct template will be automatically obtained from the model metadata.")
|
||||
instruction_template_str: str | None = Field(default=None, description="A Jinja2 instruction template. If set, will take precedence over everything else.")
|
||||
|
||||
character: str | None = Field(default=None, description="A character defined under text-generation-webui/user_data/characters. If not set, the default \"Assistant\" character will be used.")
|
||||
character: str | None = Field(default=None, description="A character defined under textgen/user_data/characters. If not set, the default \"Assistant\" character will be used.")
|
||||
bot_name: str | None = Field(default=None, description="Overwrites the value set by character field.", alias="name2")
|
||||
context: str | None = Field(default=None, description="Overwrites the value set by character field.")
|
||||
greeting: str | None = Field(default=None, description="Overwrites the value set by character field.")
|
||||
|
|
@ -271,7 +271,8 @@ class ModelListResponse(BaseModel):
|
|||
class LoadModelRequest(BaseModel):
|
||||
model_name: str
|
||||
args: dict | None = None
|
||||
settings: dict | None = None
|
||||
instruction_template: str | None = Field(default=None, description="An instruction template defined under textgen/user_data/instruction-templates. Sets the default template for all subsequent API requests.")
|
||||
instruction_template_str: str | None = Field(default=None, description="A Jinja2 instruction template string. If set, takes precedence over instruction_template.")
|
||||
|
||||
|
||||
class LoraListResponse(BaseModel):
|
||||
|
|
|
|||
180
modules/chat.py
180
modules/chat.py
|
|
@ -210,6 +210,57 @@ def _expand_tool_sequence(tool_seq):
|
|||
return messages
|
||||
|
||||
|
||||
def _convert_to_tool_responses(messages):
|
||||
"""Convert role:'tool' messages to tool_responses format.
|
||||
|
||||
Templates like Gemma 4 expect tool results as a ``tool_responses``
|
||||
attribute on the preceding assistant message rather than separate
|
||||
``role: 'tool'`` messages. This function groups consecutive tool
|
||||
messages and attaches them to the assistant message that issued the
|
||||
tool calls.
|
||||
"""
|
||||
result = []
|
||||
tc_id_to_name = {}
|
||||
|
||||
i = 0
|
||||
while i < len(messages):
|
||||
msg = messages[i]
|
||||
|
||||
if msg.get('tool_calls'):
|
||||
for tc in msg['tool_calls']:
|
||||
tc_id = tc.get('id', '')
|
||||
func_name = tc.get('function', {}).get('name', 'unknown')
|
||||
if tc_id:
|
||||
tc_id_to_name[tc_id] = func_name
|
||||
|
||||
if msg.get('role') == 'tool':
|
||||
tool_responses = []
|
||||
while i < len(messages) and messages[i].get('role') == 'tool':
|
||||
tool_msg = messages[i]
|
||||
tc_id = tool_msg.get('tool_call_id', '')
|
||||
func_name = tc_id_to_name.get(tc_id, 'unknown')
|
||||
|
||||
content = tool_msg.get('content', '')
|
||||
try:
|
||||
response = json.loads(content)
|
||||
except (json.JSONDecodeError, ValueError, TypeError):
|
||||
response = content
|
||||
|
||||
tool_responses.append({
|
||||
'name': func_name,
|
||||
'response': response,
|
||||
})
|
||||
i += 1
|
||||
|
||||
if result and result[-1].get('role') == 'assistant':
|
||||
result[-1]['tool_responses'] = tool_responses
|
||||
else:
|
||||
result.append(msg)
|
||||
i += 1
|
||||
|
||||
return result
|
||||
|
||||
|
||||
def _format_attachments(attachments, include_text=True):
|
||||
"""Build image ref and text attachment strings from a list of attachments."""
|
||||
attachments_text = ""
|
||||
|
|
@ -267,6 +318,9 @@ def generate_chat_prompt(user_input, state, **kwargs):
|
|||
tools=state['tools'] if 'tools' in state else None,
|
||||
)
|
||||
|
||||
active_template_str = state['instruction_template_str'] if state['mode'] == 'instruct' else chat_template_str
|
||||
uses_tool_responses = 'tool_responses' in active_template_str
|
||||
|
||||
messages = []
|
||||
|
||||
if state['mode'] == 'instruct':
|
||||
|
|
@ -503,6 +557,9 @@ def generate_chat_prompt(user_input, state, **kwargs):
|
|||
|
||||
return prompt
|
||||
|
||||
if uses_tool_responses:
|
||||
messages = _convert_to_tool_responses(messages)
|
||||
|
||||
prompt = make_prompt(messages)
|
||||
|
||||
# Handle truncation
|
||||
|
|
@ -511,13 +568,24 @@ def generate_chat_prompt(user_input, state, **kwargs):
|
|||
encoded_length = get_encoded_length(prompt)
|
||||
while len(messages) > 0 and encoded_length > max_length:
|
||||
|
||||
# Remove old message, save system message
|
||||
if len(messages) > 2 and messages[0]['role'] == 'system':
|
||||
messages.pop(1)
|
||||
|
||||
# Remove old message when no system message is present
|
||||
pop_idx = 1
|
||||
elif len(messages) > 1 and messages[0]['role'] != 'system':
|
||||
messages.pop(0)
|
||||
pop_idx = 0
|
||||
else:
|
||||
pop_idx = None
|
||||
|
||||
if pop_idx is not None:
|
||||
messages.pop(pop_idx)
|
||||
|
||||
# Remove orphaned tool-call/tool-result messages that
|
||||
# would be invalid without their partner.
|
||||
while pop_idx < len(messages):
|
||||
msg = messages[pop_idx]
|
||||
if msg.get('role') == 'tool' or (msg.get('role') == 'assistant' and msg.get('tool_calls')):
|
||||
messages.pop(pop_idx)
|
||||
else:
|
||||
break
|
||||
|
||||
# Resort to truncating the user input
|
||||
else:
|
||||
|
|
@ -637,7 +705,7 @@ def get_stopping_strings(state):
|
|||
# Find positions of each message content
|
||||
first_user_end = prompt.find("first user message") + len("first user message")
|
||||
first_assistant_start = prompt.find("first assistant message")
|
||||
first_assistant_end = prompt.find("first assistant message") + len("first assistant message")
|
||||
first_assistant_end = first_assistant_start + len("first assistant message")
|
||||
second_user_start = prompt.find("second user message")
|
||||
second_assistant_end = prompt.find("second assistant message") + len("second assistant message")
|
||||
|
||||
|
|
@ -987,6 +1055,14 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
|
|||
|
||||
row_idx = len(output['internal']) - 1
|
||||
|
||||
# Check if the current row has version metadata to sync during streaming
|
||||
_version_meta = output['metadata'].get(f"assistant_{row_idx}")
|
||||
_sync_versions = (
|
||||
_version_meta is not None
|
||||
and 'current_version_index' in _version_meta
|
||||
and not state.get('_tool_turn')
|
||||
)
|
||||
|
||||
# Collect image attachments for multimodal generation from the entire history
|
||||
all_image_attachments = []
|
||||
if 'metadata' in output:
|
||||
|
|
@ -1074,12 +1150,9 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
|
|||
output['internal'][-1] = [text, reply.lstrip(' ')]
|
||||
output['visible'][-1] = [visible_text, visible_reply.lstrip(' ')]
|
||||
|
||||
# Keep version metadata in sync during streaming (for regeneration)
|
||||
if regenerate and not state.get('_tool_turn'):
|
||||
row_idx = len(output['internal']) - 1
|
||||
key = f"assistant_{row_idx}"
|
||||
current_idx = output['metadata'][key]['current_version_index']
|
||||
output['metadata'][key]['versions'][current_idx].update({
|
||||
# Keep version metadata in sync during streaming
|
||||
if _sync_versions:
|
||||
_version_meta['versions'][_version_meta['current_version_index']].update({
|
||||
'content': output['internal'][row_idx][1],
|
||||
'visible_content': output['visible'][row_idx][1]
|
||||
})
|
||||
|
|
@ -1113,11 +1186,8 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
|
|||
output['visible'][-1][1] = apply_extensions('output', output['visible'][-1][1], state, is_chat=True)
|
||||
|
||||
# Final sync for version metadata (in case streaming was disabled)
|
||||
if regenerate and not state.get('_tool_turn'):
|
||||
row_idx = len(output['internal']) - 1
|
||||
key = f"assistant_{row_idx}"
|
||||
current_idx = output['metadata'][key]['current_version_index']
|
||||
output['metadata'][key]['versions'][current_idx].update({
|
||||
if _sync_versions:
|
||||
_version_meta['versions'][_version_meta['current_version_index']].update({
|
||||
'content': output['internal'][row_idx][1],
|
||||
'visible_content': output['visible'][row_idx][1]
|
||||
})
|
||||
|
|
@ -1126,7 +1196,7 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
|
|||
# visible text from before buffering started so raw markup doesn't flash
|
||||
# in the UI. The internal text is left intact so the caller can still
|
||||
# parse tool calls from it.
|
||||
if is_stream and _check_tool_markers and streaming_tool_buffer_check(output['internal'][-1][1], markers=_streaming_markers, tool_names=_tool_names, check_bare_names=_check_bare_names):
|
||||
if is_stream and _check_tool_markers and streaming_tool_buffer_check(output['internal'][-1][1], markers=_streaming_markers, tool_names=_tool_names, check_bare_names=_check_bare_names, partial_match=False):
|
||||
output['visible'][-1][1] = _last_visible_before_tool_buffer or ''
|
||||
|
||||
yield output
|
||||
|
|
@ -1136,6 +1206,11 @@ def impersonate_wrapper(textbox, state):
|
|||
text = textbox['text']
|
||||
static_output = chat_html_wrapper(state['history'], state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
|
||||
|
||||
model_is_loaded, error_message = utils.check_model_loaded()
|
||||
if not model_is_loaded:
|
||||
import gradio as gr
|
||||
raise gr.Error(error_message)
|
||||
|
||||
prompt = generate_chat_prompt('', state, impersonate=True)
|
||||
stopping_strings = get_stopping_strings(state)
|
||||
|
||||
|
|
@ -1184,6 +1259,11 @@ def generate_chat_reply_wrapper(text, state, regenerate=False, _continue=False):
|
|||
if not character_is_loaded(state):
|
||||
return
|
||||
|
||||
model_is_loaded, error_message = utils.check_model_loaded()
|
||||
if not model_is_loaded:
|
||||
import gradio as gr
|
||||
raise gr.Error(error_message)
|
||||
|
||||
if state['start_with'] != '' and not _continue:
|
||||
if regenerate:
|
||||
text, state['history'] = remove_last_message(state['history'])
|
||||
|
|
@ -1207,14 +1287,23 @@ def generate_chat_reply_wrapper(text, state, regenerate=False, _continue=False):
|
|||
|
||||
# Load tools if any are selected
|
||||
selected = state.get('selected_tools', [])
|
||||
mcp_servers = state.get('mcp_servers', '')
|
||||
parse_tool_call = None
|
||||
_tool_parsers = None
|
||||
if selected:
|
||||
from modules.tool_use import load_tools, execute_tool
|
||||
if selected or mcp_servers:
|
||||
from modules.tool_use import load_tools, load_mcp_tools, execute_tool
|
||||
from modules.tool_parsing import parse_tool_call, get_tool_call_id, detect_tool_call_format
|
||||
|
||||
if selected:
|
||||
tool_defs, tool_executors = load_tools(selected)
|
||||
if mcp_servers:
|
||||
mcp_defs, mcp_executors = load_mcp_tools(mcp_servers)
|
||||
for td in mcp_defs:
|
||||
fn = td['function']['name']
|
||||
if fn in tool_executors:
|
||||
logger.warning(f'MCP tool "{fn}" conflicts with a local tool. Skipping.')
|
||||
continue
|
||||
tool_defs.append(td)
|
||||
tool_executors[fn] = mcp_executors[fn]
|
||||
state['tools'] = tool_defs
|
||||
tool_func_names = [t['function']['name'] for t in tool_defs]
|
||||
_template_str = state.get('instruction_template_str', '') if state.get('mode') == 'instruct' else state.get('chat_template_str', '')
|
||||
|
|
@ -1762,7 +1851,8 @@ def load_history(unique_id, character, mode):
|
|||
if not p.exists():
|
||||
return {'internal': [], 'visible': [], 'metadata': {}}
|
||||
|
||||
f = json.loads(open(p, 'rb').read())
|
||||
with open(p, 'rb') as fh:
|
||||
f = json.loads(fh.read())
|
||||
if 'internal' in f and 'visible' in f:
|
||||
history = f
|
||||
else:
|
||||
|
|
@ -1826,19 +1916,17 @@ def generate_pfp_cache(character):
|
|||
if not cache_folder.exists():
|
||||
cache_folder.mkdir()
|
||||
|
||||
for path in [shared.user_data_dir / 'characters' / f"{character}.{extension}" for extension in ['png', 'jpg', 'jpeg']]:
|
||||
for extension in ['png', 'jpg', 'jpeg']:
|
||||
path = shared.user_data_dir / 'characters' / f"{character}.{extension}"
|
||||
if path.exists():
|
||||
original_img = Image.open(path)
|
||||
# Define file paths
|
||||
pfp_path = Path(f'{cache_folder}/pfp_character.png')
|
||||
thumb_path = Path(f'{cache_folder}/pfp_character_thumb.png')
|
||||
pfp_path = cache_folder / 'pfp_character.png'
|
||||
thumb_path = cache_folder / 'pfp_character_thumb.png'
|
||||
|
||||
# Save main picture and thumbnail
|
||||
original_img.save(pfp_path, format='PNG')
|
||||
thumb = make_thumbnail(original_img)
|
||||
thumb.save(thumb_path, format='PNG')
|
||||
|
||||
# Return the path to the thumbnail, not the in-memory PIL Image object.
|
||||
return str(thumb_path)
|
||||
|
||||
return None
|
||||
|
|
@ -1859,13 +1947,13 @@ def load_character(character, name1, name2):
|
|||
logger.error(f"Could not find the character \"{character}\" inside {shared.user_data_dir}/characters. No character has been loaded.")
|
||||
raise ValueError
|
||||
|
||||
file_contents = open(filepath, 'r', encoding='utf-8').read()
|
||||
with open(filepath, 'r', encoding='utf-8') as fh:
|
||||
file_contents = fh.read()
|
||||
data = json.loads(file_contents) if extension == "json" else yaml.safe_load(file_contents)
|
||||
cache_folder = Path(shared.args.disk_cache_dir)
|
||||
|
||||
for path in [Path(f"{cache_folder}/pfp_character.png"), Path(f"{cache_folder}/pfp_character_thumb.png")]:
|
||||
if path.exists():
|
||||
path.unlink()
|
||||
for path in [cache_folder / "pfp_character.png", cache_folder / "pfp_character_thumb.png"]:
|
||||
path.unlink(missing_ok=True)
|
||||
|
||||
picture = generate_pfp_cache(character)
|
||||
|
||||
|
|
@ -1921,9 +2009,7 @@ def clear_character_for_ui(state):
|
|||
# Clear the cache files
|
||||
cache_folder = Path(shared.args.disk_cache_dir)
|
||||
for cache_file in ['pfp_character.png', 'pfp_character_thumb.png']:
|
||||
cache_path = Path(f'{cache_folder}/{cache_file}')
|
||||
if cache_path.exists():
|
||||
cache_path.unlink()
|
||||
(cache_folder / cache_file).unlink(missing_ok=True)
|
||||
|
||||
return state, state['name2'], state['context'], state['greeting'], None
|
||||
|
||||
|
|
@ -2018,11 +2104,10 @@ def upload_your_profile_picture(img_path):
|
|||
cache_folder.mkdir()
|
||||
|
||||
if img is None:
|
||||
if Path(f"{cache_folder}/pfp_me.png").exists():
|
||||
Path(f"{cache_folder}/pfp_me.png").unlink()
|
||||
(cache_folder / "pfp_me.png").unlink(missing_ok=True)
|
||||
else:
|
||||
img = make_thumbnail(img)
|
||||
img.save(Path(f'{cache_folder}/pfp_me.png'))
|
||||
img.save(cache_folder / 'pfp_me.png')
|
||||
logger.info(f'Profile picture saved to "{cache_folder}/pfp_me.png"')
|
||||
|
||||
|
||||
|
|
@ -2078,13 +2163,12 @@ def generate_user_pfp_cache(user):
|
|||
if not cache_folder.exists():
|
||||
cache_folder.mkdir()
|
||||
|
||||
for path in [shared.user_data_dir / 'users' / f"{user}.{extension}" for extension in ['png', 'jpg', 'jpeg']]:
|
||||
for extension in ['png', 'jpg', 'jpeg']:
|
||||
path = shared.user_data_dir / 'users' / f"{user}.{extension}"
|
||||
if path.exists():
|
||||
original_img = Image.open(path)
|
||||
# Define file paths
|
||||
pfp_path = Path(f'{cache_folder}/pfp_me.png')
|
||||
pfp_path = cache_folder / 'pfp_me.png'
|
||||
|
||||
# Save thumbnail
|
||||
thumb = make_thumbnail(original_img)
|
||||
thumb.save(pfp_path, format='PNG')
|
||||
logger.info(f'User profile picture cached to "{pfp_path}"')
|
||||
|
|
@ -2116,9 +2200,7 @@ def load_user(user_name, name1, user_bio):
|
|||
|
||||
# Clear existing user picture cache
|
||||
cache_folder = Path(shared.args.disk_cache_dir)
|
||||
pfp_path = Path(f"{cache_folder}/pfp_me.png")
|
||||
if pfp_path.exists():
|
||||
pfp_path.unlink()
|
||||
(cache_folder / "pfp_me.png").unlink(missing_ok=True)
|
||||
|
||||
# Generate new picture cache
|
||||
picture = generate_user_pfp_cache(user_name)
|
||||
|
|
@ -2542,15 +2624,13 @@ def handle_character_picture_change(picture_path):
|
|||
|
||||
if picture is not None:
|
||||
# Save to cache
|
||||
picture.save(Path(f'{cache_folder}/pfp_character.png'), format='PNG')
|
||||
picture.save(cache_folder / 'pfp_character.png', format='PNG')
|
||||
thumb = make_thumbnail(picture)
|
||||
thumb.save(Path(f'{cache_folder}/pfp_character_thumb.png'), format='PNG')
|
||||
thumb.save(cache_folder / 'pfp_character_thumb.png', format='PNG')
|
||||
else:
|
||||
# Remove cache files when picture is cleared
|
||||
for cache_file in ['pfp_character.png', 'pfp_character_thumb.png']:
|
||||
cache_path = Path(f'{cache_folder}/{cache_file}')
|
||||
if cache_path.exists():
|
||||
cache_path.unlink()
|
||||
(cache_folder / cache_file).unlink(missing_ok=True)
|
||||
|
||||
|
||||
def handle_mode_change(state):
|
||||
|
|
|
|||
|
|
@ -535,11 +535,14 @@ class Exllamav3Model:
|
|||
import torch
|
||||
input_ids_tensor = input_ids if isinstance(input_ids, torch.Tensor) else torch.tensor(input_ids, dtype=torch.long)
|
||||
input_ids_tensor = input_ids_tensor.view(1, -1).cpu()
|
||||
with torch.no_grad():
|
||||
return self.model.forward(
|
||||
with torch.inference_mode():
|
||||
output = self.model.forward(
|
||||
input_ids=input_ids_tensor,
|
||||
params={"attn_mode": "flash_attn_nc"}
|
||||
).cpu().float()
|
||||
# Mask padding slots beyond the real vocab so they can't appear in top-k
|
||||
output[..., self.model.config.vocab_size:] = float("-inf")
|
||||
return output
|
||||
|
||||
def get_logits(self, token_ids, **kwargs):
|
||||
"""
|
||||
|
|
@ -555,6 +558,8 @@ class Exllamav3Model:
|
|||
|
||||
def encode(self, string, **kwargs):
|
||||
add_bos = kwargs.pop('add_bos', True)
|
||||
if add_bos and self.tokenizer.bos_token and string.startswith(self.tokenizer.bos_token):
|
||||
add_bos = False
|
||||
return self.tokenizer.encode(string, add_bos=add_bos, **kwargs)
|
||||
|
||||
def decode(self, ids, **kwargs):
|
||||
|
|
|
|||
|
|
@ -98,6 +98,22 @@ class Exllamav3HF(PreTrainedModel, GenerationMixin):
|
|||
def _validate_model_kwargs(self, model_kwargs: Dict[str, Any]):
|
||||
pass
|
||||
|
||||
def get_prompt_logits(self, input_ids):
|
||||
"""Return logits for all positions via a single no-cache forward pass.
|
||||
|
||||
Used by prompt logprobs computation. Returns (1, seq_len, vocab) on CPU in float32.
|
||||
"""
|
||||
input_ids_tensor = input_ids if isinstance(input_ids, torch.Tensor) else torch.tensor(input_ids, dtype=torch.long)
|
||||
input_ids_tensor = input_ids_tensor.view(1, -1).cpu()
|
||||
with torch.inference_mode():
|
||||
output = self.ex_model.forward(
|
||||
input_ids=input_ids_tensor,
|
||||
params={"attn_mode": "flash_attn_nc"}
|
||||
).cpu().float()
|
||||
# Mask padding slots beyond the real vocab so they can't appear in top-k
|
||||
output[..., self.ex_model.config.vocab_size:] = float("-inf")
|
||||
return output
|
||||
|
||||
def prepare_inputs_for_generation(self, input_ids, **kwargs):
|
||||
return {'input_ids': input_ids, **kwargs}
|
||||
|
||||
|
|
|
|||
|
|
@ -14,6 +14,13 @@ from modules.reasoning import extract_reasoning
|
|||
from modules.sane_markdown_lists import SaneListExtension
|
||||
from modules.utils import get_available_chat_styles
|
||||
|
||||
# Pre-compiled regex for protecting markdown-sensitive characters inside LaTeX.
|
||||
# Covers $$...$$, \[...\], \(...\), and inline $...$ (when content contains \\).
|
||||
_LATEX_PATTERN = re.compile(
|
||||
r'((?:^|[\r\n\s])\$\$[^`]*?\$\$)|\\\[(.*?)\\\]|\\\((.*?)\\\)|(?<!\$)\$(?!\$)([^\$\n]*\\\\[^\$\n]*?)\$(?!\$)',
|
||||
re.DOTALL
|
||||
)
|
||||
|
||||
# This is to store the paths to the thumbnails of the profile pictures
|
||||
image_cache = {}
|
||||
|
||||
|
|
@ -169,14 +176,6 @@ def build_thinking_block(thinking_content, message_id, has_remaining_content, th
|
|||
'''
|
||||
|
||||
|
||||
def build_main_content_block(content):
|
||||
"""Build HTML for the main content block."""
|
||||
if not content:
|
||||
return ""
|
||||
|
||||
return process_markdown_content(content)
|
||||
|
||||
|
||||
def process_markdown_content(string):
|
||||
"""
|
||||
Process a string through the markdown conversion pipeline.
|
||||
|
|
@ -185,28 +184,29 @@ def process_markdown_content(string):
|
|||
if not string:
|
||||
return ""
|
||||
|
||||
# Define unique placeholders for LaTeX asterisks and underscores
|
||||
# Define unique placeholders for LaTeX characters that conflict with markdown
|
||||
LATEX_ASTERISK_PLACEHOLDER = "LATEXASTERISKPLACEHOLDER"
|
||||
LATEX_UNDERSCORE_PLACEHOLDER = "LATEXUNDERSCOREPLACEHOLDER"
|
||||
LATEX_PIPE_PLACEHOLDER = "LATEXPIPEPLACEHOLDER"
|
||||
|
||||
def protect_latex_content(content):
|
||||
"""Protect markdown-sensitive characters inside LaTeX."""
|
||||
content = content.replace('*', LATEX_ASTERISK_PLACEHOLDER)
|
||||
content = content.replace('_', LATEX_UNDERSCORE_PLACEHOLDER)
|
||||
content = content.replace('|', LATEX_PIPE_PLACEHOLDER)
|
||||
return content
|
||||
|
||||
def protect_asterisks_underscores_in_latex(match):
|
||||
"""A replacer function for re.sub to protect asterisks and underscores in multiple LaTeX formats."""
|
||||
"""A replacer function for re.sub to protect markdown-sensitive characters in multiple LaTeX formats."""
|
||||
# Check which delimiter group was captured
|
||||
if match.group(1) is not None: # Content from $$...$$
|
||||
content = match.group(1)
|
||||
modified_content = content.replace('*', LATEX_ASTERISK_PLACEHOLDER)
|
||||
modified_content = modified_content.replace('_', LATEX_UNDERSCORE_PLACEHOLDER)
|
||||
return f'{modified_content}'
|
||||
return protect_latex_content(match.group(1))
|
||||
elif match.group(2) is not None: # Content from \[...\]
|
||||
content = match.group(2)
|
||||
modified_content = content.replace('*', LATEX_ASTERISK_PLACEHOLDER)
|
||||
modified_content = modified_content.replace('_', LATEX_UNDERSCORE_PLACEHOLDER)
|
||||
return f'\\[{modified_content}\\]'
|
||||
return f'\\[{protect_latex_content(match.group(2))}\\]'
|
||||
elif match.group(3) is not None: # Content from \(...\)
|
||||
content = match.group(3)
|
||||
modified_content = content.replace('*', LATEX_ASTERISK_PLACEHOLDER)
|
||||
modified_content = modified_content.replace('_', LATEX_UNDERSCORE_PLACEHOLDER)
|
||||
return f'\\({modified_content}\\)'
|
||||
return f'\\({protect_latex_content(match.group(3))}\\)'
|
||||
elif match.group(4) is not None: # Content from $...$
|
||||
return f'${protect_latex_content(match.group(4).strip())}$'
|
||||
|
||||
return match.group(0) # Fallback
|
||||
|
||||
|
|
@ -240,9 +240,7 @@ def process_markdown_content(string):
|
|||
string = re.sub(r"(.)```", r"\1\n```", string)
|
||||
|
||||
# Protect asterisks and underscores within all LaTeX blocks before markdown conversion
|
||||
latex_pattern = re.compile(r'((?:^|[\r\n\s])\$\$[^`]*?\$\$)|\\\[(.*?)\\\]|\\\((.*?)\\\)',
|
||||
re.DOTALL)
|
||||
string = latex_pattern.sub(protect_asterisks_underscores_in_latex, string)
|
||||
string = _LATEX_PATTERN.sub(protect_asterisks_underscores_in_latex, string)
|
||||
|
||||
result = ''
|
||||
is_code = False
|
||||
|
|
@ -306,6 +304,7 @@ def process_markdown_content(string):
|
|||
# Restore the LaTeX asterisks and underscores after markdown conversion
|
||||
html_output = html_output.replace(LATEX_ASTERISK_PLACEHOLDER, '*')
|
||||
html_output = html_output.replace(LATEX_UNDERSCORE_PLACEHOLDER, '_')
|
||||
html_output = html_output.replace(LATEX_PIPE_PLACEHOLDER, '|')
|
||||
|
||||
# Remove extra newlines before </code>
|
||||
html_output = re.sub(r'\s*</code>', '</code>', html_output)
|
||||
|
|
@ -342,20 +341,6 @@ def convert_to_markdown(string, message_id=None):
|
|||
tool_call_pattern = re.compile(r'<tool_call>(.*?)\n(.*?)\n</tool_call>', re.DOTALL)
|
||||
tool_calls = list(tool_call_pattern.finditer(string))
|
||||
|
||||
if not tool_calls:
|
||||
# No tool calls — use original single-pass extraction
|
||||
thinking_content, remaining_content = extract_thinking_block(string)
|
||||
blocks = []
|
||||
thinking_html = build_thinking_block(thinking_content, message_id, bool(remaining_content))
|
||||
if thinking_html:
|
||||
blocks.append(thinking_html)
|
||||
|
||||
main_html = build_main_content_block(remaining_content)
|
||||
if main_html:
|
||||
blocks.append(main_html)
|
||||
|
||||
return ''.join(blocks)
|
||||
|
||||
# Split string into text segments around tool_call blocks and
|
||||
# run extract_thinking_block on each segment for full format support.
|
||||
html_parts = []
|
||||
|
|
|
|||
|
|
@ -10,72 +10,49 @@ def get_quantization_config(quant_method):
|
|||
Get the appropriate quantization config based on the selected method.
|
||||
Applies quantization to both the transformer and the text_encoder.
|
||||
"""
|
||||
if quant_method == 'none' or not quant_method:
|
||||
return None
|
||||
|
||||
import torch
|
||||
# Import BitsAndBytesConfig from BOTH libraries to be safe
|
||||
from diffusers import BitsAndBytesConfig as DiffusersBnBConfig
|
||||
from diffusers import TorchAoConfig
|
||||
from diffusers.quantizers import PipelineQuantizationConfig
|
||||
from transformers import BitsAndBytesConfig as TransformersBnBConfig
|
||||
|
||||
if quant_method == 'none' or not quant_method:
|
||||
return None
|
||||
torchao_methods = {
|
||||
'torchao-int8wo': 'int8wo',
|
||||
'torchao-fp4': 'fp4_e2m1',
|
||||
'torchao-float8wo': 'float8wo',
|
||||
}
|
||||
|
||||
# Bitsandbytes 8-bit quantization
|
||||
elif quant_method == 'bnb-8bit':
|
||||
if quant_method == 'bnb-8bit':
|
||||
return PipelineQuantizationConfig(
|
||||
quant_mapping={
|
||||
"transformer": DiffusersBnBConfig(
|
||||
load_in_8bit=True
|
||||
),
|
||||
"text_encoder": TransformersBnBConfig(
|
||||
load_in_8bit=True
|
||||
)
|
||||
"transformer": DiffusersBnBConfig(load_in_8bit=True),
|
||||
"text_encoder": TransformersBnBConfig(load_in_8bit=True)
|
||||
}
|
||||
)
|
||||
|
||||
# Bitsandbytes 4-bit quantization
|
||||
elif quant_method == 'bnb-4bit':
|
||||
bnb_4bit_kwargs = dict(
|
||||
load_in_4bit=True,
|
||||
bnb_4bit_quant_type="nf4",
|
||||
bnb_4bit_compute_dtype=torch.bfloat16,
|
||||
bnb_4bit_use_double_quant=True
|
||||
)
|
||||
return PipelineQuantizationConfig(
|
||||
quant_mapping={
|
||||
"transformer": DiffusersBnBConfig(
|
||||
load_in_4bit=True,
|
||||
bnb_4bit_quant_type="nf4",
|
||||
bnb_4bit_compute_dtype=torch.bfloat16,
|
||||
bnb_4bit_use_double_quant=True
|
||||
),
|
||||
"text_encoder": TransformersBnBConfig(
|
||||
load_in_4bit=True,
|
||||
bnb_4bit_quant_type="nf4",
|
||||
bnb_4bit_compute_dtype=torch.bfloat16,
|
||||
bnb_4bit_use_double_quant=True
|
||||
)
|
||||
"transformer": DiffusersBnBConfig(**bnb_4bit_kwargs),
|
||||
"text_encoder": TransformersBnBConfig(**bnb_4bit_kwargs)
|
||||
}
|
||||
)
|
||||
|
||||
# torchao int8 weight-only
|
||||
elif quant_method == 'torchao-int8wo':
|
||||
elif quant_method in torchao_methods:
|
||||
ao_type = torchao_methods[quant_method]
|
||||
return PipelineQuantizationConfig(
|
||||
quant_mapping={
|
||||
"transformer": TorchAoConfig("int8wo"),
|
||||
"text_encoder": TorchAoConfig("int8wo")
|
||||
}
|
||||
)
|
||||
|
||||
# torchao fp4 (e2m1)
|
||||
elif quant_method == 'torchao-fp4':
|
||||
return PipelineQuantizationConfig(
|
||||
quant_mapping={
|
||||
"transformer": TorchAoConfig("fp4_e2m1"),
|
||||
"text_encoder": TorchAoConfig("fp4_e2m1")
|
||||
}
|
||||
)
|
||||
|
||||
# torchao float8 weight-only
|
||||
elif quant_method == 'torchao-float8wo':
|
||||
return PipelineQuantizationConfig(
|
||||
quant_mapping={
|
||||
"transformer": TorchAoConfig("float8wo"),
|
||||
"text_encoder": TorchAoConfig("float8wo")
|
||||
"transformer": TorchAoConfig(ao_type),
|
||||
"text_encoder": TorchAoConfig(ao_type)
|
||||
}
|
||||
)
|
||||
|
||||
|
|
@ -152,7 +129,7 @@ def load_image_model(model_name, dtype='bfloat16', attn_backend='sdpa', cpu_offl
|
|||
|
||||
modules = ["transformer", "unet"]
|
||||
|
||||
# Set attention backend
|
||||
# Set attention backend (diffusers defaults to native/SDPA)
|
||||
if attn_backend == 'flash_attention_2':
|
||||
for name in modules:
|
||||
mod = getattr(pipe, name, None)
|
||||
|
|
|
|||
|
|
@ -296,19 +296,26 @@ class LlamaServer:
|
|||
pprint.PrettyPrinter(indent=4, sort_dicts=False).pprint(printable_payload)
|
||||
print()
|
||||
|
||||
for retry in range(5):
|
||||
response = self.session.post(url, json=payload)
|
||||
result = response.json()
|
||||
def _try_fetch_logits():
|
||||
for retry in range(5):
|
||||
response = self.session.post(url, json=payload)
|
||||
result = response.json()
|
||||
|
||||
if "completion_probabilities" in result:
|
||||
if use_samplers:
|
||||
return result["completion_probabilities"][0]["top_probs"]
|
||||
else:
|
||||
return result["completion_probabilities"][0]["top_logprobs"]
|
||||
if "completion_probabilities" in result:
|
||||
if use_samplers:
|
||||
return result["completion_probabilities"][0]["top_probs"]
|
||||
else:
|
||||
return result["completion_probabilities"][0]["top_logprobs"]
|
||||
|
||||
time.sleep(0.05)
|
||||
else:
|
||||
raise Exception(f"Unexpected response format: 'completion_probabilities' not found in {result}")
|
||||
time.sleep(0.05)
|
||||
else:
|
||||
raise Exception(f"Unexpected response format: 'completion_probabilities' not found in {result}")
|
||||
|
||||
result = _try_fetch_logits()
|
||||
for entry in result:
|
||||
if not entry.get('token'):
|
||||
entry['token'] = self.decode([entry['id']])
|
||||
return result
|
||||
|
||||
def get_prompt_logprob_entries(self, token_ids, n_probs=5, prompt=""):
|
||||
"""Get logprob entries for prompt tokens via a single n_predict=0 request.
|
||||
|
|
@ -373,6 +380,7 @@ class LlamaServer:
|
|||
"""Check if a port is available for use."""
|
||||
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
|
||||
try:
|
||||
s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
|
||||
s.bind(('', port))
|
||||
return True
|
||||
except OSError:
|
||||
|
|
@ -667,6 +675,7 @@ def _patch_cmd_for_ik(cmd):
|
|||
--fit-target → --fit-margin
|
||||
--cache-reuse → (removed, unsupported)
|
||||
--swa-full → (removed, unsupported)
|
||||
--split-mode row → --split-mode graph
|
||||
"""
|
||||
# Add Hadamard KV cache rotation when using quantized cache types.
|
||||
# This significantly improves quantized cache quality (especially q4_0)
|
||||
|
|
@ -694,6 +703,9 @@ def _patch_cmd_for_ik(cmd):
|
|||
patched.append("--fit-margin")
|
||||
elif arg == "--cache-reuse":
|
||||
i += 1 # skip the value
|
||||
elif arg == "--split-mode" and i + 1 < len(cmd) and cmd[i + 1] == "row":
|
||||
patched += ["--split-mode", "graph"]
|
||||
i += 1 # skip the value
|
||||
elif arg == "--swa-full":
|
||||
pass # bare flag, just drop it
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
import logging
|
||||
|
||||
logger = logging.getLogger('text-generation-webui')
|
||||
logger = logging.getLogger('textgen')
|
||||
|
||||
|
||||
def setup_logging():
|
||||
|
|
|
|||
|
|
@ -36,15 +36,22 @@ def _get_next_logits(prompt, state, use_samplers, previous, top_logits=25, retur
|
|||
return error_message, previous
|
||||
|
||||
# llama.cpp case
|
||||
def _escaped(token):
|
||||
chars = []
|
||||
for a in token:
|
||||
# C0 and DEL and C1
|
||||
if ord(a) <= 0x1F or 0x7F <= ord(a) <= 0x9F:
|
||||
chars.append(repr(a)[1:-1])
|
||||
else:
|
||||
chars.append(a)
|
||||
return ''.join(chars)
|
||||
if shared.model.__class__.__name__ == 'LlamaServer':
|
||||
logprobs = shared.model.get_logits(prompt, state, n_probs=top_logits, use_samplers=use_samplers)
|
||||
|
||||
if return_dict:
|
||||
output = {}
|
||||
for entry in logprobs:
|
||||
token = repr(entry['token'])
|
||||
if len(token) > 2 and token.startswith("'") and token.endswith("'"):
|
||||
token = token[1:-1]
|
||||
token = _escaped(entry['token'])
|
||||
|
||||
prob = entry['prob'] if use_samplers else np.exp(entry['logprob'])
|
||||
output[token] = prob
|
||||
|
|
@ -52,12 +59,11 @@ def _get_next_logits(prompt, state, use_samplers, previous, top_logits=25, retur
|
|||
else:
|
||||
output = ''
|
||||
for entry in logprobs:
|
||||
token = repr(entry['token'])
|
||||
if len(token) > 2 and token.startswith("'") and token.endswith("'"):
|
||||
token = token[1:-1]
|
||||
token = _escaped(entry['token'])
|
||||
token_id = entry['id']
|
||||
|
||||
prob = entry['prob'] if use_samplers else np.exp(entry['logprob'])
|
||||
output += f"{prob:.5f} - {token}\n"
|
||||
output += f"{prob:.5f} - [{token}] ({token_id})\n"
|
||||
return output, previous
|
||||
|
||||
# All other model types
|
||||
|
|
|
|||
|
|
@ -42,6 +42,10 @@ def get_model_metadata(model):
|
|||
hf_quant_method=quant_method
|
||||
)
|
||||
|
||||
# Default bos/eos tokens (may be overridden by GGUF metadata or tokenizer_config.json)
|
||||
shared.bos_token = '<s>'
|
||||
shared.eos_token = '</s>'
|
||||
|
||||
# GGUF metadata
|
||||
if model_settings['loader'] == 'llama.cpp':
|
||||
path = model_path
|
||||
|
|
@ -133,6 +137,14 @@ def get_model_metadata(model):
|
|||
with open(path, 'r', encoding='utf-8') as f:
|
||||
metadata = json.loads(f.read())
|
||||
|
||||
for k in ['eos_token', 'bos_token']:
|
||||
if k in metadata:
|
||||
value = metadata[k]
|
||||
if isinstance(value, dict):
|
||||
value = value['content']
|
||||
|
||||
setattr(shared, k, value)
|
||||
|
||||
# Only read from metadata if we haven't already loaded from .jinja or .json
|
||||
if template is None and 'chat_template' in metadata:
|
||||
template = metadata['chat_template']
|
||||
|
|
@ -141,17 +153,6 @@ def get_model_metadata(model):
|
|||
|
||||
# 4. If a template was found from any source, process it
|
||||
if template:
|
||||
shared.bos_token = '<s>'
|
||||
shared.eos_token = '</s>'
|
||||
|
||||
for k in ['eos_token', 'bos_token']:
|
||||
if k in metadata:
|
||||
value = metadata[k]
|
||||
if isinstance(value, dict):
|
||||
value = value['content']
|
||||
|
||||
setattr(shared, k, value)
|
||||
|
||||
template = re.sub(r"\{\{-?\s*raise_exception\(.*?\)\s*-?\}\}", "", template, flags=re.DOTALL)
|
||||
template = re.sub(r'raise_exception\([^)]*\)', "''", template)
|
||||
model_settings['instruction_template'] = 'Custom (obtained from model metadata)'
|
||||
|
|
@ -400,14 +401,19 @@ def load_instruction_template(template):
|
|||
if template == 'None':
|
||||
return ''
|
||||
|
||||
for filepath in [shared.user_data_dir / 'instruction-templates' / f'{template}.yaml', shared.user_data_dir / 'instruction-templates' / 'Alpaca.yaml']:
|
||||
if filepath.exists():
|
||||
break
|
||||
for name in (template, 'Alpaca'):
|
||||
path = shared.user_data_dir / 'instruction-templates' / f'{name}.yaml'
|
||||
try:
|
||||
with open(path, 'r', encoding='utf-8') as f:
|
||||
file_contents = f.read()
|
||||
except FileNotFoundError:
|
||||
if name == template:
|
||||
logger.warning(f"Instruction template '{template}' not found, falling back to Alpaca")
|
||||
continue
|
||||
|
||||
break
|
||||
else:
|
||||
return ''
|
||||
|
||||
with open(filepath, 'r', encoding='utf-8') as f:
|
||||
file_contents = f.read()
|
||||
data = yaml.safe_load(file_contents)
|
||||
if 'instruction_template' in data:
|
||||
return data['instruction_template']
|
||||
|
|
|
|||
|
|
@ -1,6 +1,7 @@
|
|||
from pathlib import Path
|
||||
|
||||
from modules import shared, utils
|
||||
from modules.utils import sanitize_filename
|
||||
from modules.text_generation import get_encoded_length
|
||||
|
||||
|
||||
|
|
@ -18,6 +19,7 @@ def load_prompt(fname):
|
|||
|
||||
return initial_content
|
||||
|
||||
fname = sanitize_filename(fname)
|
||||
file_path = shared.user_data_dir / 'logs' / 'notebook' / f'{fname}.txt'
|
||||
if file_path.exists():
|
||||
with open(file_path, 'r', encoding='utf-8') as f:
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ THINKING_FORMATS = [
|
|||
('<|channel|>analysis<|message|>', '<|end|>', '<|channel|>final<|message|>'),
|
||||
('<|channel|>commentary<|message|>', '<|end|>', '<|channel|>final<|message|>'),
|
||||
('<seed:think>', '</seed:think>', None),
|
||||
('<|channel>thought', '<channel|>', None), # Gemma 4
|
||||
('<|think|>', '<|end|>', '<|content|>'), # Solar Open
|
||||
# ('Thinking Process:', '</think>', None), # Qwen3.5 verbose thinking outside tags -- removed: too prone to false positives in streaming
|
||||
(None, '</think>', None), # End-only variant (e.g., Qwen3-next)
|
||||
|
|
@ -72,9 +73,16 @@ def extract_reasoning(text, html_escaped=False):
|
|||
if content_pos != -1:
|
||||
content_start = content_pos + len(content_esc)
|
||||
else:
|
||||
# Content tag not present — fall back to content after
|
||||
# end_tag (e.g. GPT-OSS tool calls skip the final channel).
|
||||
content_start = end_pos + len(end_esc)
|
||||
# Content tag not present yet. In GPT-OSS the region
|
||||
# between <|end|> and the content tag contains internal
|
||||
# markup (<|start|>assistant…) that must not be shown.
|
||||
# Suppress it to prevent tag leaks during streaming.
|
||||
remainder = text[end_pos + len(end_esc):].lstrip()
|
||||
framing_token = esc('<|start|>')
|
||||
if not remainder or remainder.startswith(framing_token) or framing_token.startswith(remainder):
|
||||
content_start = len(text)
|
||||
else:
|
||||
content_start = end_pos + len(end_esc)
|
||||
else:
|
||||
content_start = end_pos + len(end_esc)
|
||||
|
||||
|
|
|
|||
|
|
@ -42,7 +42,7 @@ persistent_interface_state = {}
|
|||
need_restart = False
|
||||
|
||||
# Parser copied from https://github.com/vladmandic/automatic
|
||||
parser = argparse.ArgumentParser(description="Text Generation Web UI", conflict_handler='resolve', add_help=True, formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=55, indent_increment=2, width=200))
|
||||
parser = argparse.ArgumentParser(description="TextGen", conflict_handler='resolve', add_help=True, formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=55, indent_increment=2, width=200))
|
||||
|
||||
# Basic settings
|
||||
group = parser.add_argument_group('Basic settings')
|
||||
|
|
@ -259,6 +259,7 @@ settings = {
|
|||
'enable_web_search': False,
|
||||
'web_search_pages': 3,
|
||||
'selected_tools': [],
|
||||
'mcp_servers': '',
|
||||
'prompt-notebook': '',
|
||||
'preset': 'Top-P' if (user_data_dir / 'presets/Top-P.yaml').exists() else None,
|
||||
'max_new_tokens': 512,
|
||||
|
|
@ -363,7 +364,7 @@ settings = {
|
|||
'image_llm_variations_prompt': 'Write a variation of the image generation prompt above. Consider the intent of the user with that prompt and write something that will likely please them, with added details. Output only the new prompt. Do not add any explanations, prefixes, or additional text.',
|
||||
'image_model_menu': 'None',
|
||||
'image_dtype': 'bfloat16',
|
||||
'image_attn_backend': 'flash_attention_2',
|
||||
'image_attn_backend': 'sdpa',
|
||||
'image_cpu_offload': False,
|
||||
'image_compile': False,
|
||||
'image_quant': 'none',
|
||||
|
|
|
|||
|
|
@ -216,6 +216,11 @@ def generate_reply_wrapper(question, state, stopping_strings=None):
|
|||
"""
|
||||
Returns formatted outputs for the UI
|
||||
"""
|
||||
model_is_loaded, error_message = check_model_loaded()
|
||||
if not model_is_loaded:
|
||||
import gradio as gr
|
||||
raise gr.Error(error_message)
|
||||
|
||||
reply = question if not shared.is_seq2seq else ''
|
||||
yield formatted_outputs(reply, shared.model_name)
|
||||
|
||||
|
|
|
|||
|
|
@ -27,10 +27,11 @@ TOOL_CALL_OPENING_MARKERS = [
|
|||
'[TOOL_CALLS]',
|
||||
'to=functions.',
|
||||
'<|channel|>commentary',
|
||||
'<|tool_call>call:',
|
||||
]
|
||||
|
||||
|
||||
def streaming_tool_buffer_check(text, markers=None, tool_names=None, check_bare_names=False):
|
||||
def streaming_tool_buffer_check(text, markers=None, tool_names=None, check_bare_names=False, partial_match=True):
|
||||
'''
|
||||
Check whether streaming output should be withheld because it may
|
||||
contain tool-call markup.
|
||||
|
|
@ -42,6 +43,10 @@ def streaming_tool_buffer_check(text, markers=None, tool_names=None, check_bare_
|
|||
tool_names: List of tool function names.
|
||||
check_bare_names: Whether to do partial-prefix matching on tool
|
||||
names (for models with unknown template format).
|
||||
partial_match: Whether to check partial prefixes of markers/names.
|
||||
Set to False for end-of-generation checks where a
|
||||
partial prefix is just normal text, not an incomplete
|
||||
tool call.
|
||||
'''
|
||||
# Strip thinking blocks so tool-call syntax inside <think> doesn't
|
||||
# trigger false positives.
|
||||
|
|
@ -59,6 +64,9 @@ def streaming_tool_buffer_check(text, markers=None, tool_names=None, check_bare_
|
|||
if name + '{' in text or name + ' {' in text:
|
||||
return True
|
||||
|
||||
if not partial_match:
|
||||
return False
|
||||
|
||||
# Partial-prefix matching: only for template-specific markers.
|
||||
for marker in (markers if markers is not None else TOOL_CALL_OPENING_MARKERS):
|
||||
for prefix_len in range(min(len(marker) - 1, len(text)), 0, -1):
|
||||
|
|
@ -400,6 +408,82 @@ def _parse_glm_tool_calls(answer: str, tool_names: list[str]):
|
|||
return matches, start_pos
|
||||
|
||||
|
||||
def _extract_gemma4_balanced(text, start):
|
||||
"""Extract balanced braces from Gemma 4 format, using <|"|> as string delimiters."""
|
||||
if start >= len(text) or text[start] != '{':
|
||||
return None
|
||||
depth = 0
|
||||
in_string = False
|
||||
quote_token = '<|"|>'
|
||||
quote_len = len(quote_token)
|
||||
i = start
|
||||
while i < len(text):
|
||||
if text[i:i + quote_len] == quote_token:
|
||||
in_string = not in_string
|
||||
i += quote_len
|
||||
continue
|
||||
if in_string:
|
||||
i += 1
|
||||
continue
|
||||
c = text[i]
|
||||
if c == '{':
|
||||
depth += 1
|
||||
elif c == '}':
|
||||
depth -= 1
|
||||
if depth == 0:
|
||||
return text[start:i + 1]
|
||||
i += 1
|
||||
return None
|
||||
|
||||
|
||||
def _parse_gemma4_tool_calls(answer: str, tool_names: list[str]):
|
||||
"""Parse Gemma 4-style tool calls.
|
||||
|
||||
Format:
|
||||
<|tool_call>call:func_name{key:<|"|>value<|"|>,...}<tool_call|>
|
||||
|
||||
Values use <|"|> tokens instead of standard JSON quotes, and keys are
|
||||
bare identifiers.
|
||||
"""
|
||||
matches = []
|
||||
start_pos = None
|
||||
|
||||
for m in re.finditer(r'<\|tool_call>call:([^\s{]+)\s*', answer):
|
||||
func_name = m.group(1).strip()
|
||||
if func_name not in tool_names:
|
||||
continue
|
||||
|
||||
brace_start = m.end()
|
||||
if brace_start >= len(answer) or answer[brace_start] != '{':
|
||||
continue
|
||||
|
||||
content = _extract_gemma4_balanced(answer, brace_start)
|
||||
if content is None:
|
||||
continue
|
||||
|
||||
# Convert to JSON: split on <|"|> tokens so that key quoting
|
||||
# only applies outside string values (even-indexed parts),
|
||||
# escape newlines and double quotes in arguments (odd-indexed parts),
|
||||
# then rejoin with real quotes.
|
||||
parts = content.split('<|"|>')
|
||||
for idx in range(len(parts)):
|
||||
if idx % 2 == 0:
|
||||
parts[idx] = re.sub(r'(^|[{,\[])\s*(\w+)\s*:', r'\1"\2":', parts[idx])
|
||||
else:
|
||||
parts[idx] = json.dumps(parts[idx])[1:-1]
|
||||
json_str = '"'.join(parts)
|
||||
|
||||
try:
|
||||
arguments = json.loads(json_str)
|
||||
if start_pos is None:
|
||||
start_pos = m.start()
|
||||
matches.append(_make_tool_call(func_name, arguments))
|
||||
except (json.JSONDecodeError, ValueError):
|
||||
pass
|
||||
|
||||
return matches, start_pos
|
||||
|
||||
|
||||
def _parse_pythonic_tool_calls(answer: str, tool_names: list[str]):
|
||||
"""Parse pythonic-style tool calls used by Llama 4 and similar models.
|
||||
|
||||
|
|
@ -472,6 +556,11 @@ TOOL_CALL_FORMATS = [
|
|||
'parser': _parse_channel_tool_calls,
|
||||
'markers': ['to=functions.', '<|channel|>commentary'],
|
||||
},
|
||||
{
|
||||
'template_hints': ['<|tool_call>call:'],
|
||||
'parser': _parse_gemma4_tool_calls,
|
||||
'markers': ['<|tool_call>call:'],
|
||||
},
|
||||
{
|
||||
'template_hints': ['minimax:tool_call'],
|
||||
'parser': _parse_minimax_tool_calls,
|
||||
|
|
@ -504,6 +593,7 @@ ALL_PARSERS = [
|
|||
_parse_deep_seek_tool_calls,
|
||||
_parse_kimi_tool_calls,
|
||||
_parse_channel_tool_calls,
|
||||
_parse_gemma4_tool_calls,
|
||||
_parse_minimax_tool_calls,
|
||||
_parse_glm_tool_calls,
|
||||
_parse_xml_param_tool_calls,
|
||||
|
|
@ -552,9 +642,15 @@ def parse_tool_call(answer: str, tool_names: list[str], return_prefix: bool = Fa
|
|||
# Strip thinking blocks so tool-call syntax inside <think> is ignored.
|
||||
original_answer = answer
|
||||
_, answer = extract_reasoning(answer)
|
||||
# Offset between original and stripped text, used to map start_pos
|
||||
# back to the original string when returning a prefix.
|
||||
reasoning_offset = len(original_answer) - len(answer)
|
||||
# Reasoning extraction returns empty content when GPT-OSS internal
|
||||
# markup (<|start|>assistant…) follows the thinking block without a
|
||||
# content tag. Fall back to the full text so tool-call markers can
|
||||
# be found.
|
||||
if not answer.strip():
|
||||
answer = original_answer
|
||||
reasoning_offset = 0
|
||||
else:
|
||||
reasoning_offset = len(original_answer) - len(answer)
|
||||
|
||||
matches = []
|
||||
start_pos = None
|
||||
|
|
@ -620,6 +716,8 @@ def parse_tool_call(answer: str, tool_names: list[str], return_prefix: bool = Fa
|
|||
if not isinstance(candidates, list):
|
||||
candidates = [candidates]
|
||||
for candidate_dict in candidates:
|
||||
if not isinstance(candidate_dict, dict):
|
||||
continue
|
||||
checked_candidate = check_and_sanitize_tool_call_candidate(candidate_dict, tool_names)
|
||||
if checked_candidate is not None:
|
||||
matches.append(checked_candidate)
|
||||
|
|
|
|||
|
|
@ -1,3 +1,4 @@
|
|||
import asyncio
|
||||
import importlib.util
|
||||
import json
|
||||
|
||||
|
|
@ -55,6 +56,119 @@ def load_tools(selected_names):
|
|||
return tool_defs, executors
|
||||
|
||||
|
||||
def _parse_mcp_servers(servers_str):
|
||||
"""Parse MCP servers textbox: one server per line, format 'url' or 'url,Header: value,Header2: value2'."""
|
||||
servers = []
|
||||
for line in servers_str.strip().splitlines():
|
||||
line = line.strip()
|
||||
if not line:
|
||||
continue
|
||||
parts = line.split(',')
|
||||
url = parts[0].strip()
|
||||
headers = {}
|
||||
for part in parts[1:]:
|
||||
part = part.strip()
|
||||
if ':' in part:
|
||||
key, val = part.split(':', 1)
|
||||
headers[key.strip()] = val.strip()
|
||||
servers.append((url, headers))
|
||||
return servers
|
||||
|
||||
|
||||
def _mcp_tool_to_openai(tool):
|
||||
"""Convert an MCP Tool object to OpenAI-format tool dict."""
|
||||
return {
|
||||
"type": "function",
|
||||
"function": {
|
||||
"name": tool.name,
|
||||
"description": tool.description or "",
|
||||
"parameters": tool.inputSchema or {"type": "object", "properties": {}}
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
async def _mcp_session(url, headers, callback):
    """Run ``callback`` inside an initialized MCP client session.

    Opens a streamable-HTTP connection to ``url`` (passing ``headers``, or
    ``None`` when they are empty), initializes a ``ClientSession`` on it,
    awaits ``callback(session)`` and returns whatever the callback returns.
    Both context managers are exited before this coroutine returns.
    """
    # Imported here rather than at module import time.
    from mcp import ClientSession
    from mcp.client.streamable_http import streamablehttp_client

    transport = streamablehttp_client(url, headers=headers or None)
    # The third item yielded by the transport is not used here.
    async with transport as (reader, writer, _):
        async with ClientSession(reader, writer) as session:
            await session.initialize()
            outcome = await callback(session)
            return outcome
|
||||
|
||||
|
||||
def _make_mcp_executor(name, url, headers):
|
||||
def executor(arguments):
|
||||
return asyncio.run(_call_mcp_tool(name, arguments, url, headers))
|
||||
return executor
|
||||
|
||||
|
||||
async def _connect_mcp_server(url, headers):
    """Discover the tools of one MCP server.

    Returns ``(tool_defs, executors)``: the OpenAI-format definition of every
    tool the server lists, and a dict mapping each tool name to a callable
    that executes it against this server.
    """

    async def _discover(session):
        listing = await session.list_tools()
        tools = list(listing.tools)
        tool_defs = [_mcp_tool_to_openai(tool) for tool in tools]
        executors = {
            tool.name: _make_mcp_executor(tool.name, url, headers)
            for tool in tools
        }
        return tool_defs, executors

    return await _mcp_session(url, headers, _discover)
|
||||
|
||||
|
||||
async def _call_mcp_tool(name, arguments, url, headers):
    """Open a session to an MCP server, call one tool and return its output.

    Each item of the result's ``content`` contributes its ``text`` attribute
    when it has one and its ``str()`` form otherwise; the pieces are joined
    with newlines. An empty content list yields an empty string.
    """

    async def _invoke(session):
        outcome = await session.call_tool(name, arguments)
        return '\n'.join(
            item.text if hasattr(item, 'text') else str(item)
            for item in outcome.content
        )

    return await _mcp_session(url, headers, _invoke)
|
||||
|
||||
|
||||
async def _connect_all_mcp_servers(servers):
|
||||
"""Connect to all MCP servers concurrently."""
|
||||
results = await asyncio.gather(
|
||||
*(_connect_mcp_server(url, headers) for url, headers in servers),
|
||||
return_exceptions=True
|
||||
)
|
||||
all_defs = []
|
||||
all_executors = {}
|
||||
for (url, _), result in zip(servers, results):
|
||||
if isinstance(result, Exception):
|
||||
logger.exception(f'Failed to connect to MCP server "{url}"', exc_info=result)
|
||||
continue
|
||||
defs, execs = result
|
||||
for td, (fn, ex) in zip(defs, execs.items()):
|
||||
if fn in all_executors:
|
||||
logger.warning(f'MCP tool "{fn}" from {url} conflicts with an already loaded tool. Skipping.')
|
||||
continue
|
||||
all_defs.append(td)
|
||||
all_executors[fn] = ex
|
||||
return all_defs, all_executors
|
||||
|
||||
|
||||
def load_mcp_tools(servers_str):
    """
    Discover tools from every MCP server listed in ``servers_str``.

    Returns (tool_defs, executors) in the same format as load_tools. When
    the string describes no servers, returns an empty list and an empty
    dict without starting an event loop.
    """
    parsed = _parse_mcp_servers(servers_str)
    return asyncio.run(_connect_all_mcp_servers(parsed)) if parsed else ([], {})
|
||||
|
||||
|
||||
def execute_tool(func_name, arguments, executors):
|
||||
"""Execute a tool by function name. Returns result as a JSON string."""
|
||||
fn = executors.get(func_name)
|
||||
|
|
|
|||
|
|
@ -40,7 +40,7 @@ def create_ui():
|
|||
tmp = gr.State('')
|
||||
with gr.Row():
|
||||
with gr.Column():
|
||||
gr.Markdown("[Tutorial](https://github.com/oobabooga/text-generation-webui/wiki/05-%E2%80%90-Training-Tab)")
|
||||
gr.Markdown("[Tutorial](https://github.com/oobabooga/textgen/wiki/05-%E2%80%90-Training-Tab)")
|
||||
|
||||
with gr.Row():
|
||||
copy_from = gr.Dropdown(label='Copy parameters from', value='None', choices=utils.get_available_loras(), elem_classes=['slim-dropdown'], interactive=not mu)
|
||||
|
|
@ -52,7 +52,7 @@ def create_ui():
|
|||
with gr.Column():
|
||||
always_override = gr.Checkbox(label='Override Existing Files', value=False, info='If the name is the same, checking will replace the existing file, and unchecking will load and continue from it (the rank must be the same).', elem_classes=['no-background'])
|
||||
|
||||
with gr.Accordion(label='Target Modules', open=False, elem_classes='tgw-accordion'):
|
||||
with gr.Accordion(label='Target Modules', open=False):
|
||||
gr.Markdown("Selects which modules to target in training. Targeting more modules is closer to a full fine-tune at the cost of increased VRAM and adapter size.")
|
||||
all_linear = gr.Checkbox(label='Target all linear layers', value=True, info='Targets every nn.Linear layer except lm_head. Works for any model architecture. When checked, the individual module checkboxes below are ignored.', elem_classes=['no-background'])
|
||||
with gr.Row():
|
||||
|
|
@ -87,7 +87,7 @@ def create_ui():
|
|||
with gr.Row():
|
||||
lr_scheduler_type = gr.Dropdown(label='LR Scheduler', value='cosine', choices=['linear', 'constant', 'constant_with_warmup', 'cosine', 'cosine_with_restarts', 'polynomial', 'inverse_sqrt'], info='Learning rate scheduler - defines how the learning rate changes over time. "Constant" means never change, "linear" means to go in a straight line from the learning rate down to 0, cosine follows a curve, etc.', elem_classes=['slim-dropdown'])
|
||||
|
||||
with gr.Accordion(label='Advanced Options', open=False, elem_classes='tgw-accordion'):
|
||||
with gr.Accordion(label='Advanced Options', open=False):
|
||||
with gr.Row():
|
||||
with gr.Column():
|
||||
optimizer = gr.Dropdown(label='Optimizer', value='adamw_torch', choices=['adamw_hf', 'adamw_torch', 'adamw_torch_fused', 'adamw_torch_xla', 'adamw_apex_fused', 'adafactor', 'adamw_bnb_8bit', 'adamw_anyprecision', 'sgd', 'adagrad'], info='Optimizer algorithm. adamw_torch is the standard choice. adamw_bnb_8bit uses less VRAM. adafactor is memory-efficient for large models.', elem_classes=['slim-dropdown'])
|
||||
|
|
|
|||
|
|
@ -75,7 +75,7 @@ if not shared.args.old_colors:
|
|||
background_fill_primary_dark='var(--darker-gray, #1C1C1D)',
|
||||
body_background_fill="white",
|
||||
block_background_fill="transparent",
|
||||
body_text_color='rgb(64, 64, 64)',
|
||||
body_text_color='#1a1a1a',
|
||||
button_secondary_background_fill="white",
|
||||
button_secondary_border_color="var(--border-color-primary)",
|
||||
block_title_text_color='*body_text_color',
|
||||
|
|
@ -209,6 +209,7 @@ def list_interface_input_elements():
|
|||
'textbox',
|
||||
'start_with',
|
||||
'selected_tools',
|
||||
'mcp_servers',
|
||||
'mode',
|
||||
'chat_style',
|
||||
'chat-instruct_command',
|
||||
|
|
@ -434,6 +435,7 @@ def setup_auto_save():
|
|||
'custom_system_message',
|
||||
'chat_template_str',
|
||||
'selected_tools',
|
||||
'mcp_servers',
|
||||
|
||||
# Parameters tab (ui_parameters.py) - Generation parameters
|
||||
'preset_menu',
|
||||
|
|
|
|||
|
|
@ -52,7 +52,7 @@ def create_ui():
|
|||
shared.gradio['html_display'] = gr.HTML(value=chat_html_wrapper({'internal': [], 'visible': [], 'metadata': {}}, '', '', 'chat', 'cai-chat', '')['html'], visible=True)
|
||||
with gr.Row(elem_id="chat-input-row"):
|
||||
with gr.Column(scale=1, elem_id='gr-hover-container'):
|
||||
gr.HTML(value='<div class="hover-element" onclick="void(0)"><span style="width: 100px; display: block" id="hover-element-button">☰</span><div class="hover-menu" id="hover-menu"></div>', elem_id='gr-hover')
|
||||
gr.HTML(value='<div class="hover-element" onclick="void(0)"><span id="hover-element-button"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><line x1="4" y1="6" x2="20" y2="6"></line><line x1="4" y1="12" x2="20" y2="12"></line><line x1="4" y1="18" x2="20" y2="18"></line></svg></span><div class="hover-menu" id="hover-menu"></div></div>', elem_id='gr-hover')
|
||||
|
||||
with gr.Column(scale=10, elem_id='chat-input-container'):
|
||||
shared.gradio['textbox'] = gr.MultimodalTextbox(label='', placeholder='Send a message', file_types=['text', '.pdf', 'image'], file_count="multiple", elem_id='chat-input', elem_classes=['add_scrollbar'])
|
||||
|
|
@ -105,6 +105,9 @@ def create_ui():
|
|||
|
||||
shared.gradio['selected_tools'].change(fn=sync_web_tools, inputs=[shared.gradio['selected_tools']], outputs=[shared.gradio['selected_tools']], show_progress=False)
|
||||
|
||||
with gr.Accordion('MCP servers', open=False):
|
||||
shared.gradio['mcp_servers'] = gr.Textbox(value=shared.settings.get('mcp_servers', ''), lines=3, max_lines=3, label='', info='One url per line. For headers, write url,Header: value,Header2: value2', elem_classes=['add_scrollbar'])
|
||||
|
||||
gr.HTML("<div class='sidebar-vertical-separator'></div>")
|
||||
|
||||
with gr.Row():
|
||||
|
|
|
|||
|
|
@ -10,7 +10,7 @@ from modules.text_generation import (
|
|||
stop_everything_event
|
||||
)
|
||||
from modules.ui_notebook import store_notebook_state_and_debounce
|
||||
from modules.utils import gradio
|
||||
from modules.utils import gradio, sanitize_filename
|
||||
|
||||
inputs = ('textbox-default', 'interface_state')
|
||||
outputs = ('output_textbox', 'html-default')
|
||||
|
|
@ -167,6 +167,7 @@ def handle_new_prompt():
|
|||
|
||||
|
||||
def handle_delete_prompt_confirm_default(prompt_name):
|
||||
prompt_name = sanitize_filename(prompt_name)
|
||||
available_prompts = utils.get_available_prompts()
|
||||
current_index = available_prompts.index(prompt_name) if prompt_name in available_prompts else 0
|
||||
|
||||
|
|
@ -199,6 +200,8 @@ def handle_rename_prompt_click_default(current_name):
|
|||
|
||||
|
||||
def handle_rename_prompt_confirm_default(new_name, current_name):
|
||||
new_name = sanitize_filename(new_name)
|
||||
current_name = sanitize_filename(current_name)
|
||||
old_path = shared.user_data_dir / "logs" / "notebook" / f"{current_name}.txt"
|
||||
new_path = shared.user_data_dir / "logs" / "notebook" / f"{new_name}.txt"
|
||||
|
||||
|
|
|
|||
|
|
@ -798,6 +798,9 @@ def generate(state, save_images=True):
|
|||
if seed == -1:
|
||||
seed = random.randint(0, 2**32 - 1)
|
||||
|
||||
# Store resolved seed back so callers (e.g. API) can access it
|
||||
state['image_seed_resolved'] = seed
|
||||
|
||||
device = get_device()
|
||||
if device is None:
|
||||
device = "cpu"
|
||||
|
|
|
|||
|
|
@ -54,7 +54,6 @@ def create_ui():
|
|||
if not shared.args.portable:
|
||||
shared.gradio['ik'] = gr.Checkbox(label="ik", value=shared.args.ik, info='Use ik_llama.cpp instead of upstream llama.cpp.')
|
||||
|
||||
shared.gradio['cpu_moe'] = gr.Checkbox(label="cpu-moe", value=shared.args.cpu_moe, info='Move the experts to the CPU. Saves VRAM on MoE models.')
|
||||
shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming-llm", value=shared.args.streaming_llm, info='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
|
||||
shared.gradio['load_in_8bit'] = gr.Checkbox(label="load-in-8bit", value=shared.args.load_in_8bit)
|
||||
shared.gradio['load_in_4bit'] = gr.Checkbox(label="load-in-4bit", value=shared.args.load_in_4bit)
|
||||
|
|
@ -67,13 +66,13 @@ def create_ui():
|
|||
)
|
||||
|
||||
# Multimodal
|
||||
with gr.Accordion("Multimodal (vision)", open=False, elem_classes='tgw-accordion') as shared.gradio['mmproj_accordion']:
|
||||
with gr.Accordion("Multimodal (vision)", open=False) as shared.gradio['mmproj_accordion']:
|
||||
with gr.Row():
|
||||
shared.gradio['mmproj'] = gr.Dropdown(label="mmproj file", choices=utils.get_available_mmproj(), value=lambda: shared.args.mmproj or 'None', elem_classes='slim-dropdown', info=f'Select a file that matches your model. Must be placed in {shared.user_data_dir}/mmproj/', interactive=not mu)
|
||||
ui.create_refresh_button(shared.gradio['mmproj'], lambda: None, lambda: {'choices': utils.get_available_mmproj()}, 'refresh-button', interactive=not mu)
|
||||
|
||||
# Speculative decoding
|
||||
with gr.Accordion("Speculative decoding", open=False, elem_classes='tgw-accordion') as shared.gradio['speculative_decoding_accordion']:
|
||||
with gr.Accordion("Speculative decoding", open=False) as shared.gradio['speculative_decoding_accordion']:
|
||||
shared.gradio['draft_max'] = gr.Number(label="draft-max", precision=0, step=1, value=shared.args.draft_max, info='Maximum number of tokens to draft for speculative decoding. Recommended: 4 for draft model, 64 for n-gram.')
|
||||
|
||||
gr.Markdown('#### Draft model')
|
||||
|
|
@ -92,7 +91,7 @@ def create_ui():
|
|||
shared.gradio['spec_ngram_min_hits'] = gr.Number(label="spec-ngram-min-hits", precision=0, step=1, value=shared.args.spec_ngram_min_hits, info='Minimum n-gram hits for ngram-map speculative decoding.', visible=shared.args.spec_type != 'none')
|
||||
|
||||
gr.Markdown("## Other options")
|
||||
with gr.Accordion("See more options", open=False, elem_classes='tgw-accordion'):
|
||||
with gr.Accordion("See more options", open=False):
|
||||
with gr.Row():
|
||||
with gr.Column():
|
||||
shared.gradio['parallel'] = gr.Slider(label="parallel", minimum=1, step=1, maximum=64, value=shared.args.parallel, info='Number of parallel request slots for the API. The context size is divided equally among slots. For example, to have 4 slots with 8192 context each, set ctx_size to 32768.')
|
||||
|
|
@ -109,6 +108,7 @@ def create_ui():
|
|||
with gr.Column():
|
||||
shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='Use PyTorch in CPU mode.')
|
||||
shared.gradio['disk'] = gr.Checkbox(label="disk", value=shared.args.disk)
|
||||
shared.gradio['cpu_moe'] = gr.Checkbox(label="cpu-moe", value=shared.args.cpu_moe, info='Move the experts to the CPU. Saves VRAM on MoE models.')
|
||||
shared.gradio['row_split'] = gr.Checkbox(label="row_split", value=shared.args.row_split, info='Split the model by rows across GPUs. This may improve multi-gpu performance.')
|
||||
shared.gradio['no_kv_offload'] = gr.Checkbox(label="no_kv_offload", value=shared.args.no_kv_offload, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces performance.')
|
||||
shared.gradio['no_mmap'] = gr.Checkbox(label="no-mmap", value=shared.args.no_mmap)
|
||||
|
|
|
|||
|
|
@ -11,7 +11,7 @@ from modules.text_generation import (
|
|||
get_token_ids,
|
||||
stop_everything_event
|
||||
)
|
||||
from modules.utils import gradio
|
||||
from modules.utils import gradio, sanitize_filename
|
||||
|
||||
_notebook_file_lock = threading.Lock()
|
||||
_notebook_auto_save_timer = None
|
||||
|
|
@ -202,6 +202,7 @@ def handle_new_prompt():
|
|||
|
||||
|
||||
def handle_delete_prompt_confirm_notebook(prompt_name):
|
||||
prompt_name = sanitize_filename(prompt_name)
|
||||
available_prompts = utils.get_available_prompts()
|
||||
current_index = available_prompts.index(prompt_name) if prompt_name in available_prompts else 0
|
||||
|
||||
|
|
@ -233,6 +234,8 @@ def handle_rename_prompt_click_notebook(current_name):
|
|||
|
||||
|
||||
def handle_rename_prompt_confirm_notebook(new_name, current_name):
|
||||
new_name = sanitize_filename(new_name)
|
||||
current_name = sanitize_filename(current_name)
|
||||
old_path = shared.user_data_dir / "logs" / "notebook" / f"{current_name}.txt"
|
||||
new_path = shared.user_data_dir / "logs" / "notebook" / f"{new_name}.txt"
|
||||
|
||||
|
|
@ -249,6 +252,7 @@ def handle_rename_prompt_confirm_notebook(new_name, current_name):
|
|||
|
||||
def autosave_prompt(text, prompt_name):
|
||||
"""Automatically save the text to the selected prompt file"""
|
||||
prompt_name = sanitize_filename(prompt_name)
|
||||
if prompt_name and text.strip():
|
||||
prompt_path = shared.user_data_dir / "logs" / "notebook" / f"{prompt_name}.txt"
|
||||
prompt_path.parent.mkdir(parents=True, exist_ok=True)
|
||||
|
|
|
|||
|
|
@ -88,9 +88,8 @@ def natural_keys(text):
|
|||
def check_model_loaded():
|
||||
if shared.model_name == 'None' or shared.model is None:
|
||||
if len(get_available_models()) == 0:
|
||||
error_msg = f"No model is loaded.\n\nTo get started:\n1) Place a GGUF file in your {shared.user_data_dir}/models folder\n2) Go to the Model tab and select it"
|
||||
logger.error(error_msg)
|
||||
return False, error_msg
|
||||
logger.error(f"No model is loaded. To get started: 1) Place a GGUF file in your {shared.user_data_dir}/models folder, 2) Go to the Model tab and select it")
|
||||
return False, f"No model is loaded. Place a GGUF model in your {shared.user_data_dir}/models folder, then select it in the Model tab."
|
||||
else:
|
||||
error_msg = "No model is loaded. Please select one in the Model tab."
|
||||
logger.error(error_msg)
|
||||
|
|
@ -105,6 +104,9 @@ def resolve_model_path(model_name_or_path, image_model=False):
|
|||
before the default models directory.
|
||||
"""
|
||||
|
||||
if model_name_or_path is None:
|
||||
raise FileNotFoundError("No model specified.")
|
||||
|
||||
path_candidate = Path(model_name_or_path)
|
||||
if path_candidate.exists():
|
||||
return path_candidate
|
||||
|
|
|
|||
|
|
@ -324,7 +324,7 @@ def update_requirements(initial_installation=False, pull=True):
|
|||
# Create .git directory if missing
|
||||
if not os.path.exists(os.path.join(script_dir, ".git")):
|
||||
run_cmd(
|
||||
"git init -b main && git remote add origin https://github.com/oobabooga/text-generation-webui && "
|
||||
"git init -b main && git remote add origin https://github.com/oobabooga/textgen && "
|
||||
"git fetch && git symbolic-ref refs/remotes/origin/HEAD refs/remotes/origin/main && "
|
||||
"git reset --hard origin/main && git branch --set-upstream-to=origin/main",
|
||||
environment=True,
|
||||
|
|
@ -337,7 +337,7 @@ def update_requirements(initial_installation=False, pull=True):
|
|||
"Your current installation uses Python {}.{}, which is outdated.\n"
|
||||
"Python {} is now required. A clean installation is needed.\n\n"
|
||||
"INSTRUCTIONS:\n"
|
||||
"1. Delete the 'installer_files' folder in your text-generation-webui directory.\n"
|
||||
"1. Delete the 'installer_files' folder in your textgen directory.\n"
|
||||
"2. Run the start script again (e.g., start_windows.bat).\n\n"
|
||||
"This will create a fresh environment with the latest software.".format(*sys.version_info[:2], PYTHON_VERSION)
|
||||
)
|
||||
|
|
@ -350,7 +350,7 @@ def update_requirements(initial_installation=False, pull=True):
|
|||
"Your current installation uses CUDA 12.4, which has been removed.\n"
|
||||
"To update to the new default (CUDA 12.8), a clean installation is required.\n\n"
|
||||
"INSTRUCTIONS:\n"
|
||||
"1. Delete the 'installer_files' folder in your text-generation-webui directory.\n"
|
||||
"1. Delete the 'installer_files' folder in your textgen directory.\n"
|
||||
"2. Run the start script again (e.g., start_windows.bat).\n\n"
|
||||
"This will create a fresh environment with the latest software."
|
||||
)
|
||||
|
|
|
|||
|
|
@ -9,6 +9,7 @@ flash-linear-attention==0.4.*
|
|||
huggingface-hub==1.5.*
|
||||
jinja2==3.1.6
|
||||
markdown
|
||||
mcp==1.27.0
|
||||
numpy==2.2.*
|
||||
pandas
|
||||
peft==0.18.*
|
||||
|
|
@ -31,8 +32,8 @@ tqdm
|
|||
wandb
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
|
|
@ -40,10 +41,10 @@ sse-starlette==1.6.5
|
|||
tiktoken
|
||||
|
||||
# CUDA wheels
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.116.0/llama_cpp_binaries-0.116.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.116.0/llama_cpp_binaries-0.116.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.116.0/ik_llama_cpp_binaries-0.116.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.116.0/ik_llama_cpp_binaries-0.116.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.28/exllamav3-0.0.28+cu128.torch2.9.0-cp313-cp313-win_amd64.whl; platform_system == "Windows" and python_version == "3.13"
|
||||
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.28/exllamav3-0.0.28+cu128.torch2.9.0-cp313-cp313-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.13"
|
||||
https://github.com/kingbri1/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu128torch2.9.0cxx11abiFALSE-cp313-cp313-win_amd64.whl; platform_system == "Windows" and python_version == "3.13"
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ fastapi==0.112.4
|
|||
huggingface-hub==1.5.*
|
||||
jinja2==3.1.6
|
||||
markdown
|
||||
mcp==1.27.0
|
||||
numpy==2.2.*
|
||||
pandas
|
||||
peft==0.18.*
|
||||
|
|
@ -28,8 +29,8 @@ trafilatura==2.0.0
|
|||
wandb
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
|
|
@ -37,5 +38,5 @@ sse-starlette==1.6.5
|
|||
tiktoken
|
||||
|
||||
# AMD wheels
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.116.0/llama_cpp_binaries-0.116.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.116.0/llama_cpp_binaries-0.116.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ fastapi==0.112.4
|
|||
huggingface-hub==1.5.*
|
||||
jinja2==3.1.6
|
||||
markdown
|
||||
mcp==1.27.0
|
||||
numpy==2.2.*
|
||||
pandas
|
||||
peft==0.18.*
|
||||
|
|
@ -28,8 +29,8 @@ trafilatura==2.0.0
|
|||
wandb
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
|
|
@ -37,4 +38,4 @@ sse-starlette==1.6.5
|
|||
tiktoken
|
||||
|
||||
# Mac wheels
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.116.0/llama_cpp_binaries-0.116.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ fastapi==0.112.4
|
|||
huggingface-hub==1.5.*
|
||||
jinja2==3.1.6
|
||||
markdown
|
||||
mcp==1.27.0
|
||||
numpy==2.2.*
|
||||
pandas
|
||||
peft==0.18.*
|
||||
|
|
@ -28,8 +29,8 @@ trafilatura==2.0.0
|
|||
wandb
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
|
|
@ -37,4 +38,4 @@ sse-starlette==1.6.5
|
|||
tiktoken
|
||||
|
||||
# Mac wheels
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.116.0/llama_cpp_binaries-0.116.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ fastapi==0.112.4
|
|||
huggingface-hub==1.5.*
|
||||
jinja2==3.1.6
|
||||
markdown
|
||||
mcp==1.27.0
|
||||
numpy==2.2.*
|
||||
pandas
|
||||
peft==0.18.*
|
||||
|
|
@ -28,8 +29,8 @@ trafilatura==2.0.0
|
|||
wandb
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
|
|
@ -37,7 +38,7 @@ sse-starlette==1.6.5
|
|||
tiktoken
|
||||
|
||||
# llama.cpp (CPU only)
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.116.0/llama_cpp_binaries-0.116.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.116.0/llama_cpp_binaries-0.116.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.116.0/ik_llama_cpp_binaries-0.116.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.116.0/ik_llama_cpp_binaries-0.116.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
|
|
|
|||
|
|
@ -7,6 +7,7 @@ fastapi==0.112.4
|
|||
huggingface-hub==1.5.*
|
||||
jinja2==3.1.6
|
||||
markdown
|
||||
mcp==1.27.0
|
||||
numpy==2.2.*
|
||||
pandas
|
||||
peft==0.18.*
|
||||
|
|
@ -28,8 +29,8 @@ trafilatura==2.0.0
|
|||
wandb
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ fastapi==0.112.4
|
|||
huggingface-hub==1.5.*
|
||||
jinja2==3.1.6
|
||||
markdown
|
||||
mcp==1.27.0
|
||||
numpy==2.2.*
|
||||
pydantic==2.11.0
|
||||
pymupdf==1.27.*
|
||||
|
|
@ -14,8 +15,8 @@ trafilatura==2.0.0
|
|||
tqdm
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
|
|
@ -23,5 +24,5 @@ sse-starlette==1.6.5
|
|||
tiktoken
|
||||
|
||||
# CUDA wheels
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.116.0/llama_cpp_binaries-0.116.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.116.0/llama_cpp_binaries-0.116.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ fastapi==0.112.4
|
|||
huggingface-hub==1.5.*
|
||||
jinja2==3.1.6
|
||||
markdown
|
||||
mcp==1.27.0
|
||||
numpy==2.2.*
|
||||
pydantic==2.11.0
|
||||
pymupdf==1.27.*
|
||||
|
|
@ -14,8 +15,8 @@ trafilatura==2.0.0
|
|||
tqdm
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
|
|
@ -23,5 +24,5 @@ sse-starlette==1.6.5
|
|||
tiktoken
|
||||
|
||||
# AMD wheels
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.116.0/llama_cpp_binaries-0.116.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.116.0/llama_cpp_binaries-0.116.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ fastapi==0.112.4
|
|||
huggingface-hub==1.5.*
|
||||
jinja2==3.1.6
|
||||
markdown
|
||||
mcp==1.27.0
|
||||
numpy==2.2.*
|
||||
pydantic==2.11.0
|
||||
pymupdf==1.27.*
|
||||
|
|
@ -14,8 +15,8 @@ trafilatura==2.0.0
|
|||
tqdm
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
|
|
@ -23,4 +24,4 @@ sse-starlette==1.6.5
|
|||
tiktoken
|
||||
|
||||
# Mac wheels
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.116.0/llama_cpp_binaries-0.116.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ fastapi==0.112.4
|
|||
huggingface-hub==1.5.*
|
||||
jinja2==3.1.6
|
||||
markdown
|
||||
mcp==1.27.0
|
||||
numpy==2.2.*
|
||||
pydantic==2.11.0
|
||||
pymupdf==1.27.*
|
||||
|
|
@ -14,8 +15,8 @@ trafilatura==2.0.0
|
|||
tqdm
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
|
|
@ -23,4 +24,4 @@ sse-starlette==1.6.5
|
|||
tiktoken
|
||||
|
||||
# Mac wheels
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.116.0/llama_cpp_binaries-0.116.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ fastapi==0.112.4
|
|||
huggingface-hub==1.5.*
|
||||
jinja2==3.1.6
|
||||
markdown
|
||||
mcp==1.27.0
|
||||
numpy==2.2.*
|
||||
pydantic==2.11.0
|
||||
pymupdf==1.27.*
|
||||
|
|
@ -14,8 +15,8 @@ trafilatura==2.0.0
|
|||
tqdm
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
|
|
@ -23,5 +24,5 @@ sse-starlette==1.6.5
|
|||
tiktoken
|
||||
|
||||
# llama.cpp (CPU only)
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.116.0/llama_cpp_binaries-0.116.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.116.0/llama_cpp_binaries-0.116.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ fastapi==0.112.4
|
|||
huggingface-hub==1.5.*
|
||||
jinja2==3.1.6
|
||||
markdown
|
||||
mcp==1.27.0
|
||||
numpy==2.2.*
|
||||
pydantic==2.11.0
|
||||
pymupdf==1.27.*
|
||||
|
|
@ -14,8 +15,8 @@ trafilatura==2.0.0
|
|||
tqdm
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
|
|
@ -23,5 +24,5 @@ sse-starlette==1.6.5
|
|||
tiktoken
|
||||
|
||||
# CUDA wheels
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.116.0/llama_cpp_binaries-0.116.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.116.0/llama_cpp_binaries-0.116.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ fastapi==0.112.4
|
|||
huggingface-hub==1.5.*
|
||||
jinja2==3.1.6
|
||||
markdown
|
||||
mcp==1.27.0
|
||||
numpy==2.2.*
|
||||
pydantic==2.11.0
|
||||
pymupdf==1.27.*
|
||||
|
|
@ -14,8 +15,8 @@ trafilatura==2.0.0
|
|||
tqdm
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
|
|
@ -23,5 +24,5 @@ sse-starlette==1.6.5
|
|||
tiktoken
|
||||
|
||||
# CUDA wheels
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.116.0/ik_llama_cpp_binaries-0.116.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.116.0/ik_llama_cpp_binaries-0.116.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ fastapi==0.112.4
|
|||
huggingface-hub==1.5.*
|
||||
jinja2==3.1.6
|
||||
markdown
|
||||
mcp==1.27.0
|
||||
numpy==2.2.*
|
||||
pydantic==2.11.0
|
||||
pymupdf==1.27.*
|
||||
|
|
@ -14,8 +15,8 @@ trafilatura==2.0.0
|
|||
tqdm
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
|
|
@ -23,5 +24,5 @@ sse-starlette==1.6.5
|
|||
tiktoken
|
||||
|
||||
# ik_llama.cpp (CPU only)
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.116.0/ik_llama_cpp_binaries-0.116.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.116.0/ik_llama_cpp_binaries-0.116.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ fastapi==0.112.4
|
|||
huggingface-hub==1.5.*
|
||||
jinja2==3.1.6
|
||||
markdown
|
||||
mcp==1.27.0
|
||||
numpy==2.2.*
|
||||
pydantic==2.11.0
|
||||
pymupdf==1.27.*
|
||||
|
|
@ -14,8 +15,8 @@ trafilatura==2.0.0
|
|||
tqdm
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
|
|
@ -23,5 +24,5 @@ sse-starlette==1.6.5
|
|||
tiktoken
|
||||
|
||||
# CUDA wheels
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.116.0/ik_llama_cpp_binaries-0.116.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.116.0/ik_llama_cpp_binaries-0.116.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ fastapi==0.112.4
|
|||
huggingface-hub==1.5.*
|
||||
jinja2==3.1.6
|
||||
markdown
|
||||
mcp==1.27.0
|
||||
numpy==2.2.*
|
||||
pydantic==2.11.0
|
||||
pymupdf==1.27.*
|
||||
|
|
@ -14,8 +15,8 @@ trafilatura==2.0.0
|
|||
tqdm
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
|
|
|
|||
|
|
@ -3,6 +3,7 @@ fastapi==0.112.4
|
|||
huggingface-hub==1.5.*
|
||||
jinja2==3.1.6
|
||||
markdown
|
||||
mcp==1.27.0
|
||||
numpy==2.2.*
|
||||
pydantic==2.11.0
|
||||
pymupdf==1.27.*
|
||||
|
|
@ -14,8 +15,8 @@ trafilatura==2.0.0
|
|||
tqdm
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
|
|
@ -23,5 +24,5 @@ sse-starlette==1.6.5
|
|||
tiktoken
|
||||
|
||||
# Vulkan wheels
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.116.0/llama_cpp_binaries-0.116.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.116.0/llama_cpp_binaries-0.116.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
|
|
|
|||
|
|
@ -36,7 +36,7 @@ def signal_handler(sig, frame):
|
|||
signal.signal(signal.SIGINT, signal.SIG_DFL)
|
||||
signal.signal(signal.SIGTERM, signal.SIG_DFL)
|
||||
|
||||
logger.info("Received Ctrl+C. Shutting down Text Generation Web UI gracefully.")
|
||||
logger.info("Received Ctrl+C. Shutting down TextGen gracefully.")
|
||||
|
||||
# Explicitly stop LlamaServer to avoid __del__ cleanup issues during shutdown
|
||||
if shared.model and shared.model.__class__.__name__ == 'LlamaServer':
|
||||
|
|
@ -85,7 +85,7 @@ def create_interface():
|
|||
'GRADIO_TEMP_DIR': str(gradio_temp_path)
|
||||
})
|
||||
|
||||
title = 'Text Generation Web UI'
|
||||
title = 'TextGen'
|
||||
|
||||
# Password authentication
|
||||
auth = []
|
||||
|
|
@ -249,7 +249,7 @@ def create_interface():
|
|||
|
||||
if __name__ == "__main__":
|
||||
|
||||
logger.info("Starting Text Generation Web UI")
|
||||
logger.info("Starting TextGen")
|
||||
do_cmd_flags_warnings()
|
||||
|
||||
# Load custom settings
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue