mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2026-04-06 07:03:37 +00:00
commit
ae699ac570
50 changed files with 1160 additions and 651 deletions
28
.github/workflows/build-everything-tgw.yml
vendored
28
.github/workflows/build-everything-tgw.yml
vendored
|
|
@ -68,3 +68,31 @@ jobs:
|
|||
with:
|
||||
version: ${{ inputs.version }}
|
||||
config: 'os:macos-15-intel,macos-14'
|
||||
|
||||
# "ik" portable builds. Each job calls a reusable workflow and narrows its
# build matrix to one runner image via `config`, which uses the
# `key:item1,item2` override syntax described in the called workflow's inputs.
build_release_ik_cuda_windows:
  name: ik CUDA Windows
  uses: ./.github/workflows/build-portable-release-ik-cuda.yml
  with:
    version: ${{ inputs.version }}
    config: 'os:windows-2022'

build_release_ik_cuda_linux:
  name: ik CUDA Linux
  uses: ./.github/workflows/build-portable-release-ik-cuda.yml
  with:
    version: ${{ inputs.version }}
    config: 'os:ubuntu-22.04'

# CPU-only variants use the non-CUDA reusable workflow.
build_release_ik_cpu_windows:
  name: ik CPU Windows
  uses: ./.github/workflows/build-portable-release-ik.yml
  with:
    version: ${{ inputs.version }}
    config: 'os:windows-2022'

build_release_ik_cpu_linux:
  name: ik CPU Linux
  uses: ./.github/workflows/build-portable-release-ik.yml
  with:
    version: ${{ inputs.version }}
    config: 'os:ubuntu-22.04'
|
||||
|
|
|
|||
178
.github/workflows/build-portable-release-ik-cuda.yml
vendored
Normal file
178
.github/workflows/build-portable-release-ik-cuda.yml
vendored
Normal file
|
|
@ -0,0 +1,178 @@
|
|||
# Builds the portable "ik" CUDA archives of text-generation-webui for a given
# version tag and uploads them to the GitHub release with that tag.
name: Build ik CUDA

on:
  # Manual run.
  workflow_dispatch:
    inputs:
      # Used as the checkout ref, the release tag, and (minus a leading "v")
      # in the archive file names.
      version:
        description: 'Version tag of text-generation-webui to build: v3.0'
        default: 'v3.0'
        required: true
        type: string
      # 'Default' keeps the built-in matrix; any other value replaces the
      # named matrix axes.
      config:
        description: 'Override configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
        default: 'Default'
        required: false
        type: string
      # 'None' disables exclusions; otherwise each ';'-separated group becomes
      # one matrix `exclude` entry.
      exclude:
        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
        default: 'None'
        required: false
        type: string
  # Reusable-workflow entry point; accepts the same three inputs.
  workflow_call:
    inputs:
      version:
        description: 'Version tag of text-generation-webui to build: v3.0'
        default: 'v3.0'
        required: true
        type: string
      config:
        description: 'Configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
        default: 'Default'
        required: false
        type: string
      exclude:
        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
        default: 'None'
        required: false
        type: string

permissions:
  # Write access for GITHUB_TOKEN, which the release upload step passes as
  # its repo_token.
  contents: write
|
||||
|
||||
jobs:
|
||||
# Builds the job matrix as compact JSON and publishes it as the `matrix`
# output, which build_wheels expands with fromJSON.
define_matrix:
  name: Define Build Matrix
  runs-on: ubuntu-latest
  outputs:
    matrix: ${{ steps.set-matrix.outputs.matrix }}
  defaults:
    run:
      shell: pwsh
  env:
    # Workflow inputs are handed to the script through the environment.
    CONFIGIN: ${{ inputs.config }}
    EXCLUDEIN: ${{ inputs.exclude }}

  steps:
    # Script outline:
    #   1. Start from the default axes (os, pyver, cuda).
    #   2. Unless CONFIGIN is 'Default', split it on ';' into `key:a,b`
    #      groups and replace matrix[key] with the comma-separated values.
    #   3. Unless EXCLUDEIN is 'None', turn each ';'-separated group of
    #      `key:value` pairs into one hashtable and store the list under
    #      matrix['exclude'].
    #   4. Serialize to JSON and append `matrix=<json>` to GITHUB_OUTPUT.
    - name: Define Job Output
      id: set-matrix
      run: |
        $matrix = @{
            'os' = @('ubuntu-22.04', 'windows-2022')
            'pyver' = @("3.13")
            'cuda' = @("12.4", "13.1")
        }

        if ($env:CONFIGIN -ne 'Default') {$env:CONFIGIN.split(';').foreach({$matrix[$_.split(':')[0]] = $_.split(':')[1].split(',')})}

        if ($env:EXCLUDEIN -ne 'None') {
            $exclusions = @()
            $exclusions += $env:EXCLUDEIN.split(';').replace(':','=').replace(',',"`n") | ConvertFrom-StringData
            $matrix['exclude'] = $exclusions
        }

        $matrixOut = ConvertTo-Json $matrix -Compress
        Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT
|
||||
|
||||
# Builds one portable "ik" CUDA archive per matrix entry (os x pyver x cuda)
# and uploads it to the GitHub release tagged with the requested version.
build_wheels:
  name: ${{ matrix.os }} ${{ matrix.pyver }} CUDA ${{ matrix.cuda }}
  needs: define_matrix
  runs-on: ${{ matrix.os }}
  strategy:
    matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
  defaults:
    run:
      shell: pwsh
  env:
    PCKGVER: ${{ inputs.version }}

  steps:
    - uses: actions/checkout@v6
      with:
        repository: 'oobabooga/text-generation-webui'
        ref: ${{ inputs.version }}
        submodules: 'recursive'

    - uses: actions/setup-python@v6
      with:
        python-version: ${{ matrix.pyver }}

    - name: Build Package
      shell: bash
      # Workflow inputs and matrix values are passed as environment variables
      # instead of being expanded into the script text, so a value containing
      # quotes or shell metacharacters cannot change the script that runs.
      env:
        VERSION: ${{ inputs.version }}
        CUDA_VERSION: ${{ matrix.cuda }}
      run: |
        # Strip a leading "v" from the tag for directory and archive names
        VERSION_CLEAN="${VERSION#v}"
        cd ..
        cp -r text-generation-webui "text-generation-webui-${VERSION_CLEAN}"
        cd "text-generation-webui-${VERSION_CLEAN}"

        # Remove extensions that need additional requirements
        allowed=("character_bias" "gallery" "sd_api_pictures")
        find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf

        # Common variables (CUDA_VERSION, VERSION) come from the step's env

        # 1. Set platform-specific variables
        if [[ "$RUNNER_OS" == "Windows" ]]; then
          PLATFORM="windows"
          PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-pc-windows-msvc-install_only_stripped.tar.gz"
          PIP_PATH="portable_env/python.exe -m pip"
          PACKAGES_PATH="portable_env/Lib/site-packages"
          rm start_linux.sh start_macos.sh
        else
          PLATFORM="linux"
          PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz"
          PIP_PATH="portable_env/bin/python -m pip"
          PACKAGES_PATH="portable_env/lib/python3.13/site-packages"
          rm start_macos.sh start_windows.bat
        fi

        # 2. Download and extract Python
        cd ..
        echo "Downloading Python for $PLATFORM..."
        # --fail: stop on an HTTP error instead of saving the error page as the tarball
        curl -fL -o python-build.tar.gz "$PYTHON_URL"
        tar -xzf python-build.tar.gz
        mv python "text-generation-webui-${VERSION_CLEAN}/portable_env"

        # 3. Prepare requirements file based on CUDA version
        cd "text-generation-webui-${VERSION_CLEAN}"
        if [[ "$CUDA_VERSION" == "13.1" ]]; then
          REQ_FILE="requirements/portable/requirements_ik_cuda131.txt"
        else
          REQ_FILE="requirements/portable/requirements_ik.txt"
        fi

        # 4. Inject --ik into start scripts
        # (one of the two files was removed above; its sed error is ignored on purpose)
        sed -i 's/--portable/--portable --ik/g' start_linux.sh start_windows.bat 2>/dev/null || true

        # 5. Install packages
        # PIP_PATH is left unquoted so it splits into "<python> -m pip"
        echo "Installing Python packages from $REQ_FILE..."
        $PIP_PATH install --target="./$PACKAGES_PATH" -r "$REQ_FILE"

        # 6. Clean up
        rm -rf .git cmd* update_wizard* Colab-TextGen-GPU.ipynb docker setup.cfg .github .gitignore requirements/ one_click.py

        # 7. Create archive
        cd ..
        if [[ "$RUNNER_OS" == "Windows" ]]; then
          ARCHIVE_NAME="textgen-portable-ik-${VERSION_CLEAN}-${PLATFORM}-cuda${CUDA_VERSION}.zip"
          echo "Creating archive: $ARCHIVE_NAME"
          powershell -Command "Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
        else
          ARCHIVE_NAME="textgen-portable-ik-${VERSION_CLEAN}-${PLATFORM}-cuda${CUDA_VERSION}.tar.gz"
          echo "Creating archive: $ARCHIVE_NAME"
          tar czf "$ARCHIVE_NAME" "text-generation-webui-${VERSION_CLEAN}"
        fi

    # The archive is created one directory above the checkout, hence the "../"
    # glob. continue-on-error keeps a failed upload from failing the job.
    - name: Upload files to a GitHub release
      id: upload-release
      uses: svenstaro/upload-release-action@2.7.0
      continue-on-error: true
      with:
        repo_token: ${{ secrets.GITHUB_TOKEN }}
        file: ../textgen-portable-ik-*
        tag: ${{ inputs.version }}
        file_glob: true
        make_latest: false
        overwrite: true
|
||||
173
.github/workflows/build-portable-release-ik.yml
vendored
Normal file
173
.github/workflows/build-portable-release-ik.yml
vendored
Normal file
|
|
@ -0,0 +1,173 @@
|
|||
# Builds the portable "ik" CPU-only archives of text-generation-webui for a
# given version tag and uploads them to the GitHub release with that tag.
name: Build ik CPU

on:
  # Manual run.
  workflow_dispatch:
    inputs:
      # Used as the checkout ref, the release tag, and (minus a leading "v")
      # in the archive file names.
      version:
        description: 'Version tag of text-generation-webui to build: v3.0'
        default: 'v3.0'
        required: true
        type: string
      # 'Default' keeps the built-in matrix; any other value replaces the
      # named matrix axes.
      config:
        description: 'Override configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
        default: 'Default'
        required: false
        type: string
      # 'None' disables exclusions; otherwise each ';'-separated group becomes
      # one matrix `exclude` entry.
      exclude:
        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
        default: 'None'
        required: false
        type: string
  # Reusable-workflow entry point; accepts the same three inputs.
  workflow_call:
    inputs:
      version:
        description: 'Version tag of text-generation-webui to build: v3.0'
        default: 'v3.0'
        required: true
        type: string
      config:
        description: 'Configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
        default: 'Default'
        required: false
        type: string
      exclude:
        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
        default: 'None'
        required: false
        type: string

permissions:
  # Write access for GITHUB_TOKEN, which the release upload step passes as
  # its repo_token.
  contents: write
|
||||
|
||||
jobs:
|
||||
# Builds the job matrix as compact JSON and publishes it as the `matrix`
# output, which build_wheels expands with fromJSON.
define_matrix:
  name: Define Build Matrix
  runs-on: ubuntu-latest
  outputs:
    matrix: ${{ steps.set-matrix.outputs.matrix }}
  defaults:
    run:
      shell: pwsh
  env:
    # Workflow inputs are handed to the script through the environment.
    CONFIGIN: ${{ inputs.config }}
    EXCLUDEIN: ${{ inputs.exclude }}

  steps:
    # Script outline:
    #   1. Start from the default axes (os, pyver).
    #   2. Unless CONFIGIN is 'Default', split it on ';' into `key:a,b`
    #      groups and replace matrix[key] with the comma-separated values.
    #   3. Unless EXCLUDEIN is 'None', turn each ';'-separated group of
    #      `key:value` pairs into one hashtable and store the list under
    #      matrix['exclude'].
    #   4. Serialize to JSON and append `matrix=<json>` to GITHUB_OUTPUT.
    - name: Define Job Output
      id: set-matrix
      run: |
        $matrix = @{
            'os' = @('ubuntu-22.04', 'windows-2022')
            'pyver' = @("3.13")
        }

        if ($env:CONFIGIN -ne 'Default') {$env:CONFIGIN.split(';').foreach({$matrix[$_.split(':')[0]] = $_.split(':')[1].split(',')})}

        if ($env:EXCLUDEIN -ne 'None') {
            $exclusions = @()
            $exclusions += $env:EXCLUDEIN.split(';').replace(':','=').replace(',',"`n") | ConvertFrom-StringData
            $matrix['exclude'] = $exclusions
        }

        $matrixOut = ConvertTo-Json $matrix -Compress
        Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT
|
||||
|
||||
# Builds one portable "ik" CPU-only archive per matrix entry (os x pyver) and
# uploads it to the GitHub release tagged with the requested version.
build_wheels:
  name: ${{ matrix.os }} ${{ matrix.pyver }}
  needs: define_matrix
  runs-on: ${{ matrix.os }}
  strategy:
    matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
  defaults:
    run:
      shell: pwsh
  env:
    PCKGVER: ${{ inputs.version }}

  steps:
    - uses: actions/checkout@v6
      with:
        repository: 'oobabooga/text-generation-webui'
        ref: ${{ inputs.version }}
        submodules: 'recursive'

    - uses: actions/setup-python@v6
      with:
        python-version: ${{ matrix.pyver }}

    - name: Build Package
      shell: bash
      # The workflow input is passed as an environment variable instead of
      # being expanded into the script text, so a value containing quotes or
      # shell metacharacters cannot change the script that runs.
      env:
        VERSION: ${{ inputs.version }}
      run: |
        # Strip a leading "v" from the tag for directory and archive names
        VERSION_CLEAN="${VERSION#v}"
        cd ..
        cp -r text-generation-webui "text-generation-webui-${VERSION_CLEAN}"
        cd "text-generation-webui-${VERSION_CLEAN}"

        # Remove extensions that need additional requirements
        allowed=("character_bias" "gallery" "sd_api_pictures")
        find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf

        # Common variable (VERSION) comes from the step's env

        # 1. Set platform-specific variables
        if [[ "$RUNNER_OS" == "Windows" ]]; then
          PLATFORM="windows-cpu"
          PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-pc-windows-msvc-install_only_stripped.tar.gz"
          PIP_PATH="portable_env/python.exe -m pip"
          PACKAGES_PATH="portable_env/Lib/site-packages"
          rm start_linux.sh start_macos.sh
        else
          PLATFORM="linux-cpu"
          PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz"
          PIP_PATH="portable_env/bin/python -m pip"
          PACKAGES_PATH="portable_env/lib/python3.13/site-packages"
          rm start_macos.sh start_windows.bat
        fi

        # 2. Download and extract Python
        echo "Downloading Python for $PLATFORM..."
        cd ..
        # --fail: stop on an HTTP error instead of saving the error page as the tarball
        curl -fL -o python-build.tar.gz "$PYTHON_URL"
        tar -xzf python-build.tar.gz
        mv python "text-generation-webui-${VERSION_CLEAN}/portable_env"

        # 3. Prepare requirements file
        cd "text-generation-webui-${VERSION_CLEAN}"
        REQ_FILE="requirements/portable/requirements_ik_cpu_only.txt"
        echo "Using requirements file: $REQ_FILE"

        # 4. Inject --ik into start scripts
        # (one of the two files was removed above; its sed error is ignored on purpose)
        sed -i 's/--portable/--portable --ik/g' start_linux.sh start_windows.bat 2>/dev/null || true

        # 5. Install packages
        # PIP_PATH is left unquoted so it splits into "<python> -m pip"
        echo "Installing Python packages from $REQ_FILE..."
        $PIP_PATH install --target="./$PACKAGES_PATH" -r "$REQ_FILE"

        # 6. Clean up
        rm -rf .git cmd* update_wizard* Colab-TextGen-GPU.ipynb docker setup.cfg .github .gitignore requirements/ one_click.py

        # 7. Create archive
        cd ..
        if [[ "$RUNNER_OS" == "Windows" ]]; then
          ARCHIVE_NAME="textgen-portable-ik-${VERSION_CLEAN}-${PLATFORM}.zip"
          echo "Creating archive: $ARCHIVE_NAME"
          powershell -Command "Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
        else
          ARCHIVE_NAME="textgen-portable-ik-${VERSION_CLEAN}-${PLATFORM}.tar.gz"
          echo "Creating archive: $ARCHIVE_NAME"
          tar czf "$ARCHIVE_NAME" "text-generation-webui-${VERSION_CLEAN}"
        fi

    # The archive is created one directory above the checkout, hence the "../"
    # glob. continue-on-error keeps a failed upload from failing the job.
    - name: Upload files to a GitHub release
      id: upload-release
      uses: svenstaro/upload-release-action@2.7.0
      continue-on-error: true
      with:
        repo_token: ${{ secrets.GITHUB_TOKEN }}
        file: ../textgen-portable-ik-*
        tag: ${{ inputs.version }}
        file_glob: true
        make_latest: false
        overwrite: true
|
||||
|
|
@ -112,7 +112,7 @@ Used for talking to an instruction-following model using the prompt format defin
|
|||
|
||||
The prompt format is defined by the **Instruction template** parameter in "Parameters" > "Instruction template", which represents a Jinja2 template.
|
||||
|
||||
Note that when you load a model in the "Model" tab, the web UI will try to automatically detect its instruction template (if any), and will update the values under "Parameters" > "Instruction template" accordingly. This is done using a set of regular expressions defined in `user_data/models/config.yaml`. This detection is not guaranteed to be accurate. You should check the model card on Hugging Face to see if you are using the correct prompt format.
|
||||
Note that when you load a model in the "Model" tab, the web UI will try to automatically detect its instruction template (if any) from the model metadata (e.g. `tokenizer_config.json` or GGUF metadata), and will update the values under "Parameters" > "Instruction template" accordingly. You should check the model card on Hugging Face to see if you are using the correct prompt format.
|
||||
|
||||
### Chat-instruct
|
||||
|
||||
|
|
|
|||
|
|
@ -39,7 +39,7 @@ curl http://127.0.0.1:5000/v1/completions \
|
|||
|
||||
#### Chat completions
|
||||
|
||||
Works best with instruction-following models. If the "instruction_template" variable is not provided, it will be guessed automatically based on the model name using the regex patterns in `user_data/models/config.yaml`.
|
||||
Works best with instruction-following models. If the "instruction_template" variable is not provided, it will be detected automatically from the model metadata.
|
||||
|
||||
```shell
|
||||
curl http://127.0.0.1:5000/v1/chat/completions \
|
||||
|
|
|
|||
|
|
@ -158,28 +158,21 @@ class ModelDownloader:
|
|||
# Also if GGUF and safetensors are available, download only safetensors
|
||||
if (has_pytorch or has_pt or has_gguf) and has_safetensors:
|
||||
has_gguf = False
|
||||
for i in range(len(classifications) - 1, -1, -1):
|
||||
if classifications[i] in ['pytorch', 'pt', 'gguf']:
|
||||
links.pop(i)
|
||||
file_sizes.pop(i)
|
||||
keep = [i for i, c in enumerate(classifications) if c not in ['pytorch', 'pt', 'gguf']]
|
||||
links = [links[i] for i in keep]
|
||||
file_sizes = [file_sizes[i] for i in keep]
|
||||
|
||||
# For GGUF, try to download only the Q4_K_M if no specific file is specified.
|
||||
if has_gguf and specific_file is None:
|
||||
has_q4km = False
|
||||
for i in range(len(classifications) - 1, -1, -1):
|
||||
if 'q4_k_m' in links[i].lower():
|
||||
has_q4km = True
|
||||
has_q4km = any('q4_k_m' in link.lower() for link in links)
|
||||
|
||||
if has_q4km:
|
||||
for i in range(len(classifications) - 1, -1, -1):
|
||||
if 'q4_k_m' not in links[i].lower():
|
||||
links.pop(i)
|
||||
file_sizes.pop(i)
|
||||
keep = [i for i, link in enumerate(links) if 'q4_k_m' in link.lower()]
|
||||
else:
|
||||
for i in range(len(classifications) - 1, -1, -1):
|
||||
if links[i].lower().endswith('.gguf'):
|
||||
links.pop(i)
|
||||
file_sizes.pop(i)
|
||||
keep = [i for i, link in enumerate(links) if not link.lower().endswith('.gguf')]
|
||||
|
||||
links = [links[i] for i in keep]
|
||||
file_sizes = [file_sizes[i] for i in keep]
|
||||
|
||||
is_llamacpp = has_gguf and specific_file is not None
|
||||
return links, sha256, is_lora, is_llamacpp, file_sizes
|
||||
|
|
|
|||
|
|
@ -2,8 +2,11 @@ import concurrent.futures
|
|||
|
||||
import requests
|
||||
|
||||
from modules.web_search import _validate_url
|
||||
|
||||
|
||||
def download_single(url):
|
||||
_validate_url(url)
|
||||
headers = {
|
||||
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/58.0.3029.110 Safari/537.3'
|
||||
}
|
||||
|
|
|
|||
|
|
@ -5,12 +5,14 @@ import requests
|
|||
from bs4 import BeautifulSoup
|
||||
|
||||
import extensions.superboogav2.parameters as parameters
|
||||
from modules.web_search import _validate_url
|
||||
|
||||
from .data_processor import process_and_add_to_collector
|
||||
from .utils import create_metadata_source
|
||||
|
||||
|
||||
def _download_single(url):
|
||||
_validate_url(url)
|
||||
response = requests.get(url, timeout=5)
|
||||
if response.status_code == 200:
|
||||
return response.content
|
||||
|
|
|
|||
|
|
@ -1,6 +1,6 @@
|
|||
function toggleDarkMode() {
|
||||
document.body.classList.toggle("dark");
|
||||
var currentCSS = document.getElementById("highlight-css");
|
||||
const currentCSS = document.getElementById("highlight-css");
|
||||
if (currentCSS.getAttribute("href") === "file/css/highlightjs/github-dark.min.css") {
|
||||
currentCSS.setAttribute("href", "file/css/highlightjs/github.min.css");
|
||||
} else {
|
||||
|
|
@ -9,12 +9,10 @@ function toggleDarkMode() {
|
|||
|
||||
// Re-highlight all code blocks once stylesheet loads
|
||||
currentCSS.onload = function() {
|
||||
const messageBodies = document.getElementById("chat").querySelectorAll(".message-body");
|
||||
messageBodies.forEach((messageBody) => {
|
||||
const codeBlocks = messageBody.querySelectorAll("pre code");
|
||||
codeBlocks.forEach((codeBlock) => {
|
||||
hljs.highlightElement(codeBlock);
|
||||
});
|
||||
// Clear data-highlighted so hljs will re-process with the new theme
|
||||
document.querySelectorAll("#chat .message-body pre code[data-highlighted]").forEach((codeBlock) => {
|
||||
delete codeBlock.dataset.highlighted;
|
||||
});
|
||||
doSyntaxHighlighting();
|
||||
};
|
||||
}
|
||||
|
|
|
|||
|
|
@ -1,11 +1,35 @@
|
|||
// -------------------------------------------------
|
||||
// Shared helpers
|
||||
// -------------------------------------------------
|
||||
|
||||
function getProfilePictureUrl() {
|
||||
return "/file/user_data/cache/pfp_character.png?time=" + Date.now();
|
||||
}
|
||||
|
||||
const MESSAGE_SELECTOR = ".message, .user-message, .assistant-message";
|
||||
|
||||
function getMessageElement(element) {
|
||||
if (!element) return null;
|
||||
return element.closest(MESSAGE_SELECTOR);
|
||||
}
|
||||
|
||||
function isUserRole(messageElement) {
|
||||
return messageElement.classList.contains("user-message") ||
|
||||
messageElement.querySelector(".text-you") !== null ||
|
||||
messageElement.querySelector(".circle-you") !== null;
|
||||
}
|
||||
|
||||
// Trigger a synthetic 'input' event so Gradio picks up programmatic value changes
|
||||
function dispatchGradioInput(element) {
|
||||
element.dispatchEvent(new Event("input", { bubbles: true }));
|
||||
}
|
||||
|
||||
// -------------------------------------------------
|
||||
// Event handlers
|
||||
// -------------------------------------------------
|
||||
|
||||
function copyToClipboard(element) {
|
||||
if (!element) return;
|
||||
|
||||
const messageElement = element.closest(".message, .user-message, .assistant-message");
|
||||
const messageElement = getMessageElement(element);
|
||||
if (!messageElement) return;
|
||||
|
||||
const rawText = messageElement.getAttribute("data-raw");
|
||||
|
|
@ -48,9 +72,7 @@ function fallbackCopyToClipboard(text) {
|
|||
}
|
||||
|
||||
function branchHere(element) {
|
||||
if (!element) return;
|
||||
|
||||
const messageElement = element.closest(".message, .user-message, .assistant-message");
|
||||
const messageElement = getMessageElement(element);
|
||||
if (!messageElement) return;
|
||||
|
||||
const index = messageElement.getAttribute("data-index");
|
||||
|
|
@ -69,11 +91,7 @@ function branchHere(element) {
|
|||
}
|
||||
|
||||
branchIndexInput.value = index;
|
||||
|
||||
// Trigger any 'change' or 'input' events Gradio might be listening for
|
||||
const event = new Event("input", { bubbles: true });
|
||||
branchIndexInput.dispatchEvent(event);
|
||||
|
||||
dispatchGradioInput(branchIndexInput);
|
||||
branchButton.click();
|
||||
}
|
||||
|
||||
|
|
@ -82,9 +100,7 @@ function branchHere(element) {
|
|||
// -------------------------------------------------
|
||||
|
||||
function editHere(buttonElement) {
|
||||
if (!buttonElement) return;
|
||||
|
||||
const messageElement = buttonElement.closest(".message, .user-message, .assistant-message");
|
||||
const messageElement = getMessageElement(buttonElement);
|
||||
if (!messageElement) return;
|
||||
|
||||
const messageBody = messageElement.querySelector(".message-body");
|
||||
|
|
@ -97,12 +113,7 @@ function editHere(buttonElement) {
|
|||
return;
|
||||
}
|
||||
|
||||
// Determine role based on message element - handle different chat modes
|
||||
const isUserMessage = messageElement.classList.contains("user-message") ||
|
||||
messageElement.querySelector(".text-you") !== null ||
|
||||
messageElement.querySelector(".circle-you") !== null;
|
||||
|
||||
startEditing(messageElement, messageBody, isUserMessage);
|
||||
startEditing(messageElement, messageBody, isUserRole(messageElement));
|
||||
}
|
||||
|
||||
function startEditing(messageElement, messageBody, isUserMessage) {
|
||||
|
|
@ -209,30 +220,22 @@ function submitMessageEdit(index, newText, isUserMessage) {
|
|||
editTextInput.value = newText;
|
||||
editRoleInput.value = isUserMessage ? "user" : "assistant";
|
||||
|
||||
editIndexInput.dispatchEvent(new Event("input", { bubbles: true }));
|
||||
editTextInput.dispatchEvent(new Event("input", { bubbles: true }));
|
||||
editRoleInput.dispatchEvent(new Event("input", { bubbles: true }));
|
||||
dispatchGradioInput(editIndexInput);
|
||||
dispatchGradioInput(editTextInput);
|
||||
dispatchGradioInput(editRoleInput);
|
||||
|
||||
editButton.click();
|
||||
return true;
|
||||
}
|
||||
|
||||
function navigateVersion(element, direction) {
|
||||
if (!element) return;
|
||||
|
||||
const messageElement = element.closest(".message, .user-message, .assistant-message");
|
||||
const messageElement = getMessageElement(element);
|
||||
if (!messageElement) return;
|
||||
|
||||
const index = messageElement.getAttribute("data-index");
|
||||
if (!index) return;
|
||||
|
||||
// Determine role based on message element classes
|
||||
let role = "assistant"; // Default role
|
||||
if (messageElement.classList.contains("user-message") ||
|
||||
messageElement.querySelector(".text-you") ||
|
||||
messageElement.querySelector(".circle-you")) {
|
||||
role = "user";
|
||||
}
|
||||
const role = isUserRole(messageElement) ? "user" : "assistant";
|
||||
|
||||
const indexInput = document.getElementById("Navigate-message-index")?.querySelector("input");
|
||||
const directionInput = document.getElementById("Navigate-direction")?.querySelector("textarea");
|
||||
|
|
@ -248,11 +251,9 @@ function navigateVersion(element, direction) {
|
|||
directionInput.value = direction;
|
||||
roleInput.value = role;
|
||||
|
||||
// Trigger 'input' events for Gradio to pick up changes
|
||||
const event = new Event("input", { bubbles: true });
|
||||
indexInput.dispatchEvent(event);
|
||||
directionInput.dispatchEvent(event);
|
||||
roleInput.dispatchEvent(event);
|
||||
dispatchGradioInput(indexInput);
|
||||
dispatchGradioInput(directionInput);
|
||||
dispatchGradioInput(roleInput);
|
||||
|
||||
navigateButton.click();
|
||||
}
|
||||
|
|
@ -313,7 +314,7 @@ function handleMorphdomUpdate(data) {
|
|||
|
||||
function applyMorphdomUpdate(data) {
|
||||
// Determine target element and use it as query scope
|
||||
var target_element, target_html;
|
||||
let target_element, target_html;
|
||||
if (data.last_message_only) {
|
||||
const childNodes = document.getElementsByClassName("messages")[0].childNodes;
|
||||
target_element = childNodes[childNodes.length - 1];
|
||||
|
|
|
|||
171
js/main.js
171
js/main.js
|
|
@ -4,8 +4,9 @@
|
|||
|
||||
// Sync highlight.js theme with the actual Gradio theme
|
||||
var defined_hljs_css = document.body.classList.contains("dark") ? "file/css/highlightjs/github-dark.min.css" : "file/css/highlightjs/github.min.css";
|
||||
if (document.getElementById("highlight-css").getAttribute("href") !== defined_hljs_css) {
|
||||
document.getElementById("highlight-css").setAttribute("href", defined_hljs_css);
|
||||
var hljsCssElement = document.getElementById("highlight-css");
|
||||
if (hljsCssElement.getAttribute("href") !== defined_hljs_css) {
|
||||
hljsCssElement.setAttribute("href", defined_hljs_css);
|
||||
}
|
||||
|
||||
let main_parent = document.getElementById("chat-tab").parentNode;
|
||||
|
|
@ -49,21 +50,18 @@ document.querySelector(".header_bar").addEventListener("click", function(event)
|
|||
//------------------------------------------------
|
||||
|
||||
// --- Helper functions --- //
|
||||
function isModifiedKeyboardEvent() {
|
||||
return (event instanceof KeyboardEvent &&
|
||||
event.shiftKey ||
|
||||
event.ctrlKey ||
|
||||
event.altKey ||
|
||||
event.metaKey);
|
||||
function isModifiedKeyboardEvent(event) {
|
||||
return event instanceof KeyboardEvent &&
|
||||
(event.shiftKey || event.ctrlKey || event.altKey || event.metaKey);
|
||||
}
|
||||
|
||||
function isFocusedOnEditableTextbox() {
|
||||
function isFocusedOnEditableTextbox(event) {
|
||||
if (event.target.tagName === "INPUT" || event.target.tagName === "TEXTAREA") {
|
||||
return !!event.target.value;
|
||||
}
|
||||
return false;
|
||||
}
|
||||
|
||||
let previousTabId = "chat-tab-button";
|
||||
document.addEventListener("keydown", function(event) {
|
||||
// Stop generation on Esc pressed
|
||||
if (event.key === "Escape") {
|
||||
|
|
@ -117,14 +115,14 @@ document.addEventListener("keydown", function(event) {
|
|||
}
|
||||
|
||||
// --- Simple version navigation --- //
|
||||
if (!isFocusedOnEditableTextbox()) {
|
||||
if (!isFocusedOnEditableTextbox(event)) {
|
||||
// Version navigation on Arrow keys (horizontal)
|
||||
if (!isModifiedKeyboardEvent() && event.key === "ArrowLeft") {
|
||||
if (!isModifiedKeyboardEvent(event) && event.key === "ArrowLeft") {
|
||||
event.preventDefault();
|
||||
navigateLastAssistantMessage("left");
|
||||
}
|
||||
|
||||
else if (!isModifiedKeyboardEvent() && event.key === "ArrowRight") {
|
||||
else if (!isModifiedKeyboardEvent(event) && event.key === "ArrowRight") {
|
||||
event.preventDefault();
|
||||
if (!navigateLastAssistantMessage("right")) {
|
||||
// If can't navigate right (last version), regenerate
|
||||
|
|
@ -159,9 +157,8 @@ targetElement.addEventListener("scroll", function() {
|
|||
let diff = targetElement.scrollHeight - targetElement.clientHeight;
|
||||
let isAtBottomNow = Math.abs(targetElement.scrollTop - diff) <= 10 || diff <= 0;
|
||||
|
||||
// Add scrolling class to disable hover effects
|
||||
if (window.isScrolled || !isAtBottomNow) {
|
||||
targetElement.classList.add("scrolling");
|
||||
targetElement.classList.add("scrolling"); // Disables hover effects during scroll
|
||||
}
|
||||
|
||||
if(isAtBottomNow) {
|
||||
|
|
@ -202,12 +199,8 @@ const observer = new MutationObserver(function() {
|
|||
});
|
||||
|
||||
// Only watch for attribute changes on targetElement (e.g. _generating class)
|
||||
const config = {
|
||||
attributes: true
|
||||
};
|
||||
|
||||
// Start observing the target element
|
||||
observer.observe(targetElement, config);
|
||||
observer.observe(targetElement, { attributes: true });
|
||||
|
||||
//------------------------------------------------
|
||||
// Handle syntax highlighting / LaTeX
|
||||
|
|
@ -228,7 +221,7 @@ window.doSyntaxHighlighting = function() {
|
|||
if (messageBodies.length > 0) {
|
||||
let hasSeenVisible = false;
|
||||
|
||||
// Go from last message to first
|
||||
// Go from last message to first so we can early-exit once past visible area
|
||||
for (let i = messageBodies.length - 1; i >= 0; i--) {
|
||||
const messageBody = messageBodies[i];
|
||||
|
||||
|
|
@ -243,8 +236,8 @@ window.doSyntaxHighlighting = function() {
|
|||
codeBlock.classList.add("pretty_scrollbar");
|
||||
});
|
||||
|
||||
// Only render math in visible elements
|
||||
const mathContainers = messageBody.querySelectorAll("p, span, li, td, th, h1, h2, h3, h4, h5, h6, blockquote, figcaption, caption, dd, dt");
|
||||
// Only render math in individually visible containers (the outer check is on the message body)
|
||||
mathContainers.forEach(container => {
|
||||
if (isElementVisibleOnScreen(container)) {
|
||||
renderMathInElement(container, {
|
||||
|
|
@ -271,7 +264,7 @@ const doSyntaxHighlighting = window.doSyntaxHighlighting;
|
|||
// Add some scrollbars
|
||||
//------------------------------------------------
|
||||
const scrollbarElements = document.querySelectorAll(".add_scrollbar textarea, .add_scrollbar .drag-drop-list");
|
||||
for(i = 0; i < scrollbarElements.length; i++) {
|
||||
for(let i = 0; i < scrollbarElements.length; i++) {
|
||||
scrollbarElements[i].classList.remove("scroll-hide");
|
||||
scrollbarElements[i].classList.add("pretty_scrollbar");
|
||||
scrollbarElements[i].style.resize = "none";
|
||||
|
|
@ -298,13 +291,13 @@ if (toolsInfo) {
|
|||
// Remove some backgrounds
|
||||
//------------------------------------------------
|
||||
const noBackgroundelements = document.querySelectorAll(".no-background");
|
||||
for(i = 0; i < noBackgroundelements.length; i++) {
|
||||
for(let i = 0; i < noBackgroundelements.length; i++) {
|
||||
noBackgroundelements[i].parentNode.style.border = "none";
|
||||
noBackgroundelements[i].parentNode.parentNode.parentNode.style.alignItems = "center";
|
||||
}
|
||||
|
||||
const slimDropdownElements = document.querySelectorAll(".slim-dropdown");
|
||||
for (i = 0; i < slimDropdownElements.length; i++) {
|
||||
for (let i = 0; i < slimDropdownElements.length; i++) {
|
||||
const parentNode = slimDropdownElements[i].parentNode;
|
||||
parentNode.style.background = "transparent";
|
||||
parentNode.style.border = "0";
|
||||
|
|
@ -374,49 +367,43 @@ button.addEventListener("click", function () {
|
|||
}
|
||||
});
|
||||
|
||||
// Add event listener for mouseleave on the button
|
||||
button.addEventListener("mouseleave", function () {
|
||||
// Delay to prevent menu hiding when the mouse leaves the button into the menu
|
||||
// Delay to prevent menu hiding when the mouse leaves the button or menu
|
||||
function delayedHideMenu() {
|
||||
setTimeout(function () {
|
||||
if (!isMouseOverButtonOrMenu()) {
|
||||
hideMenu();
|
||||
}
|
||||
}, 100);
|
||||
});
|
||||
}
|
||||
|
||||
// Add event listener for mouseleave on the button
|
||||
button.addEventListener("mouseleave", delayedHideMenu);
|
||||
// Add event listener for mouseleave on the menu
|
||||
menu.addEventListener("mouseleave", function () {
|
||||
// Delay to prevent menu hide when the mouse leaves the menu into the button
|
||||
setTimeout(function () {
|
||||
if (!isMouseOverButtonOrMenu()) {
|
||||
hideMenu();
|
||||
}
|
||||
}, 100);
|
||||
});
|
||||
menu.addEventListener("mouseleave", delayedHideMenu);
|
||||
|
||||
// Add event listener for click anywhere in the document
|
||||
document.addEventListener("click", function (event) {
|
||||
const target = event.target;
|
||||
|
||||
// Check if the click is outside the button/menu and the menu is visible
|
||||
if (!isMouseOverButtonOrMenu() && menu.style.display === "flex") {
|
||||
hideMenu();
|
||||
}
|
||||
|
||||
if (event.target.classList.contains("pfp_character")) {
|
||||
const target = event.target;
|
||||
|
||||
if (target.classList.contains("pfp_character")) {
|
||||
toggleBigPicture();
|
||||
}
|
||||
|
||||
// Handle sidebar clicks on mobile
|
||||
if (isMobile()) {
|
||||
// Check if the click did NOT originate from any of the specified toggle buttons or elements
|
||||
// Check if the click did NOT originate from any of the specified toggle buttons or elements
|
||||
if (
|
||||
target.closest("#navigation-toggle") !== navigationToggle &&
|
||||
target.closest("#past-chats-toggle") !== pastChatsToggle &&
|
||||
target.closest("#chat-controls-toggle") !== chatControlsToggle &&
|
||||
target.closest(".header_bar") !== headerBar &&
|
||||
target.closest("#past-chats-row") !== pastChatsRow &&
|
||||
target.closest("#chat-controls") !== chatControlsRow
|
||||
target.closest("#past-chats-toggle") !== pastChatsToggle &&
|
||||
target.closest("#chat-controls-toggle") !== chatControlsToggle &&
|
||||
target.closest(".header_bar") !== headerBar &&
|
||||
target.closest("#past-chats-row") !== pastChatsRow &&
|
||||
target.closest("#chat-controls") !== chatControlsRow
|
||||
) {
|
||||
handleIndividualSidebarClose(event);
|
||||
}
|
||||
|
|
@ -433,27 +420,19 @@ document.getElementById("chat-input-row").classList.add("chat-input-positioned")
|
|||
//------------------------------------------------
|
||||
const chatTextArea = document.getElementById("chat-input").querySelector("textarea");
|
||||
|
||||
function respondToChatInputVisibility(element, callback) {
|
||||
var options = {
|
||||
root: document.documentElement,
|
||||
};
|
||||
|
||||
var observer = new IntersectionObserver((entries, observer) => {
|
||||
function focusOnVisible(element) {
|
||||
var observer = new IntersectionObserver((entries) => {
|
||||
entries.forEach(entry => {
|
||||
callback(entry.intersectionRatio > 0);
|
||||
if (entry.intersectionRatio > 0) {
|
||||
element.focus();
|
||||
}
|
||||
});
|
||||
}, options);
|
||||
}, { root: document.documentElement });
|
||||
|
||||
observer.observe(element);
|
||||
}
|
||||
|
||||
function handleChatInputVisibilityChange(isVisible) {
|
||||
if (isVisible) {
|
||||
chatTextArea.focus();
|
||||
}
|
||||
}
|
||||
|
||||
respondToChatInputVisibility(chatTextArea, handleChatInputVisibilityChange);
|
||||
focusOnVisible(chatTextArea);
|
||||
|
||||
//------------------------------------------------
|
||||
// Show enlarged character picture when the profile
|
||||
|
|
@ -463,8 +442,7 @@ let bigPictureVisible = false;
|
|||
|
||||
function addBigPicture() {
|
||||
var imgElement = document.createElement("img");
|
||||
var timestamp = new Date().getTime();
|
||||
imgElement.src = "/file/user_data/cache/pfp_character.png?time=" + timestamp;
|
||||
imgElement.src = getProfilePictureUrl();
|
||||
imgElement.classList.add("bigProfilePicture");
|
||||
imgElement.addEventListener("load", function () {
|
||||
this.style.visibility = "visible";
|
||||
|
|
@ -478,9 +456,8 @@ function addBigPicture() {
|
|||
}
|
||||
|
||||
// Remove every enlarged profile picture (".bigProfilePicture") from the page.
function deleteBigPicture() {
  document.querySelectorAll(".bigProfilePicture").forEach(function (element) {
    element.remove();
  });
}
|
||||
|
||||
|
|
@ -494,44 +471,11 @@ function toggleBigPicture() {
|
|||
}
|
||||
}
|
||||
|
||||
//------------------------------------------------
|
||||
// Handle the chat input box growth
|
||||
//------------------------------------------------
|
||||
|
||||
// Cache DOM elements
|
||||
const chatContainer = document.getElementById("chat").parentNode.parentNode.parentNode;
|
||||
const chatInput = document.querySelector("#chat-input textarea");
|
||||
|
||||
// Variables to store current dimensions
|
||||
let currentChatInputHeight = chatInput.clientHeight;
|
||||
|
||||
//------------------------------------------------
|
||||
// Focus on the rename text area when it becomes visible
|
||||
//------------------------------------------------
|
||||
const renameTextArea = document.getElementById("rename-row").querySelector("textarea");
|
||||
|
||||
function respondToRenameVisibility(element, callback) {
|
||||
var options = {
|
||||
root: document.documentElement,
|
||||
};
|
||||
|
||||
var observer = new IntersectionObserver((entries, observer) => {
|
||||
entries.forEach(entry => {
|
||||
callback(entry.intersectionRatio > 0);
|
||||
});
|
||||
}, options);
|
||||
|
||||
observer.observe(element);
|
||||
}
|
||||
|
||||
|
||||
function handleVisibilityChange(isVisible) {
|
||||
if (isVisible) {
|
||||
renameTextArea.focus();
|
||||
}
|
||||
}
|
||||
|
||||
respondToRenameVisibility(renameTextArea, handleVisibilityChange);
|
||||
focusOnVisible(renameTextArea);
|
||||
|
||||
//------------------------------------------------
|
||||
// Adjust the chat tab margin if no extension UI
|
||||
|
|
@ -737,21 +681,21 @@ function handleIndividualSidebarClose(event) {
|
|||
|
||||
// Close navigation bar if click is outside and it is open
|
||||
if (!headerBar.contains(target) && !headerBar.classList.contains("sidebar-hidden")) {
|
||||
toggleSidebar(headerBar, navigationToggle, true);
|
||||
toggleSidebar(headerBar, navigationToggle);
|
||||
}
|
||||
|
||||
// Close past chats row if click is outside and it is open
|
||||
if (!pastChatsRow.contains(target) && !pastChatsRow.classList.contains("sidebar-hidden")) {
|
||||
toggleSidebar(pastChatsRow, pastChatsToggle, true);
|
||||
toggleSidebar(pastChatsRow, pastChatsToggle);
|
||||
}
|
||||
|
||||
// Close chat controls row if click is outside and it is open
|
||||
if (!chatControlsRow.contains(target) && !chatControlsRow.classList.contains("sidebar-hidden")) {
|
||||
toggleSidebar(chatControlsRow, chatControlsToggle, true);
|
||||
toggleSidebar(chatControlsRow, chatControlsToggle);
|
||||
}
|
||||
}
|
||||
|
||||
function toggleSidebar(sidebar, toggle, forceClose = false) {
|
||||
function toggleSidebar(sidebar, toggle) {
|
||||
const isCurrentlyHidden = sidebar.classList.contains("sidebar-hidden");
|
||||
const shouldClose = !isCurrentlyHidden;
|
||||
|
||||
|
|
@ -776,11 +720,6 @@ function toggleSidebar(sidebar, toggle, forceClose = false) {
|
|||
toggle.classList.toggle("chat-controls-open", !shouldClose);
|
||||
toggle.innerHTML = shouldClose ? leftArrowSVG : rightArrowSVG;
|
||||
}
|
||||
|
||||
// Mobile handling
|
||||
if (isMobile()) {
|
||||
sidebar.classList.toggle("sidebar-shown", !shouldClose);
|
||||
}
|
||||
}
|
||||
|
||||
// Function to check if the device is mobile
|
||||
|
|
@ -840,17 +779,17 @@ pastChatsToggle.addEventListener("click", () => {
|
|||
const isCurrentlyOpen = !pastChatsRow.classList.contains("sidebar-hidden");
|
||||
toggleSidebar(pastChatsRow, pastChatsToggle);
|
||||
|
||||
// On desktop, open/close both sidebars at the same time
|
||||
// On desktop, sync both sidebars together
|
||||
if (!isMobile()) {
|
||||
if (isCurrentlyOpen) {
|
||||
// If we just closed the left sidebar, also close the right sidebar
|
||||
if (!chatControlsRow.classList.contains("sidebar-hidden")) {
|
||||
toggleSidebar(chatControlsRow, chatControlsToggle, true);
|
||||
toggleSidebar(chatControlsRow, chatControlsToggle);
|
||||
}
|
||||
} else {
|
||||
// If we just opened the left sidebar, also open the right sidebar
|
||||
if (chatControlsRow.classList.contains("sidebar-hidden")) {
|
||||
toggleSidebar(chatControlsRow, chatControlsToggle, false);
|
||||
toggleSidebar(chatControlsRow, chatControlsToggle);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -860,17 +799,17 @@ chatControlsToggle.addEventListener("click", () => {
|
|||
const isCurrentlyOpen = !chatControlsRow.classList.contains("sidebar-hidden");
|
||||
toggleSidebar(chatControlsRow, chatControlsToggle);
|
||||
|
||||
// On desktop, open/close both sidebars at the same time
|
||||
// On desktop, sync both sidebars together
|
||||
if (!isMobile()) {
|
||||
if (isCurrentlyOpen) {
|
||||
// If we just closed the right sidebar, also close the left sidebar
|
||||
if (!pastChatsRow.classList.contains("sidebar-hidden")) {
|
||||
toggleSidebar(pastChatsRow, pastChatsToggle, true);
|
||||
toggleSidebar(pastChatsRow, pastChatsToggle);
|
||||
}
|
||||
} else {
|
||||
// If we just opened the right sidebar, also open the left sidebar
|
||||
if (pastChatsRow.classList.contains("sidebar-hidden")) {
|
||||
toggleSidebar(pastChatsRow, pastChatsToggle, false);
|
||||
toggleSidebar(pastChatsRow, pastChatsToggle);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
@ -890,7 +829,7 @@ if (isMobile()) {
|
|||
const textarea = document.querySelector("#chat-input textarea");
|
||||
|
||||
if (textarea) {
|
||||
// Simulate adding and removing a newline
|
||||
// Force textarea height recalculation by simulating content change
|
||||
textarea.value += "\n";
|
||||
textarea.dispatchEvent(new Event("input", { bubbles: true }));
|
||||
textarea.value = textarea.value.slice(0, -1);
|
||||
|
|
|
|||
|
|
@ -1,10 +1,9 @@
|
|||
// Functions for downloading JSON files
|
||||
// Return the current local time as a compact "YYYYMMDDTHHMMSS" string,
// used as a filename suffix for downloaded JSON files.
function getCurrentTimestamp() {
  const now = new Date();
  const timezoneOffset = now.getTimezoneOffset() * 60000; // Convert minutes to milliseconds
  // Shift the instant by the offset so that toISOString() (always UTC)
  // prints the local wall-clock time.
  const localTime = new Date(now.getTime() - timezoneOffset);
  // Strip "-" and ":" and cut before the fractional seconds.
  return localTime.toISOString().replace(/[-:]/g, "").slice(0, 15);
}
|
||||
|
||||
function saveFile(contents, filename) {
|
||||
|
|
@ -18,23 +17,18 @@ function saveFile(contents, filename) {
|
|||
}
|
||||
|
||||
// Build a timestamped filename for a chat history and pass it to saveFile.
//
// history:   contents forwarded unchanged to saveFile
// character: character name; used in the filename only in chat modes
// mode:      current mode; "chat" and "chat-instruct" are the chat modes
function saveHistory(history, character, mode) {
  let path;

  if (["chat", "chat-instruct"].includes(mode) && character && character.trim() !== "") {
    path = `history_${character}_${getCurrentTimestamp()}.json`;
  } else {
    // Fall back to "unknown" when mode is missing or empty.
    path = `history_${mode || "unknown"}_${getCurrentTimestamp()}.json`;
  }

  saveFile(history, path);
}
|
||||
|
||||
// Pass the session contents to saveFile under a timestamped
// "session_<timestamp>.json" filename.
function saveSession(session) {
  const path = `session_${getCurrentTimestamp()}.json`;
  saveFile(session, path);
}
|
||||
|
|
|
|||
|
|
@ -1,13 +1,11 @@
|
|||
const chatParent = document.querySelector(".chat-parent");
|
||||
|
||||
function toggle_controls(value) {
|
||||
const navToggle = document.getElementById("navigation-toggle");
|
||||
const pastChatsToggle = document.getElementById("past-chats-toggle");
|
||||
const extensions = document.querySelector("#extensions");
|
||||
const galleryExtension = document.getElementById("gallery-extension");
|
||||
|
||||
if (value) {
|
||||
// SHOW MODE: Click toggles to show hidden sidebars
|
||||
const navToggle = document.getElementById("navigation-toggle");
|
||||
const pastChatsToggle = document.getElementById("past-chats-toggle");
|
||||
|
||||
if (navToggle && document.querySelector(".header_bar")?.classList.contains("sidebar-hidden")) {
|
||||
navToggle.click();
|
||||
}
|
||||
|
|
@ -19,17 +17,11 @@ function toggle_controls(value) {
|
|||
if (extensions) {
|
||||
extensions.style.display = "inherit";
|
||||
}
|
||||
|
||||
let gallery_element = document.getElementById("gallery-extension");
|
||||
if (gallery_element) {
|
||||
gallery_element.style.display = "block";
|
||||
if (galleryExtension) {
|
||||
galleryExtension.style.display = "block";
|
||||
}
|
||||
|
||||
} else {
|
||||
// HIDE MODE: Click toggles to hide visible sidebars
|
||||
const navToggle = document.getElementById("navigation-toggle");
|
||||
const pastChatsToggle = document.getElementById("past-chats-toggle");
|
||||
|
||||
if (navToggle && !document.querySelector(".header_bar")?.classList.contains("sidebar-hidden")) {
|
||||
navToggle.click();
|
||||
}
|
||||
|
|
@ -41,5 +33,8 @@ function toggle_controls(value) {
|
|||
if (extensions) {
|
||||
extensions.style.display = "none";
|
||||
}
|
||||
if (galleryExtension) {
|
||||
galleryExtension.style.display = "none";
|
||||
}
|
||||
}
|
||||
}
|
||||
|
|
|
|||
|
|
@ -2,17 +2,9 @@ function scrollToTop() {
|
|||
window.scrollTo({ top: 0 });
|
||||
}
|
||||
|
||||
// Return an array of the <button> elements inside `container` whose trimmed
// text content equals `buttonText` exactly.
//
// buttonText: label to match (compared after trimming the button's text)
// container:  node to search in; defaults to the whole document
function findButtonsByText(buttonText, container = document) {
  return Array.from(container.getElementsByTagName("button"))
    .filter(btn => btn.textContent.trim() === buttonText);
}
|
||||
|
||||
function switch_to_chat() {
|
||||
|
|
@ -39,13 +31,9 @@ function switch_to_character() {
|
|||
|
||||
function switch_to_image_ai_generate() {
|
||||
const container = document.querySelector("#image-ai-tab");
|
||||
const buttons = container.getElementsByTagName("button");
|
||||
|
||||
for (let i = 0; i < buttons.length; i++) {
|
||||
if (buttons[i].textContent.trim() === "Generate") {
|
||||
buttons[i].click();
|
||||
break;
|
||||
}
|
||||
const generateBtn = findButtonsByText("Generate", container)[0];
|
||||
if (generateBtn) {
|
||||
generateBtn.click();
|
||||
}
|
||||
|
||||
scrollToTop();
|
||||
|
|
|
|||
|
|
@ -1,7 +1,6 @@
|
|||
// Refresh the source of the enlarged profile picture, if one is currently
// present in the document. Does nothing when no ".bigProfilePicture" exists.
function updateBigPicture() {
  var existingElement = document.querySelector(".bigProfilePicture");
  if (existingElement) {
    existingElement.src = getProfilePictureUrl();
  }
}
|
||||
|
|
|
|||
|
|
@ -39,6 +39,146 @@ def load_chat_template_file(filepath):
|
|||
return text
|
||||
|
||||
|
||||
def _first_token_display_str(token_id, prompt, tokenizer):
|
||||
"""Return the display string for the first prompt token.
|
||||
|
||||
Returns empty string for BOS or tokens that don't appear at the start
|
||||
of the prompt text, so they don't shift text_offset for subsequent tokens.
|
||||
"""
|
||||
token_id = int(token_id)
|
||||
bos_id = getattr(tokenizer, 'bos_token_id', None)
|
||||
if bos_id is not None and token_id == bos_id:
|
||||
return ""
|
||||
|
||||
import torch
|
||||
tok = tokenizer.decode(torch.tensor([token_id]))
|
||||
if not prompt.startswith(tok):
|
||||
return ""
|
||||
|
||||
return tok
|
||||
|
||||
|
||||
def _compute_prompt_logprob_entries(prompt, logprobs_count, input_ids=None):
    """Compute logprob entries for prompt tokens via a forward pass.

    Returns a list of logprob entries in the standard format.
    The first token gets a null entry (no conditioning context).

    Supported for HF-compatible loaders (Transformers, ExLlamav3_HF, etc.)
    via a single forward pass, and for llama.cpp via the server's
    prompt_logprobs parameter. Returns [] for unsupported loaders.

    Args:
        prompt: Prompt text.
        logprobs_count: Requested number of top logprobs per token; values
            below 1 are treated as 1.
        input_ids: Optional pre-encoded prompt; when None the prompt is
            encoded here. Indexed as ``input_ids[0]`` to get the token ids.

    Returns:
        A list with one entry per prompt token. The first entry is
        ``{"token": str, "null_logprob": True}``; each later entry is
        ``{"top_logprobs": [...]}`` whose first item is the actual prompt
        token, followed by the other top candidates, each item being
        ``{"token", "token_id", "logprob"}``. An empty list is returned for
        an empty prompt or when the model exposes neither
        ``get_prompt_logits`` nor ``forward``.
    """
    if input_ids is None:
        input_ids = encode(prompt)  # (1, seq_len) tensor or array

    token_ids = input_ids[0]
    n_tokens = len(token_ids)

    if n_tokens == 0:
        return []

    loader = shared.args.loader
    model = shared.model

    if loader == 'llama.cpp':
        # The llama.cpp model object computes prompt logprobs itself.
        return model.get_prompt_logprob_entries(token_ids, max(logprobs_count, 1), prompt=prompt)

    first_token_str = _first_token_display_str(token_ids[0], prompt, shared.tokenizer)

    if n_tokens <= 1:
        return [{"token": first_token_str, "null_logprob": True}]

    import torch
    from modules.torch_utils import clear_torch_cache

    if hasattr(model, 'get_prompt_logits'):
        logits = model.get_prompt_logits(input_ids)

    elif hasattr(model, 'forward'):
        # HF-compatible loaders (Transformers, ExLlamav3_HF, etc.)
        input_ids_tensor = input_ids if isinstance(input_ids, torch.Tensor) else torch.tensor(input_ids, dtype=torch.long)
        if hasattr(model, 'device'):
            input_ids_tensor = input_ids_tensor.to(model.device)
        with torch.no_grad():
            # Pass labels to ensure logits are returned for ALL positions,
            # not just the last token (some HF wrappers like ExLlamav3_HF
            # only compute the last-token logits when labels are absent).
            outputs = model(input_ids=input_ids_tensor, labels=input_ids_tensor)
            logits = outputs.logits  # keep on GPU, (1, seq_len, vocab) in model dtype
            del outputs

    else:
        return []

    entries = [{"token": first_token_str, "null_logprob": True}]

    logprobs_count = max(logprobs_count, 1)
    k = min(logprobs_count, logits.shape[-1])
    chunk_size = 2048
    # Ids that will need a decoded string: every prompt token after the
    # first, plus (added below) every id that shows up in a top-k list.
    unique_ids = set(int(tid) for tid in token_ids[1:])

    # Process logits in chunks on GPU, only move top-K results to CPU
    all_top_log_probs_list = []
    all_top_indices_list = []
    all_actual_lps = []

    # Row i of the logits scores the token at position i + 1, so only the
    # first n_tokens - 1 rows are used.
    for start in range(0, n_tokens - 1, chunk_size):
        end = min(start + chunk_size, n_tokens - 1)
        chunk_logits = logits[0, start:end].float()  # (chunk, vocab) on GPU
        chunk_lse = torch.logsumexp(chunk_logits, dim=-1)
        chunk_top_values, chunk_top_indices = torch.topk(chunk_logits, k=k, dim=-1)
        # log_softmax restricted to the top-k columns: logit - logsumexp.
        chunk_top_log_probs = chunk_top_values - chunk_lse.unsqueeze(-1)

        # Compute logprob for actual next tokens in this chunk
        chunk_top_sets = [set(chunk_top_indices[j].tolist()) for j in range(end - start)]
        for j in range(end - start):
            actual_tid = int(token_ids[start + j + 1])
            if actual_tid not in chunk_top_sets[j]:
                all_actual_lps.append((chunk_logits[j, actual_tid] - chunk_lse[j]).item())
            else:
                all_actual_lps.append(None)  # will use top_log_probs

        all_top_log_probs_list.append(chunk_top_log_probs.cpu())
        all_top_indices_list.append(chunk_top_indices.cpu())
        unique_ids.update(int(tid) for tid in chunk_top_indices.flatten().tolist())
        del chunk_logits, chunk_lse, chunk_top_values

    del logits
    clear_torch_cache()

    all_top_log_probs = torch.cat(all_top_log_probs_list, dim=0)
    all_top_indices = torch.cat(all_top_indices_list, dim=0)

    # Decode every needed id once, then look strings up by id.
    unique_ids_list = sorted(unique_ids)
    decoded_list = shared.tokenizer.batch_decode([[tid] for tid in unique_ids_list]) if hasattr(shared.tokenizer, 'batch_decode') else [shared.tokenizer.decode(torch.tensor([tid])) for tid in unique_ids_list]
    decoded_strs = dict(zip(unique_ids_list, decoded_list))

    for i in range(1, n_tokens):
        token_id = int(token_ids[i])
        idx = i - 1
        top_log_probs = all_top_log_probs[idx]
        top_ids = all_top_indices[idx].tolist()
        actual_token_str = decoded_strs[token_id]

        if token_id in top_ids:
            # Actual token is among the top-k: reuse its top-k logprob and
            # list the remaining k - 1 candidates as alternatives.
            actual_lp = top_log_probs[top_ids.index(token_id)].item()
            alternatives = [
                {"token": decoded_strs[top_ids[j]], "token_id": top_ids[j], "logprob": top_log_probs[j].item()}
                for j in range(k) if top_ids[j] != token_id
            ]
        else:
            # Actual token is outside the top-k: use the logprob computed in
            # the chunk loop and keep only the first k - 1 candidates so the
            # entry still holds k items in total.
            actual_lp = all_actual_lps[idx]
            alternatives = [
                {"token": decoded_strs[top_ids[j]], "token_id": top_ids[j], "logprob": top_log_probs[j].item()}
                for j in range(k - 1)
            ]

        # The actual prompt token always comes first in the entry.
        entry = {"top_logprobs": [{"token": actual_token_str, "token_id": token_id, "logprob": actual_lp}] + alternatives}
        entries.append(entry)

    return entries
|
||||
|
||||
|
||||
def _get_raw_logprob_entries(offset=0):
|
||||
"""Get raw logprob entries from llama.cpp/ExLlamav3 backend, starting from offset.
|
||||
|
||||
|
|
@ -65,6 +205,21 @@ def _parse_entry_top(entry):
|
|||
return entry.get('top_logprobs', entry.get('top_probs', []))
|
||||
|
||||
|
||||
def _extract_sampled_token(entry, top):
|
||||
"""Get the actually sampled token and its logprob from a logprob entry.
|
||||
|
||||
Uses the entry-level token/logprob when available (the actually sampled
|
||||
token), falling back to top[0] (highest-probability alternative) which
|
||||
may differ with non-greedy sampling.
|
||||
"""
|
||||
if 'token' in entry:
|
||||
return entry['token'], entry.get('logprob', entry.get('prob', 0))
|
||||
|
||||
token_str = top[0].get('token', '')
|
||||
token_logprob = top[0].get('logprob', top[0].get('prob', 0))
|
||||
return token_str, token_logprob
|
||||
|
||||
|
||||
def format_chat_logprobs(entries):
|
||||
"""Format logprob entries into OpenAI chat completions logprobs format.
|
||||
|
||||
|
|
@ -79,9 +234,7 @@ def format_chat_logprobs(entries):
|
|||
if not top:
|
||||
continue
|
||||
|
||||
chosen = top[0]
|
||||
token_str = chosen.get('token', '')
|
||||
token_logprob = chosen.get('logprob', chosen.get('prob', 0))
|
||||
token_str, token_logprob = _extract_sampled_token(entry, top)
|
||||
|
||||
top_list = []
|
||||
for item in top:
|
||||
|
|
@ -106,7 +259,7 @@ def format_chat_logprobs(entries):
|
|||
def format_completion_logprobs(entries):
|
||||
"""Format logprob entries into OpenAI completions logprobs format.
|
||||
|
||||
Output: {"tokens", "token_logprobs", "top_logprobs": [{token: prob}], "text_offset"}
|
||||
Output: {"tokens", "token_logprobs", "top_logprobs": [{token: prob}], "top_logprobs_ids": [{token_id: prob}], "text_offset"}
|
||||
"""
|
||||
if not entries:
|
||||
return None
|
||||
|
|
@ -114,17 +267,27 @@ def format_completion_logprobs(entries):
|
|||
tokens = []
|
||||
token_logprobs = []
|
||||
top_logprobs = []
|
||||
top_logprobs_ids = []
|
||||
text_offset = []
|
||||
offset = 0
|
||||
|
||||
for entry in entries:
|
||||
# Handle null logprob entries (first prompt token with echo)
|
||||
if entry.get("null_logprob"):
|
||||
token_str = entry.get("token", "")
|
||||
tokens.append(token_str)
|
||||
token_logprobs.append(None)
|
||||
top_logprobs.append(None)
|
||||
top_logprobs_ids.append(None)
|
||||
text_offset.append(offset)
|
||||
offset += len(token_str)
|
||||
continue
|
||||
|
||||
top = _parse_entry_top(entry)
|
||||
if not top:
|
||||
continue
|
||||
|
||||
chosen = top[0]
|
||||
token_str = chosen.get('token', '')
|
||||
token_logprob = chosen.get('logprob', chosen.get('prob', 0))
|
||||
token_str, token_logprob = _extract_sampled_token(entry, top)
|
||||
|
||||
tokens.append(token_str)
|
||||
token_logprobs.append(token_logprob)
|
||||
|
|
@ -132,21 +295,29 @@ def format_completion_logprobs(entries):
|
|||
offset += len(token_str)
|
||||
|
||||
top_dict = {}
|
||||
top_dict_ids = {}
|
||||
for item in top:
|
||||
t = item.get('token', '')
|
||||
lp = item.get('logprob', item.get('prob', 0))
|
||||
top_dict[t] = lp
|
||||
tid = item.get('token_id', item.get('id'))
|
||||
if tid is not None:
|
||||
top_dict_ids[tid] = lp
|
||||
top_logprobs.append(top_dict)
|
||||
top_logprobs_ids.append(top_dict_ids if top_dict_ids else None)
|
||||
|
||||
if not tokens:
|
||||
return None
|
||||
|
||||
return {
|
||||
result = {
|
||||
"tokens": tokens,
|
||||
"token_logprobs": token_logprobs,
|
||||
"top_logprobs": top_logprobs,
|
||||
"text_offset": text_offset
|
||||
}
|
||||
if any(x is not None for x in top_logprobs_ids):
|
||||
result["top_logprobs_ids"] = top_logprobs_ids
|
||||
return result
|
||||
|
||||
|
||||
def process_parameters(body, is_legacy=False):
|
||||
|
|
@ -407,7 +578,10 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
|
|||
})
|
||||
|
||||
max_tokens = generate_params['max_new_tokens']
|
||||
if max_tokens in [None, 0]:
|
||||
if max_tokens is not None and max_tokens <= 0:
|
||||
raise InvalidRequestError(message="max_tokens must be greater than 0.", param="max_tokens")
|
||||
|
||||
if max_tokens is None:
|
||||
generate_params['max_new_tokens'] = 512
|
||||
generate_params['auto_max_new_tokens'] = True
|
||||
|
||||
|
|
@ -652,6 +826,15 @@ def completions_common(body: dict, is_legacy: bool = False, stream=False, stop_e
|
|||
# common params
|
||||
generate_params = process_parameters(body, is_legacy=is_legacy)
|
||||
max_tokens = generate_params['max_new_tokens']
|
||||
if max_tokens is None:
|
||||
generate_params['max_new_tokens'] = 512
|
||||
generate_params['auto_max_new_tokens'] = True
|
||||
max_tokens = 512
|
||||
elif max_tokens < 0:
|
||||
raise InvalidRequestError(message="max_tokens must be greater than or equal to 0.", param="max_tokens")
|
||||
elif max_tokens == 0 and body.get('logprobs') is None:
|
||||
raise InvalidRequestError(message="max_tokens is 0 but no logprobs parameter was specified.", param="max_tokens")
|
||||
|
||||
generate_params['stream'] = stream
|
||||
if stop_event is not None:
|
||||
generate_params['stop_event'] = stop_event
|
||||
|
|
@ -700,9 +883,17 @@ def completions_common(body: dict, is_legacy: bool = False, stream=False, stop_e
|
|||
prompt = decode(prompt)[0]
|
||||
|
||||
prefix = prompt if echo else ''
|
||||
token_count = len(encode(prompt)[0])
|
||||
prompt_input_ids = encode(prompt)
|
||||
token_count = len(prompt_input_ids[0])
|
||||
total_prompt_token_count += token_count
|
||||
|
||||
# Compute prompt logprobs once per prompt (shared across n_completions)
|
||||
logprobs_val = body.get('logprobs', None)
|
||||
if echo and logprobs_val is not None and logprobs_val >= 0:
|
||||
prompt_entries = _compute_prompt_logprob_entries(prompt, logprobs_val, input_ids=prompt_input_ids)
|
||||
else:
|
||||
prompt_entries = None
|
||||
|
||||
original_seed = generate_params.get('seed', -1)
|
||||
for _n in range(n_completions):
|
||||
# Increment seed for each completion to ensure diversity (matches llama.cpp native behavior)
|
||||
|
|
@ -713,29 +904,41 @@ def completions_common(body: dict, is_legacy: bool = False, stream=False, stop_e
|
|||
logprob_proc.token_alternatives_history.clear()
|
||||
|
||||
# generate reply #######################################
|
||||
debug_msg({'prompt': prompt, 'generate_params': generate_params})
|
||||
generator = generate_reply(prompt, generate_params, is_chat=False)
|
||||
answer = ''
|
||||
|
||||
for a in generator:
|
||||
answer = a
|
||||
|
||||
completion_token_count = len(encode(answer)[0])
|
||||
total_completion_token_count += completion_token_count
|
||||
stop_reason = "stop"
|
||||
if token_count + completion_token_count >= generate_params['truncation_length'] or completion_token_count >= max_tokens:
|
||||
stop_reason = "length"
|
||||
|
||||
if logprob_proc:
|
||||
all_entries = []
|
||||
for alt in logprob_proc.token_alternatives_history:
|
||||
all_entries.extend(_dict_to_logprob_entries(alt))
|
||||
completion_logprobs = format_completion_logprobs(all_entries)
|
||||
elif shared.args.loader in ('llama.cpp', 'ExLlamav3'):
|
||||
raw = getattr(shared.model, 'last_completion_probabilities', None)
|
||||
completion_logprobs = format_completion_logprobs(raw)
|
||||
if max_tokens == 0:
|
||||
answer = ''
|
||||
completion_token_count = 0
|
||||
stop_reason = "stop"
|
||||
else:
|
||||
completion_logprobs = None
|
||||
debug_msg({'prompt': prompt, 'generate_params': generate_params})
|
||||
generator = generate_reply(prompt, generate_params, is_chat=False)
|
||||
answer = ''
|
||||
|
||||
for a in generator:
|
||||
answer = a
|
||||
|
||||
completion_token_count = len(encode(answer)[0])
|
||||
stop_reason = "stop"
|
||||
if token_count + completion_token_count >= generate_params['truncation_length'] or completion_token_count >= max_tokens:
|
||||
stop_reason = "length"
|
||||
|
||||
total_completion_token_count += completion_token_count
|
||||
|
||||
if max_tokens == 0:
|
||||
all_entries = []
|
||||
else:
|
||||
if logprob_proc:
|
||||
all_entries = []
|
||||
for alt in logprob_proc.token_alternatives_history:
|
||||
all_entries.extend(_dict_to_logprob_entries(alt))
|
||||
elif shared.args.loader in ('llama.cpp', 'ExLlamav3'):
|
||||
all_entries = getattr(shared.model, 'last_completion_probabilities', None) or []
|
||||
else:
|
||||
all_entries = []
|
||||
|
||||
if prompt_entries:
|
||||
all_entries = prompt_entries + all_entries
|
||||
|
||||
completion_logprobs = format_completion_logprobs(all_entries) if all_entries else None
|
||||
|
||||
respi = {
|
||||
"index": choice_index,
|
||||
|
|
@ -775,7 +978,8 @@ def completions_common(body: dict, is_legacy: bool = False, stream=False, stop_e
|
|||
raise InvalidRequestError(message="API Batched generation not yet supported.", param=prompt_str)
|
||||
|
||||
prefix = prompt if echo else ''
|
||||
token_count = len(encode(prompt)[0])
|
||||
prompt_input_ids = encode(prompt)
|
||||
token_count = len(prompt_input_ids[0])
|
||||
|
||||
# Check if usage should be included in streaming chunks per OpenAI spec
|
||||
stream_options = body.get('stream_options')
|
||||
|
|
@ -808,37 +1012,57 @@ def completions_common(body: dict, is_legacy: bool = False, stream=False, stop_e
|
|||
|
||||
return chunk
|
||||
|
||||
logprobs_val = body.get('logprobs', None)
|
||||
if echo and logprobs_val is not None and logprobs_val >= 0:
|
||||
prompt_entries = _compute_prompt_logprob_entries(prompt, logprobs_val, input_ids=prompt_input_ids)
|
||||
prompt_logprobs_formatted = format_completion_logprobs(prompt_entries) if prompt_entries else None
|
||||
else:
|
||||
prompt_logprobs_formatted = None
|
||||
|
||||
# Clear stale logprobs from any previous request before building the
|
||||
# first chunk, so text_streaming_chunk doesn't pick up old data.
|
||||
if hasattr(shared.model, 'last_completion_probabilities'):
|
||||
shared.model.last_completion_probabilities = []
|
||||
cmpl_logprobs_offset[0] = 0
|
||||
|
||||
chunk = text_streaming_chunk(prefix)
|
||||
if prompt_logprobs_formatted is not None:
|
||||
chunk[resp_list][0]["logprobs"] = prompt_logprobs_formatted
|
||||
if include_usage:
|
||||
chunk['usage'] = None
|
||||
yield chunk
|
||||
|
||||
# generate reply #######################################
|
||||
debug_msg({'prompt': prompt, 'generate_params': generate_params})
|
||||
generator = generate_reply(prompt, generate_params, is_chat=False)
|
||||
answer = ''
|
||||
seen_content = ''
|
||||
completion_token_count = 0
|
||||
if max_tokens == 0:
|
||||
answer = ''
|
||||
completion_token_count = 0
|
||||
stop_reason = "stop"
|
||||
else:
|
||||
debug_msg({'prompt': prompt, 'generate_params': generate_params})
|
||||
generator = generate_reply(prompt, generate_params, is_chat=False)
|
||||
answer = ''
|
||||
seen_content = ''
|
||||
completion_token_count = 0
|
||||
|
||||
for a in generator:
|
||||
answer = a
|
||||
for a in generator:
|
||||
answer = a
|
||||
|
||||
len_seen = len(seen_content)
|
||||
new_content = answer[len_seen:]
|
||||
len_seen = len(seen_content)
|
||||
new_content = answer[len_seen:]
|
||||
|
||||
if not new_content or chr(0xfffd) in new_content: # partial unicode character, don't send it yet.
|
||||
continue
|
||||
if not new_content or chr(0xfffd) in new_content: # partial unicode character, don't send it yet.
|
||||
continue
|
||||
|
||||
seen_content = answer
|
||||
chunk = text_streaming_chunk(new_content)
|
||||
if include_usage:
|
||||
chunk['usage'] = None
|
||||
yield chunk
|
||||
seen_content = answer
|
||||
chunk = text_streaming_chunk(new_content)
|
||||
if include_usage:
|
||||
chunk['usage'] = None
|
||||
yield chunk
|
||||
|
||||
completion_token_count = len(encode(answer)[0])
|
||||
stop_reason = "stop"
|
||||
if token_count + completion_token_count >= generate_params['truncation_length'] or completion_token_count >= max_tokens:
|
||||
stop_reason = "length"
|
||||
completion_token_count = len(encode(answer)[0])
|
||||
stop_reason = "stop"
|
||||
if token_count + completion_token_count >= generate_params['truncation_length'] or completion_token_count >= max_tokens:
|
||||
stop_reason = "length"
|
||||
|
||||
chunk = text_streaming_chunk(suffix)
|
||||
chunk[resp_list][0]["finish_reason"] = stop_reason
|
||||
|
|
|
|||
|
|
@ -68,7 +68,7 @@ def _load_model(data):
|
|||
if k in shared.settings:
|
||||
shared.settings[k] = settings[k]
|
||||
if k == 'truncation_length':
|
||||
logger.info(f"TRUNCATION LENGTH (UPDATED): {shared.settings['truncation_length']}")
|
||||
logger.info(f"CONTEXT LENGTH (UPDATED): {shared.settings['truncation_length']}")
|
||||
elif k == 'instruction_template':
|
||||
logger.info(f"INSTRUCTION TEMPLATE (UPDATED): {shared.settings['instruction_template']}")
|
||||
|
||||
|
|
|
|||
|
|
@ -671,7 +671,10 @@ def get_stopping_strings(state):
|
|||
# Handle GPT-OSS as a special case
|
||||
if '<|channel|>final<|message|>' in state['instruction_template_str'] and "<|end|>" in result:
|
||||
result.remove("<|end|>")
|
||||
result.append("<|result|>")
|
||||
if '<|result|>' in state['instruction_template_str']:
|
||||
result.append("<|result|>")
|
||||
elif '<|return|>' in state['instruction_template_str']:
|
||||
result.append("<|return|>")
|
||||
result = list(set(result))
|
||||
|
||||
if shared.args.verbose:
|
||||
|
|
|
|||
|
|
@ -423,6 +423,15 @@ class Exllamav3Model:
|
|||
if logit_bias:
|
||||
filters.append(LogitBiasFilter(self.tokenizer, logit_bias))
|
||||
|
||||
# Suppress EOS tokens via logit bias so they are never sampled
|
||||
if state['ban_eos_token']:
|
||||
eos_bias = {}
|
||||
for eos_id in self.config.eos_token_id_list:
|
||||
if eos_id is not None:
|
||||
eos_bias[str(eos_id)] = float('-inf')
|
||||
if eos_bias:
|
||||
filters.append(LogitBiasFilter(self.tokenizer, eos_bias))
|
||||
|
||||
# Logprobs support (OpenAI API)
|
||||
logprobs = state.get('logprobs', 0) or 0
|
||||
return_top_tokens = logprobs if logprobs > 0 else 0
|
||||
|
|
@ -480,15 +489,35 @@ class Exllamav3Model:
|
|||
return
|
||||
|
||||
id_to_piece = self.tokenizer.get_id_to_piece_list(True)
|
||||
sampled_ids = result.get("token_ids") # (batch, seq_len) - actually sampled tokens
|
||||
sampled_probs = result.get("token_probs") # (batch, seq_len) - their probabilities
|
||||
|
||||
def _piece(tid):
|
||||
s = id_to_piece[tid] if tid < len(id_to_piece) else f"<{tid}>"
|
||||
return s.replace('\u2581', ' ')
|
||||
|
||||
def _logprob(prob):
|
||||
return math.log(prob) if prob > 0 else float("-inf")
|
||||
|
||||
# top_k_tokens shape: (batch, seq_len, k), top_k_probs same
|
||||
for seq_idx in range(top_k_tokens.shape[1]):
|
||||
entry = {"top_logprobs": []}
|
||||
for k_idx in range(top_k_tokens.shape[2]):
|
||||
token_id = top_k_tokens[0, seq_idx, k_idx].item()
|
||||
prob = top_k_probs[0, seq_idx, k_idx].item()
|
||||
token_str = id_to_piece[token_id] if token_id < len(id_to_piece) else f"<{token_id}>"
|
||||
logprob = math.log(prob) if prob > 0 else float("-inf")
|
||||
entry["top_logprobs"].append({"token": token_str, "logprob": logprob})
|
||||
entry["top_logprobs"].append({"token": _piece(token_id), "logprob": _logprob(prob)})
|
||||
|
||||
# Record the actually sampled token at the entry level so
|
||||
# format_completion_logprobs uses it instead of top_logprobs[0]
|
||||
# (they differ with non-greedy sampling).
|
||||
if sampled_ids is not None:
|
||||
sid = sampled_ids[0, seq_idx].item()
|
||||
entry["token"] = _piece(sid)
|
||||
if sampled_probs is not None:
|
||||
entry["logprob"] = _logprob(sampled_probs[0, seq_idx].item())
|
||||
else:
|
||||
entry["logprob"] = None
|
||||
|
||||
self.last_completion_probabilities.append(entry)
|
||||
|
||||
def generate(self, prompt, state):
|
||||
|
|
@ -498,42 +527,31 @@ class Exllamav3Model:
|
|||
|
||||
return output
|
||||
|
||||
def get_prompt_logits(self, input_ids):
    """Return logits for all positions via a single no-cache forward pass.

    Used by prompt logprobs computation.

    Args:
        input_ids: token ids, either a torch.Tensor or anything accepted by
            ``torch.tensor`` (e.g. a nested list). The ids are flattened to
            a single sequence of shape (1, seq_len) before the forward pass.

    Returns:
        The output of ``self.model.forward`` moved to CPU and cast to
        float32 (the original docstring describes it as (1, seq_len, vocab)).
    """
    import torch

    if isinstance(input_ids, torch.Tensor):
        ids = input_ids
    else:
        ids = torch.tensor(input_ids, dtype=torch.long)

    # reshape() instead of view(): view() raises a RuntimeError when the
    # incoming tensor is non-contiguous (e.g. a transposed or strided slice),
    # while reshape() copies only when it has to.
    ids = ids.reshape(1, -1).cpu()

    with torch.no_grad():
        logits = self.model.forward(
            input_ids=ids,
            params={"attn_mode": "flash_attn_nc"}
        )

    return logits.cpu().float()
|
||||
|
||||
def get_logits(self, token_ids, **kwargs):
|
||||
"""
|
||||
Process a batch of token_ids and return the logits for the last token.
|
||||
This will reset and overwrite the model's cache.
|
||||
Uses flash_attn_nc (no cache) for correct results with recurrent models.
|
||||
"""
|
||||
# Initialize a single params dictionary that will be updated in-place
|
||||
params = {
|
||||
"cache": self.cache,
|
||||
"reconstruct": False,
|
||||
"attn_mode": "flash_attn",
|
||||
"batch_shape": (1, self.max_tokens),
|
||||
"past_len": 0
|
||||
}
|
||||
params.update(kwargs)
|
||||
|
||||
# Process prefix tokens to fill the cache and generate recurrent state
|
||||
if token_ids.shape[-1] > 1:
|
||||
prefix_ids = token_ids[:, :-1]
|
||||
|
||||
# This forward call updates the 'params' dict with the recurrent state
|
||||
self.model.forward(
|
||||
input_ids=prefix_ids,
|
||||
params=params
|
||||
)
|
||||
|
||||
# Update past_len for the next call
|
||||
params["past_len"] = prefix_ids.shape[-1]
|
||||
|
||||
# Process the last token, now using the state-filled 'params' dict
|
||||
last_token_ids = token_ids[:, -1:]
|
||||
logits = self.model.forward(
|
||||
input_ids=last_token_ids,
|
||||
params=params
|
||||
input_ids=token_ids,
|
||||
params={"attn_mode": "flash_attn_nc"}
|
||||
)
|
||||
|
||||
return logits.float().cpu()
|
||||
return logits[:, -1:, :].float().cpu()
|
||||
|
||||
def encode(self, string, **kwargs):
|
||||
add_bos = kwargs.pop('add_bos', True)
|
||||
|
|
|
|||
|
|
@ -26,6 +26,9 @@ except Exception:
|
|||
class Exllamav3HF(PreTrainedModel, GenerationMixin):
|
||||
def __init__(self, model_dir):
|
||||
hf_config = PretrainedConfig.from_pretrained(model_dir)
|
||||
# Ensure text_config is a proper object, not a dict (fixes qwen3_5_moe + transformers compat)
|
||||
if isinstance(getattr(hf_config, 'text_config', None), dict):
|
||||
hf_config.text_config = PretrainedConfig(**hf_config.text_config)
|
||||
super().__init__(hf_config)
|
||||
|
||||
exl3_config = Config.from_directory(model_dir)
|
||||
|
|
@ -199,30 +202,11 @@ class Exllamav3HF(PreTrainedModel, GenerationMixin):
|
|||
}
|
||||
).to(input_ids.device).float()
|
||||
else:
|
||||
# Labels path: use cache for cross-chunk attention.
|
||||
tokens_to_process = seq_tensor
|
||||
all_logits = None
|
||||
current_len = 0
|
||||
|
||||
for i in range(0, tokens_to_process.shape[0], max_chunk_size):
|
||||
chunk = tokens_to_process[i:i + max_chunk_size]
|
||||
chunk_logits = self.ex_model.forward(
|
||||
input_ids=chunk.view(1, -1),
|
||||
params={
|
||||
"attn_mode": "flash_attn",
|
||||
"cache": ex_cache,
|
||||
"past_len": current_len,
|
||||
"batch_shape": (1, self.max_tokens),
|
||||
}
|
||||
).float()
|
||||
current_len += chunk.shape[0]
|
||||
|
||||
if all_logits is None:
|
||||
all_logits = chunk_logits
|
||||
else:
|
||||
all_logits = torch.cat([all_logits, chunk_logits], dim=1)
|
||||
|
||||
logits = all_logits
|
||||
# Labels path: single pass without cache for correct logits
|
||||
logits = self.ex_model.forward(
|
||||
input_ids=seq_tensor.view(1, -1),
|
||||
params={"attn_mode": "flash_attn_nc"}
|
||||
).float().cpu()
|
||||
|
||||
if is_negative:
|
||||
self.past_seq_negative = seq_tensor
|
||||
|
|
|
|||
|
|
@ -191,21 +191,19 @@ def _apply_custom_generate_reply():
|
|||
|
||||
|
||||
def _apply_custom_css():
|
||||
all_css = ''
|
||||
for extension, _ in iterator():
|
||||
if hasattr(extension, 'custom_css'):
|
||||
all_css += getattr(extension, 'custom_css')()
|
||||
|
||||
return all_css
|
||||
return ''.join(
|
||||
getattr(extension, 'custom_css')()
|
||||
for extension, _ in iterator()
|
||||
if hasattr(extension, 'custom_css')
|
||||
)
|
||||
|
||||
|
||||
def _apply_custom_js():
|
||||
all_js = ''
|
||||
for extension, _ in iterator():
|
||||
if hasattr(extension, 'custom_js'):
|
||||
all_js += getattr(extension, 'custom_js')()
|
||||
|
||||
return all_js
|
||||
return ''.join(
|
||||
getattr(extension, 'custom_js')()
|
||||
for extension, _ in iterator()
|
||||
if hasattr(extension, 'custom_js')
|
||||
)
|
||||
|
||||
|
||||
def create_extensions_block():
|
||||
|
|
|
|||
|
|
@ -11,7 +11,6 @@ import time
|
|||
from pathlib import Path
|
||||
from typing import Any, List
|
||||
|
||||
import llama_cpp_binaries
|
||||
import requests
|
||||
|
||||
from modules import shared
|
||||
|
|
@ -311,8 +310,45 @@ class LlamaServer:
|
|||
else:
|
||||
raise Exception(f"Unexpected response format: 'completion_probabilities' not found in {result}")
|
||||
|
||||
def get_prompt_logprob_entries(self, token_ids, n_probs=5, prompt=""):
    """Get logprob entries for prompt tokens via a single n_predict=0 request.

    Requires llama.cpp server with prompt_logprobs support.
    Returns entries in the standard format for format_completion_logprobs().

    Args:
        token_ids: prompt token ids; anything with ``.tolist()`` or any iterable.
        n_probs: forwarded to the server as ``n_probs``.
        prompt: the prompt text, used only to decide whether the first
            token's text should be reported or blanked.

    Returns:
        A list whose first element is a placeholder for the first token
        (``{"token": ..., "null_logprob": True}``) followed by the server's
        ``prompt_probabilities`` entries, or ``[]`` when there are no tokens
        or the server returned no prompt probabilities.
    """
    token_ids_list = token_ids.tolist() if hasattr(token_ids, 'tolist') else list(token_ids)

    # Nothing to score: skip the HTTP round-trip. This also guarantees that
    # token_ids_list[0] below cannot raise IndexError.
    if not token_ids_list:
        return []

    url = f"http://127.0.0.1:{self.port}/completion"
    payload = {
        "prompt": token_ids_list,
        "n_predict": 0,
        "n_probs": n_probs,
        "prompt_logprobs": True,
        "stream": False,
        "cache_prompt": False,
    }

    response = self.session.post(url, json=payload)
    result = response.json()

    prompt_probs = result.get("prompt_probabilities", [])
    if not prompt_probs:
        return []

    # Null first token (no conditioning context); use empty string for BOS
    # or tokens that don't appear at the start of the prompt text.
    first_token_str = self.decode([token_ids_list[0]])
    if self.bos_token and first_token_str == self.bos_token:
        first_token_str = ""
    elif not prompt.startswith(first_token_str):
        first_token_str = ""

    entries = [{"token": first_token_str, "null_logprob": True}]
    entries.extend(prompt_probs)
    return entries
|
||||
|
||||
def _get_vocabulary_size(self):
|
||||
"""Get and store the model's maximum context length."""
|
||||
"""Get and store the model's vocabulary size."""
|
||||
url = f"http://127.0.0.1:{self.port}/v1/models"
|
||||
response = self.session.get(url).json()
|
||||
|
||||
|
|
@ -357,7 +393,16 @@ class LlamaServer:
|
|||
"""Start the llama.cpp server and wait until it's ready."""
|
||||
# Determine the server path
|
||||
if self.server_path is None:
|
||||
self.server_path = llama_cpp_binaries.get_binary_path()
|
||||
if shared.args.ik:
|
||||
try:
|
||||
import ik_llama_cpp_binaries
|
||||
except ImportError:
|
||||
raise ImportError("--ik requires the ik_llama_cpp_binaries package. Install it with: pip install <ik_llama_cpp_binaries wheel URL>")
|
||||
|
||||
self.server_path = ik_llama_cpp_binaries.get_binary_path()
|
||||
else:
|
||||
import llama_cpp_binaries
|
||||
self.server_path = llama_cpp_binaries.get_binary_path()
|
||||
|
||||
# Build the command
|
||||
cmd = [
|
||||
|
|
@ -470,6 +515,10 @@ class LlamaServer:
|
|||
else:
|
||||
cmd.append(f"--{flag_item}")
|
||||
|
||||
# Patch flags for ik_llama.cpp compatibility
|
||||
if shared.args.ik:
|
||||
cmd = _patch_cmd_for_ik(cmd)
|
||||
|
||||
env = os.environ.copy()
|
||||
if os.name == 'posix':
|
||||
current_path = env.get('LD_LIBRARY_PATH', '')
|
||||
|
|
@ -607,3 +656,49 @@ def filter_stderr_with_progress(process_stderr):
|
|||
process_stderr.close()
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
|
||||
def _patch_cmd_for_ik(cmd):
|
||||
"""
|
||||
Rewrite upstream llama.cpp flags to ik_llama.cpp equivalents:
|
||||
--no-webui → --webui none
|
||||
--fit off → (removed)
|
||||
--fit on / --fit-ctx → --fit (bare flag)
|
||||
--fit-target → --fit-margin
|
||||
--cache-reuse → (removed, unsupported)
|
||||
--swa-full → (removed, unsupported)
|
||||
"""
|
||||
# Add Hadamard KV cache rotation when using quantized cache types.
|
||||
# This significantly improves quantized cache quality (especially q4_0)
|
||||
# and is a no-op for MLA models like DeepSeek.
|
||||
if shared.args.cache_type in ("q8_0", "q4_0"):
|
||||
cmd += ["-khad", "-vhad"]
|
||||
|
||||
patched = []
|
||||
i = 0
|
||||
while i < len(cmd):
|
||||
arg = cmd[i]
|
||||
|
||||
if arg == "--no-webui":
|
||||
patched += ["--webui", "none"]
|
||||
elif arg == "--fit" and i + 1 < len(cmd) and cmd[i + 1] in ("on", "off"):
|
||||
val = cmd[i + 1]
|
||||
i += 1
|
||||
if val == "on":
|
||||
patched.append("--fit")
|
||||
# "off" → drop entirely
|
||||
elif arg == "--fit-ctx":
|
||||
patched.append("--fit")
|
||||
i += 1 # skip the value
|
||||
elif arg == "--fit-target":
|
||||
patched.append("--fit-margin")
|
||||
elif arg == "--cache-reuse":
|
||||
i += 1 # skip the value
|
||||
elif arg == "--swa-full":
|
||||
pass # bare flag, just drop it
|
||||
else:
|
||||
patched.append(arg)
|
||||
|
||||
i += 1
|
||||
|
||||
return patched
|
||||
|
|
|
|||
|
|
@ -20,6 +20,7 @@ loaders_and_params = OrderedDict({
|
|||
'no_mmap',
|
||||
'mlock',
|
||||
'numa',
|
||||
'ik',
|
||||
'parallel',
|
||||
'model_draft',
|
||||
'draft_max',
|
||||
|
|
@ -345,6 +346,7 @@ def list_model_elements():
|
|||
'spec_ngram_size_m',
|
||||
'spec_ngram_min_hits',
|
||||
'mmproj',
|
||||
'ik',
|
||||
]
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -4,7 +4,6 @@ import numpy as np
|
|||
|
||||
from modules import models, shared
|
||||
from modules.logging_colors import logger
|
||||
from modules.models import load_model
|
||||
from modules.text_generation import generate_reply
|
||||
from modules.utils import check_model_loaded
|
||||
|
||||
|
|
@ -12,8 +11,7 @@ global_scores = None
|
|||
|
||||
|
||||
def get_next_logits(*args, **kwargs):
|
||||
if shared.args.idle_timeout > 0 and shared.model is None and shared.model_name not in [None, 'None']:
|
||||
shared.model, shared.tokenizer = load_model(shared.model_name)
|
||||
models.load_model_if_idle_unloaded()
|
||||
|
||||
needs_lock = not args[2] # use_samplers
|
||||
if needs_lock:
|
||||
|
|
|
|||
|
|
@ -1,4 +1,5 @@
|
|||
import sys
|
||||
import threading
|
||||
import time
|
||||
|
||||
import modules.shared as shared
|
||||
|
|
@ -7,6 +8,15 @@ from modules.models_settings import get_model_metadata
|
|||
from modules.utils import resolve_model_path
|
||||
|
||||
last_generation_time = time.time()
|
||||
active_generation_count = 0
|
||||
_generation_count_lock = threading.Lock()
|
||||
|
||||
|
||||
def load_model_if_idle_unloaded():
    """Reload the model named in shared.model_name if none is currently loaded.

    Does nothing unless shared.args.idle_timeout is positive, shared.model is
    None, and shared.model_name is neither None nor the string 'None'.
    On reload, shared.model and shared.tokenizer are replaced with the result
    of load_model() and last_generation_time is reset to the current time.
    """
    global last_generation_time

    idle_unload_enabled = shared.args.idle_timeout > 0
    if not idle_unload_enabled or shared.model is not None:
        return

    if shared.model_name in [None, 'None']:
        return

    shared.model, shared.tokenizer = load_model(shared.model_name)
    last_generation_time = time.time()
|
||||
|
||||
|
||||
def load_model(model_name, loader=None):
|
||||
|
|
@ -66,8 +76,7 @@ def load_model(model_name, loader=None):
|
|||
|
||||
logger.info(f"Loaded \"{model_name}\" in {(time.time()-t0):.2f} seconds.")
|
||||
logger.info(f"LOADER: \"{loader}\"")
|
||||
logger.info(f"TRUNCATION LENGTH: {shared.settings['truncation_length']}")
|
||||
logger.info(f"INSTRUCTION TEMPLATE: \"{metadata['instruction_template']}\"")
|
||||
logger.info(f"CONTEXT LENGTH: {shared.settings['truncation_length']}")
|
||||
return model, tokenizer
|
||||
|
||||
|
||||
|
|
@ -159,7 +168,10 @@ def unload_model_if_idle():
|
|||
while True:
|
||||
shared.generation_lock.acquire()
|
||||
try:
|
||||
if time.time() - last_generation_time > shared.args.idle_timeout * 60:
|
||||
with _generation_count_lock:
|
||||
is_active = active_generation_count > 0
|
||||
|
||||
if not is_active and time.time() - last_generation_time > shared.args.idle_timeout * 60:
|
||||
if shared.model is not None:
|
||||
logger.info("Unloading the model for inactivity.")
|
||||
unload_model(keep_model_name=True)
|
||||
|
|
|
|||
|
|
@ -23,14 +23,9 @@ def get_fallback_settings():
|
|||
|
||||
def get_model_metadata(model):
|
||||
model_path = resolve_model_path(model)
|
||||
model_settings = {}
|
||||
|
||||
# Get settings from user_data/models/config.yaml and user_data/models/config-user.yaml
|
||||
settings = shared.model_config
|
||||
for pat in settings:
|
||||
if re.match(pat.lower(), Path(model).name.lower()):
|
||||
for k in settings[pat]:
|
||||
model_settings[k] = settings[pat][k]
|
||||
# Fallback settings
|
||||
model_settings = get_fallback_settings()
|
||||
|
||||
path = model_path / 'config.json'
|
||||
if path.exists():
|
||||
|
|
|
|||
|
|
@ -110,6 +110,7 @@ group.add_argument('--numa', action='store_true', help='Activate NUMA task alloc
|
|||
group.add_argument('--parallel', type=int, default=1, help='Number of parallel request slots. The context size is divided equally among slots. For example, to have 4 slots with 8192 context each, set ctx_size to 32768.')
|
||||
group.add_argument('--fit-target', type=str, default='512', help='Target VRAM margin per device for auto GPU layers, comma-separated list of values in MiB. A single value is broadcast across all devices.')
|
||||
group.add_argument('--extra-flags', type=str, default=None, help='Extra flags to pass to llama-server. Example: "--jinja --rpc 192.168.1.100:50052"')
|
||||
group.add_argument('--ik', action='store_true', help='Use ik_llama.cpp instead of upstream llama.cpp. Requires the ik_llama_cpp_binaries package to be installed.')
|
||||
|
||||
# Transformers/Accelerate
|
||||
group = parser.add_argument_group('Transformers/Accelerate')
|
||||
|
|
@ -454,17 +455,7 @@ def load_user_config():
|
|||
|
||||
args.loader = fix_loader_name(args.loader)
|
||||
|
||||
# Load model-specific settings
|
||||
p = Path(f'{args.model_dir}/config.yaml')
|
||||
if p.exists():
|
||||
model_config = yaml.safe_load(open(p, 'r').read())
|
||||
else:
|
||||
model_config = {}
|
||||
del p
|
||||
|
||||
|
||||
# Load custom model-specific settings
|
||||
user_config = load_user_config()
|
||||
|
||||
model_config = OrderedDict(model_config)
|
||||
user_config = OrderedDict(user_config)
|
||||
|
|
|
|||
|
|
@ -17,9 +17,7 @@ from modules.utils import check_model_loaded
|
|||
|
||||
|
||||
def generate_reply(*args, **kwargs):
|
||||
if shared.args.idle_timeout > 0 and shared.model is None and shared.model_name not in [None, 'None']:
|
||||
from modules.models import load_model
|
||||
shared.model, shared.tokenizer = load_model(shared.model_name)
|
||||
models.load_model_if_idle_unloaded()
|
||||
|
||||
state = args[1] if len(args) > 1 else kwargs.get('state', {})
|
||||
use_parallel = (
|
||||
|
|
@ -31,10 +29,16 @@ def generate_reply(*args, **kwargs):
|
|||
if not use_parallel:
|
||||
shared.generation_lock.acquire()
|
||||
|
||||
with models._generation_count_lock:
|
||||
models.active_generation_count += 1
|
||||
|
||||
try:
|
||||
for result in _generate_reply(*args, **kwargs):
|
||||
yield result
|
||||
finally:
|
||||
with models._generation_count_lock:
|
||||
models.active_generation_count -= 1
|
||||
|
||||
models.last_generation_time = time.time()
|
||||
if not use_parallel:
|
||||
shared.generation_lock.release()
|
||||
|
|
@ -126,7 +130,9 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False, escap
|
|||
|
||||
def encode(prompt, add_special_tokens=True, add_bos_token=True, truncation_length=None):
|
||||
if shared.tokenizer is None:
|
||||
raise ValueError('No tokenizer is loaded')
|
||||
models.load_model_if_idle_unloaded()
|
||||
if shared.tokenizer is None:
|
||||
raise ValueError('No tokenizer is loaded')
|
||||
|
||||
# llama.cpp case
|
||||
if shared.model.__class__.__name__ == 'LlamaServer':
|
||||
|
|
@ -176,7 +182,9 @@ def encode(prompt, add_special_tokens=True, add_bos_token=True, truncation_lengt
|
|||
|
||||
def decode(output_ids, skip_special_tokens=True):
|
||||
if shared.tokenizer is None:
|
||||
raise ValueError('No tokenizer is loaded')
|
||||
models.load_model_if_idle_unloaded()
|
||||
if shared.tokenizer is None:
|
||||
raise ValueError('No tokenizer is loaded')
|
||||
|
||||
return shared.tokenizer.decode(output_ids, skip_special_tokens=skip_special_tokens)
|
||||
|
||||
|
|
|
|||
|
|
@ -109,7 +109,6 @@ def load_model_HF(model_name):
|
|||
params = {
|
||||
'low_cpu_mem_usage': True,
|
||||
'attn_implementation': shared.args.attn_implementation,
|
||||
'torch_dtype': torch.bfloat16 if shared.args.bf16 else torch.float16,
|
||||
}
|
||||
|
||||
if shared.original_args.trust_remote_code:
|
||||
|
|
@ -120,6 +119,17 @@ def load_model_HF(model_name):
|
|||
|
||||
config = AutoConfig.from_pretrained(path_to_model, trust_remote_code=shared.original_args.trust_remote_code)
|
||||
|
||||
# Determine torch_dtype: respect --bf16 flag, otherwise autodetect
|
||||
# from model config, but never allow float32.
|
||||
if shared.args.bf16:
|
||||
params['torch_dtype'] = torch.bfloat16
|
||||
else:
|
||||
dtype = getattr(config, 'torch_dtype', None) or getattr(getattr(config, 'text_config', None), 'torch_dtype', None)
|
||||
if dtype in (torch.float16, torch.bfloat16):
|
||||
params['torch_dtype'] = dtype
|
||||
else:
|
||||
params['torch_dtype'] = torch.float16
|
||||
|
||||
if 'chatglm' in model_name.lower():
|
||||
LoaderClass = AutoModel
|
||||
else:
|
||||
|
|
|
|||
|
|
@ -82,7 +82,7 @@ def create_ui():
|
|||
gr.HTML("<div class='sidebar-vertical-separator'></div>")
|
||||
|
||||
shared.gradio['reasoning_effort'] = gr.Dropdown(value=shared.settings['reasoning_effort'], choices=['low', 'medium', 'high'], label='Reasoning effort', info='Used by GPT-OSS.')
|
||||
shared.gradio['enable_thinking'] = gr.Checkbox(value=shared.settings['enable_thinking'], label='Enable thinking', info='Used by Seed-OSS and pre-2507 Qwen3.')
|
||||
shared.gradio['enable_thinking'] = gr.Checkbox(value=shared.settings['enable_thinking'], label='Enable thinking', info='For models with thinking support.')
|
||||
|
||||
gr.HTML("<div class='sidebar-vertical-separator'></div>")
|
||||
|
||||
|
|
|
|||
|
|
@ -51,6 +51,9 @@ def create_ui():
|
|||
|
||||
with gr.Column():
|
||||
shared.gradio['vram_info'] = gr.HTML(value=get_initial_vram_info())
|
||||
if not shared.args.portable:
|
||||
shared.gradio['ik'] = gr.Checkbox(label="ik", value=shared.args.ik, info='Use ik_llama.cpp instead of upstream llama.cpp.')
|
||||
|
||||
shared.gradio['cpu_moe'] = gr.Checkbox(label="cpu-moe", value=shared.args.cpu_moe, info='Move the experts to the CPU. Saves VRAM on MoE models.')
|
||||
shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming-llm", value=shared.args.streaming_llm, info='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
|
||||
shared.gradio['load_in_8bit'] = gr.Checkbox(label="load-in-8bit", value=shared.args.load_in_8bit)
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
accelerate==1.12.*
|
||||
accelerate==1.13.*
|
||||
audioop-lts<1.0; python_version >= "3.13"
|
||||
bitsandbytes==0.49.*
|
||||
datasets
|
||||
|
|
@ -25,14 +25,14 @@ sentencepiece
|
|||
tensorboard
|
||||
torchao==0.15.*
|
||||
trafilatura==2.0.0
|
||||
transformers==5.3.*
|
||||
transformers==5.5.*
|
||||
triton-windows==3.5.1.post24; platform_system == "Windows"
|
||||
tqdm
|
||||
wandb
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
|
|
@ -40,9 +40,11 @@ sse-starlette==1.6.5
|
|||
tiktoken
|
||||
|
||||
# CUDA wheels
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.26/exllamav3-0.0.26+cu128.torch2.9.0-cp313-cp313-win_amd64.whl; platform_system == "Windows" and python_version == "3.13"
|
||||
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.26/exllamav3-0.0.26+cu128.torch2.9.0-cp313-cp313-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.13"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.28/exllamav3-0.0.28+cu128.torch2.9.0-cp313-cp313-win_amd64.whl; platform_system == "Windows" and python_version == "3.13"
|
||||
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.28/exllamav3-0.0.28+cu128.torch2.9.0-cp313-cp313-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.13"
|
||||
https://github.com/kingbri1/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu128torch2.9.0cxx11abiFALSE-cp313-cp313-win_amd64.whl; platform_system == "Windows" and python_version == "3.13"
|
||||
https://github.com/kingbri1/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu128torch2.9.0cxx11abiFALSE-cp313-cp313-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.13"
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
accelerate==1.12.*
|
||||
accelerate==1.13.*
|
||||
audioop-lts<1.0; python_version >= "3.13"
|
||||
datasets
|
||||
diffusers==0.37.*
|
||||
|
|
@ -22,14 +22,14 @@ scipy
|
|||
sentencepiece
|
||||
tensorboard
|
||||
torchao==0.15.*
|
||||
transformers==5.3.*
|
||||
transformers==5.5.*
|
||||
tqdm
|
||||
trafilatura==2.0.0
|
||||
wandb
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
|
|
@ -37,5 +37,5 @@ sse-starlette==1.6.5
|
|||
tiktoken
|
||||
|
||||
# AMD wheels
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
accelerate==1.12.*
|
||||
accelerate==1.13.*
|
||||
audioop-lts<1.0; python_version >= "3.13"
|
||||
datasets
|
||||
diffusers==0.37.*
|
||||
|
|
@ -22,14 +22,14 @@ scipy
|
|||
sentencepiece
|
||||
tensorboard
|
||||
torchao==0.15.*
|
||||
transformers==5.3.*
|
||||
transformers==5.5.*
|
||||
tqdm
|
||||
trafilatura==2.0.0
|
||||
wandb
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
|
|
@ -37,4 +37,4 @@ sse-starlette==1.6.5
|
|||
tiktoken
|
||||
|
||||
# Mac wheels
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
accelerate==1.12.*
|
||||
accelerate==1.13.*
|
||||
audioop-lts<1.0; python_version >= "3.13"
|
||||
datasets
|
||||
diffusers==0.37.*
|
||||
|
|
@ -22,14 +22,14 @@ scipy
|
|||
sentencepiece
|
||||
tensorboard
|
||||
torchao==0.15.*
|
||||
transformers==5.3.*
|
||||
transformers==5.5.*
|
||||
tqdm
|
||||
trafilatura==2.0.0
|
||||
wandb
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
|
|
@ -37,4 +37,4 @@ sse-starlette==1.6.5
|
|||
tiktoken
|
||||
|
||||
# Mac wheels
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
accelerate==1.12.*
|
||||
accelerate==1.13.*
|
||||
audioop-lts<1.0; python_version >= "3.13"
|
||||
datasets
|
||||
diffusers==0.37.*
|
||||
|
|
@ -22,14 +22,14 @@ scipy
|
|||
sentencepiece
|
||||
tensorboard
|
||||
torchao==0.15.*
|
||||
transformers==5.3.*
|
||||
transformers==5.5.*
|
||||
tqdm
|
||||
trafilatura==2.0.0
|
||||
wandb
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
|
|
@ -37,5 +37,7 @@ sse-starlette==1.6.5
|
|||
tiktoken
|
||||
|
||||
# llama.cpp (CPU only)
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
|
|
|
|||
|
|
@ -1,4 +1,4 @@
|
|||
accelerate==1.12.*
|
||||
accelerate==1.13.*
|
||||
audioop-lts<1.0; python_version >= "3.13"
|
||||
datasets
|
||||
diffusers==0.37.*
|
||||
|
|
@ -22,14 +22,14 @@ scipy
|
|||
sentencepiece
|
||||
tensorboard
|
||||
torchao==0.15.*
|
||||
transformers==5.3.*
|
||||
transformers==5.5.*
|
||||
tqdm
|
||||
trafilatura==2.0.0
|
||||
wandb
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
|
|
|
|||
|
|
@ -14,8 +14,8 @@ trafilatura==2.0.0
|
|||
tqdm
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
|
|
@ -23,5 +23,5 @@ sse-starlette==1.6.5
|
|||
tiktoken
|
||||
|
||||
# CUDA wheels
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
|
|
|
|||
|
|
@ -14,8 +14,8 @@ trafilatura==2.0.0
|
|||
tqdm
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
|
|
@ -23,5 +23,5 @@ sse-starlette==1.6.5
|
|||
tiktoken
|
||||
|
||||
# AMD wheels
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
|
|
|
|||
|
|
@ -14,8 +14,8 @@ trafilatura==2.0.0
|
|||
tqdm
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
|
|
@ -23,4 +23,4 @@ sse-starlette==1.6.5
|
|||
tiktoken
|
||||
|
||||
# Mac wheels
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
|
||||
|
|
|
|||
|
|
@ -14,8 +14,8 @@ trafilatura==2.0.0
|
|||
tqdm
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
|
|
@ -23,4 +23,4 @@ sse-starlette==1.6.5
|
|||
tiktoken
|
||||
|
||||
# Mac wheels
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
|
||||
|
|
|
|||
|
|
@ -14,8 +14,8 @@ trafilatura==2.0.0
|
|||
tqdm
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
|
|
@ -23,5 +23,5 @@ sse-starlette==1.6.5
|
|||
tiktoken
|
||||
|
||||
# llama.cpp (CPU only)
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
|
|
|
|||
|
|
@ -14,8 +14,8 @@ trafilatura==2.0.0
|
|||
tqdm
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
|
|
@ -23,5 +23,5 @@ sse-starlette==1.6.5
|
|||
tiktoken
|
||||
|
||||
# CUDA wheels
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
|
|
|
|||
27
requirements/portable/requirements_ik.txt
Normal file
27
requirements/portable/requirements_ik.txt
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
audioop-lts<1.0; python_version >= "3.13"
|
||||
fastapi==0.112.4
|
||||
huggingface-hub==1.5.*
|
||||
jinja2==3.1.6
|
||||
markdown
|
||||
numpy==2.2.*
|
||||
pydantic==2.11.0
|
||||
pymupdf==1.27.*
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
rich
|
||||
trafilatura==2.0.0
|
||||
tqdm
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
sse-starlette==1.6.5
|
||||
tiktoken
|
||||
|
||||
# CUDA wheels
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
27
requirements/portable/requirements_ik_cpu_only.txt
Normal file
27
requirements/portable/requirements_ik_cpu_only.txt
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
audioop-lts<1.0; python_version >= "3.13"
|
||||
fastapi==0.112.4
|
||||
huggingface-hub==1.5.*
|
||||
jinja2==3.1.6
|
||||
markdown
|
||||
numpy==2.2.*
|
||||
pydantic==2.11.0
|
||||
pymupdf==1.27.*
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
rich
|
||||
trafilatura==2.0.0
|
||||
tqdm
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
sse-starlette==1.6.5
|
||||
tiktoken
|
||||
|
||||
# ik_llama.cpp (CPU only)
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
27
requirements/portable/requirements_ik_cuda131.txt
Normal file
27
requirements/portable/requirements_ik_cuda131.txt
Normal file
|
|
@ -0,0 +1,27 @@
|
|||
audioop-lts<1.0; python_version >= "3.13"
|
||||
fastapi==0.112.4
|
||||
huggingface-hub==1.5.*
|
||||
jinja2==3.1.6
|
||||
markdown
|
||||
numpy==2.2.*
|
||||
pydantic==2.11.0
|
||||
pymupdf==1.27.*
|
||||
python-docx==1.1.2
|
||||
pyyaml
|
||||
requests
|
||||
rich
|
||||
trafilatura==2.0.0
|
||||
tqdm
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
sse-starlette==1.6.5
|
||||
tiktoken
|
||||
|
||||
# CUDA wheels
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
|
|
@ -14,8 +14,8 @@ trafilatura==2.0.0
|
|||
tqdm
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
|
|
|
|||
|
|
@ -14,8 +14,8 @@ trafilatura==2.0.0
|
|||
tqdm
|
||||
|
||||
# Gradio
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
|
||||
https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
|
||||
|
||||
# API
|
||||
flask_cloudflared==0.0.15
|
||||
|
|
@ -23,5 +23,5 @@ sse-starlette==1.6.5
|
|||
tiktoken
|
||||
|
||||
# Vulkan wheels
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||
|
|
|
|||
|
|
@ -18,7 +18,6 @@ import modules.extensions as extensions_module
|
|||
from modules.LoRA import add_lora_to_model
|
||||
from modules.models import load_model, unload_model_if_idle
|
||||
from modules.models_settings import (
|
||||
get_fallback_settings,
|
||||
get_model_metadata,
|
||||
update_model_parameters
|
||||
)
|
||||
|
|
@ -271,10 +270,6 @@ if __name__ == "__main__":
|
|||
# Apply CLI overrides for image model settings (CLI flags take precedence over saved settings)
|
||||
shared.apply_image_model_cli_overrides()
|
||||
|
||||
# Fallback settings for models
|
||||
shared.model_config['.*'] = get_fallback_settings()
|
||||
shared.model_config.move_to_end('.*', last=False) # Move to the beginning
|
||||
|
||||
# Activate the extensions listed on settings.yaml
|
||||
extensions_module.available_extensions = utils.get_available_extensions()
|
||||
for extension in shared.settings['default_extensions']:
|
||||
|
|
|
|||
|
|
@ -1,203 +0,0 @@
|
|||
.*(llama|alpac|vicuna|guanaco|koala|llava|wizardlm|metharme|pygmalion-7b|pygmalion-2|mythalion|wizard-mega|openbuddy|vigogne|h2ogpt-research|manticore):
|
||||
model_type: 'llama'
|
||||
.*(opt-|opt_|opt1|opt3|optfor|galactica|galpaca|pygmalion-350m):
|
||||
model_type: 'opt'
|
||||
.*(gpt-j|gptj|gpt4all-j|malion-6b|pygway|pygmalion-6b|dolly-v1):
|
||||
model_type: 'gptj'
|
||||
.*(gpt-neox|koalpaca-polyglot|polyglot.*koalpaca|polyglot-ko|polyglot_ko|pythia|stablelm|incite|dolly-v2|polycoder|h2ogpt-oig|h2ogpt-oasst1|h2ogpt-gm):
|
||||
model_type: 'gptneox'
|
||||
.*bloom:
|
||||
model_type: 'bloom'
|
||||
.*gpt2:
|
||||
model_type: 'gpt2'
|
||||
.*falcon:
|
||||
model_type: 'falcon'
|
||||
.*mpt:
|
||||
model_type: 'mpt'
|
||||
.*(starcoder|starchat):
|
||||
model_type: 'starcoder'
|
||||
.*dolly-v2:
|
||||
model_type: 'dollyv2'
|
||||
.*replit:
|
||||
model_type: 'replit'
|
||||
.*(oasst|openassistant-|stablelm-7b-sft-v7-epoch-3):
|
||||
instruction_template: 'Open Assistant'
|
||||
skip_special_tokens: false
|
||||
(?!.*galactica)(?!.*reward).*openassistant:
|
||||
instruction_template: 'Open Assistant'
|
||||
skip_special_tokens: false
|
||||
.*galactica:
|
||||
skip_special_tokens: false
|
||||
.*dolly-v[0-9]-[0-9]*b:
|
||||
instruction_template: 'Alpaca'
|
||||
skip_special_tokens: false
|
||||
.*alpaca-native-4bit:
|
||||
instruction_template: 'Alpaca'
|
||||
.*llava:
|
||||
instruction_template: 'LLaVA'
|
||||
.*llava.*1.5:
|
||||
instruction_template: 'Vicuna-v1.1'
|
||||
.*wizard.*mega:
|
||||
instruction_template: 'Wizard-Mega'
|
||||
.*starchat-beta:
|
||||
instruction_template: 'Starchat-Beta'
|
||||
(?!.*v0)(?!.*1.1)(?!.*1_1)(?!.*stable)(?!.*chinese).*vicuna:
|
||||
instruction_template: 'Vicuna-v0'
|
||||
.*vicuna.*v0:
|
||||
instruction_template: 'Vicuna-v0'
|
||||
.*vicuna.*(1.1|1_1|1.3|1_3):
|
||||
instruction_template: 'Vicuna-v1.1'
|
||||
.*vicuna.*(1.5|1_5):
|
||||
instruction_template: 'Vicuna-v1.1'
|
||||
.*stable.*vicuna:
|
||||
instruction_template: 'StableVicuna'
|
||||
(?!.*chat).*chinese-vicuna:
|
||||
instruction_template: 'Alpaca'
|
||||
.*chinese-vicuna.*chat:
|
||||
instruction_template: 'Chinese-Vicuna-Chat'
|
||||
.*alpaca:
|
||||
instruction_template: 'Alpaca'
|
||||
.*koala:
|
||||
instruction_template: 'Koala'
|
||||
.*chatglm:
|
||||
instruction_template: 'ChatGLM'
|
||||
.*(metharme|pygmalion|mythalion):
|
||||
instruction_template: 'Metharme'
|
||||
.*raven:
|
||||
instruction_template: 'RWKV-Raven'
|
||||
.*moss-moon.*sft:
|
||||
instruction_template: 'MOSS'
|
||||
.*stablelm-tuned:
|
||||
instruction_template: 'StableLM'
|
||||
.*galactica.*finetuned:
|
||||
instruction_template: 'Galactica Finetuned'
|
||||
.*galactica.*-v2:
|
||||
instruction_template: 'Galactica v2'
|
||||
(?!.*finetuned)(?!.*-v2).*galactica:
|
||||
instruction_template: 'Galactica'
|
||||
.*guanaco:
|
||||
instruction_template: 'Guanaco non-chat'
|
||||
.*baize:
|
||||
instruction_template: 'Baize'
|
||||
.*mpt-.*instruct:
|
||||
instruction_template: 'Alpaca'
|
||||
.*mpt-.*chat:
|
||||
instruction_template: 'ChatML'
|
||||
(?!.*-flan-)(?!.*-t5-).*lamini-:
|
||||
instruction_template: 'Alpaca'
|
||||
.*incite.*chat:
|
||||
instruction_template: 'INCITE-Chat'
|
||||
.*incite.*instruct:
|
||||
instruction_template: 'INCITE-Instruct'
|
||||
.*ziya-:
|
||||
instruction_template: 'Ziya'
|
||||
.*koalpaca:
|
||||
instruction_template: 'KoAlpaca'
|
||||
.*openbuddy:
|
||||
instruction_template: 'OpenBuddy'
|
||||
(?!.*chat).*vigogne:
|
||||
instruction_template: 'Vigogne-Instruct'
|
||||
.*vigogne.*chat:
|
||||
instruction_template: 'Vigogne-Chat'
|
||||
.*(llama-deus|supercot|llama-natural-instructions|open-llama-0.3t-7b-instruct-dolly-hhrlhf|open-llama-0.3t-7b-open-instruct):
|
||||
instruction_template: 'Alpaca'
|
||||
.*bactrian:
|
||||
instruction_template: 'Bactrian'
|
||||
.*(h2ogpt-oig-|h2ogpt-oasst1-|h2ogpt-research-oasst1-):
|
||||
instruction_template: 'INCITE-Chat'
|
||||
.*h2ogpt-gm-:
|
||||
instruction_template: 'H2O-prompt_answer'
|
||||
.*manticore:
|
||||
instruction_template: 'Manticore Chat'
|
||||
.*bluemoonrp-(30|13)b:
|
||||
instruction_template: 'Bluemoon'
|
||||
.*Nous-Hermes-13b:
|
||||
instruction_template: 'Alpaca'
|
||||
.*airoboros:
|
||||
instruction_template: 'Vicuna-v1.1'
|
||||
.*airoboros.*1.2:
|
||||
instruction_template: 'Airoboros-v1.2'
|
||||
.*alpa(cino|sta):
|
||||
instruction_template: 'Alpaca'
|
||||
.*hippogriff:
|
||||
instruction_template: 'Hippogriff'
|
||||
.*lazarus:
|
||||
instruction_template: 'Alpaca'
|
||||
.*guanaco-.*(7|13|33|65)b:
|
||||
instruction_template: 'Vicuna-v0'
|
||||
.*hypermantis:
|
||||
instruction_template: 'Alpaca'
|
||||
.*open-llama-.*-open-instruct:
|
||||
instruction_template: 'Alpaca'
|
||||
.*starcoder-gpteacher-code-instruct:
|
||||
instruction_template: 'Alpaca'
|
||||
.*tulu:
|
||||
instruction_template: 'Tulu'
|
||||
.*chronos:
|
||||
instruction_template: 'Alpaca'
|
||||
.*samantha:
|
||||
instruction_template: 'Samantha'
|
||||
.*wizardcoder:
|
||||
instruction_template: 'Alpaca'
|
||||
.*minotaur:
|
||||
instruction_template: 'Manticore Chat'
|
||||
.*orca_mini:
|
||||
instruction_template: 'Orca Mini'
|
||||
.*(platypus|gplatty|superplatty):
|
||||
instruction_template: 'Alpaca'
|
||||
.*(openorca-platypus2):
|
||||
instruction_template: 'OpenOrca-Platypus2'
|
||||
.*longchat:
|
||||
instruction_template: 'Vicuna-v1.1'
|
||||
.*vicuna-33b:
|
||||
instruction_template: 'Vicuna-v1.1'
|
||||
.*redmond-hermes-coder:
|
||||
instruction_template: 'Alpaca'
|
||||
.*wizardcoder-15b:
|
||||
instruction_template: 'Alpaca'
|
||||
.*wizardlm:
|
||||
instruction_template: 'Vicuna-v1.1'
|
||||
.*godzilla:
|
||||
instruction_template: 'Alpaca'
|
||||
.*llama(-?)(2|v2).*chat:
|
||||
instruction_template: 'Llama-v2'
|
||||
.*newhope:
|
||||
instruction_template: 'NewHope'
|
||||
.*stablebeluga2:
|
||||
instruction_template: 'StableBeluga2'
|
||||
.*openchat:
|
||||
instruction_template: 'OpenChat'
|
||||
.*codellama.*instruct:
|
||||
instruction_template: 'Llama-v2'
|
||||
.*(mistral|mixtral).*instruct:
|
||||
instruction_template: 'Mistral'
|
||||
.*mistral.*openorca:
|
||||
instruction_template: 'ChatML'
|
||||
.*(WizardCoder-Python-34B-V1.0|Phind-CodeLlama-34B-v2|CodeBooga-34B-v0.1):
|
||||
instruction_template: 'Alpaca'
|
||||
.*orca-2-(13|7)b:
|
||||
instruction_template: 'ChatML'
|
||||
.*openhermes.*mistral:
|
||||
instruction_template: 'ChatML'
|
||||
.*Yi-34B-Chat:
|
||||
instruction_template: 'ChatML'
|
||||
(dolphin).*:
|
||||
instruction_template: 'ChatML'
|
||||
.*synthia:
|
||||
instruction_template: 'Synthia'
|
||||
.*(hercules|hyperion):
|
||||
instruction_template: 'ChatML'
|
||||
.*command-r:
|
||||
instruction_template: 'Command-R'
|
||||
.*xwin-lm-70b-v0.1:
|
||||
instruction_template: 'Vicuna-v1.1'
|
||||
.*platypus-yi-34b:
|
||||
instruction_template: 'Vicuna-v1.1'
|
||||
.*CausalLM-RP-34B:
|
||||
instruction_template: 'ChatML'
|
||||
34b-beta:
|
||||
instruction_template: 'ChatML'
|
||||
.*airoboros-3_1-yi-34b-200k:
|
||||
instruction_template: 'Llama-v2'
|
||||
.*chatqa:
|
||||
instruction_template: 'NVIDIA-ChatQA'
|
||||
Loading…
Add table
Add a link
Reference in a new issue