Mirror of https://github.com/oobabooga/text-generation-webui.git (synced 2025-12-06 07:12:10 +01:00)

Commit 1afe0827ba
.github/workflows/build-everything-tgw.yml (vendored, +7)

@@ -41,6 +41,13 @@ jobs:
       version: ${{ inputs.version }}
       config: 'os:ubuntu-22.04'
 
+  build_release_rocm_linux:
+    name: ROCm Linux
+    uses: ./.github/workflows/build-portable-release-rocm.yml
+    with:
+      version: ${{ inputs.version }}
+      config: 'os:ubuntu-22.04'
+
   build_release_cpu_windows:
     name: CPU Windows
     uses: ./.github/workflows/build-portable-release.yml

.github/workflows/build-portable-release-rocm.yml (vendored, new file, +165)

@@ -0,0 +1,165 @@
name: Build ROCm

on:
  workflow_dispatch:
    inputs:
      version:
        description: 'Version tag of text-generation-webui to build: v3.0'
        default: 'v3.0'
        required: true
        type: string
      config:
        description: 'Override configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
        default: 'Default'
        required: false
        type: string
      exclude:
        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
        default: 'None'
        required: false
        type: string
  workflow_call:
    inputs:
      version:
        description: 'Version tag of text-generation-webui to build: v3.0'
        default: 'v3.0'
        required: true
        type: string
      config:
        description: 'Configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
        default: 'Default'
        required: false
        type: string
      exclude:
        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
        default: 'None'
        required: false
        type: string

permissions:
  contents: write

jobs:
  define_matrix:
    name: Define Build Matrix
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    defaults:
      run:
        shell: pwsh
    env:
      CONFIGIN: ${{ inputs.config }}
      EXCLUDEIN: ${{ inputs.exclude }}

    steps:
      - name: Define Job Output
        id: set-matrix
        run: |
          $matrix = @{
              'os' = @('ubuntu-22.04')
              'pyver' = @("3.11")
              'avx' = @("AVX2")
          }

          if ($env:CONFIGIN -ne 'Default') {$env:CONFIGIN.split(';').foreach({$matrix[$_.split(':')[0]] = $_.split(':')[1].split(',')})}

          if ($env:EXCLUDEIN -ne 'None') {
              $exclusions = @()
              $exclusions += $env:EXCLUDEIN.split(';').replace(':','=').replace(',',"`n") | ConvertFrom-StringData
              $matrix['exclude'] = $exclusions
          }

          $matrixOut = ConvertTo-Json $matrix -Compress
          Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT

  build_wheels:
    name: ${{ matrix.os }} ${{ matrix.pyver }} CPU ${{ matrix.avx }}
    needs: define_matrix
    runs-on: ${{ matrix.os }}
    strategy:
      matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
    defaults:
      run:
        shell: pwsh
    env:
      AVXVER: ${{ matrix.avx }}
      PCKGVER: ${{ inputs.version }}

    steps:
      - uses: actions/checkout@v4
        with:
          repository: 'oobabooga/text-generation-webui'
          ref: ${{ inputs.version }}
          submodules: 'recursive'

      - uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.pyver }}

      - name: Build Package
        shell: bash
        run: |
          VERSION_CLEAN="${{ inputs.version }}"
          VERSION_CLEAN="${VERSION_CLEAN#v}"
          cd ..
          cp -r text-generation-webui "text-generation-webui-${VERSION_CLEAN}"
          cd "text-generation-webui-${VERSION_CLEAN}"

          # Remove extensions that need additional requirements
          allowed=("character_bias" "gallery" "openai" "sd_api_pictures")
          find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf

          # Define common variables
          AVX_SUPPORT="${{ matrix.avx }}"
          VERSION="${{ inputs.version }}"

          # 1. Set platform-specific variables (Linux only for ROCm)
          PLATFORM="linux"
          PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20250409/cpython-3.11.12+20250409-x86_64-unknown-linux-gnu-install_only.tar.gz"
          PIP_PATH="portable_env/bin/python -m pip"
          PACKAGES_PATH="portable_env/lib/python3.11/site-packages"
          rm start_macos.sh start_windows.bat

          # 2. Download and extract Python
          cd ..
          echo "Downloading Python for $PLATFORM..."
          curl -L -o python-build.tar.gz "$PYTHON_URL"
          tar -xzf python-build.tar.gz
          mv python "text-generation-webui-${VERSION_CLEAN}/portable_env"

          # 3. Prepare requirements file based on AVX
          if [[ "$AVX_SUPPORT" == "AVX2" ]]; then
            BASE_REQ_FILE="requirements/portable/requirements_amd.txt"
          else
            BASE_REQ_FILE="requirements/portable/requirements_amd_noavx2.txt"
          fi
          REQ_FILE="$BASE_REQ_FILE"

          cd "text-generation-webui-${VERSION_CLEAN}"

          # 4. Install packages
          echo "Installing Python packages from $REQ_FILE..."
          $PIP_PATH install --target="./$PACKAGES_PATH" -r "$REQ_FILE"

          # 5. Clean up
          rm -rf .git cmd* update_wizard* Colab-TextGen-GPU.ipynb docker setup.cfg .github .gitignore requirements/ one_click.py

          # 6. Create ZIP file
          cd ..
          ZIP_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}-rocm.zip"
          echo "Creating archive: $ZIP_NAME"

          zip -r "$ZIP_NAME" "text-generation-webui-${VERSION_CLEAN}"

      - name: Upload files to a GitHub release
        id: upload-release
        uses: svenstaro/upload-release-action@2.7.0
        continue-on-error: true
        with:
          repo_token: ${{ secrets.GITHUB_TOKEN }}
          file: ../textgen-portable-*.zip
          tag: ${{ inputs.version }}
          file_glob: true
          make_latest: false
          overwrite: true
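Two parts of the new workflow are worth unpacking. First, the define_matrix step compresses the whole override grammar into two PowerShell one-liners. Here is a minimal Python sketch of the same parsing under the input format stated in the descriptions; the PowerShell step above is authoritative, and build_matrix is an invented name used only for illustration:

# Illustrative re-implementation of the define_matrix parsing above;
# the pwsh step is authoritative. Keys map to value lists, and 'exclude'
# becomes a list of dicts, as GitHub Actions matrices expect.
import json

def build_matrix(config: str, exclude: str) -> str:
    matrix = {"os": ["ubuntu-22.04"], "pyver": ["3.11"], "avx": ["AVX2"]}
    if config != "Default":
        # "key1:item1-1,item1-2;key2:item2-1" -> per-key value lists
        for entry in config.split(";"):
            key, _, items = entry.partition(":")
            matrix[key] = items.split(",")
    if exclude != "None":
        # "key1:item1,key2:item2;..." -> one exclusion dict per ';' group
        exclusions = []
        for group in exclude.split(";"):
            pairs = (pair.partition(":") for pair in group.split(","))
            exclusions.append({key: value for key, _, value in pairs})
        matrix["exclude"] = exclusions
    return json.dumps(matrix, separators=(",", ":"))

print(build_matrix("avx:AVX,AVX2", "None"))
# {"os":["ubuntu-22.04"],"pyver":["3.11"],"avx":["AVX","AVX2"]}

Second, the "Remove extensions" step uses printf and sed to turn the allow-list into a grep -E alternation ("character_bias|gallery|openai|sd_api_pictures") and deletes every extension directory that does not match. A behaviorally close Python sketch (exact-name matching rather than grep's substring matching):

# Sketch of the extension allow-list filtering done with find/grep above.
import shutil
from pathlib import Path

allowed = {"character_bias", "gallery", "openai", "sd_api_pictures"}
for entry in Path("extensions").iterdir():
    if entry.is_dir() and entry.name not in allowed:
        shutil.rmtree(entry)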
@@ -326,6 +326,8 @@ class LlamaServer:
         cmd += ["--threads", str(shared.args.threads)]
         if shared.args.threads_batch > 0:
             cmd += ["--threads-batch", str(shared.args.threads_batch)]
+        if shared.args.cpu_moe:
+            cmd.append("--cpu-moe")
         if shared.args.no_mmap:
             cmd.append("--no-mmap")
         if shared.args.mlock:

@@ -6,6 +6,7 @@ import gradio as gr
 loaders_and_params = OrderedDict({
     'llama.cpp': [
         'gpu_layers',
+        'cpu_moe',
         'threads',
         'threads_batch',
         'batch_size',

@@ -66,6 +66,7 @@ group.add_argument('--ctx-size-draft', type=int, default=0, help='Size of the pr
 # llama.cpp
 group = parser.add_argument_group('llama.cpp')
 group.add_argument('--gpu-layers', '--n-gpu-layers', type=int, default=256, metavar='N', help='Number of layers to offload to the GPU.')
+group.add_argument('--cpu-moe', action='store_true', help='Move the experts to the CPU (for MoE models).')
 group.add_argument('--mmproj', type=str, default=None, help='Path to the mmproj file for vision models.')
 group.add_argument('--streaming-llm', action='store_true', help='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
 group.add_argument('--tensor-split', type=str, default=None, help='Split the model across multiple GPUs. Comma-separated list of proportions. Example: 60,40.')

@@ -125,6 +125,7 @@ def list_model_elements():
         'loader',
         'cpu_memory',
         'gpu_layers',
+        'cpu_moe',
         'threads',
         'threads_batch',
         'batch_size',

@@ -50,6 +50,7 @@ def create_ui():
 
         with gr.Column():
             shared.gradio['vram_info'] = gr.HTML(value=get_initial_vram_info())
+            shared.gradio['cpu_moe'] = gr.Checkbox(label="cpu-moe", value=shared.args.cpu_moe, info='Move the experts to the CPU. Saves VRAM on MoE models.')
             shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming-llm", value=shared.args.streaming_llm, info='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
             shared.gradio['load_in_8bit'] = gr.Checkbox(label="load-in-8bit", value=shared.args.load_in_8bit)
             shared.gradio['load_in_4bit'] = gr.Checkbox(label="load-in-4bit", value=shared.args.load_in_4bit)

@@ -94,7 +95,7 @@ def create_ui():
             shared.gradio['num_experts_per_token'] = gr.Number(label="Number of experts per token", value=shared.args.num_experts_per_token, info='Only applies to MoE models like Mixtral.')
 
         with gr.Column():
-            shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='llama.cpp: Use llama-cpp-python compiled without GPU acceleration. Transformers: use PyTorch in CPU mode.')
+            shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='Use PyTorch in CPU mode.')
             shared.gradio['disk'] = gr.Checkbox(label="disk", value=shared.args.disk)
             shared.gradio['row_split'] = gr.Checkbox(label="row_split", value=shared.args.row_split, info='Split the model by rows across GPUs. This may improve multi-gpu performance.')
             shared.gradio['no_kv_offload'] = gr.Checkbox(label="no_kv_offload", value=shared.args.no_kv_offload, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.')

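Taken together, the four hunks above show the recipe for threading a new llama.cpp option through the stack: a --cpu-moe flag in the argument parser, a 'cpu_moe' entry in the loaders registry and in list_model_elements() so the value is tracked per model, a Gradio checkbox in the UI, and finally the bare flag on the llama-server command line. A condensed, self-contained sketch of that flow; the names mirror the diff, but the wiring here is simplified and not the actual module layout:

# Condensed sketch of the cpu_moe plumbing added in this commit; the real
# code spans the argument parser, the loaders registry, the UI, and the
# llama.cpp server wrapper.
import argparse

parser = argparse.ArgumentParser()
group = parser.add_argument_group('llama.cpp')
group.add_argument('--cpu-moe', action='store_true',
                   help='Move the experts to the CPU (for MoE models).')
args = parser.parse_args(['--cpu-moe'])

# Registering the option makes the UI show it for the llama.cpp loader.
loaders_and_params = {'llama.cpp': ['gpu_layers', 'cpu_moe', 'threads']}

# In the server wrapper, boolean args become bare flags on the command.
cmd = ['llama-server']
if args.cpu_moe:
    cmd.append('--cpu-moe')
print(cmd)  # ['llama-server', '--cpu-moe']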
@@ -12,7 +12,7 @@ jinja2==3.1.6
 markdown
 numpy==2.2.*
 pandas
-peft==0.17.*
+peft==0.18.*
 Pillow>=9.5.0
 psutil
 pydantic==2.11.0

@@ -26,7 +26,7 @@ scipy
 sentencepiece
 tensorboard
 transformers==4.57.*
-triton-windows==3.5.0.post21; platform_system == "Windows"
+triton-windows==3.5.1.post21; platform_system == "Windows"
 tqdm
 wandb
 

@@ -40,10 +40,10 @@ sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav3/releases/download/v0.0.12/exllamav3-0.0.12+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav3/releases/download/v0.0.12/exllamav3-0.0.12+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/turboderp-org/exllamav3/releases/download/v0.0.15/exllamav3-0.0.15+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav3/releases/download/v0.0.15/exllamav3-0.0.15+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"

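A pattern worth noting in the wheel hunks: everything after the semicolon is a PEP 508 environment marker, and pip installs the line only where the marker evaluates true. The 0.61.0 llama_cpp_binaries pins drop the python_version == "3.11" guard that the 0.60.0 pins carried (presumably because they are py3-none wheels and not tied to a Python minor version), while the CPython-specific cp311 exllamav2/v3 wheels keep it. A small sketch using the packaging library, which pip vendors for exactly this purpose:

# How pip decides whether a "; marker" requirement applies: it evaluates
# the PEP 508 marker against the target environment.
from packaging.markers import Marker

m = Marker('platform_system == "Linux" and platform_machine == "x86_64"')
print(m.evaluate())  # True only on x86_64 Linux

# Evaluate against an explicit environment instead of the local one:
print(m.evaluate({"platform_system": "Windows", "platform_machine": "AMD64"}))  # False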
@@ -10,7 +10,7 @@ jinja2==3.1.6
 markdown
 numpy==2.2.*
 pandas
-peft==0.17.*
+peft==0.18.*
 Pillow>=9.5.0
 psutil
 pydantic==2.11.0

@@ -24,7 +24,7 @@ scipy
 sentencepiece
 tensorboard
 transformers==4.57.*
-triton-windows==3.5.0.post21; platform_system == "Windows"
+triton-windows==3.5.1.post21; platform_system == "Windows"
 tqdm
 wandb
 

@@ -38,7 +38,7 @@ sse-starlette==1.6.5
 tiktoken
 
 # AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"

@@ -10,7 +10,7 @@ jinja2==3.1.6
 markdown
 numpy==2.2.*
 pandas
-peft==0.17.*
+peft==0.18.*
 Pillow>=9.5.0
 psutil
 pydantic==2.11.0

@@ -24,7 +24,7 @@ scipy
 sentencepiece
 tensorboard
 transformers==4.57.*
-triton-windows==3.5.0.post21; platform_system == "Windows"
+triton-windows==3.5.1.post21; platform_system == "Windows"
 tqdm
 wandb
 

@@ -38,7 +38,7 @@ sse-starlette==1.6.5
 tiktoken
 
 # AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"

@@ -10,7 +10,7 @@ jinja2==3.1.6
 markdown
 numpy==2.2.*
 pandas
-peft==0.17.*
+peft==0.18.*
 Pillow>=9.5.0
 psutil
 pydantic==2.11.0

@@ -24,7 +24,7 @@ scipy
 sentencepiece
 tensorboard
 transformers==4.57.*
-triton-windows==3.5.0.post21; platform_system == "Windows"
+triton-windows==3.5.1.post21; platform_system == "Windows"
 tqdm
 wandb
 

@@ -38,5 +38,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"

@@ -10,7 +10,7 @@ jinja2==3.1.6
 markdown
 numpy==2.2.*
 pandas
-peft==0.17.*
+peft==0.18.*
 Pillow>=9.5.0
 psutil
 pydantic==2.11.0

@@ -24,7 +24,7 @@ scipy
 sentencepiece
 tensorboard
 transformers==4.57.*
-triton-windows==3.5.0.post21; platform_system == "Windows"
+triton-windows==3.5.1.post21; platform_system == "Windows"
 tqdm
 wandb
 

@@ -38,6 +38,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"

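The macOS pins in the two hunks above select wheels by Darwin kernel release rather than by macOS product version: platform_release 23.x corresponds to macOS 14 (Sonoma) and 24.x to macOS 15 (Sequoia), so dropping the 22.x line in the arm64 hunk drops the macOS 13 (Ventura) wheel. A sketch of how one might check the mapping locally; macos_line is a hypothetical helper, not project code, and the output is only meaningful on Darwin:

# Hypothetical check: map the local Darwin kernel release to the macOS
# line the wheel markers above target (Darwin 23.x ~ macOS 14, 24.x ~ 15).
import platform

def macos_line(release: str = platform.release()) -> str:
    major = int(release.split(".")[0])
    # Darwin major 20 -> macOS 11, 21 -> 12, ..., 24 -> 15
    return f"macOS {major - 9}" if major >= 20 else "pre-Big Sur"

print(platform.system(), platform.release(), "->", macos_line())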
@@ -10,7 +10,7 @@ jinja2==3.1.6
 markdown
 numpy==2.2.*
 pandas
-peft==0.17.*
+peft==0.18.*
 Pillow>=9.5.0
 psutil
 pydantic==2.11.0

@@ -24,7 +24,7 @@ scipy
 sentencepiece
 tensorboard
 transformers==4.57.*
-triton-windows==3.5.0.post21; platform_system == "Windows"
+triton-windows==3.5.1.post21; platform_system == "Windows"
 tqdm
 wandb
 

@@ -38,5 +38,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # llama.cpp (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"

@@ -10,7 +10,7 @@ jinja2==3.1.6
 markdown
 numpy==2.2.*
 pandas
-peft==0.17.*
+peft==0.18.*
 Pillow>=9.5.0
 psutil
 pydantic==2.11.0

@@ -24,7 +24,7 @@ scipy
 sentencepiece
 tensorboard
 transformers==4.57.*
-triton-windows==3.5.0.post21; platform_system == "Windows"
+triton-windows==3.5.1.post21; platform_system == "Windows"
 tqdm
 wandb
 

@@ -38,5 +38,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # llama.cpp (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"

@@ -12,7 +12,7 @@ jinja2==3.1.6
 markdown
 numpy==2.2.*
 pandas
-peft==0.17.*
+peft==0.18.*
 Pillow>=9.5.0
 psutil
 pydantic==2.11.0

@@ -26,7 +26,7 @@ scipy
 sentencepiece
 tensorboard
 transformers==4.57.*
-triton-windows==3.5.0.post21; platform_system == "Windows"
+triton-windows==3.5.1.post21; platform_system == "Windows"
 tqdm
 wandb
 

@@ -40,10 +40,10 @@ sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav3/releases/download/v0.0.12/exllamav3-0.0.12+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav3/releases/download/v0.0.12/exllamav3-0.0.12+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/turboderp-org/exllamav3/releases/download/v0.0.15/exllamav3-0.0.15+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav3/releases/download/v0.0.15/exllamav3-0.0.15+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"

@@ -10,7 +10,7 @@ jinja2==3.1.6
 markdown
 numpy==2.2.*
 pandas
-peft==0.17.*
+peft==0.18.*
 Pillow>=9.5.0
 psutil
 pydantic==2.11.0

@@ -24,7 +24,7 @@ scipy
 sentencepiece
 tensorboard
 transformers==4.57.*
-triton-windows==3.5.0.post21; platform_system == "Windows"
+triton-windows==3.5.1.post21; platform_system == "Windows"
 tqdm
 wandb
 

@@ -23,5 +23,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

requirements/portable/requirements_amd.txt (new file, +27)

@@ -0,0 +1,27 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
markdown
numpy==2.2.*
pydantic==2.11.0
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
tqdm

# Gradio
gradio==4.37.*
https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl

# API
flask_cloudflared==0.0.14
sse-starlette==1.6.5
tiktoken

# AMD wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

requirements/portable/requirements_amd_noavx2.txt (new file, +27)

@@ -0,0 +1,27 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
markdown
numpy==2.2.*
pydantic==2.11.0
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
tqdm

# Gradio
gradio==4.37.*
https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl

# API
flask_cloudflared==0.0.14
sse-starlette==1.6.5
tiktoken

# AMD wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+rocm6.4.4avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

@@ -23,6 +23,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"

@@ -23,6 +23,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"

@@ -23,5 +23,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # llama.cpp (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"

@@ -23,5 +23,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # llama.cpp (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"

@@ -23,5 +23,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

@@ -22,6 +22,6 @@ flask_cloudflared==0.0.14
 sse-starlette==1.6.5
 tiktoken
 
-# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+# Vulkan wheels
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

@@ -23,5 +23,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"