From 4979e87e48c78d5e3186e4d9b2fbc8b30e86164f Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 28 Mar 2026 11:49:47 -0300
Subject: [PATCH] Add ik_llama.cpp support via ik_llama_cpp_binaries package

---
 .github/workflows/build-everything-tgw.yml    |  35 +++
 .../build-portable-release-ik-cuda.yml        | 179 +++++++++++++++
 .../workflows/build-portable-release-ik.yml   | 205 ++++++++++++++++++
 modules/llama_cpp_server.py                   |  21 +-
 modules/loaders.py                            |   2 +
 modules/shared.py                             |   2 +-
 modules/ui_model_menu.py                      |   3 +
 requirements/full/requirements.txt            |   6 +-
 requirements/full/requirements_amd.txt        |   4 +-
 .../full/requirements_apple_intel.txt         |   3 +-
 .../full/requirements_apple_silicon.txt       |   3 +-
 requirements/full/requirements_cpu_only.txt   |   6 +-
 requirements/portable/requirements.txt        |   4 +-
 requirements/portable/requirements_amd.txt    |   4 +-
 .../portable/requirements_apple_intel.txt     |   2 +-
 .../portable/requirements_apple_silicon.txt   |   2 +-
 .../portable/requirements_cpu_only.txt        |   4 +-
 .../portable/requirements_cuda131.txt         |   4 +-
 requirements/portable/requirements_vulkan.txt |   4 +-
 19 files changed, 469 insertions(+), 24 deletions(-)
 create mode 100644 .github/workflows/build-portable-release-ik-cuda.yml
 create mode 100644 .github/workflows/build-portable-release-ik.yml

diff --git a/.github/workflows/build-everything-tgw.yml b/.github/workflows/build-everything-tgw.yml
index 9322f859..4de591f4 100644
--- a/.github/workflows/build-everything-tgw.yml
+++ b/.github/workflows/build-everything-tgw.yml
@@ -68,3 +68,38 @@ jobs:
     with:
       version: ${{ inputs.version }}
       config: 'os:macos-15-intel,macos-14'
+
+  build_release_ik_cuda_windows:
+    name: ik CUDA Windows
+    uses: ./.github/workflows/build-portable-release-ik-cuda.yml
+    with:
+      version: ${{ inputs.version }}
+      config: 'os:windows-2022'
+
+  build_release_ik_cuda_linux:
+    name: ik CUDA Linux
+    uses: ./.github/workflows/build-portable-release-ik-cuda.yml
+    with:
+      version: ${{ inputs.version }}
+      config: 'os:ubuntu-22.04'
+
+  build_release_ik_cpu_windows:
+    name: ik CPU Windows
+    uses: ./.github/workflows/build-portable-release-ik.yml
+    with:
+      version: ${{ inputs.version }}
+      config: 'os:windows-2022'
+
+  build_release_ik_cpu_linux:
+    name: ik CPU Linux
+    uses: ./.github/workflows/build-portable-release-ik.yml
+    with:
+      version: ${{ inputs.version }}
+      config: 'os:ubuntu-22.04'
+
+  build_release_ik_macos:
+    name: ik macOS
+    uses: ./.github/workflows/build-portable-release-ik.yml
+    with:
+      version: ${{ inputs.version }}
+      config: 'os:macos-14'
diff --git a/.github/workflows/build-portable-release-ik-cuda.yml b/.github/workflows/build-portable-release-ik-cuda.yml
new file mode 100644
index 00000000..40b4b92f
--- /dev/null
+++ b/.github/workflows/build-portable-release-ik-cuda.yml
@@ -0,0 +1,179 @@
+name: Build ik CUDA
+
+on:
+  workflow_dispatch:
+    inputs:
+      version:
+        description: 'Version tag of text-generation-webui to build: v3.0'
+        default: 'v3.0'
+        required: true
+        type: string
+      config:
+        description: 'Override configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
+        default: 'Default'
+        required: false
+        type: string
+      exclude:
+        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
+        default: 'None'
+        required: false
+        type: string
+  workflow_call:
+    inputs:
+      version:
+        description: 'Version tag of text-generation-webui to build: v3.0'
+        default: 'v3.0'
+        required: true
+        type: string
+      config:
+        description: 'Configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
+        default: 'Default'
+        required: false
+        type: string
+      exclude:
+        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
+        default: 'None'
+        required: false
+        type: string
+
+permissions:
+  contents: write
+
+jobs:
+  define_matrix:
+    name: Define Build Matrix
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    defaults:
+      run:
+        shell: pwsh
+    env:
+      CONFIGIN: ${{ inputs.config }}
+      EXCLUDEIN: ${{ inputs.exclude }}
+
+    steps:
+      - name: Define Job Output
+        id: set-matrix
+        run: |
+          $matrix = @{
+              'os' = @('ubuntu-22.04', 'windows-2022')
+              'pyver' = @("3.13")
+              'cuda' = @("12.4", "13.1")
+          }
+
+          if ($env:CONFIGIN -ne 'Default') {$env:CONFIGIN.split(';').foreach({$matrix[$_.split(':')[0]] = $_.split(':')[1].split(',')})}
+
+          if ($env:EXCLUDEIN -ne 'None') {
+              $exclusions = @()
+              $exclusions += $env:EXCLUDEIN.split(';').replace(':','=').replace(',',"`n") | ConvertFrom-StringData
+              $matrix['exclude'] = $exclusions
+          }
+
+          $matrixOut = ConvertTo-Json $matrix -Compress
+          Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT
+
+  build_wheels:
+    name: ${{ matrix.os }} ${{ matrix.pyver }} CUDA ${{ matrix.cuda }}
+    needs: define_matrix
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
+    defaults:
+      run:
+        shell: pwsh
+    env:
+      PCKGVER: ${{ inputs.version }}
+
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          repository: 'oobabooga/text-generation-webui'
+          ref: ${{ inputs.version }}
+          submodules: 'recursive'
+
+      - uses: actions/setup-python@v6
+        with:
+          python-version: ${{ matrix.pyver }}
+
+      - name: Build Package
+        shell: bash
+        run: |
+          VERSION_CLEAN="${{ inputs.version }}"
+          VERSION_CLEAN="${VERSION_CLEAN#v}"
+          cd ..
+          cp -r text-generation-webui "text-generation-webui-${VERSION_CLEAN}"
+          cd "text-generation-webui-${VERSION_CLEAN}"
+
+          # Remove extensions that need additional requirements
+          allowed=("character_bias" "gallery" "sd_api_pictures")
+          find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf
+
+          # Define common variables
+          CUDA_VERSION="${{ matrix.cuda }}"
+          VERSION="${{ inputs.version }}"
+
+          # 1. Set platform-specific variables
+          if [[ "$RUNNER_OS" == "Windows" ]]; then
+            PLATFORM="windows"
+            PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-pc-windows-msvc-install_only_stripped.tar.gz"
+            PIP_PATH="portable_env/python.exe -m pip"
+            PACKAGES_PATH="portable_env/Lib/site-packages"
+            rm start_linux.sh start_macos.sh
+          else
+            PLATFORM="linux"
+            PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz"
+            PIP_PATH="portable_env/bin/python -m pip"
+            PACKAGES_PATH="portable_env/lib/python3.13/site-packages"
+            rm start_macos.sh start_windows.bat
+          fi
+
+          # 2. Download and extract Python
+          cd ..
+          echo "Downloading Python for $PLATFORM..."
+          curl -L -o python-build.tar.gz "$PYTHON_URL"
+          tar -xzf python-build.tar.gz
+          mv python "text-generation-webui-${VERSION_CLEAN}/portable_env"
+
+          # 3. Prepare requirements file based on CUDA version
+          cd "text-generation-webui-${VERSION_CLEAN}"
+          if [[ "$CUDA_VERSION" == "13.1" ]]; then
+            REQ_FILE="requirements/portable/requirements_cuda131.txt"
+          else
+            REQ_FILE="requirements/portable/requirements.txt"
+          fi
+
+          # 4. Swap llama.cpp wheels for ik_llama.cpp and inject --ik into start scripts
+          sed -i 's|/llama_cpp_binaries-|/ik_llama_cpp_binaries-|g' "$REQ_FILE"
+          sed -i 's/--portable/--portable --ik/g' start_linux.sh start_windows.bat start_macos.sh 2>/dev/null || true
+
+          # 5. Install packages
+          echo "Installing Python packages from $REQ_FILE..."
+          $PIP_PATH install --target="./$PACKAGES_PATH" -r "$REQ_FILE"
+
+          # 6. Clean up
+          rm -rf .git cmd* update_wizard* Colab-TextGen-GPU.ipynb docker setup.cfg .github .gitignore requirements/ one_click.py
+
+          # 7. Create archive
+          cd ..
+          if [[ "$RUNNER_OS" == "Windows" ]]; then
+            ARCHIVE_NAME="textgen-portable-ik-${VERSION_CLEAN}-${PLATFORM}-cuda${CUDA_VERSION}.zip"
+            echo "Creating archive: $ARCHIVE_NAME"
+            powershell -Command "Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
+          else
+            ARCHIVE_NAME="textgen-portable-ik-${VERSION_CLEAN}-${PLATFORM}-cuda${CUDA_VERSION}.tar.gz"
+            echo "Creating archive: $ARCHIVE_NAME"
+            tar czf "$ARCHIVE_NAME" "text-generation-webui-${VERSION_CLEAN}"
+          fi
+
+      - name: Upload files to a GitHub release
+        id: upload-release
+        uses: svenstaro/upload-release-action@2.7.0
+        continue-on-error: true
+        with:
+          repo_token: ${{ secrets.GITHUB_TOKEN }}
+          file: ../textgen-portable-ik-*
+          tag: ${{ inputs.version }}
+          file_glob: true
+          make_latest: false
+          overwrite: true
diff --git a/.github/workflows/build-portable-release-ik.yml b/.github/workflows/build-portable-release-ik.yml
new file mode 100644
index 00000000..afb2e763
--- /dev/null
+++ b/.github/workflows/build-portable-release-ik.yml
@@ -0,0 +1,205 @@
+name: Build ik CPU and macOS
+
+on:
+  workflow_dispatch:
+    inputs:
+      version:
+        description: 'Version tag of text-generation-webui to build: v3.0'
+        default: 'v3.0'
+        required: true
+        type: string
+      config:
+        description: 'Override configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
+        default: 'Default'
+        required: false
+        type: string
+      exclude:
+        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
+        default: 'None'
+        required: false
+        type: string
+  workflow_call:
+    inputs:
+      version:
+        description: 'Version tag of text-generation-webui to build: v3.0'
+        default: 'v3.0'
+        required: true
+        type: string
+      config:
+        description: 'Configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
+        default: 'Default'
+        required: false
+        type: string
+      exclude:
+        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
+        default: 'None'
+        required: false
+        type: string
+
+permissions:
+  contents: write
+
+jobs:
+  define_matrix:
+    name: Define Build Matrix
+    runs-on: ubuntu-latest
+    outputs:
+      matrix: ${{ steps.set-matrix.outputs.matrix }}
+    defaults:
+      run:
+        shell: pwsh
+    env:
+      CONFIGIN: ${{ inputs.config }}
+      EXCLUDEIN: ${{ inputs.exclude }}
+
+    steps:
+      - name: Define Job Output
+        id: set-matrix
+        run: |
+          $matrix = @{
+              'os' = @('ubuntu-22.04', 'windows-2022', 'macos-14')
+              'pyver' = @("3.13")
+          }
+
+          if ($env:CONFIGIN -ne 'Default') {$env:CONFIGIN.split(';').foreach({$matrix[$_.split(':')[0]] = $_.split(':')[1].split(',')})}
+
+          if ($env:EXCLUDEIN -ne 'None') {
+              $exclusions = @()
+              $exclusions += $env:EXCLUDEIN.split(';').replace(':','=').replace(',',"`n") | ConvertFrom-StringData
+              $matrix['exclude'] = $exclusions
+          }
+
+          $matrixOut = ConvertTo-Json $matrix -Compress
+          Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT
+
+  build_wheels:
+    name: ${{ matrix.os }} ${{ matrix.pyver }}
+    needs: define_matrix
+    runs-on: ${{ matrix.os }}
+    strategy:
+      matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
+    defaults:
+      run:
+        shell: pwsh
+    env:
+      PCKGVER: ${{ inputs.version }}
+
+    steps:
+      - uses: actions/checkout@v6
+        with:
+          repository: 'oobabooga/text-generation-webui'
+          ref: ${{ inputs.version }}
+          submodules: 'recursive'
+
+      - uses: actions/setup-python@v6
+        with:
+          python-version: ${{ matrix.pyver }}
+
+      - name: Build Package
+        shell: bash
+        run: |
+          VERSION_CLEAN="${{ inputs.version }}"
+          VERSION_CLEAN="${VERSION_CLEAN#v}"
+          cd ..
+          cp -r text-generation-webui "text-generation-webui-${VERSION_CLEAN}"
+          cd "text-generation-webui-${VERSION_CLEAN}"
+
+          # Remove extensions that need additional requirements
+          allowed=("character_bias" "gallery" "sd_api_pictures")
+          find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf
+
+          # Define common variables
+          VERSION="${{ inputs.version }}"
+          OS_TYPE="${{ matrix.os }}"
+
+          # 1. Set platform-specific variables
+          if [[ "$RUNNER_OS" == "Windows" ]]; then
+            PLATFORM="windows-cpu"
+            PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-pc-windows-msvc-install_only_stripped.tar.gz"
+            PIP_PATH="portable_env/python.exe -m pip"
+            PACKAGES_PATH="portable_env/Lib/site-packages"
+            rm start_linux.sh start_macos.sh
+          elif [[ "$RUNNER_OS" == "macOS" ]]; then
+            if [[ "$OS_TYPE" == "macos-15-intel" ]]; then
+              PLATFORM="macos-x86_64"
+              PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-apple-darwin-install_only_stripped.tar.gz"
+              REQ_TYPE="apple_intel"
+            else
+              PLATFORM="macos-arm64"
+              PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-aarch64-apple-darwin-install_only_stripped.tar.gz"
+              REQ_TYPE="apple_silicon"
+            fi
+            PIP_PATH="portable_env/bin/python -m pip"
+            PACKAGES_PATH="portable_env/lib/python3.13/site-packages"
+            rm start_linux.sh start_windows.bat
+          else
+            # Linux case
+            PLATFORM="linux-cpu"
+            PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20260303/cpython-3.13.12+20260303-x86_64-unknown-linux-gnu-install_only_stripped.tar.gz"
+            PIP_PATH="portable_env/bin/python -m pip"
+            PACKAGES_PATH="portable_env/lib/python3.13/site-packages"
+            rm start_macos.sh start_windows.bat
+          fi
+
+          # 2. Download and extract Python
+          echo "Downloading Python for $PLATFORM..."
+          cd ..
+          curl -L -o python-build.tar.gz "$PYTHON_URL"
+          tar -xzf python-build.tar.gz
+          mv python "text-generation-webui-${VERSION_CLEAN}/portable_env"
+
+          # 3. Prepare requirements file based on platform
+          cd "text-generation-webui-${VERSION_CLEAN}"
+
+          # Select requirements file based on platform
+          if [[ "$RUNNER_OS" == "macOS" ]]; then
+            if [[ "$OS_TYPE" == "macos-15-intel" ]]; then
+              REQ_FILE="requirements/portable/requirements_apple_intel.txt"
+            else
+              REQ_FILE="requirements/portable/requirements_apple_silicon.txt"
+            fi
+          else
+            REQ_FILE="requirements/portable/requirements_cpu_only.txt"
+          fi
+
+          echo "Using requirements file: $REQ_FILE"
+
+          # 4. Swap llama.cpp wheels for ik_llama.cpp and inject --ik into start scripts
+          if [[ "$RUNNER_OS" == "macOS" ]]; then
+            sed -i '' 's|/llama_cpp_binaries-|/ik_llama_cpp_binaries-|g' "$REQ_FILE"
+            sed -i '' 's/--portable/--portable --ik/g' start_macos.sh
+          else
+            sed -i 's|/llama_cpp_binaries-|/ik_llama_cpp_binaries-|g' "$REQ_FILE"
+            sed -i 's/--portable/--portable --ik/g' start_linux.sh start_windows.bat 2>/dev/null || true
+          fi
+
+          # 5. Install packages
+          echo "Installing Python packages from $REQ_FILE..."
+          $PIP_PATH install --target="./$PACKAGES_PATH" -r "$REQ_FILE"
+
+          # 6. Clean up
+          rm -rf .git cmd* update_wizard* Colab-TextGen-GPU.ipynb docker setup.cfg .github .gitignore requirements/ one_click.py
+
+          # 7. Create archive
+          cd ..
+          if [[ "$RUNNER_OS" == "Windows" ]]; then
+            ARCHIVE_NAME="textgen-portable-ik-${VERSION_CLEAN}-${PLATFORM}.zip"
+            echo "Creating archive: $ARCHIVE_NAME"
+            powershell -Command "Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
+          else
+            ARCHIVE_NAME="textgen-portable-ik-${VERSION_CLEAN}-${PLATFORM}.tar.gz"
+            echo "Creating archive: $ARCHIVE_NAME"
+            tar czf "$ARCHIVE_NAME" "text-generation-webui-${VERSION_CLEAN}"
+          fi
+
+      - name: Upload files to a GitHub release
+        id: upload-release
+        uses: svenstaro/upload-release-action@2.7.0
+        continue-on-error: true
+        with:
+          repo_token: ${{ secrets.GITHUB_TOKEN }}
+          file: ../textgen-portable-ik-*
+          tag: ${{ inputs.version }}
+          file_glob: true
+          make_latest: false
+          overwrite: true
diff --git a/modules/llama_cpp_server.py b/modules/llama_cpp_server.py
index 9b9756a9..5e2decfa 100644
--- a/modules/llama_cpp_server.py
+++ b/modules/llama_cpp_server.py
@@ -11,7 +11,6 @@ import time
 from pathlib import Path
 from typing import Any, List
 
-import llama_cpp_binaries
 import requests
 
 from modules import shared
@@ -357,7 +356,16 @@ class LlamaServer:
         """Start the llama.cpp server and wait until it's ready."""
         # Determine the server path
         if self.server_path is None:
-            self.server_path = llama_cpp_binaries.get_binary_path()
+            if shared.args.ik:
+                try:
+                    import ik_llama_cpp_binaries
+                except ImportError:
+                    raise ImportError("--ik requires the ik_llama_cpp_binaries package. Install the ik_llama_cpp_binaries wheel for your platform (see the requirements files).")
+
+                self.server_path = ik_llama_cpp_binaries.get_binary_path()
+            else:
+                import llama_cpp_binaries
+                self.server_path = llama_cpp_binaries.get_binary_path()
 
         # Build the command
         cmd = [
@@ -616,10 +624,12 @@ def filter_stderr_with_progress(process_stderr):
 def _patch_cmd_for_ik(cmd):
     """
     Rewrite upstream llama.cpp flags to ik_llama.cpp equivalents:
-    --no-webui → --webui none
+    --no-webui → --webui none
     --fit off → (removed)
     --fit on / --fit-ctx → --fit (bare flag)
     --fit-target → --fit-margin
+    --cache-reuse → (removed, unsupported)
+    --swa-full → (removed, unsupported)
     """
     patched = []
     i = 0
@@ -635,9 +645,14 @@ def _patch_cmd_for_ik(cmd):
             patched.append("--fit")  # "off" → drop entirely
         elif arg == "--fit-ctx":
+            patched.append("--fit")
             i += 1  # skip the value
         elif arg == "--fit-target":
             patched.append("--fit-margin")
+        elif arg == "--cache-reuse":
+            i += 1  # skip the value
+        elif arg == "--swa-full":
+            pass  # bare flag, just drop it
         else:
             patched.append(arg)
diff --git a/modules/loaders.py b/modules/loaders.py
index c90f2ebb..cb1f3d3b 100644
--- a/modules/loaders.py
+++ b/modules/loaders.py
@@ -20,6 +20,7 @@ loaders_and_params = OrderedDict({
         'no_mmap',
         'mlock',
         'numa',
+        'ik',
         'parallel',
         'model_draft',
         'draft_max',
@@ -345,6 +346,7 @@ def list_model_elements():
         'spec_ngram_size_m',
         'spec_ngram_min_hits',
         'mmproj',
+        'ik',
     ]
diff --git a/modules/shared.py b/modules/shared.py
index c50736d7..13843f0c 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -110,7 +110,7 @@ group.add_argument('--numa', action='store_true', help='Activate NUMA task alloc
 group.add_argument('--parallel', type=int, default=1, help='Number of parallel request slots. The context size is divided equally among slots. For example, to have 4 slots with 8192 context each, set ctx_size to 32768.')
 group.add_argument('--fit-target', type=str, default='512', help='Target VRAM margin per device for auto GPU layers, comma-separated list of values in MiB. A single value is broadcast across all devices.')
 group.add_argument('--extra-flags', type=str, default=None, help='Extra flags to pass to llama-server. Example: "--jinja --rpc 192.168.1.100:50052"')
-group.add_argument('--ik', action='store_true', help='Use ik_llama.cpp instead of upstream llama.cpp. To install: build ik_llama.cpp, then delete all files inside /lib/pythonX.Y/site-packages/llama_cpp_binaries/bin/ and copy or symlink the ik_llama.cpp build outputs into that folder.')
+group.add_argument('--ik', action='store_true', help='Use ik_llama.cpp instead of upstream llama.cpp. Requires the ik_llama_cpp_binaries package to be installed.')
 
 # Transformers/Accelerate
 group = parser.add_argument_group('Transformers/Accelerate')
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index 5b7621a7..16505afa 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -51,6 +51,9 @@ def create_ui():
                 with gr.Column():
                     shared.gradio['vram_info'] = gr.HTML(value=get_initial_vram_info())
 
+                    if not shared.args.portable:
+                        shared.gradio['ik'] = gr.Checkbox(label="ik", value=shared.args.ik, info='Use ik_llama.cpp instead of upstream llama.cpp.')
+
                     shared.gradio['cpu_moe'] = gr.Checkbox(label="cpu-moe", value=shared.args.cpu_moe, info='Move the experts to the CPU. Saves VRAM on MoE models.')
                     shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming-llm", value=shared.args.streaming_llm, info='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
                     shared.gradio['load_in_8bit'] = gr.Checkbox(label="load-in-8bit", value=shared.args.load_in_8bit)
diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt
index 56619627..100c99d1 100644
--- a/requirements/full/requirements.txt
+++ b/requirements/full/requirements.txt
@@ -40,8 +40,10 @@ sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.99.0/llama_cpp_binaries-0.99.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.99.0/llama_cpp_binaries-0.99.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.99.0/ik_llama_cpp_binaries-0.99.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.99.0/ik_llama_cpp_binaries-0.99.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
 https://github.com/turboderp-org/exllamav3/releases/download/v0.0.26/exllamav3-0.0.26+cu128.torch2.9.0-cp313-cp313-win_amd64.whl; platform_system == "Windows" and python_version == "3.13"
 https://github.com/turboderp-org/exllamav3/releases/download/v0.0.26/exllamav3-0.0.26+cu128.torch2.9.0-cp313-cp313-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.13"
 https://github.com/kingbri1/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu128torch2.9.0cxx11abiFALSE-cp313-cp313-win_amd64.whl; platform_system == "Windows" and python_version == "3.13"
diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt
index 620683cc..66fa4ac7 100644
--- a/requirements/full/requirements_amd.txt
+++ b/requirements/full/requirements_amd.txt
@@ -37,5 +37,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.99.0/llama_cpp_binaries-0.99.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.99.0/llama_cpp_binaries-0.99.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt
index b1f109b2..98dc8be6 100644
--- a/requirements/full/requirements_apple_intel.txt
+++ b/requirements/full/requirements_apple_intel.txt
@@ -37,4 +37,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.99.0/llama_cpp_binaries-0.99.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.99.0/ik_llama_cpp_binaries-0.99.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt
index a54476a9..e33264cf 100644
--- a/requirements/full/requirements_apple_silicon.txt
+++ b/requirements/full/requirements_apple_silicon.txt
@@ -37,4 +37,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.99.0/llama_cpp_binaries-0.99.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.99.0/ik_llama_cpp_binaries-0.99.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt
index be82c904..cd083f6d 100644
--- a/requirements/full/requirements_cpu_only.txt
+++ b/requirements/full/requirements_cpu_only.txt
@@ -37,5 +37,7 @@ sse-starlette==1.6.5
 tiktoken
 
 # llama.cpp (CPU only)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.99.0/llama_cpp_binaries-0.99.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.99.0/llama_cpp_binaries-0.99.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.99.0/ik_llama_cpp_binaries-0.99.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.99.0/ik_llama_cpp_binaries-0.99.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt
index 188da380..67182225 100644
--- a/requirements/portable/requirements.txt
+++ b/requirements/portable/requirements.txt
@@ -23,5 +23,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.99.0/llama_cpp_binaries-0.99.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.99.0/llama_cpp_binaries-0.99.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_amd.txt b/requirements/portable/requirements_amd.txt
index 4562b6d0..5f5b2f8d 100644
--- a/requirements/portable/requirements_amd.txt
+++ b/requirements/portable/requirements_amd.txt
@@ -23,5 +23,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.99.0/llama_cpp_binaries-0.99.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.99.0/llama_cpp_binaries-0.99.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt
index 04dcf25e..f5f7d6ee 100644
--- a/requirements/portable/requirements_apple_intel.txt
+++ b/requirements/portable/requirements_apple_intel.txt
@@ -23,4 +23,4 @@ sse-starlette==1.6.5
 tiktoken
 
 # Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.99.0/llama_cpp_binaries-0.99.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt
index 4b8af78a..e51fc296 100644
--- a/requirements/portable/requirements_apple_silicon.txt
+++ b/requirements/portable/requirements_apple_silicon.txt
@@ -23,4 +23,4 @@ sse-starlette==1.6.5
 tiktoken
 
 # Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.99.0/llama_cpp_binaries-0.99.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt
index 5b0eaf89..683f94c8 100644
--- a/requirements/portable/requirements_cpu_only.txt
+++ b/requirements/portable/requirements_cpu_only.txt
@@ -23,5 +23,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # llama.cpp (CPU only)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.99.0/llama_cpp_binaries-0.99.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.99.0/llama_cpp_binaries-0.99.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
diff --git a/requirements/portable/requirements_cuda131.txt b/requirements/portable/requirements_cuda131.txt
index 90b3234f..942d0877 100644
--- a/requirements/portable/requirements_cuda131.txt
+++ b/requirements/portable/requirements_cuda131.txt
@@ -23,5 +23,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.99.0/llama_cpp_binaries-0.99.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.99.0/llama_cpp_binaries-0.99.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt
index ea72b4ec..ae784e00 100644
--- a/requirements/portable/requirements_vulkan.txt
+++ b/requirements/portable/requirements_vulkan.txt
@@ -23,5 +23,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # Vulkan wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.99.0/llama_cpp_binaries-0.99.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.99.0/llama_cpp_binaries-0.99.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
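
The _patch_cmd_for_ik hunks above only show the changed branches. For review convenience, a minimal self-contained sketch of the full argv rewrite follows; the --no-webui and --fit on/off branches are reconstructed from the docstring rather than copied from the file, so treat those two as assumptions.

    # Sketch of the argv rewrite that _patch_cmd_for_ik performs after this patch.
    # Only the --fit-ctx / --fit-target / --cache-reuse / --swa-full branches are
    # verbatim from the hunks above; the --no-webui and --fit branches are
    # reconstructed from the docstring.
    def patch_cmd_for_ik(cmd):
        patched = []
        i = 0
        while i < len(cmd):
            arg = cmd[i]
            if arg == "--no-webui":
                patched.extend(["--webui", "none"])  # --no-webui → --webui none
            elif arg == "--fit":
                value = cmd[i + 1] if i + 1 < len(cmd) else ""
                i += 1  # skip the on/off value
                if value == "on":
                    patched.append("--fit")  # "off" → drop entirely
            elif arg == "--fit-ctx":
                patched.append("--fit")
                i += 1  # skip the value
            elif arg == "--fit-target":
                patched.append("--fit-margin")  # its value passes through below
            elif arg == "--cache-reuse":
                i += 1  # skip the value (unsupported by ik_llama.cpp)
            elif arg == "--swa-full":
                pass  # bare flag, just drop it (unsupported)
            else:
                patched.append(arg)
            i += 1
        return patched

    if __name__ == "__main__":
        cmd = ["llama-server", "--no-webui", "--fit-target", "512",
               "--cache-reuse", "256", "--swa-full", "--ctx-size", "32768"]
        print(patch_cmd_for_ik(cmd))
        # ['llama-server', '--webui', 'none', '--fit-margin', '512',
        #  '--ctx-size', '32768']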
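
Step 4 of both new workflows rewrites the stock portable requirements file and start scripts in place rather than maintaining separate ik variants. A Python equivalent of the two sed substitutions, for illustration only; the sample start-script line is hypothetical, since the start scripts themselves are not part of this diff:

    # Python equivalent of the two sed substitutions in step 4 of the ik workflows.
    req_line = ("https://github.com/oobabooga/llama-cpp-binaries/releases/download/"
                "v0.99.0/llama_cpp_binaries-0.99.0+cu124-py3-none-linux_x86_64.whl")
    start_line = "python server.py --portable --api"  # hypothetical start-script line

    # sed -i 's|/llama_cpp_binaries-|/ik_llama_cpp_binaries-|g' "$REQ_FILE"
    print(req_line.replace("/llama_cpp_binaries-", "/ik_llama_cpp_binaries-"))
    # .../v0.99.0/ik_llama_cpp_binaries-0.99.0+cu124-py3-none-linux_x86_64.whl

    # sed -i 's/--portable/--portable --ik/g' start_linux.sh ...
    print(start_line.replace("--portable", "--portable --ik"))
    # python server.py --portable --ik --api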
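
Since LlamaServer.start() now resolves the server binary through ik_llama_cpp_binaries when --ik is set, and through llama_cpp_binaries otherwise, a quick way to check what a given environment can launch is to probe both packages. This assumes only the get_binary_path() API that the patched code itself calls:

    # Report which llama-server binaries this environment resolves, using the
    # get_binary_path() API that the patched LlamaServer.start() relies on.
    import importlib

    for pkg in ("llama_cpp_binaries", "ik_llama_cpp_binaries"):
        try:
            mod = importlib.import_module(pkg)
            print(f"{pkg}: {mod.get_binary_path()}")
        except ImportError:
            print(f"{pkg}: not installed")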