From 6a3bf1de927d1cd99442be258eb03529b8dad616 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 9 Nov 2025 19:43:53 -0800 Subject: [PATCH 1/9] Update exllamav3 to 0.0.14 --- requirements/full/requirements.txt | 4 ++-- requirements/full/requirements_noavx2.txt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index aa666532..eea0e5fa 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -42,8 +42,8 @@ tiktoken # CUDA wheels https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/turboderp-org/exllamav3/releases/download/v0.0.12/exllamav3-0.0.12+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/turboderp-org/exllamav3/releases/download/v0.0.12/exllamav3-0.0.12+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/turboderp-org/exllamav3/releases/download/v0.0.14/exllamav3-0.0.14+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/turboderp-org/exllamav3/releases/download/v0.0.14/exllamav3-0.0.14+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index 74758177..ed2e3953 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -42,8 +42,8 @@ tiktoken # CUDA wheels https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/turboderp-org/exllamav3/releases/download/v0.0.12/exllamav3-0.0.12+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/turboderp-org/exllamav3/releases/download/v0.0.12/exllamav3-0.0.12+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" 
+https://github.com/turboderp-org/exllamav3/releases/download/v0.0.14/exllamav3-0.0.14+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/turboderp-org/exllamav3/releases/download/v0.0.14/exllamav3-0.0.14+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" From a26e28bdeaeb726e9f399158023258342f22b153 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 18 Nov 2025 11:24:16 -0800 Subject: [PATCH 2/9] Update exllamav3 to 0.0.15 --- requirements/full/requirements.txt | 4 ++-- requirements/full/requirements_noavx2.txt | 4 ++-- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index eea0e5fa..ddbbcd86 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -42,8 +42,8 @@ tiktoken # CUDA wheels https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/turboderp-org/exllamav3/releases/download/v0.0.14/exllamav3-0.0.14+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/turboderp-org/exllamav3/releases/download/v0.0.14/exllamav3-0.0.14+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/turboderp-org/exllamav3/releases/download/v0.0.15/exllamav3-0.0.15+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/turboderp-org/exllamav3/releases/download/v0.0.15/exllamav3-0.0.15+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index ed2e3953..ec00c407 100644 --- 
a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -42,8 +42,8 @@ tiktoken # CUDA wheels https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/turboderp-org/exllamav3/releases/download/v0.0.14/exllamav3-0.0.14+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/turboderp-org/exllamav3/releases/download/v0.0.14/exllamav3-0.0.14+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/turboderp-org/exllamav3/releases/download/v0.0.15/exllamav3-0.0.15+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/turboderp-org/exllamav3/releases/download/v0.0.15/exllamav3-0.0.15+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" From 3d7e9856a2c3608974ef1f3d791a99bb38c3988f Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 18 Nov 2025 18:51:15 -0300 Subject: [PATCH 3/9] Update peft requirement from ==0.17.* to ==0.18.* in /requirements/full (#7310) --- requirements/full/requirements.txt | 2 +- requirements/full/requirements_amd.txt | 2 +- requirements/full/requirements_amd_noavx2.txt | 2 +- requirements/full/requirements_apple_intel.txt | 2 +- requirements/full/requirements_apple_silicon.txt | 2 +- requirements/full/requirements_cpu_only.txt | 2 +- requirements/full/requirements_cpu_only_noavx2.txt | 2 +- requirements/full/requirements_noavx2.txt | 2 +- requirements/full/requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index ddbbcd86..a4a0e35c 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -12,7 +12,7 @@ jinja2==3.1.6 markdown numpy==2.2.* pandas -peft==0.17.* +peft==0.18.* Pillow>=9.5.0 psutil pydantic==2.11.0 diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index 3bdb0774..7f0391f4 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -10,7 +10,7 @@ jinja2==3.1.6 markdown numpy==2.2.* pandas -peft==0.17.* +peft==0.18.* Pillow>=9.5.0 psutil pydantic==2.11.0 diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index 0ad11242..685e1cb1 100644 --- 
a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -10,7 +10,7 @@ jinja2==3.1.6 markdown numpy==2.2.* pandas -peft==0.17.* +peft==0.18.* Pillow>=9.5.0 psutil pydantic==2.11.0 diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index 6212441e..15fb2459 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -10,7 +10,7 @@ jinja2==3.1.6 markdown numpy==2.2.* pandas -peft==0.17.* +peft==0.18.* Pillow>=9.5.0 psutil pydantic==2.11.0 diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index a24cdd7a..f786f4e1 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -10,7 +10,7 @@ jinja2==3.1.6 markdown numpy==2.2.* pandas -peft==0.17.* +peft==0.18.* Pillow>=9.5.0 psutil pydantic==2.11.0 diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index 62dc99e0..e743bda0 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -10,7 +10,7 @@ jinja2==3.1.6 markdown numpy==2.2.* pandas -peft==0.17.* +peft==0.18.* Pillow>=9.5.0 psutil pydantic==2.11.0 diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index 7f889de2..28793d43 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -10,7 +10,7 @@ jinja2==3.1.6 markdown numpy==2.2.* pandas -peft==0.17.* +peft==0.18.* Pillow>=9.5.0 psutil pydantic==2.11.0 diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index ec00c407..c6afa75a 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -12,7 +12,7 @@ jinja2==3.1.6 markdown numpy==2.2.* pandas -peft==0.17.* +peft==0.18.* Pillow>=9.5.0 psutil pydantic==2.11.0 diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt index 09a12418..2d7debc2 100644 --- a/requirements/full/requirements_nowheels.txt +++ b/requirements/full/requirements_nowheels.txt @@ -10,7 +10,7 @@ jinja2==3.1.6 markdown numpy==2.2.* pandas -peft==0.17.* +peft==0.18.* Pillow>=9.5.0 psutil pydantic==2.11.0 From 4a36b7be5b90db498834d5688f5f511d3f195db9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Tue, 18 Nov 2025 18:51:26 -0300 Subject: [PATCH 4/9] Bump triton-windows in /requirements/full (#7311) --- requirements/full/requirements.txt | 2 +- requirements/full/requirements_amd.txt | 2 +- requirements/full/requirements_amd_noavx2.txt | 2 +- requirements/full/requirements_apple_intel.txt | 2 +- requirements/full/requirements_apple_silicon.txt | 2 +- requirements/full/requirements_cpu_only.txt | 2 +- requirements/full/requirements_cpu_only_noavx2.txt | 2 +- requirements/full/requirements_noavx2.txt | 2 +- requirements/full/requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index a4a0e35c..d9fde6d6 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -26,7 +26,7 @@ scipy sentencepiece tensorboard transformers==4.57.* -triton-windows==3.5.0.post21; platform_system == "Windows" 
+triton-windows==3.5.1.post21; platform_system == "Windows" tqdm wandb diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index 7f0391f4..2b9738cc 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -24,7 +24,7 @@ scipy sentencepiece tensorboard transformers==4.57.* -triton-windows==3.5.0.post21; platform_system == "Windows" +triton-windows==3.5.1.post21; platform_system == "Windows" tqdm wandb diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index 685e1cb1..e07e5f28 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -24,7 +24,7 @@ scipy sentencepiece tensorboard transformers==4.57.* -triton-windows==3.5.0.post21; platform_system == "Windows" +triton-windows==3.5.1.post21; platform_system == "Windows" tqdm wandb diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index 15fb2459..b0330a45 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -24,7 +24,7 @@ scipy sentencepiece tensorboard transformers==4.57.* -triton-windows==3.5.0.post21; platform_system == "Windows" +triton-windows==3.5.1.post21; platform_system == "Windows" tqdm wandb diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index f786f4e1..684c38c9 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -24,7 +24,7 @@ scipy sentencepiece tensorboard transformers==4.57.* -triton-windows==3.5.0.post21; platform_system == "Windows" +triton-windows==3.5.1.post21; platform_system == "Windows" tqdm wandb diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index e743bda0..383d458f 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -24,7 +24,7 @@ scipy sentencepiece tensorboard transformers==4.57.* -triton-windows==3.5.0.post21; platform_system == "Windows" +triton-windows==3.5.1.post21; platform_system == "Windows" tqdm wandb diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index 28793d43..375839bb 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -24,7 +24,7 @@ scipy sentencepiece tensorboard transformers==4.57.* -triton-windows==3.5.0.post21; platform_system == "Windows" +triton-windows==3.5.1.post21; platform_system == "Windows" tqdm wandb diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index c6afa75a..9862519a 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -26,7 +26,7 @@ scipy sentencepiece tensorboard transformers==4.57.* -triton-windows==3.5.0.post21; platform_system == "Windows" +triton-windows==3.5.1.post21; platform_system == "Windows" tqdm wandb diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt index 2d7debc2..818d2244 100644 --- a/requirements/full/requirements_nowheels.txt +++ b/requirements/full/requirements_nowheels.txt @@ -24,7 +24,7 @@ scipy sentencepiece tensorboard transformers==4.57.* -triton-windows==3.5.0.post21; platform_system == "Windows" 
+triton-windows==3.5.1.post21; platform_system == "Windows" tqdm wandb From d85b95bb15cd9cff7a9593be4c6127ff718976fb Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 18 Nov 2025 14:06:04 -0800 Subject: [PATCH 5/9] Update llama.cpp --- requirements/full/requirements.txt | 4 ++-- requirements/full/requirements_amd.txt | 4 ++-- requirements/full/requirements_amd_noavx2.txt | 4 ++-- requirements/full/requirements_apple_intel.txt | 4 ++-- requirements/full/requirements_apple_silicon.txt | 6 +++--- requirements/full/requirements_cpu_only.txt | 4 ++-- requirements/full/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/full/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements.txt | 4 ++-- requirements/portable/requirements_apple_intel.txt | 6 +++--- requirements/portable/requirements_apple_silicon.txt | 6 +++--- requirements/portable/requirements_cpu_only.txt | 4 ++-- requirements/portable/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/portable/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements_vulkan.txt | 4 ++-- requirements/portable/requirements_vulkan_noavx2.txt | 4 ++-- 16 files changed, 35 insertions(+), 35 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index ddbbcd86..16aa0ad7 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -40,8 +40,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.15/exllamav3-0.0.15+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.15/exllamav3-0.0.15+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index 3bdb0774..b339f549 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -38,7 +38,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" 
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index 0ad11242..7acdabb8 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -38,7 +38,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index 6212441e..b658dc96 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -38,5 +38,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index 
a24cdd7a..34a9fd49 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -38,6 +38,6 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index 62dc99e0..c10e85ee 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -38,5 +38,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index 7f889de2..76585ec8 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -38,5 +38,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version 
== "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index ec00c407..1ca034f7 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -40,8 +40,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.15/exllamav3-0.0.15+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.15/exllamav3-0.0.15+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt index f5c10dc4..a876e0b8 100644 --- a/requirements/portable/requirements.txt +++ b/requirements/portable/requirements.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt index 2dc56261..1e151785 100644 --- a/requirements/portable/requirements_apple_intel.txt +++ b/requirements/portable/requirements_apple_intel.txt @@ -23,6 +23,6 @@ sse-starlette==1.6.5 tiktoken # Mac wheels 
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt index 0256fe98..a8c775be 100644 --- a/requirements/portable/requirements_apple_silicon.txt +++ b/requirements/portable/requirements_apple_silicon.txt @@ -23,6 +23,6 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt index 9f68d17c..008c20af 100644 --- a/requirements/portable/requirements_cpu_only.txt +++ b/requirements/portable/requirements_cpu_only.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" 
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt index 7a444f81..d43ab758 100644 --- a/requirements/portable/requirements_cpu_only_noavx2.txt +++ b/requirements/portable/requirements_cpu_only_noavx2.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt index 68cb06b7..c7f4703d 100644 --- a/requirements/portable/requirements_noavx2.txt +++ b/requirements/portable/requirements_noavx2.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt index 350ddb09..49874942 100644 --- a/requirements/portable/requirements_vulkan.txt +++ b/requirements/portable/requirements_vulkan.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" 
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt index 4a88967d..1b4e3bd7 100644 --- a/requirements/portable/requirements_vulkan_noavx2.txt +++ b/requirements/portable/requirements_vulkan_noavx2.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.60.0/llama_cpp_binaries-0.60.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" From c45f35ccc27b02c3db5089ff84837c82f5c086a8 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 18 Nov 2025 14:06:42 -0800 Subject: [PATCH 6/9] Remove the macos 13 wheels (deprecated by GitHub) --- requirements/full/requirements_apple_silicon.txt | 1 - requirements/portable/requirements_apple_intel.txt | 1 - requirements/portable/requirements_apple_silicon.txt | 1 - 3 files changed, 3 deletions(-) diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index 34a9fd49..83161b27 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -40,4 +40,3 @@ tiktoken # Mac wheels https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt index 1e151785..f7e4e6e4 100644 --- a/requirements/portable/requirements_apple_intel.txt +++ b/requirements/portable/requirements_apple_intel.txt @@ -25,4 +25,3 @@ tiktoken # Mac wheels https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" 
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt index a8c775be..99f55d33 100644 --- a/requirements/portable/requirements_apple_silicon.txt +++ b/requirements/portable/requirements_apple_silicon.txt @@ -25,4 +25,3 @@ tiktoken # Mac wheels https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" From 327a234d236064049b252fed32de10d4436cea8a Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 18 Nov 2025 16:24:27 -0800 Subject: [PATCH 7/9] Add ROCm requirements.txt files --- requirements/full/requirements.txt | 4 +-- requirements/full/requirements_amd.txt | 2 +- .../full/requirements_apple_intel.txt | 4 +-- .../full/requirements_apple_silicon.txt | 4 +-- requirements/full/requirements_cpu_only.txt | 4 +-- .../full/requirements_cpu_only_noavx2.txt | 4 +-- requirements/full/requirements_noavx2.txt | 4 +-- requirements/portable/requirements_amd.txt | 27 +++++++++++++++++++ .../portable/requirements_amd_noavx2.txt | 27 +++++++++++++++++++ requirements/portable/requirements_vulkan.txt | 2 +- 10 files changed, 68 insertions(+), 14 deletions(-) create mode 100644 requirements/portable/requirements_amd.txt create mode 100644 requirements/portable/requirements_amd_noavx2.txt diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index 0f4eac52..7075f955 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -40,8 +40,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.15/exllamav3-0.0.15+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.15/exllamav3-0.0.15+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" 
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index f8d78dbc..39e4abd6 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -39,6 +39,6 @@ tiktoken # AMD wheels https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index 5cc516d2..dc1d1820 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -38,5 +38,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index 9639c6b5..0ccf95a5 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -38,5 +38,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and 
platform_release >= "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index 914de4e8..357daa9f 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -38,5 +38,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index d25879ba..4e8db9e9 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -38,5 +38,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index 731839a7..2204a40e 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -40,8 +40,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" 
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.15/exllamav3-0.0.15+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.15/exllamav3-0.0.15+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/portable/requirements_amd.txt b/requirements/portable/requirements_amd.txt new file mode 100644 index 00000000..3dfa6d3f --- /dev/null +++ b/requirements/portable/requirements_amd.txt @@ -0,0 +1,27 @@ +audioop-lts<1.0; python_version >= "3.13" +fastapi==0.112.4 +html2text==2025.4.15 +huggingface-hub==0.36.0 +jinja2==3.1.6 +markdown +numpy==2.2.* +pydantic==2.11.0 +PyPDF2==3.0.1 +python-docx==1.1.2 +pyyaml +requests +rich +tqdm + +# Gradio +gradio==4.37.* +https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl + +# API +flask_cloudflared==0.0.14 +sse-starlette==1.6.5 +tiktoken + +# AMD wheels +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_amd_noavx2.txt b/requirements/portable/requirements_amd_noavx2.txt new file mode 100644 index 00000000..57c6d552 --- /dev/null +++ b/requirements/portable/requirements_amd_noavx2.txt @@ -0,0 +1,27 @@ +audioop-lts<1.0; python_version >= "3.13" +fastapi==0.112.4 +html2text==2025.4.15 +huggingface-hub==0.36.0 +jinja2==3.1.6 +markdown +numpy==2.2.* +pydantic==2.11.0 +PyPDF2==3.0.1 +python-docx==1.1.2 +pyyaml +requests +rich +tqdm + +# Gradio +gradio==4.37.* +https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl + +# API +flask_cloudflared==0.0.14 +sse-starlette==1.6.5 +tiktoken + +# AMD wheels +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+rocm6.4.4avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt index 49874942..d339f97f 100644 --- a/requirements/portable/requirements_vulkan.txt +++ b/requirements/portable/requirements_vulkan.txt @@ -22,6 +22,6 @@ flask_cloudflared==0.0.14 sse-starlette==1.6.5 tiktoken -# CUDA wheels +# Vulkan wheels https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.61.0/llama_cpp_binaries-0.61.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" From d6f39e1fef16c5e92fe8eb490d51e76d0e230d99 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 
18 Nov 2025 16:32:20 -0800 Subject: [PATCH 8/9] Add ROCm portable builds --- .github/workflows/build-everything-tgw.yml | 7 + .../workflows/build-portable-release-rocm.yml | 165 ++++++++++++++++++ 2 files changed, 172 insertions(+) create mode 100644 .github/workflows/build-portable-release-rocm.yml diff --git a/.github/workflows/build-everything-tgw.yml b/.github/workflows/build-everything-tgw.yml index 23773010..78fd2d6b 100644 --- a/.github/workflows/build-everything-tgw.yml +++ b/.github/workflows/build-everything-tgw.yml @@ -41,6 +41,13 @@ jobs: version: ${{ inputs.version }} config: 'os:ubuntu-22.04' + build_release_rocm_linux: + name: ROCm Linux + uses: ./.github/workflows/build-portable-release-rocm.yml + with: + version: ${{ inputs.version }} + config: 'os:ubuntu-22.04' + build_release_cpu_windows: name: CPU Windows uses: ./.github/workflows/build-portable-release.yml diff --git a/.github/workflows/build-portable-release-rocm.yml b/.github/workflows/build-portable-release-rocm.yml new file mode 100644 index 00000000..ed7f3a87 --- /dev/null +++ b/.github/workflows/build-portable-release-rocm.yml @@ -0,0 +1,165 @@ +name: Build ROCm + +on: + workflow_dispatch: + inputs: + version: + description: 'Version tag of text-generation-webui to build: v3.0' + default: 'v3.0' + required: true + type: string + config: + description: 'Override configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2' + default: 'Default' + required: false + type: string + exclude: + description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2' + default: 'None' + required: false + type: string + workflow_call: + inputs: + version: + description: 'Version tag of text-generation-webui to build: v3.0' + default: 'v3.0' + required: true + type: string + config: + description: 'Configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2' + default: 'Default' + required: false + type: string + exclude: + description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2' + default: 'None' + required: false + type: string + +permissions: + contents: write + +jobs: + define_matrix: + name: Define Build Matrix + runs-on: ubuntu-latest + outputs: + matrix: ${{ steps.set-matrix.outputs.matrix }} + defaults: + run: + shell: pwsh + env: + CONFIGIN: ${{ inputs.config }} + EXCLUDEIN: ${{ inputs.exclude }} + + steps: + - name: Define Job Output + id: set-matrix + run: | + $matrix = @{ + 'os' = @('ubuntu-22.04') + 'pyver' = @("3.11") + 'avx' = @("AVX2") + } + + if ($env:CONFIGIN -ne 'Default') {$env:CONFIGIN.split(';').foreach({$matrix[$_.split(':')[0]] = $_.split(':')[1].split(',')})} + + if ($env:EXCLUDEIN -ne 'None') { + $exclusions = @() + $exclusions += $env:EXCLUDEIN.split(';').replace(':','=').replace(',',"`n") | ConvertFrom-StringData + $matrix['exclude'] = $exclusions + } + + $matrixOut = ConvertTo-Json $matrix -Compress + Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT + + build_wheels: + name: ${{ matrix.os }} ${{ matrix.pyver }} CPU ${{ matrix.avx }} + needs: define_matrix + runs-on: ${{ matrix.os }} + strategy: + matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }} + defaults: + run: + shell: pwsh + env: + AVXVER: ${{ matrix.avx }} + PCKGVER: ${{ inputs.version }} + + steps: + - uses: actions/checkout@v4 + with: + repository: 'oobabooga/text-generation-webui' + ref: ${{ inputs.version }} + submodules: 'recursive' + + - uses: actions/setup-python@v4 + with: + python-version: ${{ matrix.pyver }} + + - 
+      - name: Build Package
+        shell: bash
+        run: |
+          VERSION_CLEAN="${{ inputs.version }}"
+          VERSION_CLEAN="${VERSION_CLEAN#v}"
+          cd ..
+          cp -r text-generation-webui "text-generation-webui-${VERSION_CLEAN}"
+          cd "text-generation-webui-${VERSION_CLEAN}"
+
+          # Remove extensions that need additional requirements
+          allowed=("character_bias" "gallery" "openai" "sd_api_pictures")
+          find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf
+
+          # Define common variables
+          AVX_SUPPORT="${{ matrix.avx }}"
+          VERSION="${{ inputs.version }}"
+
+          # 1. Set platform-specific variables (Linux only for ROCm)
+          PLATFORM="linux"
+          PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20250409/cpython-3.11.12+20250409-x86_64-unknown-linux-gnu-install_only.tar.gz"
+          PIP_PATH="portable_env/bin/python -m pip"
+          PACKAGES_PATH="portable_env/lib/python3.11/site-packages"
+          rm start_macos.sh start_windows.bat
+
+          # 2. Download and extract Python
+          cd ..
+          echo "Downloading Python for $PLATFORM..."
+          curl -L -o python-build.tar.gz "$PYTHON_URL"
+          tar -xzf python-build.tar.gz
+          mv python "text-generation-webui-${VERSION_CLEAN}/portable_env"
+
+          # 3. Prepare requirements file based on AVX
+          if [[ "$AVX_SUPPORT" == "AVX2" ]]; then
+            BASE_REQ_FILE="requirements/portable/requirements_amd.txt"
+          else
+            BASE_REQ_FILE="requirements/portable/requirements_amd_noavx2.txt"
+          fi
+          REQ_FILE="$BASE_REQ_FILE"
+
+          cd "text-generation-webui-${VERSION_CLEAN}"
+
+          # 4. Install packages
+          echo "Installing Python packages from $REQ_FILE..."
+          $PIP_PATH install --target="./$PACKAGES_PATH" -r "$REQ_FILE"
+
+          # 5. Clean up
+          rm -rf .git cmd* update_wizard* Colab-TextGen-GPU.ipynb docker setup.cfg .github .gitignore requirements/ one_click.py
+
+          # 6. Create ZIP file
+          cd ..
+          ZIP_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}-rocm.zip"
+          echo "Creating archive: $ZIP_NAME"
+
+          zip -r "$ZIP_NAME" "text-generation-webui-${VERSION_CLEAN}"
+
+      - name: Upload files to a GitHub release
+        id: upload-release
+        uses: svenstaro/upload-release-action@2.7.0
+        continue-on-error: true
+        with:
+          repo_token: ${{ secrets.GITHUB_TOKEN }}
+          file: ../textgen-portable-*.zip
+          tag: ${{ inputs.version }}
+          file_glob: true
+          make_latest: false
+          overwrite: true
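[Editorial note: the `config` and `exclude` inputs above use a compact `key1:item1-1,item1-2;key2:item2-1` string format that the "Define Job Output" PowerShell step expands into a build matrix. A rough Python equivalent of that parsing, for illustration only (not part of the patch):]

    # Illustration only: a Python sketch of the matrix parsing done by the
    # "Define Job Output" PowerShell step in the workflow above.
    def parse_config(config: str) -> dict:
        # Defaults mirror the workflow: one OS, one Python version, AVX2 only.
        matrix = {"os": ["ubuntu-22.04"], "pyver": ["3.11"], "avx": ["AVX2"]}
        if config != "Default":
            for pair in config.split(";"):
                key, _, items = pair.partition(":")
                matrix[key] = items.split(",")
        return matrix

    print(parse_config("os:ubuntu-22.04"))
    # {'os': ['ubuntu-22.04'], 'pyver': ['3.11'], 'avx': ['AVX2']}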
From 0d4eff284ca43c84bc37e49ea24d95106dacf098 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Wed, 19 Nov 2025 05:23:43 -0800
Subject: [PATCH 9/9] Add a --cpu-moe option for llama.cpp

---
 modules/llama_cpp_server.py | 2 ++
 modules/loaders.py          | 1 +
 modules/shared.py           | 1 +
 modules/ui.py               | 1 +
 modules/ui_model_menu.py    | 3 ++-
 5 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/modules/llama_cpp_server.py b/modules/llama_cpp_server.py
index 27890d8c..47d9d27c 100644
--- a/modules/llama_cpp_server.py
+++ b/modules/llama_cpp_server.py
@@ -326,6 +326,8 @@ class LlamaServer:
             cmd += ["--threads", str(shared.args.threads)]
         if shared.args.threads_batch > 0:
             cmd += ["--threads-batch", str(shared.args.threads_batch)]
+        if shared.args.cpu_moe:
+            cmd.append("--cpu-moe")
         if shared.args.no_mmap:
             cmd.append("--no-mmap")
         if shared.args.mlock:
diff --git a/modules/loaders.py b/modules/loaders.py
index 609a54c6..0f0f6d1e 100644
--- a/modules/loaders.py
+++ b/modules/loaders.py
@@ -6,6 +6,7 @@ import gradio as gr
 loaders_and_params = OrderedDict({
     'llama.cpp': [
         'gpu_layers',
+        'cpu_moe',
         'threads',
         'threads_batch',
         'batch_size',
diff --git a/modules/shared.py b/modules/shared.py
index e54ba654..1cca1233 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -66,6 +66,7 @@ group.add_argument('--ctx-size-draft', type=int, default=0, help='Size of the pr
 # llama.cpp
 group = parser.add_argument_group('llama.cpp')
 group.add_argument('--gpu-layers', '--n-gpu-layers', type=int, default=256, metavar='N', help='Number of layers to offload to the GPU.')
+group.add_argument('--cpu-moe', action='store_true', help='Move the experts to the CPU (for MoE models).')
 group.add_argument('--mmproj', type=str, default=None, help='Path to the mmproj file for vision models.')
 group.add_argument('--streaming-llm', action='store_true', help='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
 group.add_argument('--tensor-split', type=str, default=None, help='Split the model across multiple GPUs. Comma-separated list of proportions. Example: 60,40.')
diff --git a/modules/ui.py b/modules/ui.py
index 76533767..d8dcedfb 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -125,6 +125,7 @@ def list_model_elements():
         'loader',
         'cpu_memory',
         'gpu_layers',
+        'cpu_moe',
         'threads',
         'threads_batch',
         'batch_size',
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index 50ada9f9..31ab929f 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -50,6 +50,7 @@ def create_ui():
 
         with gr.Column():
             shared.gradio['vram_info'] = gr.HTML(value=get_initial_vram_info())
+            shared.gradio['cpu_moe'] = gr.Checkbox(label="cpu-moe", value=shared.args.cpu_moe, info='Move the experts to the CPU. Saves VRAM on MoE models.')
             shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming-llm", value=shared.args.streaming_llm, info='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
             shared.gradio['load_in_8bit'] = gr.Checkbox(label="load-in-8bit", value=shared.args.load_in_8bit)
             shared.gradio['load_in_4bit'] = gr.Checkbox(label="load-in-4bit", value=shared.args.load_in_4bit)
@@ -94,7 +95,7 @@ def create_ui():
             shared.gradio['num_experts_per_token'] = gr.Number(label="Number of experts per token", value=shared.args.num_experts_per_token, info='Only applies to MoE models like Mixtral.')
 
         with gr.Column():
-            shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='llama.cpp: Use llama-cpp-python compiled without GPU acceleration. Transformers: use PyTorch in CPU mode.')
+            shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='Use PyTorch in CPU mode.')
             shared.gradio['disk'] = gr.Checkbox(label="disk", value=shared.args.disk)
             shared.gradio['row_split'] = gr.Checkbox(label="row_split", value=shared.args.row_split, info='Split the model by rows across GPUs. This may improve multi-gpu performance.')
             shared.gradio['no_kv_offload'] = gr.Checkbox(label="no_kv_offload", value=shared.args.no_kv_offload, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.')
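[Editorial note: the patch above threads the new flag from the argparse definition in modules/shared.py through the UI into the llama-server command line built in modules/llama_cpp_server.py. A condensed sketch of that pattern, for illustration only (names match the patch, but this is not the full module code):]

    # Simplified sketch of how --cpu-moe flows into the server command.
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument('--cpu-moe', action='store_true',
                        help='Move the experts to the CPU (for MoE models).')
    args = parser.parse_args(['--cpu-moe'])

    cmd = ['llama-server', '--model', 'model.gguf']
    if args.cpu_moe:  # argparse exposes --cpu-moe as the cpu_moe attribute
        cmd.append('--cpu-moe')
    print(cmd)  # ['llama-server', '--model', 'model.gguf', '--cpu-moe']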