From 2beaa4b97173fcba4affa2d66074eb53113b0fd8 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 6 Mar 2026 14:39:35 -0800 Subject: [PATCH] Update llama.cpp --- requirements/full/requirements.txt | 4 ++-- requirements/full/requirements_amd.txt | 4 ++-- requirements/full/requirements_apple_intel.txt | 2 +- requirements/full/requirements_apple_silicon.txt | 2 +- requirements/full/requirements_cpu_only.txt | 4 ++-- requirements/portable/requirements.txt | 4 ++-- requirements/portable/requirements_amd.txt | 4 ++-- requirements/portable/requirements_apple_intel.txt | 2 +- requirements/portable/requirements_apple_silicon.txt | 2 +- requirements/portable/requirements_cpu_only.txt | 4 ++-- requirements/portable/requirements_cuda131.txt | 4 ++-- requirements/portable/requirements_vulkan.txt | 4 ++-- 12 files changed, 20 insertions(+), 20 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index 85fbd5df..45d92132 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -40,8 +40,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.85.0/llama_cpp_binaries-0.85.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.85.0/llama_cpp_binaries-0.85.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.86.0/llama_cpp_binaries-0.86.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.86.0/llama_cpp_binaries-0.86.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.23/exllamav3-0.0.23+cu128.torch2.9.0-cp313-cp313-win_amd64.whl; platform_system == "Windows" and python_version == "3.13" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.23/exllamav3-0.0.23+cu128.torch2.9.0-cp313-cp313-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.13" https://github.com/kingbri1/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu128torch2.9.0cxx11abiFALSE-cp313-cp313-win_amd64.whl; platform_system == "Windows" and python_version == "3.13" diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index de41f606..415c54c6 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -37,5 +37,5 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.85.0/llama_cpp_binaries-0.85.0+rocm6.4-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.85.0/llama_cpp_binaries-0.85.0+rocm6.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.86.0/llama_cpp_binaries-0.86.0+rocm6.4-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.86.0/llama_cpp_binaries-0.86.0+rocm6.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index 9f0a113a..7fe16ea3 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -37,4 +37,4 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.85.0/llama_cpp_binaries-0.85.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.86.0/llama_cpp_binaries-0.86.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin" diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index e7c95a71..d8d4030d 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -37,4 +37,4 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.85.0/llama_cpp_binaries-0.85.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.86.0/llama_cpp_binaries-0.86.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index 28112860..90f834bf 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -37,5 +37,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.85.0/llama_cpp_binaries-0.85.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.85.0/llama_cpp_binaries-0.85.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.86.0/llama_cpp_binaries-0.86.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.86.0/llama_cpp_binaries-0.86.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt index 9ff8a558..dd8fa191 100644 --- a/requirements/portable/requirements.txt +++ b/requirements/portable/requirements.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.85.0/llama_cpp_binaries-0.85.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.85.0/llama_cpp_binaries-0.85.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.86.0/llama_cpp_binaries-0.86.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.86.0/llama_cpp_binaries-0.86.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_amd.txt b/requirements/portable/requirements_amd.txt index b79defaf..f34ed929 100644 --- a/requirements/portable/requirements_amd.txt +++ b/requirements/portable/requirements_amd.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.85.0/llama_cpp_binaries-0.85.0+rocm6.4-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.85.0/llama_cpp_binaries-0.85.0+rocm6.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.86.0/llama_cpp_binaries-0.86.0+rocm6.4-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.86.0/llama_cpp_binaries-0.86.0+rocm6.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt index 19310f9c..b9b58985 100644 --- a/requirements/portable/requirements_apple_intel.txt +++ b/requirements/portable/requirements_apple_intel.txt @@ -23,4 +23,4 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.85.0/llama_cpp_binaries-0.85.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.86.0/llama_cpp_binaries-0.86.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin" diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt index 3a269054..26a6a48b 100644 --- a/requirements/portable/requirements_apple_silicon.txt +++ b/requirements/portable/requirements_apple_silicon.txt @@ -23,4 +23,4 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.85.0/llama_cpp_binaries-0.85.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.86.0/llama_cpp_binaries-0.86.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt index b5d319ac..10612c20 100644 --- a/requirements/portable/requirements_cpu_only.txt +++ b/requirements/portable/requirements_cpu_only.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.85.0/llama_cpp_binaries-0.85.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.85.0/llama_cpp_binaries-0.85.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.86.0/llama_cpp_binaries-0.86.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.86.0/llama_cpp_binaries-0.86.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_cuda131.txt b/requirements/portable/requirements_cuda131.txt index 1e05de22..f713db86 100644 --- a/requirements/portable/requirements_cuda131.txt +++ b/requirements/portable/requirements_cuda131.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.85.0/llama_cpp_binaries-0.85.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.85.0/llama_cpp_binaries-0.85.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.86.0/llama_cpp_binaries-0.86.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.86.0/llama_cpp_binaries-0.86.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt index 6298be11..4f37acc1 100644 --- a/requirements/portable/requirements_vulkan.txt +++ b/requirements/portable/requirements_vulkan.txt @@ -23,5 +23,5 @@ sse-starlette==1.6.5 tiktoken # Vulkan wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.85.0/llama_cpp_binaries-0.85.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.85.0/llama_cpp_binaries-0.85.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.86.0/llama_cpp_binaries-0.86.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.86.0/llama_cpp_binaries-0.86.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"