Add RTX 50XX Nvidia blackwell support (ExLlamaV2/V3 and Transformers) (#7011)

---------

Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com>
Author: rakha abadi susilo
Date: 2025-06-08 07:44:15 +07:00 (committed by GitHub)
parent 2d263f227d
commit db847eed4c
3 changed files with 106 additions and 7 deletions

one_click.py

@@ -17,8 +17,6 @@ import sys
 # Define the required versions
 TORCH_VERSION = "2.6.0"
-TORCHVISION_VERSION = "0.21.0"
-TORCHAUDIO_VERSION = "2.6.0"
 PYTHON_VERSION = "3.11"
 LIBSTDCXX_VERSION_LINUX = "12.1.0"
@@ -119,12 +117,13 @@ def get_gpu_choice():
             'B': 'AMD - Linux/macOS only, requires ROCm 6.2.4',
             'C': 'Apple M Series',
             'D': 'Intel Arc (beta)',
+            'E': 'NVIDIA - CUDA 12.8',
             'N': 'CPU mode'
         },
     )

     # Convert choice to GPU name
-    gpu_choice = {"A": "NVIDIA", "B": "AMD", "C": "APPLE", "D": "INTEL", "N": "NONE"}[choice]
+    gpu_choice = {"A": "NVIDIA", "B": "AMD", "C": "APPLE", "D": "INTEL", "E": "NVIDIA_CUDA128", "N": "NONE"}[choice]

     # Save choice to state
     state['gpu_choice'] = gpu_choice
@@ -135,29 +134,33 @@ def get_gpu_choice():

 def get_pytorch_install_command(gpu_choice):
     """Get PyTorch installation command based on GPU choice"""
-    base_cmd = f"python -m pip install torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION} "
+    base_cmd = f"python -m pip install torch=={TORCH_VERSION} "
     if gpu_choice == "NVIDIA":
         return base_cmd + "--index-url https://download.pytorch.org/whl/cu124"
+    elif gpu_choice == "NVIDIA_CUDA128":
+        return "python -m pip install torch==2.7.1 --index-url https://download.pytorch.org/whl/cu128"
     elif gpu_choice == "AMD":
         return base_cmd + "--index-url https://download.pytorch.org/whl/rocm6.2.4"
     elif gpu_choice in ["APPLE", "NONE"]:
         return base_cmd + "--index-url https://download.pytorch.org/whl/cpu"
     elif gpu_choice == "INTEL":
         if is_linux():
-            return "python -m pip install torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
+            return "python -m pip install torch==2.1.0a0 intel-extension-for-pytorch==2.1.10+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
         else:
-            return "python -m pip install torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
+            return "python -m pip install torch==2.1.0a0 intel-extension-for-pytorch==2.1.10 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
     else:
         return base_cmd


 def get_pytorch_update_command(gpu_choice):
     """Get PyTorch update command based on GPU choice"""
-    base_cmd = f"python -m pip install --upgrade torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION}"
+    base_cmd = f"python -m pip install --upgrade torch=={TORCH_VERSION} "
     if gpu_choice == "NVIDIA":
         return f"{base_cmd} --index-url https://download.pytorch.org/whl/cu124"
+    elif gpu_choice == "NVIDIA_CUDA128":
+        return "python -m pip install --upgrade torch==2.7.1 --index-url https://download.pytorch.org/whl/cu128"
     elif gpu_choice == "AMD":
         return f"{base_cmd} --index-url https://download.pytorch.org/whl/rocm6.2.4"
     elif gpu_choice in ["APPLE", "NONE"]:
@@ -181,6 +184,8 @@ def get_requirements_file(gpu_choice):
         file_name = f"requirements_cpu_only{'_noavx2' if not cpu_has_avx2() else ''}.txt"
     elif gpu_choice == "NVIDIA":
         file_name = f"requirements{'_noavx2' if not cpu_has_avx2() else ''}.txt"
+    elif gpu_choice == "NVIDIA_CUDA128":
+        file_name = f"requirements_cuda128{'_noavx2' if not cpu_has_avx2() else ''}.txt"
     else:
         raise ValueError(f"Unknown GPU choice: {gpu_choice}")
@@ -328,6 +333,8 @@ def install_webui():
     # Handle CUDA version display
     elif any((is_windows(), is_linux())) and gpu_choice == "NVIDIA":
         print("CUDA: 12.4")
+    elif any((is_windows(), is_linux())) and gpu_choice == "NVIDIA_CUDA128":
+        print("CUDA: 12.8")

     # No PyTorch for AMD on Windows (?)
     elif is_windows() and gpu_choice == "AMD":
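
Taken together, choosing the new menu option E threads a single NVIDIA_CUDA128 value through the three code paths touched above: the PyTorch install/update commands, the requirements file selection, and the CUDA version banner. A minimal, self-contained sketch of that flow (not part of the commit; the helper names below are made up for illustration, while the literal strings match the hunks above):

    # Illustration only: tiny stand-ins for the branches added to one_click.py.
    def cuda128_install_command():
        # Mirrors the new NVIDIA_CUDA128 branch of get_pytorch_install_command()
        return "python -m pip install torch==2.7.1 --index-url https://download.pytorch.org/whl/cu128"

    def cuda128_requirements(has_avx2=True):
        # Mirrors the new NVIDIA_CUDA128 branch of get_requirements_file()
        return f"requirements_cuda128{'' if has_avx2 else '_noavx2'}.txt"

    gpu_choice = {"A": "NVIDIA", "B": "AMD", "C": "APPLE", "D": "INTEL",
                  "E": "NVIDIA_CUDA128", "N": "NONE"}["E"]
    print(gpu_choice)                    # NVIDIA_CUDA128
    print(cuda128_install_command())     # torch 2.7.1 from the cu128 wheel index
    print(cuda128_requirements(False))   # requirements_cuda128_noavx2.txt
    print("CUDA: 12.8")                  # version banner printed by install_webui()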

requirements_cuda128.txt (new file)

@@ -0,0 +1,46 @@
accelerate==1.5.*
beautifulsoup4==4.13.4
bitsandbytes==0.45.*
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
jinja2==3.1.6
markdown
numpy==2.2.*
pandas
peft==0.15.*
Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
safetensors==0.5.*
scipy
sentencepiece
tensorboard
transformers==4.52.*
triton-windows; platform_system == "Windows"
tqdm
wandb
# API
flask_cloudflared==0.0.14
sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
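
Every wheel URL above carries PEP 508 environment markers, so pip only picks a wheel when the platform, architecture, and Python version match (the cu128 ExLlamaV2/V3 and flash-attn builds listed here target Python 3.11 only). A small illustration of how such a marker is evaluated, using the packaging library (commonly already installed; otherwise pip install packaging). This is illustration only, not part of the commit:

    from packaging.markers import Marker

    # The same marker string used on the Linux wheels above.
    marker = Marker('platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"')

    # True only when evaluated on a Linux x86_64 / Python 3.11 environment,
    # so pip silently skips the wheel everywhere else.
    print(marker.evaluate())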

requirements_cuda128_noavx2.txt (new file)

@@ -0,0 +1,46 @@
accelerate==1.5.*
beautifulsoup4==4.13.4
bitsandbytes==0.45.*
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
jinja2==3.1.6
markdown
numpy==2.2.*
pandas
peft==0.15.*
Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
safetensors==0.5.*
scipy
sentencepiece
tensorboard
transformers==4.52.*
triton-windows; platform_system == "Windows"
tqdm
wandb
# API
flask_cloudflared==0.0.14
sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
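
Relative to requirements_cuda128.txt, this _noavx2 variant differs only in the llama.cpp binaries, which switch to the +cu124avx builds for CPUs without AVX2; the ExLlamaV2/V3 and flash-attn wheels are the same cu128/torch2.7.0 builds. For anyone installing one of these files by hand rather than through the installer, a quick environment sanity check could look like the sketch below (an assumption-laden illustration, not part of the commit):

    import sys
    import torch  # expects the torch 2.7.1 cu128 build installed by option E

    # The cu128 wheels above are published for Python 3.11 and built against CUDA 12.8.
    assert sys.version_info[:2] == (3, 11), sys.version
    assert torch.version.cuda == "12.8", torch.version.cuda
    print(torch.__version__, torch.version.cuda)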