diff --git a/one_click.py b/one_click.py index 050da76b..486e893e 100644 --- a/one_click.py +++ b/one_click.py @@ -16,7 +16,7 @@ import sys # os.environ["HCC_AMDGPU_TARGET"] = 'gfx1030' # Define the required versions -TORCH_VERSION = "2.6.0" +TORCH_VERSION = "2.7.0" PYTHON_VERSION = "3.11" LIBSTDCXX_VERSION_LINUX = "12.1.0" @@ -113,17 +113,16 @@ def get_gpu_choice(): choice = get_user_choice( "What is your GPU?", { - 'A': 'NVIDIA - CUDA 12.4', + 'A': 'NVIDIA', 'B': 'AMD - Linux/macOS only, requires ROCm 6.2.4', 'C': 'Apple M Series', 'D': 'Intel Arc (beta)', - 'E': 'NVIDIA - CUDA 12.8', 'N': 'CPU mode' }, ) # Convert choice to GPU name - gpu_choice = {"A": "NVIDIA", "B": "AMD", "C": "APPLE", "D": "INTEL", "E": "NVIDIA_CUDA128", "N": "NONE"}[choice] + gpu_choice = {"A": "NVIDIA_CUDA128", "B": "AMD", "C": "APPLE", "D": "INTEL", "N": "NONE"}[choice] # Save choice to state state['gpu_choice'] = gpu_choice @@ -368,6 +367,19 @@ def update_requirements(initial_installation=False, pull=True): assert_success=True ) + # Check for outdated CUDA 12.4 installs and refuse to update + state = load_state() + if state.get('gpu_choice') == 'NVIDIA': + print_big_message( + "Your current installation uses CUDA 12.4, which has been removed.\n" + "To update to the new default (CUDA 12.8), a clean installation is required.\n\n" + "INSTRUCTIONS:\n" + "1. Delete the 'installer_files' folder in your text-generation-webui directory.\n" + "2. Run the start script again (e.g., start_windows.bat).\n\n" + "This will create a fresh environment with the latest software." + ) + sys.exit(0) + current_commit = get_current_commit() wheels_changed = not os.path.exists(state_file) if not wheels_changed: diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index d0282ee9..eeee9ff3 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -24,7 +24,7 @@ scipy sentencepiece tensorboard transformers==4.55.* -triton-windows==3.2.0.post19; platform_system == "Windows" +triton-windows==3.3.1.post19; platform_system == "Windows" tqdm wandb @@ -36,10 +36,10 @@ tiktoken # CUDA wheels https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.36.0/llama_cpp_binaries-0.36.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.36.0/llama_cpp_binaries-0.36.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav3/releases/download/v0.0.6/exllamav3-0.0.6+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/exllamav3/releases/download/v0.0.6/exllamav3-0.0.6+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/turboderp-org/exllamav3/releases/download/v0.0.6/exllamav3-0.0.6+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/turboderp-org/exllamav3/releases/download/v0.0.6/exllamav3-0.0.6+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" -https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements/full/requirements_cuda128.txt b/requirements/full/requirements_cuda128.txt deleted file mode 100644 index eeee9ff3..00000000 --- a/requirements/full/requirements_cuda128.txt +++ /dev/null @@ -1,45 +0,0 @@ -accelerate==1.8.* -bitsandbytes==0.46.* -colorama -datasets -einops -fastapi==0.112.4 -gradio==4.37.* -html2text==2025.4.15 -jinja2==3.1.6 -markdown -numpy==2.2.* -pandas -peft==0.16.* -Pillow>=9.5.0 -psutil -pydantic==2.8.2 -PyPDF2==3.0.1 -python-docx==1.1.2 -pyyaml -requests -rich -safetensors==0.5.* -scipy -sentencepiece -tensorboard -transformers==4.55.* -triton-windows==3.3.1.post19; platform_system == "Windows" -tqdm -wandb - -# API -flask_cloudflared==0.0.14 -sse-starlette==1.6.5 -tiktoken - -# CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.36.0/llama_cpp_binaries-0.36.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.36.0/llama_cpp_binaries-0.36.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/turboderp-org/exllamav3/releases/download/v0.0.6/exllamav3-0.0.6+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/turboderp-org/exllamav3/releases/download/v0.0.6/exllamav3-0.0.6+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" -https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements/full/requirements_cuda128_noavx2.txt b/requirements/full/requirements_cuda128_noavx2.txt deleted file mode 100644 index d08f23ca..00000000 --- a/requirements/full/requirements_cuda128_noavx2.txt +++ /dev/null @@ -1,45 +0,0 @@ -accelerate==1.8.* -bitsandbytes==0.46.* -colorama -datasets -einops -fastapi==0.112.4 -gradio==4.37.* -html2text==2025.4.15 -jinja2==3.1.6 -markdown -numpy==2.2.* -pandas -peft==0.16.* -Pillow>=9.5.0 -psutil -pydantic==2.8.2 -PyPDF2==3.0.1 -python-docx==1.1.2 -pyyaml -requests -rich -safetensors==0.5.* -scipy -sentencepiece -tensorboard -transformers==4.55.* -triton-windows==3.3.1.post19; platform_system == "Windows" -tqdm -wandb - -# API -flask_cloudflared==0.0.14 -sse-starlette==1.6.5 -tiktoken - -# CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.36.0/llama_cpp_binaries-0.36.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.36.0/llama_cpp_binaries-0.36.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/turboderp-org/exllamav3/releases/download/v0.0.6/exllamav3-0.0.6+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/turboderp-org/exllamav3/releases/download/v0.0.6/exllamav3-0.0.6+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" -https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index 71fd227d..d08f23ca 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -24,7 +24,7 @@ scipy sentencepiece tensorboard transformers==4.55.* -triton-windows==3.2.0.post19; platform_system == "Windows" +triton-windows==3.3.1.post19; platform_system == "Windows" tqdm wandb @@ -36,10 +36,10 @@ tiktoken # CUDA wheels https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.36.0/llama_cpp_binaries-0.36.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.36.0/llama_cpp_binaries-0.36.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav3/releases/download/v0.0.6/exllamav3-0.0.6+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/exllamav3/releases/download/v0.0.6/exllamav3-0.0.6+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/turboderp-org/exllamav3/releases/download/v0.0.6/exllamav3-0.0.6+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/turboderp-org/exllamav3/releases/download/v0.0.6/exllamav3-0.0.6+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" -https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"