diff --git a/modules/shared.py b/modules/shared.py
index b2aeadc6..0a27f33d 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -112,7 +112,7 @@ group.add_argument('--no-cache', action='store_true', help='Set use_cache to Fal
 group.add_argument('--trust-remote-code', action='store_true', help='Set trust_remote_code=True while loading the model. Necessary for some models.')
 group.add_argument('--force-safetensors', action='store_true', help='Set use_safetensors=True while loading the model. This prevents arbitrary code execution.')
 group.add_argument('--no_use_fast', action='store_true', help='Set use_fast=False while loading the tokenizer (it\'s True by default). Use this if you have any problems related to use_fast.')
-group.add_argument('--attn-implementation', type=str, default='flash_attention_2', metavar="IMPLEMENTATION", help='Attention implementation. Valid options: flash_attention_2, sdpa, eager.')
+group.add_argument('--attn-implementation', type=str, default='sdpa', metavar="IMPLEMENTATION", help='Attention implementation. Valid options: sdpa, eager, flash_attention_2.')
 
 # bitsandbytes 4-bit
 group = parser.add_argument_group('bitsandbytes 4-bit')
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index aab0fcaf..86adc229 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -44,7 +44,7 @@ def create_ui():
     shared.gradio['gpu_layers'] = gr.Slider(label="gpu-layers", minimum=0, maximum=get_initial_gpu_layers_max(), step=1, value=shared.args.gpu_layers, info='Must be greater than 0 for the GPU to be used. ⚠️ Lower this value if you can\'t load the model.')
     shared.gradio['ctx_size'] = gr.Slider(label='ctx-size', minimum=256, maximum=131072, step=256, value=shared.args.ctx_size, info='Context length. Common values: 4096, 8192, 16384, 32768, 65536, 131072.')
     shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7')
-    shared.gradio['attn_implementation'] = gr.Dropdown(label="attn-implementation", choices=['flash_attention_2', 'sdpa', 'eager'], value=shared.args.attn_implementation, info='Attention implementation.')
+    shared.gradio['attn_implementation'] = gr.Dropdown(label="attn-implementation", choices=['sdpa', 'eager', 'flash_attention_2'], value=shared.args.attn_implementation, info='Attention implementation.')
     shared.gradio['cache_type'] = gr.Dropdown(label="cache-type", choices=['fp16', 'q8_0', 'q4_0', 'fp8', 'q8', 'q7', 'q6', 'q5', 'q4', 'q3', 'q2'], value=shared.args.cache_type, allow_custom_value=True, info='Valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4; ExLlamaV3 - fp16, q2 to q8. For ExLlamaV3, you can type custom combinations for separate k/v bits (e.g. q4_q8).')
     shared.gradio['tp_backend'] = gr.Dropdown(label="tp-backend", choices=['native', 'nccl'], value=shared.args.tp_backend, info='The backend for tensor parallelism.')
diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt
index 8bfa1318..79292a9f 100644
--- a/requirements/full/requirements.txt
+++ b/requirements/full/requirements.txt
@@ -3,6 +3,7 @@ audioop-lts<1.0; python_version >= "3.13"
 bitsandbytes==0.48.*
 colorama
 datasets
+diffusers==0.36.*
 einops
 fastapi==0.112.4
 flash-linear-attention==0.4.0
@@ -21,13 +22,13 @@ python-docx==1.1.2
 pyyaml
 requests
 rich
-safetensors==0.6.*
+safetensors==0.7.*
 scipy
 sentencepiece
 tensorboard
 torchao==0.14.*
 transformers==4.57.*
-triton-windows==3.5.1.post21; platform_system == "Windows"
+triton-windows==3.5.1.post22; platform_system == "Windows"
 tqdm
 wandb
 
@@ -35,19 +36,16 @@ wandb
 gradio==4.37.*
 https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl
 
-# Diffusers
-diffusers @ git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6
-
 # API
 flask_cloudflared==0.0.14
 sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/turboderp-org/exllamav3/releases/download/v0.0.17/exllamav3-0.0.17+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav3/releases/download/v0.0.17/exllamav3-0.0.17+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/turboderp-org/exllamav3/releases/download/v0.0.18/exllamav3-0.0.18+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav3/releases/download/v0.0.18/exllamav3-0.0.18+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt
index 088f7713..b6b7850d 100644
--- a/requirements/full/requirements_amd.txt
+++ b/requirements/full/requirements_amd.txt
@@ -2,6 +2,7 @@ accelerate==1.8.*
 audioop-lts<1.0; python_version >= "3.13"
 colorama
 datasets
+diffusers==0.36.*
 einops
 fastapi==0.112.4
 html2text==2025.4.15
@@ -19,13 +20,13 @@ python-docx==1.1.2
 pyyaml
 requests
 rich
-safetensors==0.6.*
+safetensors==0.7.*
 scipy
 sentencepiece
 tensorboard
 torchao==0.14.*
 transformers==4.57.*
-triton-windows==3.5.1.post21; platform_system == "Windows"
+triton-windows==3.5.1.post22; platform_system == "Windows"
 tqdm
 wandb
 
@@ -33,16 +34,13 @@ wandb
 gradio==4.37.*
 https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl
 
-# Diffusers
-diffusers @ git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6
-
 # API
 flask_cloudflared==0.0.14
 sse-starlette==1.6.5
 tiktoken
 
 # AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt
index e4e55e4d..e1b0823c 100644
--- a/requirements/full/requirements_amd_noavx2.txt
+++ b/requirements/full/requirements_amd_noavx2.txt
@@ -2,6 +2,7 @@ accelerate==1.8.*
 audioop-lts<1.0; python_version >= "3.13"
 colorama
 datasets
+diffusers==0.36.*
 einops
 fastapi==0.112.4
 html2text==2025.4.15
@@ -19,13 +20,13 @@ python-docx==1.1.2
 pyyaml
 requests
 rich
-safetensors==0.6.*
+safetensors==0.7.*
 scipy
 sentencepiece
 tensorboard
 torchao==0.14.*
 transformers==4.57.*
-triton-windows==3.5.1.post21; platform_system == "Windows"
+triton-windows==3.5.1.post22; platform_system == "Windows"
 tqdm
 wandb
 
@@ -33,16 +34,13 @@ wandb
 gradio==4.37.*
 https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl
 
-# Diffusers
-diffusers @ git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6
-
 # API
 flask_cloudflared==0.0.14
 sse-starlette==1.6.5
 tiktoken
 
 # AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt
index 5903fe60..08abd111 100644
--- a/requirements/full/requirements_apple_intel.txt
+++ b/requirements/full/requirements_apple_intel.txt
@@ -2,6 +2,7 @@ accelerate==1.8.*
 audioop-lts<1.0; python_version >= "3.13"
 colorama
 datasets
+diffusers==0.36.*
 einops
 fastapi==0.112.4
 html2text==2025.4.15
@@ -19,13 +20,13 @@ python-docx==1.1.2
 pyyaml
 requests
 rich
-safetensors==0.6.*
+safetensors==0.7.*
 scipy
 sentencepiece
 tensorboard
 torchao==0.14.*
 transformers==4.57.*
-triton-windows==3.5.1.post21; platform_system == "Windows"
+triton-windows==3.5.1.post22; platform_system == "Windows"
 tqdm
 wandb
 
@@ -33,14 +34,11 @@ wandb
 gradio==4.37.*
 https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl
 
-# Diffusers
-diffusers @ git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6
-
 # API
 flask_cloudflared==0.0.14
 sse-starlette==1.6.5
 tiktoken
 
 # Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt
index 47517578..6cc0c40e 100644
--- a/requirements/full/requirements_apple_silicon.txt
+++ b/requirements/full/requirements_apple_silicon.txt
@@ -2,6 +2,7 @@ accelerate==1.8.*
 audioop-lts<1.0; python_version >= "3.13"
 colorama
 datasets
+diffusers==0.36.*
 einops
 fastapi==0.112.4
 html2text==2025.4.15
@@ -19,13 +20,13 @@ python-docx==1.1.2
 pyyaml
 requests
 rich
-safetensors==0.6.*
+safetensors==0.7.*
 scipy
 sentencepiece
 tensorboard
 torchao==0.14.*
 transformers==4.57.*
-triton-windows==3.5.1.post21; platform_system == "Windows"
+triton-windows==3.5.1.post22; platform_system == "Windows"
 tqdm
 wandb
 
@@ -33,14 +34,11 @@ wandb
 gradio==4.37.*
 https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl
 
-# Diffusers
-diffusers @ git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6
-
 # API
 flask_cloudflared==0.0.14
 sse-starlette==1.6.5
 tiktoken
 
 # Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt
index 1fee7121..7b851307 100644
--- a/requirements/full/requirements_cpu_only.txt
+++ b/requirements/full/requirements_cpu_only.txt
@@ -2,6 +2,7 @@ accelerate==1.8.*
 audioop-lts<1.0; python_version >= "3.13"
 colorama
 datasets
+diffusers==0.36.*
 einops
 fastapi==0.112.4
 html2text==2025.4.15
@@ -19,13 +20,13 @@ python-docx==1.1.2
 pyyaml
 requests
 rich
-safetensors==0.6.*
+safetensors==0.7.*
 scipy
 sentencepiece
 tensorboard
 torchao==0.14.*
 transformers==4.57.*
-triton-windows==3.5.1.post21; platform_system == "Windows"
+triton-windows==3.5.1.post22; platform_system == "Windows"
 tqdm
 wandb
 
@@ -33,14 +34,11 @@ wandb
 gradio==4.37.*
 https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl
 
-# Diffusers
-diffusers @ git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6
-
 # API
 flask_cloudflared==0.0.14
 sse-starlette==1.6.5
 tiktoken
 
 # llama.cpp (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"
diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt
index 204af32a..6f7b5e15 100644
--- a/requirements/full/requirements_cpu_only_noavx2.txt
+++ b/requirements/full/requirements_cpu_only_noavx2.txt
@@ -2,6 +2,7 @@ accelerate==1.8.*
 audioop-lts<1.0; python_version >= "3.13"
 colorama
 datasets
+diffusers==0.36.*
 einops
 fastapi==0.112.4
 html2text==2025.4.15
@@ -19,13 +20,13 @@ python-docx==1.1.2
 pyyaml
 requests
 rich
-safetensors==0.6.*
+safetensors==0.7.*
 scipy
 sentencepiece
 tensorboard
 torchao==0.14.*
 transformers==4.57.*
-triton-windows==3.5.1.post21; platform_system == "Windows"
+triton-windows==3.5.1.post22; platform_system == "Windows"
 tqdm
 wandb
 
@@ -33,14 +34,11 @@ wandb
 gradio==4.37.*
 https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl
 
-# Diffusers
-diffusers @ git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6
-
 # API
 flask_cloudflared==0.0.14
 sse-starlette==1.6.5
 tiktoken
 
 # llama.cpp (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"
diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt
index 86208b9f..7f9e1162 100644
--- a/requirements/full/requirements_noavx2.txt
+++ b/requirements/full/requirements_noavx2.txt
@@ -3,6 +3,7 @@ audioop-lts<1.0; python_version >= "3.13"
 bitsandbytes==0.48.*
 colorama
 datasets
+diffusers==0.36.*
 einops
 fastapi==0.112.4
 flash-linear-attention==0.4.0
@@ -21,13 +22,13 @@ python-docx==1.1.2
 pyyaml
 requests
 rich
-safetensors==0.6.*
+safetensors==0.7.*
 scipy
 sentencepiece
 tensorboard
 torchao==0.14.*
 transformers==4.57.*
-triton-windows==3.5.1.post21; platform_system == "Windows"
+triton-windows==3.5.1.post22; platform_system == "Windows"
 tqdm
 wandb
 
@@ -35,19 +36,16 @@ wandb
 gradio==4.37.*
 https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl
 
-# Diffusers
-diffusers @ git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6
-
 # API
 flask_cloudflared==0.0.14
 sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/turboderp-org/exllamav3/releases/download/v0.0.17/exllamav3-0.0.17+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav3/releases/download/v0.0.17/exllamav3-0.0.17+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/turboderp-org/exllamav3/releases/download/v0.0.18/exllamav3-0.0.18+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav3/releases/download/v0.0.18/exllamav3-0.0.18+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt
index ac24f0cb..070e5fd0 100644
--- a/requirements/full/requirements_nowheels.txt
+++ b/requirements/full/requirements_nowheels.txt
@@ -2,6 +2,7 @@ accelerate==1.8.*
 audioop-lts<1.0; python_version >= "3.13"
 colorama
 datasets
+diffusers==0.36.*
 einops
 fastapi==0.112.4
 html2text==2025.4.15
@@ -19,13 +20,13 @@ python-docx==1.1.2
 pyyaml
 requests
 rich
-safetensors==0.6.*
+safetensors==0.7.*
 scipy
 sentencepiece
 tensorboard
 torchao==0.14.*
 transformers==4.57.*
-triton-windows==3.5.1.post21; platform_system == "Windows"
+triton-windows==3.5.1.post22; platform_system == "Windows"
 tqdm
 wandb
 
@@ -33,9 +34,6 @@ wandb
 gradio==4.37.*
 https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl
 
-# Diffusers
-diffusers @ git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6
-
 # API
 flask_cloudflared==0.0.14
 sse-starlette==1.6.5
diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt
index 4a92fb8d..91a2c60b 100644
--- a/requirements/portable/requirements.txt
+++ b/requirements/portable/requirements.txt
@@ -23,5 +23,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_amd.txt b/requirements/portable/requirements_amd.txt
index 5ad1fe3a..b8d55d3a 100644
--- a/requirements/portable/requirements_amd.txt
+++ b/requirements/portable/requirements_amd.txt
@@ -23,5 +23,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_amd_noavx2.txt b/requirements/portable/requirements_amd_noavx2.txt
index 9c3e2851..296e8b2e 100644
--- a/requirements/portable/requirements_amd_noavx2.txt
+++ b/requirements/portable/requirements_amd_noavx2.txt
@@ -23,5 +23,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+rocm6.4.4avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+rocm6.4.4avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt
index 8a618e35..50fd898d 100644
--- a/requirements/portable/requirements_apple_intel.txt
+++ b/requirements/portable/requirements_apple_intel.txt
@@ -23,5 +23,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt
index c1c643e8..e7268b09 100644
--- a/requirements/portable/requirements_apple_silicon.txt
+++ b/requirements/portable/requirements_apple_silicon.txt
@@ -23,5 +23,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt
index f4bc8cab..b7d99339 100644
--- a/requirements/portable/requirements_cpu_only.txt
+++ b/requirements/portable/requirements_cpu_only.txt
@@ -23,5 +23,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # llama.cpp (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"
diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt
index 81f9bf2e..043d9db5 100644
--- a/requirements/portable/requirements_cpu_only_noavx2.txt
+++ b/requirements/portable/requirements_cpu_only_noavx2.txt
@@ -23,5 +23,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # llama.cpp (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"
diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt
index 6cc87a4e..341f8f45 100644
--- a/requirements/portable/requirements_noavx2.txt
+++ b/requirements/portable/requirements_noavx2.txt
@@ -23,5 +23,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt
index 25c3638f..6a69a824 100644
--- a/requirements/portable/requirements_vulkan.txt
+++ b/requirements/portable/requirements_vulkan.txt
@@ -23,5 +23,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # Vulkan wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt
index 54d0cd1a..2df6df01 100644
--- a/requirements/portable/requirements_vulkan_noavx2.txt
+++ b/requirements/portable/requirements_vulkan_noavx2.txt
@@ -23,5 +23,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.71.0/llama_cpp_binaries-0.71.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"