Compare commits

..

No commits in common. "main" and "v3.20" have entirely different histories.
main ... v3.20

24 changed files with 104 additions and 109 deletions

View file

@ -19,14 +19,12 @@
color: #d1d5db !important;
}
.chat .message-body :is(th, td),
.prose hr {
.chat .message-body :is(th, td) {
border-color: #40404096 !important;
}
.dark .chat .message-body :is(th, td),
.dark .prose hr {
border-color: rgb(255 255 255 / 30%) !important;
.dark .chat .message-body :is(th, td) {
border-color: #ffffff75 !important;
}
.chat .message-body :is(p, ul, ol) {

View file

@ -1797,20 +1797,3 @@ button#swap-height-width {
top: 0;
left: calc(100% - 174px);
}
table {
border-collapse: collapse;
}
table, tr, td, th, thead {
border: 0;
}
td + td,
th + th { border-left: 1px solid; }
tr + tr td,
tr + tr th { border-top: 1px solid; }
thead + tbody tr:first-child td,
thead + tbody tr:first-child th { border-top: 1px solid; }

View file

@ -83,11 +83,7 @@ def get_model_metadata(model):
if 'tokenizer.chat_template' in metadata:
template = metadata['tokenizer.chat_template']
if 'tokenizer.ggml.eos_token_id' in metadata:
eos_token = metadata['tokenizer.ggml.tokens'][metadata['tokenizer.ggml.eos_token_id']]
else:
eos_token = ""
eos_token = metadata['tokenizer.ggml.tokens'][metadata['tokenizer.ggml.eos_token_id']]
if 'tokenizer.ggml.bos_token_id' in metadata:
bos_token = metadata['tokenizer.ggml.tokens'][metadata['tokenizer.ggml.bos_token_id']]
else:

View file

@ -112,7 +112,7 @@ group.add_argument('--no-cache', action='store_true', help='Set use_cache to Fal
group.add_argument('--trust-remote-code', action='store_true', help='Set trust_remote_code=True while loading the model. Necessary for some models.')
group.add_argument('--force-safetensors', action='store_true', help='Set use_safetensors=True while loading the model. This prevents arbitrary code execution.')
group.add_argument('--no_use_fast', action='store_true', help='Set use_fast=False while loading the tokenizer (it\'s True by default). Use this if you have any problems related to use_fast.')
group.add_argument('--attn-implementation', type=str, default='sdpa', metavar="IMPLEMENTATION", help='Attention implementation. Valid options: sdpa, eager, flash_attention_2.')
group.add_argument('--attn-implementation', type=str, default='flash_attention_2', metavar="IMPLEMENTATION", help='Attention implementation. Valid options: flash_attention_2, sdpa, eager.')
# bitsandbytes 4-bit
group = parser.add_argument_group('bitsandbytes 4-bit')

View file

@ -44,7 +44,7 @@ def create_ui():
shared.gradio['gpu_layers'] = gr.Slider(label="gpu-layers", minimum=0, maximum=get_initial_gpu_layers_max(), step=1, value=shared.args.gpu_layers, info='Must be greater than 0 for the GPU to be used. ⚠️ Lower this value if you can\'t load the model.')
shared.gradio['ctx_size'] = gr.Slider(label='ctx-size', minimum=256, maximum=131072, step=256, value=shared.args.ctx_size, info='Context length. Common values: 4096, 8192, 16384, 32768, 65536, 131072.')
shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7')
shared.gradio['attn_implementation'] = gr.Dropdown(label="attn-implementation", choices=['sdpa', 'eager', 'flash_attention_2'], value=shared.args.attn_implementation, info='Attention implementation.')
shared.gradio['attn_implementation'] = gr.Dropdown(label="attn-implementation", choices=['flash_attention_2', 'sdpa', 'eager'], value=shared.args.attn_implementation, info='Attention implementation.')
shared.gradio['cache_type'] = gr.Dropdown(label="cache-type", choices=['fp16', 'q8_0', 'q4_0', 'fp8', 'q8', 'q7', 'q6', 'q5', 'q4', 'q3', 'q2'], value=shared.args.cache_type, allow_custom_value=True, info='Valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4; ExLlamaV3 - fp16, q2 to q8. For ExLlamaV3, you can type custom combinations for separate k/v bits (e.g. q4_q8).')
shared.gradio['tp_backend'] = gr.Dropdown(label="tp-backend", choices=['native', 'nccl'], value=shared.args.tp_backend, info='The backend for tensor parallelism.')

View file

@ -1,12 +1,11 @@
accelerate==1.8.*
audioop-lts<1.0; python_version >= "3.13"
bitsandbytes==0.49.*
bitsandbytes==0.48.*
colorama
datasets
diffusers==0.36.*
einops
fastapi==0.112.4
flash-linear-attention==0.4.*
flash-linear-attention==0.4.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
@ -22,13 +21,13 @@ python-docx==1.1.2
pyyaml
requests
rich
safetensors==0.7.*
safetensors==0.6.*
scipy
sentencepiece
tensorboard
torchao==0.15.*
torchao==0.14.*
transformers==4.57.*
triton-windows==3.5.1.post22; platform_system == "Windows"
triton-windows==3.5.1.post21; platform_system == "Windows"
tqdm
wandb
@ -36,16 +35,19 @@ wandb
gradio==4.37.*
https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl
# Diffusers
diffusers @ git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6
# API
flask_cloudflared==0.0.14
sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.18/exllamav3-0.0.18+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.18/exllamav3-0.0.18+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.17/exllamav3-0.0.17+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.17/exllamav3-0.0.17+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"

View file

@ -2,7 +2,6 @@ accelerate==1.8.*
audioop-lts<1.0; python_version >= "3.13"
colorama
datasets
diffusers==0.36.*
einops
fastapi==0.112.4
html2text==2025.4.15
@ -20,13 +19,13 @@ python-docx==1.1.2
pyyaml
requests
rich
safetensors==0.7.*
safetensors==0.6.*
scipy
sentencepiece
tensorboard
torchao==0.15.*
torchao==0.14.*
transformers==4.57.*
triton-windows==3.5.1.post22; platform_system == "Windows"
triton-windows==3.5.1.post21; platform_system == "Windows"
tqdm
wandb
@ -34,13 +33,16 @@ wandb
gradio==4.37.*
https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl
# Diffusers
diffusers @ git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6
# API
flask_cloudflared==0.0.14
sse-starlette==1.6.5
tiktoken
# AMD wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"

View file

@ -2,7 +2,6 @@ accelerate==1.8.*
audioop-lts<1.0; python_version >= "3.13"
colorama
datasets
diffusers==0.36.*
einops
fastapi==0.112.4
html2text==2025.4.15
@ -20,13 +19,13 @@ python-docx==1.1.2
pyyaml
requests
rich
safetensors==0.7.*
safetensors==0.6.*
scipy
sentencepiece
tensorboard
torchao==0.15.*
torchao==0.14.*
transformers==4.57.*
triton-windows==3.5.1.post22; platform_system == "Windows"
triton-windows==3.5.1.post21; platform_system == "Windows"
tqdm
wandb
@ -34,13 +33,16 @@ wandb
gradio==4.37.*
https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl
# Diffusers
diffusers @ git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6
# API
flask_cloudflared==0.0.14
sse-starlette==1.6.5
tiktoken
# AMD wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"

View file

@ -2,7 +2,6 @@ accelerate==1.8.*
audioop-lts<1.0; python_version >= "3.13"
colorama
datasets
diffusers==0.36.*
einops
fastapi==0.112.4
html2text==2025.4.15
@ -20,13 +19,13 @@ python-docx==1.1.2
pyyaml
requests
rich
safetensors==0.7.*
safetensors==0.6.*
scipy
sentencepiece
tensorboard
torchao==0.15.*
torchao==0.14.*
transformers==4.57.*
triton-windows==3.5.1.post22; platform_system == "Windows"
triton-windows==3.5.1.post21; platform_system == "Windows"
tqdm
wandb
@ -34,11 +33,14 @@ wandb
gradio==4.37.*
https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl
# Diffusers
diffusers @ git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6
# API
flask_cloudflared==0.0.14
sse-starlette==1.6.5
tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"

View file

@ -2,7 +2,6 @@ accelerate==1.8.*
audioop-lts<1.0; python_version >= "3.13"
colorama
datasets
diffusers==0.36.*
einops
fastapi==0.112.4
html2text==2025.4.15
@ -20,13 +19,13 @@ python-docx==1.1.2
pyyaml
requests
rich
safetensors==0.7.*
safetensors==0.6.*
scipy
sentencepiece
tensorboard
torchao==0.15.*
torchao==0.14.*
transformers==4.57.*
triton-windows==3.5.1.post22; platform_system == "Windows"
triton-windows==3.5.1.post21; platform_system == "Windows"
tqdm
wandb
@ -34,11 +33,14 @@ wandb
gradio==4.37.*
https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl
# Diffusers
diffusers @ git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6
# API
flask_cloudflared==0.0.14
sse-starlette==1.6.5
tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"

View file

@ -2,7 +2,6 @@ accelerate==1.8.*
audioop-lts<1.0; python_version >= "3.13"
colorama
datasets
diffusers==0.36.*
einops
fastapi==0.112.4
html2text==2025.4.15
@ -20,13 +19,13 @@ python-docx==1.1.2
pyyaml
requests
rich
safetensors==0.7.*
safetensors==0.6.*
scipy
sentencepiece
tensorboard
torchao==0.15.*
torchao==0.14.*
transformers==4.57.*
triton-windows==3.5.1.post22; platform_system == "Windows"
triton-windows==3.5.1.post21; platform_system == "Windows"
tqdm
wandb
@ -34,11 +33,14 @@ wandb
gradio==4.37.*
https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl
# Diffusers
diffusers @ git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6
# API
flask_cloudflared==0.0.14
sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only, AVX2)
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"

View file

@ -2,7 +2,6 @@ accelerate==1.8.*
audioop-lts<1.0; python_version >= "3.13"
colorama
datasets
diffusers==0.36.*
einops
fastapi==0.112.4
html2text==2025.4.15
@ -20,13 +19,13 @@ python-docx==1.1.2
pyyaml
requests
rich
safetensors==0.7.*
safetensors==0.6.*
scipy
sentencepiece
tensorboard
torchao==0.15.*
torchao==0.14.*
transformers==4.57.*
triton-windows==3.5.1.post22; platform_system == "Windows"
triton-windows==3.5.1.post21; platform_system == "Windows"
tqdm
wandb
@ -34,11 +33,14 @@ wandb
gradio==4.37.*
https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl
# Diffusers
diffusers @ git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6
# API
flask_cloudflared==0.0.14
sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only, no AVX2)
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"

View file

@ -1,12 +1,11 @@
accelerate==1.8.*
audioop-lts<1.0; python_version >= "3.13"
bitsandbytes==0.49.*
bitsandbytes==0.48.*
colorama
datasets
diffusers==0.36.*
einops
fastapi==0.112.4
flash-linear-attention==0.4.*
flash-linear-attention==0.4.0
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
@ -22,13 +21,13 @@ python-docx==1.1.2
pyyaml
requests
rich
safetensors==0.7.*
safetensors==0.6.*
scipy
sentencepiece
tensorboard
torchao==0.15.*
torchao==0.14.*
transformers==4.57.*
triton-windows==3.5.1.post22; platform_system == "Windows"
triton-windows==3.5.1.post21; platform_system == "Windows"
tqdm
wandb
@ -36,16 +35,19 @@ wandb
gradio==4.37.*
https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl
# Diffusers
diffusers @ git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6
# API
flask_cloudflared==0.0.14
sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.18/exllamav3-0.0.18+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.18/exllamav3-0.0.18+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.17/exllamav3-0.0.17+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.17/exllamav3-0.0.17+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"

View file

@ -2,7 +2,6 @@ accelerate==1.8.*
audioop-lts<1.0; python_version >= "3.13"
colorama
datasets
diffusers==0.36.*
einops
fastapi==0.112.4
html2text==2025.4.15
@ -20,13 +19,13 @@ python-docx==1.1.2
pyyaml
requests
rich
safetensors==0.7.*
safetensors==0.6.*
scipy
sentencepiece
tensorboard
torchao==0.15.*
torchao==0.14.*
transformers==4.57.*
triton-windows==3.5.1.post22; platform_system == "Windows"
triton-windows==3.5.1.post21; platform_system == "Windows"
tqdm
wandb
@ -34,6 +33,9 @@ wandb
gradio==4.37.*
https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl
# Diffusers
diffusers @ git+https://github.com/huggingface/diffusers.git@edf36f5128abf3e6ecf92b5145115514363c58e6
# API
flask_cloudflared==0.0.14
sse-starlette==1.6.5

View file

@ -23,5 +23,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

View file

@ -23,5 +23,5 @@ sse-starlette==1.6.5
tiktoken
# AMD wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

View file

@ -23,5 +23,5 @@ sse-starlette==1.6.5
tiktoken
# AMD wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0+rocm6.4.4avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+rocm6.4.4avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

View file

@ -23,5 +23,5 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"

View file

@ -23,5 +23,5 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"

View file

@ -23,5 +23,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only, AVX2)
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"

View file

@ -23,5 +23,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only, no AVX2)
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"

View file

@ -23,5 +23,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

View file

@ -23,5 +23,5 @@ sse-starlette==1.6.5
tiktoken
# Vulkan wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

View file

@ -23,5 +23,5 @@ sse-starlette==1.6.5
tiktoken
# Vulkan wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.74.0/llama_cpp_binaries-0.74.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.66.0/llama_cpp_binaries-0.66.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"