Mirror of https://github.com/oobabooga/text-generation-webui.git (synced 2026-01-07 01:00:04 +01:00)

Commit 10947b3e53: Merge branch 'oobabooga:main' into main

README.md

@@ -235,9 +235,9 @@ List of command-line flags
 ```txt
 usage: server.py [-h] [--multi-user] [--model MODEL] [--lora LORA [LORA ...]] [--model-dir MODEL_DIR] [--lora-dir LORA_DIR] [--model-menu] [--settings SETTINGS]
 [--extensions EXTENSIONS [EXTENSIONS ...]] [--verbose] [--idle-timeout IDLE_TIMEOUT] [--loader LOADER] [--cpu] [--cpu-memory CPU_MEMORY] [--disk] [--disk-cache-dir DISK_CACHE_DIR]
-[--load-in-8bit] [--bf16] [--no-cache] [--trust-remote-code] [--force-safetensors] [--no_use_fast] [--use_flash_attention_2] [--use_eager_attention] [--torch-compile] [--load-in-4bit]
-[--use_double_quant] [--compute_dtype COMPUTE_DTYPE] [--quant_type QUANT_TYPE] [--flash-attn] [--threads THREADS] [--threads-batch THREADS_BATCH] [--batch-size BATCH_SIZE] [--no-mmap]
-[--mlock] [--gpu-layers N] [--tensor-split TENSOR_SPLIT] [--numa] [--no-kv-offload] [--row-split] [--extra-flags EXTRA_FLAGS] [--streaming-llm] [--ctx-size N] [--cache-type N]
+[--load-in-8bit] [--bf16] [--no-cache] [--trust-remote-code] [--force-safetensors] [--no_use_fast] [--attn-implementation IMPLEMENTATION] [--load-in-4bit] [--use_double_quant]
+[--compute_dtype COMPUTE_DTYPE] [--quant_type QUANT_TYPE] [--flash-attn] [--threads THREADS] [--threads-batch THREADS_BATCH] [--batch-size BATCH_SIZE] [--no-mmap] [--mlock]
+[--gpu-layers N] [--tensor-split TENSOR_SPLIT] [--numa] [--no-kv-offload] [--row-split] [--extra-flags EXTRA_FLAGS] [--streaming-llm] [--ctx-size N] [--cache-type N]
 [--model-draft MODEL_DRAFT] [--draft-max DRAFT_MAX] [--gpu-layers-draft GPU_LAYERS_DRAFT] [--device-draft DEVICE_DRAFT] [--ctx-size-draft CTX_SIZE_DRAFT] [--gpu-split GPU_SPLIT]
 [--autosplit] [--cfg-cache] [--no_flash_attn] [--no_xformers] [--no_sdpa] [--num_experts_per_token N] [--enable_tp] [--cpp-runner] [--deepspeed] [--nvme-offload-dir NVME_OFFLOAD_DIR]
 [--local_rank LOCAL_RANK] [--alpha_value ALPHA_VALUE] [--rope_freq_base ROPE_FREQ_BASE] [--compress_pos_emb COMPRESS_POS_EMB] [--listen] [--listen-port LISTEN_PORT]
@@ -278,9 +278,7 @@ Transformers/Accelerate:
 --trust-remote-code                   Set trust_remote_code=True while loading the model. Necessary for some models.
 --force-safetensors                   Set use_safetensors=True while loading the model. This prevents arbitrary code execution.
 --no_use_fast                         Set use_fast=False while loading the tokenizer (it's True by default). Use this if you have any problems related to use_fast.
---use_flash_attention_2               Set use_flash_attention_2=True while loading the model.
---use_eager_attention                 Set attn_implementation= eager while loading the model.
---torch-compile                       Compile the model with torch.compile for improved performance.
+--attn-implementation IMPLEMENTATION  Attention implementation. Valid options: sdpa, eager, flash_attention_2.
 
 bitsandbytes 4-bit:
 --load-in-4bit                        Load the model with 4-bit precision (using bitsandbytes).
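
The two README hunks above document the interface change that runs through the rest of this commit: the two attention switches (--use_flash_attention_2, --use_eager_attention) are replaced by a single --attn-implementation option, and --torch-compile is removed outright. A minimal migration sketch, assuming server.py is run from the repository root (the helper function itself is hypothetical, not part of this commit):

```python
# Hypothetical migration helper, not part of this commit.
import subprocess

def launch(attn: str = "sdpa") -> None:
    """Start the web UI with the new consolidated attention flag.

    Old invocations used boolean switches such as --use_flash_attention_2;
    the same intent is now expressed as a value of --attn-implementation.
    """
    assert attn in ("sdpa", "eager", "flash_attention_2")
    subprocess.run(["python", "server.py", "--attn-implementation", attn], check=True)

launch("flash_attention_2")  # previously: python server.py --use_flash_attention_2
```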

@@ -35,6 +35,10 @@
     color: #f0f0f0; /* Light text color for readability */
 }
 
+.text p {
+    margin-top: 2px;
+}
+
 .username {
     padding-left: 10px;
     font-size: 20px;

@@ -87,6 +91,7 @@
 }
 
 .message-body p {
     margin-bottom: 0 !important;
     font-size: 16px !important;
     line-height: 1.5 !important;
+    color: #e0e0e0 !important; /* Light color for text */

@@ -124,16 +129,3 @@
     font-size: 18px; /* Smaller username for mobile */
     }
 }
-
-/* Standard spacing from instruct style */
-.chat .message-body :is(p, ul, ol) {
-    margin: 1.25em 0 !important;
-}
-
-.chat .message-body :is(p, ul, ol):first-child {
-    margin-top: 0 !important;
-}
-
-.chat .message-body :is(p, ul, ol):last-child {
-    margin-bottom: 0 !important;
-}

@@ -38,6 +38,10 @@
     text-shadow: 2px 2px 2px rgb(0 0 0 / 40%);
 }
 
+.text p {
+    margin-top: 2px;
+}
+
 .username {
     padding-left: 10px;
     font-size: 22px;

@@ -83,6 +87,7 @@
 }
 
 .message-body p {
     margin-bottom: 0 !important;
     font-size: 18px !important;
     line-height: 1.428571429 !important;
+    color: rgb(243 244 246) !important;

@@ -130,16 +135,3 @@
     font-size: 20px;
     }
 }
-
-/* Standard spacing from instruct style */
-.chat .message-body :is(p, ul, ol) {
-    margin: 1.25em 0 !important;
-}
-
-.chat .message-body :is(p, ul, ol):first-child {
-    margin-top: 0 !important;
-}
-
-.chat .message-body :is(p, ul, ol):last-child {
-    margin-bottom: 0 !important;
-}

@@ -9,6 +9,11 @@
     line-height: 22.5px !important;
 }
 
+.message-body {
+    margin-top: 3px;
+    font-size: 15px !important;
+}
+
 .circle-you {
     width: 50px;
     height: 50px;

@@ -47,6 +52,10 @@
     font-weight: 500;
 }
 
+.message-body p, .chat .message-body ul, .chat .message-body ol {
+    margin-bottom: 10px !important;
+}
+
 .dark .message-body p em {
     color: rgb(138 138 138) !important;
 }

@@ -55,16 +64,3 @@
     color: rgb(110 110 110) !important;
     font-weight: 500;
 }
-
-/* Standard spacing from instruct style */
-.chat .message-body :is(p, ul, ol) {
-    margin: 1.25em 0 !important;
-}
-
-.chat .message-body :is(p, ul, ol):first-child {
-    margin-top: 0 !important;
-}
-
-.chat .message-body :is(p, ul, ol):last-child {
-    margin-bottom: 0 !important;
-}

@@ -68,10 +68,17 @@
     max-width: 80%;
 }
 
+.text p {
+    margin-top: 5px;
+}
+
 .username {
     font-weight: bold;
 }
 
+.message-body {
+}
+
 .message-body img {
     max-width: 300px;
     max-height: 300px;

@@ -79,6 +86,7 @@
 }
 
 .message-body p {
     margin-bottom: 0 !important;
     font-size: 15px !important;
     line-height: 1.428571429 !important;
+    font-weight: 500;

@@ -91,16 +99,3 @@
 .message-body p em {
     color: rgb(110 110 110) !important;
 }
-
-/* Standard spacing from instruct style */
-.chat .message-body :is(p, ul, ol) {
-    margin: 1.25em 0 !important;
-}
-
-.chat .message-body :is(p, ul, ol):first-child {
-    margin-top: 0 !important;
-}
-
-.chat .message-body :is(p, ul, ol):last-child {
-    margin-bottom: 0 !important;
-}

@@ -83,6 +83,10 @@
     font-weight: 400;
 }
 
+.message-body p:first-child {
+    margin-top: 0 !important;
+}
+
 .dark .message-body p em {
     color: rgb(170 170 170) !important;
 }

@@ -96,15 +100,6 @@
     margin-top: 8px;
 }
-
-/* Standard spacing from instruct style */
-.chat .message-body :is(p, ul, ol) {
-    margin: 1.25em 0 !important;
-}
-
-.chat .message-body :is(p, ul, ol):first-child {
-    margin-top: 0 !important;
-}
-
-.chat .message-body :is(p, ul, ol):last-child {
-    margin-bottom: 0 !important;
+
+.message-body p, .chat .message-body ul, .chat .message-body ol {
+    margin-bottom: 10px !important;
 }

@@ -11,8 +11,9 @@
 
 .readable-container p, .readable-container li {
     font-size: 16px !important;
-    line-height: 1.4 !important;
     color: #efefef !important;
+    margin-bottom: 22px;
+    line-height: 1.4 !important;
 }
 
 .readable-container li > p {

@@ -29,17 +30,4 @@
 .readable-container .hoverable {
     font-size: 14px;
 }
-
-/* Standard spacing from instruct style */
-.readable-container :is(p, ul, ol) {
-    margin: 1.25em 0 !important;
-}
-
-.readable-container :is(p, ul, ol):first-child {
-    margin-top: 0 !important;
-}
-
-.readable-container :is(p, ul, ol):last-child {
-    margin-bottom: 0 !important;
-}
 }

@@ -643,6 +643,10 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
     output = apply_extensions('history', output)
     state = apply_extensions('state', state)
 
+    # Let the jinja2 template handle the BOS token
+    if state['mode'] in ['instruct', 'chat-instruct']:
+        state['add_bos_token'] = False
+
     # Initialize metadata if not present
     if 'metadata' not in output:
        output['metadata'] = {}
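
This hunk turns off the tokenizer-level BOS token for the instruct and chat-instruct modes because the jinja2 chat template is expected to emit it. A toy illustration of the failure mode being avoided (the template below is a stand-in, not the project's actual template):

```python
# Illustrative only: why add_bos_token is forced off when a chat template runs.
from jinja2 import Template

chat_template = Template("{{ bos_token }}[INST] {{ message }} [/INST]")  # stand-in template
prompt = chat_template.render(bos_token="<s>", message="Hello")
print(prompt)  # '<s>[INST] Hello [/INST]' -- BOS is already in the text;
# if the tokenizer also prepended one, the model would see a double BOS.
```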

@@ -39,12 +39,10 @@ loaders_and_params = OrderedDict({
         'quant_type',
         'load_in_8bit',
         'load_in_4bit',
-        'torch_compile',
-        'use_flash_attention_2',
+        'attn_implementation',
         'cpu',
         'disk',
         'use_double_quant',
-        'use_eager_attention',
         'bf16',
         'trust_remote_code',
         'no_use_fast',

@@ -15,7 +15,6 @@ from modules.logging_colors import logger
 def get_fallback_settings():
     return {
         'bf16': False,
-        'use_eager_attention': False,
         'ctx_size': 2048,
         'rope_freq_base': 0,
         'compress_pos_emb': 1,

@@ -118,14 +117,9 @@ def get_model_metadata(model):
         if metadata['rope_scaling']['type'] == 'linear':
             model_settings['compress_pos_emb'] = metadata['rope_scaling']['factor']
 
-    # For Gemma-2
     if 'torch_dtype' in metadata and metadata['torch_dtype'] == 'bfloat16':
         model_settings['bf16'] = True
 
-    # For Gemma-2
-    if 'architectures' in metadata and isinstance(metadata['architectures'], list) and 'Gemma2ForCausalLM' in metadata['architectures']:
-        model_settings['use_eager_attention'] = True
-
     # Try to find the Jinja instruct template
     path = Path(f'{shared.args.model_dir}/{model}') / 'tokenizer_config.json'
     if path.exists():
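
The surviving heuristic above keys bf16 off the checkpoint's declared torch_dtype; the Gemma-2 special case is gone now that use_eager_attention no longer exists. A minimal sketch of the retained check, with metadata standing in for a parsed config.json:

```python
# Sketch of the retained dtype heuristic (metadata mimics a parsed config.json dict).
def wants_bf16(metadata: dict) -> bool:
    return metadata.get('torch_dtype') == 'bfloat16'

print(wants_bf16({'torch_dtype': 'bfloat16'}))  # True  -> load in bf16
print(wants_bf16({'torch_dtype': 'float16'}))   # False -> default to fp16
```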

@@ -1,13 +0,0 @@
-import sys
-from pathlib import Path
-
-
-class RelativeImport:
-    def __init__(self, path):
-        self.import_path = Path(path)
-
-    def __enter__(self):
-        sys.path.insert(0, str(self.import_path))
-
-    def __exit__(self, exc_type, exc_value, traceback):
-        sys.path.remove(str(self.import_path))

@@ -61,9 +61,7 @@ group.add_argument('--no-cache', action='store_true', help='Set use_cache to Fal
 group.add_argument('--trust-remote-code', action='store_true', help='Set trust_remote_code=True while loading the model. Necessary for some models.')
 group.add_argument('--force-safetensors', action='store_true', help='Set use_safetensors=True while loading the model. This prevents arbitrary code execution.')
 group.add_argument('--no_use_fast', action='store_true', help='Set use_fast=False while loading the tokenizer (it\'s True by default). Use this if you have any problems related to use_fast.')
-group.add_argument('--use_flash_attention_2', action='store_true', help='Set use_flash_attention_2=True while loading the model.')
-group.add_argument('--use_eager_attention', action='store_true', help='Set attn_implementation= eager while loading the model.')
-group.add_argument('--torch-compile', action='store_true', help='Compile the model with torch.compile for improved performance.')
+group.add_argument('--attn-implementation', type=str, default='sdpa', metavar="IMPLEMENTATION", help='Attention implementation. Valid options: sdpa, eager, flash_attention_2.')
 
 # bitsandbytes 4-bit
 group = parser.add_argument_group('bitsandbytes 4-bit')
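
The replacement argument is a plain string option with a default, so omitting it preserves the previous sdpa behavior. A self-contained demonstration of how argparse exposes it, mirroring the added add_argument call:

```python
# Minimal reproduction of the new CLI surface.
import argparse

parser = argparse.ArgumentParser()
parser.add_argument('--attn-implementation', type=str, default='sdpa', metavar='IMPLEMENTATION',
                    help='Attention implementation. Valid options: sdpa, eager, flash_attention_2.')

args = parser.parse_args(['--attn-implementation', 'eager'])
print(args.attn_implementation)                   # 'eager' (argparse maps the dash to an underscore)
print(parser.parse_args([]).attn_implementation)  # 'sdpa' when the flag is omitted
```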

@@ -134,7 +134,6 @@ def encode(prompt, add_special_tokens=True, add_bos_token=True, truncation_lengt
         input_ids = np.array(input_ids).reshape(1, len(input_ids))
     else:
         input_ids = shared.tokenizer.encode(str(prompt), return_tensors='pt', add_special_tokens=add_special_tokens)
-
     if hasattr(shared.tokenizer, 'bos_token_id') and shared.tokenizer.bos_token_id is not None:
         if add_bos_token:
             # Add BOS token if missing

@@ -142,13 +141,9 @@ def encode(prompt, add_special_tokens=True, add_bos_token=True, truncation_lengt
                 bos_tensor = torch.tensor([[shared.tokenizer.bos_token_id]])
                 input_ids = torch.cat((bos_tensor, input_ids), 1)
 
-            # Prevent double BOS tokens from jinja templates
-            while len(input_ids[0]) > 1 and input_ids[0][0] == shared.tokenizer.bos_token_id and input_ids[0][1] == shared.tokenizer.bos_token_id:
-                input_ids = input_ids[:, 1:]
         else:
             # Remove BOS tokens when not wanted
             while len(input_ids[0]) > 0 and input_ids[0][0] == shared.tokenizer.bos_token_id:
                 input_ids = input_ids[:, 1:]
+
+        # Always prevent double BOS tokens (regardless of add_bos_token setting)
+        while len(input_ids[0]) > 1 and input_ids[0][0] == shared.tokenizer.bos_token_id and input_ids[0][1] == shared.tokenizer.bos_token_id:
+            input_ids = input_ids[:, 1:]
 
     if truncation_length is not None:
         input_ids = input_ids[:, -truncation_length:]
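
The key behavioral change: BOS deduplication now runs whether or not add_bos_token is set, so a template-supplied BOS plus a tokenizer-supplied one can never stack. A standalone re-implementation of the loop on plain lists (the module itself operates on torch tensors):

```python
# Standalone illustration of the always-on BOS dedup loop (lists stand in for tensors).
def dedupe_bos(ids: list, bos_id: int) -> list:
    while len(ids) > 1 and ids[0] == bos_id and ids[1] == bos_id:
        ids = ids[1:]
    return ids

print(dedupe_bos([1, 1, 1, 42, 7], bos_id=1))  # [1, 42, 7] -- duplicates stripped
print(dedupe_bos([1, 42, 7], bos_id=1))        # [1, 42, 7] -- a single BOS is kept
```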

@@ -131,24 +131,21 @@ def load_tokenizer(model_name, tokenizer_dir=None):
 
 
 def load_model_HF(model_name):
-    torch._dynamo.config.disable = True
-
     path_to_model = Path(f'{shared.args.model_dir}/{model_name}')
     params = {
         'low_cpu_mem_usage': True,
         'torch_dtype': torch.bfloat16 if shared.args.bf16 else torch.float16,
+        'attn_implementation': shared.args.attn_implementation,
     }
 
     if shared.args.trust_remote_code:
         params['trust_remote_code'] = True
 
-    if shared.args.use_flash_attention_2:
-        params['use_flash_attention_2'] = True
-
     if shared.args.force_safetensors:
         params['force_safetensors'] = True
 
-    if shared.args.use_eager_attention:
-        params['attn_implementation'] = 'eager'
-
     config = AutoConfig.from_pretrained(path_to_model, trust_remote_code=shared.args.trust_remote_code)
 
     if 'chatglm' in model_name.lower():

@@ -261,9 +258,6 @@ def load_model_HF(model_name):
     print()
     model = LoaderClass.from_pretrained(path_to_model, **params)
 
-    if shared.args.torch_compile:
-        model = torch.compile(model)
-
     return model
 
 
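
After this change the attention backend is set once, when the params dict is built, rather than patched in afterwards by per-flag branches; the torch.compile wrapping is dropped along with its flag. A sketch of the simplified flow, with args standing in for shared.args:

```python
# Sketch of the post-change parameter construction (args mimics shared.args).
import torch

def build_params(args) -> dict:
    params = {
        'low_cpu_mem_usage': True,
        'torch_dtype': torch.bfloat16 if args.bf16 else torch.float16,
        # One source of truth: 'sdpa', 'eager', or 'flash_attention_2'.
        'attn_implementation': args.attn_implementation,
    }
    if args.trust_remote_code:
        params['trust_remote_code'] = True
    if args.force_safetensors:
        params['force_safetensors'] = True
    return params  # then: LoaderClass.from_pretrained(path_to_model, **params)
```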

@@ -142,9 +142,8 @@ def list_model_elements():
        'num_experts_per_token',
        'load_in_8bit',
        'load_in_4bit',
-       'torch_compile',
        'flash_attn',
-       'use_flash_attention_2',
+       'attn_implementation',
        'cpu',
        'disk',
        'row_split',

@@ -153,7 +152,6 @@ def list_model_elements():
        'mlock',
        'numa',
        'use_double_quant',
-       'use_eager_attention',
        'bf16',
        'autosplit',
        'enable_tp',

@@ -44,6 +44,7 @@ def create_ui():
             shared.gradio['gpu_layers'] = gr.Slider(label="gpu-layers", minimum=0, maximum=get_initial_gpu_layers_max(), step=1, value=shared.args.gpu_layers, info='Must be greater than 0 for the GPU to be used. ⚠️ Lower this value if you can\'t load the model.')
             shared.gradio['ctx_size'] = gr.Slider(label='ctx-size', minimum=256, maximum=131072, step=256, value=shared.args.ctx_size, info='Context length. Common values: 4096, 8192, 16384, 32768, 65536, 131072. ⚠️ Lower this value if you can\'t load the model.')
             shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7')
+            shared.gradio['attn_implementation'] = gr.Dropdown(label="attn-implementation", choices=['sdpa', 'eager', 'flash_attention_2'], value=shared.args.attn_implementation, info='Attention implementation.')
             shared.gradio['cache_type'] = gr.Dropdown(label="cache-type", choices=['fp16', 'q8_0', 'q4_0', 'fp8', 'q8', 'q7', 'q6', 'q5', 'q4', 'q3', 'q2'], value=shared.args.cache_type, allow_custom_value=True, info='Valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4; ExLlamaV3 - fp16, q2 to q8. For ExLlamaV3, you can type custom combinations for separate k/v bits (e.g. q4_q8).')
         with gr.Column():
             shared.gradio['vram_info'] = gr.HTML(value=get_initial_vram_info())

@@ -51,8 +52,6 @@ def create_ui():
             shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming-llm", value=shared.args.streaming_llm, info='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
             shared.gradio['load_in_8bit'] = gr.Checkbox(label="load-in-8bit", value=shared.args.load_in_8bit)
             shared.gradio['load_in_4bit'] = gr.Checkbox(label="load-in-4bit", value=shared.args.load_in_4bit)
-            shared.gradio['torch_compile'] = gr.Checkbox(label="torch-compile", value=shared.args.torch_compile, info='Compile the model with torch.compile for improved performance.')
-            shared.gradio['use_flash_attention_2'] = gr.Checkbox(label="use_flash_attention_2", value=shared.args.use_flash_attention_2, info='Set use_flash_attention_2=True while loading the model.')
             shared.gradio['use_double_quant'] = gr.Checkbox(label="use_double_quant", value=shared.args.use_double_quant, info='Used by load-in-4bit.')
             shared.gradio['autosplit'] = gr.Checkbox(label="autosplit", value=shared.args.autosplit, info='Automatically split the model tensors across the available GPUs.')
             shared.gradio['enable_tp'] = gr.Checkbox(label="enable_tp", value=shared.args.enable_tp, info='Enable Tensor Parallelism (TP).')

@@ -63,8 +62,8 @@ def create_ui():
             # Speculative decoding
             with gr.Accordion("Speculative decoding", open=False, elem_classes='tgw-accordion') as shared.gradio['speculative_decoding_accordion']:
                 with gr.Row():
-                    shared.gradio['model_draft'] = gr.Dropdown(label="model-draft", choices=utils.get_available_models(), value=lambda: shared.args.model_draft, elem_classes='slim-dropdown', info='Draft model. Speculative decoding only works with models sharing the same vocabulary (e.g., same model family).', interactive=not mu)
-                    ui.create_refresh_button(shared.gradio['model_draft'], lambda: None, lambda: {'choices': utils.get_available_models()}, 'refresh-button', interactive=not mu)
+                    shared.gradio['model_draft'] = gr.Dropdown(label="model-draft", choices=['None'] + utils.get_available_models(), value=lambda: shared.args.model_draft, elem_classes='slim-dropdown', info='Draft model. Speculative decoding only works with models sharing the same vocabulary (e.g., same model family).', interactive=not mu)
+                    ui.create_refresh_button(shared.gradio['model_draft'], lambda: None, lambda: {'choices': ['None'] + utils.get_available_models()}, 'refresh-button', interactive=not mu)
 
                 shared.gradio['gpu_layers_draft'] = gr.Slider(label="gpu-layers-draft", minimum=0, maximum=256, value=shared.args.gpu_layers_draft, info='Number of layers to offload to the GPU for the draft model.')
                 shared.gradio['draft_max'] = gr.Number(label="draft-max", precision=0, step=1, value=shared.args.draft_max, info='Number of tokens to draft for speculative decoding. Recommended value: 4.')

@@ -96,7 +95,6 @@ def create_ui():
             shared.gradio['no_mmap'] = gr.Checkbox(label="no-mmap", value=shared.args.no_mmap)
             shared.gradio['mlock'] = gr.Checkbox(label="mlock", value=shared.args.mlock)
             shared.gradio['numa'] = gr.Checkbox(label="numa", value=shared.args.numa, info='NUMA support can help on some systems with non-uniform memory access.')
-            shared.gradio['use_eager_attention'] = gr.Checkbox(label="use_eager_attention", value=shared.args.use_eager_attention, info='Set attn_implementation= eager while loading the model.')
             shared.gradio['bf16'] = gr.Checkbox(label="bf16", value=shared.args.bf16)
             shared.gradio['no_flash_attn'] = gr.Checkbox(label="no_flash_attn", value=shared.args.no_flash_attn)
             shared.gradio['no_xformers'] = gr.Checkbox(label="no_xformers", value=shared.args.no_xformers)

@@ -132,7 +130,7 @@ def create_ui():
 def create_event_handlers():
     mu = shared.args.multi_user
     if mu:
-        return
+        return
 
     shared.gradio['loader'].change(loaders.make_loader_params_visible, gradio('loader'), gradio(loaders.get_all_params()), show_progress=False)
 
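
The draft-model dropdown now offers an explicit 'None' entry, so speculative decoding can be switched off from the UI. An illustrative way a consumer might interpret that sentinel downstream (the function name here is hypothetical, not code from this commit):

```python
# Hypothetical consumer of the dropdown value (not code from this commit).
def resolve_draft_model(choice: str):
    """Map the UI sentinel 'None' to no draft model at all."""
    return None if choice == 'None' else choice

assert resolve_draft_model('None') is None               # speculative decoding disabled
assert resolve_draft_model('llama-3-8b') == 'llama-3-8b'  # a real draft model selected
```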

@@ -84,7 +84,7 @@ def create_ui():
 
             shared.gradio['auto_max_new_tokens'] = gr.Checkbox(value=shared.settings['auto_max_new_tokens'], label='auto_max_new_tokens', info='Expand max_new_tokens to the available context length.')
             shared.gradio['ban_eos_token'] = gr.Checkbox(value=shared.settings['ban_eos_token'], label='Ban the eos_token', info='Forces the model to never end the generation prematurely.')
-            shared.gradio['add_bos_token'] = gr.Checkbox(value=shared.settings['add_bos_token'], label='Add the bos_token to the beginning of prompts', info='Disabling this can make the replies more creative.')
+            shared.gradio['add_bos_token'] = gr.Checkbox(value=shared.settings['add_bos_token'], label='Add the bos_token to the beginning of prompts', info='Only applies to text completion (notebook). In chat mode, templates control BOS tokens.')
             shared.gradio['skip_special_tokens'] = gr.Checkbox(value=shared.settings['skip_special_tokens'], label='Skip special tokens', info='Some specific models need this unset.')
             shared.gradio['stream'] = gr.Checkbox(value=shared.settings['stream'], label='Activate text streaming')
             shared.gradio['static_cache'] = gr.Checkbox(value=shared.settings['static_cache'], label='Static KV cache', info='Use a static cache for improved performance.')

@@ -1,5 +1,5 @@
-accelerate==1.5.*
-bitsandbytes==0.45.*
+accelerate==1.8.*
+bitsandbytes==0.46.*
 colorama
 datasets
 einops

@@ -10,7 +10,7 @@ jinja2==3.1.6
 markdown
 numpy==2.2.*
 pandas
-peft==0.15.*
+peft==0.16.*
 Pillow>=9.5.0
 psutil
 pydantic==2.8.2

@@ -23,7 +23,8 @@ safetensors==0.5.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.50.*
+transformers==4.53.*
+triton-windows==3.2.0.post19; platform_system == "Windows"
 tqdm
 wandb
 

@@ -33,12 +34,12 @@ sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav3/releases/download/v0.0.5/exllamav3-0.0.5+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/exllamav3/releases/download/v0.0.5/exllamav3-0.0.5+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
 https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
 https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
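
Each wheel line above ends in a PEP 508 environment marker; pip evaluates the marker on the installing machine and skips the line when it is false, which is how one requirements file serves several platforms. The markers can be checked directly with the packaging library:

```python
# Evaluating the requirement markers used above with the 'packaging' library.
from packaging.markers import Marker

marker = Marker('platform_system == "Linux" and platform_machine == "x86_64" '
                'and python_version == "3.11"')
print(marker.evaluate())  # True only on 64-bit Linux under Python 3.11
```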

@@ -1,4 +1,4 @@
-accelerate==1.5.*
+accelerate==1.8.*
 colorama
 datasets
 einops

@@ -9,7 +9,7 @@ jinja2==3.1.6
 markdown
 numpy==2.2.*
 pandas
-peft==0.15.*
+peft==0.16.*
 Pillow>=9.5.0
 psutil
 pydantic==2.8.2

@@ -22,7 +22,8 @@ safetensors==0.5.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.50.*
+transformers==4.53.*
+triton-windows==3.2.0.post19; platform_system == "Windows"
 tqdm
 wandb
 

@@ -32,7 +33,7 @@ sse-starlette==1.6.5
 tiktoken
 
 # AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"

@@ -1,4 +1,4 @@
-accelerate==1.5.*
+accelerate==1.8.*
 colorama
 datasets
 einops

@@ -9,7 +9,7 @@ jinja2==3.1.6
 markdown
 numpy==2.2.*
 pandas
-peft==0.15.*
+peft==0.16.*
 Pillow>=9.5.0
 psutil
 pydantic==2.8.2

@@ -22,7 +22,8 @@ safetensors==0.5.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.50.*
+transformers==4.53.*
+triton-windows==3.2.0.post19; platform_system == "Windows"
 tqdm
 wandb
 

@@ -32,7 +33,7 @@ sse-starlette==1.6.5
 tiktoken
 
 # AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"

@@ -1,4 +1,4 @@
-accelerate==1.5.*
+accelerate==1.8.*
 colorama
 datasets
 einops

@@ -9,7 +9,7 @@ jinja2==3.1.6
 markdown
 numpy==2.2.*
 pandas
-peft==0.15.*
+peft==0.16.*
 Pillow>=9.5.0
 psutil
 pydantic==2.8.2

@@ -22,7 +22,8 @@ safetensors==0.5.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.50.*
+transformers==4.53.*
+triton-windows==3.2.0.post19; platform_system == "Windows"
 tqdm
 wandb
 

@@ -32,7 +33,7 @@ sse-starlette==1.6.5
 tiktoken
 
 # Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4-py3-none-any.whl
-https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/exllamav3/releases/download/v0.0.5/exllamav3-0.0.5-py3-none-any.whl
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl

@@ -1,4 +1,4 @@
-accelerate==1.5.*
+accelerate==1.8.*
 colorama
 datasets
 einops

@@ -10,7 +10,7 @@ markdown
 mlx-lm>=0.26.3
 numpy==2.2.*
 pandas
-peft==0.15.*
+peft==0.16.*
 Pillow>=9.5.0
 psutil
 pydantic==2.8.2

@@ -23,7 +23,8 @@ safetensors==0.5.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.50.*
+transformers==4.53.*
+triton-windows==3.2.0.post19; platform_system == "Windows"
 tqdm
 wandb
 

@@ -33,8 +34,8 @@ sse-starlette==1.6.5
 tiktoken
 
 # Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
-https://github.com/oobabooga/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4-py3-none-any.whl
-https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
+https://github.com/oobabooga/exllamav3/releases/download/v0.0.5/exllamav3-0.0.5-py3-none-any.whl
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl

@@ -1,4 +1,4 @@
-accelerate==1.5.*
+accelerate==1.8.*
 colorama
 datasets
 einops

@@ -9,7 +9,7 @@ jinja2==3.1.6
 markdown
 numpy==2.2.*
 pandas
-peft==0.15.*
+peft==0.16.*
 Pillow>=9.5.0
 psutil
 pydantic==2.8.2

@@ -22,7 +22,8 @@ safetensors==0.5.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.50.*
+transformers==4.53.*
+triton-windows==3.2.0.post19; platform_system == "Windows"
 tqdm
 wandb
 

@@ -32,5 +33,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # llama.cpp (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"

@@ -1,4 +1,4 @@
-accelerate==1.5.*
+accelerate==1.8.*
 colorama
 datasets
 einops

@@ -9,7 +9,7 @@ jinja2==3.1.6
 markdown
 numpy==2.2.*
 pandas
-peft==0.15.*
+peft==0.16.*
 Pillow>=9.5.0
 psutil
 pydantic==2.8.2

@@ -22,7 +22,8 @@ safetensors==0.5.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.50.*
+transformers==4.53.*
+triton-windows==3.2.0.post19; platform_system == "Windows"
 tqdm
 wandb
 

@@ -32,5 +33,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # llama.cpp (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"

@@ -1,5 +1,5 @@
-accelerate==1.5.*
-bitsandbytes==0.45.*
+accelerate==1.8.*
+bitsandbytes==0.46.*
 colorama
 datasets
 einops

@@ -10,7 +10,7 @@ jinja2==3.1.6
 markdown
 numpy==2.2.*
 pandas
-peft==0.15.*
+peft==0.16.*
 Pillow>=9.5.0
 psutil
 pydantic==2.8.2

@@ -23,7 +23,8 @@ safetensors==0.5.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.50.*
+transformers==4.53.*
+triton-windows==3.3.1.post19; platform_system == "Windows"
 tqdm
 wandb
 

@@ -33,12 +34,12 @@ sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav3/releases/download/v0.0.5/exllamav3-0.0.5+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav3/releases/download/v0.0.5/exllamav3-0.0.5+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
 https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
 https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"

@@ -1,5 +1,5 @@
-accelerate==1.5.*
-bitsandbytes==0.45.*
+accelerate==1.8.*
+bitsandbytes==0.46.*
 colorama
 datasets
 einops

@@ -10,7 +10,7 @@ jinja2==3.1.6
 markdown
 numpy==2.2.*
 pandas
-peft==0.15.*
+peft==0.16.*
 Pillow>=9.5.0
 psutil
 pydantic==2.8.2

@@ -23,7 +23,8 @@ safetensors==0.5.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.50.*
+transformers==4.53.*
+triton-windows==3.3.1.post19; platform_system == "Windows"
 tqdm
 wandb
 

@@ -33,12 +34,12 @@ sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav3/releases/download/v0.0.5/exllamav3-0.0.5+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav3/releases/download/v0.0.5/exllamav3-0.0.5+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
 https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
 https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"

@@ -1,5 +1,5 @@
-accelerate==1.5.*
-bitsandbytes==0.45.*
+accelerate==1.8.*
+bitsandbytes==0.46.*
 colorama
 datasets
 einops

@@ -10,7 +10,7 @@ jinja2==3.1.6
 markdown
 numpy==2.2.*
 pandas
-peft==0.15.*
+peft==0.16.*
 Pillow>=9.5.0
 psutil
 pydantic==2.8.2

@@ -23,7 +23,8 @@ safetensors==0.5.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.50.*
+transformers==4.53.*
+triton-windows==3.2.0.post19; platform_system == "Windows"
 tqdm
 wandb
 

@@ -33,12 +34,12 @@ sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav3/releases/download/v0.0.5/exllamav3-0.0.5+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/exllamav3/releases/download/v0.0.5/exllamav3-0.0.5+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
 https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
 https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"

@@ -1,4 +1,4 @@
-accelerate==1.5.*
+accelerate==1.8.*
 colorama
 datasets
 einops

@@ -9,7 +9,7 @@ jinja2==3.1.6
 markdown
 numpy==2.2.*
 pandas
-peft==0.15.*
+peft==0.16.*
 Pillow>=9.5.0
 psutil
 pydantic==2.8.2

@@ -22,7 +22,8 @@ safetensors==0.5.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.50.*
+transformers==4.53.*
+triton-windows==3.2.0.post19; platform_system == "Windows"
 tqdm
 wandb
 

@@ -18,5 +18,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

@@ -18,5 +18,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"

@@ -19,6 +19,6 @@ sse-starlette==1.6.5
 tiktoken
 
 # Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0"

@@ -18,5 +18,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # llama.cpp (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"

@@ -18,5 +18,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # llama.cpp (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"

@@ -18,5 +18,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

@@ -18,5 +18,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

@@ -18,5 +18,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"