Merge branch 'oobabooga:main' into main
commit fe0bef40d2
@@ -43,6 +43,7 @@ class GenerationOptions(BaseModel):
    ban_eos_token: bool = False
    add_bos_token: bool = True
    enable_thinking: bool = True
+    reasoning_effort: str = "medium"
    skip_special_tokens: bool = True
    static_cache: bool = False
    truncation_length: int = 0
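A minimal sketch of exercising the new field end to end, assuming a local instance with the OpenAI-compatible API enabled; the port and the passthrough of extra GenerationOptions fields alongside the standard ones are assumptions, not part of this diff:

import requests

# Hypothetical local endpoint; "reasoning_effort" rides along with the
# other generation options defined above.
payload = {
    "messages": [{"role": "user", "content": "Explain beam search in two sentences."}],
    "max_tokens": 200,
    "enable_thinking": True,
    "reasoning_effort": "medium",  # new field introduced by this change
}

r = requests.post("http://127.0.0.1:5000/v1/chat/completions", json=payload, timeout=120)
print(r.json()["choices"][0]["message"]["content"])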
modules/chat.py (122)
@@ -175,7 +175,8 @@ def generate_chat_prompt(user_input, state, **kwargs):
        builtin_tools=None,
        tools=state['tools'] if 'tools' in state else None,
        tools_in_user_message=False,
-        add_generation_prompt=False
+        add_generation_prompt=False,
+        reasoning_effort=state['reasoning_effort']
    )

    chat_renderer = partial(
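For context, a standalone sketch of the same mechanism using transformers directly: extra keyword arguments to apply_chat_template are exposed to the Jinja template as variables, which is how a GPT-OSS style template can read reasoning_effort. The model id is illustrative only:

from transformers import AutoTokenizer

tokenizer = AutoTokenizer.from_pretrained("openai/gpt-oss-20b")  # illustrative
prompt = tokenizer.apply_chat_template(
    [{"role": "user", "content": "Give me a haiku about rain."}],
    tokenize=False,
    add_generation_prompt=True,
    reasoning_effort="high",  # forwarded to the template as a variable
)
print(prompt)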
@@ -210,7 +211,57 @@ def generate_chat_prompt(user_input, state, **kwargs):
            messages.insert(insert_pos, {"role": "tool", "content": tool_msg})

        if assistant_msg:
-            messages.insert(insert_pos, {"role": "assistant", "content": assistant_msg})
+            # Handle GPT-OSS as a special case
+            if '<|channel|>analysis<|message|>' in assistant_msg or '<|channel|>final<|message|>' in assistant_msg:
+
+                thinking_content = ""
+                final_content = ""
+
+                # Extract analysis content if present
+                if '<|channel|>analysis<|message|>' in assistant_msg:
+                    # Split the message by the analysis tag to isolate the content that follows
+                    parts = assistant_msg.split('<|channel|>analysis<|message|>', 1)
+                    if len(parts) > 1:
+                        # The content is everything after the tag
+                        potential_content = parts[1]
+
+                        # Now, find the end of this content block
+                        analysis_end_tag = '<|end|>'
+                        if analysis_end_tag in potential_content:
+                            thinking_content = potential_content.split(analysis_end_tag, 1)[0].strip()
+                        else:
+                            # Fallback: if no <|end|> tag, stop at the start of the final channel if it exists
+                            final_channel_tag = '<|channel|>final<|message|>'
+                            if final_channel_tag in potential_content:
+                                thinking_content = potential_content.split(final_channel_tag, 1)[0].strip()
+                            else:
+                                thinking_content = potential_content.strip()
+
+                # Extract final content if present
+                final_tag_to_find = '<|channel|>final<|message|>'
+                if final_tag_to_find in assistant_msg:
+                    # Split the message by the final tag to isolate the content that follows
+                    parts = assistant_msg.split(final_tag_to_find, 1)
+                    if len(parts) > 1:
+                        # The content is everything after the tag
+                        potential_content = parts[1]
+
+                        # Now, find the end of this content block
+                        final_end_tag = '<|end|>'
+                        if final_end_tag in potential_content:
+                            final_content = potential_content.split(final_end_tag, 1)[0].strip()
+                        else:
+                            final_content = potential_content.strip()
+
+                # Insert as structured message
+                msg_dict = {"role": "assistant", "content": final_content}
+                if '<|channel|>analysis<|message|>' in assistant_msg:
+                    msg_dict["thinking"] = thinking_content
+
+                messages.insert(insert_pos, msg_dict)
+
+            else:
+                messages.insert(insert_pos, {"role": "assistant", "content": assistant_msg})

        if user_msg not in ['', '<|BEGIN-VISIBLE-CHAT|>']:
            # Check for user message attachments in metadata
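A standalone restatement of the channel-splitting logic above, runnable outside the webui; the function name is hypothetical and exists only for illustration:

def split_gpt_oss_message(assistant_msg):
    # Pull the analysis (thinking) and final channels out of a raw GPT-OSS reply.
    analysis_tag = '<|channel|>analysis<|message|>'
    final_tag = '<|channel|>final<|message|>'
    end_tag = '<|end|>'

    thinking_content = ""
    final_content = ""

    if analysis_tag in assistant_msg:
        after_analysis = assistant_msg.split(analysis_tag, 1)[1]
        if end_tag in after_analysis:
            thinking_content = after_analysis.split(end_tag, 1)[0].strip()
        elif final_tag in after_analysis:
            thinking_content = after_analysis.split(final_tag, 1)[0].strip()
        else:
            thinking_content = after_analysis.strip()

    if final_tag in assistant_msg:
        after_final = assistant_msg.split(final_tag, 1)[1]
        final_content = after_final.split(end_tag, 1)[0].strip() if end_tag in after_final else after_final.strip()

    return thinking_content, final_content

sample = "<|channel|>analysis<|message|>User wants a joke.<|end|><|start|>assistant<|channel|>final<|message|>Why did the scarecrow win an award?<|end|>"
print(split_gpt_oss_message(sample))
# ('User wants a joke.', 'Why did the scarecrow win an award?')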
@@ -295,18 +346,44 @@ def generate_chat_prompt(user_input, state, **kwargs):
if len(suffix) > 0:
prompt = prompt[:-len(suffix)]
else:
if _continue:
suffix = get_generation_prompt(renderer, impersonate=impersonate)[1]
if len(suffix) > 0:
prompt = prompt[:-len(suffix)]
# Handle GPT-OSS as a special case when continuing
if _continue and '<|channel|>final<|message|>' in state['instruction_template_str']:
last_message_to_continue = messages[-1]
prompt = renderer(messages=messages[:-1])

# Start the assistant turn wrapper
assistant_reply_so_far = "<|start|>assistant"

if 'thinking' in last_message_to_continue:
assistant_reply_so_far += f"<|channel|>analysis<|message|>{last_message_to_continue['thinking']}<|end|>"

assistant_reply_so_far += f"<|channel|>final<|message|>{last_message_to_continue.get('content', '')}"

prompt += assistant_reply_so_far

else:
prefix = get_generation_prompt(renderer, impersonate=impersonate)[0]
if state['mode'] == 'chat' and not impersonate:
prefix = apply_extensions('bot_prefix', prefix, state)
prompt = renderer(messages=messages)
if _continue:
suffix = get_generation_prompt(renderer, impersonate=impersonate)[1]
if len(suffix) > 0:
prompt = prompt[:-len(suffix)]
else:
prefix = get_generation_prompt(renderer, impersonate=impersonate)[0]

prompt += prefix
# Handle GPT-OSS as a special case when not continuing
if '<|channel|>final<|message|>' in state['instruction_template_str']:
if prefix.endswith("<|channel|>final<|message|>"):
prefix = prefix[:-len("<|channel|>final<|message|>")]

if state['mode'] == 'instruct' and not any((_continue, impersonate, state['enable_thinking'])):
if impersonate:
prefix += "<|message|>"

if state['mode'] == 'chat' and not impersonate:
prefix = apply_extensions('bot_prefix', prefix, state)

prompt += prefix

if state['mode'] == 'instruct' and 'enable_thinking' in state['instruction_template_str'] and not any((_continue, impersonate, state['enable_thinking'])):
prompt += get_thinking_suppression_string(instruction_template)

return prompt
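To illustrate the continue path, a small sketch of the assistant-turn wrapper that gets appended to the rendered prompt; last_message stands in for messages[-1] as built earlier in the function and is made up here:

last_message = {"role": "assistant", "thinking": "The user asked for a haiku about rain.", "content": "Soft rain on the roof"}

assistant_reply_so_far = "<|start|>assistant"
if 'thinking' in last_message:
    assistant_reply_so_far += f"<|channel|>analysis<|message|>{last_message['thinking']}<|end|>"
assistant_reply_so_far += f"<|channel|>final<|message|>{last_message.get('content', '')}"

print(assistant_reply_so_far)
# <|start|>assistant<|channel|>analysis<|message|>The user asked for a haiku about rain.<|end|><|channel|>final<|message|>Soft rain on the roof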
@@ -459,6 +536,12 @@ def get_stopping_strings(state):
    result = [item for item in stopping_strings if not any(item.startswith(other) and item != other for other in stopping_strings)]
    result = list(set(result))
+
+    # Handle GPT-OSS as a special case
+    if '<|channel|>final<|message|>' in state['instruction_template_str'] and "<|end|>" in result:
+        result.remove("<|end|>")
+        result.append("<|result|>")
+        result = list(set(result))

    if shared.args.verbose:
        logger.info("STOPPING_STRINGS=")
        pprint.PrettyPrinter(indent=4, sort_dicts=False).pprint(result)
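The adjustment in isolation, as a sketch with a made-up stopping list; "<|end|>" is dropped, presumably because it separates the analysis and final channels rather than terminating the turn, and "<|result|>" is used instead:

stopping_strings = ["<|end|>", "<|start|>user", "\nUser:"]

gpt_oss_template = True  # i.e. '<|channel|>final<|message|>' appears in the instruction template
if gpt_oss_template and "<|end|>" in stopping_strings:
    stopping_strings.remove("<|end|>")
    stopping_strings.append("<|result|>")

print(sorted(set(stopping_strings)))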
@@ -611,9 +694,9 @@ def generate_search_query(user_message, state):

# Use a minimal state for search query generation but keep the full history
search_state = state.copy()
search_state['max_new_tokens'] = 64
search_state['auto_max_new_tokens'] = False
search_state['auto_max_new_tokens'] = True
search_state['enable_thinking'] = False
search_state['reasoning_effort'] = 'low'
search_state['start_with'] = ""

# Generate the full prompt using existing history + augmented message
@@ -623,6 +706,12 @@ def generate_search_query(user_message, state):
    for reply in generate_reply(formatted_prompt, search_state, stopping_strings=[], is_chat=True):
        query = reply

+    # Check for thinking block delimiters and extract content after them
+    if "</think>" in query:
+        query = query.rsplit("</think>", 1)[1]
+    elif "<|start|>assistant<|channel|>final<|message|>" in query:
+        query = query.rsplit("<|start|>assistant<|channel|>final<|message|>", 1)[1]
+
    # Strip and remove surrounding quotes if present
    query = query.strip()
    if len(query) >= 2 and query.startswith('"') and query.endswith('"'):
@@ -643,6 +732,10 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
    output = apply_extensions('history', output)
    state = apply_extensions('state', state)

+    # Handle GPT-OSS as a special case
+    if '<|channel|>final<|message|>' in state['instruction_template_str']:
+        state['skip_special_tokens'] = False
+
    # Let the jinja2 template handle the BOS token
    if state['mode'] in ['instruct', 'chat-instruct']:
        state['add_bos_token'] = False
@@ -1175,6 +1268,9 @@ def save_last_chat_state(character, mode, unique_id):
def load_history(unique_id, character, mode):
    p = get_history_file_path(unique_id, character, mode)

+    if not p.exists():
+        return {'internal': [], 'visible': [], 'metadata': {}}
+
    f = json.loads(open(p, 'rb').read())
    if 'internal' in f and 'visible' in f:
        history = f
@@ -463,7 +463,7 @@ class IncrementalGrammarConstraint(GrammarConstraint):
        super().__init__(grammar_str, start_rule_name, tokenizer)

    def accept_char(self, char, stacks):
-        byte = ord(char)
+        byte = char if isinstance(char, int) else ord(char)
        new_stacks = []
        for stack in stacks:
            # stack is empty
@@ -549,7 +549,7 @@ class IncrementalGrammarConstraint(GrammarConstraint):
    # For each sub-rule in the grammar, cache whether each byte is accepted.
    @lru_cache(maxsize=None)
    def pos_char_acceptance(self, pos, char):
-        byte = ord(char)
+        byte = char if isinstance(char, int) else ord(char)
        num_chars = self.grammar_encoding[pos]
        pos += 1
        for i in range(0, num_chars, 2):
@@ -116,29 +116,60 @@ def extract_thinking_block(string):
    THINK_START_TAG = "<think>"
    THINK_END_TAG = "</think>"

-    # Look for think tag
+    # Look for think tag first
    start_pos = string.find(THINK_START_TAG)
    end_pos = string.find(THINK_END_TAG)

-    # Return if neither tag is in string
-    if start_pos == -1 and end_pos == -1:
-        return None, string
+    # If think tags found, use existing logic
+    if start_pos != -1 or end_pos != -1:
+        # handle missing start or end tags
+        if start_pos == -1:
+            thought_start = 0
+        else:
+            thought_start = start_pos + len(THINK_START_TAG)
+        if end_pos == -1:
+            thought_end = len(string)
+            content_start = len(string)
+        else:
+            thought_end = end_pos
+            content_start = end_pos + len(THINK_END_TAG)
+        thinking_content = string[thought_start:thought_end]
+        remaining_content = string[content_start:]
+        return thinking_content, remaining_content

-    # handle missing start or end tags
-    if start_pos == -1:
-        thought_start = 0
-    else:
-        thought_start = start_pos + len(THINK_START_TAG)
-    if end_pos == -1:
-        thought_end = len(string)
-        content_start = len(string)
-    else:
-        thought_end = end_pos
-        content_start = end_pos + len(THINK_END_TAG)
+    # If think tags not found, try alternative format
+    ALT_START = "<|channel|>analysis<|message|>"
+    ALT_END = "<|end|>"
+    ALT_CONTENT_START = "<|start|>assistant<|channel|>final<|message|>"

-    thinking_content = string[thought_start:thought_end]
-    remaining_content = string[content_start:]
-    return thinking_content, remaining_content
+    alt_start_pos = string.find(ALT_START)
+    alt_end_pos = string.find(ALT_END)
+    alt_content_pos = string.find(ALT_CONTENT_START)
+
+    if alt_start_pos != -1 or alt_end_pos != -1:
+        if alt_start_pos == -1:
+            thought_start = 0
+        else:
+            thought_start = alt_start_pos + len(ALT_START)
+
+        # If no explicit end tag but content start exists, use content start as end
+        if alt_end_pos == -1:
+            if alt_content_pos != -1:
+                thought_end = alt_content_pos
+                content_start = alt_content_pos + len(ALT_CONTENT_START)
+            else:
+                thought_end = len(string)
+                content_start = len(string)
+        else:
+            thought_end = alt_end_pos
+            content_start = alt_content_pos + len(ALT_CONTENT_START) if alt_content_pos != -1 else alt_end_pos + len(ALT_END)
+
+        thinking_content = string[thought_start:thought_end]
+        remaining_content = string[content_start:]
+        return thinking_content, remaining_content
+
+    # Return if neither format is found
+    return None, string


@functools.lru_cache(maxsize=None)
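Hypothetical calls showing the two formats the updated function now accepts; the import path assumes the function lives in modules/html_generator.py as in this repository:

from modules.html_generator import extract_thinking_block

think_style = "<think>Planning the answer.</think>The answer is 42."
gpt_oss_style = "<|channel|>analysis<|message|>Planning the answer.<|end|><|start|>assistant<|channel|>final<|message|>The answer is 42."

# Both calls should return ("Planning the answer.", "The answer is 42.")
print(extract_thinking_block(think_style))
print(extract_thinking_block(gpt_oss_style))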
@@ -140,6 +140,7 @@ def transformers_samplers():
        'ban_eos_token',
        'add_bos_token',
        'enable_thinking',
+        'reasoning_effort',
        'skip_special_tokens',
        'static_cache',
        'seed',

@@ -192,6 +193,7 @@ loaders_samplers = {
        'ban_eos_token',
        'add_bos_token',
        'enable_thinking',
+        'reasoning_effort',
        'skip_special_tokens',
        'seed',
        'sampler_priority',

@@ -239,6 +241,7 @@ loaders_samplers = {
        'ban_eos_token',
        'add_bos_token',
        'enable_thinking',
+        'reasoning_effort',
        'skip_special_tokens',
        'seed',
        'sampler_priority',

@@ -278,6 +281,7 @@ loaders_samplers = {
        'ban_eos_token',
        'add_bos_token',
        'enable_thinking',
+        'reasoning_effort',
        'skip_special_tokens',
        'seed',
        'custom_token_bans',

@@ -311,6 +315,7 @@ loaders_samplers = {
        'ban_eos_token',
        'add_bos_token',
        'enable_thinking',
+        'reasoning_effort',
        'seed',
        'sampler_priority',
        'dry_sequence_breakers',
@@ -90,8 +90,10 @@ def get_model_metadata(model):
        template = template.replace('eos_token', "'{}'".format(eos_token))
        template = template.replace('bos_token', "'{}'".format(bos_token))

        template = re.sub(r"\{\{-?\s*raise_exception\(.*?\)\s*-?\}\}", "", template, flags=re.DOTALL)
+        template = re.sub(r'raise_exception\([^)]*\)', "''", template)
        template = re.sub(r'{% if add_generation_prompt %}.*', '', template, flags=re.DOTALL)
+        template = re.sub(r'elif loop\.last and not add_generation_prompt', 'elif False', template)  # Handle GPT-OSS
        model_settings['instruction_template'] = 'Custom (obtained from model metadata)'
        model_settings['instruction_template_str'] = template

@@ -122,13 +124,25 @@ def get_model_metadata(model):

    # Try to find the Jinja instruct template
    path = Path(f'{shared.args.model_dir}/{model}') / 'tokenizer_config.json'
+    template = None
+
+    # 1. Prioritize reading from chat_template.jinja if it exists
+    jinja_path = Path(f'{shared.args.model_dir}/{model}') / 'chat_template.jinja'
+    if jinja_path.exists():
+        with open(jinja_path, 'r', encoding='utf-8') as f:
+            template = f.read()
+
    if path.exists():
        metadata = json.loads(open(path, 'r', encoding='utf-8').read())
-        if 'chat_template' in metadata:
+
+        # 2. Only read from metadata if we haven't already loaded from .jinja
+        if template is None and 'chat_template' in metadata:
            template = metadata['chat_template']
            if isinstance(template, list):
                template = template[0]['template']

+        # 3. If a template was found from either source, process it
+        if template:
            for k in ['eos_token', 'bos_token']:
                if k in metadata:
                    value = metadata[k]

@@ -137,8 +151,10 @@ def get_model_metadata(model):

            template = template.replace(k, "'{}'".format(value))

        template = re.sub(r"\{\{-?\s*raise_exception\(.*?\)\s*-?\}\}", "", template, flags=re.DOTALL)
+        template = re.sub(r'raise_exception\([^)]*\)', "''", template)
        template = re.sub(r'{% if add_generation_prompt %}.*', '', template, flags=re.DOTALL)
+        template = re.sub(r'elif loop\.last and not add_generation_prompt', 'elif False', template)  # Handle GPT-OSS
        model_settings['instruction_template'] = 'Custom (obtained from model metadata)'
        model_settings['instruction_template_str'] = template

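A minimal standalone sketch of the discovery order introduced above: a chat_template.jinja file next to the model takes priority, and the chat_template key in tokenizer_config.json is only used as a fallback. The folder path is illustrative:

import json
from pathlib import Path

model_dir = Path("user_data/models/my-model")  # illustrative location
template = None

jinja_path = model_dir / "chat_template.jinja"
if jinja_path.exists():
    template = jinja_path.read_text(encoding="utf-8")

config_path = model_dir / "tokenizer_config.json"
if config_path.exists():
    metadata = json.loads(config_path.read_text(encoding="utf-8"))
    if template is None and "chat_template" in metadata:
        template = metadata["chat_template"]
        if isinstance(template, list):
            template = template[0]["template"]

print("template found" if template else "no template")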
@@ -211,6 +211,7 @@ settings = {
    'ban_eos_token': False,
    'add_bos_token': True,
    'enable_thinking': True,
+    'reasoning_effort': 'medium',
    'skip_special_tokens': True,
    'stream': True,
    'static_cache': False,
@@ -136,7 +136,6 @@ def load_model_HF(model_name):
    path_to_model = Path(f'{shared.args.model_dir}/{model_name}')
    params = {
        'low_cpu_mem_usage': True,
        'torch_dtype': torch.bfloat16 if shared.args.bf16 else torch.float16,
        'attn_implementation': shared.args.attn_implementation,
    }

@@ -215,6 +215,7 @@ def list_interface_input_elements():
        'ban_eos_token',
        'add_bos_token',
        'enable_thinking',
+        'reasoning_effort',
        'skip_special_tokens',
        'stream',
        'static_cache',

@@ -482,6 +483,7 @@ def setup_auto_save():
        'ban_eos_token',
        'add_bos_token',
        'enable_thinking',
+        'reasoning_effort',
        'skip_special_tokens',
        'stream',
        'static_cache',
@@ -78,7 +78,8 @@ def create_ui():
        with gr.Row():
            shared.gradio['start_with'] = gr.Textbox(label='Start reply with', placeholder='Sure thing!', value=shared.settings['start_with'], elem_classes=['add_scrollbar'])

-        shared.gradio['enable_thinking'] = gr.Checkbox(value=shared.settings['enable_thinking'], label='Enable thinking', info='Used by Qwen3 to toggle <think> mode.')
+        shared.gradio['reasoning_effort'] = gr.Dropdown(value=shared.settings['reasoning_effort'], choices=['low', 'medium', 'high'], label='Reasoning effort', info='Used by GPT-OSS.')
+        shared.gradio['enable_thinking'] = gr.Checkbox(value=shared.settings['enable_thinking'], label='Enable thinking', info='Used by pre-2507 Qwen3.')
        shared.gradio['enable_web_search'] = gr.Checkbox(value=shared.settings.get('enable_web_search', False), label='Activate web search', elem_id='web-search')
        with gr.Row(visible=shared.settings.get('enable_web_search', False)) as shared.gradio['web_search_row']:
            shared.gradio['web_search_pages'] = gr.Number(value=shared.settings.get('web_search_pages', 3), precision=0, label='Number of pages to download', minimum=1, maximum=10)
@@ -23,7 +23,7 @@ safetensors==0.5.*
scipy
sentencepiece
tensorboard
-transformers==4.53.*
+transformers==4.55.*
triton-windows==3.2.0.post19; platform_system == "Windows"
tqdm
wandb

@@ -34,8 +34,8 @@ sse-starlette==1.6.5
tiktoken

# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.5/exllamav3-0.0.5+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.5/exllamav3-0.0.5+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"

@@ -22,7 +22,7 @@ safetensors==0.5.*
scipy
sentencepiece
tensorboard
-transformers==4.53.*
+transformers==4.55.*
triton-windows==3.2.0.post19; platform_system == "Windows"
tqdm
wandb

@@ -33,7 +33,7 @@ sse-starlette==1.6.5
tiktoken

# AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"

@@ -22,7 +22,7 @@ safetensors==0.5.*
scipy
sentencepiece
tensorboard
-transformers==4.53.*
+transformers==4.55.*
triton-windows==3.2.0.post19; platform_system == "Windows"
tqdm
wandb

@@ -33,7 +33,7 @@ sse-starlette==1.6.5
tiktoken

# AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"

@@ -22,7 +22,7 @@ safetensors==0.5.*
scipy
sentencepiece
tensorboard
-transformers==4.53.*
+transformers==4.55.*
triton-windows==3.2.0.post19; platform_system == "Windows"
tqdm
wandb

@@ -33,7 +33,7 @@ sse-starlette==1.6.5
tiktoken

# Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.5/exllamav3-0.0.5-py3-none-any.whl
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl

@@ -23,7 +23,7 @@ safetensors==0.5.*
scipy
sentencepiece
tensorboard
-transformers==4.53.*
+transformers==4.55.*
triton-windows==3.2.0.post19; platform_system == "Windows"
tqdm
wandb

@@ -34,8 +34,8 @@ sse-starlette==1.6.5
tiktoken

# Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.5/exllamav3-0.0.5-py3-none-any.whl
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl

@@ -22,7 +22,7 @@ safetensors==0.5.*
scipy
sentencepiece
tensorboard
-transformers==4.53.*
+transformers==4.55.*
triton-windows==3.2.0.post19; platform_system == "Windows"
tqdm
wandb

@@ -33,5 +33,5 @@ sse-starlette==1.6.5
tiktoken

# llama.cpp (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"

@@ -22,7 +22,7 @@ safetensors==0.5.*
scipy
sentencepiece
tensorboard
-transformers==4.53.*
+transformers==4.55.*
triton-windows==3.2.0.post19; platform_system == "Windows"
tqdm
wandb

@@ -33,5 +33,5 @@ sse-starlette==1.6.5
tiktoken

# llama.cpp (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"

@@ -23,7 +23,7 @@ safetensors==0.5.*
scipy
sentencepiece
tensorboard
-transformers==4.53.*
+transformers==4.55.*
triton-windows==3.3.1.post19; platform_system == "Windows"
tqdm
wandb

@@ -34,8 +34,8 @@ sse-starlette==1.6.5
tiktoken

# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.5/exllamav3-0.0.5+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.5/exllamav3-0.0.5+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"

@@ -23,7 +23,7 @@ safetensors==0.5.*
scipy
sentencepiece
tensorboard
-transformers==4.53.*
+transformers==4.55.*
triton-windows==3.3.1.post19; platform_system == "Windows"
tqdm
wandb

@@ -34,8 +34,8 @@ sse-starlette==1.6.5
tiktoken

# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.5/exllamav3-0.0.5+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.5/exllamav3-0.0.5+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"

@@ -23,7 +23,7 @@ safetensors==0.5.*
scipy
sentencepiece
tensorboard
-transformers==4.53.*
+transformers==4.55.*
triton-windows==3.2.0.post19; platform_system == "Windows"
tqdm
wandb

@@ -34,8 +34,8 @@ sse-starlette==1.6.5
tiktoken

# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.5/exllamav3-0.0.5+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.5/exllamav3-0.0.5+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"

@@ -22,7 +22,7 @@ safetensors==0.5.*
scipy
sentencepiece
tensorboard
-transformers==4.53.*
+transformers==4.55.*
triton-windows==3.2.0.post19; platform_system == "Windows"
tqdm
wandb

@@ -18,5 +18,5 @@ sse-starlette==1.6.5
tiktoken

# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

@@ -18,5 +18,5 @@ sse-starlette==1.6.5
tiktoken

# Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"

@@ -19,6 +19,6 @@ sse-starlette==1.6.5
tiktoken

# Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0"

@@ -18,5 +18,5 @@ sse-starlette==1.6.5
tiktoken

# llama.cpp (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"

@@ -18,5 +18,5 @@ sse-starlette==1.6.5
tiktoken

# llama.cpp (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"

@@ -18,5 +18,5 @@ sse-starlette==1.6.5
tiktoken

# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

@@ -18,5 +18,5 @@ sse-starlette==1.6.5
tiktoken

# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

@@ -18,5 +18,5 @@ sse-starlette==1.6.5
tiktoken

# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.31.0/llama_cpp_binaries-0.31.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.33.0/llama_cpp_binaries-0.33.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"