mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2025-12-06 07:12:10 +01:00
Compare commits
29 commits
| SHA1 |
|---|
| bd9f2de73a |
| 661e42d2b7 |
| 5327bc9397 |
| 78b315344a |
| 3cad0cd4c1 |
| 400bb0694b |
| 8f0048663d |
| b0baf7518b |
| 1afe0827ba |
| 0d4eff284c |
| d6f39e1fef |
| 327a234d23 |
| 4e4abd0841 |
| c45f35ccc2 |
| d85b95bb15 |
| 4a36b7be5b |
| 3d7e9856a2 |
| a26e28bdea |
| 6a3bf1de92 |
| 9ad9afad7d |
| e7534a90d8 |
| 6be1bfcc87 |
| 92d9cd36a6 |
| 67f9288891 |
| 16f77b74c4 |
| cd645f80f8 |
| 6871484398 |
| 338ae36f73 |
| c8cd840b24 |
.github/workflows/build-everything-tgw.yml (vendored, 7 changed lines)

@@ -41,6 +41,13 @@ jobs:
       version: ${{ inputs.version }}
       config: 'os:ubuntu-22.04'
 
+  build_release_rocm_linux:
+    name: ROCm Linux
+    uses: ./.github/workflows/build-portable-release-rocm.yml
+    with:
+      version: ${{ inputs.version }}
+      config: 'os:ubuntu-22.04'
+
   build_release_cpu_windows:
     name: CPU Windows
     uses: ./.github/workflows/build-portable-release.yml
@@ -60,7 +60,7 @@ jobs:
           'os' = @('ubuntu-22.04', 'windows-2022')
           'pyver' = @("3.11")
           'avx' = @("AVX2")
-          'cuda' = @("11.7", "12.4")
+          'cuda' = @("12.4")
       }
 
       if ($env:CONFIGIN -ne 'Default') {$env:CONFIGIN.split(';').foreach({$matrix[$_.split(':')[0]] = $_.split(':')[1].split(',')})}

@@ -147,22 +147,13 @@ jobs:
 
           # Create CUDA-specific requirements file if needed
           cd "text-generation-webui-${VERSION_CLEAN}"
-          if [[ "$CUDA_VERSION" == "11.7" ]]; then
-            echo "Creating CUDA 11.7 specific requirements file"
-            sed 's/cu124/cu117/g' "$BASE_REQ_FILE" > requirements_cuda_temp.txt
-            REQ_FILE="requirements_cuda_temp.txt"
-          else
-            REQ_FILE="$BASE_REQ_FILE"
-          fi
+          REQ_FILE="$BASE_REQ_FILE"
 
           # 4. Install packages
           echo "Installing Python packages from $REQ_FILE..."
           $PIP_PATH install --target="./$PACKAGES_PATH" -r "$REQ_FILE"
 
           # 5. Clean up
-          if [[ "$CUDA_VERSION" == "11.7" ]]; then
-            rm requirements_cuda_temp.txt
-          fi
           rm -rf .git cmd* update_wizard* Colab-TextGen-GPU.ipynb docker setup.cfg .github .gitignore requirements/ one_click.py
 
           # 6. Create ZIP file
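For reference, the `config` and `exclude` workflow inputs use a compact `key:item1,item2;key2:...` string format, and the PowerShell one-liner above replaces whole matrix axes with the supplied values. A rough Python sketch of that parsing (function and variable names are illustrative, not from the repository):

```python
def apply_config_override(matrix: dict, config: str) -> dict:
    """Replace whole matrix axes from a 'key1:a,b;key2:c' override string."""
    if config != "Default":
        for entry in config.split(";"):
            key, values = entry.split(":", 1)
            matrix[key] = values.split(",")
    return matrix

# Example: build-everything-tgw.yml passes config: 'os:ubuntu-22.04'
matrix = {"os": ["ubuntu-22.04", "windows-2022"], "pyver": ["3.11"], "avx": ["AVX2"], "cuda": ["12.4"]}
print(apply_config_override(matrix, "os:ubuntu-22.04"))
# {'os': ['ubuntu-22.04'], 'pyver': ['3.11'], 'avx': ['AVX2'], 'cuda': ['12.4']}
```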
.github/workflows/build-portable-release-rocm.yml (vendored, new file, 165 lines)

@@ -0,0 +1,165 @@
name: Build ROCm

on:
  workflow_dispatch:
    inputs:
      version:
        description: 'Version tag of text-generation-webui to build: v3.0'
        default: 'v3.0'
        required: true
        type: string
      config:
        description: 'Override configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
        default: 'Default'
        required: false
        type: string
      exclude:
        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
        default: 'None'
        required: false
        type: string
  workflow_call:
    inputs:
      version:
        description: 'Version tag of text-generation-webui to build: v3.0'
        default: 'v3.0'
        required: true
        type: string
      config:
        description: 'Configurations to build: key1:item1-1,item1-2;key2:item2-1,item2-2'
        default: 'Default'
        required: false
        type: string
      exclude:
        description: 'Exclude build configurations: key1-1:item1-1,key1-2:item1-2;key2-1:item2-1,key2-2:item2-2'
        default: 'None'
        required: false
        type: string

permissions:
  contents: write

jobs:
  define_matrix:
    name: Define Build Matrix
    runs-on: ubuntu-latest
    outputs:
      matrix: ${{ steps.set-matrix.outputs.matrix }}
    defaults:
      run:
        shell: pwsh
    env:
      CONFIGIN: ${{ inputs.config }}
      EXCLUDEIN: ${{ inputs.exclude }}

    steps:
      - name: Define Job Output
        id: set-matrix
        run: |
          $matrix = @{
              'os' = @('ubuntu-22.04')
              'pyver' = @("3.11")
              'avx' = @("AVX2")
          }

          if ($env:CONFIGIN -ne 'Default') {$env:CONFIGIN.split(';').foreach({$matrix[$_.split(':')[0]] = $_.split(':')[1].split(',')})}

          if ($env:EXCLUDEIN -ne 'None') {
              $exclusions = @()
              $exclusions += $env:EXCLUDEIN.split(';').replace(':','=').replace(',',"`n") | ConvertFrom-StringData
              $matrix['exclude'] = $exclusions
          }

          $matrixOut = ConvertTo-Json $matrix -Compress
          Write-Output ('matrix=' + $matrixOut) >> $env:GITHUB_OUTPUT

  build_wheels:
    name: ${{ matrix.os }} ${{ matrix.pyver }} CPU ${{ matrix.avx }}
    needs: define_matrix
    runs-on: ${{ matrix.os }}
    strategy:
      matrix: ${{ fromJSON(needs.define_matrix.outputs.matrix) }}
    defaults:
      run:
        shell: pwsh
    env:
      AVXVER: ${{ matrix.avx }}
      PCKGVER: ${{ inputs.version }}

    steps:
      - uses: actions/checkout@v4
        with:
          repository: 'oobabooga/text-generation-webui'
          ref: ${{ inputs.version }}
          submodules: 'recursive'

      - uses: actions/setup-python@v4
        with:
          python-version: ${{ matrix.pyver }}

      - name: Build Package
        shell: bash
        run: |
          VERSION_CLEAN="${{ inputs.version }}"
          VERSION_CLEAN="${VERSION_CLEAN#v}"
          cd ..
          cp -r text-generation-webui "text-generation-webui-${VERSION_CLEAN}"
          cd "text-generation-webui-${VERSION_CLEAN}"

          # Remove extensions that need additional requirements
          allowed=("character_bias" "gallery" "openai" "sd_api_pictures")
          find extensions/ -mindepth 1 -maxdepth 1 -type d | grep -v -E "$(printf '%s|' "${allowed[@]}" | sed 's/|$//')" | xargs rm -rf

          # Define common variables
          AVX_SUPPORT="${{ matrix.avx }}"
          VERSION="${{ inputs.version }}"

          # 1. Set platform-specific variables (Linux only for ROCm)
          PLATFORM="linux"
          PYTHON_URL="https://github.com/astral-sh/python-build-standalone/releases/download/20250409/cpython-3.11.12+20250409-x86_64-unknown-linux-gnu-install_only.tar.gz"
          PIP_PATH="portable_env/bin/python -m pip"
          PACKAGES_PATH="portable_env/lib/python3.11/site-packages"
          rm start_macos.sh start_windows.bat

          # 2. Download and extract Python
          cd ..
          echo "Downloading Python for $PLATFORM..."
          curl -L -o python-build.tar.gz "$PYTHON_URL"
          tar -xzf python-build.tar.gz
          mv python "text-generation-webui-${VERSION_CLEAN}/portable_env"

          # 3. Prepare requirements file based on AVX
          if [[ "$AVX_SUPPORT" == "AVX2" ]]; then
            BASE_REQ_FILE="requirements/portable/requirements_amd.txt"
          else
            BASE_REQ_FILE="requirements/portable/requirements_amd_noavx2.txt"
          fi
          REQ_FILE="$BASE_REQ_FILE"

          cd "text-generation-webui-${VERSION_CLEAN}"

          # 4. Install packages
          echo "Installing Python packages from $REQ_FILE..."
          $PIP_PATH install --target="./$PACKAGES_PATH" -r "$REQ_FILE"

          # 5. Clean up
          rm -rf .git cmd* update_wizard* Colab-TextGen-GPU.ipynb docker setup.cfg .github .gitignore requirements/ one_click.py

          # 6. Create ZIP file
          cd ..
          ZIP_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}-rocm.zip"
          echo "Creating archive: $ZIP_NAME"

          zip -r "$ZIP_NAME" "text-generation-webui-${VERSION_CLEAN}"

      - name: Upload files to a GitHub release
        id: upload-release
        uses: svenstaro/upload-release-action@2.7.0
        continue-on-error: true
        with:
          repo_token: ${{ secrets.GITHUB_TOKEN }}
          file: ../textgen-portable-*.zip
          tag: ${{ inputs.version }}
          file_glob: true
          make_latest: false
          overwrite: true
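The `exclude` handling in define_matrix is terse because it leans on PowerShell's ConvertFrom-StringData. As a rough illustration only (not code from the repository), the same transformation in Python, turning the input string into GitHub Actions matrix exclusions:

```python
def parse_exclusions(exclude: str) -> list:
    """'os:windows-2022,avx:AVX2;pyver:3.11,avx:AVX' -> list of exclusion dicts."""
    if exclude == "None":
        return []
    exclusions = []
    for group in exclude.split(";"):
        # Each ';'-separated group becomes one exclusion entry.
        pairs = (item.split(":", 1) for item in group.split(","))
        exclusions.append({key: value for key, value in pairs})
    return exclusions

matrix = {"os": ["ubuntu-22.04"], "pyver": ["3.11"], "avx": ["AVX2"]}
matrix["exclude"] = parse_exclusions("avx:AVX2")
# [{'avx': 'AVX2'}]
```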
.github/workflows/build-portable-release.yml (vendored, 2 changed lines)

@@ -57,7 +57,7 @@ jobs:
        id: set-matrix
        run: |
          $matrix = @{
-              'os' = @('ubuntu-22.04', 'windows-2022', 'macos-13', 'macos-14')
+              'os' = @('ubuntu-22.04', 'windows-2022', 'macos-14')
              'pyver' = @("3.11")
              'avx' = @("AVX2")
          }
@@ -823,7 +823,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
     lora_model = get_peft_model(shared.model, config)
     if not always_override and Path(f"{lora_file_path}/adapter_model.bin").is_file():
         logger.info("Loading existing LoRA data...")
-        state_dict_peft = torch.load(f"{lora_file_path}/adapter_model.bin")
+        state_dict_peft = torch.load(f"{lora_file_path}/adapter_model.bin", weights_only=True)
         set_peft_model_state_dict(lora_model, state_dict_peft)

         print(f" + Continue Training on {RED}{lora_file_path}/adapter_model.bin{RESET}")
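Passing weights_only=True makes torch.load refuse to unpickle arbitrary Python objects, so a tampered adapter checkpoint cannot execute code at load time. A minimal sketch of the pattern (the path and the pre-existing lora_model are illustrative, not from the repository):

```python
import torch
from peft import set_peft_model_state_dict

adapter_path = "loras/my-lora/adapter_model.bin"  # illustrative path

# weights_only=True restricts unpickling to plain tensors and containers.
state_dict = torch.load(adapter_path, weights_only=True)

# lora_model: an already-constructed PeftModel, as in the surrounding code.
set_peft_model_state_dict(lora_model, state_dict)
```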
@@ -1 +1 @@
-coqui-tts==0.25.1
+coqui-tts>=0.27.0
@@ -196,33 +196,22 @@ def extract_thinking_block(string):
     return None, string


-@functools.lru_cache(maxsize=None)
-def convert_to_markdown(string, message_id=None):
-    if not string:
-        return ""
-
-    # Use a default message ID if none provided
-    if message_id is None:
-        message_id = "unknown"
-
-    # Extract thinking block if present
-    thinking_content, remaining_content = extract_thinking_block(string)
-
-    # Process the main content
-    html_output = process_markdown_content(remaining_content)
-
-    # If thinking content was found, process it using the same function
-    if thinking_content is not None:
-        thinking_html = process_markdown_content(thinking_content)
-
-        # Generate unique ID for the thinking block
-        block_id = f"thinking-{message_id}-0"
-
-        # Check if thinking is complete or still in progress
-        is_streaming = not remaining_content
-        title_text = "Thinking..." if is_streaming else "Thought"
-
-        thinking_block = f'''
+def build_thinking_block(thinking_content, message_id, has_remaining_content):
+    """Build HTML for a thinking block."""
+    if thinking_content is None:
+        return None
+
+    # Process the thinking content through markdown
+    thinking_html = process_markdown_content(thinking_content)
+
+    # Generate unique ID for the thinking block
+    block_id = f"thinking-{message_id}-0"
+
+    # Check if thinking is complete or still in progress
+    is_streaming = not has_remaining_content
+    title_text = "Thinking..." if is_streaming else "Thought"
+
+    return f'''
     <details class="thinking-block" data-block-id="{block_id}" data-streaming="{str(is_streaming).lower()}">
         <summary class="thinking-header">
             {info_svg_small}

@@ -232,14 +221,20 @@ def convert_to_markdown(string, message_id=None):
     </details>
     '''

-        # Prepend the thinking block to the message HTML
-        html_output = thinking_block + html_output
-
-    return html_output
+
+def build_main_content_block(content):
+    """Build HTML for the main content block."""
+    if not content:
+        return ""
+
+    return process_markdown_content(content)


 def process_markdown_content(string):
-    """Process a string through the markdown conversion pipeline."""
+    """
+    Process a string through the markdown conversion pipeline.
+    Uses robust manual parsing to ensure correct LaTeX and Code Block rendering.
+    """
     if not string:
         return ""

@@ -280,7 +275,7 @@ def process_markdown_content(string):
     pattern = re.compile(r'\\begin{blockquote}(.*?)\\end{blockquote}', re.DOTALL)
     string = pattern.sub(replace_blockquote, string)

-    # Code
+    # Code block standardization
     string = string.replace('\\begin{code}', '```')
     string = string.replace('\\end{code}', '```')
     string = string.replace('\\begin{align*}', '$$')

@@ -301,6 +296,7 @@ def process_markdown_content(string):
     is_code = False
     is_latex = False

+    # Manual line iteration for robust structure parsing
     for line in string.split('\n'):
         stripped_line = line.strip()

@@ -371,6 +367,39 @@ def process_markdown_content(string):
     return html_output


+@functools.lru_cache(maxsize=None)
+def convert_to_markdown(string, message_id=None):
+    """
+    Convert a string to markdown HTML with support for multiple block types.
+    Blocks are assembled in order: thinking, main content, etc.
+    """
+    if not string:
+        return ""
+
+    # Use a default message ID if none provided
+    if message_id is None:
+        message_id = "unknown"
+
+    # Extract different components from the string
+    thinking_content, remaining_content = extract_thinking_block(string)
+
+    # Build individual HTML blocks
+    blocks = []
+
+    # Add thinking block if present
+    thinking_html = build_thinking_block(thinking_content, message_id, bool(remaining_content))
+    if thinking_html:
+        blocks.append(thinking_html)
+
+    # Add main content block
+    main_html = build_main_content_block(remaining_content)
+    if main_html:
+        blocks.append(main_html)
+
+    # Assemble all blocks into final HTML
+    return ''.join(blocks)
+
+
 def convert_to_markdown_wrapped(string, message_id=None, use_cache=True):
     '''
     Used to avoid caching convert_to_markdown calls during streaming.
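A note on the caching split: the new convert_to_markdown stays behind functools.lru_cache, so re-rendering a finished chat hits the cache, while convert_to_markdown_wrapped exists to avoid caching during streaming, when the input changes on every token. A minimal sketch of that bypass pattern, assuming only the standard functools behavior (the repository's actual wrapper may differ):

```python
import functools

@functools.lru_cache(maxsize=None)
def convert_to_markdown(string, message_id=None):
    ...  # expensive markdown -> HTML conversion

def convert_to_markdown_wrapped(string, message_id=None, use_cache=True):
    # Caching every partial string during streaming would only grow the cache,
    # so call the undecorated function directly when use_cache is False.
    if use_cache:
        return convert_to_markdown(string, message_id=message_id)
    return convert_to_markdown.__wrapped__(string, message_id=message_id)
```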
@@ -317,6 +317,7 @@ class LlamaServer:
             "--ctx-size", str(shared.args.ctx_size),
             "--gpu-layers", str(shared.args.gpu_layers),
             "--batch-size", str(shared.args.batch_size),
+            "--ubatch-size", str(shared.args.ubatch_size),
             "--port", str(self.port),
             "--no-webui",
             "--flash-attn", "on",

@@ -326,6 +327,8 @@ class LlamaServer:
             cmd += ["--threads", str(shared.args.threads)]
         if shared.args.threads_batch > 0:
             cmd += ["--threads-batch", str(shared.args.threads_batch)]
+        if shared.args.cpu_moe:
+            cmd.append("--cpu-moe")
         if shared.args.no_mmap:
             cmd.append("--no-mmap")
         if shared.args.mlock:
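Taken together, the new options map directly onto llama-server flags. A condensed sketch of the resulting command construction (the surrounding class and the remaining flags are omitted; names follow the diff, the helper itself is illustrative):

```python
def build_server_cmd(args, server_path: str, port: int) -> list:
    """Sketch: translate the relevant settings into llama-server flags."""
    cmd = [
        server_path,
        "--ctx-size", str(args.ctx_size),
        "--gpu-layers", str(args.gpu_layers),
        "--batch-size", str(args.batch_size),    # application-level batch size
        "--ubatch-size", str(args.ubatch_size),  # physical (device-level) batch size
        "--port", str(port),
        "--no-webui",
    ]
    if args.cpu_moe:
        cmd.append("--cpu-moe")  # keep MoE expert tensors on the CPU to save VRAM
    return cmd
```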
@@ -6,9 +6,11 @@ import gradio as gr
 loaders_and_params = OrderedDict({
     'llama.cpp': [
         'gpu_layers',
+        'cpu_moe',
         'threads',
         'threads_batch',
         'batch_size',
+        'ubatch_size',
         'ctx_size',
         'cache_type',
         'tensor_split',
@@ -66,6 +66,7 @@ group.add_argument('--ctx-size-draft', type=int, default=0, help='Size of the pr
 # llama.cpp
 group = parser.add_argument_group('llama.cpp')
 group.add_argument('--gpu-layers', '--n-gpu-layers', type=int, default=256, metavar='N', help='Number of layers to offload to the GPU.')
+group.add_argument('--cpu-moe', action='store_true', help='Move the experts to the CPU (for MoE models).')
 group.add_argument('--mmproj', type=str, default=None, help='Path to the mmproj file for vision models.')
 group.add_argument('--streaming-llm', action='store_true', help='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
 group.add_argument('--tensor-split', type=str, default=None, help='Split the model across multiple GPUs. Comma-separated list of proportions. Example: 60,40.')

@@ -73,7 +74,8 @@ group.add_argument('--row-split', action='store_true', help='Split the model by
 group.add_argument('--no-mmap', action='store_true', help='Prevent mmap from being used.')
 group.add_argument('--mlock', action='store_true', help='Force the system to keep the model in RAM.')
 group.add_argument('--no-kv-offload', action='store_true', help='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.')
-group.add_argument('--batch-size', type=int, default=256, help='Maximum number of prompt tokens to batch together when calling llama_eval.')
+group.add_argument('--batch-size', type=int, default=1024, help='Maximum number of prompt tokens to batch together when calling llama-server. This is the application level batch size.')
+group.add_argument('--ubatch-size', type=int, default=1024, help='Maximum number of prompt tokens to batch together when calling llama-server. This is the max physical batch size for computation (device level).')
 group.add_argument('--threads', type=int, default=0, help='Number of threads to use.')
 group.add_argument('--threads-batch', type=int, default=0, help='Number of threads to use for batches/prompt processing.')
 group.add_argument('--numa', action='store_true', help='Activate NUMA task allocation for llama.cpp.')
@@ -611,7 +611,7 @@ def do_train(lora_name: str, always_override: bool, q_proj_en: bool, v_proj_en:
         bf16=shared.args.bf16,
         optim=optimizer,
         logging_steps=2 if stop_at_loss > 0 else 5,
-        evaluation_strategy="steps" if eval_data is not None else "no",
+        eval_strategy="steps" if eval_data is not None else "no",
         eval_steps=math.ceil(eval_steps / gradient_accumulation_steps) if eval_data is not None else None,
         save_strategy="steps" if eval_data is not None else "no",
         output_dir=lora_file_path,

@@ -620,7 +620,7 @@ def do_train(lora_name: str, always_override: bool, q_proj_en: bool, v_proj_en:
         # TODO: Enable multi-device support
         ddp_find_unused_parameters=None,
         no_cuda=shared.args.cpu,
-        use_ipex=True if is_torch_xpu_available() and not shared.args.cpu else False
+        # use_ipex=True if is_torch_xpu_available() and not shared.args.cpu else False
     ),
     data_collator=transformers.DataCollatorForLanguageModeling(shared.tokenizer, mlm=False),
     callbacks=list([Callbacks()])
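The evaluation_strategy to eval_strategy change tracks the transformers rename, where the old keyword was deprecated and later dropped; with transformers 4.57.* pinned, eval_strategy is the accepted spelling. A minimal sketch of the updated call, showing only the evaluation-related fields (other arguments elided, values illustrative):

```python
import transformers

training_args = transformers.TrainingArguments(
    output_dir="lora_output",      # illustrative path
    eval_strategy="steps",         # was: evaluation_strategy="steps"
    eval_steps=50,
    save_strategy="steps",
    logging_steps=5,
)
```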
@@ -125,9 +125,11 @@ def list_model_elements():
         'loader',
         'cpu_memory',
         'gpu_layers',
+        'cpu_moe',
         'threads',
         'threads_batch',
         'batch_size',
+        'ubatch_size',
         'ctx_size',
         'cache_type',
         'tensor_split',
@@ -50,6 +50,7 @@ def create_ui():
 
             with gr.Column():
                 shared.gradio['vram_info'] = gr.HTML(value=get_initial_vram_info())
+                shared.gradio['cpu_moe'] = gr.Checkbox(label="cpu-moe", value=shared.args.cpu_moe, info='Move the experts to the CPU. Saves VRAM on MoE models.')
                 shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming-llm", value=shared.args.streaming_llm, info='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
                 shared.gradio['load_in_8bit'] = gr.Checkbox(label="load-in-8bit", value=shared.args.load_in_8bit)
                 shared.gradio['load_in_4bit'] = gr.Checkbox(label="load-in-4bit", value=shared.args.load_in_4bit)

@@ -83,6 +84,7 @@ def create_ui():
                 shared.gradio['threads'] = gr.Slider(label="threads", minimum=0, step=1, maximum=256, value=shared.args.threads)
                 shared.gradio['threads_batch'] = gr.Slider(label="threads_batch", minimum=0, step=1, maximum=256, value=shared.args.threads_batch)
                 shared.gradio['batch_size'] = gr.Slider(label="batch_size", minimum=1, maximum=4096, step=1, value=shared.args.batch_size)
+                shared.gradio['ubatch_size'] = gr.Slider(label="ubatch_size", minimum=1, maximum=4096, step=1, value=shared.args.ubatch_size)
                 shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='List of proportions to split the model across multiple GPUs. Example: 60,40')
                 shared.gradio['extra_flags'] = gr.Textbox(label='extra-flags', info='Additional flags to pass to llama-server. Format: "flag1=value1,flag2,flag3=value3". Example: "override-tensor=exps=CPU"', value=shared.args.extra_flags)
                 shared.gradio['cpu_memory'] = gr.Number(label="Maximum CPU memory in GiB. Use this for CPU offloading.", value=shared.args.cpu_memory)

@@ -94,7 +96,7 @@ def create_ui():
                 shared.gradio['num_experts_per_token'] = gr.Number(label="Number of experts per token", value=shared.args.num_experts_per_token, info='Only applies to MoE models like Mixtral.')
 
             with gr.Column():
-                shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='llama.cpp: Use llama-cpp-python compiled without GPU acceleration. Transformers: use PyTorch in CPU mode.')
+                shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='Use PyTorch in CPU mode.')
                 shared.gradio['disk'] = gr.Checkbox(label="disk", value=shared.args.disk)
                 shared.gradio['row_split'] = gr.Checkbox(label="row_split", value=shared.args.row_split, info='Split the model by rows across GPUs. This may improve multi-gpu performance.')
                 shared.gradio['no_kv_offload'] = gr.Checkbox(label="no_kv_offload", value=shared.args.no_kv_offload, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.')
@@ -5,13 +5,14 @@ colorama
 datasets
 einops
 fastapi==0.112.4
-flash-linear-attention==0.3.2
+flash-linear-attention==0.4.0
 html2text==2025.4.15
+huggingface-hub==0.36.0
 jinja2==3.1.6
 markdown
 numpy==2.2.*
 pandas
-peft==0.17.*
+peft==0.18.*
 Pillow>=9.5.0
 psutil
 pydantic==2.11.0

@@ -25,7 +26,7 @@ scipy
 sentencepiece
 tensorboard
 transformers==4.57.*
-triton-windows==3.5.0.post21; platform_system == "Windows"
+triton-windows==3.5.1.post21; platform_system == "Windows"
 tqdm
 wandb

@@ -39,10 +40,10 @@ sse-starlette==1.6.5
 tiktoken

 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/turboderp-org/exllamav3/releases/download/v0.0.11/exllamav3-0.0.11+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav3/releases/download/v0.0.16/exllamav3-0.0.16+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav3/releases/download/v0.0.11/exllamav3-0.0.11+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav3/releases/download/v0.0.16/exllamav3-0.0.16+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
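Several of the updated wheel URLs also drop the trailing python_version == "3.11" environment marker, so the same requirement line now applies regardless of the interpreter version pip runs under. A small sketch of how pip-style markers are evaluated, using the packaging library (values are illustrative):

```python
from packaging.markers import Marker

# Markers like these gate which wheel line is installed on a given machine.
marker = Marker('platform_system == "Linux" and platform_machine == "x86_64"')
print(marker.evaluate())  # True on a typical x86_64 Linux interpreter

# The removed constraint simply narrowed the marker further:
old = Marker('platform_system == "Windows" and python_version == "3.11"')
print(old.evaluate({"platform_system": "Windows", "python_version": "3.12"}))  # False
```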
@@ -5,11 +5,12 @@ datasets
 einops
 fastapi==0.112.4
 html2text==2025.4.15
+huggingface-hub==0.36.0
 jinja2==3.1.6
 markdown
 numpy==2.2.*
 pandas
-peft==0.17.*
+peft==0.18.*
 Pillow>=9.5.0
 psutil
 pydantic==2.11.0

@@ -23,7 +24,7 @@ scipy
 sentencepiece
 tensorboard
 transformers==4.57.*
-triton-windows==3.5.0.post21; platform_system == "Windows"
+triton-windows==3.5.1.post21; platform_system == "Windows"
 tqdm
 wandb

@@ -37,7 +38,7 @@ sse-starlette==1.6.5
 tiktoken

 # AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
@@ -5,11 +5,12 @@ datasets
 einops
 fastapi==0.112.4
 html2text==2025.4.15
+huggingface-hub==0.36.0
 jinja2==3.1.6
 markdown
 numpy==2.2.*
 pandas
-peft==0.17.*
+peft==0.18.*
 Pillow>=9.5.0
 psutil
 pydantic==2.11.0

@@ -23,7 +24,7 @@ scipy
 sentencepiece
 tensorboard
 transformers==4.57.*
-triton-windows==3.5.0.post21; platform_system == "Windows"
+triton-windows==3.5.1.post21; platform_system == "Windows"
 tqdm
 wandb

@@ -37,7 +38,7 @@ sse-starlette==1.6.5
 tiktoken

 # AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
@@ -5,11 +5,12 @@ datasets
 einops
 fastapi==0.112.4
 html2text==2025.4.15
+huggingface-hub==0.36.0
 jinja2==3.1.6
 markdown
 numpy==2.2.*
 pandas
-peft==0.17.*
+peft==0.18.*
 Pillow>=9.5.0
 psutil
 pydantic==2.11.0

@@ -23,7 +24,7 @@ scipy
 sentencepiece
 tensorboard
 transformers==4.57.*
-triton-windows==3.5.0.post21; platform_system == "Windows"
+triton-windows==3.5.1.post21; platform_system == "Windows"
 tqdm
 wandb

@@ -37,5 +38,5 @@ sse-starlette==1.6.5
 tiktoken

 # Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
@@ -5,11 +5,12 @@ datasets
 einops
 fastapi==0.112.4
 html2text==2025.4.15
+huggingface-hub==0.36.0
 jinja2==3.1.6
 markdown
 numpy==2.2.*
 pandas
-peft==0.17.*
+peft==0.18.*
 Pillow>=9.5.0
 psutil
 pydantic==2.11.0

@@ -23,7 +24,7 @@ scipy
 sentencepiece
 tensorboard
 transformers==4.57.*
-triton-windows==3.5.0.post21; platform_system == "Windows"
+triton-windows==3.5.1.post21; platform_system == "Windows"
 tqdm
 wandb

@@ -37,6 +38,5 @@ sse-starlette==1.6.5
 tiktoken

 # Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
@@ -5,11 +5,12 @@ datasets
 einops
 fastapi==0.112.4
 html2text==2025.4.15
+huggingface-hub==0.36.0
 jinja2==3.1.6
 markdown
 numpy==2.2.*
 pandas
-peft==0.17.*
+peft==0.18.*
 Pillow>=9.5.0
 psutil
 pydantic==2.11.0

@@ -23,7 +24,7 @@ scipy
 sentencepiece
 tensorboard
 transformers==4.57.*
-triton-windows==3.5.0.post21; platform_system == "Windows"
+triton-windows==3.5.1.post21; platform_system == "Windows"
 tqdm
 wandb

@@ -37,5 +38,5 @@ sse-starlette==1.6.5
 tiktoken

 # llama.cpp (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"
@@ -5,11 +5,12 @@ datasets
 einops
 fastapi==0.112.4
 html2text==2025.4.15
+huggingface-hub==0.36.0
 jinja2==3.1.6
 markdown
 numpy==2.2.*
 pandas
-peft==0.17.*
+peft==0.18.*
 Pillow>=9.5.0
 psutil
 pydantic==2.11.0

@@ -23,7 +24,7 @@ scipy
 sentencepiece
 tensorboard
 transformers==4.57.*
-triton-windows==3.5.0.post21; platform_system == "Windows"
+triton-windows==3.5.1.post21; platform_system == "Windows"
 tqdm
 wandb

@@ -37,5 +38,5 @@ sse-starlette==1.6.5
 tiktoken

 # llama.cpp (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"
@@ -5,13 +5,14 @@ colorama
 datasets
 einops
 fastapi==0.112.4
-flash-linear-attention==0.3.2
+flash-linear-attention==0.4.0
 html2text==2025.4.15
+huggingface-hub==0.36.0
 jinja2==3.1.6
 markdown
 numpy==2.2.*
 pandas
-peft==0.17.*
+peft==0.18.*
 Pillow>=9.5.0
 psutil
 pydantic==2.11.0

@@ -25,7 +26,7 @@ scipy
 sentencepiece
 tensorboard
 transformers==4.57.*
-triton-windows==3.5.0.post21; platform_system == "Windows"
+triton-windows==3.5.1.post21; platform_system == "Windows"
 tqdm
 wandb

@@ -39,10 +40,10 @@ sse-starlette==1.6.5
 tiktoken

 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/turboderp-org/exllamav3/releases/download/v0.0.11/exllamav3-0.0.11+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav3/releases/download/v0.0.16/exllamav3-0.0.16+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav3/releases/download/v0.0.11/exllamav3-0.0.11+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav3/releases/download/v0.0.16/exllamav3-0.0.16+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.2/exllamav2-0.3.2-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
@@ -5,11 +5,12 @@ datasets
 einops
 fastapi==0.112.4
 html2text==2025.4.15
+huggingface-hub==0.36.0
 jinja2==3.1.6
 markdown
 numpy==2.2.*
 pandas
-peft==0.17.*
+peft==0.18.*
 Pillow>=9.5.0
 psutil
 pydantic==2.11.0

@@ -23,7 +24,7 @@ scipy
 sentencepiece
 tensorboard
 transformers==4.57.*
-triton-windows==3.5.0.post21; platform_system == "Windows"
+triton-windows==3.5.1.post21; platform_system == "Windows"
 tqdm
 wandb
@ -1,6 +1,7 @@
|
||||||
audioop-lts<1.0; python_version >= "3.13"
|
audioop-lts<1.0; python_version >= "3.13"
|
||||||
fastapi==0.112.4
|
fastapi==0.112.4
|
||||||
html2text==2025.4.15
|
html2text==2025.4.15
|
||||||
|
huggingface-hub==0.36.0
|
||||||
jinja2==3.1.6
|
jinja2==3.1.6
|
||||||
markdown
|
markdown
|
||||||
numpy==2.2.*
|
numpy==2.2.*
|
||||||
|
|
@ -22,5 +23,5 @@ sse-starlette==1.6.5
|
||||||
tiktoken
|
tiktoken
|
||||||
|
|
||||||
# CUDA wheels
|
# CUDA wheels
|
||||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
|
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
|
||||||
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
|
||||||
|
|
|
||||||
requirements/portable/requirements_amd.txt (new file, 27 lines)

@@ -0,0 +1,27 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
markdown
numpy==2.2.*
pydantic==2.11.0
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
tqdm

# Gradio
gradio==4.37.*
https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl

# API
flask_cloudflared==0.0.14
sse-starlette==1.6.5
tiktoken

# AMD wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+rocm6.4.4-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
requirements/portable/requirements_amd_noavx2.txt (new file, 27 lines)

@@ -0,0 +1,27 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
html2text==2025.4.15
huggingface-hub==0.36.0
jinja2==3.1.6
markdown
numpy==2.2.*
pydantic==2.11.0
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
tqdm

# Gradio
gradio==4.37.*
https://github.com/oobabooga/gradio/releases/download/custom-build/gradio_client-1.0.2+custom.1-py3-none-any.whl

# API
flask_cloudflared==0.0.14
sse-starlette==1.6.5
tiktoken

# AMD wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+rocm6.4.4avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
@@ -1,6 +1,7 @@
 audioop-lts<1.0; python_version >= "3.13"
 fastapi==0.112.4
 html2text==2025.4.15
+huggingface-hub==0.36.0
 jinja2==3.1.6
 markdown
 numpy==2.2.*

@@ -22,6 +23,5 @@ sse-starlette==1.6.5
 tiktoken

 # Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0"
@@ -1,6 +1,7 @@
 audioop-lts<1.0; python_version >= "3.13"
 fastapi==0.112.4
 html2text==2025.4.15
+huggingface-hub==0.36.0
 jinja2==3.1.6
 markdown
 numpy==2.2.*

@@ -22,6 +23,5 @@ sse-starlette==1.6.5
 tiktoken

 # Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0"
|
|
@ -1,6 +1,7 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
html2text==2025.4.15
+huggingface-hub==0.36.0
jinja2==3.1.6
markdown
numpy==2.2.*
@ -22,5 +23,5 @@ sse-starlette==1.6.5
tiktoken

# llama.cpp (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"
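This AVX2 build and the no-AVX2 variant below are separate requirements files rather than marker branches, so the right file has to be chosen for the CPU. A rough, Linux-only sketch for that check; the helper is hypothetical and not part of the project:

# Rough sketch (Linux only): check /proc/cpuinfo for the avx2 flag to decide
# between the AVX2 and no-AVX2 requirements files. Helper name is hypothetical.
def cpu_has_avx2(cpuinfo_path="/proc/cpuinfo"):
    try:
        with open(cpuinfo_path) as f:
            return any("avx2" in line for line in f if line.startswith("flags"))
    except OSError:
        return False  # unknown platform; fall back to the safer no-AVX2 build

print("use the AVX2 requirements file" if cpu_has_avx2() else "use the no-AVX2 requirements file")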
@ -1,6 +1,7 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
html2text==2025.4.15
+huggingface-hub==0.36.0
jinja2==3.1.6
markdown
numpy==2.2.*
@ -22,5 +23,5 @@ sse-starlette==1.6.5
tiktoken

# llama.cpp (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"
@ -1,6 +1,7 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
html2text==2025.4.15
+huggingface-hub==0.36.0
jinja2==3.1.6
markdown
numpy==2.2.*
@ -22,5 +23,5 @@ sse-starlette==1.6.5
tiktoken

# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
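After the 0.56.0 to 0.62.0 bump, the installed build can be checked with standard-library metadata. The distribution name below is assumed to match the wheel filename; adjust it if the project publishes under a different name:

# Quick check (hedged): confirm which llama_cpp_binaries release is installed.
from importlib.metadata import version, PackageNotFoundError

try:
    print("llama_cpp_binaries:", version("llama_cpp_binaries"))
except PackageNotFoundError:
    print("llama_cpp_binaries is not installed in this environment")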
@ -1,6 +1,7 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
html2text==2025.4.15
+huggingface-hub==0.36.0
jinja2==3.1.6
markdown
numpy==2.2.*
@ -1,6 +1,7 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
html2text==2025.4.15
+huggingface-hub==0.36.0
jinja2==3.1.6
markdown
numpy==2.2.*
@ -21,6 +22,6 @@ flask_cloudflared==0.0.14
sse-starlette==1.6.5
tiktoken

-# CUDA wheels
+# Vulkan wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
@ -1,6 +1,7 @@
audioop-lts<1.0; python_version >= "3.13"
fastapi==0.112.4
html2text==2025.4.15
+huggingface-hub==0.36.0
jinja2==3.1.6
markdown
numpy==2.2.*
@ -22,5 +23,5 @@ sse-starlette==1.6.5
tiktoken

# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.56.0/llama_cpp_binaries-0.56.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.62.0/llama_cpp_binaries-0.62.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
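All of the portable requirements files above follow the same pattern: a pinned wheel URL, optionally followed by a semicolon and an environment marker. A hedged sketch that scans such a file and reports which wheels would apply on the current machine (the file path is hypothetical; assumes the packaging library is installed):

# Hedged sketch: list the direct-URL wheel lines in a requirements file whose
# markers match the current platform.
from packaging.markers import Marker

def applicable_wheels(path):
    selected = []
    with open(path, encoding="utf-8") as f:
        for line in f:
            line = line.strip()
            if not line.startswith("https://"):
                continue  # only inspect pinned wheel URLs
            url, _, marker = line.partition(";")
            if not marker.strip() or Marker(marker.strip()).evaluate():
                selected.append(url.strip())
    return selected

# Example call (hypothetical filename):
# print(applicable_wheels("requirements/portable/requirements_vulkan_avx.txt"))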