diff --git a/.gitignore b/.gitignore index ca307c4a..318e147d 100644 --- a/.gitignore +++ b/.gitignore @@ -26,6 +26,7 @@ .DS_Store .eslintrc.js .idea +.installer_state.json .venv venv .envrc diff --git a/README.md b/README.md index f1633334..3642fc58 100644 --- a/README.md +++ b/README.md @@ -380,7 +380,7 @@ text-generation-webui │   │   └── tokenizer.model ``` -In both cases, you can use the "Model" tab of the UI to download the model from Hugging Face automatically. It is also possible to download it via the command-line with +In both cases, you can use the "Model" tab of the UI to download the model from Hugging Face automatically. It is also possible to download it via the command-line with: ``` python download-model.py organization/model diff --git a/css/main.css b/css/main.css index 1a7efe70..b10d1980 100644 --- a/css/main.css +++ b/css/main.css @@ -1259,6 +1259,16 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { left: 25px; } +.footer-button.footer-continue-button { + bottom: -23px; + left: 50px; +} + +.footer-button.footer-remove-button { + bottom: -23px; + left: 75px; +} + .message:hover .footer-button, .user-message:hover .footer-button, .assistant-message:hover .footer-button { diff --git a/docs/12 - OpenAI API.md b/docs/12 - OpenAI API.md index 9b4f89bf..364c6b09 100644 --- a/docs/12 - OpenAI API.md +++ b/docs/12 - OpenAI API.md @@ -14,7 +14,7 @@ Add `--api` to your command-line flags. * To create a public Cloudflare URL, add the `--public-api` flag. * To listen on your local network, add the `--listen` flag. * To change the port, which is 5000 by default, use `--api-port 1234` (change 1234 to your desired port number). -* To use SSL, add `--ssl-keyfile key.pem --ssl-certfile cert.pem`. Note that it doesn't work with `--public-api`. +* To use SSL, add `--ssl-keyfile key.pem --ssl-certfile cert.pem`. ⚠️ **Note**: this doesn't work with `--public-api` since Cloudflare already uses HTTPS by default. * To use an API key for authentication, add `--api-key yourkey`. ### Examples @@ -51,8 +51,7 @@ curl http://127.0.0.1:5000/v1/chat/completions \ "content": "Hello!" } ], - "mode": "instruct", - "instruction_template": "Alpaca" + "mode": "instruct" }' ``` @@ -86,7 +85,6 @@ curl http://127.0.0.1:5000/v1/chat/completions \ } ], "mode": "instruct", - "instruction_template": "Alpaca", "stream": true }' ``` @@ -131,9 +129,6 @@ curl -k http://127.0.0.1:5000/v1/internal/model/load \ "args": { "load_in_4bit": true, "n_gpu_layers": 12 - }, - "settings": { - "instruction_template": "Alpaca" } }' ``` @@ -198,7 +193,7 @@ while True: assistant_message = '' for event in client.events(): payload = json.loads(event.data) - chunk = payload['choices'][0]['message']['content'] + chunk = payload['choices'][0]['delta']['content'] assistant_message += chunk print(chunk, end='') @@ -241,6 +236,27 @@ for event in client.events(): print() ``` +#### Python example with API key + +Replace + +```python +headers = { + "Content-Type": "application/json" +} +``` + +with + +```python +headers = { + "Content-Type": "application/json", + "Authorization": "Bearer yourPassword123" +} +``` + +in any of the examples above. + ### Environment variables The following environment variables can be used (they take precedence over everything else): diff --git a/download-model.py b/download-model.py index 8fe94371..8ff1d69c 100644 --- a/download-model.py +++ b/download-model.py @@ -14,6 +14,7 @@ import json import os import re import sys +from multiprocessing import Array from pathlib import Path from time import sleep @@ -27,9 +28,10 @@ base = os.environ.get("HF_ENDPOINT") or "https://huggingface.co" class ModelDownloader: - def __init__(self, max_retries=5): + def __init__(self, max_retries=7): self.max_retries = max_retries self.session = self.get_session() + self._progress_bar_slots = None def get_session(self): session = requests.Session() @@ -186,73 +188,112 @@ class ModelDownloader: output_folder = Path(base_folder) / output_folder return output_folder + @property + def progress_bar_slots(self): + if self._progress_bar_slots is None: + raise RuntimeError("Progress bar slots not initialized. Start download threads first.") + + return self._progress_bar_slots + + def initialize_progress_bar_slots(self, num_threads): + self._progress_bar_slots = Array("B", [0] * num_threads) + + def get_progress_bar_position(self): + with self.progress_bar_slots.get_lock(): + for i in range(len(self.progress_bar_slots)): + if self.progress_bar_slots[i] == 0: + self.progress_bar_slots[i] = 1 + return i + + return 0 # fallback + + def release_progress_bar_position(self, slot): + with self.progress_bar_slots.get_lock(): + self.progress_bar_slots[slot] = 0 + def get_single_file(self, url, output_folder, start_from_scratch=False): filename = Path(url.rsplit('/', 1)[1]) output_path = output_folder / filename + progress_bar_position = self.get_progress_bar_position() - max_retries = 7 + max_retries = self.max_retries attempt = 0 - while attempt < max_retries: - attempt += 1 - session = self.session - headers = {} - mode = 'wb' + try: + while attempt < max_retries: + attempt += 1 + session = self.session + headers = {} + mode = 'wb' - try: - if output_path.exists() and not start_from_scratch: - # Resume download - r = session.get(url, stream=True, timeout=20) - total_size = int(r.headers.get('content-length', 0)) - if output_path.stat().st_size >= total_size: - return + try: + if output_path.exists() and not start_from_scratch: + # Resume download + r = session.get(url, stream=True, timeout=20) + total_size = int(r.headers.get('content-length', 0)) + if output_path.stat().st_size >= total_size: + return - headers = {'Range': f'bytes={output_path.stat().st_size}-'} - mode = 'ab' + headers = {'Range': f'bytes={output_path.stat().st_size}-'} + mode = 'ab' - with session.get(url, stream=True, headers=headers, timeout=30) as r: - r.raise_for_status() # If status is not 2xx, raise an error - total_size = int(r.headers.get('content-length', 0)) - block_size = 1024 * 1024 # 1MB + with session.get(url, stream=True, headers=headers, timeout=30) as r: + r.raise_for_status() # If status is not 2xx, raise an error + total_size = int(r.headers.get('content-length', 0)) + block_size = 1024 * 1024 # 1MB - filename_str = str(filename) # Convert PosixPath to string if necessary + filename_str = str(filename) # Convert PosixPath to string if necessary - tqdm_kwargs = { - 'total': total_size, - 'unit': 'B', - 'unit_scale': True, - 'unit_divisor': 1024, - 'bar_format': '{desc}{percentage:3.0f}%|{bar:50}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]', - 'desc': f"{filename_str}: " - } + tqdm_kwargs = { + 'total': total_size, + 'unit': 'B', + 'unit_scale': True, + 'unit_divisor': 1024, + 'bar_format': '{desc}{percentage:3.0f}%|{bar:50}| {n_fmt}/{total_fmt} [{elapsed}<{remaining}, {rate_fmt}]', + 'desc': f"{filename_str}: ", + 'position': progress_bar_position, + 'leave': False + } - if 'COLAB_GPU' in os.environ: - tqdm_kwargs.update({ - 'position': 0, - 'leave': True - }) + if 'COLAB_GPU' in os.environ: + tqdm_kwargs.update({ + 'position': 0, + 'leave': True + }) - with open(output_path, mode) as f: - with tqdm.tqdm(**tqdm_kwargs) as t: - count = 0 - for data in r.iter_content(block_size): - f.write(data) - t.update(len(data)) - if total_size != 0 and self.progress_bar is not None: - count += len(data) - self.progress_bar(float(count) / float(total_size), f"{filename_str}") + with open(output_path, mode) as f: + with tqdm.tqdm(**tqdm_kwargs) as t: + count = 0 + for data in r.iter_content(block_size): + f.write(data) + t.update(len(data)) + if total_size != 0 and self.progress_bar is not None: + count += len(data) + self.progress_bar(float(count) / float(total_size), f"{filename_str}") - break # Exit loop if successful - except (RequestException, ConnectionError, Timeout) as e: - print(f"Error downloading {filename}: {e}.") - print(f"That was attempt {attempt}/{max_retries}.", end=' ') - if attempt < max_retries: - print(f"Retry begins in {2 ** attempt} seconds.") - sleep(2 ** attempt) - else: - print("Failed to download after the maximum number of attempts.") + break # Exit loop if successful + except (RequestException, ConnectionError, Timeout) as e: + print(f"Error downloading {filename}: {e}.") + print(f"That was attempt {attempt}/{max_retries}.", end=' ') + if attempt < max_retries: + print(f"Retry begins in {2 ** attempt} seconds.") + sleep(2 ** attempt) + else: + print("Failed to download after the maximum number of attempts.") + finally: + self.release_progress_bar_position(progress_bar_position) def start_download_threads(self, file_list, output_folder, start_from_scratch=False, threads=4): - thread_map(lambda url: self.get_single_file(url, output_folder, start_from_scratch=start_from_scratch), file_list, max_workers=threads, disable=True) + self.initialize_progress_bar_slots(threads) + tqdm.tqdm.set_lock(tqdm.tqdm.get_lock()) + try: + thread_map( + lambda url: self.get_single_file(url, output_folder, start_from_scratch=start_from_scratch), + file_list, + max_workers=threads, + disable=True + ) + finally: + print(f"\nDownload of {len(file_list)} files to {output_folder} completed.") def download_model_files(self, model, branch, links, sha256, output_folder, progress_bar=None, start_from_scratch=False, threads=4, specific_file=None, is_llamacpp=False): self.progress_bar = progress_bar @@ -318,7 +359,7 @@ if __name__ == '__main__': parser.add_argument('--model-dir', type=str, default=None, help='Save the model files to a subfolder of this folder instead of the default one (text-generation-webui/models).') parser.add_argument('--clean', action='store_true', help='Does not resume the previous download.') parser.add_argument('--check', action='store_true', help='Validates the checksums of model files.') - parser.add_argument('--max-retries', type=int, default=5, help='Max retries count when get error in download time.') + parser.add_argument('--max-retries', type=int, default=7, help='Max retries count when get error in download time.') args = parser.parse_args() branch = args.branch diff --git a/extensions/Training_PRO/script.py b/extensions/Training_PRO/script.py index 01bcf67d..f553e482 100644 --- a/extensions/Training_PRO/script.py +++ b/extensions/Training_PRO/script.py @@ -557,12 +557,6 @@ def calc_trainable_parameters(model): def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch_size: int, batch_size: int, epochs: int, learning_rate: str, lr_scheduler_type: str, lora_rank: int, lora_alpha: int, lora_dropout: float, cutoff_len: int, dataset: str, eval_dataset: str, format: str, eval_steps: int, raw_text_file: str, higher_rank_limit: bool, warmup_steps: int, optimizer: str, hard_cut_string: str, train_only_after: str, stop_at_loss: float, add_eos_token: bool, min_chars: int, report_to: str, precize_slicing_overlap: bool, add_eos_token_type: str, save_steps_under_loss: float, add_bos_token: bool, training_projection: str,sliding_window:bool,warmup_ratio:float, grad_accumulation: int,neft_noise_alpha:float): - if shared.args.monkey_patch: - from alpaca_lora_4bit.monkeypatch.peft_tuners_lora_monkey_patch import ( - replace_peft_model_with_int4_lora_model - ) - replace_peft_model_with_int4_lora_model() - global train_log_graph global WANT_INTERRUPT WANT_INTERRUPT = False @@ -600,10 +594,6 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch time.sleep(5) - if shared.args.loader == 'GPTQ-for-LLaMa' and not shared.args.monkey_patch: - yield "LoRA training with GPTQ-for-LLaMa requires loading with `--monkey-patch`", zero_pd - return - if cutoff_len <= 0 or micro_batch_size <= 0 or actual_lr <= 0 or lora_rank <= 0 or lora_alpha <= 0: yield "Cannot input zeroes.", zero_pd return @@ -865,15 +855,6 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch yield traceback.format_exc().replace('\n', '\n\n'), zero_pd return - if shared.args.monkey_patch: - from alpaca_lora_4bit.autograd_4bit import Autograd4bitQuantLinear - from alpaca_lora_4bit.models import Linear4bitLt - for _, m in lora_model.named_modules(): - if isinstance(m, Autograd4bitQuantLinear) or isinstance(m, Linear4bitLt): - if m.is_v1_model: - m.zeros = m.zeros.half() - m.scales = m.scales.half() - class Tracked(): def __init__(self): self.current_steps = 0 diff --git a/extensions/openai/completions.py b/extensions/openai/completions.py index 2cefc22b..f1a60645 100644 --- a/extensions/openai/completions.py +++ b/extensions/openai/completions.py @@ -146,7 +146,7 @@ def convert_history(history): for item in entry['content']: if not isinstance(item, dict): continue - + image_url = None content = None if item['type'] == 'image_url' and isinstance(item['image_url'], dict): @@ -205,7 +205,7 @@ def convert_history(history): else: chat_dialogue.append(['', current_reply]) elif role == "system": - system_message = content + system_message += f"\n{content}" if system_message else content if not user_input_last: user_input = "" diff --git a/js/global_scope_js.js b/js/global_scope_js.js index 6bf0f0e3..f308edb9 100644 --- a/js/global_scope_js.js +++ b/js/global_scope_js.js @@ -22,6 +22,14 @@ function regenerateClick() { document.getElementById("Regenerate").click(); } +function continueClick() { + document.getElementById("Continue").click(); +} + +function removeLastClick() { + document.getElementById("Remove-last").click(); +} + function handleMorphdomUpdate(text) { morphdom( document.getElementById("chat").parentNode, diff --git a/modules/chat.py b/modules/chat.py index 694c137b..0e47da29 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -30,8 +30,13 @@ from modules.text_generation import ( ) from modules.utils import delete_file, get_available_characters, save_file -# Copied from the Transformers library + +def strftime_now(format): + return datetime.now().strftime(format) + + jinja_env = ImmutableSandboxedEnvironment(trim_blocks=True, lstrip_blocks=True) +jinja_env.globals["strftime_now"] = strftime_now def str_presenter(dumper, data): diff --git a/modules/html_generator.py b/modules/html_generator.py index 29973412..3edbef5e 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -154,6 +154,8 @@ def add_long_list_class(html): @functools.lru_cache(maxsize=None) def convert_to_markdown(string): + if not string: + return "" # Make \[ \] LaTeX equations inline pattern = r'^\s*\\\[\s*\n([\s\S]*?)\n\s*\\\]\s*$' @@ -304,8 +306,13 @@ def get_image_cache(path): copy_svg = '''''' refresh_svg = '''''' -copy_button = f'' -refresh_button = f'' +continue_svg = '''''' +remove_svg = '''''' + +copy_button = f'' +refresh_button = f'' +continue_button = f'' +remove_button = f'' def generate_instruct_html(history): @@ -334,6 +341,8 @@ def generate_instruct_html(history): f'
{converted_visible[1]}
' f'{copy_button}' f'{refresh_button if i == len(history["visible"]) - 1 else ""}' + f'{continue_button if i == len(history["visible"]) - 1 else ""}' + f'{remove_button if i == len(history["visible"]) - 1 else ""}' f'' f'' ) @@ -383,6 +392,8 @@ def generate_cai_chat_html(history, name1, name2, style, character, reset_cache= f'
{converted_visible[1]}
' f'{copy_button}' f'{refresh_button if i == len(history["visible"]) - 1 else ""}' + f'{continue_button if i == len(history["visible"]) - 1 else ""}' + f'{remove_button if i == len(history["visible"]) - 1 else ""}' f'' f'' ) @@ -417,6 +428,8 @@ def generate_chat_html(history, name1, name2, reset_cache=False): f'
{converted_visible[1]}
' f'{copy_button}' f'{refresh_button if i == len(history["visible"]) - 1 else ""}' + f'{continue_button if i == len(history["visible"]) - 1 else ""}' + f'{remove_button if i == len(history["visible"]) - 1 else ""}' f'' f'' ) diff --git a/modules/llamacpp_hf.py b/modules/llamacpp_hf.py index f9964fe8..b3761e0f 100644 --- a/modules/llamacpp_hf.py +++ b/modules/llamacpp_hf.py @@ -202,7 +202,19 @@ class LlamacppHF(PreTrainedModel): params["type_v"] = get_llamacpp_cache_type_for_string(shared.args.cache_type) Llama = llama_cpp_lib().Llama - model = Llama(**params) + try: + model = Llama(**params) + except Exception as e: + error_message = ( + f"Failed loading the model. **This usually happens due to lack of memory**. Try these steps:\n" + f"1. Reduce the context length `n_ctx` (currently {shared.args.n_ctx})." + f"{' Try a lower value like 4096.' if shared.args.n_ctx > 4096 else '.'}" + "\n" + f"2. Lower the `n-gpu-layers` value (currently {shared.args.n_gpu_layers})." + ) + + raise type(e)(error_message) from e + model.last_updated_index = -1 return LlamacppHF(model, model_file) diff --git a/modules/llamacpp_model.py b/modules/llamacpp_model.py index c79755e4..db25c66c 100644 --- a/modules/llamacpp_model.py +++ b/modules/llamacpp_model.py @@ -108,7 +108,19 @@ class LlamaCppModel: params["type_k"] = get_llamacpp_cache_type_for_string(shared.args.cache_type) params["type_v"] = get_llamacpp_cache_type_for_string(shared.args.cache_type) - result.model = Llama(**params) + try: + result.model = Llama(**params) + except Exception as e: + error_message = ( + f"Failed loading the model. **This usually happens due to lack of memory**. Try these steps:\n" + f"1. Reduce the context length `n_ctx` (currently {shared.args.n_ctx})." + f"{' Try a lower value like 4096.' if shared.args.n_ctx > 4096 else '.'}" + "\n" + f"2. Lower the `n-gpu-layers` value (currently {shared.args.n_gpu_layers})." + ) + + raise type(e)(error_message) from e + if cache_capacity > 0: result.model.set_cache(LlamaCache(capacity_bytes=cache_capacity)) diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index d5116938..1264a9fd 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -89,8 +89,8 @@ def create_ui(): shared.gradio['threads_batch'] = gr.Slider(label="threads_batch", minimum=0, step=1, maximum=256, value=shared.args.threads_batch) shared.gradio['n_batch'] = gr.Slider(label="n_batch", minimum=1, maximum=2048, step=1, value=shared.args.n_batch) shared.gradio['hqq_backend'] = gr.Dropdown(label="hqq_backend", choices=["PYTORCH", "PYTORCH_COMPILE", "ATEN"], value=shared.args.hqq_backend) - shared.gradio['n_ctx'] = gr.Number(label="n_ctx", precision=0, step=256, value=shared.args.n_ctx, info='Context length. ⚠️ Lower this value if you can\'t load the model.') - shared.gradio['max_seq_len'] = gr.Number(label='max_seq_len', precision=0, step=256, value=shared.args.max_seq_len, info='Context length. ⚠️ Lower this value if you can\'t load the model.') + shared.gradio['n_ctx'] = gr.Number(label="n_ctx", precision=0, step=256, value=shared.args.n_ctx, info='Context length. ⚠️ Lower this value if you can\'t load the model. Common values: 2048, 4096, 8192, 16384, 32768.') + shared.gradio['max_seq_len'] = gr.Number(label='max_seq_len', precision=0, step=256, value=shared.args.max_seq_len, info='Context length. ⚠️ Lower this value if you can\'t load the model. Common values: 2048, 4096, 8192, 16384, 32768.') shared.gradio['cache_type'] = gr.Dropdown(label="cache_type", choices=['fp16', 'q8_0', 'q4_0', 'fp8', 'q8', 'q6', 'q4'], value=shared.args.cache_type, info='Valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4.') shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='List of proportions to split the model across multiple GPUs. Example: 60,40') shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7') diff --git a/one_click.py b/one_click.py index e78a2450..effc7d43 100644 --- a/one_click.py +++ b/one_click.py @@ -1,6 +1,7 @@ import argparse import glob import hashlib +import json import os import platform import re @@ -101,32 +102,25 @@ def torch_version(): def update_pytorch(): - print_big_message("Checking for PyTorch updates") - + print_big_message("Checking for PyTorch updates.") torver = torch_version() - is_cuda = '+cu' in torver - is_cuda118 = '+cu118' in torver # 2.1.0+cu118 - is_rocm = '+rocm' in torver # 2.0.1+rocm5.4.2 - is_intel = '+cxx11' in torver # 2.0.1a0+cxx11.abi - is_cpu = '+cpu' in torver # 2.0.1+cpu + base_cmd = f"python -m pip install --upgrade torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION}" - install_pytorch = f"python -m pip install --upgrade torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION} " + if "+cu118" in torver: + install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/cu118" + elif "+cu" in torver: + install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/cu121" + elif "+rocm" in torver: + install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/rocm6.1" + elif "+cpu" in torver: + install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/cpu" + elif "+cxx11" in torver: + intel_extension = "intel-extension-for-pytorch==2.1.10+xpu" if is_linux() else "intel-extension-for-pytorch==2.1.10" + install_cmd = f"{base_cmd} {intel_extension} --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/" + else: + install_cmd = base_cmd - if is_cuda118: - install_pytorch += "--index-url https://download.pytorch.org/whl/cu118" - elif is_cuda: - install_pytorch += "--index-url https://download.pytorch.org/whl/cu121" - elif is_rocm: - install_pytorch += "--index-url https://download.pytorch.org/whl/rocm6.1" - elif is_cpu: - install_pytorch += "--index-url https://download.pytorch.org/whl/cpu" - elif is_intel: - if is_linux(): - install_pytorch = "python -m pip install --upgrade torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/" - else: - install_pytorch = "python -m pip install --upgrade torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/" - - run_cmd(f"{install_pytorch}", assert_success=True, environment=True) + run_cmd(install_cmd, assert_success=True, environment=True) def is_installed(): @@ -155,6 +149,11 @@ def check_env(): sys.exit(1) +def get_current_commit(): + result = run_cmd("git rev-parse HEAD", capture_output=True, environment=True) + return result.stdout.decode('utf-8').strip() + + def clear_cache(): run_cmd("conda clean -a -y", environment=True) run_cmd("python -m pip cache purge", environment=True) @@ -340,27 +339,84 @@ def install_extensions_requirements(): def update_requirements(initial_installation=False, pull=True): # Create .git directory if missing if not os.path.exists(os.path.join(script_dir, ".git")): - git_creation_cmd = 'git init -b main && git remote add origin https://github.com/oobabooga/text-generation-webui && git fetch && git symbolic-ref refs/remotes/origin/HEAD refs/remotes/origin/main && git reset --hard origin/main && git branch --set-upstream-to=origin/main' - run_cmd(git_creation_cmd, environment=True, assert_success=True) + run_cmd( + "git init -b main && git remote add origin https://github.com/oobabooga/text-generation-webui && " + "git fetch && git symbolic-ref refs/remotes/origin/HEAD refs/remotes/origin/main && " + "git reset --hard origin/main && git branch --set-upstream-to=origin/main", + environment=True, + assert_success=True + ) + + torver = torch_version() + if "+rocm" in torver: + requirements_file = "requirements_amd" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt" + elif "+cpu" in torver or "+cxx11" in torver: + requirements_file = "requirements_cpu_only" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt" + elif is_macos(): + requirements_file = "requirements_apple_" + ("intel" if is_x86_64() else "silicon") + ".txt" + else: + requirements_file = "requirements" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt" + + # Load state from JSON file + state_file = '.installer_state.json' + current_commit = get_current_commit() + wheels_changed = False + if os.path.exists(state_file): + with open(state_file, 'r') as f: + last_state = json.load(f) + + if 'wheels_changed' in last_state or last_state.get('last_installed_commit') != current_commit: + wheels_changed = True + else: + wheels_changed = True if pull: - print_big_message("Updating the local copy of the repository with \"git pull\"") + # Read .whl lines before pulling + before_pull_whl_lines = [] + if os.path.exists(requirements_file): + with open(requirements_file, 'r') as f: + before_pull_whl_lines = [line for line in f if '.whl' in line] + print_big_message('Updating the local copy of the repository with "git pull"') + + # Hash files before pulling files_to_check = [ 'start_linux.sh', 'start_macos.sh', 'start_windows.bat', 'start_wsl.bat', 'update_wizard_linux.sh', 'update_wizard_macos.sh', 'update_wizard_windows.bat', 'update_wizard_wsl.bat', 'one_click.py' ] + before_hashes = {file: calculate_file_hash(file) for file in files_to_check} - before_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check} + # Perform the git pull run_cmd("git pull --autostash", assert_success=True, environment=True) - after_pull_hashes = {file_name: calculate_file_hash(file_name) for file_name in files_to_check} - # Check for differences in installation file hashes - for file_name in files_to_check: - if before_pull_hashes[file_name] != after_pull_hashes[file_name]: - print_big_message(f"File '{file_name}' was updated during 'git pull'. Please run the script again.") - exit(1) + # Check hashes after pulling + after_hashes = {file: calculate_file_hash(file) for file in files_to_check} + if os.path.exists(requirements_file): + with open(requirements_file, 'r') as f: + after_pull_whl_lines = [line for line in f if '.whl' in line] + + wheels_changed = wheels_changed or (before_pull_whl_lines != after_pull_whl_lines) + + # Check for changes to installer files + for file in files_to_check: + if before_hashes[file] != after_hashes[file]: + print_big_message(f"File '{file}' was updated during 'git pull'. Please run the script again.") + + # Save state before exiting + current_state = {} + if wheels_changed: + current_state['wheels_changed'] = True + + with open(state_file, 'w') as f: + json.dump(current_state, f) + + sys.exit(1) + + # Save current state + current_state = {'last_installed_commit': current_commit} + with open(state_file, 'w') as f: + json.dump(current_state, f) if os.environ.get("INSTALL_EXTENSIONS", "").lower() in ("yes", "y", "true", "1", "t", "on"): install_extensions_requirements() @@ -369,38 +425,23 @@ def update_requirements(initial_installation=False, pull=True): if not initial_installation: update_pytorch() - # Detect the PyTorch version - torver = torch_version() - is_cuda = '+cu' in torver - is_cuda118 = '+cu118' in torver # 2.1.0+cu118 - is_rocm = '+rocm' in torver # 2.0.1+rocm5.4.2 - is_intel = '+cxx11' in torver # 2.0.1a0+cxx11.abi - is_cpu = '+cpu' in torver # 2.0.1+cpu - - if is_rocm: - base_requirements = "requirements_amd" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt" - elif is_cpu or is_intel: - base_requirements = "requirements_cpu_only" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt" - elif is_macos(): - base_requirements = "requirements_apple_" + ("intel" if is_x86_64() else "silicon") + ".txt" - else: - base_requirements = "requirements" + ("_noavx2" if not cpu_has_avx2() else "") + ".txt" - - requirements_file = base_requirements - print_big_message(f"Installing webui requirements from file: {requirements_file}") print(f"TORCH: {torver}\n") # Prepare the requirements file textgen_requirements = open(requirements_file).read().splitlines() - if is_cuda118: + + if not initial_installation and not wheels_changed: + textgen_requirements = [line for line in textgen_requirements if '.whl' not in line] + + if "+cu118" in torver: textgen_requirements = [ req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') for req in textgen_requirements if "autoawq" not in req.lower() ] - if is_windows() and is_cuda118: # No flash-attention on Windows for CUDA 11 + if is_windows() and "+cu118" in torver: # No flash-attention on Windows for CUDA 11 textgen_requirements = [req for req in textgen_requirements if 'oobabooga/flash-attention' not in req] with open('temp_requirements.txt', 'w') as file: @@ -416,16 +457,9 @@ def update_requirements(initial_installation=False, pull=True): # Install/update the project requirements run_cmd("python -m pip install -r temp_requirements.txt --upgrade", assert_success=True, environment=True) + + # Clean up os.remove('temp_requirements.txt') - - # Check for '+cu' or '+rocm' in version string to determine if torch uses CUDA or ROCm. Check for pytorch-cuda as well for backwards compatibility - if not any((is_cuda, is_rocm)) and run_cmd("conda list -f pytorch-cuda | grep pytorch-cuda", environment=True, capture_output=True).returncode == 1: - clear_cache() - return - - if not os.path.exists("repositories/"): - os.mkdir("repositories") - clear_cache() diff --git a/requirements.txt b/requirements.txt index 18872431..4ff7a6df 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -accelerate==1.2.* +accelerate==1.3.* bitsandbytes==0.45.* colorama datasets @@ -32,22 +32,22 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, with GGML_CUDA_FORCE_MMQ) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.6+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.6+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.6+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.6+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # llama-cpp-python (CUDA, without GGML_CUDA_FORCE_MMQ) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.6+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.6+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.6+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.6+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements_amd.txt b/requirements_amd.txt index 87ee93d1..e30f30ee 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -1,4 +1,4 @@ -accelerate==1.2.* +accelerate==1.3.* colorama datasets einops @@ -31,14 +31,14 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.6+rocm6.1.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.6+rocm6.1.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.7+rocm6.1.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.7+rocm6.1.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index fa2f5ca7..15d25caa 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -1,4 +1,4 @@ -accelerate==1.2.* +accelerate==1.3.* colorama datasets einops @@ -31,10 +31,10 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index e9838295..b614acf4 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -1,4 +1,4 @@ -accelerate==1.2.* +accelerate==1.3.* colorama datasets einops @@ -31,8 +31,8 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp311-cp311-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp310-cp310-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp311-cp311-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp310-cp310-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7-py3-none-any.whl diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index bef02feb..ca9cc3ac 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -1,4 +1,4 @@ -accelerate==1.2.* +accelerate==1.3.* colorama datasets einops @@ -31,10 +31,10 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp311-cp311-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp310-cp310-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.6-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp311-cp311-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp310-cp310-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.7-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7-py3-none-any.whl diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 32f1a50a..e9a97905 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -1,4 +1,4 @@ -accelerate==1.2.* +accelerate==1.3.* colorama datasets einops @@ -31,7 +31,7 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 938848bf..c4357676 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -1,4 +1,4 @@ -accelerate==1.2.* +accelerate==1.3.* colorama datasets einops @@ -31,7 +31,7 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 69e497e0..40cbc7b0 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -1,4 +1,4 @@ -accelerate==1.2.* +accelerate==1.3.* bitsandbytes==0.45.* colorama datasets @@ -32,22 +32,22 @@ sse-starlette==1.6.5 tiktoken # llama-cpp-python (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.6+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.7+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, with GGML_CUDA_FORCE_MMQ) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.6+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.6+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.6+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.6+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.7+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # llama-cpp-python (CUDA, without GGML_CUDA_FORCE_MMQ) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.6+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.6+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.6+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.6+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.7+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index a034ee61..3d6c922f 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -1,4 +1,4 @@ -accelerate==1.2.* +accelerate==1.3.* colorama datasets einops diff --git a/start_windows.bat b/start_windows.bat index 960cfdb7..2e42d6fa 100755 --- a/start_windows.bat +++ b/start_windows.bat @@ -41,12 +41,12 @@ if "%conda_exists%" == "F" ( mkdir "%INSTALL_DIR%" call curl -Lk "%MINICONDA_DOWNLOAD_URL%" > "%INSTALL_DIR%\miniconda_installer.exe" || ( echo. && echo Miniconda failed to download. && goto end ) - :: Try CertUtil first + @rem Try CertUtil first for /f %%a in ('CertUtil -hashfile "%INSTALL_DIR%\miniconda_installer.exe" SHA256 ^| find /i /v " " ^| find /i "%MINICONDA_CHECKSUM%"') do ( set "output=%%a" ) - :: If CertUtil fails, try PowerShell + @rem If CertUtil fails, try PowerShell if not defined output ( for /f %%a in ('powershell -Command "if((Get-FileHash \"%INSTALL_DIR%\miniconda_installer.exe\" -Algorithm SHA256).Hash -eq ''%MINICONDA_CHECKSUM%''){echo true}"') do ( set "output=%%a"