diff --git a/README.md b/README.md
index 6e7c05b1..ac508f78 100644
--- a/README.md
+++ b/README.md
@@ -15,11 +15,11 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github.
 - Supports multiple local text generation backends, including [llama.cpp](https://github.com/ggerganov/llama.cpp), [Transformers](https://github.com/huggingface/transformers), [ExLlamaV3](https://github.com/turboderp-org/exllamav3), [ExLlamaV2](https://github.com/turboderp-org/exllamav2), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) (the latter via its own [Dockerfile](https://github.com/oobabooga/text-generation-webui/blob/main/docker/TensorRT-LLM/Dockerfile)).
 - Easy setup: Choose between **portable builds** (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or the one-click installer that creates a self-contained `installer_files` directory.
 - 100% offline and private, with zero telemetry, external resources, or remote update requests.
-- Automatic prompt formatting using Jinja2 templates. You don't need to ever worry about prompt formats.
 - **File attachments**: Upload text files, PDF documents, and .docx documents to talk about their contents.
 - **Web search**: Optionally search the internet with LLM-generated queries to add context to the conversation.
 - Aesthetic UI with dark and light themes.
 - `instruct` mode for instruction-following (like ChatGPT), and `chat-instruct`/`chat` modes for talking to custom characters.
+- Automatic prompt formatting using Jinja2 templates. You never need to worry about prompt formats.
 - Edit messages, navigate between message versions, and branch conversations at any point.
 - Multiple sampling parameters and generation options for sophisticated text generation control.
 - Switch between different models in the UI without restarting.
@@ -57,7 +57,7 @@ To update, run the update script for your OS: `update_wizard_windows.bat`, `upda
 <details>
 <summary>
-Setup details and information about installing manually
+One-click installer details
 </summary>
 
 ### One-click-installer
@@ -67,13 +67,51 @@ The script uses Miniconda to set up a Conda environment in the `installer_files
 If you ever need to install something manually in the `installer_files` environment, you can launch an interactive shell using the cmd script: `cmd_linux.sh`, `cmd_windows.bat`, or `cmd_macos.sh`.
 
 * There is no need to run any of those scripts (`start_`, `update_wizard_`, or `cmd_`) as admin/root.
-* To install the requirements for extensions, you can use the `extensions_reqs` script for your OS. At the end, this script will install the main requirements for the project to make sure that they take precedence in case of version conflicts.
-* For additional instructions about AMD and WSL setup, consult [the documentation](https://github.com/oobabooga/text-generation-webui/wiki).
+* To install requirements for extensions, it is recommended to use the update wizard script with the "Install/update extensions requirements" option. At the end, this script will install the main requirements for the project to make sure that they take precedence in case of version conflicts.
 * For automated installation, you can use the `GPU_CHOICE`, `LAUNCH_AFTER_INSTALL`, and `INSTALL_EXTENSIONS` environment variables. For instance: `GPU_CHOICE=A LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh`.
 
-### Manual installation using Conda
+</details>
 
-Recommended if you have some experience with the command-line.
+<details>
+<summary>
+Manual portable installation with venv
+</summary>
+
+### Manual portable installation with venv
+
+Very fast setup that should work with any Python 3.9+:
+
+```bash
+# Clone repository
+git clone https://github.com/oobabooga/text-generation-webui
+cd text-generation-webui
+
+# Create virtual environment
+python -m venv venv
+
+# Activate virtual environment
+# On Windows:
+venv\Scripts\activate
+# On macOS/Linux:
+source venv/bin/activate
+
+# Install dependencies (choose the appropriate file under requirements/portable for your hardware)
+pip install -r requirements/portable/requirements.txt
+
+# Launch server (basic command)
+python server.py --portable --api --auto-launch
+
+# When done working, deactivate
+deactivate
+```
+
+</details>
+
+<details>
+<summary>
+Manual full installation with conda or docker
+</summary>
+
+### Full installation with Conda
 
 #### 0. Install Conda
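With the server launched via the last command above (`--api` enabled), the install can be smoke-tested against the OpenAI-compatible endpoint that the `extensions/openai` changes below touch. A minimal sketch, assuming the server is running and the API is listening on its default port 5000:

```python
# Minimal smoke test for a local text-generation-webui instance started
# with --api. Assumes the default OpenAI-compatible endpoint on port 5000.
import json
import urllib.request

with urllib.request.urlopen("http://127.0.0.1:5000/v1/models") as resp:
    models = json.load(resp)

print(json.dumps(models, indent=2))
```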
diff --git a/download-model.py b/download-model.py
index 576a8b79..c0a3aa36 100644
--- a/download-model.py
+++ b/download-model.py
@@ -82,6 +82,7 @@ class ModelDownloader:
 
         links = []
         sha256 = []
+        file_sizes = []
         classifications = []
         has_pytorch = False
         has_pt = False
@@ -118,8 +119,14 @@
                 is_tokenizer = re.match(r"(tokenizer|ice|spiece).*\.model", fname) or is_tiktoken
                 is_text = re.match(r".*\.(txt|json|py|md)", fname) or is_tokenizer
                 if any((is_pytorch, is_safetensors, is_pt, is_gguf, is_tokenizer, is_text)):
+                    file_size = 0
                     if 'lfs' in dict[i]:
                         sha256.append([fname, dict[i]['lfs']['oid']])
+                        file_size = dict[i]['lfs'].get('size', 0)
+                    elif 'size' in dict[i]:
+                        file_size = dict[i]['size']
+
+                    file_sizes.append(file_size)
 
                     if is_text:
                         links.append(f"{base}/{model}/resolve/{branch}/{fname}")
@@ -152,6 +159,7 @@
             for i in range(len(classifications) - 1, -1, -1):
                 if classifications[i] in ['pytorch', 'pt', 'gguf']:
                     links.pop(i)
+                    file_sizes.pop(i)
 
         # For GGUF, try to download only the Q4_K_M if no specific file is specified.
         if has_gguf and specific_file is None:
@@ -164,13 +172,15 @@
                 for i in range(len(classifications) - 1, -1, -1):
                     if 'q4_k_m' not in links[i].lower():
                         links.pop(i)
+                        file_sizes.pop(i)
             else:
                 for i in range(len(classifications) - 1, -1, -1):
                     if links[i].lower().endswith('.gguf'):
                         links.pop(i)
+                        file_sizes.pop(i)
 
         is_llamacpp = has_gguf and specific_file is not None
-        return links, sha256, is_lora, is_llamacpp
+        return links, sha256, is_lora, is_llamacpp, file_sizes
 
     def get_output_folder(self, model, branch, is_lora, is_llamacpp=False, model_dir=None):
         if model_dir:
@@ -396,7 +406,7 @@ if __name__ == '__main__':
         sys.exit()
 
     # Get the download links from Hugging Face
-    links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(
+    links, sha256, is_lora, is_llamacpp, file_sizes = downloader.get_download_links_from_huggingface(
        model, branch, text_only=args.text_only, specific_file=specific_file, exclude_pattern=exclude_pattern
    )
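The subtle invariant in this change is that `file_sizes` must stay index-aligned with `links` through every reverse-order `pop`. A self-contained sketch of that bookkeeping, with made-up filenames and sizes:

```python
# Illustration of the parallel-list bookkeeping the patch adds: when
# filtering `links` in reverse index order, `file_sizes` must be popped
# at the same indices to stay aligned. Toy data, not real repo contents.
links = ["a.gguf", "config.json", "b.gguf", "tokenizer.model"]
file_sizes = [4_000_000_000, 1_200, 5_500_000_000, 500_000]

# Drop GGUF entries, mirroring each pop so the lists stay in sync
for i in range(len(links) - 1, -1, -1):
    if links[i].lower().endswith(".gguf"):
        links.pop(i)
        file_sizes.pop(i)

print(list(zip(links, file_sizes)))
# [('config.json', 1200), ('tokenizer.model', 500000)]
```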
diff --git a/extensions/openai/models.py b/extensions/openai/models.py
index f8d9a1e8..115149b0 100644
--- a/extensions/openai/models.py
+++ b/extensions/openai/models.py
@@ -15,7 +15,16 @@ def get_current_model_info():
 
 
 def list_models():
-    return {'model_names': get_available_models()[1:]}
+    return {'model_names': get_available_models()}
+
+
+def list_models_openai_format():
+    """Returns model list in OpenAI API format"""
+    model_names = get_available_models()
+    return {
+        "object": "list",
+        "data": [model_info_dict(name) for name in model_names]
+    }
 
 
 def model_info_dict(model_name: str) -> dict:
diff --git a/extensions/openai/script.py b/extensions/openai/script.py
index 3d8d5f73..9440cb1e 100644
--- a/extensions/openai/script.py
+++ b/extensions/openai/script.py
@@ -180,7 +180,7 @@ async def handle_models(request: Request):
     is_list = request.url.path.split('?')[0].split('#')[0] == '/v1/models'
 
     if is_list:
-        response = OAImodels.list_models()
+        response = OAImodels.list_models_openai_format()
    else:
        model_name = path[len('/v1/models/'):]
        response = OAImodels.model_info_dict(model_name)
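For reference, the payload shape `list_models_openai_format()` produces (note that `list_models()` also no longer skips the first entry of `get_available_models()`). The per-model fields come from `model_info_dict()`, which this patch does not show; the `id`/`object` keys and the model names below follow the OpenAI convention and are illustrative assumptions only:

```python
# Rough shape of the new /v1/models response. The exact per-entry fields
# depend on model_info_dict(); keys and names here are illustrative.
expected = {
    "object": "list",
    "data": [
        {"id": "Llama-3.1-8B-Instruct-Q4_K_M.gguf", "object": "model"},
        {"id": "Qwen3-4B", "object": "model"},
    ],
}
```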
diff --git a/js/global_scope_js.js b/js/global_scope_js.js
index 205d9375..ebed1f3d 100644
--- a/js/global_scope_js.js
+++ b/js/global_scope_js.js
@@ -351,3 +351,24 @@ function handleMorphdomUpdate(data) {
         }
     });
 }
+
+// Wait for Gradio to finish setting its styles, then force dark theme
+const observer = new MutationObserver((mutations) => {
+    mutations.forEach((mutation) => {
+        if (mutation.type === "attributes" &&
+            mutation.target.tagName === "GRADIO-APP" &&
+            mutation.attributeName === "style") {
+
+            // Gradio just set its styles, now force dark theme
+            document.body.classList.add("dark");
+            observer.disconnect();
+        }
+    });
+});
+
+// Start observing
+observer.observe(document.documentElement, {
+    attributes: true,
+    subtree: true,
+    attributeFilter: ["style"]
+});
diff --git a/modules/block_requests.py b/modules/block_requests.py
index 5a4b533f..dc1ee467 100644
--- a/modules/block_requests.py
+++ b/modules/block_requests.py
@@ -1,5 +1,6 @@
 import builtins
 import io
+import re
 
 import requests
 
@@ -62,6 +63,13 @@ def my_open(*args, **kwargs):
             '\n        '
         )
 
+        file_contents = re.sub(
+            r'@media \(prefers-color-scheme: dark\) \{\s*body \{([^}]*)\}\s*\}',
+            r'body.dark {\1}',
+            file_contents,
+            flags=re.DOTALL
+        )
+
         if len(args) > 1 and args[1] == 'rb':
            file_contents = file_contents.encode('utf-8')
        return io.BytesIO(file_contents)
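The new substitution rewrites any `@media (prefers-color-scheme: dark) { body { ... } }` rule in the served CSS into a `body.dark { ... }` rule, so dark styling is driven by the class the JavaScript above sets rather than by the OS preference. A runnable check of the same pattern against sample CSS:

```python
# Quick check of the CSS rewrite the patch applies: an OS-level
# prefers-color-scheme block becomes a rule gated on the manual
# `body.dark` class instead. Sample CSS, same regex as the patch.
import re

css = """@media (prefers-color-scheme: dark) {
    body {
        color: white;
        background: black;
    }
}"""

print(re.sub(
    r'@media \(prefers-color-scheme: dark\) \{\s*body \{([^}]*)\}\s*\}',
    r'body.dark {\1}',
    css,
    flags=re.DOTALL
))
# body.dark {
#         color: white;
#         background: black;
#     }
```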
diff --git a/modules/models_settings.py b/modules/models_settings.py
index 37aa37cf..6b22499e 100644
--- a/modules/models_settings.py
+++ b/modules/models_settings.py
@@ -77,7 +77,7 @@ def get_model_metadata(model):
             model_settings['compress_pos_emb'] = metadata[k]
         elif k.endswith('rope.scaling.factor'):
             model_settings['compress_pos_emb'] = metadata[k]
-        elif k.endswith('block_count'):
+        elif k.endswith('.block_count'):
            model_settings['gpu_layers'] = metadata[k] + 1
            model_settings['max_gpu_layers'] = metadata[k] + 1
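The added dot guards against substring false positives: GGUF metadata keys are namespaced (for example `llama.block_count`), and a bare `endswith('block_count')` would also match any longer key name that happens to end in that string. A small demonstration with one real-style key and one hypothetical lookalike:

```python
# The keys below are illustrative, not taken from a real GGUF file.
real_key = "llama.block_count"
lookalike = "llama.rope_block_count"  # hypothetical key that merely ends in "block_count"

print(real_key.endswith("block_count"), real_key.endswith(".block_count"))    # True True
print(lookalike.endswith("block_count"), lookalike.endswith(".block_count"))  # True False
```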
diff --git a/modules/ui.py b/modules/ui.py
index 0e8afa8f..fb719158 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -69,9 +69,9 @@ if not shared.args.old_colors:
         border_color_primary='#c5c5d2',
         body_text_color_subdued='#484848',
         background_fill_secondary='#eaeaea',
-        background_fill_secondary_dark='var(--selected-item-color-dark)',
+        background_fill_secondary_dark='var(--selected-item-color-dark, #282930)',
         background_fill_primary='var(--neutral-50)',
-        background_fill_primary_dark='var(--darker-gray)',
+        background_fill_primary_dark='var(--darker-gray, #1C1C1D)',
         body_background_fill="white",
         block_background_fill="transparent",
         body_text_color='rgb(64, 64, 64)',
@@ -81,25 +81,25 @@ if not shared.args.old_colors:
         button_shadow_hover="none",
 
         # Dark Mode Colors
-        input_background_fill_dark='var(--darker-gray)',
-        checkbox_background_color_dark='var(--darker-gray)',
+        input_background_fill_dark='var(--darker-gray, #1C1C1D)',
+        checkbox_background_color_dark='var(--darker-gray, #1C1C1D)',
         block_background_fill_dark='transparent',
         block_border_color_dark='transparent',
-        input_border_color_dark='var(--border-color-dark)',
-        input_border_color_focus_dark='var(--border-color-dark)',
-        checkbox_border_color_dark='var(--border-color-dark)',
-        border_color_primary_dark='var(--border-color-dark)',
-        button_secondary_border_color_dark='var(--border-color-dark)',
-        body_background_fill_dark='var(--dark-gray)',
+        input_border_color_dark='var(--border-color-dark, #525252)',
+        input_border_color_focus_dark='var(--border-color-dark, #525252)',
+        checkbox_border_color_dark='var(--border-color-dark, #525252)',
+        border_color_primary_dark='var(--border-color-dark, #525252)',
+        button_secondary_border_color_dark='var(--border-color-dark, #525252)',
+        body_background_fill_dark='var(--dark-gray, #212125)',
         button_primary_background_fill_dark='transparent',
         button_secondary_background_fill_dark='transparent',
         checkbox_label_background_fill_dark='transparent',
         button_cancel_background_fill_dark='transparent',
-        button_secondary_background_fill_hover_dark='var(--selected-item-color-dark)',
-        checkbox_label_background_fill_hover_dark='var(--selected-item-color-dark)',
-        table_even_background_fill_dark='var(--darker-gray)',
-        table_odd_background_fill_dark='var(--selected-item-color-dark)',
-        code_background_fill_dark='var(--darker-gray)',
+        button_secondary_background_fill_hover_dark='var(--selected-item-color-dark, #282930)',
+        checkbox_label_background_fill_hover_dark='var(--selected-item-color-dark, #282930)',
+        table_even_background_fill_dark='var(--darker-gray, #1C1C1D)',
+        table_odd_background_fill_dark='var(--selected-item-color-dark, #282930)',
+        code_background_fill_dark='var(--darker-gray, #1C1C1D)',
 
         # Shadows and Radius
         checkbox_label_shadow='none',
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index 6b106203..a18d7d10 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -1,4 +1,5 @@
 import importlib
+import math
 import queue
 import threading
 import traceback
@@ -244,7 +245,7 @@ def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), retur
         model, branch = downloader.sanitize_model_and_branch_names(repo_id, None)
 
         yield "Getting download links from Hugging Face..."
-        links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=False, specific_file=specific_file)
+        links, sha256, is_lora, is_llamacpp, file_sizes = downloader.get_download_links_from_huggingface(model, branch, text_only=False, specific_file=specific_file)
 
         if not links:
             yield "No files found to download for the given model/criteria."
@@ -254,17 +255,33 @@ def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), retur
         # Check for multiple GGUF files
         gguf_files = [link for link in links if link.lower().endswith('.gguf')]
         if len(gguf_files) > 1 and not specific_file:
-            output = "Multiple GGUF files found. Please copy one of the following filenames to the 'File name' field:\n\n```\n"
-            for link in gguf_files:
-                output += f"{Path(link).name}\n"
+            # Sort by size in ascending order
+            gguf_data = []
+            for i, link in enumerate(links):
+                if link.lower().endswith('.gguf'):
+                    file_size = file_sizes[i]
+                    gguf_data.append((file_size, link))
+
+            gguf_data.sort(key=lambda x: x[0])
+
+            output = "Multiple GGUF files found. Please copy one of the following filenames to the 'File name' field above:\n\n```\n"
+            for file_size, link in gguf_data:
+                size_str = format_file_size(file_size)
+                output += f"{size_str} - {Path(link).name}\n"
+
             output += "```"
             yield output
             return
 
         if return_links:
+            # Sort by size in ascending order
+            file_data = list(zip(file_sizes, links))
+            file_data.sort(key=lambda x: x[0])
+
             output = "```\n"
-            for link in links:
-                output += f"{Path(link).name}" + "\n"
+            for file_size, link in file_data:
+                size_str = format_file_size(file_size)
+                output += f"{size_str} - {Path(link).name}\n"
 
             output += "```"
             yield output
@@ -391,3 +408,19 @@ def handle_load_model_event_final(truncation_length, loader, state):
 def handle_unload_model_click():
     unload_model()
     return "Model unloaded"
+
+
+def format_file_size(size_bytes):
+    """Convert bytes to human readable format with 2 decimal places for GB and above"""
+    if size_bytes == 0:
+        return "0 B"
+
+    size_names = ["B", "KB", "MB", "GB", "TB"]
+    i = int(math.floor(math.log(size_bytes, 1024)))
+    p = math.pow(1024, i)
+    s = size_bytes / p
+
+    if i >= 3:  # GB or TB
+        return f"{s:.2f} {size_names[i]}"
+    else:
+        return f"{s:.1f} {size_names[i]}"
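For reference, the expected output of the new helper at a few sizes, one decimal place below the GB range and two from GB upward (the function body is restated below so the checks run as-is):

```python
import math

def format_file_size(size_bytes):
    # Adapted from the patch above so the examples below are runnable as-is
    if size_bytes == 0:
        return "0 B"
    size_names = ["B", "KB", "MB", "GB", "TB"]
    i = int(math.floor(math.log(size_bytes, 1024)))
    s = size_bytes / math.pow(1024, i)
    return f"{s:.2f} {size_names[i]}" if i >= 3 else f"{s:.1f} {size_names[i]}"

print(format_file_size(1536))           # 1.5 KB
print(format_file_size(734_003_200))    # 700.0 MB
print(format_file_size(4_920_000_000))  # 4.58 GB
```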
diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt
index 19e5e0fe..14828e8d 100644
--- a/requirements/full/requirements.txt
+++ b/requirements/full/requirements.txt
@@ -9,7 +9,7 @@ gradio==4.37.*
 html2text==2025.4.15
 jinja2==3.1.6
 markdown
-numpy==1.26.*
+numpy==2.2.*
 pandas
 peft==0.15.*
 Pillow>=9.5.0
diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt
index ebef87a6..ec2040d0 100644
--- a/requirements/full/requirements_amd.txt
+++ b/requirements/full/requirements_amd.txt
@@ -8,7 +8,7 @@ gradio==4.37.*
 html2text==2025.4.15
 jinja2==3.1.6
 markdown
-numpy==1.26.*
+numpy==2.2.*
 pandas
 peft==0.15.*
 Pillow>=9.5.0
diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt
index f1fccc93..4b50c693 100644
--- a/requirements/full/requirements_amd_noavx2.txt
+++ b/requirements/full/requirements_amd_noavx2.txt
@@ -8,7 +8,7 @@ gradio==4.37.*
 html2text==2025.4.15
 jinja2==3.1.6
 markdown
-numpy==1.26.*
+numpy==2.2.*
 pandas
 peft==0.15.*
 Pillow>=9.5.0
diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt
index 734f22c7..a6e2925e 100644
--- a/requirements/full/requirements_apple_intel.txt
+++ b/requirements/full/requirements_apple_intel.txt
@@ -8,7 +8,7 @@ gradio==4.37.*
 html2text==2025.4.15
 jinja2==3.1.6
 markdown
-numpy==1.26.*
+numpy==2.2.*
 pandas
 peft==0.15.*
 Pillow>=9.5.0
diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt
index f837aade..1b97d844 100644
--- a/requirements/full/requirements_apple_silicon.txt
+++ b/requirements/full/requirements_apple_silicon.txt
@@ -8,7 +8,7 @@ gradio==4.37.*
 html2text==2025.4.15
 jinja2==3.1.6
 markdown
-numpy==1.26.*
+numpy==2.2.*
 pandas
 peft==0.15.*
 Pillow>=9.5.0
diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt
index 9ec8a720..fa553f7c 100644
--- a/requirements/full/requirements_cpu_only.txt
+++ b/requirements/full/requirements_cpu_only.txt
@@ -8,7 +8,7 @@ gradio==4.37.*
 html2text==2025.4.15
 jinja2==3.1.6
 markdown
-numpy==1.26.*
+numpy==2.2.*
 pandas
 peft==0.15.*
 Pillow>=9.5.0
diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt
index 3a3fcde9..505f8ae8 100644
--- a/requirements/full/requirements_cpu_only_noavx2.txt
+++ b/requirements/full/requirements_cpu_only_noavx2.txt
@@ -8,7 +8,7 @@ gradio==4.37.*
 html2text==2025.4.15
 jinja2==3.1.6
 markdown
-numpy==1.26.*
+numpy==2.2.*
 pandas
 peft==0.15.*
 Pillow>=9.5.0
diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt
index e68e8187..13ecaf87 100644
--- a/requirements/full/requirements_noavx2.txt
+++ b/requirements/full/requirements_noavx2.txt
@@ -9,7 +9,7 @@ gradio==4.37.*
 html2text==2025.4.15
 jinja2==3.1.6
 markdown
-numpy==1.26.*
+numpy==2.2.*
 pandas
 peft==0.15.*
 Pillow>=9.5.0
diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt
index d26663a7..b1c3c6ea 100644
--- a/requirements/full/requirements_nowheels.txt
+++ b/requirements/full/requirements_nowheels.txt
@@ -8,7 +8,7 @@ gradio==4.37.*
 html2text==2025.4.15
 jinja2==3.1.6
 markdown
-numpy==1.26.*
+numpy==2.2.*
 pandas
 peft==0.15.*
 Pillow>=9.5.0
diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt
index f596675c..49cd6264 100644
--- a/requirements/portable/requirements.txt
+++ b/requirements/portable/requirements.txt
@@ -4,7 +4,7 @@ gradio==4.37.*
 html2text==2025.4.15
 jinja2==3.1.6
 markdown
-numpy==1.26.*
+numpy==2.2.*
 pydantic==2.8.2
 PyPDF2==3.0.1
 python-docx==1.1.2
diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt
index e472e428..fca8208f 100644
--- a/requirements/portable/requirements_apple_intel.txt
+++ b/requirements/portable/requirements_apple_intel.txt
@@ -4,7 +4,7 @@ gradio==4.37.*
 html2text==2025.4.15
 jinja2==3.1.6
 markdown
-numpy==1.26.*
+numpy==2.2.*
 pydantic==2.8.2
 PyPDF2==3.0.1
 python-docx==1.1.2
diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt
index b60eccf5..86b00651 100644
--- a/requirements/portable/requirements_apple_silicon.txt
+++ b/requirements/portable/requirements_apple_silicon.txt
@@ -4,7 +4,7 @@ gradio==4.37.*
 html2text==2025.4.15
 jinja2==3.1.6
 markdown
-numpy==1.26.*
+numpy==2.2.*
 pydantic==2.8.2
 PyPDF2==3.0.1
 python-docx==1.1.2
diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt
index c6586848..fca46fb5 100644
--- a/requirements/portable/requirements_cpu_only.txt
+++ b/requirements/portable/requirements_cpu_only.txt
@@ -4,7 +4,7 @@ gradio==4.37.*
 html2text==2025.4.15
 jinja2==3.1.6
 markdown
-numpy==1.26.*
+numpy==2.2.*
 pydantic==2.8.2
 PyPDF2==3.0.1
 python-docx==1.1.2
diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt
index d0f113a7..3ebc2488 100644
--- a/requirements/portable/requirements_cpu_only_noavx2.txt
+++ b/requirements/portable/requirements_cpu_only_noavx2.txt
@@ -4,7 +4,7 @@ gradio==4.37.*
 html2text==2025.4.15
 jinja2==3.1.6
 markdown
-numpy==1.26.*
+numpy==2.2.*
 pydantic==2.8.2
 PyPDF2==3.0.1
 python-docx==1.1.2
diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt
index df1c5762..2bdf257e 100644
--- a/requirements/portable/requirements_noavx2.txt
+++ b/requirements/portable/requirements_noavx2.txt
@@ -4,7 +4,7 @@ gradio==4.37.*
 html2text==2025.4.15
 jinja2==3.1.6
 markdown
-numpy==1.26.*
+numpy==2.2.*
 pydantic==2.8.2
 PyPDF2==3.0.1
 python-docx==1.1.2
diff --git a/requirements/portable/requirements_nowheels.txt b/requirements/portable/requirements_nowheels.txt
index 3d30e6d6..c22d6441 100644
--- a/requirements/portable/requirements_nowheels.txt
+++ b/requirements/portable/requirements_nowheels.txt
@@ -4,7 +4,7 @@ gradio==4.37.*
 html2text==2025.4.15
 jinja2==3.1.6
 markdown
-numpy==1.26.*
+numpy==2.2.*
 pydantic==2.8.2
 PyPDF2==3.0.1
 python-docx==1.1.2
diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt
index 2da3a81a..0b98ee5a 100644
--- a/requirements/portable/requirements_vulkan.txt
+++ b/requirements/portable/requirements_vulkan.txt
@@ -4,7 +4,7 @@ gradio==4.37.*
 html2text==2025.4.15
 jinja2==3.1.6
 markdown
-numpy==1.26.*
+numpy==2.2.*
 pydantic==2.8.2
 PyPDF2==3.0.1
 python-docx==1.1.2
diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt
index f53432d8..188dc085 100644
--- a/requirements/portable/requirements_vulkan_noavx2.txt
+++ b/requirements/portable/requirements_vulkan_noavx2.txt
@@ -4,7 +4,7 @@ gradio==4.37.*
 html2text==2025.4.15
 jinja2==3.1.6
 markdown
-numpy==1.26.*
+numpy==2.2.*
 pydantic==2.8.2
 PyPDF2==3.0.1
 python-docx==1.1.2
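One note on the pins above: `numpy` jumps a major version, from 1.26 to 2.2, in every requirements file. NumPy 2.x removed several long-deprecated aliases (for example `np.float_` and `np.infty`), which is the main compatibility risk for extensions written against 1.x. A quick post-update check:

```python
# Post-update sanity check for the NumPy 2.x bump.
import numpy as np

print(np.__version__)         # expected: 2.2.x
print(hasattr(np, "float_"))  # False on NumPy 2.x (removed alias)
```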