diff --git a/README.md b/README.md index 396e8e1e..40c242c8 100644 --- a/README.md +++ b/README.md @@ -4,6 +4,8 @@ A Gradio web UI for Large Language Models. Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github.com/AUTOMATIC1111/stable-diffusion-webui) of text generation. +[Try the Deep Reason extension](https://oobabooga.gumroad.com/l/deep_reason) + |![Image1](https://github.com/oobabooga/screenshots/raw/main/AFTER-INSTRUCT.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/AFTER-CHAT.png) | |:---:|:---:| |![Image1](https://github.com/oobabooga/screenshots/raw/main/AFTER-DEFAULT.png) | ![Image2](https://github.com/oobabooga/screenshots/raw/main/AFTER-PARAMETERS.png) | diff --git a/convert-to-safetensors.py b/convert-to-safetensors.py deleted file mode 100644 index 3b721e7c..00000000 --- a/convert-to-safetensors.py +++ /dev/null @@ -1,38 +0,0 @@ -''' - -Converts a transformers model to safetensors format and shards it. - -This makes it faster to load (because of safetensors) and lowers its RAM usage -while loading (because of sharding). - -Based on the original script by 81300: - -https://gist.github.com/81300/fe5b08bff1cba45296a829b9d6b0f303 - -''' - -import argparse -from pathlib import Path - -import torch -from transformers import AutoModelForCausalLM, AutoTokenizer - -parser = argparse.ArgumentParser(formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=54)) -parser.add_argument('MODEL', type=str, default=None, nargs='?', help="Path to the input model.") -parser.add_argument('--output', type=str, default=None, help='Path to the output folder (default: models/{model_name}_safetensors).') -parser.add_argument("--max-shard-size", type=str, default="2GB", help="Maximum size of a shard in GB or MB (default: %(default)s).") -parser.add_argument('--bf16', action='store_true', help='Load the model with bfloat16 precision. 
Requires NVIDIA Ampere GPU.') -args = parser.parse_args() - -if __name__ == '__main__': - path = Path(args.MODEL) - model_name = path.name - - print(f"Loading {model_name}...") - model = AutoModelForCausalLM.from_pretrained(path, low_cpu_mem_usage=True, torch_dtype=torch.bfloat16 if args.bf16 else torch.float16) - tokenizer = AutoTokenizer.from_pretrained(path) - - out_folder = args.output or Path(f"models/{model_name}_safetensors") - print(f"Saving the converted model to {out_folder} with a maximum shard size of {args.max_shard_size}...") - model.save_pretrained(out_folder, max_shard_size=args.max_shard_size, safe_serialization=True) - tokenizer.save_pretrained(out_folder) diff --git a/css/html_instruct_style.css b/css/html_instruct_style.css index fcd0558f..4613b380 100644 --- a/css/html_instruct_style.css +++ b/css/html_instruct_style.css @@ -46,7 +46,7 @@ } .chat .user-message { - background: #f4f4f4; + background: #f5f5f5; padding: 1.5rem 1rem; padding-bottom: 2rem; border-radius: 0; diff --git a/css/main.css b/css/main.css index b10d1980..23492338 100644 --- a/css/main.css +++ b/css/main.css @@ -2,7 +2,7 @@ --darker-gray: #202123; --dark-gray: #343541; --light-gray: #444654; - --light-theme-gray: #f4f4f4; + --light-theme-gray: #f5f5f5; --border-color-dark: #525252; --header-width: 112px; --selected-item-color-dark: #32333e; diff --git a/docs/10 - WSL.md b/docs/10 - WSL.md index 3e9865c1..e0d66393 100644 --- a/docs/10 - WSL.md +++ b/docs/10 - WSL.md @@ -135,9 +135,12 @@ When you git clone a repository, put it inside WSL and not outside. To understan ### Bonus: Port Forwarding -By default, you won't be able to access the webui from another device on your local network. You will need to setup the appropriate port forwarding using the following command (using PowerShell or Terminal with administrator privileges). +By default, you won't be able to access the webui from another device on your local network. 
You will need to set up the appropriate port forwarding using the following steps: + +1. First, get the IP address of the WSL by typing `wsl hostname -I`. This will output the IP address, for example `172.20.134.111`. +2. Then, use the following command (using PowerShell or Terminal with administrator privileges) to set up port forwarding, replacing `172.20.134.111` with the IP address you obtained in step 1: ``` -netsh interface portproxy add v4tov4 listenaddress=0.0.0.0 listenport=7860 connectaddress=localhost connectport=7860 +netsh interface portproxy add v4tov4 listenaddress=0.0.0.0 listenport=7860 connectaddress=172.20.134.111 connectport=7860 ``` diff --git a/extensions/sd_api_pictures/script.py b/extensions/sd_api_pictures/script.py index 3a31771a..f216da38 100644 --- a/extensions/sd_api_pictures/script.py +++ b/extensions/sd_api_pictures/script.py @@ -11,7 +11,7 @@ import torch from PIL import Image from modules import shared -from modules.models import reload_model, unload_model +from modules.models import load_model, unload_model from modules.ui import create_refresh_button torch._C._jit_set_profiling_mode(False) @@ -38,7 +38,8 @@ params = { 'cfg_scale': 7, 'textgen_prefix': 'Please provide a detailed and vivid description of [subject]', 'sd_checkpoint': ' ', - 'checkpoint_list': [" "] + 'checkpoint_list': [" "], + 'last_model': "" } @@ -46,6 +47,7 @@ def give_VRAM_priority(actor): global shared, params if actor == 'SD': + params["last_model"] = shared.model_name unload_model() print("Requesting Auto1111 to re-load last checkpoint used...") response = requests.post(url=f'{params["address"]}/sdapi/v1/reload-checkpoint', json='') @@ -55,7 +57,8 @@ def give_VRAM_priority(actor): print("Requesting Auto1111 to vacate VRAM...") response = requests.post(url=f'{params["address"]}/sdapi/v1/unload-checkpoint', json='') response.raise_for_status() - reload_model() + if params["last_model"]: + shared.model, shared.tokenizer = load_model(params["last_model"]) elif 
actor == 'set': print("VRAM mangement activated -- requesting Auto1111 to vacate VRAM...") diff --git a/modules/chat.py b/modules/chat.py index 0e47da29..2852aaf3 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -412,8 +412,16 @@ def generate_chat_reply(text, state, regenerate=False, _continue=False, loading_ yield history return + show_after = html.escape(state["show_after"]) if state["show_after"] else None for history in chatbot_wrapper(text, state, regenerate=regenerate, _continue=_continue, loading_message=loading_message, for_ui=for_ui): - yield history + if show_after: + after = history["visible"][-1][1].partition(show_after)[2] or "*Is thinking...*" + yield { + 'internal': history['internal'], + 'visible': history['visible'][:-1] + [[history['visible'][-1][0], after]] + } + else: + yield history def character_is_loaded(state, raise_exception=False): diff --git a/modules/html_generator.py b/modules/html_generator.py index 3edbef5e..6bad0f89 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -106,52 +106,6 @@ def replace_blockquote(m): return m.group().replace('\n', '\n> ').replace('\\begin{blockquote}', '').replace('\\end{blockquote}', '') -def add_long_list_class(html): - ''' - Adds a long-list class to