From 15bfe36619d5c8b9b2be32e14f71bd47802a2223 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 9 Jan 2025 15:58:14 -0800 Subject: [PATCH 01/22] Installer: update miniconda to 24.11.1 (experimental) --- start_linux.sh | 2 +- start_macos.sh | 2 +- start_windows.bat | 4 ++-- wsl.sh | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/start_linux.sh b/start_linux.sh index 792daca8..256604cb 100755 --- a/start_linux.sh +++ b/start_linux.sh @@ -19,7 +19,7 @@ esac INSTALL_DIR="$(pwd)/installer_files" CONDA_ROOT_PREFIX="$(pwd)/installer_files/conda" INSTALL_ENV_DIR="$(pwd)/installer_files/env" -MINICONDA_DOWNLOAD_URL="https://repo.anaconda.com/miniconda/Miniconda3-py310_23.3.1-0-Linux-${OS_ARCH}.sh" +MINICONDA_DOWNLOAD_URL="https://repo.anaconda.com/miniconda/Miniconda3-py311_24.11.1-0-Linux-${OS_ARCH}.sh" conda_exists="F" # figure out whether git and conda needs to be installed diff --git a/start_macos.sh b/start_macos.sh index 6761f531..02f1011a 100755 --- a/start_macos.sh +++ b/start_macos.sh @@ -19,7 +19,7 @@ esac INSTALL_DIR="$(pwd)/installer_files" CONDA_ROOT_PREFIX="$(pwd)/installer_files/conda" INSTALL_ENV_DIR="$(pwd)/installer_files/env" -MINICONDA_DOWNLOAD_URL="https://repo.anaconda.com/miniconda/Miniconda3-py310_23.3.1-0-MacOSX-${OS_ARCH}.sh" +MINICONDA_DOWNLOAD_URL="https://repo.anaconda.com/miniconda/Miniconda3-py311_24.11.1-0-MacOSX-${OS_ARCH}.sh" conda_exists="F" # figure out whether git and conda needs to be installed diff --git a/start_windows.bat b/start_windows.bat index ebcc1997..c167cdc5 100755 --- a/start_windows.bat +++ b/start_windows.bat @@ -25,8 +25,8 @@ set TEMP=%cd%\installer_files set INSTALL_DIR=%cd%\installer_files set CONDA_ROOT_PREFIX=%cd%\installer_files\conda set INSTALL_ENV_DIR=%cd%\installer_files\env -set MINICONDA_DOWNLOAD_URL=https://repo.anaconda.com/miniconda/Miniconda3-py310_23.3.1-0-Windows-x86_64.exe -set MINICONDA_CHECKSUM=307194e1f12bbeb52b083634e89cc67db4f7980bd542254b43d3309eaf7cb358 +set MINICONDA_DOWNLOAD_URL=https://repo.anaconda.com/miniconda/Miniconda3-py311_24.11.1-0-Windows-x86_64.exe +set MINICONDA_CHECKSUM=43dcbcc315ff91edf959e002cd2f1ede38c64b999fefcc951bccf2ed69c9e8bb set conda_exists=F @rem figure out whether git and conda needs to be installed diff --git a/wsl.sh b/wsl.sh index 7b17132f..c5d28b16 100755 --- a/wsl.sh +++ b/wsl.sh @@ -26,7 +26,7 @@ fi INSTALL_DIR="$INSTALL_DIR_PREFIX/text-generation-webui" CONDA_ROOT_PREFIX="$INSTALL_DIR/installer_files/conda" INSTALL_ENV_DIR="$INSTALL_DIR/installer_files/env" -MINICONDA_DOWNLOAD_URL="https://repo.anaconda.com/miniconda/Miniconda3-py310_23.3.1-0-Linux-x86_64.sh" +MINICONDA_DOWNLOAD_URL="https://repo.anaconda.com/miniconda/Miniconda3-py311_24.11.1-0-Linux-x86_64.sh" conda_exists="F" # environment isolation From da6d868f58e519ec90796eef137cef26e67d4fd8 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 9 Jan 2025 16:11:46 -0800 Subject: [PATCH 02/22] Remove old deprecated flags (~6 months or more) --- modules/shared.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index a0070b1f..f478df05 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -203,11 +203,6 @@ group.add_argument('--multimodal-pipeline', type=str, default=None, help='The mu # Deprecated parameters group = parser.add_argument_group('Deprecated') -group.add_argument('--model_type', type=str, help='DEPRECATED') 
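A minimal sketch of the pattern this hunk moves to (the remaining removals and the new list follow below): deprecation warnings driven by a single list of flag names and raised only for flags the user actually passed on the command line. The flag names and warning text here are illustrative, not the project's exact code.

    import sys

    # Illustrative subset; the real project enumerates its own deprecated flags.
    deprecated_args = ['cache_4bit', 'cache_8bit', 'chat_buttons']

    # Collect only the deprecated flags that were explicitly provided.
    provided_arguments = []
    for arg in sys.argv[1:]:
        arg = arg.lstrip('-').replace('-', '_')
        if arg in deprecated_args:
            provided_arguments.append(arg)

    # Warn once per deprecated flag that was actually used.
    for k in provided_arguments:
        print(f'The --{k} flag has been deprecated and will be removed soon. Please remove that flag.')

Checking membership in the provided-arguments list, rather than the truthiness of each parsed value, avoids both missed warnings for flags whose default is falsy and spurious warnings for defaults.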
-group.add_argument('--pre_layer', type=int, nargs='+', help='DEPRECATED') -group.add_argument('--checkpoint', type=str, help='DEPRECATED') -group.add_argument('--monkey-patch', action='store_true', help='DEPRECATED') -group.add_argument('--no_inject_fused_attention', action='store_true', help='DEPRECATED') group.add_argument('--cache_4bit', action='store_true', help='DEPRECATED') group.add_argument('--cache_8bit', action='store_true', help='DEPRECATED') group.add_argument('--chat-buttons', action='store_true', help='DEPRECATED') @@ -228,14 +223,26 @@ for arg in sys.argv[1:]: if hasattr(args, arg): provided_arguments.append(arg) -deprecated_args = [] +deprecated_args = [ + 'cache_4bit', + 'cache_8bit', + 'chat_buttons', + 'triton', + 'no_inject_fused_mlp', + 'no_use_cuda_fp16', + 'desc_act', + 'disable_exllama', + 'disable_exllamav2', + 'wbits', + 'groupsize' +] def do_cmd_flags_warnings(): # Deprecation warnings for k in deprecated_args: - if getattr(args, k): + if k in provided_arguments: logger.warning(f'The --{k} flag has been deprecated and will be removed soon. Please remove that flag.') # Security warnings From 7fe46764fb2d675c4e281592a1328293c0c56b07 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 10 Jan 2025 07:07:41 -0800 Subject: [PATCH 03/22] Improve the --help message about --tensorcores as well --- modules/shared.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/shared.py b/modules/shared.py index f478df05..89263205 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -116,7 +116,7 @@ group.add_argument('--quant_type', type=str, default='nf4', help='quant_type for # llama.cpp group = parser.add_argument_group('llama.cpp') group.add_argument('--flash-attn', action='store_true', help='Use flash-attention.') -group.add_argument('--tensorcores', action='store_true', help='NVIDIA only: use llama-cpp-python compiled with tensor cores support. This may increase performance on newer cards.') +group.add_argument('--tensorcores', action='store_true', help='NVIDIA only: use llama-cpp-python compiled without GGML_CUDA_FORCE_MMQ. This may improve performance on newer cards.') group.add_argument('--n_ctx', type=int, default=2048, help='Size of the prompt context.') group.add_argument('--threads', type=int, default=0, help='Number of threads to use.') group.add_argument('--threads-batch', type=int, default=0, help='Number of threads to use for batches/prompt processing.') From 17aa97248fa3aa60d46a1a355ec8d2f5705bad38 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 10 Jan 2025 07:22:25 -0800 Subject: [PATCH 04/22] Installer: make the hashsum verification more robust on Windows --- start_windows.bat | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/start_windows.bat b/start_windows.bat index c167cdc5..960cfdb7 100755 --- a/start_windows.bat +++ b/start_windows.bat @@ -41,10 +41,18 @@ if "%conda_exists%" == "F" ( mkdir "%INSTALL_DIR%" call curl -Lk "%MINICONDA_DOWNLOAD_URL%" > "%INSTALL_DIR%\miniconda_installer.exe" || ( echo. && echo Miniconda failed to download. 
&& goto end ) + :: Try CertUtil first for /f %%a in ('CertUtil -hashfile "%INSTALL_DIR%\miniconda_installer.exe" SHA256 ^| find /i /v " " ^| find /i "%MINICONDA_CHECKSUM%"') do ( set "output=%%a" ) + :: If CertUtil fails, try PowerShell + if not defined output ( + for /f %%a in ('powershell -Command "if((Get-FileHash \"%INSTALL_DIR%\miniconda_installer.exe\" -Algorithm SHA256).Hash -eq ''%MINICONDA_CHECKSUM%''){echo true}"') do ( + set "output=%%a" + ) + ) + if not defined output ( echo The checksum verification for miniconda_installer.exe has failed. del "%INSTALL_DIR%\miniconda_installer.exe" From 83c426e96b6c2cd4349d38c4cd212cbb0afd2044 Mon Sep 17 00:00:00 2001 From: oobabooga Date: Fri, 10 Jan 2025 18:04:32 -0300 Subject: [PATCH 05/22] Organize internals (#6646) --- extensions/openai/typing.py | 48 +++--- modules/loaders.py | 306 ++++++++++++++++++------------------ modules/presets.py | 36 ++--- modules/shared.py | 40 ++--- modules/text_generation.py | 64 ++++++-- modules/ui.py | 162 +++++++++---------- 6 files changed, 346 insertions(+), 310 deletions(-) diff --git a/extensions/openai/typing.py b/extensions/openai/typing.py index dfac8e03..5f0e0128 100644 --- a/extensions/openai/typing.py +++ b/extensions/openai/typing.py @@ -7,48 +7,48 @@ from pydantic import BaseModel, Field class GenerationOptions(BaseModel): preset: str | None = Field(default=None, description="The name of a file under text-generation-webui/presets (without the .yaml extension). The sampling parameters that get overwritten by this option are the keys in the default_preset() function in modules/presets.py.") - min_p: float = 0 - dynamic_temperature: bool = False dynatemp_low: float = 1 dynatemp_high: float = 1 dynatemp_exponent: float = 1 smoothing_factor: float = 0 smoothing_curve: float = 1 + min_p: float = 0 top_k: int = 0 - repetition_penalty: float = 1 - repetition_penalty_range: int = 1024 typical_p: float = 1 - tfs: float = 1 - top_a: float = 0 + xtc_threshold: float = 0.1 + xtc_probability: float = 0 epsilon_cutoff: float = 0 eta_cutoff: float = 0 - guidance_scale: float = 1 - negative_prompt: str = '' + tfs: float = 1 + top_a: float = 0 + dry_multiplier: float = 0 + dry_allowed_length: int = 2 + dry_base: float = 1.75 + repetition_penalty: float = 1 + encoder_repetition_penalty: float = 1 + no_repeat_ngram_size: int = 0 + repetition_penalty_range: int = 1024 penalty_alpha: float = 0 + guidance_scale: float = 1 mirostat_mode: int = 0 mirostat_tau: float = 5 mirostat_eta: float = 0.1 - temperature_last: bool = False - do_sample: bool = True - seed: int = -1 - encoder_repetition_penalty: float = 1 - no_repeat_ngram_size: int = 0 - dry_multiplier: float = 0 - dry_base: float = 1.75 - dry_allowed_length: int = 2 - dry_sequence_breakers: str = '"\\n", ":", "\\"", "*"' - xtc_threshold: float = 0.1 - xtc_probability: float = 0 - truncation_length: int = 0 - max_tokens_second: int = 0 prompt_lookup_num_tokens: int = 0 - static_cache: bool = False - custom_token_bans: str = "" - sampler_priority: List[str] | str | None = Field(default=None, description="List of samplers where the first items will appear first in the stack. 
Example: [\"top_k\", \"temperature\", \"top_p\"].") + max_tokens_second: int = 0 + do_sample: bool = True + dynamic_temperature: bool = False + temperature_last: bool = False auto_max_new_tokens: bool = False ban_eos_token: bool = False add_bos_token: bool = True skip_special_tokens: bool = True + static_cache: bool = False + truncation_length: int = 0 + seed: int = -1 + sampler_priority: List[str] | str | None = Field(default=None, description="List of samplers where the first items will appear first in the stack. Example: [\"top_k\", \"temperature\", \"top_p\"].") + custom_token_bans: str = "" + negative_prompt: str = '' + dry_sequence_breakers: str = '"\\n", ":", "\\"", "*"' grammar_string: str = "" diff --git a/modules/loaders.py b/modules/loaders.py index 4e331dbb..cd864e40 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -7,102 +7,103 @@ from modules import shared loaders_and_params = OrderedDict({ 'Transformers': [ - 'cpu_memory', 'gpu_memory', - 'load_in_4bit', - 'load_in_8bit', - 'torch_compile', - 'bf16', - 'cpu', - 'disk', - 'auto_devices', - 'use_double_quant', - 'quant_type', - 'compute_dtype', - 'trust_remote_code', - 'no_use_fast', - 'use_flash_attention_2', - 'use_eager_attention', + 'cpu_memory', 'alpha_value', 'compress_pos_emb', - ], - 'llama.cpp': [ - 'n_ctx', - 'n_gpu_layers', - 'cache_type', - 'tensor_split', - 'n_batch', - 'threads', - 'threads_batch', - 'no_mmap', - 'mlock', - 'no_mul_mat_q', - 'rope_freq_base', - 'compress_pos_emb', + 'compute_dtype', + 'quant_type', + 'load_in_8bit', + 'load_in_4bit', + 'torch_compile', + 'use_flash_attention_2', + 'auto_devices', 'cpu', - 'numa', - 'no_offload_kqv', - 'row_split', - 'tensorcores', - 'flash_attn', - 'streaming_llm', - 'attention_sink_size', - ], - 'llamacpp_HF': [ - 'n_ctx', - 'n_gpu_layers', - 'cache_type', - 'tensor_split', - 'n_batch', - 'threads', - 'threads_batch', - 'no_mmap', - 'mlock', - 'no_mul_mat_q', - 'rope_freq_base', - 'compress_pos_emb', - 'cpu', - 'numa', - 'cfg_cache', + 'disk', + 'use_double_quant', + 'use_eager_attention', + 'bf16', + 'trust_remote_code', 'no_use_fast', - 'logits_all', - 'no_offload_kqv', - 'row_split', + ], + 'llama.cpp': [ + 'n_gpu_layers', + 'threads', + 'threads_batch', + 'n_batch', + 'n_ctx', + 'cache_type', + 'tensor_split', + 'rope_freq_base', + 'compress_pos_emb', + 'attention_sink_size', 'tensorcores', 'flash_attn', 'streaming_llm', + 'cpu', + 'row_split', + 'no_offload_kqv', + 'no_mul_mat_q', + 'no_mmap', + 'mlock', + 'numa', + ], + 'llamacpp_HF': [ + 'n_gpu_layers', + 'threads', + 'threads_batch', + 'n_batch', + 'n_ctx', + 'cache_type', + 'tensor_split', + 'rope_freq_base', + 'compress_pos_emb', 'attention_sink_size', + 'tensorcores', + 'flash_attn', + 'streaming_llm', + 'cpu', + 'row_split', + 'no_offload_kqv', + 'no_mul_mat_q', + 'no_mmap', + 'mlock', + 'numa', + 'cfg_cache', + 'logits_all', + 'trust_remote_code', + 'no_use_fast', 'llamacpp_HF_info', ], 'ExLlamav2_HF': [ - 'gpu_split', 'max_seq_len', - 'cfg_cache', + 'cache_type', + 'gpu_split', + 'alpha_value', + 'compress_pos_emb', + 'num_experts_per_token', + 'autosplit', + 'enable_tp', 'no_flash_attn', 'no_xformers', 'no_sdpa', - 'num_experts_per_token', - 'cache_type', - 'autosplit', - 'enable_tp', - 'alpha_value', - 'compress_pos_emb', + 'cfg_cache', 'trust_remote_code', 'no_use_fast', ], 'ExLlamav2': [ - 'gpu_split', 'max_seq_len', + 'cache_type', + 'gpu_split', + 'alpha_value', + 'compress_pos_emb', + 'num_experts_per_token', + 'autosplit', + 'enable_tp', 'no_flash_attn', 'no_xformers', 'no_sdpa', 
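This loaders.py diff reorders, per loader, the parameter lists that decide which UI fields are displayed (the reordered entries continue below). A tiny self-contained sketch of that registry-lookup pattern, with abridged and partly hypothetical loader names and fields:

    from collections import OrderedDict

    # Hypothetical miniature of loaders_and_params: loader name -> UI elements.
    loaders_and_params = OrderedDict({
        'llama.cpp': ['n_gpu_layers', 'n_ctx', 'flash_attn'],
        'ExLlamav2': ['max_seq_len', 'cache_type', 'gpu_split'],
    })

    def visible_elements(loader):
        # Elements to show for the selected loader; empty for unknown loaders.
        return set(loaders_and_params.get(loader, []))

    print(visible_elements('llama.cpp'))  # n_gpu_layers, n_ctx, flash_attn

Because the mapping is data rather than branching code, reordering the entries (as this patch does) changes how the lists read without changing which fields each loader exposes.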
- 'num_experts_per_token', - 'cache_type', - 'autosplit', - 'enable_tp', - 'alpha_value', - 'compress_pos_emb', 'exllamav2_info', ], 'HQQ': [ @@ -121,51 +122,51 @@ loaders_and_params = OrderedDict({ def transformers_samplers(): return { 'temperature', - 'temperature_last', - 'dynamic_temperature', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent', 'smoothing_factor', 'smoothing_curve', - 'top_p', 'min_p', + 'top_p', 'top_k', 'typical_p', + 'xtc_threshold', + 'xtc_probability', 'epsilon_cutoff', 'eta_cutoff', 'tfs', 'top_a', + 'dry_multiplier', + 'dry_allowed_length', + 'dry_base', 'repetition_penalty', - 'presence_penalty', 'frequency_penalty', - 'repetition_penalty_range', + 'presence_penalty', 'encoder_repetition_penalty', 'no_repeat_ngram_size', - 'dry_multiplier', - 'dry_base', - 'dry_allowed_length', - 'dry_sequence_breakers', - 'xtc_threshold', - 'xtc_probability', - 'seed', - 'do_sample', + 'repetition_penalty_range', 'penalty_alpha', + 'guidance_scale', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', - 'grammar_file_row', - 'grammar_string', - 'guidance_scale', - 'negative_prompt', + 'prompt_lookup_num_tokens', + 'do_sample', + 'dynamic_temperature', + 'temperature_last', + 'auto_max_new_tokens', 'ban_eos_token', - 'custom_token_bans', - 'sampler_priority', 'add_bos_token', 'skip_special_tokens', - 'auto_max_new_tokens', - 'prompt_lookup_num_tokens', 'static_cache', + 'seed', + 'sampler_priority', + 'custom_token_bans', + 'negative_prompt', + 'dry_sequence_breakers', + 'grammar_string', + 'grammar_file_row', } @@ -174,155 +175,156 @@ loaders_samplers = { 'HQQ': transformers_samplers(), 'ExLlamav2': { 'temperature', - 'temperature_last', - 'smoothing_factor', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent', - 'top_p', + 'smoothing_factor', 'min_p', + 'top_p', 'top_k', 'typical_p', + 'xtc_threshold', + 'xtc_probability', 'tfs', 'top_a', + 'dry_multiplier', + 'dry_allowed_length', + 'dry_base', 'repetition_penalty', - 'presence_penalty', 'frequency_penalty', + 'presence_penalty', 'repetition_penalty_range', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', - 'dry_multiplier', - 'dry_base', - 'dry_allowed_length', - 'dry_sequence_breakers', - 'xtc_threshold', - 'xtc_probability', - 'seed', + 'dynamic_temperature', + 'temperature_last', + 'auto_max_new_tokens', 'ban_eos_token', 'add_bos_token', - 'custom_token_bans', 'skip_special_tokens', - 'auto_max_new_tokens', + 'seed', + 'custom_token_bans', + 'dry_sequence_breakers', }, 'ExLlamav2_HF': { 'temperature', - 'temperature_last', - 'dynamic_temperature', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent', 'smoothing_factor', 'smoothing_curve', - 'top_p', 'min_p', + 'top_p', 'top_k', 'typical_p', + 'xtc_threshold', + 'xtc_probability', 'epsilon_cutoff', 'eta_cutoff', 'tfs', 'top_a', + 'dry_multiplier', + 'dry_allowed_length', + 'dry_base', 'repetition_penalty', - 'presence_penalty', 'frequency_penalty', - 'repetition_penalty_range', + 'presence_penalty', 'encoder_repetition_penalty', 'no_repeat_ngram_size', - 'dry_multiplier', - 'dry_base', - 'dry_allowed_length', - 'dry_sequence_breakers', - 'xtc_threshold', - 'xtc_probability', - 'seed', - 'do_sample', + 'repetition_penalty_range', + 'guidance_scale', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', - 'grammar_file_row', - 'grammar_string', - 'guidance_scale', - 'negative_prompt', + 'do_sample', + 'dynamic_temperature', + 'temperature_last', + 'auto_max_new_tokens', 'ban_eos_token', - 'custom_token_bans', - 'sampler_priority', 'add_bos_token', 'skip_special_tokens', - 
'auto_max_new_tokens', + 'seed', + 'sampler_priority', + 'custom_token_bans', + 'negative_prompt', + 'dry_sequence_breakers', + 'grammar_string', + 'grammar_file_row', }, 'llama.cpp': { 'temperature', - 'top_p', 'min_p', + 'top_p', 'top_k', 'typical_p', 'tfs', 'repetition_penalty', - 'presence_penalty', 'frequency_penalty', - 'seed', + 'presence_penalty', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', - 'grammar_file_row', - 'grammar_string', 'ban_eos_token', + 'seed', 'custom_token_bans', + 'grammar_string', + 'grammar_file_row', }, 'llamacpp_HF': { 'temperature', - 'temperature_last', - 'dynamic_temperature', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent', 'smoothing_factor', 'smoothing_curve', - 'top_p', 'min_p', + 'top_p', 'top_k', 'typical_p', + 'xtc_threshold', + 'xtc_probability', 'epsilon_cutoff', 'eta_cutoff', 'tfs', 'top_a', + 'dry_multiplier', + 'dry_allowed_length', + 'dry_base', 'repetition_penalty', - 'presence_penalty', 'frequency_penalty', - 'repetition_penalty_range', + 'presence_penalty', 'encoder_repetition_penalty', 'no_repeat_ngram_size', - 'dry_multiplier', - 'dry_base', - 'dry_allowed_length', - 'dry_sequence_breakers', - 'xtc_threshold', - 'xtc_probability', - 'seed', - 'do_sample', + 'repetition_penalty_range', + 'guidance_scale', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', - 'grammar_file_row', - 'grammar_string', - 'guidance_scale', - 'negative_prompt', + 'do_sample', + 'dynamic_temperature', + 'temperature_last', + 'auto_max_new_tokens', 'ban_eos_token', - 'custom_token_bans', - 'sampler_priority', 'add_bos_token', 'skip_special_tokens', - 'auto_max_new_tokens', + 'seed', + 'sampler_priority', + 'custom_token_bans', + 'negative_prompt', + 'dry_sequence_breakers', + 'grammar_string', + 'grammar_file_row', }, 'TensorRT-LLM': { 'temperature', 'top_p', 'top_k', 'repetition_penalty', - 'presence_penalty', 'frequency_penalty', - 'ban_eos_token', + 'presence_penalty', 'auto_max_new_tokens', + 'ban_eos_token', } } diff --git a/modules/presets.py b/modules/presets.py index c8118fb3..b841af53 100644 --- a/modules/presets.py +++ b/modules/presets.py @@ -13,40 +13,40 @@ from modules.logging_colors import logger def default_preset(): return { 'temperature': 1, - 'temperature_last': False, - 'dynamic_temperature': False, 'dynatemp_low': 1, 'dynatemp_high': 1, 'dynatemp_exponent': 1, 'smoothing_factor': 0, 'smoothing_curve': 1, - 'top_p': 1, 'min_p': 0, + 'top_p': 1, 'top_k': 0, - 'repetition_penalty': 1, - 'presence_penalty': 0, - 'frequency_penalty': 0, - 'repetition_penalty_range': 1024, 'typical_p': 1, - 'tfs': 1, - 'top_a': 0, + 'xtc_threshold': 0.1, + 'xtc_probability': 0, 'epsilon_cutoff': 0, 'eta_cutoff': 0, - 'guidance_scale': 1, + 'tfs': 1, + 'top_a': 0, + 'dry_multiplier': 0, + 'dry_allowed_length': 2, + 'dry_base': 1.75, + 'repetition_penalty': 1, + 'frequency_penalty': 0, + 'presence_penalty': 0, + 'encoder_repetition_penalty': 1, + 'no_repeat_ngram_size': 0, + 'repetition_penalty_range': 1024, 'penalty_alpha': 0, + 'guidance_scale': 1, 'mirostat_mode': 0, 'mirostat_tau': 5, 'mirostat_eta': 0.1, 'do_sample': True, - 'encoder_repetition_penalty': 1, - 'no_repeat_ngram_size': 0, - 'dry_multiplier': 0, - 'dry_base': 1.75, - 'dry_allowed_length': 2, + 'dynamic_temperature': False, + 'temperature_last': False, + 'sampler_priority': 
'repetition_penalty\npresence_penalty\nfrequency_penalty\ndry\ntemperature\ndynamic_temperature\nquadratic_sampling\ntop_k\ntop_p\ntypical_p\nepsilon_cutoff\neta_cutoff\ntfs\ntop_a\nmin_p\nmirostat\nxtc\nencoder_repetition_penalty\nno_repeat_ngram', 'dry_sequence_breakers': '"\\n", ":", "\\"", "*"', - 'xtc_threshold': 0.1, - 'xtc_probability': 0, - 'sampler_priority': 'repetition_penalty\npresence_penalty\nfrequency_penalty\ndry\ntemperature\ndynamic_temperature\nquadratic_sampling\ntop_k\ntop_p\ntypical_p\nepsilon_cutoff\neta_cutoff\ntfs\ntop_a\nmin_p\nmirostat\nxtc\nencoder_repetition_penalty\nno_repeat_ngram' } diff --git a/modules/shared.py b/modules/shared.py index 89263205..928747f7 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -29,39 +29,39 @@ need_restart = False # UI defaults settings = { - 'dark_theme': True, 'show_controls': True, 'start_with': '', 'mode': 'chat-instruct', 'chat_style': 'cai-chat', + 'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>', 'prompt-default': 'QA', 'prompt-notebook': 'QA', - 'preset': 'min_p', - 'max_new_tokens': 512, - 'max_new_tokens_min': 1, - 'max_new_tokens_max': 4096, - 'negative_prompt': '', - 'seed': -1, - 'truncation_length': 2048, - 'max_tokens_second': 0, - 'max_updates_second': 0, - 'prompt_lookup_num_tokens': 0, - 'static_cache': False, - 'custom_stopping_strings': '', - 'custom_token_bans': '', - 'auto_max_new_tokens': False, - 'ban_eos_token': False, - 'add_bos_token': True, - 'skip_special_tokens': True, - 'stream': True, 'character': 'Assistant', 'name1': 'You', 'user_bio': '', 'custom_system_message': '', 'instruction_template_str': "{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if not ns.found -%}\n {{- '' + 'Below is an instruction that describes a task. Write a response that appropriately completes the request.' + '\\n\\n' -}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {{- '' + message['content'] + '\\n\\n' -}}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{-'### Instruction:\\n' + message['content'] + '\\n\\n'-}}\n {%- else -%}\n {{-'### Response:\\n' + message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{-'### Response:\\n'-}}\n{%- endif -%}", 'chat_template_str': "{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {%- if message['content'] -%}\n {{- message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- if user_bio -%}\n {{- user_bio + '\\n\\n' -}}\n {%- endif -%}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{- name1 + ': ' + message['content'] + '\\n'-}}\n {%- else -%}\n {{- name2 + ': ' + message['content'] + '\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}", - 'chat-instruct_command': 'Continue the chat dialogue below. 
Write a single reply for the character "<|character|>".\n\n<|prompt|>', + 'preset': 'min_p', + 'max_new_tokens': 512, + 'max_new_tokens_min': 1, + 'max_new_tokens_max': 4096, + 'prompt_lookup_num_tokens': 0, + 'max_tokens_second': 0, + 'max_updates_second': 0, + 'auto_max_new_tokens': False, + 'ban_eos_token': False, + 'add_bos_token': True, + 'skip_special_tokens': True, + 'stream': True, + 'static_cache': False, + 'truncation_length': 2048, + 'seed': -1, + 'custom_stopping_strings': '', + 'custom_token_bans': '', + 'negative_prompt': '', 'autoload_model': False, + 'dark_theme': True, 'default_extensions': [], } diff --git a/modules/text_generation.py b/modules/text_generation.py index 3e9788b8..152b2b8d 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -287,31 +287,62 @@ def generate_reply_HF(question, original_question, seed, state, stopping_strings clear_torch_cache() generate_params = {} - for k in ['max_new_tokens', 'temperature', 'temperature_last', 'dynamic_temperature', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent', 'smoothing_factor', 'smoothing_curve', 'top_p', 'min_p', 'top_k', 'repetition_penalty', 'presence_penalty', 'frequency_penalty', 'repetition_penalty_range', 'typical_p', 'tfs', 'top_a', 'guidance_scale', 'penalty_alpha', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'do_sample', 'encoder_repetition_penalty', 'no_repeat_ngram_size', 'dry_multiplier', 'dry_base', 'dry_allowed_length', 'dry_sequence_breakers', 'xtc_threshold', 'xtc_probability']: + for k in [ + 'temperature', + 'dynatemp_low', + 'dynatemp_high', + 'dynatemp_exponent', + 'smoothing_factor', + 'smoothing_curve', + 'min_p', + 'top_p', + 'top_k', + 'typical_p', + 'xtc_threshold', + 'xtc_probability', + 'tfs', + 'top_a', + 'dry_multiplier', + 'dry_allowed_length', + 'dry_base', + 'repetition_penalty', + 'frequency_penalty', + 'presence_penalty', + 'encoder_repetition_penalty', + 'no_repeat_ngram_size', + 'repetition_penalty_range', + 'penalty_alpha', + 'guidance_scale', + 'mirostat_mode', + 'mirostat_tau', + 'mirostat_eta', + 'max_new_tokens', + 'do_sample', + 'dynamic_temperature', + 'temperature_last', + 'dry_sequence_breakers', + ]: if k in state: generate_params[k] = state[k] - if isinstance(state['sampler_priority'], list) and len(state['sampler_priority']) > 0: - generate_params['sampler_priority'] = state['sampler_priority'] - elif isinstance(state['sampler_priority'], str) and state['sampler_priority'].strip() != '': - generate_params['sampler_priority'] = [x.strip() for x in state['sampler_priority'].replace('\n', ',').split(',') if x.strip()] - - if state['negative_prompt'] != '': - generate_params['negative_prompt_ids'] = encode(state['negative_prompt']) - - if state['prompt_lookup_num_tokens'] > 0: - generate_params['prompt_lookup_num_tokens'] = state['prompt_lookup_num_tokens'] - - if state['static_cache']: - generate_params['cache_implementation'] = 'static' - for k in ['epsilon_cutoff', 'eta_cutoff']: if state[k] > 0: generate_params[k] = state[k] * 1e-4 + if state['prompt_lookup_num_tokens'] > 0: + generate_params['prompt_lookup_num_tokens'] = state['prompt_lookup_num_tokens'] + if state['ban_eos_token']: generate_params['suppress_tokens'] = [shared.tokenizer.eos_token_id] + if state['static_cache']: + generate_params['cache_implementation'] = 'static' + + if isinstance(state['sampler_priority'], list) and len(state['sampler_priority']) > 0: + generate_params['sampler_priority'] = state['sampler_priority'] + elif isinstance(state['sampler_priority'], str) and 
state['sampler_priority'].strip() != '': + generate_params['sampler_priority'] = [x.strip() for x in state['sampler_priority'].replace('\n', ',').split(',') if x.strip()] + if state['custom_token_bans']: to_ban = [int(x) for x in state['custom_token_bans'].split(',')] if len(to_ban) > 0: @@ -320,6 +351,9 @@ def generate_reply_HF(question, original_question, seed, state, stopping_strings else: generate_params['suppress_tokens'] = to_ban + if state['negative_prompt'] != '': + generate_params['negative_prompt_ids'] = encode(state['negative_prompt']) + generate_params.update({'use_cache': not shared.args.no_cache}) if shared.args.deepspeed: generate_params.update({'synced_gpus': True}) diff --git a/modules/ui.py b/modules/ui.py index e66de434..4f7ee785 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -102,55 +102,55 @@ else: def list_model_elements(): elements = [ - 'loader', 'filter_by_loader', + 'loader', 'cpu_memory', - 'auto_devices', - 'disk', - 'cpu', - 'bf16', - 'load_in_4bit', - 'load_in_8bit', - 'torch_compile', - 'trust_remote_code', - 'no_use_fast', - 'use_flash_attention_2', - 'use_eager_attention', - 'compute_dtype', - 'quant_type', - 'use_double_quant', - 'cfg_cache', - 'no_flash_attn', - 'no_xformers', - 'no_sdpa', - 'num_experts_per_token', - 'cache_type', - 'autosplit', - 'enable_tp', + 'n_gpu_layers', 'threads', 'threads_batch', 'n_batch', - 'no_mmap', - 'mlock', - 'no_mul_mat_q', - 'n_gpu_layers', - 'tensor_split', + 'hqq_backend', 'n_ctx', - 'gpu_split', 'max_seq_len', - 'compress_pos_emb', + 'cache_type', + 'tensor_split', + 'gpu_split', 'alpha_value', 'rope_freq_base', - 'numa', - 'logits_all', - 'no_offload_kqv', - 'row_split', - 'tensorcores', - 'flash_attn', - 'streaming_llm', + 'compress_pos_emb', + 'compute_dtype', + 'quant_type', 'attention_sink_size', - 'hqq_backend', + 'num_experts_per_token', + 'tensorcores', + 'load_in_8bit', + 'load_in_4bit', + 'torch_compile', + 'flash_attn', + 'use_flash_attention_2', + 'streaming_llm', + 'auto_devices', + 'cpu', + 'disk', + 'row_split', + 'no_offload_kqv', + 'no_mul_mat_q', + 'no_mmap', + 'mlock', + 'numa', + 'use_double_quant', + 'use_eager_attention', + 'bf16', + 'autosplit', + 'enable_tp', + 'no_flash_attn', + 'no_xformers', + 'no_sdpa', + 'cfg_cache', 'cpp_runner', + 'logits_all', + 'trust_remote_code', + 'no_use_fast', ] if is_torch_xpu_available(): @@ -165,87 +165,87 @@ def list_model_elements(): def list_interface_input_elements(): elements = [ - 'max_new_tokens', - 'auto_max_new_tokens', - 'max_tokens_second', - 'max_updates_second', - 'prompt_lookup_num_tokens', - 'seed', 'temperature', - 'temperature_last', - 'dynamic_temperature', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent', 'smoothing_factor', 'smoothing_curve', - 'top_p', 'min_p', + 'top_p', 'top_k', 'typical_p', - 'epsilon_cutoff', - 'eta_cutoff', - 'repetition_penalty', - 'presence_penalty', - 'frequency_penalty', - 'repetition_penalty_range', - 'encoder_repetition_penalty', - 'no_repeat_ngram_size', - 'dry_multiplier', - 'dry_base', - 'dry_allowed_length', - 'dry_sequence_breakers', 'xtc_threshold', 'xtc_probability', - 'do_sample', + 'epsilon_cutoff', + 'eta_cutoff', + 'tfs', + 'top_a', + 'dry_multiplier', + 'dry_allowed_length', + 'dry_base', + 'repetition_penalty', + 'frequency_penalty', + 'presence_penalty', + 'encoder_repetition_penalty', + 'no_repeat_ngram_size', + 'repetition_penalty_range', 'penalty_alpha', + 'guidance_scale', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', - 'grammar_string', - 'negative_prompt', - 'guidance_scale', - 
'add_bos_token', + 'max_new_tokens', + 'prompt_lookup_num_tokens', + 'max_tokens_second', + 'max_updates_second', + 'do_sample', + 'dynamic_temperature', + 'temperature_last', + 'auto_max_new_tokens', 'ban_eos_token', - 'custom_token_bans', - 'sampler_priority', - 'truncation_length', - 'custom_stopping_strings', + 'add_bos_token', 'skip_special_tokens', 'stream', 'static_cache', - 'tfs', - 'top_a', + 'truncation_length', + 'seed', + 'sampler_priority', + 'custom_stopping_strings', + 'custom_token_bans', + 'negative_prompt', + 'dry_sequence_breakers', + 'grammar_string', ] # Chat elements elements += [ - 'textbox', - 'start_with', - 'character_menu', 'history', 'search_chat', 'unique_id', + 'textbox', + 'start_with', + 'mode', + 'chat_style', + 'chat-instruct_command', + 'character_menu', + 'name2', + 'context', + 'greeting', 'name1', 'user_bio', - 'name2', - 'greeting', - 'context', - 'mode', 'custom_system_message', 'instruction_template_str', 'chat_template_str', - 'chat_style', - 'chat-instruct_command', ] # Notebook/default elements elements += [ - 'textbox-notebook', 'textbox-default', - 'output_textbox', + 'textbox-notebook', 'prompt_menu-default', 'prompt_menu-notebook', + 'output_textbox', ] # Model elements From c393f7650d558d8ee1311adb5f66cc505e73fb78 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 10 Jan 2025 13:22:18 -0800 Subject: [PATCH 06/22] Update settings-template.yaml, organize modules/shared.py --- modules/shared.py | 4 ++-- settings-template.yaml | 34 +++++++++++++++++----------------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index 928747f7..4d873cb9 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -40,8 +40,6 @@ settings = { 'name1': 'You', 'user_bio': '', 'custom_system_message': '', - 'instruction_template_str': "{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if not ns.found -%}\n {{- '' + 'Below is an instruction that describes a task. Write a response that appropriately completes the request.' 
+ '\\n\\n' -}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {{- '' + message['content'] + '\\n\\n' -}}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{-'### Instruction:\\n' + message['content'] + '\\n\\n'-}}\n {%- else -%}\n {{-'### Response:\\n' + message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{-'### Response:\\n'-}}\n{%- endif -%}", - 'chat_template_str': "{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {%- if message['content'] -%}\n {{- message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- if user_bio -%}\n {{- user_bio + '\\n\\n' -}}\n {%- endif -%}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{- name1 + ': ' + message['content'] + '\\n'-}}\n {%- else -%}\n {{- name2 + ': ' + message['content'] + '\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}", 'preset': 'min_p', 'max_new_tokens': 512, 'max_new_tokens_min': 1, @@ -63,6 +61,8 @@ settings = { 'autoload_model': False, 'dark_theme': True, 'default_extensions': [], + 'instruction_template_str': "{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if not ns.found -%}\n {{- '' + 'Below is an instruction that describes a task. Write a response that appropriately completes the request.' + '\\n\\n' -}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {{- '' + message['content'] + '\\n\\n' -}}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{-'### Instruction:\\n' + message['content'] + '\\n\\n'-}}\n {%- else -%}\n {{-'### Response:\\n' + message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{-'### Response:\\n'-}}\n{%- endif -%}", + 'chat_template_str': "{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {%- if message['content'] -%}\n {{- message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- if user_bio -%}\n {{- user_bio + '\\n\\n' -}}\n {%- endif -%}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{- name1 + ': ' + message['content'] + '\\n'-}}\n {%- else -%}\n {{- name2 + ': ' + message['content'] + '\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}", } default_settings = copy.deepcopy(settings) diff --git a/settings-template.yaml b/settings-template.yaml index d5ed47c3..b61dc4e0 100644 --- a/settings-template.yaml +++ b/settings-template.yaml @@ -1,31 +1,38 @@ -dark_theme: true show_controls: true start_with: '' mode: chat-instruct chat_style: cai-chat +chat-instruct_command: |- + Continue the chat dialogue below. Write a single reply for the character "<|character|>". 
+ + <|prompt|> prompt-default: QA prompt-notebook: QA +character: Assistant +name1: You +user_bio: '' +custom_system_message: '' preset: min_p max_new_tokens: 512 max_new_tokens_min: 1 max_new_tokens_max: 4096 -negative_prompt: '' -seed: -1 -truncation_length: 2048 +prompt_lookup_num_tokens: 0 max_tokens_second: 0 max_updates_second: 0 -prompt_lookup_num_tokens: 0 -custom_stopping_strings: '' -custom_token_bans: '' auto_max_new_tokens: false ban_eos_token: false add_bos_token: true skip_special_tokens: true stream: true static_cache: false -character: Assistant -name1: You -custom_system_message: '' +truncation_length: 2048 +seed: -1 +custom_stopping_strings: '' +custom_token_bans: '' +negative_prompt: '' +autoload_model: false +dark_theme: true +default_extensions: [] instruction_template_str: |- {%- set ns = namespace(found=false) -%} {%- for message in messages -%} @@ -67,11 +74,4 @@ chat_template_str: |- {%- endif -%} {%- endif -%} {%- endfor -%} -chat-instruct_command: |- - Continue the chat dialogue below. Write a single reply for the character "<|character|>". - <|prompt|> -autoload_model: false -gallery-items_per_page: 50 -gallery-open: false -default_extensions: [] From d2f6c0f65ff72c96999a51655b096f42d037fe32 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 10 Jan 2025 13:25:40 -0800 Subject: [PATCH 07/22] Update README --- README.md | 30 +++++++++++------------------- modules/shared.py | 2 +- 2 files changed, 12 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 07138772..7e2dec95 100644 --- a/README.md +++ b/README.md @@ -204,17 +204,16 @@ List of command-line flags usage: server.py [-h] [--multi-user] [--character CHARACTER] [--model MODEL] [--lora LORA [LORA ...]] [--model-dir MODEL_DIR] [--lora-dir LORA_DIR] [--model-menu] [--settings SETTINGS] [--extensions EXTENSIONS [EXTENSIONS ...]] [--verbose] [--idle-timeout IDLE_TIMEOUT] [--loader LOADER] [--cpu] [--auto-devices] [--gpu-memory GPU_MEMORY [GPU_MEMORY ...]] [--cpu-memory CPU_MEMORY] [--disk] [--disk-cache-dir DISK_CACHE_DIR] [--load-in-8bit] [--bf16] [--no-cache] [--trust-remote-code] [--force-safetensors] [--no_use_fast] - [--use_flash_attention_2] [--use_eager_attention] [--load-in-4bit] [--use_double_quant] [--compute_dtype COMPUTE_DTYPE] [--quant_type QUANT_TYPE] [--flash-attn] [--tensorcores] - [--n_ctx N_CTX] [--threads THREADS] [--threads-batch THREADS_BATCH] [--no_mul_mat_q] [--n_batch N_BATCH] [--no-mmap] [--mlock] [--n-gpu-layers N_GPU_LAYERS] + [--use_flash_attention_2] [--use_eager_attention] [--torch-compile] [--load-in-4bit] [--use_double_quant] [--compute_dtype COMPUTE_DTYPE] [--quant_type QUANT_TYPE] [--flash-attn] + [--tensorcores] [--n_ctx N_CTX] [--threads THREADS] [--threads-batch THREADS_BATCH] [--no_mul_mat_q] [--n_batch N_BATCH] [--no-mmap] [--mlock] [--n-gpu-layers N_GPU_LAYERS] [--tensor_split TENSOR_SPLIT] [--numa] [--logits_all] [--no_offload_kqv] [--cache-capacity CACHE_CAPACITY] [--row_split] [--streaming-llm] [--attention-sink-size ATTENTION_SINK_SIZE] [--tokenizer-dir TOKENIZER_DIR] [--gpu-split GPU_SPLIT] [--autosplit] [--max_seq_len MAX_SEQ_LEN] [--cfg-cache] [--no_flash_attn] [--no_xformers] [--no_sdpa] - [--num_experts_per_token NUM_EXPERTS_PER_TOKEN] [--enable_tp] [--triton] [--no_inject_fused_mlp] [--no_use_cuda_fp16] [--desc_act] [--disable_exllama] [--disable_exllamav2] - [--wbits WBITS] [--groupsize GROUPSIZE] [--hqq-backend HQQ_BACKEND] [--cpp-runner] [--cache_type CACHE_TYPE] [--deepspeed] [--nvme-offload-dir 
NVME_OFFLOAD_DIR] + [--num_experts_per_token NUM_EXPERTS_PER_TOKEN] [--enable_tp] [--hqq-backend HQQ_BACKEND] [--cpp-runner] [--cache_type CACHE_TYPE] [--deepspeed] [--nvme-offload-dir NVME_OFFLOAD_DIR] [--local_rank LOCAL_RANK] [--alpha_value ALPHA_VALUE] [--rope_freq_base ROPE_FREQ_BASE] [--compress_pos_emb COMPRESS_POS_EMB] [--listen] [--listen-port LISTEN_PORT] [--listen-host LISTEN_HOST] [--share] [--auto-launch] [--gradio-auth GRADIO_AUTH] [--gradio-auth-path GRADIO_AUTH_PATH] [--ssl-keyfile SSL_KEYFILE] [--ssl-certfile SSL_CERTFILE] - [--subpath SUBPATH] [--old-colors] [--api] [--public-api] [--public-api-id PUBLIC_API_ID] [--api-port API_PORT] [--api-key API_KEY] [--admin-key ADMIN_KEY] [--nowebui] - [--multimodal-pipeline MULTIMODAL_PIPELINE] [--model_type MODEL_TYPE] [--pre_layer PRE_LAYER [PRE_LAYER ...]] [--checkpoint CHECKPOINT] [--monkey-patch] [--no_inject_fused_attention] - [--cache_4bit] [--cache_8bit] [--chat-buttons] + [--subpath SUBPATH] [--old-colors] [--api] [--public-api] [--public-api-id PUBLIC_API_ID] [--api-port API_PORT] [--api-key API_KEY] [--admin-key ADMIN_KEY] [--api-enable-ipv6] + [--api-disable-ipv4] [--nowebui] [--multimodal-pipeline MULTIMODAL_PIPELINE] [--cache_4bit] [--cache_8bit] [--chat-buttons] [--triton] [--no_inject_fused_mlp] [--no_use_cuda_fp16] + [--desc_act] [--disable_exllama] [--disable_exllamav2] [--wbits WBITS] [--groupsize GROUPSIZE] Text generation web UI @@ -237,7 +236,7 @@ Basic settings: Model loader: --loader LOADER Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, llamacpp_HF, ExLlamav2_HF, ExLlamav2, - AutoGPTQ. + HQQ, TensorRT-LLM. Transformers/Accelerate: --cpu Use the CPU to generate text. Warning: Training on CPU is extremely slow. @@ -255,6 +254,7 @@ Transformers/Accelerate: --no_use_fast Set use_fast=False while loading the tokenizer (it's True by default). Use this if you have any problems related to use_fast. --use_flash_attention_2 Set use_flash_attention_2=True while loading the model. --use_eager_attention Set attn_implementation= eager while loading the model. + --torch-compile Compile the model with torch.compile for improved performance. bitsandbytes 4-bit: --load-in-4bit Load the model with 4-bit precision (using bitsandbytes). @@ -264,7 +264,7 @@ bitsandbytes 4-bit: llama.cpp: --flash-attn Use flash-attention. - --tensorcores NVIDIA only: use llama-cpp-python compiled with tensor cores support. This may increase performance on newer cards. + --tensorcores NVIDIA only: use llama-cpp-python compiled without GGML_CUDA_FORCE_MMQ. This may improve performance on newer cards. --n_ctx N_CTX Size of the prompt context. --threads THREADS Number of threads to use. --threads-batch THREADS_BATCH Number of threads to use for batches/prompt processing. @@ -294,16 +294,6 @@ ExLlamaV2: --num_experts_per_token NUM_EXPERTS_PER_TOKEN Number of experts to use for generation. Applies to MoE models like Mixtral. --enable_tp Enable Tensor Parallelism (TP) in ExLlamaV2. -AutoGPTQ: - --triton Use triton. - --no_inject_fused_mlp Triton mode only: disable the use of fused MLP, which will use less VRAM at the cost of slower inference. - --no_use_cuda_fp16 This can make models faster on some systems. - --desc_act For models that do not have a quantize_config.json, this parameter is used to define whether to set desc_act or not in BaseQuantizeConfig. - --disable_exllama Disable ExLlama kernel, which can improve inference speed on some systems. 
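The flag listing in this README section is generated from argparse groups defined in modules/shared.py (the removed AutoGPTQ entries continue below). A minimal, hypothetical sketch of how such grouped --help output is wired up; the groups and flags shown are a small illustrative subset, not the full parser:

    import argparse

    parser = argparse.ArgumentParser(description='Text generation web UI')

    # Illustrative groups mirroring the README layout.
    group = parser.add_argument_group('Model loader')
    group.add_argument('--loader', type=str, help='Choose the model loader manually.')

    group = parser.add_argument_group('llama.cpp')
    group.add_argument('--flash-attn', action='store_true', help='Use flash-attention.')
    group.add_argument('--n_ctx', type=int, default=2048, help='Size of the prompt context.')

    args = parser.parse_args(['--loader', 'llama.cpp', '--flash-attn'])
    print(args.loader, args.flash_attn, args.n_ctx)

Each add_argument_group call becomes one titled section in the --help text, which is why deleting the AutoGPTQ group in this patch removes the whole corresponding block from the README.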
- --disable_exllamav2 Disable ExLlamav2 kernel. - --wbits WBITS Load a pre-quantized model with specified precision in bits. 2, 3, 4 and 8 are supported. - --groupsize GROUPSIZE Group size. - HQQ: --hqq-backend HQQ_BACKEND Backend for the HQQ loader. Valid options: PYTORCH, PYTORCH_COMPILE, ATEN. @@ -343,6 +333,8 @@ API: --api-port API_PORT The listening port for the API. --api-key API_KEY API authentication key. --admin-key ADMIN_KEY API authentication key for admin tasks like loading and unloading models. If not set, will be the same as --api-key. + --api-enable-ipv6 Enable IPv6 for the API + --api-disable-ipv4 Disable IPv4 for the API --nowebui Do not launch the Gradio UI. Useful for launching the API in standalone mode. Multimodal: diff --git a/modules/shared.py b/modules/shared.py index 4d873cb9..93cd2272 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -86,7 +86,7 @@ group.add_argument('--idle-timeout', type=int, default=0, help='Unload model aft # Model loader group = parser.add_argument_group('Model loader') -group.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, llamacpp_HF, ExLlamav2_HF, ExLlamav2.') +group.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, llamacpp_HF, ExLlamav2_HF, ExLlamav2, HQQ, TensorRT-LLM.') # Transformers/Accelerate group = parser.add_argument_group('Transformers/Accelerate') From 02db4b0d06e9573de9e399b49006f882b996571b Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 10 Jan 2025 15:05:08 -0800 Subject: [PATCH 08/22] Bump transformers to 4.48 --- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index 6539161c..c7ced3df 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,7 +21,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.47.* +transformers==4.48.* tqdm wandb diff --git a/requirements_amd.txt b/requirements_amd.txt index 2e5f2da7..87ee93d1 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.47.* +transformers==4.48.* tqdm wandb diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index b1eb7d31..fa2f5ca7 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.47.* +transformers==4.48.* tqdm wandb diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 6a9bf7f7..e9838295 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.47.* +transformers==4.48.* tqdm wandb diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index d8928d58..bef02feb 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.47.* +transformers==4.48.* tqdm wandb diff --git 
a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 84658a11..32f1a50a 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.47.* +transformers==4.48.* tqdm wandb diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 5944d5a7..938848bf 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.47.* +transformers==4.48.* tqdm wandb diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index fda4292d..e18cbe64 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -21,7 +21,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.47.* +transformers==4.48.* tqdm wandb diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index 45003f0d..a034ee61 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.47.* +transformers==4.48.* tqdm wandb From 3a722a36c85f31f7d5d4529b8dfea3faec7b9c37 Mon Sep 17 00:00:00 2001 From: oobabooga Date: Sat, 11 Jan 2025 12:55:19 -0300 Subject: [PATCH 09/22] Use morphdom to make chat streaming 1902381098231% faster (#6653) --- js/main.js | 3 +-- js/morphdom/morphdom-umd.min.js | 1 + modules/block_requests.py | 1 + modules/ui_chat.py | 26 +++++++++++++++++++++++--- 4 files changed, 26 insertions(+), 5 deletions(-) create mode 100644 js/morphdom/morphdom-umd.min.js diff --git a/js/main.js b/js/main.js index efb84238..ab2499d4 100644 --- a/js/main.js +++ b/js/main.js @@ -147,10 +147,9 @@ const observer = new MutationObserver(function(mutations) { doSyntaxHighlighting(); - if(!isScrolled) { + if (!isScrolled && targetElement.scrollTop !== targetElement.scrollHeight) { targetElement.scrollTop = targetElement.scrollHeight; } - }); // Configure the observer to watch for changes in the subtree and attributes diff --git a/js/morphdom/morphdom-umd.min.js b/js/morphdom/morphdom-umd.min.js new file mode 100644 index 00000000..6746f0e8 --- /dev/null +++ b/js/morphdom/morphdom-umd.min.js @@ -0,0 +1 @@ +(function(global,factory){typeof exports==="object"&&typeof module!=="undefined"?module.exports=factory():typeof define==="function"&&define.amd?define(factory):(global=global||self,global.morphdom=factory())})(this,function(){"use strict";var DOCUMENT_FRAGMENT_NODE=11;function morphAttrs(fromNode,toNode){var toNodeAttrs=toNode.attributes;var attr;var attrName;var attrNamespaceURI;var attrValue;var fromValue;if(toNode.nodeType===DOCUMENT_FRAGMENT_NODE||fromNode.nodeType===DOCUMENT_FRAGMENT_NODE){return}for(var i=toNodeAttrs.length-1;i>=0;i--){attr=toNodeAttrs[i];attrName=attr.name;attrNamespaceURI=attr.namespaceURI;attrValue=attr.value;if(attrNamespaceURI){attrName=attr.localName||attrName;fromValue=fromNode.getAttributeNS(attrNamespaceURI,attrName);if(fromValue!==attrValue){if(attr.prefix==="xmlns"){attrName=attr.name}fromNode.setAttributeNS(attrNamespaceURI,attrName,attrValue)}}else{fromValue=fromNode.getAttribute(attrName);if(fromValue!==attrValue){fromNode.setAttribute(attrName,attrValue)}}}var fromNodeAttrs=fromNode.attributes;for(var 
d=fromNodeAttrs.length-1;d>=0;d--){attr=fromNodeAttrs[d];attrName=attr.name;attrNamespaceURI=attr.namespaceURI;if(attrNamespaceURI){attrName=attr.localName||attrName;if(!toNode.hasAttributeNS(attrNamespaceURI,attrName)){fromNode.removeAttributeNS(attrNamespaceURI,attrName)}}else{if(!toNode.hasAttribute(attrName)){fromNode.removeAttribute(attrName)}}}}var range;var NS_XHTML="http://www.w3.org/1999/xhtml";var doc=typeof document==="undefined"?undefined:document;var HAS_TEMPLATE_SUPPORT=!!doc&&"content"in doc.createElement("template");var HAS_RANGE_SUPPORT=!!doc&&doc.createRange&&"createContextualFragment"in doc.createRange();function createFragmentFromTemplate(str){var template=doc.createElement("template");template.innerHTML=str;return template.content.childNodes[0]}function createFragmentFromRange(str){if(!range){range=doc.createRange();range.selectNode(doc.body)}var fragment=range.createContextualFragment(str);return fragment.childNodes[0]}function createFragmentFromWrap(str){var fragment=doc.createElement("body");fragment.innerHTML=str;return fragment.childNodes[0]}function toElement(str){str=str.trim();if(HAS_TEMPLATE_SUPPORT){return createFragmentFromTemplate(str)}else if(HAS_RANGE_SUPPORT){return createFragmentFromRange(str)}return createFragmentFromWrap(str)}function compareNodeNames(fromEl,toEl){var fromNodeName=fromEl.nodeName;var toNodeName=toEl.nodeName;var fromCodeStart,toCodeStart;if(fromNodeName===toNodeName){return true}fromCodeStart=fromNodeName.charCodeAt(0);toCodeStart=toNodeName.charCodeAt(0);if(fromCodeStart<=90&&toCodeStart>=97){return fromNodeName===toNodeName.toUpperCase()}else if(toCodeStart<=90&&fromCodeStart>=97){return toNodeName===fromNodeName.toUpperCase()}else{return false}}function createElementNS(name,namespaceURI){return!namespaceURI||namespaceURI===NS_XHTML?doc.createElement(name):doc.createElementNS(namespaceURI,name)}function moveChildren(fromEl,toEl){var curChild=fromEl.firstChild;while(curChild){var nextChild=curChild.nextSibling;toEl.appendChild(curChild);curChild=nextChild}return toEl}function syncBooleanAttrProp(fromEl,toEl,name){if(fromEl[name]!==toEl[name]){fromEl[name]=toEl[name];if(fromEl[name]){fromEl.setAttribute(name,"")}else{fromEl.removeAttribute(name)}}}var specialElHandlers={OPTION:function(fromEl,toEl){var parentNode=fromEl.parentNode;if(parentNode){var parentName=parentNode.nodeName.toUpperCase();if(parentName==="OPTGROUP"){parentNode=parentNode.parentNode;parentName=parentNode&&parentNode.nodeName.toUpperCase()}if(parentName==="SELECT"&&!parentNode.hasAttribute("multiple")){if(fromEl.hasAttribute("selected")&&!toEl.selected){fromEl.setAttribute("selected","selected");fromEl.removeAttribute("selected")}parentNode.selectedIndex=-1}}syncBooleanAttrProp(fromEl,toEl,"selected")},INPUT:function(fromEl,toEl){syncBooleanAttrProp(fromEl,toEl,"checked");syncBooleanAttrProp(fromEl,toEl,"disabled");if(fromEl.value!==toEl.value){fromEl.value=toEl.value}if(!toEl.hasAttribute("value")){fromEl.removeAttribute("value")}},TEXTAREA:function(fromEl,toEl){var newValue=toEl.value;if(fromEl.value!==newValue){fromEl.value=newValue}var firstChild=fromEl.firstChild;if(firstChild){var oldValue=firstChild.nodeValue;if(oldValue==newValue||!newValue&&oldValue==fromEl.placeholder){return}firstChild.nodeValue=newValue}},SELECT:function(fromEl,toEl){if(!toEl.hasAttribute("multiple")){var selectedIndex=-1;var i=0;var curChild=fromEl.firstChild;var optgroup;var 
nodeName;while(curChild){nodeName=curChild.nodeName&&curChild.nodeName.toUpperCase();if(nodeName==="OPTGROUP"){optgroup=curChild;curChild=optgroup.firstChild}else{if(nodeName==="OPTION"){if(curChild.hasAttribute("selected")){selectedIndex=i;break}i++}curChild=curChild.nextSibling;if(!curChild&&optgroup){curChild=optgroup.nextSibling;optgroup=null}}}fromEl.selectedIndex=selectedIndex}}};var ELEMENT_NODE=1;var DOCUMENT_FRAGMENT_NODE$1=11;var TEXT_NODE=3;var COMMENT_NODE=8;function noop(){}function defaultGetNodeKey(node){if(node){return node.getAttribute&&node.getAttribute("id")||node.id}}function morphdomFactory(morphAttrs){return function morphdom(fromNode,toNode,options){if(!options){options={}}if(typeof toNode==="string"){if(fromNode.nodeName==="#document"||fromNode.nodeName==="HTML"||fromNode.nodeName==="BODY"){var toNodeHtml=toNode;toNode=doc.createElement("html");toNode.innerHTML=toNodeHtml}else{toNode=toElement(toNode)}}else if(toNode.nodeType===DOCUMENT_FRAGMENT_NODE$1){toNode=toNode.firstElementChild}var getNodeKey=options.getNodeKey||defaultGetNodeKey;var onBeforeNodeAdded=options.onBeforeNodeAdded||noop;var onNodeAdded=options.onNodeAdded||noop;var onBeforeElUpdated=options.onBeforeElUpdated||noop;var onElUpdated=options.onElUpdated||noop;var onBeforeNodeDiscarded=options.onBeforeNodeDiscarded||noop;var onNodeDiscarded=options.onNodeDiscarded||noop;var onBeforeElChildrenUpdated=options.onBeforeElChildrenUpdated||noop;var skipFromChildren=options.skipFromChildren||noop;var addChild=options.addChild||function(parent,child){return parent.appendChild(child)};var childrenOnly=options.childrenOnly===true;var fromNodesLookup=Object.create(null);var keyedRemovalList=[];function addKeyedRemoval(key){keyedRemovalList.push(key)}function walkDiscardedChildNodes(node,skipKeyedNodes){if(node.nodeType===ELEMENT_NODE){var curChild=node.firstChild;while(curChild){var key=undefined;if(skipKeyedNodes&&(key=getNodeKey(curChild))){addKeyedRemoval(key)}else{onNodeDiscarded(curChild);if(curChild.firstChild){walkDiscardedChildNodes(curChild,skipKeyedNodes)}}curChild=curChild.nextSibling}}}function removeNode(node,parentNode,skipKeyedNodes){if(onBeforeNodeDiscarded(node)===false){return}if(parentNode){parentNode.removeChild(node)}onNodeDiscarded(node);walkDiscardedChildNodes(node,skipKeyedNodes)}function indexTree(node){if(node.nodeType===ELEMENT_NODE||node.nodeType===DOCUMENT_FRAGMENT_NODE$1){var curChild=node.firstChild;while(curChild){var key=getNodeKey(curChild);if(key){fromNodesLookup[key]=curChild}indexTree(curChild);curChild=curChild.nextSibling}}}indexTree(fromNode);function handleNodeAdded(el){onNodeAdded(el);var curChild=el.firstChild;while(curChild){var nextSibling=curChild.nextSibling;var key=getNodeKey(curChild);if(key){var unmatchedFromEl=fromNodesLookup[key];if(unmatchedFromEl&&compareNodeNames(curChild,unmatchedFromEl)){curChild.parentNode.replaceChild(unmatchedFromEl,curChild);morphEl(unmatchedFromEl,curChild)}else{handleNodeAdded(curChild)}}else{handleNodeAdded(curChild)}curChild=nextSibling}}function cleanupFromEl(fromEl,curFromNodeChild,curFromNodeKey){while(curFromNodeChild){var fromNextSibling=curFromNodeChild.nextSibling;if(curFromNodeKey=getNodeKey(curFromNodeChild)){addKeyedRemoval(curFromNodeKey)}else{removeNode(curFromNodeChild,fromEl,true)}curFromNodeChild=fromNextSibling}}function morphEl(fromEl,toEl,childrenOnly){var toElKey=getNodeKey(toEl);if(toElKey){delete fromNodesLookup[toElKey]}if(!childrenOnly){var 
beforeUpdateResult=onBeforeElUpdated(fromEl,toEl);if(beforeUpdateResult===false){return}else if(beforeUpdateResult instanceof HTMLElement){fromEl=beforeUpdateResult;indexTree(fromEl)}morphAttrs(fromEl,toEl);onElUpdated(fromEl);if(onBeforeElChildrenUpdated(fromEl,toEl)===false){return}}if(fromEl.nodeName!=="TEXTAREA"){morphChildren(fromEl,toEl)}else{specialElHandlers.TEXTAREA(fromEl,toEl)}}function morphChildren(fromEl,toEl){var skipFrom=skipFromChildren(fromEl,toEl);var curToNodeChild=toEl.firstChild;var curFromNodeChild=fromEl.firstChild;var curToNodeKey;var curFromNodeKey;var fromNextSibling;var toNextSibling;var matchingFromEl;outer:while(curToNodeChild){toNextSibling=curToNodeChild.nextSibling;curToNodeKey=getNodeKey(curToNodeChild);while(!skipFrom&&curFromNodeChild){fromNextSibling=curFromNodeChild.nextSibling;if(curToNodeChild.isSameNode&&curToNodeChild.isSameNode(curFromNodeChild)){curToNodeChild=toNextSibling;curFromNodeChild=fromNextSibling;continue outer}curFromNodeKey=getNodeKey(curFromNodeChild);var curFromNodeType=curFromNodeChild.nodeType;var isCompatible=undefined;if(curFromNodeType===curToNodeChild.nodeType){if(curFromNodeType===ELEMENT_NODE){if(curToNodeKey){if(curToNodeKey!==curFromNodeKey){if(matchingFromEl=fromNodesLookup[curToNodeKey]){if(fromNextSibling===matchingFromEl){isCompatible=false}else{fromEl.insertBefore(matchingFromEl,curFromNodeChild);if(curFromNodeKey){addKeyedRemoval(curFromNodeKey)}else{removeNode(curFromNodeChild,fromEl,true)}curFromNodeChild=matchingFromEl;curFromNodeKey=getNodeKey(curFromNodeChild)}}else{isCompatible=false}}}else if(curFromNodeKey){isCompatible=false}isCompatible=isCompatible!==false&&compareNodeNames(curFromNodeChild,curToNodeChild);if(isCompatible){morphEl(curFromNodeChild,curToNodeChild)}}else if(curFromNodeType===TEXT_NODE||curFromNodeType==COMMENT_NODE){isCompatible=true;if(curFromNodeChild.nodeValue!==curToNodeChild.nodeValue){curFromNodeChild.nodeValue=curToNodeChild.nodeValue}}}if(isCompatible){curToNodeChild=toNextSibling;curFromNodeChild=fromNextSibling;continue outer}if(curFromNodeKey){addKeyedRemoval(curFromNodeKey)}else{removeNode(curFromNodeChild,fromEl,true)}curFromNodeChild=fromNextSibling}if(curToNodeKey&&(matchingFromEl=fromNodesLookup[curToNodeKey])&&compareNodeNames(matchingFromEl,curToNodeChild)){if(!skipFrom){addChild(fromEl,matchingFromEl)}morphEl(matchingFromEl,curToNodeChild)}else{var onBeforeNodeAddedResult=onBeforeNodeAdded(curToNodeChild);if(onBeforeNodeAddedResult!==false){if(onBeforeNodeAddedResult){curToNodeChild=onBeforeNodeAddedResult}if(curToNodeChild.actualize){curToNodeChild=curToNodeChild.actualize(fromEl.ownerDocument||doc)}addChild(fromEl,curToNodeChild);handleNodeAdded(curToNodeChild)}}curToNodeChild=toNextSibling;curFromNodeChild=fromNextSibling}cleanupFromEl(fromEl,curFromNodeChild,curFromNodeKey);var specialElHandler=specialElHandlers[fromEl.nodeName];if(specialElHandler){specialElHandler(fromEl,toEl)}}var morphedNode=fromNode;var morphedNodeType=morphedNode.nodeType;var toNodeType=toNode.nodeType;if(!childrenOnly){if(morphedNodeType===ELEMENT_NODE){if(toNodeType===ELEMENT_NODE){if(!compareNodeNames(fromNode,toNode)){onNodeDiscarded(fromNode);morphedNode=moveChildren(fromNode,createElementNS(toNode.nodeName,toNode.namespaceURI))}}else{morphedNode=toNode}}else if(morphedNodeType===TEXT_NODE||morphedNodeType===COMMENT_NODE){if(toNodeType===morphedNodeType){if(morphedNode.nodeValue!==toNode.nodeValue){morphedNode.nodeValue=toNode.nodeValue}return 
morphedNode}else{morphedNode=toNode}}}if(morphedNode===toNode){onNodeDiscarded(fromNode)}else{if(toNode.isSameNode&&toNode.isSameNode(morphedNode)){return}morphEl(morphedNode,toNode,childrenOnly);if(keyedRemovalList){for(var i=0,len=keyedRemovalList.length;i<len;i++){var elToRemove=fromNodesLookup[keyedRemovalList[i]];if(elToRemove){removeNode(elToRemove,elToRemove.parentNode,false)}}}if(!childrenOnly&&morphedNode!==fromNode&&fromNode.parentNode){if(morphedNode.actualize){morphedNode=morphedNode.actualize(fromNode.ownerDocument||doc)}fromNode.parentNode.replaceChild(morphedNode,fromNode)}return morphedNode}}var morphdom=morphdomFactory(morphAttrs);return morphdom}));
diff --git a/modules/block_requests.py b/modules/block_requests.py
--- a/modules/block_requests.py
+++ b/modules/block_requests.py
@@ -57,6 +57,7 @@ def my_open(*args, **kwargs):
             '\n        <script src="file/js/katex/katex.min.js"></script>'
             '\n        <script src="file/js/katex/auto-render.min.js"></script>'
+            '\n        <script src="file/js/morphdom/morphdom-umd.min.js"></script>'
             f'\n        <script src="file/js/save_files.js?v={cache_buster}"></script>'
             '\n        <script src="file/js/switch_tabs.js"></script>'
             '\n        <script src="file/js/show_controls.js"></script>'
diff --git a/modules/ui_chat.py b/modules/ui_chat.py
index b92dd9ae..61be17e3 100644
--- a/modules/ui_chat.py
+++ b/modules/ui_chat.py
@@ -20,7 +20,7 @@ def create_ui():
     shared.gradio['Chat input'] = gr.State()
     shared.gradio['history'] = gr.JSON(visible=False)
 
-    with gr.Tab('Chat', elem_id='chat-tab'):
+    with gr.Tab('Chat', id='Chat', elem_id='chat-tab'):
         with gr.Row(elem_id='past-chats-row', elem_classes=['pretty_scrollbar']):
             with gr.Column():
                 with gr.Row(elem_id='past-chats-buttons'):
@@ -46,8 +46,8 @@ def create_ui():
 
         with gr.Row():
             with gr.Column(elem_id='chat-col'):
-                shared.gradio['display'] = gr.HTML(value=chat_html_wrapper({'internal': [], 'visible': []}, '', '', 'chat', 'cai-chat', ''))
-
+                shared.gradio['html_display'] = gr.HTML(value=chat_html_wrapper({'internal': [], 'visible': []}, '', '', 'chat', 'cai-chat', ''), visible=True)
+                shared.gradio['display'] = gr.Textbox(value="", visible=False)  # Hidden buffer
                 with gr.Row(elem_id="chat-input-row"):
                     with gr.Column(scale=1, elem_id='gr-hover-container'):
                         gr.HTML(value='<div class="hover-element" onclick="void(0)"><span id="hover-element-button">&#9776;</span><div class="hover-menu" id="hover-menu"></div></div>', elem_id='gr-hover')
@@ -180,6 +180,26 @@ def create_event_handlers():
     shared.input_params = gradio(inputs)
     shared.reload_inputs = gradio(reload_arr)
 
+    # Morph HTML updates instead of updating everything
+    shared.gradio['display'].change(None, gradio('display'), None,
+      js="""
+      (text) => {
+        morphdom(
+          document.getElementById('chat').parentNode,
+          '<div class="prose svelte-1ybaih5">' + text + '</div>',
+          {
+            onBeforeElUpdated: function(fromEl, toEl) {
+              if (fromEl.isEqualNode(toEl)) {
+                return false; // Skip identical nodes
+              }
+              return true; // Update only if nodes differ
+            }
+          }
+        );
+      }
+      """
+    )
+
     shared.gradio['Generate'].click(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
         lambda x: (x, ''), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then(
From 58342740a5b061c2836f46d93dd70832f894c6e9 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 11 Jan 2025 07:59:49 -0800
Subject: [PATCH 10/22] Bump flash-attn to 2.7.3

---
 requirements.txt        | 8 ++++----
 requirements_noavx2.txt | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index c7ced3df..9bc5956a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -55,7 +55,7 @@ https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+
 https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
-https://github.com/oobabooga/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu12torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt
index e18cbe64..1755ac24 100644
--- a/requirements_noavx2.txt
+++ b/requirements_noavx2.txt
@@ -55,7 +55,7 @@ https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+
 https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
-https://github.com/oobabooga/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu12torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
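A note on the pins above: the `; platform_system == ...` suffixes are PEP 508 environment markers, and pip evaluates each one against the running interpreter before it will consider a URL. Which clause applies locally can be checked with the packaging library (the marker string here is copied from the requirement lines above):

from packaging.markers import Marker

marker = Marker('platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"')
print(marker.evaluate())  # True only on a Linux x86_64 host running Python 3.11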
From a5d64b586da0ce39c36d01a59d991fbc76e16362 Mon Sep 17 00:00:00 2001
From: oobabooga
Date: Sat, 11 Jan 2025 16:59:21 -0300
Subject: [PATCH 11/22] Add a "copy" button below each message (#6654)

---
 css/html_instruct_style.css |  2 ++
 css/main.css                | 53 +++++++++++++++++++++++++++-
 modules/block_requests.py   |  3 ++-
 modules/html_generator.py   | 70 ++++++++++++++++++++++++-------------
 modules/ui.py               |  2 ++
 5 files changed, 104 insertions(+), 26 deletions(-)

diff --git a/css/html_instruct_style.css b/css/html_instruct_style.css
index dcc19c29..fcd0558f 100644
--- a/css/html_instruct_style.css
+++ b/css/html_instruct_style.css
@@ -48,12 +48,14 @@
 .chat .user-message {
     background: #f4f4f4;
     padding: 1.5rem 1rem;
+    padding-bottom: 2rem;
     border-radius: 0;
     border-bottom-right-radius: 0;
 }
 
 .chat .assistant-message {
     padding: 1.5rem 1rem;
+    padding-bottom: 2rem;
     border-radius: 0;
     border: 0;
 }
diff --git a/css/main.css b/css/main.css
index 9d99a876..48c6727a 100644
--- a/css/main.css
+++ b/css/main.css
@@ -1142,7 +1142,6 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
 }
 
 .dark svg {
-    fill: white;
     color: white;
 }
 
@@ -1221,3 +1220,55 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
         background: var(--light-theme-gray);
     }
 }
+
+/* ----------------------------------------------
+  Copy button for chat messages
+---------------------------------------------- */
+.message .text,
+.message .text-you,
+.message .text-bot,
+.user-message .text,
+.assistant-message .text {
+    position: relative;
+}
+
+.message, .user-message, .assistant-message {
+    position: relative;
+}
+
+.copy-button {
+    position: absolute;
+    bottom: -23px;
+    left: 0;
+    padding: 0;
+    border: none;
+    border-radius: 3px;
+    cursor: pointer;
+    opacity: 0;
+    display: flex;
+    align-items: center;
+    transition: opacity 0.2s;
+}
+
+.message:hover .copy-button,
+.user-message:hover .copy-button,
+.assistant-message:hover .copy-button {
+    opacity: 1;
+}
+
+.copy-button svg {
+    stroke: rgb(156 163 175);
+    transition: stroke 0.2s;
+}
+
+.copy-button:hover svg {
+    stroke: rgb(107 114 128);
+}
+
+.dark .copy-button svg {
+    stroke: rgb(156 163 175);
+}
+
+.dark .copy-button:hover svg {
+    stroke: rgb(209 213 219);
+}
diff --git a/modules/block_requests.py b/modules/block_requests.py
index 35f983cf..29fc6633 100644
--- a/modules/block_requests.py
+++ b/modules/block_requests.py
@@ -3,7 +3,7 @@
 import io
 
 import requests
 
-from modules import shared
+from modules import shared, ui
 from modules.logging_colors import logger
 
 original_open = open
@@ -58,6 +58,7 @@ def my_open(*args, **kwargs):
             '\n        <script src="file/js/morphdom/morphdom-umd.min.js"></script>'
             f'\n        <script src="file/js/save_files.js?v={cache_buster}"></script>'
             '\n        <script src="file/js/switch_tabs.js"></script>'
+            f'\n        <script src="file/js/global_scope_js.js?v={cache_buster}"></script>'
             '\n        <script src="file/js/show_controls.js"></script>'
         )
diff --git a/modules/html_generator.py b/modules/html_generator.py
index e3550ed5..b565c63a 100644
--- a/modules/html_generator.py
+++ b/modules/html_generator.py
@@ -292,24 +292,34 @@ def get_image_cache(path):
     return image_cache[path][1]
 
 
+copy_svg = '''<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect width="14" height="14" x="8" y="8" rx="2" ry="2"></rect><path d="M4 16c-1.1 0-2-.9-2-2V4c0-1.1.9-2 2-2h10c1.1 0 2 .9 2 2"></path></svg>'''
+copy_button = f'<button class="copy-button" title="Copy" onclick="copyToClipboard(this)">{copy_svg}</button>'
+
 
 def generate_instruct_html(history):
     output = f'<style>{instruct_css}</style><div class="chat" id="chat">'
-    for i, _row in enumerate(history):
-        row = [convert_to_markdown_wrapped(entry, use_cache=i != len(history) - 1) for entry in _row]
-        if row[0]:  # Don't display empty user messages
+    for i in range(len(history['visible'])):
+        row_visible = history['visible'][i]
+        row_internal = history['internal'][i]
+        converted_visible = [convert_to_markdown_wrapped(entry, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
+
+        if converted_visible[0]:  # Don't display empty user messages
             output += (
-                f'<div class="user-message">'
+                f'<div class="user-message" '
+                f'data-raw="{html.escape(row_internal[0], quote=True)}">'
                 f'<div class="text">'
-                f'<div class="message-body">{row[0]}</div>'
+                f'<div class="message-body">{converted_visible[0]}</div>'
+                f'{copy_button}'
                 f'</div>'
                 f'</div>'
             )
 
         output += (
-            f'<div class="assistant-message">'
+            f'<div class="assistant-message" '
+            f'data-raw="{html.escape(row_internal[1], quote=True)}">'
             f'<div class="text">'
-            f'<div class="message-body">{row[1]}</div>'
+            f'<div class="message-body">{converted_visible[1]}</div>'
+            f'{copy_button}'
             f'</div>'
             f'</div>'
         )
@@ -332,26 +342,32 @@ def generate_cai_chat_html(history, name1, name2, style, character, reset_cache=False):
         if Path("cache/pfp_me.png").exists() else ''
     )
 
-    for i, _row in enumerate(history):
-        row = [convert_to_markdown_wrapped(entry, use_cache=i != len(history) - 1) for entry in _row]
+    for i in range(len(history['visible'])):
+        row_visible = history['visible'][i]
+        row_internal = history['internal'][i]
+        converted_visible = [convert_to_markdown_wrapped(entry, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
 
-        if row[0]:  # Don't display empty user messages
+        if converted_visible[0]:  # Don't display empty user messages
             output += (
-                f'<div class="message">'
+                f'<div class="message" '
+                f'data-raw="{html.escape(row_internal[0], quote=True)}">'
                 f'<div class="circle-you">{img_me}</div>'
                 f'<div class="text">'
                 f'<div class="username">{name1}</div>'
-                f'<div class="message-body">{row[0]}</div>'
+                f'<div class="message-body">{converted_visible[0]}</div>'
+                f'{copy_button}'
                 f'</div>'
                 f'</div>'
             )
 
         output += (
-            f'<div class="message">'
+            f'<div class="message" '
+            f'data-raw="{html.escape(row_internal[1], quote=True)}">'
            f'<div class="circle-bot">{img_bot}</div>'
             f'<div class="text">'
             f'<div class="username">{name2}</div>'
-            f'<div class="message-body">{row[1]}</div>'
+            f'<div class="message-body">{converted_visible[1]}</div>'
+            f'{copy_button}'
             f'</div>'
             f'</div>'
         )
@@ -363,22 +379,28 @@ def generate_chat_html(history, name1, name2, reset_cache=False):
     output = f'<style>{chat_styles["wpp"]}</style><div class="chat" id="chat">'
-    for i, _row in enumerate(history):
-        row = [convert_to_markdown_wrapped(entry, use_cache=i != len(history) - 1) for entry in _row]
+    for i in range(len(history['visible'])):
+        row_visible = history['visible'][i]
+        row_internal = history['internal'][i]
+        converted_visible = [convert_to_markdown_wrapped(entry, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
 
-        if row[0]:  # Don't display empty user messages
+        if converted_visible[0]:  # Don't display empty user messages
             output += (
-                f'<div class="message">'
+                f'<div class="message" '
+                f'data-raw="{html.escape(row_internal[0], quote=True)}">'
                 f'<div class="text-you">'
-                f'<div class="message-body">{row[0]}</div>'
+                f'<div class="message-body">{converted_visible[0]}</div>'
+                f'{copy_button}'
                 f'</div>'
                 f'</div>'
             )
 
         output += (
-            f'<div class="message">'
+            f'<div class="message" '
+            f'data-raw="{html.escape(row_internal[1], quote=True)}">'
             f'<div class="text-bot">'
-            f'<div class="message-body">{row[1]}</div>'
+            f'<div class="message-body">{converted_visible[1]}</div>'
+            f'{copy_button}'
             f'</div>'
             f'</div>'
         )
@@ -389,8 +411,8 @@ def chat_html_wrapper(history, name1, name2, mode, style, character, reset_cache=False):
     if mode == 'instruct':
-        return generate_instruct_html(history['visible'])
+        return generate_instruct_html(history)
     elif style == 'wpp':
-        return generate_chat_html(history['visible'], name1, name2)
+        return generate_chat_html(history, name1, name2)
     else:
-        return generate_cai_chat_html(history['visible'], name1, name2, style, character, reset_cache)
+        return generate_cai_chat_html(history, name1, name2, style, character, reset_cache)
diff --git a/modules/ui.py b/modules/ui.py
index 4f7ee785..df948a14 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -19,6 +19,8 @@ with open(Path(__file__).resolve().parent / '../css/highlightjs/highlightjs-copy.min.css', 'r') as f:
     css += f.read()
 with open(Path(__file__).resolve().parent / '../js/main.js', 'r') as f:
     js = f.read()
+with open(Path(__file__).resolve().parent / '../js/global_scope_js.js', 'r') as f:
+    global_scope_js = f.read()
 with open(Path(__file__).resolve().parent / '../js/save_files.js', 'r') as f:
     save_files_js = f.read()
 with open(Path(__file__).resolve().parent / '../js/switch_tabs.js', 'r') as f:
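A note on the data-raw mechanism introduced above: html.escape(..., quote=True) makes arbitrary message text safe to sit inside the double-quoted attribute, and the browser's getAttribute("data-raw") returns the unescaped original, so the copy button yields the raw message rather than the rendered HTML. The round trip can be sanity-checked in plain Python (a sketch, not part of the patch):

import html

raw = 'Use "quotes", <tags> & backslashes \\ freely'
attr = html.escape(raw, quote=True)
print(attr)                        # Use &quot;quotes&quot;, &lt;tags&gt; &amp; backslashes \ freely
print(html.unescape(attr) == raw)  # True -- what getAttribute() hands back to copyToClipboard()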
From 1b9121e5b87625edbbc13d2dc0e42624173553ca Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 11 Jan 2025 12:41:41 -0800
Subject: [PATCH 12/22] Add a "refresh" button below the last message, add a
 missing file

---
 css/main.css              | 29 +++++++++++++++++++----------
 js/global_scope_js.js     | 23 +++++++++++++++++++++++
 modules/html_generator.py |  7 ++++++-
 3 files changed, 48 insertions(+), 11 deletions(-)
 create mode 100644 js/global_scope_js.js

diff --git a/css/main.css b/css/main.css
index 48c6727a..63681979 100644
--- a/css/main.css
+++ b/css/main.css
@@ -1236,11 +1236,10 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
     position: relative;
 }
 
-.copy-button {
+.footer-button {
     position: absolute;
-    bottom: -23px;
-    left: 0;
     padding: 0;
+    margin: 0;
     border: none;
     border-radius: 3px;
     cursor: pointer;
@@ -1250,25 +1249,35 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
     transition: opacity 0.2s;
 }
 
+.footer-button#copy-button {
+    bottom: -23px;
+    left: 0;
+}
+
+.footer-button#refresh-button {
+    bottom: -23px;
+    left: 25px;
+}
+
-.message:hover .copy-button,
-.user-message:hover .copy-button,
-.assistant-message:hover .copy-button {
+.message:hover .footer-button,
+.user-message:hover .footer-button,
+.assistant-message:hover .footer-button {
     opacity: 1;
 }
 
-.copy-button svg {
+.footer-button svg {
     stroke: rgb(156 163 175);
     transition: stroke 0.2s;
 }
 
-.copy-button:hover svg {
+.footer-button:hover svg {
     stroke: rgb(107 114 128);
 }
 
-.dark .copy-button svg {
+.dark .footer-button svg {
     stroke: rgb(156 163 175);
 }
 
-.dark .copy-button:hover svg {
+.dark .footer-button:hover svg {
     stroke: rgb(209 213 219);
 }
diff --git a/js/global_scope_js.js b/js/global_scope_js.js
new file mode 100644
index 00000000..79b673d7
--- /dev/null
+++ b/js/global_scope_js.js
@@ -0,0 +1,23 @@
+function copyToClipboard(element) {
+  if (!element) return;
+
+  const messageElement = element.closest(".message, .user-message, .assistant-message");
+  if (!messageElement) return;
+
+  const rawText = messageElement.getAttribute("data-raw");
+  if (!rawText) return;
+
+  navigator.clipboard.writeText(rawText).then(function() {
+    const originalSvg = element.innerHTML;
+    element.innerHTML = "<svg xmlns=\"http://www.w3.org/2000/svg\" width=\"20\" height=\"20\" viewBox=\"0 0 24 24\" fill=\"none\" stroke=\"currentColor\" stroke-width=\"2\" stroke-linecap=\"round\" stroke-linejoin=\"round\"><path d=\"M20 6 9 17l-5-5\"></path></svg>";
+    setTimeout(() => {
+      element.innerHTML = originalSvg;
+    }, 1000);
+
+  }).catch(function(err) {
+    console.error("Failed to copy text: ", err);
+  });
+}
+
+function regenerateClick() {
+    document.getElementById("Regenerate").click();
+}
diff --git a/modules/html_generator.py b/modules/html_generator.py
index b565c63a..79a8dc64 100644
--- a/modules/html_generator.py
+++ b/modules/html_generator.py
@@ -293,7 +293,9 @@ def get_image_cache(path):
 
 copy_svg = '''<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect width="14" height="14" x="8" y="8" rx="2" ry="2"></rect><path d="M4 16c-1.1 0-2-.9-2-2V4c0-1.1.9-2 2-2h10c1.1 0 2 .9 2 2"></path></svg>'''
-copy_button = f'<button class="copy-button" title="Copy" onclick="copyToClipboard(this)">{copy_svg}</button>'
+refresh_svg = '''<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="m2 9 3-3 3 3"></path><path d="M13 18H7a2 2 0 0 1-2-2V6"></path><path d="m22 15-3 3-3-3"></path><path d="M11 6h6a2 2 0 0 1 2 2v10"></path></svg>'''
+copy_button = f'<button class="footer-button" id="copy-button" title="Copy" onclick="copyToClipboard(this)">{copy_svg}</button>'
+refresh_button = f'<button class="footer-button" id="refresh-button" title="Regenerate" onclick="regenerateClick()">{refresh_svg}</button>'
 
 def generate_instruct_html(history):
     output = f'<style>{instruct_css}</style><div class="chat" id="chat">'
@@ -320,6 +322,7 @@
             f'<div class="text">'
             f'<div class="message-body">{converted_visible[1]}</div>'
             f'{copy_button}'
+            f'{refresh_button if i == len(history["visible"]) - 1 else ""}'
             f'</div>'
             f'</div>'
         )
@@ -368,6 +371,7 @@
             f'<div class="username">{name2}</div>'
             f'<div class="message-body">{converted_visible[1]}</div>'
             f'{copy_button}'
+            f'{refresh_button if i == len(history["visible"]) - 1 else ""}'
             f'</div>'
             f'</div>'
         )
@@ -401,6 +405,7 @@
             f'<div class="text">'
             f'<div class="message-body">{converted_visible[1]}</div>'
             f'{copy_button}'
+            f'{refresh_button if i == len(history["visible"]) - 1 else ""}'
             f'</div>'
             f'</div>'
         )

From f1797f4323b6eba98521d14c1cc011ce45f4db42 Mon Sep 17 00:00:00 2001
From: mamei16
Date: Sat, 11 Jan 2025 22:39:44 +0100
Subject: [PATCH 13/22] Unescape backslashes in html_output (#6648)

---
 modules/html_generator.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/modules/html_generator.py b/modules/html_generator.py
index 79a8dc64..3ddad51d 100644
--- a/modules/html_generator.py
+++ b/modules/html_generator.py
@@ -239,6 +239,9 @@ def convert_to_markdown(string):
     pattern = re.compile(r'<code[^>]*>(.*?)</code>', re.DOTALL)
     html_output = pattern.sub(lambda x: html.unescape(x.group()), html_output)
 
+    # Unescape backslashes
+    html_output = html_output.replace('\\\\', '\\')
+
     # Add "long-list" class to <ul> or <ol> containing a long <li> item
     html_output = add_long_list_class(html_output)
 

From a0492ce325b951a9c000fa3cad45806adc8d8926 Mon Sep 17 00:00:00 2001
From: oobabooga
Date: Sat, 11 Jan 2025 21:14:10 -0300
Subject: [PATCH 14/22] Optimize syntax highlighting during chat streaming
 (#6655)

---
 js/global_scope_js.js |  2 +-
 js/main.js            | 55 +++++++++++++++----------------------
 modules/ui_chat.py    | 40 ++++++++++++++++------------
 3 files changed, 44 insertions(+), 53 deletions(-)

diff --git a/js/global_scope_js.js b/js/global_scope_js.js
index 79b673d7..983d60f1 100644
--- a/js/global_scope_js.js
+++ b/js/global_scope_js.js
@@ -19,5 +19,5 @@ function copyToClipboard(element) {
 }
 
 function regenerateClick() {
-    document.getElementById("Regenerate").click();
+  document.getElementById("Regenerate").click();
 }
diff --git a/js/main.js b/js/main.js
index ab2499d4..c5c47d04 100644
--- a/js/main.js
+++ b/js/main.js
@@ -177,47 +177,30 @@ function isElementVisibleOnScreen(element) {
   );
 }
 
-function getVisibleMessagesIndexes() {
-  const elements = document.querySelectorAll(".message-body");
-  const visibleIndexes = [];
-
-  elements.forEach((element, index) => {
-    if (isElementVisibleOnScreen(element) && !element.hasAttribute("data-highlighted")) {
-      visibleIndexes.push(index);
-    }
-  });
-
-  return visibleIndexes;
-}
-
 function doSyntaxHighlighting() {
-  const indexes = getVisibleMessagesIndexes();
-  const elements = document.querySelectorAll(".message-body");
+  const messageBodies = document.querySelectorAll(".message-body");
 
-  if (indexes.length > 0) {
+  if (messageBodies.length > 0) {
     observer.disconnect();
 
-    indexes.forEach((index) => {
-      const element = elements[index];
+    messageBodies.forEach((messageBody) => {
+      if (isElementVisibleOnScreen(messageBody)) {
+        // Handle both code and math in a single pass through each message
+        const codeBlocks = messageBody.querySelectorAll("pre code:not([data-highlighted])");
+        codeBlocks.forEach((codeBlock) => {
+          hljs.highlightElement(codeBlock);
+          codeBlock.setAttribute("data-highlighted", "true");
+        });
 
-      // Tag this element to prevent it from being highlighted twice
-      element.setAttribute("data-highlighted", "true");
-
-      // Perform syntax highlighting
-      const codeBlocks = element.querySelectorAll("pre code");
-
-      codeBlocks.forEach((codeBlock) => {
-        hljs.highlightElement(codeBlock);
-      });
-
-      renderMathInElement(element, {
-        delimiters: [
-          { left: "$$", right: "$$", display: true },
-          { left: "$", right: "$", display: false },
-          { left: "\\(", right: "\\)", display: false },
-          { left: "\\[", right: "\\]", display: true },
-        ],
-      });
+        renderMathInElement(messageBody, {
+          delimiters: [
+            { left: "$$", right: "$$", display: true },
+            { left: "$", right: "$", display: false },
+            { left: "\\(", right: "\\)", display: false },
+            { left: "\\[", right: "\\]", display: true },
+          ],
+        });
+      }
     });
 
     observer.observe(targetElement, config);
diff --git a/modules/ui_chat.py b/modules/ui_chat.py
index 61be17e3..8497f7df 100644
--- a/modules/ui_chat.py
+++ b/modules/ui_chat.py
@@ -182,23 +182,31 @@ def create_event_handlers():
 
     # Morph HTML updates instead of updating everything
     shared.gradio['display'].change(None, gradio('display'), None,
-      js="""
-      (text) => {
-        morphdom(
-          document.getElementById('chat').parentNode,
-          '<div class="prose svelte-1ybaih5">' + text + '</div>',
-          {
-            onBeforeElUpdated: function(fromEl, toEl) {
-              if (fromEl.isEqualNode(toEl)) {
-                return false; // Skip identical nodes
-              }
-              return true; // Update only if nodes differ
-            }
-          }
-        );
-      }
+      js="""
+      (text) => {
+        morphdom(
+          document.getElementById('chat').parentNode,
+          '<div class="prose svelte-1ybaih5">' + text + '</div>',
+          {
+            onBeforeElUpdated: function(fromEl, toEl) {
+              if (fromEl.tagName === 'PRE' && fromEl.querySelector('code[data-highlighted]')) {
+                const fromCode = fromEl.querySelector('code');
+                const toCode = toEl.querySelector('code');
+
+                if (fromCode && toCode && fromCode.textContent === toCode.textContent) {
+                  // If the content is the same, preserve the entire <pre> element
      +                  toEl.className = fromEl.className;
      +                  toEl.innerHTML = fromEl.innerHTML;
+                  return false; // Skip updating the <pre> element
      +                }
      +              }
      +              return !fromEl.isEqualNode(toEl); // Update only if nodes differ
                   }
      -        """
      -    )
      +          }
      +        );
      +      }
      +      """
      +    );
       
           shared.gradio['Generate'].click(
               ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
      
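A note on the optimization: the `pre code:not([data-highlighted])` selector turns highlighting into per-block memoization -- each code block is processed once, tagged, and then skipped on every later streaming pass, while the morphdom handler above preserves the tagged block when its text has not changed. The same caching pattern in Python terms (a sketch; expensive_highlight is a hypothetical stand-in for hljs.highlightElement):

_cache = {}  # block id -> (source text, highlighted result)


def expensive_highlight(text):
    return f'<span class="hljs">{text}</span>'  # stand-in for the real, costly pass


def highlight_block(block_id, text):
    cached = _cache.get(block_id)
    if cached is not None and cached[0] == text:
        return cached[1]  # unchanged content: skip the expensive pass entirely

    result = expensive_highlight(text)  # first visit, or the content changed
    _cache[block_id] = (text, result)
    return result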
      From ed16374ecee6f547c49fe4af0630294add317399 Mon Sep 17 00:00:00 2001
      From: Lounger <4087076+TheLounger@users.noreply.github.com>
      Date: Sun, 12 Jan 2025 03:35:22 +0100
      Subject: [PATCH 15/22] Fix the gallery extension (#6656)
      
      ---
       extensions/gallery/script.py | 5 +++--
       1 file changed, 3 insertions(+), 2 deletions(-)
      
      diff --git a/extensions/gallery/script.py b/extensions/gallery/script.py
      index ff0242c8..54f9c745 100644
      --- a/extensions/gallery/script.py
      +++ b/extensions/gallery/script.py
      @@ -93,10 +93,11 @@ def generate_html():
       
       def filter_cards(filter_str=''):
           if filter_str == '':
      -        return cards
      +        return gr.Dataset(samples=cards)
       
           filter_upper = filter_str.upper()
      -    return [k for k in cards if filter_upper in k[1].upper()]
      +    filtered = [k for k in cards if filter_upper in k[1].upper()]
      +    return gr.Dataset(samples=filtered)
       
       
       def select_character(evt: gr.SelectData):
      
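A note on the fix: recent Gradio releases expect an update object rather than a bare list when a gr.Dataset's samples are changed from a callback, which is why the plain-list returns stopped filtering the gallery. A minimal standalone version of the corrected pattern (assumes Gradio 4.x; the sample data is made up):

import gradio as gr

samples = [['Alice card', 'Alice'], ['Bob card', 'Bob']]


def filter_cards(filter_str=''):
    if filter_str == '':
        return gr.Dataset(samples=samples)

    filtered = [s for s in samples if filter_str.upper() in s[1].upper()]
    return gr.Dataset(samples=filtered)  # a bare list would no longer update the component


with gr.Blocks() as demo:
    query = gr.Textbox(label='Filter')
    dataset = gr.Dataset(components=[gr.Textbox(visible=False), gr.Textbox(visible=False)], samples=samples)
    query.change(filter_cards, query, dataset)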
      From facb4155d4a0d343b6f0cbae93f112456f20875b Mon Sep 17 00:00:00 2001
      From: oobabooga <112222186+oobabooga@users.noreply.github.com>
      Date: Sat, 11 Jan 2025 20:57:28 -0800
      Subject: [PATCH 16/22] Fix morphdom leaving ghost elements behind
      
      ---
       css/main.css              | 4 ++--
       modules/html_generator.py | 4 ++--
       2 files changed, 4 insertions(+), 4 deletions(-)
      
      diff --git a/css/main.css b/css/main.css
      index 63681979..1a7efe70 100644
      --- a/css/main.css
      +++ b/css/main.css
      @@ -1249,12 +1249,12 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
           transition: opacity 0.2s;
       }
       
      -.footer-button#copy-button {
      +.footer-button.footer-copy-button {
           bottom: -23px;
           left: 0;
       }
       
      -.footer-button#refresh-button {
      +.footer-button.footer-refresh-button {
           bottom: -23px;
           left: 25px;
       }
      diff --git a/modules/html_generator.py b/modules/html_generator.py
      index 3ddad51d..245c833c 100644
      --- a/modules/html_generator.py
      +++ b/modules/html_generator.py
      @@ -297,8 +297,8 @@ def get_image_cache(path):
       
 copy_svg = '''<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect width="14" height="14" x="8" y="8" rx="2" ry="2"></rect><path d="M4 16c-1.1 0-2-.9-2-2V4c0-1.1.9-2 2-2h10c1.1 0 2 .9 2 2"></path></svg>'''
 refresh_svg = '''<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="m2 9 3-3 3 3"></path><path d="M13 18H7a2 2 0 0 1-2-2V6"></path><path d="m22 15-3 3-3-3"></path><path d="M11 6h6a2 2 0 0 1 2 2v10"></path></svg>'''
-copy_button = f'<button class="footer-button" id="copy-button" title="Copy" onclick="copyToClipboard(this)">{copy_svg}</button>'
-refresh_button = f'<button class="footer-button" id="refresh-button" title="Regenerate" onclick="regenerateClick()">{refresh_svg}</button>'
+copy_button = f'<button class="footer-button footer-copy-button" title="Copy" onclick="copyToClipboard(this)">{copy_svg}</button>'
+refresh_button = f'<button class="footer-button footer-refresh-button" title="Regenerate" onclick="regenerateClick()">{refresh_svg}</button>'
       
       def generate_instruct_html(history):
     output = f'<style>{instruct_css}</style><div class="chat" id="chat">'
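A note on the root cause: morphdom keys elements by id (see defaultGetNodeKey in the vendored file, which returns node.getAttribute("id") || node.id), so id="copy-button" on every message made all the buttons collide on one key in fromNodesLookup, and nodes were matched and moved incorrectly, leaving ghost copies behind. The collision is the usual duplicate-dictionary-key problem, easy to see in Python terms (hypothetical data):

buttons = [
    {'id': 'copy-button', 'message': 1},
    {'id': 'copy-button', 'message': 2},  # the same id on every message's button
]

lookup = {}
for b in buttons:
    lookup[b['id']] = b  # later entries silently overwrite earlier ones

print(lookup['copy-button'])  # {'id': 'copy-button', 'message': 2} -- message 1's button can no longer be matched
# Keying the styling off classes (and dropping the ids) sidesteps the collision.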
From c85e5e58d08a18e86e94106740d482250b4c0594 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Mon, 13 Jan 2025 06:20:42 -0800
Subject: [PATCH 17/22] UI: move the new morphdom code to a .js file

---
 js/global_scope_js.js | 24 ++++++++++++++++++++++++
 modules/ui_chat.py    | 27 +--------------------------
 2 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/js/global_scope_js.js b/js/global_scope_js.js
index 983d60f1..f4d9c673 100644
--- a/js/global_scope_js.js
+++ b/js/global_scope_js.js
@@ -21,3 +21,27 @@ function copyToClipboard(element) {
 function regenerateClick() {
   document.getElementById("Regenerate").click();
 }
+
+function handleMorphdomUpdate(text) {
+  morphdom(
+    document.getElementById("chat").parentNode,
+    "<div class=\"prose svelte-1ybaih5\">" + text + "</div>",
+    {
+      onBeforeElUpdated: function(fromEl, toEl) {
+        if (fromEl.tagName === "PRE" && fromEl.querySelector("code[data-highlighted]")) {
+          const fromCode = fromEl.querySelector("code");
+          const toCode = toEl.querySelector("code");
+
+          if (fromCode && toCode && fromCode.textContent === toCode.textContent) {
+            // If the content is the same, preserve the entire <pre> element
      +            toEl.className = fromEl.className;
      +            toEl.innerHTML = fromEl.innerHTML;
+            return false; // Skip updating the <pre> element
      +          }
      +        }
      +        return !fromEl.isEqualNode(toEl); // Update only if nodes differ
      +      }
      +    }
      +  );
      +}
      diff --git a/modules/ui_chat.py b/modules/ui_chat.py
      index 8497f7df..e80fa33b 100644
      --- a/modules/ui_chat.py
      +++ b/modules/ui_chat.py
      @@ -181,32 +181,7 @@ def create_event_handlers():
           shared.reload_inputs = gradio(reload_arr)
       
           # Morph HTML updates instead of updating everything
      -    shared.gradio['display'].change(None, gradio('display'), None,
      -      js="""
      -      (text) => {
      -        morphdom(
      -          document.getElementById('chat').parentNode,
-          '<div class="prose svelte-1ybaih5">' + text + '</div>',
-          {
-            onBeforeElUpdated: function(fromEl, toEl) {
-              if (fromEl.tagName === 'PRE' && fromEl.querySelector('code[data-highlighted]')) {
-                const fromCode = fromEl.querySelector('code');
-                const toCode = toEl.querySelector('code');
-
-                if (fromCode && toCode && fromCode.textContent === toCode.textContent) {
-                  // If the content is the same, preserve the entire <pre> element
      -                  toEl.className = fromEl.className;
      -                  toEl.innerHTML = fromEl.innerHTML;
-                  return false; // Skip updating the <pre> element
      -                }
      -              }
      -              return !fromEl.isEqualNode(toEl); // Update only if nodes differ
      -            }
      -          }
      -        );
      -      }
      -      """
      -    );
      +    shared.gradio['display'].change(None, gradio('display'), None, js="(text) => handleMorphdomUpdate(text)")
       
           shared.gradio['Generate'].click(
               ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
      
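A note on the pattern: with handleMorphdomUpdate living in global_scope_js.js, the Gradio listener becomes a pure client-side shim -- fn=None, no Python round trip, just a js= expression invoked on every change of the hidden buffer. In isolation the pattern looks roughly like this (a sketch assuming Gradio 4.x; handleUpdate is a placeholder for a function loaded into the page's global scope, the way ui.py injects global_scope_js):

import gradio as gr

with gr.Blocks(head='<script>function handleUpdate(t) { console.log(t); }</script>') as demo:
    buffer = gr.Textbox(visible=False)  # hidden buffer, like shared.gradio['display']
    # fn=None: nothing runs server-side; the js expression runs in the browser
    buffer.change(None, buffer, None, js='(text) => handleUpdate(text)')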
      From 53b838d6c5637406533371cef3b440549f43f4de Mon Sep 17 00:00:00 2001
      From: Underscore <47636331+Th-Underscore@users.noreply.github.com>
      Date: Mon, 13 Jan 2025 16:01:50 -0500
      Subject: [PATCH 18/22] HTML: Fix quote pair RegEx matching for all quote types
       (#6661)
      
      ---
       modules/html_generator.py | 11 +++++++++--
       1 file changed, 9 insertions(+), 2 deletions(-)
      
      diff --git a/modules/html_generator.py b/modules/html_generator.py
      index 245c833c..c14a28b4 100644
      --- a/modules/html_generator.py
      +++ b/modules/html_generator.py
      @@ -73,7 +73,6 @@ def fix_newlines(string):
       
       
       def replace_quotes(text):
      -
           # Define a list of quote pairs (opening and closing), using HTML entities
           quote_pairs = [
         ('&quot;', '&quot;'),  # Double quotes
      @@ -84,14 +83,22 @@ def replace_quotes(text):
         ('&lsquo;', '&rsquo;'),  # Alternative single quotes
         ('&#8220;', '&#8221;'),  # Unicode quotes (numeric entities)
         ('&#x201C;', '&#x201D;'),  # Unicode quotes (hex entities)
      +        ('\u201C', '\u201D'),  # Unicode quotes (literal chars)
           ]
       
           # Create a regex pattern that matches any of the quote pairs, including newlines
           pattern = '|'.join(f'({re.escape(open_q)})(.*?)({re.escape(close_q)})' for open_q, close_q in quote_pairs)
       
     # Replace matched patterns with <q> tags, keeping original quotes
-    replaced_text = re.sub(pattern, lambda m: f'<q>{m.group(1)}{m.group(2)}{m.group(3)}</q>', text, flags=re.DOTALL)
      +    def replacer(m):
      +        # Find the first non-None group set
      +        for i in range(1, len(m.groups()), 3):  # Step through each sub-pattern's groups
      +            if m.group(i):  # If this sub-pattern matched
+                return f'<q>{m.group(i)}{m.group(i + 1)}{m.group(i + 2)}</q>'
       
      +        return m.group(0)  # Fallback (shouldn't happen)
      +
      +    replaced_text = re.sub(pattern, replacer, text, flags=re.DOTALL)
           return replaced_text
       
       
      
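A note on the fix: joining N quote pairs with `|` yields three capture groups per alternative, so a match on pair k populates only groups 3k+1 through 3k+3 -- the old lambda's m.group(1) was only ever correct for the first pair. The stride-3 scan can be checked on its own (same technique as the patch, reduced to two pairs):

import re

quote_pairs = [('"', '"'), ('“', '”')]
pattern = '|'.join(f'({re.escape(o)})(.*?)({re.escape(c)})' for o, c in quote_pairs)


def replacer(m):
    for i in range(1, len(m.groups()), 3):  # step through each sub-pattern's three groups
        if m.group(i):  # the pair that actually matched
            return f'<q>{m.group(i)}{m.group(i + 1)}{m.group(i + 2)}</q>'

    return m.group(0)


print(re.sub(pattern, replacer, 'say "hi" and “bye”', flags=re.DOTALL))
# say <q>"hi"</q> and <q>“bye”</q>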
      From c832953ff723aa5dafcb3aac4f19acd0df56bb5b Mon Sep 17 00:00:00 2001
      From: oobabooga <112222186+oobabooga@users.noreply.github.com>
      Date: Tue, 14 Jan 2025 05:59:55 -0800
      Subject: [PATCH 19/22] UI: Activate auto_max_new_tokens by default
      
      ---
       modules/shared.py      | 2 +-
       settings-template.yaml | 2 +-
       2 files changed, 2 insertions(+), 2 deletions(-)
      
      diff --git a/modules/shared.py b/modules/shared.py
      index 93cd2272..f1e12673 100644
      --- a/modules/shared.py
      +++ b/modules/shared.py
      @@ -47,7 +47,7 @@ settings = {
           'prompt_lookup_num_tokens': 0,
           'max_tokens_second': 0,
           'max_updates_second': 0,
      -    'auto_max_new_tokens': False,
      +    'auto_max_new_tokens': True,
           'ban_eos_token': False,
           'add_bos_token': True,
           'skip_special_tokens': True,
      diff --git a/settings-template.yaml b/settings-template.yaml
      index b61dc4e0..93a64abb 100644
      --- a/settings-template.yaml
      +++ b/settings-template.yaml
      @@ -19,7 +19,7 @@ max_new_tokens_max: 4096
       prompt_lookup_num_tokens: 0
       max_tokens_second: 0
       max_updates_second: 0
      -auto_max_new_tokens: false
      +auto_max_new_tokens: true
       ban_eos_token: false
       add_bos_token: true
       skip_special_tokens: true
      
      From f843cb475bd3b880838a6a3a6ff200a2e290b115 Mon Sep 17 00:00:00 2001
      From: oobabooga <112222186+oobabooga@users.noreply.github.com>
      Date: Tue, 14 Jan 2025 08:12:51 -0800
      Subject: [PATCH 20/22] UI: update a help message
      
      ---
       modules/ui_chat.py | 2 +-
       1 file changed, 1 insertion(+), 1 deletion(-)
      
      diff --git a/modules/ui_chat.py b/modules/ui_chat.py
      index e80fa33b..395300d0 100644
      --- a/modules/ui_chat.py
      +++ b/modules/ui_chat.py
      @@ -164,7 +164,7 @@ def create_chat_settings_ui():
               with gr.Row():
                   with gr.Column():
                       shared.gradio['custom_system_message'] = gr.Textbox(value=shared.settings['custom_system_message'], lines=2, label='Custom system message', info='If not empty, will be used instead of the default one.', elem_classes=['add_scrollbar'])
      -                shared.gradio['instruction_template_str'] = gr.Textbox(value='', label='Instruction template', lines=24, info='Change this according to the model/LoRA that you are using. Used in instruct and chat-instruct modes.', elem_classes=['add_scrollbar', 'monospace'])
      +                shared.gradio['instruction_template_str'] = gr.Textbox(value='', label='Instruction template', lines=24, info='This gets autodetected; you usually don\'t need to change it. Used in instruct and chat-instruct modes.', elem_classes=['add_scrollbar', 'monospace'])
                       with gr.Row():
                           shared.gradio['send_instruction_to_default'] = gr.Button('Send to default', elem_classes=['small-button'])
                           shared.gradio['send_instruction_to_notebook'] = gr.Button('Send to notebook', elem_classes=['small-button'])
      
      From 1ef748fb203730aae92b8f28f44abb68699accb4 Mon Sep 17 00:00:00 2001
      From: oobabooga <112222186+oobabooga@users.noreply.github.com>
      Date: Tue, 14 Jan 2025 16:44:15 -0800
      Subject: [PATCH 21/22] Lint
      
      ---
       extensions/gallery/script.py | 1 -
       modules/html_generator.py    | 1 +
       2 files changed, 1 insertion(+), 1 deletion(-)
      
      diff --git a/extensions/gallery/script.py b/extensions/gallery/script.py
      index 54f9c745..76be4a58 100644
      --- a/extensions/gallery/script.py
      +++ b/extensions/gallery/script.py
      @@ -5,7 +5,6 @@ import gradio as gr
       from modules.html_generator import get_image_cache
       from modules.shared import gradio
       
      -
       params = {
           'items_per_page': 50,
           'open': False,
      diff --git a/modules/html_generator.py b/modules/html_generator.py
      index c14a28b4..29973412 100644
      --- a/modules/html_generator.py
      +++ b/modules/html_generator.py
@@ -307,6 +307,7 @@ refresh_svg = '''<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="m2 9 3-3 3 3"></path><path d="M13 18H7a2 2 0 0 1-2-2V6"></path><path d="m22 15-3 3-3-3"></path><path d="M11 6h6a2 2 0 0 1 2 2v10"></path></svg>'''
 copy_button = f'<button class="footer-button footer-copy-button" title="Copy" onclick="copyToClipboard(this)">{copy_svg}</button>'
 refresh_button = f'<button class="footer-button footer-refresh-button" title="Regenerate" onclick="regenerateClick()">{refresh_svg}</button>'
       
      +
       def generate_instruct_html(history):
     output = f'<style>{instruct_css}</style><div class="chat" id="chat">'

From 5d257397678e03694ded7eca2a9639d04368039b Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Tue, 14 Jan 2025 16:59:36 -0800
Subject: [PATCH 22/22] Make the update wizards nice

---
 update_wizard_linux.sh    | 2 +-
 update_wizard_macos.sh    | 2 +-
 update_wizard_windows.bat | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/update_wizard_linux.sh b/update_wizard_linux.sh
index 3ada9a1e..c81d9d9b 100755
--- a/update_wizard_linux.sh
+++ b/update_wizard_linux.sh
@@ -23,4 +23,4 @@ source "$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh" # otherwise conda complains a
 conda activate "$INSTALL_ENV_DIR"
 
 # update installer env
-python one_click.py --update-wizard && echo -e "\nDone!"
+python one_click.py --update-wizard && echo -e "\nHave a great day!"
diff --git a/update_wizard_macos.sh b/update_wizard_macos.sh
index c5add61e..f58bb9e9 100755
--- a/update_wizard_macos.sh
+++ b/update_wizard_macos.sh
@@ -23,4 +23,4 @@ source "$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh" # otherwise conda complains a
 conda activate "$INSTALL_ENV_DIR"
 
 # update installer env
-python one_click.py --update-wizard && echo -e "\nDone!"
+python one_click.py --update-wizard && echo -e "\nHave a great day!"
diff --git a/update_wizard_windows.bat b/update_wizard_windows.bat
index 2b23f322..fac251a7 100755
--- a/update_wizard_windows.bat
+++ b/update_wizard_windows.bat
@@ -30,7 +30,7 @@ call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%" || (
 
 @rem update installer env
 call python one_click.py --update-wizard && (
 	echo.
-	echo Done!
+	echo Have a great day!
 )
 
 :end