From 15bfe36619d5c8b9b2be32e14f71bd47802a2223 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 9 Jan 2025 15:58:14 -0800 Subject: [PATCH 01/22] Installer: update miniconda to 24.11.1 (experimental) --- start_linux.sh | 2 +- start_macos.sh | 2 +- start_windows.bat | 4 ++-- wsl.sh | 2 +- 4 files changed, 5 insertions(+), 5 deletions(-) diff --git a/start_linux.sh b/start_linux.sh index 792daca8..256604cb 100755 --- a/start_linux.sh +++ b/start_linux.sh @@ -19,7 +19,7 @@ esac INSTALL_DIR="$(pwd)/installer_files" CONDA_ROOT_PREFIX="$(pwd)/installer_files/conda" INSTALL_ENV_DIR="$(pwd)/installer_files/env" -MINICONDA_DOWNLOAD_URL="https://repo.anaconda.com/miniconda/Miniconda3-py310_23.3.1-0-Linux-${OS_ARCH}.sh" +MINICONDA_DOWNLOAD_URL="https://repo.anaconda.com/miniconda/Miniconda3-py311_24.11.1-0-Linux-${OS_ARCH}.sh" conda_exists="F" # figure out whether git and conda needs to be installed diff --git a/start_macos.sh b/start_macos.sh index 6761f531..02f1011a 100755 --- a/start_macos.sh +++ b/start_macos.sh @@ -19,7 +19,7 @@ esac INSTALL_DIR="$(pwd)/installer_files" CONDA_ROOT_PREFIX="$(pwd)/installer_files/conda" INSTALL_ENV_DIR="$(pwd)/installer_files/env" -MINICONDA_DOWNLOAD_URL="https://repo.anaconda.com/miniconda/Miniconda3-py310_23.3.1-0-MacOSX-${OS_ARCH}.sh" +MINICONDA_DOWNLOAD_URL="https://repo.anaconda.com/miniconda/Miniconda3-py311_24.11.1-0-MacOSX-${OS_ARCH}.sh" conda_exists="F" # figure out whether git and conda needs to be installed diff --git a/start_windows.bat b/start_windows.bat index ebcc1997..c167cdc5 100755 --- a/start_windows.bat +++ b/start_windows.bat @@ -25,8 +25,8 @@ set TEMP=%cd%\installer_files set INSTALL_DIR=%cd%\installer_files set CONDA_ROOT_PREFIX=%cd%\installer_files\conda set INSTALL_ENV_DIR=%cd%\installer_files\env -set MINICONDA_DOWNLOAD_URL=https://repo.anaconda.com/miniconda/Miniconda3-py310_23.3.1-0-Windows-x86_64.exe -set MINICONDA_CHECKSUM=307194e1f12bbeb52b083634e89cc67db4f7980bd542254b43d3309eaf7cb358 +set MINICONDA_DOWNLOAD_URL=https://repo.anaconda.com/miniconda/Miniconda3-py311_24.11.1-0-Windows-x86_64.exe +set MINICONDA_CHECKSUM=43dcbcc315ff91edf959e002cd2f1ede38c64b999fefcc951bccf2ed69c9e8bb set conda_exists=F @rem figure out whether git and conda needs to be installed diff --git a/wsl.sh b/wsl.sh index 7b17132f..c5d28b16 100755 --- a/wsl.sh +++ b/wsl.sh @@ -26,7 +26,7 @@ fi INSTALL_DIR="$INSTALL_DIR_PREFIX/text-generation-webui" CONDA_ROOT_PREFIX="$INSTALL_DIR/installer_files/conda" INSTALL_ENV_DIR="$INSTALL_DIR/installer_files/env" -MINICONDA_DOWNLOAD_URL="https://repo.anaconda.com/miniconda/Miniconda3-py310_23.3.1-0-Linux-x86_64.sh" +MINICONDA_DOWNLOAD_URL="https://repo.anaconda.com/miniconda/Miniconda3-py311_24.11.1-0-Linux-x86_64.sh" conda_exists="F" # environment isolation From da6d868f58e519ec90796eef137cef26e67d4fd8 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 9 Jan 2025 16:11:46 -0800 Subject: [PATCH 02/22] Remove old deprecated flags (~6 months or more) --- modules/shared.py | 21 ++++++++++++++------- 1 file changed, 14 insertions(+), 7 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index a0070b1f..f478df05 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -203,11 +203,6 @@ group.add_argument('--multimodal-pipeline', type=str, default=None, help='The mu # Deprecated parameters group = parser.add_argument_group('Deprecated') -group.add_argument('--model_type', type=str, help='DEPRECATED') 
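A minimal sketch of the pattern this hunk moves to (the remaining removals and the new list follow below): deprecation warnings driven by a single list of flag names and raised only for flags the user actually passed on the command line. The flag names and warning text here are illustrative, not the project's exact code.

    import sys

    # Illustrative subset; the real project enumerates its own deprecated flags.
    deprecated_args = ['cache_4bit', 'cache_8bit', 'chat_buttons']

    # Collect only the deprecated flags that were explicitly provided.
    provided_arguments = []
    for arg in sys.argv[1:]:
        arg = arg.lstrip('-').replace('-', '_')
        if arg in deprecated_args:
            provided_arguments.append(arg)

    # Warn once per deprecated flag that was actually used.
    for k in provided_arguments:
        print(f'The --{k} flag has been deprecated and will be removed soon. Please remove that flag.')

Checking membership in the provided-arguments list, rather than the truthiness of each parsed value, avoids both missed warnings for flags whose default is falsy and spurious warnings for defaults.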
-group.add_argument('--pre_layer', type=int, nargs='+', help='DEPRECATED') -group.add_argument('--checkpoint', type=str, help='DEPRECATED') -group.add_argument('--monkey-patch', action='store_true', help='DEPRECATED') -group.add_argument('--no_inject_fused_attention', action='store_true', help='DEPRECATED') group.add_argument('--cache_4bit', action='store_true', help='DEPRECATED') group.add_argument('--cache_8bit', action='store_true', help='DEPRECATED') group.add_argument('--chat-buttons', action='store_true', help='DEPRECATED') @@ -228,14 +223,26 @@ for arg in sys.argv[1:]: if hasattr(args, arg): provided_arguments.append(arg) -deprecated_args = [] +deprecated_args = [ + 'cache_4bit', + 'cache_8bit', + 'chat_buttons', + 'triton', + 'no_inject_fused_mlp', + 'no_use_cuda_fp16', + 'desc_act', + 'disable_exllama', + 'disable_exllamav2', + 'wbits', + 'groupsize' +] def do_cmd_flags_warnings(): # Deprecation warnings for k in deprecated_args: - if getattr(args, k): + if k in provided_arguments: logger.warning(f'The --{k} flag has been deprecated and will be removed soon. Please remove that flag.') # Security warnings From 7fe46764fb2d675c4e281592a1328293c0c56b07 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 10 Jan 2025 07:07:41 -0800 Subject: [PATCH 03/22] Improve the --help message about --tensorcores as well --- modules/shared.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/shared.py b/modules/shared.py index f478df05..89263205 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -116,7 +116,7 @@ group.add_argument('--quant_type', type=str, default='nf4', help='quant_type for # llama.cpp group = parser.add_argument_group('llama.cpp') group.add_argument('--flash-attn', action='store_true', help='Use flash-attention.') -group.add_argument('--tensorcores', action='store_true', help='NVIDIA only: use llama-cpp-python compiled with tensor cores support. This may increase performance on newer cards.') +group.add_argument('--tensorcores', action='store_true', help='NVIDIA only: use llama-cpp-python compiled without GGML_CUDA_FORCE_MMQ. This may improve performance on newer cards.') group.add_argument('--n_ctx', type=int, default=2048, help='Size of the prompt context.') group.add_argument('--threads', type=int, default=0, help='Number of threads to use.') group.add_argument('--threads-batch', type=int, default=0, help='Number of threads to use for batches/prompt processing.') From 17aa97248fa3aa60d46a1a355ec8d2f5705bad38 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 10 Jan 2025 07:22:25 -0800 Subject: [PATCH 04/22] Installer: make the hashsum verification more robust on Windows --- start_windows.bat | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/start_windows.bat b/start_windows.bat index c167cdc5..960cfdb7 100755 --- a/start_windows.bat +++ b/start_windows.bat @@ -41,10 +41,18 @@ if "%conda_exists%" == "F" ( mkdir "%INSTALL_DIR%" call curl -Lk "%MINICONDA_DOWNLOAD_URL%" > "%INSTALL_DIR%\miniconda_installer.exe" || ( echo. && echo Miniconda failed to download. 
&& goto end ) + :: Try CertUtil first for /f %%a in ('CertUtil -hashfile "%INSTALL_DIR%\miniconda_installer.exe" SHA256 ^| find /i /v " " ^| find /i "%MINICONDA_CHECKSUM%"') do ( set "output=%%a" ) + :: If CertUtil fails, try PowerShell + if not defined output ( + for /f %%a in ('powershell -Command "if((Get-FileHash \"%INSTALL_DIR%\miniconda_installer.exe\" -Algorithm SHA256).Hash -eq ''%MINICONDA_CHECKSUM%''){echo true}"') do ( + set "output=%%a" + ) + ) + if not defined output ( echo The checksum verification for miniconda_installer.exe has failed. del "%INSTALL_DIR%\miniconda_installer.exe" From 83c426e96b6c2cd4349d38c4cd212cbb0afd2044 Mon Sep 17 00:00:00 2001 From: oobabooga Date: Fri, 10 Jan 2025 18:04:32 -0300 Subject: [PATCH 05/22] Organize internals (#6646) --- extensions/openai/typing.py | 48 +++--- modules/loaders.py | 306 ++++++++++++++++++------------------ modules/presets.py | 36 ++--- modules/shared.py | 40 ++--- modules/text_generation.py | 64 ++++++-- modules/ui.py | 162 +++++++++---------- 6 files changed, 346 insertions(+), 310 deletions(-) diff --git a/extensions/openai/typing.py b/extensions/openai/typing.py index dfac8e03..5f0e0128 100644 --- a/extensions/openai/typing.py +++ b/extensions/openai/typing.py @@ -7,48 +7,48 @@ from pydantic import BaseModel, Field class GenerationOptions(BaseModel): preset: str | None = Field(default=None, description="The name of a file under text-generation-webui/presets (without the .yaml extension). The sampling parameters that get overwritten by this option are the keys in the default_preset() function in modules/presets.py.") - min_p: float = 0 - dynamic_temperature: bool = False dynatemp_low: float = 1 dynatemp_high: float = 1 dynatemp_exponent: float = 1 smoothing_factor: float = 0 smoothing_curve: float = 1 + min_p: float = 0 top_k: int = 0 - repetition_penalty: float = 1 - repetition_penalty_range: int = 1024 typical_p: float = 1 - tfs: float = 1 - top_a: float = 0 + xtc_threshold: float = 0.1 + xtc_probability: float = 0 epsilon_cutoff: float = 0 eta_cutoff: float = 0 - guidance_scale: float = 1 - negative_prompt: str = '' + tfs: float = 1 + top_a: float = 0 + dry_multiplier: float = 0 + dry_allowed_length: int = 2 + dry_base: float = 1.75 + repetition_penalty: float = 1 + encoder_repetition_penalty: float = 1 + no_repeat_ngram_size: int = 0 + repetition_penalty_range: int = 1024 penalty_alpha: float = 0 + guidance_scale: float = 1 mirostat_mode: int = 0 mirostat_tau: float = 5 mirostat_eta: float = 0.1 - temperature_last: bool = False - do_sample: bool = True - seed: int = -1 - encoder_repetition_penalty: float = 1 - no_repeat_ngram_size: int = 0 - dry_multiplier: float = 0 - dry_base: float = 1.75 - dry_allowed_length: int = 2 - dry_sequence_breakers: str = '"\\n", ":", "\\"", "*"' - xtc_threshold: float = 0.1 - xtc_probability: float = 0 - truncation_length: int = 0 - max_tokens_second: int = 0 prompt_lookup_num_tokens: int = 0 - static_cache: bool = False - custom_token_bans: str = "" - sampler_priority: List[str] | str | None = Field(default=None, description="List of samplers where the first items will appear first in the stack. 
Example: [\"top_k\", \"temperature\", \"top_p\"].") + max_tokens_second: int = 0 + do_sample: bool = True + dynamic_temperature: bool = False + temperature_last: bool = False auto_max_new_tokens: bool = False ban_eos_token: bool = False add_bos_token: bool = True skip_special_tokens: bool = True + static_cache: bool = False + truncation_length: int = 0 + seed: int = -1 + sampler_priority: List[str] | str | None = Field(default=None, description="List of samplers where the first items will appear first in the stack. Example: [\"top_k\", \"temperature\", \"top_p\"].") + custom_token_bans: str = "" + negative_prompt: str = '' + dry_sequence_breakers: str = '"\\n", ":", "\\"", "*"' grammar_string: str = "" diff --git a/modules/loaders.py b/modules/loaders.py index 4e331dbb..cd864e40 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -7,102 +7,103 @@ from modules import shared loaders_and_params = OrderedDict({ 'Transformers': [ - 'cpu_memory', 'gpu_memory', - 'load_in_4bit', - 'load_in_8bit', - 'torch_compile', - 'bf16', - 'cpu', - 'disk', - 'auto_devices', - 'use_double_quant', - 'quant_type', - 'compute_dtype', - 'trust_remote_code', - 'no_use_fast', - 'use_flash_attention_2', - 'use_eager_attention', + 'cpu_memory', 'alpha_value', 'compress_pos_emb', - ], - 'llama.cpp': [ - 'n_ctx', - 'n_gpu_layers', - 'cache_type', - 'tensor_split', - 'n_batch', - 'threads', - 'threads_batch', - 'no_mmap', - 'mlock', - 'no_mul_mat_q', - 'rope_freq_base', - 'compress_pos_emb', + 'compute_dtype', + 'quant_type', + 'load_in_8bit', + 'load_in_4bit', + 'torch_compile', + 'use_flash_attention_2', + 'auto_devices', 'cpu', - 'numa', - 'no_offload_kqv', - 'row_split', - 'tensorcores', - 'flash_attn', - 'streaming_llm', - 'attention_sink_size', - ], - 'llamacpp_HF': [ - 'n_ctx', - 'n_gpu_layers', - 'cache_type', - 'tensor_split', - 'n_batch', - 'threads', - 'threads_batch', - 'no_mmap', - 'mlock', - 'no_mul_mat_q', - 'rope_freq_base', - 'compress_pos_emb', - 'cpu', - 'numa', - 'cfg_cache', + 'disk', + 'use_double_quant', + 'use_eager_attention', + 'bf16', + 'trust_remote_code', 'no_use_fast', - 'logits_all', - 'no_offload_kqv', - 'row_split', + ], + 'llama.cpp': [ + 'n_gpu_layers', + 'threads', + 'threads_batch', + 'n_batch', + 'n_ctx', + 'cache_type', + 'tensor_split', + 'rope_freq_base', + 'compress_pos_emb', + 'attention_sink_size', 'tensorcores', 'flash_attn', 'streaming_llm', + 'cpu', + 'row_split', + 'no_offload_kqv', + 'no_mul_mat_q', + 'no_mmap', + 'mlock', + 'numa', + ], + 'llamacpp_HF': [ + 'n_gpu_layers', + 'threads', + 'threads_batch', + 'n_batch', + 'n_ctx', + 'cache_type', + 'tensor_split', + 'rope_freq_base', + 'compress_pos_emb', 'attention_sink_size', + 'tensorcores', + 'flash_attn', + 'streaming_llm', + 'cpu', + 'row_split', + 'no_offload_kqv', + 'no_mul_mat_q', + 'no_mmap', + 'mlock', + 'numa', + 'cfg_cache', + 'logits_all', + 'trust_remote_code', + 'no_use_fast', 'llamacpp_HF_info', ], 'ExLlamav2_HF': [ - 'gpu_split', 'max_seq_len', - 'cfg_cache', + 'cache_type', + 'gpu_split', + 'alpha_value', + 'compress_pos_emb', + 'num_experts_per_token', + 'autosplit', + 'enable_tp', 'no_flash_attn', 'no_xformers', 'no_sdpa', - 'num_experts_per_token', - 'cache_type', - 'autosplit', - 'enable_tp', - 'alpha_value', - 'compress_pos_emb', + 'cfg_cache', 'trust_remote_code', 'no_use_fast', ], 'ExLlamav2': [ - 'gpu_split', 'max_seq_len', + 'cache_type', + 'gpu_split', + 'alpha_value', + 'compress_pos_emb', + 'num_experts_per_token', + 'autosplit', + 'enable_tp', 'no_flash_attn', 'no_xformers', 'no_sdpa', 
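This loaders.py diff reorders, per loader, the parameter lists that decide which UI fields are displayed (the reordered entries continue below). A tiny self-contained sketch of that registry-lookup pattern, with abridged and partly hypothetical loader names and fields:

    from collections import OrderedDict

    # Hypothetical miniature of loaders_and_params: loader name -> UI elements.
    loaders_and_params = OrderedDict({
        'llama.cpp': ['n_gpu_layers', 'n_ctx', 'flash_attn'],
        'ExLlamav2': ['max_seq_len', 'cache_type', 'gpu_split'],
    })

    def visible_elements(loader):
        # Elements to show for the selected loader; empty for unknown loaders.
        return set(loaders_and_params.get(loader, []))

    print(visible_elements('llama.cpp'))  # n_gpu_layers, n_ctx, flash_attn

Because the mapping is data rather than branching code, reordering the entries (as this patch does) changes how the lists read without changing which fields each loader exposes.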
- 'num_experts_per_token', - 'cache_type', - 'autosplit', - 'enable_tp', - 'alpha_value', - 'compress_pos_emb', 'exllamav2_info', ], 'HQQ': [ @@ -121,51 +122,51 @@ loaders_and_params = OrderedDict({ def transformers_samplers(): return { 'temperature', - 'temperature_last', - 'dynamic_temperature', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent', 'smoothing_factor', 'smoothing_curve', - 'top_p', 'min_p', + 'top_p', 'top_k', 'typical_p', + 'xtc_threshold', + 'xtc_probability', 'epsilon_cutoff', 'eta_cutoff', 'tfs', 'top_a', + 'dry_multiplier', + 'dry_allowed_length', + 'dry_base', 'repetition_penalty', - 'presence_penalty', 'frequency_penalty', - 'repetition_penalty_range', + 'presence_penalty', 'encoder_repetition_penalty', 'no_repeat_ngram_size', - 'dry_multiplier', - 'dry_base', - 'dry_allowed_length', - 'dry_sequence_breakers', - 'xtc_threshold', - 'xtc_probability', - 'seed', - 'do_sample', + 'repetition_penalty_range', 'penalty_alpha', + 'guidance_scale', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', - 'grammar_file_row', - 'grammar_string', - 'guidance_scale', - 'negative_prompt', + 'prompt_lookup_num_tokens', + 'do_sample', + 'dynamic_temperature', + 'temperature_last', + 'auto_max_new_tokens', 'ban_eos_token', - 'custom_token_bans', - 'sampler_priority', 'add_bos_token', 'skip_special_tokens', - 'auto_max_new_tokens', - 'prompt_lookup_num_tokens', 'static_cache', + 'seed', + 'sampler_priority', + 'custom_token_bans', + 'negative_prompt', + 'dry_sequence_breakers', + 'grammar_string', + 'grammar_file_row', } @@ -174,155 +175,156 @@ loaders_samplers = { 'HQQ': transformers_samplers(), 'ExLlamav2': { 'temperature', - 'temperature_last', - 'smoothing_factor', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent', - 'top_p', + 'smoothing_factor', 'min_p', + 'top_p', 'top_k', 'typical_p', + 'xtc_threshold', + 'xtc_probability', 'tfs', 'top_a', + 'dry_multiplier', + 'dry_allowed_length', + 'dry_base', 'repetition_penalty', - 'presence_penalty', 'frequency_penalty', + 'presence_penalty', 'repetition_penalty_range', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', - 'dry_multiplier', - 'dry_base', - 'dry_allowed_length', - 'dry_sequence_breakers', - 'xtc_threshold', - 'xtc_probability', - 'seed', + 'dynamic_temperature', + 'temperature_last', + 'auto_max_new_tokens', 'ban_eos_token', 'add_bos_token', - 'custom_token_bans', 'skip_special_tokens', - 'auto_max_new_tokens', + 'seed', + 'custom_token_bans', + 'dry_sequence_breakers', }, 'ExLlamav2_HF': { 'temperature', - 'temperature_last', - 'dynamic_temperature', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent', 'smoothing_factor', 'smoothing_curve', - 'top_p', 'min_p', + 'top_p', 'top_k', 'typical_p', + 'xtc_threshold', + 'xtc_probability', 'epsilon_cutoff', 'eta_cutoff', 'tfs', 'top_a', + 'dry_multiplier', + 'dry_allowed_length', + 'dry_base', 'repetition_penalty', - 'presence_penalty', 'frequency_penalty', - 'repetition_penalty_range', + 'presence_penalty', 'encoder_repetition_penalty', 'no_repeat_ngram_size', - 'dry_multiplier', - 'dry_base', - 'dry_allowed_length', - 'dry_sequence_breakers', - 'xtc_threshold', - 'xtc_probability', - 'seed', - 'do_sample', + 'repetition_penalty_range', + 'guidance_scale', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', - 'grammar_file_row', - 'grammar_string', - 'guidance_scale', - 'negative_prompt', + 'do_sample', + 'dynamic_temperature', + 'temperature_last', + 'auto_max_new_tokens', 'ban_eos_token', - 'custom_token_bans', - 'sampler_priority', 'add_bos_token', 'skip_special_tokens', - 
'auto_max_new_tokens', + 'seed', + 'sampler_priority', + 'custom_token_bans', + 'negative_prompt', + 'dry_sequence_breakers', + 'grammar_string', + 'grammar_file_row', }, 'llama.cpp': { 'temperature', - 'top_p', 'min_p', + 'top_p', 'top_k', 'typical_p', 'tfs', 'repetition_penalty', - 'presence_penalty', 'frequency_penalty', - 'seed', + 'presence_penalty', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', - 'grammar_file_row', - 'grammar_string', 'ban_eos_token', + 'seed', 'custom_token_bans', + 'grammar_string', + 'grammar_file_row', }, 'llamacpp_HF': { 'temperature', - 'temperature_last', - 'dynamic_temperature', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent', 'smoothing_factor', 'smoothing_curve', - 'top_p', 'min_p', + 'top_p', 'top_k', 'typical_p', + 'xtc_threshold', + 'xtc_probability', 'epsilon_cutoff', 'eta_cutoff', 'tfs', 'top_a', + 'dry_multiplier', + 'dry_allowed_length', + 'dry_base', 'repetition_penalty', - 'presence_penalty', 'frequency_penalty', - 'repetition_penalty_range', + 'presence_penalty', 'encoder_repetition_penalty', 'no_repeat_ngram_size', - 'dry_multiplier', - 'dry_base', - 'dry_allowed_length', - 'dry_sequence_breakers', - 'xtc_threshold', - 'xtc_probability', - 'seed', - 'do_sample', + 'repetition_penalty_range', + 'guidance_scale', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', - 'grammar_file_row', - 'grammar_string', - 'guidance_scale', - 'negative_prompt', + 'do_sample', + 'dynamic_temperature', + 'temperature_last', + 'auto_max_new_tokens', 'ban_eos_token', - 'custom_token_bans', - 'sampler_priority', 'add_bos_token', 'skip_special_tokens', - 'auto_max_new_tokens', + 'seed', + 'sampler_priority', + 'custom_token_bans', + 'negative_prompt', + 'dry_sequence_breakers', + 'grammar_string', + 'grammar_file_row', }, 'TensorRT-LLM': { 'temperature', 'top_p', 'top_k', 'repetition_penalty', - 'presence_penalty', 'frequency_penalty', - 'ban_eos_token', + 'presence_penalty', 'auto_max_new_tokens', + 'ban_eos_token', } } diff --git a/modules/presets.py b/modules/presets.py index c8118fb3..b841af53 100644 --- a/modules/presets.py +++ b/modules/presets.py @@ -13,40 +13,40 @@ from modules.logging_colors import logger def default_preset(): return { 'temperature': 1, - 'temperature_last': False, - 'dynamic_temperature': False, 'dynatemp_low': 1, 'dynatemp_high': 1, 'dynatemp_exponent': 1, 'smoothing_factor': 0, 'smoothing_curve': 1, - 'top_p': 1, 'min_p': 0, + 'top_p': 1, 'top_k': 0, - 'repetition_penalty': 1, - 'presence_penalty': 0, - 'frequency_penalty': 0, - 'repetition_penalty_range': 1024, 'typical_p': 1, - 'tfs': 1, - 'top_a': 0, + 'xtc_threshold': 0.1, + 'xtc_probability': 0, 'epsilon_cutoff': 0, 'eta_cutoff': 0, - 'guidance_scale': 1, + 'tfs': 1, + 'top_a': 0, + 'dry_multiplier': 0, + 'dry_allowed_length': 2, + 'dry_base': 1.75, + 'repetition_penalty': 1, + 'frequency_penalty': 0, + 'presence_penalty': 0, + 'encoder_repetition_penalty': 1, + 'no_repeat_ngram_size': 0, + 'repetition_penalty_range': 1024, 'penalty_alpha': 0, + 'guidance_scale': 1, 'mirostat_mode': 0, 'mirostat_tau': 5, 'mirostat_eta': 0.1, 'do_sample': True, - 'encoder_repetition_penalty': 1, - 'no_repeat_ngram_size': 0, - 'dry_multiplier': 0, - 'dry_base': 1.75, - 'dry_allowed_length': 2, + 'dynamic_temperature': False, + 'temperature_last': False, + 'sampler_priority': 
'repetition_penalty\npresence_penalty\nfrequency_penalty\ndry\ntemperature\ndynamic_temperature\nquadratic_sampling\ntop_k\ntop_p\ntypical_p\nepsilon_cutoff\neta_cutoff\ntfs\ntop_a\nmin_p\nmirostat\nxtc\nencoder_repetition_penalty\nno_repeat_ngram', 'dry_sequence_breakers': '"\\n", ":", "\\"", "*"', - 'xtc_threshold': 0.1, - 'xtc_probability': 0, - 'sampler_priority': 'repetition_penalty\npresence_penalty\nfrequency_penalty\ndry\ntemperature\ndynamic_temperature\nquadratic_sampling\ntop_k\ntop_p\ntypical_p\nepsilon_cutoff\neta_cutoff\ntfs\ntop_a\nmin_p\nmirostat\nxtc\nencoder_repetition_penalty\nno_repeat_ngram' } diff --git a/modules/shared.py b/modules/shared.py index 89263205..928747f7 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -29,39 +29,39 @@ need_restart = False # UI defaults settings = { - 'dark_theme': True, 'show_controls': True, 'start_with': '', 'mode': 'chat-instruct', 'chat_style': 'cai-chat', + 'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>', 'prompt-default': 'QA', 'prompt-notebook': 'QA', - 'preset': 'min_p', - 'max_new_tokens': 512, - 'max_new_tokens_min': 1, - 'max_new_tokens_max': 4096, - 'negative_prompt': '', - 'seed': -1, - 'truncation_length': 2048, - 'max_tokens_second': 0, - 'max_updates_second': 0, - 'prompt_lookup_num_tokens': 0, - 'static_cache': False, - 'custom_stopping_strings': '', - 'custom_token_bans': '', - 'auto_max_new_tokens': False, - 'ban_eos_token': False, - 'add_bos_token': True, - 'skip_special_tokens': True, - 'stream': True, 'character': 'Assistant', 'name1': 'You', 'user_bio': '', 'custom_system_message': '', 'instruction_template_str': "{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if not ns.found -%}\n {{- '' + 'Below is an instruction that describes a task. Write a response that appropriately completes the request.' + '\\n\\n' -}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {{- '' + message['content'] + '\\n\\n' -}}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{-'### Instruction:\\n' + message['content'] + '\\n\\n'-}}\n {%- else -%}\n {{-'### Response:\\n' + message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{-'### Response:\\n'-}}\n{%- endif -%}", 'chat_template_str': "{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {%- if message['content'] -%}\n {{- message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- if user_bio -%}\n {{- user_bio + '\\n\\n' -}}\n {%- endif -%}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{- name1 + ': ' + message['content'] + '\\n'-}}\n {%- else -%}\n {{- name2 + ': ' + message['content'] + '\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}", - 'chat-instruct_command': 'Continue the chat dialogue below. 
Write a single reply for the character "<|character|>".\n\n<|prompt|>', + 'preset': 'min_p', + 'max_new_tokens': 512, + 'max_new_tokens_min': 1, + 'max_new_tokens_max': 4096, + 'prompt_lookup_num_tokens': 0, + 'max_tokens_second': 0, + 'max_updates_second': 0, + 'auto_max_new_tokens': False, + 'ban_eos_token': False, + 'add_bos_token': True, + 'skip_special_tokens': True, + 'stream': True, + 'static_cache': False, + 'truncation_length': 2048, + 'seed': -1, + 'custom_stopping_strings': '', + 'custom_token_bans': '', + 'negative_prompt': '', 'autoload_model': False, + 'dark_theme': True, 'default_extensions': [], } diff --git a/modules/text_generation.py b/modules/text_generation.py index 3e9788b8..152b2b8d 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -287,31 +287,62 @@ def generate_reply_HF(question, original_question, seed, state, stopping_strings clear_torch_cache() generate_params = {} - for k in ['max_new_tokens', 'temperature', 'temperature_last', 'dynamic_temperature', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent', 'smoothing_factor', 'smoothing_curve', 'top_p', 'min_p', 'top_k', 'repetition_penalty', 'presence_penalty', 'frequency_penalty', 'repetition_penalty_range', 'typical_p', 'tfs', 'top_a', 'guidance_scale', 'penalty_alpha', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'do_sample', 'encoder_repetition_penalty', 'no_repeat_ngram_size', 'dry_multiplier', 'dry_base', 'dry_allowed_length', 'dry_sequence_breakers', 'xtc_threshold', 'xtc_probability']: + for k in [ + 'temperature', + 'dynatemp_low', + 'dynatemp_high', + 'dynatemp_exponent', + 'smoothing_factor', + 'smoothing_curve', + 'min_p', + 'top_p', + 'top_k', + 'typical_p', + 'xtc_threshold', + 'xtc_probability', + 'tfs', + 'top_a', + 'dry_multiplier', + 'dry_allowed_length', + 'dry_base', + 'repetition_penalty', + 'frequency_penalty', + 'presence_penalty', + 'encoder_repetition_penalty', + 'no_repeat_ngram_size', + 'repetition_penalty_range', + 'penalty_alpha', + 'guidance_scale', + 'mirostat_mode', + 'mirostat_tau', + 'mirostat_eta', + 'max_new_tokens', + 'do_sample', + 'dynamic_temperature', + 'temperature_last', + 'dry_sequence_breakers', + ]: if k in state: generate_params[k] = state[k] - if isinstance(state['sampler_priority'], list) and len(state['sampler_priority']) > 0: - generate_params['sampler_priority'] = state['sampler_priority'] - elif isinstance(state['sampler_priority'], str) and state['sampler_priority'].strip() != '': - generate_params['sampler_priority'] = [x.strip() for x in state['sampler_priority'].replace('\n', ',').split(',') if x.strip()] - - if state['negative_prompt'] != '': - generate_params['negative_prompt_ids'] = encode(state['negative_prompt']) - - if state['prompt_lookup_num_tokens'] > 0: - generate_params['prompt_lookup_num_tokens'] = state['prompt_lookup_num_tokens'] - - if state['static_cache']: - generate_params['cache_implementation'] = 'static' - for k in ['epsilon_cutoff', 'eta_cutoff']: if state[k] > 0: generate_params[k] = state[k] * 1e-4 + if state['prompt_lookup_num_tokens'] > 0: + generate_params['prompt_lookup_num_tokens'] = state['prompt_lookup_num_tokens'] + if state['ban_eos_token']: generate_params['suppress_tokens'] = [shared.tokenizer.eos_token_id] + if state['static_cache']: + generate_params['cache_implementation'] = 'static' + + if isinstance(state['sampler_priority'], list) and len(state['sampler_priority']) > 0: + generate_params['sampler_priority'] = state['sampler_priority'] + elif isinstance(state['sampler_priority'], str) and 
state['sampler_priority'].strip() != '': + generate_params['sampler_priority'] = [x.strip() for x in state['sampler_priority'].replace('\n', ',').split(',') if x.strip()] + if state['custom_token_bans']: to_ban = [int(x) for x in state['custom_token_bans'].split(',')] if len(to_ban) > 0: @@ -320,6 +351,9 @@ def generate_reply_HF(question, original_question, seed, state, stopping_strings else: generate_params['suppress_tokens'] = to_ban + if state['negative_prompt'] != '': + generate_params['negative_prompt_ids'] = encode(state['negative_prompt']) + generate_params.update({'use_cache': not shared.args.no_cache}) if shared.args.deepspeed: generate_params.update({'synced_gpus': True}) diff --git a/modules/ui.py b/modules/ui.py index e66de434..4f7ee785 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -102,55 +102,55 @@ else: def list_model_elements(): elements = [ - 'loader', 'filter_by_loader', + 'loader', 'cpu_memory', - 'auto_devices', - 'disk', - 'cpu', - 'bf16', - 'load_in_4bit', - 'load_in_8bit', - 'torch_compile', - 'trust_remote_code', - 'no_use_fast', - 'use_flash_attention_2', - 'use_eager_attention', - 'compute_dtype', - 'quant_type', - 'use_double_quant', - 'cfg_cache', - 'no_flash_attn', - 'no_xformers', - 'no_sdpa', - 'num_experts_per_token', - 'cache_type', - 'autosplit', - 'enable_tp', + 'n_gpu_layers', 'threads', 'threads_batch', 'n_batch', - 'no_mmap', - 'mlock', - 'no_mul_mat_q', - 'n_gpu_layers', - 'tensor_split', + 'hqq_backend', 'n_ctx', - 'gpu_split', 'max_seq_len', - 'compress_pos_emb', + 'cache_type', + 'tensor_split', + 'gpu_split', 'alpha_value', 'rope_freq_base', - 'numa', - 'logits_all', - 'no_offload_kqv', - 'row_split', - 'tensorcores', - 'flash_attn', - 'streaming_llm', + 'compress_pos_emb', + 'compute_dtype', + 'quant_type', 'attention_sink_size', - 'hqq_backend', + 'num_experts_per_token', + 'tensorcores', + 'load_in_8bit', + 'load_in_4bit', + 'torch_compile', + 'flash_attn', + 'use_flash_attention_2', + 'streaming_llm', + 'auto_devices', + 'cpu', + 'disk', + 'row_split', + 'no_offload_kqv', + 'no_mul_mat_q', + 'no_mmap', + 'mlock', + 'numa', + 'use_double_quant', + 'use_eager_attention', + 'bf16', + 'autosplit', + 'enable_tp', + 'no_flash_attn', + 'no_xformers', + 'no_sdpa', + 'cfg_cache', 'cpp_runner', + 'logits_all', + 'trust_remote_code', + 'no_use_fast', ] if is_torch_xpu_available(): @@ -165,87 +165,87 @@ def list_model_elements(): def list_interface_input_elements(): elements = [ - 'max_new_tokens', - 'auto_max_new_tokens', - 'max_tokens_second', - 'max_updates_second', - 'prompt_lookup_num_tokens', - 'seed', 'temperature', - 'temperature_last', - 'dynamic_temperature', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent', 'smoothing_factor', 'smoothing_curve', - 'top_p', 'min_p', + 'top_p', 'top_k', 'typical_p', - 'epsilon_cutoff', - 'eta_cutoff', - 'repetition_penalty', - 'presence_penalty', - 'frequency_penalty', - 'repetition_penalty_range', - 'encoder_repetition_penalty', - 'no_repeat_ngram_size', - 'dry_multiplier', - 'dry_base', - 'dry_allowed_length', - 'dry_sequence_breakers', 'xtc_threshold', 'xtc_probability', - 'do_sample', + 'epsilon_cutoff', + 'eta_cutoff', + 'tfs', + 'top_a', + 'dry_multiplier', + 'dry_allowed_length', + 'dry_base', + 'repetition_penalty', + 'frequency_penalty', + 'presence_penalty', + 'encoder_repetition_penalty', + 'no_repeat_ngram_size', + 'repetition_penalty_range', 'penalty_alpha', + 'guidance_scale', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', - 'grammar_string', - 'negative_prompt', - 'guidance_scale', - 
'add_bos_token', + 'max_new_tokens', + 'prompt_lookup_num_tokens', + 'max_tokens_second', + 'max_updates_second', + 'do_sample', + 'dynamic_temperature', + 'temperature_last', + 'auto_max_new_tokens', 'ban_eos_token', - 'custom_token_bans', - 'sampler_priority', - 'truncation_length', - 'custom_stopping_strings', + 'add_bos_token', 'skip_special_tokens', 'stream', 'static_cache', - 'tfs', - 'top_a', + 'truncation_length', + 'seed', + 'sampler_priority', + 'custom_stopping_strings', + 'custom_token_bans', + 'negative_prompt', + 'dry_sequence_breakers', + 'grammar_string', ] # Chat elements elements += [ - 'textbox', - 'start_with', - 'character_menu', 'history', 'search_chat', 'unique_id', + 'textbox', + 'start_with', + 'mode', + 'chat_style', + 'chat-instruct_command', + 'character_menu', + 'name2', + 'context', + 'greeting', 'name1', 'user_bio', - 'name2', - 'greeting', - 'context', - 'mode', 'custom_system_message', 'instruction_template_str', 'chat_template_str', - 'chat_style', - 'chat-instruct_command', ] # Notebook/default elements elements += [ - 'textbox-notebook', 'textbox-default', - 'output_textbox', + 'textbox-notebook', 'prompt_menu-default', 'prompt_menu-notebook', + 'output_textbox', ] # Model elements From c393f7650d558d8ee1311adb5f66cc505e73fb78 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 10 Jan 2025 13:22:18 -0800 Subject: [PATCH 06/22] Update settings-template.yaml, organize modules/shared.py --- modules/shared.py | 4 ++-- settings-template.yaml | 34 +++++++++++++++++----------------- 2 files changed, 19 insertions(+), 19 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index 928747f7..4d873cb9 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -40,8 +40,6 @@ settings = { 'name1': 'You', 'user_bio': '', 'custom_system_message': '', - 'instruction_template_str': "{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if not ns.found -%}\n {{- '' + 'Below is an instruction that describes a task. Write a response that appropriately completes the request.' 
+ '\\n\\n' -}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {{- '' + message['content'] + '\\n\\n' -}}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{-'### Instruction:\\n' + message['content'] + '\\n\\n'-}}\n {%- else -%}\n {{-'### Response:\\n' + message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{-'### Response:\\n'-}}\n{%- endif -%}", - 'chat_template_str': "{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {%- if message['content'] -%}\n {{- message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- if user_bio -%}\n {{- user_bio + '\\n\\n' -}}\n {%- endif -%}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{- name1 + ': ' + message['content'] + '\\n'-}}\n {%- else -%}\n {{- name2 + ': ' + message['content'] + '\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}", 'preset': 'min_p', 'max_new_tokens': 512, 'max_new_tokens_min': 1, @@ -63,6 +61,8 @@ settings = { 'autoload_model': False, 'dark_theme': True, 'default_extensions': [], + 'instruction_template_str': "{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if not ns.found -%}\n {{- '' + 'Below is an instruction that describes a task. Write a response that appropriately completes the request.' + '\\n\\n' -}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {{- '' + message['content'] + '\\n\\n' -}}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{-'### Instruction:\\n' + message['content'] + '\\n\\n'-}}\n {%- else -%}\n {{-'### Response:\\n' + message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{-'### Response:\\n'-}}\n{%- endif -%}", + 'chat_template_str': "{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {%- if message['content'] -%}\n {{- message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- if user_bio -%}\n {{- user_bio + '\\n\\n' -}}\n {%- endif -%}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{- name1 + ': ' + message['content'] + '\\n'-}}\n {%- else -%}\n {{- name2 + ': ' + message['content'] + '\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}", } default_settings = copy.deepcopy(settings) diff --git a/settings-template.yaml b/settings-template.yaml index d5ed47c3..b61dc4e0 100644 --- a/settings-template.yaml +++ b/settings-template.yaml @@ -1,31 +1,38 @@ -dark_theme: true show_controls: true start_with: '' mode: chat-instruct chat_style: cai-chat +chat-instruct_command: |- + Continue the chat dialogue below. Write a single reply for the character "<|character|>". 
+ + <|prompt|> prompt-default: QA prompt-notebook: QA +character: Assistant +name1: You +user_bio: '' +custom_system_message: '' preset: min_p max_new_tokens: 512 max_new_tokens_min: 1 max_new_tokens_max: 4096 -negative_prompt: '' -seed: -1 -truncation_length: 2048 +prompt_lookup_num_tokens: 0 max_tokens_second: 0 max_updates_second: 0 -prompt_lookup_num_tokens: 0 -custom_stopping_strings: '' -custom_token_bans: '' auto_max_new_tokens: false ban_eos_token: false add_bos_token: true skip_special_tokens: true stream: true static_cache: false -character: Assistant -name1: You -custom_system_message: '' +truncation_length: 2048 +seed: -1 +custom_stopping_strings: '' +custom_token_bans: '' +negative_prompt: '' +autoload_model: false +dark_theme: true +default_extensions: [] instruction_template_str: |- {%- set ns = namespace(found=false) -%} {%- for message in messages -%} @@ -67,11 +74,4 @@ chat_template_str: |- {%- endif -%} {%- endif -%} {%- endfor -%} -chat-instruct_command: |- - Continue the chat dialogue below. Write a single reply for the character "<|character|>". - <|prompt|> -autoload_model: false -gallery-items_per_page: 50 -gallery-open: false -default_extensions: [] From d2f6c0f65ff72c96999a51655b096f42d037fe32 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 10 Jan 2025 13:25:40 -0800 Subject: [PATCH 07/22] Update README --- README.md | 30 +++++++++++------------------- modules/shared.py | 2 +- 2 files changed, 12 insertions(+), 20 deletions(-) diff --git a/README.md b/README.md index 07138772..7e2dec95 100644 --- a/README.md +++ b/README.md @@ -204,17 +204,16 @@ List of command-line flags usage: server.py [-h] [--multi-user] [--character CHARACTER] [--model MODEL] [--lora LORA [LORA ...]] [--model-dir MODEL_DIR] [--lora-dir LORA_DIR] [--model-menu] [--settings SETTINGS] [--extensions EXTENSIONS [EXTENSIONS ...]] [--verbose] [--idle-timeout IDLE_TIMEOUT] [--loader LOADER] [--cpu] [--auto-devices] [--gpu-memory GPU_MEMORY [GPU_MEMORY ...]] [--cpu-memory CPU_MEMORY] [--disk] [--disk-cache-dir DISK_CACHE_DIR] [--load-in-8bit] [--bf16] [--no-cache] [--trust-remote-code] [--force-safetensors] [--no_use_fast] - [--use_flash_attention_2] [--use_eager_attention] [--load-in-4bit] [--use_double_quant] [--compute_dtype COMPUTE_DTYPE] [--quant_type QUANT_TYPE] [--flash-attn] [--tensorcores] - [--n_ctx N_CTX] [--threads THREADS] [--threads-batch THREADS_BATCH] [--no_mul_mat_q] [--n_batch N_BATCH] [--no-mmap] [--mlock] [--n-gpu-layers N_GPU_LAYERS] + [--use_flash_attention_2] [--use_eager_attention] [--torch-compile] [--load-in-4bit] [--use_double_quant] [--compute_dtype COMPUTE_DTYPE] [--quant_type QUANT_TYPE] [--flash-attn] + [--tensorcores] [--n_ctx N_CTX] [--threads THREADS] [--threads-batch THREADS_BATCH] [--no_mul_mat_q] [--n_batch N_BATCH] [--no-mmap] [--mlock] [--n-gpu-layers N_GPU_LAYERS] [--tensor_split TENSOR_SPLIT] [--numa] [--logits_all] [--no_offload_kqv] [--cache-capacity CACHE_CAPACITY] [--row_split] [--streaming-llm] [--attention-sink-size ATTENTION_SINK_SIZE] [--tokenizer-dir TOKENIZER_DIR] [--gpu-split GPU_SPLIT] [--autosplit] [--max_seq_len MAX_SEQ_LEN] [--cfg-cache] [--no_flash_attn] [--no_xformers] [--no_sdpa] - [--num_experts_per_token NUM_EXPERTS_PER_TOKEN] [--enable_tp] [--triton] [--no_inject_fused_mlp] [--no_use_cuda_fp16] [--desc_act] [--disable_exllama] [--disable_exllamav2] - [--wbits WBITS] [--groupsize GROUPSIZE] [--hqq-backend HQQ_BACKEND] [--cpp-runner] [--cache_type CACHE_TYPE] [--deepspeed] [--nvme-offload-dir 
NVME_OFFLOAD_DIR] + [--num_experts_per_token NUM_EXPERTS_PER_TOKEN] [--enable_tp] [--hqq-backend HQQ_BACKEND] [--cpp-runner] [--cache_type CACHE_TYPE] [--deepspeed] [--nvme-offload-dir NVME_OFFLOAD_DIR] [--local_rank LOCAL_RANK] [--alpha_value ALPHA_VALUE] [--rope_freq_base ROPE_FREQ_BASE] [--compress_pos_emb COMPRESS_POS_EMB] [--listen] [--listen-port LISTEN_PORT] [--listen-host LISTEN_HOST] [--share] [--auto-launch] [--gradio-auth GRADIO_AUTH] [--gradio-auth-path GRADIO_AUTH_PATH] [--ssl-keyfile SSL_KEYFILE] [--ssl-certfile SSL_CERTFILE] - [--subpath SUBPATH] [--old-colors] [--api] [--public-api] [--public-api-id PUBLIC_API_ID] [--api-port API_PORT] [--api-key API_KEY] [--admin-key ADMIN_KEY] [--nowebui] - [--multimodal-pipeline MULTIMODAL_PIPELINE] [--model_type MODEL_TYPE] [--pre_layer PRE_LAYER [PRE_LAYER ...]] [--checkpoint CHECKPOINT] [--monkey-patch] [--no_inject_fused_attention] - [--cache_4bit] [--cache_8bit] [--chat-buttons] + [--subpath SUBPATH] [--old-colors] [--api] [--public-api] [--public-api-id PUBLIC_API_ID] [--api-port API_PORT] [--api-key API_KEY] [--admin-key ADMIN_KEY] [--api-enable-ipv6] + [--api-disable-ipv4] [--nowebui] [--multimodal-pipeline MULTIMODAL_PIPELINE] [--cache_4bit] [--cache_8bit] [--chat-buttons] [--triton] [--no_inject_fused_mlp] [--no_use_cuda_fp16] + [--desc_act] [--disable_exllama] [--disable_exllamav2] [--wbits WBITS] [--groupsize GROUPSIZE] Text generation web UI @@ -237,7 +236,7 @@ Basic settings: Model loader: --loader LOADER Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, llamacpp_HF, ExLlamav2_HF, ExLlamav2, - AutoGPTQ. + HQQ, TensorRT-LLM. Transformers/Accelerate: --cpu Use the CPU to generate text. Warning: Training on CPU is extremely slow. @@ -255,6 +254,7 @@ Transformers/Accelerate: --no_use_fast Set use_fast=False while loading the tokenizer (it's True by default). Use this if you have any problems related to use_fast. --use_flash_attention_2 Set use_flash_attention_2=True while loading the model. --use_eager_attention Set attn_implementation= eager while loading the model. + --torch-compile Compile the model with torch.compile for improved performance. bitsandbytes 4-bit: --load-in-4bit Load the model with 4-bit precision (using bitsandbytes). @@ -264,7 +264,7 @@ bitsandbytes 4-bit: llama.cpp: --flash-attn Use flash-attention. - --tensorcores NVIDIA only: use llama-cpp-python compiled with tensor cores support. This may increase performance on newer cards. + --tensorcores NVIDIA only: use llama-cpp-python compiled without GGML_CUDA_FORCE_MMQ. This may improve performance on newer cards. --n_ctx N_CTX Size of the prompt context. --threads THREADS Number of threads to use. --threads-batch THREADS_BATCH Number of threads to use for batches/prompt processing. @@ -294,16 +294,6 @@ ExLlamaV2: --num_experts_per_token NUM_EXPERTS_PER_TOKEN Number of experts to use for generation. Applies to MoE models like Mixtral. --enable_tp Enable Tensor Parallelism (TP) in ExLlamaV2. -AutoGPTQ: - --triton Use triton. - --no_inject_fused_mlp Triton mode only: disable the use of fused MLP, which will use less VRAM at the cost of slower inference. - --no_use_cuda_fp16 This can make models faster on some systems. - --desc_act For models that do not have a quantize_config.json, this parameter is used to define whether to set desc_act or not in BaseQuantizeConfig. - --disable_exllama Disable ExLlama kernel, which can improve inference speed on some systems. 
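The flag listing in this README section is generated from argparse groups defined in modules/shared.py (the removed AutoGPTQ entries continue below). A minimal, hypothetical sketch of how such grouped --help output is wired up; the groups and flags shown are a small illustrative subset, not the full parser:

    import argparse

    parser = argparse.ArgumentParser(description='Text generation web UI')

    # Illustrative groups mirroring the README layout.
    group = parser.add_argument_group('Model loader')
    group.add_argument('--loader', type=str, help='Choose the model loader manually.')

    group = parser.add_argument_group('llama.cpp')
    group.add_argument('--flash-attn', action='store_true', help='Use flash-attention.')
    group.add_argument('--n_ctx', type=int, default=2048, help='Size of the prompt context.')

    args = parser.parse_args(['--loader', 'llama.cpp', '--flash-attn'])
    print(args.loader, args.flash_attn, args.n_ctx)

Each add_argument_group call becomes one titled section in the --help text, which is why deleting the AutoGPTQ group in this patch removes the whole corresponding block from the README.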
- --disable_exllamav2 Disable ExLlamav2 kernel. - --wbits WBITS Load a pre-quantized model with specified precision in bits. 2, 3, 4 and 8 are supported. - --groupsize GROUPSIZE Group size. - HQQ: --hqq-backend HQQ_BACKEND Backend for the HQQ loader. Valid options: PYTORCH, PYTORCH_COMPILE, ATEN. @@ -343,6 +333,8 @@ API: --api-port API_PORT The listening port for the API. --api-key API_KEY API authentication key. --admin-key ADMIN_KEY API authentication key for admin tasks like loading and unloading models. If not set, will be the same as --api-key. + --api-enable-ipv6 Enable IPv6 for the API + --api-disable-ipv4 Disable IPv4 for the API --nowebui Do not launch the Gradio UI. Useful for launching the API in standalone mode. Multimodal: diff --git a/modules/shared.py b/modules/shared.py index 4d873cb9..93cd2272 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -86,7 +86,7 @@ group.add_argument('--idle-timeout', type=int, default=0, help='Unload model aft # Model loader group = parser.add_argument_group('Model loader') -group.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, llamacpp_HF, ExLlamav2_HF, ExLlamav2.') +group.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, llamacpp_HF, ExLlamav2_HF, ExLlamav2, HQQ, TensorRT-LLM.') # Transformers/Accelerate group = parser.add_argument_group('Transformers/Accelerate') From 02db4b0d06e9573de9e399b49006f882b996571b Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 10 Jan 2025 15:05:08 -0800 Subject: [PATCH 08/22] Bump transformers to 4.48 --- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index 6539161c..c7ced3df 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,7 +21,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.47.* +transformers==4.48.* tqdm wandb diff --git a/requirements_amd.txt b/requirements_amd.txt index 2e5f2da7..87ee93d1 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.47.* +transformers==4.48.* tqdm wandb diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index b1eb7d31..fa2f5ca7 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.47.* +transformers==4.48.* tqdm wandb diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 6a9bf7f7..e9838295 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.47.* +transformers==4.48.* tqdm wandb diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index d8928d58..bef02feb 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.47.* +transformers==4.48.* tqdm wandb diff --git 
a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 84658a11..32f1a50a 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.47.* +transformers==4.48.* tqdm wandb diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 5944d5a7..938848bf 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.47.* +transformers==4.48.* tqdm wandb diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index fda4292d..e18cbe64 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -21,7 +21,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.47.* +transformers==4.48.* tqdm wandb diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index 45003f0d..a034ee61 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.47.* +transformers==4.48.* tqdm wandb From 3a722a36c85f31f7d5d4529b8dfea3faec7b9c37 Mon Sep 17 00:00:00 2001 From: oobabooga Date: Sat, 11 Jan 2025 12:55:19 -0300 Subject: [PATCH 09/22] Use morphdom to make chat streaming 1902381098231% faster (#6653) --- js/main.js | 3 +-- js/morphdom/morphdom-umd.min.js | 1 + modules/block_requests.py | 1 + modules/ui_chat.py | 26 +++++++++++++++++++++++--- 4 files changed, 26 insertions(+), 5 deletions(-) create mode 100644 js/morphdom/morphdom-umd.min.js diff --git a/js/main.js b/js/main.js index efb84238..ab2499d4 100644 --- a/js/main.js +++ b/js/main.js @@ -147,10 +147,9 @@ const observer = new MutationObserver(function(mutations) { doSyntaxHighlighting(); - if(!isScrolled) { + if (!isScrolled && targetElement.scrollTop !== targetElement.scrollHeight) { targetElement.scrollTop = targetElement.scrollHeight; } - }); // Configure the observer to watch for changes in the subtree and attributes diff --git a/js/morphdom/morphdom-umd.min.js b/js/morphdom/morphdom-umd.min.js new file mode 100644 index 00000000..6746f0e8 --- /dev/null +++ b/js/morphdom/morphdom-umd.min.js @@ -0,0 +1 @@ +(function(global,factory){typeof exports==="object"&&typeof module!=="undefined"?module.exports=factory():typeof define==="function"&&define.amd?define(factory):(global=global||self,global.morphdom=factory())})(this,function(){"use strict";var DOCUMENT_FRAGMENT_NODE=11;function morphAttrs(fromNode,toNode){var toNodeAttrs=toNode.attributes;var attr;var attrName;var attrNamespaceURI;var attrValue;var fromValue;if(toNode.nodeType===DOCUMENT_FRAGMENT_NODE||fromNode.nodeType===DOCUMENT_FRAGMENT_NODE){return}for(var i=toNodeAttrs.length-1;i>=0;i--){attr=toNodeAttrs[i];attrName=attr.name;attrNamespaceURI=attr.namespaceURI;attrValue=attr.value;if(attrNamespaceURI){attrName=attr.localName||attrName;fromValue=fromNode.getAttributeNS(attrNamespaceURI,attrName);if(fromValue!==attrValue){if(attr.prefix==="xmlns"){attrName=attr.name}fromNode.setAttributeNS(attrNamespaceURI,attrName,attrValue)}}else{fromValue=fromNode.getAttribute(attrName);if(fromValue!==attrValue){fromNode.setAttribute(attrName,attrValue)}}}var fromNodeAttrs=fromNode.attributes;for(var 
d=fromNodeAttrs.length-1;d>=0;d--){attr=fromNodeAttrs[d];attrName=attr.name;attrNamespaceURI=attr.namespaceURI;if(attrNamespaceURI){attrName=attr.localName||attrName;if(!toNode.hasAttributeNS(attrNamespaceURI,attrName)){fromNode.removeAttributeNS(attrNamespaceURI,attrName)}}else{if(!toNode.hasAttribute(attrName)){fromNode.removeAttribute(attrName)}}}}var range;var NS_XHTML="http://www.w3.org/1999/xhtml";var doc=typeof document==="undefined"?undefined:document;var HAS_TEMPLATE_SUPPORT=!!doc&&"content"in doc.createElement("template");var HAS_RANGE_SUPPORT=!!doc&&doc.createRange&&"createContextualFragment"in doc.createRange();function createFragmentFromTemplate(str){var template=doc.createElement("template");template.innerHTML=str;return template.content.childNodes[0]}function createFragmentFromRange(str){if(!range){range=doc.createRange();range.selectNode(doc.body)}var fragment=range.createContextualFragment(str);return fragment.childNodes[0]}function createFragmentFromWrap(str){var fragment=doc.createElement("body");fragment.innerHTML=str;return fragment.childNodes[0]}function toElement(str){str=str.trim();if(HAS_TEMPLATE_SUPPORT){return createFragmentFromTemplate(str)}else if(HAS_RANGE_SUPPORT){return createFragmentFromRange(str)}return createFragmentFromWrap(str)}function compareNodeNames(fromEl,toEl){var fromNodeName=fromEl.nodeName;var toNodeName=toEl.nodeName;var fromCodeStart,toCodeStart;if(fromNodeName===toNodeName){return true}fromCodeStart=fromNodeName.charCodeAt(0);toCodeStart=toNodeName.charCodeAt(0);if(fromCodeStart<=90&&toCodeStart>=97){return fromNodeName===toNodeName.toUpperCase()}else if(toCodeStart<=90&&fromCodeStart>=97){return toNodeName===fromNodeName.toUpperCase()}else{return false}}function createElementNS(name,namespaceURI){return!namespaceURI||namespaceURI===NS_XHTML?doc.createElement(name):doc.createElementNS(namespaceURI,name)}function moveChildren(fromEl,toEl){var curChild=fromEl.firstChild;while(curChild){var nextChild=curChild.nextSibling;toEl.appendChild(curChild);curChild=nextChild}return toEl}function syncBooleanAttrProp(fromEl,toEl,name){if(fromEl[name]!==toEl[name]){fromEl[name]=toEl[name];if(fromEl[name]){fromEl.setAttribute(name,"")}else{fromEl.removeAttribute(name)}}}var specialElHandlers={OPTION:function(fromEl,toEl){var parentNode=fromEl.parentNode;if(parentNode){var parentName=parentNode.nodeName.toUpperCase();if(parentName==="OPTGROUP"){parentNode=parentNode.parentNode;parentName=parentNode&&parentNode.nodeName.toUpperCase()}if(parentName==="SELECT"&&!parentNode.hasAttribute("multiple")){if(fromEl.hasAttribute("selected")&&!toEl.selected){fromEl.setAttribute("selected","selected");fromEl.removeAttribute("selected")}parentNode.selectedIndex=-1}}syncBooleanAttrProp(fromEl,toEl,"selected")},INPUT:function(fromEl,toEl){syncBooleanAttrProp(fromEl,toEl,"checked");syncBooleanAttrProp(fromEl,toEl,"disabled");if(fromEl.value!==toEl.value){fromEl.value=toEl.value}if(!toEl.hasAttribute("value")){fromEl.removeAttribute("value")}},TEXTAREA:function(fromEl,toEl){var newValue=toEl.value;if(fromEl.value!==newValue){fromEl.value=newValue}var firstChild=fromEl.firstChild;if(firstChild){var oldValue=firstChild.nodeValue;if(oldValue==newValue||!newValue&&oldValue==fromEl.placeholder){return}firstChild.nodeValue=newValue}},SELECT:function(fromEl,toEl){if(!toEl.hasAttribute("multiple")){var selectedIndex=-1;var i=0;var curChild=fromEl.firstChild;var optgroup;var 
nodeName;while(curChild){nodeName=curChild.nodeName&&curChild.nodeName.toUpperCase();if(nodeName==="OPTGROUP"){optgroup=curChild;curChild=optgroup.firstChild}else{if(nodeName==="OPTION"){if(curChild.hasAttribute("selected")){selectedIndex=i;break}i++}curChild=curChild.nextSibling;if(!curChild&&optgroup){curChild=optgroup.nextSibling;optgroup=null}}}fromEl.selectedIndex=selectedIndex}}};var ELEMENT_NODE=1;var DOCUMENT_FRAGMENT_NODE$1=11;var TEXT_NODE=3;var COMMENT_NODE=8;function noop(){}function defaultGetNodeKey(node){if(node){return node.getAttribute&&node.getAttribute("id")||node.id}}function morphdomFactory(morphAttrs){return function morphdom(fromNode,toNode,options){if(!options){options={}}if(typeof toNode==="string"){if(fromNode.nodeName==="#document"||fromNode.nodeName==="HTML"||fromNode.nodeName==="BODY"){var toNodeHtml=toNode;toNode=doc.createElement("html");toNode.innerHTML=toNodeHtml}else{toNode=toElement(toNode)}}else if(toNode.nodeType===DOCUMENT_FRAGMENT_NODE$1){toNode=toNode.firstElementChild}var getNodeKey=options.getNodeKey||defaultGetNodeKey;var onBeforeNodeAdded=options.onBeforeNodeAdded||noop;var onNodeAdded=options.onNodeAdded||noop;var onBeforeElUpdated=options.onBeforeElUpdated||noop;var onElUpdated=options.onElUpdated||noop;var onBeforeNodeDiscarded=options.onBeforeNodeDiscarded||noop;var onNodeDiscarded=options.onNodeDiscarded||noop;var onBeforeElChildrenUpdated=options.onBeforeElChildrenUpdated||noop;var skipFromChildren=options.skipFromChildren||noop;var addChild=options.addChild||function(parent,child){return parent.appendChild(child)};var childrenOnly=options.childrenOnly===true;var fromNodesLookup=Object.create(null);var keyedRemovalList=[];function addKeyedRemoval(key){keyedRemovalList.push(key)}function walkDiscardedChildNodes(node,skipKeyedNodes){if(node.nodeType===ELEMENT_NODE){var curChild=node.firstChild;while(curChild){var key=undefined;if(skipKeyedNodes&&(key=getNodeKey(curChild))){addKeyedRemoval(key)}else{onNodeDiscarded(curChild);if(curChild.firstChild){walkDiscardedChildNodes(curChild,skipKeyedNodes)}}curChild=curChild.nextSibling}}}function removeNode(node,parentNode,skipKeyedNodes){if(onBeforeNodeDiscarded(node)===false){return}if(parentNode){parentNode.removeChild(node)}onNodeDiscarded(node);walkDiscardedChildNodes(node,skipKeyedNodes)}function indexTree(node){if(node.nodeType===ELEMENT_NODE||node.nodeType===DOCUMENT_FRAGMENT_NODE$1){var curChild=node.firstChild;while(curChild){var key=getNodeKey(curChild);if(key){fromNodesLookup[key]=curChild}indexTree(curChild);curChild=curChild.nextSibling}}}indexTree(fromNode);function handleNodeAdded(el){onNodeAdded(el);var curChild=el.firstChild;while(curChild){var nextSibling=curChild.nextSibling;var key=getNodeKey(curChild);if(key){var unmatchedFromEl=fromNodesLookup[key];if(unmatchedFromEl&&compareNodeNames(curChild,unmatchedFromEl)){curChild.parentNode.replaceChild(unmatchedFromEl,curChild);morphEl(unmatchedFromEl,curChild)}else{handleNodeAdded(curChild)}}else{handleNodeAdded(curChild)}curChild=nextSibling}}function cleanupFromEl(fromEl,curFromNodeChild,curFromNodeKey){while(curFromNodeChild){var fromNextSibling=curFromNodeChild.nextSibling;if(curFromNodeKey=getNodeKey(curFromNodeChild)){addKeyedRemoval(curFromNodeKey)}else{removeNode(curFromNodeChild,fromEl,true)}curFromNodeChild=fromNextSibling}}function morphEl(fromEl,toEl,childrenOnly){var toElKey=getNodeKey(toEl);if(toElKey){delete fromNodesLookup[toElKey]}if(!childrenOnly){var 
beforeUpdateResult=onBeforeElUpdated(fromEl,toEl);if(beforeUpdateResult===false){return}else if(beforeUpdateResult instanceof HTMLElement){fromEl=beforeUpdateResult;indexTree(fromEl)}morphAttrs(fromEl,toEl);onElUpdated(fromEl);if(onBeforeElChildrenUpdated(fromEl,toEl)===false){return}}if(fromEl.nodeName!=="TEXTAREA"){morphChildren(fromEl,toEl)}else{specialElHandlers.TEXTAREA(fromEl,toEl)}}function morphChildren(fromEl,toEl){var skipFrom=skipFromChildren(fromEl,toEl);var curToNodeChild=toEl.firstChild;var curFromNodeChild=fromEl.firstChild;var curToNodeKey;var curFromNodeKey;var fromNextSibling;var toNextSibling;var matchingFromEl;outer:while(curToNodeChild){toNextSibling=curToNodeChild.nextSibling;curToNodeKey=getNodeKey(curToNodeChild);while(!skipFrom&&curFromNodeChild){fromNextSibling=curFromNodeChild.nextSibling;if(curToNodeChild.isSameNode&&curToNodeChild.isSameNode(curFromNodeChild)){curToNodeChild=toNextSibling;curFromNodeChild=fromNextSibling;continue outer}curFromNodeKey=getNodeKey(curFromNodeChild);var curFromNodeType=curFromNodeChild.nodeType;var isCompatible=undefined;if(curFromNodeType===curToNodeChild.nodeType){if(curFromNodeType===ELEMENT_NODE){if(curToNodeKey){if(curToNodeKey!==curFromNodeKey){if(matchingFromEl=fromNodesLookup[curToNodeKey]){if(fromNextSibling===matchingFromEl){isCompatible=false}else{fromEl.insertBefore(matchingFromEl,curFromNodeChild);if(curFromNodeKey){addKeyedRemoval(curFromNodeKey)}else{removeNode(curFromNodeChild,fromEl,true)}curFromNodeChild=matchingFromEl;curFromNodeKey=getNodeKey(curFromNodeChild)}}else{isCompatible=false}}}else if(curFromNodeKey){isCompatible=false}isCompatible=isCompatible!==false&&compareNodeNames(curFromNodeChild,curToNodeChild);if(isCompatible){morphEl(curFromNodeChild,curToNodeChild)}}else if(curFromNodeType===TEXT_NODE||curFromNodeType==COMMENT_NODE){isCompatible=true;if(curFromNodeChild.nodeValue!==curToNodeChild.nodeValue){curFromNodeChild.nodeValue=curToNodeChild.nodeValue}}}if(isCompatible){curToNodeChild=toNextSibling;curFromNodeChild=fromNextSibling;continue outer}if(curFromNodeKey){addKeyedRemoval(curFromNodeKey)}else{removeNode(curFromNodeChild,fromEl,true)}curFromNodeChild=fromNextSibling}if(curToNodeKey&&(matchingFromEl=fromNodesLookup[curToNodeKey])&&compareNodeNames(matchingFromEl,curToNodeChild)){if(!skipFrom){addChild(fromEl,matchingFromEl)}morphEl(matchingFromEl,curToNodeChild)}else{var onBeforeNodeAddedResult=onBeforeNodeAdded(curToNodeChild);if(onBeforeNodeAddedResult!==false){if(onBeforeNodeAddedResult){curToNodeChild=onBeforeNodeAddedResult}if(curToNodeChild.actualize){curToNodeChild=curToNodeChild.actualize(fromEl.ownerDocument||doc)}addChild(fromEl,curToNodeChild);handleNodeAdded(curToNodeChild)}}curToNodeChild=toNextSibling;curFromNodeChild=fromNextSibling}cleanupFromEl(fromEl,curFromNodeChild,curFromNodeKey);var specialElHandler=specialElHandlers[fromEl.nodeName];if(specialElHandler){specialElHandler(fromEl,toEl)}}var morphedNode=fromNode;var morphedNodeType=morphedNode.nodeType;var toNodeType=toNode.nodeType;if(!childrenOnly){if(morphedNodeType===ELEMENT_NODE){if(toNodeType===ELEMENT_NODE){if(!compareNodeNames(fromNode,toNode)){onNodeDiscarded(fromNode);morphedNode=moveChildren(fromNode,createElementNS(toNode.nodeName,toNode.namespaceURI))}}else{morphedNode=toNode}}else if(morphedNodeType===TEXT_NODE||morphedNodeType===COMMENT_NODE){if(toNodeType===morphedNodeType){if(morphedNode.nodeValue!==toNode.nodeValue){morphedNode.nodeValue=toNode.nodeValue}return 
morphedNode}else{morphedNode=toNode}}}if(morphedNode===toNode){onNodeDiscarded(fromNode)}else{if(toNode.isSameNode&&toNode.isSameNode(morphedNode)){return}morphEl(morphedNode,toNode,childrenOnly);if(keyedRemovalList){for(var i=0,len=keyedRemovalList.length;i<len;i++){var elToRemove=fromNodesLookup[keyedRemovalList[i]];if(elToRemove){removeNode(elToRemove,elToRemove.parentNode,false)}}}if(!childrenOnly&&morphedNode!==fromNode&&fromNode.parentNode){if(morphedNode.actualize){morphedNode=morphedNode.actualize(fromNode.ownerDocument||doc)}fromNode.parentNode.replaceChild(morphedNode,fromNode)}return morphedNode}}var morphdom=morphdomFactory(morphAttrs);return morphdom}));
diff --git a/modules/block_requests.py b/modules/block_requests.py
--- a/modules/block_requests.py
+++ b/modules/block_requests.py
@@ -57,6 +57,7 @@ def my_open(*args, **kwargs):
             '\n        <script src="file/js/katex/katex.min.js"></script>'
             '\n        <script src="file/js/katex/auto-render.min.js"></script>'
+            '\n        <script src="file/js/morphdom/morphdom-umd.min.js"></script>'
             f'\n        <script src="file/js/save_files.js?v={cache_buster}"></script>'
             '\n        <script src="file/js/switch_tabs.js"></script>'
             '\n        <script src="file/js/show_controls.js"></script>'
diff --git a/modules/ui_chat.py b/modules/ui_chat.py
index b92dd9ae..61be17e3 100644
--- a/modules/ui_chat.py
+++ b/modules/ui_chat.py
@@ -20,7 +20,7 @@ def create_ui():
     shared.gradio['Chat input'] = gr.State()
     shared.gradio['history'] = gr.JSON(visible=False)
 
-    with gr.Tab('Chat', elem_id='chat-tab'):
+    with gr.Tab('Chat', id='Chat', elem_id='chat-tab'):
         with gr.Row(elem_id='past-chats-row', elem_classes=['pretty_scrollbar']):
             with gr.Column():
                 with gr.Row(elem_id='past-chats-buttons'):
@@ -46,8 +46,8 @@ def create_ui():
 
         with gr.Row():
             with gr.Column(elem_id='chat-col'):
-                shared.gradio['display'] = gr.HTML(value=chat_html_wrapper({'internal': [], 'visible': []}, '', '', 'chat', 'cai-chat', ''))
-
+                shared.gradio['html_display'] = gr.HTML(value=chat_html_wrapper({'internal': [], 'visible': []}, '', '', 'chat', 'cai-chat', ''), visible=True)
+                shared.gradio['display'] = gr.Textbox(value="", visible=False)  # Hidden buffer
                 with gr.Row(elem_id="chat-input-row"):
                     with gr.Column(scale=1, elem_id='gr-hover-container'):
                         gr.HTML(value='<div class="hover-element" onclick="void(0)"><span id="hover-element-button">&#9776;</span><div class="hover-menu" id="hover-menu"></div></div>', elem_id='gr-hover')
@@ -180,6 +180,26 @@ def create_event_handlers():
     shared.input_params = gradio(inputs)
     shared.reload_inputs = gradio(reload_arr)
 
+    # Morph HTML updates instead of updating everything
+    shared.gradio['display'].change(None, gradio('display'), None,
+      js="""
+      (text) => {
+        morphdom(
+          document.getElementById('chat').parentNode,
+          '<div class="prose svelte-1ybaih5">' + text + '</div>',
+          {
+            onBeforeElUpdated: function(fromEl, toEl) {
+              if (fromEl.isEqualNode(toEl)) {
+                return false; // Skip identical nodes
+              }
+              return true; // Update only if nodes differ
+            }
+          }
+        );
+      }
+      """
+    )
+
     shared.gradio['Generate'].click(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
         lambda x: (x, ''), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then(
From 58342740a5b061c2836f46d93dd70832f894c6e9 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 11 Jan 2025 07:59:49 -0800
Subject: [PATCH 10/22] Bump flash-attn to 2.7.3

---
 requirements.txt        | 8 ++++----
 requirements_noavx2.txt | 8 ++++----
 2 files changed, 8 insertions(+), 8 deletions(-)

diff --git a/requirements.txt b/requirements.txt
index c7ced3df..9bc5956a 100644
--- a/requirements.txt
+++ b/requirements.txt
@@ -55,7 +55,7 @@ https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+
 https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
-https://github.com/oobabooga/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu12torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt
index e18cbe64..1755ac24 100644
--- a/requirements_noavx2.txt
+++ b/requirements_noavx2.txt
@@ -55,7 +55,7 @@ https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+
 https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
 https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
-https://github.com/oobabooga/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu12torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
+https://github.com/oobabooga/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
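A note on the pins above: the `; platform_system == ...` suffixes are PEP 508 environment markers, and pip evaluates each one against the running interpreter before it will consider a URL. Which clause applies locally can be checked with the packaging library (the marker string here is copied from the requirement lines above):

from packaging.markers import Marker

marker = Marker('platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"')
print(marker.evaluate())  # True only on a Linux x86_64 host running Python 3.11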
From a5d64b586da0ce39c36d01a59d991fbc76e16362 Mon Sep 17 00:00:00 2001
From: oobabooga
Date: Sat, 11 Jan 2025 16:59:21 -0300
Subject: [PATCH 11/22] Add a "copy" button below each message (#6654)

---
 css/html_instruct_style.css |  2 ++
 css/main.css                | 53 +++++++++++++++++++++++++++-
 modules/block_requests.py   |  3 ++-
 modules/html_generator.py   | 70 ++++++++++++++++++++++++-------------
 modules/ui.py               |  2 ++
 5 files changed, 104 insertions(+), 26 deletions(-)

diff --git a/css/html_instruct_style.css b/css/html_instruct_style.css
index dcc19c29..fcd0558f 100644
--- a/css/html_instruct_style.css
+++ b/css/html_instruct_style.css
@@ -48,12 +48,14 @@
 .chat .user-message {
     background: #f4f4f4;
     padding: 1.5rem 1rem;
+    padding-bottom: 2rem;
     border-radius: 0;
     border-bottom-right-radius: 0;
 }
 
 .chat .assistant-message {
     padding: 1.5rem 1rem;
+    padding-bottom: 2rem;
     border-radius: 0;
     border: 0;
 }
diff --git a/css/main.css b/css/main.css
index 9d99a876..48c6727a 100644
--- a/css/main.css
+++ b/css/main.css
@@ -1142,7 +1142,6 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
 }
 
 .dark svg {
-    fill: white;
     color: white;
 }
 
@@ -1221,3 +1220,55 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
         background: var(--light-theme-gray);
     }
 }
+
+/* ----------------------------------------------
+  Copy button for chat messages
+---------------------------------------------- */
+.message .text,
+.message .text-you,
+.message .text-bot,
+.user-message .text,
+.assistant-message .text {
+    position: relative;
+}
+
+.message, .user-message, .assistant-message {
+    position: relative;
+}
+
+.copy-button {
+    position: absolute;
+    bottom: -23px;
+    left: 0;
+    padding: 0;
+    border: none;
+    border-radius: 3px;
+    cursor: pointer;
+    opacity: 0;
+    display: flex;
+    align-items: center;
+    transition: opacity 0.2s;
+}
+
+.message:hover .copy-button,
+.user-message:hover .copy-button,
+.assistant-message:hover .copy-button {
+    opacity: 1;
+}
+
+.copy-button svg {
+    stroke: rgb(156 163 175);
+    transition: stroke 0.2s;
+}
+
+.copy-button:hover svg {
+    stroke: rgb(107 114 128);
+}
+
+.dark .copy-button svg {
+    stroke: rgb(156 163 175);
+}
+
+.dark .copy-button:hover svg {
+    stroke: rgb(209 213 219);
+}
diff --git a/modules/block_requests.py b/modules/block_requests.py
index 35f983cf..29fc6633 100644
--- a/modules/block_requests.py
+++ b/modules/block_requests.py
@@ -3,7 +3,7 @@
 import io
 
 import requests
 
-from modules import shared
+from modules import shared, ui
 from modules.logging_colors import logger
 
 original_open = open
@@ -58,6 +58,7 @@ def my_open(*args, **kwargs):
             '\n        <script src="file/js/morphdom/morphdom-umd.min.js"></script>'
             f'\n        <script src="file/js/save_files.js?v={cache_buster}"></script>'
             '\n        <script src="file/js/switch_tabs.js"></script>'
+            f'\n        <script src="file/js/global_scope_js.js?v={cache_buster}"></script>'
             '\n        <script src="file/js/show_controls.js"></script>'
         )
diff --git a/modules/html_generator.py b/modules/html_generator.py
index e3550ed5..b565c63a 100644
--- a/modules/html_generator.py
+++ b/modules/html_generator.py
@@ -292,24 +292,34 @@ def get_image_cache(path):
     return image_cache[path][1]
 
 
+copy_svg = '''<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect width="14" height="14" x="8" y="8" rx="2" ry="2"></rect><path d="M4 16c-1.1 0-2-.9-2-2V4c0-1.1.9-2 2-2h10c1.1 0 2 .9 2 2"></path></svg>'''
+copy_button = f'<button class="copy-button" title="Copy" onclick="copyToClipboard(this)">{copy_svg}</button>'
+
 
 def generate_instruct_html(history):
     output = f'<style>{instruct_css}</style><div class="chat" id="chat">'
-    for i, _row in enumerate(history):
-        row = [convert_to_markdown_wrapped(entry, use_cache=i != len(history) - 1) for entry in _row]
-        if row[0]:  # Don't display empty user messages
+    for i in range(len(history['visible'])):
+        row_visible = history['visible'][i]
+        row_internal = history['internal'][i]
+        converted_visible = [convert_to_markdown_wrapped(entry, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
+
+        if converted_visible[0]:  # Don't display empty user messages
             output += (
-                f'<div class="user-message">'
+                f'<div class="user-message" '
+                f'data-raw="{html.escape(row_internal[0], quote=True)}">'
                 f'<div class="text">'
-                f'<div class="message-body">{row[0]}</div>'
+                f'<div class="message-body">{converted_visible[0]}</div>'
+                f'{copy_button}'
                 f'</div>'
                 f'</div>'
             )
 
         output += (
-            f'<div class="assistant-message">'
+            f'<div class="assistant-message" '
+            f'data-raw="{html.escape(row_internal[1], quote=True)}">'
             f'<div class="text">'
-            f'<div class="message-body">{row[1]}</div>'
+            f'<div class="message-body">{converted_visible[1]}</div>'
+            f'{copy_button}'
             f'</div>'
             f'</div>'
         )
@@ -332,26 +342,32 @@ def generate_cai_chat_html(history, name1, name2, style, character, reset_cache=False):
         if Path("cache/pfp_me.png").exists() else ''
     )
 
-    for i, _row in enumerate(history):
-        row = [convert_to_markdown_wrapped(entry, use_cache=i != len(history) - 1) for entry in _row]
+    for i in range(len(history['visible'])):
+        row_visible = history['visible'][i]
+        row_internal = history['internal'][i]
+        converted_visible = [convert_to_markdown_wrapped(entry, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
 
-        if row[0]:  # Don't display empty user messages
+        if converted_visible[0]:  # Don't display empty user messages
             output += (
-                f'<div class="message">'
+                f'<div class="message" '
+                f'data-raw="{html.escape(row_internal[0], quote=True)}">'
                 f'<div class="circle-you">{img_me}</div>'
                 f'<div class="text">'
                 f'<div class="username">{name1}</div>'
-                f'<div class="message-body">{row[0]}</div>'
+                f'<div class="message-body">{converted_visible[0]}</div>'
+                f'{copy_button}'
                 f'</div>'
                 f'</div>'
             )
 
         output += (
-            f'<div class="message">'
+            f'<div class="message" '
+            f'data-raw="{html.escape(row_internal[1], quote=True)}">'
            f'<div class="circle-bot">{img_bot}</div>'
             f'<div class="text">'
             f'<div class="username">{name2}</div>'
-            f'<div class="message-body">{row[1]}</div>'
+            f'<div class="message-body">{converted_visible[1]}</div>'
+            f'{copy_button}'
             f'</div>'
             f'</div>'
         )
@@ -363,22 +379,28 @@ def generate_chat_html(history, name1, name2, reset_cache=False):
     output = f'<style>{chat_styles["wpp"]}</style><div class="chat" id="chat">'
-    for i, _row in enumerate(history):
-        row = [convert_to_markdown_wrapped(entry, use_cache=i != len(history) - 1) for entry in _row]
+    for i in range(len(history['visible'])):
+        row_visible = history['visible'][i]
+        row_internal = history['internal'][i]
+        converted_visible = [convert_to_markdown_wrapped(entry, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
 
-        if row[0]:  # Don't display empty user messages
+        if converted_visible[0]:  # Don't display empty user messages
             output += (
-                f'<div class="message">'
+                f'<div class="message" '
+                f'data-raw="{html.escape(row_internal[0], quote=True)}">'
                 f'<div class="text-you">'
-                f'<div class="message-body">{row[0]}</div>'
+                f'<div class="message-body">{converted_visible[0]}</div>'
+                f'{copy_button}'
                 f'</div>'
                 f'</div>'
             )
 
         output += (
-            f'<div class="message">'
+            f'<div class="message" '
+            f'data-raw="{html.escape(row_internal[1], quote=True)}">'
             f'<div class="text-bot">'
-            f'<div class="message-body">{row[1]}</div>'
+            f'<div class="message-body">{converted_visible[1]}</div>'
+            f'{copy_button}'
             f'</div>'
             f'</div>'
         )
@@ -389,8 +411,8 @@ def chat_html_wrapper(history, name1, name2, mode, style, character, reset_cache=False):
     if mode == 'instruct':
-        return generate_instruct_html(history['visible'])
+        return generate_instruct_html(history)
     elif style == 'wpp':
-        return generate_chat_html(history['visible'], name1, name2)
+        return generate_chat_html(history, name1, name2)
     else:
-        return generate_cai_chat_html(history['visible'], name1, name2, style, character, reset_cache)
+        return generate_cai_chat_html(history, name1, name2, style, character, reset_cache)
diff --git a/modules/ui.py b/modules/ui.py
index 4f7ee785..df948a14 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -19,6 +19,8 @@ with open(Path(__file__).resolve().parent / '../css/highlightjs/highlightjs-copy.min.css', 'r') as f:
     css += f.read()
 with open(Path(__file__).resolve().parent / '../js/main.js', 'r') as f:
     js = f.read()
+with open(Path(__file__).resolve().parent / '../js/global_scope_js.js', 'r') as f:
+    global_scope_js = f.read()
 with open(Path(__file__).resolve().parent / '../js/save_files.js', 'r') as f:
     save_files_js = f.read()
 with open(Path(__file__).resolve().parent / '../js/switch_tabs.js', 'r') as f:
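A note on the data-raw mechanism introduced above: html.escape(..., quote=True) makes arbitrary message text safe to sit inside the double-quoted attribute, and the browser's getAttribute("data-raw") returns the unescaped original, so the copy button yields the raw message rather than the rendered HTML. The round trip can be sanity-checked in plain Python (a sketch, not part of the patch):

import html

raw = 'Use "quotes", <tags> & backslashes \\ freely'
attr = html.escape(raw, quote=True)
print(attr)                        # Use &quot;quotes&quot;, &lt;tags&gt; &amp; backslashes \ freely
print(html.unescape(attr) == raw)  # True -- what getAttribute() hands back to copyToClipboard()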
From 1b9121e5b87625edbbc13d2dc0e42624173553ca Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 11 Jan 2025 12:41:41 -0800
Subject: [PATCH 12/22] Add a "refresh" button below the last message, add a
 missing file

---
 css/main.css              | 29 +++++++++++++++++++----------
 js/global_scope_js.js     | 23 +++++++++++++++++++++++
 modules/html_generator.py |  7 ++++++-
 3 files changed, 48 insertions(+), 11 deletions(-)
 create mode 100644 js/global_scope_js.js

diff --git a/css/main.css b/css/main.css
index 48c6727a..63681979 100644
--- a/css/main.css
+++ b/css/main.css
@@ -1236,11 +1236,10 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
     position: relative;
 }
 
-.copy-button {
+.footer-button {
     position: absolute;
-    bottom: -23px;
-    left: 0;
     padding: 0;
+    margin: 0;
     border: none;
     border-radius: 3px;
     cursor: pointer;
@@ -1250,25 +1249,35 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
     transition: opacity 0.2s;
 }
 
+.footer-button#copy-button {
+    bottom: -23px;
+    left: 0;
+}
+
+.footer-button#refresh-button {
+    bottom: -23px;
+    left: 25px;
+}
+
-.message:hover .copy-button,
-.user-message:hover .copy-button,
-.assistant-message:hover .copy-button {
+.message:hover .footer-button,
+.user-message:hover .footer-button,
+.assistant-message:hover .footer-button {
     opacity: 1;
 }
 
-.copy-button svg {
+.footer-button svg {
     stroke: rgb(156 163 175);
     transition: stroke 0.2s;
 }
 
-.copy-button:hover svg {
+.footer-button:hover svg {
     stroke: rgb(107 114 128);
 }
 
-.dark .copy-button svg {
+.dark .footer-button svg {
     stroke: rgb(156 163 175);
 }
 
-.dark .copy-button:hover svg {
+.dark .footer-button:hover svg {
     stroke: rgb(209 213 219);
 }
diff --git a/js/global_scope_js.js b/js/global_scope_js.js
new file mode 100644
index 00000000..79b673d7
--- /dev/null
+++ b/js/global_scope_js.js
@@ -0,0 +1,23 @@
+function copyToClipboard(element) {
+  if (!element) return;
+
+  const messageElement = element.closest(".message, .user-message, .assistant-message");
+  if (!messageElement) return;
+
+  const rawText = messageElement.getAttribute("data-raw");
+  if (!rawText) return;
+
+  navigator.clipboard.writeText(rawText).then(function() {
+    const originalSvg = element.innerHTML;
+    element.innerHTML = "<svg xmlns=\"http://www.w3.org/2000/svg\" width=\"20\" height=\"20\" viewBox=\"0 0 24 24\" fill=\"none\" stroke=\"currentColor\" stroke-width=\"2\" stroke-linecap=\"round\" stroke-linejoin=\"round\"><path d=\"M20 6 9 17l-5-5\"></path></svg>";
+    setTimeout(() => {
+      element.innerHTML = originalSvg;
+    }, 1000);
+
+  }).catch(function(err) {
+    console.error("Failed to copy text: ", err);
+  });
+}
+
+function regenerateClick() {
+    document.getElementById("Regenerate").click();
+}
diff --git a/modules/html_generator.py b/modules/html_generator.py
index b565c63a..79a8dc64 100644
--- a/modules/html_generator.py
+++ b/modules/html_generator.py
@@ -293,7 +293,9 @@ def get_image_cache(path):
 
 copy_svg = '''<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect width="14" height="14" x="8" y="8" rx="2" ry="2"></rect><path d="M4 16c-1.1 0-2-.9-2-2V4c0-1.1.9-2 2-2h10c1.1 0 2 .9 2 2"></path></svg>'''
-copy_button = f'<button class="copy-button" title="Copy" onclick="copyToClipboard(this)">{copy_svg}</button>'
+refresh_svg = '''<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="m2 9 3-3 3 3"></path><path d="M13 18H7a2 2 0 0 1-2-2V6"></path><path d="m22 15-3 3-3-3"></path><path d="M11 6h6a2 2 0 0 1 2 2v10"></path></svg>'''
+copy_button = f'<button class="footer-button" id="copy-button" title="Copy" onclick="copyToClipboard(this)">{copy_svg}</button>'
+refresh_button = f'<button class="footer-button" id="refresh-button" title="Regenerate" onclick="regenerateClick()">{refresh_svg}</button>'
 
 def generate_instruct_html(history):
     output = f'<style>{instruct_css}</style><div class="chat" id="chat">'
@@ -320,6 +322,7 @@
             f'<div class="text">'
             f'<div class="message-body">{converted_visible[1]}</div>'
             f'{copy_button}'
+            f'{refresh_button if i == len(history["visible"]) - 1 else ""}'
             f'</div>'
             f'</div>'
         )
@@ -368,6 +371,7 @@
             f'<div class="username">{name2}</div>'
             f'<div class="message-body">{converted_visible[1]}</div>'
             f'{copy_button}'
+            f'{refresh_button if i == len(history["visible"]) - 1 else ""}'
             f'</div>'
             f'</div>'
         )
@@ -401,6 +405,7 @@
             f'<div class="text">'
             f'<div class="message-body">{converted_visible[1]}</div>'
             f'{copy_button}'
+            f'{refresh_button if i == len(history["visible"]) - 1 else ""}'
             f'</div>'
             f'</div>'
         )

From f1797f4323b6eba98521d14c1cc011ce45f4db42 Mon Sep 17 00:00:00 2001
From: mamei16
Date: Sat, 11 Jan 2025 22:39:44 +0100
Subject: [PATCH 13/22] Unescape backslashes in html_output (#6648)

---
 modules/html_generator.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/modules/html_generator.py b/modules/html_generator.py
index 79a8dc64..3ddad51d 100644
--- a/modules/html_generator.py
+++ b/modules/html_generator.py
@@ -239,6 +239,9 @@ def convert_to_markdown(string):
     pattern = re.compile(r'<code[^>]*>(.*?)</code>', re.DOTALL)
     html_output = pattern.sub(lambda x: html.unescape(x.group()), html_output)
 
+    # Unescape backslashes
+    html_output = html_output.replace('\\\\', '\\')
+
     # Add "long-list" class to <ul> or <ol> containing a long <li> item
     html_output = add_long_list_class(html_output)
 

From a0492ce325b951a9c000fa3cad45806adc8d8926 Mon Sep 17 00:00:00 2001
From: oobabooga
Date: Sat, 11 Jan 2025 21:14:10 -0300
Subject: [PATCH 14/22] Optimize syntax highlighting during chat streaming
 (#6655)

---
 js/global_scope_js.js |  2 +-
 js/main.js            | 55 +++++++++++++++----------------------
 modules/ui_chat.py    | 40 ++++++++++++++++------------
 3 files changed, 44 insertions(+), 53 deletions(-)

diff --git a/js/global_scope_js.js b/js/global_scope_js.js
index 79b673d7..983d60f1 100644
--- a/js/global_scope_js.js
+++ b/js/global_scope_js.js
@@ -19,5 +19,5 @@ function copyToClipboard(element) {
 }
 
 function regenerateClick() {
-    document.getElementById("Regenerate").click();
+  document.getElementById("Regenerate").click();
 }
diff --git a/js/main.js b/js/main.js
index ab2499d4..c5c47d04 100644
--- a/js/main.js
+++ b/js/main.js
@@ -177,47 +177,30 @@ function isElementVisibleOnScreen(element) {
   );
 }
 
-function getVisibleMessagesIndexes() {
-  const elements = document.querySelectorAll(".message-body");
-  const visibleIndexes = [];
-
-  elements.forEach((element, index) => {
-    if (isElementVisibleOnScreen(element) && !element.hasAttribute("data-highlighted")) {
-      visibleIndexes.push(index);
-    }
-  });
-
-  return visibleIndexes;
-}
-
 function doSyntaxHighlighting() {
-  const indexes = getVisibleMessagesIndexes();
-  const elements = document.querySelectorAll(".message-body");
+  const messageBodies = document.querySelectorAll(".message-body");
 
-  if (indexes.length > 0) {
+  if (messageBodies.length > 0) {
     observer.disconnect();
 
-    indexes.forEach((index) => {
-      const element = elements[index];
+    messageBodies.forEach((messageBody) => {
+      if (isElementVisibleOnScreen(messageBody)) {
+        // Handle both code and math in a single pass through each message
+        const codeBlocks = messageBody.querySelectorAll("pre code:not([data-highlighted])");
+        codeBlocks.forEach((codeBlock) => {
+          hljs.highlightElement(codeBlock);
+          codeBlock.setAttribute("data-highlighted", "true");
+        });
 
-      // Tag this element to prevent it from being highlighted twice
-      element.setAttribute("data-highlighted", "true");
-
-      // Perform syntax highlighting
-      const codeBlocks = element.querySelectorAll("pre code");
-
-      codeBlocks.forEach((codeBlock) => {
-        hljs.highlightElement(codeBlock);
-      });
-
-      renderMathInElement(element, {
-        delimiters: [
-          { left: "$$", right: "$$", display: true },
-          { left: "$", right: "$", display: false },
-          { left: "\\(", right: "\\)", display: false },
-          { left: "\\[", right: "\\]", display: true },
-        ],
-      });
+        renderMathInElement(messageBody, {
+          delimiters: [
+            { left: "$$", right: "$$", display: true },
+            { left: "$", right: "$", display: false },
+            { left: "\\(", right: "\\)", display: false },
+            { left: "\\[", right: "\\]", display: true },
+          ],
+        });
+      }
     });
 
     observer.observe(targetElement, config);
diff --git a/modules/ui_chat.py b/modules/ui_chat.py
index 61be17e3..8497f7df 100644
--- a/modules/ui_chat.py
+++ b/modules/ui_chat.py
@@ -182,23 +182,31 @@ def create_event_handlers():
 
     # Morph HTML updates instead of updating everything
     shared.gradio['display'].change(None, gradio('display'), None,
-      js="""
-      (text) => {
-        morphdom(
-          document.getElementById('chat').parentNode,
-          '<div class="prose svelte-1ybaih5">' + text + '</div>',
-          {
-            onBeforeElUpdated: function(fromEl, toEl) {
-              if (fromEl.isEqualNode(toEl)) {
-                return false; // Skip identical nodes
-              }
-              return true; // Update only if nodes differ
-            }
-          }
-        );
-      }
+      js="""
+      (text) => {
+        morphdom(
+          document.getElementById('chat').parentNode,
+          '<div class="prose svelte-1ybaih5">' + text + '</div>',
+          {
+            onBeforeElUpdated: function(fromEl, toEl) {
+              if (fromEl.tagName === 'PRE' && fromEl.querySelector('code[data-highlighted]')) {
+                const fromCode = fromEl.querySelector('code');
+                const toCode = toEl.querySelector('code');
+
+                if (fromCode && toCode && fromCode.textContent === toCode.textContent) {
+                  // If the content is the same, preserve the entire <pre> element
      +                  toEl.className = fromEl.className;
      +                  toEl.innerHTML = fromEl.innerHTML;
+                  return false; // Skip updating the <pre> element
      +                }
      +              }
      +              return !fromEl.isEqualNode(toEl); // Update only if nodes differ
                   }
      -        """
      -    )
      +          }
      +        );
      +      }
      +      """
      +    );
       
           shared.gradio['Generate'].click(
               ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
      
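A note on the optimization: the `pre code:not([data-highlighted])` selector turns highlighting into per-block memoization -- each code block is processed once, tagged, and then skipped on every later streaming pass, while the morphdom handler above preserves the tagged block when its text has not changed. The same caching pattern in Python terms (a sketch; expensive_highlight is a hypothetical stand-in for hljs.highlightElement):

_cache = {}  # block id -> (source text, highlighted result)


def expensive_highlight(text):
    return f'<span class="hljs">{text}</span>'  # stand-in for the real, costly pass


def highlight_block(block_id, text):
    cached = _cache.get(block_id)
    if cached is not None and cached[0] == text:
        return cached[1]  # unchanged content: skip the expensive pass entirely

    result = expensive_highlight(text)  # first visit, or the content changed
    _cache[block_id] = (text, result)
    return result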
      From ed16374ecee6f547c49fe4af0630294add317399 Mon Sep 17 00:00:00 2001
      From: Lounger <4087076+TheLounger@users.noreply.github.com>
      Date: Sun, 12 Jan 2025 03:35:22 +0100
      Subject: [PATCH 15/22] Fix the gallery extension (#6656)
      
      ---
       extensions/gallery/script.py | 5 +++--
       1 file changed, 3 insertions(+), 2 deletions(-)
      
      diff --git a/extensions/gallery/script.py b/extensions/gallery/script.py
      index ff0242c8..54f9c745 100644
      --- a/extensions/gallery/script.py
      +++ b/extensions/gallery/script.py
      @@ -93,10 +93,11 @@ def generate_html():
       
       def filter_cards(filter_str=''):
           if filter_str == '':
      -        return cards
      +        return gr.Dataset(samples=cards)
       
           filter_upper = filter_str.upper()
      -    return [k for k in cards if filter_upper in k[1].upper()]
      +    filtered = [k for k in cards if filter_upper in k[1].upper()]
      +    return gr.Dataset(samples=filtered)
       
       
       def select_character(evt: gr.SelectData):
      
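A note on the fix: recent Gradio releases expect an update object rather than a bare list when a gr.Dataset's samples are changed from a callback, which is why the plain-list returns stopped filtering the gallery. A minimal standalone version of the corrected pattern (assumes Gradio 4.x; the sample data is made up):

import gradio as gr

samples = [['Alice card', 'Alice'], ['Bob card', 'Bob']]


def filter_cards(filter_str=''):
    if filter_str == '':
        return gr.Dataset(samples=samples)

    filtered = [s for s in samples if filter_str.upper() in s[1].upper()]
    return gr.Dataset(samples=filtered)  # a bare list would no longer update the component


with gr.Blocks() as demo:
    query = gr.Textbox(label='Filter')
    dataset = gr.Dataset(components=[gr.Textbox(visible=False), gr.Textbox(visible=False)], samples=samples)
    query.change(filter_cards, query, dataset)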
      From facb4155d4a0d343b6f0cbae93f112456f20875b Mon Sep 17 00:00:00 2001
      From: oobabooga <112222186+oobabooga@users.noreply.github.com>
      Date: Sat, 11 Jan 2025 20:57:28 -0800
      Subject: [PATCH 16/22] Fix morphdom leaving ghost elements behind
      
      ---
       css/main.css              | 4 ++--
       modules/html_generator.py | 4 ++--
       2 files changed, 4 insertions(+), 4 deletions(-)
      
      diff --git a/css/main.css b/css/main.css
      index 63681979..1a7efe70 100644
      --- a/css/main.css
      +++ b/css/main.css
      @@ -1249,12 +1249,12 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
           transition: opacity 0.2s;
       }
       
      -.footer-button#copy-button {
      +.footer-button.footer-copy-button {
           bottom: -23px;
           left: 0;
       }
       
      -.footer-button#refresh-button {
      +.footer-button.footer-refresh-button {
           bottom: -23px;
           left: 25px;
       }
      diff --git a/modules/html_generator.py b/modules/html_generator.py
      index 3ddad51d..245c833c 100644
      --- a/modules/html_generator.py
      +++ b/modules/html_generator.py
      @@ -297,8 +297,8 @@ def get_image_cache(path):
       
 copy_svg = '''<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><rect width="14" height="14" x="8" y="8" rx="2" ry="2"></rect><path d="M4 16c-1.1 0-2-.9-2-2V4c0-1.1.9-2 2-2h10c1.1 0 2 .9 2 2"></path></svg>'''
 refresh_svg = '''<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="m2 9 3-3 3 3"></path><path d="M13 18H7a2 2 0 0 1-2-2V6"></path><path d="m22 15-3 3-3-3"></path><path d="M11 6h6a2 2 0 0 1 2 2v10"></path></svg>'''
-copy_button = f'<button class="footer-button" id="copy-button" title="Copy" onclick="copyToClipboard(this)">{copy_svg}</button>'
-refresh_button = f'<button class="footer-button" id="refresh-button" title="Regenerate" onclick="regenerateClick()">{refresh_svg}</button>'
+copy_button = f'<button class="footer-button footer-copy-button" title="Copy" onclick="copyToClipboard(this)">{copy_svg}</button>'
+refresh_button = f'<button class="footer-button footer-refresh-button" title="Regenerate" onclick="regenerateClick()">{refresh_svg}</button>'
       
       def generate_instruct_html(history):
     output = f'<style>{instruct_css}</style><div class="chat" id="chat">'
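A note on the root cause: morphdom keys elements by id (see defaultGetNodeKey in the vendored file, which returns node.getAttribute("id") || node.id), so id="copy-button" on every message made all the buttons collide on one key in fromNodesLookup, and nodes were matched and moved incorrectly, leaving ghost copies behind. The collision is the usual duplicate-dictionary-key problem, easy to see in Python terms (hypothetical data):

buttons = [
    {'id': 'copy-button', 'message': 1},
    {'id': 'copy-button', 'message': 2},  # the same id on every message's button
]

lookup = {}
for b in buttons:
    lookup[b['id']] = b  # later entries silently overwrite earlier ones

print(lookup['copy-button'])  # {'id': 'copy-button', 'message': 2} -- message 1's button can no longer be matched
# Keying the styling off classes (and dropping the ids) sidesteps the collision.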
From c85e5e58d08a18e86e94106740d482250b4c0594 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Mon, 13 Jan 2025 06:20:42 -0800
Subject: [PATCH 17/22] UI: move the new morphdom code to a .js file

---
 js/global_scope_js.js | 24 ++++++++++++++++++++++++
 modules/ui_chat.py    | 27 +--------------------------
 2 files changed, 25 insertions(+), 26 deletions(-)

diff --git a/js/global_scope_js.js b/js/global_scope_js.js
index 983d60f1..f4d9c673 100644
--- a/js/global_scope_js.js
+++ b/js/global_scope_js.js
@@ -21,3 +21,27 @@ function copyToClipboard(element) {
 function regenerateClick() {
   document.getElementById("Regenerate").click();
 }
+
+function handleMorphdomUpdate(text) {
+  morphdom(
+    document.getElementById("chat").parentNode,
+    "<div class=\"prose svelte-1ybaih5\">" + text + "</div>",
+    {
+      onBeforeElUpdated: function(fromEl, toEl) {
+        if (fromEl.tagName === "PRE" && fromEl.querySelector("code[data-highlighted]")) {
+          const fromCode = fromEl.querySelector("code");
+          const toCode = toEl.querySelector("code");
+
+          if (fromCode && toCode && fromCode.textContent === toCode.textContent) {
+            // If the content is the same, preserve the entire <pre> element
      +            toEl.className = fromEl.className;
      +            toEl.innerHTML = fromEl.innerHTML;
+            return false; // Skip updating the <pre> element
      +          }
      +        }
      +        return !fromEl.isEqualNode(toEl); // Update only if nodes differ
      +      }
      +    }
      +  );
      +}
      diff --git a/modules/ui_chat.py b/modules/ui_chat.py
      index 8497f7df..e80fa33b 100644
      --- a/modules/ui_chat.py
      +++ b/modules/ui_chat.py
      @@ -181,32 +181,7 @@ def create_event_handlers():
           shared.reload_inputs = gradio(reload_arr)
       
           # Morph HTML updates instead of updating everything
      -    shared.gradio['display'].change(None, gradio('display'), None,
      -      js="""
      -      (text) => {
      -        morphdom(
      -          document.getElementById('chat').parentNode,
-          '<div class="prose svelte-1ybaih5">' + text + '</div>',
-          {
-            onBeforeElUpdated: function(fromEl, toEl) {
-              if (fromEl.tagName === 'PRE' && fromEl.querySelector('code[data-highlighted]')) {
-                const fromCode = fromEl.querySelector('code');
-                const toCode = toEl.querySelector('code');
-
-                if (fromCode && toCode && fromCode.textContent === toCode.textContent) {
-                  // If the content is the same, preserve the entire <pre> element
      -                  toEl.className = fromEl.className;
      -                  toEl.innerHTML = fromEl.innerHTML;
-                  return false; // Skip updating the <pre> element
      -                }
      -              }
      -              return !fromEl.isEqualNode(toEl); // Update only if nodes differ
      -            }
      -          }
      -        );
      -      }
      -      """
      -    );
      +    shared.gradio['display'].change(None, gradio('display'), None, js="(text) => handleMorphdomUpdate(text)")
       
           shared.gradio['Generate'].click(
               ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
      
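A note on the pattern: with handleMorphdomUpdate living in global_scope_js.js, the Gradio listener becomes a pure client-side shim -- fn=None, no Python round trip, just a js= expression invoked on every change of the hidden buffer. In isolation the pattern looks roughly like this (a sketch assuming Gradio 4.x; handleUpdate is a placeholder for a function loaded into the page's global scope, the way ui.py injects global_scope_js):

import gradio as gr

with gr.Blocks(head='<script>function handleUpdate(t) { console.log(t); }</script>') as demo:
    buffer = gr.Textbox(visible=False)  # hidden buffer, like shared.gradio['display']
    # fn=None: nothing runs server-side; the js expression runs in the browser
    buffer.change(None, buffer, None, js='(text) => handleUpdate(text)')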
      From 53b838d6c5637406533371cef3b440549f43f4de Mon Sep 17 00:00:00 2001
      From: Underscore <47636331+Th-Underscore@users.noreply.github.com>
      Date: Mon, 13 Jan 2025 16:01:50 -0500
      Subject: [PATCH 18/22] HTML: Fix quote pair RegEx matching for all quote types
       (#6661)
      
      ---
       modules/html_generator.py | 11 +++++++++--
       1 file changed, 9 insertions(+), 2 deletions(-)
      
      diff --git a/modules/html_generator.py b/modules/html_generator.py
      index 245c833c..c14a28b4 100644
      --- a/modules/html_generator.py
      +++ b/modules/html_generator.py
      @@ -73,7 +73,6 @@ def fix_newlines(string):
       
       
       def replace_quotes(text):
      -
           # Define a list of quote pairs (opening and closing), using HTML entities
           quote_pairs = [
         ('&quot;', '&quot;'),  # Double quotes
      @@ -84,14 +83,22 @@ def replace_quotes(text):
         ('&lsquo;', '&rsquo;'),  # Alternative single quotes
         ('&#8220;', '&#8221;'),  # Unicode quotes (numeric entities)
         ('&#x201C;', '&#x201D;'),  # Unicode quotes (hex entities)
      +        ('\u201C', '\u201D'),  # Unicode quotes (literal chars)
           ]
       
           # Create a regex pattern that matches any of the quote pairs, including newlines
           pattern = '|'.join(f'({re.escape(open_q)})(.*?)({re.escape(close_q)})' for open_q, close_q in quote_pairs)
       
     # Replace matched patterns with <q> tags, keeping original quotes
-    replaced_text = re.sub(pattern, lambda m: f'<q>{m.group(1)}{m.group(2)}{m.group(3)}</q>', text, flags=re.DOTALL)
      +    def replacer(m):
      +        # Find the first non-None group set
      +        for i in range(1, len(m.groups()), 3):  # Step through each sub-pattern's groups
      +            if m.group(i):  # If this sub-pattern matched
+                return f'<q>{m.group(i)}{m.group(i + 1)}{m.group(i + 2)}</q>'
       
      +        return m.group(0)  # Fallback (shouldn't happen)
      +
      +    replaced_text = re.sub(pattern, replacer, text, flags=re.DOTALL)
           return replaced_text
       
       
      
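A note on the fix: joining N quote pairs with `|` yields three capture groups per alternative, so a match on pair k populates only groups 3k+1 through 3k+3 -- the old lambda's m.group(1) was only ever correct for the first pair. The stride-3 scan can be checked on its own (same technique as the patch, reduced to two pairs):

import re

quote_pairs = [('"', '"'), ('“', '”')]
pattern = '|'.join(f'({re.escape(o)})(.*?)({re.escape(c)})' for o, c in quote_pairs)


def replacer(m):
    for i in range(1, len(m.groups()), 3):  # step through each sub-pattern's three groups
        if m.group(i):  # the pair that actually matched
            return f'<q>{m.group(i)}{m.group(i + 1)}{m.group(i + 2)}</q>'

    return m.group(0)


print(re.sub(pattern, replacer, 'say "hi" and “bye”', flags=re.DOTALL))
# say <q>"hi"</q> and <q>“bye”</q>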
      From c832953ff723aa5dafcb3aac4f19acd0df56bb5b Mon Sep 17 00:00:00 2001
      From: oobabooga <112222186+oobabooga@users.noreply.github.com>
      Date: Tue, 14 Jan 2025 05:59:55 -0800
      Subject: [PATCH 19/22] UI: Activate auto_max_new_tokens by default
      
      ---
       modules/shared.py      | 2 +-
       settings-template.yaml | 2 +-
       2 files changed, 2 insertions(+), 2 deletions(-)
      
      diff --git a/modules/shared.py b/modules/shared.py
      index 93cd2272..f1e12673 100644
      --- a/modules/shared.py
      +++ b/modules/shared.py
      @@ -47,7 +47,7 @@ settings = {
           'prompt_lookup_num_tokens': 0,
           'max_tokens_second': 0,
           'max_updates_second': 0,
      -    'auto_max_new_tokens': False,
      +    'auto_max_new_tokens': True,
           'ban_eos_token': False,
           'add_bos_token': True,
           'skip_special_tokens': True,
      diff --git a/settings-template.yaml b/settings-template.yaml
      index b61dc4e0..93a64abb 100644
      --- a/settings-template.yaml
      +++ b/settings-template.yaml
      @@ -19,7 +19,7 @@ max_new_tokens_max: 4096
       prompt_lookup_num_tokens: 0
       max_tokens_second: 0
       max_updates_second: 0
      -auto_max_new_tokens: false
      +auto_max_new_tokens: true
       ban_eos_token: false
       add_bos_token: true
       skip_special_tokens: true
      
      From f843cb475bd3b880838a6a3a6ff200a2e290b115 Mon Sep 17 00:00:00 2001
      From: oobabooga <112222186+oobabooga@users.noreply.github.com>
      Date: Tue, 14 Jan 2025 08:12:51 -0800
      Subject: [PATCH 20/22] UI: update a help message
      
      ---
       modules/ui_chat.py | 2 +-
       1 file changed, 1 insertion(+), 1 deletion(-)
      
      diff --git a/modules/ui_chat.py b/modules/ui_chat.py
      index e80fa33b..395300d0 100644
      --- a/modules/ui_chat.py
      +++ b/modules/ui_chat.py
      @@ -164,7 +164,7 @@ def create_chat_settings_ui():
               with gr.Row():
                   with gr.Column():
                       shared.gradio['custom_system_message'] = gr.Textbox(value=shared.settings['custom_system_message'], lines=2, label='Custom system message', info='If not empty, will be used instead of the default one.', elem_classes=['add_scrollbar'])
      -                shared.gradio['instruction_template_str'] = gr.Textbox(value='', label='Instruction template', lines=24, info='Change this according to the model/LoRA that you are using. Used in instruct and chat-instruct modes.', elem_classes=['add_scrollbar', 'monospace'])
      +                shared.gradio['instruction_template_str'] = gr.Textbox(value='', label='Instruction template', lines=24, info='This gets autodetected; you usually don\'t need to change it. Used in instruct and chat-instruct modes.', elem_classes=['add_scrollbar', 'monospace'])
                       with gr.Row():
                           shared.gradio['send_instruction_to_default'] = gr.Button('Send to default', elem_classes=['small-button'])
                           shared.gradio['send_instruction_to_notebook'] = gr.Button('Send to notebook', elem_classes=['small-button'])
      
      From 1ef748fb203730aae92b8f28f44abb68699accb4 Mon Sep 17 00:00:00 2001
      From: oobabooga <112222186+oobabooga@users.noreply.github.com>
      Date: Tue, 14 Jan 2025 16:44:15 -0800
      Subject: [PATCH 21/22] Lint
      
      ---
       extensions/gallery/script.py | 1 -
       modules/html_generator.py    | 1 +
       2 files changed, 1 insertion(+), 1 deletion(-)
      
      diff --git a/extensions/gallery/script.py b/extensions/gallery/script.py
      index 54f9c745..76be4a58 100644
      --- a/extensions/gallery/script.py
      +++ b/extensions/gallery/script.py
      @@ -5,7 +5,6 @@ import gradio as gr
       from modules.html_generator import get_image_cache
       from modules.shared import gradio
       
      -
       params = {
           'items_per_page': 50,
           'open': False,
      diff --git a/modules/html_generator.py b/modules/html_generator.py
      index c14a28b4..29973412 100644
      --- a/modules/html_generator.py
      +++ b/modules/html_generator.py
@@ -307,6 +307,7 @@ refresh_svg = '''<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><path d="m2 9 3-3 3 3"></path><path d="M13 18H7a2 2 0 0 1-2-2V6"></path><path d="m22 15-3 3-3-3"></path><path d="M11 6h6a2 2 0 0 1 2 2v10"></path></svg>'''
 copy_button = f'<button class="footer-button footer-copy-button" title="Copy" onclick="copyToClipboard(this)">{copy_svg}</button>'
 refresh_button = f'<button class="footer-button footer-refresh-button" title="Regenerate" onclick="regenerateClick()">{refresh_svg}</button>'
       
      +
       def generate_instruct_html(history):
     output = f'<style>{instruct_css}</style><div class="chat" id="chat">'

From 5d257397678e03694ded7eca2a9639d04368039b Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Tue, 14 Jan 2025 16:59:36 -0800
Subject: [PATCH 22/22] Make the update wizards nice

---
 update_wizard_linux.sh    | 2 +-
 update_wizard_macos.sh    | 2 +-
 update_wizard_windows.bat | 2 +-
 3 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/update_wizard_linux.sh b/update_wizard_linux.sh
index 3ada9a1e..c81d9d9b 100755
--- a/update_wizard_linux.sh
+++ b/update_wizard_linux.sh
@@ -23,4 +23,4 @@ source "$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh" # otherwise conda complains a
 conda activate "$INSTALL_ENV_DIR"
 
 # update installer env
-python one_click.py --update-wizard && echo -e "\nDone!"
+python one_click.py --update-wizard && echo -e "\nHave a great day!"
diff --git a/update_wizard_macos.sh b/update_wizard_macos.sh
index c5add61e..f58bb9e9 100755
--- a/update_wizard_macos.sh
+++ b/update_wizard_macos.sh
@@ -23,4 +23,4 @@ source "$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh" # otherwise conda complains a
 conda activate "$INSTALL_ENV_DIR"
 
 # update installer env
-python one_click.py --update-wizard && echo -e "\nDone!"
+python one_click.py --update-wizard && echo -e "\nHave a great day!"
diff --git a/update_wizard_windows.bat b/update_wizard_windows.bat
index 2b23f322..fac251a7 100755
--- a/update_wizard_windows.bat
+++ b/update_wizard_windows.bat
@@ -30,7 +30,7 @@ call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%" || (
 
 @rem update installer env
 call python one_click.py --update-wizard && (
 	echo.
-	echo Done!
+	echo Have a great day!
 )
 
 :end