From c55d3c61c6e44712e90fa60c1e434d7687e90947 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 31 May 2025 14:21:42 -0700 Subject: [PATCH 01/82] Bump exllamav2 to 0.3.1 --- requirements/full/requirements.txt | 6 +++--- requirements/full/requirements_amd.txt | 4 ++-- requirements/full/requirements_amd_noavx2.txt | 4 ++-- requirements/full/requirements_apple_intel.txt | 2 +- requirements/full/requirements_apple_silicon.txt | 2 +- requirements/full/requirements_noavx2.txt | 6 +++--- 6 files changed, 12 insertions(+), 12 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index 2c322715..dd631341 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -37,8 +37,8 @@ https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_ https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" +https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" https://github.com/oobabooga/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index 6aeb325e..acdbd455 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -34,5 +34,5 @@ tiktoken # AMD wheels https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" 
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" +https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index 3b052423..a478d7d3 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -34,5 +34,5 @@ tiktoken # AMD wheels https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" +https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index 8c51459e..98ed90a2 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -35,4 +35,4 @@ tiktoken https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9-py3-none-any.whl -https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl +https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl diff --git a/requirements/full/requirements_apple_silicon.txt 
b/requirements/full/requirements_apple_silicon.txt index b9f15d45..cb72d036 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -36,4 +36,4 @@ https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_ https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9-py3-none-any.whl -https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl +https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index dfd42577..f6982134 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -37,8 +37,8 @@ https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_ https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" +https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" https://github.com/oobabooga/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" 
https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" From dc8ed6dbe769457b3a2758780abefab0ab04c8a4 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 31 May 2025 14:27:33 -0700 Subject: [PATCH 02/82] Bump exllamav3 to 0.0.3 --- requirements/full/requirements.txt | 4 ++-- requirements/full/requirements_apple_intel.txt | 2 +- requirements/full/requirements_apple_silicon.txt | 2 +- requirements/full/requirements_noavx2.txt | 4 ++-- 4 files changed, 6 insertions(+), 6 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index dd631341..ec055876 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -35,8 +35,8 @@ tiktoken # CUDA wheels https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index 98ed90a2..96a48f32 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -34,5 +34,5 @@ tiktoken # Mac wheels https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" 
-https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9-py3-none-any.whl +https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3-py3-none-any.whl https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index cb72d036..14b74081 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -35,5 +35,5 @@ tiktoken https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" -https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9-py3-none-any.whl +https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3-py3-none-any.whl https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index f6982134..de507308 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -35,8 +35,8 @@ tiktoken # CUDA wheels https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" 
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" From 1d88456659d8e71800f6fb732b8cad7d36fa4c20 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 31 May 2025 20:15:07 -0700 Subject: [PATCH 03/82] Add support for .docx attachments --- README.md | 2 +- modules/chat.py | 50 +++++++++++++++++++ requirements/full/requirements.txt | 1 + requirements/full/requirements_amd.txt | 1 + requirements/full/requirements_amd_noavx2.txt | 1 + .../full/requirements_apple_intel.txt | 1 + .../full/requirements_apple_silicon.txt | 1 + requirements/full/requirements_cpu_only.txt | 1 + .../full/requirements_cpu_only_noavx2.txt | 1 + requirements/full/requirements_noavx2.txt | 1 + requirements/full/requirements_nowheels.txt | 1 + requirements/portable/requirements.txt | 1 + .../portable/requirements_apple_intel.txt | 1 + .../portable/requirements_apple_silicon.txt | 1 + .../portable/requirements_cpu_only.txt | 1 + .../portable/requirements_cpu_only_noavx2.txt | 1 + requirements/portable/requirements_noavx2.txt | 1 + .../portable/requirements_nowheels.txt | 1 + requirements/portable/requirements_vulkan.txt | 1 + .../portable/requirements_vulkan_noavx2.txt | 1 + 20 files changed, 69 insertions(+), 1 deletion(-) diff --git a/README.md b/README.md index 55df33d2..16b02539 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. - Easy setup: Choose between **portable builds** (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or the one-click installer that creates a self-contained `installer_files` directory. - 100% offline and private, with zero telemetry, external resources, or remote update requests. - Automatic prompt formatting using Jinja2 templates. You don't need to ever worry about prompt formats. -- **File attachments**: Upload text files and PDF documents to talk about their contents. +- **File attachments**: Upload text files, PDF documents, and .docx documents to talk about their contents. - **Web search**: Optionally search the internet with LLM-generated queries to add context to the conversation. - Aesthetic UI with dark and light themes. - `instruct` mode for instruction-following (like ChatGPT), and `chat-instruct`/`chat` modes for talking to custom characters. diff --git a/modules/chat.py b/modules/chat.py index 881f7330..ba61c7a9 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -500,6 +500,9 @@ def add_message_attachment(history, row_idx, file_path, is_user=True): # Process PDF file content = extract_pdf_text(path) file_type = "application/pdf" + elif file_extension == '.docx': + content = extract_docx_text(path) + file_type = "application/docx" else: # Default handling for text files with open(path, 'r', encoding='utf-8') as f: @@ -538,6 +541,53 @@ def extract_pdf_text(pdf_path): return f"[Error extracting PDF text: {str(e)}]" +def extract_docx_text(docx_path): + """ + Extract text from a .docx file, including headers, + body (paragraphs and tables), and footers. 
+ """ + try: + import docx + + doc = docx.Document(docx_path) + parts = [] + + # 1) Extract non-empty header paragraphs from each section + for section in doc.sections: + for para in section.header.paragraphs: + text = para.text.strip() + if text: + parts.append(text) + + # 2) Extract body blocks (paragraphs and tables) in document order + parent_elm = doc.element.body + for child in parent_elm.iterchildren(): + if isinstance(child, docx.oxml.text.paragraph.CT_P): + para = docx.text.paragraph.Paragraph(child, doc) + text = para.text.strip() + if text: + parts.append(text) + + elif isinstance(child, docx.oxml.table.CT_Tbl): + table = docx.table.Table(child, doc) + for row in table.rows: + cells = [cell.text.strip() for cell in row.cells] + parts.append("\t".join(cells)) + + # 3) Extract non-empty footer paragraphs from each section + for section in doc.sections: + for para in section.footer.paragraphs: + text = para.text.strip() + if text: + parts.append(text) + + return "\n".join(parts) + + except Exception as e: + logger.error(f"Error extracting text from DOCX: {e}") + return f"[Error extracting DOCX text: {str(e)}]" + + def generate_search_query(user_message, state): """Generate a search query from user message using the LLM""" # Augment the user message with search instruction diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index ec055876..e61677a6 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -16,6 +16,7 @@ Pillow>=9.5.0 psutil pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index acdbd455..f807199d 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -15,6 +15,7 @@ Pillow>=9.5.0 psutil pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index a478d7d3..4fb70eb1 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -15,6 +15,7 @@ Pillow>=9.5.0 psutil pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index 96a48f32..a311ab9b 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -15,6 +15,7 @@ Pillow>=9.5.0 psutil pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index 14b74081..30e8409a 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -15,6 +15,7 @@ Pillow>=9.5.0 psutil pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index 0877d968..70949949 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -15,6 +15,7 @@ Pillow>=9.5.0 psutil pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index cab78237..318bb93a 100644 --- 
a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -15,6 +15,7 @@ Pillow>=9.5.0 psutil pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index de507308..e0cb84b4 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -16,6 +16,7 @@ Pillow>=9.5.0 psutil pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt index 5d9f84ce..a412367c 100644 --- a/requirements/full/requirements_nowheels.txt +++ b/requirements/full/requirements_nowheels.txt @@ -15,6 +15,7 @@ Pillow>=9.5.0 psutil pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt index fdae681d..bde310e1 100644 --- a/requirements/portable/requirements.txt +++ b/requirements/portable/requirements.txt @@ -7,6 +7,7 @@ markdown numpy==1.26.* pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt index a58f39f7..521edc0c 100644 --- a/requirements/portable/requirements_apple_intel.txt +++ b/requirements/portable/requirements_apple_intel.txt @@ -7,6 +7,7 @@ markdown numpy==1.26.* pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt index 91ea3a6d..ef7946ff 100644 --- a/requirements/portable/requirements_apple_silicon.txt +++ b/requirements/portable/requirements_apple_silicon.txt @@ -7,6 +7,7 @@ markdown numpy==1.26.* pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt index 37e5aa40..a3ad743e 100644 --- a/requirements/portable/requirements_cpu_only.txt +++ b/requirements/portable/requirements_cpu_only.txt @@ -7,6 +7,7 @@ markdown numpy==1.26.* pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt index dcb2884b..eec052d3 100644 --- a/requirements/portable/requirements_cpu_only_noavx2.txt +++ b/requirements/portable/requirements_cpu_only_noavx2.txt @@ -7,6 +7,7 @@ markdown numpy==1.26.* pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt index 8f1295bb..c9898a05 100644 --- a/requirements/portable/requirements_noavx2.txt +++ b/requirements/portable/requirements_noavx2.txt @@ -7,6 +7,7 @@ markdown numpy==1.26.* pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/portable/requirements_nowheels.txt b/requirements/portable/requirements_nowheels.txt index 21805fe2..f6c866cf 100644 --- a/requirements/portable/requirements_nowheels.txt +++ b/requirements/portable/requirements_nowheels.txt @@ -7,6 +7,7 @@ markdown numpy==1.26.* pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt 
index 858b4488..0de9c7cb 100644 --- a/requirements/portable/requirements_vulkan.txt +++ b/requirements/portable/requirements_vulkan.txt @@ -7,6 +7,7 @@ markdown numpy==1.26.* pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt index 569bae99..2bfb4d51 100644 --- a/requirements/portable/requirements_vulkan_noavx2.txt +++ b/requirements/portable/requirements_vulkan_noavx2.txt @@ -7,6 +7,7 @@ markdown numpy==1.26.* pydantic==2.8.2 PyPDF2==3.0.1 +python-docx==1.1.2 pyyaml requests rich From 4a2727b71d8976366cc35e18048ad9742ccb1898 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 31 May 2025 20:24:31 -0700 Subject: [PATCH 04/82] Add a tooltip to the file upload button --- js/main.js | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/js/main.js b/js/main.js index f23dc246..0fdd7ffd 100644 --- a/js/main.js +++ b/js/main.js @@ -872,3 +872,10 @@ function navigateLastAssistantMessage(direction) { return false; } + +//------------------------------------------------ +// Tooltips +//------------------------------------------------ + +// File upload button +document.querySelector("#chat-input .upload-button").title = "Upload text files, PDFs, and DOCX documents"; From f8d220c1e6c0263e76797b0e34dc9ce20335875b Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 31 May 2025 21:22:36 -0700 Subject: [PATCH 05/82] Add a tooltip to the web search checkbox --- js/main.js | 3 +++ modules/ui_chat.py | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/js/main.js b/js/main.js index 0fdd7ffd..b9cb3cdd 100644 --- a/js/main.js +++ b/js/main.js @@ -879,3 +879,6 @@ function navigateLastAssistantMessage(direction) { // File upload button document.querySelector("#chat-input .upload-button").title = "Upload text files, PDFs, and DOCX documents"; + +// Activate web search +document.getElementById("web-search").title = "Search the internet with DuckDuckGo"; diff --git a/modules/ui_chat.py b/modules/ui_chat.py index d79aa523..73528a92 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -87,7 +87,7 @@ def create_ui(): shared.gradio['start_with'] = gr.Textbox(label='Start reply with', placeholder='Sure thing!', value=shared.settings['start_with'], elem_classes=['add_scrollbar']) with gr.Row(): - shared.gradio['enable_web_search'] = gr.Checkbox(value=shared.settings.get('enable_web_search', False), label='Activate web search') + shared.gradio['enable_web_search'] = gr.Checkbox(value=shared.settings.get('enable_web_search', False), label='Activate web search', elem_id='web-search') with gr.Row(visible=shared.settings.get('enable_web_search', False)) as shared.gradio['web_search_row']: shared.gradio['web_search_pages'] = gr.Number(value=shared.settings.get('web_search_pages', 3), precision=0, label='Number of pages to download', minimum=1, maximum=10) From 85f2f01a3a78dc85bce9eeded71d9ff9f5bd4ab3 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 31 May 2025 21:29:25 -0700 Subject: [PATCH 06/82] UI: Fix extra gaps on the right sidebar --- css/main.css | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/css/main.css b/css/main.css index 967d94ed..bdaacd4f 100644 --- a/css/main.css +++ b/css/main.css @@ -1555,3 +1555,8 @@ strong { button:focus { outline: none; } + +/* Fix extra gaps for hidden elements on the right 
sidebar */ +.svelte-sa48pu.stretch:has(> .hidden:only-child) { + display: none; +} From 98a7508a99f2c3bcb2139f7ef975b692f004c695 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 31 May 2025 22:18:17 -0700 Subject: [PATCH 07/82] UI: Move 'Show controls' inside the hover menu --- css/main.css | 52 +++++++++++++++++++++++----------------------- js/main.js | 40 ++++++++++++++--------------------- modules/ui_chat.py | 25 ++++++++-------------- 3 files changed, 51 insertions(+), 66 deletions(-) diff --git a/css/main.css b/css/main.css index bdaacd4f..adc59fba 100644 --- a/css/main.css +++ b/css/main.css @@ -582,7 +582,6 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { #chat-input { padding: 0; - padding-top: 18px; background: transparent; border: none; } @@ -661,31 +660,6 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { } } -#show-controls { - position: absolute; - background-color: transparent; - border: 0 !important; - border-radius: 0; -} - -#show-controls label { - z-index: 1000; - position: absolute; - right: 30px; - top: 10px; - white-space: nowrap; - overflow: hidden; - text-overflow: ellipsis; -} - -.dark #show-controls span { - color: var(--neutral-400); -} - -#show-controls span { - color: var(--neutral-600); -} - #typing-container { display: none; position: absolute; @@ -785,6 +759,32 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { background: var(--selected-item-color-dark) !important; } +#show-controls { + height: 36px; + border-top: 1px solid var(--border-color-dark) !important; + border-left: 1px solid var(--border-color-dark) !important; + border-right: 1px solid var(--border-color-dark) !important; + border-radius: 0; + border-bottom: 0 !important; + background-color: var(--darker-gray); + padding-top: 3px; + padding-left: 4px; + display: flex; +} + +#show-controls label { + display: flex; + flex-direction: row-reverse; + font-weight: bold; + justify-content: space-between; + width: 100%; + padding-right: 12px; +} + +#show-controls label input { + margin-top: 4px; +} + .transparent-substring { opacity: 0.333; } diff --git a/js/main.js b/js/main.js index b9cb3cdd..3652daa0 100644 --- a/js/main.js +++ b/js/main.js @@ -277,7 +277,7 @@ for (i = 0; i < slimDropdownElements.length; i++) { // The show/hide events were adapted from: // https://github.com/SillyTavern/SillyTavern/blob/6c8bd06308c69d51e2eb174541792a870a83d2d6/public/script.js //------------------------------------------------ -var buttonsInChat = document.querySelectorAll("#chat-tab #chat-buttons button"); +var buttonsInChat = document.querySelectorAll("#chat-tab #chat-buttons button, #chat-tab #chat-buttons #show-controls"); var button = document.getElementById("hover-element-button"); var menu = document.getElementById("hover-menu"); var istouchscreen = (navigator.maxTouchPoints > 0) || "ontouchstart" in document.documentElement; @@ -298,18 +298,21 @@ if (buttonsInChat.length > 0) { const thisButton = buttonsInChat[i]; menu.appendChild(thisButton); - thisButton.addEventListener("click", () => { - hideMenu(); - }); + // Only apply transformations to button elements + if (thisButton.tagName.toLowerCase() === 'button') { + thisButton.addEventListener("click", () => { + hideMenu(); + }); + + const buttonText = thisButton.textContent; + const matches = buttonText.match(/(\(.*?\))/); - const buttonText = thisButton.textContent; - const matches = buttonText.match(/(\(.*?\))/); - - if (matches && matches.length > 1) { - // Apply the transparent-substring class to the 
matched substring - const substring = matches[1]; - const newText = buttonText.replace(substring, ` ${substring.slice(1, -1)}`); - thisButton.innerHTML = newText; + if (matches && matches.length > 1) { + // Apply the transparent-substring class to the matched substring + const substring = matches[1]; + const newText = buttonText.replace(substring, ` ${substring.slice(1, -1)}`); + thisButton.innerHTML = newText; + } } } } @@ -382,21 +385,10 @@ document.addEventListener("click", function (event) { } }); -//------------------------------------------------ -// Relocate the "Show controls" checkbox -//------------------------------------------------ -var elementToMove = document.getElementById("show-controls"); -var parent = elementToMove.parentNode; -for (var i = 0; i < 2; i++) { - parent = parent.parentNode; -} - -parent.insertBefore(elementToMove, parent.firstChild); - //------------------------------------------------ // Position the chat input //------------------------------------------------ -document.getElementById("show-controls").parentNode.classList.add("chat-input-positioned"); +document.getElementById("chat-input-row").classList.add("chat-input-positioned"); //------------------------------------------------ // Focus on the chat input diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 73528a92..822b77b8 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -55,7 +55,6 @@ def create_ui(): with gr.Column(scale=10, elem_id='chat-input-container'): shared.gradio['textbox'] = gr.MultimodalTextbox(label='', placeholder='Send a message', file_types=['text', '.pdf'], file_count="multiple", elem_id='chat-input', elem_classes=['add_scrollbar']) - shared.gradio['show_controls'] = gr.Checkbox(value=shared.settings['show_controls'], label='Show controls (Ctrl+S)', elem_id='show-controls') shared.gradio['typing-dots'] = gr.HTML(value='
', label='typing', elem_id='typing-container') with gr.Column(scale=1, elem_id='generate-stop-container'): @@ -65,21 +64,15 @@ def create_ui(): # Hover menu buttons with gr.Column(elem_id='chat-buttons'): - with gr.Row(): - shared.gradio['Regenerate'] = gr.Button('Regenerate (Ctrl + Enter)', elem_id='Regenerate') - shared.gradio['Continue'] = gr.Button('Continue (Alt + Enter)', elem_id='Continue') - shared.gradio['Remove last'] = gr.Button('Remove last reply (Ctrl + Shift + Backspace)', elem_id='Remove-last') - - with gr.Row(): - shared.gradio['Impersonate'] = gr.Button('Impersonate (Ctrl + Shift + M)', elem_id='Impersonate') - - with gr.Row(): - shared.gradio['Send dummy message'] = gr.Button('Send dummy message') - shared.gradio['Send dummy reply'] = gr.Button('Send dummy reply') - - with gr.Row(): - shared.gradio['send-chat-to-default'] = gr.Button('Send to Default') - shared.gradio['send-chat-to-notebook'] = gr.Button('Send to Notebook') + shared.gradio['Regenerate'] = gr.Button('Regenerate (Ctrl + Enter)', elem_id='Regenerate') + shared.gradio['Continue'] = gr.Button('Continue (Alt + Enter)', elem_id='Continue') + shared.gradio['Remove last'] = gr.Button('Remove last reply (Ctrl + Shift + Backspace)', elem_id='Remove-last') + shared.gradio['Impersonate'] = gr.Button('Impersonate (Ctrl + Shift + M)', elem_id='Impersonate') + shared.gradio['Send dummy message'] = gr.Button('Send dummy message') + shared.gradio['Send dummy reply'] = gr.Button('Send dummy reply') + shared.gradio['send-chat-to-default'] = gr.Button('Send to Default') + shared.gradio['send-chat-to-notebook'] = gr.Button('Send to Notebook') + shared.gradio['show_controls'] = gr.Checkbox(value=shared.settings['show_controls'], label='Show controls (Ctrl+S)', elem_id='show-controls') with gr.Row(elem_id='chat-controls', elem_classes=['pretty_scrollbar']): with gr.Column(): From 0816ecedb75add2dd1a61c9bd9a477e5d847c88a Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 31 May 2025 22:24:39 -0700 Subject: [PATCH 08/82] Lint --- js/main.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/js/main.js b/js/main.js index 3652daa0..d152a572 100644 --- a/js/main.js +++ b/js/main.js @@ -299,11 +299,11 @@ if (buttonsInChat.length > 0) { menu.appendChild(thisButton); // Only apply transformations to button elements - if (thisButton.tagName.toLowerCase() === 'button') { + if (thisButton.tagName.toLowerCase() === "button") { thisButton.addEventListener("click", () => { hideMenu(); }); - + const buttonText = thisButton.textContent; const matches = buttonText.match(/(\(.*?\))/); From 9e801930087170bb24628e680ad4cbd4f6a5b098 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 31 May 2025 22:39:07 -0700 Subject: [PATCH 09/82] Add the model name to each message's metadata --- modules/chat.py | 2 +- modules/html_generator.py | 47 ++++++++++++++++++++++++++------------- 2 files changed, 32 insertions(+), 17 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index ba61c7a9..1222d2bb 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -710,7 +710,7 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess # Add timestamp for assistant's response at the start of generation row_idx = len(output['internal']) - 1 - update_message_metadata(output['metadata'], "assistant", row_idx, timestamp=get_current_timestamp()) + update_message_metadata(output['metadata'], "assistant", row_idx, 
timestamp=get_current_timestamp(), model_name=shared.model_name) # Generate reply = None diff --git a/modules/html_generator.py b/modules/html_generator.py index cbf3e19c..03b5d485 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -350,12 +350,14 @@ remove_button = f'' -def format_message_timestamp(history, role, index): +def format_message_timestamp(history, role, index, tooltip_include_timestamp=True): """Get a formatted timestamp HTML span for a message if available""" key = f"{role}_{index}" if 'metadata' in history and key in history['metadata'] and history['metadata'][key].get('timestamp'): timestamp = history['metadata'][key]['timestamp'] - return f"{timestamp}" + tooltip_text = get_message_tooltip(history, role, index, include_timestamp=tooltip_include_timestamp) + title_attr = f' title="{html.escape(tooltip_text)}"' if tooltip_text else '' + return f"{timestamp}" return "" @@ -388,6 +390,23 @@ def format_message_attachments(history, role, index): return "" +def get_message_tooltip(history, role, index, include_timestamp=True): + """Get tooltip text combining timestamp and model name for a message""" + key = f"{role}_{index}" + if 'metadata' not in history or key not in history['metadata']: + return "" + + meta = history['metadata'][key] + tooltip_parts = [] + + if include_timestamp and meta.get('timestamp'): + tooltip_parts.append(meta['timestamp']) + if meta.get('model_name'): + tooltip_parts.append(f"Model: {meta['model_name']}") + + return " | ".join(tooltip_parts) + + def get_version_navigation_html(history, i, role): """Generate simple navigation arrows for message versions""" key = f"{role}_{i}" @@ -462,15 +481,13 @@ def generate_instruct_html(history): # Create info buttons for timestamps if they exist info_message_user = "" if user_timestamp != "": - # Extract the timestamp value from the span - user_timestamp_value = user_timestamp.split('>', 1)[1].split('<', 1)[0] - info_message_user = info_button.replace("message", user_timestamp_value) + tooltip_text = get_message_tooltip(history, "user", i) + info_message_user = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"') info_message_assistant = "" if assistant_timestamp != "": - # Extract the timestamp value from the span - assistant_timestamp_value = assistant_timestamp.split('>', 1)[1].split('<', 1)[0] - info_message_assistant = info_button.replace("message", assistant_timestamp_value) + tooltip_text = get_message_tooltip(history, "assistant", i) + info_message_assistant = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"') if converted_visible[0]: # Don't display empty user messages output += ( @@ -521,8 +538,8 @@ def generate_cai_chat_html(history, name1, name2, style, character, reset_cache= converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible] # Get timestamps - user_timestamp = format_message_timestamp(history, "user", i) - assistant_timestamp = format_message_timestamp(history, "assistant", i) + user_timestamp = format_message_timestamp(history, "user", i, tooltip_include_timestamp=False) + assistant_timestamp = format_message_timestamp(history, "assistant", i, tooltip_include_timestamp=False) # Get attachments user_attachments = format_message_attachments(history, "user", i) @@ -580,15 +597,13 @@ def generate_chat_html(history, name1, name2, reset_cache=False): # Create info buttons for timestamps if they exist info_message_user = "" if user_timestamp != 
"": - # Extract the timestamp value from the span - user_timestamp_value = user_timestamp.split('>', 1)[1].split('<', 1)[0] - info_message_user = info_button.replace("message", user_timestamp_value) + tooltip_text = get_message_tooltip(history, "user", i) + info_message_user = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"') info_message_assistant = "" if assistant_timestamp != "": - # Extract the timestamp value from the span - assistant_timestamp_value = assistant_timestamp.split('>', 1)[1].split('<', 1)[0] - info_message_assistant = info_button.replace("message", assistant_timestamp_value) + tooltip_text = get_message_tooltip(history, "assistant", i) + info_message_assistant = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"') if converted_visible[0]: # Don't display empty user messages output += ( From 88ff3e6ad8ddf96aabf6d7ceb4c228ed6fb08980 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 1 Jun 2025 08:00:37 -0700 Subject: [PATCH 10/82] CSS fixes after 98a7508a99f2c3bcb2139f7ef975b692f004c695 --- css/main.css | 2 +- js/main.js | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/css/main.css b/css/main.css index adc59fba..0c6dc16e 100644 --- a/css/main.css +++ b/css/main.css @@ -665,7 +665,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { position: absolute; background-color: transparent; left: -2px; - top: 4px; + top: -14px; padding: var(--block-padding); } diff --git a/js/main.js b/js/main.js index d152a572..05c19571 100644 --- a/js/main.js +++ b/js/main.js @@ -184,7 +184,7 @@ const observer = new MutationObserver(function(mutations) { const prevSibling = lastChild?.previousElementSibling; if (lastChild && prevSibling) { lastChild.style.setProperty("margin-bottom", - `max(0px, calc(max(70vh, 100vh - ${prevSibling.offsetHeight}px - 102px) - ${lastChild.offsetHeight}px))`, + `max(0px, calc(max(70vh, 100vh - ${prevSibling.offsetHeight}px - 84px) - ${lastChild.offsetHeight}px))`, "important" ); } From 3e3746283cd60409f83b6cf5549ba08d12612bde Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 1 Jun 2025 10:55:31 -0700 Subject: [PATCH 11/82] Improve the typing dots position --- css/main.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/css/main.css b/css/main.css index 0c6dc16e..296476cd 100644 --- a/css/main.css +++ b/css/main.css @@ -665,7 +665,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { position: absolute; background-color: transparent; left: -2px; - top: -14px; + top: -5px; padding: var(--block-padding); } From 83849336d8efcae0340b768a39c83106ee406264 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 1 Jun 2025 10:58:28 -0700 Subject: [PATCH 12/82] Improve how Show controls looks in the hover menu --- css/main.css | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/css/main.css b/css/main.css index 296476cd..71d67ff4 100644 --- a/css/main.css +++ b/css/main.css @@ -776,9 +776,10 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { display: flex; flex-direction: row-reverse; font-weight: bold; - justify-content: space-between; + justify-content: start; width: 100%; padding-right: 12px; + gap: 10px; } #show-controls label input { From bf42b2c3a1175266dcc7c481f589d53805d956f3 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 1 Jun 2025 11:02:04 -0700 Subject: [PATCH 13/82] 
Fix thinking blocks sometimes showing a white outline --- css/main.css | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/css/main.css b/css/main.css index 71d67ff4..a9cb36ab 100644 --- a/css/main.css +++ b/css/main.css @@ -1327,6 +1327,10 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { overflow: hidden; } +.thinking-content:focus, .thinking-header:focus { + outline: 0 !important; +} + .dark .thinking-block { background-color: var(--darker-gray); } From 7a81beb0c16ff51a90fbe77e6300076714af1fd0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 1 Jun 2025 18:23:23 -0700 Subject: [PATCH 14/82] Turn long pasted text into an attachment automatically --- js/main.js | 40 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 40 insertions(+) diff --git a/js/main.js b/js/main.js index 05c19571..8090937f 100644 --- a/js/main.js +++ b/js/main.js @@ -865,6 +865,46 @@ function navigateLastAssistantMessage(direction) { return false; } +//------------------------------------------------ +// Paste Handler for Long Text +//------------------------------------------------ + +const MAX_PLAIN_TEXT_LENGTH = 2500; + +function setupPasteHandler() { + const textbox = document.querySelector("#chat-input textarea[data-testid=\"textbox\"]"); + const fileInput = document.querySelector("#chat-input input[data-testid=\"file-upload\"]"); + + if (!textbox || !fileInput) { + setTimeout(setupPasteHandler, 500); + return; + } + + textbox.addEventListener("paste", async (event) => { + const text = event.clipboardData?.getData("text"); + + if (text && text.length > MAX_PLAIN_TEXT_LENGTH) { + event.preventDefault(); + + const file = new File([text], "pasted_text.txt", { + type: "text/plain", + lastModified: Date.now() + }); + + const dataTransfer = new DataTransfer(); + dataTransfer.items.add(file); + fileInput.files = dataTransfer.files; + fileInput.dispatchEvent(new Event("change", { bubbles: true })); + } + }); +} + +if (document.readyState === "loading") { + document.addEventListener("DOMContentLoaded", setupPasteHandler); +} else { + setupPasteHandler(); +} + //------------------------------------------------ // Tooltips //------------------------------------------------ From 92adceb7b57464ef03886cba5324a32e7d8f8b67 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 1 Jun 2025 19:22:21 -0700 Subject: [PATCH 15/82] UI: Fix the model downloader progress bar --- download-model.py | 52 ++++++++++++------ modules/ui_model_menu.py | 115 +++++++++++++++++++++++++++------------ 2 files changed, 115 insertions(+), 52 deletions(-) diff --git a/download-model.py b/download-model.py index 25517491..576a8b79 100644 --- a/download-model.py +++ b/download-model.py @@ -32,6 +32,7 @@ class ModelDownloader: self.max_retries = max_retries self.session = self.get_session() self._progress_bar_slots = None + self.progress_queue = None def get_session(self): session = requests.Session() @@ -218,33 +219,45 @@ class ModelDownloader: max_retries = self.max_retries attempt = 0 + file_downloaded_count_for_progress = 0 + try: while attempt < max_retries: attempt += 1 session = self.session headers = {} mode = 'wb' + current_file_size_on_disk = 0 try: if output_path.exists() and not start_from_scratch: - # Resume download - r = session.get(url, stream=True, timeout=20) - total_size = int(r.headers.get('content-length', 0)) - if output_path.stat().st_size >= total_size: + current_file_size_on_disk = output_path.stat().st_size + r_head = 
session.head(url, timeout=20) + r_head.raise_for_status() + total_size = int(r_head.headers.get('content-length', 0)) + + if current_file_size_on_disk >= total_size and total_size > 0: + if self.progress_queue is not None and total_size > 0: + self.progress_queue.put((1.0, str(filename))) return - headers = {'Range': f'bytes={output_path.stat().st_size}-'} + headers = {'Range': f'bytes={current_file_size_on_disk}-'} mode = 'ab' with session.get(url, stream=True, headers=headers, timeout=30) as r: - r.raise_for_status() # If status is not 2xx, raise an error - total_size = int(r.headers.get('content-length', 0)) - block_size = 1024 * 1024 # 1MB + r.raise_for_status() + total_size_from_stream = int(r.headers.get('content-length', 0)) + if mode == 'ab': + effective_total_size = current_file_size_on_disk + total_size_from_stream + else: + effective_total_size = total_size_from_stream - filename_str = str(filename) # Convert PosixPath to string if necessary + block_size = 1024 * 1024 + filename_str = str(filename) tqdm_kwargs = { - 'total': total_size, + 'total': effective_total_size, + 'initial': current_file_size_on_disk if mode == 'ab' else 0, 'unit': 'B', 'unit_scale': True, 'unit_divisor': 1024, @@ -261,16 +274,20 @@ class ModelDownloader: }) with open(output_path, mode) as f: + if mode == 'ab': + f.seek(current_file_size_on_disk) + with tqdm.tqdm(**tqdm_kwargs) as t: - count = 0 + file_downloaded_count_for_progress = current_file_size_on_disk for data in r.iter_content(block_size): f.write(data) t.update(len(data)) - if total_size != 0 and self.progress_bar is not None: - count += len(data) - self.progress_bar(float(count) / float(total_size), f"{filename_str}") + if effective_total_size != 0 and self.progress_queue is not None: + file_downloaded_count_for_progress += len(data) + progress_fraction = float(file_downloaded_count_for_progress) / float(effective_total_size) + self.progress_queue.put((progress_fraction, filename_str)) + break - break # Exit loop if successful except (RequestException, ConnectionError, Timeout) as e: print(f"Error downloading {filename}: {e}.") print(f"That was attempt {attempt}/{max_retries}.", end=' ') @@ -295,10 +312,9 @@ class ModelDownloader: finally: print(f"\nDownload of {len(file_list)} files to {output_folder} completed.") - def download_model_files(self, model, branch, links, sha256, output_folder, progress_bar=None, start_from_scratch=False, threads=4, specific_file=None, is_llamacpp=False): - self.progress_bar = progress_bar + def download_model_files(self, model, branch, links, sha256, output_folder, progress_queue=None, start_from_scratch=False, threads=4, specific_file=None, is_llamacpp=False): + self.progress_queue = progress_queue - # Create the folder and writing the metadata output_folder.mkdir(parents=True, exist_ok=True) if not is_llamacpp: diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index 862b3893..2a7d3d9d 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -1,4 +1,6 @@ import importlib +import queue +import threading import traceback from functools import partial from pathlib import Path @@ -205,48 +207,51 @@ def load_lora_wrapper(selected_loras): def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), return_links=False, check=False): + downloader_module = importlib.import_module("download-model") + downloader = downloader_module.ModelDownloader() + update_queue = queue.Queue() + try: # Handle direct GGUF URLs if repo_id.startswith("https://") and ("huggingface.co" in repo_id) 
and (repo_id.endswith(".gguf") or repo_id.endswith(".gguf?download=true")): try: path = repo_id.split("huggingface.co/")[1] - - # Extract the repository ID (first two parts of the path) parts = path.split("/") if len(parts) >= 2: extracted_repo_id = f"{parts[0]}/{parts[1]}" - - # Extract the filename (last part of the path) - filename = repo_id.split("/")[-1] - if "?download=true" in filename: - filename = filename.replace("?download=true", "") - + filename = repo_id.split("/")[-1].replace("?download=true", "") repo_id = extracted_repo_id specific_file = filename - except: - pass + except Exception as e: + yield f"Error parsing GGUF URL: {e}" + progress(0.0) + return - if repo_id == "": - yield ("Please enter a model path") + if not repo_id: + yield "Please enter a model path." + progress(0.0) return repo_id = repo_id.strip() specific_file = specific_file.strip() - downloader = importlib.import_module("download-model").ModelDownloader() - progress(0.0) + progress(0.0, "Preparing download...") + model, branch = downloader.sanitize_model_and_branch_names(repo_id, None) - - yield ("Getting the download links from Hugging Face") + yield "Getting download links from Hugging Face..." links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=False, specific_file=specific_file) + if not links: + yield "No files found to download for the given model/criteria." + progress(0.0) + return + # Check for multiple GGUF files gguf_files = [link for link in links if link.lower().endswith('.gguf')] if len(gguf_files) > 1 and not specific_file: output = "Multiple GGUF files found. Please copy one of the following filenames to the 'File name' field:\n\n```\n" for link in gguf_files: output += f"{Path(link).name}\n" - output += "```" yield output return @@ -255,17 +260,13 @@ def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), retur output = "```\n" for link in links: output += f"{Path(link).name}" + "\n" - output += "```" yield output return - yield ("Getting the output folder") + yield "Determining output folder..." output_folder = downloader.get_output_folder( - model, - branch, - is_lora, - is_llamacpp=is_llamacpp, + model, branch, is_lora, is_llamacpp=is_llamacpp, model_dir=shared.args.model_dir if shared.args.model_dir != shared.args_defaults.model_dir else None ) @@ -275,19 +276,65 @@ def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), retur output_folder = Path(shared.args.lora_dir) if check: - progress(0.5) - - yield ("Checking previously downloaded files") + yield "Checking previously downloaded files..." + progress(0.5, "Verifying files...") downloader.check_model_files(model, branch, links, sha256, output_folder) - progress(1.0) - else: - yield (f"Downloading file{'s' if len(links) > 1 else ''} to `{output_folder}/`") - downloader.download_model_files(model, branch, links, sha256, output_folder, progress_bar=progress, threads=4, is_llamacpp=is_llamacpp) + progress(1.0, "Verification complete.") + yield "File check complete." 
+ return - yield (f"Model successfully saved to `{output_folder}/`.") - except: - progress(1.0) - yield traceback.format_exc().replace('\n', '\n\n') + yield "" + progress(0.0, "Download starting...") + + def downloader_thread_target(): + try: + downloader.download_model_files( + model, branch, links, sha256, output_folder, + progress_queue=update_queue, + threads=4, + is_llamacpp=is_llamacpp, + specific_file=specific_file + ) + update_queue.put(("COMPLETED", f"Model successfully saved to `{output_folder}/`.")) + except Exception as e: + tb_str = traceback.format_exc().replace('\n', '\n\n') + update_queue.put(("ERROR", tb_str)) + + download_thread = threading.Thread(target=downloader_thread_target) + download_thread.start() + + while True: + try: + message = update_queue.get(timeout=0.2) + if not isinstance(message, tuple) or len(message) != 2: + continue + + msg_identifier, data = message + + if msg_identifier == "COMPLETED": + progress(1.0, "Download complete!") + yield data + break + elif msg_identifier == "ERROR": + progress(0.0, "Error occurred") + yield data + break + elif isinstance(msg_identifier, float): + progress_value = msg_identifier + description_str = data + progress(progress_value, f"Downloading: {description_str}") + + except queue.Empty: + if not download_thread.is_alive(): + yield "Download process finished." + break + + download_thread.join() + + except Exception as e: + progress(0.0) + tb_str = traceback.format_exc().replace('\n', '\n\n') + yield tb_str def update_truncation_length(current_length, state): From ad6d0218ae0c015694bef7a43f5f628d281a1c36 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 1 Jun 2025 19:27:14 -0700 Subject: [PATCH 16/82] Fix after 219f0a773166deeb0326c2874b29e66e382df524 --- modules/models.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/models.py b/modules/models.py index d329ae3c..c1e7fb56 100644 --- a/modules/models.py +++ b/modules/models.py @@ -116,7 +116,7 @@ def unload_model(keep_model_name=False): return is_llamacpp = (shared.model.__class__.__name__ == 'LlamaServer') - if shared.args.loader == 'ExLlamav3_HF': + if shared.model.__class__.__name__ == 'Exllamav3HF': shared.model.unload() shared.model = shared.tokenizer = None From 2db7745cbde543d7e1abd81c0389c544c84621db Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 1 Jun 2025 22:12:24 -0700 Subject: [PATCH 17/82] Show llama.cpp prompt processing on one line instead of many lines --- modules/llama_cpp_server.py | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) diff --git a/modules/llama_cpp_server.py b/modules/llama_cpp_server.py index d695c74e..aa712541 100644 --- a/modules/llama_cpp_server.py +++ b/modules/llama_cpp_server.py @@ -409,14 +409,31 @@ class LlamaServer: def filter_stderr_with_progress(process_stderr): progress_pattern = re.compile(r'slot update_slots: id.*progress = (\d+\.\d+)') + last_was_progress = False + try: for line in iter(process_stderr.readline, ''): + line = line.rstrip('\n\r') # Remove existing newlines progress_match = progress_pattern.search(line) + if progress_match: - sys.stderr.write(line) + if last_was_progress: + # Overwrite the previous progress line using carriage return + sys.stderr.write(f'\r{line}') + else: + # First progress line - print normally + sys.stderr.write(line) sys.stderr.flush() + last_was_progress = True elif not line.startswith(('srv ', 'slot ')) and 'log_server_r: request: GET 
/health' not in line: - sys.stderr.write(line) + if last_was_progress: + # Finish the progress line with a newline, then print the new line + sys.stderr.write(f'\n{line}\n') + else: + # Normal line - print with newline + sys.stderr.write(f'{line}\n') sys.stderr.flush() + last_was_progress = False + # For filtered lines, don't change last_was_progress state except (ValueError, IOError): pass From 45c9ae312c1ff60ce13c721d1290b65f01bf9660 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 1 Jun 2025 22:17:22 -0700 Subject: [PATCH 18/82] Use the flash-attention wheels in https://github.com/kingbri1/flash-attention --- requirements/full/requirements.txt | 2 +- requirements/full/requirements_noavx2.txt | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index e61677a6..04d97220 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -41,5 +41,5 @@ https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+ https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" -https://github.com/oobabooga/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index e0cb84b4..7c3635cc 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -41,5 +41,5 @@ https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+ https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" -https://github.com/oobabooga/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" 
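[Editorial note — illustration, not part of the patch.] The wheel URLs in these requirements files are gated by PEP 508 environment markers (the `; platform_system == ...` suffixes), which pip evaluates against the running interpreter before deciding whether a given line applies. A small sketch of that evaluation using the packaging library, which implements the marker logic pip relies on; the marker string is copied from the lines above and the override dict is only an example.

    from packaging.markers import Marker

    marker = Marker('platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"')

    # Evaluated against the current interpreter and platform
    print(marker.evaluate())

    # Evaluated against explicit overrides (merged with the default environment)
    print(marker.evaluate({
        "platform_system": "Windows",
        "platform_machine": "AMD64",
        "python_version": "3.11",
    }))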
+https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" From bb409c926e986e57b8c3eea3582abb466f32ad08 Mon Sep 17 00:00:00 2001 From: oobabooga Date: Mon, 2 Jun 2025 09:50:17 -0300 Subject: [PATCH 19/82] Update only the last message during streaming + add back dynamic UI update speed (#7038) --- js/global_scope_js.js | 25 ++- modules/chat.py | 4 +- modules/html_generator.py | 274 ++++++++++++++++--------------- modules/shared.py | 3 +- modules/text_generation.py | 18 +- modules/ui.py | 6 +- modules/ui_chat.py | 4 +- modules/ui_parameters.py | 2 - user_data/settings-template.yaml | 1 - 9 files changed, 181 insertions(+), 156 deletions(-) diff --git a/js/global_scope_js.js b/js/global_scope_js.js index 3274f47e..d5140c93 100644 --- a/js/global_scope_js.js +++ b/js/global_scope_js.js @@ -229,10 +229,23 @@ function removeLastClick() { document.getElementById("Remove-last").click(); } -function handleMorphdomUpdate(text) { +function handleMorphdomUpdate(data) { + // Determine target element and use it as query scope + var target_element, target_html; + if (data.last_message_only) { + const childNodes = document.getElementsByClassName("messages")[0].childNodes; + target_element = childNodes[childNodes.length - 1]; + target_html = data.html; + } else { + target_element = document.getElementById("chat").parentNode; + target_html = "
" + data.html + "
"; + } + + const queryScope = target_element; + // Track open blocks const openBlocks = new Set(); - document.querySelectorAll(".thinking-block").forEach(block => { + queryScope.querySelectorAll(".thinking-block").forEach(block => { const blockId = block.getAttribute("data-block-id"); // If block exists and is open, add to open set if (blockId && block.hasAttribute("open")) { @@ -242,7 +255,7 @@ function handleMorphdomUpdate(text) { // Store scroll positions for any open blocks const scrollPositions = {}; - document.querySelectorAll(".thinking-block[open]").forEach(block => { + queryScope.querySelectorAll(".thinking-block[open]").forEach(block => { const content = block.querySelector(".thinking-content"); const blockId = block.getAttribute("data-block-id"); if (content && blockId) { @@ -255,8 +268,8 @@ function handleMorphdomUpdate(text) { }); morphdom( - document.getElementById("chat").parentNode, - "
" + text + "
", + target_element, + target_html, { onBeforeElUpdated: function(fromEl, toEl) { // Preserve code highlighting @@ -307,7 +320,7 @@ function handleMorphdomUpdate(text) { ); // Add toggle listeners for new blocks - document.querySelectorAll(".thinking-block").forEach(block => { + queryScope.querySelectorAll(".thinking-block").forEach(block => { if (!block._hasToggleListener) { block.addEventListener("toggle", function(e) { if (this.open) { diff --git a/modules/chat.py b/modules/chat.py index 1222d2bb..f1ea16f1 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -825,7 +825,9 @@ def generate_chat_reply_wrapper(text, state, regenerate=False, _continue=False): last_save_time = time.monotonic() save_interval = 8 for i, history in enumerate(generate_chat_reply(text, state, regenerate, _continue, loading_message=True, for_ui=True)): - yield chat_html_wrapper(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']), history + yield chat_html_wrapper(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'], last_message_only=(i > 0)), history + if i == 0: + time.sleep(0.125) # We need this to make sure the first update goes through current_time = time.monotonic() # Save on first iteration or if save_interval seconds have passed diff --git a/modules/html_generator.py b/modules/html_generator.py index 03b5d485..f90e3b04 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -462,64 +462,69 @@ def actions_html(history, i, role, info_message=""): f'{version_nav_html}') -def generate_instruct_html(history): - output = f'
' +def generate_instruct_html(history, last_message_only=False): + if not last_message_only: + output = f'
' + else: + output = "" - for i in range(len(history['visible'])): - row_visible = history['visible'][i] - row_internal = history['internal'][i] - converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible] + def create_message(role, content, raw_content): + """Inner function that captures variables from outer scope.""" + class_name = "user-message" if role == "user" else "assistant-message" - # Get timestamps - user_timestamp = format_message_timestamp(history, "user", i) - assistant_timestamp = format_message_timestamp(history, "assistant", i) + # Get role-specific data + timestamp = format_message_timestamp(history, role, i) + attachments = format_message_attachments(history, role, i) - # Get attachments - user_attachments = format_message_attachments(history, "user", i) - assistant_attachments = format_message_attachments(history, "assistant", i) + # Create info button if timestamp exists + info_message = "" + if timestamp: + tooltip_text = get_message_tooltip(history, role, i) + info_message = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"') - # Create info buttons for timestamps if they exist - info_message_user = "" - if user_timestamp != "": - tooltip_text = get_message_tooltip(history, "user", i) - info_message_user = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"') - - info_message_assistant = "" - if assistant_timestamp != "": - tooltip_text = get_message_tooltip(history, "assistant", i) - info_message_assistant = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"') - - if converted_visible[0]: # Don't display empty user messages - output += ( - f'
' - f'
' - f'
{converted_visible[0]}
' - f'{user_attachments}' - f'{actions_html(history, i, "user", info_message_user)}' - f'
' - f'
' - ) - - output += ( - f'
' f'
' - f'
{converted_visible[1]}
' - f'{assistant_attachments}' - f'{actions_html(history, i, "assistant", info_message_assistant)}' + f'
{content}
' + f'{attachments}' + f'{actions_html(history, i, role, info_message)}' f'
' f'
' ) - output += "
" + # Determine range + start_idx = len(history['visible']) - 1 if last_message_only else 0 + end_idx = len(history['visible']) + + for i in range(start_idx, end_idx): + row_visible = history['visible'][i] + row_internal = history['internal'][i] + + # Convert content + if last_message_only: + converted_visible = [None, convert_to_markdown_wrapped(row_visible[1], message_id=i, use_cache=i != len(history['visible']) - 1)] + else: + converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible] + + # Generate messages + if not last_message_only and converted_visible[0]: + output += create_message("user", converted_visible[0], row_internal[0]) + + output += create_message("assistant", converted_visible[1], row_internal[1]) + + if not last_message_only: + output += "
" + return output -def generate_cai_chat_html(history, name1, name2, style, character, reset_cache=False): - output = f'
' +def generate_cai_chat_html(history, name1, name2, style, character, reset_cache=False, last_message_only=False): + if not last_message_only: + output = f'
' + else: + output = "" # We use ?character and ?time.time() to force the browser to reset caches img_bot = ( @@ -527,110 +532,117 @@ def generate_cai_chat_html(history, name1, name2, style, character, reset_cache= if Path("user_data/cache/pfp_character_thumb.png").exists() else '' ) - img_me = ( - f'' - if Path("user_data/cache/pfp_me.png").exists() else '' - ) + def create_message(role, content, raw_content): + """Inner function for CAI-style messages.""" + circle_class = "circle-you" if role == "user" else "circle-bot" + name = name1 if role == "user" else name2 - for i in range(len(history['visible'])): - row_visible = history['visible'][i] - row_internal = history['internal'][i] - converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible] + # Get role-specific data + timestamp = format_message_timestamp(history, role, i, tooltip_include_timestamp=False) + attachments = format_message_attachments(history, role, i) - # Get timestamps - user_timestamp = format_message_timestamp(history, "user", i, tooltip_include_timestamp=False) - assistant_timestamp = format_message_timestamp(history, "assistant", i, tooltip_include_timestamp=False) + # Get appropriate image + if role == "user": + img = (f'' + if Path("user_data/cache/pfp_me.png").exists() else '') + else: + img = img_bot - # Get attachments - user_attachments = format_message_attachments(history, "user", i) - assistant_attachments = format_message_attachments(history, "assistant", i) - - if converted_visible[0]: # Don't display empty user messages - output += ( - f'
' - f'
{img_me}
' - f'
' - f'
{name1}{user_timestamp}
' - f'
{converted_visible[0]}
' - f'{user_attachments}' - f'{actions_html(history, i, "user")}' - f'
' - f'
' - ) - - output += ( + return ( f'
' - f'
{img_bot}
' + f'
{img}
' f'
' - f'
{name2}{assistant_timestamp}
' - f'
{converted_visible[1]}
' - f'{assistant_attachments}' - f'{actions_html(history, i, "assistant")}' + f'
{name}{timestamp}
' + f'
{content}
' + f'{attachments}' + f'{actions_html(history, i, role)}' f'
' f'
' ) - output += "
" + # Determine range + start_idx = len(history['visible']) - 1 if last_message_only else 0 + end_idx = len(history['visible']) + + for i in range(start_idx, end_idx): + row_visible = history['visible'][i] + row_internal = history['internal'][i] + + # Convert content + if last_message_only: + converted_visible = [None, convert_to_markdown_wrapped(row_visible[1], message_id=i, use_cache=i != len(history['visible']) - 1)] + else: + converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible] + + # Generate messages + if not last_message_only and converted_visible[0]: + output += create_message("user", converted_visible[0], row_internal[0]) + + output += create_message("assistant", converted_visible[1], row_internal[1]) + + if not last_message_only: + output += "
" + return output -def generate_chat_html(history, name1, name2, reset_cache=False): - output = f'
' +def generate_chat_html(history, name1, name2, reset_cache=False, last_message_only=False): + if not last_message_only: + output = f'
' + else: + output = "" - for i in range(len(history['visible'])): - row_visible = history['visible'][i] - row_internal = history['internal'][i] - converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible] + def create_message(role, content, raw_content): + """Inner function for WPP-style messages.""" + text_class = "text-you" if role == "user" else "text-bot" - # Get timestamps - user_timestamp = format_message_timestamp(history, "user", i) - assistant_timestamp = format_message_timestamp(history, "assistant", i) + # Get role-specific data + timestamp = format_message_timestamp(history, role, i) + attachments = format_message_attachments(history, role, i) - # Get attachments - user_attachments = format_message_attachments(history, "user", i) - assistant_attachments = format_message_attachments(history, "assistant", i) + # Create info button if timestamp exists + info_message = "" + if timestamp: + tooltip_text = get_message_tooltip(history, role, i) + info_message = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"') - # Create info buttons for timestamps if they exist - info_message_user = "" - if user_timestamp != "": - tooltip_text = get_message_tooltip(history, "user", i) - info_message_user = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"') - - info_message_assistant = "" - if assistant_timestamp != "": - tooltip_text = get_message_tooltip(history, "assistant", i) - info_message_assistant = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"') - - if converted_visible[0]: # Don't display empty user messages - output += ( - f'
' - f'
' - f'
{converted_visible[0]}
' - f'{user_attachments}' - f'{actions_html(history, i, "user", info_message_user)}' - f'
' - f'
' - ) - - output += ( + return ( f'
' - f'
' - f'
{converted_visible[1]}
' - f'{assistant_attachments}' - f'{actions_html(history, i, "assistant", info_message_assistant)}' + f'
' + f'
{content}
' + f'{attachments}' + f'{actions_html(history, i, role, info_message)}' f'
' f'
' ) - output += "
" + # Determine range + start_idx = len(history['visible']) - 1 if last_message_only else 0 + end_idx = len(history['visible']) + + for i in range(start_idx, end_idx): + row_visible = history['visible'][i] + row_internal = history['internal'][i] + + # Convert content + if last_message_only: + converted_visible = [None, convert_to_markdown_wrapped(row_visible[1], message_id=i, use_cache=i != len(history['visible']) - 1)] + else: + converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible] + + # Generate messages + if not last_message_only and converted_visible[0]: + output += create_message("user", converted_visible[0], row_internal[0]) + + output += create_message("assistant", converted_visible[1], row_internal[1]) + + if not last_message_only: + output += "
" + return output @@ -644,15 +656,15 @@ def time_greeting(): return "Good evening!" -def chat_html_wrapper(history, name1, name2, mode, style, character, reset_cache=False): +def chat_html_wrapper(history, name1, name2, mode, style, character, reset_cache=False, last_message_only=False): if len(history['visible']) == 0: greeting = f"
{time_greeting()} How can I help you today?
" result = f'
{greeting}
' elif mode == 'instruct': - result = generate_instruct_html(history) + result = generate_instruct_html(history, last_message_only=last_message_only) elif style == 'wpp': - result = generate_chat_html(history, name1, name2) + result = generate_chat_html(history, name1, name2, last_message_only=last_message_only) else: - result = generate_cai_chat_html(history, name1, name2, style, character, reset_cache) + result = generate_cai_chat_html(history, name1, name2, style, character, reset_cache=reset_cache, last_message_only=last_message_only) - return {'html': result} + return {'html': result, 'last_message_only': last_message_only} diff --git a/modules/shared.py b/modules/shared.py index d2305f30..f712f7f8 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -21,7 +21,7 @@ lora_names = [] # Generation variables stop_everything = False generation_lock = None -processing_message = '*Is typing...*' +processing_message = '' # UI variables gradio = {} @@ -47,7 +47,6 @@ settings = { 'max_new_tokens_max': 4096, 'prompt_lookup_num_tokens': 0, 'max_tokens_second': 0, - 'max_updates_second': 12, 'auto_max_new_tokens': True, 'ban_eos_token': False, 'add_bos_token': True, diff --git a/modules/text_generation.py b/modules/text_generation.py index 1fd6d810..0d499d50 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -65,41 +65,39 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False, escap all_stop_strings += st shared.stop_everything = False - last_update = -1 reply = '' is_stream = state['stream'] if len(all_stop_strings) > 0 and not state['stream']: state = copy.deepcopy(state) state['stream'] = True - min_update_interval = 0 - if state.get('max_updates_second', 0) > 0: - min_update_interval = 1 / state['max_updates_second'] - # Generate + last_update = -1 + latency_threshold = 1 / 1000 for reply in generate_func(question, original_question, state, stopping_strings, is_chat=is_chat): + cur_time = time.monotonic() reply, stop_found = apply_stopping_strings(reply, all_stop_strings) if escape_html: reply = html.escape(reply) if is_stream: - cur_time = time.time() - # Limit number of tokens/second to make text readable in real time if state['max_tokens_second'] > 0: diff = 1 / state['max_tokens_second'] - (cur_time - last_update) if diff > 0: time.sleep(diff) - last_update = time.time() + last_update = time.monotonic() yield reply # Limit updates to avoid lag in the Gradio UI # API updates are not limited else: - if cur_time - last_update > min_update_interval: - last_update = cur_time + # If 'generate_func' takes less than 0.001 seconds to yield the next token + # (equivalent to more than 1000 tok/s), assume that the UI is lagging behind and skip yielding + if (cur_time - last_update) > latency_threshold: yield reply + last_update = time.monotonic() if stop_found or (state['max_tokens_second'] > 0 and shared.stop_everything): break diff --git a/modules/ui.py b/modules/ui.py index 9f4d67cb..14a09d2b 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -6,6 +6,7 @@ import yaml import extensions from modules import shared +from modules.chat import load_history with open(Path(__file__).resolve().parent / '../css/NotoSans/stylesheet.css', 'r') as f: css = f.read() @@ -194,7 +195,6 @@ def list_interface_input_elements(): 'max_new_tokens', 'prompt_lookup_num_tokens', 'max_tokens_second', - 'max_updates_second', 'do_sample', 'dynamic_temperature', 'temperature_last', @@ -270,6 +270,10 @@ def gather_interface_values(*args): if not shared.args.multi_user: 
shared.persistent_interface_state = output + # Prevent history loss if backend is restarted but UI is not refreshed + if output['history'] is None and output['unique_id'] is not None: + output['history'] = load_history(output['unique_id'], output['character_menu'], output['mode']) + return output diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 822b77b8..0d5a2c18 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -18,7 +18,7 @@ def create_ui(): mu = shared.args.multi_user shared.gradio['Chat input'] = gr.State() - shared.gradio['history'] = gr.JSON(visible=False) + shared.gradio['history'] = gr.State({'internal': [], 'visible': [], 'metadata': {}}) with gr.Tab('Chat', id='Chat', elem_id='chat-tab'): with gr.Row(elem_id='past-chats-row', elem_classes=['pretty_scrollbar']): @@ -195,7 +195,7 @@ def create_event_handlers(): shared.reload_inputs = gradio(reload_arr) # Morph HTML updates instead of updating everything - shared.gradio['display'].change(None, gradio('display'), None, js="(data) => handleMorphdomUpdate(data.html)") + shared.gradio['display'].change(None, gradio('display'), None, js="(data) => handleMorphdomUpdate(data)") shared.gradio['Generate'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py index 733d0901..84f9fbfc 100644 --- a/modules/ui_parameters.py +++ b/modules/ui_parameters.py @@ -71,8 +71,6 @@ def create_ui(default_preset): shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], value=shared.settings['max_new_tokens'], step=1, label='max_new_tokens', info='⚠️ Setting this too high can cause prompt truncation.') shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.') shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.') - shared.gradio['max_updates_second'] = gr.Slider(value=shared.settings['max_updates_second'], minimum=0, maximum=24, step=1, label='Maximum UI updates/second', info='Set this if you experience lag in the UI during streaming.') - with gr.Column(): with gr.Row(): with gr.Column(): diff --git a/user_data/settings-template.yaml b/user_data/settings-template.yaml index ce0f77e1..db481e84 100644 --- a/user_data/settings-template.yaml +++ b/user_data/settings-template.yaml @@ -18,7 +18,6 @@ max_new_tokens_min: 1 max_new_tokens_max: 4096 prompt_lookup_num_tokens: 0 max_tokens_second: 0 -max_updates_second: 12 auto_max_new_tokens: true ban_eos_token: false add_bos_token: true From 7278548cd18a9ba05062eb2db59d7f2965d8a9f6 Mon Sep 17 00:00:00 2001 From: oobabooga Date: Mon, 2 Jun 2025 09:57:55 -0300 Subject: [PATCH 20/82] Simplify the one-click installer (#7039) --- one_click.py | 253 +++++++++++++++++++++++++-------------------------- 1 file changed, 123 insertions(+), 130 deletions(-) diff --git a/one_click.py b/one_click.py index 482a6aa9..cccb0dc9 100644 --- a/one_click.py +++ b/one_click.py @@ -70,12 +70,8 @@ def is_installed(): def cpu_has_avx2(): try: import cpuinfo - info = cpuinfo.get_cpu_info() - if 'avx2' in info['flags']: - return True - else: - return False + return 'avx2' in info['flags'] except: return True @@ -83,30 +79,112 @@ def 
cpu_has_avx2(): def cpu_has_amx(): try: import cpuinfo - info = cpuinfo.get_cpu_info() - if 'amx' in info['flags']: - return True - else: - return False + return 'amx' in info['flags'] except: return True -def torch_version(): - site_packages_path = None - for sitedir in site.getsitepackages(): - if "site-packages" in sitedir and conda_env_path in sitedir: - site_packages_path = sitedir - break +def load_state(): + """Load installer state from JSON file""" + if os.path.exists(state_file): + try: + with open(state_file, 'r') as f: + return json.load(f) + except: + return {} + return {} - if site_packages_path: - torch_version_file = open(os.path.join(site_packages_path, 'torch', 'version.py')).read().splitlines() - torver = [line for line in torch_version_file if line.startswith('__version__')][0].split('__version__ = ')[1].strip("'") + +def save_state(state): + """Save installer state to JSON file""" + with open(state_file, 'w') as f: + json.dump(state, f) + + +def get_gpu_choice(): + """Get GPU choice from state file or ask user""" + state = load_state() + gpu_choice = state.get('gpu_choice') + + if not gpu_choice: + if "GPU_CHOICE" in os.environ: + choice = os.environ["GPU_CHOICE"].upper() + print_big_message(f"Selected GPU choice \"{choice}\" based on the GPU_CHOICE environment variable.") + else: + choice = get_user_choice( + "What is your GPU?", + { + 'A': 'NVIDIA - CUDA 12.4', + 'B': 'AMD - Linux/macOS only, requires ROCm 6.2.4', + 'C': 'Apple M Series', + 'D': 'Intel Arc (beta)', + 'N': 'CPU mode' + }, + ) + + # Convert choice to GPU name + gpu_choice = {"A": "NVIDIA", "B": "AMD", "C": "APPLE", "D": "INTEL", "N": "NONE"}[choice] + + # Save choice to state + state['gpu_choice'] = gpu_choice + save_state(state) + + return gpu_choice + + +def get_pytorch_install_command(gpu_choice): + """Get PyTorch installation command based on GPU choice""" + base_cmd = f"python -m pip install torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION} " + + if gpu_choice == "NVIDIA": + return base_cmd + "--index-url https://download.pytorch.org/whl/cu124" + elif gpu_choice == "AMD": + return base_cmd + "--index-url https://download.pytorch.org/whl/rocm6.2.4" + elif gpu_choice in ["APPLE", "NONE"]: + return base_cmd + "--index-url https://download.pytorch.org/whl/cpu" + elif gpu_choice == "INTEL": + if is_linux(): + return "python -m pip install torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/" + else: + return "python -m pip install torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/" else: - from torch import __version__ as torver + return base_cmd - return torver + +def get_pytorch_update_command(gpu_choice): + """Get PyTorch update command based on GPU choice""" + base_cmd = f"python -m pip install --upgrade torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION}" + + if gpu_choice == "NVIDIA": + return f"{base_cmd} --index-url https://download.pytorch.org/whl/cu124" + elif gpu_choice == "AMD": + return f"{base_cmd} --index-url https://download.pytorch.org/whl/rocm6.2.4" + elif gpu_choice in ["APPLE", "NONE"]: + return f"{base_cmd} --index-url https://download.pytorch.org/whl/cpu" + elif gpu_choice == "INTEL": + intel_extension = "intel-extension-for-pytorch==2.1.10+xpu" if is_linux() 
else "intel-extension-for-pytorch==2.1.10" + return f"{base_cmd} {intel_extension} --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/" + else: + return base_cmd + + +def get_requirements_file(gpu_choice): + """Get requirements file path based on GPU choice""" + requirements_base = os.path.join("requirements", "full") + + if gpu_choice == "AMD": + file_name = f"requirements_amd{'_noavx2' if not cpu_has_avx2() else ''}.txt" + elif gpu_choice == "APPLE": + file_name = f"requirements_apple_{'intel' if is_x86_64() else 'silicon'}.txt" + elif gpu_choice in ["INTEL", "NONE"]: + file_name = f"requirements_cpu_only{'_noavx2' if not cpu_has_avx2() else ''}.txt" + elif gpu_choice == "NVIDIA": + file_name = f"requirements{'_noavx2' if not cpu_has_avx2() else ''}.txt" + else: + raise ValueError(f"Unknown GPU choice: {gpu_choice}") + + return os.path.join(requirements_base, file_name) def get_current_commit(): @@ -209,28 +287,8 @@ def get_user_choice(question, options_dict): def update_pytorch_and_python(): print_big_message("Checking for PyTorch updates.") - - # Update the Python version. Left here for future reference in case this becomes necessary. - # print_big_message("Checking for PyTorch and Python updates.") - # current_python_version = f"{sys.version_info.major}.{sys.version_info.minor}" - # if current_python_version != PYTHON_VERSION: - # run_cmd(f"conda install -y python={PYTHON_VERSION}", assert_success=True, environment=True) - - torver = torch_version() - base_cmd = f"python -m pip install --upgrade torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION}" - - if "+cu" in torver: - install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/cu124" - elif "+rocm" in torver: - install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/rocm6.2.4" - elif "+cpu" in torver: - install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/cpu" - elif "+cxx11" in torver: - intel_extension = "intel-extension-for-pytorch==2.1.10+xpu" if is_linux() else "intel-extension-for-pytorch==2.1.10" - install_cmd = f"{base_cmd} {intel_extension} --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/" - else: - install_cmd = base_cmd - + gpu_choice = get_gpu_choice() + install_cmd = get_pytorch_update_command(gpu_choice) run_cmd(install_cmd, assert_success=True, environment=True) @@ -256,43 +314,11 @@ def install_webui(): if os.path.isfile(state_file): os.remove(state_file) - # Ask the user for the GPU vendor - if "GPU_CHOICE" in os.environ: - choice = os.environ["GPU_CHOICE"].upper() - print_big_message(f"Selected GPU choice \"{choice}\" based on the GPU_CHOICE environment variable.") - - # Warn about changed meanings and handle old choices - if choice == "B": - print_big_message("Warning: GPU_CHOICE='B' now means 'AMD' in the new version.") - elif choice == "C": - print_big_message("Warning: GPU_CHOICE='C' now means 'Apple M Series' in the new version.") - elif choice == "D": - print_big_message("Warning: GPU_CHOICE='D' now means 'Intel Arc' in the new version.") - else: - choice = get_user_choice( - "What is your GPU?", - { - 'A': 'NVIDIA - CUDA 12.4', - 'B': 'AMD - Linux/macOS only, requires ROCm 6.2.4', - 'C': 'Apple M Series', - 'D': 'Intel Arc (beta)', - 'N': 'CPU mode' - }, - ) - - # Convert choices to GPU names for compatibility - gpu_choice_to_name = { - "A": "NVIDIA", - "B": "AMD", - "C": "APPLE", - "D": "INTEL", - "N": "NONE" - } - - selected_gpu = gpu_choice_to_name[choice] + 
# Get GPU choice and save it to state + gpu_choice = get_gpu_choice() # Write a flag to CMD_FLAGS.txt for CPU mode - if selected_gpu == "NONE": + if gpu_choice == "NONE": cmd_flags_path = os.path.join(script_dir, "user_data", "CMD_FLAGS.txt") with open(cmd_flags_path, 'r+') as cmd_flags_file: if "--cpu" not in cmd_flags_file.read(): @@ -300,34 +326,20 @@ def install_webui(): cmd_flags_file.write("\n--cpu\n") # Handle CUDA version display - elif any((is_windows(), is_linux())) and selected_gpu == "NVIDIA": + elif any((is_windows(), is_linux())) and gpu_choice == "NVIDIA": print("CUDA: 12.4") # No PyTorch for AMD on Windows (?) - elif is_windows() and selected_gpu == "AMD": + elif is_windows() and gpu_choice == "AMD": print("PyTorch setup on Windows is not implemented yet. Exiting...") sys.exit(1) - # Find the Pytorch installation command - install_pytorch = f"python -m pip install torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION} " - - if selected_gpu == "NVIDIA": - install_pytorch += "--index-url https://download.pytorch.org/whl/cu124" - elif selected_gpu == "AMD": - install_pytorch += "--index-url https://download.pytorch.org/whl/rocm6.2.4" - elif selected_gpu in ["APPLE", "NONE"]: - install_pytorch += "--index-url https://download.pytorch.org/whl/cpu" - elif selected_gpu == "INTEL": - if is_linux(): - install_pytorch = "python -m pip install torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/" - else: - install_pytorch = "python -m pip install torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/" - # Install Git and then Pytorch print_big_message("Installing PyTorch.") + install_pytorch = get_pytorch_install_command(gpu_choice) run_cmd(f"conda install -y ninja git && {install_pytorch} && python -m pip install py-cpuinfo==9.0.0", assert_success=True, environment=True) - if selected_gpu == "INTEL": + if gpu_choice == "INTEL": # Install oneAPI dependencies via conda print_big_message("Installing Intel oneAPI runtime libraries.") run_cmd("conda install -y -c https://software.repos.intel.com/python/conda/ -c conda-forge dpcpp-cpp-rt=2024.0 mkl-dpcpp=2024.0", environment=True) @@ -349,31 +361,15 @@ def update_requirements(initial_installation=False, pull=True): assert_success=True ) - torver = torch_version() - requirements_base = os.path.join("requirements", "full") - - if "+rocm" in torver: - file_name = f"requirements_amd{'_noavx2' if not cpu_has_avx2() else ''}.txt" - elif "+cpu" in torver or "+cxx11" in torver: - file_name = f"requirements_cpu_only{'_noavx2' if not cpu_has_avx2() else ''}.txt" - elif is_macos(): - file_name = f"requirements_apple_{'intel' if is_x86_64() else 'silicon'}.txt" - else: - file_name = f"requirements{'_noavx2' if not cpu_has_avx2() else ''}.txt" - - requirements_file = os.path.join(requirements_base, file_name) - - # Load state from JSON file current_commit = get_current_commit() - wheels_changed = False - if os.path.exists(state_file): - with open(state_file, 'r') as f: - last_state = json.load(f) - - if 'wheels_changed' in last_state or last_state.get('last_installed_commit') != current_commit: + wheels_changed = not os.path.exists(state_file) + if not wheels_changed: + state = load_state() + if 'wheels_changed' in state or state.get('last_installed_commit') != current_commit: 
wheels_changed = True - else: - wheels_changed = True + + gpu_choice = get_gpu_choice() + requirements_file = get_requirements_file(gpu_choice) if pull: # Read .whl lines before pulling @@ -409,19 +405,17 @@ def update_requirements(initial_installation=False, pull=True): print_big_message(f"File '{file}' was updated during 'git pull'. Please run the script again.") # Save state before exiting - current_state = {} + state = load_state() if wheels_changed: - current_state['wheels_changed'] = True - - with open(state_file, 'w') as f: - json.dump(current_state, f) - + state['wheels_changed'] = True + save_state(state) sys.exit(1) # Save current state - current_state = {'last_installed_commit': current_commit} - with open(state_file, 'w') as f: - json.dump(current_state, f) + state = load_state() + state['last_installed_commit'] = current_commit + state.pop('wheels_changed', None) # Remove wheels_changed flag + save_state(state) if os.environ.get("INSTALL_EXTENSIONS", "").lower() in ("yes", "y", "true", "1", "t", "on"): install_extensions_requirements() @@ -432,11 +426,10 @@ def update_requirements(initial_installation=False, pull=True): # Update PyTorch if not initial_installation: update_pytorch_and_python() - torver = torch_version() clean_outdated_pytorch_cuda_dependencies() print_big_message(f"Installing webui requirements from file: {requirements_file}") - print(f"TORCH: {torver}\n") + print(f"GPU Choice: {gpu_choice}\n") # Prepare the requirements file textgen_requirements = open(requirements_file).read().splitlines() From b30a73016d626e985e248de15fa65e5a531c8bd2 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 2 Jun 2025 07:49:22 -0700 Subject: [PATCH 21/82] Remove the "Is typing..." yield by default --- modules/chat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index f1ea16f1..3c4c3636 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -656,7 +656,7 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess update_message_metadata(output['metadata'], "user", row_idx, timestamp=get_current_timestamp()) # *Is typing...* - if loading_message: + if loading_message and shared.processing_message: yield { 'visible': output['visible'][:-1] + [[output['visible'][-1][0], shared.processing_message]], 'internal': output['internal'], @@ -680,7 +680,7 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess }) output['metadata'][key]["current_version_index"] = len(output['metadata'][key]["versions"]) - 1 - if loading_message: + if loading_message and shared.processing_message: yield { 'visible': output['visible'][:-1] + [[visible_text, shared.processing_message]], 'internal': output['internal'][:-1] + [[text, '']], From b38ec0ec385d44d49d3fe7adf2ad77ae62302214 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 2 Jun 2025 11:33:17 -0700 Subject: [PATCH 22/82] Update llama.cpp --- requirements/full/requirements.txt | 4 ++-- requirements/full/requirements_amd.txt | 4 ++-- requirements/full/requirements_amd_noavx2.txt | 4 ++-- requirements/full/requirements_apple_intel.txt | 4 ++-- requirements/full/requirements_apple_silicon.txt | 6 +++--- requirements/full/requirements_cpu_only.txt | 4 ++-- requirements/full/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/full/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements.txt | 4 ++-- 
requirements/portable/requirements_apple_intel.txt | 4 ++-- requirements/portable/requirements_apple_silicon.txt | 6 +++--- requirements/portable/requirements_cpu_only.txt | 4 ++-- requirements/portable/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/portable/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements_vulkan.txt | 4 ++-- requirements/portable/requirements_vulkan_noavx2.txt | 4 ++-- 16 files changed, 34 insertions(+), 34 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index 04d97220..277f8249 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -34,8 +34,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index f807199d..dbf35c34 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -33,7 +33,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git 
a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index 4fb70eb1..2e5eb6c9 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -33,7 +33,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index a311ab9b..9a19ab29 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -33,7 +33,7 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3-py3-none-any.whl https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index 30e8409a..973d9bfb 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -33,8 +33,8 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" 
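[Editorial note — illustration, not part of the patch.] The Mac wheels in these files are selected by `platform_release`, i.e. the Darwin kernel major version: per the markers above, Darwin 22, 23 and 24 map to the macosx_13_0, macosx_14_0 and macosx_15_0 wheel tags. A short sketch of that mapping done at runtime; the function name and the None fallback are illustrative assumptions.

    import platform


    def macos_wheel_tag() -> str | None:
        if platform.system() != "Darwin":
            return None
        darwin_major = int(platform.release().split(".")[0])
        # Same ranges the requirement markers encode
        mapping = {22: "macosx_13_0", 23: "macosx_14_0", 24: "macosx_15_0"}
        return mapping.get(darwin_major)


    print(macos_wheel_tag())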
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3-py3-none-any.whl https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index 70949949..4a48a51f 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -33,5 +33,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index 318bb93a..76bde864 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -33,5 +33,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and 
platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index 7c3635cc..6cd0fa65 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -34,8 +34,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt index bde310e1..60ce941e 100644 --- a/requirements/portable/requirements.txt +++ b/requirements/portable/requirements.txt @@ -19,5 +19,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt index 521edc0c..b1649bc9 100644 --- a/requirements/portable/requirements_apple_intel.txt +++ b/requirements/portable/requirements_apple_intel.txt @@ -19,5 +19,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" 
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt index ef7946ff..571eba52 100644 --- a/requirements/portable/requirements_apple_silicon.txt +++ b/requirements/portable/requirements_apple_silicon.txt @@ -19,6 +19,6 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt index a3ad743e..88170cf3 100644 --- a/requirements/portable/requirements_cpu_only.txt +++ b/requirements/portable/requirements_cpu_only.txt @@ -19,5 +19,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt index 
eec052d3..e96cef49 100644 --- a/requirements/portable/requirements_cpu_only_noavx2.txt +++ b/requirements/portable/requirements_cpu_only_noavx2.txt @@ -19,5 +19,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt index c9898a05..78f94aa5 100644 --- a/requirements/portable/requirements_noavx2.txt +++ b/requirements/portable/requirements_noavx2.txt @@ -19,5 +19,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt index 0de9c7cb..3e41427d 100644 --- a/requirements/portable/requirements_vulkan.txt +++ b/requirements/portable/requirements_vulkan.txt @@ -19,5 +19,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt index 2bfb4d51..022ebb61 100644 --- a/requirements/portable/requirements_vulkan_noavx2.txt +++ b/requirements/portable/requirements_vulkan_noavx2.txt @@ -19,5 +19,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" 
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

From 93b3752cdf9f43dd391462168e2e14dd2ab75643 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Wed, 4 Jun 2025 09:40:30 -0700
Subject: [PATCH 23/82] Revert "Remove the "Is typing..." yield by default"

This reverts commit b30a73016d626e985e248de15fa65e5a531c8bd2.

---
 modules/chat.py | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/modules/chat.py b/modules/chat.py
index 3c4c3636..f1ea16f1 100644
--- a/modules/chat.py
+++ b/modules/chat.py
@@ -656,7 +656,7 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
         update_message_metadata(output['metadata'], "user", row_idx, timestamp=get_current_timestamp())

         # *Is typing...*
-        if loading_message and shared.processing_message:
+        if loading_message:
             yield {
                 'visible': output['visible'][:-1] + [[output['visible'][-1][0], shared.processing_message]],
                 'internal': output['internal'],
@@ -680,7 +680,7 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
                 })
                 output['metadata'][key]["current_version_index"] = len(output['metadata'][key]["versions"]) - 1

-            if loading_message and shared.processing_message:
+            if loading_message:
                 yield {
                     'visible': output['visible'][:-1] + [[visible_text, shared.processing_message]],
                     'internal': output['internal'][:-1] + [[text, '']],

From 9bd7359ffab5e434b7cdfdb43ee91cb3ad397c0d Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Wed, 4 Jun 2025 10:47:14 -0700
Subject: [PATCH 24/82] Scroll the textarea into view when editing a message

---
 js/global_scope_js.js | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/js/global_scope_js.js b/js/global_scope_js.js
index d5140c93..801f1574 100644
--- a/js/global_scope_js.js
+++ b/js/global_scope_js.js
@@ -95,6 +95,12 @@ function startEditing(messageElement, messageBody, isUserMessage) {
     editingInterface.textarea.focus();
     editingInterface.textarea.setSelectionRange(rawText.length, rawText.length);

+    // Scroll the textarea into view
+    editingInterface.textarea.scrollIntoView({
+        behavior: "smooth",
+        block: "center"
+    });
+
     // Setup event handlers
     setupEditingHandlers(editingInterface.textarea, messageElement, originalHTML, messageBody, isUserMessage);
 }

From 66a75c899a4b0786cd8744886a189864923287b5 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Wed, 4 Jun 2025 10:59:43 -0700
Subject: [PATCH 25/82] Improve the scrollbars in code blocks

---
 js/main.js | 1 +
 1 file changed, 1 insertion(+)

diff --git a/js/main.js b/js/main.js
index 8090937f..2e72d82e 100644
--- a/js/main.js
+++ b/js/main.js
@@ -229,6 +229,7 @@ function doSyntaxHighlighting() {
         codeBlocks.forEach((codeBlock) => {
             hljs.highlightElement(codeBlock);
             codeBlock.setAttribute("data-highlighted", "true");
+            codeBlock.classList.add("pretty_scrollbar");
         });

         renderMathInElement(messageBody, {

From 3d676cd50f8661ca96a20a452611422acb47177c Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Wed, 4 Jun 2025 11:02:04 -0700
Subject: [PATCH 26/82] Optimize syntax highlighting

---
 js/main.js | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/js/main.js b/js/main.js
index 2e72d82e..9a620fa9 100644
--- a/js/main.js
+++ b/js/main.js
@@ -217,7 +217,7 @@ function isElementVisibleOnScreen(element) {
 }

 function doSyntaxHighlighting() {
-    const messageBodies = document.querySelectorAll(".message-body");
+    const messageBodies = document.getElementById("chat").querySelectorAll(".message-body");

     if (messageBodies.length > 0) {
         observer.disconnect();

From 3829507d0fd66eccc532b5d8d0e3d77c38143d0c Mon Sep 17 00:00:00 2001
From: Hanusz Leszek
Date: Wed, 4 Jun 2025 20:13:36 +0200
Subject: [PATCH 27/82] Stop model during graceful shutdown (#7042)

---
 server.py | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/server.py b/server.py
index c22ed1f1..99d2e171 100644
--- a/server.py
+++ b/server.py
@@ -60,6 +60,14 @@ from modules.utils import gradio


 def signal_handler(sig, frame):
     logger.info("Received Ctrl+C. Shutting down Text generation web UI gracefully.")
+
+    # Explicitly stop LlamaServer to avoid __del__ cleanup issues during shutdown
+    if shared.model and shared.model.__class__.__name__ == 'LlamaServer':
+        try:
+            shared.model.stop()
+        except:
+            pass
+
     sys.exit(0)

From 977ec801b7682c3239fe3e6fdfcb8b90c1e802f3 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Thu, 5 Jun 2025 06:33:45 -0700
Subject: [PATCH 28/82] Improve table colors in instruct mode

---
 css/html_instruct_style.css | 8 ++++++++
 1 file changed, 8 insertions(+)

diff --git a/css/html_instruct_style.css b/css/html_instruct_style.css
index 6ad250aa..9831ee8f 100644
--- a/css/html_instruct_style.css
+++ b/css/html_instruct_style.css
@@ -17,6 +17,14 @@
     color: #d1d5db !important;
 }

+.chat .message-body :is(th, td) {
+    border-color: #40404096 !important;
+}
+
+.dark .chat .message-body :is(th, td) {
+    border-color: #ffffff75 !important;
+}
+
 .chat .message-body :is(p, ul, ol) {
     margin: 1.25em 0 !important;
 }

From d47c8eb956a72ebc7c1f582718758697aef62118 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Thu, 5 Jun 2025 06:56:24 -0700
Subject: [PATCH 29/82] Remove quotes from LLM-generated websearch query
 (closes #7045). Fix by @Quiet-Joker

---
 modules/chat.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/modules/chat.py b/modules/chat.py
index f1ea16f1..14f2a4f7 100644
--- a/modules/chat.py
+++ b/modules/chat.py
@@ -604,7 +604,12 @@ def generate_search_query(user_message, state):

     query = ""
     for reply in generate_reply(formatted_prompt, search_state, stopping_strings=[], is_chat=True):
-        query = reply.strip()
+        query = reply
+
+    # Strip and remove surrounding quotes if present
+    query = query.strip()
+    if len(query) >= 2 and query.startswith('"') and query.endswith('"'):
+        query = query[1:-1]

     return query

From 45f823ddf625cb914acb6620b0551bf6734b2838 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 6 Jun 2025 22:23:34 -0700
Subject: [PATCH 30/82] Print \n after the llama.cpp progress bar reaches 1.0

---
 modules/llama_cpp_server.py | 11 ++++++++++-
 1 file changed, 10 insertions(+), 1 deletion(-)

diff --git a/modules/llama_cpp_server.py b/modules/llama_cpp_server.py
index aa712541..d8fcb89d 100644
--- a/modules/llama_cpp_server.py
+++ b/modules/llama_cpp_server.py
@@ -417,14 +417,23 @@ def filter_stderr_with_progress(process_stderr):
             progress_match = progress_pattern.search(line)

             if progress_match:
+                progress_value = float(progress_match.group(1))
+
                 if last_was_progress:
                     # Overwrite the previous progress line using carriage return
                     sys.stderr.write(f'\r{line}')
                 else:
                     # First progress line - print normally
                     sys.stderr.write(line)
+
+                # Check if progress is 100% (1.0)
+                if progress_value >= 1.0:
+                    sys.stderr.write('\n')  # Add newline for completed progress
+                    last_was_progress = False
+                else:
+                    last_was_progress = True
+
                 sys.stderr.flush()
-                last_was_progress = True
             elif not line.startswith(('srv ', 'slot ')) and 'log_server_r: request: GET /health' not in line:
                 if last_was_progress:
                     # Finish the progress line with a newline, then print the new line

From f8f23b5489e70467ffc46a1b6ae2a406b3fcf9ff Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 6 Jun 2025 22:25:13 -0700
Subject: [PATCH 31/82] Simplify the llama.cpp stderr filter code

---
 modules/llama_cpp_server.py | 49 ++++++++++++++++---------------------
 1 file changed, 21 insertions(+), 28 deletions(-)

diff --git a/modules/llama_cpp_server.py b/modules/llama_cpp_server.py
index d8fcb89d..f0a72de8 100644
--- a/modules/llama_cpp_server.py
+++ b/modules/llama_cpp_server.py
@@ -408,41 +408,34 @@ class LlamaServer:


 def filter_stderr_with_progress(process_stderr):
-    progress_pattern = re.compile(r'slot update_slots: id.*progress = (\d+\.\d+)')
+    """
+    Reads stderr lines from a process, filters out noise, and displays progress updates
+    inline (overwriting the same line) until completion.
+ """ + progress_re = re.compile(r'slot update_slots: id.*progress = (\d+\.\d+)') last_was_progress = False try: - for line in iter(process_stderr.readline, ''): - line = line.rstrip('\n\r') # Remove existing newlines - progress_match = progress_pattern.search(line) + for raw in iter(process_stderr.readline, ''): + line = raw.rstrip('\r\n') + match = progress_re.search(line) - if progress_match: - progress_value = float(progress_match.group(1)) + if match: + progress = float(match.group(1)) + # choose carriage return for in-progress or newline at completion + end_char = '\r' if progress < 1.0 else '\n' + print(line, end=end_char, file=sys.stderr, flush=True) + last_was_progress = (progress < 1.0) + # skip noise lines + elif not (line.startswith(('srv ', 'slot ')) or 'log_server_r: request: GET /health' in line): + # if we were in progress, finish that line first if last_was_progress: - # Overwrite the previous progress line using carriage return - sys.stderr.write(f'\r{line}') - else: - # First progress line - print normally - sys.stderr.write(line) + print(file=sys.stderr) - # Check if progress is 100% (1.0) - if progress_value >= 1.0: - sys.stderr.write('\n') # Add newline for completed progress - last_was_progress = False - else: - last_was_progress = True - - sys.stderr.flush() - elif not line.startswith(('srv ', 'slot ')) and 'log_server_r: request: GET /health' not in line: - if last_was_progress: - # Finish the progress line with a newline, then print the new line - sys.stderr.write(f'\n{line}\n') - else: - # Normal line - print with newline - sys.stderr.write(f'{line}\n') - sys.stderr.flush() + print(line, file=sys.stderr, flush=True) last_was_progress = False - # For filtered lines, don't change last_was_progress state + except (ValueError, IOError): + # silently ignore broken output or IO errors pass From 379dd01ca7770484bbd0798252665935a0bbfa05 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 6 Jun 2025 22:32:07 -0700 Subject: [PATCH 32/82] Filter out failed web search downloads from attachments --- modules/web_search.py | 11 +++++++++-- 1 file changed, 9 insertions(+), 2 deletions(-) diff --git a/modules/web_search.py b/modules/web_search.py index 1f670349..a1e47253 100644 --- a/modules/web_search.py +++ b/modules/web_search.py @@ -107,6 +107,13 @@ def add_web_search_attachments(history, row_idx, user_message, search_query, sta logger.warning("No search results found") return + # Filter out failed downloads before adding attachments + successful_results = [result for result in search_results if result['content'] and result['content'].strip()] + + if not successful_results: + logger.warning("No successful downloads to add as attachments") + return + # Add search results as attachments key = f"user_{row_idx}" if key not in history['metadata']: @@ -114,7 +121,7 @@ def add_web_search_attachments(history, row_idx, user_message, search_query, sta if "attachments" not in history['metadata'][key]: history['metadata'][key]["attachments"] = [] - for result in search_results: + for result in successful_results: attachment = { "name": result['title'], "type": "text/html", @@ -123,7 +130,7 @@ def add_web_search_attachments(history, row_idx, user_message, search_query, sta } history['metadata'][key]["attachments"].append(attachment) - logger.info(f"Added {len(search_results)} web search results as attachments") + logger.info(f"Added {len(successful_results)} successful web search results as attachments") except Exception as e: 
logger.error(f"Error in web search: {e}") From 2d263f227d090d2d5c0b152e045ab5cc3eae3073 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 6 Jun 2025 22:38:20 -0700 Subject: [PATCH 33/82] Fix the chat input reappearing when the page is reloaded --- modules/ui.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/modules/ui.py b/modules/ui.py index 14a09d2b..89bbbdb6 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -270,6 +270,9 @@ def gather_interface_values(*args): if not shared.args.multi_user: shared.persistent_interface_state = output + # Remove the chat input, as it gets cleared after this function call + shared.persistent_interface_state.pop('textbox') + # Prevent history loss if backend is restarted but UI is not refreshed if output['history'] is None and output['unique_id'] is not None: output['history'] = load_history(output['unique_id'], output['character_menu'], output['mode']) From db847eed4c0693c735e364fcf48e79e4f001bb77 Mon Sep 17 00:00:00 2001 From: rakha abadi susilo Date: Sun, 8 Jun 2025 07:44:15 +0700 Subject: [PATCH 34/82] Add RTX 50XX Nvidia blackwell support (ExLlamaV2/V3 and Transformers) (#7011) --------- Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> --- one_click.py | 21 ++++++--- requirements/full/requirements_cuda128.txt | 46 +++++++++++++++++++ .../full/requirements_cuda128_noavx2.txt | 46 +++++++++++++++++++ 3 files changed, 106 insertions(+), 7 deletions(-) create mode 100644 requirements/full/requirements_cuda128.txt create mode 100644 requirements/full/requirements_cuda128_noavx2.txt diff --git a/one_click.py b/one_click.py index cccb0dc9..94f2aab0 100644 --- a/one_click.py +++ b/one_click.py @@ -17,8 +17,6 @@ import sys # Define the required versions TORCH_VERSION = "2.6.0" -TORCHVISION_VERSION = "0.21.0" -TORCHAUDIO_VERSION = "2.6.0" PYTHON_VERSION = "3.11" LIBSTDCXX_VERSION_LINUX = "12.1.0" @@ -119,12 +117,13 @@ def get_gpu_choice(): 'B': 'AMD - Linux/macOS only, requires ROCm 6.2.4', 'C': 'Apple M Series', 'D': 'Intel Arc (beta)', + 'E': 'NVIDIA - CUDA 12.8', 'N': 'CPU mode' }, ) # Convert choice to GPU name - gpu_choice = {"A": "NVIDIA", "B": "AMD", "C": "APPLE", "D": "INTEL", "N": "NONE"}[choice] + gpu_choice = {"A": "NVIDIA", "B": "AMD", "C": "APPLE", "D": "INTEL", "E": "NVIDIA_CUDA128", "N": "NONE"}[choice] # Save choice to state state['gpu_choice'] = gpu_choice @@ -135,29 +134,33 @@ def get_gpu_choice(): def get_pytorch_install_command(gpu_choice): """Get PyTorch installation command based on GPU choice""" - base_cmd = f"python -m pip install torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION} " + base_cmd = f"python -m pip install torch=={TORCH_VERSION} " if gpu_choice == "NVIDIA": return base_cmd + "--index-url https://download.pytorch.org/whl/cu124" + elif gpu_choice == "NVIDIA_CUDA128": + return "python -m pip install torch==2.7.1 --index-url https://download.pytorch.org/whl/cu128" elif gpu_choice == "AMD": return base_cmd + "--index-url https://download.pytorch.org/whl/rocm6.2.4" elif gpu_choice in ["APPLE", "NONE"]: return base_cmd + "--index-url https://download.pytorch.org/whl/cpu" elif gpu_choice == "INTEL": if is_linux(): - return "python -m pip install torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/" + return "python -m pip install torch==2.1.0a0 intel-extension-for-pytorch==2.1.10+xpu 
--extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/" else: - return "python -m pip install torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/" + return "python -m pip install torch==2.1.0a0 intel-extension-for-pytorch==2.1.10 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/" else: return base_cmd def get_pytorch_update_command(gpu_choice): """Get PyTorch update command based on GPU choice""" - base_cmd = f"python -m pip install --upgrade torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION}" + base_cmd = f"python -m pip install --upgrade torch=={TORCH_VERSION} " if gpu_choice == "NVIDIA": return f"{base_cmd} --index-url https://download.pytorch.org/whl/cu124" + elif gpu_choice == "NVIDIA_CUDA128": + return "python -m pip install --upgrade torch==2.7.1 --index-url https://download.pytorch.org/whl/cu128" elif gpu_choice == "AMD": return f"{base_cmd} --index-url https://download.pytorch.org/whl/rocm6.2.4" elif gpu_choice in ["APPLE", "NONE"]: @@ -181,6 +184,8 @@ def get_requirements_file(gpu_choice): file_name = f"requirements_cpu_only{'_noavx2' if not cpu_has_avx2() else ''}.txt" elif gpu_choice == "NVIDIA": file_name = f"requirements{'_noavx2' if not cpu_has_avx2() else ''}.txt" + elif gpu_choice == "NVIDIA_CUDA128": + file_name = f"requirements_cuda128{'_noavx2' if not cpu_has_avx2() else ''}.txt" else: raise ValueError(f"Unknown GPU choice: {gpu_choice}") @@ -328,6 +333,8 @@ def install_webui(): # Handle CUDA version display elif any((is_windows(), is_linux())) and gpu_choice == "NVIDIA": print("CUDA: 12.4") + elif any((is_windows(), is_linux())) and gpu_choice == "NVIDIA_CUDA128": + print("CUDA: 12.8") # No PyTorch for AMD on Windows (?) 
     elif is_windows() and gpu_choice == "AMD":
diff --git a/requirements/full/requirements_cuda128.txt b/requirements/full/requirements_cuda128.txt
new file mode 100644
index 00000000..9fc99606
--- /dev/null
+++ b/requirements/full/requirements_cuda128.txt
@@ -0,0 +1,46 @@
+accelerate==1.5.*
+beautifulsoup4==4.13.4
+bitsandbytes==0.45.*
+colorama
+datasets
+duckduckgo_search==8.0.2
+einops
+fastapi==0.112.4
+gradio==4.37.*
+jinja2==3.1.6
+markdown
+numpy==2.2.*
+pandas
+peft==0.15.*
+Pillow>=9.5.0
+psutil
+pydantic==2.8.2
+PyPDF2==3.0.1
+python-docx==1.1.2
+pyyaml
+requests
+rich
+safetensors==0.5.*
+scipy
+sentencepiece
+tensorboard
+transformers==4.52.*
+triton-windows; platform_system == "Windows"
+tqdm
+wandb
+
+# API
+flask_cloudflared==0.0.14
+sse-starlette==1.6.5
+tiktoken
+
+# CUDA wheels
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/turboderp-org/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/turboderp-org/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" +https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" From 35ed55d18f0d7acfe55af10d2086185f7141cc46 Mon Sep 17 00:00:00 2001 From: oobabooga Date: Sat, 7 Jun 2025 22:46:52 -0300 Subject: [PATCH 35/82] UI persistence (#7050) --- modules/shared.py | 2 + modules/ui.py | 106 ++++++++++++++++++++++++++++++++++++++++++ modules/ui_session.py | 17 +------ server.py | 3 ++ 4 files changed, 112 insertions(+), 16 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index f712f7f8..2e500779 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -35,6 +35,8 @@ settings = { 'mode': 'instruct', 'chat_style': 'cai-chat', 'chat-instruct_command': 'Continue the chat dialogue below. 
Write a single reply for the character "<|character|>".\n\n<|prompt|>', + 'enable_web_search': False, + 'web_search_pages': 3, 'prompt-default': 'QA', 'prompt-notebook': 'QA', 'character': 'Assistant', diff --git a/modules/ui.py b/modules/ui.py index 89bbbdb6..db3adf0f 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -1,4 +1,5 @@ import copy +import threading from pathlib import Path import gradio as gr @@ -7,6 +8,16 @@ import yaml import extensions from modules import shared from modules.chat import load_history +from modules.utils import gradio + +# Global state for auto-saving UI settings with debouncing +_auto_save_timer = None +_auto_save_lock = threading.Lock() +_last_interface_state = None +_last_preset = None +_last_extensions = None +_last_show_controls = None +_last_theme_state = None with open(Path(__file__).resolve().parent / '../css/NotoSans/stylesheet.css', 'r') as f: css = f.read() @@ -334,6 +345,101 @@ def save_settings(state, preset, extensions_list, show_controls, theme_state): return yaml.dump(output, sort_keys=False, width=float("inf"), allow_unicode=True) +def store_current_state_and_debounce(interface_state, preset, extensions, show_controls, theme_state): + """Store current state and trigger debounced save""" + global _auto_save_timer, _last_interface_state, _last_preset, _last_extensions, _last_show_controls, _last_theme_state + + if shared.args.multi_user: + return + + # Store the current state in global variables + _last_interface_state = interface_state + _last_preset = preset + _last_extensions = extensions + _last_show_controls = show_controls + _last_theme_state = theme_state + + # Reset the debounce timer + with _auto_save_lock: + if _auto_save_timer is not None: + _auto_save_timer.cancel() + + _auto_save_timer = threading.Timer(2.0, _perform_debounced_save) + _auto_save_timer.start() + + +def _perform_debounced_save(): + """Actually perform the save using the stored state""" + global _auto_save_timer + + try: + if _last_interface_state is not None: + contents = save_settings(_last_interface_state, _last_preset, _last_extensions, _last_show_controls, _last_theme_state) + settings_path = Path('user_data') / 'settings.yaml' + settings_path.parent.mkdir(exist_ok=True) + with open(settings_path, 'w', encoding='utf-8') as f: + f.write(contents) + except Exception as e: + print(f"Auto-save failed: {e}") + finally: + with _auto_save_lock: + _auto_save_timer = None + + +def setup_auto_save(): + """Attach auto-save to key UI elements""" + if shared.args.multi_user: + return + + change_elements = [ + # Chat tab (ui_chat.py) + 'start_with', + 'enable_web_search', + 'web_search_pages', + 'mode', + 'chat_style', + 'chat-instruct_command', + 'character_menu', + 'name1', + 'user_bio', + 'custom_system_message', + 'chat_template_str', + + # Parameters tab (ui_parameters.py) + 'preset_menu', + 'max_new_tokens', + 'prompt_lookup_num_tokens', + 'max_tokens_second', + 'auto_max_new_tokens', + 'ban_eos_token', + 'add_bos_token', + 'enable_thinking', + 'skip_special_tokens', + 'stream', + 'static_cache', + 'seed', + 'custom_stopping_strings', + 'custom_token_bans', + 'negative_prompt', + + # Default tab (ui_default.py) + 'prompt_menu-default', + + # Notebook tab (ui_notebook.py) + 'prompt_menu-notebook', + + # Session tab (ui_session.py) + 'show_controls', + 'theme_state', + ] + + for element_name in change_elements: + if element_name in shared.gradio: + shared.gradio[element_name].change( + gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( 
+ store_current_state_and_debounce, gradio('interface_state', 'preset_menu', 'extensions_menu', 'show_controls', 'theme_state'), None, show_progress=False) + + def create_refresh_button(refresh_component, refresh_method, refreshed_args, elem_class, interactive=True): """ Copied from https://github.com/AUTOMATIC1111/stable-diffusion-webui diff --git a/modules/ui_session.py b/modules/ui_session.py index a4eba667..4ed740cd 100644 --- a/modules/ui_session.py +++ b/modules/ui_session.py @@ -10,11 +10,10 @@ def create_ui(): with gr.Tab("Session", elem_id="session-tab"): with gr.Row(): with gr.Column(): - shared.gradio['reset_interface'] = gr.Button("Apply flags/extensions and restart", interactive=not mu) with gr.Row(): shared.gradio['toggle_dark_mode'] = gr.Button('Toggle 💡') - shared.gradio['save_settings'] = gr.Button('Save UI defaults to user_data/settings.yaml', interactive=not mu) + shared.gradio['reset_interface'] = gr.Button("Apply flags/extensions and restart", interactive=not mu) with gr.Row(): with gr.Column(): shared.gradio['extensions_menu'] = gr.CheckboxGroup(choices=utils.get_available_extensions(), value=shared.args.extensions, label="Available extensions", info='Note that some of these extensions may require manually installing Python requirements through the command: pip install -r extensions/extension_name/requirements.txt', elem_classes='checkboxgroup-table') @@ -42,20 +41,6 @@ def create_ui(): lambda x: 'dark' if x == 'light' else 'light', gradio('theme_state'), gradio('theme_state')).then( None, None, None, js=f'() => {{{ui.dark_theme_js}; toggleDarkMode()}}') - shared.gradio['save_settings'].click( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - handle_save_settings, gradio('interface_state', 'preset_menu', 'extensions_menu', 'show_controls', 'theme_state'), gradio('save_contents', 'save_filename', 'save_root', 'file_saver'), show_progress=False) - - -def handle_save_settings(state, preset, extensions, show_controls, theme): - contents = ui.save_settings(state, preset, extensions, show_controls, theme) - return [ - contents, - "settings.yaml", - "user_data/", - gr.update(visible=True) - ] - def set_interface_arguments(extensions, bool_active): shared.args.extensions = extensions diff --git a/server.py b/server.py index 99d2e171..3be4c27c 100644 --- a/server.py +++ b/server.py @@ -150,6 +150,9 @@ def create_interface(): ui_parameters.create_event_handlers() ui_model_menu.create_event_handlers() + # UI persistence events + ui.setup_auto_save() + # Interface launch events shared.gradio['interface'].load( None, From 6436bf1920b128b40ed2abb35da947af6844cca3 Mon Sep 17 00:00:00 2001 From: oobabooga Date: Sun, 8 Jun 2025 01:58:02 -0300 Subject: [PATCH 36/82] More UI persistence: presets and characters (#7051) --- modules/chat.py | 61 ++++++++++++++++++++++++++ modules/html_generator.py | 16 ++++--- modules/presets.py | 72 +++++-------------------------- modules/shared.py | 61 +++++++++++++++++++++++--- modules/ui.py | 48 +++++++++++++++++++-- modules/ui_chat.py | 20 ++++++--- modules/ui_default.py | 4 +- modules/ui_notebook.py | 4 +- modules/ui_parameters.py | 90 ++++++++++++++++++++------------------- server.py | 20 +++++---- 10 files changed, 260 insertions(+), 136 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index 14f2a4f7..f740db55 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -1220,6 +1220,45 @@ def load_character(character, name1, name2): return name1, name2, picture, greeting, context +def 
reset_character_for_ui(state): + """Reset character fields to the currently loaded character's saved values""" + if state['character_menu'] and state['character_menu'] != 'None': + try: + name1, name2, picture, greeting, context = load_character(state['character_menu'], state['name1'], state['name2']) + + state['name2'] = name2 + state['greeting'] = greeting + state['context'] = context + state['character_picture'] = picture # This triggers cache update via generate_pfp_cache + + return state, name2, context, greeting, picture + + except Exception as e: + logger.error(f"Failed to reset character '{state['character_menu']}': {e}") + return clear_character_for_ui(state) + else: + return clear_character_for_ui(state) + + +def clear_character_for_ui(state): + """Clear all character fields and picture cache""" + state['name2'] = shared.settings['name2'] + state['context'] = shared.settings['context'] + state['greeting'] = shared.settings['greeting'] + state['character_picture'] = None + + # Clear the cache files + cache_folder = Path(shared.args.disk_cache_dir) + for cache_file in ['pfp_character.png', 'pfp_character_thumb.png']: + cache_path = Path(f'{cache_folder}/{cache_file}') + if cache_path.exists(): + cache_path.unlink() + + logger.info("Cleared character fields and picture cache") + + return state, state['name2'], state['context'], state['greeting'], None + + def load_instruction_template(template): if template == 'None': return '' @@ -1710,6 +1749,28 @@ def handle_character_menu_change(state): ] +def handle_character_picture_change(picture): + """Update or clear cache when character picture changes""" + cache_folder = Path(shared.args.disk_cache_dir) + if not cache_folder.exists(): + cache_folder.mkdir() + + if picture is not None: + # Save to cache + picture.save(Path(f'{cache_folder}/pfp_character.png'), format='PNG') + thumb = make_thumbnail(picture) + thumb.save(Path(f'{cache_folder}/pfp_character_thumb.png'), format='PNG') + logger.info("Updated character picture cache") + else: + # Remove cache files when picture is cleared + for cache_file in ['pfp_character.png', 'pfp_character_thumb.png']: + cache_path = Path(f'{cache_folder}/{cache_file}') + if cache_path.exists(): + cache_path.unlink() + + logger.info("Cleared character picture cache") + + def handle_mode_change(state): history = load_latest_history(state) histories = find_all_histories_with_first_prompts(state) diff --git a/modules/html_generator.py b/modules/html_generator.py index f90e3b04..eac7d91a 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -520,17 +520,23 @@ def generate_instruct_html(history, last_message_only=False): return output +def get_character_image_with_cache_buster(): + """Get character image URL with cache busting based on file modification time""" + cache_path = Path("user_data/cache/pfp_character_thumb.png") + if cache_path.exists(): + mtime = int(cache_path.stat().st_mtime) + return f'' + + return '' + + def generate_cai_chat_html(history, name1, name2, style, character, reset_cache=False, last_message_only=False): if not last_message_only: output = f'
' else: output = "" - # We use ?character and ?time.time() to force the browser to reset caches - img_bot = ( - f'' - if Path("user_data/cache/pfp_character_thumb.png").exists() else '' - ) + img_bot = get_character_image_with_cache_buster() def create_message(role, content, raw_content): """Inner function for CAI-style messages.""" diff --git a/modules/presets.py b/modules/presets.py index cf706605..3eb1f5fc 100644 --- a/modules/presets.py +++ b/modules/presets.py @@ -1,6 +1,5 @@ import functools import pprint -import random from pathlib import Path import yaml @@ -93,68 +92,17 @@ def load_preset_for_ui(name, state): return state, *[generate_params[k] for k in presets_params()] -def random_preset(state): - params_and_values = { - 'remove_tail_tokens': { - 'top_p': [0.5, 0.8, 0.9, 0.95, 0.99], - 'min_p': [0.5, 0.2, 0.1, 0.05, 0.01], - 'top_k': [3, 5, 10, 20, 30, 40], - 'typical_p': [0.2, 0.575, 0.95], - 'tfs': [0.5, 0.8, 0.9, 0.95, 0.99], - 'top_a': [0.5, 0.2, 0.1, 0.05, 0.01], - 'epsilon_cutoff': [1, 3, 5, 7, 9], - 'eta_cutoff': [3, 6, 9, 12, 15, 18], - }, - 'flatten_distribution': { - 'temperature': [0.1, 0.5, 0.7, 0.8, 1, 1.2, 1.5, 2.0, 5.0], - 'dynamic_temperature': [ - [0.1, 1], - [0.1, 1.5], - [0.1, 2], - [0.1, 5], - [0.5, 1], - [0.5, 1.5], - [0.5, 2], - [0.5, 5], - [0.8, 1], - [0.8, 1.5], - [0.8, 2], - [0.8, 5], - [1, 1.5], - [1, 2], - [1, 5] - ], - 'smoothing_factor': [0.2, 0.3, 0.6, 1.2], - }, - 'repetition': { - 'repetition_penalty': [1, 1.05, 1.1, 1.15, 1.20, 1.25], - 'presence_penalty': [0, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 2.0], - 'frequency_penalty': [0, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 2.0], - }, - 'other': { - 'temperature_last': [True, False], - } - } - - generate_params = default_preset() - for cat in params_and_values: - choices = list(params_and_values[cat].keys()) - if shared.args.loader is not None: - choices = [x for x in choices if loader_contains(x)] - - if len(choices) > 0: - choice = random.choice(choices) - value = random.choice(params_and_values[cat][choice]) - if choice == 'dynamic_temperature': - generate_params['dynamic_temperature'] = True - generate_params['dynatemp_low'] = value[0] - generate_params['dynatemp_high'] = value[1] - else: - generate_params[choice] = value - +def reset_preset_for_ui(name, state): + """Reset current preset to its saved values from file""" + generate_params = load_preset(name, verbose=True) + state.update(generate_params) + return state, *[generate_params[k] for k in presets_params()] + + +def neutralize_samplers_for_ui(state): + """Set all samplers to their default/neutral values""" + generate_params = default_preset() state.update(generate_params) - logger.info("GENERATED_PRESET=") - pprint.PrettyPrinter(indent=4, width=1, sort_dicts=False).pprint(remove_defaults(state)) return state, *[generate_params[k] for k in presets_params()] diff --git a/modules/shared.py b/modules/shared.py index 2e500779..f4f8e180 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -39,10 +39,6 @@ settings = { 'web_search_pages': 3, 'prompt-default': 'QA', 'prompt-notebook': 'QA', - 'character': 'Assistant', - 'name1': 'You', - 'user_bio': '', - 'custom_system_message': '', 'preset': 'min_p', 'max_new_tokens': 512, 'max_new_tokens_min': 1, @@ -63,8 +59,64 @@ settings = { 'negative_prompt': '', 'dark_theme': True, 'default_extensions': [], + + # Character settings + 'character': 'Assistant', + 'name1': 'You', + 'name2': 'AI', + 'user_bio': '', + 'context': 'The following is a conversation with an AI Large Language Model. 
The AI has been trained to answer questions, provide recommendations, and help with decision making. The AI follows user requests. The AI thinks outside the box.', + 'greeting': 'How can I help you today?', + 'custom_system_message': '', 'instruction_template_str': "{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if not ns.found -%}\n {{- '' + 'Below is an instruction that describes a task. Write a response that appropriately completes the request.' + '\\n\\n' -}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {{- '' + message['content'] + '\\n\\n' -}}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{-'### Instruction:\\n' + message['content'] + '\\n\\n'-}}\n {%- else -%}\n {{-'### Response:\\n' + message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{-'### Response:\\n'-}}\n{%- endif -%}", 'chat_template_str': "{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {%- if message['content'] -%}\n {{- message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- if user_bio -%}\n {{- user_bio + '\\n\\n' -}}\n {%- endif -%}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{- name1 + ': ' + message['content'] + '\\n'-}}\n {%- else -%}\n {{- name2 + ': ' + message['content'] + '\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}", + + # Generation parameters - Curve shape + 'temperature': 1.0, + 'dynatemp_low': 1.0, + 'dynatemp_high': 1.0, + 'dynatemp_exponent': 1.0, + 'smoothing_factor': 0.0, + 'smoothing_curve': 1.0, + + # Generation parameters - Curve cutoff + 'min_p': 0.0, + 'top_p': 1.0, + 'top_k': 0, + 'typical_p': 1.0, + 'xtc_threshold': 0.1, + 'xtc_probability': 0.0, + 'epsilon_cutoff': 0.0, + 'eta_cutoff': 0.0, + 'tfs': 1.0, + 'top_a': 0.0, + 'top_n_sigma': 0.0, + + # Generation parameters - Repetition suppression + 'dry_multiplier': 0.0, + 'dry_allowed_length': 2, + 'dry_base': 1.75, + 'repetition_penalty': 1.0, + 'frequency_penalty': 0.0, + 'presence_penalty': 0.0, + 'encoder_repetition_penalty': 1.0, + 'no_repeat_ngram_size': 0, + 'repetition_penalty_range': 1024, + + # Generation parameters - Alternative sampling methods + 'penalty_alpha': 0.0, + 'guidance_scale': 1.0, + 'mirostat_mode': 0, + 'mirostat_tau': 5.0, + 'mirostat_eta': 0.1, + + # Generation parameters - Other options + 'do_sample': True, + 'dynamic_temperature': False, + 'temperature_last': False, + 'sampler_priority': 'repetition_penalty\npresence_penalty\nfrequency_penalty\ndry\ntop_n_sigma\ntemperature\ndynamic_temperature\nquadratic_sampling\ntop_k\ntop_p\ntypical_p\nepsilon_cutoff\neta_cutoff\ntfs\ntop_a\nmin_p\nmirostat\nxtc\nencoder_repetition_penalty\nno_repeat_ngram', + 'dry_sequence_breakers': '"\\n", ":", "\\"", "*"', + 'grammar_string': '', } default_settings = copy.deepcopy(settings) @@ -75,7 +127,6 @@ parser = argparse.ArgumentParser(description="Text generation web UI", conflict_ # Basic settings group = parser.add_argument_group('Basic settings') group.add_argument('--multi-user', action='store_true', help='Multi-user mode. Chat histories are not saved or automatically loaded. 
Warning: this is likely not safe for sharing publicly.') -group.add_argument('--character', type=str, help='The name of the character to load in chat mode by default.') group.add_argument('--model', type=str, help='Name of the model to load by default.') group.add_argument('--lora', type=str, nargs='+', help='The list of LoRAs to load. If you want to load more than one LoRA, write the names separated by spaces.') group.add_argument('--model-dir', type=str, default='user_data/models', help='Path to directory with all the models.') diff --git a/modules/ui.py b/modules/ui.py index db3adf0f..8ec4b165 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -310,7 +310,7 @@ def apply_interface_values(state, use_persistent=False): def save_settings(state, preset, extensions_list, show_controls, theme_state): output = copy.deepcopy(shared.settings) - exclude = ['name2', 'greeting', 'context', 'truncation_length', 'instruction_template_str'] + exclude = [] for k in state: if k in shared.settings and k not in exclude: output[k] = state[k] @@ -323,6 +323,7 @@ def save_settings(state, preset, extensions_list, show_controls, theme_state): output['seed'] = int(output['seed']) output['show_controls'] = show_controls output['dark_theme'] = True if theme_state == 'dark' else False + output.pop('instruction_template_str') # Save extension values in the UI for extension_name in extensions_list: @@ -364,7 +365,7 @@ def store_current_state_and_debounce(interface_state, preset, extensions, show_c if _auto_save_timer is not None: _auto_save_timer.cancel() - _auto_save_timer = threading.Timer(2.0, _perform_debounced_save) + _auto_save_timer = threading.Timer(1.0, _perform_debounced_save) _auto_save_timer.start() @@ -401,15 +402,52 @@ def setup_auto_save(): 'chat-instruct_command', 'character_menu', 'name1', + 'name2', + 'context', + 'greeting', 'user_bio', 'custom_system_message', 'chat_template_str', - # Parameters tab (ui_parameters.py) + # Parameters tab (ui_parameters.py) - Generation parameters 'preset_menu', + 'temperature', + 'dynatemp_low', + 'dynatemp_high', + 'dynatemp_exponent', + 'smoothing_factor', + 'smoothing_curve', + 'min_p', + 'top_p', + 'top_k', + 'typical_p', + 'xtc_threshold', + 'xtc_probability', + 'epsilon_cutoff', + 'eta_cutoff', + 'tfs', + 'top_a', + 'top_n_sigma', + 'dry_multiplier', + 'dry_allowed_length', + 'dry_base', + 'repetition_penalty', + 'frequency_penalty', + 'presence_penalty', + 'encoder_repetition_penalty', + 'no_repeat_ngram_size', + 'repetition_penalty_range', + 'penalty_alpha', + 'guidance_scale', + 'mirostat_mode', + 'mirostat_tau', + 'mirostat_eta', 'max_new_tokens', 'prompt_lookup_num_tokens', 'max_tokens_second', + 'do_sample', + 'dynamic_temperature', + 'temperature_last', 'auto_max_new_tokens', 'ban_eos_token', 'add_bos_token', @@ -417,10 +455,14 @@ def setup_auto_save(): 'skip_special_tokens', 'stream', 'static_cache', + 'truncation_length', 'seed', + 'sampler_priority', 'custom_stopping_strings', 'custom_token_bans', 'negative_prompt', + 'dry_sequence_breakers', + 'grammar_string', # Default tab (ui_default.py) 'prompt_menu-default', diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 0d5a2c18..712843d3 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -86,7 +86,7 @@ def create_ui(): shared.gradio['web_search_pages'] = gr.Number(value=shared.settings.get('web_search_pages', 3), precision=0, label='Number of pages to download', minimum=1, maximum=10) with gr.Row(): - shared.gradio['mode'] = gr.Radio(choices=['instruct', 'chat-instruct', 'chat'], 
value=shared.settings['mode'] if shared.settings['mode'] in ['chat', 'chat-instruct'] else None, label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template Parameters > Instruction template is used.', elem_id='chat-mode') + shared.gradio['mode'] = gr.Radio(choices=['instruct', 'chat-instruct', 'chat'], value=None, label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template Parameters > Instruction template is used.', elem_id='chat-mode') with gr.Row(): shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label='Chat style', value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct') @@ -118,14 +118,16 @@ def create_chat_settings_ui(): with gr.Column(scale=8): with gr.Tab("Character"): with gr.Row(): - shared.gradio['character_menu'] = gr.Dropdown(value=None, choices=utils.get_available_characters(), label='Character', elem_id='character-menu', info='Used in chat and chat-instruct modes.', elem_classes='slim-dropdown') + shared.gradio['character_menu'] = gr.Dropdown(value=shared.settings['character'], choices=utils.get_available_characters(), label='Character', elem_id='character-menu', info='Used in chat and chat-instruct modes.', elem_classes='slim-dropdown') ui.create_refresh_button(shared.gradio['character_menu'], lambda: None, lambda: {'choices': utils.get_available_characters()}, 'refresh-button', interactive=not mu) shared.gradio['save_character'] = gr.Button('💾', elem_classes='refresh-button', elem_id="save-character", interactive=not mu) shared.gradio['delete_character'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu) + shared.gradio['reset_character'] = gr.Button('↺', elem_classes='refresh-button', interactive=True) + shared.gradio['clear_character'] = gr.Button('⚪', elem_classes='refresh-button', interactive=True) - shared.gradio['name2'] = gr.Textbox(value='', lines=1, label='Character\'s name') - shared.gradio['context'] = gr.Textbox(value='', lines=10, label='Context', elem_classes=['add_scrollbar']) - shared.gradio['greeting'] = gr.Textbox(value='', lines=5, label='Greeting', elem_classes=['add_scrollbar']) + shared.gradio['name2'] = gr.Textbox(value=shared.settings['name2'], lines=1, label='Character\'s name') + shared.gradio['context'] = gr.Textbox(value=shared.settings['context'], lines=10, label='Context', elem_classes=['add_scrollbar']) + shared.gradio['greeting'] = gr.Textbox(value=shared.settings['greeting'], lines=5, label='Greeting', elem_classes=['add_scrollbar']) with gr.Tab("User"): shared.gradio['name1'] = gr.Textbox(value=shared.settings['name1'], lines=1, label='Name') @@ -178,7 +180,7 @@ def create_chat_settings_ui(): with gr.Row(): with gr.Column(): shared.gradio['custom_system_message'] = gr.Textbox(value=shared.settings['custom_system_message'], lines=2, label='Custom system message', info='If not empty, will be used instead of the default one.', elem_classes=['add_scrollbar']) - shared.gradio['instruction_template_str'] = gr.Textbox(value='', label='Instruction template', lines=24, info='This gets autodetected; you usually don\'t need to change it. Used in instruct and chat-instruct modes.', elem_classes=['add_scrollbar', 'monospace']) + shared.gradio['instruction_template_str'] = gr.Textbox(value=shared.settings['instruction_template_str'], label='Instruction template', lines=24, info='This gets autodetected; you usually don\'t need to change it. 
Used in instruct and chat-instruct modes.', elem_classes=['add_scrollbar', 'monospace']) with gr.Row(): shared.gradio['send_instruction_to_default'] = gr.Button('Send to default', elem_classes=['small-button']) shared.gradio['send_instruction_to_notebook'] = gr.Button('Send to notebook', elem_classes=['small-button']) @@ -294,6 +296,8 @@ def create_event_handlers(): chat.handle_character_menu_change, gradio('interface_state'), gradio('history', 'display', 'name1', 'name2', 'character_picture', 'greeting', 'context', 'unique_id'), show_progress=False).then( None, None, None, js=f'() => {{{ui.update_big_picture_js}; updateBigPicture()}}') + shared.gradio['character_picture'].change(chat.handle_character_picture_change, gradio('character_picture'), None, show_progress=False) + shared.gradio['mode'].change( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.handle_mode_change, gradio('interface_state'), gradio('history', 'display', 'chat_style', 'chat-instruct_command', 'unique_id'), show_progress=False).then( @@ -317,6 +321,10 @@ def create_event_handlers(): ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.handle_save_template_click, gradio('instruction_template_str'), gradio('save_filename', 'save_root', 'save_contents', 'file_saver'), show_progress=False) + shared.gradio['reset_character'].click( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + chat.reset_character_for_ui, gradio('interface_state'), gradio('interface_state', 'name2', 'context', 'greeting', 'character_picture'), show_progress=False) + shared.gradio['delete_template'].click(chat.handle_delete_template_click, gradio('instruction_template'), gradio('delete_filename', 'delete_root', 'file_deleter'), show_progress=False) shared.gradio['save_chat_history'].click( lambda x: json.dumps(x, indent=4), gradio('history'), gradio('temporary_text')).then( diff --git a/modules/ui_default.py b/modules/ui_default.py index c2946b37..8acc4b10 100644 --- a/modules/ui_default.py +++ b/modules/ui_default.py @@ -19,7 +19,7 @@ def create_ui(): with gr.Row(): with gr.Column(): with gr.Row(): - shared.gradio['textbox-default'] = gr.Textbox(value='', lines=27, label='Input', elem_classes=['textbox_default', 'add_scrollbar']) + shared.gradio['textbox-default'] = gr.Textbox(value=load_prompt(shared.settings['prompt-default']), lines=27, label='Input', elem_classes=['textbox_default', 'add_scrollbar']) shared.gradio['token-counter-default'] = gr.HTML(value="0", elem_id="default-token-counter") with gr.Row(): @@ -28,7 +28,7 @@ def create_ui(): shared.gradio['Generate-default'] = gr.Button('Generate', variant='primary') with gr.Row(): - shared.gradio['prompt_menu-default'] = gr.Dropdown(choices=utils.get_available_prompts(), value='None', label='Prompt', elem_classes='slim-dropdown') + shared.gradio['prompt_menu-default'] = gr.Dropdown(choices=utils.get_available_prompts(), value=shared.settings['prompt-default'], label='Prompt', elem_classes='slim-dropdown') ui.create_refresh_button(shared.gradio['prompt_menu-default'], lambda: None, lambda: {'choices': utils.get_available_prompts()}, 'refresh-button', interactive=not mu) shared.gradio['save_prompt-default'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu) shared.gradio['delete_prompt-default'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu) diff --git a/modules/ui_notebook.py b/modules/ui_notebook.py index b234ac57..3f79a93c 
100644 --- a/modules/ui_notebook.py +++ b/modules/ui_notebook.py @@ -22,7 +22,7 @@ def create_ui(): with gr.Column(scale=4): with gr.Tab('Raw'): with gr.Row(): - shared.gradio['textbox-notebook'] = gr.Textbox(value='', lines=27, elem_id='textbox-notebook', elem_classes=['textbox', 'add_scrollbar']) + shared.gradio['textbox-notebook'] = gr.Textbox(value=load_prompt(shared.settings['prompt-notebook']), lines=27, elem_id='textbox-notebook', elem_classes=['textbox', 'add_scrollbar']) shared.gradio['token-counter-notebook'] = gr.HTML(value="0", elem_id="notebook-token-counter") with gr.Tab('Markdown'): @@ -56,7 +56,7 @@ def create_ui(): with gr.Column(scale=1): gr.HTML('
') with gr.Row(): - shared.gradio['prompt_menu-notebook'] = gr.Dropdown(choices=utils.get_available_prompts(), value='None', label='Prompt', elem_classes='slim-dropdown') + shared.gradio['prompt_menu-notebook'] = gr.Dropdown(choices=utils.get_available_prompts(), value=shared.settings['prompt-notebook'], label='Prompt', elem_classes='slim-dropdown') ui.create_refresh_button(shared.gradio['prompt_menu-notebook'], lambda: None, lambda: {'choices': utils.get_available_prompts()}, ['refresh-button', 'refresh-button-small'], interactive=not mu) shared.gradio['save_prompt-notebook'] = gr.Button('💾', elem_classes=['refresh-button', 'refresh-button-small'], interactive=not mu) shared.gradio['delete_prompt-notebook'] = gr.Button('🗑️', elem_classes=['refresh-button', 'refresh-button-small'], interactive=not mu) diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py index 84f9fbfc..ff965d39 100644 --- a/modules/ui_parameters.py +++ b/modules/ui_parameters.py @@ -6,19 +6,19 @@ from modules import loaders, presets, shared, ui, ui_chat, utils from modules.utils import gradio -def create_ui(default_preset): +def create_ui(): mu = shared.args.multi_user - generate_params = presets.load_preset(default_preset) with gr.Tab("Parameters", elem_id="parameters"): with gr.Tab("Generation"): with gr.Row(): with gr.Column(): with gr.Row(): - shared.gradio['preset_menu'] = gr.Dropdown(choices=utils.get_available_presets(), value=default_preset, label='Preset', elem_classes='slim-dropdown') + shared.gradio['preset_menu'] = gr.Dropdown(choices=utils.get_available_presets(), value=shared.settings['preset'], label='Preset', elem_classes='slim-dropdown') ui.create_refresh_button(shared.gradio['preset_menu'], lambda: None, lambda: {'choices': utils.get_available_presets()}, 'refresh-button', interactive=not mu) shared.gradio['save_preset'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu) shared.gradio['delete_preset'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu) - shared.gradio['random_preset'] = gr.Button('🎲', elem_classes='refresh-button') + shared.gradio['reset_preset'] = gr.Button('Reload preset', elem_classes='refresh-button', interactive=True) + shared.gradio['neutralize_samplers'] = gr.Button('Neutralize samplers', elem_classes='refresh-button', interactive=True) with gr.Column(): shared.gradio['filter_by_loader'] = gr.Dropdown(label="Filter by loader", choices=["All"] + list(loaders.loaders_and_params.keys()) if not shared.args.portable else ['llama.cpp'], value="All", elem_classes='slim-dropdown') @@ -28,44 +28,44 @@ def create_ui(default_preset): with gr.Row(): with gr.Column(): gr.Markdown('## Curve shape') - shared.gradio['temperature'] = gr.Slider(0.01, 5, value=generate_params['temperature'], step=0.01, label='temperature') - shared.gradio['dynatemp_low'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_low'], step=0.01, label='dynatemp_low', visible=generate_params['dynamic_temperature']) - shared.gradio['dynatemp_high'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_high'], step=0.01, label='dynatemp_high', visible=generate_params['dynamic_temperature']) - shared.gradio['dynatemp_exponent'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_exponent'], step=0.01, label='dynatemp_exponent', visible=generate_params['dynamic_temperature']) - shared.gradio['smoothing_factor'] = gr.Slider(0.0, 10.0, value=generate_params['smoothing_factor'], step=0.01, label='smoothing_factor', info='Activates Quadratic Sampling.') - 
shared.gradio['smoothing_curve'] = gr.Slider(1.0, 10.0, value=generate_params['smoothing_curve'], step=0.01, label='smoothing_curve', info='Adjusts the dropoff curve of Quadratic Sampling.') + shared.gradio['temperature'] = gr.Slider(0.01, 5, value=shared.settings['temperature'], step=0.01, label='temperature') + shared.gradio['dynatemp_low'] = gr.Slider(0.01, 5, value=shared.settings['dynatemp_low'], step=0.01, label='dynatemp_low', visible=shared.settings['dynamic_temperature']) + shared.gradio['dynatemp_high'] = gr.Slider(0.01, 5, value=shared.settings['dynatemp_high'], step=0.01, label='dynatemp_high', visible=shared.settings['dynamic_temperature']) + shared.gradio['dynatemp_exponent'] = gr.Slider(0.01, 5, value=shared.settings['dynatemp_exponent'], step=0.01, label='dynatemp_exponent', visible=shared.settings['dynamic_temperature']) + shared.gradio['smoothing_factor'] = gr.Slider(0.0, 10.0, value=shared.settings['smoothing_factor'], step=0.01, label='smoothing_factor', info='Activates Quadratic Sampling.') + shared.gradio['smoothing_curve'] = gr.Slider(1.0, 10.0, value=shared.settings['smoothing_curve'], step=0.01, label='smoothing_curve', info='Adjusts the dropoff curve of Quadratic Sampling.') gr.Markdown('## Curve cutoff') - shared.gradio['min_p'] = gr.Slider(0.0, 1.0, value=generate_params['min_p'], step=0.01, label='min_p') - shared.gradio['top_n_sigma'] = gr.Slider(0.0, 5.0, value=generate_params['top_n_sigma'], step=0.01, label='top_n_sigma') - shared.gradio['top_p'] = gr.Slider(0.0, 1.0, value=generate_params['top_p'], step=0.01, label='top_p') - shared.gradio['top_k'] = gr.Slider(0, 200, value=generate_params['top_k'], step=1, label='top_k') - shared.gradio['typical_p'] = gr.Slider(0.0, 1.0, value=generate_params['typical_p'], step=0.01, label='typical_p') - shared.gradio['xtc_threshold'] = gr.Slider(0, 0.5, value=generate_params['xtc_threshold'], step=0.01, label='xtc_threshold', info='If 2 or more tokens have probability above this threshold, consider removing all but the last one.') - shared.gradio['xtc_probability'] = gr.Slider(0, 1, value=generate_params['xtc_probability'], step=0.01, label='xtc_probability', info='Probability that the removal will actually happen. 0 disables the sampler. 
1 makes it always happen.') - shared.gradio['epsilon_cutoff'] = gr.Slider(0, 9, value=generate_params['epsilon_cutoff'], step=0.01, label='epsilon_cutoff') - shared.gradio['eta_cutoff'] = gr.Slider(0, 20, value=generate_params['eta_cutoff'], step=0.01, label='eta_cutoff') - shared.gradio['tfs'] = gr.Slider(0.0, 1.0, value=generate_params['tfs'], step=0.01, label='tfs') - shared.gradio['top_a'] = gr.Slider(0.0, 1.0, value=generate_params['top_a'], step=0.01, label='top_a') + shared.gradio['min_p'] = gr.Slider(0.0, 1.0, value=shared.settings['min_p'], step=0.01, label='min_p') + shared.gradio['top_n_sigma'] = gr.Slider(0.0, 5.0, value=shared.settings['top_n_sigma'], step=0.01, label='top_n_sigma') + shared.gradio['top_p'] = gr.Slider(0.0, 1.0, value=shared.settings['top_p'], step=0.01, label='top_p') + shared.gradio['top_k'] = gr.Slider(0, 200, value=shared.settings['top_k'], step=1, label='top_k') + shared.gradio['typical_p'] = gr.Slider(0.0, 1.0, value=shared.settings['typical_p'], step=0.01, label='typical_p') + shared.gradio['xtc_threshold'] = gr.Slider(0, 0.5, value=shared.settings['xtc_threshold'], step=0.01, label='xtc_threshold', info='If 2 or more tokens have probability above this threshold, consider removing all but the last one.') + shared.gradio['xtc_probability'] = gr.Slider(0, 1, value=shared.settings['xtc_probability'], step=0.01, label='xtc_probability', info='Probability that the removal will actually happen. 0 disables the sampler. 1 makes it always happen.') + shared.gradio['epsilon_cutoff'] = gr.Slider(0, 9, value=shared.settings['epsilon_cutoff'], step=0.01, label='epsilon_cutoff') + shared.gradio['eta_cutoff'] = gr.Slider(0, 20, value=shared.settings['eta_cutoff'], step=0.01, label='eta_cutoff') + shared.gradio['tfs'] = gr.Slider(0.0, 1.0, value=shared.settings['tfs'], step=0.01, label='tfs') + shared.gradio['top_a'] = gr.Slider(0.0, 1.0, value=shared.settings['top_a'], step=0.01, label='top_a') gr.Markdown('## Repetition suppression') - shared.gradio['dry_multiplier'] = gr.Slider(0, 5, value=generate_params['dry_multiplier'], step=0.01, label='dry_multiplier', info='Set to greater than 0 to enable DRY. 
Recommended value: 0.8.') - shared.gradio['dry_allowed_length'] = gr.Slider(1, 20, value=generate_params['dry_allowed_length'], step=1, label='dry_allowed_length', info='Longest sequence that can be repeated without being penalized.') - shared.gradio['dry_base'] = gr.Slider(1, 4, value=generate_params['dry_base'], step=0.01, label='dry_base', info='Controls how fast the penalty grows with increasing sequence length.') - shared.gradio['repetition_penalty'] = gr.Slider(1.0, 1.5, value=generate_params['repetition_penalty'], step=0.01, label='repetition_penalty') - shared.gradio['frequency_penalty'] = gr.Slider(0, 2, value=generate_params['frequency_penalty'], step=0.05, label='frequency_penalty') - shared.gradio['presence_penalty'] = gr.Slider(0, 2, value=generate_params['presence_penalty'], step=0.05, label='presence_penalty') - shared.gradio['encoder_repetition_penalty'] = gr.Slider(0.8, 1.5, value=generate_params['encoder_repetition_penalty'], step=0.01, label='encoder_repetition_penalty') - shared.gradio['no_repeat_ngram_size'] = gr.Slider(0, 20, step=1, value=generate_params['no_repeat_ngram_size'], label='no_repeat_ngram_size') - shared.gradio['repetition_penalty_range'] = gr.Slider(0, 4096, step=64, value=generate_params['repetition_penalty_range'], label='repetition_penalty_range') + shared.gradio['dry_multiplier'] = gr.Slider(0, 5, value=shared.settings['dry_multiplier'], step=0.01, label='dry_multiplier', info='Set to greater than 0 to enable DRY. Recommended value: 0.8.') + shared.gradio['dry_allowed_length'] = gr.Slider(1, 20, value=shared.settings['dry_allowed_length'], step=1, label='dry_allowed_length', info='Longest sequence that can be repeated without being penalized.') + shared.gradio['dry_base'] = gr.Slider(1, 4, value=shared.settings['dry_base'], step=0.01, label='dry_base', info='Controls how fast the penalty grows with increasing sequence length.') + shared.gradio['repetition_penalty'] = gr.Slider(1.0, 1.5, value=shared.settings['repetition_penalty'], step=0.01, label='repetition_penalty') + shared.gradio['frequency_penalty'] = gr.Slider(0, 2, value=shared.settings['frequency_penalty'], step=0.05, label='frequency_penalty') + shared.gradio['presence_penalty'] = gr.Slider(0, 2, value=shared.settings['presence_penalty'], step=0.05, label='presence_penalty') + shared.gradio['encoder_repetition_penalty'] = gr.Slider(0.8, 1.5, value=shared.settings['encoder_repetition_penalty'], step=0.01, label='encoder_repetition_penalty') + shared.gradio['no_repeat_ngram_size'] = gr.Slider(0, 20, step=1, value=shared.settings['no_repeat_ngram_size'], label='no_repeat_ngram_size') + shared.gradio['repetition_penalty_range'] = gr.Slider(0, 4096, step=64, value=shared.settings['repetition_penalty_range'], label='repetition_penalty_range') with gr.Column(): gr.Markdown('## Alternative sampling methods') - shared.gradio['penalty_alpha'] = gr.Slider(0, 5, value=generate_params['penalty_alpha'], label='penalty_alpha', info='For Contrastive Search. do_sample must be unchecked.') - shared.gradio['guidance_scale'] = gr.Slider(-0.5, 2.5, step=0.05, value=generate_params['guidance_scale'], label='guidance_scale', info='For CFG. 
1.5 is a good value.') - shared.gradio['mirostat_mode'] = gr.Slider(0, 2, step=1, value=generate_params['mirostat_mode'], label='mirostat_mode', info='mode=1 is for llama.cpp only.') - shared.gradio['mirostat_tau'] = gr.Slider(0, 10, step=0.01, value=generate_params['mirostat_tau'], label='mirostat_tau') - shared.gradio['mirostat_eta'] = gr.Slider(0, 1, step=0.01, value=generate_params['mirostat_eta'], label='mirostat_eta') + shared.gradio['penalty_alpha'] = gr.Slider(0, 5, value=shared.settings['penalty_alpha'], label='penalty_alpha', info='For Contrastive Search. do_sample must be unchecked.') + shared.gradio['guidance_scale'] = gr.Slider(-0.5, 2.5, step=0.05, value=shared.settings['guidance_scale'], label='guidance_scale', info='For CFG. 1.5 is a good value.') + shared.gradio['mirostat_mode'] = gr.Slider(0, 2, step=1, value=shared.settings['mirostat_mode'], label='mirostat_mode', info='mode=1 is for llama.cpp only.') + shared.gradio['mirostat_tau'] = gr.Slider(0, 10, step=0.01, value=shared.settings['mirostat_tau'], label='mirostat_tau') + shared.gradio['mirostat_eta'] = gr.Slider(0, 1, step=0.01, value=shared.settings['mirostat_eta'], label='mirostat_eta') gr.Markdown('## Other options') shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], value=shared.settings['max_new_tokens'], step=1, label='max_new_tokens', info='⚠️ Setting this too high can cause prompt truncation.') @@ -74,9 +74,9 @@ def create_ui(default_preset): with gr.Column(): with gr.Row(): with gr.Column(): - shared.gradio['do_sample'] = gr.Checkbox(value=generate_params['do_sample'], label='do_sample') - shared.gradio['dynamic_temperature'] = gr.Checkbox(value=generate_params['dynamic_temperature'], label='dynamic_temperature') - shared.gradio['temperature_last'] = gr.Checkbox(value=generate_params['temperature_last'], label='temperature_last', info='Moves temperature/dynamic temperature/quadratic sampling to the end of the sampler stack, ignoring their positions in "Sampler priority".') + shared.gradio['do_sample'] = gr.Checkbox(value=shared.settings['do_sample'], label='do_sample') + shared.gradio['dynamic_temperature'] = gr.Checkbox(value=shared.settings['dynamic_temperature'], label='dynamic_temperature') + shared.gradio['temperature_last'] = gr.Checkbox(value=shared.settings['temperature_last'], label='temperature_last', info='Moves temperature/dynamic temperature/quadratic sampling to the end of the sampler stack, ignoring their positions in "Sampler priority".') shared.gradio['auto_max_new_tokens'] = gr.Checkbox(value=shared.settings['auto_max_new_tokens'], label='auto_max_new_tokens', info='Expand max_new_tokens to the available context length.') shared.gradio['ban_eos_token'] = gr.Checkbox(value=shared.settings['ban_eos_token'], label='Ban the eos_token', info='Forces the model to never end the generation prematurely.') shared.gradio['add_bos_token'] = gr.Checkbox(value=shared.settings['add_bos_token'], label='Add the bos_token to the beginning of prompts', info='Disabling this can make the replies more creative.') @@ -89,18 +89,18 @@ def create_ui(default_preset): shared.gradio['truncation_length'] = gr.Number(precision=0, step=256, value=get_truncation_length(), label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length.') shared.gradio['seed'] = gr.Number(value=shared.settings['seed'], label='Seed (-1 for random)') - shared.gradio['sampler_priority'] = 
gr.Textbox(value=generate_params['sampler_priority'], lines=12, label='Sampler priority', info='Parameter names separated by new lines or commas.', elem_classes=['add_scrollbar']) + shared.gradio['sampler_priority'] = gr.Textbox(value=shared.settings['sampler_priority'], lines=12, label='Sampler priority', info='Parameter names separated by new lines or commas.', elem_classes=['add_scrollbar']) shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=2, value=shared.settings["custom_stopping_strings"] or None, label='Custom stopping strings', info='Written between "" and separated by commas.', placeholder='"\\n", "\\nYou:"') shared.gradio['custom_token_bans'] = gr.Textbox(value=shared.settings['custom_token_bans'] or None, label='Token bans', info='Token IDs to ban, separated by commas. The IDs can be found in the Default or Notebook tab.') shared.gradio['negative_prompt'] = gr.Textbox(value=shared.settings['negative_prompt'], label='Negative prompt', info='For CFG. Only used when guidance_scale is different than 1.', lines=3, elem_classes=['add_scrollbar']) - shared.gradio['dry_sequence_breakers'] = gr.Textbox(value=generate_params['dry_sequence_breakers'], label='dry_sequence_breakers', info='Tokens across which sequence matching is not continued. Specified as a comma-separated list of quoted strings.') + shared.gradio['dry_sequence_breakers'] = gr.Textbox(value=shared.settings['dry_sequence_breakers'], label='dry_sequence_breakers', info='Tokens across which sequence matching is not continued. Specified as a comma-separated list of quoted strings.') with gr.Row() as shared.gradio['grammar_file_row']: shared.gradio['grammar_file'] = gr.Dropdown(value='None', choices=utils.get_available_grammars(), label='Load grammar from file (.gbnf)', elem_classes='slim-dropdown') ui.create_refresh_button(shared.gradio['grammar_file'], lambda: None, lambda: {'choices': utils.get_available_grammars()}, 'refresh-button', interactive=not mu) shared.gradio['save_grammar'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu) shared.gradio['delete_grammar'] = gr.Button('🗑️ ', elem_classes='refresh-button', interactive=not mu) - shared.gradio['grammar_string'] = gr.Textbox(value='', label='Grammar', lines=16, elem_classes=['add_scrollbar', 'monospace']) + shared.gradio['grammar_string'] = gr.Textbox(value=shared.settings['grammar_string'], label='Grammar', lines=16, elem_classes=['add_scrollbar', 'monospace']) ui_chat.create_chat_settings_ui() @@ -111,9 +111,13 @@ def create_event_handlers(): ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( presets.load_preset_for_ui, gradio('preset_menu', 'interface_state'), gradio('interface_state') + gradio(presets.presets_params()), show_progress=False) - shared.gradio['random_preset'].click( + shared.gradio['reset_preset'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - presets.random_preset, gradio('interface_state'), gradio('interface_state') + gradio(presets.presets_params()), show_progress=False) + presets.reset_preset_for_ui, gradio('preset_menu', 'interface_state'), gradio('interface_state') + gradio(presets.presets_params()), show_progress=False) + + shared.gradio['neutralize_samplers'].click( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + presets.neutralize_samplers_for_ui, gradio('interface_state'), gradio('interface_state') + gradio(presets.presets_params()), show_progress=False) 
shared.gradio['grammar_file'].change(load_grammar, gradio('grammar_file'), gradio('grammar_string'), show_progress=False) shared.gradio['dynamic_temperature'].change(lambda x: [gr.update(visible=x)] * 3, gradio('dynamic_temperature'), gradio('dynatemp_low', 'dynatemp_high', 'dynatemp_exponent'), show_progress=False) diff --git a/server.py b/server.py index 3be4c27c..3dd0a3f3 100644 --- a/server.py +++ b/server.py @@ -45,6 +45,7 @@ from modules import ( ui_session, utils ) +from modules.chat import generate_pfp_cache from modules.extensions import apply_extensions from modules.LoRA import add_lora_to_model from modules.models import load_model, unload_model_if_idle @@ -93,17 +94,20 @@ def create_interface(): # Force some events to be triggered on page load shared.persistent_interface_state.update({ + 'mode': shared.settings['mode'], 'loader': shared.args.loader or 'llama.cpp', - 'mode': shared.settings['mode'] if shared.settings['mode'] == 'instruct' else gr.update(), - 'character_menu': shared.args.character or shared.settings['character'], - 'instruction_template_str': shared.settings['instruction_template_str'], - 'prompt_menu-default': shared.settings['prompt-default'], - 'prompt_menu-notebook': shared.settings['prompt-notebook'], 'filter_by_loader': (shared.args.loader or 'All') if not shared.args.portable else 'llama.cpp' }) - if Path("user_data/cache/pfp_character.png").exists(): - Path("user_data/cache/pfp_character.png").unlink() + # Clear existing cache files + for cache_file in ['pfp_character.png', 'pfp_character_thumb.png']: + cache_path = Path(f"user_data/cache/{cache_file}") + if cache_path.exists(): + cache_path.unlink() + + # Regenerate for default character + if shared.settings['mode'] != 'instruct': + generate_pfp_cache(shared.settings['character']) # css/js strings css = ui.css @@ -134,7 +138,7 @@ def create_interface(): ui_default.create_ui() ui_notebook.create_ui() - ui_parameters.create_ui(shared.settings['preset']) # Parameters tab + ui_parameters.create_ui() # Parameters tab ui_model_menu.create_ui() # Model tab if not shared.args.portable: training.create_ui() # Training tab From 3650a6fd1fc332f462bc1aa08f7671a3d517d847 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 7 Jun 2025 22:02:34 -0700 Subject: [PATCH 37/82] Small UI changes --- css/main.css | 2 +- modules/ui_chat.py | 3 +-- modules/ui_parameters.py | 2 +- 3 files changed, 3 insertions(+), 4 deletions(-) diff --git a/css/main.css b/css/main.css index a9cb36ab..4277519a 100644 --- a/css/main.css +++ b/css/main.css @@ -53,7 +53,7 @@ div.svelte-iyf88w { } .refresh-button { - max-width: 4.4em; + max-width: none; min-width: 2.2em !important; height: 39.594px; align-self: end; diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 712843d3..f8e2fc32 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -122,8 +122,7 @@ def create_chat_settings_ui(): ui.create_refresh_button(shared.gradio['character_menu'], lambda: None, lambda: {'choices': utils.get_available_characters()}, 'refresh-button', interactive=not mu) shared.gradio['save_character'] = gr.Button('💾', elem_classes='refresh-button', elem_id="save-character", interactive=not mu) shared.gradio['delete_character'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu) - shared.gradio['reset_character'] = gr.Button('↺', elem_classes='refresh-button', interactive=True) - shared.gradio['clear_character'] = gr.Button('⚪', elem_classes='refresh-button', interactive=True) + 
shared.gradio['reset_character'] = gr.Button('Restore character', elem_classes='refresh-button', interactive=True) shared.gradio['name2'] = gr.Textbox(value=shared.settings['name2'], lines=1, label='Character\'s name') shared.gradio['context'] = gr.Textbox(value=shared.settings['context'], lines=10, label='Context', elem_classes=['add_scrollbar']) diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py index ff965d39..9b5cb3ab 100644 --- a/modules/ui_parameters.py +++ b/modules/ui_parameters.py @@ -17,7 +17,7 @@ def create_ui(): ui.create_refresh_button(shared.gradio['preset_menu'], lambda: None, lambda: {'choices': utils.get_available_presets()}, 'refresh-button', interactive=not mu) shared.gradio['save_preset'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu) shared.gradio['delete_preset'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu) - shared.gradio['reset_preset'] = gr.Button('Reload preset', elem_classes='refresh-button', interactive=True) + shared.gradio['reset_preset'] = gr.Button('Restore preset', elem_classes='refresh-button', interactive=True) shared.gradio['neutralize_samplers'] = gr.Button('Neutralize samplers', elem_classes='refresh-button', interactive=True) with gr.Column(): From caf9fca5f339e4018913608f8accb3897352853f Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 7 Jun 2025 22:11:35 -0700 Subject: [PATCH 38/82] Avoid some code repetition --- modules/shared.py | 190 +++++++++++++++++++++++----------------------- 1 file changed, 97 insertions(+), 93 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index f4f8e180..08200399 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -9,6 +9,7 @@ from pathlib import Path import yaml from modules.logging_colors import logger +from modules.presets import default_preset # Model variables model = None @@ -28,99 +29,6 @@ gradio = {} persistent_interface_state = {} need_restart = False -# UI defaults -settings = { - 'show_controls': True, - 'start_with': '', - 'mode': 'instruct', - 'chat_style': 'cai-chat', - 'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>', - 'enable_web_search': False, - 'web_search_pages': 3, - 'prompt-default': 'QA', - 'prompt-notebook': 'QA', - 'preset': 'min_p', - 'max_new_tokens': 512, - 'max_new_tokens_min': 1, - 'max_new_tokens_max': 4096, - 'prompt_lookup_num_tokens': 0, - 'max_tokens_second': 0, - 'auto_max_new_tokens': True, - 'ban_eos_token': False, - 'add_bos_token': True, - 'enable_thinking': True, - 'skip_special_tokens': True, - 'stream': True, - 'static_cache': False, - 'truncation_length': 8192, - 'seed': -1, - 'custom_stopping_strings': '', - 'custom_token_bans': '', - 'negative_prompt': '', - 'dark_theme': True, - 'default_extensions': [], - - # Character settings - 'character': 'Assistant', - 'name1': 'You', - 'name2': 'AI', - 'user_bio': '', - 'context': 'The following is a conversation with an AI Large Language Model. The AI has been trained to answer questions, provide recommendations, and help with decision making. The AI follows user requests. 
The AI thinks outside the box.', - 'greeting': 'How can I help you today?', - 'custom_system_message': '', - 'instruction_template_str': "{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if not ns.found -%}\n {{- '' + 'Below is an instruction that describes a task. Write a response that appropriately completes the request.' + '\\n\\n' -}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {{- '' + message['content'] + '\\n\\n' -}}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{-'### Instruction:\\n' + message['content'] + '\\n\\n'-}}\n {%- else -%}\n {{-'### Response:\\n' + message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{-'### Response:\\n'-}}\n{%- endif -%}", - 'chat_template_str': "{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {%- if message['content'] -%}\n {{- message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- if user_bio -%}\n {{- user_bio + '\\n\\n' -}}\n {%- endif -%}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{- name1 + ': ' + message['content'] + '\\n'-}}\n {%- else -%}\n {{- name2 + ': ' + message['content'] + '\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}", - - # Generation parameters - Curve shape - 'temperature': 1.0, - 'dynatemp_low': 1.0, - 'dynatemp_high': 1.0, - 'dynatemp_exponent': 1.0, - 'smoothing_factor': 0.0, - 'smoothing_curve': 1.0, - - # Generation parameters - Curve cutoff - 'min_p': 0.0, - 'top_p': 1.0, - 'top_k': 0, - 'typical_p': 1.0, - 'xtc_threshold': 0.1, - 'xtc_probability': 0.0, - 'epsilon_cutoff': 0.0, - 'eta_cutoff': 0.0, - 'tfs': 1.0, - 'top_a': 0.0, - 'top_n_sigma': 0.0, - - # Generation parameters - Repetition suppression - 'dry_multiplier': 0.0, - 'dry_allowed_length': 2, - 'dry_base': 1.75, - 'repetition_penalty': 1.0, - 'frequency_penalty': 0.0, - 'presence_penalty': 0.0, - 'encoder_repetition_penalty': 1.0, - 'no_repeat_ngram_size': 0, - 'repetition_penalty_range': 1024, - - # Generation parameters - Alternative sampling methods - 'penalty_alpha': 0.0, - 'guidance_scale': 1.0, - 'mirostat_mode': 0, - 'mirostat_tau': 5.0, - 'mirostat_eta': 0.1, - - # Generation parameters - Other options - 'do_sample': True, - 'dynamic_temperature': False, - 'temperature_last': False, - 'sampler_priority': 'repetition_penalty\npresence_penalty\nfrequency_penalty\ndry\ntop_n_sigma\ntemperature\ndynamic_temperature\nquadratic_sampling\ntop_k\ntop_p\ntypical_p\nepsilon_cutoff\neta_cutoff\ntfs\ntop_a\nmin_p\nmirostat\nxtc\nencoder_repetition_penalty\nno_repeat_ngram', - 'dry_sequence_breakers': '"\\n", ":", "\\"", "*"', - 'grammar_string': '', -} - -default_settings = copy.deepcopy(settings) - # Parser copied from https://github.com/vladmandic/automatic parser = argparse.ArgumentParser(description="Text generation web UI", conflict_handler='resolve', add_help=True, formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=55, indent_increment=2, width=200)) @@ -282,6 +190,102 @@ for arg in sys.argv[1:]: elif hasattr(args, arg): provided_arguments.append(arg) +# Default generation parameters +neutral_samplers = default_preset() + +# UI defaults +settings = { + 'show_controls': True, + 'start_with': '', + 'mode': 'instruct', + 'chat_style': 'cai-chat', + 'chat-instruct_command': 'Continue the chat dialogue below. 
Write a single reply for the character "<|character|>".\n\n<|prompt|>', + 'enable_web_search': False, + 'web_search_pages': 3, + 'prompt-default': 'QA', + 'prompt-notebook': 'QA', + 'preset': 'min_p', + 'max_new_tokens': 512, + 'max_new_tokens_min': 1, + 'max_new_tokens_max': 4096, + 'prompt_lookup_num_tokens': 0, + 'max_tokens_second': 0, + 'auto_max_new_tokens': True, + 'ban_eos_token': False, + 'add_bos_token': True, + 'enable_thinking': True, + 'skip_special_tokens': True, + 'stream': True, + 'static_cache': False, + 'truncation_length': 8192, + 'seed': -1, + 'custom_stopping_strings': '', + 'custom_token_bans': '', + 'negative_prompt': '', + 'dark_theme': True, + 'default_extensions': [], + + # Character settings + 'character': 'Assistant', + 'name1': 'You', + 'name2': 'AI', + 'user_bio': '', + 'context': 'The following is a conversation with an AI Large Language Model. The AI has been trained to answer questions, provide recommendations, and help with decision making. The AI follows user requests. The AI thinks outside the box.', + 'greeting': 'How can I help you today?', + 'custom_system_message': '', + 'instruction_template_str': "{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if not ns.found -%}\n {{- '' + 'Below is an instruction that describes a task. Write a response that appropriately completes the request.' + '\\n\\n' -}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {{- '' + message['content'] + '\\n\\n' -}}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{-'### Instruction:\\n' + message['content'] + '\\n\\n'-}}\n {%- else -%}\n {{-'### Response:\\n' + message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{-'### Response:\\n'-}}\n{%- endif -%}", + 'chat_template_str': "{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {%- if message['content'] -%}\n {{- message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- if user_bio -%}\n {{- user_bio + '\\n\\n' -}}\n {%- endif -%}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{- name1 + ': ' + message['content'] + '\\n'-}}\n {%- else -%}\n {{- name2 + ': ' + message['content'] + '\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}", + + # Generation parameters - Curve shape + 'temperature': neutral_samplers['temperature'], + 'dynatemp_low': neutral_samplers['dynatemp_low'], + 'dynatemp_high': neutral_samplers['dynatemp_high'], + 'dynatemp_exponent': neutral_samplers['dynatemp_exponent'], + 'smoothing_factor': neutral_samplers['smoothing_factor'], + 'smoothing_curve': neutral_samplers['smoothing_curve'], + + # Generation parameters - Curve cutoff + 'min_p': neutral_samplers['min_p'], + 'top_p': neutral_samplers['top_p'], + 'top_k': neutral_samplers['top_k'], + 'typical_p': neutral_samplers['typical_p'], + 'xtc_threshold': neutral_samplers['xtc_threshold'], + 'xtc_probability': neutral_samplers['xtc_probability'], + 'epsilon_cutoff': neutral_samplers['epsilon_cutoff'], + 'eta_cutoff': neutral_samplers['eta_cutoff'], + 'tfs': neutral_samplers['tfs'], + 'top_a': neutral_samplers['top_a'], + 'top_n_sigma': neutral_samplers['top_n_sigma'], + + # Generation parameters - Repetition suppression + 'dry_multiplier': neutral_samplers['dry_multiplier'], + 'dry_allowed_length': neutral_samplers['dry_allowed_length'], + 'dry_base': neutral_samplers['dry_base'], + 
'repetition_penalty': neutral_samplers['repetition_penalty'], + 'frequency_penalty': neutral_samplers['frequency_penalty'], + 'presence_penalty': neutral_samplers['presence_penalty'], + 'encoder_repetition_penalty': neutral_samplers['encoder_repetition_penalty'], + 'no_repeat_ngram_size': neutral_samplers['no_repeat_ngram_size'], + 'repetition_penalty_range': neutral_samplers['repetition_penalty_range'], + + # Generation parameters - Alternative sampling methods + 'penalty_alpha': neutral_samplers['penalty_alpha'], + 'guidance_scale': neutral_samplers['guidance_scale'], + 'mirostat_mode': neutral_samplers['mirostat_mode'], + 'mirostat_tau': neutral_samplers['mirostat_tau'], + 'mirostat_eta': neutral_samplers['mirostat_eta'], + + # Generation parameters - Other options + 'do_sample': neutral_samplers['do_sample'], + 'dynamic_temperature': neutral_samplers['dynamic_temperature'], + 'temperature_last': neutral_samplers['temperature_last'], + 'sampler_priority': neutral_samplers['sampler_priority'], + 'dry_sequence_breakers': neutral_samplers['dry_sequence_breakers'], + 'grammar_string': '', +} + +default_settings = copy.deepcopy(settings) + def do_cmd_flags_warnings(): # Security warnings From fe955cac1fd7298d820e42790e257d9ace1c5eb4 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 7 Jun 2025 22:15:19 -0700 Subject: [PATCH 39/82] Small UI changes --- css/main.css | 4 ++++ modules/ui_chat.py | 8 ++++---- 2 files changed, 8 insertions(+), 4 deletions(-) diff --git a/css/main.css b/css/main.css index 4277519a..7d7b0cbd 100644 --- a/css/main.css +++ b/css/main.css @@ -62,6 +62,10 @@ div.svelte-iyf88w { flex: none; } +.refresh-button-medium { + max-width: 4.4em; +} + .refresh-button-small { max-width: 2.2em; } diff --git a/modules/ui_chat.py b/modules/ui_chat.py index f8e2fc32..ef9330e4 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -24,11 +24,11 @@ def create_ui(): with gr.Row(elem_id='past-chats-row', elem_classes=['pretty_scrollbar']): with gr.Column(): with gr.Row(elem_id='past-chats-buttons'): - shared.gradio['branch_chat'] = gr.Button('Branch', elem_classes='refresh-button', elem_id='Branch', interactive=not mu) - shared.gradio['branch_index'] = gr.Number(value=-1, precision=0, visible=False, elem_id="Branch-index", interactive=True) - shared.gradio['rename_chat'] = gr.Button('Rename', elem_classes='refresh-button', interactive=not mu) + shared.gradio['branch_chat'] = gr.Button('Branch', elem_classes=['refresh-button', 'refresh-button-medium'], elem_id='Branch', interactive=not mu) + shared.gradio['rename_chat'] = gr.Button('Rename', elem_classes=['refresh-button', 'refresh-button-medium'], interactive=not mu) shared.gradio['delete_chat'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu) - shared.gradio['Start new chat'] = gr.Button('New chat', elem_classes=['refresh-button', 'focus-on-chat-input']) + shared.gradio['Start new chat'] = gr.Button('New chat', elem_classes=['refresh-button', 'refresh-button-medium', 'focus-on-chat-input']) + shared.gradio['branch_index'] = gr.Number(value=-1, precision=0, visible=False, elem_id="Branch-index", interactive=True) shared.gradio['search_chat'] = gr.Textbox(placeholder='Search chats...', max_lines=1, elem_id='search_chat') From 0dbc4cbc71e7eed1d51fd9169048a54cb8d2d927 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 7 Jun 2025 22:20:58 -0700 Subject: [PATCH 40/82] Add Qwen3 presets --- user_data/presets/Qwen3 - No 
Thinking.yaml | 3 +++ user_data/presets/Qwen3 - Thinking.yaml | 3 +++ 2 files changed, 6 insertions(+) create mode 100644 user_data/presets/Qwen3 - No Thinking.yaml create mode 100644 user_data/presets/Qwen3 - Thinking.yaml diff --git a/user_data/presets/Qwen3 - No Thinking.yaml b/user_data/presets/Qwen3 - No Thinking.yaml new file mode 100644 index 00000000..b1c1e03c --- /dev/null +++ b/user_data/presets/Qwen3 - No Thinking.yaml @@ -0,0 +1,3 @@ +temperature: 0.7 +top_p: 0.8 +top_k: 20 diff --git a/user_data/presets/Qwen3 - Thinking.yaml b/user_data/presets/Qwen3 - Thinking.yaml new file mode 100644 index 00000000..cb2942f9 --- /dev/null +++ b/user_data/presets/Qwen3 - Thinking.yaml @@ -0,0 +1,3 @@ +temperature: 0.6 +top_p: 0.95 +top_k: 20 From 1bdf11b511b69f6eb10f9067847c64d39c8872f4 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 7 Jun 2025 22:23:09 -0700 Subject: [PATCH 41/82] Use the Qwen3 - Thinking preset by default --- modules/shared.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index 08200399..3794206b 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -204,7 +204,7 @@ settings = { 'web_search_pages': 3, 'prompt-default': 'QA', 'prompt-notebook': 'QA', - 'preset': 'min_p', + 'preset': 'Qwen3 - Thinking' if Path('user_data/presets/Qwen3 - Thinking.yaml').exists() else '', 'max_new_tokens': 512, 'max_new_tokens_min': 1, 'max_new_tokens_max': 4096, @@ -237,7 +237,7 @@ settings = { 'chat_template_str': "{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {%- if message['content'] -%}\n {{- message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- if user_bio -%}\n {{- user_bio + '\\n\\n' -}}\n {%- endif -%}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{- name1 + ': ' + message['content'] + '\\n'-}}\n {%- else -%}\n {{- name2 + ': ' + message['content'] + '\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}", # Generation parameters - Curve shape - 'temperature': neutral_samplers['temperature'], + 'temperature': 0.6, 'dynatemp_low': neutral_samplers['dynatemp_low'], 'dynatemp_high': neutral_samplers['dynatemp_high'], 'dynatemp_exponent': neutral_samplers['dynatemp_exponent'], @@ -246,8 +246,8 @@ settings = { # Generation parameters - Curve cutoff 'min_p': neutral_samplers['min_p'], - 'top_p': neutral_samplers['top_p'], - 'top_k': neutral_samplers['top_k'], + 'top_p': 0.95, + 'top_k': 20, 'typical_p': neutral_samplers['typical_p'], 'xtc_threshold': neutral_samplers['xtc_threshold'], 'xtc_probability': neutral_samplers['xtc_probability'], From ae150fa24f204fb56c9fe4e2530435bfcf0ec1d7 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 7 Jun 2025 22:25:46 -0700 Subject: [PATCH 42/82] Remove the null preset --- user_data/presets/Null preset.yaml | 1 - 1 file changed, 1 deletion(-) delete mode 100644 user_data/presets/Null preset.yaml diff --git a/user_data/presets/Null preset.yaml b/user_data/presets/Null preset.yaml deleted file mode 100644 index 714aa9a3..00000000 --- a/user_data/presets/Null preset.yaml +++ /dev/null @@ -1 +0,0 @@ -temperature: 1 From 1cab149c1a020c86f215bafd48b809cbc02a225b Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 7 Jun 2025 22:26:13 -0700 Subject: [PATCH 43/82] Remove the contrastive search preset --- user_data/presets/Contrastive Search.yaml | 3 --- 1 file changed, 3 deletions(-) delete mode 100644 
user_data/presets/Contrastive Search.yaml diff --git a/user_data/presets/Contrastive Search.yaml b/user_data/presets/Contrastive Search.yaml deleted file mode 100644 index d9a47a9f..00000000 --- a/user_data/presets/Contrastive Search.yaml +++ /dev/null @@ -1,3 +0,0 @@ -do_sample: false -top_k: 4 -penalty_alpha: 0.3 From af6bb7513afc6a45cba1716a8cd665d4ee264779 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 8 Jun 2025 09:09:36 -0700 Subject: [PATCH 44/82] Add back the "Save UI defaults" button It's useful for saving extensions settings. --- modules/ui_session.py | 21 ++++++++++++++++++--- 1 file changed, 18 insertions(+), 3 deletions(-) diff --git a/modules/ui_session.py b/modules/ui_session.py index 4ed740cd..db9ce3b0 100644 --- a/modules/ui_session.py +++ b/modules/ui_session.py @@ -12,6 +12,7 @@ def create_ui(): with gr.Column(): with gr.Row(): shared.gradio['toggle_dark_mode'] = gr.Button('Toggle 💡') + shared.gradio['save_settings'] = gr.Button('Save UI defaults to user_data/settings.yaml', interactive=not mu) shared.gradio['reset_interface'] = gr.Button("Apply flags/extensions and restart", interactive=not mu) with gr.Row(): @@ -32,14 +33,28 @@ def create_ui(): if not shared.args.portable: extension_name.submit(clone_or_pull_repository, extension_name, extension_status, show_progress=False) + shared.gradio['save_settings'].click( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + handle_save_settings, gradio('interface_state', 'preset_menu', 'extensions_menu', 'show_controls', 'theme_state'), gradio('save_contents', 'save_filename', 'save_root', 'file_saver'), show_progress=False) + + shared.gradio['toggle_dark_mode'].click( + lambda x: 'dark' if x == 'light' else 'light', gradio('theme_state'), gradio('theme_state')).then( + None, None, None, js=f'() => {{{ui.dark_theme_js}; toggleDarkMode()}}') + # Reset interface event shared.gradio['reset_interface'].click( set_interface_arguments, gradio('extensions_menu', 'bool_menu'), None).then( None, None, None, js='() => {document.body.innerHTML=\'
Reloading...
\'; setTimeout(function(){location.reload()},2500); return []}') - shared.gradio['toggle_dark_mode'].click( - lambda x: 'dark' if x == 'light' else 'light', gradio('theme_state'), gradio('theme_state')).then( - None, None, None, js=f'() => {{{ui.dark_theme_js}; toggleDarkMode()}}') + +def handle_save_settings(state, preset, extensions, show_controls, theme): + contents = ui.save_settings(state, preset, extensions, show_controls, theme) + return [ + contents, + "settings.yaml", + "user_data/", + gr.update(visible=True) + ] def set_interface_arguments(extensions, bool_active): From 42e7864d6242745263074a08bb4696059650104e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 8 Jun 2025 09:20:21 -0700 Subject: [PATCH 45/82] Reorganize the Session tab --- modules/github.py | 38 -------------------------------------- modules/ui_session.py | 19 +++++-------------- modules/utils.py | 3 +-- 3 files changed, 6 insertions(+), 54 deletions(-) delete mode 100644 modules/github.py diff --git a/modules/github.py b/modules/github.py deleted file mode 100644 index f3dc26e1..00000000 --- a/modules/github.py +++ /dev/null @@ -1,38 +0,0 @@ -import subprocess -from pathlib import Path - -new_extensions = set() - - -def clone_or_pull_repository(github_url): - global new_extensions - - repository_folder = Path("extensions") - repo_name = github_url.rstrip("/").split("/")[-1].split(".")[0] - - # Check if the repository folder exists - if not repository_folder.exists(): - repository_folder.mkdir(parents=True) - - repo_path = repository_folder / repo_name - - # Check if the repository is already cloned - if repo_path.exists(): - yield f"Updating {github_url}..." - # Perform a 'git pull' to update the repository - try: - pull_output = subprocess.check_output(["git", "-C", repo_path, "pull"], stderr=subprocess.STDOUT) - yield "Done." - return pull_output.decode() - except subprocess.CalledProcessError as e: - return str(e) - - # Clone the repository - try: - yield f"Cloning {github_url}..." - clone_output = subprocess.check_output(["git", "clone", github_url, repo_path], stderr=subprocess.STDOUT) - new_extensions.add(repo_name) - yield f"The extension `{repo_name}` has been downloaded.\n\nPlease close the web UI completely and launch it again to be able to load it." 
- return clone_output.decode() - except subprocess.CalledProcessError as e: - return str(e) diff --git a/modules/ui_session.py b/modules/ui_session.py index db9ce3b0..086a06bf 100644 --- a/modules/ui_session.py +++ b/modules/ui_session.py @@ -1,7 +1,6 @@ import gradio as gr from modules import shared, ui, utils -from modules.github import clone_or_pull_repository from modules.utils import gradio @@ -10,10 +9,12 @@ def create_ui(): with gr.Tab("Session", elem_id="session-tab"): with gr.Row(): with gr.Column(): - with gr.Row(): - shared.gradio['toggle_dark_mode'] = gr.Button('Toggle 💡') - shared.gradio['save_settings'] = gr.Button('Save UI defaults to user_data/settings.yaml', interactive=not mu) + gr.Markdown("## Settings") + shared.gradio['save_settings'] = gr.Button('Save settings to user_data/settings.yaml', elem_classes='refresh-button', interactive=not mu) + shared.gradio['toggle_dark_mode'] = gr.Button('Toggle light/dark theme 💡', elem_classes='refresh-button') + with gr.Column(): + gr.Markdown("## Extensions & flags") shared.gradio['reset_interface'] = gr.Button("Apply flags/extensions and restart", interactive=not mu) with gr.Row(): with gr.Column(): @@ -22,17 +23,7 @@ def create_ui(): with gr.Column(): shared.gradio['bool_menu'] = gr.CheckboxGroup(choices=get_boolean_arguments(), value=get_boolean_arguments(active=True), label="Boolean command-line flags", elem_classes='checkboxgroup-table') - with gr.Column(): - if not shared.args.portable: - extension_name = gr.Textbox(lines=1, label='Install or update an extension', info='Enter the GitHub URL below and press Enter. For a list of extensions, see: https://github.com/oobabooga/text-generation-webui-extensions ⚠️ WARNING ⚠️ : extensions can execute arbitrary code. Make sure to inspect their source code before activating them.', interactive=not mu) - extension_status = gr.Markdown() - else: - pass - shared.gradio['theme_state'] = gr.Textbox(visible=False, value='dark' if shared.settings['dark_theme'] else 'light') - if not shared.args.portable: - extension_name.submit(clone_or_pull_repository, extension_name, extension_status, show_progress=False) - shared.gradio['save_settings'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( handle_save_settings, gradio('interface_state', 'preset_menu', 'extensions_menu', 'show_controls', 'theme_state'), gradio('save_contents', 'save_filename', 'save_root', 'file_saver'), show_progress=False) diff --git a/modules/utils.py b/modules/utils.py index 577c55b8..21873541 100644 --- a/modules/utils.py +++ b/modules/utils.py @@ -3,7 +3,7 @@ import re from datetime import datetime from pathlib import Path -from modules import github, shared +from modules import shared from modules.logging_colors import logger @@ -182,7 +182,6 @@ def get_available_instruction_templates(): def get_available_extensions(): extensions = sorted(set(map(lambda x: x.parts[1], Path('extensions').glob('*/script.py'))), key=natural_keys) - extensions = [v for v in extensions if v not in github.new_extensions] return extensions From 84f66484c524c41d172abe341b206d31598ef40b Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 8 Jun 2025 09:31:38 -0700 Subject: [PATCH 46/82] Make it optional to paste long pasted content to an attachment --- js/main.js | 2 +- modules/shared.py | 1 + modules/ui.py | 6 ++++++ modules/ui_session.py | 1 + 4 files changed, 9 insertions(+), 1 deletion(-) diff --git a/js/main.js b/js/main.js index 9a620fa9..7e2a457c 
100644 --- a/js/main.js +++ b/js/main.js @@ -884,7 +884,7 @@ function setupPasteHandler() { textbox.addEventListener("paste", async (event) => { const text = event.clipboardData?.getData("text"); - if (text && text.length > MAX_PLAIN_TEXT_LENGTH) { + if (text && text.length > MAX_PLAIN_TEXT_LENGTH && document.querySelector("#paste_to_attachment input[data-testid=\"checkbox\"]")?.checked) { event.preventDefault(); const file = new File([text], "pasted_text.txt", { diff --git a/modules/shared.py b/modules/shared.py index 3794206b..59c7dbcd 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -223,6 +223,7 @@ settings = { 'custom_token_bans': '', 'negative_prompt': '', 'dark_theme': True, + 'paste_to_attachment': False, 'default_extensions': [], # Character settings diff --git a/modules/ui.py b/modules/ui.py index 8ec4b165..46403dd9 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -268,6 +268,11 @@ def list_interface_input_elements(): # Model elements elements += list_model_elements() + # Other elements + elements += [ + 'paste_to_attachment' + ] + return elements @@ -473,6 +478,7 @@ def setup_auto_save(): # Session tab (ui_session.py) 'show_controls', 'theme_state', + 'paste_to_attachment' ] for element_name in change_elements: diff --git a/modules/ui_session.py b/modules/ui_session.py index 086a06bf..2ece2251 100644 --- a/modules/ui_session.py +++ b/modules/ui_session.py @@ -12,6 +12,7 @@ def create_ui(): gr.Markdown("## Settings") shared.gradio['save_settings'] = gr.Button('Save settings to user_data/settings.yaml', elem_classes='refresh-button', interactive=not mu) shared.gradio['toggle_dark_mode'] = gr.Button('Toggle light/dark theme 💡', elem_classes='refresh-button') + shared.gradio['paste_to_attachment'] = gr.Checkbox(label='Turn long pasted text into attachments in the Chat tab', value=shared.settings['paste_to_attachment'], elem_id='paste_to_attachment') with gr.Column(): gr.Markdown("## Extensions & flags") From 1f1435997a65b92c498f6d5512683a6ee2f21b26 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 8 Jun 2025 09:37:54 -0700 Subject: [PATCH 47/82] Don't show the new 'Restore character' button in the Chat tab --- js/main.js | 2 ++ modules/chat.py | 2 +- modules/ui_chat.py | 6 +++--- 3 files changed, 6 insertions(+), 4 deletions(-) diff --git a/js/main.js b/js/main.js index 7e2a457c..70afabe3 100644 --- a/js/main.js +++ b/js/main.js @@ -555,6 +555,7 @@ function moveToChatTab() { newParent.insertBefore(grandParent, newParent.children[newPosition]); document.getElementById("save-character").style.display = "none"; + document.getElementById("restore-character").style.display = "none"; } function restoreOriginalPosition() { @@ -566,6 +567,7 @@ function restoreOriginalPosition() { } document.getElementById("save-character").style.display = ""; + document.getElementById("restore-character").style.display = ""; movedElement.style.display = ""; movedElement.children[0].style.minWidth = ""; } diff --git a/modules/chat.py b/modules/chat.py index f740db55..49511af1 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -1220,7 +1220,7 @@ def load_character(character, name1, name2): return name1, name2, picture, greeting, context -def reset_character_for_ui(state): +def restore_character_for_ui(state): """Reset character fields to the currently loaded character's saved values""" if state['character_menu'] and state['character_menu'] != 'None': try: diff --git a/modules/ui_chat.py b/modules/ui_chat.py index ef9330e4..d7a5ec69 100644 --- 
a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -122,7 +122,7 @@ def create_chat_settings_ui(): ui.create_refresh_button(shared.gradio['character_menu'], lambda: None, lambda: {'choices': utils.get_available_characters()}, 'refresh-button', interactive=not mu) shared.gradio['save_character'] = gr.Button('💾', elem_classes='refresh-button', elem_id="save-character", interactive=not mu) shared.gradio['delete_character'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu) - shared.gradio['reset_character'] = gr.Button('Restore character', elem_classes='refresh-button', interactive=True) + shared.gradio['restore_character'] = gr.Button('Restore character', elem_classes='refresh-button', interactive=True, elem_id='restore-character') shared.gradio['name2'] = gr.Textbox(value=shared.settings['name2'], lines=1, label='Character\'s name') shared.gradio['context'] = gr.Textbox(value=shared.settings['context'], lines=10, label='Context', elem_classes=['add_scrollbar']) @@ -320,9 +320,9 @@ def create_event_handlers(): ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.handle_save_template_click, gradio('instruction_template_str'), gradio('save_filename', 'save_root', 'save_contents', 'file_saver'), show_progress=False) - shared.gradio['reset_character'].click( + shared.gradio['restore_character'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.reset_character_for_ui, gradio('interface_state'), gradio('interface_state', 'name2', 'context', 'greeting', 'character_picture'), show_progress=False) + chat.restore_character_for_ui, gradio('interface_state'), gradio('interface_state', 'name2', 'context', 'greeting', 'character_picture'), show_progress=False) shared.gradio['delete_template'].click(chat.handle_delete_template_click, gradio('instruction_template'), gradio('delete_filename', 'delete_root', 'file_deleter'), show_progress=False) shared.gradio['save_chat_history'].click( From 78899244d5e1e6504a5be88252b17e44a48b6ca7 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 8 Jun 2025 09:40:09 -0700 Subject: [PATCH 48/82] Remove settings-template.yaml --- user_data/settings-template.yaml | 76 -------------------------------- 1 file changed, 76 deletions(-) delete mode 100644 user_data/settings-template.yaml diff --git a/user_data/settings-template.yaml b/user_data/settings-template.yaml deleted file mode 100644 index db481e84..00000000 --- a/user_data/settings-template.yaml +++ /dev/null @@ -1,76 +0,0 @@ -show_controls: true -start_with: '' -mode: instruct -chat_style: cai-chat -chat-instruct_command: |- - Continue the chat dialogue below. Write a single reply for the character "<|character|>". 
- - <|prompt|> -prompt-default: QA -prompt-notebook: QA -character: Assistant -name1: You -user_bio: '' -custom_system_message: '' -preset: min_p -max_new_tokens: 512 -max_new_tokens_min: 1 -max_new_tokens_max: 4096 -prompt_lookup_num_tokens: 0 -max_tokens_second: 0 -auto_max_new_tokens: true -ban_eos_token: false -add_bos_token: true -enable_thinking: true -skip_special_tokens: true -stream: true -static_cache: false -truncation_length: 8192 -seed: -1 -custom_stopping_strings: '' -custom_token_bans: '' -negative_prompt: '' -dark_theme: true -default_extensions: [] -instruction_template_str: |- - {%- set ns = namespace(found=false) -%} - {%- for message in messages -%} - {%- if message['role'] == 'system' -%} - {%- set ns.found = true -%} - {%- endif -%} - {%- endfor -%} - {%- if not ns.found -%} - {{- '' + 'Below is an instruction that describes a task. Write a response that appropriately completes the request.' + '\n\n' -}} - {%- endif %} - {%- for message in messages %} - {%- if message['role'] == 'system' -%} - {{- '' + message['content'] + '\n\n' -}} - {%- else -%} - {%- if message['role'] == 'user' -%} - {{-'### Instruction:\n' + message['content'] + '\n\n'-}} - {%- else -%} - {{-'### Response:\n' + message['content'] + '\n\n' -}} - {%- endif -%} - {%- endif -%} - {%- endfor -%} - {%- if add_generation_prompt -%} - {{-'### Response:\n'-}} - {%- endif -%} -chat_template_str: |- - {%- for message in messages %} - {%- if message['role'] == 'system' -%} - {%- if message['content'] -%} - {{- message['content'] + '\n\n' -}} - {%- endif -%} - {%- if user_bio -%} - {{- user_bio + '\n\n' -}} - {%- endif -%} - {%- else -%} - {%- if message['role'] == 'user' -%} - {{- name1 + ': ' + message['content'] + '\n'-}} - {%- else -%} - {{- name2 + ': ' + message['content'] + '\n' -}} - {%- endif -%} - {%- endif -%} - {%- endfor -%} - From eb0ab9db1d285fb3be949c4180e63d7df30e61a0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 8 Jun 2025 15:04:05 -0700 Subject: [PATCH 49/82] Fix light/dark theme persistence across page reloads --- modules/ui_session.py | 2 +- server.py | 25 ++++++++++++++++++++----- 2 files changed, 21 insertions(+), 6 deletions(-) diff --git a/modules/ui_session.py b/modules/ui_session.py index 2ece2251..33d7dcb7 100644 --- a/modules/ui_session.py +++ b/modules/ui_session.py @@ -31,7 +31,7 @@ def create_ui(): shared.gradio['toggle_dark_mode'].click( lambda x: 'dark' if x == 'light' else 'light', gradio('theme_state'), gradio('theme_state')).then( - None, None, None, js=f'() => {{{ui.dark_theme_js}; toggleDarkMode()}}') + None, None, None, js=f'() => {{{ui.dark_theme_js}; toggleDarkMode(); localStorage.setItem("theme", document.body.classList.contains("dark") ? "dark" : "light")}}') # Reset interface event shared.gradio['reset_interface'].click( diff --git a/server.py b/server.py index 3dd0a3f3..bd440fe0 100644 --- a/server.py +++ b/server.py @@ -163,11 +163,26 @@ def create_interface(): gradio('show_controls'), None, js=f"""(x) => {{ - if ({str(shared.settings['dark_theme']).lower()}) {{ - document.getElementsByTagName('body')[0].classList.add('dark'); - }} - else {{ - document.getElementsByTagName('body')[0].classList.remove('dark'); + // Check if this is first visit or if localStorage is out of sync + const savedTheme = localStorage.getItem('theme'); + const serverTheme = {str(shared.settings['dark_theme']).lower()} ? 
'dark' : 'light'; + + // If no saved theme or mismatch with server on first load, use server setting + if (!savedTheme || !sessionStorage.getItem('theme_synced')) {{ + localStorage.setItem('theme', serverTheme); + sessionStorage.setItem('theme_synced', 'true'); + if (serverTheme === 'dark') {{ + document.getElementsByTagName('body')[0].classList.add('dark'); + }} else {{ + document.getElementsByTagName('body')[0].classList.remove('dark'); + }} + }} else {{ + // Use localStorage for subsequent reloads + if (savedTheme === 'dark') {{ + document.getElementsByTagName('body')[0].classList.add('dark'); + }} else {{ + document.getElementsByTagName('body')[0].classList.remove('dark'); + }} }} {js} {ui.show_controls_js} From f81b1540caf324923eeb1bec75c9ce8774865e38 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 8 Jun 2025 15:19:25 -0700 Subject: [PATCH 50/82] Small style improvements --- css/main.css | 2 +- modules/ui.py | 4 +++- 2 files changed, 4 insertions(+), 2 deletions(-) diff --git a/css/main.css b/css/main.css index 7d7b0cbd..e2ce4801 100644 --- a/css/main.css +++ b/css/main.css @@ -269,7 +269,7 @@ button { .dark .pretty_scrollbar::-webkit-scrollbar-thumb, .dark .pretty_scrollbar::-webkit-scrollbar-thumb:hover { - background: rgb(255 255 255 / 10%); + background: rgb(255 255 255 / 6.25%); border-radius: 10px; } diff --git a/modules/ui.py b/modules/ui.py index 46403dd9..458b18ed 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -74,8 +74,10 @@ if not shared.args.old_colors: body_background_fill="white", block_background_fill="transparent", body_text_color='rgb(64, 64, 64)', - button_secondary_background_fill="#f4f4f4", + button_secondary_background_fill="white", button_secondary_border_color="var(--border-color-primary)", + input_shadow="none", + button_shadow_hover="none", # Dark Mode Colors input_background_fill_dark='var(--darker-gray)', From ff01bcb870e6a5bca6b11f32d522911b5f2ccf40 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 8 Jun 2025 15:33:05 -0700 Subject: [PATCH 51/82] Use user_data/cache/gradio for Gradio temp files --- server.py | 19 ++++++++++++++++--- 1 file changed, 16 insertions(+), 3 deletions(-) diff --git a/server.py b/server.py index bd440fe0..e178e8fe 100644 --- a/server.py +++ b/server.py @@ -1,12 +1,26 @@ import os +import shutil import warnings +from pathlib import Path from modules import shared from modules.block_requests import OpenMonkeyPatch, RequestBlocker from modules.logging_colors import logger -os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False' -os.environ['BITSANDBYTES_NOWELCOME'] = '1' +# Set up Gradio temp directory path +gradio_temp_path = Path('user_data') / 'cache' / 'gradio' + +# Set environment variables +os.environ.update({ + 'GRADIO_ANALYTICS_ENABLED': 'False', + 'BITSANDBYTES_NOWELCOME': '1', + 'GRADIO_TEMP_DIR': str(gradio_temp_path) +}) + +# Clear and recreate gradio temp directory +shutil.rmtree(gradio_temp_path, ignore_errors=True) +gradio_temp_path.mkdir(parents=True, exist_ok=True) + warnings.filterwarnings('ignore', category=UserWarning, message='TypedStorage is deprecated') warnings.filterwarnings('ignore', category=UserWarning, message='Using the update method is deprecated') warnings.filterwarnings('ignore', category=UserWarning, message='Field "model_name" has conflict') @@ -27,7 +41,6 @@ import signal import sys import time from functools import partial -from pathlib import Path from threading import Lock, Thread import yaml From 
7ed1926ce7a37d1ccd6b6c8be399581a258759a5 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 8 Jun 2025 15:38:40 -0700 Subject: [PATCH 52/82] Small change after previous commit --- server.py | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/server.py b/server.py index e178e8fe..5cad1d8a 100644 --- a/server.py +++ b/server.py @@ -9,6 +9,8 @@ from modules.logging_colors import logger # Set up Gradio temp directory path gradio_temp_path = Path('user_data') / 'cache' / 'gradio' +shutil.rmtree(gradio_temp_path, ignore_errors=True) +gradio_temp_path.mkdir(parents=True, exist_ok=True) # Set environment variables os.environ.update({ @@ -17,10 +19,6 @@ os.environ.update({ 'GRADIO_TEMP_DIR': str(gradio_temp_path) }) -# Clear and recreate gradio temp directory -shutil.rmtree(gradio_temp_path, ignore_errors=True) -gradio_temp_path.mkdir(parents=True, exist_ok=True) - warnings.filterwarnings('ignore', category=UserWarning, message='TypedStorage is deprecated') warnings.filterwarnings('ignore', category=UserWarning, message='Using the update method is deprecated') warnings.filterwarnings('ignore', category=UserWarning, message='Field "model_name" has conflict') From e976a5ddc7598de49227f600515a7eee72fc1af6 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 8 Jun 2025 17:34:56 -0700 Subject: [PATCH 53/82] Re-highlight code blocks when switching light/dark themes --- js/dark_theme.js | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/js/dark_theme.js b/js/dark_theme.js index b540fb11..f61060cd 100644 --- a/js/dark_theme.js +++ b/js/dark_theme.js @@ -6,4 +6,11 @@ function toggleDarkMode() { } else { currentCSS.setAttribute("href", "file/css/highlightjs/github-dark.min.css"); } + + // Re-highlight all code blocks once stylesheet loads + currentCSS.onload = function() { + document.querySelectorAll("pre code").forEach(block => { + hljs.highlightElement(block); + }); + }; } From b5e021fc49c840d1ac934f647b64317918ee1208 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 8 Jun 2025 17:44:04 -0700 Subject: [PATCH 54/82] Make the dark theme darker --- css/main.css | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/css/main.css b/css/main.css index e2ce4801..d04ff624 100644 --- a/css/main.css +++ b/css/main.css @@ -1,11 +1,11 @@ :root { - --darker-gray: #202123; - --dark-gray: #2A2B32; - --light-gray: #373943; + --darker-gray: #1C1C1D; + --dark-gray: #212125; + --light-gray: #2C2E34; --light-theme-gray: #f9fbff; --border-color-dark: #525252; --header-width: 112px; - --selected-item-color-dark: #2E2F38; + --selected-item-color-dark: #282930; } @font-face { From 06dfb7e772a943c1bfdeff6d125d8cf056dc3b73 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 8 Jun 2025 18:03:07 -0700 Subject: [PATCH 55/82] Improve the style of the hover menu --- css/main.css | 19 ++++++++++--------- 1 file changed, 10 insertions(+), 9 deletions(-) diff --git a/css/main.css b/css/main.css index d04ff624..8e57db89 100644 --- a/css/main.css +++ b/css/main.css @@ -745,16 +745,13 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { justify-content: space-between; margin: 0 !important; height: 36px; + border-color: transparent !important; } .hover-menu button:not(#clear-history-confirm) { border-bottom: 0 !important; } -.hover-menu button:not(#clear-history-confirm):last-child { - border-bottom: 
var(--button-border-width) solid var(--border-color-primary) !important; -} - .hover-menu button:hover { background: #dbeafe !important; } @@ -764,26 +761,30 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { } #show-controls { + background-color: white; + border-color: transparent !important; height: 36px; - border-top: 1px solid var(--border-color-dark) !important; - border-left: 1px solid var(--border-color-dark) !important; - border-right: 1px solid var(--border-color-dark) !important; border-radius: 0; border-bottom: 0 !important; - background-color: var(--darker-gray); padding-top: 3px; padding-left: 4px; display: flex; + font-weight: normal; +} + +.dark #show-controls { + background-color: var(--darker-gray); } #show-controls label { display: flex; flex-direction: row-reverse; - font-weight: bold; justify-content: start; width: 100%; padding-right: 12px; gap: 10px; + font-weight: 600; + color: var(--button-secondary-text-color); } #show-controls label input { From 0b8d2d65a262271e74697410c6aff577cad2594a Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 8 Jun 2025 18:11:27 -0700 Subject: [PATCH 56/82] Minor style improvement --- modules/ui.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/ui.py b/modules/ui.py index 458b18ed..ea36c639 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -109,6 +109,7 @@ if not shared.args.old_colors: button_large_radius='0.375rem', button_large_padding='6px 12px', input_radius='0.375rem', + block_radius='0', ) if Path("user_data/notification.mp3").exists(): From 4a369e070aab9e540187bf456a83df5478565a0e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 8 Jun 2025 18:47:48 -0700 Subject: [PATCH 57/82] Add buttons for easily deleting past chats --- css/main.css | 53 +++++++++++++++++++++++++++++++++++ js/main.js | 70 ++++++++++++++++++++++++++++++++++++++++++++++ modules/chat.py | 1 - modules/ui_chat.py | 10 +++---- 4 files changed, 127 insertions(+), 7 deletions(-) diff --git a/css/main.css b/css/main.css index 8e57db89..c5460eb7 100644 --- a/css/main.css +++ b/css/main.css @@ -1570,3 +1570,56 @@ button:focus { .svelte-sa48pu.stretch:has(> .hidden:only-child) { display: none; } + +.delete-container { + position: absolute; + right: 8px; + display: flex; + gap: 6px; + opacity: 0; + transition: opacity 0.2s; + margin-left: 0; +} + +.chat-label-with-delete { + position: relative; + padding-right: 60px; +} + +.trash-btn { + border: none; + background: none; + cursor: pointer; + padding: 2px; + opacity: 0.7; +} + +.cancel-btn { + border: none; + background: #ef4444; + color: white; + cursor: pointer; + width: 20px; + height: 20px; + border-radius: 2px; + font-family: monospace; + font-size: 12px; + align-items: center; + justify-content: center; + display: none; +} + +.confirm-btn { + border: none; + background: #22c55e; + color: white; + cursor: pointer; + width: 20px; + height: 20px; + border-radius: 2px; + font-family: monospace; + font-size: 12px; + align-items: center; + justify-content: center; + display: none; +} diff --git a/js/main.js b/js/main.js index 70afabe3..e9ca5a0b 100644 --- a/js/main.js +++ b/js/main.js @@ -917,3 +917,73 @@ document.querySelector("#chat-input .upload-button").title = "Upload text files, // Activate web search document.getElementById("web-search").title = "Search the internet with DuckDuckGo"; + +//------------------------------------------------ +// Inline icons for deleting past chats 
+//------------------------------------------------ + +function addMiniDeletes() { + document.querySelectorAll("#past-chats label:not(.has-delete)").forEach(label => { + const container = document.createElement("span"); + container.className = "delete-container"; + + label.classList.add("chat-label-with-delete"); + + const trashBtn = document.createElement("button"); + trashBtn.innerHTML = "🗑️"; + trashBtn.className = "trash-btn"; + + const cancelBtn = document.createElement("button"); + cancelBtn.innerHTML = "✕"; + cancelBtn.className = "cancel-btn"; + + const confirmBtn = document.createElement("button"); + confirmBtn.innerHTML = "✓"; + confirmBtn.className = "confirm-btn"; + + label.addEventListener("mouseenter", () => { + container.style.opacity = "1"; + }); + + label.addEventListener("mouseleave", () => { + container.style.opacity = "0"; + }); + + trashBtn.onclick = (e) => { + e.stopPropagation(); + label.querySelector("input").click(); + document.querySelector("#delete_chat").click(); + trashBtn.style.display = "none"; + cancelBtn.style.display = "flex"; + confirmBtn.style.display = "flex"; + }; + + cancelBtn.onclick = (e) => { + e.stopPropagation(); + document.querySelector("#delete_chat-cancel").click(); + resetButtons(); + }; + + confirmBtn.onclick = (e) => { + e.stopPropagation(); + document.querySelector("#delete_chat-confirm").click(); + resetButtons(); + }; + + function resetButtons() { + trashBtn.style.display = "inline"; + cancelBtn.style.display = "none"; + confirmBtn.style.display = "none"; + } + + container.append(trashBtn, cancelBtn, confirmBtn); + label.appendChild(container); + label.classList.add("has-delete"); + }); +} + +new MutationObserver(() => addMiniDeletes()).observe( + document.querySelector("#past-chats"), + {childList: true, subtree: true} +); +addMiniDeletes(); diff --git a/modules/chat.py b/modules/chat.py index 49511af1..ad5045e0 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -1562,7 +1562,6 @@ def handle_delete_chat_confirm_click(state): unique_id, gr.update(visible=False), gr.update(visible=True), - gr.update(visible=False) ] diff --git a/modules/ui_chat.py b/modules/ui_chat.py index d7a5ec69..428b64c9 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -26,15 +26,15 @@ def create_ui(): with gr.Row(elem_id='past-chats-buttons'): shared.gradio['branch_chat'] = gr.Button('Branch', elem_classes=['refresh-button', 'refresh-button-medium'], elem_id='Branch', interactive=not mu) shared.gradio['rename_chat'] = gr.Button('Rename', elem_classes=['refresh-button', 'refresh-button-medium'], interactive=not mu) - shared.gradio['delete_chat'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu) + shared.gradio['delete_chat'] = gr.Button('🗑️', visible=False, elem_classes='refresh-button', interactive=not mu, elem_id='delete_chat') shared.gradio['Start new chat'] = gr.Button('New chat', elem_classes=['refresh-button', 'refresh-button-medium', 'focus-on-chat-input']) shared.gradio['branch_index'] = gr.Number(value=-1, precision=0, visible=False, elem_id="Branch-index", interactive=True) shared.gradio['search_chat'] = gr.Textbox(placeholder='Search chats...', max_lines=1, elem_id='search_chat') with gr.Row(elem_id='delete-chat-row', visible=False) as shared.gradio['delete-chat-row']: - shared.gradio['delete_chat-cancel'] = gr.Button('Cancel', elem_classes=['refresh-button', 'focus-on-chat-input']) - shared.gradio['delete_chat-confirm'] = gr.Button('Confirm', variant='stop', elem_classes=['refresh-button', 'focus-on-chat-input']) + 
shared.gradio['delete_chat-cancel'] = gr.Button('Cancel', elem_classes=['refresh-button', 'focus-on-chat-input'], elem_id='delete_chat-cancel') + shared.gradio['delete_chat-confirm'] = gr.Button('Confirm', variant='stop', elem_classes=['refresh-button', 'focus-on-chat-input'], elem_id='delete_chat-confirm') with gr.Row(elem_id='rename-row', visible=False) as shared.gradio['rename-row']: shared.gradio['rename_to'] = gr.Textbox(label='Rename to:', placeholder='New name', elem_classes=['no-background']) @@ -261,11 +261,9 @@ def create_event_handlers(): ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.handle_start_new_chat_click, gradio('interface_state'), gradio('history', 'display', 'unique_id'), show_progress=False) - shared.gradio['delete_chat'].click(lambda: gr.update(visible=True), None, gradio('delete-chat-row')) - shared.gradio['delete_chat-cancel'].click(lambda: gr.update(visible=False), None, gradio('delete-chat-row')) shared.gradio['delete_chat-confirm'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.handle_delete_chat_confirm_click, gradio('interface_state'), gradio('history', 'display', 'unique_id', 'delete-chat-row'), show_progress=False) + chat.handle_delete_chat_confirm_click, gradio('interface_state'), gradio('history', 'display', 'unique_id'), show_progress=False) shared.gradio['branch_chat'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( From f3388c2ab4cdf60cbd035d9a021b8e7de1e4665d Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 8 Jun 2025 18:53:04 -0700 Subject: [PATCH 58/82] Fix selecting next chat when deleting with active search --- modules/chat.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/modules/chat.py b/modules/chat.py index ad5045e0..d62bbc42 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -1549,7 +1549,10 @@ def handle_start_new_chat_click(state): def handle_delete_chat_confirm_click(state): - index = str(find_all_histories(state).index(state['unique_id'])) + filtered_histories = find_all_histories_with_first_prompts(state) + filtered_ids = [h[1] for h in filtered_histories] + index = str(filtered_ids.index(state['unique_id'])) + delete_history(state['unique_id'], state['character_menu'], state['mode']) history, unique_id = load_history_after_deletion(state, index) html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) From f9a007c6a85899d31a9b38baf40fcad5daa0673d Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 8 Jun 2025 19:25:23 -0700 Subject: [PATCH 59/82] Properly filter out failed web search downloads from attachments --- modules/web_search.py | 10 ++++------ 1 file changed, 4 insertions(+), 6 deletions(-) diff --git a/modules/web_search.py b/modules/web_search.py index a1e47253..2b6c6c40 100644 --- a/modules/web_search.py +++ b/modules/web_search.py @@ -38,7 +38,7 @@ def download_web_page(url, timeout=5): return text except Exception as e: logger.error(f"Error downloading {url}: {e}") - return f"[Error downloading content from {url}: {str(e)}]" + return "" def perform_web_search(query, num_pages=3, max_workers=5): @@ -74,9 +74,7 @@ def perform_web_search(query, num_pages=3, max_workers=5): 'url': url, 'content': content } - except Exception as e: - logger.error(f"Error downloading {url}: {e}") - # 
Include failed downloads with empty content + except Exception: search_results[index] = { 'title': title, 'url': url, @@ -108,7 +106,7 @@ def add_web_search_attachments(history, row_idx, user_message, search_query, sta return # Filter out failed downloads before adding attachments - successful_results = [result for result in search_results if result['content'] and result['content'].strip()] + successful_results = [result for result in search_results if result['content'].strip()] if not successful_results: logger.warning("No successful downloads to add as attachments") @@ -130,7 +128,7 @@ def add_web_search_attachments(history, row_idx, user_message, search_query, sta } history['metadata'][key]["attachments"].append(attachment) - logger.info(f"Added {len(successful_results)} successful web search results as attachments") + logger.info(f"Added {len(successful_results)} successful web search results as attachments.") except Exception as e: logger.error(f"Error in web search: {e}") From 80637cae288dfd218b1bf664e8caedbf473ef298 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 8 Jun 2025 21:55:49 -0700 Subject: [PATCH 60/82] Add version to portable build folder names --- .github/workflows/build-portable-release-cuda.yml | 9 ++++++--- .github/workflows/build-portable-release-vulkan.yml | 9 ++++++--- .github/workflows/build-portable-release.yml | 9 ++++++--- 3 files changed, 18 insertions(+), 9 deletions(-) diff --git a/.github/workflows/build-portable-release-cuda.yml b/.github/workflows/build-portable-release-cuda.yml index 571cbac0..283fdd72 100644 --- a/.github/workflows/build-portable-release-cuda.yml +++ b/.github/workflows/build-portable-release-cuda.yml @@ -160,16 +160,19 @@ jobs: rm requirements_cuda_temp.txt fi - # 6. Create ZIP file + # 6. Move up and rename folder to include version cd .. VERSION_CLEAN="${VERSION#v}" + mv text-generation-webui text-generation-webui-${VERSION_CLEAN} + + # 7. Create ZIP file ZIP_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}-cuda${CUDA_VERSION}.zip" echo "Creating archive: $ZIP_NAME" if [[ "$RUNNER_OS" == "Windows" ]]; then - powershell -Command "Compress-Archive -Path text-generation-webui -DestinationPath $ZIP_NAME" + powershell -Command "Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ZIP_NAME" else - zip -r "$ZIP_NAME" text-generation-webui + zip -r "$ZIP_NAME" text-generation-webui-${VERSION_CLEAN} fi - name: Upload files to a GitHub release diff --git a/.github/workflows/build-portable-release-vulkan.yml b/.github/workflows/build-portable-release-vulkan.yml index 4e88d4d9..c6ab8fa7 100644 --- a/.github/workflows/build-portable-release-vulkan.yml +++ b/.github/workflows/build-portable-release-vulkan.yml @@ -146,16 +146,19 @@ jobs: echo "Installing Python packages from $REQ_FILE..." $PIP_PATH install --target="./$PACKAGES_PATH" -r "$REQ_FILE" - # 6. Create ZIP file + # 5. Move up and rename folder to include version cd .. VERSION_CLEAN="${VERSION#v}" + mv text-generation-webui text-generation-webui-${VERSION_CLEAN} + + # 6. 
Create ZIP file ZIP_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}-vulkan.zip" echo "Creating archive: $ZIP_NAME" if [[ "$RUNNER_OS" == "Windows" ]]; then - powershell -Command "Compress-Archive -Path text-generation-webui -DestinationPath $ZIP_NAME" + powershell -Command "Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ZIP_NAME" else - zip -r "$ZIP_NAME" text-generation-webui + zip -r "$ZIP_NAME" text-generation-webui-${VERSION_CLEAN} fi - name: Upload files to a GitHub release diff --git a/.github/workflows/build-portable-release.yml b/.github/workflows/build-portable-release.yml index 6910ce2c..58bfdb25 100644 --- a/.github/workflows/build-portable-release.yml +++ b/.github/workflows/build-portable-release.yml @@ -170,16 +170,19 @@ jobs: echo "Installing Python packages from $REQ_FILE..." $PIP_PATH install --target="./$PACKAGES_PATH" -r "$REQ_FILE" - # 5. Create ZIP file + # 5. Move up and rename folder to include version cd .. VERSION_CLEAN="${VERSION#v}" + mv text-generation-webui text-generation-webui-${VERSION_CLEAN} + + # 6. Create ZIP file ZIP_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}.zip" echo "Creating archive: $ZIP_NAME" if [[ "$RUNNER_OS" == "Windows" ]]; then - powershell -Command "Compress-Archive -Path text-generation-webui -DestinationPath $ZIP_NAME" + powershell -Command "Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ZIP_NAME" else - zip -r "$ZIP_NAME" text-generation-webui + zip -r "$ZIP_NAME" text-generation-webui-${VERSION_CLEAN} fi - name: Upload files to a GitHub release From eefbf96f6a30ad63da6a494b78fbb3e496462d38 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 8 Jun 2025 22:14:56 -0700 Subject: [PATCH 61/82] Don't save truncation_length to user_data/settings.yaml --- modules/ui.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/ui.py b/modules/ui.py index ea36c639..38693da8 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -332,6 +332,7 @@ def save_settings(state, preset, extensions_list, show_controls, theme_state): output['show_controls'] = show_controls output['dark_theme'] = True if theme_state == 'dark' else False output.pop('instruction_template_str') + output.pop('truncation_length') # Save extension values in the UI for extension_name in extensions_list: From 1602ac1c8ff640399a8a1eadd0305958a89dfccf Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 9 Jun 2025 09:03:39 -0700 Subject: [PATCH 62/82] Improve the style of thinking blocks in dark mode --- css/main.css | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/css/main.css b/css/main.css index c5460eb7..307d0437 100644 --- a/css/main.css +++ b/css/main.css @@ -1337,7 +1337,8 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { } .dark .thinking-block { - background-color: var(--darker-gray); + background-color: transparent; + border: 1px solid var(--input-border-color); } .thinking-header { From 14efd420845226706aefb92c74fae33d0a49816c Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 9 Jun 2025 11:25:36 -0700 Subject: [PATCH 63/82] Improve scroll performance by disabling hover effects during scroll --- css/main.css | 5 +++++ js/main.js | 11 ++++++++++- 2 files changed, 15 insertions(+), 1 deletion(-) diff --git a/css/main.css b/css/main.css index 307d0437..62dfc05f 100644 --- a/css/main.css +++ b/css/main.css @@ -1624,3 +1624,8 @@ button:focus { 
justify-content: center; display: none; } + +/* Disable hover effects while scrolling */ +.chat-parent.scrolling * { + pointer-events: none !important; +} diff --git a/js/main.js b/js/main.js index e9ca5a0b..1953a6be 100644 --- a/js/main.js +++ b/js/main.js @@ -146,8 +146,12 @@ const targetElement = document.getElementById("chat").parentNode.parentNode.pare targetElement.classList.add("pretty_scrollbar"); targetElement.classList.add("chat-parent"); let isScrolled = false; +let scrollTimeout; targetElement.addEventListener("scroll", function() { + // Add scrolling class to disable hover effects + targetElement.classList.add("scrolling"); + let diff = targetElement.scrollHeight - targetElement.clientHeight; if(Math.abs(targetElement.scrollTop - diff) <= 10 || diff == 0) { isScrolled = false; @@ -155,7 +159,12 @@ targetElement.addEventListener("scroll", function() { isScrolled = true; } - doSyntaxHighlighting(); + // Clear previous timeout and set new one + clearTimeout(scrollTimeout); + scrollTimeout = setTimeout(() => { + targetElement.classList.remove("scrolling"); + doSyntaxHighlighting(); // Only run after scrolling stops + }, 150); }); From 747a4a0e5647aac2aea40bdb1e8ae0d2705bc027 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 9 Jun 2025 12:32:10 -0700 Subject: [PATCH 64/82] Reposition the ... typing dots --- css/main.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/css/main.css b/css/main.css index 62dfc05f..a22fdd95 100644 --- a/css/main.css +++ b/css/main.css @@ -668,7 +668,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { display: none; position: absolute; background-color: transparent; - left: -2px; + left: 23px; top: -5px; padding: var(--block-padding); } From f5a5d0c0cbcca8e18a3a30f678d5d6ae2396c0d9 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 9 Jun 2025 17:32:25 -0700 Subject: [PATCH 65/82] Add the URL of web attachments to the prompt --- modules/chat.py | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index d62bbc42..25a0607b 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -223,7 +223,10 @@ def generate_chat_prompt(user_input, state, **kwargs): for attachment in metadata[user_key]["attachments"]: filename = attachment.get("name", "file") content = attachment.get("content", "") - attachments_text += f"\nName: {filename}\nContents:\n\n=====\n{content}\n=====\n\n" + if attachment.get("type") == "text/html" and attachment.get("url"): + attachments_text += f"\nName: {filename}\nURL: {attachment['url']}\nContents:\n\n=====\n{content}\n=====\n\n" + else: + attachments_text += f"\nName: {filename}\nContents:\n\n=====\n{content}\n=====\n\n" if attachments_text: enhanced_user_msg = f"{user_msg}\n\nATTACHMENTS:\n{attachments_text}" @@ -250,7 +253,10 @@ def generate_chat_prompt(user_input, state, **kwargs): for attachment in metadata[user_key]["attachments"]: filename = attachment.get("name", "file") content = attachment.get("content", "") - attachments_text += f"\nName: {filename}\nContents:\n\n=====\n{content}\n=====\n\n" + if attachment.get("type") == "text/html" and attachment.get("url"): + attachments_text += f"\nName: {filename}\nURL: {attachment['url']}\nContents:\n\n=====\n{content}\n=====\n\n" + else: + attachments_text += f"\nName: {filename}\nContents:\n\n=====\n{content}\n=====\n\n" if attachments_text: user_input = f"{user_input}\n\nATTACHMENTS:\n{attachments_text}" From 
263b5d5557efd7632f0d02adb6f7f44020f19b41 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 9 Jun 2025 17:55:26 -0700 Subject: [PATCH 66/82] Use html2text to extract the text of web searches without losing formatting --- modules/web_search.py | 36 ++++++++++--------- requirements/full/requirements.txt | 2 +- requirements/full/requirements_amd.txt | 2 +- requirements/full/requirements_amd_noavx2.txt | 2 +- .../full/requirements_apple_intel.txt | 2 +- .../full/requirements_apple_silicon.txt | 2 +- requirements/full/requirements_cpu_only.txt | 2 +- .../full/requirements_cpu_only_noavx2.txt | 2 +- requirements/full/requirements_cuda128.txt | 2 +- .../full/requirements_cuda128_noavx2.txt | 2 +- requirements/full/requirements_noavx2.txt | 2 +- requirements/full/requirements_nowheels.txt | 2 +- requirements/portable/requirements.txt | 2 +- .../portable/requirements_apple_intel.txt | 2 +- .../portable/requirements_apple_silicon.txt | 2 +- .../portable/requirements_cpu_only.txt | 2 +- .../portable/requirements_cpu_only_noavx2.txt | 2 +- requirements/portable/requirements_noavx2.txt | 2 +- .../portable/requirements_nowheels.txt | 2 +- requirements/portable/requirements_vulkan.txt | 2 +- .../portable/requirements_vulkan_noavx2.txt | 2 +- 21 files changed, 39 insertions(+), 37 deletions(-) diff --git a/modules/web_search.py b/modules/web_search.py index 2b6c6c40..ffd7e483 100644 --- a/modules/web_search.py +++ b/modules/web_search.py @@ -3,8 +3,6 @@ from concurrent.futures import as_completed from datetime import datetime import requests -from bs4 import BeautifulSoup -from duckduckgo_search import DDGS from modules.logging_colors import logger @@ -14,35 +12,39 @@ def get_current_timestamp(): return datetime.now().strftime('%b %d, %Y %H:%M') -def download_web_page(url, timeout=5): - """Download and extract text from a web page""" +def download_web_page(url, timeout=10): + """ + Download a web page and convert its HTML content to structured Markdown text. 
+ """ + import html2text + try: headers = { 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' } response = requests.get(url, headers=headers, timeout=timeout) - response.raise_for_status() + response.raise_for_status() # Raise an exception for bad status codes - soup = BeautifulSoup(response.content, 'html.parser') + # Initialize the HTML to Markdown converter + h = html2text.HTML2Text() + h.body_width = 0 - # Remove script and style elements - for script in soup(["script", "style"]): - script.decompose() + # Convert the HTML to Markdown + markdown_text = h.handle(response.text) - # Get text and clean it up - text = soup.get_text() - lines = (line.strip() for line in text.splitlines()) - chunks = (phrase.strip() for line in lines for phrase in line.split(" ")) - text = ' '.join(chunk for chunk in chunks if chunk) - - return text - except Exception as e: + return markdown_text + except requests.exceptions.RequestException as e: logger.error(f"Error downloading {url}: {e}") return "" + except Exception as e: + logger.error(f"An unexpected error occurred: {e}") + return "" def perform_web_search(query, num_pages=3, max_workers=5): """Perform web search and return results with content""" + from duckduckgo_search import DDGS + try: with DDGS() as ddgs: results = list(ddgs.text(query, max_results=num_pages)) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index 277f8249..b751482a 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -1,5 +1,4 @@ accelerate==1.5.* -beautifulsoup4==4.13.4 bitsandbytes==0.45.* colorama datasets @@ -7,6 +6,7 @@ duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* +html2text==2025.4.15 jinja2==3.1.6 markdown numpy==1.26.* diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index dbf35c34..11bacf97 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -1,11 +1,11 @@ accelerate==1.5.* -beautifulsoup4==4.13.4 colorama datasets duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* +html2text==2025.4.15 jinja2==3.1.6 markdown numpy==1.26.* diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index 2e5eb6c9..a64a93f0 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -1,11 +1,11 @@ accelerate==1.5.* -beautifulsoup4==4.13.4 colorama datasets duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* +html2text==2025.4.15 jinja2==3.1.6 markdown numpy==1.26.* diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index 9a19ab29..62747ac4 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -1,11 +1,11 @@ accelerate==1.5.* -beautifulsoup4==4.13.4 colorama datasets duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* +html2text==2025.4.15 jinja2==3.1.6 markdown numpy==1.26.* diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index 973d9bfb..bc82f07a 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -1,11 +1,11 @@ accelerate==1.5.* -beautifulsoup4==4.13.4 colorama datasets duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* 
+html2text==2025.4.15 jinja2==3.1.6 markdown numpy==1.26.* diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index 4a48a51f..f880f40a 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -1,11 +1,11 @@ accelerate==1.5.* -beautifulsoup4==4.13.4 colorama datasets duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* +html2text==2025.4.15 jinja2==3.1.6 markdown numpy==1.26.* diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index 76bde864..6d8875cb 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -1,11 +1,11 @@ accelerate==1.5.* -beautifulsoup4==4.13.4 colorama datasets duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* +html2text==2025.4.15 jinja2==3.1.6 markdown numpy==1.26.* diff --git a/requirements/full/requirements_cuda128.txt b/requirements/full/requirements_cuda128.txt index 9fc99606..b2bcf91c 100644 --- a/requirements/full/requirements_cuda128.txt +++ b/requirements/full/requirements_cuda128.txt @@ -1,5 +1,4 @@ accelerate==1.5.* -beautifulsoup4==4.13.4 bitsandbytes==0.45.* colorama datasets @@ -7,6 +6,7 @@ duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* +html2text==2025.4.15 jinja2==3.1.6 markdown numpy==2.2.* diff --git a/requirements/full/requirements_cuda128_noavx2.txt b/requirements/full/requirements_cuda128_noavx2.txt index ff34673a..54496cd7 100644 --- a/requirements/full/requirements_cuda128_noavx2.txt +++ b/requirements/full/requirements_cuda128_noavx2.txt @@ -1,5 +1,4 @@ accelerate==1.5.* -beautifulsoup4==4.13.4 bitsandbytes==0.45.* colorama datasets @@ -7,6 +6,7 @@ duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* +html2text==2025.4.15 jinja2==3.1.6 markdown numpy==2.2.* diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index 6cd0fa65..eabcdbd0 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -1,5 +1,4 @@ accelerate==1.5.* -beautifulsoup4==4.13.4 bitsandbytes==0.45.* colorama datasets @@ -7,6 +6,7 @@ duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* +html2text==2025.4.15 jinja2==3.1.6 markdown numpy==1.26.* diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt index a412367c..d26663a7 100644 --- a/requirements/full/requirements_nowheels.txt +++ b/requirements/full/requirements_nowheels.txt @@ -1,11 +1,11 @@ accelerate==1.5.* -beautifulsoup4==4.13.4 colorama datasets duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* +html2text==2025.4.15 jinja2==3.1.6 markdown numpy==1.26.* diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt index 60ce941e..5e5d4ba5 100644 --- a/requirements/portable/requirements.txt +++ b/requirements/portable/requirements.txt @@ -1,7 +1,7 @@ -beautifulsoup4==4.13.4 duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* +html2text==2025.4.15 jinja2==3.1.6 markdown numpy==1.26.* diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt index b1649bc9..4909f5a2 100644 --- a/requirements/portable/requirements_apple_intel.txt +++ b/requirements/portable/requirements_apple_intel.txt @@ -1,7 +1,7 @@ -beautifulsoup4==4.13.4 duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* 
+html2text==2025.4.15 jinja2==3.1.6 markdown numpy==1.26.* diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt index 571eba52..e54b2593 100644 --- a/requirements/portable/requirements_apple_silicon.txt +++ b/requirements/portable/requirements_apple_silicon.txt @@ -1,7 +1,7 @@ -beautifulsoup4==4.13.4 duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* +html2text==2025.4.15 jinja2==3.1.6 markdown numpy==1.26.* diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt index 88170cf3..74c0c5a7 100644 --- a/requirements/portable/requirements_cpu_only.txt +++ b/requirements/portable/requirements_cpu_only.txt @@ -1,7 +1,7 @@ -beautifulsoup4==4.13.4 duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* +html2text==2025.4.15 jinja2==3.1.6 markdown numpy==1.26.* diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt index e96cef49..264bc378 100644 --- a/requirements/portable/requirements_cpu_only_noavx2.txt +++ b/requirements/portable/requirements_cpu_only_noavx2.txt @@ -1,7 +1,7 @@ -beautifulsoup4==4.13.4 duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* +html2text==2025.4.15 jinja2==3.1.6 markdown numpy==1.26.* diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt index 78f94aa5..fcb8f05e 100644 --- a/requirements/portable/requirements_noavx2.txt +++ b/requirements/portable/requirements_noavx2.txt @@ -1,7 +1,7 @@ -beautifulsoup4==4.13.4 duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* +html2text==2025.4.15 jinja2==3.1.6 markdown numpy==1.26.* diff --git a/requirements/portable/requirements_nowheels.txt b/requirements/portable/requirements_nowheels.txt index f6c866cf..3d30e6d6 100644 --- a/requirements/portable/requirements_nowheels.txt +++ b/requirements/portable/requirements_nowheels.txt @@ -1,7 +1,7 @@ -beautifulsoup4==4.13.4 duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* +html2text==2025.4.15 jinja2==3.1.6 markdown numpy==1.26.* diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt index 3e41427d..395f225f 100644 --- a/requirements/portable/requirements_vulkan.txt +++ b/requirements/portable/requirements_vulkan.txt @@ -1,7 +1,7 @@ -beautifulsoup4==4.13.4 duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* +html2text==2025.4.15 jinja2==3.1.6 markdown numpy==1.26.* diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt index 022ebb61..0d41f541 100644 --- a/requirements/portable/requirements_vulkan_noavx2.txt +++ b/requirements/portable/requirements_vulkan_noavx2.txt @@ -1,7 +1,7 @@ -beautifulsoup4==4.13.4 duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* +html2text==2025.4.15 jinja2==3.1.6 markdown numpy==1.26.* From d085dc6a93577bbafbde529c497528965dbfc19b Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 9 Jun 2025 18:40:54 -0700 Subject: [PATCH 67/82] Minor optimization after e976a5ddc7598de49227f600515a7eee72fc1af6 --- js/dark_theme.js | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/js/dark_theme.js b/js/dark_theme.js index f61060cd..7136f5bf 100644 --- a/js/dark_theme.js +++ b/js/dark_theme.js @@ -9,8 +9,12 @@ function toggleDarkMode() { // Re-highlight all code blocks once stylesheet loads currentCSS.onload = 
function() { - document.querySelectorAll("pre code").forEach(block => { - hljs.highlightElement(block); + const messageBodies = document.getElementById("chat").querySelectorAll(".message-body"); + messageBodies.forEach((messageBody) => { + const codeBlocks = messageBody.querySelectorAll("pre code"); + codeBlocks.forEach((codeBlock) => { + hljs.highlightElement(codeBlock); + }); }); }; } From 1443612e72619e063d57226d05df3c159ec6d0ca Mon Sep 17 00:00:00 2001 From: Miriam Date: Mon, 9 Jun 2025 19:22:01 -0700 Subject: [PATCH 68/82] check .attention.head_count if .attention.head_count_kv doesn't exist (#7048) --- modules/models_settings.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/modules/models_settings.py b/modules/models_settings.py index c914bdea..283a9744 100644 --- a/modules/models_settings.py +++ b/modules/models_settings.py @@ -329,6 +329,7 @@ def estimate_vram(gguf_file, gpu_layers, ctx_size, cache_type): # Extract values from metadata n_layers = None n_kv_heads = None + n_attention_heads = None # Fallback for models without separate KV heads embedding_dim = None for key, value in metadata.items(): @@ -336,9 +337,14 @@ def estimate_vram(gguf_file, gpu_layers, ctx_size, cache_type): n_layers = value elif key.endswith('.attention.head_count_kv'): n_kv_heads = max(value) if isinstance(value, list) else value + elif key.endswith('.attention.head_count'): + n_attention_heads = max(value) if isinstance(value, list) else value elif key.endswith('.embedding_length'): embedding_dim = value + if n_kv_heads is None: + n_kv_heads = n_attention_heads + if gpu_layers > n_layers: gpu_layers = n_layers From 331d03c33fd69dd50b5c3b9f9ed8fb7d9da56bf0 Mon Sep 17 00:00:00 2001 From: Miriam Date: Mon, 9 Jun 2025 19:25:39 -0700 Subject: [PATCH 69/82] fix failure when --nowebui called without --api (#7055) --- server.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/server.py b/server.py index 5cad1d8a..80cc2f18 100644 --- a/server.py +++ b/server.py @@ -318,8 +318,8 @@ if __name__ == "__main__": if shared.args.nowebui: # Start the API in standalone mode - shared.args.extensions = [x for x in shared.args.extensions if x != 'gallery'] - if shared.args.extensions is not None and len(shared.args.extensions) > 0: + shared.args.extensions = [x for x in (shared.args.extensions or []) if x != 'gallery'] + if shared.args.extensions: extensions_module.load_extensions() else: # Launch the web UI From ec731210200ab53dc26bf56e6616d4533f64c165 Mon Sep 17 00:00:00 2001 From: Mykeehu Date: Tue, 10 Jun 2025 05:17:05 +0200 Subject: [PATCH 70/82] Fix continue/start reply with when using translation extensions (#6944) --------- Co-authored-by: oobabooga --- modules/chat.py | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/modules/chat.py b/modules/chat.py index 25a0607b..88ba33bb 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -760,7 +760,18 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess if is_stream: yield output - output['visible'][-1][1] = apply_extensions('output', output['visible'][-1][1], state, is_chat=True) + if _continue: + # Reprocess the entire internal text for extensions (like translation) + full_internal = output['internal'][-1][1] + if state['mode'] in ['chat', 'chat-instruct']: + full_visible = re.sub("(||{{user}})", state['name1'], full_internal) + else: + full_visible = full_internal + + full_visible = html.escape(full_visible) + output['visible'][-1][1] = apply_extensions('output', full_visible, 
state, is_chat=True) + else: + output['visible'][-1][1] = apply_extensions('output', output['visible'][-1][1], state, is_chat=True) # Final sync for version metadata (in case streaming was disabled) if regenerate: From df98f4b3312f84995d4dbdbe73681ed78c8e08d2 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 9 Jun 2025 20:28:16 -0700 Subject: [PATCH 71/82] Don't save active extensions through the UI Prevents command-line activated extensions from becoming permanently active due to autosave. --- modules/shared.py | 1 - modules/ui.py | 1 - server.py | 6 ------ 3 files changed, 8 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index 59c7dbcd..cc2884c1 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -224,7 +224,6 @@ settings = { 'negative_prompt': '', 'dark_theme': True, 'paste_to_attachment': False, - 'default_extensions': [], # Character settings 'character': 'Assistant', diff --git a/modules/ui.py b/modules/ui.py index 38693da8..ca1ff528 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -327,7 +327,6 @@ def save_settings(state, preset, extensions_list, show_controls, theme_state): output['prompt-default'] = state['prompt_menu-default'] output['prompt-notebook'] = state['prompt_menu-notebook'] output['character'] = state['character_menu'] - output['default_extensions'] = extensions_list output['seed'] = int(output['seed']) output['show_controls'] = show_controls output['dark_theme'] = True if theme_state == 'dark' else False diff --git a/server.py b/server.py index 80cc2f18..e0e3fbe5 100644 --- a/server.py +++ b/server.py @@ -249,13 +249,7 @@ if __name__ == "__main__": shared.model_config['.*'] = get_fallback_settings() shared.model_config.move_to_end('.*', last=False) # Move to the beginning - # Activate the extensions listed on settings.yaml extensions_module.available_extensions = utils.get_available_extensions() - for extension in shared.settings['default_extensions']: - shared.args.extensions = shared.args.extensions or [] - if extension not in shared.args.extensions: - shared.args.extensions.append(extension) - available_models = utils.get_available_models() # Model defined through --model From efd9c9707b809754b2e467b53bbeb76688768e6d Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 9 Jun 2025 20:57:25 -0700 Subject: [PATCH 72/82] Fix random seeds being saved to settings.yaml --- modules/text_generation.py | 1 + 1 file changed, 1 insertion(+) diff --git a/modules/text_generation.py b/modules/text_generation.py index 0d499d50..55b538b0 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -479,6 +479,7 @@ def generate_reply_custom(question, original_question, state, stopping_strings=N For models that do not use the transformers library for sampling """ + state = copy.deepcopy(state) state['seed'] = set_manual_seed(state['seed']) t0 = time.time() reply = '' From c92eba0b0a0776119ce1912e25dbeeb7a1dfc749 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 9 Jun 2025 22:03:23 -0700 Subject: [PATCH 73/82] Reorganize the Parameters tab (left: preset parameters, right: everything else) --- modules/ui_parameters.py | 19 +++++++++++-------- 1 file changed, 11 insertions(+), 8 deletions(-) diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py index 9b5cb3ab..e2b10554 100644 --- a/modules/ui_parameters.py +++ b/modules/ui_parameters.py @@ -34,6 +34,7 @@ def create_ui(): 
shared.gradio['dynatemp_exponent'] = gr.Slider(0.01, 5, value=shared.settings['dynatemp_exponent'], step=0.01, label='dynatemp_exponent', visible=shared.settings['dynamic_temperature']) shared.gradio['smoothing_factor'] = gr.Slider(0.0, 10.0, value=shared.settings['smoothing_factor'], step=0.01, label='smoothing_factor', info='Activates Quadratic Sampling.') shared.gradio['smoothing_curve'] = gr.Slider(1.0, 10.0, value=shared.settings['smoothing_curve'], step=0.01, label='smoothing_curve', info='Adjusts the dropoff curve of Quadratic Sampling.') + shared.gradio['dynamic_temperature'] = gr.Checkbox(value=shared.settings['dynamic_temperature'], label='dynamic_temperature') gr.Markdown('## Curve cutoff') shared.gradio['min_p'] = gr.Slider(0.0, 1.0, value=shared.settings['min_p'], step=0.01, label='min_p') @@ -68,15 +69,19 @@ def create_ui(): shared.gradio['mirostat_eta'] = gr.Slider(0, 1, step=0.01, value=shared.settings['mirostat_eta'], label='mirostat_eta') gr.Markdown('## Other options') - shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], value=shared.settings['max_new_tokens'], step=1, label='max_new_tokens', info='⚠️ Setting this too high can cause prompt truncation.') - shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.') - shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.') + shared.gradio['do_sample'] = gr.Checkbox(value=shared.settings['do_sample'], label='do_sample') + shared.gradio['temperature_last'] = gr.Checkbox(value=shared.settings['temperature_last'], label='temperature_last', info='Moves temperature/dynamic temperature/quadratic sampling to the end of the sampler stack, ignoring their positions in "Sampler priority".') + shared.gradio['sampler_priority'] = gr.Textbox(value=shared.settings['sampler_priority'], lines=10, label='Sampler priority', info='Parameter names separated by new lines or commas.', elem_classes=['add_scrollbar']) + shared.gradio['dry_sequence_breakers'] = gr.Textbox(value=shared.settings['dry_sequence_breakers'], label='dry_sequence_breakers', info='Tokens across which sequence matching is not continued. 
Specified as a comma-separated list of quoted strings.') + with gr.Column(): with gr.Row(): with gr.Column(): - shared.gradio['do_sample'] = gr.Checkbox(value=shared.settings['do_sample'], label='do_sample') - shared.gradio['dynamic_temperature'] = gr.Checkbox(value=shared.settings['dynamic_temperature'], label='dynamic_temperature') - shared.gradio['temperature_last'] = gr.Checkbox(value=shared.settings['temperature_last'], label='temperature_last', info='Moves temperature/dynamic temperature/quadratic sampling to the end of the sampler stack, ignoring their positions in "Sampler priority".') + with gr.Blocks(): + shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], value=shared.settings['max_new_tokens'], step=1, label='max_new_tokens', info='⚠️ Setting this too high can cause prompt truncation.') + shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.') + shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.') + shared.gradio['auto_max_new_tokens'] = gr.Checkbox(value=shared.settings['auto_max_new_tokens'], label='auto_max_new_tokens', info='Expand max_new_tokens to the available context length.') shared.gradio['ban_eos_token'] = gr.Checkbox(value=shared.settings['ban_eos_token'], label='Ban the eos_token', info='Forces the model to never end the generation prematurely.') shared.gradio['add_bos_token'] = gr.Checkbox(value=shared.settings['add_bos_token'], label='Add the bos_token to the beginning of prompts', info='Disabling this can make the replies more creative.') @@ -89,11 +94,9 @@ def create_ui(): shared.gradio['truncation_length'] = gr.Number(precision=0, step=256, value=get_truncation_length(), label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length.') shared.gradio['seed'] = gr.Number(value=shared.settings['seed'], label='Seed (-1 for random)') - shared.gradio['sampler_priority'] = gr.Textbox(value=shared.settings['sampler_priority'], lines=12, label='Sampler priority', info='Parameter names separated by new lines or commas.', elem_classes=['add_scrollbar']) shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=2, value=shared.settings["custom_stopping_strings"] or None, label='Custom stopping strings', info='Written between "" and separated by commas.', placeholder='"\\n", "\\nYou:"') shared.gradio['custom_token_bans'] = gr.Textbox(value=shared.settings['custom_token_bans'] or None, label='Token bans', info='Token IDs to ban, separated by commas. The IDs can be found in the Default or Notebook tab.') shared.gradio['negative_prompt'] = gr.Textbox(value=shared.settings['negative_prompt'], label='Negative prompt', info='For CFG. Only used when guidance_scale is different than 1.', lines=3, elem_classes=['add_scrollbar']) - shared.gradio['dry_sequence_breakers'] = gr.Textbox(value=shared.settings['dry_sequence_breakers'], label='dry_sequence_breakers', info='Tokens across which sequence matching is not continued. 
Specified as a comma-separated list of quoted strings.') with gr.Row() as shared.gradio['grammar_file_row']: shared.gradio['grammar_file'] = gr.Dropdown(value='None', choices=utils.get_available_grammars(), label='Load grammar from file (.gbnf)', elem_classes='slim-dropdown') ui.create_refresh_button(shared.gradio['grammar_file'], lambda: None, lambda: {'choices': utils.get_available_grammars()}, 'refresh-button', interactive=not mu) From 2dabdbc7da85f91ea9ddd74dfb49c2d161a6ef4d Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 10 Jun 2025 05:25:23 -0700 Subject: [PATCH 74/82] Update llama.cpp --- requirements/full/requirements.txt | 4 ++-- requirements/full/requirements_amd.txt | 4 ++-- requirements/full/requirements_amd_noavx2.txt | 4 ++-- requirements/full/requirements_apple_intel.txt | 4 ++-- requirements/full/requirements_apple_silicon.txt | 6 +++--- requirements/full/requirements_cpu_only.txt | 4 ++-- requirements/full/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/full/requirements_cuda128.txt | 4 ++-- requirements/full/requirements_cuda128_noavx2.txt | 4 ++-- requirements/full/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements.txt | 4 ++-- requirements/portable/requirements_apple_intel.txt | 4 ++-- requirements/portable/requirements_apple_silicon.txt | 6 +++--- requirements/portable/requirements_cpu_only.txt | 4 ++-- requirements/portable/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/portable/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements_vulkan.txt | 4 ++-- requirements/portable/requirements_vulkan_noavx2.txt | 4 ++-- 18 files changed, 38 insertions(+), 38 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index b751482a..a71e5240 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -34,8 +34,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index 11bacf97..db1ead1a 100644 --- a/requirements/full/requirements_amd.txt +++ 
b/requirements/full/requirements_amd.txt @@ -33,7 +33,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index a64a93f0..a08aa392 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -33,7 +33,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index 62747ac4..fa217c3e 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -33,7 +33,7 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" 
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3-py3-none-any.whl https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index bc82f07a..52581f1a 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -33,8 +33,8 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3-py3-none-any.whl https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index f880f40a..b72f22aa 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -33,5 +33,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" 
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index 6d8875cb..e8de6057 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -33,5 +33,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_cuda128.txt b/requirements/full/requirements_cuda128.txt index b2bcf91c..801009fb 100644 --- a/requirements/full/requirements_cuda128.txt +++ b/requirements/full/requirements_cuda128.txt @@ -35,8 +35,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_cuda128_noavx2.txt b/requirements/full/requirements_cuda128_noavx2.txt index 54496cd7..8b77e144 100644 --- a/requirements/full/requirements_cuda128_noavx2.txt +++ b/requirements/full/requirements_cuda128_noavx2.txt 
@@ -35,8 +35,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index eabcdbd0..5e81ce1f 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -34,8 +34,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt index 5e5d4ba5..4ddcf43f 100644 --- a/requirements/portable/requirements.txt +++ b/requirements/portable/requirements.txt @@ -19,5 +19,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels 
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt index 4909f5a2..38a21618 100644 --- a/requirements/portable/requirements_apple_intel.txt +++ b/requirements/portable/requirements_apple_intel.txt @@ -19,5 +19,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt index e54b2593..0b70c800 100644 --- a/requirements/portable/requirements_apple_silicon.txt +++ b/requirements/portable/requirements_apple_silicon.txt @@ -19,6 +19,6 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_13_0_arm64.whl; platform_system == 
"Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt index 74c0c5a7..510a20f4 100644 --- a/requirements/portable/requirements_cpu_only.txt +++ b/requirements/portable/requirements_cpu_only.txt @@ -19,5 +19,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt index 264bc378..e6d9f0c5 100644 --- a/requirements/portable/requirements_cpu_only_noavx2.txt +++ b/requirements/portable/requirements_cpu_only_noavx2.txt @@ -19,5 +19,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt index fcb8f05e..48f92e0a 100644 --- a/requirements/portable/requirements_noavx2.txt +++ b/requirements/portable/requirements_noavx2.txt @@ -19,5 +19,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt index 395f225f..9f93424f 100644 --- a/requirements/portable/requirements_vulkan.txt +++ b/requirements/portable/requirements_vulkan.txt @@ -19,5 +19,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels 
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt
index 0d41f541..9070b9a6 100644
--- a/requirements/portable/requirements_vulkan_noavx2.txt
+++ b/requirements/portable/requirements_vulkan_noavx2.txt
@@ -19,5 +19,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.16.0/llama_cpp_binaries-0.16.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

From 889153952f5545fb2b94b7a853aa24ae754ef79f Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Tue, 10 Jun 2025 09:02:52 -0700
Subject: [PATCH 75/82] Lint

---
 modules/ui_model_menu.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index 2a7d3d9d..9e982f0e 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -260,6 +260,7 @@ def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), retur
             output = "```\n"
             for link in links:
                 output += f"{Path(link).name}" + "\n"
 
+            output += "```"
             yield output
             return

From 18bd78f1f038b2b69178d464184b937f7d2b15d6 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Tue, 10 Jun 2025 14:03:25 -0700
Subject: [PATCH 76/82] Make the llama.cpp prompt processing messages shorter

---
 modules/llama_cpp_server.py | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/modules/llama_cpp_server.py b/modules/llama_cpp_server.py
index f0a72de8..a79e24e4 100644
--- a/modules/llama_cpp_server.py
+++ b/modules/llama_cpp_server.py
@@ -422,9 +422,17 @@ def filter_stderr_with_progress(process_stderr):
             if match:
                 progress = float(match.group(1))
+
+                # Extract just the part from "prompt processing" onwards
+                prompt_processing_idx = line.find('prompt processing')
+                if prompt_processing_idx != -1:
+                    display_line = line[prompt_processing_idx:]
+                else:
+                    display_line = line  # fallback to full line
+
                 # choose carriage return for in-progress or newline at completion
                 end_char = '\r' if progress < 1.0 else '\n'
-                print(line, end=end_char, file=sys.stderr, flush=True)
+                print(display_line, end=end_char, file=sys.stderr, flush=True)
                 last_was_progress = (progress < 1.0)
 
             # skip noise lines

From 3f9eb3aad1d1a734791dd41e4817570d81b485eb Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Tue, 10 Jun 2025 14:22:37 -0700
Subject: [PATCH 77/82] Fix the preset dropdown when the default preset file is not present

---
 modules/shared.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/shared.py b/modules/shared.py
index cc2884c1..b8ab2426 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -204,7 +204,7 @@ settings = {
     'web_search_pages': 3,
     'prompt-default': 'QA',
     'prompt-notebook': 'QA',
-    'preset': 'Qwen3 - Thinking' if Path('user_data/presets/Qwen3 - Thinking.yaml').exists() else '',
+    'preset': 'Qwen3 - Thinking' if Path('user_data/presets/Qwen3 - Thinking.yaml').exists() else None,
     'max_new_tokens': 512,
     'max_new_tokens_min': 1,
     'max_new_tokens_max': 4096,

From 1c1fd3be46f66b2f3110aed52a155817ca3392b7 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Tue, 10 Jun 2025 14:29:28 -0700
Subject: [PATCH 78/82] Remove some log messages

---
 modules/chat.py | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/modules/chat.py b/modules/chat.py
index 88ba33bb..dfc301df 100644
--- a/modules/chat.py
+++ b/modules/chat.py
@@ -1271,8 +1271,6 @@ def clear_character_for_ui(state):
         if cache_path.exists():
             cache_path.unlink()
 
-    logger.info("Cleared character fields and picture cache")
-
     return state, state['name2'], state['context'], state['greeting'], None
@@ -1779,7 +1777,6 @@ def handle_character_picture_change(picture):
         picture.save(Path(f'{cache_folder}/pfp_character.png'), format='PNG')
         thumb = make_thumbnail(picture)
         thumb.save(Path(f'{cache_folder}/pfp_character_thumb.png'), format='PNG')
-        logger.info("Updated character picture cache")
     else:
         # Remove cache files when picture is cleared
        for cache_file in ['pfp_character.png', 'pfp_character_thumb.png']:
@@ -1787,8 +1784,6 @@
             cache_path = Path(f'{cache_folder}/{cache_file}')
             if cache_path.exists():
                 cache_path.unlink()
 
-    logger.info("Cleared character picture cache")
-
 def handle_mode_change(state):
     history = load_latest_history(state)

From 75da90190f33317dd9731e14d844dd7238a2c870 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Tue, 10 Jun 2025 17:34:54 -0700
Subject: [PATCH 79/82] Fix character dropdown sometimes disappearing in the Parameters tab

---
 modules/ui_chat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/ui_chat.py b/modules/ui_chat.py
index 428b64c9..3b841b8b 100644
--- a/modules/ui_chat.py
+++ b/modules/ui_chat.py
@@ -298,7 +298,7 @@ def create_event_handlers():
     shared.gradio['mode'].change(
         ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
         chat.handle_mode_change, gradio('interface_state'), gradio('history', 'display', 'chat_style', 'chat-instruct_command', 'unique_id'), show_progress=False).then(
-        None, gradio('mode'), None, js="(mode) => {mode === 'instruct' ? document.getElementById('character-menu').parentNode.parentNode.style.display = 'none' : document.getElementById('character-menu').parentNode.parentNode.style.display = ''}")
+        None, gradio('mode'), None, js="(mode) => {const characterContainer = document.getElementById('character-menu').parentNode.parentNode; const isInChatTab = document.querySelector('#chat-controls').contains(characterContainer); if (isInChatTab) { characterContainer.style.display = mode === 'instruct' ? 'none' : ''; }}")
 
     shared.gradio['chat_style'].change(chat.redraw_html, gradio(reload_arr), gradio('display'), show_progress=False)

From 4cf39120fca88fc78aeaaa55946ab74e5db1a512 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Tue, 10 Jun 2025 18:03:00 -0700
Subject: [PATCH 80/82] Fix chat area sometimes not scrolling up to edit message

---
 js/global_scope_js.js | 9 +++++++++
 js/main.js            | 8 ++++----
 2 files changed, 13 insertions(+), 4 deletions(-)

diff --git a/js/global_scope_js.js b/js/global_scope_js.js
index 801f1574..205d9375 100644
--- a/js/global_scope_js.js
+++ b/js/global_scope_js.js
@@ -95,12 +95,21 @@ function startEditing(messageElement, messageBody, isUserMessage) {
   editingInterface.textarea.focus();
   editingInterface.textarea.setSelectionRange(rawText.length, rawText.length);
 
+  // Temporarily mark as scrolled to prevent auto-scroll
+  const wasScrolled = window.isScrolled;
+  window.isScrolled = true;
+
   // Scroll the textarea into view
   editingInterface.textarea.scrollIntoView({
     behavior: "smooth",
     block: "center"
   });
 
+  // Restore the original scroll state after animation
+  setTimeout(() => {
+    window.isScrolled = wasScrolled;
+  }, 500);
+
   // Setup event handlers
   setupEditingHandlers(editingInterface.textarea, messageElement, originalHTML, messageBody, isUserMessage);
 }
diff --git a/js/main.js b/js/main.js
index 1953a6be..e970884d 100644
--- a/js/main.js
+++ b/js/main.js
@@ -145,7 +145,7 @@ typingSibling.insertBefore(typing, typingSibling.childNodes[2]);
 const targetElement = document.getElementById("chat").parentNode.parentNode.parentNode;
 targetElement.classList.add("pretty_scrollbar");
 targetElement.classList.add("chat-parent");
-let isScrolled = false;
+window.isScrolled = false;
 let scrollTimeout;
 
 targetElement.addEventListener("scroll", function() {
@@ -154,9 +154,9 @@ targetElement.addEventListener("scroll", function() {
   let diff = targetElement.scrollHeight - targetElement.clientHeight;
   if(Math.abs(targetElement.scrollTop - diff) <= 10 || diff == 0) {
-    isScrolled = false;
+    window.isScrolled = false;
   } else {
-    isScrolled = true;
+    window.isScrolled = true;
   }
 
   // Clear previous timeout and set new one
@@ -182,7 +182,7 @@ const observer = new MutationObserver(function(mutations) {
   doSyntaxHighlighting();
 
-  if (!isScrolled && targetElement.scrollTop !== targetElement.scrollHeight) {
+  if (!window.isScrolled && targetElement.scrollTop !== targetElement.scrollHeight) {
     targetElement.scrollTop = targetElement.scrollHeight;
   }

From bc921c66e5ef2f2a14286702e564c1b22dbba5a7 Mon Sep 17 00:00:00 2001
From: LawnMauer
Date: Wed, 11 Jun 2025 03:16:50 +0200
Subject: [PATCH 81/82] Load js and css sources in UTF-8 (#7059)

---
 modules/html_generator.py |  7 ++++---
 modules/ui.py             | 22 +++++++++++-----------
 2 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/modules/html_generator.py b/modules/html_generator.py
index eac7d91a..af64894e 100644
--- a/modules/html_generator.py
+++ b/modules/html_generator.py
@@ -39,15 +39,16 @@ def minify_css(css: str) -> str:
     return css
 
-with open(Path(__file__).resolve().parent / '../css/html_readable_style.css', 'r') as f:
+with open(Path(__file__).resolve().parent / '../css/html_readable_style.css', 'r', encoding='utf-8') as f:
     readable_css = f.read()
-with open(Path(__file__).resolve().parent / '../css/html_instruct_style.css', 'r') as f:
+with open(Path(__file__).resolve().parent / '../css/html_instruct_style.css', 'r', encoding='utf-8') as f:
     instruct_css = f.read()
 
 # Custom chat styles
 chat_styles = {}
 for k in get_available_chat_styles():
-    chat_styles[k] = open(Path(f'css/chat_style-{k}.css'), 'r').read()
+    with open(Path(f'css/chat_style-{k}.css'), 'r', encoding='utf-8') as f:
+        chat_styles[k] = f.read()
 
 # Handle styles that derive from other styles
 for k in chat_styles:
diff --git a/modules/ui.py b/modules/ui.py
index ca1ff528..59da5118 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -19,27 +19,27 @@ _last_extensions = None
 _last_show_controls = None
 _last_theme_state = None
 
-with open(Path(__file__).resolve().parent / '../css/NotoSans/stylesheet.css', 'r') as f:
+with open(Path(__file__).resolve().parent / '../css/NotoSans/stylesheet.css', 'r', encoding='utf-8') as f:
     css = f.read()
-with open(Path(__file__).resolve().parent / '../css/main.css', 'r') as f:
+with open(Path(__file__).resolve().parent / '../css/main.css', 'r', encoding='utf-8') as f:
     css += f.read()
-with open(Path(__file__).resolve().parent / '../css/katex/katex.min.css', 'r') as f:
+with open(Path(__file__).resolve().parent / '../css/katex/katex.min.css', 'r', encoding='utf-8') as f:
     css += f.read()
-with open(Path(__file__).resolve().parent / '../css/highlightjs/highlightjs-copy.min.css', 'r') as f:
+with open(Path(__file__).resolve().parent / '../css/highlightjs/highlightjs-copy.min.css', 'r', encoding='utf-8') as f:
     css += f.read()
-with open(Path(__file__).resolve().parent / '../js/main.js', 'r') as f:
+with open(Path(__file__).resolve().parent / '../js/main.js', 'r', encoding='utf-8') as f:
     js = f.read()
-with open(Path(__file__).resolve().parent / '../js/global_scope_js.js', 'r') as f:
+with open(Path(__file__).resolve().parent / '../js/global_scope_js.js', 'r', encoding='utf-8') as f:
     global_scope_js = f.read()
-with open(Path(__file__).resolve().parent / '../js/save_files.js', 'r') as f:
+with open(Path(__file__).resolve().parent / '../js/save_files.js', 'r', encoding='utf-8') as f:
     save_files_js = f.read()
-with open(Path(__file__).resolve().parent / '../js/switch_tabs.js', 'r') as f:
+with open(Path(__file__).resolve().parent / '../js/switch_tabs.js', 'r', encoding='utf-8') as f:
     switch_tabs_js = f.read()
-with open(Path(__file__).resolve().parent / '../js/show_controls.js', 'r') as f:
+with open(Path(__file__).resolve().parent / '../js/show_controls.js', 'r', encoding='utf-8') as f:
     show_controls_js = f.read()
-with open(Path(__file__).resolve().parent / '../js/update_big_picture.js', 'r') as f:
+with open(Path(__file__).resolve().parent / '../js/update_big_picture.js', 'r', encoding='utf-8') as f:
     update_big_picture_js = f.read()
-with open(Path(__file__).resolve().parent / '../js/dark_theme.js', 'r') as f:
+with open(Path(__file__).resolve().parent / '../js/dark_theme.js', 'r', encoding='utf-8') as f:
     dark_theme_js = f.read()
 
 refresh_symbol = '🔄'

From 552cb09f09de0d3a9442334580fd4a66f95a1ce3 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Tue, 10 Jun 2025 18:45:42 -0700
Subject: [PATCH 82/82] Do not bump Transformers to 4.52 on CUDA 12.8

Performance is slow, and the older version works fine with torch 2.7.
---
 requirements/full/requirements_cuda128.txt        | 3 +--
 requirements/full/requirements_cuda128_noavx2.txt | 3 +--
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/requirements/full/requirements_cuda128.txt b/requirements/full/requirements_cuda128.txt
index 801009fb..7851041f 100644
--- a/requirements/full/requirements_cuda128.txt
+++ b/requirements/full/requirements_cuda128.txt
@@ -24,8 +24,7 @@ safetensors==0.5.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.52.*
-triton-windows; platform_system == "Windows"
+transformers==4.50.*
 tqdm
 wandb
diff --git a/requirements/full/requirements_cuda128_noavx2.txt b/requirements/full/requirements_cuda128_noavx2.txt
index 8b77e144..c8015166 100644
--- a/requirements/full/requirements_cuda128_noavx2.txt
+++ b/requirements/full/requirements_cuda128_noavx2.txt
@@ -24,8 +24,7 @@ safetensors==0.5.*
 scipy
 sentencepiece
 tensorboard
-transformers==4.52.*
-triton-windows; platform_system == "Windows"
+transformers==4.50.*
 tqdm
 wandb
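A minimal sketch, not part of the patch series above, of checking an installed environment against the versions this last commit settles on. The package names and version prefixes (transformers 4.50.*, torch 2.7) come from the diff and the commit message; the script itself, including the PINS mapping and the check_pin helper, is an illustrative assumption rather than anything shipped in the repository.

    from importlib.metadata import PackageNotFoundError, version

    # Pins taken from the change above; the startswith check is an assumption
    # that a "==4.50.*"-style requirement is satisfied by any 4.50.x release.
    PINS = {"transformers": "4.50.", "torch": "2.7."}

    def check_pin(package, prefix):
        """Report whether the installed version of `package` starts with `prefix`."""
        try:
            installed = version(package)
        except PackageNotFoundError:
            print(f"{package}: not installed")
            return False
        ok = installed.startswith(prefix)
        print(f"{package}: {installed} ({'matches pin' if ok else 'expected ' + prefix + 'x'})")
        return ok

    if __name__ == "__main__":
        results = [check_pin(name, prefix) for name, prefix in PINS.items()]
        raise SystemExit(0 if all(results) else 1)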