From f154aeafeafe131ca09802c2dfc12470f0fb68df Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Thu, 19 Jun 2025 21:23:10 -0700
Subject: [PATCH 01/29] Optimize chat scrolling for the 40th time, hopefully
 the last one

---
 css/main.css | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/css/main.css b/css/main.css
index bc59f833..b9bf2ba5 100644
--- a/css/main.css
+++ b/css/main.css
@@ -1670,3 +1670,15 @@ button:focus {
 #textbox-notebook span {
     display: none;
 }
+
+.chat-parent {
+    /* Optimize for scrolling performance */
+    will-change: scroll-position;
+    contain: layout style paint;
+
+    /* Ensure GPU acceleration */
+    transform: translateZ(0);
+
+    /* Prevent layout shifts */
+    overflow-anchor: none;
+}

From bb97ca1b22dfb6674c72e13b400b64ffaac59684 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Thu, 19 Jun 2025 21:41:41 -0700
Subject: [PATCH 02/29] Fix a small issue with the chat input

---
 css/main.css | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/css/main.css b/css/main.css
index b9bf2ba5..531d622a 100644
--- a/css/main.css
+++ b/css/main.css
@@ -670,7 +670,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
     position: absolute;
     background-color: transparent;
     left: 23px;
-    top: -5px;
+    top: -25px;
     padding: var(--block-padding);
 }
@@ -830,7 +830,6 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
 
 #chat-input-row {
     padding: 1rem;
-    padding-top: 0;
 }
 
 #chat-input-row.bigchat {
@@ -1682,3 +1681,7 @@ button:focus {
     /* Prevent layout shifts */
     overflow-anchor: none;
 }
+
+#chat-input span {
+    display: none;
+}

From 58282f7107635d80d415651af764d590a1e5a9f1 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 20 Jun 2025 06:59:48 -0700
Subject: [PATCH 03/29] Replace 'Generate' with 'Send' in the Chat tab

---
 css/main.css       | 4 ----
 modules/ui_chat.py | 2 +-
 2 files changed, 1 insertion(+), 5 deletions(-)

diff --git a/css/main.css b/css/main.css
index 531d622a..036a2bb7 100644
--- a/css/main.css
+++ b/css/main.css
@@ -365,10 +365,6 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
     gap: 0;
 }
 
-#chat-tab button#Generate, #chat-tab button#stop {
-    width: 89.3438px !important;
-}
-
 #chat-tab button, #notebook-tab button, #default-tab button {
     min-width: 0 !important;
 }
diff --git a/modules/ui_chat.py b/modules/ui_chat.py
index 8a90608f..8ed0aab1 100644
--- a/modules/ui_chat.py
+++ b/modules/ui_chat.py
@@ -60,7 +60,7 @@ def create_ui():
                 with gr.Column(scale=1, elem_id='generate-stop-container'):
                     with gr.Row():
                         shared.gradio['Stop'] = gr.Button('Stop', elem_id='stop', visible=False)
-                        shared.gradio['Generate'] = gr.Button('Generate', elem_id='Generate', variant='primary')
+                        shared.gradio['Generate'] = gr.Button('Send', elem_id='Generate', variant='primary')
 
         # Hover menu buttons
         with gr.Column(elem_id='chat-buttons'):

From 02f604479d492b0199bba260c5e8235ae9730d28 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 21 Jun 2025 14:02:17 -0700
Subject: [PATCH 04/29] Remove the pre-jinja2 custom stopping string handling
 (closes #7094)

---
 modules/models_settings.py   | 1 -
 user_data/models/config.yaml | 5 -----
 2 files changed, 6 deletions(-)

diff --git a/modules/models_settings.py b/modules/models_settings.py
index 6b22499e..bea5b4d6 100644
--- a/modules/models_settings.py
+++ b/modules/models_settings.py
@@ -23,7 +23,6 @@ def get_fallback_settings():
         'truncation_length': shared.settings['truncation_length'],
         'truncation_length_info': shared.settings['truncation_length'],
         'skip_special_tokens': shared.settings['skip_special_tokens'],
-        'custom_stopping_strings': shared.settings['custom_stopping_strings'],
     }
diff --git a/user_data/models/config.yaml b/user_data/models/config.yaml
index 8521c4c6..038ebcf1 100644
--- a/user_data/models/config.yaml
+++ b/user_data/models/config.yaml
@@ -33,18 +33,14 @@ skip_special_tokens: false
 .*alpaca-native-4bit:
   instruction_template: 'Alpaca'
-  custom_stopping_strings: '"### End"'
 .*llava:
   instruction_template: 'LLaVA'
-  custom_stopping_strings: '"\n###"'
 .*llava.*1.5:
   instruction_template: 'Vicuna-v1.1'
 .*wizard.*mega:
   instruction_template: 'Wizard-Mega'
-  custom_stopping_strings: '""'
 .*starchat-beta:
   instruction_template: 'Starchat-Beta'
-  custom_stopping_strings: '"<|end|>"'
 (?!.*v0)(?!.*1.1)(?!.*1_1)(?!.*stable)(?!.*chinese).*vicuna:
   instruction_template: 'Vicuna-v0'
 .*vicuna.*v0:
@@ -151,7 +147,6 @@ instruction_template: 'Alpaca'
 .*(openorca-platypus2):
   instruction_template: 'OpenOrca-Platypus2'
-  custom_stopping_strings: '"### Instruction:", "### Response:"'
 .*longchat:
   instruction_template: 'Vicuna-v1.1'
 .*vicuna-33b:

From 1b19dd77a4f01684a6a436914c4dd37a0a7975a5 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sun, 22 Jun 2025 17:29:17 -0700
Subject: [PATCH 05/29] Move 'Enable thinking' to the Chat tab

---
 modules/ui_chat.py       | 5 ++---
 modules/ui_parameters.py | 1 -
 2 files changed, 2 insertions(+), 4 deletions(-)

diff --git a/modules/ui_chat.py b/modules/ui_chat.py
index 8ed0aab1..159df235 100644
--- a/modules/ui_chat.py
+++ b/modules/ui_chat.py
@@ -78,9 +78,8 @@ def create_ui():
         with gr.Row():
             shared.gradio['start_with'] = gr.Textbox(label='Start reply with', placeholder='Sure thing!', value=shared.settings['start_with'], elem_classes=['add_scrollbar'])
 
-        with gr.Row():
-            shared.gradio['enable_web_search'] = gr.Checkbox(value=shared.settings.get('enable_web_search', False), label='Activate web search', elem_id='web-search')
-
+        shared.gradio['enable_thinking'] = gr.Checkbox(value=shared.settings['enable_thinking'], label='Enable thinking', info='Used by Qwen3 to toggle mode.')
+        shared.gradio['enable_web_search'] = gr.Checkbox(value=shared.settings.get('enable_web_search', False), label='Activate web search', elem_id='web-search')
         with gr.Row(visible=shared.settings.get('enable_web_search', False)) as shared.gradio['web_search_row']:
             shared.gradio['web_search_pages'] = gr.Number(value=shared.settings.get('web_search_pages', 3), precision=0, label='Number of pages to download', minimum=1, maximum=10)
diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py
index e42e4c0c..5c92f32e 100644
--- a/modules/ui_parameters.py
+++ b/modules/ui_parameters.py
@@ -85,7 +85,6 @@ def create_ui():
                 shared.gradio['auto_max_new_tokens'] = gr.Checkbox(value=shared.settings['auto_max_new_tokens'], label='auto_max_new_tokens', info='Expand max_new_tokens to the available context length.')
                 shared.gradio['ban_eos_token'] = gr.Checkbox(value=shared.settings['ban_eos_token'], label='Ban the eos_token', info='Forces the model to never end the generation prematurely.')
                 shared.gradio['add_bos_token'] = gr.Checkbox(value=shared.settings['add_bos_token'], label='Add the bos_token to the beginning of prompts', info='Disabling this can make the replies more creative.')
-                shared.gradio['enable_thinking'] = gr.Checkbox(value=shared.settings['enable_thinking'], label='Enable thinking', info='Used by Qwen3 to toggle mode.')
                 shared.gradio['skip_special_tokens'] = gr.Checkbox(value=shared.settings['skip_special_tokens'], label='Skip special tokens', info='Some specific models need this unset.')
                 shared.gradio['stream'] = gr.Checkbox(value=shared.settings['stream'], label='Activate text streaming')
                 shared.gradio['static_cache'] = gr.Checkbox(value=shared.settings['static_cache'], label='Static KV cache', info='Use a static cache for improved performance.')

From c3faecfd2737c84609b5b758426e5f1546f3cc46 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sun, 22 Jun 2025 17:51:09 -0700
Subject: [PATCH 06/29] Minor change

---
 modules/ui_chat.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/ui_chat.py b/modules/ui_chat.py
index 159df235..e2d8935a 100644
--- a/modules/ui_chat.py
+++ b/modules/ui_chat.py
@@ -19,6 +19,7 @@ def create_ui():
 
     shared.gradio['Chat input'] = gr.State()
     shared.gradio['history'] = gr.State({'internal': [], 'visible': [], 'metadata': {}})
+    shared.gradio['display'] = gr.JSON(value={}, visible=False)  # Hidden buffer
 
     with gr.Tab('Chat', id='Chat', elem_id='chat-tab'):
         with gr.Row(elem_id='past-chats-row', elem_classes=['pretty_scrollbar']):
@@ -47,7 +48,6 @@ def create_ui():
 
         with gr.Row():
             with gr.Column(elem_id='chat-col'):
-                shared.gradio['display'] = gr.JSON(value={}, visible=False)  # Hidden buffer
                 shared.gradio['html_display'] = gr.HTML(value=chat_html_wrapper({'internal': [], 'visible': [], 'metadata': {}}, '', '', 'chat', 'cai-chat', '')['html'], visible=True)
         with gr.Row(elem_id="chat-input-row"):
             with gr.Column(scale=1, elem_id='gr-hover-container'):

From 9a58964834833e37fc830f06f4d4a87a142d4179 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sun, 22 Jun 2025 20:25:53 -0700
Subject: [PATCH 07/29] Keep the last message visible when the input height
 changes

---
 js/main.js | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/js/main.js b/js/main.js
index 3ff4bf06..ea046e43 100644
--- a/js/main.js
+++ b/js/main.js
@@ -1046,3 +1046,42 @@ new MutationObserver(() => addMiniDeletes()).observe(
   {childList: true, subtree: true}
 );
 addMiniDeletes();
+
+//------------------------------------------------
+// Maintain distance from bottom when input height changes
+//------------------------------------------------
+let wasAtBottom = false;
+let preservedDistance = 0;
+
+function checkIfAtBottom() {
+  const distanceFromBottom = targetElement.scrollHeight - targetElement.scrollTop - targetElement.clientHeight;
+  wasAtBottom = distanceFromBottom <= 1; // Allow for rounding errors
+}
+
+function preserveScrollPosition() {
+  preservedDistance = targetElement.scrollHeight - targetElement.scrollTop - targetElement.clientHeight;
+}
+
+function restoreScrollPosition() {
+  if (wasAtBottom) {
+    // Force to bottom
+    targetElement.scrollTop = targetElement.scrollHeight - targetElement.clientHeight;
+  } else {
+    // Restore original distance
+    targetElement.scrollTop = targetElement.scrollHeight - targetElement.clientHeight - preservedDistance;
+  }
+}
+
+// Check position before input
+chatInput.addEventListener("beforeinput", () => {
+  checkIfAtBottom();
+  preserveScrollPosition();
+});
+
+// Restore after input
+chatInput.addEventListener("input", () => {
+  requestAnimationFrame(() => restoreScrollPosition());
+});
+
+// Update wasAtBottom when user scrolls
+targetElement.addEventListener("scroll", checkIfAtBottom);

From ab162f976c8ef3622d061420a1f47312c727fd7a Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Thu, 3 Jul 2025 11:31:52 -0700
Subject: [PATCH 08/29] Use miniforge instead of miniconda to avoid anaconda
 licensing issues

---
 README.md         | 12 +++++++-----
 start_linux.sh    |  2 +-
 start_macos.sh    |  2 +-
 start_windows.bat |  4 ++--
 4 files changed, 11 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index ac508f78..ee4a326a 100644
--- a/README.md
+++ b/README.md
@@ -62,7 +62,7 @@ One-click installer details
 
 ### One-click-installer
 
-The script uses Miniconda to set up a Conda environment in the `installer_files` folder.
+The script uses Miniforge to set up a Conda environment in the `installer_files` folder.
 
 If you ever need to install something manually in the `installer_files` environment, you can launch an interactive shell using the cmd script: `cmd_linux.sh`, `cmd_windows.bat`, or `cmd_macos.sh`.
 
@@ -115,15 +115,17 @@ Manual full installation with conda or docker
 
 #### 0. Install Conda
 
-https://docs.conda.io/en/latest/miniconda.html
+https://github.com/conda-forge/miniforge
 
-On Linux or WSL, it can be automatically installed with these two commands ([source](https://educe-ubc.github.io/conda.html)):
+On Linux or WSL, Miniforge can be automatically installed with these two commands:
 
 ```
-curl -sL "https://repo.anaconda.com/miniconda/Miniconda3-latest-Linux-x86_64.sh" > "Miniconda3.sh"
-bash Miniconda3.sh
+curl -sL "https://github.com/conda-forge/miniforge/releases/latest/download/Miniforge3-Linux-x86_64.sh" > "Miniforge3.sh"
+bash Miniforge3.sh
 ```
 
+For other platforms, download from: https://github.com/conda-forge/miniforge/releases/latest
+
 #### 1. Create a new conda environment
 
 ```
diff --git a/start_linux.sh b/start_linux.sh
index e2b00558..0b9e65ab 100755
--- a/start_linux.sh
+++ b/start_linux.sh
@@ -30,7 +30,7 @@ esac
 INSTALL_DIR="$(pwd)/installer_files"
 CONDA_ROOT_PREFIX="$(pwd)/installer_files/conda"
 INSTALL_ENV_DIR="$(pwd)/installer_files/env"
-MINICONDA_DOWNLOAD_URL="https://repo.anaconda.com/miniconda/Miniconda3-py311_24.11.1-0-Linux-${OS_ARCH}.sh"
+MINICONDA_DOWNLOAD_URL="https://github.com/conda-forge/miniforge/releases/download/25.3.0-3/Miniforge3-25.3.0-3-Linux-${OS_ARCH}.sh"
 conda_exists="F"
 
 # figure out whether git and conda needs to be installed
diff --git a/start_macos.sh b/start_macos.sh
index bff11bc1..2e1a7842 100755
--- a/start_macos.sh
+++ b/start_macos.sh
@@ -30,7 +30,7 @@ esac
 INSTALL_DIR="$(pwd)/installer_files"
 CONDA_ROOT_PREFIX="$(pwd)/installer_files/conda"
 INSTALL_ENV_DIR="$(pwd)/installer_files/env"
-MINICONDA_DOWNLOAD_URL="https://repo.anaconda.com/miniconda/Miniconda3-py311_24.11.1-0-MacOSX-${OS_ARCH}.sh"
+MINICONDA_DOWNLOAD_URL="https://github.com/conda-forge/miniforge/releases/download/25.3.0-3/Miniforge3-25.3.0-3-MacOSX-${OS_ARCH}.sh"
 conda_exists="F"
 
 # figure out whether git and conda needs to be installed
diff --git a/start_windows.bat b/start_windows.bat
index f5e66ec2..020baeae 100755
--- a/start_windows.bat
+++ b/start_windows.bat
@@ -36,8 +36,8 @@ set TEMP=%cd%\installer_files
 set INSTALL_DIR=%cd%\installer_files
 set CONDA_ROOT_PREFIX=%cd%\installer_files\conda
 set INSTALL_ENV_DIR=%cd%\installer_files\env
-set MINICONDA_DOWNLOAD_URL=https://repo.anaconda.com/miniconda/Miniconda3-py311_24.11.1-0-Windows-x86_64.exe
-set MINICONDA_CHECKSUM=43dcbcc315ff91edf959e002cd2f1ede38c64b999fefcc951bccf2ed69c9e8bb
+set MINICONDA_DOWNLOAD_URL=https://github.com/conda-forge/miniforge/releases/download/25.3.0-3/Miniforge3-25.3.0-3-Windows-x86_64.exe
+set MINICONDA_CHECKSUM=b48cd98430170983076dfb51769a6d37668176f59bf3b59c4b21ac4c9bc24f39
 set conda_exists=F
 
 @rem figure out whether git and conda needs to be installed

From 13373391df5923aea218807ce1d0c863549958c2 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Thu, 3 Jul 2025 14:13:22 -0700
Subject: [PATCH 09/29] Rename miniconda -> miniforge everywhere

---
 cmd_linux.sh              |  2 +-
 cmd_macos.sh              |  2 +-
 cmd_windows.bat           |  4 ++--
 start_linux.sh            | 20 ++++++++++----------
 start_macos.sh            | 20 ++++++++++----------
 start_windows.bat         | 34 +++++++++++++++++-----------------
 update_wizard_linux.sh    |  2 +-
 update_wizard_macos.sh    |  2 +-
 update_wizard_windows.bat |  4 ++--
 9 files changed, 45 insertions(+), 45 deletions(-)

diff --git a/cmd_linux.sh b/cmd_linux.sh
index 576dbf02..e124aab9 100755
--- a/cmd_linux.sh
+++ b/cmd_linux.sh
@@ -2,7 +2,7 @@
 
 cd "$(dirname "${BASH_SOURCE[0]}")"
 
-if [[ "$(pwd)" =~ " " ]]; then echo This script relies on Miniconda which can not be silently installed under a path with spaces. && exit; fi
+if [[ "$(pwd)" =~ " " ]]; then echo This script relies on Miniforge which can not be silently installed under a path with spaces. && exit; fi
 
 # deactivate existing conda envs as needed to avoid conflicts
 { conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null
diff --git a/cmd_macos.sh b/cmd_macos.sh
index 1b052e5c..7e1a379f 100755
--- a/cmd_macos.sh
+++ b/cmd_macos.sh
@@ -2,7 +2,7 @@
 
 cd "$(dirname "${BASH_SOURCE[0]}")"
 
-if [[ "$(pwd)" =~ " " ]]; then echo This script relies on Miniconda which can not be silently installed under a path with spaces. && exit; fi
+if [[ "$(pwd)" =~ " " ]]; then echo This script relies on Miniforge which can not be silently installed under a path with spaces. && exit; fi
 
 # deactivate existing conda envs as needed to avoid conflicts
 { conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null
diff --git a/cmd_windows.bat b/cmd_windows.bat
index 531a3261..787b4335 100755
--- a/cmd_windows.bat
+++ b/cmd_windows.bat
@@ -4,7 +4,7 @@ cd /D "%~dp0"
 
 set PATH=%PATH%;%SystemRoot%\system32
 
-echo "%CD%"| findstr /C:" " >nul && echo This script relies on Miniconda which can not be silently installed under a path with spaces. && goto end
+echo "%CD%"| findstr /C:" " >nul && echo This script relies on Miniforge which can not be silently installed under a path with spaces. && goto end
 
 @rem fix failed install when installing to a separate drive
 set TMP=%cd%\installer_files
@@ -25,7 +25,7 @@ set "CUDA_PATH=%INSTALL_ENV_DIR%"
 set "CUDA_HOME=%CUDA_PATH%"
 
 @rem activate installer env
-call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%" || ( echo. && echo Miniconda hook not found. && goto end )
+call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%" || ( echo. && echo Miniforge hook not found. && goto end )
 
 @rem enter commands
 cmd /k "%*"
diff --git a/start_linux.sh b/start_linux.sh
index 0b9e65ab..a991e1bc 100755
--- a/start_linux.sh
+++ b/start_linux.sh
@@ -13,7 +13,7 @@ if [ -d "portable_env" ]; then
     exit $?
 fi
 
-if [[ "$(pwd)" =~ " " ]]; then echo This script relies on Miniconda which can not be silently installed under a path with spaces. && exit; fi
+if [[ "$(pwd)" =~ " " ]]; then echo This script relies on Miniforge which can not be silently installed under a path with spaces. && exit; fi
 
 # deactivate existing conda envs as needed to avoid conflicts
 { conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null
@@ -30,29 +30,29 @@ esac
 INSTALL_DIR="$(pwd)/installer_files"
 CONDA_ROOT_PREFIX="$(pwd)/installer_files/conda"
 INSTALL_ENV_DIR="$(pwd)/installer_files/env"
-MINICONDA_DOWNLOAD_URL="https://github.com/conda-forge/miniforge/releases/download/25.3.0-3/Miniforge3-25.3.0-3-Linux-${OS_ARCH}.sh"
+MINIFORGE_DOWNLOAD_URL="https://github.com/conda-forge/miniforge/releases/download/25.3.0-3/Miniforge3-25.3.0-3-Linux-${OS_ARCH}.sh"
 conda_exists="F"
 
 # figure out whether git and conda needs to be installed
 if "$CONDA_ROOT_PREFIX/bin/conda" --version &>/dev/null; then conda_exists="T"; fi
 
 # (if necessary) install git and conda into a contained environment
-# download miniconda
+# download miniforge
 if [ "$conda_exists" == "F" ]; then
-    echo "Downloading Miniconda from $MINICONDA_DOWNLOAD_URL to $INSTALL_DIR/miniconda_installer.sh"
+    echo "Downloading Miniforge from $MINIFORGE_DOWNLOAD_URL to $INSTALL_DIR/miniforge_installer.sh"
 
     mkdir -p "$INSTALL_DIR"
-    curl -L "$MINICONDA_DOWNLOAD_URL" > "$INSTALL_DIR/miniconda_installer.sh"
+    curl -L "$MINIFORGE_DOWNLOAD_URL" > "$INSTALL_DIR/miniforge_installer.sh"
 
-    chmod u+x "$INSTALL_DIR/miniconda_installer.sh"
-    bash "$INSTALL_DIR/miniconda_installer.sh" -b -p $CONDA_ROOT_PREFIX
+    chmod u+x "$INSTALL_DIR/miniforge_installer.sh"
+    bash "$INSTALL_DIR/miniforge_installer.sh" -b -p $CONDA_ROOT_PREFIX
 
     # test the conda binary
-    echo "Miniconda version:"
+    echo "Miniforge version:"
    "$CONDA_ROOT_PREFIX/bin/conda" --version
 
-    # delete the Miniconda installer
-    rm "$INSTALL_DIR/miniconda_installer.sh"
+    # delete the Miniforge installer
+    rm "$INSTALL_DIR/miniforge_installer.sh"
 fi
 
 # create the installer env
diff --git a/start_macos.sh b/start_macos.sh
index 2e1a7842..6426229a 100755
--- a/start_macos.sh
+++ b/start_macos.sh
@@ -13,7 +13,7 @@ if [ -d "portable_env" ]; then
    exit $?
 fi
 
-if [[ "$(pwd)" =~ " " ]]; then echo This script relies on Miniconda which can not be silently installed under a path with spaces. && exit; fi
+if [[ "$(pwd)" =~ " " ]]; then echo This script relies on Miniforge which can not be silently installed under a path with spaces. && exit; fi
 
 # deactivate existing conda envs as needed to avoid conflicts
 { conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null
@@ -30,29 +30,29 @@ esac
 INSTALL_DIR="$(pwd)/installer_files"
 CONDA_ROOT_PREFIX="$(pwd)/installer_files/conda"
 INSTALL_ENV_DIR="$(pwd)/installer_files/env"
-MINICONDA_DOWNLOAD_URL="https://github.com/conda-forge/miniforge/releases/download/25.3.0-3/Miniforge3-25.3.0-3-MacOSX-${OS_ARCH}.sh"
+MINIFORGE_DOWNLOAD_URL="https://github.com/conda-forge/miniforge/releases/download/25.3.0-3/Miniforge3-25.3.0-3-MacOSX-${OS_ARCH}.sh"
 conda_exists="F"
 
 # figure out whether git and conda needs to be installed
 if "$CONDA_ROOT_PREFIX/bin/conda" --version &>/dev/null; then conda_exists="T"; fi
 
 # (if necessary) install git and conda into a contained environment
-# download miniconda
+# download miniforge
 if [ "$conda_exists" == "F" ]; then
-    echo "Downloading Miniconda from $MINICONDA_DOWNLOAD_URL to $INSTALL_DIR/miniconda_installer.sh"
+    echo "Downloading Miniforge from $MINIFORGE_DOWNLOAD_URL to $INSTALL_DIR/miniforge_installer.sh"
 
     mkdir -p "$INSTALL_DIR"
-    curl -L "$MINICONDA_DOWNLOAD_URL" > "$INSTALL_DIR/miniconda_installer.sh"
+    curl -L "$MINIFORGE_DOWNLOAD_URL" > "$INSTALL_DIR/miniforge_installer.sh"
 
-    chmod u+x "$INSTALL_DIR/miniconda_installer.sh"
-    bash "$INSTALL_DIR/miniconda_installer.sh" -b -p $CONDA_ROOT_PREFIX
+    chmod u+x "$INSTALL_DIR/miniforge_installer.sh"
+    bash "$INSTALL_DIR/miniforge_installer.sh" -b -p $CONDA_ROOT_PREFIX
 
     # test the conda binary
-    echo "Miniconda version:"
+    echo "Miniforge version:"
    "$CONDA_ROOT_PREFIX/bin/conda" --version
 
-    # delete the Miniconda installer
-    rm "$INSTALL_DIR/miniconda_installer.sh"
+    # delete the Miniforge installer
+    rm "$INSTALL_DIR/miniforge_installer.sh"
 fi
 
 # create the installer env
diff --git a/start_windows.bat b/start_windows.bat
index 020baeae..68c6194e 100755
--- a/start_windows.bat
+++ b/start_windows.bat
@@ -16,7 +16,7 @@ if exist "portable_env" (
 
 set PATH=%PATH%;%SystemRoot%\system32
 
-echo "%CD%"| findstr /C:" " >nul && echo This script relies on Miniconda which can not be silently installed under a path with spaces. && goto end
+echo "%CD%"| findstr /C:" " >nul && echo This script relies on Miniforge which can not be silently installed under a path with spaces. && goto end
 
 @rem Check for special characters in installation path
 set "SPCHARMESSAGE="WARNING: Special characters were detected in the installation path!" " This can cause the installation to fail!""
@@ -36,8 +36,8 @@ set TEMP=%cd%\installer_files
 set INSTALL_DIR=%cd%\installer_files
 set CONDA_ROOT_PREFIX=%cd%\installer_files\conda
 set INSTALL_ENV_DIR=%cd%\installer_files\env
-set MINICONDA_DOWNLOAD_URL=https://github.com/conda-forge/miniforge/releases/download/25.3.0-3/Miniforge3-25.3.0-3-Windows-x86_64.exe
-set MINICONDA_CHECKSUM=b48cd98430170983076dfb51769a6d37668176f59bf3b59c4b21ac4c9bc24f39
+set MINIFORGE_DOWNLOAD_URL=https://github.com/conda-forge/miniforge/releases/download/25.3.0-3/Miniforge3-25.3.0-3-Windows-x86_64.exe
+set MINIFORGE_CHECKSUM=b48cd98430170983076dfb51769a6d37668176f59bf3b59c4b21ac4c9bc24f39
 set conda_exists=F
 
 @rem figure out whether git and conda needs to be installed
@@ -47,40 +47,40 @@ if "%ERRORLEVEL%" EQU "0" set conda_exists=T
 @rem (if necessary) install git and conda into a contained environment
 @rem download conda
 if "%conda_exists%" == "F" (
-    echo Downloading Miniconda from %MINICONDA_DOWNLOAD_URL% to %INSTALL_DIR%\miniconda_installer.exe
+    echo Downloading Miniforge from %MINIFORGE_DOWNLOAD_URL% to %INSTALL_DIR%\miniforge_installer.exe
 
     mkdir "%INSTALL_DIR%"
-    call curl -Lk "%MINICONDA_DOWNLOAD_URL%" > "%INSTALL_DIR%\miniconda_installer.exe" || ( echo. && echo Miniconda failed to download. && goto end )
+    call curl -Lk "%MINIFORGE_DOWNLOAD_URL%" > "%INSTALL_DIR%\miniforge_installer.exe" || ( echo. && echo Miniforge failed to download. && goto end )
 
     @rem Try CertUtil first
-    for /f %%a in ('CertUtil -hashfile "%INSTALL_DIR%\miniconda_installer.exe" SHA256 ^| find /i /v " " ^| find /i "%MINICONDA_CHECKSUM%"') do (
+    for /f %%a in ('CertUtil -hashfile "%INSTALL_DIR%\miniforge_installer.exe" SHA256 ^| find /i /v " " ^| find /i "%MINIFORGE_CHECKSUM%"') do (
         set "output=%%a"
     )
 
     @rem If CertUtil fails, try PowerShell
     if not defined output (
-        for /f %%a in ('powershell -Command "if((Get-FileHash \"%INSTALL_DIR%\miniconda_installer.exe\" -Algorithm SHA256).Hash -eq ''%MINICONDA_CHECKSUM%''){echo true}"') do (
+        for /f %%a in ('powershell -Command "if((Get-FileHash \"%INSTALL_DIR%\miniforge_installer.exe\" -Algorithm SHA256).Hash -eq ''%MINIFORGE_CHECKSUM%''){echo true}"') do (
            set "output=%%a"
        )
    )
 
    if not defined output (
-        echo The checksum verification for miniconda_installer.exe has failed.
-        del "%INSTALL_DIR%\miniconda_installer.exe"
+        echo The checksum verification for miniforge_installer.exe has failed.
+        del "%INSTALL_DIR%\miniforge_installer.exe"
        goto end
    ) else (
-        echo The checksum verification for miniconda_installer.exe has passed successfully.
+        echo The checksum verification for miniforge_installer.exe has passed successfully.
    )
 
-    echo Installing Miniconda to %CONDA_ROOT_PREFIX%
-    start /wait "" "%INSTALL_DIR%\miniconda_installer.exe" /InstallationType=JustMe /NoShortcuts=1 /AddToPath=0 /RegisterPython=0 /NoRegistry=1 /S /D=%CONDA_ROOT_PREFIX%
+    echo Installing Miniforge to %CONDA_ROOT_PREFIX%
+    start /wait "" "%INSTALL_DIR%\miniforge_installer.exe" /InstallationType=JustMe /NoShortcuts=1 /AddToPath=0 /RegisterPython=0 /NoRegistry=1 /S /D=%CONDA_ROOT_PREFIX%
 
     @rem test the conda binary
-    echo Miniconda version:
-    call "%CONDA_ROOT_PREFIX%\_conda.exe" --version || ( echo. && echo Miniconda not found. && goto end )
+    echo Miniforge version:
+    call "%CONDA_ROOT_PREFIX%\_conda.exe" --version || ( echo. && echo Miniforge not found. && goto end )
 
-    @rem delete the Miniconda installer
-    del "%INSTALL_DIR%\miniconda_installer.exe"
+    @rem delete the Miniforge installer
+    del "%INSTALL_DIR%\miniforge_installer.exe"
 )
 
 @rem create the installer env
@@ -96,7 +96,7 @@ set "CUDA_PATH=%INSTALL_ENV_DIR%"
 set "CUDA_HOME=%CUDA_PATH%"
 
 @rem activate installer env
-call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%" || ( echo. && echo Miniconda hook not found. && goto end )
+call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%" || ( echo. && echo Miniforge hook not found. && goto end )
 
 @rem setup installer env
 call python one_click.py %*
diff --git a/update_wizard_linux.sh b/update_wizard_linux.sh
index c81d9d9b..eb4a753c 100755
--- a/update_wizard_linux.sh
+++ b/update_wizard_linux.sh
@@ -2,7 +2,7 @@
 
 cd "$(dirname "${BASH_SOURCE[0]}")"
 
-if [[ "$(pwd)" =~ " " ]]; then echo This script relies on Miniconda which can not be silently installed under a path with spaces. && exit; fi
+if [[ "$(pwd)" =~ " " ]]; then echo This script relies on Miniforge which can not be silently installed under a path with spaces. && exit; fi
 
 # deactivate existing conda envs as needed to avoid conflicts
 { conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null
diff --git a/update_wizard_macos.sh b/update_wizard_macos.sh
index f58bb9e9..61a8110c 100755
--- a/update_wizard_macos.sh
+++ b/update_wizard_macos.sh
@@ -2,7 +2,7 @@
 
 cd "$(dirname "${BASH_SOURCE[0]}")"
 
-if [[ "$(pwd)" =~ " " ]]; then echo This script relies on Miniconda which can not be silently installed under a path with spaces. && exit; fi
+if [[ "$(pwd)" =~ " " ]]; then echo This script relies on Miniforge which can not be silently installed under a path with spaces. && exit; fi
 
 # deactivate existing conda envs as needed to avoid conflicts
 { conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null
diff --git a/update_wizard_windows.bat b/update_wizard_windows.bat
index fac251a7..e9203c52 100755
--- a/update_wizard_windows.bat
+++ b/update_wizard_windows.bat
@@ -4,7 +4,7 @@ cd /D "%~dp0"
 
 set PATH=%PATH%;%SystemRoot%\system32
 
-echo "%CD%"| findstr /C:" " >nul && echo This script relies on Miniconda which can not be silently installed under a path with spaces. && goto end
+echo "%CD%"| findstr /C:" " >nul && echo This script relies on Miniforge which can not be silently installed under a path with spaces. && goto end
 
 @rem fix failed install when installing to a separate drive
 set TMP=%cd%\installer_files
@@ -25,7 +25,7 @@ set "CUDA_PATH=%INSTALL_ENV_DIR%"
 set "CUDA_HOME=%CUDA_PATH%"
 
 @rem activate installer env
-call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%" || ( echo. && echo Miniconda hook not found. && goto end )
+call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%" || ( echo. && echo Miniforge hook not found. && goto end )
 
 @rem update installer env
 call python one_click.py --update-wizard && (

From cbba88f5655f949828af0e46b5a12e1a5ac835a2 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Thu, 3 Jul 2025 18:14:52 -0700
Subject: [PATCH 10/29] Fix scrolling during streaming when thinking blocks
 are present

---
 js/main.js | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

diff --git a/js/main.js b/js/main.js
index ea046e43..c44c144e 100644
--- a/js/main.js
+++ b/js/main.js
@@ -199,10 +199,11 @@ const observer = new MutationObserver(function(mutations) {
     const lastChild = messagesContainer?.lastElementChild;
     const prevSibling = lastChild?.previousElementSibling;
     if (lastChild && prevSibling) {
-      lastChild.style.setProperty("margin-bottom",
-        `max(0px, calc(max(70vh, 100vh - ${prevSibling.offsetHeight}px - 84px) - ${lastChild.offsetHeight}px))`,
-        "important"
-      );
+      // Add padding to the messages container to create room for the last message.
+      // The purpose of this is to avoid constant scrolling during streaming in
+      // instruct mode.
+      const bufferHeight = Math.max(0, Math.max(0.7 * window.innerHeight, window.innerHeight - prevSibling.offsetHeight - 84) - lastChild.offsetHeight);
+      messagesContainer.style.paddingBottom = `${bufferHeight}px`;
     }
   }
 });

From 877c651c04490d8b13948395010fb53acc84e74d Mon Sep 17 00:00:00 2001
From: zombiegreedo <65383556+zombiegreedo@users.noreply.github.com>
Date: Thu, 3 Jul 2025 22:05:46 -0400
Subject: [PATCH 11/29] Handle either missing start or end tags (#7102)

---
 modules/html_generator.py | 40 +++++++++++++++++++--------------------
 1 file changed, 20 insertions(+), 20 deletions(-)

diff --git a/modules/html_generator.py b/modules/html_generator.py
index 11572fc6..6844c244 100644
--- a/modules/html_generator.py
+++ b/modules/html_generator.py
@@ -116,29 +116,29 @@ def extract_thinking_block(string):
     THINK_START_TAG = "<think>"
     THINK_END_TAG = "</think>"
 
-    # Look for opening tag
-    start_pos = string.lstrip().find(THINK_START_TAG)
-    if start_pos == -1:
+    # Look for think tag
+    start_pos = string.find(THINK_START_TAG)
+    end_pos = string.find(THINK_END_TAG)
+
+    # Return if neither tag is in string
+    if start_pos == -1 and end_pos == -1:
         return None, string
 
-    # Adjust start position to account for any leading whitespace
-    start_pos = string.find(THINK_START_TAG)
-
-    # Find the content after the opening tag
-    content_start = start_pos + len(THINK_START_TAG)
-
-    # Look for closing tag
-    end_pos = string.find(THINK_END_TAG, content_start)
-
-    if end_pos != -1:
-        # Both tags found - extract content between them
-        thinking_content = string[content_start:end_pos]
-        remaining_content = string[end_pos + len(THINK_END_TAG):]
-        return thinking_content, remaining_content
+    # handle missing start or end tags
+    if start_pos == -1:
+        thought_start = 0
     else:
-        # Only opening tag found - everything else is thinking content
-        thinking_content = string[content_start:]
-        return thinking_content, ""
+        thought_start = start_pos + len(THINK_START_TAG)
+    if end_pos == -1:
+        thought_end = len(string)
+        content_start = len(string)
+    else:
+        thought_end = end_pos
+        content_start = end_pos + len(THINK_END_TAG)
+
+    thinking_content = string[thought_start:thought_end]
+    remaining_content = string[content_start:]
+    return thinking_content, remaining_content
 
 
 @functools.lru_cache(maxsize=None)

From 23bb94a5fbc83995bcadd234981991584391146b Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Thu, 3 Jul 2025 20:36:54 -0700
Subject: [PATCH 12/29] Update llama.cpp

---
 requirements/full/requirements.txt                     | 4 ++--
 requirements/full/requirements_amd.txt                 | 4 ++--
 requirements/full/requirements_amd_noavx2.txt          | 4 ++--
 requirements/full/requirements_apple_intel.txt         | 4 ++--
 requirements/full/requirements_apple_silicon.txt       | 6 +++---
 requirements/full/requirements_cpu_only.txt            | 4 ++--
 requirements/full/requirements_cpu_only_noavx2.txt     | 4 ++--
 requirements/full/requirements_cuda128.txt             | 4 ++--
 requirements/full/requirements_cuda128_noavx2.txt      | 4 ++--
 requirements/full/requirements_noavx2.txt              | 4 ++--
 requirements/portable/requirements.txt                 | 4 ++--
 requirements/portable/requirements_apple_intel.txt     | 4 ++--
 requirements/portable/requirements_apple_silicon.txt   | 6 +++---
 requirements/portable/requirements_cpu_only.txt        | 4 ++--
 requirements/portable/requirements_cpu_only_noavx2.txt | 4 ++--
 requirements/portable/requirements_noavx2.txt          | 4 ++--
 requirements/portable/requirements_vulkan.txt          | 4 ++--
 requirements/portable/requirements_vulkan_noavx2.txt   | 4 ++--
 18 files changed, 38 insertions(+), 38 deletions(-)

diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt
index 14828e8d..cc0bd517 100644
--- a/requirements/full/requirements.txt
+++ b/requirements/full/requirements.txt
@@ -34,8 +34,8 @@ sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/oobabooga/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
 https://github.com/oobabooga/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt
index ec2040d0..ce7060dc 100644
--- a/requirements/full/requirements_amd.txt
+++ b/requirements/full/requirements_amd.txt
@@ -33,7 +33,7 @@ sse-starlette==1.6.5
 tiktoken
 
 # AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt
index 4b50c693..560347fb 100644
--- a/requirements/full/requirements_amd_noavx2.txt
+++ b/requirements/full/requirements_amd_noavx2.txt
@@ -33,7 +33,7 @@ sse-starlette==1.6.5
 tiktoken
 
 # AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt
index a6e2925e..bb136592 100644
--- a/requirements/full/requirements_apple_intel.txt
+++ b/requirements/full/requirements_apple_intel.txt
@@ -33,7 +33,7 @@ sse-starlette==1.6.5
 tiktoken
 
 # Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
 https://github.com/oobabooga/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4-py3-none-any.whl
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl
diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt
index 1b97d844..8bb21761 100644
--- a/requirements/full/requirements_apple_silicon.txt
+++ b/requirements/full/requirements_apple_silicon.txt
@@ -33,8 +33,8 @@ sse-starlette==1.6.5
 tiktoken
 
 # Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
 https://github.com/oobabooga/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4-py3-none-any.whl
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl
diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt
index fa553f7c..c068c8d8 100644
--- a/requirements/full/requirements_cpu_only.txt
+++ b/requirements/full/requirements_cpu_only.txt
@@ -33,5 +33,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # llama.cpp (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt
index 505f8ae8..bf41929c 100644
--- a/requirements/full/requirements_cpu_only_noavx2.txt
+++ b/requirements/full/requirements_cpu_only_noavx2.txt
@@ -33,5 +33,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # llama.cpp (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
diff --git a/requirements/full/requirements_cuda128.txt b/requirements/full/requirements_cuda128.txt
index 84ffa327..aabad9a8 100644
--- a/requirements/full/requirements_cuda128.txt
+++ b/requirements/full/requirements_cuda128.txt
@@ -34,8 +34,8 @@ sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
diff --git a/requirements/full/requirements_cuda128_noavx2.txt b/requirements/full/requirements_cuda128_noavx2.txt
index da995438..ed6ab186 100644
--- a/requirements/full/requirements_cuda128_noavx2.txt
+++ b/requirements/full/requirements_cuda128_noavx2.txt
@@ -34,8 +34,8 @@ sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt
index 13ecaf87..ef9d5b80 100644
--- a/requirements/full/requirements_noavx2.txt
+++ b/requirements/full/requirements_noavx2.txt
@@ -34,8 +34,8 @@ sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/oobabooga/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
 https://github.com/oobabooga/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
 https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt
index 49cd6264..77ccdfd6 100644
--- a/requirements/portable/requirements.txt
+++ b/requirements/portable/requirements.txt
@@ -19,5 +19,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt
index fca8208f..f6a42e55 100644
--- a/requirements/portable/requirements_apple_intel.txt
+++ b/requirements/portable/requirements_apple_intel.txt
@@ -19,5 +19,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt
index 86b00651..8be7742e 100644
--- a/requirements/portable/requirements_apple_silicon.txt
+++ b/requirements/portable/requirements_apple_silicon.txt
@@ -19,6 +19,6 @@ sse-starlette==1.6.5
 tiktoken
 
 # Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0"
diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt
index fca46fb5..de5b66b7 100644
--- a/requirements/portable/requirements_cpu_only.txt
+++ b/requirements/portable/requirements_cpu_only.txt
@@ -19,5 +19,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # llama.cpp (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"
diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt
index 3ebc2488..dfd71db7 100644
--- a/requirements/portable/requirements_cpu_only_noavx2.txt
+++ b/requirements/portable/requirements_cpu_only_noavx2.txt
@@ -19,5 +19,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # llama.cpp (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"
diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt
index 2bdf257e..a1923830 100644
--- a/requirements/portable/requirements_noavx2.txt
+++ b/requirements/portable/requirements_noavx2.txt
@@ -19,5 +19,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt
index 0b98ee5a..890b14ff 100644
--- a/requirements/portable/requirements_vulkan.txt
+++ b/requirements/portable/requirements_vulkan.txt
@@ -19,5 +19,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt
index 188dc085..e2607dbb 100644
--- a/requirements/portable/requirements_vulkan_noavx2.txt
+++ b/requirements/portable/requirements_vulkan_noavx2.txt
@@ -19,5 +19,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.20.0/llama_cpp_binaries-0.20.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

From 92ec8dda03b6a64a0efb5b4dc6b8109b460f19ad Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 4 Jul 2025 06:04:04 -0700
Subject: [PATCH 13/29] Fix chat history getting lost if the UI is inactive
 for a long time (closes #7109)

---
 modules/ui.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/modules/ui.py b/modules/ui.py
index fb719158..0030bb02 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -296,7 +296,7 @@ def gather_interface_values(*args):
         shared.persistent_interface_state.pop('textbox')
 
     # Prevent history loss if backend is restarted but UI is not refreshed
-    if output['history'] is None and output['unique_id'] is not None:
+    if (output['history'] is None or (len(output['history'].get('visible', [])) == 0 and len(output['history'].get('internal', [])) == 0)) and output['unique_id'] is not None:
         output['history'] = load_history(output['unique_id'], output['character_menu'], output['mode'])
 
     return output

From c4d5331c03e223b84963b81b98e36a1d71fa3338 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 4 Jul 2025 13:21:52 -0700
Subject: [PATCH 14/29] Fix autoscroll after fonts load

---
 js/main.js | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/js/main.js b/js/main.js
index c44c144e..c805fc9a 100644
--- a/js/main.js
+++ b/js/main.js
@@ -1086,3 +1086,17 @@ chatInput.addEventListener("input", () => {
 
 // Update wasAtBottom when user scrolls
 targetElement.addEventListener("scroll", checkIfAtBottom);
+
+//------------------------------------------------
+// Fix autoscroll after fonts load
+//------------------------------------------------
+document.fonts.addEventListener("loadingdone", (event) => {
+  setTimeout(() => {
+    if (!window.isScrolled) {
+      const maxScroll = targetElement.scrollHeight - targetElement.clientHeight;
+      if (targetElement.scrollTop < maxScroll - 5) {
+        targetElement.scrollTop = maxScroll;
+      }
+    }
+  }, 50);
+});

From c4d738f39fc5f3b998517ce1778eb8662c1a60c6 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 5 Jul 2025 14:09:29 -0700
Subject: [PATCH 15/29] Update llama.cpp

---
 requirements/full/requirements.txt                     | 4 ++--
 requirements/full/requirements_amd.txt                 | 4 ++--
requirements/full/requirements_amd_noavx2.txt | 4 ++-- requirements/full/requirements_apple_intel.txt | 4 ++-- requirements/full/requirements_apple_silicon.txt | 6 +++--- requirements/full/requirements_cpu_only.txt | 4 ++-- requirements/full/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/full/requirements_cuda128.txt | 4 ++-- requirements/full/requirements_cuda128_noavx2.txt | 4 ++-- requirements/full/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements.txt | 4 ++-- requirements/portable/requirements_apple_intel.txt | 4 ++-- requirements/portable/requirements_apple_silicon.txt | 6 +++--- requirements/portable/requirements_cpu_only.txt | 4 ++-- requirements/portable/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/portable/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements_vulkan.txt | 4 ++-- requirements/portable/requirements_vulkan_noavx2.txt | 4 ++-- 18 files changed, 38 insertions(+), 38 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index cc0bd517..1f7b8f9e 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -34,8 +34,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index ce7060dc..ea2dbf67 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -33,7 +33,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" 
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index 560347fb..251d5e24 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -33,7 +33,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index bb136592..649eaaca 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -33,7 +33,7 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4-py3-none-any.whl https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl diff --git 
a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index 8bb21761..7f1895fc 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -33,8 +33,8 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4-py3-none-any.whl https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index c068c8d8..6ec88d63 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -33,5 +33,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index bf41929c..7e6c780d 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -33,5 +33,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) 
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_cuda128.txt b/requirements/full/requirements_cuda128.txt index aabad9a8..cbac577a 100644 --- a/requirements/full/requirements_cuda128.txt +++ b/requirements/full/requirements_cuda128.txt @@ -34,8 +34,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_cuda128_noavx2.txt b/requirements/full/requirements_cuda128_noavx2.txt index ed6ab186..3f7add14 100644 --- a/requirements/full/requirements_cuda128_noavx2.txt +++ b/requirements/full/requirements_cuda128_noavx2.txt @@ -34,8 +34,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" 
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index ef9d5b80..9903bf7a 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -34,8 +34,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt index 77ccdfd6..785ce623 100644 --- a/requirements/portable/requirements.txt +++ b/requirements/portable/requirements.txt @@ -19,5 +19,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git 
a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt index f6a42e55..d56d6a79 100644 --- a/requirements/portable/requirements_apple_intel.txt +++ b/requirements/portable/requirements_apple_intel.txt @@ -19,5 +19,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt index 8be7742e..3e00a781 100644 --- a/requirements/portable/requirements_apple_silicon.txt +++ b/requirements/portable/requirements_apple_silicon.txt @@ -19,6 +19,6 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt index de5b66b7..8006762e 100644 --- a/requirements/portable/requirements_cpu_only.txt +++ b/requirements/portable/requirements_cpu_only.txt @@ -19,5 +19,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" 
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt index dfd71db7..9ec08082 100644 --- a/requirements/portable/requirements_cpu_only_noavx2.txt +++ b/requirements/portable/requirements_cpu_only_noavx2.txt @@ -19,5 +19,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt index a1923830..8ca5441a 100644 --- a/requirements/portable/requirements_noavx2.txt +++ b/requirements/portable/requirements_noavx2.txt @@ -19,5 +19,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt index 890b14ff..a164c879 100644 --- a/requirements/portable/requirements_vulkan.txt +++ b/requirements/portable/requirements_vulkan.txt @@ -19,5 +19,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" 
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt index e2607dbb..3b6943b7 100644 --- a/requirements/portable/requirements_vulkan_noavx2.txt +++ b/requirements/portable/requirements_vulkan_noavx2.txt @@ -19,5 +19,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.21.0/llama_cpp_binaries-0.21.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" From 4583924ce7ebf56b77fd7ca837a63faeb3d3b697 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 5 Jul 2025 14:24:15 -0700 Subject: [PATCH 16/29] Remove torchvision/torchaudio mentions from the README --- README.md | 12 ++++++------ 1 file changed, 6 insertions(+), 6 deletions(-) diff --git a/README.md b/README.md index ee4a326a..04449e1d 100644 --- a/README.md +++ b/README.md @@ -137,12 +137,12 @@ conda activate textgen | System | GPU | Command | |--------|---------|---------| -| Linux/WSL | NVIDIA | `pip3 install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/cu124` | -| Linux/WSL | CPU only | `pip3 install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/cpu` | -| Linux | AMD | `pip3 install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/rocm6.2.4` | -| MacOS + MPS | Any | `pip3 install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0` | -| Windows | NVIDIA | `pip3 install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/cu124` | -| Windows | CPU only | `pip3 install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0` | +| Linux/WSL | NVIDIA | `pip3 install torch==2.6.0 --index-url https://download.pytorch.org/whl/cu124` | +| Linux/WSL | CPU only | `pip3 install torch==2.6.0 --index-url https://download.pytorch.org/whl/cpu` | +| Linux | AMD | `pip3 install torch==2.6.0 --index-url https://download.pytorch.org/whl/rocm6.2.4` | +| MacOS + MPS | Any | `pip3 install torch==2.6.0` | +| Windows | NVIDIA | `pip3 install torch==2.6.0 --index-url https://download.pytorch.org/whl/cu124` | +| Windows | CPU only | `pip3 install torch==2.6.0` | The up-to-date commands can be found here: https://pytorch.org/get-started/locally/. 
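After running one of the `pip3 install torch==2.6.0 ...` commands from the table above, a quick way to confirm that the intended build landed is a minimal sketch like the following (an illustrative snippet, not part of any patch; `torch.version.cuda` is `None` on CPU-only wheels):

```python
import torch

# Report the installed torch build and whether a CUDA device is usable.
print(f"torch {torch.__version__} (CUDA build: {torch.version.cuda})")
print(f"CUDA available: {torch.cuda.is_available()}")
if torch.cuda.is_available():
    print(f"Device 0: {torch.cuda.get_device_name(0)}")
```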
From 0f258774d34e756a654bc4d9ed49095cef1d1f67 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 5 Jul 2025 14:25:59 -0700
Subject: [PATCH 17/29] Minor README changes

---
 README.md | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/README.md b/README.md
index 04449e1d..466e4edb 100644
--- a/README.md
+++ b/README.md
@@ -183,9 +183,9 @@ python server.py
 
 Then browse to
 
-`http://localhost:7860/?__theme=dark`
+`http://127.0.0.1:7860`
 
-##### Manual install
+#### Manual install
 
 The `requirements*.txt` above contain various wheels precompiled through GitHub Actions. If you wish to compile things manually, or if you need to because no suitable wheels are available for your hardware, you can use `requirements_nowheels.txt` and then install your desired loaders manually.
 

From de4ccffff8bd0dff1fe4aca048115af2e398ea75 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sun, 6 Jul 2025 16:24:57 -0700
Subject: [PATCH 18/29] Fix the duckduckgo search

---
 modules/web_search.py                         | 26 +++++++++++++------
 requirements/full/requirements.txt            |  1 -
 requirements/full/requirements_amd.txt        |  1 -
 requirements/full/requirements_amd_noavx2.txt |  1 -
 .../full/requirements_apple_intel.txt         |  1 -
 .../full/requirements_apple_silicon.txt       |  1 -
 requirements/full/requirements_cpu_only.txt   |  1 -
 .../full/requirements_cpu_only_noavx2.txt     |  1 -
 requirements/full/requirements_cuda128.txt    |  1 -
 .../full/requirements_cuda128_noavx2.txt      |  1 -
 requirements/full/requirements_noavx2.txt     |  1 -
 requirements/full/requirements_nowheels.txt   |  1 -
 requirements/portable/requirements.txt        |  1 -
 .../portable/requirements_apple_intel.txt     |  1 -
 .../portable/requirements_apple_silicon.txt   |  1 -
 .../portable/requirements_cpu_only.txt        |  1 -
 .../portable/requirements_cpu_only_noavx2.txt |  1 -
 requirements/portable/requirements_noavx2.txt |  1 -
 .../portable/requirements_nowheels.txt        |  1 -
 requirements/portable/requirements_vulkan.txt |  1 -
 .../portable/requirements_vulkan_noavx2.txt   |  1 -
 21 files changed, 18 insertions(+), 28 deletions(-)

diff --git a/modules/web_search.py b/modules/web_search.py
index 401a42bb..3b1f6e18 100644
--- a/modules/web_search.py
+++ b/modules/web_search.py
@@ -1,6 +1,9 @@
 import concurrent.futures
+import html
+import re
 from concurrent.futures import as_completed
 from datetime import datetime
+from urllib.parse import quote_plus
 
 import requests
 
@@ -44,19 +47,26 @@ def download_web_page(url, timeout=10):
         return ""
 
 
-def perform_web_search(query, num_pages=3, max_workers=5):
+def perform_web_search(query, num_pages=3, max_workers=5, timeout=10):
     """Perform web search and return results with content"""
-    from duckduckgo_search import DDGS
-
     try:
-        with DDGS() as ddgs:
-            results = list(ddgs.text(query, max_results=num_pages))
+        # Use DuckDuckGo HTML search endpoint
+        search_url = f"https://html.duckduckgo.com/html/?q={quote_plus(query)}"
+        headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
+
+        response = requests.get(search_url, headers=headers, timeout=timeout)
+        response.raise_for_status()
+
+        # Extract results with regex
+        titles = re.findall(r'<a[^>]*class="[^"]*result__a[^"]*"[^>]*>(.*?)</a>', response.text, re.DOTALL)
+        urls = re.findall(r'<span[^>]*class="[^"]*result__url[^"]*"[^>]*>(.*?)</span>', response.text, re.DOTALL)
 
         # Prepare download tasks
         download_tasks = []
-        for i, result in enumerate(results):
-            url = result.get('href', '')
-            title = result.get('title', f'Search Result {i+1}')
+        for i in
range(min(len(titles), len(urls), num_pages)): + url = f"https://{urls[i].strip()}" + title = re.sub(r'<[^>]+>', '', titles[i]).strip() + title = html.unescape(title) download_tasks.append((url, title, i)) search_results = [None] * len(download_tasks) # Pre-allocate to maintain order diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index 1f7b8f9e..9735fcee 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -2,7 +2,6 @@ accelerate==1.5.* bitsandbytes==0.45.* colorama datasets -duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index ea2dbf67..c7db0116 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -1,7 +1,6 @@ accelerate==1.5.* colorama datasets -duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index 251d5e24..10c89332 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -1,7 +1,6 @@ accelerate==1.5.* colorama datasets -duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index 649eaaca..e900f1bc 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -1,7 +1,6 @@ accelerate==1.5.* colorama datasets -duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index 7f1895fc..a8531389 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -1,7 +1,6 @@ accelerate==1.5.* colorama datasets -duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index 6ec88d63..3735a1a8 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -1,7 +1,6 @@ accelerate==1.5.* colorama datasets -duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index 7e6c780d..f93a4d77 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -1,7 +1,6 @@ accelerate==1.5.* colorama datasets -duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* diff --git a/requirements/full/requirements_cuda128.txt b/requirements/full/requirements_cuda128.txt index cbac577a..b5bee1e4 100644 --- a/requirements/full/requirements_cuda128.txt +++ b/requirements/full/requirements_cuda128.txt @@ -2,7 +2,6 @@ accelerate==1.5.* bitsandbytes==0.45.* colorama datasets -duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* diff --git a/requirements/full/requirements_cuda128_noavx2.txt b/requirements/full/requirements_cuda128_noavx2.txt index 3f7add14..2cd91205 100644 --- a/requirements/full/requirements_cuda128_noavx2.txt +++ b/requirements/full/requirements_cuda128_noavx2.txt @@ -2,7 +2,6 @@ accelerate==1.5.* bitsandbytes==0.45.* colorama datasets -duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* diff 
--git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index 9903bf7a..35b683c8 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -2,7 +2,6 @@ accelerate==1.5.* bitsandbytes==0.45.* colorama datasets -duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt index b1c3c6ea..30020989 100644 --- a/requirements/full/requirements_nowheels.txt +++ b/requirements/full/requirements_nowheels.txt @@ -1,7 +1,6 @@ accelerate==1.5.* colorama datasets -duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt index 785ce623..d928ff08 100644 --- a/requirements/portable/requirements.txt +++ b/requirements/portable/requirements.txt @@ -1,4 +1,3 @@ -duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* html2text==2025.4.15 diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt index d56d6a79..b25298fc 100644 --- a/requirements/portable/requirements_apple_intel.txt +++ b/requirements/portable/requirements_apple_intel.txt @@ -1,4 +1,3 @@ -duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* html2text==2025.4.15 diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt index 3e00a781..e52a7248 100644 --- a/requirements/portable/requirements_apple_silicon.txt +++ b/requirements/portable/requirements_apple_silicon.txt @@ -1,4 +1,3 @@ -duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* html2text==2025.4.15 diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt index 8006762e..7c2578d2 100644 --- a/requirements/portable/requirements_cpu_only.txt +++ b/requirements/portable/requirements_cpu_only.txt @@ -1,4 +1,3 @@ -duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* html2text==2025.4.15 diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt index 9ec08082..c125c97e 100644 --- a/requirements/portable/requirements_cpu_only_noavx2.txt +++ b/requirements/portable/requirements_cpu_only_noavx2.txt @@ -1,4 +1,3 @@ -duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* html2text==2025.4.15 diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt index 8ca5441a..6daa06fc 100644 --- a/requirements/portable/requirements_noavx2.txt +++ b/requirements/portable/requirements_noavx2.txt @@ -1,4 +1,3 @@ -duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* html2text==2025.4.15 diff --git a/requirements/portable/requirements_nowheels.txt b/requirements/portable/requirements_nowheels.txt index c22d6441..b7b73eff 100644 --- a/requirements/portable/requirements_nowheels.txt +++ b/requirements/portable/requirements_nowheels.txt @@ -1,4 +1,3 @@ -duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* html2text==2025.4.15 diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt index a164c879..247d3a9c 100644 --- a/requirements/portable/requirements_vulkan.txt +++ b/requirements/portable/requirements_vulkan.txt @@ -1,4 +1,3 @@ -duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* html2text==2025.4.15 diff --git a/requirements/portable/requirements_vulkan_noavx2.txt 
b/requirements/portable/requirements_vulkan_noavx2.txt index 3b6943b7..466a1b6e 100644 --- a/requirements/portable/requirements_vulkan_noavx2.txt +++ b/requirements/portable/requirements_vulkan_noavx2.txt @@ -1,4 +1,3 @@ -duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* html2text==2025.4.15 From 8df1127ce291dc9f4c7f57da1faa879aa4f87ae7 Mon Sep 17 00:00:00 2001 From: Ali <17217248+abulhawa@users.noreply.github.com> Date: Mon, 7 Jul 2025 01:42:34 +0200 Subject: [PATCH 19/29] Fix: use embedded Python in start_windows.bat to avoid system interpreter conflicts (#7120) --- start_windows.bat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/start_windows.bat b/start_windows.bat index 68c6194e..08fedc68 100755 --- a/start_windows.bat +++ b/start_windows.bat @@ -99,7 +99,7 @@ set "CUDA_HOME=%CUDA_PATH%" call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%" || ( echo. && echo Miniforge hook not found. && goto end ) @rem setup installer env -call python one_click.py %* +call "%INSTALL_ENV_DIR%\python.exe" one_click.py %* @rem below are functions for the script next line skips these during normal execution goto end From 959d4ddb917b93f0e2abe0551905725cada3ed1a Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Philipp=20Cla=C3=9Fen?= Date: Mon, 7 Jul 2025 01:51:42 +0200 Subject: [PATCH 20/29] Fix for chat sidebars toggle buttons disappearing (#7106) --- js/main.js | 4 +--- modules/ui_chat.py | 2 +- 2 files changed, 2 insertions(+), 4 deletions(-) diff --git a/js/main.js b/js/main.js index c805fc9a..17b853f4 100644 --- a/js/main.js +++ b/js/main.js @@ -27,9 +27,7 @@ document.querySelector(".header_bar").addEventListener("click", function(event) this.style.marginBottom = chatVisible ? "0px" : "19px"; if (chatVisible && !showControlsChecked) { - document.querySelectorAll( - "#chat-tab > div > :nth-child(1), #chat-tab > div > :nth-child(3), #chat-tab > div > :nth-child(4), #extensions" - ).forEach(element => { + document.querySelectorAll("#extensions").forEach(element => { element.style.display = "none"; }); } diff --git a/modules/ui_chat.py b/modules/ui_chat.py index e2d8935a..4dade176 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -21,7 +21,7 @@ def create_ui(): shared.gradio['history'] = gr.State({'internal': [], 'visible': [], 'metadata': {}}) shared.gradio['display'] = gr.JSON(value={}, visible=False) # Hidden buffer - with gr.Tab('Chat', id='Chat', elem_id='chat-tab'): + with gr.Tab('Chat', elem_id='chat-tab'): with gr.Row(elem_id='past-chats-row', elem_classes=['pretty_scrollbar']): with gr.Column(): with gr.Row(elem_id='past-chats-buttons'): From e6bc7742fb1e28fde10ee939f870eeb75545ec56 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 6 Jul 2025 17:29:29 -0700 Subject: [PATCH 21/29] Support installing user extensions in user_data/extensions/ --- modules/extensions.py | 57 ++++++++++++------- modules/utils.py | 14 ++++- .../extensions/place-your-extensions-here.txt | 0 3 files changed, 48 insertions(+), 23 deletions(-) create mode 100644 user_data/extensions/place-your-extensions-here.txt diff --git a/modules/extensions.py b/modules/extensions.py index 6729b996..be9bc38c 100644 --- a/modules/extensions.py +++ b/modules/extensions.py @@ -2,10 +2,10 @@ import importlib import traceback from functools import partial from inspect import signature +from pathlib import Path import gradio as gr -import extensions import modules.shared as shared from modules.logging_colors import logger @@ -28,36 +28,51 @@ def 
apply_settings(extension, name): def load_extensions(): global state, setup_called state = {} + for i, name in enumerate(shared.args.extensions): - if name in available_extensions: - if name != 'api': - logger.info(f'Loading the extension "{name}"') - try: - try: - extension = importlib.import_module(f"extensions.{name}.script") - except ModuleNotFoundError: - logger.error(f"Could not import the requirements for '{name}'. Make sure to install the requirements for the extension.\n\n* To install requirements for all available extensions, launch the\n update_wizard script for your OS and choose the B option.\n\n* To install the requirements for this extension alone, launch the\n cmd script for your OS and paste the following command in the\n terminal window that appears:\n\nLinux / Mac:\n\npip install -r extensions/{name}/requirements.txt --upgrade\n\nWindows:\n\npip install -r extensions\\{name}\\requirements.txt --upgrade\n") - raise + if name not in available_extensions: + continue - # Only run setup() and apply settings from settings.yaml once - if extension not in setup_called: - apply_settings(extension, name) - if hasattr(extension, "setup"): - extension.setup() + if name != 'api': + logger.info(f'Loading the extension "{name}"') - setup_called.add(extension) + try: + # Prefer user extension, fall back to system extension + user_script_path = Path(f'user_data/extensions/{name}/script.py') + if user_script_path.exists(): + extension = importlib.import_module(f"user_data.extensions.{name}.script") + else: + extension = importlib.import_module(f"extensions.{name}.script") - state[name] = [True, i] - except: - logger.error(f'Failed to load the extension "{name}".') - traceback.print_exc() + if extension not in setup_called: + apply_settings(extension, name) + if hasattr(extension, "setup"): + extension.setup() + setup_called.add(extension) + + state[name] = [True, i, extension] # Store extension object + + except ModuleNotFoundError: + extension_location = Path('user_data/extensions') / name if user_script_path.exists() else Path('extensions') / name + logger.error( + f"Could not import the requirements for '{name}'. 
Make sure to install the requirements for the extension.\n\n" + f"* To install requirements for all available extensions, launch the\n update_wizard script for your OS and choose the B option.\n\n" + f"* To install the requirements for this extension alone, launch the\n cmd script for your OS and paste the following command in the\n terminal window that appears:\n\n" + f"Linux / Mac:\n\npip install -r {extension_location}/requirements.txt --upgrade\n\n" + f"Windows:\n\npip install -r {extension_location}\\requirements.txt --upgrade\n" + ) + raise + + except Exception: + logger.error(f'Failed to load the extension "{name}".') + traceback.print_exc() # This iterator returns the extensions in the order specified in the command-line def iterator(): for name in sorted(state, key=lambda x: state[x][1]): if state[name][0]: - yield getattr(extensions, name).script, name + yield state[name][2], name # Use stored extension object # Extension functions that map string -> string diff --git a/modules/utils.py b/modules/utils.py index c285d401..117ad590 100644 --- a/modules/utils.py +++ b/modules/utils.py @@ -183,8 +183,18 @@ def get_available_instruction_templates(): def get_available_extensions(): - extensions = sorted(set(map(lambda x: x.parts[1], Path('extensions').glob('*/script.py'))), key=natural_keys) - return extensions + # User extensions (higher priority) + user_extensions = [] + user_ext_path = Path('user_data/extensions') + if user_ext_path.exists(): + user_exts = map(lambda x: x.parts[2], user_ext_path.glob('*/script.py')) + user_extensions = sorted(set(user_exts), key=natural_keys) + + # System extensions (excluding those overridden by user extensions) + system_exts = map(lambda x: x.parts[1], Path('extensions').glob('*/script.py')) + system_extensions = sorted(set(system_exts) - set(user_extensions), key=natural_keys) + + return user_extensions + system_extensions def get_available_loras(): diff --git a/user_data/extensions/place-your-extensions-here.txt b/user_data/extensions/place-your-extensions-here.txt new file mode 100644 index 00000000..e69de29b From 60123a67ac051d1780acb13f0f40fc9fb77860e8 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 6 Jul 2025 17:44:41 -0700 Subject: [PATCH 22/29] Better log message when extension requirements are not found --- modules/extensions.py | 13 +++++++++---- 1 file changed, 9 insertions(+), 4 deletions(-) diff --git a/modules/extensions.py b/modules/extensions.py index be9bc38c..e0010312 100644 --- a/modules/extensions.py +++ b/modules/extensions.py @@ -54,12 +54,17 @@ def load_extensions(): except ModuleNotFoundError: extension_location = Path('user_data/extensions') / name if user_script_path.exists() else Path('extensions') / name + windows_path = str(extension_location).replace('/', '\\') logger.error( f"Could not import the requirements for '{name}'. Make sure to install the requirements for the extension.\n\n" - f"* To install requirements for all available extensions, launch the\n update_wizard script for your OS and choose the B option.\n\n" - f"* To install the requirements for this extension alone, launch the\n cmd script for your OS and paste the following command in the\n terminal window that appears:\n\n" - f"Linux / Mac:\n\npip install -r {extension_location}/requirements.txt --upgrade\n\n" - f"Windows:\n\npip install -r {extension_location}\\requirements.txt --upgrade\n" + f"* To install requirements automatically, launch the update_wizard script for your OS and:\n\n" + f"1. 
Choose option B (Install/update extensions requirements)\n" + f"2. Select '{name}' from the extension list\n\n" + f"* To install requirements manually, launch the cmd script for your OS and paste the following command:\n\n" + f"Linux / Mac:\n\n" + f"pip install -r {extension_location}/requirements.txt --upgrade\n\n" + f"Windows:\n\n" + f"pip install -r {windows_path}\\requirements.txt --upgrade\n" ) raise From e5767d4fc5871875b5c555257f94ba707dc1ad55 Mon Sep 17 00:00:00 2001 From: Alidr79 Date: Mon, 7 Jul 2025 04:18:53 +0330 Subject: [PATCH 23/29] Update ui_model_menu.py blocking the --multi-user access in backend (#7098) --- modules/ui_model_menu.py | 4 ++++ modules/ui_session.py | 14 ++++++++------ 2 files changed, 12 insertions(+), 6 deletions(-) diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index a18d7d10..e09e292e 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -130,6 +130,10 @@ def create_ui(): def create_event_handlers(): + mu = shared.args.multi_user + if mu: + return + shared.gradio['loader'].change(loaders.make_loader_params_visible, gradio('loader'), gradio(loaders.get_all_params()), show_progress=False) # In this event handler, the interface state is read and updated diff --git a/modules/ui_session.py b/modules/ui_session.py index a69e155b..f8d573ad 100644 --- a/modules/ui_session.py +++ b/modules/ui_session.py @@ -27,9 +27,10 @@ def create_ui(): shared.gradio['bool_menu'] = gr.CheckboxGroup(choices=get_boolean_arguments(), value=get_boolean_arguments(active=True), label="Boolean command-line flags", elem_classes='checkboxgroup-table') shared.gradio['theme_state'] = gr.Textbox(visible=False, value='dark' if shared.settings['dark_theme'] else 'light') - shared.gradio['save_settings'].click( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - handle_save_settings, gradio('interface_state', 'preset_menu', 'extensions_menu', 'show_controls', 'theme_state'), gradio('save_contents', 'save_filename', 'save_root', 'file_saver'), show_progress=False) + if not mu: + shared.gradio['save_settings'].click( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + handle_save_settings, gradio('interface_state', 'preset_menu', 'extensions_menu', 'show_controls', 'theme_state'), gradio('save_contents', 'save_filename', 'save_root', 'file_saver'), show_progress=False) shared.gradio['toggle_dark_mode'].click( lambda x: 'dark' if x == 'light' else 'light', gradio('theme_state'), gradio('theme_state')).then( @@ -42,9 +43,10 @@ def create_ui(): ) # Reset interface event - shared.gradio['reset_interface'].click( - set_interface_arguments, gradio('extensions_menu', 'bool_menu'), None).then( - None, None, None, js='() => {document.body.innerHTML=\'
<h1 style="font-family:monospace;margin-top:20%;color:lightgray;text-align:center;">Reloading...</h1>
\'; setTimeout(function(){location.reload()},2500); return []}') + if not mu: + shared.gradio['reset_interface'].click( + set_interface_arguments, gradio('extensions_menu', 'bool_menu'), None).then( + None, None, None, js='() => {document.body.innerHTML=\'
<h1 style="font-family:monospace;margin-top:20%;color:lightgray;text-align:center;">Reloading...</h1>
\'; setTimeout(function(){location.reload()},2500); return []}') def handle_save_settings(state, preset, extensions, show_controls, theme): From cbef2720ce83c05c51159be374ccc3f7ba46f07c Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 6 Jul 2025 20:14:02 -0700 Subject: [PATCH 24/29] Revert "Fix: use embedded Python in start_windows.bat to avoid system interpreter conflicts (#7120)" This reverts commit 8df1127ce291dc9f4c7f57da1faa879aa4f87ae7. --- start_windows.bat | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/start_windows.bat b/start_windows.bat index 08fedc68..68c6194e 100755 --- a/start_windows.bat +++ b/start_windows.bat @@ -99,7 +99,7 @@ set "CUDA_HOME=%CUDA_PATH%" call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%" || ( echo. && echo Miniforge hook not found. && goto end ) @rem setup installer env -call "%INSTALL_ENV_DIR%\python.exe" one_click.py %* +call python one_click.py %* @rem below are functions for the script next line skips these during normal execution goto end From e52bc0acb2b2f24ced25326010629d5ed2ade3da Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 6 Jul 2025 20:28:35 -0700 Subject: [PATCH 25/29] Update llama.cpp --- requirements/full/requirements.txt | 4 ++-- requirements/full/requirements_amd.txt | 4 ++-- requirements/full/requirements_amd_noavx2.txt | 4 ++-- requirements/full/requirements_apple_intel.txt | 4 ++-- requirements/full/requirements_apple_silicon.txt | 6 +++--- requirements/full/requirements_cpu_only.txt | 4 ++-- requirements/full/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/full/requirements_cuda128.txt | 4 ++-- requirements/full/requirements_cuda128_noavx2.txt | 4 ++-- requirements/full/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements.txt | 4 ++-- requirements/portable/requirements_apple_intel.txt | 4 ++-- requirements/portable/requirements_apple_silicon.txt | 6 +++--- requirements/portable/requirements_cpu_only.txt | 4 ++-- requirements/portable/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/portable/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements_vulkan.txt | 4 ++-- requirements/portable/requirements_vulkan_noavx2.txt | 4 ++-- 18 files changed, 38 insertions(+), 38 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index 9735fcee..34fd13be 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -33,8 +33,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" 
https://github.com/oobabooga/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index c7db0116..9b18524a 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -32,7 +32,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index 10c89332..6b9cc2a9 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -32,7 +32,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index e900f1bc..b7830355 100644 --- 
a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -32,7 +32,7 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4-py3-none-any.whl https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index a8531389..e48d4dee 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -32,8 +32,8 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4-py3-none-any.whl https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index 
3735a1a8..831ff51f 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -32,5 +32,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index f93a4d77..fb81f40b 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -32,5 +32,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_cuda128.txt b/requirements/full/requirements_cuda128.txt index b5bee1e4..3d29c93f 100644 --- a/requirements/full/requirements_cuda128.txt +++ b/requirements/full/requirements_cuda128.txt @@ -33,8 +33,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" 
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_cuda128_noavx2.txt b/requirements/full/requirements_cuda128_noavx2.txt index 2cd91205..7528aed0 100644 --- a/requirements/full/requirements_cuda128_noavx2.txt +++ b/requirements/full/requirements_cuda128_noavx2.txt @@ -33,8 +33,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/turboderp-org/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index 35b683c8..e86e4524 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -33,8 +33,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" 
https://github.com/oobabooga/exllamav3/releases/download/v0.0.4/exllamav3-0.0.4+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt index d928ff08..e9fa61f1 100644 --- a/requirements/portable/requirements.txt +++ b/requirements/portable/requirements.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt index b25298fc..4f9b0668 100644 --- a/requirements/portable/requirements_apple_intel.txt +++ b/requirements/portable/requirements_apple_intel.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt index e52a7248..9f403a0b 100644 --- a/requirements/portable/requirements_apple_silicon.txt +++ b/requirements/portable/requirements_apple_silicon.txt @@ -18,6 +18,6 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0-py3-none-macosx_13_0_arm64.whl; platform_system == 
"Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt index 7c2578d2..8e9baa32 100644 --- a/requirements/portable/requirements_cpu_only.txt +++ b/requirements/portable/requirements_cpu_only.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt index c125c97e..ebd63591 100644 --- a/requirements/portable/requirements_cpu_only_noavx2.txt +++ b/requirements/portable/requirements_cpu_only_noavx2.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt index 6daa06fc..6802a337 100644 --- a/requirements/portable/requirements_noavx2.txt +++ b/requirements/portable/requirements_noavx2.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" 
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt index 247d3a9c..624c222d 100644 --- a/requirements/portable/requirements_vulkan.txt +++ b/requirements/portable/requirements_vulkan.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt index 466a1b6e..edb82a31 100644 --- a/requirements/portable/requirements_vulkan_noavx2.txt +++ b/requirements/portable/requirements_vulkan_noavx2.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.23.0/llama_cpp_binaries-0.23.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.24.0/llama_cpp_binaries-0.24.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" From 426e7a4cec7427927cb5c0da11b1330d26ab7c5e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 7 Jul 2025 08:43:01 -0700 Subject: [PATCH 26/29] Update the extensions documentation --- docs/07 - Extensions.md | 10 ++++++++-- 1 file changed, 8 insertions(+), 2 deletions(-) diff --git a/docs/07 - Extensions.md b/docs/07 - Extensions.md index 78497888..ebcd3c0e 100644 --- a/docs/07 - Extensions.md +++ b/docs/07 - Extensions.md @@ -1,8 +1,14 @@ # Extensions -Extensions are defined by files named `script.py` inside subfolders of `text-generation-webui/extensions`. They are loaded at startup if the folder name is specified after the `--extensions` flag. +Extensions are defined by files named `script.py` inside subfolders of either: +- `text-generation-webui/extensions` +- `text-generation-webui/user_data/extensions` -For instance, `extensions/silero_tts/script.py` gets loaded with `python server.py --extensions silero_tts`. +They are loaded at startup if the folder name is specified after the `--extensions` flag. 
+ +For instance, `extensions/silero_tts/script.py` or `user_data/extensions/silero_tts/script.py` gets loaded with `python server.py --extensions silero_tts`. + +**Note:** Extensions in `user_data/extensions/` take priority over those in `extensions/` when both exist with the same name. ## [text-generation-webui-extensions](https://github.com/oobabooga/text-generation-webui-extensions) From 07e6f004c59e457ed5ccbd963a97214d73b1ed4b Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 7 Jul 2025 11:28:47 -0700 Subject: [PATCH 27/29] Rename a button in the Session tab for clarity --- modules/ui_session.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ui_session.py b/modules/ui_session.py index f8d573ad..60b19f47 100644 --- a/modules/ui_session.py +++ b/modules/ui_session.py @@ -17,7 +17,7 @@ def create_ui(): with gr.Column(): gr.Markdown("## Extensions & flags") - shared.gradio['save_settings'] = gr.Button('Save settings to user_data/settings.yaml', elem_classes='refresh-button', interactive=not mu) + shared.gradio['save_settings'] = gr.Button('Save extensions settings to user_data/settings.yaml', elem_classes='refresh-button', interactive=not mu) shared.gradio['reset_interface'] = gr.Button("Apply flags/extensions and restart", interactive=not mu) with gr.Row(): with gr.Column(): From ca226a54c6d8c3f3cbb27159b92eb82860517a08 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 7 Jul 2025 11:29:37 -0700 Subject: [PATCH 28/29] Disable the message version navigation hover effects during streaming --- css/main.css | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/css/main.css b/css/main.css index 036a2bb7..240a94d5 100644 --- a/css/main.css +++ b/css/main.css @@ -1291,18 +1291,16 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { opacity: 1; } -/* Disable message action hover effects during generation */ -._generating .message:hover .message-actions, -._generating .user-message:hover .message-actions, -._generating .assistant-message:hover .message-actions { +/* Disable message actions and version navigation hover effects during generation */ +._generating :is(.message, .user-message, .assistant-message):hover :is(.message-actions, .version-navigation) { opacity: 0 !important; + pointer-events: none; } -/* Disable message action hover effects during scrolling */ -.scrolling .message:hover .message-actions, -.scrolling .user-message:hover .message-actions, -.scrolling .assistant-message:hover .message-actions { +/* Disable message actions and version navigation hover effects during scrolling */ +.scrolling :is(.message, .user-message, .assistant-message):hover :is(.message-actions, .version-navigation) { opacity: 0 !important; + pointer-events: none; } .footer-button svg { From 74d98186fc289087025759d16b7a8ecf5cf770da Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 7 Jul 2025 13:23:23 -0700 Subject: [PATCH 29/29] Slightly more robust autoscroll --- js/main.js | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/js/main.js b/js/main.js index 17b853f4..e5b14d8e 100644 --- a/js/main.js +++ b/js/main.js @@ -187,8 +187,11 @@ const observer = new MutationObserver(function(mutations) { doSyntaxHighlighting(); - if (!window.isScrolled && !isScrollingClassOnly && targetElement.scrollTop !== targetElement.scrollHeight) { - targetElement.scrollTop = targetElement.scrollHeight; + if 
(!window.isScrolled && !isScrollingClassOnly) { + const maxScroll = targetElement.scrollHeight - targetElement.clientHeight; + if (maxScroll > 0 && targetElement.scrollTop < maxScroll - 1) { + targetElement.scrollTop = maxScroll; + } } const chatElement = document.getElementById("chat");
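
Editor's note on the final patch: the new autoscroll condition can be read in isolation. The sketch below is a minimal restatement of that logic, not part of any patch; the function name and standalone framing are illustrative, and it assumes a scrollable chat container like the `targetElement` that the MutationObserver in js/main.js operates on. The key point is that `scrollTop` tops out at `scrollHeight - clientHeight`, never at `scrollHeight` itself, so the removed test `scrollTop !== targetElement.scrollHeight` was always true and forced a scroll write even when the view was already pinned to the bottom; comparing against `maxScroll - 1` also tolerates the fractional `scrollTop` values browsers report on zoomed or high-DPI displays.

    // Minimal sketch of the new autoscroll check (illustrative name and
    // framing; mirrors the logic added to js/main.js in the last patch).
    function autoscrollIfNeeded(targetElement) {
        // The real "bottom" position: scrollTop can reach at most
        // scrollHeight - clientHeight, never scrollHeight itself.
        const maxScroll = targetElement.scrollHeight - targetElement.clientHeight;

        // The -1 tolerance absorbs fractional scrollTop values, so a view
        // that is effectively at the bottom is not scrolled redundantly.
        if (maxScroll > 0 && targetElement.scrollTop < maxScroll - 1) {
            targetElement.scrollTop = maxScroll;
        }
    }

In the patch itself this logic runs inside the MutationObserver callback, guarded by `!window.isScrolled && !isScrollingClassOnly`, so it only fires while the user has not manually scrolled away from the bottom of the chat.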