+def generate_chat_html(history, name1, name2, reset_cache=False, last_message_only=False):
+    if not last_message_only:
+        output = f'<style>{chat_styles["wpp"]}</style><div class="chat" id="chat"><div class="messages">'
+    else:
+        output = ""
- for i in range(len(history['visible'])):
- row_visible = history['visible'][i]
- row_internal = history['internal'][i]
- converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
+ def create_message(role, content, raw_content):
+ """Inner function for WPP-style messages."""
+ text_class = "text-you" if role == "user" else "text-bot"
- # Get timestamps
- user_timestamp = format_message_timestamp(history, "user", i)
- assistant_timestamp = format_message_timestamp(history, "assistant", i)
+ # Get role-specific data
+ timestamp = format_message_timestamp(history, role, i)
+ attachments = format_message_attachments(history, role, i)
- # Get attachments
- user_attachments = format_message_attachments(history, "user", i)
- assistant_attachments = format_message_attachments(history, "assistant", i)
+ # Create info button if timestamp exists
+ info_message = ""
+ if timestamp:
+ tooltip_text = get_message_tooltip(history, role, i)
+ info_message = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"')
- # Create info buttons for timestamps if they exist
- info_message_user = ""
- if user_timestamp != "":
- # Extract the timestamp value from the span
- user_timestamp_value = user_timestamp.split('>', 1)[1].split('<', 1)[0]
- info_message_user = info_button.replace("message", user_timestamp_value)
-
- info_message_assistant = ""
- if assistant_timestamp != "":
- # Extract the timestamp value from the span
- assistant_timestamp_value = assistant_timestamp.split('>', 1)[1].split('<', 1)[0]
- info_message_assistant = info_button.replace("message", assistant_timestamp_value)
-
-        if converted_visible[0]:  # Don't display empty user messages
-            output += (
-                f'<div class="message" '
-                f'data-raw="{html.escape(row_internal[0], quote=True)}">'
-                f'<div class="text-you">'
-                f'<div class="message-body">{converted_visible[0]}</div>'
-                f'{user_attachments}'
-                f'{actions_html(history, i, "user", info_message_user)}'
-                f'</div>'
-                f'</div>'
-            )
-
-        output += (
+        return (
             f'<div class="message" '
-            f'data-raw="{html.escape(row_internal[1], quote=True)}">'
-            f'<div class="text-bot">'
-            f'<div class="message-body">{converted_visible[1]}</div>'
-            f'{assistant_attachments}'
-            f'{actions_html(history, i, "assistant", info_message_assistant)}'
+            f'data-raw="{html.escape(raw_content, quote=True)}">'
+            f'<div class="{text_class}">'
+            f'<div class="message-body">{content}</div>'
+            f'{attachments}'
+            f'{actions_html(history, i, role, info_message)}'
             f'</div>'
             f'</div>'
         )
- output += "
"
+ # Determine range
+ start_idx = len(history['visible']) - 1 if last_message_only else 0
+ end_idx = len(history['visible'])
+
+ for i in range(start_idx, end_idx):
+ row_visible = history['visible'][i]
+ row_internal = history['internal'][i]
+
+ # Convert content
+ if last_message_only:
+ converted_visible = [None, convert_to_markdown_wrapped(row_visible[1], message_id=i, use_cache=i != len(history['visible']) - 1)]
+ else:
+ converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
+
+ # Generate messages
+ if not last_message_only and converted_visible[0]:
+ output += create_message("user", converted_visible[0], row_internal[0])
+
+ output += create_message("assistant", converted_visible[1], row_internal[1])
+
+    if not last_message_only:
+        output += "</div></div>"
+
return output
@@ -629,15 +663,15 @@ def time_greeting():
return "Good evening!"
-def chat_html_wrapper(history, name1, name2, mode, style, character, reset_cache=False):
+def chat_html_wrapper(history, name1, name2, mode, style, character, reset_cache=False, last_message_only=False):
if len(history['visible']) == 0:
greeting = f"
{time_greeting()} How can I help you today?
"
result = f'
{greeting}
'
elif mode == 'instruct':
- result = generate_instruct_html(history)
+ result = generate_instruct_html(history, last_message_only=last_message_only)
elif style == 'wpp':
- result = generate_chat_html(history, name1, name2)
+ result = generate_chat_html(history, name1, name2, last_message_only=last_message_only)
else:
- result = generate_cai_chat_html(history, name1, name2, style, character, reset_cache)
+ result = generate_cai_chat_html(history, name1, name2, style, character, reset_cache=reset_cache, last_message_only=last_message_only)
- return {'html': result}
+ return {'html': result, 'last_message_only': last_message_only}
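
For context, a minimal standalone sketch of the idea behind the new `last_message_only` flag (simplified stand-ins, not the project's renderer): a full redraw walks every history row, while a streaming update re-renders only the final row and skips the user half of that row.

```python
def render_history(history, last_message_only=False):
    def render_row(i, row):
        user, assistant = row
        parts = []
        if not last_message_only and user:          # user turn is skipped on partial updates
            parts.append(f"[user #{i}] {user}")
        parts.append(f"[assistant #{i}] {assistant}")
        return "\n".join(parts)

    # Same range selection as the patched generator: last row only, or everything
    start = len(history['visible']) - 1 if last_message_only else 0
    return "\n".join(
        render_row(i, history['visible'][i]) for i in range(start, len(history['visible']))
    )


history = {'visible': [["hi", "hello!"], ["2+2?", "4"]]}
print(render_history(history))                          # full redraw: both rows
print(render_history(history, last_message_only=True))  # streaming update: last row only
```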
diff --git a/modules/llama_cpp_server.py b/modules/llama_cpp_server.py
index d695c74e..a79e24e4 100644
--- a/modules/llama_cpp_server.py
+++ b/modules/llama_cpp_server.py
@@ -408,15 +408,42 @@ class LlamaServer:
def filter_stderr_with_progress(process_stderr):
- progress_pattern = re.compile(r'slot update_slots: id.*progress = (\d+\.\d+)')
+ """
+ Reads stderr lines from a process, filters out noise, and displays progress updates
+ inline (overwriting the same line) until completion.
+ """
+ progress_re = re.compile(r'slot update_slots: id.*progress = (\d+\.\d+)')
+ last_was_progress = False
+
try:
- for line in iter(process_stderr.readline, ''):
- progress_match = progress_pattern.search(line)
- if progress_match:
- sys.stderr.write(line)
- sys.stderr.flush()
- elif not line.startswith(('srv ', 'slot ')) and 'log_server_r: request: GET /health' not in line:
- sys.stderr.write(line)
- sys.stderr.flush()
+ for raw in iter(process_stderr.readline, ''):
+ line = raw.rstrip('\r\n')
+ match = progress_re.search(line)
+
+ if match:
+ progress = float(match.group(1))
+
+ # Extract just the part from "prompt processing" onwards
+ prompt_processing_idx = line.find('prompt processing')
+ if prompt_processing_idx != -1:
+ display_line = line[prompt_processing_idx:]
+ else:
+ display_line = line # fallback to full line
+
+ # choose carriage return for in-progress or newline at completion
+ end_char = '\r' if progress < 1.0 else '\n'
+ print(display_line, end=end_char, file=sys.stderr, flush=True)
+ last_was_progress = (progress < 1.0)
+
+ # skip noise lines
+ elif not (line.startswith(('srv ', 'slot ')) or 'log_server_r: request: GET /health' in line):
+ # if we were in progress, finish that line first
+ if last_was_progress:
+ print(file=sys.stderr)
+
+ print(line, file=sys.stderr, flush=True)
+ last_was_progress = False
+
except (ValueError, IOError):
+ # silently ignore broken output or IO errors
pass
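
As an aside, a tiny self-contained demo of the carriage-return technique used above: in-progress updates are printed with `end='\r'` so they overwrite one another on the same line, and a newline is emitted once progress reaches 1.0. The loop and the message text are invented for illustration.

```python
import sys
import time


def show_progress(steps=20):
    for n in range(steps + 1):
        progress = n / steps
        # Overwrite the line while in progress; finish it with a newline at 100%
        end_char = '\r' if progress < 1.0 else '\n'
        print(f"prompt processing: {progress:.0%}", end=end_char, file=sys.stderr, flush=True)
        time.sleep(0.05)


if __name__ == "__main__":
    show_progress()
```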
diff --git a/modules/models.py b/modules/models.py
index d329ae3c..c1e7fb56 100644
--- a/modules/models.py
+++ b/modules/models.py
@@ -116,7 +116,7 @@ def unload_model(keep_model_name=False):
return
is_llamacpp = (shared.model.__class__.__name__ == 'LlamaServer')
- if shared.args.loader == 'ExLlamav3_HF':
+ if shared.model.__class__.__name__ == 'Exllamav3HF':
shared.model.unload()
shared.model = shared.tokenizer = None
diff --git a/modules/models_settings.py b/modules/models_settings.py
index c914bdea..283a9744 100644
--- a/modules/models_settings.py
+++ b/modules/models_settings.py
@@ -329,6 +329,7 @@ def estimate_vram(gguf_file, gpu_layers, ctx_size, cache_type):
# Extract values from metadata
n_layers = None
n_kv_heads = None
+ n_attention_heads = None # Fallback for models without separate KV heads
embedding_dim = None
for key, value in metadata.items():
@@ -336,9 +337,14 @@ def estimate_vram(gguf_file, gpu_layers, ctx_size, cache_type):
n_layers = value
elif key.endswith('.attention.head_count_kv'):
n_kv_heads = max(value) if isinstance(value, list) else value
+ elif key.endswith('.attention.head_count'):
+ n_attention_heads = max(value) if isinstance(value, list) else value
elif key.endswith('.embedding_length'):
embedding_dim = value
+ if n_kv_heads is None:
+ n_kv_heads = n_attention_heads
+
if gpu_layers > n_layers:
gpu_layers = n_layers
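
For a sense of why the fallback matters, here is a hedged back-of-the-envelope sketch: the standard 2 × layers × ctx × kv_heads × head_dim × bytes KV-cache formula applied to GGUF-style metadata, with the same head-count fallback as above. The keys and numbers are illustrative, and this is not necessarily the exact arithmetic inside `estimate_vram`.

```python
def rough_kv_cache_bytes(metadata, ctx_size, bytes_per_element=2):
    n_layers = n_kv_heads = n_heads = embedding_dim = None
    for key, value in metadata.items():
        if key.endswith('.block_count'):
            n_layers = value
        elif key.endswith('.attention.head_count_kv'):
            n_kv_heads = max(value) if isinstance(value, list) else value
        elif key.endswith('.attention.head_count'):
            n_heads = max(value) if isinstance(value, list) else value
        elif key.endswith('.embedding_length'):
            embedding_dim = value

    if n_kv_heads is None:        # MHA model: every attention head has its own KV head
        n_kv_heads = n_heads

    head_dim = embedding_dim // n_heads
    return 2 * n_layers * ctx_size * n_kv_heads * head_dim * bytes_per_element


# Illustrative metadata for a model that only publishes head_count (fallback path)
metadata = {
    'llama.block_count': 32,
    'llama.attention.head_count': 32,
    'llama.embedding_length': 4096,
}
print(rough_kv_cache_bytes(metadata, ctx_size=8192) / 1024**3, "GiB")  # 4.0 GiB at fp16
```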
diff --git a/modules/presets.py b/modules/presets.py
index cf706605..3eb1f5fc 100644
--- a/modules/presets.py
+++ b/modules/presets.py
@@ -1,6 +1,5 @@
import functools
import pprint
-import random
from pathlib import Path
import yaml
@@ -93,68 +92,17 @@ def load_preset_for_ui(name, state):
return state, *[generate_params[k] for k in presets_params()]
-def random_preset(state):
- params_and_values = {
- 'remove_tail_tokens': {
- 'top_p': [0.5, 0.8, 0.9, 0.95, 0.99],
- 'min_p': [0.5, 0.2, 0.1, 0.05, 0.01],
- 'top_k': [3, 5, 10, 20, 30, 40],
- 'typical_p': [0.2, 0.575, 0.95],
- 'tfs': [0.5, 0.8, 0.9, 0.95, 0.99],
- 'top_a': [0.5, 0.2, 0.1, 0.05, 0.01],
- 'epsilon_cutoff': [1, 3, 5, 7, 9],
- 'eta_cutoff': [3, 6, 9, 12, 15, 18],
- },
- 'flatten_distribution': {
- 'temperature': [0.1, 0.5, 0.7, 0.8, 1, 1.2, 1.5, 2.0, 5.0],
- 'dynamic_temperature': [
- [0.1, 1],
- [0.1, 1.5],
- [0.1, 2],
- [0.1, 5],
- [0.5, 1],
- [0.5, 1.5],
- [0.5, 2],
- [0.5, 5],
- [0.8, 1],
- [0.8, 1.5],
- [0.8, 2],
- [0.8, 5],
- [1, 1.5],
- [1, 2],
- [1, 5]
- ],
- 'smoothing_factor': [0.2, 0.3, 0.6, 1.2],
- },
- 'repetition': {
- 'repetition_penalty': [1, 1.05, 1.1, 1.15, 1.20, 1.25],
- 'presence_penalty': [0, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 2.0],
- 'frequency_penalty': [0, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 2.0],
- },
- 'other': {
- 'temperature_last': [True, False],
- }
- }
-
- generate_params = default_preset()
- for cat in params_and_values:
- choices = list(params_and_values[cat].keys())
- if shared.args.loader is not None:
- choices = [x for x in choices if loader_contains(x)]
-
- if len(choices) > 0:
- choice = random.choice(choices)
- value = random.choice(params_and_values[cat][choice])
- if choice == 'dynamic_temperature':
- generate_params['dynamic_temperature'] = True
- generate_params['dynatemp_low'] = value[0]
- generate_params['dynatemp_high'] = value[1]
- else:
- generate_params[choice] = value
-
+def reset_preset_for_ui(name, state):
+ """Reset current preset to its saved values from file"""
+ generate_params = load_preset(name, verbose=True)
+ state.update(generate_params)
+ return state, *[generate_params[k] for k in presets_params()]
+
+
+def neutralize_samplers_for_ui(state):
+ """Set all samplers to their default/neutral values"""
+ generate_params = default_preset()
state.update(generate_params)
- logger.info("GENERATED_PRESET=")
- pprint.PrettyPrinter(indent=4, width=1, sort_dicts=False).pprint(remove_defaults(state))
return state, *[generate_params[k] for k in presets_params()]
diff --git a/modules/shared.py b/modules/shared.py
index d2305f30..b8ab2426 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -9,6 +9,7 @@ from pathlib import Path
import yaml
from modules.logging_colors import logger
+from modules.presets import default_preset
# Model variables
model = None
@@ -21,60 +22,19 @@ lora_names = []
# Generation variables
stop_everything = False
generation_lock = None
-processing_message = '*Is typing...*'
+processing_message = ''
# UI variables
gradio = {}
persistent_interface_state = {}
need_restart = False
-# UI defaults
-settings = {
- 'show_controls': True,
- 'start_with': '',
- 'mode': 'instruct',
- 'chat_style': 'cai-chat',
- 'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
- 'prompt-default': 'QA',
- 'prompt-notebook': 'QA',
- 'character': 'Assistant',
- 'name1': 'You',
- 'user_bio': '',
- 'custom_system_message': '',
- 'preset': 'min_p',
- 'max_new_tokens': 512,
- 'max_new_tokens_min': 1,
- 'max_new_tokens_max': 4096,
- 'prompt_lookup_num_tokens': 0,
- 'max_tokens_second': 0,
- 'max_updates_second': 12,
- 'auto_max_new_tokens': True,
- 'ban_eos_token': False,
- 'add_bos_token': True,
- 'enable_thinking': True,
- 'skip_special_tokens': True,
- 'stream': True,
- 'static_cache': False,
- 'truncation_length': 8192,
- 'seed': -1,
- 'custom_stopping_strings': '',
- 'custom_token_bans': '',
- 'negative_prompt': '',
- 'dark_theme': True,
- 'default_extensions': [],
- 'instruction_template_str': "{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if not ns.found -%}\n {{- '' + 'Below is an instruction that describes a task. Write a response that appropriately completes the request.' + '\\n\\n' -}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {{- '' + message['content'] + '\\n\\n' -}}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{-'### Instruction:\\n' + message['content'] + '\\n\\n'-}}\n {%- else -%}\n {{-'### Response:\\n' + message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{-'### Response:\\n'-}}\n{%- endif -%}",
- 'chat_template_str': "{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {%- if message['content'] -%}\n {{- message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- if user_bio -%}\n {{- user_bio + '\\n\\n' -}}\n {%- endif -%}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{- name1 + ': ' + message['content'] + '\\n'-}}\n {%- else -%}\n {{- name2 + ': ' + message['content'] + '\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}",
-}
-
-default_settings = copy.deepcopy(settings)
-
# Parser copied from https://github.com/vladmandic/automatic
parser = argparse.ArgumentParser(description="Text generation web UI", conflict_handler='resolve', add_help=True, formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=55, indent_increment=2, width=200))
# Basic settings
group = parser.add_argument_group('Basic settings')
group.add_argument('--multi-user', action='store_true', help='Multi-user mode. Chat histories are not saved or automatically loaded. Warning: this is likely not safe for sharing publicly.')
-group.add_argument('--character', type=str, help='The name of the character to load in chat mode by default.')
group.add_argument('--model', type=str, help='Name of the model to load by default.')
group.add_argument('--lora', type=str, nargs='+', help='The list of LoRAs to load. If you want to load more than one LoRA, write the names separated by spaces.')
group.add_argument('--model-dir', type=str, default='user_data/models', help='Path to directory with all the models.')
@@ -230,6 +190,102 @@ for arg in sys.argv[1:]:
elif hasattr(args, arg):
provided_arguments.append(arg)
+# Default generation parameters
+neutral_samplers = default_preset()
+
+# UI defaults
+settings = {
+ 'show_controls': True,
+ 'start_with': '',
+ 'mode': 'instruct',
+ 'chat_style': 'cai-chat',
+ 'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
+ 'enable_web_search': False,
+ 'web_search_pages': 3,
+ 'prompt-default': 'QA',
+ 'prompt-notebook': 'QA',
+ 'preset': 'Qwen3 - Thinking' if Path('user_data/presets/Qwen3 - Thinking.yaml').exists() else None,
+ 'max_new_tokens': 512,
+ 'max_new_tokens_min': 1,
+ 'max_new_tokens_max': 4096,
+ 'prompt_lookup_num_tokens': 0,
+ 'max_tokens_second': 0,
+ 'auto_max_new_tokens': True,
+ 'ban_eos_token': False,
+ 'add_bos_token': True,
+ 'enable_thinking': True,
+ 'skip_special_tokens': True,
+ 'stream': True,
+ 'static_cache': False,
+ 'truncation_length': 8192,
+ 'seed': -1,
+ 'custom_stopping_strings': '',
+ 'custom_token_bans': '',
+ 'negative_prompt': '',
+ 'dark_theme': True,
+ 'paste_to_attachment': False,
+
+ # Character settings
+ 'character': 'Assistant',
+ 'name1': 'You',
+ 'name2': 'AI',
+ 'user_bio': '',
+ 'context': 'The following is a conversation with an AI Large Language Model. The AI has been trained to answer questions, provide recommendations, and help with decision making. The AI follows user requests. The AI thinks outside the box.',
+ 'greeting': 'How can I help you today?',
+ 'custom_system_message': '',
+ 'instruction_template_str': "{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if not ns.found -%}\n {{- '' + 'Below is an instruction that describes a task. Write a response that appropriately completes the request.' + '\\n\\n' -}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {{- '' + message['content'] + '\\n\\n' -}}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{-'### Instruction:\\n' + message['content'] + '\\n\\n'-}}\n {%- else -%}\n {{-'### Response:\\n' + message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{-'### Response:\\n'-}}\n{%- endif -%}",
+ 'chat_template_str': "{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {%- if message['content'] -%}\n {{- message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- if user_bio -%}\n {{- user_bio + '\\n\\n' -}}\n {%- endif -%}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{- name1 + ': ' + message['content'] + '\\n'-}}\n {%- else -%}\n {{- name2 + ': ' + message['content'] + '\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}",
+
+ # Generation parameters - Curve shape
+ 'temperature': 0.6,
+ 'dynatemp_low': neutral_samplers['dynatemp_low'],
+ 'dynatemp_high': neutral_samplers['dynatemp_high'],
+ 'dynatemp_exponent': neutral_samplers['dynatemp_exponent'],
+ 'smoothing_factor': neutral_samplers['smoothing_factor'],
+ 'smoothing_curve': neutral_samplers['smoothing_curve'],
+
+ # Generation parameters - Curve cutoff
+ 'min_p': neutral_samplers['min_p'],
+ 'top_p': 0.95,
+ 'top_k': 20,
+ 'typical_p': neutral_samplers['typical_p'],
+ 'xtc_threshold': neutral_samplers['xtc_threshold'],
+ 'xtc_probability': neutral_samplers['xtc_probability'],
+ 'epsilon_cutoff': neutral_samplers['epsilon_cutoff'],
+ 'eta_cutoff': neutral_samplers['eta_cutoff'],
+ 'tfs': neutral_samplers['tfs'],
+ 'top_a': neutral_samplers['top_a'],
+ 'top_n_sigma': neutral_samplers['top_n_sigma'],
+
+ # Generation parameters - Repetition suppression
+ 'dry_multiplier': neutral_samplers['dry_multiplier'],
+ 'dry_allowed_length': neutral_samplers['dry_allowed_length'],
+ 'dry_base': neutral_samplers['dry_base'],
+ 'repetition_penalty': neutral_samplers['repetition_penalty'],
+ 'frequency_penalty': neutral_samplers['frequency_penalty'],
+ 'presence_penalty': neutral_samplers['presence_penalty'],
+ 'encoder_repetition_penalty': neutral_samplers['encoder_repetition_penalty'],
+ 'no_repeat_ngram_size': neutral_samplers['no_repeat_ngram_size'],
+ 'repetition_penalty_range': neutral_samplers['repetition_penalty_range'],
+
+ # Generation parameters - Alternative sampling methods
+ 'penalty_alpha': neutral_samplers['penalty_alpha'],
+ 'guidance_scale': neutral_samplers['guidance_scale'],
+ 'mirostat_mode': neutral_samplers['mirostat_mode'],
+ 'mirostat_tau': neutral_samplers['mirostat_tau'],
+ 'mirostat_eta': neutral_samplers['mirostat_eta'],
+
+ # Generation parameters - Other options
+ 'do_sample': neutral_samplers['do_sample'],
+ 'dynamic_temperature': neutral_samplers['dynamic_temperature'],
+ 'temperature_last': neutral_samplers['temperature_last'],
+ 'sampler_priority': neutral_samplers['sampler_priority'],
+ 'dry_sequence_breakers': neutral_samplers['dry_sequence_breakers'],
+ 'grammar_string': '',
+}
+
+default_settings = copy.deepcopy(settings)
+
def do_cmd_flags_warnings():
# Security warnings
diff --git a/modules/text_generation.py b/modules/text_generation.py
index 1fd6d810..55b538b0 100644
--- a/modules/text_generation.py
+++ b/modules/text_generation.py
@@ -65,41 +65,39 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False, escap
all_stop_strings += st
shared.stop_everything = False
- last_update = -1
reply = ''
is_stream = state['stream']
if len(all_stop_strings) > 0 and not state['stream']:
state = copy.deepcopy(state)
state['stream'] = True
- min_update_interval = 0
- if state.get('max_updates_second', 0) > 0:
- min_update_interval = 1 / state['max_updates_second']
-
# Generate
+ last_update = -1
+ latency_threshold = 1 / 1000
for reply in generate_func(question, original_question, state, stopping_strings, is_chat=is_chat):
+ cur_time = time.monotonic()
reply, stop_found = apply_stopping_strings(reply, all_stop_strings)
if escape_html:
reply = html.escape(reply)
if is_stream:
- cur_time = time.time()
-
# Limit number of tokens/second to make text readable in real time
if state['max_tokens_second'] > 0:
diff = 1 / state['max_tokens_second'] - (cur_time - last_update)
if diff > 0:
time.sleep(diff)
- last_update = time.time()
+ last_update = time.monotonic()
yield reply
# Limit updates to avoid lag in the Gradio UI
# API updates are not limited
else:
- if cur_time - last_update > min_update_interval:
- last_update = cur_time
+ # If 'generate_func' takes less than 0.001 seconds to yield the next token
+ # (equivalent to more than 1000 tok/s), assume that the UI is lagging behind and skip yielding
+ if (cur_time - last_update) > latency_threshold:
yield reply
+ last_update = time.monotonic()
if stop_found or (state['max_tokens_second'] > 0 and shared.stop_everything):
break
@@ -481,6 +479,7 @@ def generate_reply_custom(question, original_question, state, stopping_strings=N
For models that do not use the transformers library for sampling
"""
+ state = copy.deepcopy(state)
state['seed'] = set_manual_seed(state['seed'])
t0 = time.time()
reply = ''
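
A standalone sketch of the new throttling rule (fake token source; `max_tokens_second` handling omitted): a yield is skipped whenever the previous UI update happened less than `latency_threshold` seconds ago, so a backend producing more than ~1000 tok/s no longer floods the frontend with updates.

```python
import time


def throttled_stream(token_iter, latency_threshold=1 / 1000):
    reply = ''
    last_update = -1
    for token in token_iter:
        cur_time = time.monotonic()
        reply += token
        if (cur_time - last_update) > latency_threshold:
            yield reply                      # slow enough: push an update to the UI
            last_update = time.monotonic()
    yield reply                              # always emit the final, complete reply


if __name__ == "__main__":
    fake_tokens = (f"tok{i} " for i in range(5000))
    updates = sum(1 for _ in throttled_stream(fake_tokens))
    print(f"{updates} UI updates for 5000 tokens")
```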
diff --git a/modules/ui.py b/modules/ui.py
index 9f4d67cb..59da5118 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -1,4 +1,5 @@
import copy
+import threading
from pathlib import Path
import gradio as gr
@@ -6,28 +7,39 @@ import yaml
import extensions
from modules import shared
+from modules.chat import load_history
+from modules.utils import gradio
-with open(Path(__file__).resolve().parent / '../css/NotoSans/stylesheet.css', 'r') as f:
+# Global state for auto-saving UI settings with debouncing
+_auto_save_timer = None
+_auto_save_lock = threading.Lock()
+_last_interface_state = None
+_last_preset = None
+_last_extensions = None
+_last_show_controls = None
+_last_theme_state = None
+
+with open(Path(__file__).resolve().parent / '../css/NotoSans/stylesheet.css', 'r', encoding='utf-8') as f:
css = f.read()
-with open(Path(__file__).resolve().parent / '../css/main.css', 'r') as f:
+with open(Path(__file__).resolve().parent / '../css/main.css', 'r', encoding='utf-8') as f:
css += f.read()
-with open(Path(__file__).resolve().parent / '../css/katex/katex.min.css', 'r') as f:
+with open(Path(__file__).resolve().parent / '../css/katex/katex.min.css', 'r', encoding='utf-8') as f:
css += f.read()
-with open(Path(__file__).resolve().parent / '../css/highlightjs/highlightjs-copy.min.css', 'r') as f:
+with open(Path(__file__).resolve().parent / '../css/highlightjs/highlightjs-copy.min.css', 'r', encoding='utf-8') as f:
css += f.read()
-with open(Path(__file__).resolve().parent / '../js/main.js', 'r') as f:
+with open(Path(__file__).resolve().parent / '../js/main.js', 'r', encoding='utf-8') as f:
js = f.read()
-with open(Path(__file__).resolve().parent / '../js/global_scope_js.js', 'r') as f:
+with open(Path(__file__).resolve().parent / '../js/global_scope_js.js', 'r', encoding='utf-8') as f:
global_scope_js = f.read()
-with open(Path(__file__).resolve().parent / '../js/save_files.js', 'r') as f:
+with open(Path(__file__).resolve().parent / '../js/save_files.js', 'r', encoding='utf-8') as f:
save_files_js = f.read()
-with open(Path(__file__).resolve().parent / '../js/switch_tabs.js', 'r') as f:
+with open(Path(__file__).resolve().parent / '../js/switch_tabs.js', 'r', encoding='utf-8') as f:
switch_tabs_js = f.read()
-with open(Path(__file__).resolve().parent / '../js/show_controls.js', 'r') as f:
+with open(Path(__file__).resolve().parent / '../js/show_controls.js', 'r', encoding='utf-8') as f:
show_controls_js = f.read()
-with open(Path(__file__).resolve().parent / '../js/update_big_picture.js', 'r') as f:
+with open(Path(__file__).resolve().parent / '../js/update_big_picture.js', 'r', encoding='utf-8') as f:
update_big_picture_js = f.read()
-with open(Path(__file__).resolve().parent / '../js/dark_theme.js', 'r') as f:
+with open(Path(__file__).resolve().parent / '../js/dark_theme.js', 'r', encoding='utf-8') as f:
dark_theme_js = f.read()
refresh_symbol = '🔄'
@@ -62,8 +74,10 @@ if not shared.args.old_colors:
body_background_fill="white",
block_background_fill="transparent",
body_text_color='rgb(64, 64, 64)',
- button_secondary_background_fill="#f4f4f4",
+ button_secondary_background_fill="white",
button_secondary_border_color="var(--border-color-primary)",
+ input_shadow="none",
+ button_shadow_hover="none",
# Dark Mode Colors
input_background_fill_dark='var(--darker-gray)',
@@ -95,6 +109,7 @@ if not shared.args.old_colors:
button_large_radius='0.375rem',
button_large_padding='6px 12px',
input_radius='0.375rem',
+ block_radius='0',
)
if Path("user_data/notification.mp3").exists():
@@ -194,7 +209,6 @@ def list_interface_input_elements():
'max_new_tokens',
'prompt_lookup_num_tokens',
'max_tokens_second',
- 'max_updates_second',
'do_sample',
'dynamic_temperature',
'temperature_last',
@@ -257,6 +271,11 @@ def list_interface_input_elements():
# Model elements
elements += list_model_elements()
+ # Other elements
+ elements += [
+ 'paste_to_attachment'
+ ]
+
return elements
@@ -270,6 +289,13 @@ def gather_interface_values(*args):
if not shared.args.multi_user:
shared.persistent_interface_state = output
+ # Remove the chat input, as it gets cleared after this function call
+ shared.persistent_interface_state.pop('textbox')
+
+ # Prevent history loss if backend is restarted but UI is not refreshed
+ if output['history'] is None and output['unique_id'] is not None:
+ output['history'] = load_history(output['unique_id'], output['character_menu'], output['mode'])
+
return output
@@ -292,7 +318,7 @@ def apply_interface_values(state, use_persistent=False):
def save_settings(state, preset, extensions_list, show_controls, theme_state):
output = copy.deepcopy(shared.settings)
- exclude = ['name2', 'greeting', 'context', 'truncation_length', 'instruction_template_str']
+ exclude = []
for k in state:
if k in shared.settings and k not in exclude:
output[k] = state[k]
@@ -301,10 +327,11 @@ def save_settings(state, preset, extensions_list, show_controls, theme_state):
output['prompt-default'] = state['prompt_menu-default']
output['prompt-notebook'] = state['prompt_menu-notebook']
output['character'] = state['character_menu']
- output['default_extensions'] = extensions_list
output['seed'] = int(output['seed'])
output['show_controls'] = show_controls
output['dark_theme'] = True if theme_state == 'dark' else False
+ output.pop('instruction_template_str')
+ output.pop('truncation_length')
# Save extension values in the UI
for extension_name in extensions_list:
@@ -327,6 +354,143 @@ def save_settings(state, preset, extensions_list, show_controls, theme_state):
return yaml.dump(output, sort_keys=False, width=float("inf"), allow_unicode=True)
+def store_current_state_and_debounce(interface_state, preset, extensions, show_controls, theme_state):
+ """Store current state and trigger debounced save"""
+ global _auto_save_timer, _last_interface_state, _last_preset, _last_extensions, _last_show_controls, _last_theme_state
+
+ if shared.args.multi_user:
+ return
+
+ # Store the current state in global variables
+ _last_interface_state = interface_state
+ _last_preset = preset
+ _last_extensions = extensions
+ _last_show_controls = show_controls
+ _last_theme_state = theme_state
+
+ # Reset the debounce timer
+ with _auto_save_lock:
+ if _auto_save_timer is not None:
+ _auto_save_timer.cancel()
+
+ _auto_save_timer = threading.Timer(1.0, _perform_debounced_save)
+ _auto_save_timer.start()
+
+
+def _perform_debounced_save():
+ """Actually perform the save using the stored state"""
+ global _auto_save_timer
+
+ try:
+ if _last_interface_state is not None:
+ contents = save_settings(_last_interface_state, _last_preset, _last_extensions, _last_show_controls, _last_theme_state)
+ settings_path = Path('user_data') / 'settings.yaml'
+ settings_path.parent.mkdir(exist_ok=True)
+ with open(settings_path, 'w', encoding='utf-8') as f:
+ f.write(contents)
+ except Exception as e:
+ print(f"Auto-save failed: {e}")
+ finally:
+ with _auto_save_lock:
+ _auto_save_timer = None
+
+
+def setup_auto_save():
+ """Attach auto-save to key UI elements"""
+ if shared.args.multi_user:
+ return
+
+ change_elements = [
+ # Chat tab (ui_chat.py)
+ 'start_with',
+ 'enable_web_search',
+ 'web_search_pages',
+ 'mode',
+ 'chat_style',
+ 'chat-instruct_command',
+ 'character_menu',
+ 'name1',
+ 'name2',
+ 'context',
+ 'greeting',
+ 'user_bio',
+ 'custom_system_message',
+ 'chat_template_str',
+
+ # Parameters tab (ui_parameters.py) - Generation parameters
+ 'preset_menu',
+ 'temperature',
+ 'dynatemp_low',
+ 'dynatemp_high',
+ 'dynatemp_exponent',
+ 'smoothing_factor',
+ 'smoothing_curve',
+ 'min_p',
+ 'top_p',
+ 'top_k',
+ 'typical_p',
+ 'xtc_threshold',
+ 'xtc_probability',
+ 'epsilon_cutoff',
+ 'eta_cutoff',
+ 'tfs',
+ 'top_a',
+ 'top_n_sigma',
+ 'dry_multiplier',
+ 'dry_allowed_length',
+ 'dry_base',
+ 'repetition_penalty',
+ 'frequency_penalty',
+ 'presence_penalty',
+ 'encoder_repetition_penalty',
+ 'no_repeat_ngram_size',
+ 'repetition_penalty_range',
+ 'penalty_alpha',
+ 'guidance_scale',
+ 'mirostat_mode',
+ 'mirostat_tau',
+ 'mirostat_eta',
+ 'max_new_tokens',
+ 'prompt_lookup_num_tokens',
+ 'max_tokens_second',
+ 'do_sample',
+ 'dynamic_temperature',
+ 'temperature_last',
+ 'auto_max_new_tokens',
+ 'ban_eos_token',
+ 'add_bos_token',
+ 'enable_thinking',
+ 'skip_special_tokens',
+ 'stream',
+ 'static_cache',
+ 'truncation_length',
+ 'seed',
+ 'sampler_priority',
+ 'custom_stopping_strings',
+ 'custom_token_bans',
+ 'negative_prompt',
+ 'dry_sequence_breakers',
+ 'grammar_string',
+
+ # Default tab (ui_default.py)
+ 'prompt_menu-default',
+
+ # Notebook tab (ui_notebook.py)
+ 'prompt_menu-notebook',
+
+ # Session tab (ui_session.py)
+ 'show_controls',
+ 'theme_state',
+ 'paste_to_attachment'
+ ]
+
+ for element_name in change_elements:
+ if element_name in shared.gradio:
+ shared.gradio[element_name].change(
+ gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ store_current_state_and_debounce, gradio('interface_state', 'preset_menu', 'extensions_menu', 'show_controls', 'theme_state'), None, show_progress=False)
+
+
def create_refresh_button(refresh_component, refresh_method, refreshed_args, elem_class, interactive=True):
"""
Copied from https://github.com/AUTOMATIC1111/stable-diffusion-webui
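
The auto-save logic above boils down to a timer-based debounce: every change cancels the pending `threading.Timer` and arms a new one, so the write only happens once the UI has been quiet for a full second. A minimal, self-contained sketch of that pattern (the `save` callback is a stand-in, not `save_settings`):

```python
import threading


class Debouncer:
    def __init__(self, delay, fn):
        self.delay, self.fn = delay, fn
        self._timer = None
        self._lock = threading.Lock()

    def trigger(self, *args, **kwargs):
        with self._lock:
            if self._timer is not None:
                self._timer.cancel()          # drop the previously scheduled call
            self._timer = threading.Timer(self.delay, self.fn, args, kwargs)
            self._timer.start()


if __name__ == "__main__":
    saver = Debouncer(1.0, lambda state: print("saved:", state))
    for value in range(5):
        saver.trigger({"temperature": value})  # only the last call survives the debounce
```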
diff --git a/modules/ui_chat.py b/modules/ui_chat.py
index d79aa523..3b841b8b 100644
--- a/modules/ui_chat.py
+++ b/modules/ui_chat.py
@@ -18,23 +18,23 @@ def create_ui():
mu = shared.args.multi_user
shared.gradio['Chat input'] = gr.State()
- shared.gradio['history'] = gr.JSON(visible=False)
+ shared.gradio['history'] = gr.State({'internal': [], 'visible': [], 'metadata': {}})
with gr.Tab('Chat', id='Chat', elem_id='chat-tab'):
with gr.Row(elem_id='past-chats-row', elem_classes=['pretty_scrollbar']):
with gr.Column():
with gr.Row(elem_id='past-chats-buttons'):
- shared.gradio['branch_chat'] = gr.Button('Branch', elem_classes='refresh-button', elem_id='Branch', interactive=not mu)
+ shared.gradio['branch_chat'] = gr.Button('Branch', elem_classes=['refresh-button', 'refresh-button-medium'], elem_id='Branch', interactive=not mu)
+ shared.gradio['rename_chat'] = gr.Button('Rename', elem_classes=['refresh-button', 'refresh-button-medium'], interactive=not mu)
+ shared.gradio['delete_chat'] = gr.Button('🗑️', visible=False, elem_classes='refresh-button', interactive=not mu, elem_id='delete_chat')
+ shared.gradio['Start new chat'] = gr.Button('New chat', elem_classes=['refresh-button', 'refresh-button-medium', 'focus-on-chat-input'])
shared.gradio['branch_index'] = gr.Number(value=-1, precision=0, visible=False, elem_id="Branch-index", interactive=True)
- shared.gradio['rename_chat'] = gr.Button('Rename', elem_classes='refresh-button', interactive=not mu)
- shared.gradio['delete_chat'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
- shared.gradio['Start new chat'] = gr.Button('New chat', elem_classes=['refresh-button', 'focus-on-chat-input'])
shared.gradio['search_chat'] = gr.Textbox(placeholder='Search chats...', max_lines=1, elem_id='search_chat')
with gr.Row(elem_id='delete-chat-row', visible=False) as shared.gradio['delete-chat-row']:
- shared.gradio['delete_chat-cancel'] = gr.Button('Cancel', elem_classes=['refresh-button', 'focus-on-chat-input'])
- shared.gradio['delete_chat-confirm'] = gr.Button('Confirm', variant='stop', elem_classes=['refresh-button', 'focus-on-chat-input'])
+ shared.gradio['delete_chat-cancel'] = gr.Button('Cancel', elem_classes=['refresh-button', 'focus-on-chat-input'], elem_id='delete_chat-cancel')
+ shared.gradio['delete_chat-confirm'] = gr.Button('Confirm', variant='stop', elem_classes=['refresh-button', 'focus-on-chat-input'], elem_id='delete_chat-confirm')
with gr.Row(elem_id='rename-row', visible=False) as shared.gradio['rename-row']:
shared.gradio['rename_to'] = gr.Textbox(label='Rename to:', placeholder='New name', elem_classes=['no-background'])
@@ -55,7 +55,6 @@ def create_ui():
with gr.Column(scale=10, elem_id='chat-input-container'):
shared.gradio['textbox'] = gr.MultimodalTextbox(label='', placeholder='Send a message', file_types=['text', '.pdf'], file_count="multiple", elem_id='chat-input', elem_classes=['add_scrollbar'])
- shared.gradio['show_controls'] = gr.Checkbox(value=shared.settings['show_controls'], label='Show controls (Ctrl+S)', elem_id='show-controls')
shared.gradio['typing-dots'] = gr.HTML(value='<div class="typing"><span></span><span class="dot1"></span><span class="dot2"></span></div>', label='typing', elem_id='typing-container')
with gr.Column(scale=1, elem_id='generate-stop-container'):
@@ -65,21 +64,15 @@ def create_ui():
# Hover menu buttons
with gr.Column(elem_id='chat-buttons'):
- with gr.Row():
- shared.gradio['Regenerate'] = gr.Button('Regenerate (Ctrl + Enter)', elem_id='Regenerate')
- shared.gradio['Continue'] = gr.Button('Continue (Alt + Enter)', elem_id='Continue')
- shared.gradio['Remove last'] = gr.Button('Remove last reply (Ctrl + Shift + Backspace)', elem_id='Remove-last')
-
- with gr.Row():
- shared.gradio['Impersonate'] = gr.Button('Impersonate (Ctrl + Shift + M)', elem_id='Impersonate')
-
- with gr.Row():
- shared.gradio['Send dummy message'] = gr.Button('Send dummy message')
- shared.gradio['Send dummy reply'] = gr.Button('Send dummy reply')
-
- with gr.Row():
- shared.gradio['send-chat-to-default'] = gr.Button('Send to Default')
- shared.gradio['send-chat-to-notebook'] = gr.Button('Send to Notebook')
+ shared.gradio['Regenerate'] = gr.Button('Regenerate (Ctrl + Enter)', elem_id='Regenerate')
+ shared.gradio['Continue'] = gr.Button('Continue (Alt + Enter)', elem_id='Continue')
+ shared.gradio['Remove last'] = gr.Button('Remove last reply (Ctrl + Shift + Backspace)', elem_id='Remove-last')
+ shared.gradio['Impersonate'] = gr.Button('Impersonate (Ctrl + Shift + M)', elem_id='Impersonate')
+ shared.gradio['Send dummy message'] = gr.Button('Send dummy message')
+ shared.gradio['Send dummy reply'] = gr.Button('Send dummy reply')
+ shared.gradio['send-chat-to-default'] = gr.Button('Send to Default')
+ shared.gradio['send-chat-to-notebook'] = gr.Button('Send to Notebook')
+ shared.gradio['show_controls'] = gr.Checkbox(value=shared.settings['show_controls'], label='Show controls (Ctrl+S)', elem_id='show-controls')
with gr.Row(elem_id='chat-controls', elem_classes=['pretty_scrollbar']):
with gr.Column():
@@ -87,13 +80,13 @@ def create_ui():
shared.gradio['start_with'] = gr.Textbox(label='Start reply with', placeholder='Sure thing!', value=shared.settings['start_with'], elem_classes=['add_scrollbar'])
with gr.Row():
- shared.gradio['enable_web_search'] = gr.Checkbox(value=shared.settings.get('enable_web_search', False), label='Activate web search')
+ shared.gradio['enable_web_search'] = gr.Checkbox(value=shared.settings.get('enable_web_search', False), label='Activate web search', elem_id='web-search')
with gr.Row(visible=shared.settings.get('enable_web_search', False)) as shared.gradio['web_search_row']:
shared.gradio['web_search_pages'] = gr.Number(value=shared.settings.get('web_search_pages', 3), precision=0, label='Number of pages to download', minimum=1, maximum=10)
with gr.Row():
- shared.gradio['mode'] = gr.Radio(choices=['instruct', 'chat-instruct', 'chat'], value=shared.settings['mode'] if shared.settings['mode'] in ['chat', 'chat-instruct'] else None, label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template Parameters > Instruction template is used.', elem_id='chat-mode')
+ shared.gradio['mode'] = gr.Radio(choices=['instruct', 'chat-instruct', 'chat'], value=None, label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template Parameters > Instruction template is used.', elem_id='chat-mode')
with gr.Row():
shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label='Chat style', value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct')
@@ -125,14 +118,15 @@ def create_chat_settings_ui():
with gr.Column(scale=8):
with gr.Tab("Character"):
with gr.Row():
- shared.gradio['character_menu'] = gr.Dropdown(value=None, choices=utils.get_available_characters(), label='Character', elem_id='character-menu', info='Used in chat and chat-instruct modes.', elem_classes='slim-dropdown')
+ shared.gradio['character_menu'] = gr.Dropdown(value=shared.settings['character'], choices=utils.get_available_characters(), label='Character', elem_id='character-menu', info='Used in chat and chat-instruct modes.', elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['character_menu'], lambda: None, lambda: {'choices': utils.get_available_characters()}, 'refresh-button', interactive=not mu)
shared.gradio['save_character'] = gr.Button('💾', elem_classes='refresh-button', elem_id="save-character", interactive=not mu)
shared.gradio['delete_character'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
+ shared.gradio['restore_character'] = gr.Button('Restore character', elem_classes='refresh-button', interactive=True, elem_id='restore-character')
- shared.gradio['name2'] = gr.Textbox(value='', lines=1, label='Character\'s name')
- shared.gradio['context'] = gr.Textbox(value='', lines=10, label='Context', elem_classes=['add_scrollbar'])
- shared.gradio['greeting'] = gr.Textbox(value='', lines=5, label='Greeting', elem_classes=['add_scrollbar'])
+ shared.gradio['name2'] = gr.Textbox(value=shared.settings['name2'], lines=1, label='Character\'s name')
+ shared.gradio['context'] = gr.Textbox(value=shared.settings['context'], lines=10, label='Context', elem_classes=['add_scrollbar'])
+ shared.gradio['greeting'] = gr.Textbox(value=shared.settings['greeting'], lines=5, label='Greeting', elem_classes=['add_scrollbar'])
with gr.Tab("User"):
shared.gradio['name1'] = gr.Textbox(value=shared.settings['name1'], lines=1, label='Name')
@@ -185,7 +179,7 @@ def create_chat_settings_ui():
with gr.Row():
with gr.Column():
shared.gradio['custom_system_message'] = gr.Textbox(value=shared.settings['custom_system_message'], lines=2, label='Custom system message', info='If not empty, will be used instead of the default one.', elem_classes=['add_scrollbar'])
- shared.gradio['instruction_template_str'] = gr.Textbox(value='', label='Instruction template', lines=24, info='This gets autodetected; you usually don\'t need to change it. Used in instruct and chat-instruct modes.', elem_classes=['add_scrollbar', 'monospace'])
+ shared.gradio['instruction_template_str'] = gr.Textbox(value=shared.settings['instruction_template_str'], label='Instruction template', lines=24, info='This gets autodetected; you usually don\'t need to change it. Used in instruct and chat-instruct modes.', elem_classes=['add_scrollbar', 'monospace'])
with gr.Row():
shared.gradio['send_instruction_to_default'] = gr.Button('Send to default', elem_classes=['small-button'])
shared.gradio['send_instruction_to_notebook'] = gr.Button('Send to notebook', elem_classes=['small-button'])
@@ -202,7 +196,7 @@ def create_event_handlers():
shared.reload_inputs = gradio(reload_arr)
# Morph HTML updates instead of updating everything
- shared.gradio['display'].change(None, gradio('display'), None, js="(data) => handleMorphdomUpdate(data.html)")
+ shared.gradio['display'].change(None, gradio('display'), None, js="(data) => handleMorphdomUpdate(data)")
shared.gradio['Generate'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
@@ -267,11 +261,9 @@ def create_event_handlers():
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.handle_start_new_chat_click, gradio('interface_state'), gradio('history', 'display', 'unique_id'), show_progress=False)
- shared.gradio['delete_chat'].click(lambda: gr.update(visible=True), None, gradio('delete-chat-row'))
- shared.gradio['delete_chat-cancel'].click(lambda: gr.update(visible=False), None, gradio('delete-chat-row'))
shared.gradio['delete_chat-confirm'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
- chat.handle_delete_chat_confirm_click, gradio('interface_state'), gradio('history', 'display', 'unique_id', 'delete-chat-row'), show_progress=False)
+ chat.handle_delete_chat_confirm_click, gradio('interface_state'), gradio('history', 'display', 'unique_id'), show_progress=False)
shared.gradio['branch_chat'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
@@ -301,10 +293,12 @@ def create_event_handlers():
chat.handle_character_menu_change, gradio('interface_state'), gradio('history', 'display', 'name1', 'name2', 'character_picture', 'greeting', 'context', 'unique_id'), show_progress=False).then(
None, None, None, js=f'() => {{{ui.update_big_picture_js}; updateBigPicture()}}')
+ shared.gradio['character_picture'].change(chat.handle_character_picture_change, gradio('character_picture'), None, show_progress=False)
+
shared.gradio['mode'].change(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.handle_mode_change, gradio('interface_state'), gradio('history', 'display', 'chat_style', 'chat-instruct_command', 'unique_id'), show_progress=False).then(
- None, gradio('mode'), None, js="(mode) => {mode === 'instruct' ? document.getElementById('character-menu').parentNode.parentNode.style.display = 'none' : document.getElementById('character-menu').parentNode.parentNode.style.display = ''}")
+ None, gradio('mode'), None, js="(mode) => {const characterContainer = document.getElementById('character-menu').parentNode.parentNode; const isInChatTab = document.querySelector('#chat-controls').contains(characterContainer); if (isInChatTab) { characterContainer.style.display = mode === 'instruct' ? 'none' : ''; }}")
shared.gradio['chat_style'].change(chat.redraw_html, gradio(reload_arr), gradio('display'), show_progress=False)
@@ -324,6 +318,10 @@ def create_event_handlers():
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.handle_save_template_click, gradio('instruction_template_str'), gradio('save_filename', 'save_root', 'save_contents', 'file_saver'), show_progress=False)
+ shared.gradio['restore_character'].click(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ chat.restore_character_for_ui, gradio('interface_state'), gradio('interface_state', 'name2', 'context', 'greeting', 'character_picture'), show_progress=False)
+
shared.gradio['delete_template'].click(chat.handle_delete_template_click, gradio('instruction_template'), gradio('delete_filename', 'delete_root', 'file_deleter'), show_progress=False)
shared.gradio['save_chat_history'].click(
lambda x: json.dumps(x, indent=4), gradio('history'), gradio('temporary_text')).then(
diff --git a/modules/ui_default.py b/modules/ui_default.py
index c2946b37..8acc4b10 100644
--- a/modules/ui_default.py
+++ b/modules/ui_default.py
@@ -19,7 +19,7 @@ def create_ui():
with gr.Row():
with gr.Column():
with gr.Row():
- shared.gradio['textbox-default'] = gr.Textbox(value='', lines=27, label='Input', elem_classes=['textbox_default', 'add_scrollbar'])
+ shared.gradio['textbox-default'] = gr.Textbox(value=load_prompt(shared.settings['prompt-default']), lines=27, label='Input', elem_classes=['textbox_default', 'add_scrollbar'])
shared.gradio['token-counter-default'] = gr.HTML(value="
0", elem_id="default-token-counter")
with gr.Row():
@@ -28,7 +28,7 @@ def create_ui():
shared.gradio['Generate-default'] = gr.Button('Generate', variant='primary')
with gr.Row():
- shared.gradio['prompt_menu-default'] = gr.Dropdown(choices=utils.get_available_prompts(), value='None', label='Prompt', elem_classes='slim-dropdown')
+ shared.gradio['prompt_menu-default'] = gr.Dropdown(choices=utils.get_available_prompts(), value=shared.settings['prompt-default'], label='Prompt', elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['prompt_menu-default'], lambda: None, lambda: {'choices': utils.get_available_prompts()}, 'refresh-button', interactive=not mu)
shared.gradio['save_prompt-default'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
shared.gradio['delete_prompt-default'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index 862b3893..9e982f0e 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -1,4 +1,6 @@
import importlib
+import queue
+import threading
import traceback
from functools import partial
from pathlib import Path
@@ -205,48 +207,51 @@ def load_lora_wrapper(selected_loras):
def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), return_links=False, check=False):
+ downloader_module = importlib.import_module("download-model")
+ downloader = downloader_module.ModelDownloader()
+ update_queue = queue.Queue()
+
try:
# Handle direct GGUF URLs
if repo_id.startswith("https://") and ("huggingface.co" in repo_id) and (repo_id.endswith(".gguf") or repo_id.endswith(".gguf?download=true")):
try:
path = repo_id.split("huggingface.co/")[1]
-
- # Extract the repository ID (first two parts of the path)
parts = path.split("/")
if len(parts) >= 2:
extracted_repo_id = f"{parts[0]}/{parts[1]}"
-
- # Extract the filename (last part of the path)
- filename = repo_id.split("/")[-1]
- if "?download=true" in filename:
- filename = filename.replace("?download=true", "")
-
+ filename = repo_id.split("/")[-1].replace("?download=true", "")
repo_id = extracted_repo_id
specific_file = filename
- except:
- pass
+ except Exception as e:
+ yield f"Error parsing GGUF URL: {e}"
+ progress(0.0)
+ return
- if repo_id == "":
- yield ("Please enter a model path")
+ if not repo_id:
+ yield "Please enter a model path."
+ progress(0.0)
return
repo_id = repo_id.strip()
specific_file = specific_file.strip()
- downloader = importlib.import_module("download-model").ModelDownloader()
- progress(0.0)
+ progress(0.0, "Preparing download...")
+
model, branch = downloader.sanitize_model_and_branch_names(repo_id, None)
-
- yield ("Getting the download links from Hugging Face")
+ yield "Getting download links from Hugging Face..."
links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=False, specific_file=specific_file)
+ if not links:
+ yield "No files found to download for the given model/criteria."
+ progress(0.0)
+ return
+
# Check for multiple GGUF files
gguf_files = [link for link in links if link.lower().endswith('.gguf')]
if len(gguf_files) > 1 and not specific_file:
output = "Multiple GGUF files found. Please copy one of the following filenames to the 'File name' field:\n\n```\n"
for link in gguf_files:
output += f"{Path(link).name}\n"
-
output += "```"
yield output
return
@@ -260,12 +265,9 @@ def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), retur
yield output
return
- yield ("Getting the output folder")
+ yield "Determining output folder..."
output_folder = downloader.get_output_folder(
- model,
- branch,
- is_lora,
- is_llamacpp=is_llamacpp,
+ model, branch, is_lora, is_llamacpp=is_llamacpp,
model_dir=shared.args.model_dir if shared.args.model_dir != shared.args_defaults.model_dir else None
)
@@ -275,19 +277,65 @@ def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), retur
output_folder = Path(shared.args.lora_dir)
if check:
- progress(0.5)
-
- yield ("Checking previously downloaded files")
+ yield "Checking previously downloaded files..."
+ progress(0.5, "Verifying files...")
downloader.check_model_files(model, branch, links, sha256, output_folder)
- progress(1.0)
- else:
- yield (f"Downloading file{'s' if len(links) > 1 else ''} to `{output_folder}/`")
- downloader.download_model_files(model, branch, links, sha256, output_folder, progress_bar=progress, threads=4, is_llamacpp=is_llamacpp)
+ progress(1.0, "Verification complete.")
+ yield "File check complete."
+ return
- yield (f"Model successfully saved to `{output_folder}/`.")
- except:
- progress(1.0)
- yield traceback.format_exc().replace('\n', '\n\n')
+ yield ""
+ progress(0.0, "Download starting...")
+
+ def downloader_thread_target():
+ try:
+ downloader.download_model_files(
+ model, branch, links, sha256, output_folder,
+ progress_queue=update_queue,
+ threads=4,
+ is_llamacpp=is_llamacpp,
+ specific_file=specific_file
+ )
+ update_queue.put(("COMPLETED", f"Model successfully saved to `{output_folder}/`."))
+ except Exception as e:
+ tb_str = traceback.format_exc().replace('\n', '\n\n')
+ update_queue.put(("ERROR", tb_str))
+
+ download_thread = threading.Thread(target=downloader_thread_target)
+ download_thread.start()
+
+ while True:
+ try:
+ message = update_queue.get(timeout=0.2)
+ if not isinstance(message, tuple) or len(message) != 2:
+ continue
+
+ msg_identifier, data = message
+
+ if msg_identifier == "COMPLETED":
+ progress(1.0, "Download complete!")
+ yield data
+ break
+ elif msg_identifier == "ERROR":
+ progress(0.0, "Error occurred")
+ yield data
+ break
+ elif isinstance(msg_identifier, float):
+ progress_value = msg_identifier
+ description_str = data
+ progress(progress_value, f"Downloading: {description_str}")
+
+ except queue.Empty:
+ if not download_thread.is_alive():
+ yield "Download process finished."
+ break
+
+ download_thread.join()
+
+ except Exception as e:
+ progress(0.0)
+ tb_str = traceback.format_exc().replace('\n', '\n\n')
+ yield tb_str
def update_truncation_length(current_length, state):
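
The download rewrite follows a producer/consumer shape: a worker thread pushes progress tuples and `COMPLETED`/`ERROR` sentinels into a queue, and the UI-side generator drains it with a timeout so it can also notice a thread that died without reporting. A simplified, self-contained sketch of that pattern (not the project's downloader):

```python
import queue
import threading
import time


def run_with_progress(work_steps=5):
    updates = queue.Queue()

    def worker():
        try:
            for step in range(work_steps):
                time.sleep(0.1)                                   # pretend to download a chunk
                updates.put(((step + 1) / work_steps, f"file part {step + 1}"))
            updates.put(("COMPLETED", "all files saved"))
        except Exception as exc:
            updates.put(("ERROR", str(exc)))

    thread = threading.Thread(target=worker)
    thread.start()

    while True:
        try:
            kind, data = updates.get(timeout=0.2)
        except queue.Empty:
            if not thread.is_alive():
                yield "worker finished without a final message"
                break
            continue
        if kind in ("COMPLETED", "ERROR"):
            yield f"{kind}: {data}"
            break
        yield f"progress {kind:.0%}: {data}"       # kind is a float for normal updates

    thread.join()


if __name__ == "__main__":
    for message in run_with_progress():
        print(message)
```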
diff --git a/modules/ui_notebook.py b/modules/ui_notebook.py
index b234ac57..3f79a93c 100644
--- a/modules/ui_notebook.py
+++ b/modules/ui_notebook.py
@@ -22,7 +22,7 @@ def create_ui():
with gr.Column(scale=4):
with gr.Tab('Raw'):
with gr.Row():
- shared.gradio['textbox-notebook'] = gr.Textbox(value='', lines=27, elem_id='textbox-notebook', elem_classes=['textbox', 'add_scrollbar'])
+ shared.gradio['textbox-notebook'] = gr.Textbox(value=load_prompt(shared.settings['prompt-notebook']), lines=27, elem_id='textbox-notebook', elem_classes=['textbox', 'add_scrollbar'])
shared.gradio['token-counter-notebook'] = gr.HTML(value="
0", elem_id="notebook-token-counter")
with gr.Tab('Markdown'):
@@ -56,7 +56,7 @@ def create_ui():
with gr.Column(scale=1):
gr.HTML('<div style="padding-bottom: 13px"></div>')
with gr.Row():
- shared.gradio['prompt_menu-notebook'] = gr.Dropdown(choices=utils.get_available_prompts(), value='None', label='Prompt', elem_classes='slim-dropdown')
+ shared.gradio['prompt_menu-notebook'] = gr.Dropdown(choices=utils.get_available_prompts(), value=shared.settings['prompt-notebook'], label='Prompt', elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['prompt_menu-notebook'], lambda: None, lambda: {'choices': utils.get_available_prompts()}, ['refresh-button', 'refresh-button-small'], interactive=not mu)
shared.gradio['save_prompt-notebook'] = gr.Button('💾', elem_classes=['refresh-button', 'refresh-button-small'], interactive=not mu)
shared.gradio['delete_prompt-notebook'] = gr.Button('🗑️', elem_classes=['refresh-button', 'refresh-button-small'], interactive=not mu)
diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py
index 733d0901..e2b10554 100644
--- a/modules/ui_parameters.py
+++ b/modules/ui_parameters.py
@@ -6,19 +6,19 @@ from modules import loaders, presets, shared, ui, ui_chat, utils
from modules.utils import gradio
-def create_ui(default_preset):
+def create_ui():
mu = shared.args.multi_user
- generate_params = presets.load_preset(default_preset)
with gr.Tab("Parameters", elem_id="parameters"):
with gr.Tab("Generation"):
with gr.Row():
with gr.Column():
with gr.Row():
- shared.gradio['preset_menu'] = gr.Dropdown(choices=utils.get_available_presets(), value=default_preset, label='Preset', elem_classes='slim-dropdown')
+ shared.gradio['preset_menu'] = gr.Dropdown(choices=utils.get_available_presets(), value=shared.settings['preset'], label='Preset', elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['preset_menu'], lambda: None, lambda: {'choices': utils.get_available_presets()}, 'refresh-button', interactive=not mu)
shared.gradio['save_preset'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
shared.gradio['delete_preset'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
- shared.gradio['random_preset'] = gr.Button('🎲', elem_classes='refresh-button')
+ shared.gradio['reset_preset'] = gr.Button('Restore preset', elem_classes='refresh-button', interactive=True)
+ shared.gradio['neutralize_samplers'] = gr.Button('Neutralize samplers', elem_classes='refresh-button', interactive=True)
with gr.Column():
shared.gradio['filter_by_loader'] = gr.Dropdown(label="Filter by loader", choices=["All"] + list(loaders.loaders_and_params.keys()) if not shared.args.portable else ['llama.cpp'], value="All", elem_classes='slim-dropdown')
@@ -28,57 +28,60 @@ def create_ui(default_preset):
with gr.Row():
with gr.Column():
gr.Markdown('## Curve shape')
- shared.gradio['temperature'] = gr.Slider(0.01, 5, value=generate_params['temperature'], step=0.01, label='temperature')
- shared.gradio['dynatemp_low'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_low'], step=0.01, label='dynatemp_low', visible=generate_params['dynamic_temperature'])
- shared.gradio['dynatemp_high'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_high'], step=0.01, label='dynatemp_high', visible=generate_params['dynamic_temperature'])
- shared.gradio['dynatemp_exponent'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_exponent'], step=0.01, label='dynatemp_exponent', visible=generate_params['dynamic_temperature'])
- shared.gradio['smoothing_factor'] = gr.Slider(0.0, 10.0, value=generate_params['smoothing_factor'], step=0.01, label='smoothing_factor', info='Activates Quadratic Sampling.')
- shared.gradio['smoothing_curve'] = gr.Slider(1.0, 10.0, value=generate_params['smoothing_curve'], step=0.01, label='smoothing_curve', info='Adjusts the dropoff curve of Quadratic Sampling.')
+ shared.gradio['temperature'] = gr.Slider(0.01, 5, value=shared.settings['temperature'], step=0.01, label='temperature')
+ shared.gradio['dynatemp_low'] = gr.Slider(0.01, 5, value=shared.settings['dynatemp_low'], step=0.01, label='dynatemp_low', visible=shared.settings['dynamic_temperature'])
+ shared.gradio['dynatemp_high'] = gr.Slider(0.01, 5, value=shared.settings['dynatemp_high'], step=0.01, label='dynatemp_high', visible=shared.settings['dynamic_temperature'])
+ shared.gradio['dynatemp_exponent'] = gr.Slider(0.01, 5, value=shared.settings['dynatemp_exponent'], step=0.01, label='dynatemp_exponent', visible=shared.settings['dynamic_temperature'])
+ shared.gradio['smoothing_factor'] = gr.Slider(0.0, 10.0, value=shared.settings['smoothing_factor'], step=0.01, label='smoothing_factor', info='Activates Quadratic Sampling.')
+ shared.gradio['smoothing_curve'] = gr.Slider(1.0, 10.0, value=shared.settings['smoothing_curve'], step=0.01, label='smoothing_curve', info='Adjusts the dropoff curve of Quadratic Sampling.')
+ shared.gradio['dynamic_temperature'] = gr.Checkbox(value=shared.settings['dynamic_temperature'], label='dynamic_temperature')
gr.Markdown('## Curve cutoff')
- shared.gradio['min_p'] = gr.Slider(0.0, 1.0, value=generate_params['min_p'], step=0.01, label='min_p')
- shared.gradio['top_n_sigma'] = gr.Slider(0.0, 5.0, value=generate_params['top_n_sigma'], step=0.01, label='top_n_sigma')
- shared.gradio['top_p'] = gr.Slider(0.0, 1.0, value=generate_params['top_p'], step=0.01, label='top_p')
- shared.gradio['top_k'] = gr.Slider(0, 200, value=generate_params['top_k'], step=1, label='top_k')
- shared.gradio['typical_p'] = gr.Slider(0.0, 1.0, value=generate_params['typical_p'], step=0.01, label='typical_p')
- shared.gradio['xtc_threshold'] = gr.Slider(0, 0.5, value=generate_params['xtc_threshold'], step=0.01, label='xtc_threshold', info='If 2 or more tokens have probability above this threshold, consider removing all but the last one.')
- shared.gradio['xtc_probability'] = gr.Slider(0, 1, value=generate_params['xtc_probability'], step=0.01, label='xtc_probability', info='Probability that the removal will actually happen. 0 disables the sampler. 1 makes it always happen.')
- shared.gradio['epsilon_cutoff'] = gr.Slider(0, 9, value=generate_params['epsilon_cutoff'], step=0.01, label='epsilon_cutoff')
- shared.gradio['eta_cutoff'] = gr.Slider(0, 20, value=generate_params['eta_cutoff'], step=0.01, label='eta_cutoff')
- shared.gradio['tfs'] = gr.Slider(0.0, 1.0, value=generate_params['tfs'], step=0.01, label='tfs')
- shared.gradio['top_a'] = gr.Slider(0.0, 1.0, value=generate_params['top_a'], step=0.01, label='top_a')
+ shared.gradio['min_p'] = gr.Slider(0.0, 1.0, value=shared.settings['min_p'], step=0.01, label='min_p')
+ shared.gradio['top_n_sigma'] = gr.Slider(0.0, 5.0, value=shared.settings['top_n_sigma'], step=0.01, label='top_n_sigma')
+ shared.gradio['top_p'] = gr.Slider(0.0, 1.0, value=shared.settings['top_p'], step=0.01, label='top_p')
+ shared.gradio['top_k'] = gr.Slider(0, 200, value=shared.settings['top_k'], step=1, label='top_k')
+ shared.gradio['typical_p'] = gr.Slider(0.0, 1.0, value=shared.settings['typical_p'], step=0.01, label='typical_p')
+ shared.gradio['xtc_threshold'] = gr.Slider(0, 0.5, value=shared.settings['xtc_threshold'], step=0.01, label='xtc_threshold', info='If 2 or more tokens have probability above this threshold, consider removing all but the last one.')
+ shared.gradio['xtc_probability'] = gr.Slider(0, 1, value=shared.settings['xtc_probability'], step=0.01, label='xtc_probability', info='Probability that the removal will actually happen. 0 disables the sampler. 1 makes it always happen.')
+ shared.gradio['epsilon_cutoff'] = gr.Slider(0, 9, value=shared.settings['epsilon_cutoff'], step=0.01, label='epsilon_cutoff')
+ shared.gradio['eta_cutoff'] = gr.Slider(0, 20, value=shared.settings['eta_cutoff'], step=0.01, label='eta_cutoff')
+ shared.gradio['tfs'] = gr.Slider(0.0, 1.0, value=shared.settings['tfs'], step=0.01, label='tfs')
+ shared.gradio['top_a'] = gr.Slider(0.0, 1.0, value=shared.settings['top_a'], step=0.01, label='top_a')
gr.Markdown('## Repetition suppression')
- shared.gradio['dry_multiplier'] = gr.Slider(0, 5, value=generate_params['dry_multiplier'], step=0.01, label='dry_multiplier', info='Set to greater than 0 to enable DRY. Recommended value: 0.8.')
- shared.gradio['dry_allowed_length'] = gr.Slider(1, 20, value=generate_params['dry_allowed_length'], step=1, label='dry_allowed_length', info='Longest sequence that can be repeated without being penalized.')
- shared.gradio['dry_base'] = gr.Slider(1, 4, value=generate_params['dry_base'], step=0.01, label='dry_base', info='Controls how fast the penalty grows with increasing sequence length.')
- shared.gradio['repetition_penalty'] = gr.Slider(1.0, 1.5, value=generate_params['repetition_penalty'], step=0.01, label='repetition_penalty')
- shared.gradio['frequency_penalty'] = gr.Slider(0, 2, value=generate_params['frequency_penalty'], step=0.05, label='frequency_penalty')
- shared.gradio['presence_penalty'] = gr.Slider(0, 2, value=generate_params['presence_penalty'], step=0.05, label='presence_penalty')
- shared.gradio['encoder_repetition_penalty'] = gr.Slider(0.8, 1.5, value=generate_params['encoder_repetition_penalty'], step=0.01, label='encoder_repetition_penalty')
- shared.gradio['no_repeat_ngram_size'] = gr.Slider(0, 20, step=1, value=generate_params['no_repeat_ngram_size'], label='no_repeat_ngram_size')
- shared.gradio['repetition_penalty_range'] = gr.Slider(0, 4096, step=64, value=generate_params['repetition_penalty_range'], label='repetition_penalty_range')
+ shared.gradio['dry_multiplier'] = gr.Slider(0, 5, value=shared.settings['dry_multiplier'], step=0.01, label='dry_multiplier', info='Set to greater than 0 to enable DRY. Recommended value: 0.8.')
+ shared.gradio['dry_allowed_length'] = gr.Slider(1, 20, value=shared.settings['dry_allowed_length'], step=1, label='dry_allowed_length', info='Longest sequence that can be repeated without being penalized.')
+ shared.gradio['dry_base'] = gr.Slider(1, 4, value=shared.settings['dry_base'], step=0.01, label='dry_base', info='Controls how fast the penalty grows with increasing sequence length.')
+ shared.gradio['repetition_penalty'] = gr.Slider(1.0, 1.5, value=shared.settings['repetition_penalty'], step=0.01, label='repetition_penalty')
+ shared.gradio['frequency_penalty'] = gr.Slider(0, 2, value=shared.settings['frequency_penalty'], step=0.05, label='frequency_penalty')
+ shared.gradio['presence_penalty'] = gr.Slider(0, 2, value=shared.settings['presence_penalty'], step=0.05, label='presence_penalty')
+ shared.gradio['encoder_repetition_penalty'] = gr.Slider(0.8, 1.5, value=shared.settings['encoder_repetition_penalty'], step=0.01, label='encoder_repetition_penalty')
+ shared.gradio['no_repeat_ngram_size'] = gr.Slider(0, 20, step=1, value=shared.settings['no_repeat_ngram_size'], label='no_repeat_ngram_size')
+ shared.gradio['repetition_penalty_range'] = gr.Slider(0, 4096, step=64, value=shared.settings['repetition_penalty_range'], label='repetition_penalty_range')
with gr.Column():
gr.Markdown('## Alternative sampling methods')
- shared.gradio['penalty_alpha'] = gr.Slider(0, 5, value=generate_params['penalty_alpha'], label='penalty_alpha', info='For Contrastive Search. do_sample must be unchecked.')
- shared.gradio['guidance_scale'] = gr.Slider(-0.5, 2.5, step=0.05, value=generate_params['guidance_scale'], label='guidance_scale', info='For CFG. 1.5 is a good value.')
- shared.gradio['mirostat_mode'] = gr.Slider(0, 2, step=1, value=generate_params['mirostat_mode'], label='mirostat_mode', info='mode=1 is for llama.cpp only.')
- shared.gradio['mirostat_tau'] = gr.Slider(0, 10, step=0.01, value=generate_params['mirostat_tau'], label='mirostat_tau')
- shared.gradio['mirostat_eta'] = gr.Slider(0, 1, step=0.01, value=generate_params['mirostat_eta'], label='mirostat_eta')
+ shared.gradio['penalty_alpha'] = gr.Slider(0, 5, value=shared.settings['penalty_alpha'], label='penalty_alpha', info='For Contrastive Search. do_sample must be unchecked.')
+ shared.gradio['guidance_scale'] = gr.Slider(-0.5, 2.5, step=0.05, value=shared.settings['guidance_scale'], label='guidance_scale', info='For CFG. 1.5 is a good value.')
+ shared.gradio['mirostat_mode'] = gr.Slider(0, 2, step=1, value=shared.settings['mirostat_mode'], label='mirostat_mode', info='mode=1 is for llama.cpp only.')
+ shared.gradio['mirostat_tau'] = gr.Slider(0, 10, step=0.01, value=shared.settings['mirostat_tau'], label='mirostat_tau')
+ shared.gradio['mirostat_eta'] = gr.Slider(0, 1, step=0.01, value=shared.settings['mirostat_eta'], label='mirostat_eta')
gr.Markdown('## Other options')
- shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], value=shared.settings['max_new_tokens'], step=1, label='max_new_tokens', info='⚠️ Setting this too high can cause prompt truncation.')
- shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.')
- shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.')
- shared.gradio['max_updates_second'] = gr.Slider(value=shared.settings['max_updates_second'], minimum=0, maximum=24, step=1, label='Maximum UI updates/second', info='Set this if you experience lag in the UI during streaming.')
+ shared.gradio['do_sample'] = gr.Checkbox(value=shared.settings['do_sample'], label='do_sample')
+ shared.gradio['temperature_last'] = gr.Checkbox(value=shared.settings['temperature_last'], label='temperature_last', info='Moves temperature/dynamic temperature/quadratic sampling to the end of the sampler stack, ignoring their positions in "Sampler priority".')
+ shared.gradio['sampler_priority'] = gr.Textbox(value=shared.settings['sampler_priority'], lines=10, label='Sampler priority', info='Parameter names separated by new lines or commas.', elem_classes=['add_scrollbar'])
+ shared.gradio['dry_sequence_breakers'] = gr.Textbox(value=shared.settings['dry_sequence_breakers'], label='dry_sequence_breakers', info='Tokens across which sequence matching is not continued. Specified as a comma-separated list of quoted strings.')
with gr.Column():
with gr.Row():
with gr.Column():
- shared.gradio['do_sample'] = gr.Checkbox(value=generate_params['do_sample'], label='do_sample')
- shared.gradio['dynamic_temperature'] = gr.Checkbox(value=generate_params['dynamic_temperature'], label='dynamic_temperature')
- shared.gradio['temperature_last'] = gr.Checkbox(value=generate_params['temperature_last'], label='temperature_last', info='Moves temperature/dynamic temperature/quadratic sampling to the end of the sampler stack, ignoring their positions in "Sampler priority".')
+ with gr.Blocks():
+ shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], value=shared.settings['max_new_tokens'], step=1, label='max_new_tokens', info='⚠️ Setting this too high can cause prompt truncation.')
+ shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.')
+ shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.')
+
shared.gradio['auto_max_new_tokens'] = gr.Checkbox(value=shared.settings['auto_max_new_tokens'], label='auto_max_new_tokens', info='Expand max_new_tokens to the available context length.')
shared.gradio['ban_eos_token'] = gr.Checkbox(value=shared.settings['ban_eos_token'], label='Ban the eos_token', info='Forces the model to never end the generation prematurely.')
shared.gradio['add_bos_token'] = gr.Checkbox(value=shared.settings['add_bos_token'], label='Add the bos_token to the beginning of prompts', info='Disabling this can make the replies more creative.')
@@ -91,18 +94,16 @@ def create_ui(default_preset):
shared.gradio['truncation_length'] = gr.Number(precision=0, step=256, value=get_truncation_length(), label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length.')
shared.gradio['seed'] = gr.Number(value=shared.settings['seed'], label='Seed (-1 for random)')
- shared.gradio['sampler_priority'] = gr.Textbox(value=generate_params['sampler_priority'], lines=12, label='Sampler priority', info='Parameter names separated by new lines or commas.', elem_classes=['add_scrollbar'])
shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=2, value=shared.settings["custom_stopping_strings"] or None, label='Custom stopping strings', info='Written between "" and separated by commas.', placeholder='"\\n", "\\nYou:"')
shared.gradio['custom_token_bans'] = gr.Textbox(value=shared.settings['custom_token_bans'] or None, label='Token bans', info='Token IDs to ban, separated by commas. The IDs can be found in the Default or Notebook tab.')
shared.gradio['negative_prompt'] = gr.Textbox(value=shared.settings['negative_prompt'], label='Negative prompt', info='For CFG. Only used when guidance_scale is different than 1.', lines=3, elem_classes=['add_scrollbar'])
- shared.gradio['dry_sequence_breakers'] = gr.Textbox(value=generate_params['dry_sequence_breakers'], label='dry_sequence_breakers', info='Tokens across which sequence matching is not continued. Specified as a comma-separated list of quoted strings.')
with gr.Row() as shared.gradio['grammar_file_row']:
shared.gradio['grammar_file'] = gr.Dropdown(value='None', choices=utils.get_available_grammars(), label='Load grammar from file (.gbnf)', elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['grammar_file'], lambda: None, lambda: {'choices': utils.get_available_grammars()}, 'refresh-button', interactive=not mu)
shared.gradio['save_grammar'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
shared.gradio['delete_grammar'] = gr.Button('🗑️ ', elem_classes='refresh-button', interactive=not mu)
- shared.gradio['grammar_string'] = gr.Textbox(value='', label='Grammar', lines=16, elem_classes=['add_scrollbar', 'monospace'])
+ shared.gradio['grammar_string'] = gr.Textbox(value=shared.settings['grammar_string'], label='Grammar', lines=16, elem_classes=['add_scrollbar', 'monospace'])
ui_chat.create_chat_settings_ui()
@@ -113,9 +114,13 @@ def create_event_handlers():
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
presets.load_preset_for_ui, gradio('preset_menu', 'interface_state'), gradio('interface_state') + gradio(presets.presets_params()), show_progress=False)
- shared.gradio['random_preset'].click(
+ shared.gradio['reset_preset'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
- presets.random_preset, gradio('interface_state'), gradio('interface_state') + gradio(presets.presets_params()), show_progress=False)
+ presets.reset_preset_for_ui, gradio('preset_menu', 'interface_state'), gradio('interface_state') + gradio(presets.presets_params()), show_progress=False)
+
+ shared.gradio['neutralize_samplers'].click(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ presets.neutralize_samplers_for_ui, gradio('interface_state'), gradio('interface_state') + gradio(presets.presets_params()), show_progress=False)
shared.gradio['grammar_file'].change(load_grammar, gradio('grammar_file'), gradio('grammar_string'), show_progress=False)
shared.gradio['dynamic_temperature'].change(lambda x: [gr.update(visible=x)] * 3, gradio('dynamic_temperature'), gradio('dynatemp_low', 'dynatemp_high', 'dynatemp_exponent'), show_progress=False)
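Note: the `dynamic_temperature` checkbox above drives the visibility of the three dynatemp sliders through a single `.change()` handler. A minimal standalone sketch of that Gradio pattern, using illustrative component names rather than the webui's actual `shared.gradio` entries:

```python
# Hedged sketch: a checkbox that shows/hides dependent sliders, mirroring the
# dynamic_temperature -> dynatemp_* wiring in the diff above.
import gradio as gr

with gr.Blocks() as demo:
    enable = gr.Checkbox(value=False, label='dynamic_temperature')
    low = gr.Slider(0.01, 5, value=0.1, label='dynatemp_low', visible=False)
    high = gr.Slider(0.01, 5, value=1.0, label='dynatemp_high', visible=False)
    exponent = gr.Slider(0.01, 5, value=1.0, label='dynatemp_exponent', visible=False)

    # One gr.update(visible=x) per output component, as in the event handler above.
    enable.change(lambda x: [gr.update(visible=x)] * 3, enable, [low, high, exponent])

demo.launch()
```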
diff --git a/modules/ui_session.py b/modules/ui_session.py
index a4eba667..33d7dcb7 100644
--- a/modules/ui_session.py
+++ b/modules/ui_session.py
@@ -1,7 +1,6 @@
import gradio as gr
from modules import shared, ui, utils
-from modules.github import clone_or_pull_repository
from modules.utils import gradio
@@ -10,11 +9,14 @@ def create_ui():
with gr.Tab("Session", elem_id="session-tab"):
with gr.Row():
with gr.Column():
- shared.gradio['reset_interface'] = gr.Button("Apply flags/extensions and restart", interactive=not mu)
- with gr.Row():
- shared.gradio['toggle_dark_mode'] = gr.Button('Toggle 💡')
- shared.gradio['save_settings'] = gr.Button('Save UI defaults to user_data/settings.yaml', interactive=not mu)
+ gr.Markdown("## Settings")
+ shared.gradio['save_settings'] = gr.Button('Save settings to user_data/settings.yaml', elem_classes='refresh-button', interactive=not mu)
+ shared.gradio['toggle_dark_mode'] = gr.Button('Toggle light/dark theme 💡', elem_classes='refresh-button')
+ shared.gradio['paste_to_attachment'] = gr.Checkbox(label='Turn long pasted text into attachments in the Chat tab', value=shared.settings['paste_to_attachment'], elem_id='paste_to_attachment')
+ with gr.Column():
+ gr.Markdown("## Extensions & flags")
+ shared.gradio['reset_interface'] = gr.Button("Apply flags/extensions and restart", interactive=not mu)
with gr.Row():
with gr.Column():
shared.gradio['extensions_menu'] = gr.CheckboxGroup(choices=utils.get_available_extensions(), value=shared.args.extensions, label="Available extensions", info='Note that some of these extensions may require manually installing Python requirements through the command: pip install -r extensions/extension_name/requirements.txt', elem_classes='checkboxgroup-table')
@@ -22,30 +24,20 @@ def create_ui():
with gr.Column():
shared.gradio['bool_menu'] = gr.CheckboxGroup(choices=get_boolean_arguments(), value=get_boolean_arguments(active=True), label="Boolean command-line flags", elem_classes='checkboxgroup-table')
- with gr.Column():
- if not shared.args.portable:
- extension_name = gr.Textbox(lines=1, label='Install or update an extension', info='Enter the GitHub URL below and press Enter. For a list of extensions, see: https://github.com/oobabooga/text-generation-webui-extensions ⚠️ WARNING ⚠️ : extensions can execute arbitrary code. Make sure to inspect their source code before activating them.', interactive=not mu)
- extension_status = gr.Markdown()
- else:
- pass
-
shared.gradio['theme_state'] = gr.Textbox(visible=False, value='dark' if shared.settings['dark_theme'] else 'light')
- if not shared.args.portable:
- extension_name.submit(clone_or_pull_repository, extension_name, extension_status, show_progress=False)
+ shared.gradio['save_settings'].click(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ handle_save_settings, gradio('interface_state', 'preset_menu', 'extensions_menu', 'show_controls', 'theme_state'), gradio('save_contents', 'save_filename', 'save_root', 'file_saver'), show_progress=False)
+
+ shared.gradio['toggle_dark_mode'].click(
+ lambda x: 'dark' if x == 'light' else 'light', gradio('theme_state'), gradio('theme_state')).then(
+ None, None, None, js=f'() => {{{ui.dark_theme_js}; toggleDarkMode(); localStorage.setItem("theme", document.body.classList.contains("dark") ? "dark" : "light")}}')
# Reset interface event
shared.gradio['reset_interface'].click(
set_interface_arguments, gradio('extensions_menu', 'bool_menu'), None).then(
            None, None, None, js='() => {document.body.innerHTML=\'<h1 style="font-family:monospace;margin-top:20%;color:lightgray;text-align:center;">Reloading...</h1>\'; setTimeout(function(){location.reload()},2500); return []}')
- shared.gradio['toggle_dark_mode'].click(
- lambda x: 'dark' if x == 'light' else 'light', gradio('theme_state'), gradio('theme_state')).then(
- None, None, None, js=f'() => {{{ui.dark_theme_js}; toggleDarkMode()}}')
-
- shared.gradio['save_settings'].click(
- ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
- handle_save_settings, gradio('interface_state', 'preset_menu', 'extensions_menu', 'show_controls', 'theme_state'), gradio('save_contents', 'save_filename', 'save_root', 'file_saver'), show_progress=False)
-
def handle_save_settings(state, preset, extensions, show_controls, theme):
contents = ui.save_settings(state, preset, extensions, show_controls, theme)
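Note: the theme toggle above chains a Python lambda that flips a hidden `theme_state` textbox with a JS callback that now also persists the choice to `localStorage`. A simplified sketch of that chain, assuming plain Gradio and an illustrative JS body in place of `ui.dark_theme_js`:

```python
# Hedged sketch of the toggle_dark_mode chain: flip the state server-side,
# then apply and persist the theme client-side.
import gradio as gr

with gr.Blocks() as demo:
    theme_state = gr.Textbox(visible=False, value='dark')
    toggle = gr.Button('Toggle light/dark theme 💡')

    toggle.click(
        lambda x: 'dark' if x == 'light' else 'light', theme_state, theme_state).then(
        None, None, None,
        js='() => { document.body.classList.toggle("dark"); '
           'localStorage.setItem("theme", document.body.classList.contains("dark") ? "dark" : "light"); }')

demo.launch()
```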
diff --git a/modules/utils.py b/modules/utils.py
index 577c55b8..21873541 100644
--- a/modules/utils.py
+++ b/modules/utils.py
@@ -3,7 +3,7 @@ import re
from datetime import datetime
from pathlib import Path
-from modules import github, shared
+from modules import shared
from modules.logging_colors import logger
@@ -182,7 +182,6 @@ def get_available_instruction_templates():
def get_available_extensions():
extensions = sorted(set(map(lambda x: x.parts[1], Path('extensions').glob('*/script.py'))), key=natural_keys)
- extensions = [v for v in extensions if v not in github.new_extensions]
return extensions
diff --git a/modules/web_search.py b/modules/web_search.py
index 1f670349..ffd7e483 100644
--- a/modules/web_search.py
+++ b/modules/web_search.py
@@ -3,8 +3,6 @@ from concurrent.futures import as_completed
from datetime import datetime
import requests
-from bs4 import BeautifulSoup
-from duckduckgo_search import DDGS
from modules.logging_colors import logger
@@ -14,35 +12,39 @@ def get_current_timestamp():
return datetime.now().strftime('%b %d, %Y %H:%M')
-def download_web_page(url, timeout=5):
- """Download and extract text from a web page"""
+def download_web_page(url, timeout=10):
+ """
+ Download a web page and convert its HTML content to structured Markdown text.
+ """
+ import html2text
+
try:
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
response = requests.get(url, headers=headers, timeout=timeout)
- response.raise_for_status()
+ response.raise_for_status() # Raise an exception for bad status codes
- soup = BeautifulSoup(response.content, 'html.parser')
+ # Initialize the HTML to Markdown converter
+ h = html2text.HTML2Text()
+ h.body_width = 0
- # Remove script and style elements
- for script in soup(["script", "style"]):
- script.decompose()
+ # Convert the HTML to Markdown
+ markdown_text = h.handle(response.text)
- # Get text and clean it up
- text = soup.get_text()
- lines = (line.strip() for line in text.splitlines())
- chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
- text = ' '.join(chunk for chunk in chunks if chunk)
-
- return text
- except Exception as e:
+ return markdown_text
+ except requests.exceptions.RequestException as e:
logger.error(f"Error downloading {url}: {e}")
- return f"[Error downloading content from {url}: {str(e)}]"
+ return ""
+ except Exception as e:
+ logger.error(f"An unexpected error occurred: {e}")
+ return ""
def perform_web_search(query, num_pages=3, max_workers=5):
"""Perform web search and return results with content"""
+ from duckduckgo_search import DDGS
+
try:
with DDGS() as ddgs:
results = list(ddgs.text(query, max_results=num_pages))
@@ -74,9 +76,7 @@ def perform_web_search(query, num_pages=3, max_workers=5):
'url': url,
'content': content
}
- except Exception as e:
- logger.error(f"Error downloading {url}: {e}")
- # Include failed downloads with empty content
+ except Exception:
search_results[index] = {
'title': title,
'url': url,
@@ -107,6 +107,13 @@ def add_web_search_attachments(history, row_idx, user_message, search_query, sta
logger.warning("No search results found")
return
+ # Filter out failed downloads before adding attachments
+ successful_results = [result for result in search_results if result['content'].strip()]
+
+ if not successful_results:
+ logger.warning("No successful downloads to add as attachments")
+ return
+
# Add search results as attachments
key = f"user_{row_idx}"
if key not in history['metadata']:
@@ -114,7 +121,7 @@ def add_web_search_attachments(history, row_idx, user_message, search_query, sta
if "attachments" not in history['metadata'][key]:
history['metadata'][key]["attachments"] = []
- for result in search_results:
+ for result in successful_results:
attachment = {
"name": result['title'],
"type": "text/html",
@@ -123,7 +130,7 @@ def add_web_search_attachments(history, row_idx, user_message, search_query, sta
}
history['metadata'][key]["attachments"].append(attachment)
- logger.info(f"Added {len(search_results)} web search results as attachments")
+ logger.info(f"Added {len(successful_results)} successful web search results as attachments.")
except Exception as e:
logger.error(f"Error in web search: {e}")
diff --git a/one_click.py b/one_click.py
index 482a6aa9..94f2aab0 100644
--- a/one_click.py
+++ b/one_click.py
@@ -17,8 +17,6 @@ import sys
# Define the required versions
TORCH_VERSION = "2.6.0"
-TORCHVISION_VERSION = "0.21.0"
-TORCHAUDIO_VERSION = "2.6.0"
PYTHON_VERSION = "3.11"
LIBSTDCXX_VERSION_LINUX = "12.1.0"
@@ -70,12 +68,8 @@ def is_installed():
def cpu_has_avx2():
try:
import cpuinfo
-
info = cpuinfo.get_cpu_info()
- if 'avx2' in info['flags']:
- return True
- else:
- return False
+ return 'avx2' in info['flags']
except:
return True
@@ -83,30 +77,119 @@ def cpu_has_avx2():
def cpu_has_amx():
try:
import cpuinfo
-
info = cpuinfo.get_cpu_info()
- if 'amx' in info['flags']:
- return True
- else:
- return False
+ return 'amx' in info['flags']
except:
return True
-def torch_version():
- site_packages_path = None
- for sitedir in site.getsitepackages():
- if "site-packages" in sitedir and conda_env_path in sitedir:
- site_packages_path = sitedir
- break
+def load_state():
+ """Load installer state from JSON file"""
+ if os.path.exists(state_file):
+ try:
+ with open(state_file, 'r') as f:
+ return json.load(f)
+ except:
+ return {}
+ return {}
- if site_packages_path:
- torch_version_file = open(os.path.join(site_packages_path, 'torch', 'version.py')).read().splitlines()
- torver = [line for line in torch_version_file if line.startswith('__version__')][0].split('__version__ = ')[1].strip("'")
+
+def save_state(state):
+ """Save installer state to JSON file"""
+ with open(state_file, 'w') as f:
+ json.dump(state, f)
+
+
+def get_gpu_choice():
+ """Get GPU choice from state file or ask user"""
+ state = load_state()
+ gpu_choice = state.get('gpu_choice')
+
+ if not gpu_choice:
+ if "GPU_CHOICE" in os.environ:
+ choice = os.environ["GPU_CHOICE"].upper()
+ print_big_message(f"Selected GPU choice \"{choice}\" based on the GPU_CHOICE environment variable.")
+ else:
+ choice = get_user_choice(
+ "What is your GPU?",
+ {
+ 'A': 'NVIDIA - CUDA 12.4',
+ 'B': 'AMD - Linux/macOS only, requires ROCm 6.2.4',
+ 'C': 'Apple M Series',
+ 'D': 'Intel Arc (beta)',
+ 'E': 'NVIDIA - CUDA 12.8',
+ 'N': 'CPU mode'
+ },
+ )
+
+ # Convert choice to GPU name
+ gpu_choice = {"A": "NVIDIA", "B": "AMD", "C": "APPLE", "D": "INTEL", "E": "NVIDIA_CUDA128", "N": "NONE"}[choice]
+
+ # Save choice to state
+ state['gpu_choice'] = gpu_choice
+ save_state(state)
+
+ return gpu_choice
+
+
+def get_pytorch_install_command(gpu_choice):
+ """Get PyTorch installation command based on GPU choice"""
+ base_cmd = f"python -m pip install torch=={TORCH_VERSION} "
+
+ if gpu_choice == "NVIDIA":
+ return base_cmd + "--index-url https://download.pytorch.org/whl/cu124"
+ elif gpu_choice == "NVIDIA_CUDA128":
+ return "python -m pip install torch==2.7.1 --index-url https://download.pytorch.org/whl/cu128"
+ elif gpu_choice == "AMD":
+ return base_cmd + "--index-url https://download.pytorch.org/whl/rocm6.2.4"
+ elif gpu_choice in ["APPLE", "NONE"]:
+ return base_cmd + "--index-url https://download.pytorch.org/whl/cpu"
+ elif gpu_choice == "INTEL":
+ if is_linux():
+ return "python -m pip install torch==2.1.0a0 intel-extension-for-pytorch==2.1.10+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
+ else:
+ return "python -m pip install torch==2.1.0a0 intel-extension-for-pytorch==2.1.10 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
else:
- from torch import __version__ as torver
+ return base_cmd
- return torver
+
+def get_pytorch_update_command(gpu_choice):
+ """Get PyTorch update command based on GPU choice"""
+ base_cmd = f"python -m pip install --upgrade torch=={TORCH_VERSION} "
+
+ if gpu_choice == "NVIDIA":
+ return f"{base_cmd} --index-url https://download.pytorch.org/whl/cu124"
+ elif gpu_choice == "NVIDIA_CUDA128":
+ return "python -m pip install --upgrade torch==2.7.1 --index-url https://download.pytorch.org/whl/cu128"
+ elif gpu_choice == "AMD":
+ return f"{base_cmd} --index-url https://download.pytorch.org/whl/rocm6.2.4"
+ elif gpu_choice in ["APPLE", "NONE"]:
+ return f"{base_cmd} --index-url https://download.pytorch.org/whl/cpu"
+ elif gpu_choice == "INTEL":
+ intel_extension = "intel-extension-for-pytorch==2.1.10+xpu" if is_linux() else "intel-extension-for-pytorch==2.1.10"
+ return f"{base_cmd} {intel_extension} --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
+ else:
+ return base_cmd
+
+
+def get_requirements_file(gpu_choice):
+ """Get requirements file path based on GPU choice"""
+ requirements_base = os.path.join("requirements", "full")
+
+ if gpu_choice == "AMD":
+ file_name = f"requirements_amd{'_noavx2' if not cpu_has_avx2() else ''}.txt"
+ elif gpu_choice == "APPLE":
+ file_name = f"requirements_apple_{'intel' if is_x86_64() else 'silicon'}.txt"
+ elif gpu_choice in ["INTEL", "NONE"]:
+ file_name = f"requirements_cpu_only{'_noavx2' if not cpu_has_avx2() else ''}.txt"
+ elif gpu_choice == "NVIDIA":
+ file_name = f"requirements{'_noavx2' if not cpu_has_avx2() else ''}.txt"
+ elif gpu_choice == "NVIDIA_CUDA128":
+ file_name = f"requirements_cuda128{'_noavx2' if not cpu_has_avx2() else ''}.txt"
+ else:
+ raise ValueError(f"Unknown GPU choice: {gpu_choice}")
+
+ return os.path.join(requirements_base, file_name)
def get_current_commit():
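Note: with the helpers above, the installer resolves the GPU choice once (environment variable, saved state, or interactive prompt) and derives everything else from it. A hedged usage sketch, assuming these functions are called from within one_click.py where `state_file` and the platform checks are defined:

```python
# Hedged usage sketch of the new helpers defined above.
gpu_choice = get_gpu_choice()                   # e.g. "NVIDIA" or "NVIDIA_CUDA128", cached in the state file
print(get_pytorch_install_command(gpu_choice))  # pip command with the matching index URL
print(get_requirements_file(gpu_choice))        # e.g. requirements/full/requirements_cuda128.txt
```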
@@ -209,28 +292,8 @@ def get_user_choice(question, options_dict):
def update_pytorch_and_python():
print_big_message("Checking for PyTorch updates.")
-
- # Update the Python version. Left here for future reference in case this becomes necessary.
- # print_big_message("Checking for PyTorch and Python updates.")
- # current_python_version = f"{sys.version_info.major}.{sys.version_info.minor}"
- # if current_python_version != PYTHON_VERSION:
- # run_cmd(f"conda install -y python={PYTHON_VERSION}", assert_success=True, environment=True)
-
- torver = torch_version()
- base_cmd = f"python -m pip install --upgrade torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION}"
-
- if "+cu" in torver:
- install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/cu124"
- elif "+rocm" in torver:
- install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/rocm6.2.4"
- elif "+cpu" in torver:
- install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/cpu"
- elif "+cxx11" in torver:
- intel_extension = "intel-extension-for-pytorch==2.1.10+xpu" if is_linux() else "intel-extension-for-pytorch==2.1.10"
- install_cmd = f"{base_cmd} {intel_extension} --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
- else:
- install_cmd = base_cmd
-
+ gpu_choice = get_gpu_choice()
+ install_cmd = get_pytorch_update_command(gpu_choice)
run_cmd(install_cmd, assert_success=True, environment=True)
@@ -256,43 +319,11 @@ def install_webui():
if os.path.isfile(state_file):
os.remove(state_file)
- # Ask the user for the GPU vendor
- if "GPU_CHOICE" in os.environ:
- choice = os.environ["GPU_CHOICE"].upper()
- print_big_message(f"Selected GPU choice \"{choice}\" based on the GPU_CHOICE environment variable.")
-
- # Warn about changed meanings and handle old choices
- if choice == "B":
- print_big_message("Warning: GPU_CHOICE='B' now means 'AMD' in the new version.")
- elif choice == "C":
- print_big_message("Warning: GPU_CHOICE='C' now means 'Apple M Series' in the new version.")
- elif choice == "D":
- print_big_message("Warning: GPU_CHOICE='D' now means 'Intel Arc' in the new version.")
- else:
- choice = get_user_choice(
- "What is your GPU?",
- {
- 'A': 'NVIDIA - CUDA 12.4',
- 'B': 'AMD - Linux/macOS only, requires ROCm 6.2.4',
- 'C': 'Apple M Series',
- 'D': 'Intel Arc (beta)',
- 'N': 'CPU mode'
- },
- )
-
- # Convert choices to GPU names for compatibility
- gpu_choice_to_name = {
- "A": "NVIDIA",
- "B": "AMD",
- "C": "APPLE",
- "D": "INTEL",
- "N": "NONE"
- }
-
- selected_gpu = gpu_choice_to_name[choice]
+ # Get GPU choice and save it to state
+ gpu_choice = get_gpu_choice()
# Write a flag to CMD_FLAGS.txt for CPU mode
- if selected_gpu == "NONE":
+ if gpu_choice == "NONE":
cmd_flags_path = os.path.join(script_dir, "user_data", "CMD_FLAGS.txt")
with open(cmd_flags_path, 'r+') as cmd_flags_file:
if "--cpu" not in cmd_flags_file.read():
@@ -300,34 +331,22 @@ def install_webui():
cmd_flags_file.write("\n--cpu\n")
# Handle CUDA version display
- elif any((is_windows(), is_linux())) and selected_gpu == "NVIDIA":
+ elif any((is_windows(), is_linux())) and gpu_choice == "NVIDIA":
print("CUDA: 12.4")
+ elif any((is_windows(), is_linux())) and gpu_choice == "NVIDIA_CUDA128":
+ print("CUDA: 12.8")
# No PyTorch for AMD on Windows (?)
- elif is_windows() and selected_gpu == "AMD":
+ elif is_windows() and gpu_choice == "AMD":
print("PyTorch setup on Windows is not implemented yet. Exiting...")
sys.exit(1)
- # Find the Pytorch installation command
- install_pytorch = f"python -m pip install torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION} "
-
- if selected_gpu == "NVIDIA":
- install_pytorch += "--index-url https://download.pytorch.org/whl/cu124"
- elif selected_gpu == "AMD":
- install_pytorch += "--index-url https://download.pytorch.org/whl/rocm6.2.4"
- elif selected_gpu in ["APPLE", "NONE"]:
- install_pytorch += "--index-url https://download.pytorch.org/whl/cpu"
- elif selected_gpu == "INTEL":
- if is_linux():
- install_pytorch = "python -m pip install torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
- else:
- install_pytorch = "python -m pip install torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
-
# Install Git and then Pytorch
print_big_message("Installing PyTorch.")
+ install_pytorch = get_pytorch_install_command(gpu_choice)
run_cmd(f"conda install -y ninja git && {install_pytorch} && python -m pip install py-cpuinfo==9.0.0", assert_success=True, environment=True)
- if selected_gpu == "INTEL":
+ if gpu_choice == "INTEL":
# Install oneAPI dependencies via conda
print_big_message("Installing Intel oneAPI runtime libraries.")
run_cmd("conda install -y -c https://software.repos.intel.com/python/conda/ -c conda-forge dpcpp-cpp-rt=2024.0 mkl-dpcpp=2024.0", environment=True)
@@ -349,31 +368,15 @@ def update_requirements(initial_installation=False, pull=True):
assert_success=True
)
- torver = torch_version()
- requirements_base = os.path.join("requirements", "full")
-
- if "+rocm" in torver:
- file_name = f"requirements_amd{'_noavx2' if not cpu_has_avx2() else ''}.txt"
- elif "+cpu" in torver or "+cxx11" in torver:
- file_name = f"requirements_cpu_only{'_noavx2' if not cpu_has_avx2() else ''}.txt"
- elif is_macos():
- file_name = f"requirements_apple_{'intel' if is_x86_64() else 'silicon'}.txt"
- else:
- file_name = f"requirements{'_noavx2' if not cpu_has_avx2() else ''}.txt"
-
- requirements_file = os.path.join(requirements_base, file_name)
-
- # Load state from JSON file
current_commit = get_current_commit()
- wheels_changed = False
- if os.path.exists(state_file):
- with open(state_file, 'r') as f:
- last_state = json.load(f)
-
- if 'wheels_changed' in last_state or last_state.get('last_installed_commit') != current_commit:
+ wheels_changed = not os.path.exists(state_file)
+ if not wheels_changed:
+ state = load_state()
+ if 'wheels_changed' in state or state.get('last_installed_commit') != current_commit:
wheels_changed = True
- else:
- wheels_changed = True
+
+ gpu_choice = get_gpu_choice()
+ requirements_file = get_requirements_file(gpu_choice)
if pull:
# Read .whl lines before pulling
@@ -409,19 +412,17 @@ def update_requirements(initial_installation=False, pull=True):
print_big_message(f"File '{file}' was updated during 'git pull'. Please run the script again.")
# Save state before exiting
- current_state = {}
+ state = load_state()
if wheels_changed:
- current_state['wheels_changed'] = True
-
- with open(state_file, 'w') as f:
- json.dump(current_state, f)
-
+ state['wheels_changed'] = True
+ save_state(state)
sys.exit(1)
# Save current state
- current_state = {'last_installed_commit': current_commit}
- with open(state_file, 'w') as f:
- json.dump(current_state, f)
+ state = load_state()
+ state['last_installed_commit'] = current_commit
+ state.pop('wheels_changed', None) # Remove wheels_changed flag
+ save_state(state)
if os.environ.get("INSTALL_EXTENSIONS", "").lower() in ("yes", "y", "true", "1", "t", "on"):
install_extensions_requirements()
@@ -432,11 +433,10 @@ def update_requirements(initial_installation=False, pull=True):
# Update PyTorch
if not initial_installation:
update_pytorch_and_python()
- torver = torch_version()
clean_outdated_pytorch_cuda_dependencies()
print_big_message(f"Installing webui requirements from file: {requirements_file}")
- print(f"TORCH: {torver}\n")
+ print(f"GPU Choice: {gpu_choice}\n")
# Prepare the requirements file
textgen_requirements = open(requirements_file).read().splitlines()
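Note: the installer state is now a small JSON dict shared by `load_state()`/`save_state()`, carrying `gpu_choice`, `last_installed_commit`, and a transient `wheels_changed` flag that forces requirement reinstalls on the next run. A self-contained sketch of that round trip, with an illustrative file path and a placeholder commit value:

```python
# Hedged sketch of the installer-state round trip used above.
import json
import os

state_file = "installer_state.json"  # illustrative; one_click.py defines its own path


def load_state():
    if os.path.exists(state_file):
        try:
            with open(state_file, 'r') as f:
                return json.load(f)
        except (OSError, json.JSONDecodeError):
            return {}
    return {}


def save_state(state):
    with open(state_file, 'w') as f:
        json.dump(state, f)


# After a successful install: record the commit and clear the transient flag.
state = load_state()
state['last_installed_commit'] = "abc1234"  # placeholder; the real value comes from get_current_commit()
state.pop('wheels_changed', None)
save_state(state)
```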
diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt
index 2c322715..a71e5240 100644
--- a/requirements/full/requirements.txt
+++ b/requirements/full/requirements.txt
@@ -1,5 +1,4 @@
accelerate==1.5.*
-beautifulsoup4==4.13.4
bitsandbytes==0.45.*
colorama
datasets
@@ -7,6 +6,7 @@ duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -16,6 +16,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
@@ -33,12 +34,12 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
-https://github.com/oobabooga/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
+https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt
index 6aeb325e..db1ead1a 100644
--- a/requirements/full/requirements_amd.txt
+++ b/requirements/full/requirements_amd.txt
@@ -1,11 +1,11 @@
accelerate==1.5.*
-beautifulsoup4==4.13.4
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -15,6 +15,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
@@ -32,7 +33,7 @@ sse-starlette==1.6.5
tiktoken
# AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt
index 3b052423..a08aa392 100644
--- a/requirements/full/requirements_amd_noavx2.txt
+++ b/requirements/full/requirements_amd_noavx2.txt
@@ -1,11 +1,11 @@
accelerate==1.5.*
-beautifulsoup4==4.13.4
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -15,6 +15,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
@@ -32,7 +33,7 @@ sse-starlette==1.6.5
tiktoken
# AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt
index 8c51459e..fa217c3e 100644
--- a/requirements/full/requirements_apple_intel.txt
+++ b/requirements/full/requirements_apple_intel.txt
@@ -1,11 +1,11 @@
accelerate==1.5.*
-beautifulsoup4==4.13.4
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -15,6 +15,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
@@ -32,7 +33,7 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9-py3-none-any.whl
-https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3-py3-none-any.whl
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl
diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt
index b9f15d45..52581f1a 100644
--- a/requirements/full/requirements_apple_silicon.txt
+++ b/requirements/full/requirements_apple_silicon.txt
@@ -1,11 +1,11 @@
accelerate==1.5.*
-beautifulsoup4==4.13.4
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -15,6 +15,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
@@ -32,8 +33,8 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
-https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9-py3-none-any.whl
-https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
+https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3-py3-none-any.whl
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl
diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt
index 0877d968..b72f22aa 100644
--- a/requirements/full/requirements_cpu_only.txt
+++ b/requirements/full/requirements_cpu_only.txt
@@ -1,11 +1,11 @@
accelerate==1.5.*
-beautifulsoup4==4.13.4
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -15,6 +15,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
@@ -32,5 +33,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt
index cab78237..e8de6057 100644
--- a/requirements/full/requirements_cpu_only_noavx2.txt
+++ b/requirements/full/requirements_cpu_only_noavx2.txt
@@ -1,11 +1,11 @@
accelerate==1.5.*
-beautifulsoup4==4.13.4
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -15,6 +15,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
@@ -32,5 +33,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
diff --git a/requirements/full/requirements_cuda128.txt b/requirements/full/requirements_cuda128.txt
new file mode 100644
index 00000000..7851041f
--- /dev/null
+++ b/requirements/full/requirements_cuda128.txt
@@ -0,0 +1,45 @@
+accelerate==1.5.*
+bitsandbytes==0.45.*
+colorama
+datasets
+duckduckgo_search==8.0.2
+einops
+fastapi==0.112.4
+gradio==4.37.*
+html2text==2025.4.15
+jinja2==3.1.6
+markdown
+numpy==2.2.*
+pandas
+peft==0.15.*
+Pillow>=9.5.0
+psutil
+pydantic==2.8.2
+PyPDF2==3.0.1
+python-docx==1.1.2
+pyyaml
+requests
+rich
+safetensors==0.5.*
+scipy
+sentencepiece
+tensorboard
+transformers==4.50.*
+tqdm
+wandb
+
+# API
+flask_cloudflared==0.0.14
+sse-starlette==1.6.5
+tiktoken
+
+# CUDA wheels
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
+https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
diff --git a/requirements/full/requirements_cuda128_noavx2.txt b/requirements/full/requirements_cuda128_noavx2.txt
new file mode 100644
index 00000000..c8015166
--- /dev/null
+++ b/requirements/full/requirements_cuda128_noavx2.txt
@@ -0,0 +1,45 @@
+accelerate==1.5.*
+bitsandbytes==0.45.*
+colorama
+datasets
+duckduckgo_search==8.0.2
+einops
+fastapi==0.112.4
+gradio==4.37.*
+html2text==2025.4.15
+jinja2==3.1.6
+markdown
+numpy==2.2.*
+pandas
+peft==0.15.*
+Pillow>=9.5.0
+psutil
+pydantic==2.8.2
+PyPDF2==3.0.1
+python-docx==1.1.2
+pyyaml
+requests
+rich
+safetensors==0.5.*
+scipy
+sentencepiece
+tensorboard
+transformers==4.50.*
+tqdm
+wandb
+
+# API
+flask_cloudflared==0.0.14
+sse-starlette==1.6.5
+tiktoken
+
+# CUDA wheels
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
+https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt
index dfd42577..5e81ce1f 100644
--- a/requirements/full/requirements_noavx2.txt
+++ b/requirements/full/requirements_noavx2.txt
@@ -1,5 +1,4 @@
accelerate==1.5.*
-beautifulsoup4==4.13.4
bitsandbytes==0.45.*
colorama
datasets
@@ -7,6 +6,7 @@ duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -16,6 +16,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
@@ -33,12 +34,12 @@ sse-starlette==1.6.5
tiktoken
 
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
-https://github.com/oobabooga/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
+https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt
index 5d9f84ce..d26663a7 100644
--- a/requirements/full/requirements_nowheels.txt
+++ b/requirements/full/requirements_nowheels.txt
@@ -1,11 +1,11 @@
accelerate==1.5.*
-beautifulsoup4==4.13.4
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -15,6 +15,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt
index fdae681d..4ddcf43f 100644
--- a/requirements/portable/requirements.txt
+++ b/requirements/portable/requirements.txt
@@ -1,12 +1,13 @@
-beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
@@ -18,5 +19,5 @@ sse-starlette==1.6.5
tiktoken
 
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt
index a58f39f7..38a21618 100644
--- a/requirements/portable/requirements_apple_intel.txt
+++ b/requirements/portable/requirements_apple_intel.txt
@@ -1,12 +1,13 @@
-beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
@@ -18,5 +19,5 @@ sse-starlette==1.6.5
tiktoken
 
# Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt
index 91ea3a6d..0b70c800 100644
--- a/requirements/portable/requirements_apple_silicon.txt
+++ b/requirements/portable/requirements_apple_silicon.txt
@@ -1,12 +1,13 @@
-beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
@@ -18,6 +19,6 @@ sse-starlette==1.6.5
tiktoken
 
# Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0"
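The platform_release ranges in the Mac wheel markers above refer to the Darwin kernel version rather than the macOS marketing version: Darwin 24.x, 23.x and 22.x correspond to macOS 15, 14 and 13. A small illustrative sketch of that mapping (the function name and fallback string are made up for the example):

# Illustrative only: map platform.release() (the Darwin kernel version on macOS)
# to the macosx_*_0 wheel families targeted by the markers above.
import platform

def macos_wheel_family() -> str:
    darwin_major = int(platform.release().split(".")[0])  # e.g. "24.3.0" -> 24
    families = {24: "macosx_15_0", 23: "macosx_14_0", 22: "macosx_13_0"}
    return families.get(darwin_major, "no prebuilt wheel for this release")

if platform.system() == "Darwin":
    print(macos_wheel_family())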
diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt
index 37e5aa40..510a20f4 100644
--- a/requirements/portable/requirements_cpu_only.txt
+++ b/requirements/portable/requirements_cpu_only.txt
@@ -1,12 +1,13 @@
-beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
@@ -18,5 +19,5 @@ sse-starlette==1.6.5
tiktoken
 
# llama.cpp (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"
diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt
index dcb2884b..e6d9f0c5 100644
--- a/requirements/portable/requirements_cpu_only_noavx2.txt
+++ b/requirements/portable/requirements_cpu_only_noavx2.txt
@@ -1,12 +1,13 @@
-beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
@@ -18,5 +19,5 @@ sse-starlette==1.6.5
tiktoken
 
# llama.cpp (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"
diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt
index 8f1295bb..48f92e0a 100644
--- a/requirements/portable/requirements_noavx2.txt
+++ b/requirements/portable/requirements_noavx2.txt
@@ -1,12 +1,13 @@
-beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
@@ -18,5 +19,5 @@ sse-starlette==1.6.5
tiktoken
 
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_nowheels.txt b/requirements/portable/requirements_nowheels.txt
index 21805fe2..3d30e6d6 100644
--- a/requirements/portable/requirements_nowheels.txt
+++ b/requirements/portable/requirements_nowheels.txt
@@ -1,12 +1,13 @@
-beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt
index 858b4488..9f93424f 100644
--- a/requirements/portable/requirements_vulkan.txt
+++ b/requirements/portable/requirements_vulkan.txt
@@ -1,12 +1,13 @@
-beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
@@ -18,5 +19,5 @@ sse-starlette==1.6.5
tiktoken
 
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt
index 569bae99..9070b9a6 100644
--- a/requirements/portable/requirements_vulkan_noavx2.txt
+++ b/requirements/portable/requirements_vulkan_noavx2.txt
@@ -1,12 +1,13 @@
-beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
@@ -18,5 +19,5 @@ sse-starlette==1.6.5
tiktoken
 
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/server.py b/server.py
index c22ed1f1..e0e3fbe5 100644
--- a/server.py
+++ b/server.py
@@ -1,12 +1,24 @@
import os
+import shutil
import warnings
+from pathlib import Path
from modules import shared
from modules.block_requests import OpenMonkeyPatch, RequestBlocker
from modules.logging_colors import logger
-os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
-os.environ['BITSANDBYTES_NOWELCOME'] = '1'
+# Set up Gradio temp directory path
+gradio_temp_path = Path('user_data') / 'cache' / 'gradio'
+shutil.rmtree(gradio_temp_path, ignore_errors=True)
+gradio_temp_path.mkdir(parents=True, exist_ok=True)
+
+# Set environment variables
+os.environ.update({
+ 'GRADIO_ANALYTICS_ENABLED': 'False',
+ 'BITSANDBYTES_NOWELCOME': '1',
+ 'GRADIO_TEMP_DIR': str(gradio_temp_path)
+})
+
warnings.filterwarnings('ignore', category=UserWarning, message='TypedStorage is deprecated')
warnings.filterwarnings('ignore', category=UserWarning, message='Using the update method is deprecated')
warnings.filterwarnings('ignore', category=UserWarning, message='Field "model_name" has conflict')
@@ -27,7 +39,6 @@ import signal
import sys
import time
from functools import partial
-from pathlib import Path
from threading import Lock, Thread
 
import yaml
@@ -45,6 +56,7 @@ from modules import (
ui_session,
utils
)
+from modules.chat import generate_pfp_cache
from modules.extensions import apply_extensions
from modules.LoRA import add_lora_to_model
from modules.models import load_model, unload_model_if_idle
@@ -60,6 +72,14 @@ from modules.utils import gradio
def signal_handler(sig, frame):
logger.info("Received Ctrl+C. Shutting down Text generation web UI gracefully.")
+
+ # Explicitly stop LlamaServer to avoid __del__ cleanup issues during shutdown
+ if shared.model and shared.model.__class__.__name__ == 'LlamaServer':
+ try:
+ shared.model.stop()
+ except:
+ pass
+
sys.exit(0)
@@ -85,17 +105,20 @@ def create_interface():
# Force some events to be triggered on page load
shared.persistent_interface_state.update({
+ 'mode': shared.settings['mode'],
'loader': shared.args.loader or 'llama.cpp',
- 'mode': shared.settings['mode'] if shared.settings['mode'] == 'instruct' else gr.update(),
- 'character_menu': shared.args.character or shared.settings['character'],
- 'instruction_template_str': shared.settings['instruction_template_str'],
- 'prompt_menu-default': shared.settings['prompt-default'],
- 'prompt_menu-notebook': shared.settings['prompt-notebook'],
'filter_by_loader': (shared.args.loader or 'All') if not shared.args.portable else 'llama.cpp'
})
- if Path("user_data/cache/pfp_character.png").exists():
- Path("user_data/cache/pfp_character.png").unlink()
+ # Clear existing cache files
+ for cache_file in ['pfp_character.png', 'pfp_character_thumb.png']:
+ cache_path = Path(f"user_data/cache/{cache_file}")
+ if cache_path.exists():
+ cache_path.unlink()
+
+ # Regenerate for default character
+ if shared.settings['mode'] != 'instruct':
+ generate_pfp_cache(shared.settings['character'])
# css/js strings
css = ui.css
@@ -126,7 +149,7 @@ def create_interface():
ui_default.create_ui()
ui_notebook.create_ui()
- ui_parameters.create_ui(shared.settings['preset']) # Parameters tab
+ ui_parameters.create_ui() # Parameters tab
ui_model_menu.create_ui() # Model tab
if not shared.args.portable:
training.create_ui() # Training tab
@@ -142,17 +165,35 @@ def create_interface():
ui_parameters.create_event_handlers()
ui_model_menu.create_event_handlers()
 
+ # UI persistence events
+ ui.setup_auto_save()
+
# Interface launch events
shared.gradio['interface'].load(
None,
gradio('show_controls'),
None,
js=f"""(x) => {{
- if ({str(shared.settings['dark_theme']).lower()}) {{
- document.getElementsByTagName('body')[0].classList.add('dark');
- }}
- else {{
- document.getElementsByTagName('body')[0].classList.remove('dark');
+ // Check if this is first visit or if localStorage is out of sync
+ const savedTheme = localStorage.getItem('theme');
+ const serverTheme = {str(shared.settings['dark_theme']).lower()} ? 'dark' : 'light';
+
+ // If no saved theme or mismatch with server on first load, use server setting
+ if (!savedTheme || !sessionStorage.getItem('theme_synced')) {{
+ localStorage.setItem('theme', serverTheme);
+ sessionStorage.setItem('theme_synced', 'true');
+ if (serverTheme === 'dark') {{
+ document.getElementsByTagName('body')[0].classList.add('dark');
+ }} else {{
+ document.getElementsByTagName('body')[0].classList.remove('dark');
+ }}
+ }} else {{
+ // Use localStorage for subsequent reloads
+ if (savedTheme === 'dark') {{
+ document.getElementsByTagName('body')[0].classList.add('dark');
+ }} else {{
+ document.getElementsByTagName('body')[0].classList.remove('dark');
+ }}
}}
{js}
{ui.show_controls_js}
@@ -208,13 +249,7 @@ if __name__ == "__main__":
shared.model_config['.*'] = get_fallback_settings()
shared.model_config.move_to_end('.*', last=False) # Move to the beginning
- # Activate the extensions listed on settings.yaml
extensions_module.available_extensions = utils.get_available_extensions()
- for extension in shared.settings['default_extensions']:
- shared.args.extensions = shared.args.extensions or []
- if extension not in shared.args.extensions:
- shared.args.extensions.append(extension)
-
available_models = utils.get_available_models()
# Model defined through --model
@@ -277,8 +312,8 @@ if __name__ == "__main__":
if shared.args.nowebui:
# Start the API in standalone mode
- shared.args.extensions = [x for x in shared.args.extensions if x != 'gallery']
- if shared.args.extensions is not None and len(shared.args.extensions) > 0:
+ shared.args.extensions = [x for x in (shared.args.extensions or []) if x != 'gallery']
+ if shared.args.extensions:
extensions_module.load_extensions()
else:
# Launch the web UI
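The signal_handler change above stops a running LlamaServer explicitly on Ctrl+C instead of leaving cleanup to __del__ during interpreter teardown. A standalone sketch of the same pattern on a Unix-like system, using a generic child process as a stand-in for the model server (the process and handler names here are illustrative, not the project's API):

# Standalone sketch: terminate a child server process explicitly on Ctrl+C
# rather than relying on __del__ at interpreter shutdown.
import signal
import subprocess
import sys
import time

server = subprocess.Popen(["sleep", "3600"])  # stand-in for a model server process

def handle_sigint(sig, frame):
    try:
        server.terminate()
        server.wait(timeout=5)
    except Exception:
        pass  # cleanup errors must not block shutdown
    sys.exit(0)

signal.signal(signal.SIGINT, handle_sigint)

while True:
    time.sleep(1)  # main loop; Ctrl+C triggers handle_sigint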
diff --git a/user_data/presets/Contrastive Search.yaml b/user_data/presets/Contrastive Search.yaml
deleted file mode 100644
index d9a47a9f..00000000
--- a/user_data/presets/Contrastive Search.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-do_sample: false
-top_k: 4
-penalty_alpha: 0.3
diff --git a/user_data/presets/Null preset.yaml b/user_data/presets/Null preset.yaml
deleted file mode 100644
index 714aa9a3..00000000
--- a/user_data/presets/Null preset.yaml
+++ /dev/null
@@ -1 +0,0 @@
-temperature: 1
diff --git a/user_data/presets/Qwen3 - No Thinking.yaml b/user_data/presets/Qwen3 - No Thinking.yaml
new file mode 100644
index 00000000..b1c1e03c
--- /dev/null
+++ b/user_data/presets/Qwen3 - No Thinking.yaml
@@ -0,0 +1,3 @@
+temperature: 0.7
+top_p: 0.8
+top_k: 20
diff --git a/user_data/presets/Qwen3 - Thinking.yaml b/user_data/presets/Qwen3 - Thinking.yaml
new file mode 100644
index 00000000..cb2942f9
--- /dev/null
+++ b/user_data/presets/Qwen3 - Thinking.yaml
@@ -0,0 +1,3 @@
+temperature: 0.6
+top_p: 0.95
+top_k: 20
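The two new Qwen3 presets pin only temperature, top_p and top_k, leaving every other sampling parameter at its default. A hypothetical loader showing how such a preset file could be merged over defaults (the defaults dict and function below are assumptions for illustration; the project's actual preset handling is not shown in this diff):

# Illustrative preset loader: merge a preset YAML like the ones added above
# into a dictionary of default sampling parameters.
import yaml

DEFAULTS = {"temperature": 1.0, "top_p": 1.0, "top_k": 0, "min_p": 0.0}

def load_preset(path: str) -> dict:
    params = dict(DEFAULTS)
    with open(path, encoding="utf-8") as f:
        params.update(yaml.safe_load(f) or {})
    return params

print(load_preset("user_data/presets/Qwen3 - Thinking.yaml"))
# e.g. {'temperature': 0.6, 'top_p': 0.95, 'top_k': 20, 'min_p': 0.0}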
diff --git a/user_data/settings-template.yaml b/user_data/settings-template.yaml
deleted file mode 100644
index ce0f77e1..00000000
--- a/user_data/settings-template.yaml
+++ /dev/null
@@ -1,77 +0,0 @@
-show_controls: true
-start_with: ''
-mode: instruct
-chat_style: cai-chat
-chat-instruct_command: |-
- Continue the chat dialogue below. Write a single reply for the character "<|character|>".
-
- <|prompt|>
-prompt-default: QA
-prompt-notebook: QA
-character: Assistant
-name1: You
-user_bio: ''
-custom_system_message: ''
-preset: min_p
-max_new_tokens: 512
-max_new_tokens_min: 1
-max_new_tokens_max: 4096
-prompt_lookup_num_tokens: 0
-max_tokens_second: 0
-max_updates_second: 12
-auto_max_new_tokens: true
-ban_eos_token: false
-add_bos_token: true
-enable_thinking: true
-skip_special_tokens: true
-stream: true
-static_cache: false
-truncation_length: 8192
-seed: -1
-custom_stopping_strings: ''
-custom_token_bans: ''
-negative_prompt: ''
-dark_theme: true
-default_extensions: []
-instruction_template_str: |-
- {%- set ns = namespace(found=false) -%}
- {%- for message in messages -%}
- {%- if message['role'] == 'system' -%}
- {%- set ns.found = true -%}
- {%- endif -%}
- {%- endfor -%}
- {%- if not ns.found -%}
- {{- '' + 'Below is an instruction that describes a task. Write a response that appropriately completes the request.' + '\n\n' -}}
- {%- endif %}
- {%- for message in messages %}
- {%- if message['role'] == 'system' -%}
- {{- '' + message['content'] + '\n\n' -}}
- {%- else -%}
- {%- if message['role'] == 'user' -%}
- {{-'### Instruction:\n' + message['content'] + '\n\n'-}}
- {%- else -%}
- {{-'### Response:\n' + message['content'] + '\n\n' -}}
- {%- endif -%}
- {%- endif -%}
- {%- endfor -%}
- {%- if add_generation_prompt -%}
- {{-'### Response:\n'-}}
- {%- endif -%}
-chat_template_str: |-
- {%- for message in messages %}
- {%- if message['role'] == 'system' -%}
- {%- if message['content'] -%}
- {{- message['content'] + '\n\n' -}}
- {%- endif -%}
- {%- if user_bio -%}
- {{- user_bio + '\n\n' -}}
- {%- endif -%}
- {%- else -%}
- {%- if message['role'] == 'user' -%}
- {{- name1 + ': ' + message['content'] + '\n'-}}
- {%- else -%}
- {{- name2 + ': ' + message['content'] + '\n' -}}
- {%- endif -%}
- {%- endif -%}
- {%- endfor -%}
-