Merge pull request #7057 from oobabooga/dev

Merge dev branch
This commit is contained in:
oobabooga 2025-06-10 23:08:44 -03:00 committed by GitHub
commit 1e96dcf369
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
55 changed files with 1621 additions and 906 deletions

View file

@@ -160,16 +160,19 @@ jobs:
rm requirements_cuda_temp.txt
fi
# 6. Create ZIP file
# 6. Move up and rename folder to include version
cd ..
VERSION_CLEAN="${VERSION#v}"
mv text-generation-webui text-generation-webui-${VERSION_CLEAN}
# 7. Create ZIP file
ZIP_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}-cuda${CUDA_VERSION}.zip"
echo "Creating archive: $ZIP_NAME"
if [[ "$RUNNER_OS" == "Windows" ]]; then
powershell -Command "Compress-Archive -Path text-generation-webui -DestinationPath $ZIP_NAME"
powershell -Command "Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ZIP_NAME"
else
zip -r "$ZIP_NAME" text-generation-webui
zip -r "$ZIP_NAME" text-generation-webui-${VERSION_CLEAN}
fi
- name: Upload files to a GitHub release

View file

@@ -146,16 +146,19 @@ jobs:
echo "Installing Python packages from $REQ_FILE..."
$PIP_PATH install --target="./$PACKAGES_PATH" -r "$REQ_FILE"
# 6. Create ZIP file
# 5. Move up and rename folder to include version
cd ..
VERSION_CLEAN="${VERSION#v}"
mv text-generation-webui text-generation-webui-${VERSION_CLEAN}
# 6. Create ZIP file
ZIP_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}-vulkan.zip"
echo "Creating archive: $ZIP_NAME"
if [[ "$RUNNER_OS" == "Windows" ]]; then
powershell -Command "Compress-Archive -Path text-generation-webui -DestinationPath $ZIP_NAME"
powershell -Command "Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ZIP_NAME"
else
zip -r "$ZIP_NAME" text-generation-webui
zip -r "$ZIP_NAME" text-generation-webui-${VERSION_CLEAN}
fi
- name: Upload files to a GitHub release

View file

@@ -170,16 +170,19 @@ jobs:
echo "Installing Python packages from $REQ_FILE..."
$PIP_PATH install --target="./$PACKAGES_PATH" -r "$REQ_FILE"
# 5. Create ZIP file
# 5. Move up and rename folder to include version
cd ..
VERSION_CLEAN="${VERSION#v}"
mv text-generation-webui text-generation-webui-${VERSION_CLEAN}
# 6. Create ZIP file
ZIP_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}.zip"
echo "Creating archive: $ZIP_NAME"
if [[ "$RUNNER_OS" == "Windows" ]]; then
powershell -Command "Compress-Archive -Path text-generation-webui -DestinationPath $ZIP_NAME"
powershell -Command "Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ZIP_NAME"
else
zip -r "$ZIP_NAME" text-generation-webui
zip -r "$ZIP_NAME" text-generation-webui-${VERSION_CLEAN}
fi
- name: Upload files to a GitHub release
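
The same three-step pattern now appears in all three portable-build workflows (the CUDA, Vulkan, and plain builds): strip the leading "v" from the tag with the `${VERSION#v}` parameter expansion, rename the source folder so the version lands in the extracted directory name, and only then build the archive. As a rough illustration, the sketch below restates that step in Python; `package_portable` and its arguments are invented for this sketch and are not part of the workflows.

import shutil
from pathlib import Path

def package_portable(version: str, platform: str, flavor: str = "") -> Path:
    version_clean = version.removeprefix("v")  # mirrors VERSION_CLEAN="${VERSION#v}"
    src = Path("text-generation-webui")
    dst = Path(f"text-generation-webui-{version_clean}")
    src.rename(dst)  # the extracted folder now carries the version
    zip_base = f"textgen-portable-{version_clean}-{platform}{flavor}"
    # shutil.make_archive appends ".zip" and recurses like `zip -r`
    shutil.make_archive(zip_base, "zip", root_dir=".", base_dir=dst.name)
    return Path(zip_base + ".zip")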

View file

@@ -16,7 +16,7 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github.
- Easy setup: Choose between **portable builds** (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or the one-click installer that creates a self-contained `installer_files` directory.
- 100% offline and private, with zero telemetry, external resources, or remote update requests.
- Automatic prompt formatting using Jinja2 templates. You never need to worry about prompt formats.
- **File attachments**: Upload text files and PDF documents to talk about their contents.
- **File attachments**: Upload text files, PDF documents, and .docx files to talk about their contents.
- **Web search**: Optionally search the internet with LLM-generated queries to add context to the conversation.
- Aesthetic UI with dark and light themes.
- `instruct` mode for instruction-following (like ChatGPT), and `chat-instruct`/`chat` modes for talking to custom characters.

View file

@@ -17,6 +17,14 @@
color: #d1d5db !important;
}
.chat .message-body :is(th, td) {
border-color: #40404096 !important;
}
.dark .chat .message-body :is(th, td) {
border-color: #ffffff75 !important;
}
.chat .message-body :is(p, ul, ol) {
margin: 1.25em 0 !important;
}

View file

@@ -1,11 +1,11 @@
:root {
--darker-gray: #202123;
--dark-gray: #2A2B32;
--light-gray: #373943;
--darker-gray: #1C1C1D;
--dark-gray: #212125;
--light-gray: #2C2E34;
--light-theme-gray: #f9fbff;
--border-color-dark: #525252;
--header-width: 112px;
--selected-item-color-dark: #2E2F38;
--selected-item-color-dark: #282930;
}
@font-face {
@@ -53,7 +53,7 @@ div.svelte-iyf88w {
}
.refresh-button {
max-width: 4.4em;
max-width: none;
min-width: 2.2em !important;
height: 39.594px;
align-self: end;
@@ -62,6 +62,10 @@ div.svelte-iyf88w {
flex: none;
}
.refresh-button-medium {
max-width: 4.4em;
}
.refresh-button-small {
max-width: 2.2em;
}
@@ -265,7 +269,7 @@ button {
.dark .pretty_scrollbar::-webkit-scrollbar-thumb,
.dark .pretty_scrollbar::-webkit-scrollbar-thumb:hover {
background: rgb(255 255 255 / 10%);
background: rgb(255 255 255 / 6.25%);
border-radius: 10px;
}
@@ -582,7 +586,6 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
#chat-input {
padding: 0;
padding-top: 18px;
background: transparent;
border: none;
}
@@ -661,37 +664,12 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
}
}
#show-controls {
position: absolute;
background-color: transparent;
border: 0 !important;
border-radius: 0;
}
#show-controls label {
z-index: 1000;
position: absolute;
right: 30px;
top: 10px;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
.dark #show-controls span {
color: var(--neutral-400);
}
#show-controls span {
color: var(--neutral-600);
}
#typing-container {
display: none;
position: absolute;
background-color: transparent;
left: -2px;
top: 4px;
left: 23px;
top: -5px;
padding: var(--block-padding);
}
@@ -767,16 +745,13 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
justify-content: space-between;
margin: 0 !important;
height: 36px;
border-color: transparent !important;
}
.hover-menu button:not(#clear-history-confirm) {
border-bottom: 0 !important;
}
.hover-menu button:not(#clear-history-confirm):last-child {
border-bottom: var(--button-border-width) solid var(--border-color-primary) !important;
}
.hover-menu button:hover {
background: #dbeafe !important;
}
@@ -785,6 +760,37 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
background: var(--selected-item-color-dark) !important;
}
#show-controls {
background-color: white;
border-color: transparent !important;
height: 36px;
border-radius: 0;
border-bottom: 0 !important;
padding-top: 3px;
padding-left: 4px;
display: flex;
font-weight: normal;
}
.dark #show-controls {
background-color: var(--darker-gray);
}
#show-controls label {
display: flex;
flex-direction: row-reverse;
justify-content: start;
width: 100%;
padding-right: 12px;
gap: 10px;
font-weight: 600;
color: var(--button-secondary-text-color);
}
#show-controls label input {
margin-top: 4px;
}
.transparent-substring {
opacity: 0.333;
}
@@ -1326,8 +1332,13 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
overflow: hidden;
}
.thinking-content:focus, .thinking-header:focus {
outline: 0 !important;
}
.dark .thinking-block {
background-color: var(--darker-gray);
background-color: transparent;
border: 1px solid var(--input-border-color);
}
.thinking-header {
@@ -1555,3 +1566,66 @@ strong {
button:focus {
outline: none;
}
/* Fix extra gaps for hidden elements on the right sidebar */
.svelte-sa48pu.stretch:has(> .hidden:only-child) {
display: none;
}
.delete-container {
position: absolute;
right: 8px;
display: flex;
gap: 6px;
opacity: 0;
transition: opacity 0.2s;
margin-left: 0;
}
.chat-label-with-delete {
position: relative;
padding-right: 60px;
}
.trash-btn {
border: none;
background: none;
cursor: pointer;
padding: 2px;
opacity: 0.7;
}
.cancel-btn {
border: none;
background: #ef4444;
color: white;
cursor: pointer;
width: 20px;
height: 20px;
border-radius: 2px;
font-family: monospace;
font-size: 12px;
align-items: center;
justify-content: center;
display: none;
}
.confirm-btn {
border: none;
background: #22c55e;
color: white;
cursor: pointer;
width: 20px;
height: 20px;
border-radius: 2px;
font-family: monospace;
font-size: 12px;
align-items: center;
justify-content: center;
display: none;
}
/* Disable hover effects while scrolling */
.chat-parent.scrolling * {
pointer-events: none !important;
}

View file

@@ -32,6 +32,7 @@ class ModelDownloader:
self.max_retries = max_retries
self.session = self.get_session()
self._progress_bar_slots = None
self.progress_queue = None
def get_session(self):
session = requests.Session()
@@ -218,33 +219,45 @@ class ModelDownloader:
max_retries = self.max_retries
attempt = 0
file_downloaded_count_for_progress = 0
try:
while attempt < max_retries:
attempt += 1
session = self.session
headers = {}
mode = 'wb'
current_file_size_on_disk = 0
try:
if output_path.exists() and not start_from_scratch:
# Resume download
r = session.get(url, stream=True, timeout=20)
total_size = int(r.headers.get('content-length', 0))
if output_path.stat().st_size >= total_size:
current_file_size_on_disk = output_path.stat().st_size
r_head = session.head(url, timeout=20)
r_head.raise_for_status()
total_size = int(r_head.headers.get('content-length', 0))
if current_file_size_on_disk >= total_size and total_size > 0:
if self.progress_queue is not None and total_size > 0:
self.progress_queue.put((1.0, str(filename)))
return
headers = {'Range': f'bytes={output_path.stat().st_size}-'}
headers = {'Range': f'bytes={current_file_size_on_disk}-'}
mode = 'ab'
with session.get(url, stream=True, headers=headers, timeout=30) as r:
r.raise_for_status() # If status is not 2xx, raise an error
total_size = int(r.headers.get('content-length', 0))
block_size = 1024 * 1024 # 1MB
r.raise_for_status()
total_size_from_stream = int(r.headers.get('content-length', 0))
if mode == 'ab':
effective_total_size = current_file_size_on_disk + total_size_from_stream
else:
effective_total_size = total_size_from_stream
filename_str = str(filename) # Convert PosixPath to string if necessary
block_size = 1024 * 1024
filename_str = str(filename)
tqdm_kwargs = {
'total': total_size,
'total': effective_total_size,
'initial': current_file_size_on_disk if mode == 'ab' else 0,
'unit': 'B',
'unit_scale': True,
'unit_divisor': 1024,
@@ -261,16 +274,20 @@ class ModelDownloader:
})
with open(output_path, mode) as f:
if mode == 'ab':
f.seek(current_file_size_on_disk)
with tqdm.tqdm(**tqdm_kwargs) as t:
count = 0
file_downloaded_count_for_progress = current_file_size_on_disk
for data in r.iter_content(block_size):
f.write(data)
t.update(len(data))
if total_size != 0 and self.progress_bar is not None:
count += len(data)
self.progress_bar(float(count) / float(total_size), f"{filename_str}")
if effective_total_size != 0 and self.progress_queue is not None:
file_downloaded_count_for_progress += len(data)
progress_fraction = float(file_downloaded_count_for_progress) / float(effective_total_size)
self.progress_queue.put((progress_fraction, filename_str))
break
break # Exit loop if successful
except (RequestException, ConnectionError, Timeout) as e:
print(f"Error downloading {filename}: {e}.")
print(f"That was attempt {attempt}/{max_retries}.", end=' ')
@@ -295,10 +312,9 @@ class ModelDownloader:
finally:
print(f"\nDownload of {len(file_list)} files to {output_folder} completed.")
def download_model_files(self, model, branch, links, sha256, output_folder, progress_bar=None, start_from_scratch=False, threads=4, specific_file=None, is_llamacpp=False):
self.progress_bar = progress_bar
def download_model_files(self, model, branch, links, sha256, output_folder, progress_queue=None, start_from_scratch=False, threads=4, specific_file=None, is_llamacpp=False):
self.progress_queue = progress_queue
# Create the folder and write the metadata
output_folder.mkdir(parents=True, exist_ok=True)
if not is_llamacpp:
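
Read together, the changes above replace the old Gradio `progress_bar` callback with a thread-safe queue of `(fraction, filename)` tuples and make resumption size-aware: a HEAD request establishes the total size up front, and when appending, the stream's `content-length` (which only covers the remaining range) is added to the bytes already on disk. Below is a self-contained sketch of that flow, assuming only `requests`; `resume_download` is an invented name, not the class method itself.

import requests
from pathlib import Path

def resume_download(url, output_path: Path, progress_queue=None):
    """Sketch: size the remote file with HEAD, then fetch only the missing
    byte range, reporting (fraction, filename) tuples to an optional queue."""
    on_disk = output_path.stat().st_size if output_path.exists() else 0
    head = requests.head(url, timeout=20)
    head.raise_for_status()
    total = int(head.headers.get('content-length', 0))
    if total and on_disk >= total:
        if progress_queue is not None:
            progress_queue.put((1.0, output_path.name))  # nothing left to do
        return
    headers = {'Range': f'bytes={on_disk}-'} if on_disk else {}
    with requests.get(url, stream=True, headers=headers, timeout=30) as r:
        r.raise_for_status()
        # content-length of a ranged response covers only the remainder
        effective_total = on_disk + int(r.headers.get('content-length', 0))
        done = on_disk
        with open(output_path, 'ab' if on_disk else 'wb') as f:
            for chunk in r.iter_content(1024 * 1024):  # 1 MB blocks
                f.write(chunk)
                done += len(chunk)
                if effective_total and progress_queue is not None:
                    progress_queue.put((done / effective_total, output_path.name))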

View file

@@ -6,4 +6,15 @@ function toggleDarkMode() {
} else {
currentCSS.setAttribute("href", "file/css/highlightjs/github-dark.min.css");
}
// Re-highlight all code blocks once stylesheet loads
currentCSS.onload = function() {
const messageBodies = document.getElementById("chat").querySelectorAll(".message-body");
messageBodies.forEach((messageBody) => {
const codeBlocks = messageBody.querySelectorAll("pre code");
codeBlocks.forEach((codeBlock) => {
hljs.highlightElement(codeBlock);
});
});
};
}

View file

@@ -95,6 +95,21 @@ function startEditing(messageElement, messageBody, isUserMessage) {
editingInterface.textarea.focus();
editingInterface.textarea.setSelectionRange(rawText.length, rawText.length);
// Temporarily mark as scrolled to prevent auto-scroll
const wasScrolled = window.isScrolled;
window.isScrolled = true;
// Scroll the textarea into view
editingInterface.textarea.scrollIntoView({
behavior: "smooth",
block: "center"
});
// Restore the original scroll state after animation
setTimeout(() => {
window.isScrolled = wasScrolled;
}, 500);
// Setup event handlers
setupEditingHandlers(editingInterface.textarea, messageElement, originalHTML, messageBody, isUserMessage);
}
@@ -229,10 +244,23 @@ function removeLastClick() {
document.getElementById("Remove-last").click();
}
function handleMorphdomUpdate(text) {
function handleMorphdomUpdate(data) {
// Determine target element and use it as query scope
var target_element, target_html;
if (data.last_message_only) {
const childNodes = document.getElementsByClassName("messages")[0].childNodes;
target_element = childNodes[childNodes.length - 1];
target_html = data.html;
} else {
target_element = document.getElementById("chat").parentNode;
target_html = "<div class=\"prose svelte-1ybaih5\">" + data.html + "</div>";
}
const queryScope = target_element;
// Track open blocks
const openBlocks = new Set();
document.querySelectorAll(".thinking-block").forEach(block => {
queryScope.querySelectorAll(".thinking-block").forEach(block => {
const blockId = block.getAttribute("data-block-id");
// If block exists and is open, add to open set
if (blockId && block.hasAttribute("open")) {
@@ -242,7 +270,7 @@ function handleMorphdomUpdate(text) {
// Store scroll positions for any open blocks
const scrollPositions = {};
document.querySelectorAll(".thinking-block[open]").forEach(block => {
queryScope.querySelectorAll(".thinking-block[open]").forEach(block => {
const content = block.querySelector(".thinking-content");
const blockId = block.getAttribute("data-block-id");
if (content && blockId) {
@@ -255,8 +283,8 @@ function handleMorphdomUpdate(text) {
});
morphdom(
document.getElementById("chat").parentNode,
"<div class=\"prose svelte-1ybaih5\">" + text + "</div>",
target_element,
target_html,
{
onBeforeElUpdated: function(fromEl, toEl) {
// Preserve code highlighting
@@ -307,7 +335,7 @@ function handleMorphdomUpdate(text) {
);
// Add toggle listeners for new blocks
document.querySelectorAll(".thinking-block").forEach(block => {
queryScope.querySelectorAll(".thinking-block").forEach(block => {
if (!block._hasToggleListener) {
block.addEventListener("toggle", function(e) {
if (this.open) {

View file

@@ -145,17 +145,26 @@ typingSibling.insertBefore(typing, typingSibling.childNodes[2]);
const targetElement = document.getElementById("chat").parentNode.parentNode.parentNode;
targetElement.classList.add("pretty_scrollbar");
targetElement.classList.add("chat-parent");
let isScrolled = false;
window.isScrolled = false;
let scrollTimeout;
targetElement.addEventListener("scroll", function() {
// Add scrolling class to disable hover effects
targetElement.classList.add("scrolling");
let diff = targetElement.scrollHeight - targetElement.clientHeight;
if(Math.abs(targetElement.scrollTop - diff) <= 10 || diff == 0) {
isScrolled = false;
window.isScrolled = false;
} else {
isScrolled = true;
window.isScrolled = true;
}
doSyntaxHighlighting();
// Clear previous timeout and set new one
clearTimeout(scrollTimeout);
scrollTimeout = setTimeout(() => {
targetElement.classList.remove("scrolling");
doSyntaxHighlighting(); // Only run after scrolling stops
}, 150);
});
@@ -173,7 +182,7 @@ const observer = new MutationObserver(function(mutations) {
doSyntaxHighlighting();
if (!isScrolled && targetElement.scrollTop !== targetElement.scrollHeight) {
if (!window.isScrolled && targetElement.scrollTop !== targetElement.scrollHeight) {
targetElement.scrollTop = targetElement.scrollHeight;
}
@@ -184,7 +193,7 @@ const observer = new MutationObserver(function(mutations) {
const prevSibling = lastChild?.previousElementSibling;
if (lastChild && prevSibling) {
lastChild.style.setProperty("margin-bottom",
`max(0px, calc(max(70vh, 100vh - ${prevSibling.offsetHeight}px - 102px) - ${lastChild.offsetHeight}px))`,
`max(0px, calc(max(70vh, 100vh - ${prevSibling.offsetHeight}px - 84px) - ${lastChild.offsetHeight}px))`,
"important"
);
}
@@ -217,7 +226,7 @@ function isElementVisibleOnScreen(element) {
}
function doSyntaxHighlighting() {
const messageBodies = document.querySelectorAll(".message-body");
const messageBodies = document.getElementById("chat").querySelectorAll(".message-body");
if (messageBodies.length > 0) {
observer.disconnect();
@@ -229,6 +238,7 @@ function doSyntaxHighlighting() {
codeBlocks.forEach((codeBlock) => {
hljs.highlightElement(codeBlock);
codeBlock.setAttribute("data-highlighted", "true");
codeBlock.classList.add("pretty_scrollbar");
});
renderMathInElement(messageBody, {
@@ -277,7 +287,7 @@ for (i = 0; i < slimDropdownElements.length; i++) {
// The show/hide events were adapted from:
// https://github.com/SillyTavern/SillyTavern/blob/6c8bd06308c69d51e2eb174541792a870a83d2d6/public/script.js
//------------------------------------------------
var buttonsInChat = document.querySelectorAll("#chat-tab #chat-buttons button");
var buttonsInChat = document.querySelectorAll("#chat-tab #chat-buttons button, #chat-tab #chat-buttons #show-controls");
var button = document.getElementById("hover-element-button");
var menu = document.getElementById("hover-menu");
var istouchscreen = (navigator.maxTouchPoints > 0) || "ontouchstart" in document.documentElement;
@@ -298,18 +308,21 @@ if (buttonsInChat.length > 0) {
const thisButton = buttonsInChat[i];
menu.appendChild(thisButton);
thisButton.addEventListener("click", () => {
hideMenu();
});
// Only apply transformations to button elements
if (thisButton.tagName.toLowerCase() === "button") {
thisButton.addEventListener("click", () => {
hideMenu();
});
const buttonText = thisButton.textContent;
const matches = buttonText.match(/(\(.*?\))/);
const buttonText = thisButton.textContent;
const matches = buttonText.match(/(\(.*?\))/);
if (matches && matches.length > 1) {
// Apply the transparent-substring class to the matched substring
const substring = matches[1];
const newText = buttonText.replace(substring, `&nbsp;<span class="transparent-substring">${substring.slice(1, -1)}</span>`);
thisButton.innerHTML = newText;
if (matches && matches.length > 1) {
// Apply the transparent-substring class to the matched substring
const substring = matches[1];
const newText = buttonText.replace(substring, `&nbsp;<span class="transparent-substring">${substring.slice(1, -1)}</span>`);
thisButton.innerHTML = newText;
}
}
}
}
@@ -382,21 +395,10 @@ document.addEventListener("click", function (event) {
}
});
//------------------------------------------------
// Relocate the "Show controls" checkbox
//------------------------------------------------
var elementToMove = document.getElementById("show-controls");
var parent = elementToMove.parentNode;
for (var i = 0; i < 2; i++) {
parent = parent.parentNode;
}
parent.insertBefore(elementToMove, parent.firstChild);
//------------------------------------------------
// Position the chat input
//------------------------------------------------
document.getElementById("show-controls").parentNode.classList.add("chat-input-positioned");
document.getElementById("chat-input-row").classList.add("chat-input-positioned");
//------------------------------------------------
// Focus on the chat input
@@ -562,6 +564,7 @@ function moveToChatTab() {
newParent.insertBefore(grandParent, newParent.children[newPosition]);
document.getElementById("save-character").style.display = "none";
document.getElementById("restore-character").style.display = "none";
}
function restoreOriginalPosition() {
@@ -573,6 +576,7 @@ function restoreOriginalPosition() {
}
document.getElementById("save-character").style.display = "";
document.getElementById("restore-character").style.display = "";
movedElement.style.display = "";
movedElement.children[0].style.minWidth = "";
}
@@ -872,3 +876,123 @@ function navigateLastAssistantMessage(direction) {
return false;
}
//------------------------------------------------
// Paste Handler for Long Text
//------------------------------------------------
const MAX_PLAIN_TEXT_LENGTH = 2500;
function setupPasteHandler() {
const textbox = document.querySelector("#chat-input textarea[data-testid=\"textbox\"]");
const fileInput = document.querySelector("#chat-input input[data-testid=\"file-upload\"]");
if (!textbox || !fileInput) {
setTimeout(setupPasteHandler, 500);
return;
}
textbox.addEventListener("paste", async (event) => {
const text = event.clipboardData?.getData("text");
if (text && text.length > MAX_PLAIN_TEXT_LENGTH && document.querySelector("#paste_to_attachment input[data-testid=\"checkbox\"]")?.checked) {
event.preventDefault();
const file = new File([text], "pasted_text.txt", {
type: "text/plain",
lastModified: Date.now()
});
const dataTransfer = new DataTransfer();
dataTransfer.items.add(file);
fileInput.files = dataTransfer.files;
fileInput.dispatchEvent(new Event("change", { bubbles: true }));
}
});
}
if (document.readyState === "loading") {
document.addEventListener("DOMContentLoaded", setupPasteHandler);
} else {
setupPasteHandler();
}
//------------------------------------------------
// Tooltips
//------------------------------------------------
// File upload button
document.querySelector("#chat-input .upload-button").title = "Upload text files, PDFs, and DOCX documents";
// Activate web search
document.getElementById("web-search").title = "Search the internet with DuckDuckGo";
//------------------------------------------------
// Inline icons for deleting past chats
//------------------------------------------------
function addMiniDeletes() {
document.querySelectorAll("#past-chats label:not(.has-delete)").forEach(label => {
const container = document.createElement("span");
container.className = "delete-container";
label.classList.add("chat-label-with-delete");
const trashBtn = document.createElement("button");
trashBtn.innerHTML = "🗑️";
trashBtn.className = "trash-btn";
const cancelBtn = document.createElement("button");
cancelBtn.innerHTML = "✕";
cancelBtn.className = "cancel-btn";
const confirmBtn = document.createElement("button");
confirmBtn.innerHTML = "✓";
confirmBtn.className = "confirm-btn";
label.addEventListener("mouseenter", () => {
container.style.opacity = "1";
});
label.addEventListener("mouseleave", () => {
container.style.opacity = "0";
});
trashBtn.onclick = (e) => {
e.stopPropagation();
label.querySelector("input").click();
document.querySelector("#delete_chat").click();
trashBtn.style.display = "none";
cancelBtn.style.display = "flex";
confirmBtn.style.display = "flex";
};
cancelBtn.onclick = (e) => {
e.stopPropagation();
document.querySelector("#delete_chat-cancel").click();
resetButtons();
};
confirmBtn.onclick = (e) => {
e.stopPropagation();
document.querySelector("#delete_chat-confirm").click();
resetButtons();
};
function resetButtons() {
trashBtn.style.display = "inline";
cancelBtn.style.display = "none";
confirmBtn.style.display = "none";
}
container.append(trashBtn, cancelBtn, confirmBtn);
label.appendChild(container);
label.classList.add("has-delete");
});
}
new MutationObserver(() => addMiniDeletes()).observe(
document.querySelector("#past-chats"),
{childList: true, subtree: true}
);
addMiniDeletes();

View file

@@ -223,7 +223,10 @@ def generate_chat_prompt(user_input, state, **kwargs):
for attachment in metadata[user_key]["attachments"]:
filename = attachment.get("name", "file")
content = attachment.get("content", "")
attachments_text += f"\nName: {filename}\nContents:\n\n=====\n{content}\n=====\n\n"
if attachment.get("type") == "text/html" and attachment.get("url"):
attachments_text += f"\nName: {filename}\nURL: {attachment['url']}\nContents:\n\n=====\n{content}\n=====\n\n"
else:
attachments_text += f"\nName: {filename}\nContents:\n\n=====\n{content}\n=====\n\n"
if attachments_text:
enhanced_user_msg = f"{user_msg}\n\nATTACHMENTS:\n{attachments_text}"
@@ -250,7 +253,10 @@ def generate_chat_prompt(user_input, state, **kwargs):
for attachment in metadata[user_key]["attachments"]:
filename = attachment.get("name", "file")
content = attachment.get("content", "")
attachments_text += f"\nName: {filename}\nContents:\n\n=====\n{content}\n=====\n\n"
if attachment.get("type") == "text/html" and attachment.get("url"):
attachments_text += f"\nName: {filename}\nURL: {attachment['url']}\nContents:\n\n=====\n{content}\n=====\n\n"
else:
attachments_text += f"\nName: {filename}\nContents:\n\n=====\n{content}\n=====\n\n"
if attachments_text:
user_input = f"{user_input}\n\nATTACHMENTS:\n{attachments_text}"
@@ -500,6 +506,9 @@ def add_message_attachment(history, row_idx, file_path, is_user=True):
# Process PDF file
content = extract_pdf_text(path)
file_type = "application/pdf"
elif file_extension == '.docx':
content = extract_docx_text(path)
file_type = "application/docx"
else:
# Default handling for text files
with open(path, 'r', encoding='utf-8') as f:
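
Taken together with the two prompt-building hunks above, this dispatch implies an attachment metadata shape along the following lines. The example values are invented, and `render_attachment` merely factors out the shared formatting for illustration.

attachment = {
    "name": "report.docx",               # attachment.get("name", "file")
    "type": "application/docx",
    "content": "Quarterly results ...",  # text produced by extract_docx_text()
}
web_result = {
    "name": "Example page",
    "type": "text/html",
    "url": "https://example.com",        # only web-search results carry a URL
    "content": "Page text ...",
}

def render_attachment(att):
    if att.get("type") == "text/html" and att.get("url"):
        return f"\nName: {att['name']}\nURL: {att['url']}\nContents:\n\n=====\n{att['content']}\n=====\n\n"
    return f"\nName: {att['name']}\nContents:\n\n=====\n{att['content']}\n=====\n\n"
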
@@ -538,6 +547,53 @@ def extract_pdf_text(pdf_path):
return f"[Error extracting PDF text: {str(e)}]"
def extract_docx_text(docx_path):
"""
Extract text from a .docx file, including headers,
body (paragraphs and tables), and footers.
"""
try:
import docx
doc = docx.Document(docx_path)
parts = []
# 1) Extract non-empty header paragraphs from each section
for section in doc.sections:
for para in section.header.paragraphs:
text = para.text.strip()
if text:
parts.append(text)
# 2) Extract body blocks (paragraphs and tables) in document order
parent_elm = doc.element.body
for child in parent_elm.iterchildren():
if isinstance(child, docx.oxml.text.paragraph.CT_P):
para = docx.text.paragraph.Paragraph(child, doc)
text = para.text.strip()
if text:
parts.append(text)
elif isinstance(child, docx.oxml.table.CT_Tbl):
table = docx.table.Table(child, doc)
for row in table.rows:
cells = [cell.text.strip() for cell in row.cells]
parts.append("\t".join(cells))
# 3) Extract non-empty footer paragraphs from each section
for section in doc.sections:
for para in section.footer.paragraphs:
text = para.text.strip()
if text:
parts.append(text)
return "\n".join(parts)
except Exception as e:
logger.error(f"Error extracting text from DOCX: {e}")
return f"[Error extracting DOCX text: {str(e)}]"
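
A note on the design: walking `doc.element.body.iterchildren()` instead of reading `doc.paragraphs` and `doc.tables` separately preserves the original interleaving of paragraphs and tables, which python-docx otherwise exposes as two independent collections. A minimal usage sketch, assuming python-docx is installed and the file path (invented here) exists:

from pathlib import Path

text = extract_docx_text(Path("user_data/example.docx"))  # hypothetical file
print(text.splitlines()[:3])  # header lines first, then body blocks, then footers
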
def generate_search_query(user_message, state):
"""Generate a search query from user message using the LLM"""
# Augment the user message with search instruction
@@ -554,7 +610,12 @@ def generate_search_query(user_message, state):
query = ""
for reply in generate_reply(formatted_prompt, search_state, stopping_strings=[], is_chat=True):
query = reply.strip()
query = reply
# Strip and remove surrounding quotes if present
query = query.strip()
if len(query) >= 2 and query.startswith('"') and query.endswith('"'):
query = query[1:-1]
return query
@@ -660,7 +721,7 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
# Add timestamp for assistant's response at the start of generation
row_idx = len(output['internal']) - 1
update_message_metadata(output['metadata'], "assistant", row_idx, timestamp=get_current_timestamp())
update_message_metadata(output['metadata'], "assistant", row_idx, timestamp=get_current_timestamp(), model_name=shared.model_name)
# Generate
reply = None
@@ -699,7 +760,18 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
if is_stream:
yield output
output['visible'][-1][1] = apply_extensions('output', output['visible'][-1][1], state, is_chat=True)
if _continue:
# Reprocess the entire internal text for extensions (like translation)
full_internal = output['internal'][-1][1]
if state['mode'] in ['chat', 'chat-instruct']:
full_visible = re.sub("(<USER>|<user>|{{user}})", state['name1'], full_internal)
else:
full_visible = full_internal
full_visible = html.escape(full_visible)
output['visible'][-1][1] = apply_extensions('output', full_visible, state, is_chat=True)
else:
output['visible'][-1][1] = apply_extensions('output', output['visible'][-1][1], state, is_chat=True)
# Final sync for version metadata (in case streaming was disabled)
if regenerate:
@@ -775,7 +847,9 @@ def generate_chat_reply_wrapper(text, state, regenerate=False, _continue=False):
last_save_time = time.monotonic()
save_interval = 8
for i, history in enumerate(generate_chat_reply(text, state, regenerate, _continue, loading_message=True, for_ui=True)):
yield chat_html_wrapper(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']), history
yield chat_html_wrapper(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'], last_message_only=(i > 0)), history
if i == 0:
time.sleep(0.125) # We need this to make sure the first update goes through
current_time = time.monotonic()
# Save on first iteration or if save_interval seconds have passed
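
The shape of this loop is the core of the streaming optimization: only the first yield (`i == 0`) carries the full chat HTML, and every later yield renders just the last message and sets `last_message_only`, which `handleMorphdomUpdate()` on the JavaScript side uses to patch only the final message node. A schematic of the producer side, with `render_full`/`render_last` standing in for `chat_html_wrapper` at the two flag values (names invented for this sketch):

import time

def stream_chat_updates(history_iter, render_full, render_last):
    # render_full / render_last represent chat_html_wrapper with
    # last_message_only=False / True respectively.
    for i, history in enumerate(history_iter):
        if i == 0:
            yield {"html": render_full(history), "last_message_only": False}
            time.sleep(0.125)  # let the first full render reach the client
        else:
            yield {"html": render_last(history), "last_message_only": True}
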
@@ -1163,6 +1237,43 @@ def load_character(character, name1, name2):
return name1, name2, picture, greeting, context
def restore_character_for_ui(state):
"""Reset character fields to the currently loaded character's saved values"""
if state['character_menu'] and state['character_menu'] != 'None':
try:
name1, name2, picture, greeting, context = load_character(state['character_menu'], state['name1'], state['name2'])
state['name2'] = name2
state['greeting'] = greeting
state['context'] = context
state['character_picture'] = picture # This triggers cache update via generate_pfp_cache
return state, name2, context, greeting, picture
except Exception as e:
logger.error(f"Failed to reset character '{state['character_menu']}': {e}")
return clear_character_for_ui(state)
else:
return clear_character_for_ui(state)
def clear_character_for_ui(state):
"""Clear all character fields and picture cache"""
state['name2'] = shared.settings['name2']
state['context'] = shared.settings['context']
state['greeting'] = shared.settings['greeting']
state['character_picture'] = None
# Clear the cache files
cache_folder = Path(shared.args.disk_cache_dir)
for cache_file in ['pfp_character.png', 'pfp_character_thumb.png']:
cache_path = Path(f'{cache_folder}/{cache_file}')
if cache_path.exists():
cache_path.unlink()
return state, state['name2'], state['context'], state['greeting'], None
def load_instruction_template(template):
if template == 'None':
return ''
@@ -1453,7 +1564,10 @@ def handle_start_new_chat_click(state):
def handle_delete_chat_confirm_click(state):
index = str(find_all_histories(state).index(state['unique_id']))
filtered_histories = find_all_histories_with_first_prompts(state)
filtered_ids = [h[1] for h in filtered_histories]
index = str(filtered_ids.index(state['unique_id']))
delete_history(state['unique_id'], state['character_menu'], state['mode'])
history, unique_id = load_history_after_deletion(state, index)
html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
@@ -1466,7 +1580,6 @@ def handle_delete_chat_confirm_click(state):
unique_id,
gr.update(visible=False),
gr.update(visible=True),
gr.update(visible=False)
]
@@ -1653,6 +1766,25 @@ def handle_character_menu_change(state):
]
def handle_character_picture_change(picture):
"""Update or clear cache when character picture changes"""
cache_folder = Path(shared.args.disk_cache_dir)
if not cache_folder.exists():
cache_folder.mkdir()
if picture is not None:
# Save to cache
picture.save(Path(f'{cache_folder}/pfp_character.png'), format='PNG')
thumb = make_thumbnail(picture)
thumb.save(Path(f'{cache_folder}/pfp_character_thumb.png'), format='PNG')
else:
# Remove cache files when picture is cleared
for cache_file in ['pfp_character.png', 'pfp_character_thumb.png']:
cache_path = Path(f'{cache_folder}/{cache_file}')
if cache_path.exists():
cache_path.unlink()
def handle_mode_change(state):
history = load_latest_history(state)
histories = find_all_histories_with_first_prompts(state)

View file

@@ -1,38 +0,0 @@
import subprocess
from pathlib import Path
new_extensions = set()
def clone_or_pull_repository(github_url):
global new_extensions
repository_folder = Path("extensions")
repo_name = github_url.rstrip("/").split("/")[-1].split(".")[0]
# Check if the repository folder exists
if not repository_folder.exists():
repository_folder.mkdir(parents=True)
repo_path = repository_folder / repo_name
# Check if the repository is already cloned
if repo_path.exists():
yield f"Updating {github_url}..."
# Perform a 'git pull' to update the repository
try:
pull_output = subprocess.check_output(["git", "-C", repo_path, "pull"], stderr=subprocess.STDOUT)
yield "Done."
return pull_output.decode()
except subprocess.CalledProcessError as e:
return str(e)
# Clone the repository
try:
yield f"Cloning {github_url}..."
clone_output = subprocess.check_output(["git", "clone", github_url, repo_path], stderr=subprocess.STDOUT)
new_extensions.add(repo_name)
yield f"The extension `{repo_name}` has been downloaded.\n\nPlease close the web UI completely and launch it again to be able to load it."
return clone_output.decode()
except subprocess.CalledProcessError as e:
return str(e)

View file

@@ -39,15 +39,16 @@ def minify_css(css: str) -> str:
return css
with open(Path(__file__).resolve().parent / '../css/html_readable_style.css', 'r') as f:
with open(Path(__file__).resolve().parent / '../css/html_readable_style.css', 'r', encoding='utf-8') as f:
readable_css = f.read()
with open(Path(__file__).resolve().parent / '../css/html_instruct_style.css', 'r') as f:
with open(Path(__file__).resolve().parent / '../css/html_instruct_style.css', 'r', encoding='utf-8') as f:
instruct_css = f.read()
# Custom chat styles
chat_styles = {}
for k in get_available_chat_styles():
chat_styles[k] = open(Path(f'css/chat_style-{k}.css'), 'r').read()
with open(Path(f'css/chat_style-{k}.css'), 'r', encoding='utf-8') as f:
chat_styles[k] = f.read()
# Handle styles that derive from other styles
for k in chat_styles:
@@ -350,12 +351,14 @@ remove_button = f'<button class="footer-button footer-remove-button" title="Remo
info_button = f'<button class="footer-button footer-info-button" title="message">{info_svg}</button>'
def format_message_timestamp(history, role, index):
def format_message_timestamp(history, role, index, tooltip_include_timestamp=True):
"""Get a formatted timestamp HTML span for a message if available"""
key = f"{role}_{index}"
if 'metadata' in history and key in history['metadata'] and history['metadata'][key].get('timestamp'):
timestamp = history['metadata'][key]['timestamp']
return f"<span class='timestamp'>{timestamp}</span>"
tooltip_text = get_message_tooltip(history, role, index, include_timestamp=tooltip_include_timestamp)
title_attr = f' title="{html.escape(tooltip_text)}"' if tooltip_text else ''
return f"<span class='timestamp'{title_attr}>{timestamp}</span>"
return ""
@@ -388,6 +391,23 @@ def format_message_attachments(history, role, index):
return ""
def get_message_tooltip(history, role, index, include_timestamp=True):
"""Get tooltip text combining timestamp and model name for a message"""
key = f"{role}_{index}"
if 'metadata' not in history or key not in history['metadata']:
return ""
meta = history['metadata'][key]
tooltip_parts = []
if include_timestamp and meta.get('timestamp'):
tooltip_parts.append(meta['timestamp'])
if meta.get('model_name'):
tooltip_parts.append(f"Model: {meta['model_name']}")
return " | ".join(tooltip_parts)
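
For concreteness, here is a hypothetical metadata entry and the tooltip it yields; the values are invented:

history = {'metadata': {'assistant_0': {'timestamp': '2025-06-10 23:08',
                                        'model_name': 'Qwen3-8B'}}}
print(get_message_tooltip(history, 'assistant', 0))
# -> "2025-06-10 23:08 | Model: Qwen3-8B"
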
def get_version_navigation_html(history, i, role):
"""Generate simple navigation arrows for message versions"""
key = f"{role}_{i}"
@@ -443,179 +463,193 @@ def actions_html(history, i, role, info_message=""):
f'{version_nav_html}')
def generate_instruct_html(history):
output = f'<style>{instruct_css}</style><div class="chat" id="chat" data-mode="instruct"><div class="messages">'
def generate_instruct_html(history, last_message_only=False):
if not last_message_only:
output = f'<style>{instruct_css}</style><div class="chat" id="chat" data-mode="instruct"><div class="messages">'
else:
output = ""
for i in range(len(history['visible'])):
row_visible = history['visible'][i]
row_internal = history['internal'][i]
converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
def create_message(role, content, raw_content):
"""Inner function that captures variables from outer scope."""
class_name = "user-message" if role == "user" else "assistant-message"
# Get timestamps
user_timestamp = format_message_timestamp(history, "user", i)
assistant_timestamp = format_message_timestamp(history, "assistant", i)
# Get role-specific data
timestamp = format_message_timestamp(history, role, i)
attachments = format_message_attachments(history, role, i)
# Get attachments
user_attachments = format_message_attachments(history, "user", i)
assistant_attachments = format_message_attachments(history, "assistant", i)
# Create info button if timestamp exists
info_message = ""
if timestamp:
tooltip_text = get_message_tooltip(history, role, i)
info_message = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"')
# Create info buttons for timestamps if they exist
info_message_user = ""
if user_timestamp != "":
# Extract the timestamp value from the span
user_timestamp_value = user_timestamp.split('>', 1)[1].split('<', 1)[0]
info_message_user = info_button.replace("message", user_timestamp_value)
info_message_assistant = ""
if assistant_timestamp != "":
# Extract the timestamp value from the span
assistant_timestamp_value = assistant_timestamp.split('>', 1)[1].split('<', 1)[0]
info_message_assistant = info_button.replace("message", assistant_timestamp_value)
if converted_visible[0]: # Don't display empty user messages
output += (
f'<div class="user-message" '
f'data-raw="{html.escape(row_internal[0], quote=True)}"'
f'data-index={i}>'
f'<div class="text">'
f'<div class="message-body">{converted_visible[0]}</div>'
f'{user_attachments}'
f'{actions_html(history, i, "user", info_message_user)}'
f'</div>'
f'</div>'
)
output += (
f'<div class="assistant-message" '
f'data-raw="{html.escape(row_internal[1], quote=True)}"'
return (
f'<div class="{class_name}" '
f'data-raw="{html.escape(raw_content, quote=True)}"'
f'data-index={i}>'
f'<div class="text">'
f'<div class="message-body">{converted_visible[1]}</div>'
f'{assistant_attachments}'
f'{actions_html(history, i, "assistant", info_message_assistant)}'
f'<div class="message-body">{content}</div>'
f'{attachments}'
f'{actions_html(history, i, role, info_message)}'
f'</div>'
f'</div>'
)
output += "</div></div>"
# Determine range
start_idx = len(history['visible']) - 1 if last_message_only else 0
end_idx = len(history['visible'])
for i in range(start_idx, end_idx):
row_visible = history['visible'][i]
row_internal = history['internal'][i]
# Convert content
if last_message_only:
converted_visible = [None, convert_to_markdown_wrapped(row_visible[1], message_id=i, use_cache=i != len(history['visible']) - 1)]
else:
converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
# Generate messages
if not last_message_only and converted_visible[0]:
output += create_message("user", converted_visible[0], row_internal[0])
output += create_message("assistant", converted_visible[1], row_internal[1])
if not last_message_only:
output += "</div></div>"
return output
def generate_cai_chat_html(history, name1, name2, style, character, reset_cache=False):
output = f'<style>{chat_styles[style]}</style><div class="chat" id="chat"><div class="messages">'
def get_character_image_with_cache_buster():
"""Get character image URL with cache busting based on file modification time"""
cache_path = Path("user_data/cache/pfp_character_thumb.png")
if cache_path.exists():
mtime = int(cache_path.stat().st_mtime)
return f'<img src="file/user_data/cache/pfp_character_thumb.png?{mtime}" class="pfp_character">'
# We use ?character and ?time.time() to force the browser to reset caches
img_bot = (
f'<img src="file/user_data/cache/pfp_character_thumb.png?{character}" class="pfp_character">'
if Path("user_data/cache/pfp_character_thumb.png").exists() else ''
)
return ''
img_me = (
f'<img src="file/user_data/cache/pfp_me.png?{time.time() if reset_cache else ""}">'
if Path("user_data/cache/pfp_me.png").exists() else ''
)
for i in range(len(history['visible'])):
row_visible = history['visible'][i]
row_internal = history['internal'][i]
converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
def generate_cai_chat_html(history, name1, name2, style, character, reset_cache=False, last_message_only=False):
if not last_message_only:
output = f'<style>{chat_styles[style]}</style><div class="chat" id="chat"><div class="messages">'
else:
output = ""
# Get timestamps
user_timestamp = format_message_timestamp(history, "user", i)
assistant_timestamp = format_message_timestamp(history, "assistant", i)
img_bot = get_character_image_with_cache_buster()
# Get attachments
user_attachments = format_message_attachments(history, "user", i)
assistant_attachments = format_message_attachments(history, "assistant", i)
def create_message(role, content, raw_content):
"""Inner function for CAI-style messages."""
circle_class = "circle-you" if role == "user" else "circle-bot"
name = name1 if role == "user" else name2
if converted_visible[0]: # Don't display empty user messages
output += (
f'<div class="message" '
f'data-raw="{html.escape(row_internal[0], quote=True)}"'
f'data-index={i}>'
f'<div class="circle-you">{img_me}</div>'
f'<div class="text">'
f'<div class="username">{name1}{user_timestamp}</div>'
f'<div class="message-body">{converted_visible[0]}</div>'
f'{user_attachments}'
f'{actions_html(history, i, "user")}'
f'</div>'
f'</div>'
)
# Get role-specific data
timestamp = format_message_timestamp(history, role, i, tooltip_include_timestamp=False)
attachments = format_message_attachments(history, role, i)
output += (
# Get appropriate image
if role == "user":
img = (f'<img src="file/user_data/cache/pfp_me.png?{time.time() if reset_cache else ""}">'
if Path("user_data/cache/pfp_me.png").exists() else '')
else:
img = img_bot
return (
f'<div class="message" '
f'data-raw="{html.escape(row_internal[1], quote=True)}"'
f'data-raw="{html.escape(raw_content, quote=True)}"'
f'data-index={i}>'
f'<div class="circle-bot">{img_bot}</div>'
f'<div class="{circle_class}">{img}</div>'
f'<div class="text">'
f'<div class="username">{name2}{assistant_timestamp}</div>'
f'<div class="message-body">{converted_visible[1]}</div>'
f'{assistant_attachments}'
f'{actions_html(history, i, "assistant")}'
f'<div class="username">{name}{timestamp}</div>'
f'<div class="message-body">{content}</div>'
f'{attachments}'
f'{actions_html(history, i, role)}'
f'</div>'
f'</div>'
)
output += "</div></div>"
# Determine range
start_idx = len(history['visible']) - 1 if last_message_only else 0
end_idx = len(history['visible'])
for i in range(start_idx, end_idx):
row_visible = history['visible'][i]
row_internal = history['internal'][i]
# Convert content
if last_message_only:
converted_visible = [None, convert_to_markdown_wrapped(row_visible[1], message_id=i, use_cache=i != len(history['visible']) - 1)]
else:
converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
# Generate messages
if not last_message_only and converted_visible[0]:
output += create_message("user", converted_visible[0], row_internal[0])
output += create_message("assistant", converted_visible[1], row_internal[1])
if not last_message_only:
output += "</div></div>"
return output
def generate_chat_html(history, name1, name2, reset_cache=False):
output = f'<style>{chat_styles["wpp"]}</style><div class="chat" id="chat"><div class="messages">'
def generate_chat_html(history, name1, name2, reset_cache=False, last_message_only=False):
if not last_message_only:
output = f'<style>{chat_styles["wpp"]}</style><div class="chat" id="chat"><div class="messages">'
else:
output = ""
for i in range(len(history['visible'])):
row_visible = history['visible'][i]
row_internal = history['internal'][i]
converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
def create_message(role, content, raw_content):
"""Inner function for WPP-style messages."""
text_class = "text-you" if role == "user" else "text-bot"
# Get timestamps
user_timestamp = format_message_timestamp(history, "user", i)
assistant_timestamp = format_message_timestamp(history, "assistant", i)
# Get role-specific data
timestamp = format_message_timestamp(history, role, i)
attachments = format_message_attachments(history, role, i)
# Get attachments
user_attachments = format_message_attachments(history, "user", i)
assistant_attachments = format_message_attachments(history, "assistant", i)
# Create info button if timestamp exists
info_message = ""
if timestamp:
tooltip_text = get_message_tooltip(history, role, i)
info_message = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"')
# Create info buttons for timestamps if they exist
info_message_user = ""
if user_timestamp != "":
# Extract the timestamp value from the span
user_timestamp_value = user_timestamp.split('>', 1)[1].split('<', 1)[0]
info_message_user = info_button.replace("message", user_timestamp_value)
info_message_assistant = ""
if assistant_timestamp != "":
# Extract the timestamp value from the span
assistant_timestamp_value = assistant_timestamp.split('>', 1)[1].split('<', 1)[0]
info_message_assistant = info_button.replace("message", assistant_timestamp_value)
if converted_visible[0]: # Don't display empty user messages
output += (
f'<div class="message" '
f'data-raw="{html.escape(row_internal[0], quote=True)}"'
f'data-index={i}>'
f'<div class="text-you">'
f'<div class="message-body">{converted_visible[0]}</div>'
f'{user_attachments}'
f'{actions_html(history, i, "user", info_message_user)}'
f'</div>'
f'</div>'
)
output += (
return (
f'<div class="message" '
f'data-raw="{html.escape(row_internal[1], quote=True)}"'
f'data-raw="{html.escape(raw_content, quote=True)}"'
f'data-index={i}>'
f'<div class="text-bot">'
f'<div class="message-body">{converted_visible[1]}</div>'
f'{assistant_attachments}'
f'{actions_html(history, i, "assistant", info_message_assistant)}'
f'<div class="{text_class}">'
f'<div class="message-body">{content}</div>'
f'{attachments}'
f'{actions_html(history, i, role, info_message)}'
f'</div>'
f'</div>'
)
output += "</div></div>"
# Determine range
start_idx = len(history['visible']) - 1 if last_message_only else 0
end_idx = len(history['visible'])
for i in range(start_idx, end_idx):
row_visible = history['visible'][i]
row_internal = history['internal'][i]
# Convert content
if last_message_only:
converted_visible = [None, convert_to_markdown_wrapped(row_visible[1], message_id=i, use_cache=i != len(history['visible']) - 1)]
else:
converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
# Generate messages
if not last_message_only and converted_visible[0]:
output += create_message("user", converted_visible[0], row_internal[0])
output += create_message("assistant", converted_visible[1], row_internal[1])
if not last_message_only:
output += "</div></div>"
return output
@@ -629,15 +663,15 @@ def time_greeting():
return "Good evening!"
def chat_html_wrapper(history, name1, name2, mode, style, character, reset_cache=False):
def chat_html_wrapper(history, name1, name2, mode, style, character, reset_cache=False, last_message_only=False):
if len(history['visible']) == 0:
greeting = f"<div class=\"welcome-greeting\">{time_greeting()} How can I help you today?</div>"
result = f'<div class="chat" id="chat">{greeting}</div>'
elif mode == 'instruct':
result = generate_instruct_html(history)
result = generate_instruct_html(history, last_message_only=last_message_only)
elif style == 'wpp':
result = generate_chat_html(history, name1, name2)
result = generate_chat_html(history, name1, name2, last_message_only=last_message_only)
else:
result = generate_cai_chat_html(history, name1, name2, style, character, reset_cache)
result = generate_cai_chat_html(history, name1, name2, style, character, reset_cache=reset_cache, last_message_only=last_message_only)
return {'html': result}
return {'html': result, 'last_message_only': last_message_only}

View file

@@ -408,15 +408,42 @@ class LlamaServer:
def filter_stderr_with_progress(process_stderr):
progress_pattern = re.compile(r'slot update_slots: id.*progress = (\d+\.\d+)')
"""
Reads stderr lines from a process, filters out noise, and displays progress updates
inline (overwriting the same line) until completion.
"""
progress_re = re.compile(r'slot update_slots: id.*progress = (\d+\.\d+)')
last_was_progress = False
try:
for line in iter(process_stderr.readline, ''):
progress_match = progress_pattern.search(line)
if progress_match:
sys.stderr.write(line)
sys.stderr.flush()
elif not line.startswith(('srv ', 'slot ')) and 'log_server_r: request: GET /health' not in line:
sys.stderr.write(line)
sys.stderr.flush()
for raw in iter(process_stderr.readline, ''):
line = raw.rstrip('\r\n')
match = progress_re.search(line)
if match:
progress = float(match.group(1))
# Extract just the part from "prompt processing" onwards
prompt_processing_idx = line.find('prompt processing')
if prompt_processing_idx != -1:
display_line = line[prompt_processing_idx:]
else:
display_line = line # fallback to full line
# choose carriage return for in-progress or newline at completion
end_char = '\r' if progress < 1.0 else '\n'
print(display_line, end=end_char, file=sys.stderr, flush=True)
last_was_progress = (progress < 1.0)
# skip noise lines
elif not (line.startswith(('srv ', 'slot ')) or 'log_server_r: request: GET /health' in line):
# if we were in progress, finish that line first
if last_was_progress:
print(file=sys.stderr)
print(line, file=sys.stderr, flush=True)
last_was_progress = False
except (ValueError, IOError):
# silently ignore broken output or IO errors
pass
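
A plausible way to attach this filter to a running server process is a daemon thread reading the pipe; the launch command below is an assumption for illustration, not the project's actual invocation:

import subprocess
import threading

proc = subprocess.Popen(
    ["llama-server", "--port", "8080"],  # assumed command line
    stderr=subprocess.PIPE,
    text=True,
)
threading.Thread(
    target=filter_stderr_with_progress,
    args=(proc.stderr,),
    daemon=True,  # don't keep the interpreter alive just for log filtering
).start()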

View file

@@ -116,7 +116,7 @@ def unload_model(keep_model_name=False):
return
is_llamacpp = (shared.model.__class__.__name__ == 'LlamaServer')
if shared.args.loader == 'ExLlamav3_HF':
if shared.model.__class__.__name__ == 'Exllamav3HF':
shared.model.unload()
shared.model = shared.tokenizer = None

View file

@@ -329,6 +329,7 @@ def estimate_vram(gguf_file, gpu_layers, ctx_size, cache_type):
# Extract values from metadata
n_layers = None
n_kv_heads = None
n_attention_heads = None # Fallback for models without separate KV heads
embedding_dim = None
for key, value in metadata.items():
@@ -336,9 +337,14 @@ def estimate_vram(gguf_file, gpu_layers, ctx_size, cache_type):
n_layers = value
elif key.endswith('.attention.head_count_kv'):
n_kv_heads = max(value) if isinstance(value, list) else value
elif key.endswith('.attention.head_count'):
n_attention_heads = max(value) if isinstance(value, list) else value
elif key.endswith('.embedding_length'):
embedding_dim = value
if n_kv_heads is None:
n_kv_heads = n_attention_heads
if gpu_layers > n_layers:
gpu_layers = n_layers
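
The fallback exists because GGUF files from plain multi-head-attention models may omit `*.attention.head_count_kv`; in those models every attention head has its own KV head, so the total head count is the correct substitute. Distilled into a standalone sketch (`resolve_kv_heads` is not a function in the codebase):

def resolve_kv_heads(metadata):
    # Grouped-query models store head_count_kv; MHA models may only
    # store head_count, where kv heads == attention heads.
    n_kv_heads = n_attention_heads = None
    for key, value in metadata.items():
        if key.endswith('.attention.head_count_kv'):
            n_kv_heads = max(value) if isinstance(value, list) else value
        elif key.endswith('.attention.head_count'):
            n_attention_heads = max(value) if isinstance(value, list) else value
    return n_kv_heads if n_kv_heads is not None else n_attention_heads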

View file

@@ -1,6 +1,5 @@
import functools
import pprint
import random
from pathlib import Path
import yaml
@@ -93,68 +92,17 @@ def load_preset_for_ui(name, state):
return state, *[generate_params[k] for k in presets_params()]
def random_preset(state):
params_and_values = {
'remove_tail_tokens': {
'top_p': [0.5, 0.8, 0.9, 0.95, 0.99],
'min_p': [0.5, 0.2, 0.1, 0.05, 0.01],
'top_k': [3, 5, 10, 20, 30, 40],
'typical_p': [0.2, 0.575, 0.95],
'tfs': [0.5, 0.8, 0.9, 0.95, 0.99],
'top_a': [0.5, 0.2, 0.1, 0.05, 0.01],
'epsilon_cutoff': [1, 3, 5, 7, 9],
'eta_cutoff': [3, 6, 9, 12, 15, 18],
},
'flatten_distribution': {
'temperature': [0.1, 0.5, 0.7, 0.8, 1, 1.2, 1.5, 2.0, 5.0],
'dynamic_temperature': [
[0.1, 1],
[0.1, 1.5],
[0.1, 2],
[0.1, 5],
[0.5, 1],
[0.5, 1.5],
[0.5, 2],
[0.5, 5],
[0.8, 1],
[0.8, 1.5],
[0.8, 2],
[0.8, 5],
[1, 1.5],
[1, 2],
[1, 5]
],
'smoothing_factor': [0.2, 0.3, 0.6, 1.2],
},
'repetition': {
'repetition_penalty': [1, 1.05, 1.1, 1.15, 1.20, 1.25],
'presence_penalty': [0, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 2.0],
'frequency_penalty': [0, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 2.0],
},
'other': {
'temperature_last': [True, False],
}
}
generate_params = default_preset()
for cat in params_and_values:
choices = list(params_and_values[cat].keys())
if shared.args.loader is not None:
choices = [x for x in choices if loader_contains(x)]
if len(choices) > 0:
choice = random.choice(choices)
value = random.choice(params_and_values[cat][choice])
if choice == 'dynamic_temperature':
generate_params['dynamic_temperature'] = True
generate_params['dynatemp_low'] = value[0]
generate_params['dynatemp_high'] = value[1]
else:
generate_params[choice] = value
def reset_preset_for_ui(name, state):
"""Reset current preset to its saved values from file"""
generate_params = load_preset(name, verbose=True)
state.update(generate_params)
return state, *[generate_params[k] for k in presets_params()]
def neutralize_samplers_for_ui(state):
"""Set all samplers to their default/neutral values"""
generate_params = default_preset()
state.update(generate_params)
logger.info("GENERATED_PRESET=")
pprint.PrettyPrinter(indent=4, width=1, sort_dicts=False).pprint(remove_defaults(state))
return state, *[generate_params[k] for k in presets_params()]

View file

@@ -9,6 +9,7 @@ from pathlib import Path
import yaml
from modules.logging_colors import logger
from modules.presets import default_preset
# Model variables
model = None
@@ -21,60 +22,19 @@ lora_names = []
# Generation variables
stop_everything = False
generation_lock = None
processing_message = '*Is typing...*'
processing_message = ''
# UI variables
gradio = {}
persistent_interface_state = {}
need_restart = False
# UI defaults
settings = {
'show_controls': True,
'start_with': '',
'mode': 'instruct',
'chat_style': 'cai-chat',
'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
'prompt-default': 'QA',
'prompt-notebook': 'QA',
'character': 'Assistant',
'name1': 'You',
'user_bio': '',
'custom_system_message': '',
'preset': 'min_p',
'max_new_tokens': 512,
'max_new_tokens_min': 1,
'max_new_tokens_max': 4096,
'prompt_lookup_num_tokens': 0,
'max_tokens_second': 0,
'max_updates_second': 12,
'auto_max_new_tokens': True,
'ban_eos_token': False,
'add_bos_token': True,
'enable_thinking': True,
'skip_special_tokens': True,
'stream': True,
'static_cache': False,
'truncation_length': 8192,
'seed': -1,
'custom_stopping_strings': '',
'custom_token_bans': '',
'negative_prompt': '',
'dark_theme': True,
'default_extensions': [],
'instruction_template_str': "{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if not ns.found -%}\n {{- '' + 'Below is an instruction that describes a task. Write a response that appropriately completes the request.' + '\\n\\n' -}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {{- '' + message['content'] + '\\n\\n' -}}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{-'### Instruction:\\n' + message['content'] + '\\n\\n'-}}\n {%- else -%}\n {{-'### Response:\\n' + message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{-'### Response:\\n'-}}\n{%- endif -%}",
'chat_template_str': "{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {%- if message['content'] -%}\n {{- message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- if user_bio -%}\n {{- user_bio + '\\n\\n' -}}\n {%- endif -%}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{- name1 + ': ' + message['content'] + '\\n'-}}\n {%- else -%}\n {{- name2 + ': ' + message['content'] + '\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}",
}
default_settings = copy.deepcopy(settings)
# Parser copied from https://github.com/vladmandic/automatic
parser = argparse.ArgumentParser(description="Text generation web UI", conflict_handler='resolve', add_help=True, formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=55, indent_increment=2, width=200))
# Basic settings
group = parser.add_argument_group('Basic settings')
group.add_argument('--multi-user', action='store_true', help='Multi-user mode. Chat histories are not saved or automatically loaded. Warning: this is likely not safe for sharing publicly.')
group.add_argument('--character', type=str, help='The name of the character to load in chat mode by default.')
group.add_argument('--model', type=str, help='Name of the model to load by default.')
group.add_argument('--lora', type=str, nargs='+', help='The list of LoRAs to load. If you want to load more than one LoRA, write the names separated by spaces.')
group.add_argument('--model-dir', type=str, default='user_data/models', help='Path to directory with all the models.')
@@ -230,6 +190,102 @@ for arg in sys.argv[1:]:
elif hasattr(args, arg):
provided_arguments.append(arg)
# Default generation parameters
neutral_samplers = default_preset()
# UI defaults
settings = {
'show_controls': True,
'start_with': '',
'mode': 'instruct',
'chat_style': 'cai-chat',
'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
'enable_web_search': False,
'web_search_pages': 3,
'prompt-default': 'QA',
'prompt-notebook': 'QA',
'preset': 'Qwen3 - Thinking' if Path('user_data/presets/Qwen3 - Thinking.yaml').exists() else None,
'max_new_tokens': 512,
'max_new_tokens_min': 1,
'max_new_tokens_max': 4096,
'prompt_lookup_num_tokens': 0,
'max_tokens_second': 0,
'auto_max_new_tokens': True,
'ban_eos_token': False,
'add_bos_token': True,
'enable_thinking': True,
'skip_special_tokens': True,
'stream': True,
'static_cache': False,
'truncation_length': 8192,
'seed': -1,
'custom_stopping_strings': '',
'custom_token_bans': '',
'negative_prompt': '',
'dark_theme': True,
'paste_to_attachment': False,
# Character settings
'character': 'Assistant',
'name1': 'You',
'name2': 'AI',
'user_bio': '',
'context': 'The following is a conversation with an AI Large Language Model. The AI has been trained to answer questions, provide recommendations, and help with decision making. The AI follows user requests. The AI thinks outside the box.',
'greeting': 'How can I help you today?',
'custom_system_message': '',
'instruction_template_str': "{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if not ns.found -%}\n {{- '' + 'Below is an instruction that describes a task. Write a response that appropriately completes the request.' + '\\n\\n' -}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {{- '' + message['content'] + '\\n\\n' -}}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{-'### Instruction:\\n' + message['content'] + '\\n\\n'-}}\n {%- else -%}\n {{-'### Response:\\n' + message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{-'### Response:\\n'-}}\n{%- endif -%}",
'chat_template_str': "{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {%- if message['content'] -%}\n {{- message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- if user_bio -%}\n {{- user_bio + '\\n\\n' -}}\n {%- endif -%}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{- name1 + ': ' + message['content'] + '\\n'-}}\n {%- else -%}\n {{- name2 + ': ' + message['content'] + '\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}",
# Generation parameters - Curve shape
'temperature': 0.6,
'dynatemp_low': neutral_samplers['dynatemp_low'],
'dynatemp_high': neutral_samplers['dynatemp_high'],
'dynatemp_exponent': neutral_samplers['dynatemp_exponent'],
'smoothing_factor': neutral_samplers['smoothing_factor'],
'smoothing_curve': neutral_samplers['smoothing_curve'],
# Generation parameters - Curve cutoff
'min_p': neutral_samplers['min_p'],
'top_p': 0.95,
'top_k': 20,
'typical_p': neutral_samplers['typical_p'],
'xtc_threshold': neutral_samplers['xtc_threshold'],
'xtc_probability': neutral_samplers['xtc_probability'],
'epsilon_cutoff': neutral_samplers['epsilon_cutoff'],
'eta_cutoff': neutral_samplers['eta_cutoff'],
'tfs': neutral_samplers['tfs'],
'top_a': neutral_samplers['top_a'],
'top_n_sigma': neutral_samplers['top_n_sigma'],
# Generation parameters - Repetition suppression
'dry_multiplier': neutral_samplers['dry_multiplier'],
'dry_allowed_length': neutral_samplers['dry_allowed_length'],
'dry_base': neutral_samplers['dry_base'],
'repetition_penalty': neutral_samplers['repetition_penalty'],
'frequency_penalty': neutral_samplers['frequency_penalty'],
'presence_penalty': neutral_samplers['presence_penalty'],
'encoder_repetition_penalty': neutral_samplers['encoder_repetition_penalty'],
'no_repeat_ngram_size': neutral_samplers['no_repeat_ngram_size'],
'repetition_penalty_range': neutral_samplers['repetition_penalty_range'],
# Generation parameters - Alternative sampling methods
'penalty_alpha': neutral_samplers['penalty_alpha'],
'guidance_scale': neutral_samplers['guidance_scale'],
'mirostat_mode': neutral_samplers['mirostat_mode'],
'mirostat_tau': neutral_samplers['mirostat_tau'],
'mirostat_eta': neutral_samplers['mirostat_eta'],
# Generation parameters - Other options
'do_sample': neutral_samplers['do_sample'],
'dynamic_temperature': neutral_samplers['dynamic_temperature'],
'temperature_last': neutral_samplers['temperature_last'],
'sampler_priority': neutral_samplers['sampler_priority'],
'dry_sequence_breakers': neutral_samplers['dry_sequence_breakers'],
'grammar_string': '',
}
default_settings = copy.deepcopy(settings)
def do_cmd_flags_warnings():
# Security warnings
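The `default_settings = copy.deepcopy(settings)` line above snapshots the defaults before any user overrides land; a deep copy matters because some settings values are mutable. A minimal sketch with made-up keys:

```python
import copy

settings = {'preset': 'min_p', 'custom_stopping_strings': ['\nYou:']}
default_settings = copy.deepcopy(settings)  # pristine baseline

settings['custom_stopping_strings'].append('###')  # runtime edit
# A shallow copy would have shared the inner list; the deep copy keeps
# the baseline intact, so later diffs against the defaults stay correct.
print(default_settings['custom_stopping_strings'])  # ['\nYou:']
```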

View file

@@ -65,41 +65,39 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False, escap
all_stop_strings += st
shared.stop_everything = False
last_update = -1
reply = ''
is_stream = state['stream']
if len(all_stop_strings) > 0 and not state['stream']:
state = copy.deepcopy(state)
state['stream'] = True
min_update_interval = 0
if state.get('max_updates_second', 0) > 0:
min_update_interval = 1 / state['max_updates_second']
# Generate
last_update = -1
latency_threshold = 1 / 1000
for reply in generate_func(question, original_question, state, stopping_strings, is_chat=is_chat):
cur_time = time.monotonic()
reply, stop_found = apply_stopping_strings(reply, all_stop_strings)
if escape_html:
reply = html.escape(reply)
if is_stream:
cur_time = time.time()
# Limit number of tokens/second to make text readable in real time
if state['max_tokens_second'] > 0:
diff = 1 / state['max_tokens_second'] - (cur_time - last_update)
if diff > 0:
time.sleep(diff)
last_update = time.time()
last_update = time.monotonic()
yield reply
# Limit updates to avoid lag in the Gradio UI
# API updates are not limited
else:
if cur_time - last_update > min_update_interval:
last_update = cur_time
# If 'generate_func' takes less than 0.001 seconds to yield the next token
# (equivalent to more than 1000 tok/s), assume that the UI is lagging behind and skip yielding
if (cur_time - last_update) > latency_threshold:
yield reply
last_update = time.monotonic()
if stop_found or (state['max_tokens_second'] > 0 and shared.stop_everything):
break
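The reworked loop applies two throttles with `time.monotonic()`: an optional tokens/second cap, and a fixed ~1 ms latency threshold that drops UI yields when the generator outruns the consumer. A condensed, self-contained restatement of that shape (illustrative only, not the project's function):

```python
import time

def throttled_stream(chunks, max_tokens_second=0, latency_threshold=1 / 1000):
    last_update = -1
    for reply in chunks:
        cur_time = time.monotonic()
        if max_tokens_second > 0:
            # Sleep long enough to cap the stream at max_tokens_second.
            diff = 1 / max_tokens_second - (cur_time - last_update)
            if diff > 0:
                time.sleep(diff)
            last_update = time.monotonic()
            yield reply
        elif (cur_time - last_update) > latency_threshold:
            # Only yield when at least latency_threshold has passed since
            # the last update; faster arrivals (over ~1000 chunks/s) are
            # assumed to mean the UI is lagging and are skipped.
            yield reply
            last_update = time.monotonic()

for piece in throttled_stream(['a', 'ab', 'abc'], max_tokens_second=2):
    print(piece)
```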
@@ -481,6 +479,7 @@ def generate_reply_custom(question, original_question, state, stopping_strings=N
For models that do not use the transformers library for sampling
"""
state = copy.deepcopy(state)
state['seed'] = set_manual_seed(state['seed'])
t0 = time.time()
reply = ''

View file

@@ -1,4 +1,5 @@
import copy
import threading
from pathlib import Path
import gradio as gr
@@ -6,28 +7,39 @@ import yaml
import extensions
from modules import shared
from modules.chat import load_history
from modules.utils import gradio
with open(Path(__file__).resolve().parent / '../css/NotoSans/stylesheet.css', 'r') as f:
# Global state for auto-saving UI settings with debouncing
_auto_save_timer = None
_auto_save_lock = threading.Lock()
_last_interface_state = None
_last_preset = None
_last_extensions = None
_last_show_controls = None
_last_theme_state = None
with open(Path(__file__).resolve().parent / '../css/NotoSans/stylesheet.css', 'r', encoding='utf-8') as f:
css = f.read()
with open(Path(__file__).resolve().parent / '../css/main.css', 'r') as f:
with open(Path(__file__).resolve().parent / '../css/main.css', 'r', encoding='utf-8') as f:
css += f.read()
with open(Path(__file__).resolve().parent / '../css/katex/katex.min.css', 'r') as f:
with open(Path(__file__).resolve().parent / '../css/katex/katex.min.css', 'r', encoding='utf-8') as f:
css += f.read()
with open(Path(__file__).resolve().parent / '../css/highlightjs/highlightjs-copy.min.css', 'r') as f:
with open(Path(__file__).resolve().parent / '../css/highlightjs/highlightjs-copy.min.css', 'r', encoding='utf-8') as f:
css += f.read()
with open(Path(__file__).resolve().parent / '../js/main.js', 'r') as f:
with open(Path(__file__).resolve().parent / '../js/main.js', 'r', encoding='utf-8') as f:
js = f.read()
with open(Path(__file__).resolve().parent / '../js/global_scope_js.js', 'r') as f:
with open(Path(__file__).resolve().parent / '../js/global_scope_js.js', 'r', encoding='utf-8') as f:
global_scope_js = f.read()
with open(Path(__file__).resolve().parent / '../js/save_files.js', 'r') as f:
with open(Path(__file__).resolve().parent / '../js/save_files.js', 'r', encoding='utf-8') as f:
save_files_js = f.read()
with open(Path(__file__).resolve().parent / '../js/switch_tabs.js', 'r') as f:
with open(Path(__file__).resolve().parent / '../js/switch_tabs.js', 'r', encoding='utf-8') as f:
switch_tabs_js = f.read()
with open(Path(__file__).resolve().parent / '../js/show_controls.js', 'r') as f:
with open(Path(__file__).resolve().parent / '../js/show_controls.js', 'r', encoding='utf-8') as f:
show_controls_js = f.read()
with open(Path(__file__).resolve().parent / '../js/update_big_picture.js', 'r') as f:
with open(Path(__file__).resolve().parent / '../js/update_big_picture.js', 'r', encoding='utf-8') as f:
update_big_picture_js = f.read()
with open(Path(__file__).resolve().parent / '../js/dark_theme.js', 'r') as f:
with open(Path(__file__).resolve().parent / '../js/dark_theme.js', 'r', encoding='utf-8') as f:
dark_theme_js = f.read()
refresh_symbol = '🔄'
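The `open()` calls above now pass `encoding='utf-8'` explicitly because Python's default text encoding is platform-dependent (often cp1252 on Windows) and these CSS/JS assets contain non-ASCII characters. A small self-contained demonstration:

```python
import tempfile
from pathlib import Path

# Without an explicit encoding, open() falls back to the platform default
# (often cp1252 on Windows), which cannot decode characters like 🔄.
p = Path(tempfile.mkdtemp()) / 'demo.css'
p.write_text('/* 🔄 */', encoding='utf-8')
print(p.read_text(encoding='utf-8'))  # deterministic on every platform
```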
@@ -62,8 +74,10 @@ if not shared.args.old_colors:
body_background_fill="white",
block_background_fill="transparent",
body_text_color='rgb(64, 64, 64)',
button_secondary_background_fill="#f4f4f4",
button_secondary_background_fill="white",
button_secondary_border_color="var(--border-color-primary)",
input_shadow="none",
button_shadow_hover="none",
# Dark Mode Colors
input_background_fill_dark='var(--darker-gray)',
@@ -95,6 +109,7 @@ if not shared.args.old_colors:
button_large_radius='0.375rem',
button_large_padding='6px 12px',
input_radius='0.375rem',
block_radius='0',
)
if Path("user_data/notification.mp3").exists():
@@ -194,7 +209,6 @@ def list_interface_input_elements():
'max_new_tokens',
'prompt_lookup_num_tokens',
'max_tokens_second',
'max_updates_second',
'do_sample',
'dynamic_temperature',
'temperature_last',
@@ -257,6 +271,11 @@ def list_interface_input_elements():
# Model elements
elements += list_model_elements()
# Other elements
elements += [
'paste_to_attachment'
]
return elements
@@ -270,6 +289,13 @@ def gather_interface_values(*args):
if not shared.args.multi_user:
shared.persistent_interface_state = output
# Remove the chat input, as it gets cleared after this function call
shared.persistent_interface_state.pop('textbox')
# Prevent history loss if backend is restarted but UI is not refreshed
if output['history'] is None and output['unique_id'] is not None:
output['history'] = load_history(output['unique_id'], output['character_menu'], output['mode'])
return output
@@ -292,7 +318,7 @@ def apply_interface_values(state, use_persistent=False):
def save_settings(state, preset, extensions_list, show_controls, theme_state):
output = copy.deepcopy(shared.settings)
exclude = ['name2', 'greeting', 'context', 'truncation_length', 'instruction_template_str']
exclude = []
for k in state:
if k in shared.settings and k not in exclude:
output[k] = state[k]
@@ -301,10 +327,11 @@ def save_settings(state, preset, extensions_list, show_controls, theme_state):
output['prompt-default'] = state['prompt_menu-default']
output['prompt-notebook'] = state['prompt_menu-notebook']
output['character'] = state['character_menu']
output['default_extensions'] = extensions_list
output['seed'] = int(output['seed'])
output['show_controls'] = show_controls
output['dark_theme'] = True if theme_state == 'dark' else False
output.pop('instruction_template_str')
output.pop('truncation_length')
# Save extension values in the UI
for extension_name in extensions_list:
@@ -327,6 +354,143 @@ def save_settings(state, preset, extensions_list, show_controls, theme_state):
return yaml.dump(output, sort_keys=False, width=float("inf"), allow_unicode=True)
def store_current_state_and_debounce(interface_state, preset, extensions, show_controls, theme_state):
"""Store current state and trigger debounced save"""
global _auto_save_timer, _last_interface_state, _last_preset, _last_extensions, _last_show_controls, _last_theme_state
if shared.args.multi_user:
return
# Store the current state in global variables
_last_interface_state = interface_state
_last_preset = preset
_last_extensions = extensions
_last_show_controls = show_controls
_last_theme_state = theme_state
# Reset the debounce timer
with _auto_save_lock:
if _auto_save_timer is not None:
_auto_save_timer.cancel()
_auto_save_timer = threading.Timer(1.0, _perform_debounced_save)
_auto_save_timer.start()
def _perform_debounced_save():
"""Actually perform the save using the stored state"""
global _auto_save_timer
try:
if _last_interface_state is not None:
contents = save_settings(_last_interface_state, _last_preset, _last_extensions, _last_show_controls, _last_theme_state)
settings_path = Path('user_data') / 'settings.yaml'
settings_path.parent.mkdir(exist_ok=True)
with open(settings_path, 'w', encoding='utf-8') as f:
f.write(contents)
except Exception as e:
print(f"Auto-save failed: {e}")
finally:
with _auto_save_lock:
_auto_save_timer = None
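The auto-save above is a classic timer-based debounce: each change cancels the pending `threading.Timer` and arms a fresh one, so the file is written only once things have been quiet for a second. The same pattern as a small reusable sketch:

```python
import threading

class Debouncer:
    def __init__(self, fn, delay=1.0):
        self.fn, self.delay = fn, delay
        self._timer = None
        self._lock = threading.Lock()

    def __call__(self, *args, **kwargs):
        with self._lock:
            if self._timer is not None:
                self._timer.cancel()  # restart the quiet period
            self._timer = threading.Timer(self.delay, self.fn, args, kwargs)
            self._timer.start()

save = Debouncer(lambda: print('saved'), delay=1.0)
for _ in range(5):
    save()  # only the last call fires, about one second later
```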
def setup_auto_save():
"""Attach auto-save to key UI elements"""
if shared.args.multi_user:
return
change_elements = [
# Chat tab (ui_chat.py)
'start_with',
'enable_web_search',
'web_search_pages',
'mode',
'chat_style',
'chat-instruct_command',
'character_menu',
'name1',
'name2',
'context',
'greeting',
'user_bio',
'custom_system_message',
'chat_template_str',
# Parameters tab (ui_parameters.py) - Generation parameters
'preset_menu',
'temperature',
'dynatemp_low',
'dynatemp_high',
'dynatemp_exponent',
'smoothing_factor',
'smoothing_curve',
'min_p',
'top_p',
'top_k',
'typical_p',
'xtc_threshold',
'xtc_probability',
'epsilon_cutoff',
'eta_cutoff',
'tfs',
'top_a',
'top_n_sigma',
'dry_multiplier',
'dry_allowed_length',
'dry_base',
'repetition_penalty',
'frequency_penalty',
'presence_penalty',
'encoder_repetition_penalty',
'no_repeat_ngram_size',
'repetition_penalty_range',
'penalty_alpha',
'guidance_scale',
'mirostat_mode',
'mirostat_tau',
'mirostat_eta',
'max_new_tokens',
'prompt_lookup_num_tokens',
'max_tokens_second',
'do_sample',
'dynamic_temperature',
'temperature_last',
'auto_max_new_tokens',
'ban_eos_token',
'add_bos_token',
'enable_thinking',
'skip_special_tokens',
'stream',
'static_cache',
'truncation_length',
'seed',
'sampler_priority',
'custom_stopping_strings',
'custom_token_bans',
'negative_prompt',
'dry_sequence_breakers',
'grammar_string',
# Default tab (ui_default.py)
'prompt_menu-default',
# Notebook tab (ui_notebook.py)
'prompt_menu-notebook',
# Session tab (ui_session.py)
'show_controls',
'theme_state',
'paste_to_attachment'
]
for element_name in change_elements:
if element_name in shared.gradio:
shared.gradio[element_name].change(
gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
store_current_state_and_debounce, gradio('interface_state', 'preset_menu', 'extensions_menu', 'show_controls', 'theme_state'), None, show_progress=False)
def create_refresh_button(refresh_component, refresh_method, refreshed_args, elem_class, interactive=True):
"""
Copied from https://github.com/AUTOMATIC1111/stable-diffusion-webui

View file

@@ -18,23 +18,23 @@ def create_ui():
mu = shared.args.multi_user
shared.gradio['Chat input'] = gr.State()
shared.gradio['history'] = gr.JSON(visible=False)
shared.gradio['history'] = gr.State({'internal': [], 'visible': [], 'metadata': {}})
with gr.Tab('Chat', id='Chat', elem_id='chat-tab'):
with gr.Row(elem_id='past-chats-row', elem_classes=['pretty_scrollbar']):
with gr.Column():
with gr.Row(elem_id='past-chats-buttons'):
shared.gradio['branch_chat'] = gr.Button('Branch', elem_classes='refresh-button', elem_id='Branch', interactive=not mu)
shared.gradio['branch_chat'] = gr.Button('Branch', elem_classes=['refresh-button', 'refresh-button-medium'], elem_id='Branch', interactive=not mu)
shared.gradio['rename_chat'] = gr.Button('Rename', elem_classes=['refresh-button', 'refresh-button-medium'], interactive=not mu)
shared.gradio['delete_chat'] = gr.Button('🗑️', visible=False, elem_classes='refresh-button', interactive=not mu, elem_id='delete_chat')
shared.gradio['Start new chat'] = gr.Button('New chat', elem_classes=['refresh-button', 'refresh-button-medium', 'focus-on-chat-input'])
shared.gradio['branch_index'] = gr.Number(value=-1, precision=0, visible=False, elem_id="Branch-index", interactive=True)
shared.gradio['rename_chat'] = gr.Button('Rename', elem_classes='refresh-button', interactive=not mu)
shared.gradio['delete_chat'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
shared.gradio['Start new chat'] = gr.Button('New chat', elem_classes=['refresh-button', 'focus-on-chat-input'])
shared.gradio['search_chat'] = gr.Textbox(placeholder='Search chats...', max_lines=1, elem_id='search_chat')
with gr.Row(elem_id='delete-chat-row', visible=False) as shared.gradio['delete-chat-row']:
shared.gradio['delete_chat-cancel'] = gr.Button('Cancel', elem_classes=['refresh-button', 'focus-on-chat-input'])
shared.gradio['delete_chat-confirm'] = gr.Button('Confirm', variant='stop', elem_classes=['refresh-button', 'focus-on-chat-input'])
shared.gradio['delete_chat-cancel'] = gr.Button('Cancel', elem_classes=['refresh-button', 'focus-on-chat-input'], elem_id='delete_chat-cancel')
shared.gradio['delete_chat-confirm'] = gr.Button('Confirm', variant='stop', elem_classes=['refresh-button', 'focus-on-chat-input'], elem_id='delete_chat-confirm')
with gr.Row(elem_id='rename-row', visible=False) as shared.gradio['rename-row']:
shared.gradio['rename_to'] = gr.Textbox(label='Rename to:', placeholder='New name', elem_classes=['no-background'])
@@ -55,7 +55,6 @@ def create_ui():
with gr.Column(scale=10, elem_id='chat-input-container'):
shared.gradio['textbox'] = gr.MultimodalTextbox(label='', placeholder='Send a message', file_types=['text', '.pdf'], file_count="multiple", elem_id='chat-input', elem_classes=['add_scrollbar'])
shared.gradio['show_controls'] = gr.Checkbox(value=shared.settings['show_controls'], label='Show controls (Ctrl+S)', elem_id='show-controls')
shared.gradio['typing-dots'] = gr.HTML(value='<div class="typing"><span></span><span class="dot1"></span><span class="dot2"></span></div>', label='typing', elem_id='typing-container')
with gr.Column(scale=1, elem_id='generate-stop-container'):
@@ -65,21 +64,15 @@ def create_ui():
# Hover menu buttons
with gr.Column(elem_id='chat-buttons'):
with gr.Row():
shared.gradio['Regenerate'] = gr.Button('Regenerate (Ctrl + Enter)', elem_id='Regenerate')
shared.gradio['Continue'] = gr.Button('Continue (Alt + Enter)', elem_id='Continue')
shared.gradio['Remove last'] = gr.Button('Remove last reply (Ctrl + Shift + Backspace)', elem_id='Remove-last')
with gr.Row():
shared.gradio['Impersonate'] = gr.Button('Impersonate (Ctrl + Shift + M)', elem_id='Impersonate')
with gr.Row():
shared.gradio['Send dummy message'] = gr.Button('Send dummy message')
shared.gradio['Send dummy reply'] = gr.Button('Send dummy reply')
with gr.Row():
shared.gradio['send-chat-to-default'] = gr.Button('Send to Default')
shared.gradio['send-chat-to-notebook'] = gr.Button('Send to Notebook')
shared.gradio['Regenerate'] = gr.Button('Regenerate (Ctrl + Enter)', elem_id='Regenerate')
shared.gradio['Continue'] = gr.Button('Continue (Alt + Enter)', elem_id='Continue')
shared.gradio['Remove last'] = gr.Button('Remove last reply (Ctrl + Shift + Backspace)', elem_id='Remove-last')
shared.gradio['Impersonate'] = gr.Button('Impersonate (Ctrl + Shift + M)', elem_id='Impersonate')
shared.gradio['Send dummy message'] = gr.Button('Send dummy message')
shared.gradio['Send dummy reply'] = gr.Button('Send dummy reply')
shared.gradio['send-chat-to-default'] = gr.Button('Send to Default')
shared.gradio['send-chat-to-notebook'] = gr.Button('Send to Notebook')
shared.gradio['show_controls'] = gr.Checkbox(value=shared.settings['show_controls'], label='Show controls (Ctrl+S)', elem_id='show-controls')
with gr.Row(elem_id='chat-controls', elem_classes=['pretty_scrollbar']):
with gr.Column():
@@ -87,13 +80,13 @@ def create_ui():
shared.gradio['start_with'] = gr.Textbox(label='Start reply with', placeholder='Sure thing!', value=shared.settings['start_with'], elem_classes=['add_scrollbar'])
with gr.Row():
shared.gradio['enable_web_search'] = gr.Checkbox(value=shared.settings.get('enable_web_search', False), label='Activate web search')
shared.gradio['enable_web_search'] = gr.Checkbox(value=shared.settings.get('enable_web_search', False), label='Activate web search', elem_id='web-search')
with gr.Row(visible=shared.settings.get('enable_web_search', False)) as shared.gradio['web_search_row']:
shared.gradio['web_search_pages'] = gr.Number(value=shared.settings.get('web_search_pages', 3), precision=0, label='Number of pages to download', minimum=1, maximum=10)
with gr.Row():
shared.gradio['mode'] = gr.Radio(choices=['instruct', 'chat-instruct', 'chat'], value=shared.settings['mode'] if shared.settings['mode'] in ['chat', 'chat-instruct'] else None, label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template Parameters > Instruction template is used.', elem_id='chat-mode')
shared.gradio['mode'] = gr.Radio(choices=['instruct', 'chat-instruct', 'chat'], value=None, label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template Parameters > Instruction template is used.', elem_id='chat-mode')
with gr.Row():
shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label='Chat style', value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct')
@@ -125,14 +118,15 @@ def create_chat_settings_ui():
with gr.Column(scale=8):
with gr.Tab("Character"):
with gr.Row():
shared.gradio['character_menu'] = gr.Dropdown(value=None, choices=utils.get_available_characters(), label='Character', elem_id='character-menu', info='Used in chat and chat-instruct modes.', elem_classes='slim-dropdown')
shared.gradio['character_menu'] = gr.Dropdown(value=shared.settings['character'], choices=utils.get_available_characters(), label='Character', elem_id='character-menu', info='Used in chat and chat-instruct modes.', elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['character_menu'], lambda: None, lambda: {'choices': utils.get_available_characters()}, 'refresh-button', interactive=not mu)
shared.gradio['save_character'] = gr.Button('💾', elem_classes='refresh-button', elem_id="save-character", interactive=not mu)
shared.gradio['delete_character'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
shared.gradio['restore_character'] = gr.Button('Restore character', elem_classes='refresh-button', interactive=True, elem_id='restore-character')
shared.gradio['name2'] = gr.Textbox(value='', lines=1, label='Character\'s name')
shared.gradio['context'] = gr.Textbox(value='', lines=10, label='Context', elem_classes=['add_scrollbar'])
shared.gradio['greeting'] = gr.Textbox(value='', lines=5, label='Greeting', elem_classes=['add_scrollbar'])
shared.gradio['name2'] = gr.Textbox(value=shared.settings['name2'], lines=1, label='Character\'s name')
shared.gradio['context'] = gr.Textbox(value=shared.settings['context'], lines=10, label='Context', elem_classes=['add_scrollbar'])
shared.gradio['greeting'] = gr.Textbox(value=shared.settings['greeting'], lines=5, label='Greeting', elem_classes=['add_scrollbar'])
with gr.Tab("User"):
shared.gradio['name1'] = gr.Textbox(value=shared.settings['name1'], lines=1, label='Name')
@@ -185,7 +179,7 @@ def create_chat_settings_ui():
with gr.Row():
with gr.Column():
shared.gradio['custom_system_message'] = gr.Textbox(value=shared.settings['custom_system_message'], lines=2, label='Custom system message', info='If not empty, will be used instead of the default one.', elem_classes=['add_scrollbar'])
shared.gradio['instruction_template_str'] = gr.Textbox(value='', label='Instruction template', lines=24, info='This gets autodetected; you usually don\'t need to change it. Used in instruct and chat-instruct modes.', elem_classes=['add_scrollbar', 'monospace'])
shared.gradio['instruction_template_str'] = gr.Textbox(value=shared.settings['instruction_template_str'], label='Instruction template', lines=24, info='This gets autodetected; you usually don\'t need to change it. Used in instruct and chat-instruct modes.', elem_classes=['add_scrollbar', 'monospace'])
with gr.Row():
shared.gradio['send_instruction_to_default'] = gr.Button('Send to default', elem_classes=['small-button'])
shared.gradio['send_instruction_to_notebook'] = gr.Button('Send to notebook', elem_classes=['small-button'])
@@ -202,7 +196,7 @@ def create_event_handlers():
shared.reload_inputs = gradio(reload_arr)
# Morph HTML updates instead of updating everything
shared.gradio['display'].change(None, gradio('display'), None, js="(data) => handleMorphdomUpdate(data.html)")
shared.gradio['display'].change(None, gradio('display'), None, js="(data) => handleMorphdomUpdate(data)")
shared.gradio['Generate'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
@@ -267,11 +261,9 @@ def create_event_handlers():
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.handle_start_new_chat_click, gradio('interface_state'), gradio('history', 'display', 'unique_id'), show_progress=False)
shared.gradio['delete_chat'].click(lambda: gr.update(visible=True), None, gradio('delete-chat-row'))
shared.gradio['delete_chat-cancel'].click(lambda: gr.update(visible=False), None, gradio('delete-chat-row'))
shared.gradio['delete_chat-confirm'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.handle_delete_chat_confirm_click, gradio('interface_state'), gradio('history', 'display', 'unique_id', 'delete-chat-row'), show_progress=False)
chat.handle_delete_chat_confirm_click, gradio('interface_state'), gradio('history', 'display', 'unique_id'), show_progress=False)
shared.gradio['branch_chat'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
@@ -301,10 +293,12 @@ def create_event_handlers():
chat.handle_character_menu_change, gradio('interface_state'), gradio('history', 'display', 'name1', 'name2', 'character_picture', 'greeting', 'context', 'unique_id'), show_progress=False).then(
None, None, None, js=f'() => {{{ui.update_big_picture_js}; updateBigPicture()}}')
shared.gradio['character_picture'].change(chat.handle_character_picture_change, gradio('character_picture'), None, show_progress=False)
shared.gradio['mode'].change(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.handle_mode_change, gradio('interface_state'), gradio('history', 'display', 'chat_style', 'chat-instruct_command', 'unique_id'), show_progress=False).then(
None, gradio('mode'), None, js="(mode) => {mode === 'instruct' ? document.getElementById('character-menu').parentNode.parentNode.style.display = 'none' : document.getElementById('character-menu').parentNode.parentNode.style.display = ''}")
None, gradio('mode'), None, js="(mode) => {const characterContainer = document.getElementById('character-menu').parentNode.parentNode; const isInChatTab = document.querySelector('#chat-controls').contains(characterContainer); if (isInChatTab) { characterContainer.style.display = mode === 'instruct' ? 'none' : ''; }}")
shared.gradio['chat_style'].change(chat.redraw_html, gradio(reload_arr), gradio('display'), show_progress=False)
@@ -324,6 +318,10 @@ def create_event_handlers():
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.handle_save_template_click, gradio('instruction_template_str'), gradio('save_filename', 'save_root', 'save_contents', 'file_saver'), show_progress=False)
shared.gradio['restore_character'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.restore_character_for_ui, gradio('interface_state'), gradio('interface_state', 'name2', 'context', 'greeting', 'character_picture'), show_progress=False)
shared.gradio['delete_template'].click(chat.handle_delete_template_click, gradio('instruction_template'), gradio('delete_filename', 'delete_root', 'file_deleter'), show_progress=False)
shared.gradio['save_chat_history'].click(
lambda x: json.dumps(x, indent=4), gradio('history'), gradio('temporary_text')).then(

View file

@@ -19,7 +19,7 @@ def create_ui():
with gr.Row():
with gr.Column():
with gr.Row():
shared.gradio['textbox-default'] = gr.Textbox(value='', lines=27, label='Input', elem_classes=['textbox_default', 'add_scrollbar'])
shared.gradio['textbox-default'] = gr.Textbox(value=load_prompt(shared.settings['prompt-default']), lines=27, label='Input', elem_classes=['textbox_default', 'add_scrollbar'])
shared.gradio['token-counter-default'] = gr.HTML(value="<span>0</span>", elem_id="default-token-counter")
with gr.Row():
@@ -28,7 +28,7 @@ def create_ui():
shared.gradio['Generate-default'] = gr.Button('Generate', variant='primary')
with gr.Row():
shared.gradio['prompt_menu-default'] = gr.Dropdown(choices=utils.get_available_prompts(), value='None', label='Prompt', elem_classes='slim-dropdown')
shared.gradio['prompt_menu-default'] = gr.Dropdown(choices=utils.get_available_prompts(), value=shared.settings['prompt-default'], label='Prompt', elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['prompt_menu-default'], lambda: None, lambda: {'choices': utils.get_available_prompts()}, 'refresh-button', interactive=not mu)
shared.gradio['save_prompt-default'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
shared.gradio['delete_prompt-default'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)

View file

@@ -1,4 +1,6 @@
import importlib
import queue
import threading
import traceback
from functools import partial
from pathlib import Path
@@ -205,48 +207,51 @@ def load_lora_wrapper(selected_loras):
def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), return_links=False, check=False):
downloader_module = importlib.import_module("download-model")
downloader = downloader_module.ModelDownloader()
update_queue = queue.Queue()
try:
# Handle direct GGUF URLs
if repo_id.startswith("https://") and ("huggingface.co" in repo_id) and (repo_id.endswith(".gguf") or repo_id.endswith(".gguf?download=true")):
try:
path = repo_id.split("huggingface.co/")[1]
# Extract the repository ID (first two parts of the path)
parts = path.split("/")
if len(parts) >= 2:
extracted_repo_id = f"{parts[0]}/{parts[1]}"
# Extract the filename (last part of the path)
filename = repo_id.split("/")[-1]
if "?download=true" in filename:
filename = filename.replace("?download=true", "")
filename = repo_id.split("/")[-1].replace("?download=true", "")
repo_id = extracted_repo_id
specific_file = filename
except:
pass
except Exception as e:
yield f"Error parsing GGUF URL: {e}"
progress(0.0)
return
if repo_id == "":
yield ("Please enter a model path")
if not repo_id:
yield "Please enter a model path."
progress(0.0)
return
repo_id = repo_id.strip()
specific_file = specific_file.strip()
downloader = importlib.import_module("download-model").ModelDownloader()
progress(0.0)
progress(0.0, "Preparing download...")
model, branch = downloader.sanitize_model_and_branch_names(repo_id, None)
yield ("Getting the download links from Hugging Face")
yield "Getting download links from Hugging Face..."
links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=False, specific_file=specific_file)
if not links:
yield "No files found to download for the given model/criteria."
progress(0.0)
return
# Check for multiple GGUF files
gguf_files = [link for link in links if link.lower().endswith('.gguf')]
if len(gguf_files) > 1 and not specific_file:
output = "Multiple GGUF files found. Please copy one of the following filenames to the 'File name' field:\n\n```\n"
for link in gguf_files:
output += f"{Path(link).name}\n"
output += "```"
yield output
return
@@ -260,12 +265,9 @@ def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), retur
yield output
return
yield ("Getting the output folder")
yield "Determining output folder..."
output_folder = downloader.get_output_folder(
model,
branch,
is_lora,
is_llamacpp=is_llamacpp,
model, branch, is_lora, is_llamacpp=is_llamacpp,
model_dir=shared.args.model_dir if shared.args.model_dir != shared.args_defaults.model_dir else None
)
@@ -275,19 +277,65 @@ def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), retur
output_folder = Path(shared.args.lora_dir)
if check:
progress(0.5)
yield ("Checking previously downloaded files")
yield "Checking previously downloaded files..."
progress(0.5, "Verifying files...")
downloader.check_model_files(model, branch, links, sha256, output_folder)
progress(1.0)
else:
yield (f"Downloading file{'s' if len(links) > 1 else ''} to `{output_folder}/`")
downloader.download_model_files(model, branch, links, sha256, output_folder, progress_bar=progress, threads=4, is_llamacpp=is_llamacpp)
progress(1.0, "Verification complete.")
yield "File check complete."
return
yield (f"Model successfully saved to `{output_folder}/`.")
except:
progress(1.0)
yield traceback.format_exc().replace('\n', '\n\n')
yield ""
progress(0.0, "Download starting...")
def downloader_thread_target():
try:
downloader.download_model_files(
model, branch, links, sha256, output_folder,
progress_queue=update_queue,
threads=4,
is_llamacpp=is_llamacpp,
specific_file=specific_file
)
update_queue.put(("COMPLETED", f"Model successfully saved to `{output_folder}/`."))
except Exception as e:
tb_str = traceback.format_exc().replace('\n', '\n\n')
update_queue.put(("ERROR", tb_str))
download_thread = threading.Thread(target=downloader_thread_target)
download_thread.start()
while True:
try:
message = update_queue.get(timeout=0.2)
if not isinstance(message, tuple) or len(message) != 2:
continue
msg_identifier, data = message
if msg_identifier == "COMPLETED":
progress(1.0, "Download complete!")
yield data
break
elif msg_identifier == "ERROR":
progress(0.0, "Error occurred")
yield data
break
elif isinstance(msg_identifier, float):
progress_value = msg_identifier
description_str = data
progress(progress_value, f"Downloading: {description_str}")
except queue.Empty:
if not download_thread.is_alive():
yield "Download process finished."
break
download_thread.join()
except Exception as e:
progress(0.0)
tb_str = traceback.format_exc().replace('\n', '\n\n')
yield tb_str
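The rewritten wrapper moves the blocking download into a worker thread and polls a `queue.Queue` for `(progress, description)` tuples plus `COMPLETED`/`ERROR` sentinels, which keeps the Gradio generator responsive. The same producer-consumer shape in isolation (names here are illustrative, not the project's API):

```python
import queue
import threading
import time

def worker(q):
    # Producer: report (fraction, description) tuples, then a sentinel.
    for i in range(1, 4):
        time.sleep(0.1)
        q.put((i / 3, f"part {i}/3"))
    q.put(("COMPLETED", "done"))

q = queue.Queue()
t = threading.Thread(target=worker, args=(q,))
t.start()
while True:
    try:
        tag, data = q.get(timeout=0.2)
    except queue.Empty:
        if not t.is_alive():  # producer died without posting a sentinel
            break
        continue
    if tag == "COMPLETED":
        print(data)
        break
    print(f"{tag:.0%} {data}")
t.join()
```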
def update_truncation_length(current_length, state):

View file

@@ -22,7 +22,7 @@ def create_ui():
with gr.Column(scale=4):
with gr.Tab('Raw'):
with gr.Row():
shared.gradio['textbox-notebook'] = gr.Textbox(value='', lines=27, elem_id='textbox-notebook', elem_classes=['textbox', 'add_scrollbar'])
shared.gradio['textbox-notebook'] = gr.Textbox(value=load_prompt(shared.settings['prompt-notebook']), lines=27, elem_id='textbox-notebook', elem_classes=['textbox', 'add_scrollbar'])
shared.gradio['token-counter-notebook'] = gr.HTML(value="<span>0</span>", elem_id="notebook-token-counter")
with gr.Tab('Markdown'):
@@ -56,7 +56,7 @@ def create_ui():
with gr.Column(scale=1):
gr.HTML('<div style="padding-bottom: 13px"></div>')
with gr.Row():
shared.gradio['prompt_menu-notebook'] = gr.Dropdown(choices=utils.get_available_prompts(), value='None', label='Prompt', elem_classes='slim-dropdown')
shared.gradio['prompt_menu-notebook'] = gr.Dropdown(choices=utils.get_available_prompts(), value=shared.settings['prompt-notebook'], label='Prompt', elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['prompt_menu-notebook'], lambda: None, lambda: {'choices': utils.get_available_prompts()}, ['refresh-button', 'refresh-button-small'], interactive=not mu)
shared.gradio['save_prompt-notebook'] = gr.Button('💾', elem_classes=['refresh-button', 'refresh-button-small'], interactive=not mu)
shared.gradio['delete_prompt-notebook'] = gr.Button('🗑️', elem_classes=['refresh-button', 'refresh-button-small'], interactive=not mu)

View file

@@ -6,19 +6,19 @@ from modules import loaders, presets, shared, ui, ui_chat, utils
from modules.utils import gradio
def create_ui(default_preset):
def create_ui():
mu = shared.args.multi_user
generate_params = presets.load_preset(default_preset)
with gr.Tab("Parameters", elem_id="parameters"):
with gr.Tab("Generation"):
with gr.Row():
with gr.Column():
with gr.Row():
shared.gradio['preset_menu'] = gr.Dropdown(choices=utils.get_available_presets(), value=default_preset, label='Preset', elem_classes='slim-dropdown')
shared.gradio['preset_menu'] = gr.Dropdown(choices=utils.get_available_presets(), value=shared.settings['preset'], label='Preset', elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['preset_menu'], lambda: None, lambda: {'choices': utils.get_available_presets()}, 'refresh-button', interactive=not mu)
shared.gradio['save_preset'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
shared.gradio['delete_preset'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
shared.gradio['random_preset'] = gr.Button('🎲', elem_classes='refresh-button')
shared.gradio['reset_preset'] = gr.Button('Restore preset', elem_classes='refresh-button', interactive=True)
shared.gradio['neutralize_samplers'] = gr.Button('Neutralize samplers', elem_classes='refresh-button', interactive=True)
with gr.Column():
shared.gradio['filter_by_loader'] = gr.Dropdown(label="Filter by loader", choices=["All"] + list(loaders.loaders_and_params.keys()) if not shared.args.portable else ['llama.cpp'], value="All", elem_classes='slim-dropdown')
@@ -28,57 +28,60 @@ def create_ui(default_preset):
with gr.Row():
with gr.Column():
gr.Markdown('## Curve shape')
shared.gradio['temperature'] = gr.Slider(0.01, 5, value=generate_params['temperature'], step=0.01, label='temperature')
shared.gradio['dynatemp_low'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_low'], step=0.01, label='dynatemp_low', visible=generate_params['dynamic_temperature'])
shared.gradio['dynatemp_high'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_high'], step=0.01, label='dynatemp_high', visible=generate_params['dynamic_temperature'])
shared.gradio['dynatemp_exponent'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_exponent'], step=0.01, label='dynatemp_exponent', visible=generate_params['dynamic_temperature'])
shared.gradio['smoothing_factor'] = gr.Slider(0.0, 10.0, value=generate_params['smoothing_factor'], step=0.01, label='smoothing_factor', info='Activates Quadratic Sampling.')
shared.gradio['smoothing_curve'] = gr.Slider(1.0, 10.0, value=generate_params['smoothing_curve'], step=0.01, label='smoothing_curve', info='Adjusts the dropoff curve of Quadratic Sampling.')
shared.gradio['temperature'] = gr.Slider(0.01, 5, value=shared.settings['temperature'], step=0.01, label='temperature')
shared.gradio['dynatemp_low'] = gr.Slider(0.01, 5, value=shared.settings['dynatemp_low'], step=0.01, label='dynatemp_low', visible=shared.settings['dynamic_temperature'])
shared.gradio['dynatemp_high'] = gr.Slider(0.01, 5, value=shared.settings['dynatemp_high'], step=0.01, label='dynatemp_high', visible=shared.settings['dynamic_temperature'])
shared.gradio['dynatemp_exponent'] = gr.Slider(0.01, 5, value=shared.settings['dynatemp_exponent'], step=0.01, label='dynatemp_exponent', visible=shared.settings['dynamic_temperature'])
shared.gradio['smoothing_factor'] = gr.Slider(0.0, 10.0, value=shared.settings['smoothing_factor'], step=0.01, label='smoothing_factor', info='Activates Quadratic Sampling.')
shared.gradio['smoothing_curve'] = gr.Slider(1.0, 10.0, value=shared.settings['smoothing_curve'], step=0.01, label='smoothing_curve', info='Adjusts the dropoff curve of Quadratic Sampling.')
shared.gradio['dynamic_temperature'] = gr.Checkbox(value=shared.settings['dynamic_temperature'], label='dynamic_temperature')
gr.Markdown('## Curve cutoff')
shared.gradio['min_p'] = gr.Slider(0.0, 1.0, value=generate_params['min_p'], step=0.01, label='min_p')
shared.gradio['top_n_sigma'] = gr.Slider(0.0, 5.0, value=generate_params['top_n_sigma'], step=0.01, label='top_n_sigma')
shared.gradio['top_p'] = gr.Slider(0.0, 1.0, value=generate_params['top_p'], step=0.01, label='top_p')
shared.gradio['top_k'] = gr.Slider(0, 200, value=generate_params['top_k'], step=1, label='top_k')
shared.gradio['typical_p'] = gr.Slider(0.0, 1.0, value=generate_params['typical_p'], step=0.01, label='typical_p')
shared.gradio['xtc_threshold'] = gr.Slider(0, 0.5, value=generate_params['xtc_threshold'], step=0.01, label='xtc_threshold', info='If 2 or more tokens have probability above this threshold, consider removing all but the last one.')
shared.gradio['xtc_probability'] = gr.Slider(0, 1, value=generate_params['xtc_probability'], step=0.01, label='xtc_probability', info='Probability that the removal will actually happen. 0 disables the sampler. 1 makes it always happen.')
shared.gradio['epsilon_cutoff'] = gr.Slider(0, 9, value=generate_params['epsilon_cutoff'], step=0.01, label='epsilon_cutoff')
shared.gradio['eta_cutoff'] = gr.Slider(0, 20, value=generate_params['eta_cutoff'], step=0.01, label='eta_cutoff')
shared.gradio['tfs'] = gr.Slider(0.0, 1.0, value=generate_params['tfs'], step=0.01, label='tfs')
shared.gradio['top_a'] = gr.Slider(0.0, 1.0, value=generate_params['top_a'], step=0.01, label='top_a')
shared.gradio['min_p'] = gr.Slider(0.0, 1.0, value=shared.settings['min_p'], step=0.01, label='min_p')
shared.gradio['top_n_sigma'] = gr.Slider(0.0, 5.0, value=shared.settings['top_n_sigma'], step=0.01, label='top_n_sigma')
shared.gradio['top_p'] = gr.Slider(0.0, 1.0, value=shared.settings['top_p'], step=0.01, label='top_p')
shared.gradio['top_k'] = gr.Slider(0, 200, value=shared.settings['top_k'], step=1, label='top_k')
shared.gradio['typical_p'] = gr.Slider(0.0, 1.0, value=shared.settings['typical_p'], step=0.01, label='typical_p')
shared.gradio['xtc_threshold'] = gr.Slider(0, 0.5, value=shared.settings['xtc_threshold'], step=0.01, label='xtc_threshold', info='If 2 or more tokens have probability above this threshold, consider removing all but the last one.')
shared.gradio['xtc_probability'] = gr.Slider(0, 1, value=shared.settings['xtc_probability'], step=0.01, label='xtc_probability', info='Probability that the removal will actually happen. 0 disables the sampler. 1 makes it always happen.')
shared.gradio['epsilon_cutoff'] = gr.Slider(0, 9, value=shared.settings['epsilon_cutoff'], step=0.01, label='epsilon_cutoff')
shared.gradio['eta_cutoff'] = gr.Slider(0, 20, value=shared.settings['eta_cutoff'], step=0.01, label='eta_cutoff')
shared.gradio['tfs'] = gr.Slider(0.0, 1.0, value=shared.settings['tfs'], step=0.01, label='tfs')
shared.gradio['top_a'] = gr.Slider(0.0, 1.0, value=shared.settings['top_a'], step=0.01, label='top_a')
gr.Markdown('## Repetition suppression')
shared.gradio['dry_multiplier'] = gr.Slider(0, 5, value=generate_params['dry_multiplier'], step=0.01, label='dry_multiplier', info='Set to greater than 0 to enable DRY. Recommended value: 0.8.')
shared.gradio['dry_allowed_length'] = gr.Slider(1, 20, value=generate_params['dry_allowed_length'], step=1, label='dry_allowed_length', info='Longest sequence that can be repeated without being penalized.')
shared.gradio['dry_base'] = gr.Slider(1, 4, value=generate_params['dry_base'], step=0.01, label='dry_base', info='Controls how fast the penalty grows with increasing sequence length.')
shared.gradio['repetition_penalty'] = gr.Slider(1.0, 1.5, value=generate_params['repetition_penalty'], step=0.01, label='repetition_penalty')
shared.gradio['frequency_penalty'] = gr.Slider(0, 2, value=generate_params['frequency_penalty'], step=0.05, label='frequency_penalty')
shared.gradio['presence_penalty'] = gr.Slider(0, 2, value=generate_params['presence_penalty'], step=0.05, label='presence_penalty')
shared.gradio['encoder_repetition_penalty'] = gr.Slider(0.8, 1.5, value=generate_params['encoder_repetition_penalty'], step=0.01, label='encoder_repetition_penalty')
shared.gradio['no_repeat_ngram_size'] = gr.Slider(0, 20, step=1, value=generate_params['no_repeat_ngram_size'], label='no_repeat_ngram_size')
shared.gradio['repetition_penalty_range'] = gr.Slider(0, 4096, step=64, value=generate_params['repetition_penalty_range'], label='repetition_penalty_range')
shared.gradio['dry_multiplier'] = gr.Slider(0, 5, value=shared.settings['dry_multiplier'], step=0.01, label='dry_multiplier', info='Set to greater than 0 to enable DRY. Recommended value: 0.8.')
shared.gradio['dry_allowed_length'] = gr.Slider(1, 20, value=shared.settings['dry_allowed_length'], step=1, label='dry_allowed_length', info='Longest sequence that can be repeated without being penalized.')
shared.gradio['dry_base'] = gr.Slider(1, 4, value=shared.settings['dry_base'], step=0.01, label='dry_base', info='Controls how fast the penalty grows with increasing sequence length.')
shared.gradio['repetition_penalty'] = gr.Slider(1.0, 1.5, value=shared.settings['repetition_penalty'], step=0.01, label='repetition_penalty')
shared.gradio['frequency_penalty'] = gr.Slider(0, 2, value=shared.settings['frequency_penalty'], step=0.05, label='frequency_penalty')
shared.gradio['presence_penalty'] = gr.Slider(0, 2, value=shared.settings['presence_penalty'], step=0.05, label='presence_penalty')
shared.gradio['encoder_repetition_penalty'] = gr.Slider(0.8, 1.5, value=shared.settings['encoder_repetition_penalty'], step=0.01, label='encoder_repetition_penalty')
shared.gradio['no_repeat_ngram_size'] = gr.Slider(0, 20, step=1, value=shared.settings['no_repeat_ngram_size'], label='no_repeat_ngram_size')
shared.gradio['repetition_penalty_range'] = gr.Slider(0, 4096, step=64, value=shared.settings['repetition_penalty_range'], label='repetition_penalty_range')
with gr.Column():
gr.Markdown('## Alternative sampling methods')
shared.gradio['penalty_alpha'] = gr.Slider(0, 5, value=generate_params['penalty_alpha'], label='penalty_alpha', info='For Contrastive Search. do_sample must be unchecked.')
shared.gradio['guidance_scale'] = gr.Slider(-0.5, 2.5, step=0.05, value=generate_params['guidance_scale'], label='guidance_scale', info='For CFG. 1.5 is a good value.')
shared.gradio['mirostat_mode'] = gr.Slider(0, 2, step=1, value=generate_params['mirostat_mode'], label='mirostat_mode', info='mode=1 is for llama.cpp only.')
shared.gradio['mirostat_tau'] = gr.Slider(0, 10, step=0.01, value=generate_params['mirostat_tau'], label='mirostat_tau')
shared.gradio['mirostat_eta'] = gr.Slider(0, 1, step=0.01, value=generate_params['mirostat_eta'], label='mirostat_eta')
shared.gradio['penalty_alpha'] = gr.Slider(0, 5, value=shared.settings['penalty_alpha'], label='penalty_alpha', info='For Contrastive Search. do_sample must be unchecked.')
shared.gradio['guidance_scale'] = gr.Slider(-0.5, 2.5, step=0.05, value=shared.settings['guidance_scale'], label='guidance_scale', info='For CFG. 1.5 is a good value.')
shared.gradio['mirostat_mode'] = gr.Slider(0, 2, step=1, value=shared.settings['mirostat_mode'], label='mirostat_mode', info='mode=1 is for llama.cpp only.')
shared.gradio['mirostat_tau'] = gr.Slider(0, 10, step=0.01, value=shared.settings['mirostat_tau'], label='mirostat_tau')
shared.gradio['mirostat_eta'] = gr.Slider(0, 1, step=0.01, value=shared.settings['mirostat_eta'], label='mirostat_eta')
gr.Markdown('## Other options')
shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], value=shared.settings['max_new_tokens'], step=1, label='max_new_tokens', info='⚠️ Setting this too high can cause prompt truncation.')
shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.')
shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.')
shared.gradio['max_updates_second'] = gr.Slider(value=shared.settings['max_updates_second'], minimum=0, maximum=24, step=1, label='Maximum UI updates/second', info='Set this if you experience lag in the UI during streaming.')
shared.gradio['do_sample'] = gr.Checkbox(value=shared.settings['do_sample'], label='do_sample')
shared.gradio['temperature_last'] = gr.Checkbox(value=shared.settings['temperature_last'], label='temperature_last', info='Moves temperature/dynamic temperature/quadratic sampling to the end of the sampler stack, ignoring their positions in "Sampler priority".')
shared.gradio['sampler_priority'] = gr.Textbox(value=shared.settings['sampler_priority'], lines=10, label='Sampler priority', info='Parameter names separated by new lines or commas.', elem_classes=['add_scrollbar'])
shared.gradio['dry_sequence_breakers'] = gr.Textbox(value=shared.settings['dry_sequence_breakers'], label='dry_sequence_breakers', info='Tokens across which sequence matching is not continued. Specified as a comma-separated list of quoted strings.')
with gr.Column():
with gr.Row():
with gr.Column():
shared.gradio['do_sample'] = gr.Checkbox(value=generate_params['do_sample'], label='do_sample')
shared.gradio['dynamic_temperature'] = gr.Checkbox(value=generate_params['dynamic_temperature'], label='dynamic_temperature')
shared.gradio['temperature_last'] = gr.Checkbox(value=generate_params['temperature_last'], label='temperature_last', info='Moves temperature/dynamic temperature/quadratic sampling to the end of the sampler stack, ignoring their positions in "Sampler priority".')
with gr.Blocks():
shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], value=shared.settings['max_new_tokens'], step=1, label='max_new_tokens', info='⚠️ Setting this too high can cause prompt truncation.')
shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.')
shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.')
shared.gradio['auto_max_new_tokens'] = gr.Checkbox(value=shared.settings['auto_max_new_tokens'], label='auto_max_new_tokens', info='Expand max_new_tokens to the available context length.')
shared.gradio['ban_eos_token'] = gr.Checkbox(value=shared.settings['ban_eos_token'], label='Ban the eos_token', info='Forces the model to never end the generation prematurely.')
shared.gradio['add_bos_token'] = gr.Checkbox(value=shared.settings['add_bos_token'], label='Add the bos_token to the beginning of prompts', info='Disabling this can make the replies more creative.')
@@ -91,18 +94,16 @@ def create_ui(default_preset):
shared.gradio['truncation_length'] = gr.Number(precision=0, step=256, value=get_truncation_length(), label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length.')
shared.gradio['seed'] = gr.Number(value=shared.settings['seed'], label='Seed (-1 for random)')
shared.gradio['sampler_priority'] = gr.Textbox(value=generate_params['sampler_priority'], lines=12, label='Sampler priority', info='Parameter names separated by new lines or commas.', elem_classes=['add_scrollbar'])
shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=2, value=shared.settings["custom_stopping_strings"] or None, label='Custom stopping strings', info='Written between "" and separated by commas.', placeholder='"\\n", "\\nYou:"')
shared.gradio['custom_token_bans'] = gr.Textbox(value=shared.settings['custom_token_bans'] or None, label='Token bans', info='Token IDs to ban, separated by commas. The IDs can be found in the Default or Notebook tab.')
shared.gradio['negative_prompt'] = gr.Textbox(value=shared.settings['negative_prompt'], label='Negative prompt', info='For CFG. Only used when guidance_scale is different than 1.', lines=3, elem_classes=['add_scrollbar'])
shared.gradio['dry_sequence_breakers'] = gr.Textbox(value=generate_params['dry_sequence_breakers'], label='dry_sequence_breakers', info='Tokens across which sequence matching is not continued. Specified as a comma-separated list of quoted strings.')
with gr.Row() as shared.gradio['grammar_file_row']:
shared.gradio['grammar_file'] = gr.Dropdown(value='None', choices=utils.get_available_grammars(), label='Load grammar from file (.gbnf)', elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['grammar_file'], lambda: None, lambda: {'choices': utils.get_available_grammars()}, 'refresh-button', interactive=not mu)
shared.gradio['save_grammar'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
shared.gradio['delete_grammar'] = gr.Button('🗑️ ', elem_classes='refresh-button', interactive=not mu)
shared.gradio['grammar_string'] = gr.Textbox(value='', label='Grammar', lines=16, elem_classes=['add_scrollbar', 'monospace'])
shared.gradio['grammar_string'] = gr.Textbox(value=shared.settings['grammar_string'], label='Grammar', lines=16, elem_classes=['add_scrollbar', 'monospace'])
ui_chat.create_chat_settings_ui()
@@ -113,9 +114,13 @@ def create_event_handlers():
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
presets.load_preset_for_ui, gradio('preset_menu', 'interface_state'), gradio('interface_state') + gradio(presets.presets_params()), show_progress=False)
shared.gradio['random_preset'].click(
shared.gradio['reset_preset'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
presets.random_preset, gradio('interface_state'), gradio('interface_state') + gradio(presets.presets_params()), show_progress=False)
presets.reset_preset_for_ui, gradio('preset_menu', 'interface_state'), gradio('interface_state') + gradio(presets.presets_params()), show_progress=False)
shared.gradio['neutralize_samplers'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
presets.neutralize_samplers_for_ui, gradio('interface_state'), gradio('interface_state') + gradio(presets.presets_params()), show_progress=False)
shared.gradio['grammar_file'].change(load_grammar, gradio('grammar_file'), gradio('grammar_string'), show_progress=False)
shared.gradio['dynamic_temperature'].change(lambda x: [gr.update(visible=x)] * 3, gradio('dynamic_temperature'), gradio('dynatemp_low', 'dynatemp_high', 'dynatemp_exponent'), show_progress=False)
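The `lambda x: [gr.update(visible=x)] * 3` handler returns one update per output component, letting a single checkbox show or hide all three dynatemp sliders at once. A minimal standalone sketch (assumes a Gradio version where `gr.update` is available):

```python
import gradio as gr

with gr.Blocks() as demo:
    enable = gr.Checkbox(label='dynamic_temperature')
    low = gr.Slider(0.01, 5, visible=False, label='dynatemp_low')
    high = gr.Slider(0.01, 5, visible=False, label='dynatemp_high')
    exp = gr.Slider(0.01, 5, visible=False, label='dynatemp_exponent')
    # One gr.update per output component, all driven by the checkbox value.
    enable.change(lambda x: [gr.update(visible=x)] * 3, enable, [low, high, exp])

demo.launch()
```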

View file

@@ -1,7 +1,6 @@
import gradio as gr
from modules import shared, ui, utils
from modules.github import clone_or_pull_repository
from modules.utils import gradio
@@ -10,11 +9,14 @@ def create_ui():
with gr.Tab("Session", elem_id="session-tab"):
with gr.Row():
with gr.Column():
shared.gradio['reset_interface'] = gr.Button("Apply flags/extensions and restart", interactive=not mu)
with gr.Row():
shared.gradio['toggle_dark_mode'] = gr.Button('Toggle 💡')
shared.gradio['save_settings'] = gr.Button('Save UI defaults to user_data/settings.yaml', interactive=not mu)
gr.Markdown("## Settings")
shared.gradio['save_settings'] = gr.Button('Save settings to user_data/settings.yaml', elem_classes='refresh-button', interactive=not mu)
shared.gradio['toggle_dark_mode'] = gr.Button('Toggle light/dark theme 💡', elem_classes='refresh-button')
shared.gradio['paste_to_attachment'] = gr.Checkbox(label='Turn long pasted text into attachments in the Chat tab', value=shared.settings['paste_to_attachment'], elem_id='paste_to_attachment')
with gr.Column():
gr.Markdown("## Extensions & flags")
shared.gradio['reset_interface'] = gr.Button("Apply flags/extensions and restart", interactive=not mu)
with gr.Row():
with gr.Column():
shared.gradio['extensions_menu'] = gr.CheckboxGroup(choices=utils.get_available_extensions(), value=shared.args.extensions, label="Available extensions", info='Note that some of these extensions may require manually installing Python requirements through the command: pip install -r extensions/extension_name/requirements.txt', elem_classes='checkboxgroup-table')
@@ -22,30 +24,20 @@ def create_ui():
with gr.Column():
shared.gradio['bool_menu'] = gr.CheckboxGroup(choices=get_boolean_arguments(), value=get_boolean_arguments(active=True), label="Boolean command-line flags", elem_classes='checkboxgroup-table')
with gr.Column():
if not shared.args.portable:
extension_name = gr.Textbox(lines=1, label='Install or update an extension', info='Enter the GitHub URL below and press Enter. For a list of extensions, see: https://github.com/oobabooga/text-generation-webui-extensions ⚠️ WARNING ⚠️ : extensions can execute arbitrary code. Make sure to inspect their source code before activating them.', interactive=not mu)
extension_status = gr.Markdown()
else:
pass
shared.gradio['theme_state'] = gr.Textbox(visible=False, value='dark' if shared.settings['dark_theme'] else 'light')
if not shared.args.portable:
extension_name.submit(clone_or_pull_repository, extension_name, extension_status, show_progress=False)
shared.gradio['save_settings'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
handle_save_settings, gradio('interface_state', 'preset_menu', 'extensions_menu', 'show_controls', 'theme_state'), gradio('save_contents', 'save_filename', 'save_root', 'file_saver'), show_progress=False)
shared.gradio['toggle_dark_mode'].click(
lambda x: 'dark' if x == 'light' else 'light', gradio('theme_state'), gradio('theme_state')).then(
None, None, None, js=f'() => {{{ui.dark_theme_js}; toggleDarkMode(); localStorage.setItem("theme", document.body.classList.contains("dark") ? "dark" : "light")}}')
# Reset interface event
shared.gradio['reset_interface'].click(
set_interface_arguments, gradio('extensions_menu', 'bool_menu'), None).then(
None, None, None, js='() => {document.body.innerHTML=\'<h1 style="font-family:monospace;padding-top:20%;margin:0;height:100vh;color:lightgray;text-align:center;background:var(--body-background-fill)">Reloading...</h1>\'; setTimeout(function(){location.reload()},2500); return []}')
shared.gradio['toggle_dark_mode'].click(
lambda x: 'dark' if x == 'light' else 'light', gradio('theme_state'), gradio('theme_state')).then(
None, None, None, js=f'() => {{{ui.dark_theme_js}; toggleDarkMode()}}')
shared.gradio['save_settings'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
handle_save_settings, gradio('interface_state', 'preset_menu', 'extensions_menu', 'show_controls', 'theme_state'), gradio('save_contents', 'save_filename', 'save_root', 'file_saver'), show_progress=False)
def handle_save_settings(state, preset, extensions, show_controls, theme):
contents = ui.save_settings(state, preset, extensions, show_controls, theme)
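For context on the "Save settings to user_data/settings.yaml" button wired up above: handle_save_settings delegates to ui.save_settings, which serializes the gathered interface state. A rough sketch of that idea, with an illustrative field filter rather than the project's actual selection logic:

import yaml
from pathlib import Path

def save_ui_defaults(state, path='user_data/settings.yaml'):
    # Illustrative: keep plain, user-facing defaults and write them as YAML
    contents = {k: v for k, v in state.items() if not k.startswith('_')}
    out = Path(path)
    out.parent.mkdir(parents=True, exist_ok=True)
    out.write_text(yaml.dump(contents, sort_keys=False))
    return contents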

View file

@@ -3,7 +3,7 @@ import re
from datetime import datetime
from pathlib import Path
from modules import github, shared
from modules import shared
from modules.logging_colors import logger
@@ -182,7 +182,6 @@ def get_available_instruction_templates():
def get_available_extensions():
extensions = sorted(set(map(lambda x: x.parts[1], Path('extensions').glob('*/script.py'))), key=natural_keys)
extensions = [v for v in extensions if v not in github.new_extensions]
return extensions
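The glob above is the whole discovery mechanism: an extension is any directory under extensions/ that contains a script.py. A stripped-down illustration (omitting the natural_keys sort key from the same module):

from pathlib import Path

# extensions/<name>/script.py -> parts are ('extensions', '<name>', 'script.py')
names = sorted({p.parts[1] for p in Path('extensions').glob('*/script.py')})
print(names)  # e.g. ['character_bias', 'gallery', ...]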

View file

@@ -3,8 +3,6 @@ from concurrent.futures import as_completed
from datetime import datetime
import requests
from bs4 import BeautifulSoup
from duckduckgo_search import DDGS
from modules.logging_colors import logger
@@ -14,35 +12,39 @@ def get_current_timestamp():
return datetime.now().strftime('%b %d, %Y %H:%M')
def download_web_page(url, timeout=5):
"""Download and extract text from a web page"""
def download_web_page(url, timeout=10):
"""
Download a web page and convert its HTML content to structured Markdown text.
"""
import html2text
try:
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
response = requests.get(url, headers=headers, timeout=timeout)
response.raise_for_status()
response.raise_for_status() # Raise an exception for bad status codes
soup = BeautifulSoup(response.content, 'html.parser')
# Initialize the HTML to Markdown converter
h = html2text.HTML2Text()
h.body_width = 0
# Remove script and style elements
for script in soup(["script", "style"]):
script.decompose()
# Convert the HTML to Markdown
markdown_text = h.handle(response.text)
# Get text and clean it up
text = soup.get_text()
lines = (line.strip() for line in text.splitlines())
chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
text = ' '.join(chunk for chunk in chunks if chunk)
return text
except Exception as e:
return markdown_text
except requests.exceptions.RequestException as e:
logger.error(f"Error downloading {url}: {e}")
return f"[Error downloading content from {url}: {str(e)}]"
return ""
except Exception as e:
logger.error(f"An unexpected error occurred: {e}")
return ""
def perform_web_search(query, num_pages=3, max_workers=5):
"""Perform web search and return results with content"""
from duckduckgo_search import DDGS
try:
with DDGS() as ddgs:
results = list(ddgs.text(query, max_results=num_pages))
@@ -74,9 +76,7 @@ def perform_web_search(query, num_pages=3, max_workers=5):
'url': url,
'content': content
}
except Exception as e:
logger.error(f"Error downloading {url}: {e}")
# Include failed downloads with empty content
except Exception:
search_results[index] = {
'title': title,
'url': url,
@@ -107,6 +107,13 @@ def add_web_search_attachments(history, row_idx, user_message, search_query, sta
logger.warning("No search results found")
return
# Filter out failed downloads before adding attachments
successful_results = [result for result in search_results if result['content'].strip()]
if not successful_results:
logger.warning("No successful downloads to add as attachments")
return
# Add search results as attachments
key = f"user_{row_idx}"
if key not in history['metadata']:
@@ -114,7 +121,7 @@ def add_web_search_attachments(history, row_idx, user_message, search_query, sta
if "attachments" not in history['metadata'][key]:
history['metadata'][key]["attachments"] = []
for result in search_results:
for result in successful_results:
attachment = {
"name": result['title'],
"type": "text/html",
@@ -123,7 +130,7 @@ def add_web_search_attachments(history, row_idx, user_message, search_query, sta
}
history['metadata'][key]["attachments"].append(attachment)
logger.info(f"Added {len(search_results)} web search results as attachments")
logger.info(f"Added {len(successful_results)} successful web search results as attachments.")
except Exception as e:
logger.error(f"Error in web search: {e}")

View file

@@ -17,8 +17,6 @@ import sys
# Define the required versions
TORCH_VERSION = "2.6.0"
TORCHVISION_VERSION = "0.21.0"
TORCHAUDIO_VERSION = "2.6.0"
PYTHON_VERSION = "3.11"
LIBSTDCXX_VERSION_LINUX = "12.1.0"
@@ -70,12 +68,8 @@ def is_installed():
def cpu_has_avx2():
try:
import cpuinfo
info = cpuinfo.get_cpu_info()
if 'avx2' in info['flags']:
return True
else:
return False
return 'avx2' in info['flags']
except:
return True
@@ -83,30 +77,119 @@ def cpu_has_avx2():
def cpu_has_amx():
try:
import cpuinfo
info = cpuinfo.get_cpu_info()
if 'amx' in info['flags']:
return True
else:
return False
return 'amx' in info['flags']
except:
return True
def torch_version():
site_packages_path = None
for sitedir in site.getsitepackages():
if "site-packages" in sitedir and conda_env_path in sitedir:
site_packages_path = sitedir
break
def load_state():
"""Load installer state from JSON file"""
if os.path.exists(state_file):
try:
with open(state_file, 'r') as f:
return json.load(f)
except:
return {}
return {}
if site_packages_path:
torch_version_file = open(os.path.join(site_packages_path, 'torch', 'version.py')).read().splitlines()
torver = [line for line in torch_version_file if line.startswith('__version__')][0].split('__version__ = ')[1].strip("'")
def save_state(state):
"""Save installer state to JSON file"""
with open(state_file, 'w') as f:
json.dump(state, f)
def get_gpu_choice():
"""Get GPU choice from state file or ask user"""
state = load_state()
gpu_choice = state.get('gpu_choice')
if not gpu_choice:
if "GPU_CHOICE" in os.environ:
choice = os.environ["GPU_CHOICE"].upper()
print_big_message(f"Selected GPU choice \"{choice}\" based on the GPU_CHOICE environment variable.")
else:
choice = get_user_choice(
"What is your GPU?",
{
'A': 'NVIDIA - CUDA 12.4',
'B': 'AMD - Linux/macOS only, requires ROCm 6.2.4',
'C': 'Apple M Series',
'D': 'Intel Arc (beta)',
'E': 'NVIDIA - CUDA 12.8',
'N': 'CPU mode'
},
)
# Convert choice to GPU name
gpu_choice = {"A": "NVIDIA", "B": "AMD", "C": "APPLE", "D": "INTEL", "E": "NVIDIA_CUDA128", "N": "NONE"}[choice]
# Save choice to state
state['gpu_choice'] = gpu_choice
save_state(state)
return gpu_choice
def get_pytorch_install_command(gpu_choice):
"""Get PyTorch installation command based on GPU choice"""
base_cmd = f"python -m pip install torch=={TORCH_VERSION} "
if gpu_choice == "NVIDIA":
return base_cmd + "--index-url https://download.pytorch.org/whl/cu124"
elif gpu_choice == "NVIDIA_CUDA128":
return "python -m pip install torch==2.7.1 --index-url https://download.pytorch.org/whl/cu128"
elif gpu_choice == "AMD":
return base_cmd + "--index-url https://download.pytorch.org/whl/rocm6.2.4"
elif gpu_choice in ["APPLE", "NONE"]:
return base_cmd + "--index-url https://download.pytorch.org/whl/cpu"
elif gpu_choice == "INTEL":
if is_linux():
return "python -m pip install torch==2.1.0a0 intel-extension-for-pytorch==2.1.10+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
else:
return "python -m pip install torch==2.1.0a0 intel-extension-for-pytorch==2.1.10 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
else:
from torch import __version__ as torver
return base_cmd
return torver
def get_pytorch_update_command(gpu_choice):
"""Get PyTorch update command based on GPU choice"""
base_cmd = f"python -m pip install --upgrade torch=={TORCH_VERSION} "
if gpu_choice == "NVIDIA":
return f"{base_cmd} --index-url https://download.pytorch.org/whl/cu124"
elif gpu_choice == "NVIDIA_CUDA128":
return "python -m pip install --upgrade torch==2.7.1 --index-url https://download.pytorch.org/whl/cu128"
elif gpu_choice == "AMD":
return f"{base_cmd} --index-url https://download.pytorch.org/whl/rocm6.2.4"
elif gpu_choice in ["APPLE", "NONE"]:
return f"{base_cmd} --index-url https://download.pytorch.org/whl/cpu"
elif gpu_choice == "INTEL":
intel_extension = "intel-extension-for-pytorch==2.1.10+xpu" if is_linux() else "intel-extension-for-pytorch==2.1.10"
return f"{base_cmd} {intel_extension} --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
else:
return base_cmd
def get_requirements_file(gpu_choice):
"""Get requirements file path based on GPU choice"""
requirements_base = os.path.join("requirements", "full")
if gpu_choice == "AMD":
file_name = f"requirements_amd{'_noavx2' if not cpu_has_avx2() else ''}.txt"
elif gpu_choice == "APPLE":
file_name = f"requirements_apple_{'intel' if is_x86_64() else 'silicon'}.txt"
elif gpu_choice in ["INTEL", "NONE"]:
file_name = f"requirements_cpu_only{'_noavx2' if not cpu_has_avx2() else ''}.txt"
elif gpu_choice == "NVIDIA":
file_name = f"requirements{'_noavx2' if not cpu_has_avx2() else ''}.txt"
elif gpu_choice == "NVIDIA_CUDA128":
file_name = f"requirements_cuda128{'_noavx2' if not cpu_has_avx2() else ''}.txt"
else:
raise ValueError(f"Unknown GPU choice: {gpu_choice}")
return os.path.join(requirements_base, file_name)
def get_current_commit():
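The new load_state/save_state pair replaces the deleted torch_version() probe: instead of inferring the backend from the installed torch build string ("+cu", "+rocm", ...), the installer now remembers the explicit GPU choice in a JSON state file. An illustrative round trip through the helpers above:

state = load_state()            # {} on first run or if the file is unreadable
state['gpu_choice'] = 'NVIDIA'  # persisted so later updates skip the prompt
save_state(state)
assert load_state()['gpu_choice'] == 'NVIDIA'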
@@ -209,28 +292,8 @@ def get_user_choice(question, options_dict):
def update_pytorch_and_python():
print_big_message("Checking for PyTorch updates.")
# Update the Python version. Left here for future reference in case this becomes necessary.
# print_big_message("Checking for PyTorch and Python updates.")
# current_python_version = f"{sys.version_info.major}.{sys.version_info.minor}"
# if current_python_version != PYTHON_VERSION:
# run_cmd(f"conda install -y python={PYTHON_VERSION}", assert_success=True, environment=True)
torver = torch_version()
base_cmd = f"python -m pip install --upgrade torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION}"
if "+cu" in torver:
install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/cu124"
elif "+rocm" in torver:
install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/rocm6.2.4"
elif "+cpu" in torver:
install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/cpu"
elif "+cxx11" in torver:
intel_extension = "intel-extension-for-pytorch==2.1.10+xpu" if is_linux() else "intel-extension-for-pytorch==2.1.10"
install_cmd = f"{base_cmd} {intel_extension} --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
else:
install_cmd = base_cmd
gpu_choice = get_gpu_choice()
install_cmd = get_pytorch_update_command(gpu_choice)
run_cmd(install_cmd, assert_success=True, environment=True)
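update_pytorch_and_python() is now a straight table lookup through get_pytorch_update_command(). An illustrative check of the mapping defined above (output abbreviated):

for choice in ('NVIDIA', 'NVIDIA_CUDA128', 'AMD', 'APPLE', 'NONE'):
    print(choice, '->', get_pytorch_update_command(choice))
# NVIDIA         -> ...torch==2.6.0 --index-url https://download.pytorch.org/whl/cu124
# NVIDIA_CUDA128 -> ...torch==2.7.1 --index-url https://download.pytorch.org/whl/cu128
# AMD            -> ...torch==2.6.0 --index-url https://download.pytorch.org/whl/rocm6.2.4
# APPLE / NONE   -> ...torch==2.6.0 --index-url https://download.pytorch.org/whl/cpu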
@@ -256,43 +319,11 @@ def install_webui():
if os.path.isfile(state_file):
os.remove(state_file)
# Ask the user for the GPU vendor
if "GPU_CHOICE" in os.environ:
choice = os.environ["GPU_CHOICE"].upper()
print_big_message(f"Selected GPU choice \"{choice}\" based on the GPU_CHOICE environment variable.")
# Warn about changed meanings and handle old choices
if choice == "B":
print_big_message("Warning: GPU_CHOICE='B' now means 'AMD' in the new version.")
elif choice == "C":
print_big_message("Warning: GPU_CHOICE='C' now means 'Apple M Series' in the new version.")
elif choice == "D":
print_big_message("Warning: GPU_CHOICE='D' now means 'Intel Arc' in the new version.")
else:
choice = get_user_choice(
"What is your GPU?",
{
'A': 'NVIDIA - CUDA 12.4',
'B': 'AMD - Linux/macOS only, requires ROCm 6.2.4',
'C': 'Apple M Series',
'D': 'Intel Arc (beta)',
'N': 'CPU mode'
},
)
# Convert choices to GPU names for compatibility
gpu_choice_to_name = {
"A": "NVIDIA",
"B": "AMD",
"C": "APPLE",
"D": "INTEL",
"N": "NONE"
}
selected_gpu = gpu_choice_to_name[choice]
# Get GPU choice and save it to state
gpu_choice = get_gpu_choice()
# Write a flag to CMD_FLAGS.txt for CPU mode
if selected_gpu == "NONE":
if gpu_choice == "NONE":
cmd_flags_path = os.path.join(script_dir, "user_data", "CMD_FLAGS.txt")
with open(cmd_flags_path, 'r+') as cmd_flags_file:
if "--cpu" not in cmd_flags_file.read():
@@ -300,34 +331,22 @@ def install_webui():
cmd_flags_file.write("\n--cpu\n")
# Handle CUDA version display
elif any((is_windows(), is_linux())) and selected_gpu == "NVIDIA":
elif any((is_windows(), is_linux())) and gpu_choice == "NVIDIA":
print("CUDA: 12.4")
elif any((is_windows(), is_linux())) and gpu_choice == "NVIDIA_CUDA128":
print("CUDA: 12.8")
# No PyTorch for AMD on Windows (?)
elif is_windows() and selected_gpu == "AMD":
elif is_windows() and gpu_choice == "AMD":
print("PyTorch setup on Windows is not implemented yet. Exiting...")
sys.exit(1)
# Find the Pytorch installation command
install_pytorch = f"python -m pip install torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION} "
if selected_gpu == "NVIDIA":
install_pytorch += "--index-url https://download.pytorch.org/whl/cu124"
elif selected_gpu == "AMD":
install_pytorch += "--index-url https://download.pytorch.org/whl/rocm6.2.4"
elif selected_gpu in ["APPLE", "NONE"]:
install_pytorch += "--index-url https://download.pytorch.org/whl/cpu"
elif selected_gpu == "INTEL":
if is_linux():
install_pytorch = "python -m pip install torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
else:
install_pytorch = "python -m pip install torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
# Install Git and then Pytorch
print_big_message("Installing PyTorch.")
install_pytorch = get_pytorch_install_command(gpu_choice)
run_cmd(f"conda install -y ninja git && {install_pytorch} && python -m pip install py-cpuinfo==9.0.0", assert_success=True, environment=True)
if selected_gpu == "INTEL":
if gpu_choice == "INTEL":
# Install oneAPI dependencies via conda
print_big_message("Installing Intel oneAPI runtime libraries.")
run_cmd("conda install -y -c https://software.repos.intel.com/python/conda/ -c conda-forge dpcpp-cpp-rt=2024.0 mkl-dpcpp=2024.0", environment=True)
@@ -349,31 +368,15 @@ def update_requirements(initial_installation=False, pull=True):
assert_success=True
)
torver = torch_version()
requirements_base = os.path.join("requirements", "full")
if "+rocm" in torver:
file_name = f"requirements_amd{'_noavx2' if not cpu_has_avx2() else ''}.txt"
elif "+cpu" in torver or "+cxx11" in torver:
file_name = f"requirements_cpu_only{'_noavx2' if not cpu_has_avx2() else ''}.txt"
elif is_macos():
file_name = f"requirements_apple_{'intel' if is_x86_64() else 'silicon'}.txt"
else:
file_name = f"requirements{'_noavx2' if not cpu_has_avx2() else ''}.txt"
requirements_file = os.path.join(requirements_base, file_name)
# Load state from JSON file
current_commit = get_current_commit()
wheels_changed = False
if os.path.exists(state_file):
with open(state_file, 'r') as f:
last_state = json.load(f)
if 'wheels_changed' in last_state or last_state.get('last_installed_commit') != current_commit:
wheels_changed = not os.path.exists(state_file)
if not wheels_changed:
state = load_state()
if 'wheels_changed' in state or state.get('last_installed_commit') != current_commit:
wheels_changed = True
else:
wheels_changed = True
gpu_choice = get_gpu_choice()
requirements_file = get_requirements_file(gpu_choice)
if pull:
# Read .whl lines before pulling
@@ -409,19 +412,17 @@ def update_requirements(initial_installation=False, pull=True):
print_big_message(f"File '{file}' was updated during 'git pull'. Please run the script again.")
# Save state before exiting
current_state = {}
state = load_state()
if wheels_changed:
current_state['wheels_changed'] = True
with open(state_file, 'w') as f:
json.dump(current_state, f)
state['wheels_changed'] = True
save_state(state)
sys.exit(1)
# Save current state
current_state = {'last_installed_commit': current_commit}
with open(state_file, 'w') as f:
json.dump(current_state, f)
state = load_state()
state['last_installed_commit'] = current_commit
state.pop('wheels_changed', None) # Remove wheels_changed flag
save_state(state)
if os.environ.get("INSTALL_EXTENSIONS", "").lower() in ("yes", "y", "true", "1", "t", "on"):
install_extensions_requirements()
@@ -432,11 +433,10 @@ def update_requirements(initial_installation=False, pull=True):
# Update PyTorch
if not initial_installation:
update_pytorch_and_python()
torver = torch_version()
clean_outdated_pytorch_cuda_dependencies()
print_big_message(f"Installing webui requirements from file: {requirements_file}")
print(f"TORCH: {torver}\n")
print(f"GPU Choice: {gpu_choice}\n")
# Prepare the requirements file
textgen_requirements = open(requirements_file).read().splitlines()
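Requirements selection now follows the saved GPU choice as well. Illustrative outputs of get_requirements_file() on a Linux machine with an AVX2-capable CPU (the _noavx2 variants are chosen otherwise):

print(get_requirements_file('NVIDIA'))          # requirements/full/requirements.txt
print(get_requirements_file('NVIDIA_CUDA128'))  # requirements/full/requirements_cuda128.txt
print(get_requirements_file('AMD'))             # requirements/full/requirements_amd.txt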

View file

@@ -1,5 +1,4 @@
accelerate==1.5.*
beautifulsoup4==4.13.4
bitsandbytes==0.45.*
colorama
datasets
@@ -7,6 +6,7 @@ duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -16,6 +16,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
@@ -33,12 +34,12 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://github.com/oobabooga/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
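The wheel URLs above are gated by PEP 508 environment markers: pip evaluates the expression after the ";" against the current interpreter and platform, and skips the requirement when it is false. A quick way to test a marker locally, using the packaging library:

from packaging.markers import Marker

m = Marker('platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"')
print(m.evaluate())  # True only on 64-bit Linux under Python 3.11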

View file

@@ -1,11 +1,11 @@
accelerate==1.5.*
beautifulsoup4==4.13.4
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -15,6 +15,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
@@ -32,7 +33,7 @@ sse-starlette==1.6.5
tiktoken
# AMD wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"

View file

@@ -1,11 +1,11 @@
accelerate==1.5.*
beautifulsoup4==4.13.4
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -15,6 +15,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
@@ -32,7 +33,7 @@ sse-starlette==1.6.5
tiktoken
# AMD wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"

View file

@@ -1,11 +1,11 @@
accelerate==1.5.*
beautifulsoup4==4.13.4
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -15,6 +15,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
@@ -32,7 +33,7 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9-py3-none-any.whl
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3-py3-none-any.whl
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl

View file

@@ -1,11 +1,11 @@
accelerate==1.5.*
beautifulsoup4==4.13.4
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -15,6 +15,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
@@ -32,8 +33,8 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9-py3-none-any.whl
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3-py3-none-any.whl
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl

View file

@@ -1,11 +1,11 @@
accelerate==1.5.*
beautifulsoup4==4.13.4
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -15,6 +15,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
@@ -32,5 +33,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only, AVX2)
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"

View file

@@ -1,11 +1,11 @@
accelerate==1.5.*
beautifulsoup4==4.13.4
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -15,6 +15,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
@@ -32,5 +33,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only, no AVX2)
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"

View file

@@ -0,0 +1,45 @@
accelerate==1.5.*
bitsandbytes==0.45.*
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==2.2.*
pandas
peft==0.15.*
Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
safetensors==0.5.*
scipy
sentencepiece
tensorboard
transformers==4.50.*
tqdm
wandb
# API
flask_cloudflared==0.0.14
sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"

View file

@@ -0,0 +1,45 @@
accelerate==1.5.*
bitsandbytes==0.45.*
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==2.2.*
pandas
peft==0.15.*
Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
safetensors==0.5.*
scipy
sentencepiece
tensorboard
transformers==4.50.*
tqdm
wandb
# API
flask_cloudflared==0.0.14
sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"

View file

@@ -1,5 +1,4 @@
accelerate==1.5.*
beautifulsoup4==4.13.4
bitsandbytes==0.45.*
colorama
datasets
@@ -7,6 +6,7 @@ duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -16,6 +16,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
@@ -33,12 +34,12 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://github.com/oobabooga/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"

View file

@@ -1,11 +1,11 @@
accelerate==1.5.*
beautifulsoup4==4.13.4
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -15,6 +15,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich

View file

@@ -1,12 +1,13 @@
beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
@@ -18,5 +19,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

View file

@@ -1,12 +1,13 @@
beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
@@ -18,5 +19,5 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"

View file

@@ -1,12 +1,13 @@
beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
@@ -18,6 +19,6 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0"

View file

@@ -1,12 +1,13 @@
beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
@@ -18,5 +19,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only, AVX2)
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"

View file

@@ -1,12 +1,13 @@
beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
@@ -18,5 +19,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only, no AVX2)
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"

View file

@@ -1,12 +1,13 @@
beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
@@ -18,5 +19,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

View file

@@ -1,12 +1,13 @@
beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich

View file

@@ -1,12 +1,13 @@
beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
@@ -18,5 +19,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

View file

@@ -1,12 +1,13 @@
beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
@@ -18,5 +19,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

View file

@@ -1,12 +1,24 @@
import os
import shutil
import warnings
from pathlib import Path
from modules import shared
from modules.block_requests import OpenMonkeyPatch, RequestBlocker
from modules.logging_colors import logger
os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
os.environ['BITSANDBYTES_NOWELCOME'] = '1'
# Set up Gradio temp directory path
gradio_temp_path = Path('user_data') / 'cache' / 'gradio'
shutil.rmtree(gradio_temp_path, ignore_errors=True)
gradio_temp_path.mkdir(parents=True, exist_ok=True)
# Set environment variables
os.environ.update({
'GRADIO_ANALYTICS_ENABLED': 'False',
'BITSANDBYTES_NOWELCOME': '1',
'GRADIO_TEMP_DIR': str(gradio_temp_path)
})
warnings.filterwarnings('ignore', category=UserWarning, message='TypedStorage is deprecated')
warnings.filterwarnings('ignore', category=UserWarning, message='Using the update method is deprecated')
warnings.filterwarnings('ignore', category=UserWarning, message='Field "model_name" has conflict')
@@ -27,7 +39,6 @@ import signal
import sys
import time
from functools import partial
from pathlib import Path
from threading import Lock, Thread
import yaml
@@ -45,6 +56,7 @@ from modules import (
ui_session,
utils
)
from modules.chat import generate_pfp_cache
from modules.extensions import apply_extensions
from modules.LoRA import add_lora_to_model
from modules.models import load_model, unload_model_if_idle
@@ -60,6 +72,14 @@ from modules.utils import gradio
def signal_handler(sig, frame):
logger.info("Received Ctrl+C. Shutting down Text generation web UI gracefully.")
# Explicitly stop LlamaServer to avoid __del__ cleanup issues during shutdown
if shared.model and shared.model.__class__.__name__ == 'LlamaServer':
try:
shared.model.stop()
except:
pass
sys.exit(0)
@@ -85,17 +105,20 @@ def create_interface():
# Force some events to be triggered on page load
shared.persistent_interface_state.update({
'mode': shared.settings['mode'],
'loader': shared.args.loader or 'llama.cpp',
'mode': shared.settings['mode'] if shared.settings['mode'] == 'instruct' else gr.update(),
'character_menu': shared.args.character or shared.settings['character'],
'instruction_template_str': shared.settings['instruction_template_str'],
'prompt_menu-default': shared.settings['prompt-default'],
'prompt_menu-notebook': shared.settings['prompt-notebook'],
'filter_by_loader': (shared.args.loader or 'All') if not shared.args.portable else 'llama.cpp'
})
if Path("user_data/cache/pfp_character.png").exists():
Path("user_data/cache/pfp_character.png").unlink()
# Clear existing cache files
for cache_file in ['pfp_character.png', 'pfp_character_thumb.png']:
cache_path = Path(f"user_data/cache/{cache_file}")
if cache_path.exists():
cache_path.unlink()
# Regenerate for default character
if shared.settings['mode'] != 'instruct':
generate_pfp_cache(shared.settings['character'])
# css/js strings
css = ui.css
@@ -126,7 +149,7 @@ def create_interface():
ui_default.create_ui()
ui_notebook.create_ui()
ui_parameters.create_ui(shared.settings['preset']) # Parameters tab
ui_parameters.create_ui() # Parameters tab
ui_model_menu.create_ui() # Model tab
if not shared.args.portable:
training.create_ui() # Training tab
@@ -142,17 +165,35 @@ def create_interface():
ui_parameters.create_event_handlers()
ui_model_menu.create_event_handlers()
# UI persistence events
ui.setup_auto_save()
# Interface launch events
shared.gradio['interface'].load(
None,
gradio('show_controls'),
None,
js=f"""(x) => {{
if ({str(shared.settings['dark_theme']).lower()}) {{
document.getElementsByTagName('body')[0].classList.add('dark');
}}
else {{
document.getElementsByTagName('body')[0].classList.remove('dark');
// Check if this is first visit or if localStorage is out of sync
const savedTheme = localStorage.getItem('theme');
const serverTheme = {str(shared.settings['dark_theme']).lower()} ? 'dark' : 'light';
// If no saved theme or mismatch with server on first load, use server setting
if (!savedTheme || !sessionStorage.getItem('theme_synced')) {{
localStorage.setItem('theme', serverTheme);
sessionStorage.setItem('theme_synced', 'true');
if (serverTheme === 'dark') {{
document.getElementsByTagName('body')[0].classList.add('dark');
}} else {{
document.getElementsByTagName('body')[0].classList.remove('dark');
}}
}} else {{
// Use localStorage for subsequent reloads
if (savedTheme === 'dark') {{
document.getElementsByTagName('body')[0].classList.add('dark');
}} else {{
document.getElementsByTagName('body')[0].classList.remove('dark');
}}
}}
{js}
{ui.show_controls_js}
@@ -208,13 +249,7 @@ if __name__ == "__main__":
shared.model_config['.*'] = get_fallback_settings()
shared.model_config.move_to_end('.*', last=False) # Move to the beginning
# Activate the extensions listed on settings.yaml
extensions_module.available_extensions = utils.get_available_extensions()
for extension in shared.settings['default_extensions']:
shared.args.extensions = shared.args.extensions or []
if extension not in shared.args.extensions:
shared.args.extensions.append(extension)
available_models = utils.get_available_models()
# Model defined through --model
@@ -277,8 +312,8 @@ if __name__ == "__main__":
if shared.args.nowebui:
# Start the API in standalone mode
shared.args.extensions = [x for x in shared.args.extensions if x != 'gallery']
if shared.args.extensions is not None and len(shared.args.extensions) > 0:
shared.args.extensions = [x for x in (shared.args.extensions or []) if x != 'gallery']
if shared.args.extensions:
extensions_module.load_extensions()
else:
# Launch the web UI
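The signal handler added above stops a subprocess-backed LlamaServer explicitly instead of relying on __del__ during interpreter shutdown; elsewhere in server.py it is presumably registered with signal.signal. A self-contained analogue of the pattern, using a dummy child process:

import signal
import subprocess
import sys

server = subprocess.Popen([sys.executable, '-c', 'import time; time.sleep(3600)'])

def handler(sig, frame):
    # Terminate the child explicitly rather than trusting garbage collection
    server.terminate()
    server.wait(timeout=5)
    sys.exit(0)

signal.signal(signal.SIGINT, handler)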

View file

@@ -1,3 +0,0 @@
do_sample: false
top_k: 4
penalty_alpha: 0.3

View file

@@ -1 +0,0 @@
temperature: 1

View file

@@ -0,0 +1,3 @@
temperature: 0.7
top_p: 0.8
top_k: 20

View file

@@ -0,0 +1,3 @@
temperature: 0.6
top_p: 0.95
top_k: 20
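The two new presets above are plain YAML key/value overrides for sampler parameters. A minimal sketch of how such a file could be applied; the loader below is illustrative, not the project's presets module, and the file path is hypothetical:

import yaml

def apply_preset(path, params):
    # Parse the preset and overlay it on the current sampler parameters
    with open(path) as f:
        overrides = yaml.safe_load(f) or {}
    params.update(overrides)
    return params

params = {'temperature': 1.0, 'top_p': 1.0, 'top_k': 0}
print(apply_preset('user_data/presets/example.yaml', params))
# -> {'temperature': 0.7, 'top_p': 0.8, 'top_k': 20} for the first file above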

View file

@@ -1,77 +0,0 @@
show_controls: true
start_with: ''
mode: instruct
chat_style: cai-chat
chat-instruct_command: |-
Continue the chat dialogue below. Write a single reply for the character "<|character|>".
<|prompt|>
prompt-default: QA
prompt-notebook: QA
character: Assistant
name1: You
user_bio: ''
custom_system_message: ''
preset: min_p
max_new_tokens: 512
max_new_tokens_min: 1
max_new_tokens_max: 4096
prompt_lookup_num_tokens: 0
max_tokens_second: 0
max_updates_second: 12
auto_max_new_tokens: true
ban_eos_token: false
add_bos_token: true
enable_thinking: true
skip_special_tokens: true
stream: true
static_cache: false
truncation_length: 8192
seed: -1
custom_stopping_strings: ''
custom_token_bans: ''
negative_prompt: ''
dark_theme: true
default_extensions: []
instruction_template_str: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' + 'Below is an instruction that describes a task. Write a response that appropriately completes the request.' + '\n\n' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '' + message['content'] + '\n\n' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'### Instruction:\n' + message['content'] + '\n\n'-}}
{%- else -%}
{{-'### Response:\n' + message['content'] + '\n\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'### Response:\n'-}}
{%- endif -%}
chat_template_str: |-
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{%- if message['content'] -%}
{{- message['content'] + '\n\n' -}}
{%- endif -%}
{%- if user_bio -%}
{{- user_bio + '\n\n' -}}
{%- endif -%}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{- name1 + ': ' + message['content'] + '\n'-}}
{%- else -%}
{{- name2 + ': ' + message['content'] + '\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}