diff --git a/.github/workflows/build-everything-tgw.yml b/.github/workflows/build-everything-tgw.yml
index 0b65dfd6..40d9db5d 100644
--- a/.github/workflows/build-everything-tgw.yml
+++ b/.github/workflows/build-everything-tgw.yml
@@ -41,13 +41,6 @@ jobs:
       version: ${{ inputs.version }}
       config: 'os:ubuntu-22.04'
 
-  build_release_rocm_windows:
-    name: ROCm Windows
-    uses: ./.github/workflows/build-portable-release-rocm.yml
-    with:
-      version: ${{ inputs.version }}
-      config: 'os:windows-2022'
-
   build_release_rocm_linux:
     name: ROCm Linux
     uses: ./.github/workflows/build-portable-release-rocm.yml
diff --git a/.github/workflows/build-portable-release-ik-cuda.yml b/.github/workflows/build-portable-release-ik-cuda.yml
index a336a1cb..331a7653 100644
--- a/.github/workflows/build-portable-release-ik-cuda.yml
+++ b/.github/workflows/build-portable-release-ik-cuda.yml
@@ -102,8 +102,8 @@ jobs:
             VERSION_CLEAN="${{ inputs.version }}"
             VERSION_CLEAN="${VERSION_CLEAN#v}"
             cd ..
-            cp -r text-generation-webui "text-generation-webui-ik-${VERSION_CLEAN}"
-            cd "text-generation-webui-ik-${VERSION_CLEAN}"
+            cp -r text-generation-webui "text-generation-webui-${VERSION_CLEAN}"
+            cd "text-generation-webui-${VERSION_CLEAN}"
 
             # Remove extensions that need additional requirements
             allowed=("character_bias" "gallery" "sd_api_pictures")
@@ -133,10 +133,10 @@ jobs:
             echo "Downloading Python for $PLATFORM..."
             curl -L -o python-build.tar.gz "$PYTHON_URL"
             tar -xzf python-build.tar.gz
-            mv python "text-generation-webui-ik-${VERSION_CLEAN}/portable_env"
+            mv python "text-generation-webui-${VERSION_CLEAN}/portable_env"
 
             # 3. Prepare requirements file based on CUDA version
-            cd "text-generation-webui-ik-${VERSION_CLEAN}"
+            cd "text-generation-webui-${VERSION_CLEAN}"
             if [[ "$CUDA_VERSION" == "13.1" ]]; then
                 REQ_FILE="requirements/portable/requirements_ik_cuda131.txt"
             else
@@ -158,11 +158,11 @@ jobs:
             if [[ "$RUNNER_OS" == "Windows" ]]; then
                 ARCHIVE_NAME="textgen-portable-ik-${VERSION_CLEAN}-${PLATFORM}-cuda${CUDA_VERSION}.zip"
                 echo "Creating archive: $ARCHIVE_NAME"
-                powershell -Command "Compress-Archive -Path text-generation-webui-ik-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
+                powershell -Command "Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
             else
                 ARCHIVE_NAME="textgen-portable-ik-${VERSION_CLEAN}-${PLATFORM}-cuda${CUDA_VERSION}.tar.gz"
                 echo "Creating archive: $ARCHIVE_NAME"
-                tar czf "$ARCHIVE_NAME" "text-generation-webui-ik-${VERSION_CLEAN}"
+                tar czf "$ARCHIVE_NAME" "text-generation-webui-${VERSION_CLEAN}"
             fi
 
       - name: Upload files to a GitHub release
diff --git a/.github/workflows/build-portable-release-ik.yml b/.github/workflows/build-portable-release-ik.yml
index 5eaf7c86..bf54eb0e 100644
--- a/.github/workflows/build-portable-release-ik.yml
+++ b/.github/workflows/build-portable-release-ik.yml
@@ -101,8 +101,8 @@ jobs:
             VERSION_CLEAN="${{ inputs.version }}"
             VERSION_CLEAN="${VERSION_CLEAN#v}"
             cd ..
-            cp -r text-generation-webui "text-generation-webui-ik-${VERSION_CLEAN}"
-            cd "text-generation-webui-ik-${VERSION_CLEAN}"
+            cp -r text-generation-webui "text-generation-webui-${VERSION_CLEAN}"
+            cd "text-generation-webui-${VERSION_CLEAN}"
 
             # Remove extensions that need additional requirements
             allowed=("character_bias" "gallery" "sd_api_pictures")
@@ -131,10 +131,10 @@ jobs:
             cd ..
             curl -L -o python-build.tar.gz "$PYTHON_URL"
             tar -xzf python-build.tar.gz
-            mv python "text-generation-webui-ik-${VERSION_CLEAN}/portable_env"
+            mv python "text-generation-webui-${VERSION_CLEAN}/portable_env"
 
             # 3. Prepare requirements file
-            cd "text-generation-webui-ik-${VERSION_CLEAN}"
+            cd "text-generation-webui-${VERSION_CLEAN}"
             REQ_FILE="requirements/portable/requirements_ik_cpu_only.txt"
             echo "Using requirements file: $REQ_FILE"
 
@@ -153,11 +153,11 @@ jobs:
             if [[ "$RUNNER_OS" == "Windows" ]]; then
                 ARCHIVE_NAME="textgen-portable-ik-${VERSION_CLEAN}-${PLATFORM}.zip"
                 echo "Creating archive: $ARCHIVE_NAME"
-                powershell -Command "Compress-Archive -Path text-generation-webui-ik-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
+                powershell -Command "Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
             else
                 ARCHIVE_NAME="textgen-portable-ik-${VERSION_CLEAN}-${PLATFORM}.tar.gz"
                 echo "Creating archive: $ARCHIVE_NAME"
-                tar czf "$ARCHIVE_NAME" "text-generation-webui-ik-${VERSION_CLEAN}"
+                tar czf "$ARCHIVE_NAME" "text-generation-webui-${VERSION_CLEAN}"
             fi
 
       - name: Upload files to a GitHub release
diff --git a/README.md b/README.md
index b168ebdb..ab6cc2e5 100644
--- a/README.md
+++ b/README.md
@@ -24,9 +24,9 @@ A Gradio web UI for running Large Language Models locally. 100% private and offl
 ## Features
 
 - **Easy setup**: [Portable builds](https://github.com/oobabooga/text-generation-webui/releases) (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or a one-click installer for the full feature set.
-- **Multiple backends**: [llama.cpp](https://github.com/ggerganov/llama.cpp), [ik_llama.cpp](https://github.com/ikawrakow/ik_llama.cpp), [Transformers](https://github.com/huggingface/transformers), [ExLlamaV3](https://github.com/turboderp-org/exllamav3), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). Switch between backends and models without restarting.
+- **Multiple backends**: [llama.cpp](https://github.com/ggerganov/llama.cpp), [Transformers](https://github.com/huggingface/transformers), [ExLlamaV3](https://github.com/turboderp-org/exllamav3), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). Switch between backends and models without restarting.
 - **OpenAI/Anthropic-compatible API**: Chat, Completions, and Messages endpoints with tool-calling support. Use as a local drop-in replacement for the OpenAI/Anthropic APIs ([examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples)).
-- **Tool-calling**: Models can call custom functions during chat — web search, page fetching, math, and more. Each tool is a single `.py` file. MCP servers are also supported ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Tool-Calling-Tutorial)).
+- **Tool-calling**: Models can call custom functions during chat — web search, page fetching, math, and more. Each tool is a single `.py` file, easy to create and extend ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Tool-Calling-Tutorial)).
 - **Vision (multimodal)**: Attach images to messages for visual understanding ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Multimodal-Tutorial)).
 - **File attachments**: Upload text files, PDF documents, and .docx documents to talk about their contents.
 - **Training**: Fine-tune LoRAs on multi-turn chat or raw text datasets. Supports resuming interrupted runs ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/05-%E2%80%90-Training-Tab)).
diff --git a/css/html_instruct_style.css b/css/html_instruct_style.css
index fc20d166..458feafc 100644
--- a/css/html_instruct_style.css
+++ b/css/html_instruct_style.css
@@ -13,12 +13,21 @@
     line-height: 28px !important;
 }
 
-.dark .chat .message-body :is(p,li),
+.dark .chat .message-body :is(p,li,h1,h2,h3,h4,h5,h6),
 .dark .chat .message-body em:not(:is(h1,h2,h3,h4,h5,h6,b,strong) em),
 .dark .chat .message-body q:not(:is(h1,h2,h3,h4,h5,h6,b,strong) q) {
     color: #d1d5db !important;
 }
 
+.chat .message-body :is(th, td),
+.prose hr {
+    border-color: #40404096 !important;
+}
+
+.dark .chat .message-body :is(th, td),
+.dark .prose hr {
+    border-color: rgb(255 255 255 / 30%) !important;
+}
 
 .chat .message-body :is(p, ul, ol) {
     margin: 1.25em 0 !important;
diff --git a/css/main.css b/css/main.css
index db0b781b..009b7c0a 100644
--- a/css/main.css
+++ b/css/main.css
@@ -22,17 +22,6 @@
     font-style: italic;
 }
 
-/* Hide spin buttons on number inputs (look bad on Windows) */
-input[type="number"]::-webkit-outer-spin-button,
-input[type="number"]::-webkit-inner-spin-button {
-    -webkit-appearance: none;
-    margin: 0;
-}
-
-input[type="number"] {
-    -moz-appearance: textfield;
-}
-
 .padded.svelte-12cmxck {
     padding: 3px 0;
 }
@@ -257,8 +246,8 @@ button {
 
 .pretty_scrollbar::-webkit-scrollbar,
 #image-history-gallery > :nth-child(2)::-webkit-scrollbar {
-    width: 7px;
-    height: 7px;
+    width: 8px;
+    height: 8px;
 }
 
 .pretty_scrollbar::-webkit-scrollbar-track,
@@ -271,7 +260,7 @@ button {
 #image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb,
 #image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb:hover {
     background: var(--neutral-300);
-    border-radius: 9999px;
+    border-radius: 30px;
 }
 
 .dark .pretty_scrollbar::-webkit-scrollbar-thumb,
@@ -279,17 +268,18 @@ button {
 .dark #image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb,
 .dark #image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb:hover {
     background: rgb(255 255 255 / 6.25%);
-    border-radius: 9999px;
+    border-radius: 30px;
 }
 
 .pretty_scrollbar::-webkit-resizer,
 #image-history-gallery > :nth-child(2)::-webkit-resizer {
-    background: transparent;
+    background: #d2d2d8;
 }
 
 .dark .pretty_scrollbar::-webkit-resizer,
 .dark #image-history-gallery > :nth-child(2)::-webkit-resizer {
-    background: transparent;
+    background: rgb(255 255 255 / 10%);
+    border-radius: 10px;
 }
 
 .pretty_scrollbar::-webkit-scrollbar-corner,
@@ -446,25 +436,15 @@ audio {
 .dark .message-body h4,
 .dark .message-body h5,
 .dark .message-body h6 {
-    color: #e8e8e8 !important;
+    color: white !important;
 }
 
-.message-body blockquote {
-    border-left-width: 4px;
-    border-left-color: var(--border-color-primary);
-}
-
-.message-body h1,
-.message-body h2,
-.message-body h3,
-.message-body h4,
-.message-body h5,
-.message-body h6 {
-    color: #1a1a1a;
+.dark .message-body blockquote {
+    border-left-color: rgb(255 255 255 / 30%);
 }
 
 .message-body h1 {
-    font-weight: 700;
+    font-weight: 800;
     font-size: 2.25em;
     margin-top: 0;
     margin-bottom: 0.8888889em;
@@ -496,13 +476,13 @@ audio {
 }
 
 .message-body h5 {
-    font-weight: 600;
+    font-weight: normal;
     font-size: 1em;
     margin: 0;
 }
 
 .message-body h6 {
-    font-weight: 600;
+    font-weight: normal;
     font-size: 1em;
     margin: 0;
 }
@@ -610,7 +590,7 @@ audio {
 }
 
 #chat-input textarea::-webkit-scrollbar {
-    width: 7px;
+    width: 8px;
 }
 
 #chat-input textarea::-webkit-scrollbar-track {
@@ -619,7 +599,7 @@ audio {
 
 #chat-input textarea::-webkit-scrollbar-thumb {
     background: var(--neutral-300);
-    border-radius: 9999px;
+    border-radius: 30px;
 }
 
 .dark #chat-input textarea::-webkit-scrollbar-thumb {
@@ -653,10 +633,6 @@ audio {
     background: transparent;
 }
 
-#chat-input .thumbnails {
-    padding-top: 3px;
-}
-
 .chat-input-positioned {
     max-width: 54rem;
     left: 50%;
@@ -759,30 +735,7 @@ audio {
 
 .hover-element {
     position: relative;
-    padding-top: 4px;
-}
-
-#hover-element-button {
-    display: flex;
-    align-items: center;
-    justify-content: center;
-    width: 32px;
-    height: 32px;
-    border-radius: 0.5rem;
-    cursor: pointer;
-    color: gray;
-}
-
-#hover-element-button:hover {
-    background-color: var(--background-fill-secondary);
-}
-
-#hover-element-button svg {
-    color: inherit;
-}
-
-.dark #hover-element-button:hover {
-    background-color: var(--selected-item-color-dark);
+    font-size: 24px;
 }
 
 .hover-menu {
@@ -790,40 +743,27 @@ audio {
     position: absolute;
     bottom: 100%;
     left: 0;
-    background: white;
-    border: 1px solid rgba(0, 0, 0, 0.1);
-    box-shadow: 0 4px 16px rgb(0 0 0 / 12%), 0 1px 3px rgb(0 0 0 / 8%);
-    border-radius: 0.75rem;
+    box-shadow: 0 2px 12px rgb(0 0 0 / 15%);
+    border-radius: 0.5rem;
     z-index: 10000;
     min-width: 330px;
     flex-direction: column;
-    padding: 4px;
-}
-
-.hover-menu::before {
-    content: '';
-    position: absolute;
-    top: 100%;
-    left: 0;
-    width: 100%;
-    height: 8px;
-}
-
-.hover-menu > * {
-    border: none !important;
-    box-shadow: none !important;
+    overflow: hidden;
 }
 
 .hover-menu button {
     width: 100%;
-    background: transparent !important;
-    border: none !important;
-    border-radius: 0.5rem !important;
+    background: white !important;
+    border-radius: 0 !important;
     justify-content: space-between;
     margin: 0 !important;
     height: 36px;
-    font-weight: 500;
-    box-shadow: none !important;
+    border-color: transparent !important;
+    transition: background-color 0.15s ease;
+}
+
+.hover-menu button:not(#clear-history-confirm) {
+    border-bottom: 0 !important;
 }
 
 .hover-menu button:hover {
@@ -835,26 +775,19 @@ audio {
 }
 
 #show-controls {
-    background-color: transparent;
-    border: none !important;
+    background-color: white;
+    border-color: transparent !important;
     height: 36px;
-    border-radius: 0.5rem;
+    border-radius: 0;
+    border-bottom: 0 !important;
     padding-top: 3px;
     padding-left: 4px;
     display: flex;
     font-weight: normal;
 }
 
-#show-controls:hover {
-    background-color: #dbeafe;
-}
-
 .dark #show-controls {
-    background-color: transparent;
-}
-
-.dark #show-controls:hover {
-    background-color: var(--selected-item-color-dark);
+    background-color: var(--darker-gray);
 }
 
 #show-controls label {
@@ -864,12 +797,12 @@ audio {
     width: 100%;
     padding-right: 12px;
     gap: 10px;
-    font-weight: 500;
+    font-weight: 600;
     color: var(--button-secondary-text-color);
 }
 
 #show-controls label input {
-    margin-top: 5px;
+    margin-top: 4px;
 }
 
 .transparent-substring {
@@ -909,7 +842,7 @@ audio {
 }
 
 #chat-input-row {
-    padding: 0.5rem 1rem 1rem;
+    padding: 1rem;
 }
 
 #chat-col {
@@ -1275,14 +1208,9 @@ audio {
     color: #9ca3af;
 }
 
-.dark .hover-menu {
-    background: var(--darker-gray);
-    border-color: transparent;
-    box-shadow: 0 4px 16px rgb(0 0 0 / 40%);
-}
-
 .dark .hover-menu button {
-    background-color: transparent !important;
+    border-color: var(--border-color-primary);
+    background-color: var(--darker-gray) !important;
 }
 
 .dark #chat-controls,
@@ -1444,7 +1372,8 @@ audio {
 }
 
 .footer-button svg {
-    stroke: rgb(140 140 148);
+    stroke: rgb(156 163 175);
+    transition: stroke 0.2s;
 }
 
 .footer-button:hover svg {
@@ -1459,12 +1388,12 @@ audio {
     stroke: rgb(209 213 219);
 }
 
-.block:has(> .label-wrap) {
+.tgw-accordion {
     padding: 10px 12px !important;
     border: 1px solid #d2d2d8;
 }
 
-.dark .block:has(> .label-wrap) {
+.dark .tgw-accordion {
     border: 1px solid var(--border-color-dark);
 }
 
@@ -1974,24 +1903,14 @@ table, tr, td, th, thead {
     border: 0;
 }
 
-.prose hr {
-    border-color: var(--border-color-primary);
-}
-
 td + td,
-th + th {
-    border-left: 1px solid var(--border-color-primary) !important;
-}
+th + th { border-left: 1px solid; }
 
 tr + tr td,
-tr + tr th {
-    border-top: 1px solid var(--border-color-primary) !important;
-}
+tr + tr th { border-top: 1px solid; }
 
 thead + tbody tr:first-child td,
-thead + tbody tr:first-child th {
-    border-top: 1px solid var(--border-color-primary) !important;
-}
+thead + tbody tr:first-child th { border-top: 1px solid; }
 
 /* ------------------------------------------------
    Tools CheckboxGroup - vertical DragDrop-like style
@@ -2023,8 +1942,8 @@ thead + tbody tr:first-child th {
 
 /* Pretty scrollbar for the tools list */
 #tools-group .wrap::-webkit-scrollbar {
-    width: 7px;
-    height: 7px;
+    width: 8px;
+    height: 8px;
 }
 
 #tools-group .wrap::-webkit-scrollbar-track {
@@ -2034,13 +1953,13 @@ thead + tbody tr:first-child th {
 #tools-group .wrap::-webkit-scrollbar-thumb,
 #tools-group .wrap::-webkit-scrollbar-thumb:hover {
     background: var(--neutral-300);
-    border-radius: 9999px;
+    border-radius: 30px;
 }
 
 .dark #tools-group .wrap::-webkit-scrollbar-thumb,
 .dark #tools-group .wrap::-webkit-scrollbar-thumb:hover {
     background: rgb(255 255 255 / 6.25%);
-    border-radius: 9999px;
+    border-radius: 30px;
 }
 
 #tools-group .wrap::-webkit-scrollbar-corner {
diff --git a/docs/12 - OpenAI API.md b/docs/12 - OpenAI API.md
index 727f6ece..0a076c35 100644
--- a/docs/12 - OpenAI API.md	
+++ b/docs/12 - OpenAI API.md	
@@ -232,17 +232,6 @@ curl -k http://127.0.0.1:5000/v1/internal/model/load \
   }'
 ```
 
-You can also set a default instruction template for all subsequent API requests by passing `instruction_template` (a template name from `user_data/instruction-templates/`) or `instruction_template_str` (a raw Jinja2 string):
-
-```shell
-curl -k http://127.0.0.1:5000/v1/internal/model/load \
-  -H "Content-Type: application/json" \
-  -d '{
-    "model_name": "Qwen_Qwen3-0.6B-Q4_K_M.gguf",
-    "instruction_template": "Alpaca"
-  }'
-```
-
 #### Python chat example
 
 ```python
diff --git a/docs/Tool Calling Tutorial.md b/docs/Tool Calling Tutorial.md
index 7d2a86de..d95a9c80 100644
--- a/docs/Tool Calling Tutorial.md	
+++ b/docs/Tool Calling Tutorial.md	
@@ -80,19 +80,6 @@ def execute(arguments):
 
 You can open the built-in tools in `user_data/tools/` for more examples.
 
-## MCP servers
-
-You can connect to remote [MCP (Model Context Protocol)](https://modelcontextprotocol.io/) servers to use their tools alongside local ones.
-
-In the chat sidebar, open the **MCP servers** accordion and enter one server URL per line. For servers that require authentication, append headers after the URL separated by commas:
-
-```
-https://example.com/mcp
-https://other.com/mcp,Authorization: Bearer sk-xxx
-```
-
-All tools from the configured servers are automatically discovered and made available to the model during generation. If an MCP tool has the same name as a selected local tool, the local tool takes priority.
-
 ## Tool calling over the API
 
 Tool calling over the API follows the [OpenAI API](https://platform.openai.com/docs/guides/function-calling) convention. Define your tools, send them with your messages, and handle tool calls in a loop until the model gives a final answer.
diff --git a/js/main.js b/js/main.js
index 918c85c1..cba4c903 100644
--- a/js/main.js
+++ b/js/main.js
@@ -309,19 +309,18 @@ for (let i = 0; i < slimDropdownElements.length; i++) {
 // https://github.com/SillyTavern/SillyTavern/blob/6c8bd06308c69d51e2eb174541792a870a83d2d6/public/script.js
 //------------------------------------------------
 var buttonsInChat = document.querySelectorAll("#chat-tab #chat-buttons button, #chat-tab #chat-buttons #show-controls");
-var hoverContainer = document.getElementById("gr-hover-container");
 var button = document.getElementById("hover-element-button");
 var menu = document.getElementById("hover-menu");
 var istouchscreen = (navigator.maxTouchPoints > 0) || "ontouchstart" in document.documentElement;
 
 function showMenu() {
-  menu.style.display = "flex";
+  menu.style.display = "flex"; // Show the menu
 }
 
 function hideMenu() {
-  menu.style.display = "none";
+  menu.style.display = "none"; // Hide the menu
   if (!istouchscreen) {
-    document.querySelector("#chat-input textarea").focus();
+    document.querySelector("#chat-input textarea").focus(); // Focus on the chat input
   }
 }
 
@@ -330,6 +329,7 @@ if (buttonsInChat.length > 0) {
     const thisButton = buttonsInChat[i];
     menu.appendChild(thisButton);
 
+    // Only apply transformations to button elements
     if (thisButton.tagName.toLowerCase() === "button") {
       thisButton.addEventListener("click", () => {
         hideMenu();
@@ -339,6 +339,7 @@ if (buttonsInChat.length > 0) {
       const matches = buttonText.match(/(\(.*?\))/);
 
       if (matches && matches.length > 1) {
+        // Apply the transparent-substring class to the matched substring
         const substring = matches[1];
         const newText = buttonText.replace(substring, `&nbsp;<span class="transparent-substring">${substring.slice(1, -1)}</span>`);
         thisButton.innerHTML = newText;
@@ -347,19 +348,16 @@ if (buttonsInChat.length > 0) {
   }
 }
 
-var menuInteracting = false;
+function isMouseOverButtonOrMenu() {
+  return menu.matches(":hover") || button.matches(":hover");
+}
 
-hoverContainer.addEventListener("mouseenter", function () {
+button.addEventListener("mouseenter", function () {
   if (!istouchscreen) {
     showMenu();
   }
 });
 
-hoverContainer.addEventListener("mousedown", function () {
-  menuInteracting = true;
-  setTimeout(function () { menuInteracting = false; }, 300);
-});
-
 button.addEventListener("click", function () {
   if (menu.style.display === "flex") {
     hideMenu();
@@ -369,20 +367,24 @@ button.addEventListener("click", function () {
   }
 });
 
-hoverContainer.addEventListener("mouseleave", function () {
-  if (!istouchscreen) {
-    setTimeout(function () {
-      if (!hoverContainer.matches(":hover") && !menu.matches(":hover")) {
-        hideMenu();
-      }
-    }, 50);
-  }
-});
+// Hide the menu after a short delay, but only if the mouse is over neither the button nor the menu
+function delayedHideMenu() {
+  setTimeout(function () {
+    if (!isMouseOverButtonOrMenu()) {
+      hideMenu();
+    }
+  }, 100);
+}
+
+// Add event listener for mouseleave on the button
+button.addEventListener("mouseleave", delayedHideMenu);
+// Add event listener for mouseleave on the menu
+menu.addEventListener("mouseleave", delayedHideMenu);
 
 // Add event listener for click anywhere in the document
 document.addEventListener("click", function (event) {
   // Check if the click is outside the button/menu and the menu is visible
-  if (!menuInteracting && !event.target.closest("#gr-hover-container") && menu.style.display === "flex") {
+  if (!isMouseOverButtonOrMenu() && menu.style.display === "flex") {
     hideMenu();
   }
 
diff --git a/modules/api/embeddings.py b/modules/api/embeddings.py
index 17e595fb..16cf0482 100644
--- a/modules/api/embeddings.py
+++ b/modules/api/embeddings.py
@@ -6,7 +6,6 @@ from transformers import AutoModel
 from .errors import ServiceUnavailableError
 from .utils import debug_msg, float_list_to_base64
 from modules.logging_colors import logger
-from modules import shared
 
 embeddings_params_initialized = False
 
@@ -42,7 +41,7 @@ def load_embedding_model(model: str):
     try:
         logger.info(f"Try embedding model: {model} on {embeddings_device}")
         if 'jina-embeddings' in model:
-            embeddings_model = AutoModel.from_pretrained(model, trust_remote_code=shared.args.trust_remote_code)
+            embeddings_model = AutoModel.from_pretrained(model, trust_remote_code=True)  # trust_remote_code is needed to use the encode method
             embeddings_model = embeddings_model.to(embeddings_device)
         else:
             embeddings_model = SentenceTransformer(model, device=embeddings_device)
diff --git a/modules/api/images.py b/modules/api/images.py
index dde7d336..95704535 100644
--- a/modules/api/images.py
+++ b/modules/api/images.py
@@ -4,11 +4,8 @@ OpenAI-compatible image generation using local diffusion models.
 
 import base64
 import io
-import json
 import time
 
-from PIL.PngImagePlugin import PngInfo
-
 from .errors import ServiceUnavailableError
 from modules import shared
 
@@ -18,7 +15,7 @@ def generations(request):
     Generate images using the loaded diffusion model.
     Returns dict with 'created' timestamp and 'data' list of images.
     """
-    from modules.ui_image_generation import build_generation_metadata, generate
+    from modules.ui_image_generation import generate
 
     if shared.image_model is None:
         raise ServiceUnavailableError("No image model loaded. Load a model via the UI first.")
@@ -49,18 +46,10 @@ def generations(request):
     if not images:
         raise ServiceUnavailableError("Image generation failed or produced no images.")
 
-    # Build response with per-batch metadata (seed increments per batch)
-    base_seed = state.get('image_seed_resolved', state['image_seed'])
-    batch_size = int(state['image_batch_size'])
-
+    # Build response
     resp = {'created': int(time.time()), 'data': []}
-    for idx, img in enumerate(images):
-        batch_seed = base_seed + idx // batch_size
-        metadata = build_generation_metadata(state, batch_seed)
-        metadata_json = json.dumps(metadata, ensure_ascii=False)
-        png_info = PngInfo()
-        png_info.add_text("image_gen_settings", metadata_json)
-        b64 = _image_to_base64(img, png_info)
+    for img in images:
+        b64 = _image_to_base64(img)
 
         image_obj = {'revised_prompt': request.prompt}
 
@@ -74,7 +63,7 @@ def generations(request):
     return resp
 
 
-def _image_to_base64(image, png_info=None) -> str:
+def _image_to_base64(image) -> str:
     buffered = io.BytesIO()
-    image.save(buffered, format="PNG", pnginfo=png_info)
+    image.save(buffered, format="PNG")
     return base64.b64encode(buffered.getvalue()).decode('utf-8')
diff --git a/modules/api/models.py b/modules/api/models.py
index bfcd2c31..b89397d3 100644
--- a/modules/api/models.py
+++ b/modules/api/models.py
@@ -2,7 +2,7 @@ from modules import loaders, shared
 from modules.logging_colors import logger
 from modules.LoRA import add_lora_to_model
 from modules.models import load_model, unload_model
-from modules.models_settings import get_model_metadata, load_instruction_template, update_model_parameters
+from modules.models_settings import get_model_metadata, update_model_parameters
 from modules.utils import get_available_loras, get_available_models
 
 
@@ -42,10 +42,12 @@ def model_info_dict(model_name: str) -> dict:
 
 def _load_model(data):
     model_name = data["model_name"]
-    args = data.get("args")
+    args = data["args"]
+    settings = data["settings"]
 
     unload_model()
     model_settings = get_model_metadata(model_name)
+    update_model_parameters(model_settings)
 
     # Update shared.args with custom model loading settings
     # Security: only allow keys that correspond to model loading
@@ -53,16 +55,6 @@ def _load_model(data):
     # flags like trust_remote_code or extra_flags to be set via the API.
     blocked_keys = {'extra_flags'}
     allowed_keys = set(loaders.list_model_elements()) - blocked_keys
-
-    # Reset all loader args to their startup values before applying new ones,
-    # so settings from a previous API load don't leak into this one.
-    # Include blocked keys in the reset (safe: restores startup value, not API-controlled).
-    for k in allowed_keys | blocked_keys:
-        if hasattr(shared.args, k) and hasattr(shared.original_args, k):
-            setattr(shared.args, k, getattr(shared.original_args, k))
-
-    update_model_parameters(model_settings)
-
     if args:
         for k in args:
             if k in allowed_keys and hasattr(shared.args, k):
@@ -70,12 +62,15 @@ def _load_model(data):
 
     shared.model, shared.tokenizer = load_model(model_name)
 
-    if data.get("instruction_template_str") is not None:
-        shared.settings['instruction_template_str'] = data["instruction_template_str"]
-        logger.info("INSTRUCTION TEMPLATE: set to custom Jinja2 string")
-    elif data.get("instruction_template") is not None:
-        shared.settings['instruction_template_str'] = load_instruction_template(data["instruction_template"])
-        logger.info(f"INSTRUCTION TEMPLATE: {data['instruction_template']}")
+    # Update shared.settings with custom generation defaults
+    if settings:
+        for k in settings:
+            if k in shared.settings:
+                shared.settings[k] = settings[k]
+                if k == 'truncation_length':
+                    logger.info(f"CONTEXT LENGTH (UPDATED): {shared.settings['truncation_length']}")
+                elif k == 'instruction_template':
+                    logger.info(f"INSTRUCTION TEMPLATE (UPDATED): {shared.settings['instruction_template']}")
 
 
 def list_loras():
diff --git a/modules/api/script.py b/modules/api/script.py
index ceeca2dc..85f4974f 100644
--- a/modules/api/script.py
+++ b/modules/api/script.py
@@ -475,8 +475,10 @@ async def handle_list_models():
 @app.post("/v1/internal/model/load", dependencies=check_admin_key)
 async def handle_load_model(request_data: LoadModelRequest):
     '''
-    The "args" parameter can be used to modify loader flags before loading
-    a model. Example:
+    This endpoint is experimental and may change in the future.
+
+    The "args" parameter can be used to modify flags like "--load-in-4bit"
+    or "--n-gpu-layers" before loading a model. Example:
 
     ```
     "args": {
@@ -485,13 +487,18 @@ async def handle_load_model(request_data: LoadModelRequest):
     }
     ```
 
-    Loader args are reset to their startup defaults between loads, so
-    settings from a previous load do not leak into the next one.
+    Note that these settings persist after the model is loaded, so you
+    may need to change them back before loading a second model.
 
-    The "instruction_template" parameter sets the default instruction
-    template by name (from user_data/instruction-templates/). The
-    "instruction_template_str" parameter sets it as a raw Jinja2 string
-    and takes precedence over "instruction_template".
+    The "settings" parameter is also a dict but with keys for the
+    shared.settings object. It can be used to modify the default instruction
+    template like this:
+
+    ```
+    "settings": {
+      "instruction_template": "Alpaca"
+    }
+    ```
     '''
 
     try:
@@ -537,8 +544,8 @@ async def handle_unload_loras():
 def find_available_port(starting_port):
     """Try the starting port, then find an available one if it's taken."""
     try:
+        # Try to bind a socket to the starting port
         with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
-            s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
             s.bind(('', starting_port))
             return starting_port
     except OSError:
@@ -563,7 +570,7 @@ def run_server():
         server_addrs.append(shared.args.listen_host)
     else:
         if os.environ.get('OPENEDAI_ENABLE_IPV6', shared.args.api_enable_ipv6):
-            server_addrs.append('::' if shared.args.listen else '::1')
+            server_addrs.append('[::]' if shared.args.listen else '[::1]')
         if not os.environ.get('OPENEDAI_DISABLE_IPV4', shared.args.api_disable_ipv4):
             server_addrs.append('0.0.0.0' if shared.args.listen else '127.0.0.1')
 
@@ -580,7 +587,7 @@ def run_server():
         )
     else:
         url_proto = 'https://' if (ssl_certfile and ssl_keyfile) else 'http://'
-        urls = [f'{url_proto}[{addr}]:{port}/v1' if ':' in addr else f'{url_proto}{addr}:{port}/v1' for addr in server_addrs]
+        urls = [f'{url_proto}{addr}:{port}/v1' for addr in server_addrs]
         if len(urls) > 1:
             logger.info('OpenAI/Anthropic-compatible API URLs:\n\n' + '\n'.join(urls) + '\n')
         else:
diff --git a/modules/api/typing.py b/modules/api/typing.py
index 56d7f2bc..1d486e8f 100644
--- a/modules/api/typing.py
+++ b/modules/api/typing.py
@@ -271,8 +271,7 @@ class ModelListResponse(BaseModel):
 class LoadModelRequest(BaseModel):
     model_name: str
     args: dict | None = None
-    instruction_template: str | None = Field(default=None, description="An instruction template defined under text-generation-webui/user_data/instruction-templates. Sets the default template for all subsequent API requests.")
-    instruction_template_str: str | None = Field(default=None, description="A Jinja2 instruction template string. If set, takes precedence over instruction_template.")
+    settings: dict | None = None
 
 
 class LoraListResponse(BaseModel):
diff --git a/modules/chat.py b/modules/chat.py
index 7e9cce60..edda11b0 100644
--- a/modules/chat.py
+++ b/modules/chat.py
@@ -210,57 +210,6 @@ def _expand_tool_sequence(tool_seq):
     return messages
 
 
-def _convert_to_tool_responses(messages):
-    """Convert role:'tool' messages to tool_responses format.
-
-    Templates like Gemma 4 expect tool results as a ``tool_responses``
-    attribute on a message rather than separate ``role: 'tool'`` messages.
-    This function groups consecutive tool messages and rewrites them.
-    """
-    result = []
-    tc_id_to_name = {}
-
-    i = 0
-    while i < len(messages):
-        msg = messages[i]
-
-        if msg.get('tool_calls'):
-            for tc in msg['tool_calls']:
-                tc_id = tc.get('id', '')
-                func_name = tc.get('function', {}).get('name', 'unknown')
-                if tc_id:
-                    tc_id_to_name[tc_id] = func_name
-
-        if msg.get('role') == 'tool':
-            tool_responses = []
-            while i < len(messages) and messages[i].get('role') == 'tool':
-                tool_msg = messages[i]
-                tc_id = tool_msg.get('tool_call_id', '')
-                func_name = tc_id_to_name.get(tc_id, 'unknown')
-
-                content = tool_msg.get('content', '')
-                try:
-                    response = json.loads(content)
-                except (json.JSONDecodeError, ValueError, TypeError):
-                    response = content
-
-                tool_responses.append({
-                    'name': func_name,
-                    'response': response,
-                })
-                i += 1
-
-            result.append({
-                'role': 'tool',
-                'tool_responses': tool_responses,
-            })
-        else:
-            result.append(msg)
-            i += 1
-
-    return result
-
-
 def _format_attachments(attachments, include_text=True):
     """Build image ref and text attachment strings from a list of attachments."""
     attachments_text = ""
@@ -318,9 +267,6 @@ def generate_chat_prompt(user_input, state, **kwargs):
         tools=state['tools'] if 'tools' in state else None,
     )
 
-    active_template_str = state['instruction_template_str'] if state['mode'] == 'instruct' else chat_template_str
-    uses_tool_responses = 'tool_responses' in active_template_str
-
     messages = []
 
     if state['mode'] == 'instruct':
@@ -557,9 +503,6 @@ def generate_chat_prompt(user_input, state, **kwargs):
 
         return prompt
 
-    if uses_tool_responses:
-        messages = _convert_to_tool_responses(messages)
-
     prompt = make_prompt(messages)
 
     # Handle truncation
@@ -568,24 +511,13 @@ def generate_chat_prompt(user_input, state, **kwargs):
         encoded_length = get_encoded_length(prompt)
         while len(messages) > 0 and encoded_length > max_length:
 
+            # Remove the oldest non-system message, keeping the system message
             if len(messages) > 2 and messages[0]['role'] == 'system':
-                pop_idx = 1
+                messages.pop(1)
+
+            # Remove the oldest message when no system message is present
             elif len(messages) > 1 and messages[0]['role'] != 'system':
-                pop_idx = 0
-            else:
-                pop_idx = None
-
-            if pop_idx is not None:
-                messages.pop(pop_idx)
-
-                # Remove orphaned tool-call/tool-result messages that
-                # would be invalid without their partner.
-                while pop_idx < len(messages):
-                    msg = messages[pop_idx]
-                    if msg.get('role') == 'tool' or (msg.get('role') == 'assistant' and msg.get('tool_calls')):
-                        messages.pop(pop_idx)
-                    else:
-                        break
+                messages.pop(0)
 
             # Resort to truncating the user input
             else:
@@ -705,7 +637,7 @@ def get_stopping_strings(state):
         # Find positions of each message content
         first_user_end = prompt.find("first user message") + len("first user message")
         first_assistant_start = prompt.find("first assistant message")
-        first_assistant_end = first_assistant_start + len("first assistant message")
+        first_assistant_end = prompt.find("first assistant message") + len("first assistant message")
         second_user_start = prompt.find("second user message")
         second_assistant_end = prompt.find("second assistant message") + len("second assistant message")
 
@@ -1194,7 +1126,7 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
     # visible text from before buffering started so raw markup doesn't flash
     # in the UI.  The internal text is left intact so the caller can still
     # parse tool calls from it.
-    if is_stream and _check_tool_markers and streaming_tool_buffer_check(output['internal'][-1][1], markers=_streaming_markers, tool_names=_tool_names, check_bare_names=_check_bare_names, partial_match=False):
+    if is_stream and _check_tool_markers and streaming_tool_buffer_check(output['internal'][-1][1], markers=_streaming_markers, tool_names=_tool_names, check_bare_names=_check_bare_names):
         output['visible'][-1][1] = _last_visible_before_tool_buffer or ''
 
     yield output
@@ -1275,23 +1207,14 @@ def generate_chat_reply_wrapper(text, state, regenerate=False, _continue=False):
 
     # Load tools if any are selected
     selected = state.get('selected_tools', [])
-    mcp_servers = state.get('mcp_servers', '')
     parse_tool_call = None
     _tool_parsers = None
-    if selected or mcp_servers:
-        from modules.tool_use import load_tools, load_mcp_tools, execute_tool
+    if selected:
+        from modules.tool_use import load_tools, execute_tool
         from modules.tool_parsing import parse_tool_call, get_tool_call_id, detect_tool_call_format
 
+    if selected:
         tool_defs, tool_executors = load_tools(selected)
-        if mcp_servers:
-            mcp_defs, mcp_executors = load_mcp_tools(mcp_servers)
-            for td in mcp_defs:
-                fn = td['function']['name']
-                if fn in tool_executors:
-                    logger.warning(f'MCP tool "{fn}" conflicts with a local tool. Skipping.')
-                    continue
-                tool_defs.append(td)
-                tool_executors[fn] = mcp_executors[fn]
         state['tools'] = tool_defs
         tool_func_names = [t['function']['name'] for t in tool_defs]
         _template_str = state.get('instruction_template_str', '') if state.get('mode') == 'instruct' else state.get('chat_template_str', '')
@@ -1839,8 +1762,7 @@ def load_history(unique_id, character, mode):
     if not p.exists():
         return {'internal': [], 'visible': [], 'metadata': {}}
 
-    with open(p, 'rb') as fh:
-        f = json.loads(fh.read())
+    f = json.loads(open(p, 'rb').read())
     if 'internal' in f and 'visible' in f:
         history = f
     else:
@@ -1904,17 +1826,19 @@ def generate_pfp_cache(character):
     if not cache_folder.exists():
         cache_folder.mkdir()
 
-    for extension in ['png', 'jpg', 'jpeg']:
-        path = shared.user_data_dir / 'characters' / f"{character}.{extension}"
+    for path in [shared.user_data_dir / 'characters' / f"{character}.{extension}" for extension in ['png', 'jpg', 'jpeg']]:
         if path.exists():
             original_img = Image.open(path)
-            pfp_path = cache_folder / 'pfp_character.png'
-            thumb_path = cache_folder / 'pfp_character_thumb.png'
+            # Define file paths
+            pfp_path = Path(f'{cache_folder}/pfp_character.png')
+            thumb_path = Path(f'{cache_folder}/pfp_character_thumb.png')
 
+            # Save main picture and thumbnail
             original_img.save(pfp_path, format='PNG')
             thumb = make_thumbnail(original_img)
             thumb.save(thumb_path, format='PNG')
 
+            # Return the path to the thumbnail, not the in-memory PIL Image object.
             return str(thumb_path)
 
     return None
@@ -1935,13 +1859,13 @@ def load_character(character, name1, name2):
         logger.error(f"Could not find the character \"{character}\" inside {shared.user_data_dir}/characters. No character has been loaded.")
         raise ValueError
 
-    with open(filepath, 'r', encoding='utf-8') as fh:
-        file_contents = fh.read()
+    file_contents = open(filepath, 'r', encoding='utf-8').read()
     data = json.loads(file_contents) if extension == "json" else yaml.safe_load(file_contents)
     cache_folder = Path(shared.args.disk_cache_dir)
 
-    for path in [cache_folder / "pfp_character.png", cache_folder / "pfp_character_thumb.png"]:
-        path.unlink(missing_ok=True)
+    for path in [Path(f"{cache_folder}/pfp_character.png"), Path(f"{cache_folder}/pfp_character_thumb.png")]:
+        if path.exists():
+            path.unlink()
 
     picture = generate_pfp_cache(character)
 
@@ -1997,7 +1921,9 @@ def clear_character_for_ui(state):
     # Clear the cache files
     cache_folder = Path(shared.args.disk_cache_dir)
     for cache_file in ['pfp_character.png', 'pfp_character_thumb.png']:
-        (cache_folder / cache_file).unlink(missing_ok=True)
+        cache_path = Path(f'{cache_folder}/{cache_file}')
+        if cache_path.exists():
+            cache_path.unlink()
 
     return state, state['name2'], state['context'], state['greeting'], None
 
@@ -2092,10 +2018,11 @@ def upload_your_profile_picture(img_path):
         cache_folder.mkdir()
 
     if img is None:
-        (cache_folder / "pfp_me.png").unlink(missing_ok=True)
+        if Path(f"{cache_folder}/pfp_me.png").exists():
+            Path(f"{cache_folder}/pfp_me.png").unlink()
     else:
         img = make_thumbnail(img)
-        img.save(cache_folder / 'pfp_me.png')
+        img.save(Path(f'{cache_folder}/pfp_me.png'))
         logger.info(f'Profile picture saved to "{cache_folder}/pfp_me.png"')
 
 
@@ -2151,12 +2078,13 @@ def generate_user_pfp_cache(user):
     if not cache_folder.exists():
         cache_folder.mkdir()
 
-    for extension in ['png', 'jpg', 'jpeg']:
-        path = shared.user_data_dir / 'users' / f"{user}.{extension}"
+    for path in [shared.user_data_dir / 'users' / f"{user}.{extension}" for extension in ['png', 'jpg', 'jpeg']]:
         if path.exists():
             original_img = Image.open(path)
-            pfp_path = cache_folder / 'pfp_me.png'
+            # Define file paths
+            pfp_path = Path(f'{cache_folder}/pfp_me.png')
 
+            # Save thumbnail
             thumb = make_thumbnail(original_img)
             thumb.save(pfp_path, format='PNG')
             logger.info(f'User profile picture cached to "{pfp_path}"')
@@ -2188,7 +2116,9 @@ def load_user(user_name, name1, user_bio):
 
     # Clear existing user picture cache
     cache_folder = Path(shared.args.disk_cache_dir)
-    (cache_folder / "pfp_me.png").unlink(missing_ok=True)
+    pfp_path = Path(f"{cache_folder}/pfp_me.png")
+    if pfp_path.exists():
+        pfp_path.unlink()
 
     # Generate new picture cache
     picture = generate_user_pfp_cache(user_name)
@@ -2612,13 +2542,15 @@ def handle_character_picture_change(picture_path):
 
     if picture is not None:
         # Save to cache
-        picture.save(cache_folder / 'pfp_character.png', format='PNG')
+        picture.save(Path(f'{cache_folder}/pfp_character.png'), format='PNG')
         thumb = make_thumbnail(picture)
-        thumb.save(cache_folder / 'pfp_character_thumb.png', format='PNG')
+        thumb.save(Path(f'{cache_folder}/pfp_character_thumb.png'), format='PNG')
     else:
         # Remove cache files when picture is cleared
         for cache_file in ['pfp_character.png', 'pfp_character_thumb.png']:
-            (cache_folder / cache_file).unlink(missing_ok=True)
+            cache_path = Path(f'{cache_folder}/{cache_file}')
+            if cache_path.exists():
+                cache_path.unlink()
 
 
 def handle_mode_change(state):
diff --git a/modules/html_generator.py b/modules/html_generator.py
index e3ebea8d..8f3f261f 100644
--- a/modules/html_generator.py
+++ b/modules/html_generator.py
@@ -14,13 +14,6 @@ from modules.reasoning import extract_reasoning
 from modules.sane_markdown_lists import SaneListExtension
 from modules.utils import get_available_chat_styles
 
-# Pre-compiled regex for protecting markdown-sensitive characters inside LaTeX.
-# Covers $$...$$, \[...\], \(...\), and inline $...$ (when content contains \\).
-_LATEX_PATTERN = re.compile(
-    r'((?:^|[\r\n\s])\$\$[^`]*?\$\$)|\\\[(.*?)\\\]|\\\((.*?)\\\)|(?<!\$)\$(?!\$)([^\$\n]*\\\\[^\$\n]*?)\$(?!\$)',
-    re.DOTALL
-)
-
 # This is to store the paths to the thumbnails of the profile pictures
 image_cache = {}
 
@@ -192,29 +185,28 @@ def process_markdown_content(string):
     if not string:
         return ""
 
-    # Define unique placeholders for LaTeX characters that conflict with markdown
+    # Define unique placeholders for LaTeX asterisks and underscores
     LATEX_ASTERISK_PLACEHOLDER = "LATEXASTERISKPLACEHOLDER"
     LATEX_UNDERSCORE_PLACEHOLDER = "LATEXUNDERSCOREPLACEHOLDER"
-    LATEX_PIPE_PLACEHOLDER = "LATEXPIPEPLACEHOLDER"
-
-    def protect_latex_content(content):
-        """Protect markdown-sensitive characters inside LaTeX."""
-        content = content.replace('*', LATEX_ASTERISK_PLACEHOLDER)
-        content = content.replace('_', LATEX_UNDERSCORE_PLACEHOLDER)
-        content = content.replace('|', LATEX_PIPE_PLACEHOLDER)
-        return content
 
     def protect_asterisks_underscores_in_latex(match):
-        """A replacer function for re.sub to protect markdown-sensitive characters in multiple LaTeX formats."""
+        """A replacer function for re.sub to protect asterisks and underscores in multiple LaTeX formats."""
         # Check which delimiter group was captured
         if match.group(1) is not None:  # Content from $$...$$
-            return protect_latex_content(match.group(1))
+            content = match.group(1)
+            modified_content = content.replace('*', LATEX_ASTERISK_PLACEHOLDER)
+            modified_content = modified_content.replace('_', LATEX_UNDERSCORE_PLACEHOLDER)
+            return f'{modified_content}'
         elif match.group(2) is not None:  # Content from \[...\]
-            return f'\\[{protect_latex_content(match.group(2))}\\]'
+            content = match.group(2)
+            modified_content = content.replace('*', LATEX_ASTERISK_PLACEHOLDER)
+            modified_content = modified_content.replace('_', LATEX_UNDERSCORE_PLACEHOLDER)
+            return f'\\[{modified_content}\\]'
         elif match.group(3) is not None:  # Content from \(...\)
-            return f'\\({protect_latex_content(match.group(3))}\\)'
-        elif match.group(4) is not None:  # Content from $...$
-            return f'${protect_latex_content(match.group(4).strip())}$'
+            content = match.group(3)
+            modified_content = content.replace('*', LATEX_ASTERISK_PLACEHOLDER)
+            modified_content = modified_content.replace('_', LATEX_UNDERSCORE_PLACEHOLDER)
+            return f'\\({modified_content}\\)'
 
         return match.group(0)  # Fallback
 
@@ -248,7 +240,9 @@ def process_markdown_content(string):
     string = re.sub(r"(.)```", r"\1\n```", string)
 
     # Protect asterisks and underscores within all LaTeX blocks before markdown conversion
-    string = _LATEX_PATTERN.sub(protect_asterisks_underscores_in_latex, string)
+    latex_pattern = re.compile(r'((?:^|[\r\n\s])\$\$[^`]*?\$\$)|\\\[(.*?)\\\]|\\\((.*?)\\\)',
+                               re.DOTALL)
+    string = latex_pattern.sub(protect_asterisks_underscores_in_latex, string)
 
     result = ''
     is_code = False
@@ -312,7 +306,6 @@ def process_markdown_content(string):
     # Restore the LaTeX asterisks and underscores after markdown conversion
     html_output = html_output.replace(LATEX_ASTERISK_PLACEHOLDER, '*')
     html_output = html_output.replace(LATEX_UNDERSCORE_PLACEHOLDER, '_')
-    html_output = html_output.replace(LATEX_PIPE_PLACEHOLDER, '|')
 
     # Remove extra newlines before </code>
     html_output = re.sub(r'\s*</code>', '</code>', html_output)
diff --git a/modules/image_models.py b/modules/image_models.py
index e244c3c8..290aaf19 100644
--- a/modules/image_models.py
+++ b/modules/image_models.py
@@ -10,49 +10,72 @@ def get_quantization_config(quant_method):
     Get the appropriate quantization config based on the selected method.
     Applies quantization to both the transformer and the text_encoder.
     """
-    if quant_method == 'none' or not quant_method:
-        return None
-
     import torch
+    # Import BitsAndBytesConfig from both diffusers (transformer) and transformers (text_encoder)
     from diffusers import BitsAndBytesConfig as DiffusersBnBConfig
     from diffusers import TorchAoConfig
     from diffusers.quantizers import PipelineQuantizationConfig
     from transformers import BitsAndBytesConfig as TransformersBnBConfig
 
-    torchao_methods = {
-        'torchao-int8wo': 'int8wo',
-        'torchao-fp4': 'fp4_e2m1',
-        'torchao-float8wo': 'float8wo',
-    }
+    if quant_method == 'none' or not quant_method:
+        return None
 
-    if quant_method == 'bnb-8bit':
+    # Bitsandbytes 8-bit quantization
+    elif quant_method == 'bnb-8bit':
         return PipelineQuantizationConfig(
             quant_mapping={
-                "transformer": DiffusersBnBConfig(load_in_8bit=True),
-                "text_encoder": TransformersBnBConfig(load_in_8bit=True)
+                "transformer": DiffusersBnBConfig(
+                    load_in_8bit=True
+                ),
+                "text_encoder": TransformersBnBConfig(
+                    load_in_8bit=True
+                )
             }
         )
 
+    # Bitsandbytes 4-bit quantization
     elif quant_method == 'bnb-4bit':
-        bnb_4bit_kwargs = dict(
-            load_in_4bit=True,
-            bnb_4bit_quant_type="nf4",
-            bnb_4bit_compute_dtype=torch.bfloat16,
-            bnb_4bit_use_double_quant=True
-        )
         return PipelineQuantizationConfig(
             quant_mapping={
-                "transformer": DiffusersBnBConfig(**bnb_4bit_kwargs),
-                "text_encoder": TransformersBnBConfig(**bnb_4bit_kwargs)
+                "transformer": DiffusersBnBConfig(
+                    load_in_4bit=True,
+                    bnb_4bit_quant_type="nf4",
+                    bnb_4bit_compute_dtype=torch.bfloat16,
+                    bnb_4bit_use_double_quant=True
+                ),
+                "text_encoder": TransformersBnBConfig(
+                    load_in_4bit=True,
+                    bnb_4bit_quant_type="nf4",
+                    bnb_4bit_compute_dtype=torch.bfloat16,
+                    bnb_4bit_use_double_quant=True
+                )
             }
         )
 
-    elif quant_method in torchao_methods:
-        ao_type = torchao_methods[quant_method]
+    # torchao int8 weight-only
+    elif quant_method == 'torchao-int8wo':
         return PipelineQuantizationConfig(
             quant_mapping={
-                "transformer": TorchAoConfig(ao_type),
-                "text_encoder": TorchAoConfig(ao_type)
+                "transformer": TorchAoConfig("int8wo"),
+                "text_encoder": TorchAoConfig("int8wo")
+            }
+        )
+
+    # torchao fp4 (e2m1)
+    elif quant_method == 'torchao-fp4':
+        return PipelineQuantizationConfig(
+            quant_mapping={
+                "transformer": TorchAoConfig("fp4_e2m1"),
+                "text_encoder": TorchAoConfig("fp4_e2m1")
+            }
+        )
+
+    # torchao float8 weight-only
+    elif quant_method == 'torchao-float8wo':
+        return PipelineQuantizationConfig(
+            quant_mapping={
+                "transformer": TorchAoConfig("float8wo"),
+                "text_encoder": TorchAoConfig("float8wo")
             }
         )
 
@@ -129,7 +152,7 @@ def load_image_model(model_name, dtype='bfloat16', attn_backend='sdpa', cpu_offl
 
         modules = ["transformer", "unet"]
 
-        # Set attention backend (diffusers defaults to native/SDPA)
+        # Set attention backend
         if attn_backend == 'flash_attention_2':
             for name in modules:
                 mod = getattr(pipe, name, None)
diff --git a/modules/llama_cpp_server.py b/modules/llama_cpp_server.py
index c01f5d5b..34080466 100644
--- a/modules/llama_cpp_server.py
+++ b/modules/llama_cpp_server.py
@@ -373,7 +373,6 @@ class LlamaServer:
         """Check if a port is available for use."""
         with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
             try:
-                s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
                 s.bind(('', port))
                 return True
             except OSError:
diff --git a/modules/models_settings.py b/modules/models_settings.py
index b10d780c..eafa0581 100644
--- a/modules/models_settings.py
+++ b/modules/models_settings.py
@@ -400,19 +400,14 @@ def load_instruction_template(template):
     if template == 'None':
         return ''
 
-    for name in (template, 'Alpaca'):
-        path = shared.user_data_dir / 'instruction-templates' / f'{name}.yaml'
-        try:
-            with open(path, 'r', encoding='utf-8') as f:
-                file_contents = f.read()
-        except FileNotFoundError:
-            if name == template:
-                logger.warning(f"Instruction template '{template}' not found, falling back to Alpaca")
-            continue
-
-        break
+    for filepath in [shared.user_data_dir / 'instruction-templates' / f'{template}.yaml', shared.user_data_dir / 'instruction-templates' / 'Alpaca.yaml']:
+        if filepath.exists():
+            break
     else:
         return ''
+
+    with open(filepath, 'r', encoding='utf-8') as f:
+        file_contents = f.read()
     data = yaml.safe_load(file_contents)
     if 'instruction_template' in data:
         return data['instruction_template']
diff --git a/modules/prompts.py b/modules/prompts.py
index 85dc32e3..d107ce5a 100644
--- a/modules/prompts.py
+++ b/modules/prompts.py
@@ -1,7 +1,6 @@
 from pathlib import Path
 
 from modules import shared, utils
-from modules.utils import sanitize_filename
 from modules.text_generation import get_encoded_length
 
 
@@ -19,7 +18,6 @@ def load_prompt(fname):
 
         return initial_content
 
-    fname = sanitize_filename(fname)
     file_path = shared.user_data_dir / 'logs' / 'notebook' / f'{fname}.txt'
     if file_path.exists():
         with open(file_path, 'r', encoding='utf-8') as f:
diff --git a/modules/reasoning.py b/modules/reasoning.py
index 2b260818..aa1939b8 100644
--- a/modules/reasoning.py
+++ b/modules/reasoning.py
@@ -7,7 +7,6 @@ THINKING_FORMATS = [
     ('<|channel|>analysis<|message|>', '<|end|>', '<|channel|>final<|message|>'),
     ('<|channel|>commentary<|message|>', '<|end|>', '<|channel|>final<|message|>'),
     ('<seed:think>', '</seed:think>', None),
-    ('<|channel>thought', '<channel|>', None),  # Gemma 4
     ('<|think|>', '<|end|>', '<|content|>'),  # Solar Open
     # ('Thinking Process:', '</think>', None),  # Qwen3.5 verbose thinking outside tags -- removed: too prone to false positives in streaming
     (None, '</think>', None),  # End-only variant (e.g., Qwen3-next)
@@ -73,16 +72,9 @@ def extract_reasoning(text, html_escaped=False):
                 if content_pos != -1:
                     content_start = content_pos + len(content_esc)
                 else:
-                    # Content tag not present yet.  In GPT-OSS the region
-                    # between <|end|> and the content tag contains internal
-                    # markup (<|start|>assistant…) that must not be shown.
-                    # Suppress it to prevent tag leaks during streaming.
-                    remainder = text[end_pos + len(end_esc):].lstrip()
-                    framing_token = esc('<|start|>')
-                    if not remainder or remainder.startswith(framing_token) or framing_token.startswith(remainder):
-                        content_start = len(text)
-                    else:
-                        content_start = end_pos + len(end_esc)
+                    # Content tag not present — fall back to content after
+                    # end_tag (e.g. GPT-OSS tool calls skip the final channel).
+                    content_start = end_pos + len(end_esc)
             else:
                 content_start = end_pos + len(end_esc)
 
diff --git a/modules/shared.py b/modules/shared.py
index e04f28f3..13843f0c 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -259,7 +259,6 @@ settings = {
     'enable_web_search': False,
     'web_search_pages': 3,
     'selected_tools': [],
-    'mcp_servers': '',
     'prompt-notebook': '',
     'preset': 'Top-P' if (user_data_dir / 'presets/Top-P.yaml').exists() else None,
     'max_new_tokens': 512,
@@ -364,7 +363,7 @@ settings = {
     'image_llm_variations_prompt': 'Write a variation of the image generation prompt above. Consider the intent of the user with that prompt and write something that will likely please them, with added details. Output only the new prompt. Do not add any explanations, prefixes, or additional text.',
     'image_model_menu': 'None',
     'image_dtype': 'bfloat16',
-    'image_attn_backend': 'sdpa',
+    'image_attn_backend': 'flash_attention_2',
     'image_cpu_offload': False,
     'image_compile': False,
     'image_quant': 'none',
diff --git a/modules/tool_parsing.py b/modules/tool_parsing.py
index aa3e0e95..ec49f77f 100644
--- a/modules/tool_parsing.py
+++ b/modules/tool_parsing.py
@@ -27,11 +27,10 @@ TOOL_CALL_OPENING_MARKERS = [
     '[TOOL_CALLS]',
     'to=functions.',
     '<|channel|>commentary',
-    '<|tool_call>call:',
 ]
 
 
-def streaming_tool_buffer_check(text, markers=None, tool_names=None, check_bare_names=False, partial_match=True):
+def streaming_tool_buffer_check(text, markers=None, tool_names=None, check_bare_names=False):
     '''
     Check whether streaming output should be withheld because it may
     contain tool-call markup.
@@ -43,10 +42,6 @@ def streaming_tool_buffer_check(text, markers=None, tool_names=None, check_bare_
         tool_names: List of tool function names.
         check_bare_names: Whether to do partial-prefix matching on tool
                           names (for models with unknown template format).
-        partial_match: Whether to check partial prefixes of markers/names.
-                       Set to False for end-of-generation checks where a
-                       partial prefix is just normal text, not an incomplete
-                       tool call.
     '''
     # Strip thinking blocks so tool-call syntax inside <think> doesn't
     # trigger false positives.
@@ -64,9 +59,6 @@ def streaming_tool_buffer_check(text, markers=None, tool_names=None, check_bare_
             if name + '{' in text or name + ' {' in text:
                 return True
 
-    if not partial_match:
-        return False
-
     # Partial-prefix matching: only for template-specific markers.
     for marker in (markers if markers is not None else TOOL_CALL_OPENING_MARKERS):
         for prefix_len in range(min(len(marker) - 1, len(text)), 0, -1):
@@ -408,78 +400,6 @@ def _parse_glm_tool_calls(answer: str, tool_names: list[str]):
     return matches, start_pos
 
 
-def _extract_gemma4_balanced(text, start):
-    """Extract balanced braces from Gemma 4 format, using <|"|> as string delimiters."""
-    if start >= len(text) or text[start] != '{':
-        return None
-    depth = 0
-    in_string = False
-    quote_token = '<|"|>'
-    quote_len = len(quote_token)
-    i = start
-    while i < len(text):
-        if text[i:i + quote_len] == quote_token:
-            in_string = not in_string
-            i += quote_len
-            continue
-        if in_string:
-            i += 1
-            continue
-        c = text[i]
-        if c == '{':
-            depth += 1
-        elif c == '}':
-            depth -= 1
-            if depth == 0:
-                return text[start:i + 1]
-        i += 1
-    return None
-
-
-def _parse_gemma4_tool_calls(answer: str, tool_names: list[str]):
-    """Parse Gemma 4-style tool calls.
-
-    Format:
-        <|tool_call>call:func_name{key:<|"|>value<|"|>,...}<tool_call|>
-
-    Values use <|"|> tokens instead of standard JSON quotes, and keys are
-    bare identifiers.
-    """
-    matches = []
-    start_pos = None
-
-    for m in re.finditer(r'<\|tool_call>call:([^\s{]+)\s*', answer):
-        func_name = m.group(1).strip()
-        if func_name not in tool_names:
-            continue
-
-        brace_start = m.end()
-        if brace_start >= len(answer) or answer[brace_start] != '{':
-            continue
-
-        content = _extract_gemma4_balanced(answer, brace_start)
-        if content is None:
-            continue
-
-        # Convert to JSON: split on <|"|> tokens so that key quoting
-        # only applies outside string values (even-indexed parts),
-        # then rejoin with real quotes.
-        parts = content.split('<|"|>')
-        for idx in range(0, len(parts), 2):
-            parts[idx] = re.sub(r'(^|[{,\[])\s*(\w+)\s*:', r'\1"\2":', parts[idx])
-        json_str = '"'.join(parts)
-
-        try:
-            arguments = json.loads(json_str)
-            if start_pos is None:
-                start_pos = m.start()
-            matches.append(_make_tool_call(func_name, arguments))
-        except (json.JSONDecodeError, ValueError):
-            pass
-
-    return matches, start_pos
-
-
 def _parse_pythonic_tool_calls(answer: str, tool_names: list[str]):
     """Parse pythonic-style tool calls used by Llama 4 and similar models.
 
@@ -552,11 +472,6 @@ TOOL_CALL_FORMATS = [
         'parser': _parse_channel_tool_calls,
         'markers': ['to=functions.', '<|channel|>commentary'],
     },
-    {
-        'template_hints': ['<|tool_call>call:'],
-        'parser': _parse_gemma4_tool_calls,
-        'markers': ['<|tool_call>call:'],
-    },
     {
         'template_hints': ['minimax:tool_call'],
         'parser': _parse_minimax_tool_calls,
@@ -589,7 +504,6 @@ ALL_PARSERS = [
     _parse_deep_seek_tool_calls,
     _parse_kimi_tool_calls,
     _parse_channel_tool_calls,
-    _parse_gemma4_tool_calls,
     _parse_minimax_tool_calls,
     _parse_glm_tool_calls,
     _parse_xml_param_tool_calls,
@@ -638,15 +552,9 @@ def parse_tool_call(answer: str, tool_names: list[str], return_prefix: bool = Fa
     # Strip thinking blocks so tool-call syntax inside <think> is ignored.
     original_answer = answer
     _, answer = extract_reasoning(answer)
-    # Reasoning extraction returns empty content when GPT-OSS internal
-    # markup (<|start|>assistant…) follows the thinking block without a
-    # content tag.  Fall back to the full text so tool-call markers can
-    # be found.
-    if not answer.strip():
-        answer = original_answer
-        reasoning_offset = 0
-    else:
-        reasoning_offset = len(original_answer) - len(answer)
+    # Offset between original and stripped text, used to map start_pos
+    # back to the original string when returning a prefix.
+    reasoning_offset = len(original_answer) - len(answer)
 
     matches = []
     start_pos = None
@@ -712,8 +620,6 @@ def parse_tool_call(answer: str, tool_names: list[str], return_prefix: bool = Fa
                 if not isinstance(candidates, list):
                     candidates = [candidates]
                 for candidate_dict in candidates:
-                    if not isinstance(candidate_dict, dict):
-                        continue
                     checked_candidate = check_and_sanitize_tool_call_candidate(candidate_dict, tool_names)
                     if checked_candidate is not None:
                         matches.append(checked_candidate)
diff --git a/modules/tool_use.py b/modules/tool_use.py
index f9ddf940..e22b1798 100644
--- a/modules/tool_use.py
+++ b/modules/tool_use.py
@@ -1,4 +1,3 @@
-import asyncio
 import importlib.util
 import json
 
@@ -56,119 +55,6 @@ def load_tools(selected_names):
     return tool_defs, executors
 
 
-def _parse_mcp_servers(servers_str):
-    """Parse MCP servers textbox: one server per line, format 'url' or 'url,Header: value,Header2: value2'."""
-    servers = []
-    for line in servers_str.strip().splitlines():
-        line = line.strip()
-        if not line:
-            continue
-        parts = line.split(',')
-        url = parts[0].strip()
-        headers = {}
-        for part in parts[1:]:
-            part = part.strip()
-            if ':' in part:
-                key, val = part.split(':', 1)
-                headers[key.strip()] = val.strip()
-        servers.append((url, headers))
-    return servers
-
-
-def _mcp_tool_to_openai(tool):
-    """Convert an MCP Tool object to OpenAI-format tool dict."""
-    return {
-        "type": "function",
-        "function": {
-            "name": tool.name,
-            "description": tool.description or "",
-            "parameters": tool.inputSchema or {"type": "object", "properties": {}}
-        }
-    }
-
-
-async def _mcp_session(url, headers, callback):
-    """Open an MCP session and pass it to the callback."""
-    from mcp.client.streamable_http import streamablehttp_client
-    from mcp import ClientSession
-
-    async with streamablehttp_client(url, headers=headers or None) as (read_stream, write_stream, _):
-        async with ClientSession(read_stream, write_stream) as session:
-            await session.initialize()
-            return await callback(session)
-
-
-def _make_mcp_executor(name, url, headers):
-    def executor(arguments):
-        return asyncio.run(_call_mcp_tool(name, arguments, url, headers))
-    return executor
-
-
-async def _connect_mcp_server(url, headers):
-    """Connect to one MCP server and return (tool_defs, executors)."""
-
-    async def _discover(session):
-        result = await session.list_tools()
-        tool_defs = []
-        executors = {}
-        for tool in result.tools:
-            tool_defs.append(_mcp_tool_to_openai(tool))
-            executors[tool.name] = _make_mcp_executor(tool.name, url, headers)
-        return tool_defs, executors
-
-    return await _mcp_session(url, headers, _discover)
-
-
-async def _call_mcp_tool(name, arguments, url, headers):
-    """Connect to an MCP server and call a single tool."""
-
-    async def _invoke(session):
-        result = await session.call_tool(name, arguments)
-        parts = []
-        for content in result.content:
-            if hasattr(content, 'text'):
-                parts.append(content.text)
-            else:
-                parts.append(str(content))
-        return '\n'.join(parts) if parts else ''
-
-    return await _mcp_session(url, headers, _invoke)
-
-
-async def _connect_all_mcp_servers(servers):
-    """Connect to all MCP servers concurrently."""
-    results = await asyncio.gather(
-        *(_connect_mcp_server(url, headers) for url, headers in servers),
-        return_exceptions=True
-    )
-    all_defs = []
-    all_executors = {}
-    for (url, _), result in zip(servers, results):
-        if isinstance(result, Exception):
-            logger.exception(f'Failed to connect to MCP server "{url}"', exc_info=result)
-            continue
-        defs, execs = result
-        for td, (fn, ex) in zip(defs, execs.items()):
-            if fn in all_executors:
-                logger.warning(f'MCP tool "{fn}" from {url} conflicts with an already loaded tool. Skipping.')
-                continue
-            all_defs.append(td)
-            all_executors[fn] = ex
-    return all_defs, all_executors
-
-
-def load_mcp_tools(servers_str):
-    """
-    Parse MCP servers string and discover tools from each server.
-    Returns (tool_defs, executors) in the same format as load_tools.
-    """
-    servers = _parse_mcp_servers(servers_str)
-    if not servers:
-        return [], {}
-
-    return asyncio.run(_connect_all_mcp_servers(servers))
-
-
 def execute_tool(func_name, arguments, executors):
     """Execute a tool by function name. Returns result as a JSON string."""
     fn = executors.get(func_name)
diff --git a/modules/training.py b/modules/training.py
index bca4f02e..145353c6 100644
--- a/modules/training.py
+++ b/modules/training.py
@@ -52,7 +52,7 @@ def create_ui():
                         with gr.Column():
                             always_override = gr.Checkbox(label='Override Existing Files', value=False, info='If the name is the same, checking will replace the existing file, and unchecking will load and continue from it (the rank must be the same).', elem_classes=['no-background'])
 
-                    with gr.Accordion(label='Target Modules', open=False):
+                    with gr.Accordion(label='Target Modules', open=False, elem_classes='tgw-accordion'):
                         gr.Markdown("Selects which modules to target in training. Targeting more modules is closer to a full fine-tune at the cost of increased VRAM and adapter size.")
                         all_linear = gr.Checkbox(label='Target all linear layers', value=True, info='Targets every nn.Linear layer except lm_head. Works for any model architecture. When checked, the individual module checkboxes below are ignored.', elem_classes=['no-background'])
                         with gr.Row():
@@ -87,7 +87,7 @@ def create_ui():
                             with gr.Row():
                                 lr_scheduler_type = gr.Dropdown(label='LR Scheduler', value='cosine', choices=['linear', 'constant', 'constant_with_warmup', 'cosine', 'cosine_with_restarts', 'polynomial', 'inverse_sqrt'], info='Learning rate scheduler - defines how the learning rate changes over time. "Constant" means never change, "linear" means to go in a straight line from the learning rate down to 0, cosine follows a curve, etc.', elem_classes=['slim-dropdown'])
 
-                    with gr.Accordion(label='Advanced Options', open=False):
+                    with gr.Accordion(label='Advanced Options', open=False, elem_classes='tgw-accordion'):
                         with gr.Row():
                             with gr.Column():
                                 optimizer = gr.Dropdown(label='Optimizer', value='adamw_torch', choices=['adamw_hf', 'adamw_torch', 'adamw_torch_fused', 'adamw_torch_xla', 'adamw_apex_fused', 'adafactor', 'adamw_bnb_8bit', 'adamw_anyprecision', 'sgd', 'adagrad'], info='Optimizer algorithm. adamw_torch is the standard choice. adamw_bnb_8bit uses less VRAM. adafactor is memory-efficient for large models.', elem_classes=['slim-dropdown'])
diff --git a/modules/ui.py b/modules/ui.py
index 3a8390f7..02b5a9fb 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -75,7 +75,7 @@ if not shared.args.old_colors:
         background_fill_primary_dark='var(--darker-gray, #1C1C1D)',
         body_background_fill="white",
         block_background_fill="transparent",
-        body_text_color='#1a1a1a',
+        body_text_color='rgb(64, 64, 64)',
         button_secondary_background_fill="white",
         button_secondary_border_color="var(--border-color-primary)",
         block_title_text_color='*body_text_color',
@@ -209,7 +209,6 @@ def list_interface_input_elements():
         'textbox',
         'start_with',
         'selected_tools',
-        'mcp_servers',
         'mode',
         'chat_style',
         'chat-instruct_command',
@@ -435,7 +434,6 @@ def setup_auto_save():
         'custom_system_message',
         'chat_template_str',
         'selected_tools',
-        'mcp_servers',
 
         # Parameters tab (ui_parameters.py) - Generation parameters
         'preset_menu',
diff --git a/modules/ui_chat.py b/modules/ui_chat.py
index 14489d96..10d05f65 100644
--- a/modules/ui_chat.py
+++ b/modules/ui_chat.py
@@ -52,7 +52,7 @@ def create_ui():
                 shared.gradio['html_display'] = gr.HTML(value=chat_html_wrapper({'internal': [], 'visible': [], 'metadata': {}}, '', '', 'chat', 'cai-chat', '')['html'], visible=True)
                 with gr.Row(elem_id="chat-input-row"):
                     with gr.Column(scale=1, elem_id='gr-hover-container'):
-                        gr.HTML(value='<div class="hover-element" onclick="void(0)"><span id="hover-element-button"><svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round"><line x1="4" y1="6" x2="20" y2="6"></line><line x1="4" y1="12" x2="20" y2="12"></line><line x1="4" y1="18" x2="20" y2="18"></line></svg></span><div class="hover-menu" id="hover-menu"></div></div>', elem_id='gr-hover')
+                        gr.HTML(value='<div class="hover-element" onclick="void(0)"><span style="width: 100px; display: block" id="hover-element-button">&#9776;</span><div class="hover-menu" id="hover-menu"></div>', elem_id='gr-hover')
 
                     with gr.Column(scale=10, elem_id='chat-input-container'):
                         shared.gradio['textbox'] = gr.MultimodalTextbox(label='', placeholder='Send a message', file_types=['text', '.pdf', 'image'], file_count="multiple", elem_id='chat-input', elem_classes=['add_scrollbar'])
@@ -105,9 +105,6 @@ def create_ui():
 
                 shared.gradio['selected_tools'].change(fn=sync_web_tools, inputs=[shared.gradio['selected_tools']], outputs=[shared.gradio['selected_tools']], show_progress=False)
 
-                with gr.Accordion('MCP servers', open=False):
-                    shared.gradio['mcp_servers'] = gr.Textbox(value=shared.settings.get('mcp_servers', ''), lines=3, max_lines=3, label='', info='One url per line. For headers, write url,Header: value,Header2: value2', elem_classes=['add_scrollbar'])
-
                 gr.HTML("<div class='sidebar-vertical-separator'></div>")
 
                 with gr.Row():
diff --git a/modules/ui_default.py b/modules/ui_default.py
index 48cb2fc2..2c367cca 100644
--- a/modules/ui_default.py
+++ b/modules/ui_default.py
@@ -10,7 +10,7 @@ from modules.text_generation import (
     stop_everything_event
 )
 from modules.ui_notebook import store_notebook_state_and_debounce
-from modules.utils import gradio, sanitize_filename
+from modules.utils import gradio
 
 inputs = ('textbox-default', 'interface_state')
 outputs = ('output_textbox', 'html-default')
@@ -167,7 +167,6 @@ def handle_new_prompt():
 
 
 def handle_delete_prompt_confirm_default(prompt_name):
-    prompt_name = sanitize_filename(prompt_name)
     available_prompts = utils.get_available_prompts()
     current_index = available_prompts.index(prompt_name) if prompt_name in available_prompts else 0
 
@@ -200,8 +199,6 @@ def handle_rename_prompt_click_default(current_name):
 
 
 def handle_rename_prompt_confirm_default(new_name, current_name):
-    new_name = sanitize_filename(new_name)
-    current_name = sanitize_filename(current_name)
     old_path = shared.user_data_dir / "logs" / "notebook" / f"{current_name}.txt"
     new_path = shared.user_data_dir / "logs" / "notebook" / f"{new_name}.txt"
 
diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py
index 727aa7b1..1efb2479 100644
--- a/modules/ui_image_generation.py
+++ b/modules/ui_image_generation.py
@@ -798,9 +798,6 @@ def generate(state, save_images=True):
         if seed == -1:
             seed = random.randint(0, 2**32 - 1)
 
-        # Store resolved seed back so callers (e.g. API) can access it
-        state['image_seed_resolved'] = seed
-
         device = get_device()
         if device is None:
             device = "cpu"
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index 9c8306f5..16505afa 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -54,6 +54,7 @@ def create_ui():
                             if not shared.args.portable:
                                 shared.gradio['ik'] = gr.Checkbox(label="ik", value=shared.args.ik, info='Use ik_llama.cpp instead of upstream llama.cpp.')
 
+                            shared.gradio['cpu_moe'] = gr.Checkbox(label="cpu-moe", value=shared.args.cpu_moe, info='Move the experts to the CPU. Saves VRAM on MoE models.')
                             shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming-llm", value=shared.args.streaming_llm, info='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
                             shared.gradio['load_in_8bit'] = gr.Checkbox(label="load-in-8bit", value=shared.args.load_in_8bit)
                             shared.gradio['load_in_4bit'] = gr.Checkbox(label="load-in-4bit", value=shared.args.load_in_4bit)
@@ -66,13 +67,13 @@ def create_ui():
                             )
 
                             # Multimodal
-                            with gr.Accordion("Multimodal (vision)", open=False) as shared.gradio['mmproj_accordion']:
+                            with gr.Accordion("Multimodal (vision)", open=False, elem_classes='tgw-accordion') as shared.gradio['mmproj_accordion']:
                                 with gr.Row():
                                     shared.gradio['mmproj'] = gr.Dropdown(label="mmproj file", choices=utils.get_available_mmproj(), value=lambda: shared.args.mmproj or 'None', elem_classes='slim-dropdown', info=f'Select a file that matches your model. Must be placed in {shared.user_data_dir}/mmproj/', interactive=not mu)
                                     ui.create_refresh_button(shared.gradio['mmproj'], lambda: None, lambda: {'choices': utils.get_available_mmproj()}, 'refresh-button', interactive=not mu)
 
                             # Speculative decoding
-                            with gr.Accordion("Speculative decoding", open=False) as shared.gradio['speculative_decoding_accordion']:
+                            with gr.Accordion("Speculative decoding", open=False, elem_classes='tgw-accordion') as shared.gradio['speculative_decoding_accordion']:
                                 shared.gradio['draft_max'] = gr.Number(label="draft-max", precision=0, step=1, value=shared.args.draft_max, info='Maximum number of tokens to draft for speculative decoding. Recommended: 4 for draft model, 64 for n-gram.')
 
                                 gr.Markdown('#### Draft model')
@@ -91,7 +92,7 @@ def create_ui():
                                 shared.gradio['spec_ngram_min_hits'] = gr.Number(label="spec-ngram-min-hits", precision=0, step=1, value=shared.args.spec_ngram_min_hits, info='Minimum n-gram hits for ngram-map speculative decoding.', visible=shared.args.spec_type != 'none')
 
                     gr.Markdown("## Other options")
-                    with gr.Accordion("See more options", open=False):
+                    with gr.Accordion("See more options", open=False, elem_classes='tgw-accordion'):
                         with gr.Row():
                             with gr.Column():
                                 shared.gradio['parallel'] = gr.Slider(label="parallel", minimum=1, step=1, maximum=64, value=shared.args.parallel, info='Number of parallel request slots for the API. The context size is divided equally among slots. For example, to have 4 slots with 8192 context each, set ctx_size to 32768.')
@@ -108,7 +109,6 @@ def create_ui():
                             with gr.Column():
                                 shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='Use PyTorch in CPU mode.')
                                 shared.gradio['disk'] = gr.Checkbox(label="disk", value=shared.args.disk)
-                                shared.gradio['cpu_moe'] = gr.Checkbox(label="cpu-moe", value=shared.args.cpu_moe, info='Move the experts to the CPU. Saves VRAM on MoE models.')
                                 shared.gradio['row_split'] = gr.Checkbox(label="row_split", value=shared.args.row_split, info='Split the model by rows across GPUs. This may improve multi-gpu performance.')
                                 shared.gradio['no_kv_offload'] = gr.Checkbox(label="no_kv_offload", value=shared.args.no_kv_offload, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces performance.')
                                 shared.gradio['no_mmap'] = gr.Checkbox(label="no-mmap", value=shared.args.no_mmap)
diff --git a/modules/ui_notebook.py b/modules/ui_notebook.py
index 88f00ac5..f550e646 100644
--- a/modules/ui_notebook.py
+++ b/modules/ui_notebook.py
@@ -11,7 +11,7 @@ from modules.text_generation import (
     get_token_ids,
     stop_everything_event
 )
-from modules.utils import gradio, sanitize_filename
+from modules.utils import gradio
 
 _notebook_file_lock = threading.Lock()
 _notebook_auto_save_timer = None
@@ -202,7 +202,6 @@ def handle_new_prompt():
 
 
 def handle_delete_prompt_confirm_notebook(prompt_name):
-    prompt_name = sanitize_filename(prompt_name)
     available_prompts = utils.get_available_prompts()
     current_index = available_prompts.index(prompt_name) if prompt_name in available_prompts else 0
 
@@ -234,8 +233,6 @@ def handle_rename_prompt_click_notebook(current_name):
 
 
 def handle_rename_prompt_confirm_notebook(new_name, current_name):
-    new_name = sanitize_filename(new_name)
-    current_name = sanitize_filename(current_name)
     old_path = shared.user_data_dir / "logs" / "notebook" / f"{current_name}.txt"
     new_path = shared.user_data_dir / "logs" / "notebook" / f"{new_name}.txt"
 
@@ -252,7 +249,6 @@ def handle_rename_prompt_confirm_notebook(new_name, current_name):
 
 def autosave_prompt(text, prompt_name):
     """Automatically save the text to the selected prompt file"""
-    prompt_name = sanitize_filename(prompt_name)
     if prompt_name and text.strip():
         prompt_path = shared.user_data_dir / "logs" / "notebook" / f"{prompt_name}.txt"
         prompt_path.parent.mkdir(parents=True, exist_ok=True)
diff --git a/modules/utils.py b/modules/utils.py
index c4acf714..b01953ee 100644
--- a/modules/utils.py
+++ b/modules/utils.py
@@ -105,9 +105,6 @@ def resolve_model_path(model_name_or_path, image_model=False):
     before the default models directory.
     """
 
-    if model_name_or_path is None:
-        raise FileNotFoundError("No model specified.")
-
     path_candidate = Path(model_name_or_path)
     if path_candidate.exists():
         return path_candidate
diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt
index ed5841b8..b38ae848 100644
--- a/requirements/full/requirements.txt
+++ b/requirements/full/requirements.txt
@@ -9,7 +9,6 @@ flash-linear-attention==0.4.*
 huggingface-hub==1.5.*
 jinja2==3.1.6
 markdown
-mcp==1.27.0
 numpy==2.2.*
 pandas
 peft==0.18.*
@@ -32,8 +31,8 @@ tqdm
 wandb
 
 # Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
 
 # API
 flask_cloudflared==0.0.15
@@ -41,10 +40,10 @@ sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
 https://github.com/turboderp-org/exllamav3/releases/download/v0.0.28/exllamav3-0.0.28+cu128.torch2.9.0-cp313-cp313-win_amd64.whl; platform_system == "Windows" and python_version == "3.13"
 https://github.com/turboderp-org/exllamav3/releases/download/v0.0.28/exllamav3-0.0.28+cu128.torch2.9.0-cp313-cp313-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.13"
 https://github.com/kingbri1/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu128torch2.9.0cxx11abiFALSE-cp313-cp313-win_amd64.whl; platform_system == "Windows" and python_version == "3.13"
diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt
index fe6ce28c..7fb3a7d9 100644
--- a/requirements/full/requirements_amd.txt
+++ b/requirements/full/requirements_amd.txt
@@ -7,7 +7,6 @@ fastapi==0.112.4
 huggingface-hub==1.5.*
 jinja2==3.1.6
 markdown
-mcp==1.27.0
 numpy==2.2.*
 pandas
 peft==0.18.*
@@ -29,8 +28,8 @@ trafilatura==2.0.0
 wandb
 
 # Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
 
 # API
 flask_cloudflared==0.0.15
@@ -38,5 +37,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt
index 09c01a61..4a0f764c 100644
--- a/requirements/full/requirements_apple_intel.txt
+++ b/requirements/full/requirements_apple_intel.txt
@@ -7,7 +7,6 @@ fastapi==0.112.4
 huggingface-hub==1.5.*
 jinja2==3.1.6
 markdown
-mcp==1.27.0
 numpy==2.2.*
 pandas
 peft==0.18.*
@@ -29,8 +28,8 @@ trafilatura==2.0.0
 wandb
 
 # Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
 
 # API
 flask_cloudflared==0.0.15
@@ -38,4 +37,4 @@ sse-starlette==1.6.5
 tiktoken
 
 # Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt
index 42210407..942d5d71 100644
--- a/requirements/full/requirements_apple_silicon.txt
+++ b/requirements/full/requirements_apple_silicon.txt
@@ -7,7 +7,6 @@ fastapi==0.112.4
 huggingface-hub==1.5.*
 jinja2==3.1.6
 markdown
-mcp==1.27.0
 numpy==2.2.*
 pandas
 peft==0.18.*
@@ -29,8 +28,8 @@ trafilatura==2.0.0
 wandb
 
 # Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
 
 # API
 flask_cloudflared==0.0.15
@@ -38,4 +37,4 @@ sse-starlette==1.6.5
 tiktoken
 
 # Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt
index 5cd7ae7d..6b61dca7 100644
--- a/requirements/full/requirements_cpu_only.txt
+++ b/requirements/full/requirements_cpu_only.txt
@@ -7,7 +7,6 @@ fastapi==0.112.4
 huggingface-hub==1.5.*
 jinja2==3.1.6
 markdown
-mcp==1.27.0
 numpy==2.2.*
 pandas
 peft==0.18.*
@@ -29,8 +28,8 @@ trafilatura==2.0.0
 wandb
 
 # Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
 
 # API
 flask_cloudflared==0.0.15
@@ -38,7 +37,7 @@ sse-starlette==1.6.5
 tiktoken
 
 # llama.cpp (CPU only)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt
index 19ac5183..a4d6cc97 100644
--- a/requirements/full/requirements_nowheels.txt
+++ b/requirements/full/requirements_nowheels.txt
@@ -7,7 +7,6 @@ fastapi==0.112.4
 huggingface-hub==1.5.*
 jinja2==3.1.6
 markdown
-mcp==1.27.0
 numpy==2.2.*
 pandas
 peft==0.18.*
@@ -29,8 +28,8 @@ trafilatura==2.0.0
 wandb
 
 # Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
 
 # API
 flask_cloudflared==0.0.15
diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt
index 807ff079..5aff54b2 100644
--- a/requirements/portable/requirements.txt
+++ b/requirements/portable/requirements.txt
@@ -3,7 +3,6 @@ fastapi==0.112.4
 huggingface-hub==1.5.*
 jinja2==3.1.6
 markdown
-mcp==1.27.0
 numpy==2.2.*
 pydantic==2.11.0
 pymupdf==1.27.*
@@ -15,8 +14,8 @@ trafilatura==2.0.0
 tqdm
 
 # Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
 
 # API
 flask_cloudflared==0.0.15
@@ -24,5 +23,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_amd.txt b/requirements/portable/requirements_amd.txt
index 55fe79ea..0771f53e 100644
--- a/requirements/portable/requirements_amd.txt
+++ b/requirements/portable/requirements_amd.txt
@@ -3,7 +3,6 @@ fastapi==0.112.4
 huggingface-hub==1.5.*
 jinja2==3.1.6
 markdown
-mcp==1.27.0
 numpy==2.2.*
 pydantic==2.11.0
 pymupdf==1.27.*
@@ -15,8 +14,8 @@ trafilatura==2.0.0
 tqdm
 
 # Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
 
 # API
 flask_cloudflared==0.0.15
@@ -24,5 +23,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt
index 6d4a63f7..427d59b2 100644
--- a/requirements/portable/requirements_apple_intel.txt
+++ b/requirements/portable/requirements_apple_intel.txt
@@ -3,7 +3,6 @@ fastapi==0.112.4
 huggingface-hub==1.5.*
 jinja2==3.1.6
 markdown
-mcp==1.27.0
 numpy==2.2.*
 pydantic==2.11.0
 pymupdf==1.27.*
@@ -15,8 +14,8 @@ trafilatura==2.0.0
 tqdm
 
 # Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
 
 # API
 flask_cloudflared==0.0.15
@@ -24,4 +23,4 @@ sse-starlette==1.6.5
 tiktoken
 
 # Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt
index aebb7c5b..c47a6ca1 100644
--- a/requirements/portable/requirements_apple_silicon.txt
+++ b/requirements/portable/requirements_apple_silicon.txt
@@ -3,7 +3,6 @@ fastapi==0.112.4
 huggingface-hub==1.5.*
 jinja2==3.1.6
 markdown
-mcp==1.27.0
 numpy==2.2.*
 pydantic==2.11.0
 pymupdf==1.27.*
@@ -15,8 +14,8 @@ trafilatura==2.0.0
 tqdm
 
 # Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
 
 # API
 flask_cloudflared==0.0.15
@@ -24,4 +23,4 @@ sse-starlette==1.6.5
 tiktoken
 
 # Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt
index d7e2b051..e491e357 100644
--- a/requirements/portable/requirements_cpu_only.txt
+++ b/requirements/portable/requirements_cpu_only.txt
@@ -3,7 +3,6 @@ fastapi==0.112.4
 huggingface-hub==1.5.*
 jinja2==3.1.6
 markdown
-mcp==1.27.0
 numpy==2.2.*
 pydantic==2.11.0
 pymupdf==1.27.*
@@ -15,8 +14,8 @@ trafilatura==2.0.0
 tqdm
 
 # Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
 
 # API
 flask_cloudflared==0.0.15
@@ -24,5 +23,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # llama.cpp (CPU only)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
diff --git a/requirements/portable/requirements_cuda131.txt b/requirements/portable/requirements_cuda131.txt
index 42a9a16f..5870983a 100644
--- a/requirements/portable/requirements_cuda131.txt
+++ b/requirements/portable/requirements_cuda131.txt
@@ -3,7 +3,6 @@ fastapi==0.112.4
 huggingface-hub==1.5.*
 jinja2==3.1.6
 markdown
-mcp==1.27.0
 numpy==2.2.*
 pydantic==2.11.0
 pymupdf==1.27.*
@@ -15,8 +14,8 @@ trafilatura==2.0.0
 tqdm
 
 # Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
 
 # API
 flask_cloudflared==0.0.15
@@ -24,5 +23,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_ik.txt b/requirements/portable/requirements_ik.txt
index c3fdb5e8..d11d337d 100644
--- a/requirements/portable/requirements_ik.txt
+++ b/requirements/portable/requirements_ik.txt
@@ -3,7 +3,6 @@ fastapi==0.112.4
 huggingface-hub==1.5.*
 jinja2==3.1.6
 markdown
-mcp==1.27.0
 numpy==2.2.*
 pydantic==2.11.0
 pymupdf==1.27.*
@@ -15,8 +14,8 @@ trafilatura==2.0.0
 tqdm
 
 # Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
 
 # API
 flask_cloudflared==0.0.15
@@ -24,5 +23,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_ik_cpu_only.txt b/requirements/portable/requirements_ik_cpu_only.txt
index ea3ba601..c2b69e1c 100644
--- a/requirements/portable/requirements_ik_cpu_only.txt
+++ b/requirements/portable/requirements_ik_cpu_only.txt
@@ -3,7 +3,6 @@ fastapi==0.112.4
 huggingface-hub==1.5.*
 jinja2==3.1.6
 markdown
-mcp==1.27.0
 numpy==2.2.*
 pydantic==2.11.0
 pymupdf==1.27.*
@@ -15,8 +14,8 @@ trafilatura==2.0.0
 tqdm
 
 # Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
 
 # API
 flask_cloudflared==0.0.15
@@ -24,5 +23,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # ik_llama.cpp (CPU only)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
diff --git a/requirements/portable/requirements_ik_cuda131.txt b/requirements/portable/requirements_ik_cuda131.txt
index 7530375d..7f280930 100644
--- a/requirements/portable/requirements_ik_cuda131.txt
+++ b/requirements/portable/requirements_ik_cuda131.txt
@@ -3,7 +3,6 @@ fastapi==0.112.4
 huggingface-hub==1.5.*
 jinja2==3.1.6
 markdown
-mcp==1.27.0
 numpy==2.2.*
 pydantic==2.11.0
 pymupdf==1.27.*
@@ -15,8 +14,8 @@ trafilatura==2.0.0
 tqdm
 
 # Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
 
 # API
 flask_cloudflared==0.0.15
@@ -24,5 +23,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_nowheels.txt b/requirements/portable/requirements_nowheels.txt
index cafe3cee..322056be 100644
--- a/requirements/portable/requirements_nowheels.txt
+++ b/requirements/portable/requirements_nowheels.txt
@@ -3,7 +3,6 @@ fastapi==0.112.4
 huggingface-hub==1.5.*
 jinja2==3.1.6
 markdown
-mcp==1.27.0
 numpy==2.2.*
 pydantic==2.11.0
 pymupdf==1.27.*
@@ -15,8 +14,8 @@ trafilatura==2.0.0
 tqdm
 
 # Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
 
 # API
 flask_cloudflared==0.0.15
diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt
index 3b8b0573..dfd52be5 100644
--- a/requirements/portable/requirements_vulkan.txt
+++ b/requirements/portable/requirements_vulkan.txt
@@ -3,7 +3,6 @@ fastapi==0.112.4
 huggingface-hub==1.5.*
 jinja2==3.1.6
 markdown
-mcp==1.27.0
 numpy==2.2.*
 pydantic==2.11.0
 pymupdf==1.27.*
@@ -15,8 +14,8 @@ trafilatura==2.0.0
 tqdm
 
 # Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
 
 # API
 flask_cloudflared==0.0.15
@@ -24,5 +23,5 @@ sse-starlette==1.6.5
 tiktoken
 
 # Vulkan wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"