From 01e42a00ffcb82747316a6ba40429919e8efd36d Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 26 Mar 2025 06:01:57 -0700 Subject: [PATCH 01/25] Bump transformers to 4.50 --- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index 83bd3a53..63b539cf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,7 +21,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.49.* +transformers==4.50.* tqdm wandb diff --git a/requirements_amd.txt b/requirements_amd.txt index 1e757ffe..71679c7e 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.49.* +transformers==4.50.* tqdm wandb diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index f74ebf69..cfe9bf00 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.49.* +transformers==4.50.* tqdm wandb diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index dcdeae3f..4e3c0c11 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.49.* +transformers==4.50.* tqdm wandb diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index b823e40e..61c66e82 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.49.* +transformers==4.50.* tqdm wandb diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index fe3f522a..0ab64e79 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.49.* +transformers==4.50.* tqdm wandb diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 014e2e5d..17187b47 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.49.* +transformers==4.50.* tqdm wandb diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 6139c46e..63e051ca 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -21,7 +21,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.49.* +transformers==4.50.* tqdm wandb diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index 858ffff5..533d399b 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.49.* +transformers==4.50.* tqdm wandb From 2bfaf44df0904d8db3cd04e38b33b6be582babb6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 26 Mar 2025 10:03:21 -0300 Subject: [PATCH 02/25] Update accelerate requirement from ==1.4.* to ==1.5.* (#6802) --- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 
+- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index 63b539cf..8c76ff17 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -accelerate==1.4.* +accelerate==1.5.* bitsandbytes==0.45.* colorama datasets diff --git a/requirements_amd.txt b/requirements_amd.txt index 71679c7e..7fcd5c0c 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -1,4 +1,4 @@ -accelerate==1.4.* +accelerate==1.5.* colorama datasets einops diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index cfe9bf00..028d37cc 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -1,4 +1,4 @@ -accelerate==1.4.* +accelerate==1.5.* colorama datasets einops diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 4e3c0c11..284c2dd6 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -1,4 +1,4 @@ -accelerate==1.4.* +accelerate==1.5.* colorama datasets einops diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 61c66e82..43615aeb 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -1,4 +1,4 @@ -accelerate==1.4.* +accelerate==1.5.* colorama datasets einops diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 0ab64e79..9ae8e2a7 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -1,4 +1,4 @@ -accelerate==1.4.* +accelerate==1.5.* colorama datasets einops diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 17187b47..77fe85fe 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -1,4 +1,4 @@ -accelerate==1.4.* +accelerate==1.5.* colorama datasets einops diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 63e051ca..4bebd865 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -1,4 +1,4 @@ -accelerate==1.4.* +accelerate==1.5.* bitsandbytes==0.45.* colorama datasets diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index 533d399b..a7255c0a 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -1,4 +1,4 @@ -accelerate==1.4.* +accelerate==1.5.* colorama datasets einops From 525b1e020776c31e9dc3fd592855983cecfafe07 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 29 Mar 2025 13:43:16 -0700 Subject: [PATCH 03/25] Remove the stalebot --- .github/workflows/stale.yml | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 .github/workflows/stale.yml diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml deleted file mode 100644 index 8eb03299..00000000 --- a/.github/workflows/stale.yml +++ /dev/null @@ -1,22 +0,0 @@ -name: Close inactive issues -on: - schedule: - - cron: "10 23 * * *" - -jobs: - close-issues: - runs-on: ubuntu-latest - permissions: - issues: write - pull-requests: write - steps: - - uses: actions/stale@v5 - with: - stale-issue-message: "" - close-issue-message: "This issue has been closed due to inactivity for 6 months. If you believe it is still relevant, please leave a comment below. You can tag a developer in your comment." 
- days-before-issue-stale: 180 - days-before-issue-close: 0 - stale-issue-label: "stale" - days-before-pr-stale: -1 - days-before-pr-close: -1 - repo-token: ${{ secrets.GITHUB_TOKEN }} From 1bd208c219a16ec1d333f07e8a2bb2b6dd55d22d Mon Sep 17 00:00:00 2001 From: oobabooga Date: Sat, 29 Mar 2025 22:47:10 -0300 Subject: [PATCH 04/25] Add a new chat style: Dark (#6817) --- css/chat_style-Dark.css | 128 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 css/chat_style-Dark.css diff --git a/css/chat_style-Dark.css b/css/chat_style-Dark.css new file mode 100644 index 00000000..7f7f0dbf --- /dev/null +++ b/css/chat_style-Dark.css @@ -0,0 +1,128 @@ +.message { + display: grid; + grid-template-columns: 60px minmax(0, 1fr); + padding-bottom: 28px; + font-size: 18px; + font-family: 'Roboto', Arial, sans-serif; /* Modern font */ + line-height: 1.5; +} + +.circle-you, +.circle-bot { + background-color: #2b2b2b; /* Darker background for circles */ + border-radius: 50%; /* Perfect circle */ + border: 1px solid #4a90e2; /* Soft blue border */ + box-shadow: 0 4px 8px rgba(0, 0, 0, 0.5); /* Soft shadow for depth */ +} + +.circle-bot img, +.circle-you img { + border-radius: 50%; /* Make images circular */ + width: 100%; + height: 100%; + object-fit: cover; +} + +.circle-you, .circle-bot { + width: 64px; /* Smaller size for modern look */ + height: 64px; +} + +.text { + padding-left: 12px; /* Reduced padding for a cleaner layout */ + color: #f0f0f0; /* Light text color for readability */ +} + +.text p { + margin-top: 2px; +} + +.username { + padding-left: 10px; + font-size: 20px; + font-weight: bold; + color: #e0e0e0; /* Light gray text */ + transition: color 0.3s ease; /* Smooth color transition */ +} + +.username:hover { + color: #4a90e2; /* Blue color on hover */ +} + +.message-body { + position: relative; + border: 1px solid rgba(255, 255, 255, 0.1); /* Soft white border */ + border-radius: 8px; /* Slightly rounded corners */ + padding: 15px; + background: #1e1e1e; /* Dark background */ + box-shadow: 0 4px 10px rgba(0, 0, 0, 0.3); /* Subtle shadow for depth */ + transition: background 0.3s ease; /* Smooth transition for background */ +} + +.message-body:hover { + background: #252525; /* Slightly lighter on hover */ +} + +/* Adds 2 extra lines at the top and bottom of the message */ +.message-body::before, +.message-body::after { + content: ""; + position: absolute; + left: 10px; + right: 10px; + height: 1px; + background-color: rgba(255, 255, 255, 0.05); /* Faded lines for subtle separation */ +} + +.message-body::before { + top: 4px; +} + +.message-body::after { + bottom: 4px; +} + +.message-body img { + max-width: 300px; + max-height: 300px; + border-radius: 10px; /* Rounded corners for images */ +} + +.message-body p { + margin-bottom: 0 !important; + font-size: 16px !important; + line-height: 1.5 !important; + color: #e0e0e0 !important; /* Light color for text */ +} + +.message-body p em { + color: #a6a6a6 !important; /* Softer gray for emphasized text */ +} + +@media screen and (max-width: 688px) { + .message { + display: grid; + grid-template-columns: 60px minmax(0, 1fr); + padding-bottom: 25px; + font-size: 15px; + font-family: 'Roboto', Arial, sans-serif; /* Modern font */ + line-height: 1.5; + } + + .circle-you, .circle-bot { + width: 40px; /* Smaller size for mobile */ + height: 40px; + } + + .text { + padding-left: 10px; /* Reduced padding for mobile */ + } + + .message-body p { + font-size: 14px !important; /* Smaller text for mobile */ + } + + 
.username { + font-size: 18px; /* Smaller username for mobile */ + } +} From 79a26d7a5cd24e952b670668e97d50f1369c3e49 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 29 Mar 2025 18:49:48 -0700 Subject: [PATCH 05/25] Lint --- css/chat_style-Dark.css | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/css/chat_style-Dark.css b/css/chat_style-Dark.css index 7f7f0dbf..368a2a16 100644 --- a/css/chat_style-Dark.css +++ b/css/chat_style-Dark.css @@ -3,7 +3,7 @@ grid-template-columns: 60px minmax(0, 1fr); padding-bottom: 28px; font-size: 18px; - font-family: 'Roboto', Arial, sans-serif; /* Modern font */ + font-family: Roboto, Arial, sans-serif; /* Modern font */ line-height: 1.5; } @@ -12,7 +12,7 @@ background-color: #2b2b2b; /* Darker background for circles */ border-radius: 50%; /* Perfect circle */ border: 1px solid #4a90e2; /* Soft blue border */ - box-shadow: 0 4px 8px rgba(0, 0, 0, 0.5); /* Soft shadow for depth */ + box-shadow: 0 4px 8px rgb(0 0 0 / 50%); /* Soft shadow for depth */ } .circle-bot img, @@ -51,11 +51,11 @@ .message-body { position: relative; - border: 1px solid rgba(255, 255, 255, 0.1); /* Soft white border */ + border: 1px solid rgb(255 255 255 / 10%); /* Soft white border */ border-radius: 8px; /* Slightly rounded corners */ padding: 15px; background: #1e1e1e; /* Dark background */ - box-shadow: 0 4px 10px rgba(0, 0, 0, 0.3); /* Subtle shadow for depth */ + box-shadow: 0 4px 10px rgb(0 0 0 / 30%); /* Subtle shadow for depth */ transition: background 0.3s ease; /* Smooth transition for background */ } @@ -71,7 +71,7 @@ left: 10px; right: 10px; height: 1px; - background-color: rgba(255, 255, 255, 0.05); /* Faded lines for subtle separation */ + background-color: rgb(255 255 255 / 5%); /* Faded lines for subtle separation */ } .message-body::before { @@ -99,13 +99,13 @@ color: #a6a6a6 !important; /* Softer gray for emphasized text */ } -@media screen and (max-width: 688px) { +@media screen and (width <= 688px) { .message { display: grid; grid-template-columns: 60px minmax(0, 1fr); padding-bottom: 25px; font-size: 15px; - font-family: 'Roboto', Arial, sans-serif; /* Modern font */ + font-family: Roboto, Arial, sans-serif; /* Modern font */ line-height: 1.5; } From 1981327285c4411cc23472c6e874a895c8cf4424 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 29 Mar 2025 19:17:14 -0700 Subject: [PATCH 06/25] Fix the colab notebook --- Colab-TextGen-GPU.ipynb | 1 + 1 file changed, 1 insertion(+) diff --git a/Colab-TextGen-GPU.ipynb b/Colab-TextGen-GPU.ipynb index 8e305e1d..ebeafc02 100644 --- a/Colab-TextGen-GPU.ipynb +++ b/Colab-TextGen-GPU.ipynb @@ -57,6 +57,7 @@ "from pathlib import Path\n", "\n", "os.environ.pop('PYTHONPATH', None)\n", + "os.environ.pop('MPLBACKEND', None)\n", "\n", "if Path.cwd().name != 'text-generation-webui':\n", " print(\"\\033[1;32;1m\\n --> Installing the web UI. 
This will take a while, but after the initial setup, you can download and test as many models as you like.\\033[0;37;0m\\n\")\n", From 109de34e3b3187eb3f463bf463086a48444013a0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 31 Mar 2025 09:23:28 -0700 Subject: [PATCH 07/25] Remove the old --model-menu flag --- modules/shared.py | 2 +- server.py | 18 ------------------ 2 files changed, 1 insertion(+), 19 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index 2e91f4d5..ea6c581a 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -79,7 +79,6 @@ group.add_argument('--model', type=str, help='Name of the model to load by defau group.add_argument('--lora', type=str, nargs='+', help='The list of LoRAs to load. If you want to load more than one LoRA, write the names separated by spaces.') group.add_argument('--model-dir', type=str, default='models/', help='Path to directory with all the models.') group.add_argument('--lora-dir', type=str, default='loras/', help='Path to directory with all the loras.') -group.add_argument('--model-menu', action='store_true', help='Show a model menu in the terminal when the web UI is first launched.') group.add_argument('--settings', type=str, help='Load the default interface settings from this yaml file. See settings-template.yaml for an example. If you create a file called settings.yaml, this file will be loaded by default without the need to use the --settings flag.') group.add_argument('--extensions', type=str, nargs='+', help='The list of extensions to load. If you want to load more than one extension, write the names separated by spaces.') group.add_argument('--verbose', action='store_true', help='Print the prompts to the terminal.') @@ -215,6 +214,7 @@ group.add_argument('--disable_exllama', action='store_true', help='DEPRECATED') group.add_argument('--disable_exllamav2', action='store_true', help='DEPRECATED') group.add_argument('--wbits', type=int, default=0, help='DEPRECATED') group.add_argument('--groupsize', type=int, default=-1, help='DEPRECATED') +group.add_argument('--model-menu', action='store_true', help='DEPRECATED') args = parser.parse_args() args_defaults = parser.parse_args([]) diff --git a/server.py b/server.py index 31e1c4c6..1f227350 100644 --- a/server.py +++ b/server.py @@ -218,28 +218,10 @@ if __name__ == "__main__": if extension not in shared.args.extensions: shared.args.extensions.append(extension) - available_models = utils.get_available_models() - # Model defined through --model if shared.args.model is not None: shared.model_name = shared.args.model - # Select the model from a command-line menu - elif shared.args.model_menu: - if len(available_models) == 0: - logger.error('No models are available! Please download at least one.') - sys.exit(0) - else: - print('The following models are available:\n') - for i, model in enumerate(available_models): - print(f'{i+1}. {model}') - - print(f'\nWhich one do you want to load? 
1-{len(available_models)}\n') - i = int(input()) - 1 - print() - - shared.model_name = available_models[i] - # If any model has been selected, load it if shared.model_name != 'None': p = Path(shared.model_name) From 77a73cc56122dbabf8fb446f1c1c278923fbbfe9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 31 Mar 2025 21:01:27 -0300 Subject: [PATCH 08/25] Update peft requirement from ==0.12.* to ==0.15.* (#6820) --- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index 8c76ff17..5ab40c42 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,7 +10,7 @@ markdown numba==0.59.* numpy==1.26.* pandas -peft==0.12.* +peft==0.15.* Pillow>=9.5.0 psutil pydantic==2.8.2 diff --git a/requirements_amd.txt b/requirements_amd.txt index 7fcd5c0c..5f278db6 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -9,7 +9,7 @@ markdown numba==0.59.* numpy==1.26.* pandas -peft==0.12.* +peft==0.15.* Pillow>=9.5.0 psutil pydantic==2.8.2 diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 028d37cc..78d14524 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -9,7 +9,7 @@ markdown numba==0.59.* numpy==1.26.* pandas -peft==0.12.* +peft==0.15.* Pillow>=9.5.0 psutil pydantic==2.8.2 diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 284c2dd6..7ab6dfff 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -9,7 +9,7 @@ markdown numba==0.59.* numpy==1.26.* pandas -peft==0.12.* +peft==0.15.* Pillow>=9.5.0 psutil pydantic==2.8.2 diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 43615aeb..0c84a84c 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -9,7 +9,7 @@ markdown numba==0.59.* numpy==1.26.* pandas -peft==0.12.* +peft==0.15.* Pillow>=9.5.0 psutil pydantic==2.8.2 diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 9ae8e2a7..b9eb99b1 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -9,7 +9,7 @@ markdown numba==0.59.* numpy==1.26.* pandas -peft==0.12.* +peft==0.15.* Pillow>=9.5.0 psutil pydantic==2.8.2 diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 77fe85fe..68ae5c17 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -9,7 +9,7 @@ markdown numba==0.59.* numpy==1.26.* pandas -peft==0.12.* +peft==0.15.* Pillow>=9.5.0 psutil pydantic==2.8.2 diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 4bebd865..1a2a670f 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -10,7 +10,7 @@ markdown numba==0.59.* numpy==1.26.* pandas -peft==0.12.* +peft==0.15.* Pillow>=9.5.0 psutil pydantic==2.8.2 diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index a7255c0a..3b61ca39 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -9,7 +9,7 @@ markdown numba==0.59.* numpy==1.26.* pandas -peft==0.12.* +peft==0.15.* Pillow>=9.5.0 psutil pydantic==2.8.2 From cbffcf67ef12938e6b26b7eddffa4327d83e71b0 Mon Sep 17 00:00:00 2001 From: Shixian Sheng Date: Wed, 2 Apr 2025 13:28:29 
-0400 Subject: [PATCH 09/25] Fix links in the ngrok extension README (#6826) --- extensions/ngrok/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/extensions/ngrok/README.md b/extensions/ngrok/README.md index 0324bf98..2e9eb82d 100644 --- a/extensions/ngrok/README.md +++ b/extensions/ngrok/README.md @@ -9,9 +9,9 @@ the `settings.json` file, see the Examples below. Retrieve your authtoken on the # Documentation -For a list of all available options, see [the configuration documentation](https://ngrok.com/docs/ngrok-agent/config/) or [the connect example](https://github.com/ngrok/ngrok-py/blob/main/examples/ngrok-connect-full.py). +For a list of all available options, see [the configuration documentation](https://ngrok.com/docs/ngrok-agent/config/) or [the forward example](https://github.com/ngrok/ngrok-python/blob/main/examples/ngrok-forward-full.py). -The ngrok Python SDK is [on github here](https://github.com/ngrok/ngrok-py). A quickstart guide and a full API reference are included in the [ngrok-py Python API documentation](https://ngrok.github.io/ngrok-py/). +The ngrok Python SDK is [on github here](https://github.com/ngrok/ngrok-py). A quickstart guide and a full API reference are included in the [ngrok-py Python API documentation](https://ngrok.github.io/ngrok-python/). # Running @@ -66,4 +66,4 @@ To add an authtoken instead of using the NGROK_AUTHTOKEN environment variable: "authtoken_from_env":false } } -``` \ No newline at end of file +``` From c010cea7be0ba17623c16d8a2951d55c952d6ba0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 6 Apr 2025 17:17:25 -0700 Subject: [PATCH 10/25] Remove CUDA 11.8 support --- one_click.py | 51 ++++++++++++++------------------------------------- 1 file changed, 14 insertions(+), 37 deletions(-) diff --git a/one_click.py b/one_click.py index effc7d43..2a161e11 100644 --- a/one_click.py +++ b/one_click.py @@ -106,9 +106,7 @@ def update_pytorch(): torver = torch_version() base_cmd = f"python -m pip install --upgrade torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION}" - if "+cu118" in torver: - install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/cu118" - elif "+cu" in torver: + if "+cu" in torver: install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/cu121" elif "+rocm" in torver: install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/rocm6.1" @@ -236,24 +234,21 @@ def install_webui(): choice = os.environ["GPU_CHOICE"].upper() print_big_message(f"Selected GPU choice \"{choice}\" based on the GPU_CHOICE environment variable.") - # Warn about changed meanings and handle old NVIDIA choice + # Warn about changed meanings and handle old choices if choice == "B": - print_big_message("Warning: GPU_CHOICE='B' now means 'NVIDIA (CUDA 11.8)' in the new version.") + print_big_message("Warning: GPU_CHOICE='B' now means 'AMD' in the new version.") elif choice == "C": - print_big_message("Warning: GPU_CHOICE='C' now means 'AMD' in the new version.") + print_big_message("Warning: GPU_CHOICE='C' now means 'Apple M Series' in the new version.") elif choice == "D": - print_big_message("Warning: GPU_CHOICE='D' now means 'Apple M Series' in the new version.") - elif choice == "A" and "USE_CUDA118" in os.environ: - choice = "B" if os.environ.get("USE_CUDA118", "").lower() in ("yes", "y", "true", "1", "t", "on") else "A" + print_big_message("Warning: GPU_CHOICE='D' now means 'Intel Arc' in the new 
version.") else: choice = get_user_choice( "What is your GPU?", { - 'A': 'NVIDIA - CUDA 12.1 (recommended)', - 'B': 'NVIDIA - CUDA 11.8 (legacy GPUs)', - 'C': 'AMD - Linux/macOS only, requires ROCm 6.1', - 'D': 'Apple M Series', - 'E': 'Intel Arc (beta)', + 'A': 'NVIDIA - CUDA 12.1', + 'B': 'AMD - Linux/macOS only, requires ROCm 6.1', + 'C': 'Apple M Series', + 'D': 'Intel Arc (beta)', 'N': 'CPU mode' }, ) @@ -261,15 +256,13 @@ def install_webui(): # Convert choices to GPU names for compatibility gpu_choice_to_name = { "A": "NVIDIA", - "B": "NVIDIA", - "C": "AMD", - "D": "APPLE", - "E": "INTEL", + "B": "AMD", + "C": "APPLE", + "D": "INTEL", "N": "NONE" } selected_gpu = gpu_choice_to_name[choice] - use_cuda118 = (choice == "B") # CUDA version is now determined by menu choice # Write a flag to CMD_FLAGS.txt for CPU mode if selected_gpu == "NONE": @@ -280,10 +273,7 @@ def install_webui(): # Handle CUDA version display elif any((is_windows(), is_linux())) and selected_gpu == "NVIDIA": - if use_cuda118: - print("CUDA: 11.8") - else: - print("CUDA: 12.1") + print("CUDA: 12.1") # No PyTorch for AMD on Windows (?) elif is_windows() and selected_gpu == "AMD": @@ -294,10 +284,7 @@ def install_webui(): install_pytorch = f"python -m pip install torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION} " if selected_gpu == "NVIDIA": - if use_cuda118 == 'Y': - install_pytorch += "--index-url https://download.pytorch.org/whl/cu118" - else: - install_pytorch += "--index-url https://download.pytorch.org/whl/cu121" + install_pytorch += "--index-url https://download.pytorch.org/whl/cu121" elif selected_gpu == "AMD": install_pytorch += "--index-url https://download.pytorch.org/whl/rocm6.1" elif selected_gpu in ["APPLE", "NONE"]: @@ -434,16 +421,6 @@ def update_requirements(initial_installation=False, pull=True): if not initial_installation and not wheels_changed: textgen_requirements = [line for line in textgen_requirements if '.whl' not in line] - if "+cu118" in torver: - textgen_requirements = [ - req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') - for req in textgen_requirements - if "autoawq" not in req.lower() - ] - - if is_windows() and "+cu118" in torver: # No flash-attention on Windows for CUDA 11 - textgen_requirements = [req for req in textgen_requirements if 'oobabooga/flash-attention' not in req] - with open('temp_requirements.txt', 'w') as file: file.write('\n'.join(textgen_requirements)) From a8a64b6c1cf0f609348fa0182308209acd11f563 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 6 Apr 2025 17:40:18 -0700 Subject: [PATCH 11/25] Update the README --- README.md | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/README.md b/README.md index 40c242c8..542e1ae1 100644 --- a/README.md +++ b/README.md @@ -143,19 +143,6 @@ Then browse to 3) Manually install AutoGPTQ: [Installation](https://github.com/PanQiWei/AutoGPTQ#install-from-source). * Perform the from-source installation - there are no prebuilt ROCm packages for Windows. -##### Older NVIDIA GPUs - -1) For Kepler GPUs and older, you will need to install CUDA 11.8 instead of 12: - -``` -pip3 install torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1 --index-url https://download.pytorch.org/whl/cu118 -conda install -y -c "nvidia/label/cuda-11.8.0" cuda-runtime -``` - -2) bitsandbytes >= 0.39 may not work. 
In that case, to use `--load-in-8bit`, you may have to downgrade like this: - * Linux: `pip install bitsandbytes==0.38.1` - * Windows: `pip install https://github.com/jllllll/bitsandbytes-windows-webui/raw/main/bitsandbytes-0.38.1-py3-none-any.whl` - ##### Manual install The `requirements*.txt` above contain various wheels precompiled through GitHub Actions. If you wish to compile things manually, or if you need to because no suitable wheels are available for your hardware, you can use `requirements_nowheels.txt` and then install your desired loaders manually. From eef90a4964d00a94525d7c8ec9dd9ed90c193546 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 6 Apr 2025 17:44:07 -0700 Subject: [PATCH 12/25] Update some intel arc installation commands --- one_click.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/one_click.py b/one_click.py index 2a161e11..72626010 100644 --- a/one_click.py +++ b/one_click.py @@ -302,9 +302,9 @@ def install_webui(): if selected_gpu == "INTEL": # Install oneAPI dependencies via conda print_big_message("Installing Intel oneAPI runtime libraries.") - run_cmd("conda install -y -c https://software.repos.intel.com/python/conda/ -c conda-forge dpcpp-cpp-rt=2024.0 mkl-dpcpp=2024.0") + run_cmd("conda install -y -c https://software.repos.intel.com/python/conda/ -c conda-forge dpcpp-cpp-rt=2024.0 mkl-dpcpp=2024.0", environment=True) # Install libuv required by Intel-patched torch - run_cmd("conda install -y libuv") + run_cmd("conda install -y libuv", environment=True) # Install the webui requirements update_requirements(initial_installation=True, pull=False) From 204db283623a277d2831e0952814b7f0890ef1c6 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 6 Apr 2025 18:48:31 -0700 Subject: [PATCH 13/25] Update the dockerfiles --- docker/amd/Dockerfile | 2 +- docker/intel/Dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/amd/Dockerfile b/docker/amd/Dockerfile index cfbcf7e4..66e5863c 100644 --- a/docker/amd/Dockerfile +++ b/docker/amd/Dockerfile @@ -13,7 +13,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \ WORKDIR /home/app/ RUN git clone https://github.com/oobabooga/text-generation-webui.git WORKDIR /home/app/text-generation-webui -RUN GPU_CHOICE=C LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose +RUN GPU_CHOICE=B LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose COPY CMD_FLAGS.txt /home/app/text-generation-webui/ EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005} WORKDIR /home/app/text-generation-webui diff --git a/docker/intel/Dockerfile b/docker/intel/Dockerfile index d2ed671e..cab62442 100644 --- a/docker/intel/Dockerfile +++ b/docker/intel/Dockerfile @@ -13,7 +13,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \ WORKDIR /home/app/ RUN git clone https://github.com/oobabooga/text-generation-webui.git WORKDIR /home/app/text-generation-webui -RUN GPU_CHOICE=E LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose +RUN GPU_CHOICE=D LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose COPY CMD_FLAGS.txt /home/app/text-generation-webui/ EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005} # set umask to ensure group read / write at runtime From f1f32386b4338ef71cf2c23f93d6aa00b53b545c Mon Sep 17 
00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 7 Apr 2025 19:29:39 -0300 Subject: [PATCH 14/25] Update transformers requirement from ==4.50.* to ==4.51.* (#6834) --- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index 5ab40c42..19f2124c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,7 +21,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb diff --git a/requirements_amd.txt b/requirements_amd.txt index 5f278db6..99142de3 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 78d14524..cdb6cff4 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 7ab6dfff..9dc36546 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 0c84a84c..e1fe8eaa 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index b9eb99b1..ade65fbe 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 68ae5c17..2bcfe715 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 1a2a670f..1db48c22 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -21,7 +21,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index 3b61ca39..bb9ea97c 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb From a5855c345cc3e361bc8a436daf995fe6a2a5dd33 Mon Sep 17 00:00:00 2001 From: oobabooga Date: Mon, 7 Apr 2025 21:42:33 -0300 Subject: [PATCH 15/25] Set context lengths to at most 8192 by default (to prevent out of memory errors) (#6835) --- modules/models_settings.py | 7 +++++-- modules/shared.py | 6 +++--- modules/ui_model_menu.py | 10 +++++----- modules/ui_parameters.py | 2 +- 
settings-template.yaml | 2 +- 5 files changed, 15 insertions(+), 12 deletions(-) diff --git a/modules/models_settings.py b/modules/models_settings.py index 8d658523..b67d28a0 100644 --- a/modules/models_settings.py +++ b/modules/models_settings.py @@ -17,6 +17,7 @@ def get_fallback_settings(): 'compress_pos_emb': 1, 'alpha_value': 1, 'truncation_length': shared.settings['truncation_length'], + 'truncation_length_info': shared.settings['truncation_length'], 'skip_special_tokens': shared.settings['skip_special_tokens'], 'custom_stopping_strings': shared.settings['custom_stopping_strings'], } @@ -53,7 +54,8 @@ def get_model_metadata(model): for k in metadata: if k.endswith('context_length'): - model_settings['n_ctx'] = metadata[k] + model_settings['n_ctx'] = min(metadata[k], 8192) + model_settings['truncation_length_info'] = metadata[k] elif k.endswith('rope.freq_base'): model_settings['rope_freq_base'] = metadata[k] elif k.endswith('rope.scale_linear'): @@ -89,7 +91,8 @@ def get_model_metadata(model): for k in ['max_position_embeddings', 'model_max_length', 'max_seq_len']: if k in metadata: model_settings['truncation_length'] = metadata[k] - model_settings['max_seq_len'] = metadata[k] + model_settings['truncation_length_info'] = metadata[k] + model_settings['max_seq_len'] = min(metadata[k], 8192) if 'rope_theta' in metadata: model_settings['rope_freq_base'] = metadata['rope_theta'] diff --git a/modules/shared.py b/modules/shared.py index ea6c581a..77bd7639 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -53,7 +53,7 @@ settings = { 'skip_special_tokens': True, 'stream': True, 'static_cache': False, - 'truncation_length': 2048, + 'truncation_length': 8192, 'seed': -1, 'custom_stopping_strings': '', 'custom_token_bans': '', @@ -117,7 +117,7 @@ group.add_argument('--quant_type', type=str, default='nf4', help='quant_type for group = parser.add_argument_group('llama.cpp') group.add_argument('--flash-attn', action='store_true', help='Use flash-attention.') group.add_argument('--tensorcores', action='store_true', help='NVIDIA only: use llama-cpp-python compiled without GGML_CUDA_FORCE_MMQ. This may improve performance on newer cards.') -group.add_argument('--n_ctx', type=int, default=2048, help='Size of the prompt context.') +group.add_argument('--n_ctx', type=int, default=8192, help='Size of the prompt context.') group.add_argument('--threads', type=int, default=0, help='Number of threads to use.') group.add_argument('--threads-batch', type=int, default=0, help='Number of threads to use for batches/prompt processing.') group.add_argument('--no_mul_mat_q', action='store_true', help='Disable the mulmat kernels.') @@ -139,7 +139,7 @@ group.add_argument('--tokenizer-dir', type=str, help='Load the tokenizer from th group = parser.add_argument_group('ExLlamaV2') group.add_argument('--gpu-split', type=str, help='Comma-separated list of VRAM (in GB) to use per GPU device for model layers. Example: 20,7,7.') group.add_argument('--autosplit', action='store_true', help='Autosplit the model tensors across the available GPUs. This causes --gpu-split to be ignored.') -group.add_argument('--max_seq_len', type=int, default=2048, help='Maximum sequence length.') +group.add_argument('--max_seq_len', type=int, default=8192, help='Maximum sequence length.') group.add_argument('--cfg-cache', action='store_true', help='ExLlamav2_HF: Create an additional cache for CFG negative prompts. 
Necessary to use CFG with that loader.') group.add_argument('--no_flash_attn', action='store_true', help='Force flash-attention to not be used.') group.add_argument('--no_xformers', action='store_true', help='Force xformers to not be used.') diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index 1264a9fd..c23b991a 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -200,8 +200,10 @@ def create_event_handlers(): def load_model_wrapper(selected_model, loader, autoload=False): + settings = get_model_metadata(selected_model) + if not autoload: - yield f"The settings for `{selected_model}` have been updated.\n\nClick on \"Load\" to load it." + yield "### {}\n\n- Settings updated: Click \"Load\" to load the model\n- Max sequence length: {}".format(selected_model, settings['truncation_length_info']) return if selected_model == 'None': @@ -214,11 +216,9 @@ def load_model_wrapper(selected_model, loader, autoload=False): shared.model, shared.tokenizer = load_model(selected_model, loader) if shared.model is not None: - output = f"Successfully loaded `{selected_model}`." - - settings = get_model_metadata(selected_model) + output = f"Successfully loaded `{selected_model}`.\n\n" if 'instruction_template' in settings: - output += '\n\nIt seems to be an instruction-following model with template "{}". In the chat tab, instruct or chat-instruct modes should be used.'.format(settings['instruction_template']) + output += '- It seems to be an instruction-following model with template "{}". In the chat tab, instruct or chat-instruct modes should be used.\n'.format(settings['instruction_template']) yield output else: diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py index 846fcfe7..c3245a9d 100644 --- a/modules/ui_parameters.py +++ b/modules/ui_parameters.py @@ -87,7 +87,7 @@ def create_ui(default_preset): shared.gradio['static_cache'] = gr.Checkbox(value=shared.settings['static_cache'], label='Static KV cache', info='Use a static cache for improved performance.') with gr.Column(): - shared.gradio['truncation_length'] = gr.Number(precision=0, step=256, value=get_truncation_length(), label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length. 
Most models require this to be at most 2048.') + shared.gradio['truncation_length'] = gr.Number(precision=0, step=256, value=get_truncation_length(), label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length.') shared.gradio['seed'] = gr.Number(value=shared.settings['seed'], label='Seed (-1 for random)') shared.gradio['sampler_priority'] = gr.Textbox(value=generate_params['sampler_priority'], lines=12, label='Sampler priority', info='Parameter names separated by new lines or commas.', elem_classes=['add_scrollbar']) diff --git a/settings-template.yaml b/settings-template.yaml index 74935a60..0343df0a 100644 --- a/settings-template.yaml +++ b/settings-template.yaml @@ -25,7 +25,7 @@ add_bos_token: true skip_special_tokens: true stream: true static_cache: false -truncation_length: 2048 +truncation_length: 8192 seed: -1 custom_stopping_strings: '' custom_token_bans: '' From bf48ec8c449cbf58172c6f3ab83dd6a844b7994f Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 7 Apr 2025 17:43:41 -0700 Subject: [PATCH 16/25] Remove an unnecessary UI message --- modules/ui_model_menu.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index c23b991a..4fc1de08 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -216,11 +216,7 @@ def load_model_wrapper(selected_model, loader, autoload=False): shared.model, shared.tokenizer = load_model(selected_model, loader) if shared.model is not None: - output = f"Successfully loaded `{selected_model}`.\n\n" - if 'instruction_template' in settings: - output += '- It seems to be an instruction-following model with template "{}". In the chat tab, instruct or chat-instruct modes should be used.\n'.format(settings['instruction_template']) - - yield output + yield f"Successfully loaded `{selected_model}`." else: yield f"Failed to load `{selected_model}`." 
except: From 649ee729c126c3396b9f97c3cbfc8db8e2e6f7f0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 8 Apr 2025 09:22:06 -0700 Subject: [PATCH 17/25] Remove Python 3.10 support --- requirements.txt | 10 ---------- requirements_amd.txt | 4 ---- requirements_amd_noavx2.txt | 3 --- requirements_apple_intel.txt | 2 -- requirements_apple_silicon.txt | 3 --- requirements_cpu_only.txt | 2 -- requirements_cpu_only_noavx2.txt | 2 -- requirements_noavx2.txt | 10 ---------- 8 files changed, 36 deletions(-) diff --git a/requirements.txt b/requirements.txt index 19f2124c..e13cf984 100644 --- a/requirements.txt +++ b/requirements.txt @@ -33,29 +33,19 @@ tiktoken # llama-cpp-python (CPU only, AVX2) https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, with GGML_CUDA_FORCE_MMQ) https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # llama-cpp-python (CUDA, without GGML_CUDA_FORCE_MMQ) https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == 
"x86_64" and python_version == "3.10" # CUDA wheels https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu121.torch2.4.1-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu121.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" https://github.com/oobabooga/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_amd.txt b/requirements_amd.txt index 99142de3..adc77d32 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -32,13 +32,9 @@ tiktoken # llama-cpp-python (CPU only, AVX2) https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.8+rocm6.1.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.8+rocm6.1.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" 
https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index cdb6cff4..22ee57b4 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -32,11 +32,8 @@ tiktoken # llama-cpp-python (CPU only, no AVX2) https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 9dc36546..553db45a 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -32,7 +32,5 @@ tiktoken # Mac wheels https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp311-cp311-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp310-cp310-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and 
python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index e1fe8eaa..e30ce816 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -32,9 +32,6 @@ tiktoken # Mac wheels https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp311-cp311-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp310-cp310-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index ade65fbe..e849a451 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -32,6 +32,4 @@ tiktoken # llama-cpp-python (CPU only, AVX2) https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 2bcfe715..e10782c9 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -32,6 +32,4 @@ tiktoken # llama-cpp-python (CPU only, no AVX2) https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and 
platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 1db48c22..ab7e59fc 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -33,29 +33,19 @@ tiktoken # llama-cpp-python (CPU only, no AVX2) https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, with GGML_CUDA_FORCE_MMQ) https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # llama-cpp-python (CUDA, without GGML_CUDA_FORCE_MMQ) https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" 
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu121.torch2.4.1-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu121.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" https://github.com/oobabooga/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" From 0b3503c91fcd3eaf6e0b93de4384794648406ba7 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 8 Apr 2025 12:26:03 -0700 Subject: [PATCH 18/25] Revert "Update transformers requirement from ==4.50.* to ==4.51.* (#6834)" This reverts commit f1f32386b4338ef71cf2c23f93d6aa00b53b545c. 
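[Editor's aside, not part of the patch] Patches 18, 23 and 24 in this series flip the transformers pin back and forth between 4.50.* and 4.51.*, ending on 4.50.*. A quick illustrative check of which release is actually active in an environment after applying the series (this snippet is an assumption-free version probe, not project code):

```python
# Illustrative check only: confirm the installed transformers release after the
# pin changes in this patch series (the series ends pinned to 4.50.*).
import transformers

print(transformers.__version__)
assert transformers.__version__.startswith("4.50"), transformers.__version__
```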
--- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index e13cf984..4cf99b69 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,7 +21,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb diff --git a/requirements_amd.txt b/requirements_amd.txt index adc77d32..0d205725 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 22ee57b4..93a46a64 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 553db45a..00353bfd 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index e30ce816..7076b386 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index e849a451..c7e2687c 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index e10782c9..2003c544 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index ab7e59fc..d5f456f8 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -21,7 +21,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index bb9ea97c..3b61ca39 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb From 8b8d39ec4e66affac03c22176ac368785095f584 Mon Sep 17 00:00:00 2001 From: oobabooga Date: Wed, 9 Apr 2025 00:07:08 -0300 Subject: [PATCH 19/25] Add ExLlamaV3 support (#6832) --- README.md | 24 ++--- modules/exllamav3_hf.py | 179 +++++++++++++++++++++++++++++++++ modules/loaders.py | 56 ++++++++++- modules/models.py | 17 +++- modules/models_settings.py | 4 +- modules/shared.py | 4 +- one_click.py | 50 +++++++-- requirements.txt | 18 ++-- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 1 + requirements_apple_silicon.txt | 1 + requirements_noavx2.txt | 18 ++-- 13 
files changed, 322 insertions(+), 54 deletions(-) create mode 100644 modules/exllamav3_hf.py diff --git a/README.md b/README.md index 542e1ae1..63b8931a 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. ## Features -- Supports multiple text generation backends in one UI/API, including [Transformers](https://github.com/huggingface/transformers), [llama.cpp](https://github.com/ggerganov/llama.cpp), and [ExLlamaV2](https://github.com/turboderp-org/exllamav2). [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) is supported via its own [Dockerfile](https://github.com/oobabooga/text-generation-webui/blob/main/docker/TensorRT-LLM/Dockerfile), and the Transformers loader is compatible with libraries like [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), [AutoAWQ](https://github.com/casper-hansen/AutoAWQ), [HQQ](https://github.com/mobiusml/hqq), and [AQLM](https://github.com/Vahe1994/AQLM), but they must be installed manually. +- Supports multiple text generation backends in one UI/API, including [Transformers](https://github.com/huggingface/transformers), [llama.cpp](https://github.com/ggerganov/llama.cpp), [ExLlamaV3](https://github.com/turboderp-org/exllamav3), and [ExLlamaV2](https://github.com/turboderp-org/exllamav2). [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) is supported via its own [Dockerfile](https://github.com/oobabooga/text-generation-webui/blob/main/docker/TensorRT-LLM/Dockerfile), and the Transformers loader is compatible with libraries like [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), [AutoAWQ](https://github.com/casper-hansen/AutoAWQ), [HQQ](https://github.com/mobiusml/hqq), and [AQLM](https://github.com/Vahe1994/AQLM), but they must be installed manually. - OpenAI-compatible API with Chat and Completions endpoints – see [examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples). - Automatic prompt formatting using Jinja2 templates. - Three chat modes: `instruct`, `chat-instruct`, and `chat`, with automatic prompt templates in `chat-instruct`. 
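[Editor's aside, not part of the diff] The ExLlamaV3 support mentioned in the feature list above is implemented by the new `modules/exllamav3_hf.py` added further down in this patch: it wraps an ExLlamaV3 model in a Transformers `PreTrainedModel` interface, and the loader is normally chosen with `--loader ExLlamav3_HF` or autodetected from an `exl3` suffix in the model folder name (see the `models_settings.py` hunk below). A minimal sketch of loading a quant through the wrapper directly, with a hypothetical model folder name and assuming the webui's usual `shared.args` namespace and default model directory:

```python
# Minimal sketch under stated assumptions (hypothetical model folder, default
# argument namespace); not part of the patch itself.
from modules import shared
from modules.exllamav3_hf import Exllamav3HF

shared.args.model_dir = "models"    # assumed default model directory
shared.args.max_seq_len = 8192      # rounded up to a multiple of 256 by the wrapper
shared.args.gpu_split = ""          # e.g. "20,24" to split the weights across two GPUs

model = Exllamav3HF.from_pretrained("MyModel-exl3")  # hypothetical folder name
```

Because the wrapper subclasses `PreTrainedModel` and returns `CausalLMOutputWithPast`, generation then flows through the standard Transformers `generate()` path, which is what lets the existing HF samplers listed in `loaders.py` apply to ExLlamaV3 models.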
@@ -78,25 +78,19 @@ conda activate textgen | System | GPU | Command | |--------|---------|---------| -| Linux/WSL | NVIDIA | `pip3 install torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1 --index-url https://download.pytorch.org/whl/cu121` | -| Linux/WSL | CPU only | `pip3 install torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1 --index-url https://download.pytorch.org/whl/cpu` | -| Linux | AMD | `pip3 install torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1 --index-url https://download.pytorch.org/whl/rocm6.1` | -| MacOS + MPS | Any | `pip3 install torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1` | -| Windows | NVIDIA | `pip3 install torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1 --index-url https://download.pytorch.org/whl/cu121` | -| Windows | CPU only | `pip3 install torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1` | +| Linux/WSL | NVIDIA | `pip3 install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/cu124` | +| Linux/WSL | CPU only | `pip3 install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/cpu` | +| Linux | AMD | `pip3 install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/rocm6.1` | +| MacOS + MPS | Any | `pip3 install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0` | +| Windows | NVIDIA | `pip3 install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/cu124` | +| Windows | CPU only | `pip3 install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0` | The up-to-date commands can be found here: https://pytorch.org/get-started/locally/. -For NVIDIA, you also need to install the CUDA runtime libraries: +If you need `nvcc` to compile some library manually, you will additionally need to install this: ``` -conda install -y -c "nvidia/label/cuda-12.1.1" cuda-runtime -``` - -If you need `nvcc` to compile some library manually, replace the command above with - -``` -conda install -y -c "nvidia/label/cuda-12.1.1" cuda +conda install -y -c "nvidia/label/cuda-12.4.1" cuda ``` #### 3. Install the web UI diff --git a/modules/exllamav3_hf.py b/modules/exllamav3_hf.py new file mode 100644 index 00000000..3bf44c9b --- /dev/null +++ b/modules/exllamav3_hf.py @@ -0,0 +1,179 @@ +import os +import traceback +from pathlib import Path +from typing import Any, Dict, Optional, Union + +import torch +from exllamav3 import Cache, Config, Model +from torch.nn import CrossEntropyLoss +from transformers import GenerationConfig, PretrainedConfig, PreTrainedModel +from transformers.modeling_outputs import CausalLMOutputWithPast + +from modules import shared +from modules.logging_colors import logger + +try: + import flash_attn +except Exception: + logger.warning('Failed to load flash-attention due to the following error:\n') + traceback.print_exc() + + +class Exllamav3HF(PreTrainedModel): + def __init__(self, model_dir): + super().__init__(PretrainedConfig()) + self.generation_config = GenerationConfig() + + config = Config.from_directory(model_dir) + self.ex_model = Model.from_config(config) + + # Calculate the closest multiple of 256 at or above the chosen value + max_tokens = shared.args.max_seq_len + if max_tokens % 256 != 0: + adjusted_tokens = ((max_tokens // 256) + 1) * 256 + logger.warning(f"max_num_tokens must be a multiple of 256. 
Adjusting from {max_tokens} to {adjusted_tokens}") + max_tokens = adjusted_tokens + + self.ex_cache = Cache(self.ex_model, max_num_tokens=max_tokens) + + # Create load parameters dictionary + load_params = {'progressbar': True} + if shared.args.gpu_split: + split = [float(alloc) for alloc in shared.args.gpu_split.split(",")] + load_params['use_per_device'] = split + + self.ex_model.load(**load_params) + self.past_seq = None + self.max_tokens = max_tokens + + def _validate_model_class(self): + pass + + def _validate_model_kwargs(self, model_kwargs: Dict[str, Any]): + pass + + def prepare_inputs_for_generation(self, input_ids, **kwargs): + return {'input_ids': input_ids, **kwargs} + + @property + def device(self) -> torch.device: + return torch.device(0) + + def __call__(self, *args, **kwargs): + use_cache = kwargs.get('use_cache', True) + labels = kwargs.get('labels', None) + past_key_values = kwargs.get('past_key_values', None) + + if len(args) > 0: + if not shared.args.cfg_cache: + logger.error("Please enable the cfg-cache option to use CFG with ExLlamav3_HF.") + return + + input_ids = args[0] + is_negative = True + past_seq = self.past_seq_negative + ex_cache = self.ex_cache_negative + else: + input_ids = kwargs['input_ids'] + is_negative = False + past_seq = self.past_seq + ex_cache = self.ex_cache + + seq = input_ids[0].tolist() + if is_negative and past_key_values is not None: + seq = past_key_values + seq + + seq_tensor = torch.tensor(seq) + reset = True + + # Make the forward call + if labels is None: + if past_seq is not None: + min_length = min(past_seq.shape[0], seq_tensor.shape[0]) + indices = torch.nonzero(~torch.eq(past_seq[:min_length], seq_tensor[:min_length])) + if len(indices) > 0: + longest_prefix = indices[0].item() + else: + longest_prefix = min_length + + if longest_prefix > 0: + reset = False + current_len = longest_prefix + if len(seq_tensor) - longest_prefix > 1: + self.ex_model.forward( + input_ids=seq_tensor[longest_prefix:-1].view(1, -1), + params={ + "attn_mode": "flash_attn", + "cache": ex_cache, + "past_len": longest_prefix, + "batch_shape": (1, self.max_tokens) + } + ) + + current_len = longest_prefix + len(seq_tensor) - longest_prefix - 1 + + if reset: + if len(seq_tensor) > 1: + self.ex_model.forward( + input_ids=seq_tensor[:-1].view(1, -1), + params={ + "attn_mode": "flash_attn", + "cache": ex_cache, + "past_len": 0, + "batch_shape": (1, self.max_tokens) + } + ) + + current_len = len(seq_tensor) - 1 + else: + current_len = 0 + + logits = self.ex_model.forward( + input_ids=seq_tensor[-1:].view(1, -1), + params={ + "attn_mode": "flash_attn", + "cache": ex_cache, + "past_len": current_len, + "batch_shape": (1, self.max_tokens) + } + ).to(input_ids.device).float() + else: + logits = self.ex_model.forward( + input_ids=seq_tensor.view(1, -1), + params={ + "attn_mode": "flash_attn", + "cache": ex_cache, + "past_len": 0, + "batch_shape": (1, self.max_tokens) + } + ).float() + + if is_negative: + self.past_seq_negative = seq_tensor + else: + self.past_seq = seq_tensor + + loss = None + if labels is not None: + # Shift so that tokens < n predict n + shift_logits = logits[..., :-1, :].contiguous() + shift_labels = labels[..., 1:].contiguous() + # Flatten the tokens + loss_fct = CrossEntropyLoss() + shift_logits = shift_logits.view(-1, logits.shape[-1]) + shift_labels = shift_labels.view(-1) + # Enable model parallelism + shift_labels = shift_labels.to(shift_logits.device) + loss = loss_fct(shift_logits, shift_labels) + + return CausalLMOutputWithPast(logits=logits, 
past_key_values=seq if use_cache else None, loss=loss) + + @classmethod + def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], *model_args, **kwargs): + assert len(model_args) == 0 and len(kwargs) == 0, "extra args is currently not supported" + if isinstance(pretrained_model_name_or_path, str): + pretrained_model_name_or_path = Path(pretrained_model_name_or_path) + + pretrained_model_name_or_path = Path(f'{shared.args.model_dir}') / Path(pretrained_model_name_or_path) + + return Exllamav3HF(pretrained_model_name_or_path) diff --git a/modules/loaders.py b/modules/loaders.py index 88ded1d1..980a13e6 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -23,7 +23,6 @@ loaders_and_params = OrderedDict({ 'use_double_quant', 'use_eager_attention', 'bf16', - 'trust_remote_code', 'no_use_fast', ], @@ -76,6 +75,13 @@ loaders_and_params = OrderedDict({ 'no_use_fast', 'llamacpp_HF_info', ], + 'ExLlamav3_HF': [ + 'max_seq_len', + 'gpu_split', + 'cfg_cache', + 'trust_remote_code', + 'no_use_fast', + ], 'ExLlamav2_HF': [ 'max_seq_len', 'cache_type', @@ -174,30 +180,38 @@ def transformers_samplers(): loaders_samplers = { 'Transformers': transformers_samplers(), 'HQQ': transformers_samplers(), - 'ExLlamav2': { + 'ExLlamav3_HF': { 'temperature', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent', 'smoothing_factor', + 'smoothing_curve', 'min_p', 'top_p', 'top_k', 'typical_p', 'xtc_threshold', 'xtc_probability', + 'epsilon_cutoff', + 'eta_cutoff', 'tfs', 'top_a', + 'top_n_sigma', 'dry_multiplier', 'dry_allowed_length', 'dry_base', 'repetition_penalty', 'frequency_penalty', 'presence_penalty', + 'encoder_repetition_penalty', + 'no_repeat_ngram_size', 'repetition_penalty_range', + 'guidance_scale', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', + 'do_sample', 'dynamic_temperature', 'temperature_last', 'auto_max_new_tokens', @@ -205,8 +219,12 @@ loaders_samplers = { 'add_bos_token', 'skip_special_tokens', 'seed', + 'sampler_priority', 'custom_token_bans', + 'negative_prompt', 'dry_sequence_breakers', + 'grammar_string', + 'grammar_file_row', }, 'ExLlamav2_HF': { 'temperature', @@ -254,6 +272,40 @@ loaders_samplers = { 'grammar_string', 'grammar_file_row', }, + 'ExLlamav2': { + 'temperature', + 'dynatemp_low', + 'dynatemp_high', + 'dynatemp_exponent', + 'smoothing_factor', + 'min_p', + 'top_p', + 'top_k', + 'typical_p', + 'xtc_threshold', + 'xtc_probability', + 'tfs', + 'top_a', + 'dry_multiplier', + 'dry_allowed_length', + 'dry_base', + 'repetition_penalty', + 'frequency_penalty', + 'presence_penalty', + 'repetition_penalty_range', + 'mirostat_mode', + 'mirostat_tau', + 'mirostat_eta', + 'dynamic_temperature', + 'temperature_last', + 'auto_max_new_tokens', + 'ban_eos_token', + 'add_bos_token', + 'skip_special_tokens', + 'seed', + 'custom_token_bans', + 'dry_sequence_breakers', + }, 'llama.cpp': { 'temperature', 'min_p', diff --git a/modules/models.py b/modules/models.py index 3951fe82..288bc1b6 100644 --- a/modules/models.py +++ b/modules/models.py @@ -69,8 +69,9 @@ def load_model(model_name, loader=None): 'Transformers': huggingface_loader, 'llama.cpp': llamacpp_loader, 'llamacpp_HF': llamacpp_HF_loader, - 'ExLlamav2': ExLlamav2_loader, + 'ExLlamav3_HF': ExLlamav3_HF_loader, 'ExLlamav2_HF': ExLlamav2_HF_loader, + 'ExLlamav2': ExLlamav2_loader, 'HQQ': HQQ_loader, 'TensorRT-LLM': TensorRT_LLM_loader, } @@ -304,11 +305,10 @@ def llamacpp_HF_loader(model_name): return model -def ExLlamav2_loader(model_name): - from modules.exllamav2 import Exllamav2Model +def 
ExLlamav3_HF_loader(model_name): + from modules.exllamav3_hf import Exllamav3HF - model, tokenizer = Exllamav2Model.from_pretrained(model_name) - return model, tokenizer + return Exllamav3HF.from_pretrained(model_name) def ExLlamav2_HF_loader(model_name): @@ -317,6 +317,13 @@ def ExLlamav2_HF_loader(model_name): return Exllamav2HF.from_pretrained(model_name) +def ExLlamav2_loader(model_name): + from modules.exllamav2 import Exllamav2Model + + model, tokenizer = Exllamav2Model.from_pretrained(model_name) + return model, tokenizer + + def HQQ_loader(model_name): try: from hqq.core.quantize import HQQBackend, HQQLinear diff --git a/modules/models_settings.py b/modules/models_settings.py index b67d28a0..51994e23 100644 --- a/modules/models_settings.py +++ b/modules/models_settings.py @@ -158,14 +158,14 @@ def infer_loader(model_name, model_settings): path_to_model = Path(f'{shared.args.model_dir}/{model_name}') if not path_to_model.exists(): loader = None - elif (path_to_model / 'quantize_config.json').exists(): # Old GPTQ metadata file - loader = 'ExLlamav2_HF' elif len(list(path_to_model.glob('*.gguf'))) > 0 and path_to_model.is_dir() and (path_to_model / 'tokenizer_config.json').exists(): loader = 'llamacpp_HF' elif len(list(path_to_model.glob('*.gguf'))) > 0: loader = 'llama.cpp' elif re.match(r'.*\.gguf', model_name.lower()): loader = 'llama.cpp' + elif re.match(r'.*exl3', model_name.lower()): + loader = 'ExLlamav3_HF' elif re.match(r'.*exl2', model_name.lower()): loader = 'ExLlamav2_HF' elif re.match(r'.*-hqq', model_name.lower()): diff --git a/modules/shared.py b/modules/shared.py index 77bd7639..0981f6fb 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -86,7 +86,7 @@ group.add_argument('--idle-timeout', type=int, default=0, help='Unload model aft # Model loader group = parser.add_argument_group('Model loader') -group.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, llamacpp_HF, ExLlamav2_HF, ExLlamav2, HQQ, TensorRT-LLM.') +group.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. 
Valid options: Transformers, llama.cpp, llamacpp_HF, ExLlamav3_HF, ExLlamav2_HF, ExLlamav2, HQQ, TensorRT-LLM.') # Transformers/Accelerate group = parser.add_argument_group('Transformers/Accelerate') @@ -273,6 +273,8 @@ def fix_loader_name(name): return 'ExLlamav2' elif name in ['exllamav2-hf', 'exllamav2_hf', 'exllama-v2-hf', 'exllama_v2_hf', 'exllama-v2_hf', 'exllama2-hf', 'exllama2_hf', 'exllama-2-hf', 'exllama_2_hf', 'exllama-2_hf']: return 'ExLlamav2_HF' + elif name in ['exllamav3-hf', 'exllamav3_hf', 'exllama-v3-hf', 'exllama_v3_hf', 'exllama-v3_hf', 'exllama3-hf', 'exllama3_hf', 'exllama-3-hf', 'exllama_3_hf', 'exllama-3_hf']: + return 'ExLlamav3_HF' elif name in ['hqq']: return 'HQQ' elif name in ['tensorrt', 'tensorrtllm', 'tensorrt_llm', 'tensorrt-llm', 'tensort', 'tensortllm']: diff --git a/one_click.py b/one_click.py index 72626010..fcca4ff5 100644 --- a/one_click.py +++ b/one_click.py @@ -16,10 +16,11 @@ import sys # os.environ["HCC_AMDGPU_TARGET"] = 'gfx1030' -# Define the required PyTorch version -TORCH_VERSION = "2.4.1" -TORCHVISION_VERSION = "0.19.1" -TORCHAUDIO_VERSION = "2.4.1" +# Define the required versions +TORCH_VERSION = "2.6.0" +TORCHVISION_VERSION = "0.21.0" +TORCHAUDIO_VERSION = "2.6.0" +PYTHON_VERSION = "3.11" # Environment script_dir = os.getcwd() @@ -101,13 +102,20 @@ def torch_version(): return torver -def update_pytorch(): +def update_pytorch_and_python(): print_big_message("Checking for PyTorch updates.") + + # Update the Python version. Left here for future reference in case this becomes necessary. + # print_big_message("Checking for PyTorch and Python updates.") + # current_python_version = f"{sys.version_info.major}.{sys.version_info.minor}" + # if current_python_version != PYTHON_VERSION: + # run_cmd(f"conda install -y python={PYTHON_VERSION}", assert_success=True, environment=True) + torver = torch_version() base_cmd = f"python -m pip install --upgrade torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION}" if "+cu" in torver: - install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/cu121" + install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/cu124" elif "+rocm" in torver: install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/rocm6.1" elif "+cpu" in torver: @@ -245,7 +253,7 @@ def install_webui(): choice = get_user_choice( "What is your GPU?", { - 'A': 'NVIDIA - CUDA 12.1', + 'A': 'NVIDIA - CUDA 12.4', 'B': 'AMD - Linux/macOS only, requires ROCm 6.1', 'C': 'Apple M Series', 'D': 'Intel Arc (beta)', @@ -273,7 +281,7 @@ def install_webui(): # Handle CUDA version display elif any((is_windows(), is_linux())) and selected_gpu == "NVIDIA": - print("CUDA: 12.1") + print("CUDA: 12.4") # No PyTorch for AMD on Windows (?) 
elif is_windows() and selected_gpu == "AMD": @@ -284,7 +292,7 @@ def install_webui(): install_pytorch = f"python -m pip install torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION} " if selected_gpu == "NVIDIA": - install_pytorch += "--index-url https://download.pytorch.org/whl/cu121" + install_pytorch += "--index-url https://download.pytorch.org/whl/cu124" elif selected_gpu == "AMD": install_pytorch += "--index-url https://download.pytorch.org/whl/rocm6.1" elif selected_gpu in ["APPLE", "NONE"]: @@ -297,7 +305,7 @@ def install_webui(): # Install Git and then Pytorch print_big_message("Installing PyTorch.") - run_cmd(f"conda install -y -k ninja git && {install_pytorch} && python -m pip install py-cpuinfo==9.0.0", assert_success=True, environment=True) + run_cmd(f"conda install -y ninja git && {install_pytorch} && python -m pip install py-cpuinfo==9.0.0", assert_success=True, environment=True) if selected_gpu == "INTEL": # Install oneAPI dependencies via conda @@ -323,6 +331,24 @@ def install_extensions_requirements(): run_cmd(f"python -m pip install -r {extension_req_path} --upgrade", assert_success=False, environment=True) +def clean_outdated_pytorch_cuda_dependencies(): + patterns = ["cu121", "cu122", "torch2.4"] + result = run_cmd("python -m pip list --format=freeze", capture_output=True, environment=True) + matching_packages = [] + + for line in result.stdout.decode('utf-8').splitlines(): + if "==" in line: + pkg_name, version = line.split('==', 1) + if any(pattern in version for pattern in patterns): + matching_packages.append(pkg_name) + + if matching_packages: + print(f"Uninstalling: {', '.join(matching_packages)}") + run_cmd(f"python -m pip uninstall -y {' '.join(matching_packages)}", assert_success=True, environment=True) + + return matching_packages + + def update_requirements(initial_installation=False, pull=True): # Create .git directory if missing if not os.path.exists(os.path.join(script_dir, ".git")): @@ -410,7 +436,9 @@ def update_requirements(initial_installation=False, pull=True): # Update PyTorch if not initial_installation: - update_pytorch() + clean_outdated_pytorch_cuda_dependencies() + update_pytorch_and_python() + torver = torch_version() print_big_message(f"Installing webui requirements from file: {requirements_file}") print(f"TORCH: {torver}\n") diff --git a/requirements.txt b/requirements.txt index 4cf99b69..b9b4ea7a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -36,16 +36,18 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" # llama-cpp-python (CUDA, with GGML_CUDA_FORCE_MMQ) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu124-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" 
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu124-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" # llama-cpp-python (CUDA, without GGML_CUDA_FORCE_MMQ) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu124-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu124-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" # CUDA wheels -https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav3/releases/download/v0.0.1/exllamav3-0.0.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav3/releases/download/v0.0.1/exllamav3-0.0.1+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" -https://github.com/oobabooga/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git 
a/requirements_amd.txt b/requirements_amd.txt index 0d205725..3d24891f 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -36,5 +36,5 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp # AMD wheels https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.8+rocm6.1.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+rocm6.1.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 93a46a64..057b631d 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -35,5 +35,5 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" # AMD wheels -https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+rocm6.1.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 00353bfd..eba21ec2 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -33,4 +33,5 @@ tiktoken # Mac wheels https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp311-cp311-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/exllamav3/releases/download/v0.0.1/exllamav3-0.0.1-py3-none-any.whl https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 7076b386..2048c99b 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -34,4 +34,5 @@ tiktoken https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp311-cp311-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and 
python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" +https://github.com/oobabooga/exllamav3/releases/download/v0.0.1/exllamav3-0.0.1-py3-none-any.whl https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index d5f456f8..60b71ac1 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -36,16 +36,18 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" # llama-cpp-python (CUDA, with GGML_CUDA_FORCE_MMQ) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu124avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu124avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" # llama-cpp-python (CUDA, without GGML_CUDA_FORCE_MMQ) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu124avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu124avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" # CUDA wheels -https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system 
== "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav3/releases/download/v0.0.1/exllamav3-0.0.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav3/releases/download/v0.0.1/exllamav3-0.0.1+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" -https://github.com/oobabooga/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" From d8aad6da948262e8679da2063d27600da0d8ccb4 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 8 Apr 2025 20:20:24 -0700 Subject: [PATCH 20/25] Fix an update bug --- one_click.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/one_click.py b/one_click.py index fcca4ff5..8e85dc3a 100644 --- a/one_click.py +++ b/one_click.py @@ -436,9 +436,9 @@ def update_requirements(initial_installation=False, pull=True): # Update PyTorch if not initial_installation: - clean_outdated_pytorch_cuda_dependencies() update_pytorch_and_python() torver = torch_version() + clean_outdated_pytorch_cuda_dependencies() print_big_message(f"Installing webui requirements from file: {requirements_file}") print(f"TORCH: {torver}\n") From ad1ada657421cc830e8cd421e50477c3c05c3332 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 9 Apr 2025 05:17:10 -0700 Subject: [PATCH 21/25] Change one message in the installer --- one_click.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/one_click.py b/one_click.py index 8e85dc3a..f3536638 100644 --- a/one_click.py +++ b/one_click.py @@ -343,7 +343,7 @@ def clean_outdated_pytorch_cuda_dependencies(): matching_packages.append(pkg_name) if matching_packages: - print(f"Uninstalling: {', '.join(matching_packages)}") + print(f"\nUninstalling: {', '.join(matching_packages)}\n") run_cmd(f"python -m pip uninstall -y {' '.join(matching_packages)}", assert_success=True, environment=True) return matching_packages From 89f40cdcf7f0a5eb67a41865d1c67e2ae921dfa1 Mon Sep 17 00:00:00 
2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 9 Apr 2025 07:20:51 -0700 Subject: [PATCH 22/25] Update libstdcxx-ng for GLIBCXX_3.4.30 support on Linux --- one_click.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/one_click.py b/one_click.py index f3536638..8dd11b6b 100644 --- a/one_click.py +++ b/one_click.py @@ -434,6 +434,9 @@ def update_requirements(initial_installation=False, pull=True): if os.environ.get("INSTALL_EXTENSIONS", "").lower() in ("yes", "y", "true", "1", "t", "on"): install_extensions_requirements() + if is_linux(): + run_cmd("conda install -y -c conda-forge libstdcxx-ng==12.1.0", assert_success=True, environment=True) + # Update PyTorch if not initial_installation: update_pytorch_and_python() From 8229736ec4b359b2a773f36abac1bf96d4eabf56 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 9 Apr 2025 08:38:06 -0700 Subject: [PATCH 23/25] Reapply "Update transformers requirement from ==4.50.* to ==4.51.* (#6834)" This reverts commit 0b3503c91fcd3eaf6e0b93de4384794648406ba7. --- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index b9b4ea7a..de338696 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,7 +21,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb diff --git a/requirements_amd.txt b/requirements_amd.txt index 3d24891f..00281d22 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 057b631d..80fcb71c 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index eba21ec2..4253a940 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 2048c99b..6962b6fc 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index c7e2687c..e849a451 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 2003c544..e10782c9 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 60b71ac1..00c31c40 
100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -21,7 +21,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index 3b61ca39..bb9ea97c 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb From d337ea31fa05d3d2f60df8c28fff10c07c10156f Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 9 Apr 2025 10:16:47 -0700 Subject: [PATCH 24/25] Revert "Reapply "Update transformers requirement from ==4.50.* to ==4.51.* (#6834)"" This reverts commit 8229736ec4b359b2a773f36abac1bf96d4eabf56. --- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index de338696..b9b4ea7a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,7 +21,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb diff --git a/requirements_amd.txt b/requirements_amd.txt index 00281d22..3d24891f 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 80fcb71c..057b631d 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 4253a940..eba21ec2 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 6962b6fc..2048c99b 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index e849a451..c7e2687c 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index e10782c9..2003c544 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 00c31c40..60b71ac1 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -21,7 +21,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index 
bb9ea97c..3b61ca39 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb From 9025848df56e7095febf88fc1cd473b180e19fc5 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 9 Apr 2025 10:25:47 -0700 Subject: [PATCH 25/25] Small change to installer --- one_click.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/one_click.py b/one_click.py index 8dd11b6b..9f46a2df 100644 --- a/one_click.py +++ b/one_click.py @@ -21,6 +21,7 @@ TORCH_VERSION = "2.6.0" TORCHVISION_VERSION = "0.21.0" TORCHAUDIO_VERSION = "2.6.0" PYTHON_VERSION = "3.11" +LIBSTDCXX_VERSION_LINUX = "12.1.0" # Environment script_dir = os.getcwd() @@ -435,7 +436,7 @@ def update_requirements(initial_installation=False, pull=True): install_extensions_requirements() if is_linux(): - run_cmd("conda install -y -c conda-forge libstdcxx-ng==12.1.0", assert_success=True, environment=True) + run_cmd(f"conda install -y -c conda-forge libstdcxx-ng=={LIBSTDCXX_VERSION_LINUX}", assert_success=True, environment=True) # Update PyTorch if not initial_installation: