From 01e42a00ffcb82747316a6ba40429919e8efd36d Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 26 Mar 2025 06:01:57 -0700 Subject: [PATCH 01/25] Bump transformers to 4.50 --- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index 83bd3a53..63b539cf 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,7 +21,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.49.* +transformers==4.50.* tqdm wandb diff --git a/requirements_amd.txt b/requirements_amd.txt index 1e757ffe..71679c7e 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.49.* +transformers==4.50.* tqdm wandb diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index f74ebf69..cfe9bf00 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.49.* +transformers==4.50.* tqdm wandb diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index dcdeae3f..4e3c0c11 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.49.* +transformers==4.50.* tqdm wandb diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index b823e40e..61c66e82 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.49.* +transformers==4.50.* tqdm wandb diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index fe3f522a..0ab64e79 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.49.* +transformers==4.50.* tqdm wandb diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 014e2e5d..17187b47 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.49.* +transformers==4.50.* tqdm wandb diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 6139c46e..63e051ca 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -21,7 +21,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.49.* +transformers==4.50.* tqdm wandb diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index 858ffff5..533d399b 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.49.* +transformers==4.50.* tqdm wandb From 2bfaf44df0904d8db3cd04e38b33b6be582babb6 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Wed, 26 Mar 2025 10:03:21 -0300 Subject: [PATCH 02/25] Update accelerate requirement from ==1.4.* to ==1.5.* (#6802) --- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 
+- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index 63b539cf..8c76ff17 100644 --- a/requirements.txt +++ b/requirements.txt @@ -1,4 +1,4 @@ -accelerate==1.4.* +accelerate==1.5.* bitsandbytes==0.45.* colorama datasets diff --git a/requirements_amd.txt b/requirements_amd.txt index 71679c7e..7fcd5c0c 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -1,4 +1,4 @@ -accelerate==1.4.* +accelerate==1.5.* colorama datasets einops diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index cfe9bf00..028d37cc 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -1,4 +1,4 @@ -accelerate==1.4.* +accelerate==1.5.* colorama datasets einops diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 4e3c0c11..284c2dd6 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -1,4 +1,4 @@ -accelerate==1.4.* +accelerate==1.5.* colorama datasets einops diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 61c66e82..43615aeb 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -1,4 +1,4 @@ -accelerate==1.4.* +accelerate==1.5.* colorama datasets einops diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 0ab64e79..9ae8e2a7 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -1,4 +1,4 @@ -accelerate==1.4.* +accelerate==1.5.* colorama datasets einops diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 17187b47..77fe85fe 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -1,4 +1,4 @@ -accelerate==1.4.* +accelerate==1.5.* colorama datasets einops diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 63e051ca..4bebd865 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -1,4 +1,4 @@ -accelerate==1.4.* +accelerate==1.5.* bitsandbytes==0.45.* colorama datasets diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index 533d399b..a7255c0a 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -1,4 +1,4 @@ -accelerate==1.4.* +accelerate==1.5.* colorama datasets einops From 525b1e020776c31e9dc3fd592855983cecfafe07 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 29 Mar 2025 13:43:16 -0700 Subject: [PATCH 03/25] Remove the stalebot --- .github/workflows/stale.yml | 22 ---------------------- 1 file changed, 22 deletions(-) delete mode 100644 .github/workflows/stale.yml diff --git a/.github/workflows/stale.yml b/.github/workflows/stale.yml deleted file mode 100644 index 8eb03299..00000000 --- a/.github/workflows/stale.yml +++ /dev/null @@ -1,22 +0,0 @@ -name: Close inactive issues -on: - schedule: - - cron: "10 23 * * *" - -jobs: - close-issues: - runs-on: ubuntu-latest - permissions: - issues: write - pull-requests: write - steps: - - uses: actions/stale@v5 - with: - stale-issue-message: "" - close-issue-message: "This issue has been closed due to inactivity for 6 months. If you believe it is still relevant, please leave a comment below. You can tag a developer in your comment." 
- days-before-issue-stale: 180 - days-before-issue-close: 0 - stale-issue-label: "stale" - days-before-pr-stale: -1 - days-before-pr-close: -1 - repo-token: ${{ secrets.GITHUB_TOKEN }} From 1bd208c219a16ec1d333f07e8a2bb2b6dd55d22d Mon Sep 17 00:00:00 2001 From: oobabooga Date: Sat, 29 Mar 2025 22:47:10 -0300 Subject: [PATCH 04/25] Add a new chat style: Dark (#6817) --- css/chat_style-Dark.css | 128 ++++++++++++++++++++++++++++++++++++++++ 1 file changed, 128 insertions(+) create mode 100644 css/chat_style-Dark.css diff --git a/css/chat_style-Dark.css b/css/chat_style-Dark.css new file mode 100644 index 00000000..7f7f0dbf --- /dev/null +++ b/css/chat_style-Dark.css @@ -0,0 +1,128 @@ +.message { + display: grid; + grid-template-columns: 60px minmax(0, 1fr); + padding-bottom: 28px; + font-size: 18px; + font-family: 'Roboto', Arial, sans-serif; /* Modern font */ + line-height: 1.5; +} + +.circle-you, +.circle-bot { + background-color: #2b2b2b; /* Darker background for circles */ + border-radius: 50%; /* Perfect circle */ + border: 1px solid #4a90e2; /* Soft blue border */ + box-shadow: 0 4px 8px rgba(0, 0, 0, 0.5); /* Soft shadow for depth */ +} + +.circle-bot img, +.circle-you img { + border-radius: 50%; /* Make images circular */ + width: 100%; + height: 100%; + object-fit: cover; +} + +.circle-you, .circle-bot { + width: 64px; /* Smaller size for modern look */ + height: 64px; +} + +.text { + padding-left: 12px; /* Reduced padding for a cleaner layout */ + color: #f0f0f0; /* Light text color for readability */ +} + +.text p { + margin-top: 2px; +} + +.username { + padding-left: 10px; + font-size: 20px; + font-weight: bold; + color: #e0e0e0; /* Light gray text */ + transition: color 0.3s ease; /* Smooth color transition */ +} + +.username:hover { + color: #4a90e2; /* Blue color on hover */ +} + +.message-body { + position: relative; + border: 1px solid rgba(255, 255, 255, 0.1); /* Soft white border */ + border-radius: 8px; /* Slightly rounded corners */ + padding: 15px; + background: #1e1e1e; /* Dark background */ + box-shadow: 0 4px 10px rgba(0, 0, 0, 0.3); /* Subtle shadow for depth */ + transition: background 0.3s ease; /* Smooth transition for background */ +} + +.message-body:hover { + background: #252525; /* Slightly lighter on hover */ +} + +/* Adds 2 extra lines at the top and bottom of the message */ +.message-body::before, +.message-body::after { + content: ""; + position: absolute; + left: 10px; + right: 10px; + height: 1px; + background-color: rgba(255, 255, 255, 0.05); /* Faded lines for subtle separation */ +} + +.message-body::before { + top: 4px; +} + +.message-body::after { + bottom: 4px; +} + +.message-body img { + max-width: 300px; + max-height: 300px; + border-radius: 10px; /* Rounded corners for images */ +} + +.message-body p { + margin-bottom: 0 !important; + font-size: 16px !important; + line-height: 1.5 !important; + color: #e0e0e0 !important; /* Light color for text */ +} + +.message-body p em { + color: #a6a6a6 !important; /* Softer gray for emphasized text */ +} + +@media screen and (max-width: 688px) { + .message { + display: grid; + grid-template-columns: 60px minmax(0, 1fr); + padding-bottom: 25px; + font-size: 15px; + font-family: 'Roboto', Arial, sans-serif; /* Modern font */ + line-height: 1.5; + } + + .circle-you, .circle-bot { + width: 40px; /* Smaller size for mobile */ + height: 40px; + } + + .text { + padding-left: 10px; /* Reduced padding for mobile */ + } + + .message-body p { + font-size: 14px !important; /* Smaller text for mobile */ + } + + 
.username { + font-size: 18px; /* Smaller username for mobile */ + } +} From 79a26d7a5cd24e952b670668e97d50f1369c3e49 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 29 Mar 2025 18:49:48 -0700 Subject: [PATCH 05/25] Lint --- css/chat_style-Dark.css | 14 +++++++------- 1 file changed, 7 insertions(+), 7 deletions(-) diff --git a/css/chat_style-Dark.css b/css/chat_style-Dark.css index 7f7f0dbf..368a2a16 100644 --- a/css/chat_style-Dark.css +++ b/css/chat_style-Dark.css @@ -3,7 +3,7 @@ grid-template-columns: 60px minmax(0, 1fr); padding-bottom: 28px; font-size: 18px; - font-family: 'Roboto', Arial, sans-serif; /* Modern font */ + font-family: Roboto, Arial, sans-serif; /* Modern font */ line-height: 1.5; } @@ -12,7 +12,7 @@ background-color: #2b2b2b; /* Darker background for circles */ border-radius: 50%; /* Perfect circle */ border: 1px solid #4a90e2; /* Soft blue border */ - box-shadow: 0 4px 8px rgba(0, 0, 0, 0.5); /* Soft shadow for depth */ + box-shadow: 0 4px 8px rgb(0 0 0 / 50%); /* Soft shadow for depth */ } .circle-bot img, @@ -51,11 +51,11 @@ .message-body { position: relative; - border: 1px solid rgba(255, 255, 255, 0.1); /* Soft white border */ + border: 1px solid rgb(255 255 255 / 10%); /* Soft white border */ border-radius: 8px; /* Slightly rounded corners */ padding: 15px; background: #1e1e1e; /* Dark background */ - box-shadow: 0 4px 10px rgba(0, 0, 0, 0.3); /* Subtle shadow for depth */ + box-shadow: 0 4px 10px rgb(0 0 0 / 30%); /* Subtle shadow for depth */ transition: background 0.3s ease; /* Smooth transition for background */ } @@ -71,7 +71,7 @@ left: 10px; right: 10px; height: 1px; - background-color: rgba(255, 255, 255, 0.05); /* Faded lines for subtle separation */ + background-color: rgb(255 255 255 / 5%); /* Faded lines for subtle separation */ } .message-body::before { @@ -99,13 +99,13 @@ color: #a6a6a6 !important; /* Softer gray for emphasized text */ } -@media screen and (max-width: 688px) { +@media screen and (width <= 688px) { .message { display: grid; grid-template-columns: 60px minmax(0, 1fr); padding-bottom: 25px; font-size: 15px; - font-family: 'Roboto', Arial, sans-serif; /* Modern font */ + font-family: Roboto, Arial, sans-serif; /* Modern font */ line-height: 1.5; } From 1981327285c4411cc23472c6e874a895c8cf4424 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 29 Mar 2025 19:17:14 -0700 Subject: [PATCH 06/25] Fix the colab notebook --- Colab-TextGen-GPU.ipynb | 1 + 1 file changed, 1 insertion(+) diff --git a/Colab-TextGen-GPU.ipynb b/Colab-TextGen-GPU.ipynb index 8e305e1d..ebeafc02 100644 --- a/Colab-TextGen-GPU.ipynb +++ b/Colab-TextGen-GPU.ipynb @@ -57,6 +57,7 @@ "from pathlib import Path\n", "\n", "os.environ.pop('PYTHONPATH', None)\n", + "os.environ.pop('MPLBACKEND', None)\n", "\n", "if Path.cwd().name != 'text-generation-webui':\n", " print(\"\\033[1;32;1m\\n --> Installing the web UI. 
This will take a while, but after the initial setup, you can download and test as many models as you like.\\033[0;37;0m\\n\")\n", From 109de34e3b3187eb3f463bf463086a48444013a0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 31 Mar 2025 09:23:28 -0700 Subject: [PATCH 07/25] Remove the old --model-menu flag --- modules/shared.py | 2 +- server.py | 18 ------------------ 2 files changed, 1 insertion(+), 19 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index 2e91f4d5..ea6c581a 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -79,7 +79,6 @@ group.add_argument('--model', type=str, help='Name of the model to load by defau group.add_argument('--lora', type=str, nargs='+', help='The list of LoRAs to load. If you want to load more than one LoRA, write the names separated by spaces.') group.add_argument('--model-dir', type=str, default='models/', help='Path to directory with all the models.') group.add_argument('--lora-dir', type=str, default='loras/', help='Path to directory with all the loras.') -group.add_argument('--model-menu', action='store_true', help='Show a model menu in the terminal when the web UI is first launched.') group.add_argument('--settings', type=str, help='Load the default interface settings from this yaml file. See settings-template.yaml for an example. If you create a file called settings.yaml, this file will be loaded by default without the need to use the --settings flag.') group.add_argument('--extensions', type=str, nargs='+', help='The list of extensions to load. If you want to load more than one extension, write the names separated by spaces.') group.add_argument('--verbose', action='store_true', help='Print the prompts to the terminal.') @@ -215,6 +214,7 @@ group.add_argument('--disable_exllama', action='store_true', help='DEPRECATED') group.add_argument('--disable_exllamav2', action='store_true', help='DEPRECATED') group.add_argument('--wbits', type=int, default=0, help='DEPRECATED') group.add_argument('--groupsize', type=int, default=-1, help='DEPRECATED') +group.add_argument('--model-menu', action='store_true', help='DEPRECATED') args = parser.parse_args() args_defaults = parser.parse_args([]) diff --git a/server.py b/server.py index 31e1c4c6..1f227350 100644 --- a/server.py +++ b/server.py @@ -218,28 +218,10 @@ if __name__ == "__main__": if extension not in shared.args.extensions: shared.args.extensions.append(extension) - available_models = utils.get_available_models() - # Model defined through --model if shared.args.model is not None: shared.model_name = shared.args.model - # Select the model from a command-line menu - elif shared.args.model_menu: - if len(available_models) == 0: - logger.error('No models are available! Please download at least one.') - sys.exit(0) - else: - print('The following models are available:\n') - for i, model in enumerate(available_models): - print(f'{i+1}. {model}') - - print(f'\nWhich one do you want to load? 
1-{len(available_models)}\n') - i = int(input()) - 1 - print() - - shared.model_name = available_models[i] - # If any model has been selected, load it if shared.model_name != 'None': p = Path(shared.model_name) From 77a73cc56122dbabf8fb446f1c1c278923fbbfe9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 31 Mar 2025 21:01:27 -0300 Subject: [PATCH 08/25] Update peft requirement from ==0.12.* to ==0.15.* (#6820) --- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index 8c76ff17..5ab40c42 100644 --- a/requirements.txt +++ b/requirements.txt @@ -10,7 +10,7 @@ markdown numba==0.59.* numpy==1.26.* pandas -peft==0.12.* +peft==0.15.* Pillow>=9.5.0 psutil pydantic==2.8.2 diff --git a/requirements_amd.txt b/requirements_amd.txt index 7fcd5c0c..5f278db6 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -9,7 +9,7 @@ markdown numba==0.59.* numpy==1.26.* pandas -peft==0.12.* +peft==0.15.* Pillow>=9.5.0 psutil pydantic==2.8.2 diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 028d37cc..78d14524 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -9,7 +9,7 @@ markdown numba==0.59.* numpy==1.26.* pandas -peft==0.12.* +peft==0.15.* Pillow>=9.5.0 psutil pydantic==2.8.2 diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 284c2dd6..7ab6dfff 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -9,7 +9,7 @@ markdown numba==0.59.* numpy==1.26.* pandas -peft==0.12.* +peft==0.15.* Pillow>=9.5.0 psutil pydantic==2.8.2 diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 43615aeb..0c84a84c 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -9,7 +9,7 @@ markdown numba==0.59.* numpy==1.26.* pandas -peft==0.12.* +peft==0.15.* Pillow>=9.5.0 psutil pydantic==2.8.2 diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index 9ae8e2a7..b9eb99b1 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -9,7 +9,7 @@ markdown numba==0.59.* numpy==1.26.* pandas -peft==0.12.* +peft==0.15.* Pillow>=9.5.0 psutil pydantic==2.8.2 diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 77fe85fe..68ae5c17 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -9,7 +9,7 @@ markdown numba==0.59.* numpy==1.26.* pandas -peft==0.12.* +peft==0.15.* Pillow>=9.5.0 psutil pydantic==2.8.2 diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 4bebd865..1a2a670f 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -10,7 +10,7 @@ markdown numba==0.59.* numpy==1.26.* pandas -peft==0.12.* +peft==0.15.* Pillow>=9.5.0 psutil pydantic==2.8.2 diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index a7255c0a..3b61ca39 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -9,7 +9,7 @@ markdown numba==0.59.* numpy==1.26.* pandas -peft==0.12.* +peft==0.15.* Pillow>=9.5.0 psutil pydantic==2.8.2 From cbffcf67ef12938e6b26b7eddffa4327d83e71b0 Mon Sep 17 00:00:00 2001 From: Shixian Sheng Date: Wed, 2 Apr 2025 13:28:29 
-0400 Subject: [PATCH 09/25] Fix links in the ngrok extension README (#6826) --- extensions/ngrok/README.md | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/extensions/ngrok/README.md b/extensions/ngrok/README.md index 0324bf98..2e9eb82d 100644 --- a/extensions/ngrok/README.md +++ b/extensions/ngrok/README.md @@ -9,9 +9,9 @@ the `settings.json` file, see the Examples below. Retrieve your authtoken on the # Documentation -For a list of all available options, see [the configuration documentation](https://ngrok.com/docs/ngrok-agent/config/) or [the connect example](https://github.com/ngrok/ngrok-py/blob/main/examples/ngrok-connect-full.py). +For a list of all available options, see [the configuration documentation](https://ngrok.com/docs/ngrok-agent/config/) or [the forward example](https://github.com/ngrok/ngrok-python/blob/main/examples/ngrok-forward-full.py). -The ngrok Python SDK is [on github here](https://github.com/ngrok/ngrok-py). A quickstart guide and a full API reference are included in the [ngrok-py Python API documentation](https://ngrok.github.io/ngrok-py/). +The ngrok Python SDK is [on github here](https://github.com/ngrok/ngrok-py). A quickstart guide and a full API reference are included in the [ngrok-py Python API documentation](https://ngrok.github.io/ngrok-python/). # Running @@ -66,4 +66,4 @@ To add an authtoken instead of using the NGROK_AUTHTOKEN environment variable: "authtoken_from_env":false } } -``` \ No newline at end of file +``` From c010cea7be0ba17623c16d8a2951d55c952d6ba0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 6 Apr 2025 17:17:25 -0700 Subject: [PATCH 10/25] Remove CUDA 11.8 support --- one_click.py | 51 ++++++++++++++------------------------------------- 1 file changed, 14 insertions(+), 37 deletions(-) diff --git a/one_click.py b/one_click.py index effc7d43..2a161e11 100644 --- a/one_click.py +++ b/one_click.py @@ -106,9 +106,7 @@ def update_pytorch(): torver = torch_version() base_cmd = f"python -m pip install --upgrade torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION}" - if "+cu118" in torver: - install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/cu118" - elif "+cu" in torver: + if "+cu" in torver: install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/cu121" elif "+rocm" in torver: install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/rocm6.1" @@ -236,24 +234,21 @@ def install_webui(): choice = os.environ["GPU_CHOICE"].upper() print_big_message(f"Selected GPU choice \"{choice}\" based on the GPU_CHOICE environment variable.") - # Warn about changed meanings and handle old NVIDIA choice + # Warn about changed meanings and handle old choices if choice == "B": - print_big_message("Warning: GPU_CHOICE='B' now means 'NVIDIA (CUDA 11.8)' in the new version.") + print_big_message("Warning: GPU_CHOICE='B' now means 'AMD' in the new version.") elif choice == "C": - print_big_message("Warning: GPU_CHOICE='C' now means 'AMD' in the new version.") + print_big_message("Warning: GPU_CHOICE='C' now means 'Apple M Series' in the new version.") elif choice == "D": - print_big_message("Warning: GPU_CHOICE='D' now means 'Apple M Series' in the new version.") - elif choice == "A" and "USE_CUDA118" in os.environ: - choice = "B" if os.environ.get("USE_CUDA118", "").lower() in ("yes", "y", "true", "1", "t", "on") else "A" + print_big_message("Warning: GPU_CHOICE='D' now means 'Intel Arc' in the new 
version.") else: choice = get_user_choice( "What is your GPU?", { - 'A': 'NVIDIA - CUDA 12.1 (recommended)', - 'B': 'NVIDIA - CUDA 11.8 (legacy GPUs)', - 'C': 'AMD - Linux/macOS only, requires ROCm 6.1', - 'D': 'Apple M Series', - 'E': 'Intel Arc (beta)', + 'A': 'NVIDIA - CUDA 12.1', + 'B': 'AMD - Linux/macOS only, requires ROCm 6.1', + 'C': 'Apple M Series', + 'D': 'Intel Arc (beta)', 'N': 'CPU mode' }, ) @@ -261,15 +256,13 @@ def install_webui(): # Convert choices to GPU names for compatibility gpu_choice_to_name = { "A": "NVIDIA", - "B": "NVIDIA", - "C": "AMD", - "D": "APPLE", - "E": "INTEL", + "B": "AMD", + "C": "APPLE", + "D": "INTEL", "N": "NONE" } selected_gpu = gpu_choice_to_name[choice] - use_cuda118 = (choice == "B") # CUDA version is now determined by menu choice # Write a flag to CMD_FLAGS.txt for CPU mode if selected_gpu == "NONE": @@ -280,10 +273,7 @@ def install_webui(): # Handle CUDA version display elif any((is_windows(), is_linux())) and selected_gpu == "NVIDIA": - if use_cuda118: - print("CUDA: 11.8") - else: - print("CUDA: 12.1") + print("CUDA: 12.1") # No PyTorch for AMD on Windows (?) elif is_windows() and selected_gpu == "AMD": @@ -294,10 +284,7 @@ def install_webui(): install_pytorch = f"python -m pip install torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION} " if selected_gpu == "NVIDIA": - if use_cuda118 == 'Y': - install_pytorch += "--index-url https://download.pytorch.org/whl/cu118" - else: - install_pytorch += "--index-url https://download.pytorch.org/whl/cu121" + install_pytorch += "--index-url https://download.pytorch.org/whl/cu121" elif selected_gpu == "AMD": install_pytorch += "--index-url https://download.pytorch.org/whl/rocm6.1" elif selected_gpu in ["APPLE", "NONE"]: @@ -434,16 +421,6 @@ def update_requirements(initial_installation=False, pull=True): if not initial_installation and not wheels_changed: textgen_requirements = [line for line in textgen_requirements if '.whl' not in line] - if "+cu118" in torver: - textgen_requirements = [ - req.replace('+cu121', '+cu118').replace('+cu122', '+cu118') - for req in textgen_requirements - if "autoawq" not in req.lower() - ] - - if is_windows() and "+cu118" in torver: # No flash-attention on Windows for CUDA 11 - textgen_requirements = [req for req in textgen_requirements if 'oobabooga/flash-attention' not in req] - with open('temp_requirements.txt', 'w') as file: file.write('\n'.join(textgen_requirements)) From a8a64b6c1cf0f609348fa0182308209acd11f563 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 6 Apr 2025 17:40:18 -0700 Subject: [PATCH 11/25] Update the README --- README.md | 13 ------------- 1 file changed, 13 deletions(-) diff --git a/README.md b/README.md index 40c242c8..542e1ae1 100644 --- a/README.md +++ b/README.md @@ -143,19 +143,6 @@ Then browse to 3) Manually install AutoGPTQ: [Installation](https://github.com/PanQiWei/AutoGPTQ#install-from-source). * Perform the from-source installation - there are no prebuilt ROCm packages for Windows. -##### Older NVIDIA GPUs - -1) For Kepler GPUs and older, you will need to install CUDA 11.8 instead of 12: - -``` -pip3 install torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1 --index-url https://download.pytorch.org/whl/cu118 -conda install -y -c "nvidia/label/cuda-11.8.0" cuda-runtime -``` - -2) bitsandbytes >= 0.39 may not work. 
In that case, to use `--load-in-8bit`, you may have to downgrade like this: - * Linux: `pip install bitsandbytes==0.38.1` - * Windows: `pip install https://github.com/jllllll/bitsandbytes-windows-webui/raw/main/bitsandbytes-0.38.1-py3-none-any.whl` - ##### Manual install The `requirements*.txt` above contain various wheels precompiled through GitHub Actions. If you wish to compile things manually, or if you need to because no suitable wheels are available for your hardware, you can use `requirements_nowheels.txt` and then install your desired loaders manually. From eef90a4964d00a94525d7c8ec9dd9ed90c193546 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 6 Apr 2025 17:44:07 -0700 Subject: [PATCH 12/25] Update some intel arc installation commands --- one_click.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/one_click.py b/one_click.py index 2a161e11..72626010 100644 --- a/one_click.py +++ b/one_click.py @@ -302,9 +302,9 @@ def install_webui(): if selected_gpu == "INTEL": # Install oneAPI dependencies via conda print_big_message("Installing Intel oneAPI runtime libraries.") - run_cmd("conda install -y -c https://software.repos.intel.com/python/conda/ -c conda-forge dpcpp-cpp-rt=2024.0 mkl-dpcpp=2024.0") + run_cmd("conda install -y -c https://software.repos.intel.com/python/conda/ -c conda-forge dpcpp-cpp-rt=2024.0 mkl-dpcpp=2024.0", environment=True) # Install libuv required by Intel-patched torch - run_cmd("conda install -y libuv") + run_cmd("conda install -y libuv", environment=True) # Install the webui requirements update_requirements(initial_installation=True, pull=False) From 204db283623a277d2831e0952814b7f0890ef1c6 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 6 Apr 2025 18:48:31 -0700 Subject: [PATCH 13/25] Update the dockerfiles --- docker/amd/Dockerfile | 2 +- docker/intel/Dockerfile | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/docker/amd/Dockerfile b/docker/amd/Dockerfile index cfbcf7e4..66e5863c 100644 --- a/docker/amd/Dockerfile +++ b/docker/amd/Dockerfile @@ -13,7 +13,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \ WORKDIR /home/app/ RUN git clone https://github.com/oobabooga/text-generation-webui.git WORKDIR /home/app/text-generation-webui -RUN GPU_CHOICE=C LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose +RUN GPU_CHOICE=B LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose COPY CMD_FLAGS.txt /home/app/text-generation-webui/ EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005} WORKDIR /home/app/text-generation-webui diff --git a/docker/intel/Dockerfile b/docker/intel/Dockerfile index d2ed671e..cab62442 100644 --- a/docker/intel/Dockerfile +++ b/docker/intel/Dockerfile @@ -13,7 +13,7 @@ RUN --mount=type=cache,target=/var/cache/apt,sharing=locked,rw \ WORKDIR /home/app/ RUN git clone https://github.com/oobabooga/text-generation-webui.git WORKDIR /home/app/text-generation-webui -RUN GPU_CHOICE=E LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose +RUN GPU_CHOICE=D LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose COPY CMD_FLAGS.txt /home/app/text-generation-webui/ EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005} # set umask to ensure group read / write at runtime From f1f32386b4338ef71cf2c23f93d6aa00b53b545c Mon Sep 17 
00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 7 Apr 2025 19:29:39 -0300 Subject: [PATCH 14/25] Update transformers requirement from ==4.50.* to ==4.51.* (#6834) --- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index 5ab40c42..19f2124c 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,7 +21,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb diff --git a/requirements_amd.txt b/requirements_amd.txt index 5f278db6..99142de3 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 78d14524..cdb6cff4 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 7ab6dfff..9dc36546 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 0c84a84c..e1fe8eaa 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index b9eb99b1..ade65fbe 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 68ae5c17..2bcfe715 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 1a2a670f..1db48c22 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -21,7 +21,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index 3b61ca39..bb9ea97c 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb From a5855c345cc3e361bc8a436daf995fe6a2a5dd33 Mon Sep 17 00:00:00 2001 From: oobabooga Date: Mon, 7 Apr 2025 21:42:33 -0300 Subject: [PATCH 15/25] Set context lengths to at most 8192 by default (to prevent out of memory errors) (#6835) --- modules/models_settings.py | 7 +++++-- modules/shared.py | 6 +++--- modules/ui_model_menu.py | 10 +++++----- modules/ui_parameters.py | 2 +- 
settings-template.yaml | 2 +- 5 files changed, 15 insertions(+), 12 deletions(-) diff --git a/modules/models_settings.py b/modules/models_settings.py index 8d658523..b67d28a0 100644 --- a/modules/models_settings.py +++ b/modules/models_settings.py @@ -17,6 +17,7 @@ def get_fallback_settings(): 'compress_pos_emb': 1, 'alpha_value': 1, 'truncation_length': shared.settings['truncation_length'], + 'truncation_length_info': shared.settings['truncation_length'], 'skip_special_tokens': shared.settings['skip_special_tokens'], 'custom_stopping_strings': shared.settings['custom_stopping_strings'], } @@ -53,7 +54,8 @@ def get_model_metadata(model): for k in metadata: if k.endswith('context_length'): - model_settings['n_ctx'] = metadata[k] + model_settings['n_ctx'] = min(metadata[k], 8192) + model_settings['truncation_length_info'] = metadata[k] elif k.endswith('rope.freq_base'): model_settings['rope_freq_base'] = metadata[k] elif k.endswith('rope.scale_linear'): @@ -89,7 +91,8 @@ def get_model_metadata(model): for k in ['max_position_embeddings', 'model_max_length', 'max_seq_len']: if k in metadata: model_settings['truncation_length'] = metadata[k] - model_settings['max_seq_len'] = metadata[k] + model_settings['truncation_length_info'] = metadata[k] + model_settings['max_seq_len'] = min(metadata[k], 8192) if 'rope_theta' in metadata: model_settings['rope_freq_base'] = metadata['rope_theta'] diff --git a/modules/shared.py b/modules/shared.py index ea6c581a..77bd7639 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -53,7 +53,7 @@ settings = { 'skip_special_tokens': True, 'stream': True, 'static_cache': False, - 'truncation_length': 2048, + 'truncation_length': 8192, 'seed': -1, 'custom_stopping_strings': '', 'custom_token_bans': '', @@ -117,7 +117,7 @@ group.add_argument('--quant_type', type=str, default='nf4', help='quant_type for group = parser.add_argument_group('llama.cpp') group.add_argument('--flash-attn', action='store_true', help='Use flash-attention.') group.add_argument('--tensorcores', action='store_true', help='NVIDIA only: use llama-cpp-python compiled without GGML_CUDA_FORCE_MMQ. This may improve performance on newer cards.') -group.add_argument('--n_ctx', type=int, default=2048, help='Size of the prompt context.') +group.add_argument('--n_ctx', type=int, default=8192, help='Size of the prompt context.') group.add_argument('--threads', type=int, default=0, help='Number of threads to use.') group.add_argument('--threads-batch', type=int, default=0, help='Number of threads to use for batches/prompt processing.') group.add_argument('--no_mul_mat_q', action='store_true', help='Disable the mulmat kernels.') @@ -139,7 +139,7 @@ group.add_argument('--tokenizer-dir', type=str, help='Load the tokenizer from th group = parser.add_argument_group('ExLlamaV2') group.add_argument('--gpu-split', type=str, help='Comma-separated list of VRAM (in GB) to use per GPU device for model layers. Example: 20,7,7.') group.add_argument('--autosplit', action='store_true', help='Autosplit the model tensors across the available GPUs. This causes --gpu-split to be ignored.') -group.add_argument('--max_seq_len', type=int, default=2048, help='Maximum sequence length.') +group.add_argument('--max_seq_len', type=int, default=8192, help='Maximum sequence length.') group.add_argument('--cfg-cache', action='store_true', help='ExLlamav2_HF: Create an additional cache for CFG negative prompts. 
Necessary to use CFG with that loader.') group.add_argument('--no_flash_attn', action='store_true', help='Force flash-attention to not be used.') group.add_argument('--no_xformers', action='store_true', help='Force xformers to not be used.') diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index 1264a9fd..c23b991a 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -200,8 +200,10 @@ def create_event_handlers(): def load_model_wrapper(selected_model, loader, autoload=False): + settings = get_model_metadata(selected_model) + if not autoload: - yield f"The settings for `{selected_model}` have been updated.\n\nClick on \"Load\" to load it." + yield "### {}\n\n- Settings updated: Click \"Load\" to load the model\n- Max sequence length: {}".format(selected_model, settings['truncation_length_info']) return if selected_model == 'None': @@ -214,11 +216,9 @@ def load_model_wrapper(selected_model, loader, autoload=False): shared.model, shared.tokenizer = load_model(selected_model, loader) if shared.model is not None: - output = f"Successfully loaded `{selected_model}`." - - settings = get_model_metadata(selected_model) + output = f"Successfully loaded `{selected_model}`.\n\n" if 'instruction_template' in settings: - output += '\n\nIt seems to be an instruction-following model with template "{}". In the chat tab, instruct or chat-instruct modes should be used.'.format(settings['instruction_template']) + output += '- It seems to be an instruction-following model with template "{}". In the chat tab, instruct or chat-instruct modes should be used.\n'.format(settings['instruction_template']) yield output else: diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py index 846fcfe7..c3245a9d 100644 --- a/modules/ui_parameters.py +++ b/modules/ui_parameters.py @@ -87,7 +87,7 @@ def create_ui(default_preset): shared.gradio['static_cache'] = gr.Checkbox(value=shared.settings['static_cache'], label='Static KV cache', info='Use a static cache for improved performance.') with gr.Column(): - shared.gradio['truncation_length'] = gr.Number(precision=0, step=256, value=get_truncation_length(), label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length. 
Most models require this to be at most 2048.') + shared.gradio['truncation_length'] = gr.Number(precision=0, step=256, value=get_truncation_length(), label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length.') shared.gradio['seed'] = gr.Number(value=shared.settings['seed'], label='Seed (-1 for random)') shared.gradio['sampler_priority'] = gr.Textbox(value=generate_params['sampler_priority'], lines=12, label='Sampler priority', info='Parameter names separated by new lines or commas.', elem_classes=['add_scrollbar']) diff --git a/settings-template.yaml b/settings-template.yaml index 74935a60..0343df0a 100644 --- a/settings-template.yaml +++ b/settings-template.yaml @@ -25,7 +25,7 @@ add_bos_token: true skip_special_tokens: true stream: true static_cache: false -truncation_length: 2048 +truncation_length: 8192 seed: -1 custom_stopping_strings: '' custom_token_bans: '' From bf48ec8c449cbf58172c6f3ab83dd6a844b7994f Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 7 Apr 2025 17:43:41 -0700 Subject: [PATCH 16/25] Remove an unnecessary UI message --- modules/ui_model_menu.py | 6 +----- 1 file changed, 1 insertion(+), 5 deletions(-) diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index c23b991a..4fc1de08 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -216,11 +216,7 @@ def load_model_wrapper(selected_model, loader, autoload=False): shared.model, shared.tokenizer = load_model(selected_model, loader) if shared.model is not None: - output = f"Successfully loaded `{selected_model}`.\n\n" - if 'instruction_template' in settings: - output += '- It seems to be an instruction-following model with template "{}". In the chat tab, instruct or chat-instruct modes should be used.\n'.format(settings['instruction_template']) - - yield output + yield f"Successfully loaded `{selected_model}`." else: yield f"Failed to load `{selected_model}`." 
except: From 649ee729c126c3396b9f97c3cbfc8db8e2e6f7f0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 8 Apr 2025 09:22:06 -0700 Subject: [PATCH 17/25] Remove Python 3.10 support --- requirements.txt | 10 ---------- requirements_amd.txt | 4 ---- requirements_amd_noavx2.txt | 3 --- requirements_apple_intel.txt | 2 -- requirements_apple_silicon.txt | 3 --- requirements_cpu_only.txt | 2 -- requirements_cpu_only_noavx2.txt | 2 -- requirements_noavx2.txt | 10 ---------- 8 files changed, 36 deletions(-) diff --git a/requirements.txt b/requirements.txt index 19f2124c..e13cf984 100644 --- a/requirements.txt +++ b/requirements.txt @@ -33,29 +33,19 @@ tiktoken # llama-cpp-python (CPU only, AVX2) https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, with GGML_CUDA_FORCE_MMQ) https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # llama-cpp-python (CUDA, without GGML_CUDA_FORCE_MMQ) https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu121-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu121-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == 
"x86_64" and python_version == "3.10" # CUDA wheels https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu121.torch2.4.1-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu121.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" https://github.com/oobabooga/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" diff --git a/requirements_amd.txt b/requirements_amd.txt index 99142de3..adc77d32 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -32,13 +32,9 @@ tiktoken # llama-cpp-python (CPU only, AVX2) https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.8+rocm6.1.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.8+rocm6.1.2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" 
https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index cdb6cff4..22ee57b4 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -32,11 +32,8 @@ tiktoken # llama-cpp-python (CPU only, no AVX2) https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # AMD wheels https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+rocm6.1.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 9dc36546..553db45a 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -32,7 +32,5 @@ tiktoken # Mac wheels https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp311-cp311-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp310-cp310-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp310-cp310-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and 
python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index e1fe8eaa..e30ce816 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -32,9 +32,6 @@ tiktoken # Mac wheels https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp311-cp311-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp310-cp310-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp310-cp310-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp310-cp310-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index ade65fbe..e849a451 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -32,6 +32,4 @@ tiktoken # llama-cpp-python (CPU only, AVX2) https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 2bcfe715..e10782c9 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -32,6 +32,4 @@ tiktoken # llama-cpp-python (CPU only, no AVX2) https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and 
platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 1db48c22..ab7e59fc 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -33,29 +33,19 @@ tiktoken # llama-cpp-python (CPU only, no AVX2) https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" # llama-cpp-python (CUDA, with GGML_CUDA_FORCE_MMQ) https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # llama-cpp-python (CUDA, without GGML_CUDA_FORCE_MMQ) https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu121avx-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" 
-https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu121avx-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" # CUDA wheels https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu121.torch2.4.1-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu121.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" https://github.com/oobabooga/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10" https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10" From 0b3503c91fcd3eaf6e0b93de4384794648406ba7 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 8 Apr 2025 12:26:03 -0700 Subject: [PATCH 18/25] Revert "Update transformers requirement from ==4.50.* to ==4.51.* (#6834)" This reverts commit f1f32386b4338ef71cf2c23f93d6aa00b53b545c. 
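[Editor's aside, not part of the patch] Patches 18, 23 and 24 in this series flip the transformers pin back and forth between 4.50.* and 4.51.*, ending on 4.50.*. A quick illustrative check of which release is actually active in an environment after applying the series (this snippet is an assumption-free version probe, not project code):

```python
# Illustrative check only: confirm the installed transformers release after the
# pin changes in this patch series (the series ends pinned to 4.50.*).
import transformers

print(transformers.__version__)
assert transformers.__version__.startswith("4.50"), transformers.__version__
```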
--- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index e13cf984..4cf99b69 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,7 +21,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb diff --git a/requirements_amd.txt b/requirements_amd.txt index adc77d32..0d205725 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 22ee57b4..93a46a64 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 553db45a..00353bfd 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index e30ce816..7076b386 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index e849a451..c7e2687c 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index e10782c9..2003c544 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index ab7e59fc..d5f456f8 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -21,7 +21,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index bb9ea97c..3b61ca39 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb From 8b8d39ec4e66affac03c22176ac368785095f584 Mon Sep 17 00:00:00 2001 From: oobabooga Date: Wed, 9 Apr 2025 00:07:08 -0300 Subject: [PATCH 19/25] Add ExLlamaV3 support (#6832) --- README.md | 24 ++--- modules/exllamav3_hf.py | 179 +++++++++++++++++++++++++++++++++ modules/loaders.py | 56 ++++++++++- modules/models.py | 17 +++- modules/models_settings.py | 4 +- modules/shared.py | 4 +- one_click.py | 50 +++++++-- requirements.txt | 18 ++-- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 1 + requirements_apple_silicon.txt | 1 + requirements_noavx2.txt | 18 ++-- 13 
files changed, 322 insertions(+), 54 deletions(-) create mode 100644 modules/exllamav3_hf.py diff --git a/README.md b/README.md index 542e1ae1..63b8931a 100644 --- a/README.md +++ b/README.md @@ -12,7 +12,7 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. ## Features -- Supports multiple text generation backends in one UI/API, including [Transformers](https://github.com/huggingface/transformers), [llama.cpp](https://github.com/ggerganov/llama.cpp), and [ExLlamaV2](https://github.com/turboderp-org/exllamav2). [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) is supported via its own [Dockerfile](https://github.com/oobabooga/text-generation-webui/blob/main/docker/TensorRT-LLM/Dockerfile), and the Transformers loader is compatible with libraries like [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), [AutoAWQ](https://github.com/casper-hansen/AutoAWQ), [HQQ](https://github.com/mobiusml/hqq), and [AQLM](https://github.com/Vahe1994/AQLM), but they must be installed manually. +- Supports multiple text generation backends in one UI/API, including [Transformers](https://github.com/huggingface/transformers), [llama.cpp](https://github.com/ggerganov/llama.cpp), [ExLlamaV3](https://github.com/turboderp-org/exllamav3), and [ExLlamaV2](https://github.com/turboderp-org/exllamav2). [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) is supported via its own [Dockerfile](https://github.com/oobabooga/text-generation-webui/blob/main/docker/TensorRT-LLM/Dockerfile), and the Transformers loader is compatible with libraries like [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), [AutoAWQ](https://github.com/casper-hansen/AutoAWQ), [HQQ](https://github.com/mobiusml/hqq), and [AQLM](https://github.com/Vahe1994/AQLM), but they must be installed manually. - OpenAI-compatible API with Chat and Completions endpoints – see [examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples). - Automatic prompt formatting using Jinja2 templates. - Three chat modes: `instruct`, `chat-instruct`, and `chat`, with automatic prompt templates in `chat-instruct`. 
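[Editor's aside, not part of the diff] The ExLlamaV3 support mentioned in the feature list above is implemented by the new `modules/exllamav3_hf.py` added further down in this patch: it wraps an ExLlamaV3 model in a Transformers `PreTrainedModel` interface, and the loader is normally chosen with `--loader ExLlamav3_HF` or autodetected from an `exl3` suffix in the model folder name (see the `models_settings.py` hunk below). A minimal sketch of loading a quant through the wrapper directly, with a hypothetical model folder name and assuming the webui's usual `shared.args` namespace and default model directory:

```python
# Minimal sketch under stated assumptions (hypothetical model folder, default
# argument namespace); not part of the patch itself.
from modules import shared
from modules.exllamav3_hf import Exllamav3HF

shared.args.model_dir = "models"    # assumed default model directory
shared.args.max_seq_len = 8192      # rounded up to a multiple of 256 by the wrapper
shared.args.gpu_split = ""          # e.g. "20,24" to split the weights across two GPUs

model = Exllamav3HF.from_pretrained("MyModel-exl3")  # hypothetical folder name
```

Because the wrapper subclasses `PreTrainedModel` and returns `CausalLMOutputWithPast`, generation then flows through the standard Transformers `generate()` path, which is what lets the existing HF samplers listed in `loaders.py` apply to ExLlamaV3 models.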
@@ -78,25 +78,19 @@ conda activate textgen | System | GPU | Command | |--------|---------|---------| -| Linux/WSL | NVIDIA | `pip3 install torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1 --index-url https://download.pytorch.org/whl/cu121` | -| Linux/WSL | CPU only | `pip3 install torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1 --index-url https://download.pytorch.org/whl/cpu` | -| Linux | AMD | `pip3 install torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1 --index-url https://download.pytorch.org/whl/rocm6.1` | -| MacOS + MPS | Any | `pip3 install torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1` | -| Windows | NVIDIA | `pip3 install torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1 --index-url https://download.pytorch.org/whl/cu121` | -| Windows | CPU only | `pip3 install torch==2.4.1 torchvision==0.19.1 torchaudio==2.4.1` | +| Linux/WSL | NVIDIA | `pip3 install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/cu124` | +| Linux/WSL | CPU only | `pip3 install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/cpu` | +| Linux | AMD | `pip3 install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/rocm6.1` | +| MacOS + MPS | Any | `pip3 install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0` | +| Windows | NVIDIA | `pip3 install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0 --index-url https://download.pytorch.org/whl/cu124` | +| Windows | CPU only | `pip3 install torch==2.6.0 torchvision==0.21.0 torchaudio==2.6.0` | The up-to-date commands can be found here: https://pytorch.org/get-started/locally/. -For NVIDIA, you also need to install the CUDA runtime libraries: +If you need `nvcc` to compile some library manually, you will additionally need to install this: ``` -conda install -y -c "nvidia/label/cuda-12.1.1" cuda-runtime -``` - -If you need `nvcc` to compile some library manually, replace the command above with - -``` -conda install -y -c "nvidia/label/cuda-12.1.1" cuda +conda install -y -c "nvidia/label/cuda-12.4.1" cuda ``` #### 3. Install the web UI diff --git a/modules/exllamav3_hf.py b/modules/exllamav3_hf.py new file mode 100644 index 00000000..3bf44c9b --- /dev/null +++ b/modules/exllamav3_hf.py @@ -0,0 +1,179 @@ +import os +import traceback +from pathlib import Path +from typing import Any, Dict, Optional, Union + +import torch +from exllamav3 import Cache, Config, Model +from torch.nn import CrossEntropyLoss +from transformers import GenerationConfig, PretrainedConfig, PreTrainedModel +from transformers.modeling_outputs import CausalLMOutputWithPast + +from modules import shared +from modules.logging_colors import logger + +try: + import flash_attn +except Exception: + logger.warning('Failed to load flash-attention due to the following error:\n') + traceback.print_exc() + + +class Exllamav3HF(PreTrainedModel): + def __init__(self, model_dir): + super().__init__(PretrainedConfig()) + self.generation_config = GenerationConfig() + + config = Config.from_directory(model_dir) + self.ex_model = Model.from_config(config) + + # Calculate the closest multiple of 256 at or above the chosen value + max_tokens = shared.args.max_seq_len + if max_tokens % 256 != 0: + adjusted_tokens = ((max_tokens // 256) + 1) * 256 + logger.warning(f"max_num_tokens must be a multiple of 256. 
Adjusting from {max_tokens} to {adjusted_tokens}") + max_tokens = adjusted_tokens + + self.ex_cache = Cache(self.ex_model, max_num_tokens=max_tokens) + + # Create load parameters dictionary + load_params = {'progressbar': True} + if shared.args.gpu_split: + split = [float(alloc) for alloc in shared.args.gpu_split.split(",")] + load_params['use_per_device'] = split + + self.ex_model.load(**load_params) + self.past_seq = None + self.max_tokens = max_tokens + + def _validate_model_class(self): + pass + + def _validate_model_kwargs(self, model_kwargs: Dict[str, Any]): + pass + + def prepare_inputs_for_generation(self, input_ids, **kwargs): + return {'input_ids': input_ids, **kwargs} + + @property + def device(self) -> torch.device: + return torch.device(0) + + def __call__(self, *args, **kwargs): + use_cache = kwargs.get('use_cache', True) + labels = kwargs.get('labels', None) + past_key_values = kwargs.get('past_key_values', None) + + if len(args) > 0: + if not shared.args.cfg_cache: + logger.error("Please enable the cfg-cache option to use CFG with ExLlamav3_HF.") + return + + input_ids = args[0] + is_negative = True + past_seq = self.past_seq_negative + ex_cache = self.ex_cache_negative + else: + input_ids = kwargs['input_ids'] + is_negative = False + past_seq = self.past_seq + ex_cache = self.ex_cache + + seq = input_ids[0].tolist() + if is_negative and past_key_values is not None: + seq = past_key_values + seq + + seq_tensor = torch.tensor(seq) + reset = True + + # Make the forward call + if labels is None: + if past_seq is not None: + min_length = min(past_seq.shape[0], seq_tensor.shape[0]) + indices = torch.nonzero(~torch.eq(past_seq[:min_length], seq_tensor[:min_length])) + if len(indices) > 0: + longest_prefix = indices[0].item() + else: + longest_prefix = min_length + + if longest_prefix > 0: + reset = False + current_len = longest_prefix + if len(seq_tensor) - longest_prefix > 1: + self.ex_model.forward( + input_ids=seq_tensor[longest_prefix:-1].view(1, -1), + params={ + "attn_mode": "flash_attn", + "cache": ex_cache, + "past_len": longest_prefix, + "batch_shape": (1, self.max_tokens) + } + ) + + current_len = longest_prefix + len(seq_tensor) - longest_prefix - 1 + + if reset: + if len(seq_tensor) > 1: + self.ex_model.forward( + input_ids=seq_tensor[:-1].view(1, -1), + params={ + "attn_mode": "flash_attn", + "cache": ex_cache, + "past_len": 0, + "batch_shape": (1, self.max_tokens) + } + ) + + current_len = len(seq_tensor) - 1 + else: + current_len = 0 + + logits = self.ex_model.forward( + input_ids=seq_tensor[-1:].view(1, -1), + params={ + "attn_mode": "flash_attn", + "cache": ex_cache, + "past_len": current_len, + "batch_shape": (1, self.max_tokens) + } + ).to(input_ids.device).float() + else: + logits = self.ex_model.forward( + input_ids=seq_tensor.view(1, -1), + params={ + "attn_mode": "flash_attn", + "cache": ex_cache, + "past_len": 0, + "batch_shape": (1, self.max_tokens) + } + ).float() + + if is_negative: + self.past_seq_negative = seq_tensor + else: + self.past_seq = seq_tensor + + loss = None + if labels is not None: + # Shift so that tokens < n predict n + shift_logits = logits[..., :-1, :].contiguous() + shift_labels = labels[..., 1:].contiguous() + # Flatten the tokens + loss_fct = CrossEntropyLoss() + shift_logits = shift_logits.view(-1, logits.shape[-1]) + shift_labels = shift_labels.view(-1) + # Enable model parallelism + shift_labels = shift_labels.to(shift_logits.device) + loss = loss_fct(shift_logits, shift_labels) + + return CausalLMOutputWithPast(logits=logits, 
past_key_values=seq if use_cache else None, loss=loss) + + @classmethod + def from_pretrained(cls, pretrained_model_name_or_path: Optional[Union[str, os.PathLike]], *model_args, **kwargs): + assert len(model_args) == 0 and len(kwargs) == 0, "extra args is currently not supported" + if isinstance(pretrained_model_name_or_path, str): + pretrained_model_name_or_path = Path(pretrained_model_name_or_path) + + pretrained_model_name_or_path = Path(f'{shared.args.model_dir}') / Path(pretrained_model_name_or_path) + + return Exllamav3HF(pretrained_model_name_or_path) diff --git a/modules/loaders.py b/modules/loaders.py index 88ded1d1..980a13e6 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -23,7 +23,6 @@ loaders_and_params = OrderedDict({ 'use_double_quant', 'use_eager_attention', 'bf16', - 'trust_remote_code', 'no_use_fast', ], @@ -76,6 +75,13 @@ loaders_and_params = OrderedDict({ 'no_use_fast', 'llamacpp_HF_info', ], + 'ExLlamav3_HF': [ + 'max_seq_len', + 'gpu_split', + 'cfg_cache', + 'trust_remote_code', + 'no_use_fast', + ], 'ExLlamav2_HF': [ 'max_seq_len', 'cache_type', @@ -174,30 +180,38 @@ def transformers_samplers(): loaders_samplers = { 'Transformers': transformers_samplers(), 'HQQ': transformers_samplers(), - 'ExLlamav2': { + 'ExLlamav3_HF': { 'temperature', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent', 'smoothing_factor', + 'smoothing_curve', 'min_p', 'top_p', 'top_k', 'typical_p', 'xtc_threshold', 'xtc_probability', + 'epsilon_cutoff', + 'eta_cutoff', 'tfs', 'top_a', + 'top_n_sigma', 'dry_multiplier', 'dry_allowed_length', 'dry_base', 'repetition_penalty', 'frequency_penalty', 'presence_penalty', + 'encoder_repetition_penalty', + 'no_repeat_ngram_size', 'repetition_penalty_range', + 'guidance_scale', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', + 'do_sample', 'dynamic_temperature', 'temperature_last', 'auto_max_new_tokens', @@ -205,8 +219,12 @@ loaders_samplers = { 'add_bos_token', 'skip_special_tokens', 'seed', + 'sampler_priority', 'custom_token_bans', + 'negative_prompt', 'dry_sequence_breakers', + 'grammar_string', + 'grammar_file_row', }, 'ExLlamav2_HF': { 'temperature', @@ -254,6 +272,40 @@ loaders_samplers = { 'grammar_string', 'grammar_file_row', }, + 'ExLlamav2': { + 'temperature', + 'dynatemp_low', + 'dynatemp_high', + 'dynatemp_exponent', + 'smoothing_factor', + 'min_p', + 'top_p', + 'top_k', + 'typical_p', + 'xtc_threshold', + 'xtc_probability', + 'tfs', + 'top_a', + 'dry_multiplier', + 'dry_allowed_length', + 'dry_base', + 'repetition_penalty', + 'frequency_penalty', + 'presence_penalty', + 'repetition_penalty_range', + 'mirostat_mode', + 'mirostat_tau', + 'mirostat_eta', + 'dynamic_temperature', + 'temperature_last', + 'auto_max_new_tokens', + 'ban_eos_token', + 'add_bos_token', + 'skip_special_tokens', + 'seed', + 'custom_token_bans', + 'dry_sequence_breakers', + }, 'llama.cpp': { 'temperature', 'min_p', diff --git a/modules/models.py b/modules/models.py index 3951fe82..288bc1b6 100644 --- a/modules/models.py +++ b/modules/models.py @@ -69,8 +69,9 @@ def load_model(model_name, loader=None): 'Transformers': huggingface_loader, 'llama.cpp': llamacpp_loader, 'llamacpp_HF': llamacpp_HF_loader, - 'ExLlamav2': ExLlamav2_loader, + 'ExLlamav3_HF': ExLlamav3_HF_loader, 'ExLlamav2_HF': ExLlamav2_HF_loader, + 'ExLlamav2': ExLlamav2_loader, 'HQQ': HQQ_loader, 'TensorRT-LLM': TensorRT_LLM_loader, } @@ -304,11 +305,10 @@ def llamacpp_HF_loader(model_name): return model -def ExLlamav2_loader(model_name): - from modules.exllamav2 import Exllamav2Model +def 
ExLlamav3_HF_loader(model_name): + from modules.exllamav3_hf import Exllamav3HF - model, tokenizer = Exllamav2Model.from_pretrained(model_name) - return model, tokenizer + return Exllamav3HF.from_pretrained(model_name) def ExLlamav2_HF_loader(model_name): @@ -317,6 +317,13 @@ def ExLlamav2_HF_loader(model_name): return Exllamav2HF.from_pretrained(model_name) +def ExLlamav2_loader(model_name): + from modules.exllamav2 import Exllamav2Model + + model, tokenizer = Exllamav2Model.from_pretrained(model_name) + return model, tokenizer + + def HQQ_loader(model_name): try: from hqq.core.quantize import HQQBackend, HQQLinear diff --git a/modules/models_settings.py b/modules/models_settings.py index b67d28a0..51994e23 100644 --- a/modules/models_settings.py +++ b/modules/models_settings.py @@ -158,14 +158,14 @@ def infer_loader(model_name, model_settings): path_to_model = Path(f'{shared.args.model_dir}/{model_name}') if not path_to_model.exists(): loader = None - elif (path_to_model / 'quantize_config.json').exists(): # Old GPTQ metadata file - loader = 'ExLlamav2_HF' elif len(list(path_to_model.glob('*.gguf'))) > 0 and path_to_model.is_dir() and (path_to_model / 'tokenizer_config.json').exists(): loader = 'llamacpp_HF' elif len(list(path_to_model.glob('*.gguf'))) > 0: loader = 'llama.cpp' elif re.match(r'.*\.gguf', model_name.lower()): loader = 'llama.cpp' + elif re.match(r'.*exl3', model_name.lower()): + loader = 'ExLlamav3_HF' elif re.match(r'.*exl2', model_name.lower()): loader = 'ExLlamav2_HF' elif re.match(r'.*-hqq', model_name.lower()): diff --git a/modules/shared.py b/modules/shared.py index 77bd7639..0981f6fb 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -86,7 +86,7 @@ group.add_argument('--idle-timeout', type=int, default=0, help='Unload model aft # Model loader group = parser.add_argument_group('Model loader') -group.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, llamacpp_HF, ExLlamav2_HF, ExLlamav2, HQQ, TensorRT-LLM.') +group.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. 
Valid options: Transformers, llama.cpp, llamacpp_HF, ExLlamav3_HF, ExLlamav2_HF, ExLlamav2, HQQ, TensorRT-LLM.') # Transformers/Accelerate group = parser.add_argument_group('Transformers/Accelerate') @@ -273,6 +273,8 @@ def fix_loader_name(name): return 'ExLlamav2' elif name in ['exllamav2-hf', 'exllamav2_hf', 'exllama-v2-hf', 'exllama_v2_hf', 'exllama-v2_hf', 'exllama2-hf', 'exllama2_hf', 'exllama-2-hf', 'exllama_2_hf', 'exllama-2_hf']: return 'ExLlamav2_HF' + elif name in ['exllamav3-hf', 'exllamav3_hf', 'exllama-v3-hf', 'exllama_v3_hf', 'exllama-v3_hf', 'exllama3-hf', 'exllama3_hf', 'exllama-3-hf', 'exllama_3_hf', 'exllama-3_hf']: + return 'ExLlamav3_HF' elif name in ['hqq']: return 'HQQ' elif name in ['tensorrt', 'tensorrtllm', 'tensorrt_llm', 'tensorrt-llm', 'tensort', 'tensortllm']: diff --git a/one_click.py b/one_click.py index 72626010..fcca4ff5 100644 --- a/one_click.py +++ b/one_click.py @@ -16,10 +16,11 @@ import sys # os.environ["HCC_AMDGPU_TARGET"] = 'gfx1030' -# Define the required PyTorch version -TORCH_VERSION = "2.4.1" -TORCHVISION_VERSION = "0.19.1" -TORCHAUDIO_VERSION = "2.4.1" +# Define the required versions +TORCH_VERSION = "2.6.0" +TORCHVISION_VERSION = "0.21.0" +TORCHAUDIO_VERSION = "2.6.0" +PYTHON_VERSION = "3.11" # Environment script_dir = os.getcwd() @@ -101,13 +102,20 @@ def torch_version(): return torver -def update_pytorch(): +def update_pytorch_and_python(): print_big_message("Checking for PyTorch updates.") + + # Update the Python version. Left here for future reference in case this becomes necessary. + # print_big_message("Checking for PyTorch and Python updates.") + # current_python_version = f"{sys.version_info.major}.{sys.version_info.minor}" + # if current_python_version != PYTHON_VERSION: + # run_cmd(f"conda install -y python={PYTHON_VERSION}", assert_success=True, environment=True) + torver = torch_version() base_cmd = f"python -m pip install --upgrade torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION}" if "+cu" in torver: - install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/cu121" + install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/cu124" elif "+rocm" in torver: install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/rocm6.1" elif "+cpu" in torver: @@ -245,7 +253,7 @@ def install_webui(): choice = get_user_choice( "What is your GPU?", { - 'A': 'NVIDIA - CUDA 12.1', + 'A': 'NVIDIA - CUDA 12.4', 'B': 'AMD - Linux/macOS only, requires ROCm 6.1', 'C': 'Apple M Series', 'D': 'Intel Arc (beta)', @@ -273,7 +281,7 @@ def install_webui(): # Handle CUDA version display elif any((is_windows(), is_linux())) and selected_gpu == "NVIDIA": - print("CUDA: 12.1") + print("CUDA: 12.4") # No PyTorch for AMD on Windows (?) 
elif is_windows() and selected_gpu == "AMD": @@ -284,7 +292,7 @@ def install_webui(): install_pytorch = f"python -m pip install torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION} " if selected_gpu == "NVIDIA": - install_pytorch += "--index-url https://download.pytorch.org/whl/cu121" + install_pytorch += "--index-url https://download.pytorch.org/whl/cu124" elif selected_gpu == "AMD": install_pytorch += "--index-url https://download.pytorch.org/whl/rocm6.1" elif selected_gpu in ["APPLE", "NONE"]: @@ -297,7 +305,7 @@ def install_webui(): # Install Git and then Pytorch print_big_message("Installing PyTorch.") - run_cmd(f"conda install -y -k ninja git && {install_pytorch} && python -m pip install py-cpuinfo==9.0.0", assert_success=True, environment=True) + run_cmd(f"conda install -y ninja git && {install_pytorch} && python -m pip install py-cpuinfo==9.0.0", assert_success=True, environment=True) if selected_gpu == "INTEL": # Install oneAPI dependencies via conda @@ -323,6 +331,24 @@ def install_extensions_requirements(): run_cmd(f"python -m pip install -r {extension_req_path} --upgrade", assert_success=False, environment=True) +def clean_outdated_pytorch_cuda_dependencies(): + patterns = ["cu121", "cu122", "torch2.4"] + result = run_cmd("python -m pip list --format=freeze", capture_output=True, environment=True) + matching_packages = [] + + for line in result.stdout.decode('utf-8').splitlines(): + if "==" in line: + pkg_name, version = line.split('==', 1) + if any(pattern in version for pattern in patterns): + matching_packages.append(pkg_name) + + if matching_packages: + print(f"Uninstalling: {', '.join(matching_packages)}") + run_cmd(f"python -m pip uninstall -y {' '.join(matching_packages)}", assert_success=True, environment=True) + + return matching_packages + + def update_requirements(initial_installation=False, pull=True): # Create .git directory if missing if not os.path.exists(os.path.join(script_dir, ".git")): @@ -410,7 +436,9 @@ def update_requirements(initial_installation=False, pull=True): # Update PyTorch if not initial_installation: - update_pytorch() + clean_outdated_pytorch_cuda_dependencies() + update_pytorch_and_python() + torver = torch_version() print_big_message(f"Installing webui requirements from file: {requirements_file}") print(f"TORCH: {torver}\n") diff --git a/requirements.txt b/requirements.txt index 4cf99b69..b9b4ea7a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -36,16 +36,18 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" # llama-cpp-python (CUDA, with GGML_CUDA_FORCE_MMQ) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu124-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" 
+https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu124-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" # llama-cpp-python (CUDA, without GGML_CUDA_FORCE_MMQ) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu121-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu121-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu124-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu124-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" # CUDA wheels -https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav3/releases/download/v0.0.1/exllamav3-0.0.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav3/releases/download/v0.0.1/exllamav3-0.0.1+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" -https://github.com/oobabooga/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" diff --git 
a/requirements_amd.txt b/requirements_amd.txt index 0d205725..3d24891f 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -36,5 +36,5 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp # AMD wheels https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/rocm/llama_cpp_python_cuda-0.3.8+rocm6.1.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+rocm6.1.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 93a46a64..057b631d 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -35,5 +35,5 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" # AMD wheels -https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+rocm6.1.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+rocm6.1.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 00353bfd..eba21ec2 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -33,4 +33,5 @@ tiktoken # Mac wheels https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp311-cp311-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/exllamav3/releases/download/v0.0.1/exllamav3-0.0.1-py3-none-any.whl https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 7076b386..2048c99b 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -34,4 +34,5 @@ tiktoken https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp311-cp311-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and 
python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/metal/llama_cpp_python-0.3.8-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" +https://github.com/oobabooga/exllamav3/releases/download/v0.0.1/exllamav3-0.0.1-py3-none-any.whl https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index d5f456f8..60b71ac1 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -36,16 +36,18 @@ https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cp https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/cpu/llama_cpp_python-0.3.8+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" # llama-cpp-python (CUDA, with GGML_CUDA_FORCE_MMQ) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu124avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda-0.3.8+cu124avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" # llama-cpp-python (CUDA, without GGML_CUDA_FORCE_MMQ) -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu121avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu121avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu124avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-python-cuBLAS-wheels/releases/download/textgen-webui/llama_cpp_python_cuda_tensorcores-0.3.8+cu124avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" # CUDA wheels -https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu121.torch2.4.1-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system 
== "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav3/releases/download/v0.0.1/exllamav3-0.0.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav3/releases/download/v0.0.1/exllamav3-0.0.1+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64" -https://github.com/oobabooga/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" From d8aad6da948262e8679da2063d27600da0d8ccb4 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 8 Apr 2025 20:20:24 -0700 Subject: [PATCH 20/25] Fix an update bug --- one_click.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/one_click.py b/one_click.py index fcca4ff5..8e85dc3a 100644 --- a/one_click.py +++ b/one_click.py @@ -436,9 +436,9 @@ def update_requirements(initial_installation=False, pull=True): # Update PyTorch if not initial_installation: - clean_outdated_pytorch_cuda_dependencies() update_pytorch_and_python() torver = torch_version() + clean_outdated_pytorch_cuda_dependencies() print_big_message(f"Installing webui requirements from file: {requirements_file}") print(f"TORCH: {torver}\n") From ad1ada657421cc830e8cd421e50477c3c05c3332 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 9 Apr 2025 05:17:10 -0700 Subject: [PATCH 21/25] Change one message in the installer --- one_click.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/one_click.py b/one_click.py index 8e85dc3a..f3536638 100644 --- a/one_click.py +++ b/one_click.py @@ -343,7 +343,7 @@ def clean_outdated_pytorch_cuda_dependencies(): matching_packages.append(pkg_name) if matching_packages: - print(f"Uninstalling: {', '.join(matching_packages)}") + print(f"\nUninstalling: {', '.join(matching_packages)}\n") run_cmd(f"python -m pip uninstall -y {' '.join(matching_packages)}", assert_success=True, environment=True) return matching_packages From 89f40cdcf7f0a5eb67a41865d1c67e2ae921dfa1 Mon Sep 17 00:00:00 
2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 9 Apr 2025 07:20:51 -0700 Subject: [PATCH 22/25] Update libstdcxx-ng for GLIBCXX_3.4.30 support on Linux --- one_click.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/one_click.py b/one_click.py index f3536638..8dd11b6b 100644 --- a/one_click.py +++ b/one_click.py @@ -434,6 +434,9 @@ def update_requirements(initial_installation=False, pull=True): if os.environ.get("INSTALL_EXTENSIONS", "").lower() in ("yes", "y", "true", "1", "t", "on"): install_extensions_requirements() + if is_linux(): + run_cmd("conda install -y -c conda-forge libstdcxx-ng==12.1.0", assert_success=True, environment=True) + # Update PyTorch if not initial_installation: update_pytorch_and_python() From 8229736ec4b359b2a773f36abac1bf96d4eabf56 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 9 Apr 2025 08:38:06 -0700 Subject: [PATCH 23/25] Reapply "Update transformers requirement from ==4.50.* to ==4.51.* (#6834)" This reverts commit 0b3503c91fcd3eaf6e0b93de4384794648406ba7. --- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index b9b4ea7a..de338696 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,7 +21,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb diff --git a/requirements_amd.txt b/requirements_amd.txt index 3d24891f..00281d22 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 057b631d..80fcb71c 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index eba21ec2..4253a940 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 2048c99b..6962b6fc 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index c7e2687c..e849a451 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index 2003c544..e10782c9 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 60b71ac1..00c31c40 
100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -21,7 +21,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index 3b61ca39..bb9ea97c 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.51.* tqdm wandb From d337ea31fa05d3d2f60df8c28fff10c07c10156f Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 9 Apr 2025 10:16:47 -0700 Subject: [PATCH 24/25] Revert "Reapply "Update transformers requirement from ==4.50.* to ==4.51.* (#6834)"" This reverts commit 8229736ec4b359b2a773f36abac1bf96d4eabf56. --- requirements.txt | 2 +- requirements_amd.txt | 2 +- requirements_amd_noavx2.txt | 2 +- requirements_apple_intel.txt | 2 +- requirements_apple_silicon.txt | 2 +- requirements_cpu_only.txt | 2 +- requirements_cpu_only_noavx2.txt | 2 +- requirements_noavx2.txt | 2 +- requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements.txt b/requirements.txt index de338696..b9b4ea7a 100644 --- a/requirements.txt +++ b/requirements.txt @@ -21,7 +21,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb diff --git a/requirements_amd.txt b/requirements_amd.txt index 00281d22..3d24891f 100644 --- a/requirements_amd.txt +++ b/requirements_amd.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb diff --git a/requirements_amd_noavx2.txt b/requirements_amd_noavx2.txt index 80fcb71c..057b631d 100644 --- a/requirements_amd_noavx2.txt +++ b/requirements_amd_noavx2.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb diff --git a/requirements_apple_intel.txt b/requirements_apple_intel.txt index 4253a940..eba21ec2 100644 --- a/requirements_apple_intel.txt +++ b/requirements_apple_intel.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb diff --git a/requirements_apple_silicon.txt b/requirements_apple_silicon.txt index 6962b6fc..2048c99b 100644 --- a/requirements_apple_silicon.txt +++ b/requirements_apple_silicon.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb diff --git a/requirements_cpu_only.txt b/requirements_cpu_only.txt index e849a451..c7e2687c 100644 --- a/requirements_cpu_only.txt +++ b/requirements_cpu_only.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb diff --git a/requirements_cpu_only_noavx2.txt b/requirements_cpu_only_noavx2.txt index e10782c9..2003c544 100644 --- a/requirements_cpu_only_noavx2.txt +++ b/requirements_cpu_only_noavx2.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb diff --git a/requirements_noavx2.txt b/requirements_noavx2.txt index 00c31c40..60b71ac1 100644 --- a/requirements_noavx2.txt +++ b/requirements_noavx2.txt @@ -21,7 +21,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb diff --git a/requirements_nowheels.txt b/requirements_nowheels.txt index 
bb9ea97c..3b61ca39 100644 --- a/requirements_nowheels.txt +++ b/requirements_nowheels.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.51.* +transformers==4.50.* tqdm wandb From 9025848df56e7095febf88fc1cd473b180e19fc5 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 9 Apr 2025 10:25:47 -0700 Subject: [PATCH 25/25] Small change to installer --- one_click.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/one_click.py b/one_click.py index 8dd11b6b..9f46a2df 100644 --- a/one_click.py +++ b/one_click.py @@ -21,6 +21,7 @@ TORCH_VERSION = "2.6.0" TORCHVISION_VERSION = "0.21.0" TORCHAUDIO_VERSION = "2.6.0" PYTHON_VERSION = "3.11" +LIBSTDCXX_VERSION_LINUX = "12.1.0" # Environment script_dir = os.getcwd() @@ -435,7 +436,7 @@ def update_requirements(initial_installation=False, pull=True): install_extensions_requirements() if is_linux(): - run_cmd("conda install -y -c conda-forge libstdcxx-ng==12.1.0", assert_success=True, environment=True) + run_cmd(f"conda install -y -c conda-forge libstdcxx-ng=={LIBSTDCXX_VERSION_LINUX}", assert_success=True, environment=True) # Update PyTorch if not initial_installation: