☰', elem_id='gr-hover')
+ gr.HTML(value='
', elem_id='gr-hover')
with gr.Column(scale=10, elem_id='chat-input-container'):
shared.gradio['textbox'] = gr.MultimodalTextbox(label='', placeholder='Send a message', file_types=['text', '.pdf', 'image'], file_count="multiple", elem_id='chat-input', elem_classes=['add_scrollbar'])
From ffea8f282e3a2f798d7bf5531be278754b47da21 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 4 Apr 2026 18:53:13 -0700
Subject: [PATCH 49/76] UI: Improve message text contrast
---
css/html_instruct_style.css | 2 +-
css/main.css | 17 +++++++++++++----
modules/ui.py | 2 +-
3 files changed, 15 insertions(+), 6 deletions(-)
diff --git a/css/html_instruct_style.css b/css/html_instruct_style.css
index 458feafc..aa61f33b 100644
--- a/css/html_instruct_style.css
+++ b/css/html_instruct_style.css
@@ -13,7 +13,7 @@
line-height: 28px !important;
}
-.dark .chat .message-body :is(p,li,h1,h2,h3,h4,h5,h6),
+.dark .chat .message-body :is(p,li),
.dark .chat .message-body em:not(:is(h1,h2,h3,h4,h5,h6,b,strong) em),
.dark .chat .message-body q:not(:is(h1,h2,h3,h4,h5,h6,b,strong) q) {
color: #d1d5db !important;
diff --git a/css/main.css b/css/main.css
index 913576c5..d06d2905 100644
--- a/css/main.css
+++ b/css/main.css
@@ -436,15 +436,24 @@ audio {
.dark .message-body h4,
.dark .message-body h5,
.dark .message-body h6 {
- color: white !important;
+ color: #e8e8e8 !important;
}
.dark .message-body blockquote {
border-left-color: rgb(255 255 255 / 30%);
}
+.message-body h1,
+.message-body h2,
+.message-body h3,
+.message-body h4,
+.message-body h5,
+.message-body h6 {
+ color: #1a1a1a;
+}
+
.message-body h1 {
- font-weight: 800;
+ font-weight: 700;
font-size: 2.25em;
margin-top: 0;
margin-bottom: 0.8888889em;
@@ -476,13 +485,13 @@ audio {
}
.message-body h5 {
- font-weight: normal;
+ font-weight: 600;
font-size: 1em;
margin: 0;
}
.message-body h6 {
- font-weight: normal;
+ font-weight: 600;
font-size: 1em;
margin: 0;
}
diff --git a/modules/ui.py b/modules/ui.py
index 02b5a9fb..73072cbe 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -75,7 +75,7 @@ if not shared.args.old_colors:
background_fill_primary_dark='var(--darker-gray, #1C1C1D)',
body_background_fill="white",
block_background_fill="transparent",
- body_text_color='rgb(64, 64, 64)',
+ body_text_color='#1a1a1a',
button_secondary_background_fill="white",
button_secondary_border_color="var(--border-color-primary)",
block_title_text_color='*body_text_color',
From 41bce3f4dee83ede8ba05a3f3cdab9e729ec0979 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 4 Apr 2026 19:07:23 -0700
Subject: [PATCH 50/76] UI: Improve scrollbars style
---
css/main.css | 26 ++++++++++++--------------
1 file changed, 12 insertions(+), 14 deletions(-)
diff --git a/css/main.css b/css/main.css
index d06d2905..c54367e6 100644
--- a/css/main.css
+++ b/css/main.css
@@ -246,8 +246,8 @@ button {
.pretty_scrollbar::-webkit-scrollbar,
#image-history-gallery > :nth-child(2)::-webkit-scrollbar {
- width: 8px;
- height: 8px;
+ width: 7px;
+ height: 7px;
}
.pretty_scrollbar::-webkit-scrollbar-track,
@@ -260,7 +260,7 @@ button {
#image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb,
#image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb:hover {
background: var(--neutral-300);
- border-radius: 30px;
+ border-radius: 9999px;
}
.dark .pretty_scrollbar::-webkit-scrollbar-thumb,
@@ -268,18 +268,17 @@ button {
.dark #image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb,
.dark #image-history-gallery > :nth-child(2)::-webkit-scrollbar-thumb:hover {
background: rgb(255 255 255 / 6.25%);
- border-radius: 30px;
+ border-radius: 9999px;
}
.pretty_scrollbar::-webkit-resizer,
#image-history-gallery > :nth-child(2)::-webkit-resizer {
- background: #d2d2d8;
+ background: transparent;
}
.dark .pretty_scrollbar::-webkit-resizer,
.dark #image-history-gallery > :nth-child(2)::-webkit-resizer {
- background: rgb(255 255 255 / 10%);
- border-radius: 10px;
+ background: transparent;
}
.pretty_scrollbar::-webkit-scrollbar-corner,
@@ -599,7 +598,7 @@ audio {
}
#chat-input textarea::-webkit-scrollbar {
- width: 8px;
+ width: 7px;
}
#chat-input textarea::-webkit-scrollbar-track {
@@ -608,7 +607,7 @@ audio {
#chat-input textarea::-webkit-scrollbar-thumb {
background: var(--neutral-300);
- border-radius: 30px;
+ border-radius: 9999px;
}
.dark #chat-input textarea::-webkit-scrollbar-thumb {
@@ -869,7 +868,6 @@ audio {
min-width: 0 !important;
display: flex;
flex-direction: column-reverse;
- padding-left: 12px;
padding-right: 20px;
padding-bottom: 3px;
flex-grow: 0 !important;
@@ -2000,8 +1998,8 @@ thead + tbody tr:first-child th { border-top: 1px solid; }
/* Pretty scrollbar for the tools list */
#tools-group .wrap::-webkit-scrollbar {
- width: 8px;
- height: 8px;
+ width: 7px;
+ height: 7px;
}
#tools-group .wrap::-webkit-scrollbar-track {
@@ -2011,13 +2009,13 @@ thead + tbody tr:first-child th { border-top: 1px solid; }
#tools-group .wrap::-webkit-scrollbar-thumb,
#tools-group .wrap::-webkit-scrollbar-thumb:hover {
background: var(--neutral-300);
- border-radius: 30px;
+ border-radius: 9999px;
}
.dark #tools-group .wrap::-webkit-scrollbar-thumb,
.dark #tools-group .wrap::-webkit-scrollbar-thumb:hover {
background: rgb(255 255 255 / 6.25%);
- border-radius: 30px;
+ border-radius: 9999px;
}
#tools-group .wrap::-webkit-scrollbar-corner {
From 8cb7fe9c470101d07f80b236a1d34b906bcdb25a Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 4 Apr 2026 19:14:17 -0700
Subject: [PATCH 51/76] UI: Improve message action icon visibility in light
mode
---
css/main.css | 5 ++---
1 file changed, 2 insertions(+), 3 deletions(-)
diff --git a/css/main.css b/css/main.css
index c54367e6..41b08308 100644
--- a/css/main.css
+++ b/css/main.css
@@ -1428,12 +1428,11 @@ audio {
}
.footer-button svg {
- stroke: rgb(156 163 175);
- transition: stroke 0.2s;
+ stroke: rgb(107 114 128);
}
.footer-button:hover svg {
- stroke: rgb(107 114 128);
+ stroke: rgb(64 64 64);
}
.dark .footer-button svg {
From 1b403a4ffab0833cdce527360f445a0003c7ea41 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 4 Apr 2026 19:33:05 -0700
Subject: [PATCH 52/76] UI: Fix inline LaTeX rendering by protecting $...$ from
markdown (closes #7423)
---
modules/html_generator.py | 32 +++++++++++++++++---------------
1 file changed, 17 insertions(+), 15 deletions(-)
diff --git a/modules/html_generator.py b/modules/html_generator.py
index 8f3f261f..8dd46850 100644
--- a/modules/html_generator.py
+++ b/modules/html_generator.py
@@ -185,28 +185,29 @@ def process_markdown_content(string):
if not string:
return ""
- # Define unique placeholders for LaTeX asterisks and underscores
+ # Define unique placeholders for LaTeX characters that conflict with markdown
LATEX_ASTERISK_PLACEHOLDER = "LATEXASTERISKPLACEHOLDER"
LATEX_UNDERSCORE_PLACEHOLDER = "LATEXUNDERSCOREPLACEHOLDER"
+ LATEX_PIPE_PLACEHOLDER = "LATEXPIPEPLACEHOLDER"
+
+ def protect_latex_content(content):
+ """Protect markdown-sensitive characters inside LaTeX."""
+ content = content.replace('*', LATEX_ASTERISK_PLACEHOLDER)
+ content = content.replace('_', LATEX_UNDERSCORE_PLACEHOLDER)
+ content = content.replace('|', LATEX_PIPE_PLACEHOLDER)
+ return content
def protect_asterisks_underscores_in_latex(match):
- """A replacer function for re.sub to protect asterisks and underscores in multiple LaTeX formats."""
+ """A replacer function for re.sub to protect markdown-sensitive characters in multiple LaTeX formats."""
# Check which delimiter group was captured
if match.group(1) is not None: # Content from $$...$$
- content = match.group(1)
- modified_content = content.replace('*', LATEX_ASTERISK_PLACEHOLDER)
- modified_content = modified_content.replace('_', LATEX_UNDERSCORE_PLACEHOLDER)
- return f'{modified_content}'
+ return protect_latex_content(match.group(1))
elif match.group(2) is not None: # Content from \[...\]
- content = match.group(2)
- modified_content = content.replace('*', LATEX_ASTERISK_PLACEHOLDER)
- modified_content = modified_content.replace('_', LATEX_UNDERSCORE_PLACEHOLDER)
- return f'\\[{modified_content}\\]'
+ return f'\\[{protect_latex_content(match.group(2))}\\]'
elif match.group(3) is not None: # Content from \(...\)
- content = match.group(3)
- modified_content = content.replace('*', LATEX_ASTERISK_PLACEHOLDER)
- modified_content = modified_content.replace('_', LATEX_UNDERSCORE_PLACEHOLDER)
- return f'\\({modified_content}\\)'
+ return f'\\({protect_latex_content(match.group(3))}\\)'
+ elif match.group(4) is not None: # Content from $...$
+ return f'${protect_latex_content(match.group(4).strip())}$'
return match.group(0) # Fallback
@@ -240,7 +241,7 @@ def process_markdown_content(string):
string = re.sub(r"(.)```", r"\1\n```", string)
# Protect asterisks and underscores within all LaTeX blocks before markdown conversion
- latex_pattern = re.compile(r'((?:^|[\r\n\s])\$\$[^`]*?\$\$)|\\\[(.*?)\\\]|\\\((.*?)\\\)',
+    latex_pattern = re.compile(r'((?:^|[\r\n\s])\$\$[^`]*?\$\$)|\\\[(.*?)\\\]|\\\((.*?)\\\)|(?<!\$)\$([^$\n]+?)\$(?!\$)',
html_output = re.sub(r'\s*', '', html_output)
From 0c033caf0ef79838178238912df29cc47bb10ba3 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 4 Apr 2026 20:09:28 -0700
Subject: [PATCH 53/76] UI: Reduce spacing above chat input
---
css/main.css | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/css/main.css b/css/main.css
index 41b08308..43e9684f 100644
--- a/css/main.css
+++ b/css/main.css
@@ -893,7 +893,7 @@ audio {
}
#chat-input-row {
- padding: 1rem;
+ padding: 0.5rem 1rem 1rem;
}
#chat-col {
From dfd8ec9c4992f801304fc7efb89e7e47355fd18e Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 4 Apr 2026 20:13:20 -0700
Subject: [PATCH 54/76] UI: Make accordion outline styling global
---
css/main.css | 4 ++--
modules/training.py | 4 ++--
modules/ui_model_menu.py | 6 +++---
3 files changed, 7 insertions(+), 7 deletions(-)
diff --git a/css/main.css b/css/main.css
index 43e9684f..459c9fab 100644
--- a/css/main.css
+++ b/css/main.css
@@ -1443,12 +1443,12 @@ audio {
stroke: rgb(209 213 219);
}
-.tgw-accordion {
+.block:has(> .label-wrap) {
padding: 10px 12px !important;
border: 1px solid #d2d2d8;
}
-.dark .tgw-accordion {
+.dark .block:has(> .label-wrap) {
border: 1px solid var(--border-color-dark);
}
diff --git a/modules/training.py b/modules/training.py
index 145353c6..bca4f02e 100644
--- a/modules/training.py
+++ b/modules/training.py
@@ -52,7 +52,7 @@ def create_ui():
with gr.Column():
always_override = gr.Checkbox(label='Override Existing Files', value=False, info='If the name is the same, checking will replace the existing file, and unchecking will load and continue from it (the rank must be the same).', elem_classes=['no-background'])
- with gr.Accordion(label='Target Modules', open=False, elem_classes='tgw-accordion'):
+ with gr.Accordion(label='Target Modules', open=False):
gr.Markdown("Selects which modules to target in training. Targeting more modules is closer to a full fine-tune at the cost of increased VRAM and adapter size.")
all_linear = gr.Checkbox(label='Target all linear layers', value=True, info='Targets every nn.Linear layer except lm_head. Works for any model architecture. When checked, the individual module checkboxes below are ignored.', elem_classes=['no-background'])
with gr.Row():
@@ -87,7 +87,7 @@ def create_ui():
with gr.Row():
lr_scheduler_type = gr.Dropdown(label='LR Scheduler', value='cosine', choices=['linear', 'constant', 'constant_with_warmup', 'cosine', 'cosine_with_restarts', 'polynomial', 'inverse_sqrt'], info='Learning rate scheduler - defines how the learning rate changes over time. "Constant" means never change, "linear" means to go in a straight line from the learning rate down to 0, cosine follows a curve, etc.', elem_classes=['slim-dropdown'])
- with gr.Accordion(label='Advanced Options', open=False, elem_classes='tgw-accordion'):
+ with gr.Accordion(label='Advanced Options', open=False):
with gr.Row():
with gr.Column():
optimizer = gr.Dropdown(label='Optimizer', value='adamw_torch', choices=['adamw_hf', 'adamw_torch', 'adamw_torch_fused', 'adamw_torch_xla', 'adamw_apex_fused', 'adafactor', 'adamw_bnb_8bit', 'adamw_anyprecision', 'sgd', 'adagrad'], info='Optimizer algorithm. adamw_torch is the standard choice. adamw_bnb_8bit uses less VRAM. adafactor is memory-efficient for large models.', elem_classes=['slim-dropdown'])
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index 16505afa..243079a0 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -67,13 +67,13 @@ def create_ui():
)
# Multimodal
- with gr.Accordion("Multimodal (vision)", open=False, elem_classes='tgw-accordion') as shared.gradio['mmproj_accordion']:
+ with gr.Accordion("Multimodal (vision)", open=False) as shared.gradio['mmproj_accordion']:
with gr.Row():
shared.gradio['mmproj'] = gr.Dropdown(label="mmproj file", choices=utils.get_available_mmproj(), value=lambda: shared.args.mmproj or 'None', elem_classes='slim-dropdown', info=f'Select a file that matches your model. Must be placed in {shared.user_data_dir}/mmproj/', interactive=not mu)
ui.create_refresh_button(shared.gradio['mmproj'], lambda: None, lambda: {'choices': utils.get_available_mmproj()}, 'refresh-button', interactive=not mu)
# Speculative decoding
- with gr.Accordion("Speculative decoding", open=False, elem_classes='tgw-accordion') as shared.gradio['speculative_decoding_accordion']:
+ with gr.Accordion("Speculative decoding", open=False) as shared.gradio['speculative_decoding_accordion']:
shared.gradio['draft_max'] = gr.Number(label="draft-max", precision=0, step=1, value=shared.args.draft_max, info='Maximum number of tokens to draft for speculative decoding. Recommended: 4 for draft model, 64 for n-gram.')
gr.Markdown('#### Draft model')
@@ -92,7 +92,7 @@ def create_ui():
shared.gradio['spec_ngram_min_hits'] = gr.Number(label="spec-ngram-min-hits", precision=0, step=1, value=shared.args.spec_ngram_min_hits, info='Minimum n-gram hits for ngram-map speculative decoding.', visible=shared.args.spec_type != 'none')
gr.Markdown("## Other options")
- with gr.Accordion("See more options", open=False, elem_classes='tgw-accordion'):
+ with gr.Accordion("See more options", open=False):
with gr.Row():
with gr.Column():
shared.gradio['parallel'] = gr.Slider(label="parallel", minimum=1, step=1, maximum=64, value=shared.args.parallel, info='Number of parallel request slots for the API. The context size is divided equally among slots. For example, to have 4 slots with 8192 context each, set ctx_size to 32768.')
From ee917cd5edfc3b192d4a3147001f0c1752a3e354 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 4 Apr 2026 20:35:27 -0700
Subject: [PATCH 55/76] UI: Make table and hr borders more subtle
---
css/html_instruct_style.css | 9 ---------
css/main.css | 16 +++++++++++++---
2 files changed, 13 insertions(+), 12 deletions(-)
diff --git a/css/html_instruct_style.css b/css/html_instruct_style.css
index aa61f33b..fc20d166 100644
--- a/css/html_instruct_style.css
+++ b/css/html_instruct_style.css
@@ -19,15 +19,6 @@
color: #d1d5db !important;
}
-.chat .message-body :is(th, td),
-.prose hr {
- border-color: #40404096 !important;
-}
-
-.dark .chat .message-body :is(th, td),
-.dark .prose hr {
- border-color: rgb(255 255 255 / 30%) !important;
-}
.chat .message-body :is(p, ul, ol) {
margin: 1.25em 0 !important;
diff --git a/css/main.css b/css/main.css
index 459c9fab..d9dc5d2e 100644
--- a/css/main.css
+++ b/css/main.css
@@ -1958,14 +1958,24 @@ table, tr, td, th, thead {
border: 0;
}
+.prose hr {
+ border-color: var(--border-color-primary);
+}
+
td + td,
-th + th { border-left: 1px solid; }
+th + th {
+ border-left: 1px solid var(--border-color-primary) !important;
+}
tr + tr td,
-tr + tr th { border-top: 1px solid; }
+tr + tr th {
+ border-top: 1px solid var(--border-color-primary) !important;
+}
thead + tbody tr:first-child td,
-thead + tbody tr:first-child th { border-top: 1px solid; }
+thead + tbody tr:first-child th {
+ border-top: 1px solid var(--border-color-primary) !important;
+}
/* ------------------------------------------------
Tools CheckboxGroup - vertical DragDrop-like style
From e8b31c063a3a5d1486dba2969b116835aa6a56bf Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 4 Apr 2026 20:38:31 -0700
Subject: [PATCH 56/76] UI: Soften message action icons in light mode
---
css/main.css | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/css/main.css b/css/main.css
index d9dc5d2e..a59e08ce 100644
--- a/css/main.css
+++ b/css/main.css
@@ -1428,11 +1428,11 @@ audio {
}
.footer-button svg {
- stroke: rgb(107 114 128);
+ stroke: rgb(140 140 148);
}
.footer-button:hover svg {
- stroke: rgb(64 64 64);
+ stroke: rgb(107 114 128);
}
.dark .footer-button svg {
From 1f49a64e1ac1b2e700146956ac3dc17794d53243 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 4 Apr 2026 20:44:37 -0700
Subject: [PATCH 57/76] UI: Improve blockquote border width and color
---
css/main.css | 5 +++--
1 file changed, 3 insertions(+), 2 deletions(-)
diff --git a/css/main.css b/css/main.css
index a59e08ce..6685ab34 100644
--- a/css/main.css
+++ b/css/main.css
@@ -438,8 +438,9 @@ audio {
color: #e8e8e8 !important;
}
-.dark .message-body blockquote {
- border-left-color: rgb(255 255 255 / 30%);
+.message-body blockquote {
+ border-left-width: 4px;
+ border-left-color: var(--border-color-primary);
}
.message-body h1,
From 91f9b01516ff50bd35477ccccff9b53a03041cf8 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 4 Apr 2026 21:13:20 -0700
Subject: [PATCH 58/76] UI: Minor change
---
css/main.css | 4 ++++
1 file changed, 4 insertions(+)
diff --git a/css/main.css b/css/main.css
index 6685ab34..7f47a3aa 100644
--- a/css/main.css
+++ b/css/main.css
@@ -642,6 +642,10 @@ audio {
background: transparent;
}
+#chat-input .thumbnails {
+ padding-top: 3px;
+}
+
.chat-input-positioned {
max-width: 54rem;
left: 50%;
From 9805ddcde95f75bb1de100553dd3b604a4a6537c Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 4 Apr 2026 21:34:09 -0700
Subject: [PATCH 59/76] Update the custom gradio wheels
---
requirements/full/requirements.txt | 4 ++--
requirements/full/requirements_amd.txt | 4 ++--
requirements/full/requirements_apple_intel.txt | 4 ++--
requirements/full/requirements_apple_silicon.txt | 4 ++--
requirements/full/requirements_cpu_only.txt | 4 ++--
requirements/full/requirements_nowheels.txt | 4 ++--
requirements/portable/requirements.txt | 4 ++--
requirements/portable/requirements_amd.txt | 4 ++--
requirements/portable/requirements_apple_intel.txt | 4 ++--
requirements/portable/requirements_apple_silicon.txt | 4 ++--
requirements/portable/requirements_cpu_only.txt | 4 ++--
requirements/portable/requirements_cuda131.txt | 4 ++--
requirements/portable/requirements_ik.txt | 4 ++--
requirements/portable/requirements_ik_cpu_only.txt | 4 ++--
requirements/portable/requirements_ik_cuda131.txt | 4 ++--
requirements/portable/requirements_nowheels.txt | 4 ++--
requirements/portable/requirements_vulkan.txt | 4 ++--
17 files changed, 34 insertions(+), 34 deletions(-)
diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt
index b7a5ca97..9f83830a 100644
--- a/requirements/full/requirements.txt
+++ b/requirements/full/requirements.txt
@@ -31,8 +31,8 @@ tqdm
wandb
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio-4.37.2+custom.18-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio_client-1.0.2+custom.18-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
# API
flask_cloudflared==0.0.15
diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt
index 2c627585..b4b8386e 100644
--- a/requirements/full/requirements_amd.txt
+++ b/requirements/full/requirements_amd.txt
@@ -28,8 +28,8 @@ trafilatura==2.0.0
wandb
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio-4.37.2+custom.18-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio_client-1.0.2+custom.18-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
# API
flask_cloudflared==0.0.15
diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt
index 7e3fc35f..41ee6a60 100644
--- a/requirements/full/requirements_apple_intel.txt
+++ b/requirements/full/requirements_apple_intel.txt
@@ -28,8 +28,8 @@ trafilatura==2.0.0
wandb
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio-4.37.2+custom.18-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio_client-1.0.2+custom.18-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
# API
flask_cloudflared==0.0.15
diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt
index 2603201d..8be2f55e 100644
--- a/requirements/full/requirements_apple_silicon.txt
+++ b/requirements/full/requirements_apple_silicon.txt
@@ -28,8 +28,8 @@ trafilatura==2.0.0
wandb
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio-4.37.2+custom.18-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio_client-1.0.2+custom.18-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
# API
flask_cloudflared==0.0.15
diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt
index fe3bf3ba..d7f1bf13 100644
--- a/requirements/full/requirements_cpu_only.txt
+++ b/requirements/full/requirements_cpu_only.txt
@@ -28,8 +28,8 @@ trafilatura==2.0.0
wandb
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio-4.37.2+custom.18-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio_client-1.0.2+custom.18-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
# API
flask_cloudflared==0.0.15
diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt
index acae301e..7b331f96 100644
--- a/requirements/full/requirements_nowheels.txt
+++ b/requirements/full/requirements_nowheels.txt
@@ -28,8 +28,8 @@ trafilatura==2.0.0
wandb
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio-4.37.2+custom.18-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio_client-1.0.2+custom.18-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
# API
flask_cloudflared==0.0.15
diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt
index 56795843..b467cf26 100644
--- a/requirements/portable/requirements.txt
+++ b/requirements/portable/requirements.txt
@@ -14,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio-4.37.2+custom.18-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio_client-1.0.2+custom.18-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
# API
flask_cloudflared==0.0.15
diff --git a/requirements/portable/requirements_amd.txt b/requirements/portable/requirements_amd.txt
index abaa1338..4eca16e1 100644
--- a/requirements/portable/requirements_amd.txt
+++ b/requirements/portable/requirements_amd.txt
@@ -14,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio-4.37.2+custom.18-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio_client-1.0.2+custom.18-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
# API
flask_cloudflared==0.0.15
diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt
index b22a03d9..55f8d3f8 100644
--- a/requirements/portable/requirements_apple_intel.txt
+++ b/requirements/portable/requirements_apple_intel.txt
@@ -14,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio-4.37.2+custom.18-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio_client-1.0.2+custom.18-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
# API
flask_cloudflared==0.0.15
diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt
index 97c5903c..54e8f350 100644
--- a/requirements/portable/requirements_apple_silicon.txt
+++ b/requirements/portable/requirements_apple_silicon.txt
@@ -14,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio-4.37.2+custom.18-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio_client-1.0.2+custom.18-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
# API
flask_cloudflared==0.0.15
diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt
index 57e92f74..f073a614 100644
--- a/requirements/portable/requirements_cpu_only.txt
+++ b/requirements/portable/requirements_cpu_only.txt
@@ -14,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio-4.37.2+custom.18-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio_client-1.0.2+custom.18-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
# API
flask_cloudflared==0.0.15
diff --git a/requirements/portable/requirements_cuda131.txt b/requirements/portable/requirements_cuda131.txt
index 1f7d27a7..8cd40f39 100644
--- a/requirements/portable/requirements_cuda131.txt
+++ b/requirements/portable/requirements_cuda131.txt
@@ -14,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio-4.37.2+custom.18-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio_client-1.0.2+custom.18-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
# API
flask_cloudflared==0.0.15
diff --git a/requirements/portable/requirements_ik.txt b/requirements/portable/requirements_ik.txt
index 65f6a004..fbb9125d 100644
--- a/requirements/portable/requirements_ik.txt
+++ b/requirements/portable/requirements_ik.txt
@@ -14,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio-4.37.2+custom.18-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio_client-1.0.2+custom.18-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
# API
flask_cloudflared==0.0.15
diff --git a/requirements/portable/requirements_ik_cpu_only.txt b/requirements/portable/requirements_ik_cpu_only.txt
index 0a82adb7..59fcfae1 100644
--- a/requirements/portable/requirements_ik_cpu_only.txt
+++ b/requirements/portable/requirements_ik_cpu_only.txt
@@ -14,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio-4.37.2+custom.18-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio_client-1.0.2+custom.18-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
# API
flask_cloudflared==0.0.15
diff --git a/requirements/portable/requirements_ik_cuda131.txt b/requirements/portable/requirements_ik_cuda131.txt
index 3d812045..ffdbe568 100644
--- a/requirements/portable/requirements_ik_cuda131.txt
+++ b/requirements/portable/requirements_ik_cuda131.txt
@@ -14,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio-4.37.2+custom.18-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio_client-1.0.2+custom.18-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
# API
flask_cloudflared==0.0.15
diff --git a/requirements/portable/requirements_nowheels.txt b/requirements/portable/requirements_nowheels.txt
index 91bef10b..4a47b1f0 100644
--- a/requirements/portable/requirements_nowheels.txt
+++ b/requirements/portable/requirements_nowheels.txt
@@ -14,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio-4.37.2+custom.18-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio_client-1.0.2+custom.18-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
# API
flask_cloudflared==0.0.15
diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt
index 7c61f0cc..97abd933 100644
--- a/requirements/portable/requirements_vulkan.txt
+++ b/requirements/portable/requirements_vulkan.txt
@@ -14,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio-4.37.2+custom.18-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.18/gradio_client-1.0.2+custom.18-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
# API
flask_cloudflared==0.0.15
From c63a79ee4871178aa4d7b7f570e5e9d45b0280de Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 4 Apr 2026 23:15:14 -0700
Subject: [PATCH 60/76] Image generation: Embed generation metadata in API
image responses
---
modules/api/images.py | 23 +++++++++++++++++------
modules/ui_image_generation.py | 3 +++
2 files changed, 20 insertions(+), 6 deletions(-)
diff --git a/modules/api/images.py b/modules/api/images.py
index 95704535..dde7d336 100644
--- a/modules/api/images.py
+++ b/modules/api/images.py
@@ -4,8 +4,11 @@ OpenAI-compatible image generation using local diffusion models.
import base64
import io
+import json
import time
+from PIL.PngImagePlugin import PngInfo
+
from .errors import ServiceUnavailableError
from modules import shared
@@ -15,7 +18,7 @@ def generations(request):
Generate images using the loaded diffusion model.
Returns dict with 'created' timestamp and 'data' list of images.
"""
- from modules.ui_image_generation import generate
+ from modules.ui_image_generation import build_generation_metadata, generate
if shared.image_model is None:
raise ServiceUnavailableError("No image model loaded. Load a model via the UI first.")
@@ -46,10 +49,18 @@ def generations(request):
if not images:
raise ServiceUnavailableError("Image generation failed or produced no images.")
- # Build response
+ # Build response with per-batch metadata (seed increments per batch)
+ base_seed = state.get('image_seed_resolved', state['image_seed'])
+ batch_size = int(state['image_batch_size'])
+
resp = {'created': int(time.time()), 'data': []}
- for img in images:
- b64 = _image_to_base64(img)
+ for idx, img in enumerate(images):
+ batch_seed = base_seed + idx // batch_size
+ metadata = build_generation_metadata(state, batch_seed)
+ metadata_json = json.dumps(metadata, ensure_ascii=False)
+ png_info = PngInfo()
+ png_info.add_text("image_gen_settings", metadata_json)
+ b64 = _image_to_base64(img, png_info)
image_obj = {'revised_prompt': request.prompt}
@@ -63,7 +74,7 @@ def generations(request):
return resp
-def _image_to_base64(image) -> str:
+def _image_to_base64(image, png_info=None) -> str:
buffered = io.BytesIO()
- image.save(buffered, format="PNG")
+ image.save(buffered, format="PNG", pnginfo=png_info)
return base64.b64encode(buffered.getvalue()).decode('utf-8')
diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py
index 1efb2479..727aa7b1 100644
--- a/modules/ui_image_generation.py
+++ b/modules/ui_image_generation.py
@@ -798,6 +798,9 @@ def generate(state, save_images=True):
if seed == -1:
seed = random.randint(0, 2**32 - 1)
+ # Store resolved seed back so callers (e.g. API) can access it
+ state['image_seed_resolved'] = seed
+
device = get_device()
if device is None:
device = "cpu"
From 544fcb0b7f0344fac249005f869b02110da69738 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 4 Apr 2026 23:29:57 -0700
Subject: [PATCH 61/76] Simplify modules/image_models.py
---
modules/image_models.py | 69 ++++++++++++++---------------------------
1 file changed, 23 insertions(+), 46 deletions(-)
diff --git a/modules/image_models.py b/modules/image_models.py
index 290aaf19..eed8783c 100644
--- a/modules/image_models.py
+++ b/modules/image_models.py
@@ -10,72 +10,49 @@ def get_quantization_config(quant_method):
Get the appropriate quantization config based on the selected method.
Applies quantization to both the transformer and the text_encoder.
"""
+ if quant_method == 'none' or not quant_method:
+ return None
+
import torch
- # Import BitsAndBytesConfig from BOTH libraries to be safe
from diffusers import BitsAndBytesConfig as DiffusersBnBConfig
from diffusers import TorchAoConfig
from diffusers.quantizers import PipelineQuantizationConfig
from transformers import BitsAndBytesConfig as TransformersBnBConfig
- if quant_method == 'none' or not quant_method:
- return None
+ torchao_methods = {
+ 'torchao-int8wo': 'int8wo',
+ 'torchao-fp4': 'fp4_e2m1',
+ 'torchao-float8wo': 'float8wo',
+ }
- # Bitsandbytes 8-bit quantization
- elif quant_method == 'bnb-8bit':
+ if quant_method == 'bnb-8bit':
return PipelineQuantizationConfig(
quant_mapping={
- "transformer": DiffusersBnBConfig(
- load_in_8bit=True
- ),
- "text_encoder": TransformersBnBConfig(
- load_in_8bit=True
- )
+ "transformer": DiffusersBnBConfig(load_in_8bit=True),
+ "text_encoder": TransformersBnBConfig(load_in_8bit=True)
}
)
- # Bitsandbytes 4-bit quantization
elif quant_method == 'bnb-4bit':
+ bnb_4bit_kwargs = dict(
+ load_in_4bit=True,
+ bnb_4bit_quant_type="nf4",
+ bnb_4bit_compute_dtype=torch.bfloat16,
+ bnb_4bit_use_double_quant=True
+ )
return PipelineQuantizationConfig(
quant_mapping={
- "transformer": DiffusersBnBConfig(
- load_in_4bit=True,
- bnb_4bit_quant_type="nf4",
- bnb_4bit_compute_dtype=torch.bfloat16,
- bnb_4bit_use_double_quant=True
- ),
- "text_encoder": TransformersBnBConfig(
- load_in_4bit=True,
- bnb_4bit_quant_type="nf4",
- bnb_4bit_compute_dtype=torch.bfloat16,
- bnb_4bit_use_double_quant=True
- )
+ "transformer": DiffusersBnBConfig(**bnb_4bit_kwargs),
+ "text_encoder": TransformersBnBConfig(**bnb_4bit_kwargs)
}
)
- # torchao int8 weight-only
- elif quant_method == 'torchao-int8wo':
+ elif quant_method in torchao_methods:
+ ao_type = torchao_methods[quant_method]
return PipelineQuantizationConfig(
quant_mapping={
- "transformer": TorchAoConfig("int8wo"),
- "text_encoder": TorchAoConfig("int8wo")
- }
- )
-
- # torchao fp4 (e2m1)
- elif quant_method == 'torchao-fp4':
- return PipelineQuantizationConfig(
- quant_mapping={
- "transformer": TorchAoConfig("fp4_e2m1"),
- "text_encoder": TorchAoConfig("fp4_e2m1")
- }
- )
-
- # torchao float8 weight-only
- elif quant_method == 'torchao-float8wo':
- return PipelineQuantizationConfig(
- quant_mapping={
- "transformer": TorchAoConfig("float8wo"),
- "text_encoder": TorchAoConfig("float8wo")
+ "transformer": TorchAoConfig(ao_type),
+ "text_encoder": TorchAoConfig(ao_type)
}
)
From 422f42ca7faa1d0834b1b503e87d605ad55f1ef8 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 4 Apr 2026 23:51:15 -0700
Subject: [PATCH 62/76] Pre-compile LaTeX regex in html_generator.py
---
modules/html_generator.py | 11 ++++++++---
1 file changed, 8 insertions(+), 3 deletions(-)
diff --git a/modules/html_generator.py b/modules/html_generator.py
index 8dd46850..e3ebea8d 100644
--- a/modules/html_generator.py
+++ b/modules/html_generator.py
@@ -14,6 +14,13 @@ from modules.reasoning import extract_reasoning
from modules.sane_markdown_lists import SaneListExtension
from modules.utils import get_available_chat_styles
+# Pre-compiled regex for protecting markdown-sensitive characters inside LaTeX.
+# Covers $$...$$, \[...\], \(...\), and inline $...$ (when content contains \\).
+_LATEX_PATTERN = re.compile(
+ r'((?:^|[\r\n\s])\$\$[^`]*?\$\$)|\\\[(.*?)\\\]|\\\((.*?)\\\)|(?
Date: Sun, 5 Apr 2026 05:55:39 -0700
Subject: [PATCH 63/76] Fix "address already in use" on server restart
(Linux/macOS)
---
modules/api/script.py | 26 ++++++++++++++++++++++++--
modules/llama_cpp_server.py | 1 +
2 files changed, 25 insertions(+), 2 deletions(-)
diff --git a/modules/api/script.py b/modules/api/script.py
index beed3d06..14e2d03a 100644
--- a/modules/api/script.py
+++ b/modules/api/script.py
@@ -591,9 +591,31 @@ def run_server():
if shared.args.admin_key and shared.args.admin_key != shared.args.api_key:
logger.info(f'OpenAI API admin key (for loading/unloading models):\n\n{shared.args.admin_key}\n')
- # Start server
+ # Use SO_REUSEADDR to avoid "address already in use" after restart
logging.getLogger("uvicorn.error").propagate = False
- uvicorn.run(app, host=server_addrs, port=port, ssl_certfile=ssl_certfile, ssl_keyfile=ssl_keyfile, access_log=False)
+ sockets = []
+ try:
+ for addr in server_addrs:
+ family = socket.AF_INET6 if ':' in addr else socket.AF_INET
+ sock = socket.socket(family, socket.SOCK_STREAM)
+ sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
+ if family == socket.AF_INET6:
+ sock.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 1)
+ sock.bind((addr.strip('[]'), port))
+ sock.listen(socket.SOMAXCONN)
+ sockets.append(sock)
+ except Exception:
+ for s in sockets:
+ s.close()
+ raise
+
+ config = uvicorn.Config(app, ssl_certfile=ssl_certfile, ssl_keyfile=ssl_keyfile, access_log=False)
+ server = uvicorn.Server(config)
+ try:
+ server.run(sockets=sockets)
+ finally:
+ for s in sockets:
+ s.close()
_server_started = False
diff --git a/modules/llama_cpp_server.py b/modules/llama_cpp_server.py
index 34080466..c01f5d5b 100644
--- a/modules/llama_cpp_server.py
+++ b/modules/llama_cpp_server.py
@@ -373,6 +373,7 @@ class LlamaServer:
"""Check if a port is available for use."""
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
try:
+ s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
s.bind(('', port))
return True
except OSError:
From f8db23b36286b09155e08beaa07a5797c879c7ef Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sun, 5 Apr 2026 17:12:28 -0700
Subject: [PATCH 64/76] Call ik portable build folders
text-generation-webui-ik-version
---
.github/workflows/build-portable-release-ik-cuda.yml | 12 ++++++------
.github/workflows/build-portable-release-ik.yml | 12 ++++++------
2 files changed, 12 insertions(+), 12 deletions(-)
diff --git a/.github/workflows/build-portable-release-ik-cuda.yml b/.github/workflows/build-portable-release-ik-cuda.yml
index 331a7653..a336a1cb 100644
--- a/.github/workflows/build-portable-release-ik-cuda.yml
+++ b/.github/workflows/build-portable-release-ik-cuda.yml
@@ -102,8 +102,8 @@ jobs:
VERSION_CLEAN="${{ inputs.version }}"
VERSION_CLEAN="${VERSION_CLEAN#v}"
cd ..
- cp -r text-generation-webui "text-generation-webui-${VERSION_CLEAN}"
- cd "text-generation-webui-${VERSION_CLEAN}"
+ cp -r text-generation-webui "text-generation-webui-ik-${VERSION_CLEAN}"
+ cd "text-generation-webui-ik-${VERSION_CLEAN}"
# Remove extensions that need additional requirements
allowed=("character_bias" "gallery" "sd_api_pictures")
@@ -133,10 +133,10 @@ jobs:
echo "Downloading Python for $PLATFORM..."
curl -L -o python-build.tar.gz "$PYTHON_URL"
tar -xzf python-build.tar.gz
- mv python "text-generation-webui-${VERSION_CLEAN}/portable_env"
+ mv python "text-generation-webui-ik-${VERSION_CLEAN}/portable_env"
# 3. Prepare requirements file based on CUDA version
- cd "text-generation-webui-${VERSION_CLEAN}"
+ cd "text-generation-webui-ik-${VERSION_CLEAN}"
if [[ "$CUDA_VERSION" == "13.1" ]]; then
REQ_FILE="requirements/portable/requirements_ik_cuda131.txt"
else
@@ -158,11 +158,11 @@ jobs:
if [[ "$RUNNER_OS" == "Windows" ]]; then
ARCHIVE_NAME="textgen-portable-ik-${VERSION_CLEAN}-${PLATFORM}-cuda${CUDA_VERSION}.zip"
echo "Creating archive: $ARCHIVE_NAME"
- powershell -Command "Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
+ powershell -Command "Compress-Archive -Path text-generation-webui-ik-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
else
ARCHIVE_NAME="textgen-portable-ik-${VERSION_CLEAN}-${PLATFORM}-cuda${CUDA_VERSION}.tar.gz"
echo "Creating archive: $ARCHIVE_NAME"
- tar czf "$ARCHIVE_NAME" "text-generation-webui-${VERSION_CLEAN}"
+ tar czf "$ARCHIVE_NAME" "text-generation-webui-ik-${VERSION_CLEAN}"
fi
- name: Upload files to a GitHub release
diff --git a/.github/workflows/build-portable-release-ik.yml b/.github/workflows/build-portable-release-ik.yml
index bf54eb0e..5eaf7c86 100644
--- a/.github/workflows/build-portable-release-ik.yml
+++ b/.github/workflows/build-portable-release-ik.yml
@@ -101,8 +101,8 @@ jobs:
VERSION_CLEAN="${{ inputs.version }}"
VERSION_CLEAN="${VERSION_CLEAN#v}"
cd ..
- cp -r text-generation-webui "text-generation-webui-${VERSION_CLEAN}"
- cd "text-generation-webui-${VERSION_CLEAN}"
+ cp -r text-generation-webui "text-generation-webui-ik-${VERSION_CLEAN}"
+ cd "text-generation-webui-ik-${VERSION_CLEAN}"
# Remove extensions that need additional requirements
allowed=("character_bias" "gallery" "sd_api_pictures")
@@ -131,10 +131,10 @@ jobs:
cd ..
curl -L -o python-build.tar.gz "$PYTHON_URL"
tar -xzf python-build.tar.gz
- mv python "text-generation-webui-${VERSION_CLEAN}/portable_env"
+ mv python "text-generation-webui-ik-${VERSION_CLEAN}/portable_env"
# 3. Prepare requirements file
- cd "text-generation-webui-${VERSION_CLEAN}"
+ cd "text-generation-webui-ik-${VERSION_CLEAN}"
REQ_FILE="requirements/portable/requirements_ik_cpu_only.txt"
echo "Using requirements file: $REQ_FILE"
@@ -153,11 +153,11 @@ jobs:
if [[ "$RUNNER_OS" == "Windows" ]]; then
ARCHIVE_NAME="textgen-portable-ik-${VERSION_CLEAN}-${PLATFORM}.zip"
echo "Creating archive: $ARCHIVE_NAME"
- powershell -Command "Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
+ powershell -Command "Compress-Archive -Path text-generation-webui-ik-${VERSION_CLEAN} -DestinationPath $ARCHIVE_NAME"
else
ARCHIVE_NAME="textgen-portable-ik-${VERSION_CLEAN}-${PLATFORM}.tar.gz"
echo "Creating archive: $ARCHIVE_NAME"
- tar czf "$ARCHIVE_NAME" "text-generation-webui-${VERSION_CLEAN}"
+ tar czf "$ARCHIVE_NAME" "text-generation-webui-ik-${VERSION_CLEAN}"
fi
- name: Upload files to a GitHub release
From 223dd4b8017d24f7c5c2f33be2ca8409e1897b34 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sun, 5 Apr 2026 18:22:50 -0700
Subject: [PATCH 65/76] UI: Hide spin buttons on number inputs
---
css/main.css | 11 +++++++++++
1 file changed, 11 insertions(+)
diff --git a/css/main.css b/css/main.css
index 7f47a3aa..db0b781b 100644
--- a/css/main.css
+++ b/css/main.css
@@ -22,6 +22,17 @@
font-style: italic;
}
+/* Hide spin buttons on number inputs (look bad on Windows) */
+input[type="number"]::-webkit-outer-spin-button,
+input[type="number"]::-webkit-inner-spin-button {
+ -webkit-appearance: none;
+ margin: 0;
+}
+
+input[type="number"] {
+ -moz-appearance: textfield;
+}
+
.padded.svelte-12cmxck {
padding: 3px 0;
}
From abc3487f4dec9215abd9ebfb5ac796c32361b018 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sun, 5 Apr 2026 18:24:26 -0700
Subject: [PATCH 66/76] UI: Move cpu-moe checkbox to extra flags (no longer
useful now that --fit exists)
---
modules/ui_model_menu.py | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index 243079a0..9c8306f5 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -54,7 +54,6 @@ def create_ui():
if not shared.args.portable:
shared.gradio['ik'] = gr.Checkbox(label="ik", value=shared.args.ik, info='Use ik_llama.cpp instead of upstream llama.cpp.')
- shared.gradio['cpu_moe'] = gr.Checkbox(label="cpu-moe", value=shared.args.cpu_moe, info='Move the experts to the CPU. Saves VRAM on MoE models.')
shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming-llm", value=shared.args.streaming_llm, info='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
shared.gradio['load_in_8bit'] = gr.Checkbox(label="load-in-8bit", value=shared.args.load_in_8bit)
shared.gradio['load_in_4bit'] = gr.Checkbox(label="load-in-4bit", value=shared.args.load_in_4bit)
@@ -109,6 +108,7 @@ def create_ui():
with gr.Column():
shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='Use PyTorch in CPU mode.')
shared.gradio['disk'] = gr.Checkbox(label="disk", value=shared.args.disk)
+ shared.gradio['cpu_moe'] = gr.Checkbox(label="cpu-moe", value=shared.args.cpu_moe, info='Move the experts to the CPU. Saves VRAM on MoE models.')
shared.gradio['row_split'] = gr.Checkbox(label="row_split", value=shared.args.row_split, info='Split the model by rows across GPUs. This may improve multi-gpu performance.')
shared.gradio['no_kv_offload'] = gr.Checkbox(label="no_kv_offload", value=shared.args.no_kv_offload, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces performance.')
shared.gradio['no_mmap'] = gr.Checkbox(label="no-mmap", value=shared.args.no_mmap)
From b1d06dcf96e2b5958ae004b8c9bbb0fc8518328b Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sun, 5 Apr 2026 23:07:14 -0300
Subject: [PATCH 67/76] UI: Add MCP server support
---
README.md | 2 +-
docs/Tool Calling Tutorial.md | 13 ++
modules/chat.py | 15 ++-
modules/shared.py | 1 +
modules/tool_use.py | 114 ++++++++++++++++++
modules/ui.py | 2 +
modules/ui_chat.py | 3 +
requirements/full/requirements.txt | 1 +
requirements/full/requirements_amd.txt | 1 +
.../full/requirements_apple_intel.txt | 1 +
.../full/requirements_apple_silicon.txt | 1 +
requirements/full/requirements_cpu_only.txt | 1 +
requirements/full/requirements_nowheels.txt | 1 +
requirements/portable/requirements.txt | 1 +
requirements/portable/requirements_amd.txt | 1 +
.../portable/requirements_apple_intel.txt | 1 +
.../portable/requirements_apple_silicon.txt | 1 +
.../portable/requirements_cpu_only.txt | 1 +
.../portable/requirements_cuda131.txt | 1 +
requirements/portable/requirements_ik.txt | 1 +
.../portable/requirements_ik_cpu_only.txt | 1 +
.../portable/requirements_ik_cuda131.txt | 1 +
.../portable/requirements_nowheels.txt | 1 +
requirements/portable/requirements_vulkan.txt | 1 +
24 files changed, 163 insertions(+), 4 deletions(-)
diff --git a/README.md b/README.md
index 23cd09c5..b168ebdb 100644
--- a/README.md
+++ b/README.md
@@ -26,7 +26,7 @@ A Gradio web UI for running Large Language Models locally. 100% private and offl
- **Easy setup**: [Portable builds](https://github.com/oobabooga/text-generation-webui/releases) (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or a one-click installer for the full feature set.
- **Multiple backends**: [llama.cpp](https://github.com/ggerganov/llama.cpp), [ik_llama.cpp](https://github.com/ikawrakow/ik_llama.cpp), [Transformers](https://github.com/huggingface/transformers), [ExLlamaV3](https://github.com/turboderp-org/exllamav3), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). Switch between backends and models without restarting.
- **OpenAI/Anthropic-compatible API**: Chat, Completions, and Messages endpoints with tool-calling support. Use as a local drop-in replacement for the OpenAI/Anthropic APIs ([examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples)).
-- **Tool-calling**: Models can call custom functions during chat — web search, page fetching, math, and more. Each tool is a single `.py` file, easy to create and extend ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Tool-Calling-Tutorial)).
+- **Tool-calling**: Models can call custom functions during chat — web search, page fetching, math, and more. Each tool is a single `.py` file. MCP servers are also supported ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Tool-Calling-Tutorial)).
- **Vision (multimodal)**: Attach images to messages for visual understanding ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Multimodal-Tutorial)).
- **File attachments**: Upload text files, PDF documents, and .docx documents to talk about their contents.
- **Training**: Fine-tune LoRAs on multi-turn chat or raw text datasets. Supports resuming interrupted runs ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/05-%E2%80%90-Training-Tab)).
diff --git a/docs/Tool Calling Tutorial.md b/docs/Tool Calling Tutorial.md
index d95a9c80..7d2a86de 100644
--- a/docs/Tool Calling Tutorial.md
+++ b/docs/Tool Calling Tutorial.md
@@ -80,6 +80,19 @@ def execute(arguments):
You can open the built-in tools in `user_data/tools/` for more examples.
+## MCP servers
+
+You can connect to remote [MCP (Model Context Protocol)](https://modelcontextprotocol.io/) servers to use their tools alongside local ones.
+
+In the chat sidebar, open the **MCP servers** accordion and enter one server URL per line. For servers that require authentication, append headers after the URL separated by commas:
+
+```
+https://example.com/mcp
+https://other.com/mcp,Authorization: Bearer sk-xxx
+```
+
+All tools from the configured servers are automatically discovered and made available to the model during generation. If an MCP tool has the same name as a selected local tool, the local tool takes priority.
+
## Tool calling over the API
Tool calling over the API follows the [OpenAI API](https://platform.openai.com/docs/guides/function-calling) convention. Define your tools, send them with your messages, and handle tool calls in a loop until the model gives a final answer.
diff --git a/modules/chat.py b/modules/chat.py
index 76b8694a..aeed688d 100644
--- a/modules/chat.py
+++ b/modules/chat.py
@@ -1264,14 +1264,23 @@ def generate_chat_reply_wrapper(text, state, regenerate=False, _continue=False):
# Load tools if any are selected
selected = state.get('selected_tools', [])
+ mcp_servers = state.get('mcp_servers', '')
parse_tool_call = None
_tool_parsers = None
- if selected:
- from modules.tool_use import load_tools, execute_tool
+ if selected or mcp_servers:
+ from modules.tool_use import load_tools, load_mcp_tools, execute_tool
from modules.tool_parsing import parse_tool_call, get_tool_call_id, detect_tool_call_format
- if selected:
tool_defs, tool_executors = load_tools(selected)
+ if mcp_servers:
+ mcp_defs, mcp_executors = load_mcp_tools(mcp_servers)
+ for td in mcp_defs:
+ fn = td['function']['name']
+ if fn in tool_executors:
+ logger.warning(f'MCP tool "{fn}" conflicts with a local tool. Skipping.')
+ continue
+ tool_defs.append(td)
+ tool_executors[fn] = mcp_executors[fn]
state['tools'] = tool_defs
tool_func_names = [t['function']['name'] for t in tool_defs]
_template_str = state.get('instruction_template_str', '') if state.get('mode') == 'instruct' else state.get('chat_template_str', '')
diff --git a/modules/shared.py b/modules/shared.py
index 13843f0c..92c4f56c 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -259,6 +259,7 @@ settings = {
'enable_web_search': False,
'web_search_pages': 3,
'selected_tools': [],
+ 'mcp_servers': '',
'prompt-notebook': '',
'preset': 'Top-P' if (user_data_dir / 'presets/Top-P.yaml').exists() else None,
'max_new_tokens': 512,
diff --git a/modules/tool_use.py b/modules/tool_use.py
index e22b1798..f9ddf940 100644
--- a/modules/tool_use.py
+++ b/modules/tool_use.py
@@ -1,3 +1,4 @@
+import asyncio
import importlib.util
import json
@@ -55,6 +56,119 @@ def load_tools(selected_names):
return tool_defs, executors
+def _parse_mcp_servers(servers_str):
+ """Parse MCP servers textbox: one server per line, format 'url' or 'url,Header: value,Header2: value2'."""
+ servers = []
+ for line in servers_str.strip().splitlines():
+ line = line.strip()
+ if not line:
+ continue
+ parts = line.split(',')
+ url = parts[0].strip()
+ headers = {}
+ for part in parts[1:]:
+ part = part.strip()
+ if ':' in part:
+ key, val = part.split(':', 1)
+ headers[key.strip()] = val.strip()
+ servers.append((url, headers))
+ return servers
+
+
+def _mcp_tool_to_openai(tool):
+ """Convert an MCP Tool object to OpenAI-format tool dict."""
+ return {
+ "type": "function",
+ "function": {
+ "name": tool.name,
+ "description": tool.description or "",
+ "parameters": tool.inputSchema or {"type": "object", "properties": {}}
+ }
+ }
+
+
+async def _mcp_session(url, headers, callback):
+ """Open an MCP session and pass it to the callback."""
+ from mcp.client.streamable_http import streamablehttp_client
+ from mcp import ClientSession
+
+ async with streamablehttp_client(url, headers=headers or None) as (read_stream, write_stream, _):
+ async with ClientSession(read_stream, write_stream) as session:
+ await session.initialize()
+ return await callback(session)
+
+
+def _make_mcp_executor(name, url, headers):
+ def executor(arguments):
+ return asyncio.run(_call_mcp_tool(name, arguments, url, headers))
+ return executor
+
+
+async def _connect_mcp_server(url, headers):
+ """Connect to one MCP server and return (tool_defs, executors)."""
+
+ async def _discover(session):
+ result = await session.list_tools()
+ tool_defs = []
+ executors = {}
+ for tool in result.tools:
+ tool_defs.append(_mcp_tool_to_openai(tool))
+ executors[tool.name] = _make_mcp_executor(tool.name, url, headers)
+ return tool_defs, executors
+
+ return await _mcp_session(url, headers, _discover)
+
+
+async def _call_mcp_tool(name, arguments, url, headers):
+ """Connect to an MCP server and call a single tool."""
+
+ async def _invoke(session):
+ result = await session.call_tool(name, arguments)
+ parts = []
+ for content in result.content:
+ if hasattr(content, 'text'):
+ parts.append(content.text)
+ else:
+ parts.append(str(content))
+ return '\n'.join(parts) if parts else ''
+
+ return await _mcp_session(url, headers, _invoke)
+
+
+async def _connect_all_mcp_servers(servers):
+ """Connect to all MCP servers concurrently."""
+ results = await asyncio.gather(
+ *(_connect_mcp_server(url, headers) for url, headers in servers),
+ return_exceptions=True
+ )
+ all_defs = []
+ all_executors = {}
+ for (url, _), result in zip(servers, results):
+ if isinstance(result, Exception):
+ logger.exception(f'Failed to connect to MCP server "{url}"', exc_info=result)
+ continue
+ defs, execs = result
+ for td, (fn, ex) in zip(defs, execs.items()):
+ if fn in all_executors:
+ logger.warning(f'MCP tool "{fn}" from {url} conflicts with an already loaded tool. Skipping.')
+ continue
+ all_defs.append(td)
+ all_executors[fn] = ex
+ return all_defs, all_executors
+
+
+def load_mcp_tools(servers_str):
+ """
+ Parse MCP servers string and discover tools from each server.
+ Returns (tool_defs, executors) in the same format as load_tools.
+ """
+ servers = _parse_mcp_servers(servers_str)
+ if not servers:
+ return [], {}
+
+ return asyncio.run(_connect_all_mcp_servers(servers))
+
+
def execute_tool(func_name, arguments, executors):
"""Execute a tool by function name. Returns result as a JSON string."""
fn = executors.get(func_name)
diff --git a/modules/ui.py b/modules/ui.py
index 73072cbe..3a8390f7 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -209,6 +209,7 @@ def list_interface_input_elements():
'textbox',
'start_with',
'selected_tools',
+ 'mcp_servers',
'mode',
'chat_style',
'chat-instruct_command',
@@ -434,6 +435,7 @@ def setup_auto_save():
'custom_system_message',
'chat_template_str',
'selected_tools',
+ 'mcp_servers',
# Parameters tab (ui_parameters.py) - Generation parameters
'preset_menu',
diff --git a/modules/ui_chat.py b/modules/ui_chat.py
index d9652253..14489d96 100644
--- a/modules/ui_chat.py
+++ b/modules/ui_chat.py
@@ -105,6 +105,9 @@ def create_ui():
shared.gradio['selected_tools'].change(fn=sync_web_tools, inputs=[shared.gradio['selected_tools']], outputs=[shared.gradio['selected_tools']], show_progress=False)
+ with gr.Accordion('MCP servers', open=False):
+ shared.gradio['mcp_servers'] = gr.Textbox(value=shared.settings.get('mcp_servers', ''), lines=3, max_lines=3, label='', info='One url per line. For headers, write url,Header: value,Header2: value2', elem_classes=['add_scrollbar'])
+
gr.HTML("")
with gr.Row():
diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt
index 9f83830a..104cfdb2 100644
--- a/requirements/full/requirements.txt
+++ b/requirements/full/requirements.txt
@@ -48,3 +48,4 @@ https://github.com/turboderp-org/exllamav3/releases/download/v0.0.28/exllamav3-0
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.28/exllamav3-0.0.28+cu128.torch2.9.0-cp313-cp313-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.13"
https://github.com/kingbri1/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu128torch2.9.0cxx11abiFALSE-cp313-cp313-win_amd64.whl; platform_system == "Windows" and python_version == "3.13"
https://github.com/kingbri1/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu128torch2.9.0cxx11abiFALSE-cp313-cp313-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.13"
+mcp==1.27.0
diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt
index b4b8386e..49db44db 100644
--- a/requirements/full/requirements_amd.txt
+++ b/requirements/full/requirements_amd.txt
@@ -39,3 +39,4 @@ tiktoken
# AMD wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+mcp==1.27.0
diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt
index 41ee6a60..4584708f 100644
--- a/requirements/full/requirements_apple_intel.txt
+++ b/requirements/full/requirements_apple_intel.txt
@@ -38,3 +38,4 @@ tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
+mcp==1.27.0
diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt
index 8be2f55e..4376a2b4 100644
--- a/requirements/full/requirements_apple_silicon.txt
+++ b/requirements/full/requirements_apple_silicon.txt
@@ -38,3 +38,4 @@ tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
+mcp==1.27.0
diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt
index d7f1bf13..2999d4a9 100644
--- a/requirements/full/requirements_cpu_only.txt
+++ b/requirements/full/requirements_cpu_only.txt
@@ -41,3 +41,4 @@ https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
+mcp==1.27.0
diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt
index 7b331f96..5a1e504e 100644
--- a/requirements/full/requirements_nowheels.txt
+++ b/requirements/full/requirements_nowheels.txt
@@ -35,3 +35,4 @@ https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_cl
flask_cloudflared==0.0.15
sse-starlette==1.6.5
tiktoken
+mcp==1.27.0
diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt
index b467cf26..fb51c7cc 100644
--- a/requirements/portable/requirements.txt
+++ b/requirements/portable/requirements.txt
@@ -25,3 +25,4 @@ tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+mcp==1.27.0
diff --git a/requirements/portable/requirements_amd.txt b/requirements/portable/requirements_amd.txt
index 4eca16e1..dbea7597 100644
--- a/requirements/portable/requirements_amd.txt
+++ b/requirements/portable/requirements_amd.txt
@@ -25,3 +25,4 @@ tiktoken
# AMD wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+mcp==1.27.0
diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt
index 55f8d3f8..d0f83a74 100644
--- a/requirements/portable/requirements_apple_intel.txt
+++ b/requirements/portable/requirements_apple_intel.txt
@@ -24,3 +24,4 @@ tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
+mcp==1.27.0
diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt
index 54e8f350..160c0646 100644
--- a/requirements/portable/requirements_apple_silicon.txt
+++ b/requirements/portable/requirements_apple_silicon.txt
@@ -24,3 +24,4 @@ tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
+mcp==1.27.0
diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt
index f073a614..21695585 100644
--- a/requirements/portable/requirements_cpu_only.txt
+++ b/requirements/portable/requirements_cpu_only.txt
@@ -25,3 +25,4 @@ tiktoken
# llama.cpp (CPU only)
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
+mcp==1.27.0
diff --git a/requirements/portable/requirements_cuda131.txt b/requirements/portable/requirements_cuda131.txt
index 8cd40f39..6b09a46b 100644
--- a/requirements/portable/requirements_cuda131.txt
+++ b/requirements/portable/requirements_cuda131.txt
@@ -25,3 +25,4 @@ tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+mcp==1.27.0
diff --git a/requirements/portable/requirements_ik.txt b/requirements/portable/requirements_ik.txt
index fbb9125d..ca5ece2d 100644
--- a/requirements/portable/requirements_ik.txt
+++ b/requirements/portable/requirements_ik.txt
@@ -25,3 +25,4 @@ tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+mcp==1.27.0
diff --git a/requirements/portable/requirements_ik_cpu_only.txt b/requirements/portable/requirements_ik_cpu_only.txt
index 59fcfae1..f8bafb27 100644
--- a/requirements/portable/requirements_ik_cpu_only.txt
+++ b/requirements/portable/requirements_ik_cpu_only.txt
@@ -25,3 +25,4 @@ tiktoken
# ik_llama.cpp (CPU only)
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
+mcp==1.27.0
diff --git a/requirements/portable/requirements_ik_cuda131.txt b/requirements/portable/requirements_ik_cuda131.txt
index ffdbe568..7825b959 100644
--- a/requirements/portable/requirements_ik_cuda131.txt
+++ b/requirements/portable/requirements_ik_cuda131.txt
@@ -25,3 +25,4 @@ tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+mcp==1.27.0
diff --git a/requirements/portable/requirements_nowheels.txt b/requirements/portable/requirements_nowheels.txt
index 4a47b1f0..cde036d9 100644
--- a/requirements/portable/requirements_nowheels.txt
+++ b/requirements/portable/requirements_nowheels.txt
@@ -21,3 +21,4 @@ https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_cl
flask_cloudflared==0.0.15
sse-starlette==1.6.5
tiktoken
+mcp==1.27.0
diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt
index 97abd933..32f9e593 100644
--- a/requirements/portable/requirements_vulkan.txt
+++ b/requirements/portable/requirements_vulkan.txt
@@ -25,3 +25,4 @@ tiktoken
# Vulkan wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+mcp==1.27.0
From 05e484203308adb3324f7a9edd1412ed9762e359 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sun, 5 Apr 2026 20:03:06 -0700
Subject: [PATCH 68/76] Fix image generation: default to SDPA attention backend
---
modules/image_models.py | 2 +-
modules/shared.py | 2 +-
2 files changed, 2 insertions(+), 2 deletions(-)
diff --git a/modules/image_models.py b/modules/image_models.py
index eed8783c..e244c3c8 100644
--- a/modules/image_models.py
+++ b/modules/image_models.py
@@ -129,7 +129,7 @@ def load_image_model(model_name, dtype='bfloat16', attn_backend='sdpa', cpu_offl
modules = ["transformer", "unet"]
- # Set attention backend
+ # Set attention backend (diffusers defaults to native/SDPA)
if attn_backend == 'flash_attention_2':
for name in modules:
mod = getattr(pipe, name, None)
diff --git a/modules/shared.py b/modules/shared.py
index 92c4f56c..e04f28f3 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -364,7 +364,7 @@ settings = {
'image_llm_variations_prompt': 'Write a variation of the image generation prompt above. Consider the intent of the user with that prompt and write something that will likely please them, with added details. Output only the new prompt. Do not add any explanations, prefixes, or additional text.',
'image_model_menu': 'None',
'image_dtype': 'bfloat16',
- 'image_attn_backend': 'flash_attention_2',
+ 'image_attn_backend': 'sdpa',
'image_cpu_offload': False,
'image_compile': False,
'image_quant': 'none',
From 7b2f15e34ae57a6e86b0901482b4ed9b6b52ad8a Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sun, 5 Apr 2026 21:16:32 -0700
Subject: [PATCH 69/76] Minor change after
b1d06dcf96e2b5958ae004b8c9bbb0fc8518328b
---
requirements/full/requirements.txt | 2 +-
requirements/full/requirements_amd.txt | 2 +-
requirements/full/requirements_apple_intel.txt | 2 +-
requirements/full/requirements_apple_silicon.txt | 2 +-
requirements/full/requirements_cpu_only.txt | 2 +-
requirements/full/requirements_nowheels.txt | 2 +-
requirements/portable/requirements.txt | 2 +-
requirements/portable/requirements_amd.txt | 2 +-
requirements/portable/requirements_apple_intel.txt | 2 +-
requirements/portable/requirements_apple_silicon.txt | 2 +-
requirements/portable/requirements_cpu_only.txt | 2 +-
requirements/portable/requirements_cuda131.txt | 2 +-
requirements/portable/requirements_ik.txt | 2 +-
requirements/portable/requirements_ik_cpu_only.txt | 2 +-
requirements/portable/requirements_ik_cuda131.txt | 2 +-
requirements/portable/requirements_nowheels.txt | 2 +-
requirements/portable/requirements_vulkan.txt | 2 +-
17 files changed, 17 insertions(+), 17 deletions(-)
diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt
index 104cfdb2..d466e7e3 100644
--- a/requirements/full/requirements.txt
+++ b/requirements/full/requirements.txt
@@ -9,6 +9,7 @@ flash-linear-attention==0.4.*
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
+mcp==1.27.0
numpy==2.2.*
pandas
peft==0.18.*
@@ -48,4 +49,3 @@ https://github.com/turboderp-org/exllamav3/releases/download/v0.0.28/exllamav3-0
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.28/exllamav3-0.0.28+cu128.torch2.9.0-cp313-cp313-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.13"
https://github.com/kingbri1/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu128torch2.9.0cxx11abiFALSE-cp313-cp313-win_amd64.whl; platform_system == "Windows" and python_version == "3.13"
https://github.com/kingbri1/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu128torch2.9.0cxx11abiFALSE-cp313-cp313-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.13"
-mcp==1.27.0
diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt
index 49db44db..e88ff7c5 100644
--- a/requirements/full/requirements_amd.txt
+++ b/requirements/full/requirements_amd.txt
@@ -7,6 +7,7 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
+mcp==1.27.0
numpy==2.2.*
pandas
peft==0.18.*
@@ -39,4 +40,3 @@ tiktoken
# AMD wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-mcp==1.27.0
diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt
index 4584708f..eefd979e 100644
--- a/requirements/full/requirements_apple_intel.txt
+++ b/requirements/full/requirements_apple_intel.txt
@@ -7,6 +7,7 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
+mcp==1.27.0
numpy==2.2.*
pandas
peft==0.18.*
@@ -38,4 +39,3 @@ tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
-mcp==1.27.0
diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt
index 4376a2b4..d1b4e09f 100644
--- a/requirements/full/requirements_apple_silicon.txt
+++ b/requirements/full/requirements_apple_silicon.txt
@@ -7,6 +7,7 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
+mcp==1.27.0
numpy==2.2.*
pandas
peft==0.18.*
@@ -38,4 +39,3 @@ tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
-mcp==1.27.0
diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt
index 2999d4a9..156ceb77 100644
--- a/requirements/full/requirements_cpu_only.txt
+++ b/requirements/full/requirements_cpu_only.txt
@@ -7,6 +7,7 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
+mcp==1.27.0
numpy==2.2.*
pandas
peft==0.18.*
@@ -41,4 +42,3 @@ https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
-mcp==1.27.0
diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt
index 5a1e504e..19ac5183 100644
--- a/requirements/full/requirements_nowheels.txt
+++ b/requirements/full/requirements_nowheels.txt
@@ -7,6 +7,7 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
+mcp==1.27.0
numpy==2.2.*
pandas
peft==0.18.*
@@ -35,4 +36,3 @@ https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_cl
flask_cloudflared==0.0.15
sse-starlette==1.6.5
tiktoken
-mcp==1.27.0
diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt
index fb51c7cc..8a158f05 100644
--- a/requirements/portable/requirements.txt
+++ b/requirements/portable/requirements.txt
@@ -3,6 +3,7 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
+mcp==1.27.0
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.*
@@ -25,4 +26,3 @@ tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-mcp==1.27.0
diff --git a/requirements/portable/requirements_amd.txt b/requirements/portable/requirements_amd.txt
index dbea7597..a4949a46 100644
--- a/requirements/portable/requirements_amd.txt
+++ b/requirements/portable/requirements_amd.txt
@@ -3,6 +3,7 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
+mcp==1.27.0
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.*
@@ -25,4 +26,3 @@ tiktoken
# AMD wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-mcp==1.27.0
diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt
index d0f83a74..227823a6 100644
--- a/requirements/portable/requirements_apple_intel.txt
+++ b/requirements/portable/requirements_apple_intel.txt
@@ -3,6 +3,7 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
+mcp==1.27.0
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.*
@@ -24,4 +25,3 @@ tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
-mcp==1.27.0
diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt
index 160c0646..9779dd4a 100644
--- a/requirements/portable/requirements_apple_silicon.txt
+++ b/requirements/portable/requirements_apple_silicon.txt
@@ -3,6 +3,7 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
+mcp==1.27.0
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.*
@@ -24,4 +25,3 @@ tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
-mcp==1.27.0
diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt
index 21695585..ff84907a 100644
--- a/requirements/portable/requirements_cpu_only.txt
+++ b/requirements/portable/requirements_cpu_only.txt
@@ -3,6 +3,7 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
+mcp==1.27.0
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.*
@@ -25,4 +26,3 @@ tiktoken
# llama.cpp (CPU only)
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
-mcp==1.27.0
diff --git a/requirements/portable/requirements_cuda131.txt b/requirements/portable/requirements_cuda131.txt
index 6b09a46b..89e43e1a 100644
--- a/requirements/portable/requirements_cuda131.txt
+++ b/requirements/portable/requirements_cuda131.txt
@@ -3,6 +3,7 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
+mcp==1.27.0
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.*
@@ -25,4 +26,3 @@ tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-mcp==1.27.0
diff --git a/requirements/portable/requirements_ik.txt b/requirements/portable/requirements_ik.txt
index ca5ece2d..a23d8ff0 100644
--- a/requirements/portable/requirements_ik.txt
+++ b/requirements/portable/requirements_ik.txt
@@ -3,6 +3,7 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
+mcp==1.27.0
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.*
@@ -25,4 +26,3 @@ tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-mcp==1.27.0
diff --git a/requirements/portable/requirements_ik_cpu_only.txt b/requirements/portable/requirements_ik_cpu_only.txt
index f8bafb27..a200e80f 100644
--- a/requirements/portable/requirements_ik_cpu_only.txt
+++ b/requirements/portable/requirements_ik_cpu_only.txt
@@ -3,6 +3,7 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
+mcp==1.27.0
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.*
@@ -25,4 +26,3 @@ tiktoken
# ik_llama.cpp (CPU only)
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
-mcp==1.27.0
diff --git a/requirements/portable/requirements_ik_cuda131.txt b/requirements/portable/requirements_ik_cuda131.txt
index 7825b959..8e9a097b 100644
--- a/requirements/portable/requirements_ik_cuda131.txt
+++ b/requirements/portable/requirements_ik_cuda131.txt
@@ -3,6 +3,7 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
+mcp==1.27.0
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.*
@@ -25,4 +26,3 @@ tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-mcp==1.27.0
diff --git a/requirements/portable/requirements_nowheels.txt b/requirements/portable/requirements_nowheels.txt
index cde036d9..cafe3cee 100644
--- a/requirements/portable/requirements_nowheels.txt
+++ b/requirements/portable/requirements_nowheels.txt
@@ -3,6 +3,7 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
+mcp==1.27.0
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.*
@@ -21,4 +22,3 @@ https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_cl
flask_cloudflared==0.0.15
sse-starlette==1.6.5
tiktoken
-mcp==1.27.0
diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt
index 32f9e593..59524668 100644
--- a/requirements/portable/requirements_vulkan.txt
+++ b/requirements/portable/requirements_vulkan.txt
@@ -3,6 +3,7 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
+mcp==1.27.0
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.*
@@ -25,4 +26,3 @@ tiktoken
# Vulkan wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-mcp==1.27.0
From 4d6230a944a71dab794d880d7c353eb37934d584 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Mon, 6 Apr 2026 06:48:48 -0700
Subject: [PATCH 70/76] Follow-up to d78fc46114a4ce1de505fc286798372ddaa0c32d
---
modules/api/script.py | 28 +++-------------------------
1 file changed, 3 insertions(+), 25 deletions(-)
diff --git a/modules/api/script.py b/modules/api/script.py
index 14e2d03a..e79a1967 100644
--- a/modules/api/script.py
+++ b/modules/api/script.py
@@ -532,8 +532,8 @@ async def handle_unload_loras():
def find_available_port(starting_port):
"""Try the starting port, then find an available one if it's taken."""
try:
- # Try to create a socket with the starting port
with socket.socket(socket.AF_INET, socket.SOCK_STREAM) as s:
+ s.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
s.bind(('', starting_port))
return starting_port
except OSError:
@@ -591,31 +591,9 @@ def run_server():
if shared.args.admin_key and shared.args.admin_key != shared.args.api_key:
logger.info(f'OpenAI API admin key (for loading/unloading models):\n\n{shared.args.admin_key}\n')
- # Use SO_REUSEADDR to avoid "address already in use" after restart
+ # Start server
logging.getLogger("uvicorn.error").propagate = False
- sockets = []
- try:
- for addr in server_addrs:
- family = socket.AF_INET6 if ':' in addr else socket.AF_INET
- sock = socket.socket(family, socket.SOCK_STREAM)
- sock.setsockopt(socket.SOL_SOCKET, socket.SO_REUSEADDR, 1)
- if family == socket.AF_INET6:
- sock.setsockopt(socket.IPPROTO_IPV6, socket.IPV6_V6ONLY, 1)
- sock.bind((addr.strip('[]'), port))
- sock.listen(socket.SOMAXCONN)
- sockets.append(sock)
- except Exception:
- for s in sockets:
- s.close()
- raise
-
- config = uvicorn.Config(app, ssl_certfile=ssl_certfile, ssl_keyfile=ssl_keyfile, access_log=False)
- server = uvicorn.Server(config)
- try:
- server.run(sockets=sockets)
- finally:
- for s in sockets:
- s.close()
+ uvicorn.run(app, host=server_addrs, port=port, ssl_certfile=ssl_certfile, ssl_keyfile=ssl_keyfile, access_log=False)
_server_started = False
From c26ffdd24c60b1dc6ad339c847b8993f490dc036 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Mon, 6 Apr 2026 07:02:53 -0700
Subject: [PATCH 71/76] API: add instruction_template support to the model load
endpoint
---
docs/12 - OpenAI API.md | 11 +++++++++++
modules/api/models.py | 10 +++++++++-
modules/api/script.py | 5 +++++
modules/api/typing.py | 2 ++
modules/models_settings.py | 17 +++++++++++------
5 files changed, 38 insertions(+), 7 deletions(-)
diff --git a/docs/12 - OpenAI API.md b/docs/12 - OpenAI API.md
index 0a076c35..727f6ece 100644
--- a/docs/12 - OpenAI API.md
+++ b/docs/12 - OpenAI API.md
@@ -232,6 +232,17 @@ curl -k http://127.0.0.1:5000/v1/internal/model/load \
}'
```
+You can also set a default instruction template for all subsequent API requests by passing `instruction_template` (a template name from `user_data/instruction-templates/`) or `instruction_template_str` (a raw Jinja2 string):
+
+```shell
+curl -k http://127.0.0.1:5000/v1/internal/model/load \
+ -H "Content-Type: application/json" \
+ -d '{
+ "model_name": "Qwen_Qwen3-0.6B-Q4_K_M.gguf",
+ "instruction_template": "Alpaca"
+ }'
+```
+
#### Python chat example
```python
diff --git a/modules/api/models.py b/modules/api/models.py
index 5dd77850..bfcd2c31 100644
--- a/modules/api/models.py
+++ b/modules/api/models.py
@@ -1,7 +1,8 @@
from modules import loaders, shared
+from modules.logging_colors import logger
from modules.LoRA import add_lora_to_model
from modules.models import load_model, unload_model
-from modules.models_settings import get_model_metadata, update_model_parameters
+from modules.models_settings import get_model_metadata, load_instruction_template, update_model_parameters
from modules.utils import get_available_loras, get_available_models
@@ -69,6 +70,13 @@ def _load_model(data):
shared.model, shared.tokenizer = load_model(model_name)
+ if data.get("instruction_template_str") is not None:
+ shared.settings['instruction_template_str'] = data["instruction_template_str"]
+ logger.info("INSTRUCTION TEMPLATE: set to custom Jinja2 string")
+ elif data.get("instruction_template") is not None:
+ shared.settings['instruction_template_str'] = load_instruction_template(data["instruction_template"])
+ logger.info(f"INSTRUCTION TEMPLATE: {data['instruction_template']}")
+
def list_loras():
return {'lora_names': get_available_loras()[1:]}
diff --git a/modules/api/script.py b/modules/api/script.py
index e79a1967..1f41d0cd 100644
--- a/modules/api/script.py
+++ b/modules/api/script.py
@@ -487,6 +487,11 @@ async def handle_load_model(request_data: LoadModelRequest):
Loader args are reset to their startup defaults between loads, so
settings from a previous load do not leak into the next one.
+
+ The "instruction_template" parameter sets the default instruction
+ template by name (from user_data/instruction-templates/). The
+ "instruction_template_str" parameter sets it as a raw Jinja2 string
+ and takes precedence over "instruction_template".
'''
try:
diff --git a/modules/api/typing.py b/modules/api/typing.py
index a758743e..56d7f2bc 100644
--- a/modules/api/typing.py
+++ b/modules/api/typing.py
@@ -271,6 +271,8 @@ class ModelListResponse(BaseModel):
class LoadModelRequest(BaseModel):
model_name: str
args: dict | None = None
+ instruction_template: str | None = Field(default=None, description="An instruction template defined under text-generation-webui/user_data/instruction-templates. Sets the default template for all subsequent API requests.")
+ instruction_template_str: str | None = Field(default=None, description="A Jinja2 instruction template string. If set, takes precedence over instruction_template.")
class LoraListResponse(BaseModel):
diff --git a/modules/models_settings.py b/modules/models_settings.py
index eafa0581..b10d780c 100644
--- a/modules/models_settings.py
+++ b/modules/models_settings.py
@@ -400,14 +400,19 @@ def load_instruction_template(template):
if template == 'None':
return ''
- for filepath in [shared.user_data_dir / 'instruction-templates' / f'{template}.yaml', shared.user_data_dir / 'instruction-templates' / 'Alpaca.yaml']:
- if filepath.exists():
- break
+ for name in (template, 'Alpaca'):
+ path = shared.user_data_dir / 'instruction-templates' / f'{name}.yaml'
+ try:
+ with open(path, 'r', encoding='utf-8') as f:
+ file_contents = f.read()
+ except FileNotFoundError:
+ if name == template:
+ logger.warning(f"Instruction template '{template}' not found, falling back to Alpaca")
+ continue
+
+ break
else:
return ''
-
- with open(filepath, 'r', encoding='utf-8') as f:
- file_contents = f.read()
data = yaml.safe_load(file_contents)
if 'instruction_template' in data:
return data['instruction_template']
From 193424cc9359859b5b97bf5b229409a3fb727274 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Mon, 6 Apr 2026 10:07:52 -0700
Subject: [PATCH 72/76] API: Fix IPv6 address formatting
---
modules/api/script.py | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/modules/api/script.py b/modules/api/script.py
index 1f41d0cd..ceeca2dc 100644
--- a/modules/api/script.py
+++ b/modules/api/script.py
@@ -563,7 +563,7 @@ def run_server():
server_addrs.append(shared.args.listen_host)
else:
if os.environ.get('OPENEDAI_ENABLE_IPV6', shared.args.api_enable_ipv6):
- server_addrs.append('[::]' if shared.args.listen else '[::1]')
+ server_addrs.append('::' if shared.args.listen else '::1')
if not os.environ.get('OPENEDAI_DISABLE_IPV4', shared.args.api_disable_ipv4):
server_addrs.append('0.0.0.0' if shared.args.listen else '127.0.0.1')
@@ -580,7 +580,7 @@ def run_server():
)
else:
url_proto = 'https://' if (ssl_certfile and ssl_keyfile) else 'http://'
- urls = [f'{url_proto}{addr}:{port}/v1' for addr in server_addrs]
+ urls = [f'{url_proto}[{addr}]:{port}/v1' if ':' in addr else f'{url_proto}{addr}:{port}/v1' for addr in server_addrs]
if len(urls) > 1:
logger.info('OpenAI/Anthropic-compatible API URLs:\n\n' + '\n'.join(urls) + '\n')
else:
From cb511928e2be4b7ee234582ecba96801fccf94fe Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Mon, 6 Apr 2026 12:06:28 -0700
Subject: [PATCH 73/76] Fix GPT-OSS tag leak during streaming between thinking
and tool calls
---
modules/reasoning.py | 13 ++++++++++---
modules/tool_parsing.py | 12 +++++++++---
2 files changed, 19 insertions(+), 6 deletions(-)
diff --git a/modules/reasoning.py b/modules/reasoning.py
index 4a7cfa79..2b260818 100644
--- a/modules/reasoning.py
+++ b/modules/reasoning.py
@@ -73,9 +73,16 @@ def extract_reasoning(text, html_escaped=False):
if content_pos != -1:
content_start = content_pos + len(content_esc)
else:
- # Content tag not present — fall back to content after
- # end_tag (e.g. GPT-OSS tool calls skip the final channel).
- content_start = end_pos + len(end_esc)
+ # Content tag not present yet. In GPT-OSS the region
+ # between <|end|> and the content tag contains internal
+ # markup (<|start|>assistant…) that must not be shown.
+ # Suppress it to prevent tag leaks during streaming.
+ remainder = text[end_pos + len(end_esc):].lstrip()
+ framing_token = esc('<|start|>')
+ if not remainder or remainder.startswith(framing_token) or framing_token.startswith(remainder):
+ content_start = len(text)
+ else:
+ content_start = end_pos + len(end_esc)
else:
content_start = end_pos + len(end_esc)
diff --git a/modules/tool_parsing.py b/modules/tool_parsing.py
index 7fcf58b7..aa3e0e95 100644
--- a/modules/tool_parsing.py
+++ b/modules/tool_parsing.py
@@ -638,9 +638,15 @@ def parse_tool_call(answer: str, tool_names: list[str], return_prefix: bool = Fa
    # Strip thinking blocks so tool-call syntax inside <think> is ignored.
original_answer = answer
_, answer = extract_reasoning(answer)
- # Offset between original and stripped text, used to map start_pos
- # back to the original string when returning a prefix.
- reasoning_offset = len(original_answer) - len(answer)
+ # Reasoning extraction returns empty content when GPT-OSS internal
+ # markup (<|start|>assistant…) follows the thinking block without a
+ # content tag. Fall back to the full text so tool-call markers can
+ # be found.
+ if not answer.strip():
+ answer = original_answer
+ reasoning_offset = 0
+ else:
+ reasoning_offset = len(original_answer) - len(answer)
matches = []
start_pos = None
From 775c913de20824d187f677e65845fe8680ecd7f6 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Mon, 6 Apr 2026 14:13:01 -0700
Subject: [PATCH 74/76] Fix crash when truncating prompts with tool call
messages
---
modules/chat.py | 21 ++++++++++++++++-----
1 file changed, 16 insertions(+), 5 deletions(-)
diff --git a/modules/chat.py b/modules/chat.py
index aeed688d..7e9cce60 100644
--- a/modules/chat.py
+++ b/modules/chat.py
@@ -568,13 +568,24 @@ def generate_chat_prompt(user_input, state, **kwargs):
encoded_length = get_encoded_length(prompt)
while len(messages) > 0 and encoded_length > max_length:
- # Remove old message, save system message
if len(messages) > 2 and messages[0]['role'] == 'system':
- messages.pop(1)
-
- # Remove old message when no system message is present
+ pop_idx = 1
elif len(messages) > 1 and messages[0]['role'] != 'system':
- messages.pop(0)
+ pop_idx = 0
+ else:
+ pop_idx = None
+
+ if pop_idx is not None:
+ messages.pop(pop_idx)
+
+ # Remove orphaned tool-call/tool-result messages that
+ # would be invalid without their partner.
+ while pop_idx < len(messages):
+ msg = messages[pop_idx]
+ if msg.get('role') == 'tool' or (msg.get('role') == 'assistant' and msg.get('tool_calls')):
+ messages.pop(pop_idx)
+ else:
+ break
# Resort to truncating the user input
else:
From 778e1c4d52cc6f86cd55207543563773b12cd2cf Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Mon, 6 Apr 2026 17:04:49 -0700
Subject: [PATCH 75/76] Update llama.cpp/ik_llama.cpp
---
requirements/full/requirements.txt | 8 ++++----
requirements/full/requirements_amd.txt | 4 ++--
requirements/full/requirements_apple_intel.txt | 2 +-
requirements/full/requirements_apple_silicon.txt | 2 +-
requirements/full/requirements_cpu_only.txt | 8 ++++----
requirements/portable/requirements.txt | 4 ++--
requirements/portable/requirements_amd.txt | 4 ++--
requirements/portable/requirements_apple_intel.txt | 2 +-
requirements/portable/requirements_apple_silicon.txt | 2 +-
requirements/portable/requirements_cpu_only.txt | 4 ++--
requirements/portable/requirements_cuda131.txt | 4 ++--
requirements/portable/requirements_ik.txt | 4 ++--
requirements/portable/requirements_ik_cpu_only.txt | 4 ++--
requirements/portable/requirements_ik_cuda131.txt | 4 ++--
requirements/portable/requirements_vulkan.txt | 4 ++--
15 files changed, 30 insertions(+), 30 deletions(-)
diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt
index d466e7e3..ed5841b8 100644
--- a/requirements/full/requirements.txt
+++ b/requirements/full/requirements.txt
@@ -41,10 +41,10 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.28/exllamav3-0.0.28+cu128.torch2.9.0-cp313-cp313-win_amd64.whl; platform_system == "Windows" and python_version == "3.13"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.28/exllamav3-0.0.28+cu128.torch2.9.0-cp313-cp313-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.13"
https://github.com/kingbri1/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu128torch2.9.0cxx11abiFALSE-cp313-cp313-win_amd64.whl; platform_system == "Windows" and python_version == "3.13"
diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt
index e88ff7c5..fe6ce28c 100644
--- a/requirements/full/requirements_amd.txt
+++ b/requirements/full/requirements_amd.txt
@@ -38,5 +38,5 @@ sse-starlette==1.6.5
tiktoken
# AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt
index eefd979e..09c01a61 100644
--- a/requirements/full/requirements_apple_intel.txt
+++ b/requirements/full/requirements_apple_intel.txt
@@ -38,4 +38,4 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt
index d1b4e09f..42210407 100644
--- a/requirements/full/requirements_apple_silicon.txt
+++ b/requirements/full/requirements_apple_silicon.txt
@@ -38,4 +38,4 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt
index 156ceb77..5cd7ae7d 100644
--- a/requirements/full/requirements_cpu_only.txt
+++ b/requirements/full/requirements_cpu_only.txt
@@ -38,7 +38,7 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt
index 8a158f05..807ff079 100644
--- a/requirements/portable/requirements.txt
+++ b/requirements/portable/requirements.txt
@@ -24,5 +24,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_amd.txt b/requirements/portable/requirements_amd.txt
index a4949a46..55fe79ea 100644
--- a/requirements/portable/requirements_amd.txt
+++ b/requirements/portable/requirements_amd.txt
@@ -24,5 +24,5 @@ sse-starlette==1.6.5
tiktoken
# AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt
index 227823a6..6d4a63f7 100644
--- a/requirements/portable/requirements_apple_intel.txt
+++ b/requirements/portable/requirements_apple_intel.txt
@@ -24,4 +24,4 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt
index 9779dd4a..aebb7c5b 100644
--- a/requirements/portable/requirements_apple_silicon.txt
+++ b/requirements/portable/requirements_apple_silicon.txt
@@ -24,4 +24,4 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt
index ff84907a..d7e2b051 100644
--- a/requirements/portable/requirements_cpu_only.txt
+++ b/requirements/portable/requirements_cpu_only.txt
@@ -24,5 +24,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
diff --git a/requirements/portable/requirements_cuda131.txt b/requirements/portable/requirements_cuda131.txt
index 89e43e1a..42a9a16f 100644
--- a/requirements/portable/requirements_cuda131.txt
+++ b/requirements/portable/requirements_cuda131.txt
@@ -24,5 +24,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_ik.txt b/requirements/portable/requirements_ik.txt
index a23d8ff0..c3fdb5e8 100644
--- a/requirements/portable/requirements_ik.txt
+++ b/requirements/portable/requirements_ik.txt
@@ -24,5 +24,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_ik_cpu_only.txt b/requirements/portable/requirements_ik_cpu_only.txt
index a200e80f..ea3ba601 100644
--- a/requirements/portable/requirements_ik_cpu_only.txt
+++ b/requirements/portable/requirements_ik_cpu_only.txt
@@ -24,5 +24,5 @@ sse-starlette==1.6.5
tiktoken
# ik_llama.cpp (CPU only)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
diff --git a/requirements/portable/requirements_ik_cuda131.txt b/requirements/portable/requirements_ik_cuda131.txt
index 8e9a097b..7530375d 100644
--- a/requirements/portable/requirements_ik_cuda131.txt
+++ b/requirements/portable/requirements_ik_cuda131.txt
@@ -24,5 +24,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt
index 59524668..3b8b0573 100644
--- a/requirements/portable/requirements_vulkan.txt
+++ b/requirements/portable/requirements_vulkan.txt
@@ -24,5 +24,5 @@ sse-starlette==1.6.5
tiktoken
# Vulkan wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
From e18f32cba78d471dd86a924147aa3ea6638d5e97 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Mon, 6 Apr 2026 17:47:50 -0700
Subject: [PATCH 76/76] Remove hardcoded trust_remote_code=True in embedding
loader
---
modules/api/embeddings.py | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/modules/api/embeddings.py b/modules/api/embeddings.py
index 16cf0482..17e595fb 100644
--- a/modules/api/embeddings.py
+++ b/modules/api/embeddings.py
@@ -6,6 +6,7 @@ from transformers import AutoModel
from .errors import ServiceUnavailableError
from .utils import debug_msg, float_list_to_base64
from modules.logging_colors import logger
+from modules import shared
embeddings_params_initialized = False
@@ -41,7 +42,7 @@ def load_embedding_model(model: str):
try:
logger.info(f"Try embedding model: {model} on {embeddings_device}")
if 'jina-embeddings' in model:
- embeddings_model = AutoModel.from_pretrained(model, trust_remote_code=True) # trust_remote_code is needed to use the encode method
+ embeddings_model = AutoModel.from_pretrained(model, trust_remote_code=shared.args.trust_remote_code)
embeddings_model = embeddings_model.to(embeddings_device)
else:
embeddings_model = SentenceTransformer(model, device=embeddings_device)