is ignored.
original_answer = answer
_, answer = extract_reasoning(answer)
- # Reasoning extraction returns empty content when GPT-OSS internal
- # markup (<|start|>assistant…) follows the thinking block without a
- # content tag. Fall back to the full text so tool-call markers can
- # be found.
- if not answer.strip():
- answer = original_answer
- reasoning_offset = 0
- else:
- reasoning_offset = len(original_answer) - len(answer)
+ # Offset between original and stripped text, used to map start_pos
+ # back to the original string when returning a prefix.
+ reasoning_offset = len(original_answer) - len(answer)
matches = []
start_pos = None
@@ -712,8 +620,6 @@ def parse_tool_call(answer: str, tool_names: list[str], return_prefix: bool = Fa
if not isinstance(candidates, list):
candidates = [candidates]
for candidate_dict in candidates:
- if not isinstance(candidate_dict, dict):
- continue
checked_candidate = check_and_sanitize_tool_call_candidate(candidate_dict, tool_names)
if checked_candidate is not None:
matches.append(checked_candidate)
diff --git a/modules/tool_use.py b/modules/tool_use.py
index f9ddf940..e22b1798 100644
--- a/modules/tool_use.py
+++ b/modules/tool_use.py
@@ -1,4 +1,3 @@
-import asyncio
import importlib.util
import json
@@ -56,119 +55,6 @@ def load_tools(selected_names):
return tool_defs, executors
-def _parse_mcp_servers(servers_str):
- """Parse MCP servers textbox: one server per line, format 'url' or 'url,Header: value,Header2: value2'."""
- servers = []
- for line in servers_str.strip().splitlines():
- line = line.strip()
- if not line:
- continue
- parts = line.split(',')
- url = parts[0].strip()
- headers = {}
- for part in parts[1:]:
- part = part.strip()
- if ':' in part:
- key, val = part.split(':', 1)
- headers[key.strip()] = val.strip()
- servers.append((url, headers))
- return servers
-
-
-def _mcp_tool_to_openai(tool):
- """Convert an MCP Tool object to OpenAI-format tool dict."""
- return {
- "type": "function",
- "function": {
- "name": tool.name,
- "description": tool.description or "",
- "parameters": tool.inputSchema or {"type": "object", "properties": {}}
- }
- }
-
-
-async def _mcp_session(url, headers, callback):
- """Open an MCP session and pass it to the callback."""
- from mcp.client.streamable_http import streamablehttp_client
- from mcp import ClientSession
-
- async with streamablehttp_client(url, headers=headers or None) as (read_stream, write_stream, _):
- async with ClientSession(read_stream, write_stream) as session:
- await session.initialize()
- return await callback(session)
-
-
-def _make_mcp_executor(name, url, headers):
- def executor(arguments):
- return asyncio.run(_call_mcp_tool(name, arguments, url, headers))
- return executor
-
-
-async def _connect_mcp_server(url, headers):
- """Connect to one MCP server and return (tool_defs, executors)."""
-
- async def _discover(session):
- result = await session.list_tools()
- tool_defs = []
- executors = {}
- for tool in result.tools:
- tool_defs.append(_mcp_tool_to_openai(tool))
- executors[tool.name] = _make_mcp_executor(tool.name, url, headers)
- return tool_defs, executors
-
- return await _mcp_session(url, headers, _discover)
-
-
-async def _call_mcp_tool(name, arguments, url, headers):
- """Connect to an MCP server and call a single tool."""
-
- async def _invoke(session):
- result = await session.call_tool(name, arguments)
- parts = []
- for content in result.content:
- if hasattr(content, 'text'):
- parts.append(content.text)
- else:
- parts.append(str(content))
- return '\n'.join(parts) if parts else ''
-
- return await _mcp_session(url, headers, _invoke)
-
-
-async def _connect_all_mcp_servers(servers):
- """Connect to all MCP servers concurrently."""
- results = await asyncio.gather(
- *(_connect_mcp_server(url, headers) for url, headers in servers),
- return_exceptions=True
- )
- all_defs = []
- all_executors = {}
- for (url, _), result in zip(servers, results):
- if isinstance(result, Exception):
- logger.exception(f'Failed to connect to MCP server "{url}"', exc_info=result)
- continue
- defs, execs = result
- for td, (fn, ex) in zip(defs, execs.items()):
- if fn in all_executors:
- logger.warning(f'MCP tool "{fn}" from {url} conflicts with an already loaded tool. Skipping.')
- continue
- all_defs.append(td)
- all_executors[fn] = ex
- return all_defs, all_executors
-
-
-def load_mcp_tools(servers_str):
- """
- Parse MCP servers string and discover tools from each server.
- Returns (tool_defs, executors) in the same format as load_tools.
- """
- servers = _parse_mcp_servers(servers_str)
- if not servers:
- return [], {}
-
- return asyncio.run(_connect_all_mcp_servers(servers))
-
-
def execute_tool(func_name, arguments, executors):
"""Execute a tool by function name. Returns result as a JSON string."""
fn = executors.get(func_name)
diff --git a/modules/training.py b/modules/training.py
index bca4f02e..145353c6 100644
--- a/modules/training.py
+++ b/modules/training.py
@@ -52,7 +52,7 @@ def create_ui():
with gr.Column():
always_override = gr.Checkbox(label='Override Existing Files', value=False, info='If the name is the same, checking will replace the existing file, and unchecking will load and continue from it (the rank must be the same).', elem_classes=['no-background'])
- with gr.Accordion(label='Target Modules', open=False):
+ with gr.Accordion(label='Target Modules', open=False, elem_classes='tgw-accordion'):
gr.Markdown("Selects which modules to target in training. Targeting more modules is closer to a full fine-tune at the cost of increased VRAM and adapter size.")
all_linear = gr.Checkbox(label='Target all linear layers', value=True, info='Targets every nn.Linear layer except lm_head. Works for any model architecture. When checked, the individual module checkboxes below are ignored.', elem_classes=['no-background'])
with gr.Row():
@@ -87,7 +87,7 @@ def create_ui():
with gr.Row():
lr_scheduler_type = gr.Dropdown(label='LR Scheduler', value='cosine', choices=['linear', 'constant', 'constant_with_warmup', 'cosine', 'cosine_with_restarts', 'polynomial', 'inverse_sqrt'], info='Learning rate scheduler - defines how the learning rate changes over time. "Constant" means never change, "linear" means to go in a straight line from the learning rate down to 0, cosine follows a curve, etc.', elem_classes=['slim-dropdown'])
- with gr.Accordion(label='Advanced Options', open=False):
+ with gr.Accordion(label='Advanced Options', open=False, elem_classes='tgw-accordion'):
with gr.Row():
with gr.Column():
optimizer = gr.Dropdown(label='Optimizer', value='adamw_torch', choices=['adamw_hf', 'adamw_torch', 'adamw_torch_fused', 'adamw_torch_xla', 'adamw_apex_fused', 'adafactor', 'adamw_bnb_8bit', 'adamw_anyprecision', 'sgd', 'adagrad'], info='Optimizer algorithm. adamw_torch is the standard choice. adamw_bnb_8bit uses less VRAM. adafactor is memory-efficient for large models.', elem_classes=['slim-dropdown'])
diff --git a/modules/transformers_loader.py b/modules/transformers_loader.py
index 5964f012..7f521b8c 100644
--- a/modules/transformers_loader.py
+++ b/modules/transformers_loader.py
@@ -109,6 +109,7 @@ def load_model_HF(model_name):
params = {
'low_cpu_mem_usage': True,
'attn_implementation': shared.args.attn_implementation,
+ 'torch_dtype': torch.bfloat16 if shared.args.bf16 else torch.float16,
}
if shared.original_args.trust_remote_code:
@@ -119,17 +120,6 @@ def load_model_HF(model_name):
config = AutoConfig.from_pretrained(path_to_model, trust_remote_code=shared.original_args.trust_remote_code)
- # Determine torch_dtype: respect --bf16 flag, otherwise autodetect
- # from model config, but never allow float32.
- if shared.args.bf16:
- params['torch_dtype'] = torch.bfloat16
- else:
- dtype = getattr(config, 'torch_dtype', None) or getattr(getattr(config, 'text_config', None), 'torch_dtype', None)
- if dtype in (torch.float16, torch.bfloat16):
- params['torch_dtype'] = dtype
- else:
- params['torch_dtype'] = torch.float16
-
if 'chatglm' in model_name.lower():
LoaderClass = AutoModel
else:
diff --git a/modules/ui.py b/modules/ui.py
index 3a8390f7..02b5a9fb 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -75,7 +75,7 @@ if not shared.args.old_colors:
background_fill_primary_dark='var(--darker-gray, #1C1C1D)',
body_background_fill="white",
block_background_fill="transparent",
- body_text_color='#1a1a1a',
+ body_text_color='rgb(64, 64, 64)',
button_secondary_background_fill="white",
button_secondary_border_color="var(--border-color-primary)",
block_title_text_color='*body_text_color',
@@ -209,7 +209,6 @@ def list_interface_input_elements():
'textbox',
'start_with',
'selected_tools',
- 'mcp_servers',
'mode',
'chat_style',
'chat-instruct_command',
@@ -435,7 +434,6 @@ def setup_auto_save():
'custom_system_message',
'chat_template_str',
'selected_tools',
- 'mcp_servers',
# Parameters tab (ui_parameters.py) - Generation parameters
'preset_menu',
diff --git a/modules/ui_chat.py b/modules/ui_chat.py
index 14489d96..f1dc7883 100644
--- a/modules/ui_chat.py
+++ b/modules/ui_chat.py
@@ -52,7 +52,7 @@ def create_ui():
shared.gradio['html_display'] = gr.HTML(value=chat_html_wrapper({'internal': [], 'visible': [], 'metadata': {}}, '', '', 'chat', 'cai-chat', '')['html'], visible=True)
with gr.Row(elem_id="chat-input-row"):
with gr.Column(scale=1, elem_id='gr-hover-container'):
- gr.HTML(value='
', elem_id='gr-hover')
+ gr.HTML(value='☰', elem_id='gr-hover')
with gr.Column(scale=10, elem_id='chat-input-container'):
shared.gradio['textbox'] = gr.MultimodalTextbox(label='', placeholder='Send a message', file_types=['text', '.pdf', 'image'], file_count="multiple", elem_id='chat-input', elem_classes=['add_scrollbar'])
@@ -82,7 +82,7 @@ def create_ui():
gr.HTML("")
shared.gradio['reasoning_effort'] = gr.Dropdown(value=shared.settings['reasoning_effort'], choices=['low', 'medium', 'high'], label='Reasoning effort', info='Used by GPT-OSS.')
- shared.gradio['enable_thinking'] = gr.Checkbox(value=shared.settings['enable_thinking'], label='Enable thinking', info='For models with thinking support.')
+ shared.gradio['enable_thinking'] = gr.Checkbox(value=shared.settings['enable_thinking'], label='Enable thinking', info='Used by Seed-OSS and pre-2507 Qwen3.')
gr.HTML("")
@@ -105,9 +105,6 @@ def create_ui():
shared.gradio['selected_tools'].change(fn=sync_web_tools, inputs=[shared.gradio['selected_tools']], outputs=[shared.gradio['selected_tools']], show_progress=False)
- with gr.Accordion('MCP servers', open=False):
- shared.gradio['mcp_servers'] = gr.Textbox(value=shared.settings.get('mcp_servers', ''), lines=3, max_lines=3, label='', info='One url per line. For headers, write url,Header: value,Header2: value2', elem_classes=['add_scrollbar'])
-
gr.HTML("")
with gr.Row():
diff --git a/modules/ui_default.py b/modules/ui_default.py
index 48cb2fc2..2c367cca 100644
--- a/modules/ui_default.py
+++ b/modules/ui_default.py
@@ -10,7 +10,7 @@ from modules.text_generation import (
stop_everything_event
)
from modules.ui_notebook import store_notebook_state_and_debounce
-from modules.utils import gradio, sanitize_filename
+from modules.utils import gradio
inputs = ('textbox-default', 'interface_state')
outputs = ('output_textbox', 'html-default')
@@ -167,7 +167,6 @@ def handle_new_prompt():
def handle_delete_prompt_confirm_default(prompt_name):
- prompt_name = sanitize_filename(prompt_name)
available_prompts = utils.get_available_prompts()
current_index = available_prompts.index(prompt_name) if prompt_name in available_prompts else 0
@@ -200,8 +199,6 @@ def handle_rename_prompt_click_default(current_name):
def handle_rename_prompt_confirm_default(new_name, current_name):
- new_name = sanitize_filename(new_name)
- current_name = sanitize_filename(current_name)
old_path = shared.user_data_dir / "logs" / "notebook" / f"{current_name}.txt"
new_path = shared.user_data_dir / "logs" / "notebook" / f"{new_name}.txt"
diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py
index 727aa7b1..1efb2479 100644
--- a/modules/ui_image_generation.py
+++ b/modules/ui_image_generation.py
@@ -798,9 +798,6 @@ def generate(state, save_images=True):
if seed == -1:
seed = random.randint(0, 2**32 - 1)
- # Store resolved seed back so callers (e.g. API) can access it
- state['image_seed_resolved'] = seed
-
device = get_device()
if device is None:
device = "cpu"
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index 9c8306f5..5b7621a7 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -51,9 +51,7 @@ def create_ui():
with gr.Column():
shared.gradio['vram_info'] = gr.HTML(value=get_initial_vram_info())
- if not shared.args.portable:
- shared.gradio['ik'] = gr.Checkbox(label="ik", value=shared.args.ik, info='Use ik_llama.cpp instead of upstream llama.cpp.')
-
+ shared.gradio['cpu_moe'] = gr.Checkbox(label="cpu-moe", value=shared.args.cpu_moe, info='Move the experts to the CPU. Saves VRAM on MoE models.')
shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming-llm", value=shared.args.streaming_llm, info='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
shared.gradio['load_in_8bit'] = gr.Checkbox(label="load-in-8bit", value=shared.args.load_in_8bit)
shared.gradio['load_in_4bit'] = gr.Checkbox(label="load-in-4bit", value=shared.args.load_in_4bit)
@@ -66,13 +64,13 @@ def create_ui():
)
# Multimodal
- with gr.Accordion("Multimodal (vision)", open=False) as shared.gradio['mmproj_accordion']:
+ with gr.Accordion("Multimodal (vision)", open=False, elem_classes='tgw-accordion') as shared.gradio['mmproj_accordion']:
with gr.Row():
shared.gradio['mmproj'] = gr.Dropdown(label="mmproj file", choices=utils.get_available_mmproj(), value=lambda: shared.args.mmproj or 'None', elem_classes='slim-dropdown', info=f'Select a file that matches your model. Must be placed in {shared.user_data_dir}/mmproj/', interactive=not mu)
ui.create_refresh_button(shared.gradio['mmproj'], lambda: None, lambda: {'choices': utils.get_available_mmproj()}, 'refresh-button', interactive=not mu)
# Speculative decoding
- with gr.Accordion("Speculative decoding", open=False) as shared.gradio['speculative_decoding_accordion']:
+ with gr.Accordion("Speculative decoding", open=False, elem_classes='tgw-accordion') as shared.gradio['speculative_decoding_accordion']:
shared.gradio['draft_max'] = gr.Number(label="draft-max", precision=0, step=1, value=shared.args.draft_max, info='Maximum number of tokens to draft for speculative decoding. Recommended: 4 for draft model, 64 for n-gram.')
gr.Markdown('#### Draft model')
@@ -91,7 +89,7 @@ def create_ui():
shared.gradio['spec_ngram_min_hits'] = gr.Number(label="spec-ngram-min-hits", precision=0, step=1, value=shared.args.spec_ngram_min_hits, info='Minimum n-gram hits for ngram-map speculative decoding.', visible=shared.args.spec_type != 'none')
gr.Markdown("## Other options")
- with gr.Accordion("See more options", open=False):
+ with gr.Accordion("See more options", open=False, elem_classes='tgw-accordion'):
with gr.Row():
with gr.Column():
shared.gradio['parallel'] = gr.Slider(label="parallel", minimum=1, step=1, maximum=64, value=shared.args.parallel, info='Number of parallel request slots for the API. The context size is divided equally among slots. For example, to have 4 slots with 8192 context each, set ctx_size to 32768.')
@@ -108,7 +106,6 @@ def create_ui():
with gr.Column():
shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='Use PyTorch in CPU mode.')
shared.gradio['disk'] = gr.Checkbox(label="disk", value=shared.args.disk)
- shared.gradio['cpu_moe'] = gr.Checkbox(label="cpu-moe", value=shared.args.cpu_moe, info='Move the experts to the CPU. Saves VRAM on MoE models.')
shared.gradio['row_split'] = gr.Checkbox(label="row_split", value=shared.args.row_split, info='Split the model by rows across GPUs. This may improve multi-gpu performance.')
shared.gradio['no_kv_offload'] = gr.Checkbox(label="no_kv_offload", value=shared.args.no_kv_offload, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces performance.')
shared.gradio['no_mmap'] = gr.Checkbox(label="no-mmap", value=shared.args.no_mmap)
diff --git a/modules/ui_notebook.py b/modules/ui_notebook.py
index 88f00ac5..f550e646 100644
--- a/modules/ui_notebook.py
+++ b/modules/ui_notebook.py
@@ -11,7 +11,7 @@ from modules.text_generation import (
get_token_ids,
stop_everything_event
)
-from modules.utils import gradio, sanitize_filename
+from modules.utils import gradio
_notebook_file_lock = threading.Lock()
_notebook_auto_save_timer = None
@@ -202,7 +202,6 @@ def handle_new_prompt():
def handle_delete_prompt_confirm_notebook(prompt_name):
- prompt_name = sanitize_filename(prompt_name)
available_prompts = utils.get_available_prompts()
current_index = available_prompts.index(prompt_name) if prompt_name in available_prompts else 0
@@ -234,8 +233,6 @@ def handle_rename_prompt_click_notebook(current_name):
def handle_rename_prompt_confirm_notebook(new_name, current_name):
- new_name = sanitize_filename(new_name)
- current_name = sanitize_filename(current_name)
old_path = shared.user_data_dir / "logs" / "notebook" / f"{current_name}.txt"
new_path = shared.user_data_dir / "logs" / "notebook" / f"{new_name}.txt"
@@ -252,7 +249,6 @@ def handle_rename_prompt_confirm_notebook(new_name, current_name):
def autosave_prompt(text, prompt_name):
"""Automatically save the text to the selected prompt file"""
- prompt_name = sanitize_filename(prompt_name)
if prompt_name and text.strip():
prompt_path = shared.user_data_dir / "logs" / "notebook" / f"{prompt_name}.txt"
prompt_path.parent.mkdir(parents=True, exist_ok=True)
diff --git a/modules/utils.py b/modules/utils.py
index c4acf714..b01953ee 100644
--- a/modules/utils.py
+++ b/modules/utils.py
@@ -105,9 +105,6 @@ def resolve_model_path(model_name_or_path, image_model=False):
before the default models directory.
"""
- if model_name_or_path is None:
- raise FileNotFoundError("No model specified.")
-
path_candidate = Path(model_name_or_path)
if path_candidate.exists():
return path_candidate
diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt
index ed5841b8..56619627 100644
--- a/requirements/full/requirements.txt
+++ b/requirements/full/requirements.txt
@@ -1,4 +1,4 @@
-accelerate==1.13.*
+accelerate==1.12.*
audioop-lts<1.0; python_version >= "3.13"
bitsandbytes==0.49.*
datasets
@@ -9,7 +9,6 @@ flash-linear-attention==0.4.*
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
-mcp==1.27.0
numpy==2.2.*
pandas
peft==0.18.*
@@ -26,14 +25,14 @@ sentencepiece
tensorboard
torchao==0.15.*
trafilatura==2.0.0
-transformers==5.5.*
+transformers==5.3.*
triton-windows==3.5.1.post24; platform_system == "Windows"
tqdm
wandb
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@@ -41,11 +40,9 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/turboderp-org/exllamav3/releases/download/v0.0.28/exllamav3-0.0.28+cu128.torch2.9.0-cp313-cp313-win_amd64.whl; platform_system == "Windows" and python_version == "3.13"
-https://github.com/turboderp-org/exllamav3/releases/download/v0.0.28/exllamav3-0.0.28+cu128.torch2.9.0-cp313-cp313-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.13"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/turboderp-org/exllamav3/releases/download/v0.0.26/exllamav3-0.0.26+cu128.torch2.9.0-cp313-cp313-win_amd64.whl; platform_system == "Windows" and python_version == "3.13"
+https://github.com/turboderp-org/exllamav3/releases/download/v0.0.26/exllamav3-0.0.26+cu128.torch2.9.0-cp313-cp313-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.13"
https://github.com/kingbri1/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu128torch2.9.0cxx11abiFALSE-cp313-cp313-win_amd64.whl; platform_system == "Windows" and python_version == "3.13"
https://github.com/kingbri1/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu128torch2.9.0cxx11abiFALSE-cp313-cp313-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.13"
diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt
index fe6ce28c..620683cc 100644
--- a/requirements/full/requirements_amd.txt
+++ b/requirements/full/requirements_amd.txt
@@ -1,4 +1,4 @@
-accelerate==1.13.*
+accelerate==1.12.*
audioop-lts<1.0; python_version >= "3.13"
datasets
diffusers==0.37.*
@@ -7,7 +7,6 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
-mcp==1.27.0
numpy==2.2.*
pandas
peft==0.18.*
@@ -23,14 +22,14 @@ scipy
sentencepiece
tensorboard
torchao==0.15.*
-transformers==5.5.*
+transformers==5.3.*
tqdm
trafilatura==2.0.0
wandb
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@@ -38,5 +37,5 @@ sse-starlette==1.6.5
tiktoken
# AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt
index 09c01a61..b1f109b2 100644
--- a/requirements/full/requirements_apple_intel.txt
+++ b/requirements/full/requirements_apple_intel.txt
@@ -1,4 +1,4 @@
-accelerate==1.13.*
+accelerate==1.12.*
audioop-lts<1.0; python_version >= "3.13"
datasets
diffusers==0.37.*
@@ -7,7 +7,6 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
-mcp==1.27.0
numpy==2.2.*
pandas
peft==0.18.*
@@ -23,14 +22,14 @@ scipy
sentencepiece
tensorboard
torchao==0.15.*
-transformers==5.5.*
+transformers==5.3.*
tqdm
trafilatura==2.0.0
wandb
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@@ -38,4 +37,4 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt
index 42210407..a54476a9 100644
--- a/requirements/full/requirements_apple_silicon.txt
+++ b/requirements/full/requirements_apple_silicon.txt
@@ -1,4 +1,4 @@
-accelerate==1.13.*
+accelerate==1.12.*
audioop-lts<1.0; python_version >= "3.13"
datasets
diffusers==0.37.*
@@ -7,7 +7,6 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
-mcp==1.27.0
numpy==2.2.*
pandas
peft==0.18.*
@@ -23,14 +22,14 @@ scipy
sentencepiece
tensorboard
torchao==0.15.*
-transformers==5.5.*
+transformers==5.3.*
tqdm
trafilatura==2.0.0
wandb
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@@ -38,4 +37,4 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt
index 5cd7ae7d..be82c904 100644
--- a/requirements/full/requirements_cpu_only.txt
+++ b/requirements/full/requirements_cpu_only.txt
@@ -1,4 +1,4 @@
-accelerate==1.13.*
+accelerate==1.12.*
audioop-lts<1.0; python_version >= "3.13"
datasets
diffusers==0.37.*
@@ -7,7 +7,6 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
-mcp==1.27.0
numpy==2.2.*
pandas
peft==0.18.*
@@ -23,14 +22,14 @@ scipy
sentencepiece
tensorboard
torchao==0.15.*
-transformers==5.5.*
+transformers==5.3.*
tqdm
trafilatura==2.0.0
wandb
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@@ -38,7 +37,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt
index 19ac5183..77c254e6 100644
--- a/requirements/full/requirements_nowheels.txt
+++ b/requirements/full/requirements_nowheels.txt
@@ -1,4 +1,4 @@
-accelerate==1.13.*
+accelerate==1.12.*
audioop-lts<1.0; python_version >= "3.13"
datasets
diffusers==0.37.*
@@ -7,7 +7,6 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
-mcp==1.27.0
numpy==2.2.*
pandas
peft==0.18.*
@@ -23,14 +22,14 @@ scipy
sentencepiece
tensorboard
torchao==0.15.*
-transformers==5.5.*
+transformers==5.3.*
tqdm
trafilatura==2.0.0
wandb
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
# API
flask_cloudflared==0.0.15
diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt
index 807ff079..188da380 100644
--- a/requirements/portable/requirements.txt
+++ b/requirements/portable/requirements.txt
@@ -3,7 +3,6 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
-mcp==1.27.0
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.*
@@ -15,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@@ -24,5 +23,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_amd.txt b/requirements/portable/requirements_amd.txt
index 55fe79ea..4562b6d0 100644
--- a/requirements/portable/requirements_amd.txt
+++ b/requirements/portable/requirements_amd.txt
@@ -3,7 +3,6 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
-mcp==1.27.0
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.*
@@ -15,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@@ -24,5 +23,5 @@ sse-starlette==1.6.5
tiktoken
# AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt
index 6d4a63f7..04dcf25e 100644
--- a/requirements/portable/requirements_apple_intel.txt
+++ b/requirements/portable/requirements_apple_intel.txt
@@ -3,7 +3,6 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
-mcp==1.27.0
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.*
@@ -15,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@@ -24,4 +23,4 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt
index aebb7c5b..4b8af78a 100644
--- a/requirements/portable/requirements_apple_silicon.txt
+++ b/requirements/portable/requirements_apple_silicon.txt
@@ -3,7 +3,6 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
-mcp==1.27.0
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.*
@@ -15,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@@ -24,4 +23,4 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt
index d7e2b051..5b0eaf89 100644
--- a/requirements/portable/requirements_cpu_only.txt
+++ b/requirements/portable/requirements_cpu_only.txt
@@ -3,7 +3,6 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
-mcp==1.27.0
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.*
@@ -15,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@@ -24,5 +23,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
diff --git a/requirements/portable/requirements_cuda131.txt b/requirements/portable/requirements_cuda131.txt
index 42a9a16f..90b3234f 100644
--- a/requirements/portable/requirements_cuda131.txt
+++ b/requirements/portable/requirements_cuda131.txt
@@ -3,7 +3,6 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
-mcp==1.27.0
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.*
@@ -15,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@@ -24,5 +23,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_ik.txt b/requirements/portable/requirements_ik.txt
deleted file mode 100644
index c3fdb5e8..00000000
--- a/requirements/portable/requirements_ik.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-audioop-lts<1.0; python_version >= "3.13"
-fastapi==0.112.4
-huggingface-hub==1.5.*
-jinja2==3.1.6
-markdown
-mcp==1.27.0
-numpy==2.2.*
-pydantic==2.11.0
-pymupdf==1.27.*
-python-docx==1.1.2
-pyyaml
-requests
-rich
-trafilatura==2.0.0
-tqdm
-
-# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
-
-# API
-flask_cloudflared==0.0.15
-sse-starlette==1.6.5
-tiktoken
-
-# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_ik_cpu_only.txt b/requirements/portable/requirements_ik_cpu_only.txt
deleted file mode 100644
index ea3ba601..00000000
--- a/requirements/portable/requirements_ik_cpu_only.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-audioop-lts<1.0; python_version >= "3.13"
-fastapi==0.112.4
-huggingface-hub==1.5.*
-jinja2==3.1.6
-markdown
-mcp==1.27.0
-numpy==2.2.*
-pydantic==2.11.0
-pymupdf==1.27.*
-python-docx==1.1.2
-pyyaml
-requests
-rich
-trafilatura==2.0.0
-tqdm
-
-# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
-
-# API
-flask_cloudflared==0.0.15
-sse-starlette==1.6.5
-tiktoken
-
-# ik_llama.cpp (CPU only)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
diff --git a/requirements/portable/requirements_ik_cuda131.txt b/requirements/portable/requirements_ik_cuda131.txt
deleted file mode 100644
index 7530375d..00000000
--- a/requirements/portable/requirements_ik_cuda131.txt
+++ /dev/null
@@ -1,28 +0,0 @@
-audioop-lts<1.0; python_version >= "3.13"
-fastapi==0.112.4
-huggingface-hub==1.5.*
-jinja2==3.1.6
-markdown
-mcp==1.27.0
-numpy==2.2.*
-pydantic==2.11.0
-pymupdf==1.27.*
-python-docx==1.1.2
-pyyaml
-requests
-rich
-trafilatura==2.0.0
-tqdm
-
-# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
-
-# API
-flask_cloudflared==0.0.15
-sse-starlette==1.6.5
-tiktoken
-
-# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_nowheels.txt b/requirements/portable/requirements_nowheels.txt
index cafe3cee..e8457909 100644
--- a/requirements/portable/requirements_nowheels.txt
+++ b/requirements/portable/requirements_nowheels.txt
@@ -3,7 +3,6 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
-mcp==1.27.0
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.*
@@ -15,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
# API
flask_cloudflared==0.0.15
diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt
index 3b8b0573..ea72b4ec 100644
--- a/requirements/portable/requirements_vulkan.txt
+++ b/requirements/portable/requirements_vulkan.txt
@@ -3,7 +3,6 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
-mcp==1.27.0
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.*
@@ -15,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio-4.37.2+custom.12-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.12/gradio_client-1.0.2+custom.12-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@@ -24,5 +23,5 @@ sse-starlette==1.6.5
tiktoken
# Vulkan wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.98.0/llama_cpp_binaries-0.98.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/server.py b/server.py
index 88936ca6..d224909c 100644
--- a/server.py
+++ b/server.py
@@ -18,6 +18,7 @@ import modules.extensions as extensions_module
from modules.LoRA import add_lora_to_model
from modules.models import load_model, unload_model_if_idle
from modules.models_settings import (
+ get_fallback_settings,
get_model_metadata,
update_model_parameters
)
@@ -270,6 +271,10 @@ if __name__ == "__main__":
# Apply CLI overrides for image model settings (CLI flags take precedence over saved settings)
shared.apply_image_model_cli_overrides()
+ # Fallback settings for models
+ shared.model_config['.*'] = get_fallback_settings()
+ shared.model_config.move_to_end('.*', last=False) # Move to the beginning
+
# Activate the extensions listed on settings.yaml
extensions_module.available_extensions = utils.get_available_extensions()
for extension in shared.settings['default_extensions']:
diff --git a/user_data/models/config.yaml b/user_data/models/config.yaml
new file mode 100644
index 00000000..038ebcf1
--- /dev/null
+++ b/user_data/models/config.yaml
@@ -0,0 +1,203 @@
+.*(llama|alpac|vicuna|guanaco|koala|llava|wizardlm|metharme|pygmalion-7b|pygmalion-2|mythalion|wizard-mega|openbuddy|vigogne|h2ogpt-research|manticore):
+ model_type: 'llama'
+.*(opt-|opt_|opt1|opt3|optfor|galactica|galpaca|pygmalion-350m):
+ model_type: 'opt'
+.*(gpt-j|gptj|gpt4all-j|malion-6b|pygway|pygmalion-6b|dolly-v1):
+ model_type: 'gptj'
+.*(gpt-neox|koalpaca-polyglot|polyglot.*koalpaca|polyglot-ko|polyglot_ko|pythia|stablelm|incite|dolly-v2|polycoder|h2ogpt-oig|h2ogpt-oasst1|h2ogpt-gm):
+ model_type: 'gptneox'
+.*bloom:
+ model_type: 'bloom'
+.*gpt2:
+ model_type: 'gpt2'
+.*falcon:
+ model_type: 'falcon'
+.*mpt:
+ model_type: 'mpt'
+.*(starcoder|starchat):
+ model_type: 'starcoder'
+.*dolly-v2:
+ model_type: 'dollyv2'
+.*replit:
+ model_type: 'replit'
+.*(oasst|openassistant-|stablelm-7b-sft-v7-epoch-3):
+ instruction_template: 'Open Assistant'
+ skip_special_tokens: false
+(?!.*galactica)(?!.*reward).*openassistant:
+ instruction_template: 'Open Assistant'
+ skip_special_tokens: false
+.*galactica:
+ skip_special_tokens: false
+.*dolly-v[0-9]-[0-9]*b:
+ instruction_template: 'Alpaca'
+ skip_special_tokens: false
+.*alpaca-native-4bit:
+ instruction_template: 'Alpaca'
+.*llava:
+ instruction_template: 'LLaVA'
+.*llava.*1.5:
+ instruction_template: 'Vicuna-v1.1'
+.*wizard.*mega:
+ instruction_template: 'Wizard-Mega'
+.*starchat-beta:
+ instruction_template: 'Starchat-Beta'
+(?!.*v0)(?!.*1.1)(?!.*1_1)(?!.*stable)(?!.*chinese).*vicuna:
+ instruction_template: 'Vicuna-v0'
+.*vicuna.*v0:
+ instruction_template: 'Vicuna-v0'
+.*vicuna.*(1.1|1_1|1.3|1_3):
+ instruction_template: 'Vicuna-v1.1'
+.*vicuna.*(1.5|1_5):
+ instruction_template: 'Vicuna-v1.1'
+.*stable.*vicuna:
+ instruction_template: 'StableVicuna'
+(?!.*chat).*chinese-vicuna:
+ instruction_template: 'Alpaca'
+.*chinese-vicuna.*chat:
+ instruction_template: 'Chinese-Vicuna-Chat'
+.*alpaca:
+ instruction_template: 'Alpaca'
+.*koala:
+ instruction_template: 'Koala'
+.*chatglm:
+ instruction_template: 'ChatGLM'
+.*(metharme|pygmalion|mythalion):
+ instruction_template: 'Metharme'
+.*raven:
+ instruction_template: 'RWKV-Raven'
+.*moss-moon.*sft:
+ instruction_template: 'MOSS'
+.*stablelm-tuned:
+ instruction_template: 'StableLM'
+.*galactica.*finetuned:
+ instruction_template: 'Galactica Finetuned'
+.*galactica.*-v2:
+ instruction_template: 'Galactica v2'
+(?!.*finetuned)(?!.*-v2).*galactica:
+ instruction_template: 'Galactica'
+.*guanaco:
+ instruction_template: 'Guanaco non-chat'
+.*baize:
+ instruction_template: 'Baize'
+.*mpt-.*instruct:
+ instruction_template: 'Alpaca'
+.*mpt-.*chat:
+ instruction_template: 'ChatML'
+(?!.*-flan-)(?!.*-t5-).*lamini-:
+ instruction_template: 'Alpaca'
+.*incite.*chat:
+ instruction_template: 'INCITE-Chat'
+.*incite.*instruct:
+ instruction_template: 'INCITE-Instruct'
+.*ziya-:
+ instruction_template: 'Ziya'
+.*koalpaca:
+ instruction_template: 'KoAlpaca'
+.*openbuddy:
+ instruction_template: 'OpenBuddy'
+(?!.*chat).*vigogne:
+ instruction_template: 'Vigogne-Instruct'
+.*vigogne.*chat:
+ instruction_template: 'Vigogne-Chat'
+.*(llama-deus|supercot|llama-natural-instructions|open-llama-0.3t-7b-instruct-dolly-hhrlhf|open-llama-0.3t-7b-open-instruct):
+ instruction_template: 'Alpaca'
+.*bactrian:
+ instruction_template: 'Bactrian'
+.*(h2ogpt-oig-|h2ogpt-oasst1-|h2ogpt-research-oasst1-):
+ instruction_template: 'INCITE-Chat'
+.*h2ogpt-gm-:
+ instruction_template: 'H2O-prompt_answer'
+.*manticore:
+ instruction_template: 'Manticore Chat'
+.*bluemoonrp-(30|13)b:
+ instruction_template: 'Bluemoon'
+.*Nous-Hermes-13b:
+ instruction_template: 'Alpaca'
+.*airoboros:
+ instruction_template: 'Vicuna-v1.1'
+.*airoboros.*1.2:
+ instruction_template: 'Airoboros-v1.2'
+.*alpa(cino|sta):
+ instruction_template: 'Alpaca'
+.*hippogriff:
+ instruction_template: 'Hippogriff'
+.*lazarus:
+ instruction_template: 'Alpaca'
+.*guanaco-.*(7|13|33|65)b:
+ instruction_template: 'Vicuna-v0'
+.*hypermantis:
+ instruction_template: 'Alpaca'
+.*open-llama-.*-open-instruct:
+ instruction_template: 'Alpaca'
+.*starcoder-gpteacher-code-instruct:
+ instruction_template: 'Alpaca'
+.*tulu:
+ instruction_template: 'Tulu'
+.*chronos:
+ instruction_template: 'Alpaca'
+.*samantha:
+ instruction_template: 'Samantha'
+.*wizardcoder:
+ instruction_template: 'Alpaca'
+.*minotaur:
+ instruction_template: 'Manticore Chat'
+.*orca_mini:
+ instruction_template: 'Orca Mini'
+.*(platypus|gplatty|superplatty):
+ instruction_template: 'Alpaca'
+.*(openorca-platypus2):
+ instruction_template: 'OpenOrca-Platypus2'
+.*longchat:
+ instruction_template: 'Vicuna-v1.1'
+.*vicuna-33b:
+ instruction_template: 'Vicuna-v1.1'
+.*redmond-hermes-coder:
+ instruction_template: 'Alpaca'
+.*wizardcoder-15b:
+ instruction_template: 'Alpaca'
+.*wizardlm:
+ instruction_template: 'Vicuna-v1.1'
+.*godzilla:
+ instruction_template: 'Alpaca'
+.*llama(-?)(2|v2).*chat:
+ instruction_template: 'Llama-v2'
+.*newhope:
+ instruction_template: 'NewHope'
+.*stablebeluga2:
+ instruction_template: 'StableBeluga2'
+.*openchat:
+ instruction_template: 'OpenChat'
+.*codellama.*instruct:
+ instruction_template: 'Llama-v2'
+.*(mistral|mixtral).*instruct:
+ instruction_template: 'Mistral'
+.*mistral.*openorca:
+ instruction_template: 'ChatML'
+.*(WizardCoder-Python-34B-V1.0|Phind-CodeLlama-34B-v2|CodeBooga-34B-v0.1):
+ instruction_template: 'Alpaca'
+.*orca-2-(13|7)b:
+ instruction_template: 'ChatML'
+.*openhermes.*mistral:
+ instruction_template: 'ChatML'
+.*Yi-34B-Chat:
+ instruction_template: 'ChatML'
+(dolphin).*:
+ instruction_template: 'ChatML'
+.*synthia:
+ instruction_template: 'Synthia'
+.*(hercules|hyperion):
+ instruction_template: 'ChatML'
+.*command-r:
+ instruction_template: 'Command-R'
+.*xwin-lm-70b-v0.1:
+ instruction_template: 'Vicuna-v1.1'
+.*platypus-yi-34b:
+ instruction_template: 'Vicuna-v1.1'
+.*CausalLM-RP-34B:
+ instruction_template: 'ChatML'
+34b-beta:
+ instruction_template: 'ChatML'
+.*airoboros-3_1-yi-34b-200k:
+ instruction_template: 'Llama-v2'
+.*chatqa:
+ instruction_template: 'NVIDIA-ChatQA'