is ignored.
original_answer = answer
_, answer = extract_reasoning(answer)
- # Reasoning extraction returns empty content when GPT-OSS internal
- # markup (<|start|>assistant…) follows the thinking block without a
- # content tag. Fall back to the full text so tool-call markers can
- # be found.
- if not answer.strip():
- answer = original_answer
- reasoning_offset = 0
- else:
- reasoning_offset = len(original_answer) - len(answer)
+ # Offset between original and stripped text, used to map start_pos
+ # back to the original string when returning a prefix.
+ reasoning_offset = len(original_answer) - len(answer)
matches = []
start_pos = None
@@ -712,8 +620,6 @@ def parse_tool_call(answer: str, tool_names: list[str], return_prefix: bool = Fa
if not isinstance(candidates, list):
candidates = [candidates]
for candidate_dict in candidates:
- if not isinstance(candidate_dict, dict):
- continue
checked_candidate = check_and_sanitize_tool_call_candidate(candidate_dict, tool_names)
if checked_candidate is not None:
matches.append(checked_candidate)
diff --git a/modules/tool_use.py b/modules/tool_use.py
index f9ddf940..e22b1798 100644
--- a/modules/tool_use.py
+++ b/modules/tool_use.py
@@ -1,4 +1,3 @@
-import asyncio
import importlib.util
import json
@@ -56,119 +55,6 @@ def load_tools(selected_names):
return tool_defs, executors
-def _parse_mcp_servers(servers_str):
- """Parse MCP servers textbox: one server per line, format 'url' or 'url,Header: value,Header2: value2'."""
- servers = []
- for line in servers_str.strip().splitlines():
- line = line.strip()
- if not line:
- continue
- parts = line.split(',')
- url = parts[0].strip()
- headers = {}
- for part in parts[1:]:
- part = part.strip()
- if ':' in part:
- key, val = part.split(':', 1)
- headers[key.strip()] = val.strip()
- servers.append((url, headers))
- return servers
-
-
-def _mcp_tool_to_openai(tool):
- """Convert an MCP Tool object to OpenAI-format tool dict."""
- return {
- "type": "function",
- "function": {
- "name": tool.name,
- "description": tool.description or "",
- "parameters": tool.inputSchema or {"type": "object", "properties": {}}
- }
- }
-
-
-async def _mcp_session(url, headers, callback):
- """Open an MCP session and pass it to the callback."""
- from mcp.client.streamable_http import streamablehttp_client
- from mcp import ClientSession
-
- async with streamablehttp_client(url, headers=headers or None) as (read_stream, write_stream, _):
- async with ClientSession(read_stream, write_stream) as session:
- await session.initialize()
- return await callback(session)
-
-
-def _make_mcp_executor(name, url, headers):
- def executor(arguments):
- return asyncio.run(_call_mcp_tool(name, arguments, url, headers))
- return executor
-
-
-async def _connect_mcp_server(url, headers):
- """Connect to one MCP server and return (tool_defs, executors)."""
-
- async def _discover(session):
- result = await session.list_tools()
- tool_defs = []
- executors = {}
- for tool in result.tools:
- tool_defs.append(_mcp_tool_to_openai(tool))
- executors[tool.name] = _make_mcp_executor(tool.name, url, headers)
- return tool_defs, executors
-
- return await _mcp_session(url, headers, _discover)
-
-
-async def _call_mcp_tool(name, arguments, url, headers):
- """Connect to an MCP server and call a single tool."""
-
- async def _invoke(session):
- result = await session.call_tool(name, arguments)
- parts = []
- for content in result.content:
- if hasattr(content, 'text'):
- parts.append(content.text)
- else:
- parts.append(str(content))
- return '\n'.join(parts) if parts else ''
-
- return await _mcp_session(url, headers, _invoke)
-
-
-async def _connect_all_mcp_servers(servers):
- """Connect to all MCP servers concurrently."""
- results = await asyncio.gather(
- *(_connect_mcp_server(url, headers) for url, headers in servers),
- return_exceptions=True
- )
- all_defs = []
- all_executors = {}
- for (url, _), result in zip(servers, results):
- if isinstance(result, Exception):
- logger.exception(f'Failed to connect to MCP server "{url}"', exc_info=result)
- continue
- defs, execs = result
- for td, (fn, ex) in zip(defs, execs.items()):
- if fn in all_executors:
- logger.warning(f'MCP tool "{fn}" from {url} conflicts with an already loaded tool. Skipping.')
- continue
- all_defs.append(td)
- all_executors[fn] = ex
- return all_defs, all_executors
-
-
-def load_mcp_tools(servers_str):
- """
- Parse MCP servers string and discover tools from each server.
- Returns (tool_defs, executors) in the same format as load_tools.
- """
- servers = _parse_mcp_servers(servers_str)
- if not servers:
- return [], {}
-
- return asyncio.run(_connect_all_mcp_servers(servers))
-
-
def execute_tool(func_name, arguments, executors):
"""Execute a tool by function name. Returns result as a JSON string."""
fn = executors.get(func_name)
diff --git a/modules/training.py b/modules/training.py
index bca4f02e..145353c6 100644
--- a/modules/training.py
+++ b/modules/training.py
@@ -52,7 +52,7 @@ def create_ui():
with gr.Column():
always_override = gr.Checkbox(label='Override Existing Files', value=False, info='If the name is the same, checking will replace the existing file, and unchecking will load and continue from it (the rank must be the same).', elem_classes=['no-background'])
- with gr.Accordion(label='Target Modules', open=False):
+ with gr.Accordion(label='Target Modules', open=False, elem_classes='tgw-accordion'):
gr.Markdown("Selects which modules to target in training. Targeting more modules is closer to a full fine-tune at the cost of increased VRAM and adapter size.")
all_linear = gr.Checkbox(label='Target all linear layers', value=True, info='Targets every nn.Linear layer except lm_head. Works for any model architecture. When checked, the individual module checkboxes below are ignored.', elem_classes=['no-background'])
with gr.Row():
@@ -87,7 +87,7 @@ def create_ui():
with gr.Row():
lr_scheduler_type = gr.Dropdown(label='LR Scheduler', value='cosine', choices=['linear', 'constant', 'constant_with_warmup', 'cosine', 'cosine_with_restarts', 'polynomial', 'inverse_sqrt'], info='Learning rate scheduler - defines how the learning rate changes over time. "Constant" means never change, "linear" means to go in a straight line from the learning rate down to 0, cosine follows a curve, etc.', elem_classes=['slim-dropdown'])
- with gr.Accordion(label='Advanced Options', open=False):
+ with gr.Accordion(label='Advanced Options', open=False, elem_classes='tgw-accordion'):
with gr.Row():
with gr.Column():
optimizer = gr.Dropdown(label='Optimizer', value='adamw_torch', choices=['adamw_hf', 'adamw_torch', 'adamw_torch_fused', 'adamw_torch_xla', 'adamw_apex_fused', 'adafactor', 'adamw_bnb_8bit', 'adamw_anyprecision', 'sgd', 'adagrad'], info='Optimizer algorithm. adamw_torch is the standard choice. adamw_bnb_8bit uses less VRAM. adafactor is memory-efficient for large models.', elem_classes=['slim-dropdown'])
diff --git a/modules/ui.py b/modules/ui.py
index 3a8390f7..02b5a9fb 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -75,7 +75,7 @@ if not shared.args.old_colors:
background_fill_primary_dark='var(--darker-gray, #1C1C1D)',
body_background_fill="white",
block_background_fill="transparent",
- body_text_color='#1a1a1a',
+ body_text_color='rgb(64, 64, 64)',
button_secondary_background_fill="white",
button_secondary_border_color="var(--border-color-primary)",
block_title_text_color='*body_text_color',
@@ -209,7 +209,6 @@ def list_interface_input_elements():
'textbox',
'start_with',
'selected_tools',
- 'mcp_servers',
'mode',
'chat_style',
'chat-instruct_command',
@@ -435,7 +434,6 @@ def setup_auto_save():
'custom_system_message',
'chat_template_str',
'selected_tools',
- 'mcp_servers',
# Parameters tab (ui_parameters.py) - Generation parameters
'preset_menu',
diff --git a/modules/ui_chat.py b/modules/ui_chat.py
index 14489d96..10d05f65 100644
--- a/modules/ui_chat.py
+++ b/modules/ui_chat.py
@@ -52,7 +52,7 @@ def create_ui():
shared.gradio['html_display'] = gr.HTML(value=chat_html_wrapper({'internal': [], 'visible': [], 'metadata': {}}, '', '', 'chat', 'cai-chat', '')['html'], visible=True)
with gr.Row(elem_id="chat-input-row"):
with gr.Column(scale=1, elem_id='gr-hover-container'):
- gr.HTML(value='
', elem_id='gr-hover')
+ gr.HTML(value='☰', elem_id='gr-hover')
with gr.Column(scale=10, elem_id='chat-input-container'):
shared.gradio['textbox'] = gr.MultimodalTextbox(label='', placeholder='Send a message', file_types=['text', '.pdf', 'image'], file_count="multiple", elem_id='chat-input', elem_classes=['add_scrollbar'])
@@ -105,9 +105,6 @@ def create_ui():
shared.gradio['selected_tools'].change(fn=sync_web_tools, inputs=[shared.gradio['selected_tools']], outputs=[shared.gradio['selected_tools']], show_progress=False)
- with gr.Accordion('MCP servers', open=False):
- shared.gradio['mcp_servers'] = gr.Textbox(value=shared.settings.get('mcp_servers', ''), lines=3, max_lines=3, label='', info='One url per line. For headers, write url,Header: value,Header2: value2', elem_classes=['add_scrollbar'])
-
gr.HTML("")
with gr.Row():
diff --git a/modules/ui_default.py b/modules/ui_default.py
index 48cb2fc2..2c367cca 100644
--- a/modules/ui_default.py
+++ b/modules/ui_default.py
@@ -10,7 +10,7 @@ from modules.text_generation import (
stop_everything_event
)
from modules.ui_notebook import store_notebook_state_and_debounce
-from modules.utils import gradio, sanitize_filename
+from modules.utils import gradio
inputs = ('textbox-default', 'interface_state')
outputs = ('output_textbox', 'html-default')
@@ -167,7 +167,6 @@ def handle_new_prompt():
def handle_delete_prompt_confirm_default(prompt_name):
- prompt_name = sanitize_filename(prompt_name)
available_prompts = utils.get_available_prompts()
current_index = available_prompts.index(prompt_name) if prompt_name in available_prompts else 0
@@ -200,8 +199,6 @@ def handle_rename_prompt_click_default(current_name):
def handle_rename_prompt_confirm_default(new_name, current_name):
- new_name = sanitize_filename(new_name)
- current_name = sanitize_filename(current_name)
old_path = shared.user_data_dir / "logs" / "notebook" / f"{current_name}.txt"
new_path = shared.user_data_dir / "logs" / "notebook" / f"{new_name}.txt"
diff --git a/modules/ui_image_generation.py b/modules/ui_image_generation.py
index 727aa7b1..1efb2479 100644
--- a/modules/ui_image_generation.py
+++ b/modules/ui_image_generation.py
@@ -798,9 +798,6 @@ def generate(state, save_images=True):
if seed == -1:
seed = random.randint(0, 2**32 - 1)
- # Store resolved seed back so callers (e.g. API) can access it
- state['image_seed_resolved'] = seed
-
device = get_device()
if device is None:
device = "cpu"
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index 9c8306f5..16505afa 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -54,6 +54,7 @@ def create_ui():
if not shared.args.portable:
shared.gradio['ik'] = gr.Checkbox(label="ik", value=shared.args.ik, info='Use ik_llama.cpp instead of upstream llama.cpp.')
+ shared.gradio['cpu_moe'] = gr.Checkbox(label="cpu-moe", value=shared.args.cpu_moe, info='Move the experts to the CPU. Saves VRAM on MoE models.')
shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming-llm", value=shared.args.streaming_llm, info='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
shared.gradio['load_in_8bit'] = gr.Checkbox(label="load-in-8bit", value=shared.args.load_in_8bit)
shared.gradio['load_in_4bit'] = gr.Checkbox(label="load-in-4bit", value=shared.args.load_in_4bit)
@@ -66,13 +67,13 @@ def create_ui():
)
# Multimodal
- with gr.Accordion("Multimodal (vision)", open=False) as shared.gradio['mmproj_accordion']:
+ with gr.Accordion("Multimodal (vision)", open=False, elem_classes='tgw-accordion') as shared.gradio['mmproj_accordion']:
with gr.Row():
shared.gradio['mmproj'] = gr.Dropdown(label="mmproj file", choices=utils.get_available_mmproj(), value=lambda: shared.args.mmproj or 'None', elem_classes='slim-dropdown', info=f'Select a file that matches your model. Must be placed in {shared.user_data_dir}/mmproj/', interactive=not mu)
ui.create_refresh_button(shared.gradio['mmproj'], lambda: None, lambda: {'choices': utils.get_available_mmproj()}, 'refresh-button', interactive=not mu)
# Speculative decoding
- with gr.Accordion("Speculative decoding", open=False) as shared.gradio['speculative_decoding_accordion']:
+ with gr.Accordion("Speculative decoding", open=False, elem_classes='tgw-accordion') as shared.gradio['speculative_decoding_accordion']:
shared.gradio['draft_max'] = gr.Number(label="draft-max", precision=0, step=1, value=shared.args.draft_max, info='Maximum number of tokens to draft for speculative decoding. Recommended: 4 for draft model, 64 for n-gram.')
gr.Markdown('#### Draft model')
@@ -91,7 +92,7 @@ def create_ui():
shared.gradio['spec_ngram_min_hits'] = gr.Number(label="spec-ngram-min-hits", precision=0, step=1, value=shared.args.spec_ngram_min_hits, info='Minimum n-gram hits for ngram-map speculative decoding.', visible=shared.args.spec_type != 'none')
gr.Markdown("## Other options")
- with gr.Accordion("See more options", open=False):
+ with gr.Accordion("See more options", open=False, elem_classes='tgw-accordion'):
with gr.Row():
with gr.Column():
shared.gradio['parallel'] = gr.Slider(label="parallel", minimum=1, step=1, maximum=64, value=shared.args.parallel, info='Number of parallel request slots for the API. The context size is divided equally among slots. For example, to have 4 slots with 8192 context each, set ctx_size to 32768.')
@@ -108,7 +109,6 @@ def create_ui():
with gr.Column():
shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='Use PyTorch in CPU mode.')
shared.gradio['disk'] = gr.Checkbox(label="disk", value=shared.args.disk)
- shared.gradio['cpu_moe'] = gr.Checkbox(label="cpu-moe", value=shared.args.cpu_moe, info='Move the experts to the CPU. Saves VRAM on MoE models.')
shared.gradio['row_split'] = gr.Checkbox(label="row_split", value=shared.args.row_split, info='Split the model by rows across GPUs. This may improve multi-gpu performance.')
shared.gradio['no_kv_offload'] = gr.Checkbox(label="no_kv_offload", value=shared.args.no_kv_offload, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces performance.')
shared.gradio['no_mmap'] = gr.Checkbox(label="no-mmap", value=shared.args.no_mmap)
diff --git a/modules/ui_notebook.py b/modules/ui_notebook.py
index 88f00ac5..f550e646 100644
--- a/modules/ui_notebook.py
+++ b/modules/ui_notebook.py
@@ -11,7 +11,7 @@ from modules.text_generation import (
get_token_ids,
stop_everything_event
)
-from modules.utils import gradio, sanitize_filename
+from modules.utils import gradio
_notebook_file_lock = threading.Lock()
_notebook_auto_save_timer = None
@@ -202,7 +202,6 @@ def handle_new_prompt():
def handle_delete_prompt_confirm_notebook(prompt_name):
- prompt_name = sanitize_filename(prompt_name)
available_prompts = utils.get_available_prompts()
current_index = available_prompts.index(prompt_name) if prompt_name in available_prompts else 0
@@ -234,8 +233,6 @@ def handle_rename_prompt_click_notebook(current_name):
def handle_rename_prompt_confirm_notebook(new_name, current_name):
- new_name = sanitize_filename(new_name)
- current_name = sanitize_filename(current_name)
old_path = shared.user_data_dir / "logs" / "notebook" / f"{current_name}.txt"
new_path = shared.user_data_dir / "logs" / "notebook" / f"{new_name}.txt"
@@ -252,7 +249,6 @@ def handle_rename_prompt_confirm_notebook(new_name, current_name):
def autosave_prompt(text, prompt_name):
"""Automatically save the text to the selected prompt file"""
- prompt_name = sanitize_filename(prompt_name)
if prompt_name and text.strip():
prompt_path = shared.user_data_dir / "logs" / "notebook" / f"{prompt_name}.txt"
prompt_path.parent.mkdir(parents=True, exist_ok=True)
diff --git a/modules/utils.py b/modules/utils.py
index c4acf714..b01953ee 100644
--- a/modules/utils.py
+++ b/modules/utils.py
@@ -105,9 +105,6 @@ def resolve_model_path(model_name_or_path, image_model=False):
before the default models directory.
"""
- if model_name_or_path is None:
- raise FileNotFoundError("No model specified.")
-
path_candidate = Path(model_name_or_path)
if path_candidate.exists():
return path_candidate
diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt
index ed5841b8..b38ae848 100644
--- a/requirements/full/requirements.txt
+++ b/requirements/full/requirements.txt
@@ -9,7 +9,6 @@ flash-linear-attention==0.4.*
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
-mcp==1.27.0
numpy==2.2.*
pandas
peft==0.18.*
@@ -32,8 +31,8 @@ tqdm
wandb
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@@ -41,10 +40,10 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.28/exllamav3-0.0.28+cu128.torch2.9.0-cp313-cp313-win_amd64.whl; platform_system == "Windows" and python_version == "3.13"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.28/exllamav3-0.0.28+cu128.torch2.9.0-cp313-cp313-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.13"
https://github.com/kingbri1/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu128torch2.9.0cxx11abiFALSE-cp313-cp313-win_amd64.whl; platform_system == "Windows" and python_version == "3.13"
diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt
index fe6ce28c..7fb3a7d9 100644
--- a/requirements/full/requirements_amd.txt
+++ b/requirements/full/requirements_amd.txt
@@ -7,7 +7,6 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
-mcp==1.27.0
numpy==2.2.*
pandas
peft==0.18.*
@@ -29,8 +28,8 @@ trafilatura==2.0.0
wandb
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@@ -38,5 +37,5 @@ sse-starlette==1.6.5
tiktoken
# AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt
index 09c01a61..4a0f764c 100644
--- a/requirements/full/requirements_apple_intel.txt
+++ b/requirements/full/requirements_apple_intel.txt
@@ -7,7 +7,6 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
-mcp==1.27.0
numpy==2.2.*
pandas
peft==0.18.*
@@ -29,8 +28,8 @@ trafilatura==2.0.0
wandb
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@@ -38,4 +37,4 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt
index 42210407..942d5d71 100644
--- a/requirements/full/requirements_apple_silicon.txt
+++ b/requirements/full/requirements_apple_silicon.txt
@@ -7,7 +7,6 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
-mcp==1.27.0
numpy==2.2.*
pandas
peft==0.18.*
@@ -29,8 +28,8 @@ trafilatura==2.0.0
wandb
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@@ -38,4 +37,4 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt
index 5cd7ae7d..6b61dca7 100644
--- a/requirements/full/requirements_cpu_only.txt
+++ b/requirements/full/requirements_cpu_only.txt
@@ -7,7 +7,6 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
-mcp==1.27.0
numpy==2.2.*
pandas
peft==0.18.*
@@ -29,8 +28,8 @@ trafilatura==2.0.0
wandb
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@@ -38,7 +37,7 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt
index 19ac5183..a4d6cc97 100644
--- a/requirements/full/requirements_nowheels.txt
+++ b/requirements/full/requirements_nowheels.txt
@@ -7,7 +7,6 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
-mcp==1.27.0
numpy==2.2.*
pandas
peft==0.18.*
@@ -29,8 +28,8 @@ trafilatura==2.0.0
wandb
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
# API
flask_cloudflared==0.0.15
diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt
index 807ff079..5aff54b2 100644
--- a/requirements/portable/requirements.txt
+++ b/requirements/portable/requirements.txt
@@ -3,7 +3,6 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
-mcp==1.27.0
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.*
@@ -15,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@@ -24,5 +23,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_amd.txt b/requirements/portable/requirements_amd.txt
index 55fe79ea..0771f53e 100644
--- a/requirements/portable/requirements_amd.txt
+++ b/requirements/portable/requirements_amd.txt
@@ -3,7 +3,6 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
-mcp==1.27.0
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.*
@@ -15,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@@ -24,5 +23,5 @@ sse-starlette==1.6.5
tiktoken
# AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt
index 6d4a63f7..427d59b2 100644
--- a/requirements/portable/requirements_apple_intel.txt
+++ b/requirements/portable/requirements_apple_intel.txt
@@ -3,7 +3,6 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
-mcp==1.27.0
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.*
@@ -15,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@@ -24,4 +23,4 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt
index aebb7c5b..c47a6ca1 100644
--- a/requirements/portable/requirements_apple_silicon.txt
+++ b/requirements/portable/requirements_apple_silicon.txt
@@ -3,7 +3,6 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
-mcp==1.27.0
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.*
@@ -15,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@@ -24,4 +23,4 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt
index d7e2b051..e491e357 100644
--- a/requirements/portable/requirements_cpu_only.txt
+++ b/requirements/portable/requirements_cpu_only.txt
@@ -3,7 +3,6 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
-mcp==1.27.0
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.*
@@ -15,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@@ -24,5 +23,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
diff --git a/requirements/portable/requirements_cuda131.txt b/requirements/portable/requirements_cuda131.txt
index 42a9a16f..5870983a 100644
--- a/requirements/portable/requirements_cuda131.txt
+++ b/requirements/portable/requirements_cuda131.txt
@@ -3,7 +3,6 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
-mcp==1.27.0
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.*
@@ -15,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@@ -24,5 +23,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_ik.txt b/requirements/portable/requirements_ik.txt
index c3fdb5e8..d11d337d 100644
--- a/requirements/portable/requirements_ik.txt
+++ b/requirements/portable/requirements_ik.txt
@@ -3,7 +3,6 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
-mcp==1.27.0
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.*
@@ -15,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@@ -24,5 +23,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_ik_cpu_only.txt b/requirements/portable/requirements_ik_cpu_only.txt
index ea3ba601..c2b69e1c 100644
--- a/requirements/portable/requirements_ik_cpu_only.txt
+++ b/requirements/portable/requirements_ik_cpu_only.txt
@@ -3,7 +3,6 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
-mcp==1.27.0
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.*
@@ -15,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@@ -24,5 +23,5 @@ sse-starlette==1.6.5
tiktoken
# ik_llama.cpp (CPU only)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
diff --git a/requirements/portable/requirements_ik_cuda131.txt b/requirements/portable/requirements_ik_cuda131.txt
index 7530375d..7f280930 100644
--- a/requirements/portable/requirements_ik_cuda131.txt
+++ b/requirements/portable/requirements_ik_cuda131.txt
@@ -3,7 +3,6 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
-mcp==1.27.0
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.*
@@ -15,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@@ -24,5 +23,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/ik_llama_cpp_binaries-0.110.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/ik_llama_cpp_binaries-0.102.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_nowheels.txt b/requirements/portable/requirements_nowheels.txt
index cafe3cee..322056be 100644
--- a/requirements/portable/requirements_nowheels.txt
+++ b/requirements/portable/requirements_nowheels.txt
@@ -3,7 +3,6 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
-mcp==1.27.0
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.*
@@ -15,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
# API
flask_cloudflared==0.0.15
diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt
index 3b8b0573..dfd52be5 100644
--- a/requirements/portable/requirements_vulkan.txt
+++ b/requirements/portable/requirements_vulkan.txt
@@ -3,7 +3,6 @@ fastapi==0.112.4
huggingface-hub==1.5.*
jinja2==3.1.6
markdown
-mcp==1.27.0
numpy==2.2.*
pydantic==2.11.0
pymupdf==1.27.*
@@ -15,8 +14,8 @@ trafilatura==2.0.0
tqdm
# Gradio
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio-4.37.2+custom.19-py3-none-any.whl
-https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_client-1.0.2+custom.19-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio-4.37.2+custom.14-py3-none-any.whl
+https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.14/gradio_client-1.0.2+custom.14-py3-none-any.whl
# API
flask_cloudflared==0.0.15
@@ -24,5 +23,5 @@ sse-starlette==1.6.5
tiktoken
# Vulkan wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.110.0/llama_cpp_binaries-0.110.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.102.0/llama_cpp_binaries-0.102.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"