UI: Add MCP server support

2026-04-20 22:13:43 +00:00 · 2026-04-05 23:07:14 -03:00 · 2026-04-05 23:07:14 -03:00 · b1d06dcf96
commit b1d06dcf96
parent abc3487f4d
24 changed files with 163 additions and 4 deletions
--- a/README.md
+++ b/README.md
@ -26,7 +26,7 @@ A Gradio web UI for running Large Language Models locally. 100% private and offl
 - **Easy setup**: [Portable builds](https://github.com/oobabooga/text-generation-webui/releases) (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or a one-click installer for the full feature set.
 - **Multiple backends**: [llama.cpp](https://github.com/ggerganov/llama.cpp), [ik_llama.cpp](https://github.com/ikawrakow/ik_llama.cpp), [Transformers](https://github.com/huggingface/transformers), [ExLlamaV3](https://github.com/turboderp-org/exllamav3), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). Switch between backends and models without restarting.
 - **OpenAI/Anthropic-compatible API**: Chat, Completions, and Messages endpoints with tool-calling support. Use as a local drop-in replacement for the OpenAI/Anthropic APIs ([examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples)).
- **Tool-calling**: Models can call custom functions during chat — web search, page fetching, math, and more. Each tool is a single `.py` file, easy to create and extend ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Tool-Calling-Tutorial)).
+- **Tool-calling**: Models can call custom functions during chat — web search, page fetching, math, and more. Each tool is a single `.py` file. MCP servers are also supported ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Tool-Calling-Tutorial)).
 - **Vision (multimodal)**: Attach images to messages for visual understanding ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Multimodal-Tutorial)).
 - **File attachments**: Upload text files, PDF documents, and .docx documents to talk about their contents.
 - **Training**: Fine-tune LoRAs on multi-turn chat or raw text datasets. Supports resuming interrupted runs ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/05-%E2%80%90-Training-Tab)).
--- a/Tutorial.md
+++ b/Tutorial.md
@ -80,6 +80,19 @@ def execute(arguments):

 You can open the built-in tools in `user_data/tools/` for more examples.

+## MCP servers
+
+You can connect to remote [MCP (Model Context Protocol)](https://modelcontextprotocol.io/) servers to use their tools alongside local ones.
+
+In the chat sidebar, open the **MCP servers** accordion and enter one server URL per line. For servers that require authentication, append each header as `Name: value` after the URL, separated by commas:
+
+```
+https://example.com/mcp
+https://other.com/mcp,Authorization: Bearer sk-xxx
+```
+
+All tools from the configured servers are automatically discovered and made available to the model during generation. If an MCP tool has the same name as a selected local tool, the local tool takes priority. If two MCP servers expose a tool with the same name, the tool from the server listed first is used.
+
 ## Tool calling over the API

 Tool calling over the API follows the [OpenAI API](https://platform.openai.com/docs/guides/function-calling) convention. Define your tools, send them with your messages, and handle tool calls in a loop until the model gives a final answer.
--- a/modules/chat.py
+++ b/modules/chat.py
@ -1264,14 +1264,23 @@ def generate_chat_reply_wrapper(text, state, regenerate=False, _continue=False):

    # Load tools if any are selected
    selected = state.get('selected_tools', [])
+    mcp_servers = state.get('mcp_servers', '')
    parse_tool_call = None
    _tool_parsers = None
-    if selected:
-        from modules.tool_use import load_tools, execute_tool
+    if selected or mcp_servers:
+        from modules.tool_use import load_tools, load_mcp_tools, execute_tool
        from modules.tool_parsing import parse_tool_call, get_tool_call_id, detect_tool_call_format

-    if selected:
        tool_defs, tool_executors = load_tools(selected)
+        if mcp_servers:
+            mcp_defs, mcp_executors = load_mcp_tools(mcp_servers)
+            for td in mcp_defs:
+                fn = td['function']['name']
+                if fn in tool_executors:
+                    logger.warning(f'MCP tool "{fn}" conflicts with a local tool. Skipping.')
+                    continue
+                tool_defs.append(td)
+                tool_executors[fn] = mcp_executors[fn]
        state['tools'] = tool_defs
        tool_func_names = [t['function']['name'] for t in tool_defs]
        _template_str = state.get('instruction_template_str', '') if state.get('mode') == 'instruct' else state.get('chat_template_str', '')
--- a/modules/shared.py
+++ b/modules/shared.py
@ -259,6 +259,7 @@ settings = {
    'enable_web_search': False,
    'web_search_pages': 3,
    'selected_tools': [],
+    'mcp_servers': '',
    'prompt-notebook': '',
    'preset': 'Top-P' if (user_data_dir / 'presets/Top-P.yaml').exists() else None,
    'max_new_tokens': 512,
--- a/modules/tool_use.py
+++ b/modules/tool_use.py
@ -1,3 +1,4 @@
+import asyncio
 import importlib.util
 import json

@ -55,6 +56,119 @@ def load_tools(selected_names):
    return tool_defs, executors


+def _parse_mcp_servers(servers_str):
+    """Parse the MCP servers textbox into a list of (url, headers) tuples.
+
+    Each non-blank line has the form 'url' or
+    'url,Header: value,Header2: value2'. Surrounding whitespace is ignored.
+
+    A comma-separated fragment without a colon is treated as the continuation
+    of the previous header's value, so values that themselves contain commas
+    (e.g. 'Accept: application/json, text/event-stream') are kept intact
+    instead of being truncated. A continuation fragment that contains a colon
+    is still read as a new header; that ambiguity is inherent to the format.
+
+    Args:
+        servers_str: raw textbox contents; None or '' yields an empty list.
+
+    Returns:
+        A list of (url, headers) tuples, where headers is a dict of str to str.
+    """
+    servers = []
+    # Tolerate None (e.g. an unset setting) instead of failing on .splitlines().
+    for line in (servers_str or '').splitlines():
+        line = line.strip()
+        if not line:
+            continue
+        url, *header_parts = (part.strip() for part in line.split(','))
+        headers = {}
+        last_key = None
+        for part in header_parts:
+            key, sep, val = part.partition(':')
+            key = key.strip()
+            if sep:
+                # A header without a name cannot be sent; ignore it.
+                if key:
+                    headers[key] = val.strip()
+                    last_key = key
+            elif part and last_key is not None:
+                # No colon: this fragment belongs to the previous header value.
+                headers[last_key] = f'{headers[last_key]}, {part}'
+        servers.append((url, headers))
+    return servers
+
+
+def _mcp_tool_to_openai(tool):
+    """Build the OpenAI-style tool dict for an MCP tool object.
+
+    Reads `name`, `description` and `inputSchema` from `tool`. A falsy
+    description becomes "" and a falsy schema becomes an empty object schema.
+    """
+    function_spec = {
+        "name": tool.name,
+        "description": tool.description or "",
+        "parameters": tool.inputSchema or {"type": "object", "properties": {}},
+    }
+    return {"type": "function", "function": function_spec}
+
+
+async def _mcp_session(url, headers, callback):
+    """Run `callback` against a freshly initialized MCP session for `url`.
+
+    A streamable-HTTP transport and a ClientSession are opened for this one
+    call and closed when the callback finishes. An empty `headers` mapping is
+    passed to the transport as None. Returns whatever `callback(session)`
+    returns.
+    """
+    from mcp import ClientSession
+    from mcp.client.streamable_http import streamablehttp_client
+
+    transport = streamablehttp_client(url, headers=headers or None)
+    async with transport as (reader, writer, _):
+        async with ClientSession(reader, writer) as session:
+            await session.initialize()
+            outcome = await callback(session)
+            return outcome
+
+
+def _make_mcp_executor(name, url, headers):
+    """Return a synchronous callable that invokes MCP tool `name` on `url`.
+
+    The returned function takes the tool arguments and runs the async
+    `_call_mcp_tool` to completion with `asyncio.run`.
+    """
+    def executor(arguments):
+        call = _call_mcp_tool(name, arguments, url, headers)
+        return asyncio.run(call)
+
+    return executor
+
+
+async def _connect_mcp_server(url, headers):
+    """List the tools of a single MCP server.
+
+    Returns (tool_defs, executors): the OpenAI-format definitions of the
+    server's tools and a dict mapping each tool name to a synchronous executor
+    bound to this server's url and headers.
+    """
+
+    async def _discover(session):
+        listing = await session.list_tools()
+        found = list(listing.tools)
+        definitions = [_mcp_tool_to_openai(entry) for entry in found]
+        runners = {
+            entry.name: _make_mcp_executor(entry.name, url, headers)
+            for entry in found
+        }
+        return definitions, runners
+
+    return await _mcp_session(url, headers, _discover)
+
+
+async def _call_mcp_tool(name, arguments, url, headers):
+    """Open a session to the MCP server at `url` and call tool `name` once.
+
+    Returns the content items of the result joined with newlines: an item's
+    `text` attribute when it has one, otherwise `str(item)`. An empty result
+    yields ''.
+    """
+
+    async def _invoke(session):
+        response = await session.call_tool(name, arguments)
+        pieces = [
+            item.text if hasattr(item, 'text') else str(item)
+            for item in response.content
+        ]
+        # Joining an empty list already gives '', so no special case is needed.
+        return '\n'.join(pieces)
+
+    return await _mcp_session(url, headers, _invoke)
+
+
+async def _connect_all_mcp_servers(servers):
+    """Discover tools from all MCP servers concurrently and merge the results.
+
+    Args:
+        servers: list of (url, headers) tuples.
+
+    Returns:
+        (tool_defs, executors) merged across servers in the order the servers
+        were given. A server that fails is logged and skipped. When a tool
+        name was already loaded, the later one is logged and skipped.
+    """
+    results = await asyncio.gather(
+        *(_connect_mcp_server(url, headers) for url, headers in servers),
+        return_exceptions=True
+    )
+    all_defs = []
+    all_executors = {}
+    for (url, _), result in zip(servers, results):
+        # With return_exceptions=True, gather may also hand back BaseException
+        # subclasses (e.g. CancelledError); checking only Exception would let
+        # those reach the tuple unpacking below and crash.
+        if isinstance(result, BaseException):
+            logger.exception(f'Failed to connect to MCP server "{url}"', exc_info=result)
+            continue
+        defs, execs = result
+        # Look each executor up by the definition's own name rather than
+        # zipping the list with the dict: if a server reports the same name
+        # twice, the dict has fewer entries and a zip would misalign them.
+        for td in defs:
+            fn = td['function']['name']
+            if fn in all_executors:
+                logger.warning(f'MCP tool "{fn}" from {url} conflicts with an already loaded tool. Skipping.')
+                continue
+            all_defs.append(td)
+            all_executors[fn] = execs[fn]
+    return all_defs, all_executors
+
+
+def load_mcp_tools(servers_str):
+    """
+    Discover the tools offered by the MCP servers listed in `servers_str`.
+
+    The string is parsed with _parse_mcp_servers. If it names no servers,
+    ([], {}) is returned without connecting anywhere. Otherwise the result is
+    (tool_defs, executors), in the same format as load_tools.
+    """
+    parsed = _parse_mcp_servers(servers_str)
+    if parsed:
+        return asyncio.run(_connect_all_mcp_servers(parsed))
+
+    return [], {}
+
+
 def execute_tool(func_name, arguments, executors):
    """Execute a tool by function name. Returns result as a JSON string."""
    fn = executors.get(func_name)
--- a/modules/ui.py
+++ b/modules/ui.py
@ -209,6 +209,7 @@ def list_interface_input_elements():
        'textbox',
        'start_with',
        'selected_tools',
+        'mcp_servers',
        'mode',
        'chat_style',
        'chat-instruct_command',
@ -434,6 +435,7 @@ def setup_auto_save():
        'custom_system_message',
        'chat_template_str',
        'selected_tools',
+        'mcp_servers',

        # Parameters tab (ui_parameters.py) - Generation parameters
        'preset_menu',
--- a/modules/ui_chat.py
+++ b/modules/ui_chat.py
@ -105,6 +105,9 @@ def create_ui():

                shared.gradio['selected_tools'].change(fn=sync_web_tools, inputs=[shared.gradio['selected_tools']], outputs=[shared.gradio['selected_tools']], show_progress=False)

+                with gr.Accordion('MCP servers', open=False):
+                    shared.gradio['mcp_servers'] = gr.Textbox(value=shared.settings.get('mcp_servers', ''), lines=3, max_lines=3, label='', info='One url per line. For headers, write url,Header: value,Header2: value2', elem_classes=['add_scrollbar'])
+
                gr.HTML("<div class='sidebar-vertical-separator'></div>")

                with gr.Row():
--- a/requirements/full/requirements.txt
+++ b/requirements/full/requirements.txt
@ -48,3 +48,4 @@ https://github.com/turboderp-org/exllamav3/releases/download/v0.0.28/exllamav3-0
 https://github.com/turboderp-org/exllamav3/releases/download/v0.0.28/exllamav3-0.0.28+cu128.torch2.9.0-cp313-cp313-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.13"
 https://github.com/kingbri1/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu128torch2.9.0cxx11abiFALSE-cp313-cp313-win_amd64.whl; platform_system == "Windows" and python_version == "3.13"
 https://github.com/kingbri1/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu128torch2.9.0cxx11abiFALSE-cp313-cp313-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.13"
+mcp==1.27.0
--- a/requirements/full/requirements_amd.txt
+++ b/requirements/full/requirements_amd.txt
@ -39,3 +39,4 @@ tiktoken
 # AMD wheels
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+mcp==1.27.0
--- a/requirements/full/requirements_apple_intel.txt
+++ b/requirements/full/requirements_apple_intel.txt
@ -38,3 +38,4 @@ tiktoken

 # Mac wheels
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
+mcp==1.27.0
--- a/requirements/full/requirements_apple_silicon.txt
+++ b/requirements/full/requirements_apple_silicon.txt
@ -38,3 +38,4 @@ tiktoken

 # Mac wheels
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
+mcp==1.27.0
--- a/requirements/full/requirements_cpu_only.txt
+++ b/requirements/full/requirements_cpu_only.txt
@ -41,3 +41,4 @@ https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
+mcp==1.27.0
--- a/requirements/full/requirements_nowheels.txt
+++ b/requirements/full/requirements_nowheels.txt
@ -35,3 +35,4 @@ https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_cl
 flask_cloudflared==0.0.15
 sse-starlette==1.6.5
 tiktoken
+mcp==1.27.0
--- a/requirements/portable/requirements.txt
+++ b/requirements/portable/requirements.txt
@ -25,3 +25,4 @@ tiktoken
 # CUDA wheels
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+mcp==1.27.0
--- a/requirements/portable/requirements_amd.txt
+++ b/requirements/portable/requirements_amd.txt
@ -25,3 +25,4 @@ tiktoken
 # AMD wheels
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+mcp==1.27.0
--- a/requirements/portable/requirements_apple_intel.txt
+++ b/requirements/portable/requirements_apple_intel.txt
@ -24,3 +24,4 @@ tiktoken

 # Mac wheels
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
+mcp==1.27.0
--- a/requirements/portable/requirements_apple_silicon.txt
+++ b/requirements/portable/requirements_apple_silicon.txt
@ -24,3 +24,4 @@ tiktoken

 # Mac wheels
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
+mcp==1.27.0
--- a/requirements/portable/requirements_cpu_only.txt
+++ b/requirements/portable/requirements_cpu_only.txt
@ -25,3 +25,4 @@ tiktoken
 # llama.cpp (CPU only)
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
+mcp==1.27.0
--- a/requirements/portable/requirements_cuda131.txt
+++ b/requirements/portable/requirements_cuda131.txt
@ -25,3 +25,4 @@ tiktoken
 # CUDA wheels
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+mcp==1.27.0
--- a/requirements/portable/requirements_ik.txt
+++ b/requirements/portable/requirements_ik.txt
@ -25,3 +25,4 @@ tiktoken
 # CUDA wheels
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+mcp==1.27.0
--- a/requirements/portable/requirements_ik_cpu_only.txt
+++ b/requirements/portable/requirements_ik_cpu_only.txt
@ -25,3 +25,4 @@ tiktoken
 # ik_llama.cpp (CPU only)
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
+mcp==1.27.0
--- a/requirements/portable/requirements_ik_cuda131.txt
+++ b/requirements/portable/requirements_ik_cuda131.txt
@ -25,3 +25,4 @@ tiktoken
 # CUDA wheels
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+mcp==1.27.0
--- a/requirements/portable/requirements_nowheels.txt
+++ b/requirements/portable/requirements_nowheels.txt
@ -21,3 +21,4 @@ https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_cl
 flask_cloudflared==0.0.15
 sse-starlette==1.6.5
 tiktoken
+mcp==1.27.0
--- a/requirements/portable/requirements_vulkan.txt
+++ b/requirements/portable/requirements_vulkan.txt
@ -25,3 +25,4 @@ tiktoken
 # Vulkan wheels
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+mcp==1.27.0