diff --git a/README.md b/README.md
index 23cd09c5..b168ebdb 100644
--- a/README.md
+++ b/README.md
@@ -26,7 +26,7 @@ A Gradio web UI for running Large Language Models locally. 100% private and offl
 - **Easy setup**: [Portable builds](https://github.com/oobabooga/text-generation-webui/releases) (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or a one-click installer for the full feature set.
 - **Multiple backends**: [llama.cpp](https://github.com/ggerganov/llama.cpp), [ik_llama.cpp](https://github.com/ikawrakow/ik_llama.cpp), [Transformers](https://github.com/huggingface/transformers), [ExLlamaV3](https://github.com/turboderp-org/exllamav3), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM). Switch between backends and models without restarting.
 - **OpenAI/Anthropic-compatible API**: Chat, Completions, and Messages endpoints with tool-calling support. Use as a local drop-in replacement for the OpenAI/Anthropic APIs ([examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples)).
-- **Tool-calling**: Models can call custom functions during chat — web search, page fetching, math, and more. Each tool is a single `.py` file, easy to create and extend ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Tool-Calling-Tutorial)).
+- **Tool-calling**: Models can call custom functions during chat — web search, page fetching, math, and more. Each tool is a single `.py` file. MCP servers are also supported ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Tool-Calling-Tutorial)).
 - **Vision (multimodal)**: Attach images to messages for visual understanding ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/Multimodal-Tutorial)).
 - **File attachments**: Upload text files, PDF documents, and .docx documents to talk about their contents.
 - **Training**: Fine-tune LoRAs on multi-turn chat or raw text datasets. Supports resuming interrupted runs ([tutorial](https://github.com/oobabooga/text-generation-webui/wiki/05-%E2%80%90-Training-Tab)).
diff --git a/docs/Tool Calling Tutorial.md b/docs/Tool Calling Tutorial.md
index d95a9c80..7d2a86de 100644
--- a/docs/Tool Calling Tutorial.md
+++ b/docs/Tool Calling Tutorial.md
@@ -80,6 +80,19 @@ def execute(arguments):
 
 You can open the built-in tools in `user_data/tools/` for more examples.
 
+## MCP servers
+
+You can connect to remote [MCP (Model Context Protocol)](https://modelcontextprotocol.io/) servers to use their tools alongside local ones.
+
+In the chat sidebar, open the **MCP servers** accordion and enter one server URL per line. For servers that require authentication, append headers after the URL separated by commas:
+
+```
+https://example.com/mcp
+https://other.com/mcp,Authorization: Bearer sk-xxx
+```
+
+All tools from the configured servers are automatically discovered and made available to the model during generation. If an MCP tool has the same name as a selected local tool, the local tool takes priority.
+
 ## Tool calling over the API
 
 Tool calling over the API follows the [OpenAI API](https://platform.openai.com/docs/guides/function-calling) convention. Define your tools, send them with your messages, and handle tool calls in a loop until the model gives a final answer.
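To make the tutorial's closing paragraph concrete, a minimal sketch of that loop against the web UI's OpenAI-compatible endpoint could look like the following. It assumes the API's default address (`http://127.0.0.1:5000/v1`); the `get_current_weather` tool, its schema, and the faked execution are illustrative placeholders, not part of this patch.

```python
import json

from openai import OpenAI

client = OpenAI(base_url="http://127.0.0.1:5000/v1", api_key="sk-dummy")

tools = [{
    "type": "function",
    "function": {
        "name": "get_current_weather",  # hypothetical tool
        "description": "Get the current weather for a city.",
        "parameters": {
            "type": "object",
            "properties": {"city": {"type": "string"}},
            "required": ["city"],
        },
    },
}]

messages = [{"role": "user", "content": "What's the weather in Paris?"}]

while True:
    response = client.chat.completions.create(model="local", messages=messages, tools=tools)
    message = response.choices[0].message
    if not message.tool_calls:
        print(message.content)  # final answer
        break
    messages.append(message)  # keep the assistant's tool call in the conversation
    for call in message.tool_calls:
        args = json.loads(call.function.arguments)
        result = {"city": args.get("city"), "temperature_c": 21}  # fake tool execution
        messages.append({"role": "tool", "tool_call_id": call.id, "content": json.dumps(result)})
```

The `model` field is passed through, but as far as I can tell the currently loaded model is the one that answers.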
diff --git a/modules/chat.py b/modules/chat.py
index 76b8694a..aeed688d 100644
--- a/modules/chat.py
+++ b/modules/chat.py
@@ -1264,14 +1264,23 @@ def generate_chat_reply_wrapper(text, state, regenerate=False, _continue=False):
     # Load tools if any are selected
     selected = state.get('selected_tools', [])
+    mcp_servers = state.get('mcp_servers', '')
     parse_tool_call = None
    _tool_parsers = None
-    if selected:
-        from modules.tool_use import load_tools, execute_tool
+    if selected or mcp_servers:
+        from modules.tool_use import load_tools, load_mcp_tools, execute_tool
         from modules.tool_parsing import parse_tool_call, get_tool_call_id, detect_tool_call_format
 
-        tool_defs, tool_executors = load_tools(selected)
+        tool_defs, tool_executors = load_tools(selected) if selected else ([], {})
+        if mcp_servers:
+            mcp_defs, mcp_executors = load_mcp_tools(mcp_servers)
+            for td in mcp_defs:
+                fn = td['function']['name']
+                if fn in tool_executors:
+                    logger.warning(f'MCP tool "{fn}" conflicts with a local tool. Skipping.')
+                    continue
+                tool_defs.append(td)
+                tool_executors[fn] = mcp_executors[fn]
         state['tools'] = tool_defs
         tool_func_names = [t['function']['name'] for t in tool_defs]
         _template_str = state.get('instruction_template_str', '') if state.get('mode') == 'instruct' else state.get('chat_template_str', '')
diff --git a/modules/shared.py b/modules/shared.py
index 13843f0c..92c4f56c 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -259,6 +259,7 @@ settings = {
     'enable_web_search': False,
     'web_search_pages': 3,
     'selected_tools': [],
+    'mcp_servers': '',
     'prompt-notebook': '',
     'preset': 'Top-P' if (user_data_dir / 'presets/Top-P.yaml').exists() else None,
     'max_new_tokens': 512,
diff --git a/modules/tool_use.py b/modules/tool_use.py
index e22b1798..f9ddf940 100644
--- a/modules/tool_use.py
+++ b/modules/tool_use.py
@@ -1,3 +1,4 @@
+import asyncio
 import importlib.util
 import json
 
@@ -55,6 +56,119 @@ def load_tools(selected_names):
     return tool_defs, executors
 
 
+def _parse_mcp_servers(servers_str):
+    """Parse MCP servers textbox: one server per line, format 'url' or 'url,Header: value,Header2: value2'."""
+    servers = []
+    for line in servers_str.strip().splitlines():
+        line = line.strip()
+        if not line:
+            continue
+        parts = line.split(',')
+        url = parts[0].strip()
+        headers = {}
+        for part in parts[1:]:
+            part = part.strip()
+            if ':' in part:
+                key, val = part.split(':', 1)
+                headers[key.strip()] = val.strip()
+        servers.append((url, headers))
+    return servers
+
+
+def _mcp_tool_to_openai(tool):
+    """Convert an MCP Tool object to OpenAI-format tool dict."""
+    return {
+        "type": "function",
+        "function": {
+            "name": tool.name,
+            "description": tool.description or "",
+            "parameters": tool.inputSchema or {"type": "object", "properties": {}}
+        }
+    }
+
+
+async def _mcp_session(url, headers, callback):
+    """Open an MCP session and pass it to the callback."""
+    from mcp.client.streamable_http import streamablehttp_client
+    from mcp import ClientSession
+
+    async with streamablehttp_client(url, headers=headers or None) as (read_stream, write_stream, _):
+        async with ClientSession(read_stream, write_stream) as session:
+            await session.initialize()
+            return await callback(session)
+
+
+def _make_mcp_executor(name, url, headers):
+    def executor(arguments):
+        return asyncio.run(_call_mcp_tool(name, arguments, url, headers))
+    return executor
+
+
+async def _connect_mcp_server(url, headers):
+    """Connect to one MCP server and return (tool_defs, executors)."""
+
+    async def _discover(session):
+        result = await session.list_tools()
+        tool_defs = []
+        executors = {}
+        for tool in result.tools:
+            tool_defs.append(_mcp_tool_to_openai(tool))
+            executors[tool.name] = _make_mcp_executor(tool.name, url, headers)
+        return tool_defs, executors
+
+    return await _mcp_session(url, headers, _discover)
+
+
+async def _call_mcp_tool(name, arguments, url, headers):
+    """Connect to an MCP server and call a single tool."""
+
+    async def _invoke(session):
+        result = await session.call_tool(name, arguments)
+        parts = []
+        for content in result.content:
+            if hasattr(content, 'text'):
+                parts.append(content.text)
+            else:
+                parts.append(str(content))
+        return '\n'.join(parts) if parts else ''
+
+    return await _mcp_session(url, headers, _invoke)
+
+
+async def _connect_all_mcp_servers(servers):
+    """Connect to all MCP servers concurrently."""
+    results = await asyncio.gather(
+        *(_connect_mcp_server(url, headers) for url, headers in servers),
+        return_exceptions=True
+    )
+    all_defs = []
+    all_executors = {}
+    for (url, _), result in zip(servers, results):
+        if isinstance(result, Exception):
+            logger.exception(f'Failed to connect to MCP server "{url}"', exc_info=result)
+            continue
+        defs, execs = result
+        for td, (fn, ex) in zip(defs, execs.items()):
+            if fn in all_executors:
+                logger.warning(f'MCP tool "{fn}" from {url} conflicts with an already loaded tool. Skipping.')
+                continue
+            all_defs.append(td)
+            all_executors[fn] = ex
+    return all_defs, all_executors
+
+
+def load_mcp_tools(servers_str):
+    """
+    Parse MCP servers string and discover tools from each server.
+    Returns (tool_defs, executors) in the same format as load_tools.
+    """
+    servers = _parse_mcp_servers(servers_str)
+    if not servers:
+        return [], {}
+
+    return asyncio.run(_connect_all_mcp_servers(servers))
+
+
 def execute_tool(func_name, arguments, executors):
     """Execute a tool by function name. Returns result as a JSON string."""
     fn = executors.get(func_name)
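As a quick trace of `_parse_mcp_servers` above, the two-line example from the tutorial maps to `(url, headers)` tuples as follows. Only the first colon in each header is split, so values such as bearer tokens survive intact, while commas cannot appear inside the URL itself:

```python
servers_str = "https://example.com/mcp\nhttps://other.com/mcp,Authorization: Bearer sk-xxx"

assert _parse_mcp_servers(servers_str) == [
    ("https://example.com/mcp", {}),
    ("https://other.com/mcp", {"Authorization": "Bearer sk-xxx"}),
]
```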
diff --git a/modules/ui.py b/modules/ui.py
index 73072cbe..3a8390f7 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -209,6 +209,7 @@ def list_interface_input_elements():
         'textbox',
         'start_with',
         'selected_tools',
+        'mcp_servers',
         'mode',
         'chat_style',
         'chat-instruct_command',
@@ -434,6 +435,7 @@ def setup_auto_save():
         'custom_system_message',
         'chat_template_str',
         'selected_tools',
+        'mcp_servers',
 
         # Parameters tab (ui_parameters.py) - Generation parameters
         'preset_menu',
diff --git a/modules/ui_chat.py b/modules/ui_chat.py
index d9652253..14489d96 100644
--- a/modules/ui_chat.py
+++ b/modules/ui_chat.py
@@ -105,6 +105,9 @@ def create_ui():
 
             shared.gradio['selected_tools'].change(fn=sync_web_tools, inputs=[shared.gradio['selected_tools']], outputs=[shared.gradio['selected_tools']], show_progress=False)
 
+            with gr.Accordion('MCP servers', open=False):
+                shared.gradio['mcp_servers'] = gr.Textbox(value=shared.settings.get('mcp_servers', ''), lines=3, max_lines=3, label='', info='One url per line. For headers, write url,Header: value,Header2: value2', elem_classes=['add_scrollbar'])
+
             gr.HTML("")
             with gr.Row():
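Outside the UI, the new module functions can be exercised directly, mirroring what the `chat.py` hunk does before generation. A rough sketch; the server URL and the argument dict are placeholders:

```python
from modules.tool_use import load_mcp_tools, execute_tool

# Same format as the textbox: one server per line (a single hypothetical server here)
tool_defs, tool_executors = load_mcp_tools("https://example.com/mcp")

for td in tool_defs:
    print(td["function"]["name"], td["function"]["description"])

if tool_defs:
    name = tool_defs[0]["function"]["name"]
    # execute_tool resolves the executor by name and returns the result as a JSON string
    print(execute_tool(name, {"query": "hello"}, tool_executors))
```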
diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt
index 9f83830a..104cfdb2 100644
--- a/requirements/full/requirements.txt
+++ b/requirements/full/requirements.txt
@@ -48,3 +48,4 @@ https://github.com/turboderp-org/exllamav3/releases/download/v0.0.28/exllamav3-0
 https://github.com/turboderp-org/exllamav3/releases/download/v0.0.28/exllamav3-0.0.28+cu128.torch2.9.0-cp313-cp313-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.13"
 https://github.com/kingbri1/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu128torch2.9.0cxx11abiFALSE-cp313-cp313-win_amd64.whl; platform_system == "Windows" and python_version == "3.13"
 https://github.com/kingbri1/flash-attention/releases/download/v2.8.3/flash_attn-2.8.3+cu128torch2.9.0cxx11abiFALSE-cp313-cp313-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.13"
+mcp==1.27.0
diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt
index b4b8386e..49db44db 100644
--- a/requirements/full/requirements_amd.txt
+++ b/requirements/full/requirements_amd.txt
@@ -39,3 +39,4 @@ tiktoken
 # AMD wheels
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+mcp==1.27.0
diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt
index 41ee6a60..4584708f 100644
--- a/requirements/full/requirements_apple_intel.txt
+++ b/requirements/full/requirements_apple_intel.txt
@@ -38,3 +38,4 @@ tiktoken
 
 # Mac wheels
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
+mcp==1.27.0
diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt
index 8be2f55e..4376a2b4 100644
--- a/requirements/full/requirements_apple_silicon.txt
+++ b/requirements/full/requirements_apple_silicon.txt
@@ -38,3 +38,4 @@ tiktoken
 
 # Mac wheels
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
+mcp==1.27.0
diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt
index d7f1bf13..2999d4a9 100644
--- a/requirements/full/requirements_cpu_only.txt
+++ b/requirements/full/requirements_cpu_only.txt
@@ -41,3 +41,4 @@ https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
+mcp==1.27.0
diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt
index 7b331f96..5a1e504e 100644
--- a/requirements/full/requirements_nowheels.txt
+++ b/requirements/full/requirements_nowheels.txt
@@ -35,3 +35,4 @@ https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_cl
 flask_cloudflared==0.0.15
 sse-starlette==1.6.5
 tiktoken
+mcp==1.27.0
diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt
index b467cf26..fb51c7cc 100644
--- a/requirements/portable/requirements.txt
+++ b/requirements/portable/requirements.txt
@@ -25,3 +25,4 @@ tiktoken
 # CUDA wheels
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+mcp==1.27.0
diff --git a/requirements/portable/requirements_amd.txt b/requirements/portable/requirements_amd.txt
index 4eca16e1..dbea7597 100644
--- a/requirements/portable/requirements_amd.txt
+++ b/requirements/portable/requirements_amd.txt
@@ -25,3 +25,4 @@ tiktoken
 # AMD wheels
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+rocm7.2-py3-none-win_amd64.whl; platform_system == "Windows"
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+rocm7.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+mcp==1.27.0
diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt
index 55f8d3f8..d0f83a74 100644
--- a/requirements/portable/requirements_apple_intel.txt
+++ b/requirements/portable/requirements_apple_intel.txt
@@ -24,3 +24,4 @@ tiktoken
 
 # Mac wheels
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0-py3-none-macosx_13_0_x86_64.whl; platform_system == "Darwin"
+mcp==1.27.0
diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt
index 54e8f350..160c0646 100644
--- a/requirements/portable/requirements_apple_silicon.txt
+++ b/requirements/portable/requirements_apple_silicon.txt
@@ -24,3 +24,4 @@ tiktoken
 
 # Mac wheels
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin"
+mcp==1.27.0
diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt
index f073a614..21695585 100644
--- a/requirements/portable/requirements_cpu_only.txt
+++ b/requirements/portable/requirements_cpu_only.txt
@@ -25,3 +25,4 @@ tiktoken
 # llama.cpp (CPU only)
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
+mcp==1.27.0
diff --git a/requirements/portable/requirements_cuda131.txt b/requirements/portable/requirements_cuda131.txt
index 8cd40f39..6b09a46b 100644
--- a/requirements/portable/requirements_cuda131.txt
+++ b/requirements/portable/requirements_cuda131.txt
@@ -25,3 +25,4 @@ tiktoken
 # CUDA wheels
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+mcp==1.27.0
diff --git a/requirements/portable/requirements_ik.txt b/requirements/portable/requirements_ik.txt
index fbb9125d..ca5ece2d 100644
--- a/requirements/portable/requirements_ik.txt
+++ b/requirements/portable/requirements_ik.txt
@@ -25,3 +25,4 @@ tiktoken
 # CUDA wheels
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+mcp==1.27.0
diff --git a/requirements/portable/requirements_ik_cpu_only.txt b/requirements/portable/requirements_ik_cpu_only.txt
index 59fcfae1..f8bafb27 100644
--- a/requirements/portable/requirements_ik_cpu_only.txt
+++ b/requirements/portable/requirements_ik_cpu_only.txt
@@ -25,3 +25,4 @@ tiktoken
 # ik_llama.cpp (CPU only)
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cpu-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cpu-py3-none-win_amd64.whl; platform_system == "Windows"
+mcp==1.27.0
diff --git a/requirements/portable/requirements_ik_cuda131.txt b/requirements/portable/requirements_ik_cuda131.txt
index ffdbe568..7825b959 100644
--- a/requirements/portable/requirements_ik_cuda131.txt
+++ b/requirements/portable/requirements_ik_cuda131.txt
@@ -25,3 +25,4 @@ tiktoken
 # CUDA wheels
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cu131-py3-none-win_amd64.whl; platform_system == "Windows"
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/ik_llama_cpp_binaries-0.106.0+cu131-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+mcp==1.27.0
diff --git a/requirements/portable/requirements_nowheels.txt b/requirements/portable/requirements_nowheels.txt
index 4a47b1f0..cde036d9 100644
--- a/requirements/portable/requirements_nowheels.txt
+++ b/requirements/portable/requirements_nowheels.txt
@@ -21,3 +21,4 @@ https://github.com/oobabooga/gradio/releases/download/4.37.2-custom.19/gradio_cl
 flask_cloudflared==0.0.15
 sse-starlette==1.6.5
 tiktoken
+mcp==1.27.0
diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt
index 97abd933..32f9e593 100644
--- a/requirements/portable/requirements_vulkan.txt
+++ b/requirements/portable/requirements_vulkan.txt
@@ -25,3 +25,4 @@ tiktoken
 # Vulkan wheels
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
 https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.106.0/llama_cpp_binaries-0.106.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+mcp==1.27.0
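For end-to-end testing of the new accordion, any MCP server that speaks the Streamable HTTP transport works. Below is a minimal sketch using the `mcp` SDK's `FastMCP` helper; the host, port, and `/mcp` path are my assumption for this SDK version, so check the server's startup log for the actual URL to paste into the textbox.

```python
# demo_mcp_server.py: a tiny MCP server exposing one tool for testing.
from mcp.server.fastmcp import FastMCP

mcp = FastMCP("demo")


@mcp.tool()
def add(a: int, b: int) -> int:
    """Add two integers."""
    return a + b


if __name__ == "__main__":
    # Streamable HTTP transport; typically served at http://127.0.0.1:8000/mcp
    mcp.run(transport="streamable-http")
```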