From 5362bbb4132ae5ddbed4c4dab739e7dd64c1e6ab Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 13 Mar 2026 12:09:08 -0700
Subject: [PATCH] Make web_search not download the page contents, use
 fetch_webpage instead

---
 modules/ui_chat.py            | 10 ++++++++++
 modules/web_search.py         | 14 ++++++++++++--
 user_data/tools/web_search.py | 14 ++++++--------
 3 files changed, 28 insertions(+), 10 deletions(-)
diff --git a/modules/ui_chat.py b/modules/ui_chat.py
index 039b9af6..ea341fa6 100644
--- a/modules/ui_chat.py
+++ b/modules/ui_chat.py
@@ -96,6 +96,16 @@ def create_ui():
                 shared.gradio['tools_refresh'] = gr.Button('Refresh list', elem_id='tools-refresh-btn', visible=False)
                 shared.gradio['tools_refresh'].click(fn=lambda: gr.update(choices=get_available_tools()), inputs=[], outputs=[shared.gradio['selected_tools']])
 
+                def sync_web_tools(selected):  # makes 'fetch_webpage' follow the 'web_search' selection
+                    if 'web_search' in selected and 'fetch_webpage' not in selected:
+                        selected.append('fetch_webpage')  # web_search picked: add fetch_webpage as well
+                    elif 'web_search' not in selected and 'fetch_webpage' in selected:
+                        selected.remove('fetch_webpage')  # web_search not picked: fetch_webpage is dropped
+
+                    return gr.update(value=selected)  # `selected` was mutated in place above
+
+                shared.gradio['selected_tools'].change(fn=sync_web_tools, inputs=[shared.gradio['selected_tools']], outputs=[shared.gradio['selected_tools']], show_progress=False)
+
                 gr.HTML("<div class='sidebar-vertical-separator'></div>")
 
                 with gr.Row():
diff --git a/modules/web_search.py b/modules/web_search.py
index b14cd042..754dd111 100644
--- a/modules/web_search.py
+++ b/modules/web_search.py
@@ -49,8 +49,8 @@ def download_web_page(url, timeout=10, include_links=False):
         return ""
 
 
-def perform_web_search(query, num_pages=3, max_workers=5, timeout=10):
-    """Perform web search and return results with content"""
+def perform_web_search(query, num_pages=3, max_workers=5, timeout=10, fetch_content=True):
+    """Perform web search and return results, optionally with page content"""
     try:
         search_url = f"https://html.duckduckgo.com/html/?q={quote_plus(query)}"
 
@@ -78,6 +78,16 @@ def perform_web_search(query, num_pages=3, max_workers=5, timeout=10):
 
         search_results = [None] * len(download_tasks)  # Pre-allocate to maintain order
 
+        if not fetch_content:
+            for url, title, index in download_tasks:
+                search_results[index] = {
+                    'title': title,
+                    'url': url,
+                    'content': ''
+                }
+
+            return search_results
+
         # Download pages in parallel
         with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
             # Submit all download tasks
diff --git a/user_data/tools/web_search.py b/user_data/tools/web_search.py
index 80845963..30d13473 100644
--- a/user_data/tools/web_search.py
+++ b/user_data/tools/web_search.py
@@ -1,16 +1,15 @@
-from modules.web_search import perform_web_search, truncate_content_by_tokens
+from modules.web_search import perform_web_search
 
 tool = {
     "type": "function",
     "function": {
         "name": "web_search",
-        "description": "Search the web using DuckDuckGo and return page contents.",
+        "description": "Search the web using DuckDuckGo and return a list of result titles and URLs. Use fetch_webpage to read the contents of a specific result.",
         "parameters": {
             "type": "object",
             "properties": {
                 "query": {"type": "string", "description": "The search query."},
-                "num_pages": {"type": "integer", "description": "Number of search result pages to fetch (default: 3)."},
-                "max_tokens": {"type": "integer", "description": "Maximum number of tokens per page result (default: 2048)."},
+                "num_pages": {"type": "integer", "description": "Number of search results to return (default: 3)."},
             },
             "required": ["query"]
         }
@@ -21,11 +20,10 @@ tool = {
 def execute(arguments):
     query = arguments.get("query", "")
     num_pages = arguments.get("num_pages", 3)
-    max_tokens = arguments.get("max_tokens", 2048)
-    results = perform_web_search(query, num_pages=num_pages)
+    results = perform_web_search(query, num_pages=num_pages, fetch_content=False)
     output = []
     for r in results:
-        if r and r["content"].strip():
-            output.append({"title": r["title"], "url": r["url"], "content": truncate_content_by_tokens(r["content"], max_tokens=max_tokens)})
+        if r:
+            output.append({"title": r["title"], "url": r["url"]})
 
     return output if output else [{"error": "No results found."}]