Make web_search not download the page contents, use fetch_webpage instead

This commit is contained in:
oobabooga 2026-03-13 12:09:08 -07:00
parent d4c22ced83
commit 5362bbb413
3 changed files with 28 additions and 10 deletions

View file

@@ -96,6 +96,16 @@ def create_ui():
shared.gradio['tools_refresh'] = gr.Button('Refresh list', elem_id='tools-refresh-btn', visible=False)
shared.gradio['tools_refresh'].click(fn=lambda: gr.update(choices=get_available_tools()), inputs=[], outputs=[shared.gradio['selected_tools']])
def sync_web_tools(selected):
    """Keep 'fetch_webpage' selection in lockstep with 'web_search'.

    Since web_search no longer downloads page contents itself (it relies on
    fetch_webpage for that), selecting web_search auto-adds fetch_webpage,
    and deselecting web_search auto-removes fetch_webpage.

    Args:
        selected: list of currently selected tool names from the UI.

    Returns:
        gr.update with the synchronized tool list.
    """
    # Copy first so the list Gradio passed in is never mutated in place.
    synced = list(selected)
    if 'web_search' in synced and 'fetch_webpage' not in synced:
        synced.append('fetch_webpage')
    elif 'web_search' not in synced and 'fetch_webpage' in synced:
        synced.remove('fetch_webpage')
    return gr.update(value=synced)
shared.gradio['selected_tools'].change(fn=sync_web_tools, inputs=[shared.gradio['selected_tools']], outputs=[shared.gradio['selected_tools']], show_progress=False)
gr.HTML("<div class='sidebar-vertical-separator'></div>")
with gr.Row():

View file

@@ -49,8 +49,8 @@ def download_web_page(url, timeout=10, include_links=False):
return ""
def perform_web_search(query, num_pages=3, max_workers=5, timeout=10):
"""Perform web search and return results with content"""
def perform_web_search(query, num_pages=3, max_workers=5, timeout=10, fetch_content=True):
"""Perform web search and return results, optionally with page content"""
try:
search_url = f"https://html.duckduckgo.com/html/?q={quote_plus(query)}"
@@ -78,6 +78,16 @@ def perform_web_search(query, num_pages=3, max_workers=5, timeout=10):
search_results = [None] * len(download_tasks) # Pre-allocate to maintain order
if not fetch_content:
for url, title, index in download_tasks:
search_results[index] = {
'title': title,
'url': url,
'content': ''
}
return search_results
# Download pages in parallel
with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
# Submit all download tasks