From 5362bbb4132ae5ddbed4c4dab739e7dd64c1e6ab Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 13 Mar 2026 12:09:08 -0700 Subject: [PATCH] Make web_search not download the page contents, use fetch_webpage instead --- modules/ui_chat.py | 10 ++++++++++ modules/web_search.py | 14 ++++++++++++-- user_data/tools/web_search.py | 14 ++++++-------- 3 files changed, 28 insertions(+), 10 deletions(-) diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 039b9af6..ea341fa6 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -96,6 +96,16 @@ def create_ui(): shared.gradio['tools_refresh'] = gr.Button('Refresh list', elem_id='tools-refresh-btn', visible=False) shared.gradio['tools_refresh'].click(fn=lambda: gr.update(choices=get_available_tools()), inputs=[], outputs=[shared.gradio['selected_tools']]) + def sync_web_tools(selected): + if 'web_search' in selected and 'fetch_webpage' not in selected: + selected.append('fetch_webpage') + elif 'web_search' not in selected and 'fetch_webpage' in selected: + selected.remove('fetch_webpage') + + return gr.update(value=selected) + + shared.gradio['selected_tools'].change(fn=sync_web_tools, inputs=[shared.gradio['selected_tools']], outputs=[shared.gradio['selected_tools']], show_progress=False) + gr.HTML("
") with gr.Row(): diff --git a/modules/web_search.py b/modules/web_search.py index b14cd042..754dd111 100644 --- a/modules/web_search.py +++ b/modules/web_search.py @@ -49,8 +49,8 @@ def download_web_page(url, timeout=10, include_links=False): return "" -def perform_web_search(query, num_pages=3, max_workers=5, timeout=10): - """Perform web search and return results with content""" +def perform_web_search(query, num_pages=3, max_workers=5, timeout=10, fetch_content=True): + """Perform web search and return results, optionally with page content""" try: search_url = f"https://html.duckduckgo.com/html/?q={quote_plus(query)}" @@ -78,6 +78,16 @@ def perform_web_search(query, num_pages=3, max_workers=5, timeout=10): search_results = [None] * len(download_tasks) # Pre-allocate to maintain order + if not fetch_content: + for url, title, index in download_tasks: + search_results[index] = { + 'title': title, + 'url': url, + 'content': '' + } + + return search_results + # Download pages in parallel with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: # Submit all download tasks diff --git a/user_data/tools/web_search.py b/user_data/tools/web_search.py index 80845963..30d13473 100644 --- a/user_data/tools/web_search.py +++ b/user_data/tools/web_search.py @@ -1,16 +1,15 @@ -from modules.web_search import perform_web_search, truncate_content_by_tokens +from modules.web_search import perform_web_search tool = { "type": "function", "function": { "name": "web_search", - "description": "Search the web using DuckDuckGo and return page contents.", + "description": "Search the web using DuckDuckGo and return a list of result titles and URLs. Use fetch_webpage to read the contents of a specific result.", "parameters": { "type": "object", "properties": { "query": {"type": "string", "description": "The search query."}, - "num_pages": {"type": "integer", "description": "Number of search result pages to fetch (default: 3)."}, - "max_tokens": {"type": "integer", "description": "Maximum number of tokens per page result (default: 2048)."}, + "num_pages": {"type": "integer", "description": "Number of search results to return (default: 3)."}, }, "required": ["query"] } @@ -21,11 +20,10 @@ tool = { def execute(arguments): query = arguments.get("query", "") num_pages = arguments.get("num_pages", 3) - max_tokens = arguments.get("max_tokens", 2048) - results = perform_web_search(query, num_pages=num_pages) + results = perform_web_search(query, num_pages=num_pages, fetch_content=False) output = [] for r in results: - if r and r["content"].strip(): - output.append({"title": r["title"], "url": r["url"], "content": truncate_content_by_tokens(r["content"], max_tokens=max_tokens)}) + if r: + output.append({"title": r["title"], "url": r["url"]}) return output if output else [{"error": "No results found."}]