From de4ccffff8bd0dff1fe4aca048115af2e398ea75 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 6 Jul 2025 16:24:57 -0700 Subject: [PATCH] Fix the duckduckgo search --- modules/web_search.py | 26 +++++++++++++------ requirements/full/requirements.txt | 1 - requirements/full/requirements_amd.txt | 1 - requirements/full/requirements_amd_noavx2.txt | 1 - .../full/requirements_apple_intel.txt | 1 - .../full/requirements_apple_silicon.txt | 1 - requirements/full/requirements_cpu_only.txt | 1 - .../full/requirements_cpu_only_noavx2.txt | 1 - requirements/full/requirements_cuda128.txt | 1 - .../full/requirements_cuda128_noavx2.txt | 1 - requirements/full/requirements_noavx2.txt | 1 - requirements/full/requirements_nowheels.txt | 1 - requirements/portable/requirements.txt | 1 - .../portable/requirements_apple_intel.txt | 1 - .../portable/requirements_apple_silicon.txt | 1 - .../portable/requirements_cpu_only.txt | 1 - .../portable/requirements_cpu_only_noavx2.txt | 1 - requirements/portable/requirements_noavx2.txt | 1 - .../portable/requirements_nowheels.txt | 1 - requirements/portable/requirements_vulkan.txt | 1 - .../portable/requirements_vulkan_noavx2.txt | 1 - 21 files changed, 18 insertions(+), 28 deletions(-) diff --git a/modules/web_search.py b/modules/web_search.py index 401a42bb..3b1f6e18 100644 --- a/modules/web_search.py +++ b/modules/web_search.py @@ -1,6 +1,9 @@ import concurrent.futures +import html +import re from concurrent.futures import as_completed from datetime import datetime +from urllib.parse import quote_plus import requests @@ -44,19 +47,26 @@ def download_web_page(url, timeout=10): return "" -def perform_web_search(query, num_pages=3, max_workers=5): +def perform_web_search(query, num_pages=3, max_workers=5, timeout=10): """Perform web search and return results with content""" - from duckduckgo_search import DDGS - try: - with DDGS() as ddgs: - results = list(ddgs.text(query, max_results=num_pages)) 
+ # Use DuckDuckGo HTML search endpoint + search_url = f"https://html.duckduckgo.com/html/?q={quote_plus(query)}" + headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'} + + response = requests.get(search_url, headers=headers, timeout=timeout) + response.raise_for_status() + + # Extract results with regex + titles = re.findall(r'<a[^>]*class="[^"]*result__a[^"]*"[^>]*>(.*?)</a>', response.text, re.DOTALL) + urls = re.findall(r'<a[^>]*class="[^"]*result__url[^"]*"[^>]*>(.*?)</a>', response.text, re.DOTALL) # Prepare download tasks download_tasks = [] - for i, result in enumerate(results): - url = result.get('href', '') - title = result.get('title', f'Search Result {i+1}') + for i in range(min(len(titles), len(urls), num_pages)): + url = f"https://{urls[i].strip()}" + title = re.sub(r'<[^>]+>', '', titles[i]).strip() + title = html.unescape(title) download_tasks.append((url, title, i)) search_results = [None] * len(download_tasks) # Pre-allocate to maintain order diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index 1f7b8f9e..9735fcee 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -2,7 +2,6 @@ accelerate==1.5.* bitsandbytes==0.45.* colorama datasets -duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index ea2dbf67..c7db0116 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -1,7 +1,6 @@ accelerate==1.5.* colorama datasets -duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index 251d5e24..10c89332 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -1,7 +1,6 @@ accelerate==1.5.* colorama datasets -duckduckgo_search==8.0.2 einops 
fastapi==0.112.4 gradio==4.37.* diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index 649eaaca..e900f1bc 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -1,7 +1,6 @@ accelerate==1.5.* colorama datasets -duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index 7f1895fc..a8531389 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -1,7 +1,6 @@ accelerate==1.5.* colorama datasets -duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index 6ec88d63..3735a1a8 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -1,7 +1,6 @@ accelerate==1.5.* colorama datasets -duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index 7e6c780d..f93a4d77 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -1,7 +1,6 @@ accelerate==1.5.* colorama datasets -duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* diff --git a/requirements/full/requirements_cuda128.txt b/requirements/full/requirements_cuda128.txt index cbac577a..b5bee1e4 100644 --- a/requirements/full/requirements_cuda128.txt +++ b/requirements/full/requirements_cuda128.txt @@ -2,7 +2,6 @@ accelerate==1.5.* bitsandbytes==0.45.* colorama datasets -duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* diff --git a/requirements/full/requirements_cuda128_noavx2.txt b/requirements/full/requirements_cuda128_noavx2.txt index 
3f7add14..2cd91205 100644 --- a/requirements/full/requirements_cuda128_noavx2.txt +++ b/requirements/full/requirements_cuda128_noavx2.txt @@ -2,7 +2,6 @@ accelerate==1.5.* bitsandbytes==0.45.* colorama datasets -duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index 9903bf7a..35b683c8 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -2,7 +2,6 @@ accelerate==1.5.* bitsandbytes==0.45.* colorama datasets -duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt index b1c3c6ea..30020989 100644 --- a/requirements/full/requirements_nowheels.txt +++ b/requirements/full/requirements_nowheels.txt @@ -1,7 +1,6 @@ accelerate==1.5.* colorama datasets -duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt index 785ce623..d928ff08 100644 --- a/requirements/portable/requirements.txt +++ b/requirements/portable/requirements.txt @@ -1,4 +1,3 @@ -duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* html2text==2025.4.15 diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt index d56d6a79..b25298fc 100644 --- a/requirements/portable/requirements_apple_intel.txt +++ b/requirements/portable/requirements_apple_intel.txt @@ -1,4 +1,3 @@ -duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* html2text==2025.4.15 diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt index 3e00a781..e52a7248 100644 --- a/requirements/portable/requirements_apple_silicon.txt +++ b/requirements/portable/requirements_apple_silicon.txt @@ -1,4 +1,3 @@ -duckduckgo_search==8.0.2 fastapi==0.112.4 
gradio==4.37.* html2text==2025.4.15 diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt index 8006762e..7c2578d2 100644 --- a/requirements/portable/requirements_cpu_only.txt +++ b/requirements/portable/requirements_cpu_only.txt @@ -1,4 +1,3 @@ -duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* html2text==2025.4.15 diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt index 9ec08082..c125c97e 100644 --- a/requirements/portable/requirements_cpu_only_noavx2.txt +++ b/requirements/portable/requirements_cpu_only_noavx2.txt @@ -1,4 +1,3 @@ -duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* html2text==2025.4.15 diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt index 8ca5441a..6daa06fc 100644 --- a/requirements/portable/requirements_noavx2.txt +++ b/requirements/portable/requirements_noavx2.txt @@ -1,4 +1,3 @@ -duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* html2text==2025.4.15 diff --git a/requirements/portable/requirements_nowheels.txt b/requirements/portable/requirements_nowheels.txt index c22d6441..b7b73eff 100644 --- a/requirements/portable/requirements_nowheels.txt +++ b/requirements/portable/requirements_nowheels.txt @@ -1,4 +1,3 @@ -duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* html2text==2025.4.15 diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt index a164c879..247d3a9c 100644 --- a/requirements/portable/requirements_vulkan.txt +++ b/requirements/portable/requirements_vulkan.txt @@ -1,4 +1,3 @@ -duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* html2text==2025.4.15 diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt index 3b6943b7..466a1b6e 100644 --- a/requirements/portable/requirements_vulkan_noavx2.txt +++ 
b/requirements/portable/requirements_vulkan_noavx2.txt @@ -1,4 +1,3 @@ -duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* html2text==2025.4.15