From de4ccffff8bd0dff1fe4aca048115af2e398ea75 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sun, 6 Jul 2025 16:24:57 -0700
Subject: [PATCH] Fix the duckduckgo search
---
modules/web_search.py | 26 +++++++++++++------
requirements/full/requirements.txt | 1 -
requirements/full/requirements_amd.txt | 1 -
requirements/full/requirements_amd_noavx2.txt | 1 -
.../full/requirements_apple_intel.txt | 1 -
.../full/requirements_apple_silicon.txt | 1 -
requirements/full/requirements_cpu_only.txt | 1 -
.../full/requirements_cpu_only_noavx2.txt | 1 -
requirements/full/requirements_cuda128.txt | 1 -
.../full/requirements_cuda128_noavx2.txt | 1 -
requirements/full/requirements_noavx2.txt | 1 -
requirements/full/requirements_nowheels.txt | 1 -
requirements/portable/requirements.txt | 1 -
.../portable/requirements_apple_intel.txt | 1 -
.../portable/requirements_apple_silicon.txt | 1 -
.../portable/requirements_cpu_only.txt | 1 -
.../portable/requirements_cpu_only_noavx2.txt | 1 -
requirements/portable/requirements_noavx2.txt | 1 -
.../portable/requirements_nowheels.txt | 1 -
requirements/portable/requirements_vulkan.txt | 1 -
.../portable/requirements_vulkan_noavx2.txt | 1 -
21 files changed, 18 insertions(+), 28 deletions(-)
diff --git a/modules/web_search.py b/modules/web_search.py
index 401a42bb..3b1f6e18 100644
--- a/modules/web_search.py
+++ b/modules/web_search.py
@@ -1,6 +1,9 @@
import concurrent.futures
+import html
+import re
from concurrent.futures import as_completed
from datetime import datetime
+from urllib.parse import quote_plus
import requests
@@ -44,19 +47,26 @@ def download_web_page(url, timeout=10):
return ""
-def perform_web_search(query, num_pages=3, max_workers=5):
+def perform_web_search(query, num_pages=3, max_workers=5, timeout=10):
"""Perform web search and return results with content"""
- from duckduckgo_search import DDGS
-
try:
- with DDGS() as ddgs:
- results = list(ddgs.text(query, max_results=num_pages))
+ # Use DuckDuckGo HTML search endpoint
+ search_url = f"https://html.duckduckgo.com/html/?q={quote_plus(query)}"
+ headers = {'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36'}
+
+ response = requests.get(search_url, headers=headers, timeout=timeout)
+ response.raise_for_status()
+
+ # Extract results with regex
+ titles = re.findall(r'<a[^>]*class="[^"]*result__a[^"]*"[^>]*>(.*?)</a>', response.text, re.DOTALL)
+ urls = re.findall(r'<a[^>]*class="[^"]*result__url[^"]*"[^>]*>(.*?)</a>', response.text, re.DOTALL)
# Prepare download tasks
download_tasks = []
- for i, result in enumerate(results):
- url = result.get('href', '')
- title = result.get('title', f'Search Result {i+1}')
+ for i in range(min(len(titles), len(urls), num_pages)):
+ url = f"https://{urls[i].strip()}"
+ title = re.sub(r'<[^>]+>', '', titles[i]).strip()
+ title = html.unescape(title)
download_tasks.append((url, title, i))
search_results = [None] * len(download_tasks) # Pre-allocate to maintain order
diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt
index 1f7b8f9e..9735fcee 100644
--- a/requirements/full/requirements.txt
+++ b/requirements/full/requirements.txt
@@ -2,7 +2,6 @@ accelerate==1.5.*
bitsandbytes==0.45.*
colorama
datasets
-duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt
index ea2dbf67..c7db0116 100644
--- a/requirements/full/requirements_amd.txt
+++ b/requirements/full/requirements_amd.txt
@@ -1,7 +1,6 @@
accelerate==1.5.*
colorama
datasets
-duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt
index 251d5e24..10c89332 100644
--- a/requirements/full/requirements_amd_noavx2.txt
+++ b/requirements/full/requirements_amd_noavx2.txt
@@ -1,7 +1,6 @@
accelerate==1.5.*
colorama
datasets
-duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt
index 649eaaca..e900f1bc 100644
--- a/requirements/full/requirements_apple_intel.txt
+++ b/requirements/full/requirements_apple_intel.txt
@@ -1,7 +1,6 @@
accelerate==1.5.*
colorama
datasets
-duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt
index 7f1895fc..a8531389 100644
--- a/requirements/full/requirements_apple_silicon.txt
+++ b/requirements/full/requirements_apple_silicon.txt
@@ -1,7 +1,6 @@
accelerate==1.5.*
colorama
datasets
-duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt
index 6ec88d63..3735a1a8 100644
--- a/requirements/full/requirements_cpu_only.txt
+++ b/requirements/full/requirements_cpu_only.txt
@@ -1,7 +1,6 @@
accelerate==1.5.*
colorama
datasets
-duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt
index 7e6c780d..f93a4d77 100644
--- a/requirements/full/requirements_cpu_only_noavx2.txt
+++ b/requirements/full/requirements_cpu_only_noavx2.txt
@@ -1,7 +1,6 @@
accelerate==1.5.*
colorama
datasets
-duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
diff --git a/requirements/full/requirements_cuda128.txt b/requirements/full/requirements_cuda128.txt
index cbac577a..b5bee1e4 100644
--- a/requirements/full/requirements_cuda128.txt
+++ b/requirements/full/requirements_cuda128.txt
@@ -2,7 +2,6 @@ accelerate==1.5.*
bitsandbytes==0.45.*
colorama
datasets
-duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
diff --git a/requirements/full/requirements_cuda128_noavx2.txt b/requirements/full/requirements_cuda128_noavx2.txt
index 3f7add14..2cd91205 100644
--- a/requirements/full/requirements_cuda128_noavx2.txt
+++ b/requirements/full/requirements_cuda128_noavx2.txt
@@ -2,7 +2,6 @@ accelerate==1.5.*
bitsandbytes==0.45.*
colorama
datasets
-duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt
index 9903bf7a..35b683c8 100644
--- a/requirements/full/requirements_noavx2.txt
+++ b/requirements/full/requirements_noavx2.txt
@@ -2,7 +2,6 @@ accelerate==1.5.*
bitsandbytes==0.45.*
colorama
datasets
-duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt
index b1c3c6ea..30020989 100644
--- a/requirements/full/requirements_nowheels.txt
+++ b/requirements/full/requirements_nowheels.txt
@@ -1,7 +1,6 @@
accelerate==1.5.*
colorama
datasets
-duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt
index 785ce623..d928ff08 100644
--- a/requirements/portable/requirements.txt
+++ b/requirements/portable/requirements.txt
@@ -1,4 +1,3 @@
-duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt
index d56d6a79..b25298fc 100644
--- a/requirements/portable/requirements_apple_intel.txt
+++ b/requirements/portable/requirements_apple_intel.txt
@@ -1,4 +1,3 @@
-duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt
index 3e00a781..e52a7248 100644
--- a/requirements/portable/requirements_apple_silicon.txt
+++ b/requirements/portable/requirements_apple_silicon.txt
@@ -1,4 +1,3 @@
-duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt
index 8006762e..7c2578d2 100644
--- a/requirements/portable/requirements_cpu_only.txt
+++ b/requirements/portable/requirements_cpu_only.txt
@@ -1,4 +1,3 @@
-duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt
index 9ec08082..c125c97e 100644
--- a/requirements/portable/requirements_cpu_only_noavx2.txt
+++ b/requirements/portable/requirements_cpu_only_noavx2.txt
@@ -1,4 +1,3 @@
-duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt
index 8ca5441a..6daa06fc 100644
--- a/requirements/portable/requirements_noavx2.txt
+++ b/requirements/portable/requirements_noavx2.txt
@@ -1,4 +1,3 @@
-duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
diff --git a/requirements/portable/requirements_nowheels.txt b/requirements/portable/requirements_nowheels.txt
index c22d6441..b7b73eff 100644
--- a/requirements/portable/requirements_nowheels.txt
+++ b/requirements/portable/requirements_nowheels.txt
@@ -1,4 +1,3 @@
-duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt
index a164c879..247d3a9c 100644
--- a/requirements/portable/requirements_vulkan.txt
+++ b/requirements/portable/requirements_vulkan.txt
@@ -1,4 +1,3 @@
-duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt
index 3b6943b7..466a1b6e 100644
--- a/requirements/portable/requirements_vulkan_noavx2.txt
+++ b/requirements/portable/requirements_vulkan_noavx2.txt
@@ -1,4 +1,3 @@
-duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15