Remove images and links from websearch results

This reduces noise a lot
This commit is contained in:
oobabooga 2025-06-14 20:00:11 -07:00
parent e263dbf852
commit db7d717df7

View file

@@ -29,6 +29,8 @@ def download_web_page(url, timeout=10):
 # Initialize the HTML to Markdown converter
 h = html2text.HTML2Text()
 h.body_width = 0
+h.ignore_images = True
+h.ignore_links = True
 # Convert the HTML to Markdown
 markdown_text = h.handle(response.text)