def truncate_content_by_tokens(content, max_tokens=8192):
    """Truncate content so that it encodes to at most ``max_tokens`` tokens.

    The longest character prefix of ``content`` whose encoding (via
    ``shared.tokenizer.encode``) fits the budget is located with a binary
    search over the prefix length, so the number of encode calls grows
    logarithmically with ``len(content)`` rather than linearly.

    Args:
        content: Text to truncate. ``None`` or an empty string yields ``""``.
        max_tokens: Maximum number of tokens the result may encode to.
            A value of zero or less always yields ``""``.

    Returns:
        ``content`` unchanged when it already fits the budget, otherwise the
        longest prefix of ``content`` that fits (possibly the empty string).
    """
    # Nothing to keep: no text, or no token budget. Answer without touching
    # the tokenizer, which may be expensive to call.
    if not content or max_tokens <= 0:
        return ""

    tokenizer = shared.tokenizer
    # NOTE(review): presumably shared.tokenizer can be None when no model is
    # loaded -- confirm. Without a tokenizer the text cannot be measured, so
    # it is passed through untouched instead of raising AttributeError.
    if tokenizer is None:
        return content

    def fits(text):
        """Return True when ``text`` encodes to at most ``max_tokens`` tokens."""
        return len(tokenizer.encode(text)) <= max_tokens

    if fits(content):
        return content

    # Binary search over prefix lengths. ``low`` is the longest prefix length
    # known (or, for 0, taken) to fit; ``high`` is the longest length still
    # worth trying. This relies on the token count not shrinking as the
    # prefix grows.
    low, high = 0, len(content)
    while low < high:
        # Round up so the loop always makes progress when high == low + 1.
        mid = (low + high + 1) // 2
        if fits(content[:mid]):
            low = mid
        else:
            high = mid - 1

    return content[:low]