Make web search functional with thinking models

2026-03-25 06:44:39 +01:00 · 2025-08-05 15:43:44 -07:00 · 2025-08-05 15:43:44 -07:00 · 7d98ca6195
parent 0e42575c57
commit 7d98ca6195
1 changed file with 9 additions and 3 deletions
--- a/modules/chat.py
+++ b/modules/chat.py
@ -307,7 +307,7 @@ def generate_chat_prompt(user_input, state, **kwargs):
        tools=state['tools'] if 'tools' in state else None,
        tools_in_user_message=False,
        add_generation_prompt=False,
-        reasoning_effort=state.get('reasoning_effort', 'medium')
+        reasoning_effort=state['reasoning_effort']
    )

    chat_renderer = partial(
@ -730,9 +730,9 @@ def generate_search_query(user_message, state):

    # Use a minimal state for search query generation but keep the full history
    search_state = state.copy()
-    search_state['max_new_tokens'] = 64
-    search_state['auto_max_new_tokens'] = False
+    search_state['auto_max_new_tokens'] = True
    search_state['enable_thinking'] = False
+    search_state['reasoning_effort'] = 'low'
    search_state['start_with'] = ""

    # Generate the full prompt using existing history + augmented message
@ -742,6 +742,12 @@ def generate_search_query(user_message, state):
    for reply in generate_reply(formatted_prompt, search_state, stopping_strings=[], is_chat=True):
        query = reply

+    # Check for thinking block delimiters and extract content after them
+    if "</think>" in query:
+        query = query.rsplit("</think>", 1)[1]
+    elif "<|start|>assistant<|channel|>final<|message|>" in query:
+        query = query.rsplit("<|start|>assistant<|channel|>final<|message|>", 1)[1]
+
    # Strip and remove surrounding quotes if present
    query = query.strip()
    if len(query) >= 2 and query.startswith('"') and query.endswith('"'):