diff --git a/extensions/openai/typing.py b/extensions/openai/typing.py index 6643ed16..6bd3749f 100644 --- a/extensions/openai/typing.py +++ b/extensions/openai/typing.py @@ -43,6 +43,7 @@ class GenerationOptions(BaseModel): ban_eos_token: bool = False add_bos_token: bool = True enable_thinking: bool = True + reasoning_effort: str = "medium" skip_special_tokens: bool = True static_cache: bool = False truncation_length: int = 0 diff --git a/modules/chat.py b/modules/chat.py index 043908c9..dd923d67 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -306,7 +306,8 @@ def generate_chat_prompt(user_input, state, **kwargs): builtin_tools=None, tools=state['tools'] if 'tools' in state else None, tools_in_user_message=False, - add_generation_prompt=False + add_generation_prompt=False, + reasoning_effort=state.get('reasoning_effort', 'medium') ) chat_renderer = partial( diff --git a/modules/loaders.py b/modules/loaders.py index f515aeca..7546bc5b 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -137,6 +137,7 @@ def transformers_samplers(): 'ban_eos_token', 'add_bos_token', 'enable_thinking', + 'reasoning_effort', 'skip_special_tokens', 'static_cache', 'seed', @@ -189,6 +190,7 @@ loaders_samplers = { 'ban_eos_token', 'add_bos_token', 'enable_thinking', + 'reasoning_effort', 'skip_special_tokens', 'seed', 'sampler_priority', @@ -236,6 +238,7 @@ loaders_samplers = { 'ban_eos_token', 'add_bos_token', 'enable_thinking', + 'reasoning_effort', 'skip_special_tokens', 'seed', 'sampler_priority', @@ -275,6 +278,7 @@ loaders_samplers = { 'ban_eos_token', 'add_bos_token', 'enable_thinking', + 'reasoning_effort', 'skip_special_tokens', 'seed', 'custom_token_bans', @@ -308,6 +312,7 @@ loaders_samplers = { 'ban_eos_token', 'add_bos_token', 'enable_thinking', + 'reasoning_effort', 'seed', 'sampler_priority', 'dry_sequence_breakers', diff --git a/modules/shared.py b/modules/shared.py index 5e3e11c0..ab5198d1 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -211,6 +211,7 @@ settings = { 'ban_eos_token': False, 'add_bos_token': True, 'enable_thinking': True, + 'reasoning_effort': 'medium', 'skip_special_tokens': True, 'stream': True, 'static_cache': False, diff --git a/modules/ui.py b/modules/ui.py index 98acc038..e7805046 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -215,6 +215,7 @@ def list_interface_input_elements(): 'ban_eos_token', 'add_bos_token', 'enable_thinking', + 'reasoning_effort', 'skip_special_tokens', 'stream', 'static_cache', @@ -482,6 +483,7 @@ def setup_auto_save(): 'ban_eos_token', 'add_bos_token', 'enable_thinking', + 'reasoning_effort', 'skip_special_tokens', 'stream', 'static_cache', diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 4dade176..1d85a398 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -78,7 +78,8 @@ def create_ui(): with gr.Row(): shared.gradio['start_with'] = gr.Textbox(label='Start reply with', placeholder='Sure thing!', value=shared.settings['start_with'], elem_classes=['add_scrollbar']) - shared.gradio['enable_thinking'] = gr.Checkbox(value=shared.settings['enable_thinking'], label='Enable thinking', info='Used by Qwen3 to toggle mode.') + shared.gradio['reasoning_effort'] = gr.Dropdown(value=shared.settings['reasoning_effort'], choices=['low', 'medium', 'high'], label='Reasoning effort', info='Used by GPT-OSS.') + shared.gradio['enable_thinking'] = gr.Checkbox(value=shared.settings['enable_thinking'], label='Enable thinking', info='Used by pre-2507 Qwen3.') shared.gradio['enable_web_search'] = gr.Checkbox(value=shared.settings.get('enable_web_search', False), label='Activate web search', elem_id='web-search') with gr.Row(visible=shared.settings.get('enable_web_search', False)) as shared.gradio['web_search_row']: shared.gradio['web_search_pages'] = gr.Number(value=shared.settings.get('web_search_pages', 3), precision=0, label='Number of pages to download', minimum=1, maximum=10)