From b46ca01340dfc096e88147e4f9dd3f971708d635 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 30 Apr 2025 14:53:15 -0700 Subject: [PATCH] UI: Set max_updates_second to 12 by default When the tokens/second are at ~50 and the model is a thinking model, the markdown rendering for the streaming message becomes a CPU bottleneck. --- modules/shared.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/shared.py b/modules/shared.py index 4c1179e3..fb10c014 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -47,7 +47,7 @@ settings = { 'max_new_tokens_max': 4096, 'prompt_lookup_num_tokens': 0, 'max_tokens_second': 0, - 'max_updates_second': 0, + 'max_updates_second': 12, 'auto_max_new_tokens': True, 'ban_eos_token': False, 'add_bos_token': True,