From 9a2353f97bf18e597358747ca4853a5128bc97b1 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Fri, 13 Jun 2025 05:44:02 -0700 Subject: [PATCH] Better log message when the user input gets truncated --- modules/chat.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/modules/chat.py b/modules/chat.py index f080ef65..d9ac652c 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -353,7 +353,17 @@ def generate_chat_prompt(user_input, state, **kwargs): logger.error(f"Failed to build the chat prompt. The input is too long for the available context length.\n\nTruncation length: {state['truncation_length']}\nmax_new_tokens: {state['max_new_tokens']} (is it too high?)\nAvailable context length: {max_length}\n") raise ValueError else: - logger.warning(f"The input has been truncated. Context length: {state['truncation_length']}, max_new_tokens: {state['max_new_tokens']}, available context length: {max_length}.") + # Calculate token counts for the log message + original_user_tokens = get_encoded_length(user_message) + truncated_user_tokens = get_encoded_length(user_message[:left]) + total_context = max_length + state['max_new_tokens'] + + logger.warning( + f"User message truncated from {original_user_tokens} to {truncated_user_tokens} tokens. " + f"Context full: {max_length} input tokens ({total_context} total, {state['max_new_tokens']} for output). " + f"Increase ctx-size while loading the model to avoid truncation." + ) + break prompt = make_prompt(messages)