Set context lengths to at most 8192 by default (to prevent out of memory errors) (#6835)

This commit is contained in:
oobabooga 2025-04-07 21:42:33 -03:00 committed by GitHub
parent f1f32386b4
commit a5855c345c
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
5 changed files with 15 additions and 12 deletions

View file

@@ -200,8 +200,10 @@ def create_event_handlers():
def load_model_wrapper(selected_model, loader, autoload=False):
settings = get_model_metadata(selected_model)
if not autoload:
yield f"The settings for `{selected_model}` have been updated.\n\nClick on \"Load\" to load it."
yield "### {}\n\n- Settings updated: Click \"Load\" to load the model\n- Max sequence length: {}".format(selected_model, settings['truncation_length_info'])
return
if selected_model == 'None':
@@ -214,11 +216,9 @@ def load_model_wrapper(selected_model, loader, autoload=False):
shared.model, shared.tokenizer = load_model(selected_model, loader)
if shared.model is not None:
output = f"Successfully loaded `{selected_model}`."
settings = get_model_metadata(selected_model)
output = f"Successfully loaded `{selected_model}`.\n\n"
if 'instruction_template' in settings:
output += '\n\nIt seems to be an instruction-following model with template "{}". In the chat tab, instruct or chat-instruct modes should be used.'.format(settings['instruction_template'])
output += '- It seems to be an instruction-following model with template "{}". In the chat tab, instruct or chat-instruct modes should be used.\n'.format(settings['instruction_template'])
yield output
else: