From abc920752f3ec321c5283b3d10fd65993bc1dee5 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 25 Jan 2023 22:27:04 -0300 Subject: [PATCH] Stop at eos_token while streaming text (for #26) --- server.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/server.py b/server.py index c42a7869..8a080206 100644 --- a/server.py +++ b/server.py @@ -177,7 +177,7 @@ def generate_reply(question, tokens, inference_settings, selected_model, eos_tok loaded_preset = inference_settings cuda = "" if args.cpu else ".cuda()" - n = None if eos_token is None else tokenizer.encode(eos_token, return_tensors='pt')[0][-1] + n = tokenizer.eos_token_id if eos_token is None else tokenizer.encode(eos_token, return_tensors='pt')[0][-1] input_ids = encode(question, tokens) # The stopping_criteria code below was copied from # https://github.com/PygmalionAI/gradio-ui/blob/master/src/model.py @@ -208,10 +208,10 @@ def generate_reply(question, tokens, inference_settings, selected_model, eos_tok for i in tqdm(range(tokens//8+1)): output = eval(f"model.generate(input_ids, eos_token_id={n}, stopping_criteria=stopping_criteria_list, {preset}){cuda}") reply = decode(output[0]) - if eos_token is not None and reply[-1] == eos_token: - break yield formatted_outputs(reply, model_name) input_ids = output + if output[0][-1] == n: + break def get_available_models(): return sorted(set([item.replace('.pt', '') for item in map(lambda x : str(x.name), list(Path('models/').glob('*'))+list(Path('torch-dumps/').glob('*'))) if not item.endswith('.txt')]), key=str.lower)