New attempt at solving the exl2 issue

This commit is contained in:
oobabooga 2025-04-17 22:03:11 -07:00
parent c9b3c9dfbf
commit 5dfab7d363

View file

@@ -263,6 +263,9 @@ def apply_stopping_strings(reply, all_stop_strings):
def get_reply_from_output_ids(output_ids, state=None, starting_from=0):
if torch.cuda.is_available():
torch.cuda.synchronize()
reply = decode(output_ids[starting_from:], state['skip_special_tokens'] if state else True)
# Handle tokenizers that do not add the leading space for the first token