Mirror of https://github.com/oobabooga/text-generation-webui.git (synced 2026-04-04 14:17:28 +00:00)
Optimize the end of generation with llama.cpp
This commit is contained in:
parent
db7d717df7
commit
609c3ac893
2 changed files with 10 additions and 2 deletions
|
|
@@ -498,8 +498,14 @@ def generate_reply_custom(question, original_question, state, stopping_strings=N
|
|||
traceback.print_exc()
|
||||
finally:
|
||||
t1 = time.time()
|
||||
original_tokens = len(encode(original_question)[0])
|
||||
new_tokens = len(encode(original_question + reply)[0]) - original_tokens
|
||||
|
||||
if hasattr(shared.model, 'last_prompt_token_count'):
|
||||
original_tokens = shared.model.last_prompt_token_count
|
||||
new_tokens = len(encode(reply)[0]) if reply else 0
|
||||
else:
|
||||
original_tokens = len(encode(original_question)[0])
|
||||
new_tokens = len(encode(original_question + reply)[0]) - original_tokens
|
||||
|
||||
logger.info(f'Output generated in {(t1-t0):.2f} seconds ({new_tokens/(t1-t0):.2f} tokens/s, {new_tokens} tokens, context {original_tokens}, seed {state["seed"]})')
|
||||
return
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue