mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2026-04-06 15:13:38 +00:00
API: Rewrite logprobs for OpenAI spec compliance across all backends
- Rewrite logprobs output format to match the OpenAI specification for both chat completions and completions endpoints
- Fix top_logprobs count being ignored for llama.cpp and ExLlamav3 backends in chat completions (always returned 1 instead of requested N)
- Fix non-streaming responses only returning logprobs for the last token instead of all generated tokens (affects all HF-based loaders)
- Fix logprobs returning null for non-streaming chat requests on HF loaders
- Fix off-by-one returning one extra top alternative on HF loaders
This commit is contained in:
parent
5a017aa338
commit
fb1b3b6ddf
3 changed files with 149 additions and 43 deletions
|
|
@@ -78,10 +78,13 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False, escap
|
|||
reply = ''
|
||||
is_stream = state['stream']
|
||||
if len(all_stop_strings) > 0 and not state['stream']:
|
||||
original_logits_processor = state.get('logits_processor')
|
||||
stop_event_ref = state.pop('stop_event', None)
|
||||
state = copy.deepcopy(state)
|
||||
if stop_event_ref is not None:
|
||||
state['stop_event'] = stop_event_ref
|
||||
if original_logits_processor is not None:
|
||||
state['logits_processor'] = original_logits_processor
|
||||
state['stream'] = True
|
||||
|
||||
# Generate
|
||||
|
|
|
|||
|
|
@@ -65,14 +65,16 @@ class LogprobProcessor(LogitsProcessor):
|
|||
def __init__(self, logprobs=None):
|
||||
self.logprobs = logprobs
|
||||
self.token_alternatives = {}
|
||||
self.token_alternatives_history = []
|
||||
|
||||
def __call__(self, input_ids: torch.LongTensor, logits: torch.FloatTensor) -> torch.FloatTensor:
|
||||
if self.logprobs is not None: # 0-5
|
||||
log_e_probabilities = F.log_softmax(logits, dim=1)
|
||||
top_values, top_indices = torch.topk(log_e_probabilities, k=self.logprobs + 1)
|
||||
top_values, top_indices = torch.topk(log_e_probabilities, k=self.logprobs)
|
||||
top_tokens = [get_reply_from_output_ids([tok]) for tok in top_indices[0]]
|
||||
top_probs = [float(x) for x in top_values[0]]
|
||||
self.token_alternatives = dict(zip(top_tokens, top_probs))
|
||||
self.token_alternatives_history.append(self.token_alternatives)
|
||||
|
||||
return logits
|
||||
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue