mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2026-03-09 23:23:49 +01:00
Several small fixes
- Stop llama-server subprocess on model unload instead of relying on GC
- Fix tool_calls[].index being string instead of int in API responses
- Omit tool_calls key from API response when empty per OpenAI spec
- Prevent division by zero when micro_batch_size > batch_size in training
- Copy sampler_priority list before mutating in ExLlamaV3
- Normalize presence/frequency_penalty names for ExLlamaV3 sampler sorting
- Restore original chat_template after training instead of leaving it mutated
This commit is contained in:
parent
044566d42d
commit
d03923924a
|
|
@ -343,7 +343,7 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
|
|||
if len(tool_call) > 0:
|
||||
for tc in tool_call:
|
||||
tc["id"] = getToolCallId()
|
||||
tc["index"] = str(len(tool_calls))
|
||||
tc["index"] = len(tool_calls)
|
||||
tc["function"]["arguments"] = json.dumps(tc["function"]["arguments"])
|
||||
tool_calls.append(tc)
|
||||
end_last_tool_call = len(answer)
|
||||
|
|
@ -391,7 +391,7 @@ def chat_completions_common(body: dict, is_legacy: bool = False, stream=False, p
|
|||
resp_list: [{
|
||||
"index": 0,
|
||||
"finish_reason": stop_reason,
|
||||
"message": {"role": "assistant", "content": answer, "tool_calls": tool_calls},
|
||||
"message": {"role": "assistant", "content": answer, **({"tool_calls": tool_calls} if tool_calls else {})},
|
||||
}],
|
||||
"usage": {
|
||||
"prompt_tokens": token_count,
|
||||
|
|
|
|||
|
|
@ -339,11 +339,16 @@ class Exllamav3Model:
|
|||
|
||||
# 3. Get the priority list and handle temperature_last
|
||||
default_priority = ['repetition_penalty', 'presence_frequency_penalty', 'top_k', 'top_p', 'min_p', 'temperature']
|
||||
sampler_priority = state.get('sampler_priority') or default_priority
|
||||
sampler_priority = list(state.get('sampler_priority') or default_priority)
|
||||
|
||||
if state['temperature_last'] and 'temperature' in sampler_priority:
|
||||
sampler_priority.append(sampler_priority.pop(sampler_priority.index('temperature')))
|
||||
|
||||
# The preset system uses separate 'presence_penalty' and
|
||||
# 'frequency_penalty', but ExLlamaV3 has a single combined
|
||||
# SS_PresFreqP sampler. Normalize to the combined name.
|
||||
sampler_priority = ['presence_frequency_penalty' if x in ('presence_penalty', 'frequency_penalty') else x for x in sampler_priority]
|
||||
|
||||
# 4. Sort the unordered list based on the priority list
|
||||
def custom_sort_key(sampler_obj):
|
||||
class_name = sampler_obj.__class__.__name__
|
||||
|
|
|
|||
|
|
@ -126,6 +126,8 @@ def unload_model(keep_model_name=False):
|
|||
|
||||
if model_class_name in ['Exllamav3Model', 'Exllamav3HF', 'TensorRTLLMModel']:
|
||||
shared.model.unload()
|
||||
elif model_class_name == 'LlamaServer':
|
||||
shared.model.stop()
|
||||
|
||||
shared.model = shared.tokenizer = None
|
||||
shared.lora_names = []
|
||||
|
|
|
|||
|
|
@ -333,7 +333,8 @@ def do_train(lora_name: str, always_override: bool, all_linear: bool, q_proj_en:
|
|||
yield "Cannot input zeroes."
|
||||
return
|
||||
|
||||
gradient_accumulation_steps = batch_size // micro_batch_size
|
||||
gradient_accumulation_steps = max(1, batch_size // micro_batch_size)
|
||||
original_chat_template = getattr(shared.tokenizer, 'chat_template', None)
|
||||
if shared.tokenizer.pad_token_id is None:
|
||||
shared.tokenizer.pad_token_id = shared.tokenizer.eos_token_id
|
||||
shared.tokenizer.padding_side = "right"
|
||||
|
|
@ -820,6 +821,10 @@ def do_train(lora_name: str, always_override: bool, all_linear: bool, q_proj_en:
|
|||
logger.info("Training complete, saving")
|
||||
lora_model.save_pretrained(lora_file_path)
|
||||
|
||||
# Restore the original chat_template if we changed it for training
|
||||
if shared.tokenizer is not None and hasattr(shared.tokenizer, 'chat_template'):
|
||||
shared.tokenizer.chat_template = original_chat_template
|
||||
|
||||
if WANT_INTERRUPT:
|
||||
logger.info("Training interrupted.")
|
||||
yield f"Interrupted. Incomplete LoRA saved to `{lora_file_path}`."
|
||||
|
|
|
|||
Loading…
Reference in a new issue