Safer usage of mkdir across the project

oobabooga 2025-06-17 07:09:33 -07:00
parent 8689d7ecea
commit 0d1597616f
16 changed files with 240 additions and 206 deletions
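The mkdir-related changes below all follow one pattern: a Path.mkdir() call that used to run unconditionally is now guarded by an explicit existence check, and exist_ok=True is kept (or added) on the call itself. A minimal sketch of the before/after shape of the change; output_folder here is an illustrative stand-in value, not a path taken from the diff:

from pathlib import Path

output_folder = Path("user_data/example_output")  # illustrative path, not from the diff

# Before: mkdir() runs on every call. exist_ok=True already tolerates an
# existing directory, but the filesystem is still touched each time.
output_folder.mkdir(parents=True, exist_ok=True)

# After: the guarded form used throughout this commit only creates the
# directory when it does not exist yet.
if not output_folder.exists():
    output_folder.mkdir(parents=True, exist_ok=True)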


@ -314,8 +314,8 @@ class ModelDownloader:
def download_model_files(self, model, branch, links, sha256, output_folder, progress_queue=None, start_from_scratch=False, threads=4, specific_file=None, is_llamacpp=False):
self.progress_queue = progress_queue
output_folder.mkdir(parents=True, exist_ok=True)
if not output_folder.exists():
output_folder.mkdir(parents=True, exist_ok=True)
if not is_llamacpp:
metadata = f'url: https://huggingface.co/{model}\n' \


@ -20,7 +20,7 @@ custom_scheduler_params = {'trigger_loss': 0.0, 'ramp_down_ratio':1.0, 'current_
def custom_scheduler_global_update(current_loss: float):
custom_scheduler_params.update({'current_loss': current_loss})
def custom_scheduler_global_setup(trigger_loss: float, ramp_down_ratio: float):
custom_scheduler_params.update({'trigger_loss': trigger_loss})
custom_scheduler_params.update({'ramp_down_ratio': ramp_down_ratio})
@ -35,12 +35,12 @@ def custom_scheduler_global_setup(trigger_loss: float, ramp_down_ratio: float):
# hold constant to the half of epochs then cosine down to 0
def _get_fp_half_schedule_with_warmup_lr_lambda(current_step: int, *, num_warmup_steps: int, num_training_steps: int, num_firstepoch_steps: int):
global last_print_label
print_label = ''
half_steps = num_training_steps//2
num_warmup_steps = min(num_warmup_steps,half_steps)
if current_step < num_warmup_steps:
@ -49,57 +49,57 @@ def _get_fp_half_schedule_with_warmup_lr_lambda(current_step: int, *, num_warmup
print_label = 'Scheduler: Hold'
else:
print_label = 'Scheduler: Annealing'
if print_label != last_print_label:
print(print_label)
last_print_label = print_label
if current_step < num_warmup_steps:
return float(current_step) / float(max(1, num_warmup_steps))
if current_step < half_steps:
return 1.0
progress = float(current_step - half_steps) / float(max(1, num_training_steps - half_steps))
num_cycles = 0.5
return max(0.0, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress)))
# raise up in cosine, then fall back in cosine
def _get_fp_cosine_raise_and_fall_lr_lambda(current_step: int, *, num_warmup_steps: int, num_training_steps: int, num_firstepoch_steps: int):
global last_print_label
print_label = ''
half_steps = num_training_steps//2
#num_warmup_steps = min(num_warmup_steps,half_steps)
if current_step < half_steps:
print_label = 'Scheduler: Raise'
else:
print_label = 'Scheduler: Fall'
if print_label != last_print_label:
print(print_label)
last_print_label = print_label
# linear
# return float(current_step) / float(max(1, num_warmup_steps))
progress = float(current_step - half_steps) / float(max(1, num_training_steps - half_steps))
num_cycles = 0.5
return max(0.0, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress)))
# constant to the first epochs then cosine down to 0 over the rest epochs
def _get_fp_cosine_schedule_with_warmup_lr_lambda(current_step: int, *, num_warmup_steps: int, num_training_steps: int, num_firstepoch_steps: int):
global last_print_label
print_label = ''
num_warmup_steps = min(num_warmup_steps,num_firstepoch_steps)
if current_step < num_warmup_steps:
@ -108,56 +108,56 @@ def _get_fp_cosine_schedule_with_warmup_lr_lambda(current_step: int, *, num_warm
print_label = 'Scheduler: Hold'
else:
print_label = 'Scheduler: Annealing'
if print_label != last_print_label:
print(print_label)
last_print_label = print_label
if current_step < num_warmup_steps:
return float(current_step) / float(max(1, num_warmup_steps))
if current_step < num_firstepoch_steps:
return 1.0
progress = float(current_step - num_firstepoch_steps) / float(max(1, num_training_steps - num_firstepoch_steps))
num_cycles = 0.5
return max(0.0, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress)))
# halve lr each epoch
def _get_fp_cdrop_rate_schedule_with_warmup_lr_lambda(current_step: int, *, num_warmup_steps: int, num_training_steps: int, num_firstepoch_steps: int):
global last_print_label
print_label = ''
num_warmup_steps = min(num_warmup_steps, num_firstepoch_steps)
current_epoch = (current_step // num_firstepoch_steps) + 1
if current_step < num_warmup_steps:
print_label = 'Scheduler: Warmup'
elif current_step < num_firstepoch_steps:
print_label = 'Scheduler: Hold'
else:
print_label = 'Scheduler: Drop Rate'
if print_label != last_print_label:
print(print_label)
last_print_label = print_label
if current_step < num_warmup_steps:
return float(current_step) / float(max(1, num_warmup_steps))
if current_step < num_firstepoch_steps:
return 1.0
# Compute the learning rate for the annealing phase
learning_rate = 1.0 / float(2 ** (current_epoch - 1))
return learning_rate
# epoch decay: 1/(1 + decay * epoch)
@ -177,7 +177,7 @@ def custom_cosine_scheduler_with_warmup(optimizer, num_warmup_steps, num_trainin
Return:
`torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
"""
lr_lambda = partial(
_get_fp_cosine_schedule_with_warmup_lr_lambda,
num_warmup_steps=num_warmup_steps,
@ -201,7 +201,7 @@ def custom_half_scheduler_with_warmup(optimizer, num_warmup_steps, num_training_
Return:
`torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
"""
lr_lambda = partial(
_get_fp_half_schedule_with_warmup_lr_lambda,
num_warmup_steps=num_warmup_steps,
@ -225,7 +225,7 @@ def custom_raise_fall_scheduler_with_warmup(optimizer, num_warmup_steps, num_tra
Return:
`torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule.
"""
lr_lambda = partial(
_get_fp_cosine_raise_and_fall_lr_lambda,
num_warmup_steps=num_warmup_steps,
@ -257,7 +257,7 @@ def neftune_forward(self, input: torch.Tensor):
mag_norm = self.neftune_noise_alpha / torch.sqrt(dims)
embeddings = embeddings + torch.zeros_like(embeddings).uniform_(-mag_norm, mag_norm)
return embeddings
class FPNEFtuneTrainer(transformers.Trainer):
@ -267,7 +267,7 @@ class FPNEFtuneTrainer(transformers.Trainer):
model = self._activate_neftune(model)
super().__init__(model = model, *args, **kwargs)
def _activate_neftune(self, model):
r"""
Activates the neftune as presented in this code: https://github.com/neelsjain/NEFTune and paper: https://arxiv.org/abs/2310.05914
@ -290,7 +290,7 @@ class FPNEFtuneTrainer(transformers.Trainer):
embeddings._trl_old_forward = old_forward
return model
def train(self, *args, **kwargs):
output = super().train(*args, **kwargs)
@ -318,7 +318,7 @@ class FPSchedulerTrainer(transformers.Trainer):
model = self._activate_neftune(model)
super().__init__(model = model, *args, **kwargs)
def _activate_neftune(self, model):
r"""
Activates the neftune as presented in this code: https://github.com/neelsjain/NEFTune and paper: https://arxiv.org/abs/2310.05914
@ -341,7 +341,7 @@ class FPSchedulerTrainer(transformers.Trainer):
embeddings._trl_old_forward = old_forward
return model
def train(self, *args, **kwargs):
output = super().train(*args, **kwargs)
@ -364,19 +364,19 @@ class FPSchedulerTrainer(transformers.Trainer):
def create_scheduler(self, num_training_steps: int, optimizer: torch.optim.Optimizer = None):
#Setup the scheduler. The optimizer of the trainer must have been set up either before this method is called or passed as an argument.
num_train_epochs = self.args.num_train_epochs
num_warmup_steps=self.args.get_warmup_steps(num_training_steps)
num_firstepoch_steps = math.ceil(num_training_steps/num_train_epochs)
num_warmup_acc = num_warmup_steps*self.args.gradient_accumulation_steps
num_firstepoch_steps_acc = num_firstepoch_steps*self.args.gradient_accumulation_steps
num_training_steps_acc = num_training_steps*self.args.gradient_accumulation_steps
custom_scheduler_params.update({'dynamic_scheduler_stop': False})
print (f"Warm-up steps aligned to Gradient accumulation ({self.args.gradient_accumulation_steps}) = {num_warmup_acc} actual warmup steps")
if self.args.lr_scheduler_type == 'cosine':
num_warmup_acc_min = min(num_warmup_acc, num_firstepoch_steps_acc)
if num_warmup_acc>num_firstepoch_steps_acc:
@ -388,13 +388,13 @@ class FPSchedulerTrainer(transformers.Trainer):
self.lr_scheduler = custom_cosine_scheduler_with_warmup(
optimizer=self.optimizer if optimizer is None else optimizer,
num_warmup_steps=num_warmup_steps,
num_training_steps=num_training_steps,
num_firstepoch_steps = num_firstepoch_steps,
)
self._created_lr_scheduler = True
return self.lr_scheduler
elif self.args.lr_scheduler_type == 'constant':
half_step_acc = num_training_steps_acc//2
num_warmup_acc_min = min(num_warmup_acc, half_step_acc)
@ -407,15 +407,15 @@ class FPSchedulerTrainer(transformers.Trainer):
self.lr_scheduler = custom_half_scheduler_with_warmup(
optimizer=self.optimizer if optimizer is None else optimizer,
num_warmup_steps=num_warmup_steps,
num_training_steps=num_training_steps,
num_firstepoch_steps = num_firstepoch_steps,
)
self._created_lr_scheduler = True
return self.lr_scheduler
elif self.args.lr_scheduler_type == 'constant_with_warmup':
half_step_acc = num_training_steps_acc//2
if num_warmup_steps>0:
print(f"Warmup doesn't apply to this scheduler [Raise-Fall]")
@ -424,10 +424,10 @@ class FPSchedulerTrainer(transformers.Trainer):
self.lr_scheduler = custom_raise_fall_scheduler_with_warmup(
optimizer=self.optimizer if optimizer is None else optimizer,
num_warmup_steps=num_warmup_steps,
num_training_steps=num_training_steps,
num_firstepoch_steps = num_firstepoch_steps,
)
self._created_lr_scheduler = True
return self.lr_scheduler
else:
return super().create_scheduler(num_training_steps=num_training_steps, optimizer=optimizer)


@ -5,7 +5,7 @@ def create_graph(lora_path, lora_name):
try:
import matplotlib.pyplot as plt
from matplotlib.ticker import ScalarFormatter
peft_model_path = f'{lora_path}/training_graph.json'
image_model_path = f'{lora_path}/training_graph.png'
# Check if the JSON file exists
@ -20,7 +20,7 @@ def create_graph(lora_path, lora_name):
# Create the line chart
fig, ax1 = plt.subplots(figsize=(10, 6))
# Plot y1 (learning rate) on the first y-axis
ax1.plot(x, y1, 'b-', label='Learning Rate')
@ -57,6 +57,6 @@ def create_graph(lora_path, lora_name):
print(f"Graph saved in {image_model_path}")
else:
print(f"File 'training_graph.json' does not exist in the {lora_path}")
except ImportError:
print("matplotlib is not installed. Please install matplotlib to create PNG graphs")


@ -109,12 +109,12 @@ def ui():
copy_from = gr.Dropdown(label='Copy parameters from', value='None', choices=get_available_loras_local(non_serialized_params['Lora_sortedByTime']), elem_classes=['slim-dropdown'])
create_refresh_button(copy_from, lambda: None, lambda: {'choices': get_available_loras_local(non_serialized_params['Lora_sortedByTime'])}, 'refresh-button')
with gr.Column():
sort_byTime = gr.Checkbox(label='Sort list by Date', value=False, info='Sorts Loras by date created.', elem_classes=['no-background'])
with gr.Row():
with gr.Column(scale=5):
lora_name = gr.Textbox(label='Name', info='The name of your new LoRA file')
with gr.Column():
always_override = gr.Checkbox(label='Override Existing Files', value=False, info='If the name is the same, checking will replace the existing file, and unchecking will load and continue from it (the rank must be the same).', elem_classes=['no-background'])
@ -132,14 +132,14 @@ def ui():
epochs = gr.Number(label='Epochs', value=3, info='Number of times every entry in the dataset should be fed into training. So 1 means feed each item in once, 5 means feed it in five times, etc.')
learning_rate = gr.Textbox(label='Learning Rate', value='3e-4', info='In scientific notation. 3e-4 is a good starting base point. 1e-2 is extremely high, 1e-6 is extremely low.')
lr_scheduler_type = gr.Dropdown(label='LR Scheduler', value='linear', choices=['linear', 'constant', 'constant_with_warmup', 'cosine', 'cosine_with_restarts', 'polynomial', 'inverse_sqrt', 'FP_low_epoch_annealing', 'FP_half_time_annealing','FP_raise_fall_creative'], info='Learning rate scheduler - defines how the learning rate changes over time. Custom schedulers: FP_low_epoch_annealing, FP_half_time_annealing, FP_raise_fall_creative (see README)', elem_classes=['slim-dropdown'])
with gr.Accordion(label='Checkpoints', open=True):
with gr.Row():
with gr.Column():
save_steps = gr.Number(label='Save every n steps', value=0, info='A checkpoint will be saved every n steps and at each Epoch boundary. (0 = OFF)')
with gr.Column():
save_steps_under_loss = gr.Slider(label='Save at 10% Loss change', value=1.8, minimum=0.0, maximum=3.0, step=0.1, info="Saves checkpoints at (or bellow) this loss and then each time loss falls by at least 10% This works independently from 'Save every n steps'")
with gr.Row():
save_chackpoint_now = gr.Button('Queue Checkpoint Now')
with gr.Accordion(label='Advanced Options', open=True):
@ -148,7 +148,7 @@ def ui():
warmup_steps = gr.Number(label='Warmup Steps', value=100, info='Number of max steps used for a linear warmup. Reduces early over-fitting by the first training blocks. Value has precedent over Warmup Ratio. Aligns to the closest multiple of graddient accumulation')
warmup_ratio = gr.Slider(label='Warmup Ratio', minimum=0.0, maximum=0.2, step=0.025, value=0.0, info='Ratio of total training steps that will be used for a linear warmup. It applies only if Warmup Step is 0.')
neft_noise_alpha = gr.Slider(label='NEFtune noise scale', minimum=0.0, maximum=15, step=1, value=0.0, info='Add noise to the training to improve generalization. [0 - OFF, Starting value to experiment: 5]')
training_projection = gr.Radio(value = train_choices[4], label='LLaMA Target Projections', info='Change the targets (LORA is typically q-v)', choices=train_choices)
lora_dropout = gr.Slider(label='LoRA Dropout', minimum=0.0, maximum=1.0, step=0.025, value=0.05, info='Percentage probability for dropout of LoRA layers. This can help reduce overfitting. Most users should leave at default.')
optimizer = gr.Dropdown(label='Optimizer', value='adamw_torch', choices=['adamw_hf', 'adamw_torch', 'adamw_torch_fused', 'adamw_torch_xla', 'adamw_apex_fused', 'adafactor', 'adamw_bnb_8bit', 'adamw_anyprecision', 'sgd', 'adagrad'], info='Different optimizer implementation options, for advanced users. Effects of different options are not well documented yet.', elem_classes=['slim-dropdown'])
@ -157,10 +157,10 @@ def ui():
add_bos_token = gr.Checkbox(label='Add BOS token', value=True, info="Adds BOS token for each dataset item")
add_eos_token = gr.Checkbox(label='Add EOS token', value=False, info="Adds EOS token for each dataset item")
add_eos_token_type = gr.Dropdown(label='EOS placement (Text file)', choices=['Every Block', 'Hard Cut Blocks Only'], value='Every Block', info='', allow_custom_value = False)
higher_rank_limit = gr.Checkbox(label='Enable higher ranks', value=False, info='If checked, changes Rank/Alpha slider above to go much higher. This will not work without a datacenter-class GPU.')
report_to = gr.Radio(label="Save detailed logs with", value="None", choices=["None", "wandb", "tensorboard"], interactive=True)
# for future
#with gr.Accordion(label='Dynamic Scheduler', open = False):
# ds_min_epochs = gr.Number(label='Minimum Epochs', value='1', info='Minimum epochs that will be always performed before ramp down can be triggered')
# ds_max_epochs = gr.Number(label='Maximum Epochs (fallback)', value='50', info='Maximum Epochs before the training will bail out completely (should be a large number)')
@ -168,7 +168,7 @@ def ui():
# ds_loss_rolling_window = gr.Number(label='Loss rolling average', value='4', info='Calculate loss by averaging last x numbers to avoid jumps and noise')
# ds_epochs_to_ramp = gr.Slider(label='Ramp down ratio', minimum=0.0, maximum=2.0, step=0.1, value=1.00, info='How long the ramp down will last relative to ellapsed steps (before trigger)')
# gr.Markdown('These are settings for FP_dynamic_loss_trigger scheduler. The scheduler will do warm up, then hold constant untill a loss falls under Trigger Loss, then it will commence linear ramp down schedule and stop. The length of ramp down is set by Ramp down ratio where (ramp down steps) = ratio * (elapsed steps). (The time to completition shown will be very high untill ramp down is triggered.)')
with gr.Column():
with gr.Tab(label='Formatted Dataset'):
@ -217,7 +217,7 @@ def ui():
cutoff_len = gr.Slider(label='Chunk Length (Cutoff Length)', minimum=32, maximum=2048, value=256, step=32, info='The maximum length of a chunk (in tokens). Applies to both JSON dataset and text files. Higher values require much more VRAM.')
with gr.Row():
with gr.Column():
check_dataset_btn = gr.Button('Verify Dataset/Text File and suggest data entries')
check_dataset_txt = gr.Textbox(label='Dataset info', value='')
with gr.Row():
@ -227,8 +227,8 @@ def ui():
with gr.Accordion(label="Graph", open=True):
with gr.Row():
# show_actions_button = False - we use old gradio
plot_graph = gr.LinePlot(x="epoch", y="value", title="Loss Metrics", overlay_point=True, tooltip=["epoch", "value"], x_lim=[0, 1], y_lim=[0, 3.5], width=500, height=250)
output = gr.Markdown(value="Ready")
with gr.Tab('Perplexity evaluation', elem_id='evaluate-tab'):
@ -267,7 +267,7 @@ def ui():
return grad_accumulation_val
copy_from.change(partial(do_copy_params, all_params= all_params), copy_from, all_params).then(fix_old_version,[batch_size,micro_batch_size, grad_accumulation],grad_accumulation)
start_button.click(do_train, all_params, [output,plot_graph])
stop_button.click(do_interrupt, None, None, queue=False)
@ -306,8 +306,8 @@ def ui():
if shared.tokenizer is None:
yield "Tokenizer is not available. Please Load some Model first."
return
if raw_text_file not in ['None', '']:
logger.info("Loading Text file...")
fullpath = clean_path('user_data/training/datasets', f'{raw_text_file}')
@ -329,8 +329,8 @@ def ui():
except:
yield f"{raw_text_file}.txt doesn't seem to exsist anymore... check your user_data/training/datasets folder"
return
if min_chars<0:
min_chars = 0
@ -343,11 +343,11 @@ def ui():
total_blocks = len(text_chunks)
result = f"Text: ({raw_text_file}.txt) has {total_blocks} blocks (Block Size {cutoff_len} tokens)"
del text_chunks
else:
if dataset in ['None', '']:
yield "Select dataset or text file."
return
if format in ['None', '']:
yield "Select format choice for dataset."
@ -382,8 +382,8 @@ def ui():
logger.info("Loading JSON datasets...")
data = load_dataset("json", data_files=clean_path('user_data/training/datasets', f'{dataset}.json'))
data_keys = []
if data:
if 'train' in data: # Check if the 'train' split exists in the dataset
@ -400,11 +400,11 @@ def ui():
#for options, data in format_data.items():
# format_keys = options.split(',')
# result += f"{format_keys}, "
#result = result.rstrip()
#result = result.rstrip(',')
if total_blocks>0:
number_ofSteps = int(math.ceil(total_blocks / micro_batch_size) * epochs)
num_stepsPer_epoch = int(math.ceil(number_ofSteps/epochs))
min_warm = math.ceil(100 / grad_accumulation)
@ -415,20 +415,20 @@ def ui():
save_each_n_max = int(math.ceil(number_ofSteps/5))
gradient_accumulation_max = int(total_blocks)//micro_batch_size
result += f"\n[Batch Size: {micro_batch_size}, Epochs: {epochs}, Gradient Accumulation: {grad_accumulation}]\n"
result += f"Total number of steps: {number_ofSteps}\n"
result += f"Steps per each Epoch: {num_stepsPer_epoch}\n"
result += f"Suggestions:\n"
result += f"Checkpoints: Save every {save_each_n_min} - {save_each_n_max} steps (Current: {int(save_steps)})\n"
result += f"Warmup steps: {warmup_steps_suggest} (Current: {int(warmup_steps)})"
if gradient_accumulation_max < grad_accumulation:
result += f"\n\nWARNING: Gradient Accumulation {grad_accumulation} is too high: It should be below {gradient_accumulation_max}"
yield result
return
check_dataset_btn.click(check_dataset, dataset_calc_params ,check_dataset_txt)
# Evaluation events. For some reason, the interrupt event
@ -449,10 +449,10 @@ def ui():
def reload_lora():
return gr.Dropdown.update(choices=get_available_loras_local(non_serialized_params['Lora_sortedByTime']))
# nonserialized items
sort_byTime.change(lambda x: non_serialized_params.update({"Lora_sortedByTime": x}), sort_byTime, None).then(reload_lora,None,copy_from)
#debug_slicer.change(lambda x: non_serialized_params.update({"debug_slicer": x}), debug_slicer, None)
def update_dataset():
@ -482,7 +482,7 @@ def do_copy_params(lora_name: str, all_params):
else:
params = {}
else:
params = {}
result = list()
for i in range(0, len(PARAMETERS)):
@ -521,7 +521,8 @@ def backup_adapter(input_folder):
# Create the new subfolder
subfolder_path = Path(f"{input_folder}/{creation_date_str}")
subfolder_path.mkdir(parents=True, exist_ok=True)
if not subfolder_path.exists():
subfolder_path.mkdir(parents=True, exist_ok=True)
# Check if the file already exists in the subfolder
backup_adapter_file = Path(f"{input_folder}/{creation_date_str}/adapter_model.bin")
@ -607,7 +608,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
shared.tokenizer.padding_side = "left"
def encode(text, prepend_bos_token):
result = shared.tokenizer.encode(text, truncation=True, max_length=cutoff_len)
# Check if the first two tokens are BOS
if len(result) >= 2 and result[:2] == [shared.tokenizer.bos_token_id, shared.tokenizer.bos_token_id]:
@ -626,7 +627,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
input_ids.append(shared.tokenizer.eos_token_id)
input_ids = [shared.tokenizer.pad_token_id] * (cutoff_len - len(input_ids)) + input_ids
labels = [1] * len(input_ids)
else:
ind = prompt.index(train_only_after) + len(train_only_after)
@ -653,7 +654,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
}
train_template.clear()
#reset stuff
print(f"*** LoRA: {lora_name} ***")
non_serialized_params.update({"stop_at_loss": stop_at_loss})
@ -665,7 +666,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
non_serialized_params.update({"checkpoint_offset": 0})
non_serialized_params.update({"epoch_offset": 0})
train_log_graph.clear()
# == Prep the dataset, format, etc ==
if raw_text_file not in ['None', '']:
train_template["template_type"] = "raw_text"
@ -685,8 +686,8 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
else:
with open(clean_path('user_data/training/datasets', f'{raw_text_file}.txt'), 'r', encoding='utf-8') as file:
raw_text = file.read().replace('\r', '')
# FPHAM PRECISE SLICING
if min_chars<0:
min_chars = 0
@ -703,7 +704,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
train_data = Dataset.from_list([tokenize(x, add_EOS_to_all, add_bos_token) for x in text_chunks])
if add_EOS_to_all:
print(f"Added EOS to {len(text_chunks)} blocks")
print(f"All Data Blocks: {len(text_chunks)}")
@ -745,7 +746,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
data = load_dataset("json", data_files=clean_path('user_data/training/datasets', f'{dataset}.json'))
train_data = data['train'].map(generate_and_tokenize_prompt, new_fingerprint='%030x' % random.randrange(16**30))
print(f"BOS: {add_bos_token} EOS: {add_eos_token}")
print(f"Data Blocks: {train_data.num_rows}")
if eval_dataset == 'None':
@ -783,7 +784,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
print(f"Method: {RED}QLORA{RESET}")
prepare_model_for_kbit_training(shared.model)
else:
print(f"Method: {RED}LoRA{RESET}")
# base model is now frozen and should not be reused for any other LoRA training than this one
shared.model_dirty_from_training = True
@ -796,7 +797,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
elif training_projection==train_choices[2]:
model_to_lora_modules[model_id] = ["q_proj","k_proj", "v_proj"]
elif training_projection==train_choices[3]:
model_to_lora_modules[model_id] = ["k_proj", "v_proj", "down_proj"]
else:
model_to_lora_modules[model_id] = ["q_proj", "v_proj"]
@ -827,9 +828,9 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
set_peft_model_state_dict(lora_model, state_dict_peft)
print(f" + Continue Training on {RED}{lora_file_path}/adapter_model.bin{RESET}")
#load training_log.json if exist
if Path(f"{lora_file_path}/training_log.json").is_file():
with open(f"{lora_file_path}/training_log.json", 'r') as json_file:
json_ilog = json.load(json_file)
@ -840,13 +841,13 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
if key=='epoch':
non_serialized_params.update({"epoch_offset": value})
print(f" + Epoch offset: {RED}{non_serialized_params['epoch_offset']}{RESET}")
if Path(f"{lora_file_path}/training_graph.json").is_file():
try:
with open(f"{lora_file_path}/training_graph.json", 'r') as json_file:
train_log_graph = json.load(json_file)
print(" + Training Graph loaded")
except:
print(f"Can't read training_graph")
@ -876,72 +877,72 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
else:
current_loss = float(train_log.get('loss', 0.0))
current_epoch_int = int(float(train_log.get('epoch', 0.0)))
force_save = False
current_steps_offset = tracked.current_steps + non_serialized_params['checkpoint_offset']
folder_save = f"checkpoint-{current_steps_offset}"
# save if triggered by user
if non_serialized_params['save_checkpoint_now']:
force_save = True
non_serialized_params.update({"save_checkpoint_now": False})
print(f"\033[1;31;1mSave Checkpoint manually trigerred.\033[0;37;0m")
folder_save = f"checkpoint-{current_steps_offset}-user"
patience = 3 # Set the number of consecutive steps for tracking stability
if gradient_accumulation_steps==1:
patience = 4
min_steps = ssteps10
# Save each time the loss is below the threshold
if current_loss < non_serialized_params['save_steps_under_loss'] and current_loss > 0 and state.global_step > min_steps:
current_stability = non_serialized_params['current_stability']
current_stability += 1
non_serialized_params.update({"current_stability": current_stability})
if current_stability >= patience:
current_stability = 0
non_serialized_params.update({"current_stability": current_stability})
current_loss_dec = round(current_loss, 2)
loss_str = f"{current_loss_dec:.2f}"
loss_str = loss_str.replace('.', '_')
new_save = (current_loss_dec-0.1) + 0.01
non_serialized_params.update({"save_steps_under_loss": new_save})
folder_save = f"checkpoint-{current_steps_offset}-loss-{loss_str}"
force_save = True
else:
# Reset stability if the loss goes above the threshold
non_serialized_params.update({"current_stability": 0})
# Save full epochs
if actual_save_steps>0 and current_epoch_int > non_serialized_params['save_epochs'] and state.global_step > min_steps:
current_epoch_offset = current_epoch_int
if non_serialized_params['epoch_offset'] > 0:
current_epoch_offset = current_epoch_int + round(non_serialized_params['epoch_offset'], 2)
ep_off_str = f"{current_epoch_offset}"
ep_off_str = ep_off_str.replace('.', '_')
folder_save = f"checkpoint-{current_steps_offset}-epoch-{ep_off_str}"
non_serialized_params.update({"save_epochs": current_epoch_int})
force_save = True
# save each actual_save_steps
if state.global_step > 0 and actual_save_steps > 0 and state.global_step % actual_save_steps == 0:
folder_save = f"checkpoint-{current_steps_offset}"
force_save = True
if force_save:
lora_model.save_pretrained(f"{lora_file_path}/{folder_save}/", safe_serialization = non_serialized_params['safe_serialization'])
print(f"\033[1;30;40mStep: {tracked.current_steps:6} \033[0;37;0m Saved: [{folder_save}]")
# Save log
@ -950,7 +951,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
# == Save training prompt ==
with open(f"{lora_file_path}/{folder_save}/training_prompt.json", 'w', encoding='utf-8') as file:
json.dump(train_template, file, indent=2)
def on_substep_end(self, args: transformers.TrainingArguments, state: transformers.TrainerState, control: transformers.TrainerControl, **kwargs):
tracked.current_steps += 1
@ -975,7 +976,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
print(f"\033[1;30;40mStep: {tracked.current_steps:6} [+{non_serialized_params['checkpoint_offset']}] \033[0;37;0m", end='')
else:
print(f"\033[1;30;40mStep: {tracked.current_steps:6} \033[0;37;0m", end='')
graphentry = {
'current_steps': int(train_log.get('current_steps_adjusted',0)),
'loss': float(train_log.get('loss', 0.0)),
@ -986,7 +987,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
cur_loss = float(train_log.get('loss', 0.0))
cur_lr = float(train_log.get('learning_rate', 0.0))
cur_epoch = float(train_log.get('epoch', 0.0))
if len(statistics['loss']) == 1:
first_epoch = statistics['loss'][0]['epoch']
first_value = statistics['loss'][0]['value']
@ -1013,7 +1014,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
# FPHAM SAMPLE REQ Transformers error handling
gradient_accumulation_max = int(train_data.num_rows)//micro_batch_size
if gradient_accumulation_max < gradient_accumulation_steps:
print(f"{RED}WARNING:{RESET} Current gradient accumulation is {RED}too high{RESET} for the amount of training data.")
print(f"Gradient accumulation: {gradient_accumulation_steps} should be less than: {gradient_accumulation_max}. {RED}This could crash Accelerate/Transformers{RESET}")
@ -1041,9 +1042,9 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
elif lr_scheduler_type =='FP_raise_fall_creative':
custom_scheduller = True
lr_scheduler_type_arg = 'constant_with_warmup'
#gradient_checkpointing=True
args=transformers.TrainingArguments(
report_to=report_to if report_to != "None" else None,
per_device_train_batch_size=micro_batch_size,
@ -1095,7 +1096,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
data_collator=transformers.DataCollatorForLanguageModeling(shared.tokenizer, mlm=False),
callbacks=list([Callbacks()])
)
# END OF FPHAM CUSTOM SCHEDULER
lora_model.config.use_cache = False
@ -1141,7 +1142,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
if stop_at_loss > 0:
print(f"Monitoring loss {RED}(Auto-Stop at: {stop_at_loss}){RESET}")
if WANT_INTERRUPT:
yield "Interrupted before start.", zero_pd
@ -1157,7 +1158,9 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
decoded_entries.append({"value": decoded_text})
# Write the log file
Path('user_data/logs').mkdir(exist_ok=True)
if not Path('user_data/logs').exists():
Path('user_data/logs').mkdir(exist_ok=True)
with open(Path('user_data/logs/train_dataset_sample.json'), 'w') as json_file:
json.dump(decoded_entries, json_file, indent=4)
@ -1191,7 +1194,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
else:
max_value = 3.5
last_epoch = 0
first_epoch = 0
if WANT_INTERRUPT:
@ -1210,7 +1213,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
time_elapsed = time.perf_counter() - start_time
lastloss = float(train_log.get('loss', 0.0))
non_serialized_params.update({"training_loop": True})
if lastloss > 0:
lastloss_str = f", ... Current Loss: `{lastloss:.2f}`"
@ -1232,7 +1235,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
if stop_at_loss != non_serialized_params['stop_at_loss']:
stop_at_loss = non_serialized_params['stop_at_loss']
print(f"Stop at loss changed {RED}(Auto-Stop at: {stop_at_loss}){RESET}")
losses = gr.LinePlot.update(
value = pd.DataFrame(statistics['loss']),
x="epoch", y="value",
@ -1240,7 +1243,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
overlay_point=True, tooltip=["epoch", "value"],
x_lim=[first_epoch,last_epoch], y_lim=[0,max_value],
width=500, height=250 )
yield f"Running... **{tracked.current_steps}** / **{tracked.max_steps}** ... {timer_info}, {format_time(time_elapsed)} / {format_time(total_time_estimate)} ... {format_time(total_time_estimate - time_elapsed)} remaining {lastloss_str}", losses
@ -1256,7 +1259,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch
else:
max_value = 3.5
last_epoch = 0
first_epoch = 0
return_pd = gr.LinePlot.update(
value = pd.DataFrame(statistics['loss']),


@ -20,7 +20,7 @@ def list_subfoldersByTime(directory):
if not directory.endswith('/'):
directory += '/'
subfolders = []
subfolders.append('None')
path = directory
name_list = os.listdir(path)
full_list = [os.path.join(path,i) for i in name_list]
@ -37,19 +37,19 @@ def list_subfoldersByTime(directory):
return subfolders
def get_available_loras_local(_sortedByTime):
model_dir = shared.args.lora_dir # Update with the appropriate directory path
subfolders = []
if _sortedByTime:
subfolders = list_subfoldersByTime(model_dir)
else:
subfolders = utils.get_available_loras()
return subfolders
# FPHAM SPLIT BY SENTENCE BLOCK ===============
def split_sentences(text: str, cutoff_len: int):
sentences = []
sentence = ''
@ -57,24 +57,24 @@ def split_sentences(text: str, cutoff_len: int):
abbreviations = ['Mr. ', 'Mrs. ', 'Dr. ', 'Ms. ', 'St. ', 'Prof. ', 'Jr. ', 'Ltd. ', 'Capt. ', 'Col. ', 'Gen. ', 'Ave. ', 'Blvd. ', 'Co. ', 'Corp. ', 'Dept. ', 'Est. ', 'Gov. ', 'Inc. ', 'Ph.D. ', 'Univ. ']
errors = 0
max_cut = cutoff_len-1
prev_char = ''
for char in text:
sentence += char
if (any(sentence.endswith(delimiter) for delimiter in delimiters) and
not (prev_char.isupper() and len(sentence) >= 3 and sentence[-3] != ' ') and
not any(sentence.endswith(abbreviation) for abbreviation in abbreviations)):
tokens = shared.tokenizer.encode(sentence)
if len(tokens) > max_cut:
tokens = tokens[:max_cut]
sentence = shared.tokenizer.decode(tokens, skip_special_tokens=True)
errors = errors + 1
sentences.append({'text': sentence, 'size': len(tokens)})
sentence = ''
prev_char = char
@ -83,7 +83,7 @@ def split_sentences(text: str, cutoff_len: int):
tokens = shared.tokenizer.encode(sentence)
if len(tokens) > max_cut:
tokens = tokens[:max_cut]
sentence = shared.tokenizer.decode(tokens, skip_special_tokens=True)
errors = errors + 1
sentences.append({'text': sentence, 'size': len(tokens)})
@ -95,16 +95,16 @@ def split_sentences(text: str, cutoff_len: int):
# The goal of following code is to create blocks of text + overlapping blocks while:
# respects sentence boundaries
# always uses all the text
# hard cut defined by hard_cut_string or </s> will always end at the end of data block
# no overlapping blocks will be created across hard cut or across </s> token
def precise_cut(text: str, overlap: bool, min_chars_cut: int, eos_to_hc: bool, cutoff_len: int, hard_cut_string: str, debug_slicer:bool):
EOSX_str = '<//>' #hardcut placeholder
EOS_str = '</s>'
print("Precise raw text slicer: ON")
cut_string = hard_cut_string.replace('\\n', '\n')
text = text.replace(cut_string, EOSX_str)
sentences = split_sentences(text, cutoff_len)
@ -121,7 +121,7 @@ def precise_cut(text: str, overlap: bool, min_chars_cut: int, eos_to_hc: bool, c
half_index = 0
for index, item in enumerate(sentences):
if halfcut_length+ item['size'] < half_cut:
halfcut_length += item['size']
half_index = index
@ -130,7 +130,7 @@ def precise_cut(text: str, overlap: bool, min_chars_cut: int, eos_to_hc: bool, c
halfcut_length = -2 * max_cut
if totalLength + item['size'] < max_cut and not currentSentence.endswith(EOSX_str):
currentSentence += item['text']
totalLength += item['size']
else:
@ -141,14 +141,14 @@ def precise_cut(text: str, overlap: bool, min_chars_cut: int, eos_to_hc: bool, c
currentSentence = item['text']
totalLength = item['size']
halfcut_length = item['size']
if len(currentSentence.strip()) > min_chars_cut:
sentencelist.append(currentSentence.strip())
unique_blocks = len(sentencelist)
print(f"Text Blocks: {unique_blocks}")
#overlap strategies:
# don't overlap across HARD CUT (EOSX)
if overlap:
for edge_idx in edgeindex:
@ -162,15 +162,15 @@ def precise_cut(text: str, overlap: bool, min_chars_cut: int, eos_to_hc: bool, c
else:
#if by chance EOSX is at the end then it's acceptable
if currentSentence.endswith(EOSX_str) and len(currentSentence.strip()) > min_chars_cut:
sentencelist.append(currentSentence.strip())
# otherwise don't cross hard cut
elif EOSX_str not in currentSentence and len(currentSentence.strip()) > min_chars_cut:
sentencelist.append(currentSentence.strip())
currentSentence = ''
totalLength = 0
break
print(f"+ Overlapping blocks: {len(sentencelist)-unique_blocks}")
num_EOS = 0
@ -179,7 +179,7 @@ def precise_cut(text: str, overlap: bool, min_chars_cut: int, eos_to_hc: bool, c
sentencelist[i] = sentencelist[i].replace(EOSX_str, EOS_str)
else:
sentencelist[i] = sentencelist[i].replace(EOSX_str, '')
#someone may have had stop strings in the raw text...
sentencelist[i] = sentencelist[i].replace("</s></s>", EOS_str)
num_EOS += sentencelist[i].count(EOS_str)
@ -193,47 +193,49 @@ def precise_cut(text: str, overlap: bool, min_chars_cut: int, eos_to_hc: bool, c
if debug_slicer:
# Write the log file
Path('user_data/logs').mkdir(exist_ok=True)
if not Path('user_data/logs').exists():
Path('user_data/logs').mkdir(exist_ok=True)
sentencelist_dict = {index: sentence for index, sentence in enumerate(sentencelist)}
output_file = "user_data/logs/sentencelist.json"
with open(output_file, 'w') as f:
json.dump(sentencelist_dict, f,indent=2)
print("Saved sentencelist.json in user_data/logs folder")
return sentencelist
def sliding_block_cut(text: str, min_chars_cut: int, eos_to_hc: bool, cutoff_len: int, hard_cut_string: str, debug_slicer:bool):
EOSX_str = '<//>' #hardcut placeholder
EOS_str = '</s>'
print("Mega Block Overlap: ON")
cut_string = hard_cut_string.replace('\\n', '\n')
text = text.replace(cut_string, EOSX_str)
sentences = split_sentences(text, cutoff_len)
print(f"Sentences: {len(sentences)}")
sentencelist = []
max_cut = cutoff_len-1
#print(f"max_cut: {max_cut}")
advancing_to = 0
prev_block_lastsentence = ""
for i in range(len(sentences)):
totalLength = 0
currentSentence = ''
lastsentence = ""
if i >= advancing_to:
for k in range(i, len(sentences)):
current_length = sentences[k]['size']
if totalLength + current_length <= max_cut and not currentSentence.endswith(EOSX_str):
@ -245,7 +247,7 @@ def sliding_block_cut(text: str, min_chars_cut: int, eos_to_hc: bool, cutoff_len
if prev_block_lastsentence!=lastsentence:
sentencelist.append(currentSentence.strip())
prev_block_lastsentence = lastsentence
advancing_to = 0
if currentSentence.endswith(EOSX_str):
advancing_to = k
@ -253,7 +255,7 @@ def sliding_block_cut(text: str, min_chars_cut: int, eos_to_hc: bool, cutoff_len
currentSentence = ""
totalLength = 0
break
if currentSentence != "":
if len(currentSentence.strip()) > min_chars_cut:
sentencelist.append(currentSentence.strip())
@ -266,7 +268,7 @@ def sliding_block_cut(text: str, min_chars_cut: int, eos_to_hc: bool, cutoff_len
sentencelist[i] = sentencelist[i].replace(EOSX_str, EOS_str)
else:
sentencelist[i] = sentencelist[i].replace(EOSX_str, '')
#someone may have had stop strings in the raw text...
sentencelist[i] = sentencelist[i].replace("</s></s>", EOS_str)
num_EOS += sentencelist[i].count(EOS_str)
@ -280,16 +282,18 @@ def sliding_block_cut(text: str, min_chars_cut: int, eos_to_hc: bool, cutoff_len
if debug_slicer:
# Write the log file
Path('user_data/logs').mkdir(exist_ok=True)
if not Path('user_data/logs').exists():
Path('user_data/logs').mkdir(exist_ok=True)
sentencelist_dict = {index: sentence for index, sentence in enumerate(sentencelist)}
output_file = "user_data/logs/sentencelist.json"
with open(output_file, 'w') as f:
json.dump(sentencelist_dict, f,indent=2)
print("Saved sentencelist.json in user_data/logs folder")
return sentencelist
# Example usage:
# download_file_from_url('https://example.com/path/to/your/file.ext', '/output/directory')
@ -326,17 +330,17 @@ def download_file_from_url(url, overwrite, output_dir_in, valid_extensions = {'.
# Send an HTTP GET request to the URL with a timeout
file_extension = os.path.splitext(filename_lower)[-1]
if file_extension not in valid_extensions:
yield f"Invalid file extension: {file_extension}. Only {valid_extensions} files are supported."
return
with session.get(url, stream=True, headers=headers, timeout=10) as r:
r.raise_for_status()
# total size can be wildly inaccurate
#total_size = int(r.headers.get('content-length', 0))
block_size = 1024 * 4
with open(local_filename, mode) as f:
count = 0
for data in r.iter_content(block_size):


@ -168,7 +168,8 @@ def setup():
print("[XTTS] Loading XTTS...")
model = load_model()
print("[XTTS] Done!")
Path(f"{this_dir}/outputs").mkdir(parents=True, exist_ok=True)
if not Path(f"{this_dir}/outputs").exists():
Path(f"{this_dir}/outputs").mkdir(parents=True, exist_ok=True)
def ui():


@ -168,7 +168,8 @@ def get_SD_pictures(description, character):
variadic = f'{date.today().strftime("%Y_%m_%d")}/{character}_{int(time.time())}'
output_file = Path(f'extensions/sd_api_pictures/outputs/{variadic}.png')
output_file.parent.mkdir(parents=True, exist_ok=True)
if not output_file.parent.exists():
output_file.parent.mkdir(parents=True, exist_ok=True)
with open(output_file.as_posix(), 'wb') as f:
f.write(img_data)


@ -973,8 +973,8 @@ def save_history(history, unique_id, character, mode):
return
p = get_history_file_path(unique_id, character, mode)
if not p.parent.is_dir():
p.parent.mkdir(parents=True)
if not p.parent.exists():
p.parent.mkdir(parents=True, exist_ok=True)
with open(p, 'w', encoding='utf-8') as f:
f.write(json.dumps(history, indent=4, ensure_ascii=False))
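This hunk also changes the call itself: the old code checked p.parent.is_dir() and called mkdir(parents=True) without exist_ok, while the new code checks exists() and keeps exist_ok=True. That combination matters because check-then-create is not atomic. A short sketch under an assumed path (the literal filename below is made up for illustration; get_history_file_path is the project's own helper):

from pathlib import Path

# Hypothetical history file path, standing in for get_history_file_path(...)
p = Path("user_data/logs/chat/Example/20250617-12-00-00.json")

# exists() and mkdir() are two separate filesystem operations, so another
# process could create the directory in between. With exist_ok=True the
# mkdir() call is a no-op in that case instead of raising FileExistsError.
if not p.parent.exists():
    p.parent.mkdir(parents=True, exist_ok=True)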
@ -1014,7 +1014,9 @@ def get_paths(state):
unique_id = datetime.now().strftime('%Y%m%d-%H-%M-%S')
p = get_history_file_path(unique_id, character, state['mode'])
logger.warning(f"Moving \"{new_p}\" to \"{p}\"")
p.parent.mkdir(exist_ok=True)
if not p.parent.exists():
p.parent.mkdir(exist_ok=True)
new_p.rename(p)
return Path(f'user_data/logs/chat/{character}').glob('*.json')
@ -1163,7 +1165,9 @@ def save_last_chat_state(character, mode, unique_id):
state["last_chats"][key] = unique_id
state_file = Path('user_data/logs/chat_state.json')
state_file.parent.mkdir(exist_ok=True)
if not state_file.parent.exists():
state_file.parent.mkdir(exist_ok=True)
with open(state_file, 'w', encoding='utf-8') as f:
f.write(json.dumps(state, indent=2))


@ -27,7 +27,9 @@ def save_past_evaluations(df):
global past_evaluations
past_evaluations = df
filepath = Path('user_data/logs/evaluations.csv')
filepath.parent.mkdir(parents=True, exist_ok=True)
if not filepath.parent.exists():
filepath.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(filepath, index=False)


@ -9,7 +9,9 @@ def load_prompt(fname):
# Create new file
new_name = utils.current_time()
prompt_path = Path("user_data/logs/notebook") / f"{new_name}.txt"
prompt_path.parent.mkdir(parents=True, exist_ok=True)
if not prompt_path.parent.exists():
prompt_path.parent.mkdir(parents=True, exist_ok=True)
initial_content = "In this story,"
prompt_path.write_text(initial_content, encoding='utf-8')

View file

@ -241,7 +241,8 @@ def backup_adapter(input_folder):
# Create the new subfolder
subfolder_path = Path(f"{input_folder}/{creation_date_str}")
subfolder_path.mkdir(parents=True, exist_ok=True)
if not subfolder_path.exists():
subfolder_path.mkdir(parents=True, exist_ok=True)
# Check if the file already exists in the subfolder
backup_adapter_file = Path(f"{input_folder}/{creation_date_str}/adapter_model.bin")
@ -676,7 +677,9 @@ def do_train(lora_name: str, always_override: bool, q_proj_en: bool, v_proj_en:
decoded_entries.append({"value": decoded_text})
# Write the log file
Path('user_data/logs').mkdir(exist_ok=True)
if not Path('user_data/logs').exists():
Path('user_data/logs').mkdir(exist_ok=True)
with open(Path('user_data/logs/train_dataset_sample.json'), 'w') as json_file:
json.dump(decoded_entries, json_file, indent=4)


@ -409,7 +409,9 @@ def _perform_debounced_save():
if _last_interface_state is not None:
contents = save_settings(_last_interface_state, _last_preset, _last_extensions, _last_show_controls, _last_theme_state, manual_save=False)
settings_path = Path('user_data') / 'settings.yaml'
settings_path.parent.mkdir(exist_ok=True)
if not settings_path.parent.exists():
settings_path.parent.mkdir(exist_ok=True)
with open(settings_path, 'w', encoding='utf-8') as f:
f.write(contents)
except Exception as e:

View file

@ -152,7 +152,9 @@ def autosave_prompt(text, prompt_name):
"""Automatically save the text to the selected prompt file"""
if prompt_name and text.strip():
prompt_path = Path("user_data/logs/notebook") / f"{prompt_name}.txt"
prompt_path.parent.mkdir(parents=True, exist_ok=True)
if not prompt_path.parent.exists():
prompt_path.parent.mkdir(parents=True, exist_ok=True)
prompt_path.write_text(text, encoding='utf-8')
@ -205,7 +207,9 @@ def handle_new_prompt():
# Create the new prompt file
prompt_path = Path("user_data/logs/notebook") / f"{new_name}.txt"
prompt_path.parent.mkdir(parents=True, exist_ok=True)
if not prompt_path.parent.exists():
prompt_path.parent.mkdir(parents=True, exist_ok=True)
prompt_path.write_text("In this story,", encoding='utf-8')
return gr.update(choices=utils.get_available_prompts(), value=new_name)
@ -222,7 +226,9 @@ def handle_delete_prompt_confirm_default(prompt_name):
new_value = available_prompts[min(current_index, len(available_prompts) - 1)]
else:
new_value = utils.current_time()
Path("user_data/logs/notebook").mkdir(parents=True, exist_ok=True)
if not Path("user_data/logs/notebook").exists():
Path("user_data/logs/notebook").mkdir(parents=True, exist_ok=True)
(Path("user_data/logs/notebook") / f"{new_value}.txt").write_text("In this story,")
available_prompts = [new_value]

View file

@ -197,7 +197,9 @@ def handle_new_prompt():
# Create the new prompt file
prompt_path = Path("user_data/logs/notebook") / f"{new_name}.txt"
prompt_path.parent.mkdir(parents=True, exist_ok=True)
if not prompt_path.parent.exists():
prompt_path.parent.mkdir(parents=True, exist_ok=True)
prompt_path.write_text("In this story,", encoding='utf-8')
return gr.update(choices=utils.get_available_prompts(), value=new_name)
@ -214,7 +216,9 @@ def handle_delete_prompt_confirm_notebook(prompt_name):
new_value = available_prompts[min(current_index, len(available_prompts) - 1)]
else:
new_value = utils.current_time()
Path("user_data/logs/notebook").mkdir(parents=True, exist_ok=True)
if not Path("user_data/logs/notebook").exists():
Path("user_data/logs/notebook").mkdir(parents=True, exist_ok=True)
(Path("user_data/logs/notebook") / f"{new_value}.txt").write_text("In this story,")
available_prompts = [new_value]


@ -160,7 +160,8 @@ def get_available_presets():
def get_available_prompts():
notebook_dir = Path('user_data/logs/notebook')
notebook_dir.mkdir(parents=True, exist_ok=True)
if not notebook_dir.exists():
notebook_dir.mkdir(parents=True, exist_ok=True)
prompt_files = list(notebook_dir.glob('*.txt'))
sorted_files = sorted(prompt_files, key=lambda x: x.stat().st_mtime, reverse=True)


@ -10,7 +10,8 @@ from modules.logging_colors import logger
# Set up Gradio temp directory path
gradio_temp_path = Path('user_data') / 'cache' / 'gradio'
shutil.rmtree(gradio_temp_path, ignore_errors=True)
gradio_temp_path.mkdir(parents=True, exist_ok=True)
if not gradio_temp_path.exists():
gradio_temp_path.mkdir(parents=True, exist_ok=True)
# Set environment variables
os.environ.update({