diff --git a/download-model.py b/download-model.py index 0c6307a1..576a8b79 100644 --- a/download-model.py +++ b/download-model.py @@ -314,8 +314,8 @@ class ModelDownloader: def download_model_files(self, model, branch, links, sha256, output_folder, progress_queue=None, start_from_scratch=False, threads=4, specific_file=None, is_llamacpp=False): self.progress_queue = progress_queue - if not output_folder.exists(): - output_folder.mkdir(parents=True, exist_ok=True) + + output_folder.mkdir(parents=True, exist_ok=True) if not is_llamacpp: metadata = f'url: https://huggingface.co/{model}\n' \ diff --git a/extensions/Training_PRO/custom_scheduler.py b/extensions/Training_PRO/custom_scheduler.py index 59e7706e..1e80daed 100644 --- a/extensions/Training_PRO/custom_scheduler.py +++ b/extensions/Training_PRO/custom_scheduler.py @@ -20,7 +20,7 @@ custom_scheduler_params = {'trigger_loss': 0.0, 'ramp_down_ratio':1.0, 'current_ def custom_scheduler_global_update(current_loss: float): custom_scheduler_params.update({'current_loss': current_loss}) - + def custom_scheduler_global_setup(trigger_loss: float, ramp_down_ratio: float): custom_scheduler_params.update({'trigger_loss': trigger_loss}) custom_scheduler_params.update({'ramp_down_ratio': ramp_down_ratio}) @@ -35,12 +35,12 @@ def custom_scheduler_global_setup(trigger_loss: float, ramp_down_ratio: float): # hold constant to the half of epochs then cosine down to 0 def _get_fp_half_schedule_with_warmup_lr_lambda(current_step: int, *, num_warmup_steps: int, num_training_steps: int, num_firstepoch_steps: int): - + global last_print_label print_label = '' half_steps = num_training_steps//2 - + num_warmup_steps = min(num_warmup_steps,half_steps) if current_step < num_warmup_steps: @@ -49,57 +49,57 @@ def _get_fp_half_schedule_with_warmup_lr_lambda(current_step: int, *, num_warmup print_label = 'Scheduler: Hold' else: print_label = 'Scheduler: Annealing' - + if print_label != last_print_label: print(print_label) - + last_print_label = print_label if current_step < num_warmup_steps: return float(current_step) / float(max(1, num_warmup_steps)) - + if current_step < half_steps: - return 1.0 - + return 1.0 + progress = float(current_step - half_steps) / float(max(1, num_training_steps - half_steps)) num_cycles = 0.5 - return max(0.0, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress))) - + return max(0.0, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress))) + # raise up in cosine, then fall back in cosine def _get_fp_cosine_raise_and_fall_lr_lambda(current_step: int, *, num_warmup_steps: int, num_training_steps: int, num_firstepoch_steps: int): - + global last_print_label print_label = '' half_steps = num_training_steps//2 - + #num_warmup_steps = min(num_warmup_steps,half_steps) if current_step < half_steps: print_label = 'Scheduler: Raise' else: print_label = 'Scheduler: Fall' - + if print_label != last_print_label: print(print_label) - + last_print_label = print_label - + # linear # return float(current_step) / float(max(1, num_warmup_steps)) - + progress = float(current_step - half_steps) / float(max(1, num_training_steps - half_steps)) num_cycles = 0.5 - return max(0.0, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress))) - + return max(0.0, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress))) + # constant to the first epochs then cosine down to 0 over the rest epochs def _get_fp_cosine_schedule_with_warmup_lr_lambda(current_step: int, *, num_warmup_steps: int, num_training_steps: int, 
num_firstepoch_steps: int): - + global last_print_label print_label = '' - + num_warmup_steps = min(num_warmup_steps,num_firstepoch_steps) if current_step < num_warmup_steps: @@ -108,56 +108,56 @@ def _get_fp_cosine_schedule_with_warmup_lr_lambda(current_step: int, *, num_warm print_label = 'Scheduler: Hold' else: print_label = 'Scheduler: Annealing' - + if print_label != last_print_label: print(print_label) - + last_print_label = print_label if current_step < num_warmup_steps: return float(current_step) / float(max(1, num_warmup_steps)) - + if current_step < num_firstepoch_steps: - return 1.0 - + return 1.0 + progress = float(current_step - num_firstepoch_steps) / float(max(1, num_training_steps - num_firstepoch_steps)) num_cycles = 0.5 - return max(0.0, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress))) - -# halve lr each epoch + return max(0.0, 0.5 * (1.0 + math.cos(math.pi * float(num_cycles) * 2.0 * progress))) + +# halve lr each epoch def _get_fp_cdrop_rate_schedule_with_warmup_lr_lambda(current_step: int, *, num_warmup_steps: int, num_training_steps: int, num_firstepoch_steps: int): - + global last_print_label print_label = '' - + num_warmup_steps = min(num_warmup_steps, num_firstepoch_steps) current_epoch = (current_step // num_firstepoch_steps) + 1 - - + + if current_step < num_warmup_steps: print_label = 'Scheduler: Warmup' elif current_step < num_firstepoch_steps: print_label = 'Scheduler: Hold' else: print_label = 'Scheduler: Drop Rate' - + if print_label != last_print_label: print(print_label) - + last_print_label = print_label if current_step < num_warmup_steps: return float(current_step) / float(max(1, num_warmup_steps)) - + if current_step < num_firstepoch_steps: - return 1.0 + return 1.0 # Compute the learning rate for the annealing phase - + learning_rate = 1.0 / float(2 ** (current_epoch - 1)) - + return learning_rate # epoch decay: 1/(1 + decay * epoch) @@ -177,7 +177,7 @@ def custom_cosine_scheduler_with_warmup(optimizer, num_warmup_steps, num_trainin Return: `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule. """ - + lr_lambda = partial( _get_fp_cosine_schedule_with_warmup_lr_lambda, num_warmup_steps=num_warmup_steps, @@ -201,7 +201,7 @@ def custom_half_scheduler_with_warmup(optimizer, num_warmup_steps, num_training_ Return: `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule. """ - + lr_lambda = partial( _get_fp_half_schedule_with_warmup_lr_lambda, num_warmup_steps=num_warmup_steps, @@ -225,7 +225,7 @@ def custom_raise_fall_scheduler_with_warmup(optimizer, num_warmup_steps, num_tra Return: `torch.optim.lr_scheduler.LambdaLR` with the appropriate schedule. 
""" - + lr_lambda = partial( _get_fp_cosine_raise_and_fall_lr_lambda, num_warmup_steps=num_warmup_steps, @@ -257,7 +257,7 @@ def neftune_forward(self, input: torch.Tensor): mag_norm = self.neftune_noise_alpha / torch.sqrt(dims) embeddings = embeddings + torch.zeros_like(embeddings).uniform_(-mag_norm, mag_norm) - return embeddings + return embeddings class FPNEFtuneTrainer(transformers.Trainer): @@ -267,7 +267,7 @@ class FPNEFtuneTrainer(transformers.Trainer): model = self._activate_neftune(model) super().__init__(model = model, *args, **kwargs) - + def _activate_neftune(self, model): r""" Activates the neftune as presented in this code: https://github.com/neelsjain/NEFTune and paper: https://arxiv.org/abs/2310.05914 @@ -290,7 +290,7 @@ class FPNEFtuneTrainer(transformers.Trainer): embeddings._trl_old_forward = old_forward return model - + def train(self, *args, **kwargs): output = super().train(*args, **kwargs) @@ -318,7 +318,7 @@ class FPSchedulerTrainer(transformers.Trainer): model = self._activate_neftune(model) super().__init__(model = model, *args, **kwargs) - + def _activate_neftune(self, model): r""" Activates the neftune as presented in this code: https://github.com/neelsjain/NEFTune and paper: https://arxiv.org/abs/2310.05914 @@ -341,7 +341,7 @@ class FPSchedulerTrainer(transformers.Trainer): embeddings._trl_old_forward = old_forward return model - + def train(self, *args, **kwargs): output = super().train(*args, **kwargs) @@ -364,19 +364,19 @@ class FPSchedulerTrainer(transformers.Trainer): def create_scheduler(self, num_training_steps: int, optimizer: torch.optim.Optimizer = None): #Setup the scheduler. The optimizer of the trainer must have been set up either before this method is called or passed as an argument. - + num_train_epochs = self.args.num_train_epochs num_warmup_steps=self.args.get_warmup_steps(num_training_steps) num_firstepoch_steps = math.ceil(num_training_steps/num_train_epochs) - num_warmup_acc = num_warmup_steps*self.args.gradient_accumulation_steps + num_warmup_acc = num_warmup_steps*self.args.gradient_accumulation_steps num_firstepoch_steps_acc = num_firstepoch_steps*self.args.gradient_accumulation_steps num_training_steps_acc = num_training_steps*self.args.gradient_accumulation_steps custom_scheduler_params.update({'dynamic_scheduler_stop': False}) - + print (f"Warm-up steps aligned to Gradient accumulation ({self.args.gradient_accumulation_steps}) = {num_warmup_acc} actual warmup steps") if self.args.lr_scheduler_type == 'cosine': - + num_warmup_acc_min = min(num_warmup_acc, num_firstepoch_steps_acc) if num_warmup_acc>num_firstepoch_steps_acc: @@ -388,13 +388,13 @@ class FPSchedulerTrainer(transformers.Trainer): self.lr_scheduler = custom_cosine_scheduler_with_warmup( optimizer=self.optimizer if optimizer is None else optimizer, num_warmup_steps=num_warmup_steps, - num_training_steps=num_training_steps, + num_training_steps=num_training_steps, num_firstepoch_steps = num_firstepoch_steps, ) self._created_lr_scheduler = True return self.lr_scheduler elif self.args.lr_scheduler_type == 'constant': - + half_step_acc = num_training_steps_acc//2 num_warmup_acc_min = min(num_warmup_acc, half_step_acc) @@ -407,15 +407,15 @@ class FPSchedulerTrainer(transformers.Trainer): self.lr_scheduler = custom_half_scheduler_with_warmup( optimizer=self.optimizer if optimizer is None else optimizer, num_warmup_steps=num_warmup_steps, - num_training_steps=num_training_steps, + num_training_steps=num_training_steps, num_firstepoch_steps = num_firstepoch_steps, ) 
self._created_lr_scheduler = True return self.lr_scheduler elif self.args.lr_scheduler_type == 'constant_with_warmup': - + half_step_acc = num_training_steps_acc//2 - + if num_warmup_steps>0: print(f"Warmup doesn't apply to this scheduler [Raise-Fall]") @@ -424,10 +424,10 @@ class FPSchedulerTrainer(transformers.Trainer): self.lr_scheduler = custom_raise_fall_scheduler_with_warmup( optimizer=self.optimizer if optimizer is None else optimizer, num_warmup_steps=num_warmup_steps, - num_training_steps=num_training_steps, + num_training_steps=num_training_steps, num_firstepoch_steps = num_firstepoch_steps, ) self._created_lr_scheduler = True - return self.lr_scheduler + return self.lr_scheduler else: - return super().create_scheduler(num_training_steps=num_training_steps, optimizer=optimizer) + return super().create_scheduler(num_training_steps=num_training_steps, optimizer=optimizer) \ No newline at end of file diff --git a/extensions/Training_PRO/matplotgraph.py b/extensions/Training_PRO/matplotgraph.py index 10199ab6..b30bee83 100644 --- a/extensions/Training_PRO/matplotgraph.py +++ b/extensions/Training_PRO/matplotgraph.py @@ -5,7 +5,7 @@ def create_graph(lora_path, lora_name): try: import matplotlib.pyplot as plt from matplotlib.ticker import ScalarFormatter - + peft_model_path = f'{lora_path}/training_graph.json' image_model_path = f'{lora_path}/training_graph.png' # Check if the JSON file exists @@ -20,7 +20,7 @@ def create_graph(lora_path, lora_name): # Create the line chart fig, ax1 = plt.subplots(figsize=(10, 6)) - + # Plot y1 (learning rate) on the first y-axis ax1.plot(x, y1, 'b-', label='Learning Rate') @@ -57,6 +57,6 @@ def create_graph(lora_path, lora_name): print(f"Graph saved in {image_model_path}") else: print(f"File 'training_graph.json' does not exist in the {lora_path}") - + except ImportError: print("matplotlib is not installed. Please install matplotlib to create PNG graphs") diff --git a/extensions/Training_PRO/script.py b/extensions/Training_PRO/script.py index 05a879f6..cb11a8df 100644 --- a/extensions/Training_PRO/script.py +++ b/extensions/Training_PRO/script.py @@ -109,12 +109,12 @@ def ui(): copy_from = gr.Dropdown(label='Copy parameters from', value='None', choices=get_available_loras_local(non_serialized_params['Lora_sortedByTime']), elem_classes=['slim-dropdown']) create_refresh_button(copy_from, lambda: None, lambda: {'choices': get_available_loras_local(non_serialized_params['Lora_sortedByTime'])}, 'refresh-button') with gr.Column(): - sort_byTime = gr.Checkbox(label='Sort list by Date', value=False, info='Sorts Loras by date created.', elem_classes=['no-background']) + sort_byTime = gr.Checkbox(label='Sort list by Date', value=False, info='Sorts Loras by date created.', elem_classes=['no-background']) with gr.Row(): with gr.Column(scale=5): lora_name = gr.Textbox(label='Name', info='The name of your new LoRA file') - + with gr.Column(): always_override = gr.Checkbox(label='Override Existing Files', value=False, info='If the name is the same, checking will replace the existing file, and unchecking will load and continue from it (the rank must be the same).', elem_classes=['no-background']) @@ -132,14 +132,14 @@ def ui(): epochs = gr.Number(label='Epochs', value=3, info='Number of times every entry in the dataset should be fed into training. So 1 means feed each item in once, 5 means feed it in five times, etc.') learning_rate = gr.Textbox(label='Learning Rate', value='3e-4', info='In scientific notation. 3e-4 is a good starting base point. 
1e-2 is extremely high, 1e-6 is extremely low.') lr_scheduler_type = gr.Dropdown(label='LR Scheduler', value='linear', choices=['linear', 'constant', 'constant_with_warmup', 'cosine', 'cosine_with_restarts', 'polynomial', 'inverse_sqrt', 'FP_low_epoch_annealing', 'FP_half_time_annealing','FP_raise_fall_creative'], info='Learning rate scheduler - defines how the learning rate changes over time. Custom schedulers: FP_low_epoch_annealing, FP_half_time_annealing, FP_raise_fall_creative (see README)', elem_classes=['slim-dropdown']) - + with gr.Accordion(label='Checkpoints', open=True): with gr.Row(): with gr.Column(): save_steps = gr.Number(label='Save every n steps', value=0, info='A checkpoint will be saved every n steps and at each Epoch boundary. (0 = OFF)') - with gr.Column(): - save_steps_under_loss = gr.Slider(label='Save at 10% Loss change', value=1.8, minimum=0.0, maximum=3.0, step=0.1, info="Saves checkpoints at (or bellow) this loss and then each time loss falls by at least 10% This works independently from 'Save every n steps'") - with gr.Row(): + with gr.Column(): + save_steps_under_loss = gr.Slider(label='Save at 10% Loss change', value=1.8, minimum=0.0, maximum=3.0, step=0.1, info="Saves checkpoints at (or bellow) this loss and then each time loss falls by at least 10% This works independently from 'Save every n steps'") + with gr.Row(): save_chackpoint_now = gr.Button('Queue Checkpoint Now') with gr.Accordion(label='Advanced Options', open=True): @@ -148,7 +148,7 @@ def ui(): warmup_steps = gr.Number(label='Warmup Steps', value=100, info='Number of max steps used for a linear warmup. Reduces early over-fitting by the first training blocks. Value has precedent over Warmup Ratio. Aligns to the closest multiple of graddient accumulation') warmup_ratio = gr.Slider(label='Warmup Ratio', minimum=0.0, maximum=0.2, step=0.025, value=0.0, info='Ratio of total training steps that will be used for a linear warmup. It applies only if Warmup Step is 0.') neft_noise_alpha = gr.Slider(label='NEFtune noise scale', minimum=0.0, maximum=15, step=1, value=0.0, info='Add noise to the training to improve generalization. [0 - OFF, Starting value to experiment: 5]') - training_projection = gr.Radio(value = train_choices[4], label='LLaMA Target Projections', info='Change the targets (LORA is typically q-v)', choices=train_choices) + training_projection = gr.Radio(value = train_choices[4], label='LLaMA Target Projections', info='Change the targets (LORA is typically q-v)', choices=train_choices) lora_dropout = gr.Slider(label='LoRA Dropout', minimum=0.0, maximum=1.0, step=0.025, value=0.05, info='Percentage probability for dropout of LoRA layers. This can help reduce overfitting. Most users should leave at default.') optimizer = gr.Dropdown(label='Optimizer', value='adamw_torch', choices=['adamw_hf', 'adamw_torch', 'adamw_torch_fused', 'adamw_torch_xla', 'adamw_apex_fused', 'adafactor', 'adamw_bnb_8bit', 'adamw_anyprecision', 'sgd', 'adagrad'], info='Different optimizer implementation options, for advanced users. 
Effects of different options are not well documented yet.', elem_classes=['slim-dropdown']) @@ -157,10 +157,10 @@ def ui(): add_bos_token = gr.Checkbox(label='Add BOS token', value=True, info="Adds BOS token for each dataset item") add_eos_token = gr.Checkbox(label='Add EOS token', value=False, info="Adds EOS token for each dataset item") add_eos_token_type = gr.Dropdown(label='EOS placement (Text file)', choices=['Every Block', 'Hard Cut Blocks Only'], value='Every Block', info='', allow_custom_value = False) - + higher_rank_limit = gr.Checkbox(label='Enable higher ranks', value=False, info='If checked, changes Rank/Alpha slider above to go much higher. This will not work without a datacenter-class GPU.') report_to = gr.Radio(label="Save detailed logs with", value="None", choices=["None", "wandb", "tensorboard"], interactive=True) - # for future + # for future #with gr.Accordion(label='Dynamic Scheduler', open = False): # ds_min_epochs = gr.Number(label='Minimum Epochs', value='1', info='Minimum epochs that will be always performed before ramp down can be triggered') # ds_max_epochs = gr.Number(label='Maximum Epochs (fallback)', value='50', info='Maximum Epochs before the training will bail out completely (should be a large number)') @@ -168,7 +168,7 @@ def ui(): # ds_loss_rolling_window = gr.Number(label='Loss rolling average', value='4', info='Calculate loss by averaging last x numbers to avoid jumps and noise') # ds_epochs_to_ramp = gr.Slider(label='Ramp down ratio', minimum=0.0, maximum=2.0, step=0.1, value=1.00, info='How long the ramp down will last relative to ellapsed steps (before trigger)') # gr.Markdown('These are settings for FP_dynamic_loss_trigger scheduler. The scheduler will do warm up, then hold constant untill a loss falls under Trigger Loss, then it will commence linear ramp down schedule and stop. The length of ramp down is set by Ramp down ratio where (ramp down steps) = ratio * (elapsed steps). (The time to completition shown will be very high untill ramp down is triggered.)') - + with gr.Column(): with gr.Tab(label='Formatted Dataset'): @@ -217,7 +217,7 @@ def ui(): cutoff_len = gr.Slider(label='Chunk Length (Cutoff Length)', minimum=32, maximum=2048, value=256, step=32, info='The maximum length of a chunk (in tokens). Applies to both JSON dataset and text files. 
Higher values require much more VRAM.') with gr.Row(): with gr.Column(): - check_dataset_btn = gr.Button('Verify Dataset/Text File and suggest data entries') + check_dataset_btn = gr.Button('Verify Dataset/Text File and suggest data entries') check_dataset_txt = gr.Textbox(label='Dataset info', value='') with gr.Row(): @@ -227,8 +227,8 @@ def ui(): with gr.Accordion(label="Graph", open=True): with gr.Row(): # show_actions_button = False - we use old gradio - plot_graph = gr.LinePlot(x="epoch", y="value", title="Loss Metrics", overlay_point=True, tooltip=["epoch", "value"], x_lim=[0, 1], y_lim=[0, 3.5], width=500, height=250) - + plot_graph = gr.LinePlot(x="epoch", y="value", title="Loss Metrics", overlay_point=True, tooltip=["epoch", "value"], x_lim=[0, 1], y_lim=[0, 3.5], width=500, height=250) + output = gr.Markdown(value="Ready") with gr.Tab('Perplexity evaluation', elem_id='evaluate-tab'): @@ -267,7 +267,7 @@ def ui(): return grad_accumulation_val - + copy_from.change(partial(do_copy_params, all_params= all_params), copy_from, all_params).then(fix_old_version,[batch_size,micro_batch_size, grad_accumulation],grad_accumulation) start_button.click(do_train, all_params, [output,plot_graph]) stop_button.click(do_interrupt, None, None, queue=False) @@ -306,8 +306,8 @@ def ui(): if shared.tokenizer is None: yield "Tokenizer is not available. Please Load some Model first." return - - + + if raw_text_file not in ['None', '']: logger.info("Loading Text file...") fullpath = clean_path('user_data/training/datasets', f'{raw_text_file}') @@ -329,8 +329,8 @@ def ui(): except: yield f"{raw_text_file}.txt doesn't seem to exsist anymore... check your user_data/training/datasets folder" return - - + + if min_chars<0: min_chars = 0 @@ -343,11 +343,11 @@ def ui(): total_blocks = len(text_chunks) result = f"Text: ({raw_text_file}.txt) has {total_blocks} blocks (Block Size {cutoff_len} tokens)" del text_chunks - + else: if dataset in ['None', '']: yield "Select dataset or text file." - return + return if format in ['None', '']: yield "Select format choice for dataset." 
@@ -382,8 +382,8 @@ def ui(): logger.info("Loading JSON datasets...") data = load_dataset("json", data_files=clean_path('user_data/training/datasets', f'{dataset}.json')) - - data_keys = [] + + data_keys = [] if data: if 'train' in data: # Check if the 'train' split exists in the dataset @@ -400,11 +400,11 @@ def ui(): #for options, data in format_data.items(): # format_keys = options.split(',') # result += f"{format_keys}, " - #result = result.rstrip() - #result = result.rstrip(',') + #result = result.rstrip() + #result = result.rstrip(',') if total_blocks>0: - number_ofSteps = int(math.ceil(total_blocks / micro_batch_size) * epochs) + number_ofSteps = int(math.ceil(total_blocks / micro_batch_size) * epochs) num_stepsPer_epoch = int(math.ceil(number_ofSteps/epochs)) min_warm = math.ceil(100 / grad_accumulation) @@ -415,20 +415,20 @@ def ui(): save_each_n_max = int(math.ceil(number_ofSteps/5)) gradient_accumulation_max = int(total_blocks)//micro_batch_size - + result += f"\n[Batch Size: {micro_batch_size}, Epochs: {epochs}, Gradient Accumulation: {grad_accumulation}]\n" result += f"Total number of steps: {number_ofSteps}\n" result += f"Steps per each Epoch: {num_stepsPer_epoch}\n" result += f"Suggestions:\n" result += f"Checkpoints: Save every {save_each_n_min} - {save_each_n_max} steps (Current: {int(save_steps)})\n" result += f"Warmup steps: {warmup_steps_suggest} (Current: {int(warmup_steps)})" - if gradient_accumulation_max < grad_accumulation: + if gradient_accumulation_max < grad_accumulation: result += f"\n\nWARNING: Gradient Accumulation {grad_accumulation} is too high: It should be below {gradient_accumulation_max}" yield result return - + check_dataset_btn.click(check_dataset, dataset_calc_params ,check_dataset_txt) # Evaluation events. For some reason, the interrupt event @@ -449,10 +449,10 @@ def ui(): def reload_lora(): return gr.Dropdown.update(choices=get_available_loras_local(non_serialized_params['Lora_sortedByTime'])) - + # nonserialized items - sort_byTime.change(lambda x: non_serialized_params.update({"Lora_sortedByTime": x}), sort_byTime, None).then(reload_lora,None,copy_from) + sort_byTime.change(lambda x: non_serialized_params.update({"Lora_sortedByTime": x}), sort_byTime, None).then(reload_lora,None,copy_from) #debug_slicer.change(lambda x: non_serialized_params.update({"debug_slicer": x}), debug_slicer, None) def update_dataset(): @@ -482,7 +482,7 @@ def do_copy_params(lora_name: str, all_params): else: params = {} else: - params = {} + params = {} result = list() for i in range(0, len(PARAMETERS)): @@ -521,8 +521,7 @@ def backup_adapter(input_folder): # Create the new subfolder subfolder_path = Path(f"{input_folder}/{creation_date_str}") - if not subfolder_path.exists(): - subfolder_path.mkdir(parents=True, exist_ok=True) + subfolder_path.mkdir(parents=True, exist_ok=True) # Check if the file already exists in the subfolder backup_adapter_file = Path(f"{input_folder}/{creation_date_str}/adapter_model.bin") @@ -608,7 +607,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch shared.tokenizer.padding_side = "left" def encode(text, prepend_bos_token): - + result = shared.tokenizer.encode(text, truncation=True, max_length=cutoff_len) # Check if the first two tokens are BOS if len(result) >= 2 and result[:2] == [shared.tokenizer.bos_token_id, shared.tokenizer.bos_token_id]: @@ -627,7 +626,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch input_ids.append(shared.tokenizer.eos_token_id) input_ids = 
[shared.tokenizer.pad_token_id] * (cutoff_len - len(input_ids)) + input_ids - + labels = [1] * len(input_ids) else: ind = prompt.index(train_only_after) + len(train_only_after) @@ -654,7 +653,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch } train_template.clear() - + #reset stuff print(f"*** LoRA: {lora_name} ***") non_serialized_params.update({"stop_at_loss": stop_at_loss}) @@ -666,7 +665,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch non_serialized_params.update({"checkpoint_offset": 0}) non_serialized_params.update({"epoch_offset": 0}) train_log_graph.clear() - + # == Prep the dataset, format, etc == if raw_text_file not in ['None', '']: train_template["template_type"] = "raw_text" @@ -686,8 +685,8 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch else: with open(clean_path('user_data/training/datasets', f'{raw_text_file}.txt'), 'r', encoding='utf-8') as file: raw_text = file.read().replace('\r', '') - - # FPHAM PRECISE SLICING + + # FPHAM PRECISE SLICING if min_chars<0: min_chars = 0 @@ -704,7 +703,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch train_data = Dataset.from_list([tokenize(x, add_EOS_to_all, add_bos_token) for x in text_chunks]) if add_EOS_to_all: - print(f"Added EOS to {len(text_chunks)} blocks") + print(f"Added EOS to {len(text_chunks)} blocks") print(f"All Data Blocks: {len(text_chunks)}") @@ -746,7 +745,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch data = load_dataset("json", data_files=clean_path('user_data/training/datasets', f'{dataset}.json')) train_data = data['train'].map(generate_and_tokenize_prompt, new_fingerprint='%030x' % random.randrange(16**30)) - print(f"BOS: {add_bos_token} EOS: {add_eos_token}") + print(f"BOS: {add_bos_token} EOS: {add_eos_token}") print(f"Data Blocks: {train_data.num_rows}") if eval_dataset == 'None': @@ -784,7 +783,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch print(f"Method: {RED}QLORA{RESET}") prepare_model_for_kbit_training(shared.model) else: - print(f"Method: {RED}LoRA{RESET}") + print(f"Method: {RED}LoRA{RESET}") # base model is now frozen and should not be reused for any other LoRA training than this one shared.model_dirty_from_training = True @@ -797,7 +796,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch elif training_projection==train_choices[2]: model_to_lora_modules[model_id] = ["q_proj","k_proj", "v_proj"] elif training_projection==train_choices[3]: - model_to_lora_modules[model_id] = ["k_proj", "v_proj", "down_proj"] + model_to_lora_modules[model_id] = ["k_proj", "v_proj", "down_proj"] else: model_to_lora_modules[model_id] = ["q_proj", "v_proj"] @@ -828,9 +827,9 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch set_peft_model_state_dict(lora_model, state_dict_peft) print(f" + Continue Training on {RED}{lora_file_path}/adapter_model.bin{RESET}") - + #load training_log.json if exist - + if Path(f"{lora_file_path}/training_log.json").is_file(): with open(f"{lora_file_path}/training_log.json", 'r') as json_file: json_ilog = json.load(json_file) @@ -841,13 +840,13 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch if key=='epoch': non_serialized_params.update({"epoch_offset": value}) print(f" + Epoch offset: {RED}{non_serialized_params['epoch_offset']}{RESET}") - + if 
Path(f"{lora_file_path}/training_graph.json").is_file(): try: with open(f"{lora_file_path}/training_graph.json", 'r') as json_file: train_log_graph = json.load(json_file) - print(" + Training Graph loaded") + print(" + Training Graph loaded") except: print(f"Can't read training_graph") @@ -877,72 +876,72 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch else: current_loss = float(train_log.get('loss', 0.0)) current_epoch_int = int(float(train_log.get('epoch', 0.0))) - + force_save = False current_steps_offset = tracked.current_steps + non_serialized_params['checkpoint_offset'] - folder_save = f"checkpoint-{current_steps_offset}" + folder_save = f"checkpoint-{current_steps_offset}" # save if triggered by user if non_serialized_params['save_checkpoint_now']: force_save = True non_serialized_params.update({"save_checkpoint_now": False}) print(f"\033[1;31;1mSave Checkpoint manually trigerred.\033[0;37;0m") - folder_save = f"checkpoint-{current_steps_offset}-user" + folder_save = f"checkpoint-{current_steps_offset}-user" patience = 3 # Set the number of consecutive steps for tracking stability - + if gradient_accumulation_steps==1: patience = 4 min_steps = ssteps10 - # Save each time the loss is below the threshold + # Save each time the loss is below the threshold if current_loss < non_serialized_params['save_steps_under_loss'] and current_loss > 0 and state.global_step > min_steps: current_stability = non_serialized_params['current_stability'] current_stability += 1 - non_serialized_params.update({"current_stability": current_stability}) + non_serialized_params.update({"current_stability": current_stability}) if current_stability >= patience: current_stability = 0 - non_serialized_params.update({"current_stability": current_stability}) + non_serialized_params.update({"current_stability": current_stability}) current_loss_dec = round(current_loss, 2) loss_str = f"{current_loss_dec:.2f}" loss_str = loss_str.replace('.', '_') new_save = (current_loss_dec-0.1) + 0.01 non_serialized_params.update({"save_steps_under_loss": new_save}) - folder_save = f"checkpoint-{current_steps_offset}-loss-{loss_str}" - force_save = True - + folder_save = f"checkpoint-{current_steps_offset}-loss-{loss_str}" + force_save = True + else: # Reset stability if the loss goes above the threshold - non_serialized_params.update({"current_stability": 0}) + non_serialized_params.update({"current_stability": 0}) # Save full epochs - if actual_save_steps>0 and current_epoch_int > non_serialized_params['save_epochs'] and state.global_step > min_steps: - + if actual_save_steps>0 and current_epoch_int > non_serialized_params['save_epochs'] and state.global_step > min_steps: + current_epoch_offset = current_epoch_int - + if non_serialized_params['epoch_offset'] > 0: current_epoch_offset = current_epoch_int + round(non_serialized_params['epoch_offset'], 2) - + ep_off_str = f"{current_epoch_offset}" ep_off_str = ep_off_str.replace('.', '_') - folder_save = f"checkpoint-{current_steps_offset}-epoch-{ep_off_str}" + folder_save = f"checkpoint-{current_steps_offset}-epoch-{ep_off_str}" non_serialized_params.update({"save_epochs": current_epoch_int}) force_save = True # save each actual_save_steps if state.global_step > 0 and actual_save_steps > 0 and state.global_step % actual_save_steps == 0: - folder_save = f"checkpoint-{current_steps_offset}" - force_save = True + folder_save = f"checkpoint-{current_steps_offset}" + force_save = True - if force_save: + if force_save: 
lora_model.save_pretrained(f"{lora_file_path}/{folder_save}/", safe_serialization = non_serialized_params['safe_serialization']) print(f"\033[1;30;40mStep: {tracked.current_steps:6} \033[0;37;0m Saved: [{folder_save}]") # Save log @@ -951,7 +950,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch # == Save training prompt == with open(f"{lora_file_path}/{folder_save}/training_prompt.json", 'w', encoding='utf-8') as file: json.dump(train_template, file, indent=2) - + def on_substep_end(self, args: transformers.TrainingArguments, state: transformers.TrainerState, control: transformers.TrainerControl, **kwargs): tracked.current_steps += 1 @@ -976,7 +975,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch print(f"\033[1;30;40mStep: {tracked.current_steps:6} [+{non_serialized_params['checkpoint_offset']}] \033[0;37;0m", end='') else: print(f"\033[1;30;40mStep: {tracked.current_steps:6} \033[0;37;0m", end='') - + graphentry = { 'current_steps': int(train_log.get('current_steps_adjusted',0)), 'loss': float(train_log.get('loss', 0.0)), @@ -987,7 +986,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch cur_loss = float(train_log.get('loss', 0.0)) cur_lr = float(train_log.get('learning_rate', 0.0)) cur_epoch = float(train_log.get('epoch', 0.0)) - + if len(statistics['loss']) == 1: first_epoch = statistics['loss'][0]['epoch'] first_value = statistics['loss'][0]['value'] @@ -1014,7 +1013,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch # FPHAM SAMPLE REQ Transformers error handling gradient_accumulation_max = int(train_data.num_rows)//micro_batch_size - + if gradient_accumulation_max < gradient_accumulation_steps: print(f"{RED}WARNING:{RESET} Current gradient accumulation is {RED}too high{RESET} for the amount of training data.") print(f"Gradient accumulation: {gradient_accumulation_steps} should be less than: {gradient_accumulation_max}. 
{RED}This could crash Accelerate/Transformers{RESET}") @@ -1042,9 +1041,9 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch elif lr_scheduler_type =='FP_raise_fall_creative': custom_scheduller = True lr_scheduler_type_arg = 'constant_with_warmup' - + #gradient_checkpointing=True - + args=transformers.TrainingArguments( report_to=report_to if report_to != "None" else None, per_device_train_batch_size=micro_batch_size, @@ -1096,7 +1095,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch data_collator=transformers.DataCollatorForLanguageModeling(shared.tokenizer, mlm=False), callbacks=list([Callbacks()]) ) - + # END OF FPHAM CUSTOM SCHEDULER lora_model.config.use_cache = False @@ -1142,7 +1141,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch if stop_at_loss > 0: print(f"Monitoring loss {RED}(Auto-Stop at: {stop_at_loss}){RESET}") - + if WANT_INTERRUPT: yield "Interrupted before start.", zero_pd @@ -1158,9 +1157,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch decoded_entries.append({"value": decoded_text}) # Write the log file - if not Path('user_data/logs').exists(): - Path('user_data/logs').mkdir(exist_ok=True) - + Path('user_data/logs').mkdir(exist_ok=True) with open(Path('user_data/logs/train_dataset_sample.json'), 'w') as json_file: json.dump(decoded_entries, json_file, indent=4) @@ -1194,7 +1191,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch else: max_value = 3.5 last_epoch = 0 - first_epoch = 0 + first_epoch = 0 if WANT_INTERRUPT: @@ -1213,7 +1210,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch time_elapsed = time.perf_counter() - start_time lastloss = float(train_log.get('loss', 0.0)) - non_serialized_params.update({"training_loop": True}) + non_serialized_params.update({"training_loop": True}) if lastloss > 0: lastloss_str = f", ... Current Loss: `{lastloss:.2f}`" @@ -1235,7 +1232,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch if stop_at_loss != non_serialized_params['stop_at_loss']: stop_at_loss = non_serialized_params['stop_at_loss'] print(f"Stop at loss changed {RED}(Auto-Stop at: {stop_at_loss}){RESET}") - + losses = gr.LinePlot.update( value = pd.DataFrame(statistics['loss']), x="epoch", y="value", @@ -1243,7 +1240,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch overlay_point=True, tooltip=["epoch", "value"], x_lim=[first_epoch,last_epoch], y_lim=[0,max_value], width=500, height=250 ) - + yield f"Running... **{tracked.current_steps}** / **{tracked.max_steps}** ... {timer_info}, {format_time(time_elapsed)} / {format_time(total_time_estimate)} ... 
{format_time(total_time_estimate - time_elapsed)} remaining {lastloss_str}", losses @@ -1259,7 +1256,7 @@ def do_train(lora_name: str, always_override: bool, save_steps: int, micro_batch else: max_value = 3.5 last_epoch = 0 - first_epoch = 0 + first_epoch = 0 return_pd = gr.LinePlot.update( value = pd.DataFrame(statistics['loss']), diff --git a/extensions/Training_PRO/train_utils.py b/extensions/Training_PRO/train_utils.py index d1a45c17..79994880 100644 --- a/extensions/Training_PRO/train_utils.py +++ b/extensions/Training_PRO/train_utils.py @@ -20,7 +20,7 @@ def list_subfoldersByTime(directory): if not directory.endswith('/'): directory += '/' subfolders = [] - subfolders.append('None') + subfolders.append('None') path = directory name_list = os.listdir(path) full_list = [os.path.join(path,i) for i in name_list] @@ -37,19 +37,19 @@ def list_subfoldersByTime(directory): return subfolders def get_available_loras_local(_sortedByTime): - + model_dir = shared.args.lora_dir # Update with the appropriate directory path subfolders = [] if _sortedByTime: subfolders = list_subfoldersByTime(model_dir) else: - subfolders = utils.get_available_loras() + subfolders = utils.get_available_loras() return subfolders # FPHAM SPLIT BY SENTENCE BLOCK =============== - + def split_sentences(text: str, cutoff_len: int): sentences = [] sentence = '' @@ -57,24 +57,24 @@ def split_sentences(text: str, cutoff_len: int): abbreviations = ['Mr. ', 'Mrs. ', 'Dr. ', 'Ms. ', 'St. ', 'Prof. ', 'Jr. ', 'Ltd. ', 'Capt. ', 'Col. ', 'Gen. ', 'Ave. ', 'Blvd. ', 'Co. ', 'Corp. ', 'Dept. ', 'Est. ', 'Gov. ', 'Inc. ', 'Ph.D. ', 'Univ. '] errors = 0 max_cut = cutoff_len-1 - prev_char = '' + prev_char = '' for char in text: sentence += char - + if (any(sentence.endswith(delimiter) for delimiter in delimiters) and - not (prev_char.isupper() and len(sentence) >= 3 and sentence[-3] != ' ') and + not (prev_char.isupper() and len(sentence) >= 3 and sentence[-3] != ' ') and not any(sentence.endswith(abbreviation) for abbreviation in abbreviations)): tokens = shared.tokenizer.encode(sentence) - + if len(tokens) > max_cut: tokens = tokens[:max_cut] sentence = shared.tokenizer.decode(tokens, skip_special_tokens=True) errors = errors + 1 sentences.append({'text': sentence, 'size': len(tokens)}) - + sentence = '' prev_char = char @@ -83,7 +83,7 @@ def split_sentences(text: str, cutoff_len: int): tokens = shared.tokenizer.encode(sentence) if len(tokens) > max_cut: tokens = tokens[:max_cut] - sentence = shared.tokenizer.decode(tokens, skip_special_tokens=True) + sentence = shared.tokenizer.decode(tokens, skip_special_tokens=True) errors = errors + 1 sentences.append({'text': sentence, 'size': len(tokens)}) @@ -95,16 +95,16 @@ def split_sentences(text: str, cutoff_len: int): # The goal of following code is to create blocks of text + overlapping blocks while: # respects sentence boundaries -# always uses all the text +# always uses all the text # hard cut defined by hard_cut_string or will always end at the end of data block # no overlapping blocks will be created across hard cut or across token def precise_cut(text: str, overlap: bool, min_chars_cut: int, eos_to_hc: bool, cutoff_len: int, hard_cut_string: str, debug_slicer:bool): EOSX_str = '' #hardcut placeholder - EOS_str = '' + EOS_str = '' print("Precise raw text slicer: ON") - + cut_string = hard_cut_string.replace('\\n', '\n') text = text.replace(cut_string, EOSX_str) sentences = split_sentences(text, cutoff_len) @@ -121,7 +121,7 @@ def precise_cut(text: str, overlap: bool, min_chars_cut: 
int, eos_to_hc: bool, c half_index = 0 for index, item in enumerate(sentences): - + if halfcut_length+ item['size'] < half_cut: halfcut_length += item['size'] half_index = index @@ -130,7 +130,7 @@ def precise_cut(text: str, overlap: bool, min_chars_cut: int, eos_to_hc: bool, c halfcut_length = -2 * max_cut - if totalLength + item['size'] < max_cut and not currentSentence.endswith(EOSX_str): + if totalLength + item['size'] < max_cut and not currentSentence.endswith(EOSX_str): currentSentence += item['text'] totalLength += item['size'] else: @@ -141,14 +141,14 @@ def precise_cut(text: str, overlap: bool, min_chars_cut: int, eos_to_hc: bool, c currentSentence = item['text'] totalLength = item['size'] halfcut_length = item['size'] - - if len(currentSentence.strip()) > min_chars_cut: + + if len(currentSentence.strip()) > min_chars_cut: sentencelist.append(currentSentence.strip()) unique_blocks = len(sentencelist) print(f"Text Blocks: {unique_blocks}") - #overlap strategies: + #overlap strategies: # don't overlap across HARD CUT (EOSX) if overlap: for edge_idx in edgeindex: @@ -162,15 +162,15 @@ def precise_cut(text: str, overlap: bool, min_chars_cut: int, eos_to_hc: bool, c else: #if by chance EOSX is at the end then it's acceptable if currentSentence.endswith(EOSX_str) and len(currentSentence.strip()) > min_chars_cut: - sentencelist.append(currentSentence.strip()) - # otherwise don't cross hard cut + sentencelist.append(currentSentence.strip()) + # otherwise don't cross hard cut elif EOSX_str not in currentSentence and len(currentSentence.strip()) > min_chars_cut: sentencelist.append(currentSentence.strip()) - + currentSentence = '' totalLength = 0 break - + print(f"+ Overlapping blocks: {len(sentencelist)-unique_blocks}") num_EOS = 0 @@ -179,7 +179,7 @@ def precise_cut(text: str, overlap: bool, min_chars_cut: int, eos_to_hc: bool, c sentencelist[i] = sentencelist[i].replace(EOSX_str, EOS_str) else: sentencelist[i] = sentencelist[i].replace(EOSX_str, '') - + #someone may have had stop strings in the raw text... 
sentencelist[i] = sentencelist[i].replace("", EOS_str) num_EOS += sentencelist[i].count(EOS_str) @@ -193,49 +193,47 @@ def precise_cut(text: str, overlap: bool, min_chars_cut: int, eos_to_hc: bool, c if debug_slicer: - # Write the log file - if not Path('user_data/logs').exists(): - Path('user_data/logs').mkdir(exist_ok=True) - + # Write the log file + Path('user_data/logs').mkdir(exist_ok=True) sentencelist_dict = {index: sentence for index, sentence in enumerate(sentencelist)} output_file = "user_data/logs/sentencelist.json" with open(output_file, 'w') as f: json.dump(sentencelist_dict, f,indent=2) - + print("Saved sentencelist.json in user_data/logs folder") - - return sentencelist + + return sentencelist def sliding_block_cut(text: str, min_chars_cut: int, eos_to_hc: bool, cutoff_len: int, hard_cut_string: str, debug_slicer:bool): EOSX_str = '' #hardcut placeholder - EOS_str = '' + EOS_str = '' print("Mega Block Overlap: ON") - + cut_string = hard_cut_string.replace('\\n', '\n') text = text.replace(cut_string, EOSX_str) sentences = split_sentences(text, cutoff_len) print(f"Sentences: {len(sentences)}") sentencelist = [] - + max_cut = cutoff_len-1 #print(f"max_cut: {max_cut}") advancing_to = 0 prev_block_lastsentence = "" - + for i in range(len(sentences)): totalLength = 0 currentSentence = '' lastsentence = "" - + if i >= advancing_to: for k in range(i, len(sentences)): - + current_length = sentences[k]['size'] if totalLength + current_length <= max_cut and not currentSentence.endswith(EOSX_str): @@ -247,7 +245,7 @@ def sliding_block_cut(text: str, min_chars_cut: int, eos_to_hc: bool, cutoff_len if prev_block_lastsentence!=lastsentence: sentencelist.append(currentSentence.strip()) prev_block_lastsentence = lastsentence - + advancing_to = 0 if currentSentence.endswith(EOSX_str): advancing_to = k @@ -255,7 +253,7 @@ def sliding_block_cut(text: str, min_chars_cut: int, eos_to_hc: bool, cutoff_len currentSentence = "" totalLength = 0 break - + if currentSentence != "": if len(currentSentence.strip()) > min_chars_cut: sentencelist.append(currentSentence.strip()) @@ -268,7 +266,7 @@ def sliding_block_cut(text: str, min_chars_cut: int, eos_to_hc: bool, cutoff_len sentencelist[i] = sentencelist[i].replace(EOSX_str, EOS_str) else: sentencelist[i] = sentencelist[i].replace(EOSX_str, '') - + #someone may have had stop strings in the raw text... sentencelist[i] = sentencelist[i].replace("", EOS_str) num_EOS += sentencelist[i].count(EOS_str) @@ -282,18 +280,16 @@ def sliding_block_cut(text: str, min_chars_cut: int, eos_to_hc: bool, cutoff_len if debug_slicer: - # Write the log file - if not Path('user_data/logs').exists(): - Path('user_data/logs').mkdir(exist_ok=True) - + # Write the log file + Path('user_data/logs').mkdir(exist_ok=True) sentencelist_dict = {index: sentence for index, sentence in enumerate(sentencelist)} output_file = "user_data/logs/sentencelist.json" with open(output_file, 'w') as f: json.dump(sentencelist_dict, f,indent=2) - + print("Saved sentencelist.json in user_data/logs folder") - - return sentencelist + + return sentencelist # Example usage: # download_file_from_url('https://example.com/path/to/your/file.ext', '/output/directory') @@ -330,17 +326,17 @@ def download_file_from_url(url, overwrite, output_dir_in, valid_extensions = {'. # Send an HTTP GET request to the URL with a timeout file_extension = os.path.splitext(filename_lower)[-1] - + if file_extension not in valid_extensions: yield f"Invalid file extension: {file_extension}. 
Only {valid_extensions} files are supported." return with session.get(url, stream=True, headers=headers, timeout=10) as r: - r.raise_for_status() + r.raise_for_status() # total size can be wildly inaccurate #total_size = int(r.headers.get('content-length', 0)) - - block_size = 1024 * 4 + + block_size = 1024 * 4 with open(local_filename, mode) as f: count = 0 for data in r.iter_content(block_size): diff --git a/extensions/coqui_tts/script.py b/extensions/coqui_tts/script.py index 4f5700d1..fe6364b9 100644 --- a/extensions/coqui_tts/script.py +++ b/extensions/coqui_tts/script.py @@ -168,8 +168,7 @@ def setup(): print("[XTTS] Loading XTTS...") model = load_model() print("[XTTS] Done!") - if not Path(f"{this_dir}/outputs").exists(): - Path(f"{this_dir}/outputs").mkdir(parents=True, exist_ok=True) + Path(f"{this_dir}/outputs").mkdir(parents=True, exist_ok=True) def ui(): diff --git a/extensions/sd_api_pictures/script.py b/extensions/sd_api_pictures/script.py index d6853664..f216da38 100644 --- a/extensions/sd_api_pictures/script.py +++ b/extensions/sd_api_pictures/script.py @@ -168,8 +168,7 @@ def get_SD_pictures(description, character): variadic = f'{date.today().strftime("%Y_%m_%d")}/{character}_{int(time.time())}' output_file = Path(f'extensions/sd_api_pictures/outputs/{variadic}.png') - if not output_file.parent.exists(): - output_file.parent.mkdir(parents=True, exist_ok=True) + output_file.parent.mkdir(parents=True, exist_ok=True) with open(output_file.as_posix(), 'wb') as f: f.write(img_data) diff --git a/modules/chat.py b/modules/chat.py index 7770b5cf..d7683566 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -973,8 +973,8 @@ def save_history(history, unique_id, character, mode): return p = get_history_file_path(unique_id, character, mode) - if not p.parent.exists(): - p.parent.mkdir(parents=True, exist_ok=True) + if not p.parent.is_dir(): + p.parent.mkdir(parents=True) with open(p, 'w', encoding='utf-8') as f: f.write(json.dumps(history, indent=4, ensure_ascii=False)) @@ -1014,9 +1014,7 @@ def get_paths(state): unique_id = datetime.now().strftime('%Y%m%d-%H-%M-%S') p = get_history_file_path(unique_id, character, state['mode']) logger.warning(f"Moving \"{new_p}\" to \"{p}\"") - if not p.parent.exists(): - p.parent.mkdir(exist_ok=True) - + p.parent.mkdir(exist_ok=True) new_p.rename(p) return Path(f'user_data/logs/chat/{character}').glob('*.json') @@ -1165,9 +1163,7 @@ def save_last_chat_state(character, mode, unique_id): state["last_chats"][key] = unique_id state_file = Path('user_data/logs/chat_state.json') - if not state_file.parent.exists(): - state_file.parent.mkdir(exist_ok=True) - + state_file.parent.mkdir(exist_ok=True) with open(state_file, 'w', encoding='utf-8') as f: f.write(json.dumps(state, indent=2)) diff --git a/modules/evaluate.py b/modules/evaluate.py index 65cceda9..4f41c1fc 100644 --- a/modules/evaluate.py +++ b/modules/evaluate.py @@ -27,9 +27,7 @@ def save_past_evaluations(df): global past_evaluations past_evaluations = df filepath = Path('user_data/logs/evaluations.csv') - if not filepath.parent.exists(): - filepath.parent.mkdir(parents=True, exist_ok=True) - + filepath.parent.mkdir(parents=True, exist_ok=True) df.to_csv(filepath, index=False) diff --git a/modules/prompts.py b/modules/prompts.py index 1b154cca..79d9b56e 100644 --- a/modules/prompts.py +++ b/modules/prompts.py @@ -9,9 +9,7 @@ def load_prompt(fname): # Create new file new_name = utils.current_time() prompt_path = Path("user_data/logs/notebook") / f"{new_name}.txt" - if not 
prompt_path.parent.exists(): - prompt_path.parent.mkdir(parents=True, exist_ok=True) - + prompt_path.parent.mkdir(parents=True, exist_ok=True) initial_content = "In this story," prompt_path.write_text(initial_content, encoding='utf-8') diff --git a/modules/training.py b/modules/training.py index 2689977f..2354c39d 100644 --- a/modules/training.py +++ b/modules/training.py @@ -241,8 +241,7 @@ def backup_adapter(input_folder): # Create the new subfolder subfolder_path = Path(f"{input_folder}/{creation_date_str}") - if not subfolder_path.exists(): - subfolder_path.mkdir(parents=True, exist_ok=True) + subfolder_path.mkdir(parents=True, exist_ok=True) # Check if the file already exists in the subfolder backup_adapter_file = Path(f"{input_folder}/{creation_date_str}/adapter_model.bin") @@ -677,9 +676,7 @@ def do_train(lora_name: str, always_override: bool, q_proj_en: bool, v_proj_en: decoded_entries.append({"value": decoded_text}) # Write the log file - if not Path('user_data/logs').exists(): - Path('user_data/logs').mkdir(exist_ok=True) - + Path('user_data/logs').mkdir(exist_ok=True) with open(Path('user_data/logs/train_dataset_sample.json'), 'w') as json_file: json.dump(decoded_entries, json_file, indent=4) diff --git a/modules/ui.py b/modules/ui.py index 1247b365..0e8afa8f 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -409,9 +409,7 @@ def _perform_debounced_save(): if _last_interface_state is not None: contents = save_settings(_last_interface_state, _last_preset, _last_extensions, _last_show_controls, _last_theme_state, manual_save=False) settings_path = Path('user_data') / 'settings.yaml' - if not settings_path.parent.exists(): - settings_path.parent.mkdir(exist_ok=True) - + settings_path.parent.mkdir(exist_ok=True) with open(settings_path, 'w', encoding='utf-8') as f: f.write(contents) except Exception as e: diff --git a/modules/ui_default.py b/modules/ui_default.py index 21a63892..66174e35 100644 --- a/modules/ui_default.py +++ b/modules/ui_default.py @@ -152,9 +152,7 @@ def autosave_prompt(text, prompt_name): """Automatically save the text to the selected prompt file""" if prompt_name and text.strip(): prompt_path = Path("user_data/logs/notebook") / f"{prompt_name}.txt" - if not prompt_path.parent.exists(): - prompt_path.parent.mkdir(parents=True, exist_ok=True) - + prompt_path.parent.mkdir(parents=True, exist_ok=True) prompt_path.write_text(text, encoding='utf-8') @@ -207,9 +205,7 @@ def handle_new_prompt(): # Create the new prompt file prompt_path = Path("user_data/logs/notebook") / f"{new_name}.txt" - if not prompt_path.parent.exists(): - prompt_path.parent.mkdir(parents=True, exist_ok=True) - + prompt_path.parent.mkdir(parents=True, exist_ok=True) prompt_path.write_text("In this story,", encoding='utf-8') return gr.update(choices=utils.get_available_prompts(), value=new_name) @@ -226,9 +222,7 @@ def handle_delete_prompt_confirm_default(prompt_name): new_value = available_prompts[min(current_index, len(available_prompts) - 1)] else: new_value = utils.current_time() - if not Path("user_data/logs/notebook").exists(): - Path("user_data/logs/notebook").mkdir(parents=True, exist_ok=True) - + Path("user_data/logs/notebook").mkdir(parents=True, exist_ok=True) (Path("user_data/logs/notebook") / f"{new_value}.txt").write_text("In this story,") available_prompts = [new_value] diff --git a/modules/ui_notebook.py b/modules/ui_notebook.py index 916e613f..3a8c9df2 100644 --- a/modules/ui_notebook.py +++ b/modules/ui_notebook.py @@ -197,9 +197,7 @@ def handle_new_prompt(): # Create the new prompt 
file prompt_path = Path("user_data/logs/notebook") / f"{new_name}.txt" - if not prompt_path.parent.exists(): - prompt_path.parent.mkdir(parents=True, exist_ok=True) - + prompt_path.parent.mkdir(parents=True, exist_ok=True) prompt_path.write_text("In this story,", encoding='utf-8') return gr.update(choices=utils.get_available_prompts(), value=new_name) @@ -216,9 +214,7 @@ def handle_delete_prompt_confirm_notebook(prompt_name): new_value = available_prompts[min(current_index, len(available_prompts) - 1)] else: new_value = utils.current_time() - if not Path("user_data/logs/notebook").exists(): - Path("user_data/logs/notebook").mkdir(parents=True, exist_ok=True) - + Path("user_data/logs/notebook").mkdir(parents=True, exist_ok=True) (Path("user_data/logs/notebook") / f"{new_value}.txt").write_text("In this story,") available_prompts = [new_value] diff --git a/modules/utils.py b/modules/utils.py index b3bac583..da5ac978 100644 --- a/modules/utils.py +++ b/modules/utils.py @@ -160,8 +160,7 @@ def get_available_presets(): def get_available_prompts(): notebook_dir = Path('user_data/logs/notebook') - if not notebook_dir.exists(): - notebook_dir.mkdir(parents=True, exist_ok=True) + notebook_dir.mkdir(parents=True, exist_ok=True) prompt_files = list(notebook_dir.glob('*.txt')) sorted_files = sorted(prompt_files, key=lambda x: x.stat().st_mtime, reverse=True) diff --git a/server.py b/server.py index 2b998792..7ce3c208 100644 --- a/server.py +++ b/server.py @@ -10,8 +10,7 @@ from modules.logging_colors import logger # Set up Gradio temp directory path gradio_temp_path = Path('user_data') / 'cache' / 'gradio' shutil.rmtree(gradio_temp_path, ignore_errors=True) -if not gradio_temp_path.exists(): - gradio_temp_path.mkdir(parents=True, exist_ok=True) +gradio_temp_path.mkdir(parents=True, exist_ok=True) # Set environment variables os.environ.update({
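Nearly every functional (non-whitespace) change in this patch collapses an `if not path.exists(): path.mkdir(...)` guard into a bare `mkdir(parents=True, exist_ok=True)`. That call is idempotent, so the pre-check is redundant and only opens a window for a race between the existence test and the creation. A minimal before/after sketch of the pattern, not part of the patch itself (the path is hypothetical):

```python
from pathlib import Path

output_folder = Path("user_data/cache/example")  # hypothetical path, for illustration only

# Before: check-then-create; racy if another process creates the folder in between
if not output_folder.exists():
    output_folder.mkdir(parents=True, exist_ok=True)

# After: one idempotent call; exist_ok=True makes repeated calls a no-op
output_folder.mkdir(parents=True, exist_ok=True)
```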
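The Training_PRO schedulers touched above (`FP_low_epoch_annealing`, `FP_half_time_annealing`, `FP_raise_fall_creative`) all follow the same construction: a per-step multiplier function is bound with `functools.partial` and wrapped in `torch.optim.lr_scheduler.LambdaLR`. Below is a minimal, self-contained sketch of that wiring for the half-time shape (linear warmup, hold at the base rate until half of training, then cosine-anneal to zero); the function name and the toy optimizer are illustrative and not taken from the extension:

```python
import math
from functools import partial

import torch
from torch.optim.lr_scheduler import LambdaLR


def half_time_annealing_lambda(current_step: int, *, num_warmup_steps: int, num_training_steps: int) -> float:
    # Return the LR multiplier for a warmup -> hold -> cosine-anneal schedule.
    half_steps = num_training_steps // 2
    num_warmup_steps = min(num_warmup_steps, half_steps)
    if current_step < num_warmup_steps:
        return current_step / max(1, num_warmup_steps)          # linear warmup
    if current_step < half_steps:
        return 1.0                                               # hold at base LR
    progress = (current_step - half_steps) / max(1, num_training_steps - half_steps)
    return max(0.0, 0.5 * (1.0 + math.cos(math.pi * progress)))  # cosine down to 0


# Toy optimizer, only to show how the lambda plugs into LambdaLR.
params = [torch.nn.Parameter(torch.zeros(1))]
optimizer = torch.optim.SGD(params, lr=3e-4)
scheduler = LambdaLR(optimizer, partial(half_time_annealing_lambda,
                                        num_warmup_steps=100, num_training_steps=1000))

for step in range(1000):
    optimizer.step()
    scheduler.step()  # base LR (3e-4) is multiplied by the lambda's return value
```

With `num_cycles = 0.5`, the patch's `cos(pi * num_cycles * 2.0 * progress)` term reduces to the same `cos(pi * progress)` used here.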
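`FPNEFtuneTrainer` and `FPSchedulerTrainer` both activate NEFTune (https://arxiv.org/abs/2310.05914) by swapping the model's input-embedding `forward` for a version that, during training, adds uniform noise with magnitude `alpha / sqrt(seq_len * hidden_dim)`. A standalone sketch of just the noise step, with the Trainer plumbing and hook bookkeeping left out (tensor shapes are illustrative):

```python
import math

import torch


def neftune_noise(embeddings: torch.Tensor, noise_alpha: float) -> torch.Tensor:
    # embeddings: (batch, seq_len, hidden_dim); NEFTune applies this only in training mode.
    seq_len, hidden_dim = embeddings.size(1), embeddings.size(2)
    mag_norm = noise_alpha / math.sqrt(seq_len * hidden_dim)
    return embeddings + torch.zeros_like(embeddings).uniform_(-mag_norm, mag_norm)


embeds = torch.randn(2, 16, 64)                 # toy batch of token embeddings
noisy = neftune_noise(embeds, noise_alpha=5.0)  # 5 is the starting value the UI suggests
```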