import argparse
import os
import tempfile
import time

import sounddevice as sd
import soundfile as sf
from pydub import AudioSegment
from scipy.signal import resample
from tqdm import tqdm

# Length, in seconds, of every recorded or chopped audio fragment.
DURATION = 10.0


def _chunk_frames(samplerate: int) -> int:
    """Return the number of frames in one DURATION-long fragment."""
    return int(samplerate * DURATION)


def countdown(count: int) -> None:
    """Display a countdown on the current terminal line.

    The wait is measured against a monotonic deadline, so the total time
    does not drift with sleep granularity or terminal output overhead.

    Args:
        count (int): The amount of milliseconds to wait for. Values less
            than or equal to zero return immediately.
    """
    deadline = time.monotonic() + count / 1000
    while True:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        # Fixed-width field so a shorter value fully overwrites the previous
        # one; '\r' keeps the countdown on a single line.
        print(f'{remaining:6.1f}', end='\r', flush=True)
        time.sleep(min(0.05, remaining))


def record_until_keypress(
        voice_name: str,
        samplerate: int = 22050,
        channels: int = 1) -> None:
    """Record consecutive fragments until Ctrl+C is pressed, then save them.

    Each fragment is DURATION seconds long. The fragment being recorded when
    the interrupt arrives is discarded. Fragments are written as
    '../tortoise/voices/<voice_name>/<n>.wav', numbered from 1.

    Args:
        voice_name (str): The name of the voice that records.
        samplerate (int, optional): Target sample rate. Defaults to 22050.
        channels (int, optional): The number of channels (1=mono, 2=stereo).
            Defaults to 1.
    """
    recordings = []
    print("·You are recording! Press Ctrl+C to stop.")
    try:
        while True:
            recording = sd.rec(
                _chunk_frames(samplerate),
                samplerate=samplerate,
                channels=channels,
                blocking=True)
            recordings.append(recording)
            print(f"Chunk #{len(recordings)} was recorded")
    except KeyboardInterrupt:
        print("Recording stopped.")

    output_folder = f'../tortoise/voices/{voice_name}/'
    # Create missing parent folders too; a plain mkdir would raise here and
    # throw away everything that was just recorded.
    os.makedirs(output_folder, exist_ok=True)

    for index, rec in enumerate(recordings, 1):
        fname = os.path.join(output_folder, f'{index}.wav')
        try:
            sf.write(fname, rec, samplerate, subtype='FLOAT')
        except Exception as err:
            # Best effort: keep saving the remaining chunks, but report why
            # this one failed.
            print(f"Error saving chunk #{index}: {err}")


def record_audio(
        file_path: str,
        num_samples: int = 3,
        samplerate: int = 22050,
        channels: int = 1,
        timeout: int = 5) -> None:
    """Record a fixed number of DURATION-second samples.

    Args:
        file_path (str): Path to the voice folder where the samples will be
            saved. It is created if it does not exist.
        num_samples (int, optional): The amount of samples to save.
            Defaults to 3.
        samplerate (int, optional): Target sample rate. Defaults to 22050.
        channels (int, optional): The number of channels (1=mono, 2=stereo).
            Defaults to 1.
        timeout (int, optional): The seconds to wait between samples.
            Defaults to 5.
    """
    # Fail (or create the folder) before the user spends time recording.
    os.makedirs(file_path, exist_ok=True)

    for index in range(1, num_samples + 1):
        print(f"Preparing to record sample {index} in {timeout} seconds...")
        countdown(timeout * 1000)
        print("Recording...")
        recording = sd.rec(
            _chunk_frames(samplerate),
            samplerate=samplerate,
            channels=channels,
            blocking=True)
        fname = os.path.join(file_path, f'{index}.wav')
        sf.write(fname, recording, samplerate, subtype='FLOAT')
        print(f"Recording of sample {index} finished and saved as '{fname}'.")


def chop_audio(
        input_path: str,
        output_folder: str,
        no_conversion: bool = True,
        samplerate: int = 22050) -> None:
    """Chop an audio file into consecutive DURATION-second chunks.

    Chunks are written as '<output_folder>/<n>.wav', numbered from 1. The
    last chunk holds whatever audio remains and may be shorter.

    Args:
        input_path (str): Path to the original voice sample.
        output_folder (str): Path to the voice folder where the sample's
            chunks will be saved. It is created if it does not exist.
        no_conversion (bool, optional): Read the input directly instead of
            converting it to WAV first. Defaults to True.
        samplerate (int, optional): Target sample rate. Defaults to 22050.
    """
    os.makedirs(output_folder, exist_ok=True)

    if no_conversion:
        print("Loading WAV file...")
        data, og_samplerate = sf.read(input_path)
    else:
        print("Loading file...")
        audio = AudioSegment.from_file(input_path)
        print("Converting to WAV...")
        # Convert inside a temporary directory so no stray
        # 'intermediate.wav' is left in the working directory.
        with tempfile.TemporaryDirectory() as tmp_dir:
            intermediate_path = os.path.join(tmp_dir, "intermediate.wav")
            # export() returns the open output file; close it so the file
            # can be re-read and the directory removed on every platform.
            audio.export(intermediate_path, format="wav").close()
            print("Loading WAV file...")
            data, og_samplerate = sf.read(intermediate_path)

    if og_samplerate != samplerate:
        print("Resampling audio data...")
        data = resample(data, len(data) * samplerate // og_samplerate)

    chunk_frames = _chunk_frames(samplerate)
    starts = range(0, len(data), chunk_frames)
    print("Saving chunks...")
    for index, start in tqdm(enumerate(starts, 1), total=len(starts)):
        # Slice by the frame offset, not by the 1-based chunk number.
        chunk = data[start:start + chunk_frames]
        sf.write(
            os.path.join(output_folder, f'{index}.wav'),
            chunk,
            samplerate,
            subtype='FLOAT')
    print(f"Conversion and chopping finished. Saved files in '{output_folder}'.")


def _build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for the record/chop/keypress commands."""
    parser = argparse.ArgumentParser(description='Process some audio.')
    parser.add_argument(
        'command',
        choices=['record', 'chop', 'keypress'],
        help='Command to execute')
    parser.add_argument(
        '--file_path',
        help='Path to the file to process')
    parser.add_argument(
        '--voice_name',
        help='The name of the voice that will record')
    parser.add_argument(
        '--output_folder',
        help='Folder to save the chunks')
    # Historically "--no-convert" ENABLED the conversion. That behaviour is
    # kept for existing callers, and "--convert" is offered as the correctly
    # named spelling of the same switch.
    parser.add_argument(
        '--convert', '--no-convert',
        dest='no_convert',
        action='store_false',
        default=True,
        help='Convert file to WAV format before chopping (default: read the '
             'input as-is). "--no-convert" is a legacy alias of "--convert".')
    parser.add_argument(
        '--num_samples',
        type=int,
        default=1,
        help='Number of samples to record')
    parser.add_argument(
        '--rec_timeout',
        type=int,
        default=5,
        help='Seconds between recordings')
    return parser


def main(argv=None) -> None:
    """Parse the command line and run the selected command.

    Args:
        argv (list[str], optional): Arguments to parse. Defaults to None,
            which makes argparse read sys.argv.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    # Options each command cannot work without; checked up front so the
    # failure is a clear usage error rather than a write to a 'None' path.
    required = {
        'record': ['file_path'],
        'chop': ['file_path', 'output_folder'],
        'keypress': ['voice_name'],
    }
    missing = [
        f'--{name}' for name in required[args.command]
        if getattr(args, name) is None
    ]
    if missing:
        parser.error(
            f"command '{args.command}' requires {', '.join(missing)}")

    if args.command == 'record':
        record_audio(
            args.file_path,
            num_samples=args.num_samples,
            timeout=args.rec_timeout)
    elif args.command == 'chop':
        chop_audio(
            args.file_path,
            args.output_folder,
            no_conversion=args.no_convert)
    elif args.command == 'keypress':
        record_until_keypress(args.voice_name)


if __name__ == "__main__":
    main()