"""Command-line helpers to record voice samples or chop an audio file.

Three commands are exposed:

* ``record``   - record a fixed number of samples, with a countdown before each.
* ``keypress`` - record back-to-back samples until Ctrl+C is pressed.
* ``chop``     - split an existing audio file into fixed-length WAV chunks.

Every sample/chunk is ``DURATION`` seconds long and is written as a
32-bit float WAV file named ``1.wav``, ``2.wav``, ...
"""
import argparse
import math
import os
import tempfile
import time

import sounddevice as sd
import soundfile as sf
from pydub import AudioSegment
from scipy.signal import resample
from tqdm import tqdm

# Length, in seconds, of every recorded sample and of every chopped chunk.
DURATION = 10.0


def countdown(count: int) -> None:
    """Show a live countdown on one console line and block until it is over.

    The wait is measured against a monotonic clock, so the total time is
    ``count`` milliseconds regardless of how long printing or the individual
    sleeps take.

    Args:
        count (int): The amount of milliseconds to wait for.
    """
    deadline = time.monotonic() + count / 1000
    while True:
        remaining = deadline - time.monotonic()
        if remaining <= 0:
            break
        # Fixed-width field so a shorter number fully overwrites a longer one.
        print(f'{remaining:8.3f}', end='\r')
        time.sleep(min(0.05, remaining))


def record_until_keypress(
        voice_name: str,
        samplerate: int = 22050,
        channels: int = 1) -> None:
    """Record consecutive ``DURATION``-second chunks until Ctrl+C is pressed.

    Only chunks that were recorded completely are kept; the chunk in progress
    when the interrupt arrives is discarded. The kept chunks are then written
    to ``../tortoise/voices/<voice_name>/`` (relative to the current working
    directory) as ``1.wav``, ``2.wav``, ...

    Args:
        voice_name (str): The name of the voice that records.
        samplerate (int, optional): Target sample rate. Defaults to 22050.
        channels (int, optional): The number of channels (1=mono, 2=stereo). Defaults to 1.
    """
    frames_per_chunk = int(samplerate * DURATION)
    recordings = []
    print("·You are recording! Press Ctrl+C to stop.")
    try:
        while True:
            recording = sd.rec(
                frames_per_chunk, samplerate=samplerate, channels=channels,
                blocking=True)
            recordings.append(recording)
            print(f"Chunk #{len(recordings)} was recorded")
    except KeyboardInterrupt:
        print("Recording stopped.")

    output_folder = os.path.join('..', 'tortoise', 'voices', voice_name)
    # makedirs also creates missing parents, so a missing voices folder does
    # not throw away everything that was just recorded.
    os.makedirs(output_folder, exist_ok=True)
    for number, rec in enumerate(recordings, 1):
        fname = os.path.join(output_folder, f'{number}.wav')
        try:
            sf.write(fname, rec, samplerate, subtype='FLOAT')
        except Exception as err:
            # Deliberately best-effort: one chunk failing to save must not
            # prevent the remaining chunks from being written.
            print(f"Error saving chunk #{number}: {err}")


def record_audio(
        file_path: str,
        num_samples: int = 3,
        samplerate: int = 22050,
        channels: int = 1,
        timeout: int = 5) -> None:
    """Record ``num_samples`` samples of ``DURATION`` seconds each.

    Before every sample a countdown of ``timeout`` seconds is shown. Samples
    are saved as ``<file_path>/1.wav``, ``<file_path>/2.wav``, ...; the folder
    is created if it does not exist.

    Args:
        file_path (str): Path to the voice folder where the samples will be saved.
        num_samples (int, optional): The amount of samples to save. Defaults to 3.
        samplerate (int, optional): Target sample rate. Defaults to 22050.
        channels (int, optional): The number of channels (1=mono, 2=stereo). Defaults to 1.
        timeout (int, optional): The seconds to wait between samples. Defaults to 5.
    """
    os.makedirs(file_path, exist_ok=True)
    frames_per_sample = int(samplerate * DURATION)

    for number in range(1, num_samples + 1):
        print(f"Preparing to record sample {number} in {timeout} seconds...")
        countdown(timeout * 1000)

        print("Recording...")
        recording = sd.rec(
            frames_per_sample, samplerate=samplerate, channels=channels,
            blocking=True)

        fname = os.path.join(file_path, f'{number}.wav')
        sf.write(fname, recording, samplerate, subtype='FLOAT')

        print(f"Recording of sample {number} finished and saved as '{fname}'.")


def _load_audio(input_path: str, no_conversion: bool):
    """Return ``(data, samplerate)`` for ``input_path``.

    When ``no_conversion`` is false the file is first decoded with pydub and
    exported to a temporary WAV file, which is removed again once read.
    """
    if no_conversion:
        print("Loading WAV file...")
        return sf.read(input_path)

    print("Loading file...")
    audio = AudioSegment.from_file(input_path)

    print("Converting to WAV...")
    with tempfile.TemporaryDirectory() as tmp_dir:
        intermediate_path = os.path.join(tmp_dir, "intermediate.wav")
        # export() hands back the open output file; close it so the data is
        # released before reading and the temporary folder can be deleted.
        audio.export(intermediate_path, format="wav").close()

        print("Loading WAV file...")
        return sf.read(intermediate_path)


def chop_audio(input_path: str,
               output_folder: str,
               no_conversion: bool = True,
               samplerate: int = 22050) -> None:
    """Chop an audio file into consecutive chunks of ``DURATION`` seconds.

    The audio is resampled to ``samplerate`` when its own rate differs, then
    written as ``<output_folder>/1.wav``, ``<output_folder>/2.wav``, ... The
    last chunk holds whatever remains and may be shorter. The output folder is
    created if it does not exist.

    Args:
        input_path (str): Path to the original voice sample.
        output_folder (str): Path to the voice folder where the sample's chunks will be saved.
        no_conversion (bool, optional): Ignore the conversion to WAV format. Defaults to True.
        samplerate (int, optional): Target sample rate. Defaults to 22050.

    Raises:
        ValueError: If ``samplerate`` is too small to yield a non-empty chunk.
    """
    chunk_len = int(samplerate * DURATION)
    if chunk_len <= 0:
        raise ValueError(f"samplerate must be positive, got {samplerate}")

    data, og_samplerate = _load_audio(input_path, no_conversion)

    if og_samplerate != samplerate:
        print("Resampling audio data...")
        data = resample(data, len(data) * samplerate // og_samplerate)

    # Ceiling division: an exact multiple must not report an extra chunk.
    num_chunks = math.ceil(len(data) / chunk_len)
    os.makedirs(output_folder, exist_ok=True)

    print("Saving chunks...")
    starts = range(0, len(data), chunk_len)
    for number, start in tqdm(enumerate(starts, 1), total=num_chunks):
        # Slice by the sample offset, not by the 1-based chunk number.
        chunk = data[start:start + chunk_len]
        sf.write(os.path.join(output_folder, f'{number}.wav'),
                 chunk, samplerate, subtype='FLOAT')

    print(f"Conversion and chopping finished. Saved files in '{output_folder}'.")


def main() -> None:
    """Parse the command line and run the requested command."""
    parser = argparse.ArgumentParser(description='Process some audio.')
    parser.add_argument(
        'command',
        choices=['record', 'chop', 'keypress'],
        help='Command to execute')
    parser.add_argument(
        '--file_path',
        help='Path to the file to process')
    parser.add_argument(
        '--voice_name',
        help='The name of the voice that will record')
    parser.add_argument(
        '--output_folder',
        help='Folder to save the chunks')
    # The flag has always switched conversion ON; '--convert' says so, while
    # the historical '--no-convert' spelling is kept as an alias so existing
    # invocations keep working unchanged.
    parser.add_argument(
        '--convert', '--no-convert',
        dest='no_convert',
        action='store_false', default=True,
        help='Convert file to WAV format before chopping '
             '(--no-convert is a deprecated alias with the same effect)')
    parser.add_argument(
        '--num_samples',
        type=int, default=1,
        help='Number of samples to record')
    parser.add_argument(
        '--rec_timeout',
        type=int, default=5,
        help='Seconds between recordings')

    args = parser.parse_args()

    # Fail before recording/processing anything if a needed option is absent;
    # otherwise None would end up as a literal path or folder name.
    required = {
        'record': ['file_path'],
        'chop': ['file_path', 'output_folder'],
        'keypress': ['voice_name'],
    }
    missing = [f'--{name}' for name in required[args.command]
               if getattr(args, name) is None]
    if missing:
        parser.error(
            f"command '{args.command}' requires {', '.join(missing)}")

    if args.command == 'record':
        record_audio(
            args.file_path,
            num_samples=args.num_samples,
            timeout=args.rec_timeout)
    elif args.command == 'chop':
        chop_audio(
            args.file_path,
            args.output_folder,
            no_conversion=args.no_convert)
    elif args.command == 'keypress':
        record_until_keypress(args.voice_name)


if __name__ == "__main__":
    main()