From 3d003c608d20c5ce81d4c9d376d62d5182fe2787 Mon Sep 17 00:00:00 2001 From: Luis Ruanova Date: Sat, 3 Jun 2023 05:29:59 +0200 Subject: [PATCH 1/3] Adding recording and chopping tools --- scripts/recording_tools.py | 98 ++++++++++++++++++++++++++++++++++++++ 1 file changed, 98 insertions(+) create mode 100644 scripts/recording_tools.py diff --git a/scripts/recording_tools.py b/scripts/recording_tools.py new file mode 100644 index 0000000..de8f121 --- /dev/null +++ b/scripts/recording_tools.py @@ -0,0 +1,98 @@ +import argparse +import time +from pydub import AudioSegment +from scipy.signal import resample +from tqdm import tqdm +import sounddevice as sd +import soundfile as sf + +DURATION = 10.0 + + +def countdown(count): + """Create a countdown for specified number of seconds.""" + for s_count in range(count): + print(f'{(count-s_count)/1000}', end='\r') + time.sleep(0.001) + +def record_until_keypress(output_folder, samplerate=22050, channels=1): + """Record audio in chunks until interrupted, then chop into 10s fragments.""" + recordings = [] + print("·You are recording! Press Ctrl+C to stop.") + i=0 + try: + while True: + recording = sd.rec(int(samplerate * DURATION), samplerate=samplerate, channels=channels, blocking=True) + i+=1 + print(f"Chunk #{i} was recorded") + recordings.append(recording) + except KeyboardInterrupt: + print("Recording stopped.") + + for i, rec in enumerate(recordings): + fname = f'{output_folder}/{i+1}.wav' + try: + sf.write(fname, rec, samplerate, subtype='FLOAT') + except Exception: + print(f"Error saving chunk #{i+1}.") + +def record_audio(file_path, num_samples=1, samplerate=22050, channels=1, timeout=5000): + """Record audio with the specified parameters.""" + for i in range(num_samples): + print(f"Preparing to record sample {i+1} in 5 seconds...") + countdown(timeout) + + print("Recording...") + recording = sd.rec(int(samplerate * DURATION), samplerate=samplerate, + channels=channels, blocking=True) + + fname = f'{file_path}/{i+1}.wav' + sf.write(fname, recording, samplerate, subtype='FLOAT') + + print(f"Recording of sample {i+1} finished and saved as '{fname}'.") + +def chop_audio(input_path, output_folder, convert=False, samplerate=22050): + """Chop an audio file into chunks with specified duration.""" + if convert: + print("Loading file...") + audio = AudioSegment.from_file(input_path) + + print("Converting to WAV...") + intermediate_path = "intermediate.wav" + audio.export(intermediate_path, format="wav") + else: + intermediate_path = input_path + + print("Loading WAV file...") + data, og_samplerate = sf.read(intermediate_path) + + print("Resampling audio data...") + data = resample(data, len(data) * samplerate // og_samplerate) + + num_chunks = len(data) // int(samplerate * DURATION) + 1 + + print("Saving chunks...") + for i, _ in tqdm(enumerate(range(0, len(data), int(samplerate * DURATION)), 1), + total=num_chunks): + chunk = data[i:i + int(samplerate * DURATION)] + sf.write(f'{output_folder}/{i}.wav', chunk, samplerate, subtype='FLOAT') + + print(f"Conversion and chopping finished. Saved files in '{output_folder}'.") + +if __name__ == "__main__": + parser = argparse.ArgumentParser(description='Process some audio.') + parser.add_argument('command', choices=['record', 'chop', 'keypress'], help='Command to execute') + parser.add_argument('--file_path', help='Path to the file to process') + parser.add_argument('--output_folder', help='Folder to save the chunks') + parser.add_argument('--convert', action='store_true', default=False, help='Convert file to WAV format') + parser.add_argument('--num_samples', type=int, default=1, help='Number of samples to record') + parser.add_argument('--rec_timeout', type=int, default=5000, help='Milliseconds between recordings') + + args = parser.parse_args() + + if args.command == 'record': + record_audio(args.file_path, num_samples=args.num_samples, timeout=args.rec_timeout) + elif args.command == 'chop': + chop_audio(args.file_path, args.output_folder, convert=args.convert) + elif args.command == 'keypress': + record_until_keypress(args.file_path) \ No newline at end of file From c871a8f7f71e0a285255f091446204cd3f2b47df Mon Sep 17 00:00:00 2001 From: TheMaakarov Date: Sun, 4 Jun 2023 00:56:21 +0200 Subject: [PATCH 2/3] Clarifies input and adds docstrings --- scripts/recording_tools.py | 108 ++++++++++++++++++++++++++++--------- 1 file changed, 83 insertions(+), 25 deletions(-) diff --git a/scripts/recording_tools.py b/scripts/recording_tools.py index de8f121..46c36bb 100644 --- a/scripts/recording_tools.py +++ b/scripts/recording_tools.py @@ -8,21 +8,34 @@ import soundfile as sf DURATION = 10.0 +def countdown(count: int): + """Create a countdown for specified number of seconds. -def countdown(count): - """Create a countdown for specified number of seconds.""" - for s_count in range(count): - print(f'{(count-s_count)/1000}', end='\r') + Args: + count (int): The amount of milliseconds to wait for. + """ + for elapsed_ms in range(count): + print(f'{(count-elapsed_ms)/1000}', end='\r') time.sleep(0.001) -def record_until_keypress(output_folder, samplerate=22050, channels=1): - """Record audio in chunks until interrupted, then chop into 10s fragments.""" +def record_until_keypress( + output_folder: str, + samplerate: int=22050, + channels: int=1): + """Record audio in chunks until interrupted, then chop into 10s fragments. + + Args: + output_folder (str): Path to the voice folder where the chunks will be saved. + samplerate (int, optional): Target sample rate. Defaults to 22050. + channels (int, optional): The number of channels (1=mono, 2=stereo). Defaults to 1. + """ recordings = [] print("·You are recording! Press Ctrl+C to stop.") i=0 try: while True: - recording = sd.rec(int(samplerate * DURATION), samplerate=samplerate, channels=channels, blocking=True) + recording =sd.rec( + int(samplerate * DURATION), samplerate=samplerate, channels=channels, blocking=True) i+=1 print(f"Chunk #{i} was recorded") recordings.append(recording) @@ -36,11 +49,24 @@ def record_until_keypress(output_folder, samplerate=22050, channels=1): except Exception: print(f"Error saving chunk #{i+1}.") -def record_audio(file_path, num_samples=1, samplerate=22050, channels=1, timeout=5000): - """Record audio with the specified parameters.""" +def record_audio( + file_path: str, + num_samples: int=3, + samplerate: int=22050, + channels: int=1, + timeout: int=5): + """Record audio with the specified parameters. + + Args: + file_path (str): Path to the voice folder where the samples will be saved. + num_samples (int, optional): The amount of samples to save. Defaults to 3. + samplerate (int, optional): Target sample rate. Defaults to 22050. + channels (int, optional): The number of channels (1=mono, 2=stereo). Defaults to 1. + timeout (int, optional): The seconds to wait between samples. Defaults to 5. + """ for i in range(num_samples): - print(f"Preparing to record sample {i+1} in 5 seconds...") - countdown(timeout) + print(f"Preparing to record sample {i+1} in {timeout} seconds...") + countdown(timeout * 1000) print("Recording...") recording = sd.rec(int(samplerate * DURATION), samplerate=samplerate, @@ -51,17 +77,27 @@ def record_audio(file_path, num_samples=1, samplerate=22050, channels=1, timeout print(f"Recording of sample {i+1} finished and saved as '{fname}'.") -def chop_audio(input_path, output_folder, convert=False, samplerate=22050): - """Chop an audio file into chunks with specified duration.""" - if convert: +def chop_audio(input_path: str, + output_folder: str, + no_conversion: bool=True, + samplerate: int=22050): + """Chop an audio file into chunks with specified duration. + + Args: + input_path (str): Path to the original voice sample. + output_folder (str): Path to the voice folder where the sample's chunks will be saved. + no_conversion (bool, optional): Ignore the conversion to WAV format. Defaults to True. + samplerate (int, optional): Target sample rate. Defaults to 22050. + """ + if no_conversion: + intermediate_path = input_path + else: print("Loading file...") audio = AudioSegment.from_file(input_path) print("Converting to WAV...") intermediate_path = "intermediate.wav" audio.export(intermediate_path, format="wav") - else: - intermediate_path = input_path print("Loading WAV file...") data, og_samplerate = sf.read(intermediate_path) @@ -81,18 +117,40 @@ def chop_audio(input_path, output_folder, convert=False, samplerate=22050): if __name__ == "__main__": parser = argparse.ArgumentParser(description='Process some audio.') - parser.add_argument('command', choices=['record', 'chop', 'keypress'], help='Command to execute') - parser.add_argument('--file_path', help='Path to the file to process') - parser.add_argument('--output_folder', help='Folder to save the chunks') - parser.add_argument('--convert', action='store_true', default=False, help='Convert file to WAV format') - parser.add_argument('--num_samples', type=int, default=1, help='Number of samples to record') - parser.add_argument('--rec_timeout', type=int, default=5000, help='Milliseconds between recordings') + parser.add_argument( + 'command', + choices=['record', 'chop', 'keypress'], + help='Command to execute') + parser.add_argument( + '--file_path', + help='Path to the file to process') + parser.add_argument( + '--output_folder', + help='Folder to save the chunks') + parser.add_argument( + '--no-convert', + action='store_false', default=True, + help='Convert file to WAV format') + parser.add_argument( + '--num_samples', + type=int, default=1, + help='Number of samples to record') + parser.add_argument( + '--rec_timeout', + type=int, default=5, + help='Seconds between recordings') args = parser.parse_args() if args.command == 'record': - record_audio(args.file_path, num_samples=args.num_samples, timeout=args.rec_timeout) + record_audio( + args.file_path, + num_samples=args.num_samples, + timeout=args.rec_timeout) elif args.command == 'chop': - chop_audio(args.file_path, args.output_folder, convert=args.convert) + chop_audio( + args.file_path, + args.output_folder, + no_conversion=args.no_convert) elif args.command == 'keypress': - record_until_keypress(args.file_path) \ No newline at end of file + record_until_keypress(args.file_path) From 1d5f26e647d90967e92fb06beaa619c488a93488 Mon Sep 17 00:00:00 2001 From: TheMaakarov Date: Sun, 4 Jun 2023 01:16:35 +0200 Subject: [PATCH 3/3] Adds parameter --voice-name for keypress recording only --- scripts/recording_tools.py | 18 ++++++++++++++---- 1 file changed, 14 insertions(+), 4 deletions(-) diff --git a/scripts/recording_tools.py b/scripts/recording_tools.py index 46c36bb..3e56aa6 100644 --- a/scripts/recording_tools.py +++ b/scripts/recording_tools.py @@ -3,6 +3,7 @@ import time from pydub import AudioSegment from scipy.signal import resample from tqdm import tqdm +import os import sounddevice as sd import soundfile as sf @@ -19,13 +20,13 @@ def countdown(count: int): time.sleep(0.001) def record_until_keypress( - output_folder: str, + voice_name: str, samplerate: int=22050, channels: int=1): """Record audio in chunks until interrupted, then chop into 10s fragments. Args: - output_folder (str): Path to the voice folder where the chunks will be saved. + voice_name (str): The name of the voice that records. samplerate (int, optional): Target sample rate. Defaults to 22050. channels (int, optional): The number of channels (1=mono, 2=stereo). Defaults to 1. """ @@ -34,7 +35,7 @@ def record_until_keypress( i=0 try: while True: - recording =sd.rec( + recording = sd.rec( int(samplerate * DURATION), samplerate=samplerate, channels=channels, blocking=True) i+=1 print(f"Chunk #{i} was recorded") @@ -42,6 +43,11 @@ def record_until_keypress( except KeyboardInterrupt: print("Recording stopped.") + output_folder = f'../tortoise/voices/{voice_name}/' + try: + os.mkdir(output_folder) + except FileExistsError: + pass for i, rec in enumerate(recordings): fname = f'{output_folder}/{i+1}.wav' try: @@ -124,6 +130,9 @@ if __name__ == "__main__": parser.add_argument( '--file_path', help='Path to the file to process') + parser.add_argument( + '--voice_name', + help='The name of the voice that will record') parser.add_argument( '--output_folder', help='Folder to save the chunks') @@ -141,6 +150,7 @@ if __name__ == "__main__": help='Seconds between recordings') args = parser.parse_args() + print(args.no_convert) if args.command == 'record': record_audio( @@ -153,4 +163,4 @@ if __name__ == "__main__": args.output_folder, no_conversion=args.no_convert) elif args.command == 'keypress': - record_until_keypress(args.file_path) + record_until_keypress(args.voice_name)