From 72eccabcb7d4c7a9d943a2c9b63211e4286cf385 Mon Sep 17 00:00:00 2001 From: Roberts Slisans Date: Tue, 15 Aug 2023 11:27:43 +0300 Subject: [PATCH] add tokenizer --- setup.py | 2 +- tortoise/api.py | 9 +++++++-- tortoise/utils/tokenizer.py | 15 ++++++++------- 3 files changed, 16 insertions(+), 10 deletions(-) diff --git a/setup.py b/setup.py index b4357a6..0d86300 100644 --- a/setup.py +++ b/setup.py @@ -6,7 +6,7 @@ with open("README.md", "r", encoding="utf-8") as fh: setuptools.setup( name="TorToiSe", packages=setuptools.find_packages(), - version="2.7.0", + version="2.8.0", author="James Betker", author_email="james@adamant.ai", description="A high quality multi-voice text-to-speech library", diff --git a/tortoise/api.py b/tortoise/api.py index 59f1681..57d0159 100644 --- a/tortoise/api.py +++ b/tortoise/api.py @@ -207,7 +207,9 @@ class TextToSpeech: """ def __init__(self, autoregressive_batch_size=None, models_dir=MODELS_DIR, - enable_redaction=True, kv_cache=False, use_deepspeed=False, half=False, device=None): + enable_redaction=True, kv_cache=False, use_deepspeed=False, half=False, device=None, + tokenizer_vocab_file=None, tokenizer_basic=False): + """ Constructor :param autoregressive_batch_size: Specifies how many samples to generate per batch. Lower this if you are seeing @@ -228,7 +230,10 @@ class TextToSpeech: if self.enable_redaction: self.aligner = Wav2VecAlignment() - self.tokenizer = VoiceBpeTokenizer() + self.tokenizer = VoiceBpeTokenizer( + vocab_file=tokenizer_vocab_file, + use_basic_cleaners=tokenizer_basic, + ) self.half = half if os.path.exists(f'{models_dir}/autoregressive.ptt'): # Assume this is a traced directory. diff --git a/tortoise/utils/tokenizer.py b/tortoise/utils/tokenizer.py index 70bcdb6..922f23e 100644 --- a/tortoise/utils/tokenizer.py +++ b/tortoise/utils/tokenizer.py @@ -170,13 +170,14 @@ DEFAULT_VOCAB_FILE = os.path.join(os.path.dirname(os.path.realpath(__file__)), ' class VoiceBpeTokenizer: - def __init__(self, vocab_file=DEFAULT_VOCAB_FILE): - if vocab_file is not None: - self.tokenizer = Tokenizer.from_file(vocab_file) - - def preprocess_text(self, txt): - txt = english_cleaners(txt) - return txt + def __init__(self, vocab_file=None, use_basic_cleaners=False): + self.tokenizer = Tokenizer.from_file( + DEFAULT_VOCAB_FILE if vocab_file is None else vocab_file + ) + if use_basic_cleaners: + self.preprocess_text = basic_cleaners + else: + self.preprocess_text = english_cleaners def encode(self, txt): txt = self.preprocess_text(txt)