diff --git a/tortoise/api.py b/tortoise/api.py index 8a010c2..7188a4d 100644 --- a/tortoise/api.py +++ b/tortoise/api.py @@ -285,7 +285,7 @@ class TextToSpeech: sample = torchaudio.functional.resample(sample, 22050, 24000) sample = pad_or_truncate(sample, 102400) cond_mel = wav_to_univnet_mel(sample.to(self.device), do_normalization=False, - device=self.device, stft=self.stft) + device=self.device) diffusion_conds.append(cond_mel) diffusion_conds = torch.stack(diffusion_conds, dim=1)