Review note: line breaks and indentation below were reconstructed from a whitespace-collapsed copy of this patch.
The blob ids on the `index` lines are those of the original patch and do not reflect the edited content.

diff --git a/.gitignore b/.gitignore
index 7693938..ac2c02d 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,3 +1,6 @@
+# Local runtime data (models, results, caches) bind-mounted by bin/docker-run.sh
+data/
+
 # Byte-compiled / optimized / DLL files
 __pycache__/
 *.py[cod]
diff --git a/Dockerfile b/Dockerfile
index 037a364..6b56bd8 100644
--- a/Dockerfile
+++ b/Dockerfile
@@ -45,3 +45,5 @@ RUN bash -c "source ${CONDA_DIR}/etc/profile.d/conda.sh && conda activate tortoi
 
-# Default command
-CMD ["/bin/bash"]
+# Default entrypoint: run the TTS script. Arguments given after the image name
+# in `docker run` are appended to it. No default CMD is declared, because an
+# exec-form CMD would be handed to the script as an extra argument.
+ENTRYPOINT ["python", "tortoise/do_tts.py"]
diff --git a/bin/docker-build.sh b/bin/docker-build.sh
new file mode 100644
index 0000000..48bce78
--- /dev/null
+++ b/bin/docker-build.sh
@@ -0,0 +1,2 @@
+# Build the image from the current directory and tag it `tts`.
+docker build . -t tts
diff --git a/bin/docker-remove.sh b/bin/docker-remove.sh
new file mode 100644
index 0000000..af89f31
--- /dev/null
+++ b/bin/docker-remove.sh
@@ -0,0 +1,3 @@
+# Remove the `tts-app` container created by bin/docker-run.sh (running or
+# stopped) so that the next run can reuse the name.
+docker rm -f tts-app
diff --git a/bin/docker-run.sh b/bin/docker-run.sh
new file mode 100644
index 0000000..30eaf64
--- /dev/null
+++ b/bin/docker-run.sh
@@ -0,0 +1,13 @@
+# Run the `tts` image as container `tts-app`. Invoke from the repository root.
+# Drop `--gpus all` on hosts without GPU support.
+# Bind mounts use absolute host paths: Docker CLIs older than 23.0 treat a
+# relative `./path` source as a (invalid) volume name.
+# Any arguments given to this script are forwarded to the image entrypoint.
+docker run --gpus all \
+    -e TORTOISE_MODELS_DIR=/models \
+    -v "$(pwd)/data/models:/models" \
+    -v "$(pwd)/data/results:/results" \
+    -v "$(pwd)/data/.cache/huggingface:/root/.cache/huggingface" \
+    -v /root:/work \
+    --name tts-app \
+    -it tts "$@"
diff --git a/bin/docker-start.sh b/bin/docker-start.sh
new file mode 100644
index 0000000..a0cdb9c
--- /dev/null
+++ b/bin/docker-start.sh
@@ -0,0 +1,3 @@
+# Build the image, then start the container; the run is skipped if the build
+# fails. Invoke from the repository root, since both paths are relative.
+. bin/docker-build.sh && . bin/docker-run.sh
diff --git a/tortoise/do_tts.py b/tortoise/do_tts.py
index c6e2b17..430bd3a 100644
--- a/tortoise/do_tts.py
+++ b/tortoise/do_tts.py
@@ -10,8 +10,9 @@ from utils.audio import load_voices
 if __name__ == '__main__':
     parser = argparse.ArgumentParser()
     parser.add_argument('--text', type=str, help='Text to speak.', default="The expressiveness of autoregressive transformers is literally nuts! I absolutely adore them.")
-    parser.add_argument('--voice', type=str, help='Selects the voice to use for generation. See options in voices/ directory (and add your own!) '
-                                                  'Use the & character to join two voices together. Use a comma to perform inference on multiple voices.', default='random')
+    parser.add_argument('--voice', type=str, help="""
+        Selects the voice to use for generation. See options in voices/ directory (and add your own!)
+        Use the & character to join two voices together. Use a comma to perform inference on multiple voices.""", default='random')
     parser.add_argument('--preset', type=str, help='Which voice preset to use.', default='fast')
     parser.add_argument('--use_deepspeed', type=str, help='Use deepspeed for speed bump.', default=False)
     parser.add_argument('--kv_cache', type=bool, help='If you disable this please wait for a long a time to get the output', default=True)