diff --git a/modules/exllamav2.py b/modules/exllamav2.py
index 42b9ade1..0498c488 100644
--- a/modules/exllamav2.py
+++ b/modules/exllamav2.py
@@ -19,14 +19,6 @@ from modules.text_generation import get_max_prompt_length
 
 try:
     import flash_attn
-except ModuleNotFoundError:
-    logger.warning(
-        'You are running ExLlamaV2 without flash-attention. This will cause the VRAM usage '
-        'to be a lot higher than it could be.\n'
-        'Try installing flash-attention following the instructions here: '
-        'https://github.com/Dao-AILab/flash-attention#installation-and-features'
-    )
-    pass
 except Exception:
     logger.warning('Failed to load flash-attention due to the following error:\n')
     traceback.print_exc()
diff --git a/modules/exllamav2_hf.py b/modules/exllamav2_hf.py
index 96a89429..320a8d24 100644
--- a/modules/exllamav2_hf.py
+++ b/modules/exllamav2_hf.py
@@ -21,14 +21,6 @@ from modules.logging_colors import logger
 
 try:
     import flash_attn
-except ModuleNotFoundError:
-    logger.warning(
-        'You are running ExLlamaV2 without flash-attention. This will cause the VRAM usage '
-        'to be a lot higher than it could be.\n'
-        'Try installing flash-attention following the instructions here: '
-        'https://github.com/Dao-AILab/flash-attention#installation-and-features'
-    )
-    pass
 except Exception:
     logger.warning('Failed to load flash-attention due to the following error:\n')
     traceback.print_exc()
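
For context, both modules are left with a single generic handler around the optional import. Because `ModuleNotFoundError` is a subclass of `Exception`, a missing flash-attention package now falls through to that handler as well, so the traceback is printed instead of the removed tailored warning. A minimal sketch of how the guard reads after this change, assuming the `traceback` and `logger` imports already present in these modules:

```python
import traceback

from modules.logging_colors import logger

try:
    import flash_attn
except Exception:
    # Any failure to import flash-attention, including a plain
    # ModuleNotFoundError (which subclasses Exception), is reported
    # here with the underlying traceback.
    logger.warning('Failed to load flash-attention due to the following error:\n')
    traceback.print_exc()
```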