From f396b82a4f92f5823ed2a9bd1ff32d915da4cf9a Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 9 Aug 2025 07:31:36 -0700
Subject: [PATCH] mtmd: Better way to detect if an EXL3 model is multimodal

---
 modules/exllamav3.py | 17 ++++++++++-------
 1 file changed, 10 insertions(+), 7 deletions(-)

diff --git a/modules/exllamav3.py b/modules/exllamav3.py
index f6c56cb0..70f6c2f1 100644
--- a/modules/exllamav3.py
+++ b/modules/exllamav3.py
@@ -98,13 +98,16 @@ class Exllamav3Model:
 
         # Load vision model component (ExLlamaV3 native)
         vision_model = None
-        try:
-            logger.info("Loading vision model component...")
-            vision_model = Model.from_config(config, component="vision")
-            vision_model.load(progressbar=True)
-            logger.info("Vision model loaded successfully")
-        except Exception as e:
-            logger.warning(f"Vision model loading failed (multimodal disabled): {e}")
+        if "vision_config" in config.config_dict:
+            logger.info("Vision component detected in model config. Attempting to load...")
+            try:
+                vision_model = Model.from_config(config, component="vision")
+                vision_model.load(progressbar=True)
+                logger.info("Vision model loaded successfully.")
+            except Exception as e:
+                logger.warning(f"Vision model loading failed (multimodal disabled): {e}")
+        else:
+            logger.info("No vision component in model config. Skipping multimodal setup.")
 
         generator = Generator(
             model=model,