diff --git a/modules/llama_cpp_server.py b/modules/llama_cpp_server.py
index e82edb90..51dacb84 100644
--- a/modules/llama_cpp_server.py
+++ b/modules/llama_cpp_server.py
@@ -34,6 +34,7 @@ class LlamaServer:
         self.process = None
         self.session = requests.Session()
         self.vocabulary_size = None
+        self.has_multimodal = False
         self.bos_token = ""
         self.last_prompt_token_count = 0
 
@@ -144,6 +145,10 @@ class LlamaServer:
         elif 'raw_images' in state and state['raw_images']:
             pil_images.extend(state.get('raw_images', []))
 
+        # Fail early if images are provided but the model doesn't support them
+        if pil_images and not self.has_multimodal:
+            raise RuntimeError("The loaded llama.cpp model does not support multimodal requests. You must load a vision model and provide an mmproj file.")
+
         if pil_images:
             # Multimodal case
             IMAGE_TOKEN_COST_ESTIMATE = 600  # A safe, conservative estimate per image
@@ -261,8 +266,8 @@ class LlamaServer:
         else:
             raise Exception(f"Unexpected response format: 'completion_probabilities' not found in {result}")
 
-    def _get_vocabulary_size(self):
-        """Get and store the model's maximum context length."""
+    def _get_model_properties(self):
+        """Get and store the model's properties, including vocab size and multimodal capability."""
         url = f"http://127.0.0.1:{self.port}/v1/models"
         response = self.session.get(url).json()
 
@@ -271,6 +276,10 @@ class LlamaServer:
         if "meta" in model_info and "n_vocab" in model_info["meta"]:
             self.vocabulary_size = model_info["meta"]["n_vocab"]
 
+        # Check for multimodal capability
+        if "capabilities" in model_info and "multimodal" in model_info["capabilities"]:
+            self.has_multimodal = True
+
     def _get_bos_token(self):
         """Get and store the model's BOS token."""
         url = f"http://127.0.0.1:{self.port}/props"
@@ -421,7 +430,7 @@ class LlamaServer:
             time.sleep(1)
 
         # Server is now healthy, get model info
-        self._get_vocabulary_size()
+        self._get_model_properties()
         self._get_bos_token()
 
         return self.port
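
Note: the new capability check relies on the shape of the server's /v1/models response. A minimal sketch of the model entry that _get_model_properties assumes, with field names taken from the checks in the patch and purely hypothetical example values:

    # Hypothetical /v1/models entry; only "meta.n_vocab" and "capabilities"
    # are actually inspected by _get_model_properties.
    model_info = {
        "id": "example-vision-model.gguf",
        "meta": {"n_vocab": 128256},                   # stored as self.vocabulary_size
        "capabilities": ["completion", "multimodal"],  # "multimodal" sets self.has_multimodal
    }

    has_multimodal = "capabilities" in model_info and "multimodal" in model_info["capabilities"]

If the loaded model was started without an mmproj file, the "multimodal" entry is expected to be absent, so has_multimodal stays False and any request that includes images fails early with the RuntimeError added above instead of reaching the server.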