From 8d7b88106a34102863a491a9c8848871c5118a85 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 12 Aug 2025 13:20:16 -0700 Subject: [PATCH] Revert "mtmd: Fail early if images are provided but the model doesn't support them (llama.cpp)" This reverts commit d8fcc71616307a8ecacea93b7bdfa1117a23e1fe. --- modules/llama_cpp_server.py | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) diff --git a/modules/llama_cpp_server.py b/modules/llama_cpp_server.py index 51dacb84..e82edb90 100644 --- a/modules/llama_cpp_server.py +++ b/modules/llama_cpp_server.py @@ -34,7 +34,6 @@ class LlamaServer: self.process = None self.session = requests.Session() self.vocabulary_size = None - self.has_multimodal = False self.bos_token = "" self.last_prompt_token_count = 0 @@ -145,10 +144,6 @@ class LlamaServer: elif 'raw_images' in state and state['raw_images']: pil_images.extend(state.get('raw_images', [])) - # Fail early if images are provided but the model doesn't support them - if pil_images and not self.has_multimodal: - raise RuntimeError("The loaded llama.cpp model does not support multimodal requests. You must load a vision model and provide an mmproj file.") - if pil_images: # Multimodal case IMAGE_TOKEN_COST_ESTIMATE = 600 # A safe, conservative estimate per image @@ -266,8 +261,8 @@ class LlamaServer: else: raise Exception(f"Unexpected response format: 'completion_probabilities' not found in {result}") - def _get_model_properties(self): - """Get and store the model's properties, including vocab size and multimodal capability.""" + def _get_vocabulary_size(self): + """Get and store the model's maximum context length.""" url = f"http://127.0.0.1:{self.port}/v1/models" response = self.session.get(url).json() @@ -276,10 +271,6 @@ class LlamaServer: if "meta" in model_info and "n_vocab" in model_info["meta"]: self.vocabulary_size = model_info["meta"]["n_vocab"] - # Check for multimodal capability - if "capabilities" in model_info and "multimodal" in model_info["capabilities"]: - self.has_multimodal = True - def _get_bos_token(self): """Get and store the model's BOS token.""" url = f"http://127.0.0.1:{self.port}/props" @@ -430,7 +421,7 @@ class LlamaServer: time.sleep(1) # Server is now healthy, get model info - self._get_model_properties() + self._get_vocabulary_size() self._get_bos_token() return self.port