Set multimodal status during Model Loading (#7199)

This commit is contained in:
altoiddealer 2025-08-13 15:47:27 -04:00 committed by GitHub
parent 725a8bcf60
commit 57f6e9af5a
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 30 additions and 9 deletions

View file

@ -8,6 +8,7 @@ import sys
import threading
import time
from pathlib import Path
from typing import Any, List
import llama_cpp_binaries
import requests
@ -129,10 +130,10 @@ class LlamaServer:
return payload
def generate_with_streaming(self, prompt, state):
url = f"http://127.0.0.1:{self.port}/completion"
payload = self.prepare_payload(state)
def _process_images_for_generation(self, state: dict) -> List[Any]:
"""
Process all possible image inputs and return PIL images
"""
pil_images = []
# Source 1: Web UI (from chatbot_wrapper)
if 'image_attachments' in state and state['image_attachments']:
@ -144,6 +145,21 @@ class LlamaServer:
elif 'raw_images' in state and state['raw_images']:
pil_images.extend(state.get('raw_images', []))
return pil_images
def is_multimodal(self) -> bool:
    """Report whether the loaded model accepts image (multimodal) input.

    Multimodal support is determined by whether an mmproj projector file
    was supplied on the command line: absent (`None`) or the literal
    string 'None' means text-only.
    """
    mmproj = shared.args.mmproj
    return mmproj is not None and mmproj != 'None'
def generate_with_streaming(self, prompt, state):
url = f"http://127.0.0.1:{self.port}/completion"
payload = self.prepare_payload(state)
pil_images = []
if shared.is_multimodal:
pil_images = self._process_images_for_generation(state)
if pil_images:
# Multimodal case
IMAGE_TOKEN_COST_ESTIMATE = 600 # A safe, conservative estimate per image