mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2026-01-04 15:50:26 +01:00
mtmd: Fix /chat/completions for llama.cpp
This commit is contained in:
parent
38c0b4a1ad
commit
b62c8845f3
|
|
@@ -85,16 +85,28 @@ def process_parameters(body, is_legacy=False):
|
|||
|
||||
|
||||
def process_multimodal_content(content):
    """Flatten OpenAI-style message content into a single prompt string.

    Args:
        content: Either a plain string, or a list of content parts in the
            OpenAI multimodal format (dicts carrying a ``type`` key such as
            ``'text'`` or ``'image_url'``). Any other value is accepted too.

    Returns:
        * ``content`` unchanged when it is already a string.
        * For a list: the ``text`` of every ``'text'`` part joined with single
          spaces. If the list contains ``'image_url'`` parts, one
          ``<__media__>`` placeholder per image is emitted first, followed by
          a blank line and then the joined text.
        * ``str(content)`` for anything that is neither a string nor a list.
    """
    if isinstance(content, str):
        return content

    if not isinstance(content, list):
        return str(content)

    text_parts = []
    image_count = 0
    for item in content:
        # Parts that are not dicts carry no 'type' and are ignored.
        if not isinstance(item, dict):
            continue

        item_type = item.get('type', '')
        if item_type == 'text':
            text_parts.append(item.get('text', ''))
        elif item_type == 'image_url':
            # Only the number of images matters here; the image data itself
            # is not read by this function.
            image_count += 1

    final_text = ' '.join(text_parts)
    if image_count:
        # All placeholders go first, separated from the text by a blank line.
        image_placeholders = "<__media__>" * image_count
        return f"{image_placeholders}\n\n{final_text}"

    return final_text
|
||||
|
||||
|
|
|
|||
|
|
@@ -870,18 +870,19 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
|
|||
|
||||
row_idx = len(output['internal']) - 1
|
||||
|
||||
# Collect image attachments for multimodal generation
|
||||
image_attachments = []
|
||||
# Collect image attachments for multimodal generation from the entire history
|
||||
all_image_attachments = []
|
||||
if 'metadata' in output:
|
||||
user_key = f"user_{row_idx}"
|
||||
if user_key in output['metadata'] and "attachments" in output['metadata'][user_key]:
|
||||
for attachment in output['metadata'][user_key]["attachments"]:
|
||||
if attachment.get("type") == "image":
|
||||
image_attachments.append(attachment)
|
||||
for i in range(len(output['internal'])):
|
||||
user_key = f"user_{i}"
|
||||
if user_key in output['metadata'] and "attachments" in output['metadata'][user_key]:
|
||||
for attachment in output['metadata'][user_key]["attachments"]:
|
||||
if attachment.get("type") == "image":
|
||||
all_image_attachments.append(attachment)
|
||||
|
||||
# Add image attachments to state for the generation
|
||||
if image_attachments:
|
||||
state['image_attachments'] = image_attachments
|
||||
# Add all collected image attachments to state for the generation
|
||||
if all_image_attachments:
|
||||
state['image_attachments'] = all_image_attachments
|
||||
|
||||
# Generate the prompt
|
||||
kwargs = {
|
||||
|
|
|
|||
|
|
@@ -15,6 +15,7 @@ import requests
|
|||
from modules import shared
|
||||
from modules.image_utils import (
|
||||
convert_image_attachments_to_pil,
|
||||
convert_openai_messages_to_images,
|
||||
convert_pil_to_base64
|
||||
)
|
||||
from modules.logging_colors import logger
|
||||
|
|
@@ -133,10 +134,13 @@ class LlamaServer:
|
|||
payload = self.prepare_payload(state)
|
||||
|
||||
pil_images = []
|
||||
# Check for images from the Web UI (image_attachments)
|
||||
# Source 1: Web UI (from chatbot_wrapper)
|
||||
if 'image_attachments' in state and state['image_attachments']:
|
||||
pil_images.extend(convert_image_attachments_to_pil(state['image_attachments']))
|
||||
# Else, check for images from the API (raw_images)
|
||||
# Source 2: Chat Completions API (/v1/chat/completions)
|
||||
elif 'history' in state and state.get('history', {}).get('messages'):
|
||||
pil_images.extend(convert_openai_messages_to_images(state['history']['messages']))
|
||||
# Source 3: Legacy Completions API (/v1/completions)
|
||||
elif 'raw_images' in state and state['raw_images']:
|
||||
pil_images.extend(state.get('raw_images', []))
|
||||
|
||||
|
|
|
|||
Loading…
Reference in a new issue