diff --git a/extensions/openai/completions.py b/extensions/openai/completions.py
index 6f4dfc29..c3037d0c 100644
--- a/extensions/openai/completions.py
+++ b/extensions/openai/completions.py
@@ -85,16 +85,28 @@ def process_parameters(body, is_legacy=False):
 
 
 def process_multimodal_content(content):
-    """Extract text from OpenAI multimodal format for non-multimodal models"""
+    """Extract text and add image placeholders from OpenAI multimodal format"""
     if isinstance(content, str):
         return content
 
     if isinstance(content, list):
         text_parts = []
+        image_placeholders = ""
         for item in content:
-            if isinstance(item, dict) and item.get('type') == 'text':
+            if not isinstance(item, dict):
+                continue
+
+            item_type = item.get('type', '')
+            if item_type == 'text':
                 text_parts.append(item.get('text', ''))
-        return ' '.join(text_parts) if text_parts else str(content)
+            elif item_type == 'image_url':
+                image_placeholders += "<__media__>"
+
+        final_text = ' '.join(text_parts)
+        if image_placeholders:
+            return f"{image_placeholders}\n\n{final_text}"
+        else:
+            return final_text
 
     return str(content)
 
diff --git a/modules/chat.py b/modules/chat.py
index 42bb58a5..7b1629dd 100644
--- a/modules/chat.py
+++ b/modules/chat.py
@@ -870,18 +870,19 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
 
         row_idx = len(output['internal']) - 1
 
-        # Collect image attachments for multimodal generation
-        image_attachments = []
+        # Collect image attachments for multimodal generation from the entire history
+        all_image_attachments = []
         if 'metadata' in output:
-            user_key = f"user_{row_idx}"
-            if user_key in output['metadata'] and "attachments" in output['metadata'][user_key]:
-                for attachment in output['metadata'][user_key]["attachments"]:
-                    if attachment.get("type") == "image":
-                        image_attachments.append(attachment)
+            for i in range(len(output['internal'])):
+                user_key = f"user_{i}"
+                if user_key in output['metadata'] and "attachments" in output['metadata'][user_key]:
+                    for attachment in output['metadata'][user_key]["attachments"]:
+                        if attachment.get("type") == "image":
+                            all_image_attachments.append(attachment)
 
-        # Add image attachments to state for the generation
-        if image_attachments:
-            state['image_attachments'] = image_attachments
+        # Add all collected image attachments to state for the generation
+        if all_image_attachments:
+            state['image_attachments'] = all_image_attachments
 
         # Generate the prompt
         kwargs = {
diff --git a/modules/llama_cpp_server.py b/modules/llama_cpp_server.py
index 3e8127ab..63c8eda0 100644
--- a/modules/llama_cpp_server.py
+++ b/modules/llama_cpp_server.py
@@ -15,6 +15,7 @@ import requests
 from modules import shared
 from modules.image_utils import (
     convert_image_attachments_to_pil,
+    convert_openai_messages_to_images,
     convert_pil_to_base64
 )
 from modules.logging_colors import logger
@@ -133,10 +134,13 @@ class LlamaServer:
         payload = self.prepare_payload(state)
 
         pil_images = []
-        # Check for images from the Web UI (image_attachments)
+        # Source 1: Web UI (from chatbot_wrapper)
         if 'image_attachments' in state and state['image_attachments']:
             pil_images.extend(convert_image_attachments_to_pil(state['image_attachments']))
-        # Else, check for images from the API (raw_images)
+        # Source 2: Chat Completions API (/v1/chat/completions)
+        elif 'history' in state and state.get('history', {}).get('messages'):
+            pil_images.extend(convert_openai_messages_to_images(state['history']['messages']))
+        # Source 3: Legacy Completions API (/v1/completions)
         elif 'raw_images' in state and state['raw_images']:
             pil_images.extend(state.get('raw_images', []))
 