From 8be798e15f48bd1f498d2c609ddf2f31cf22524b Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sun, 24 Aug 2025 12:19:19 -0700
Subject: [PATCH] llama.cpp: Fix stderr deadlock while loading some multimodal
 models

---
 modules/llama_cpp_server.py | 70 ++++++++++++++++++++++++-------------
 1 file changed, 45 insertions(+), 25 deletions(-)

diff --git a/modules/llama_cpp_server.py b/modules/llama_cpp_server.py
index 8f1924cb..e3dd43b4 100644
--- a/modules/llama_cpp_server.py
+++ b/modules/llama_cpp_server.py
@@ -410,8 +410,7 @@ class LlamaServer:
         self.process = subprocess.Popen(
             cmd,
             stderr=subprocess.PIPE,
-            text=True,
-            bufsize=1,
+            bufsize=0,
             env=env
         )
 
@@ -473,34 +472,55 @@ def filter_stderr_with_progress(process_stderr):
     last_was_progress = False
 
     try:
-        for raw in iter(process_stderr.readline, ''):
-            line = raw.rstrip('\r\n')
-            match = progress_re.search(line)
+        # Read in binary mode and decode manually
+        buffer = b""
+        while True:
+            # Drain stderr promptly so the OS pipe never fills up and blocks the child
+            chunk = process_stderr.read(4096)
+            if not chunk:
+                break
 
-            if match:
-                progress = float(match.group(1))
+            buffer += chunk
 
-                # Extract just the part from "prompt processing" onwards
-                prompt_processing_idx = line.find('prompt processing')
-                if prompt_processing_idx != -1:
-                    display_line = line[prompt_processing_idx:]
-                else:
-                    display_line = line  # fallback to full line
+            # Process complete lines
+            while b'\n' in buffer:
+                line_bytes, buffer = buffer.split(b'\n', 1)
+                try:
+                    line = line_bytes.decode('utf-8', errors='replace').strip('\r\n')
+                    if line:  # Process non-empty lines
+                        match = progress_re.search(line)
 
-                # choose carriage return for in-progress or newline at completion
-                end_char = '\r' if progress < 1.0 else '\n'
-                print(display_line, end=end_char, file=sys.stderr, flush=True)
-                last_was_progress = (progress < 1.0)
+                        if match:
+                            progress = float(match.group(1))
 
-            # skip noise lines
-            elif not (line.startswith(('srv ', 'slot ')) or 'log_server_r: request: GET /health' in line):
-                # if we were in progress, finish that line first
-                if last_was_progress:
-                    print(file=sys.stderr)
+                            # Extract just the part from "prompt processing" onwards
+                            prompt_processing_idx = line.find('prompt processing')
+                            if prompt_processing_idx != -1:
+                                display_line = line[prompt_processing_idx:]
+                            else:
+                                display_line = line  # fallback to full line
 
-                print(line, file=sys.stderr, flush=True)
-                last_was_progress = False
+                            # choose carriage return for in-progress or newline at completion
+                            end_char = '\r' if progress < 1.0 else '\n'
+                            print(display_line, end=end_char, file=sys.stderr, flush=True)
+                            last_was_progress = (progress < 1.0)
+
+                        # skip noise lines
+                        elif not (line.startswith(('srv ', 'slot ')) or 'log_server_r: request: GET /health' in line):
+                            # if we were in progress, finish that line first
+                            if last_was_progress:
+                                print(file=sys.stderr)
+
+                            print(line, file=sys.stderr, flush=True)
+                            last_was_progress = False
+
+                except Exception:
+                    continue
 
     except (ValueError, IOError):
-        # silently ignore broken output or IO errors
         pass
+    finally:
+        try:
+            process_stderr.close()
+        except:
+            pass