From 9f07a1f5d7bda07655e924fbbf3d9375104336ad Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 18 Apr 2025 19:30:53 -0700
Subject: [PATCH] llama.cpp: new attempt at optimizing the llama-server
 connection

---
 modules/llama_cpp_server.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/modules/llama_cpp_server.py b/modules/llama_cpp_server.py
index 34aab613..5986ac36 100644
--- a/modules/llama_cpp_server.py
+++ b/modules/llama_cpp_server.py
@@ -147,19 +147,18 @@ class LlamaServer:
         full_text = ""
 
         # Process the streaming response
-        for line in response.iter_lines():
+        for line in response.iter_lines(decode_unicode=True):
             if shared.stop_everything:
                 break
 
             if line:
                 try:
                     # Check if the line starts with "data: " and remove it
-                    line_str = line.decode('utf-8')
-                    if line_str.startswith('data: '):
-                        line_str = line_str[6:]  # Remove the "data: " prefix
+                    if line.startswith('data: '):
+                        line = line[6:]  # Remove the "data: " prefix
 
                     # Parse the JSON data
-                    data = json.loads(line_str)
+                    data = json.loads(line)
 
                     # Extract the token content
                     if 'content' in data:
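
Note on the change: requests' iter_lines(decode_unicode=True) yields already-decoded
str lines, which is why the manual line.decode('utf-8') step can be dropped and the
per-line string handling gets one step shorter. Below is a minimal standalone sketch
of the same streaming loop outside the patch, assuming a local llama-server with the
"stream": true completion API; the URL, payload, and endpoint path are illustrative
assumptions, not part of this patch.

    # Sketch only: mirrors the patched loop against a hypothetical local server.
    import json

    import requests

    url = "http://127.0.0.1:8080/completion"  # assumed llama-server address
    payload = {"prompt": "Hello", "stream": True}

    with requests.post(url, json=payload, stream=True) as response:
        # decode_unicode=True makes requests yield str, not bytes,
        # so no manual .decode('utf-8') is needed per line.
        for line in response.iter_lines(decode_unicode=True):
            if not line:
                continue  # skip SSE keep-alive blank lines
            if line.startswith('data: '):
                line = line[6:]  # strip the SSE "data: " prefix
            data = json.loads(line)
            if 'content' in data:
                print(data['content'], end='', flush=True)

One practical upside of letting requests do the decoding: iter_content's incremental
decoder handles multi-byte UTF-8 sequences that arrive split across network chunks,
so the loop body stays a plain string pipeline with no bytes/str mixing.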