From 9f07a1f5d7bda07655e924fbbf3d9375104336ad Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 18 Apr 2025 19:30:53 -0700
Subject: [PATCH] llama.cpp: new attempt at optimizing the llama-server
 connection

---
 modules/llama_cpp_server.py | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/modules/llama_cpp_server.py b/modules/llama_cpp_server.py
index 34aab613..5986ac36 100644
--- a/modules/llama_cpp_server.py
+++ b/modules/llama_cpp_server.py
@@ -147,19 +147,18 @@ class LlamaServer:
         full_text = ""
 
         # Process the streaming response
-        for line in response.iter_lines():
+        for line in response.iter_lines(decode_unicode=True):
             if shared.stop_everything:
                 break
 
             if line:
                 try:
                     # Check if the line starts with "data: " and remove it
-                    line_str = line.decode('utf-8')
-                    if line_str.startswith('data: '):
-                        line_str = line_str[6:]  # Remove the "data: " prefix
+                    if line.startswith('data: '):
+                        line = line[6:]  # Remove the "data: " prefix
 
                     # Parse the JSON data
-                    data = json.loads(line_str)
+                    data = json.loads(line)
 
                     # Extract the token content
                     if 'content' in data:
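
Note on the change: requests' iter_lines(decode_unicode=True) yields already-decoded
str lines, which is why the manual line.decode('utf-8') step can be dropped and the
per-line string handling gets one step shorter. Below is a minimal standalone sketch
of the same streaming loop outside the patch, assuming a local llama-server with the
"stream": true completion API; the URL, payload, and endpoint path are illustrative
assumptions, not part of this patch.

    # Sketch only: mirrors the patched loop against a hypothetical local server.
    import json

    import requests

    url = "http://127.0.0.1:8080/completion"  # assumed llama-server address
    payload = {"prompt": "Hello", "stream": True}

    with requests.post(url, json=payload, stream=True) as response:
        # decode_unicode=True makes requests yield str, not bytes,
        # so no manual .decode('utf-8') is needed per line.
        for line in response.iter_lines(decode_unicode=True):
            if not line:
                continue  # skip SSE keep-alive blank lines
            if line.startswith('data: '):
                line = line[6:]  # strip the SSE "data: " prefix
            data = json.loads(line)
            if 'content' in data:
                print(data['content'], end='', flush=True)

One practical upside of letting requests do the decoding: iter_content's incremental
decoder handles multi-byte UTF-8 sequences that arrive split across network chunks,
so the loop body stays a plain string pipeline with no bytes/str mixing.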