From 5fdebc554b7ca46afb9695babf89397635e9f91d Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 19 Apr 2025 04:59:24 -0700
Subject: [PATCH] llama.cpp: close the connection immediately on 'Stop'

---
 modules/llama_cpp_server.py | 14 +++++++++++---
 1 file changed, 11 insertions(+), 3 deletions(-)

diff --git a/modules/llama_cpp_server.py b/modules/llama_cpp_server.py
index 5071c40c..3025aa7d 100644
--- a/modules/llama_cpp_server.py
+++ b/modules/llama_cpp_server.py
@@ -141,16 +141,24 @@ class LlamaServer:
         print()
 
         # Make a direct request with streaming enabled using a context manager
-        with self.session.post(url, json=payload, stream=True) as response:
+        with self.session.post(url, json=payload, stream=True, timeout=(5, 0.1)) as response:
             response.raise_for_status()  # Raise an exception for HTTP errors
 
             full_text = ""
+            iterator = response.iter_lines(decode_unicode=True)
 
-            # Process the streaming response
-            for line in response.iter_lines(decode_unicode=True):
+            while True:
                 if shared.stop_everything:
                     break
 
+                try:
+                    line = next(iterator)
+                except requests.exceptions.Timeout:
+                    # Check stop flag again on timeout
+                    continue
+                except StopIteration:
+                    break
+
                 if line:
                     try:
                         # Check if the line starts with "data: " and remove it
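
Note on the mechanism: `requests` reads `timeout=(5, 0.1)` as (connect timeout, read timeout), so while the server is silent each read raises after roughly 100 ms, the loop re-checks `shared.stop_everything`, and exiting the `with` block closes the connection at once, which is what aborts generation on 'Stop'. As a hedged illustration of the same polling pattern, here is a minimal socket-level sketch; it is not code from this repository, and `host`, `port`, `request_bytes`, and `should_stop` are placeholder names. A raw socket is used because, depending on the `requests` version, a read timeout during streaming iteration may surface as `requests.exceptions.ConnectionError` (wrapping urllib3's `ReadTimeoutError`) rather than `requests.exceptions.Timeout`, whereas retrying `recv()` after `socket.timeout` is always well defined.

    import socket

    def stream_lines_until_stop(host, port, request_bytes, should_stop):
        # Minimal sketch of the patch's polling idea at the socket level.
        # host/port/request_bytes/should_stop are placeholders, not names
        # from the repository.
        with socket.create_connection((host, port), timeout=5) as sock:
            sock.sendall(request_bytes)
            # Short read timeout: recv() raises socket.timeout roughly every
            # 100 ms while the server is silent, so the stop flag is polled
            # at that interval instead of blocking indefinitely.
            sock.settimeout(0.1)
            buffer = b""
            while True:
                if should_stop():
                    # Leaving the 'with' block closes the connection
                    # immediately, mirroring the patch's behavior on 'Stop'.
                    return
                try:
                    chunk = sock.recv(4096)
                except socket.timeout:
                    continue  # no data yet; loop back and re-check the flag
                if not chunk:
                    return  # server closed the connection: end of stream
                buffer += chunk
                while b"\n" in buffer:
                    line, buffer = buffer.split(b"\n", 1)
                    if line:
                        yield line

The 0.1 s read timeout is the effective polling interval for the stop flag: shrinking it makes 'Stop' more responsive at the cost of more wakeups per second.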