diff --git a/modules/llama_cpp_server.py b/modules/llama_cpp_server.py index 5071c40c..3025aa7d 100644 --- a/modules/llama_cpp_server.py +++ b/modules/llama_cpp_server.py @@ -141,16 +141,24 @@ class LlamaServer: print() # Make a direct request with streaming enabled using a context manager - with self.session.post(url, json=payload, stream=True) as response: + with self.session.post(url, json=payload, stream=True, timeout=(5, 0.1)) as response: response.raise_for_status() # Raise an exception for HTTP errors full_text = "" + iterator = response.iter_lines(decode_unicode=True) - # Process the streaming response - for line in response.iter_lines(decode_unicode=True): + while True: if shared.stop_everything: break + try: + line = next(iterator) + except requests.exceptions.Timeout: + # Check stop flag again on timeout + continue + except StopIteration: + break + if line: try: # Check if the line starts with "data: " and remove it