diff --git a/modules/llama_cpp_server.py b/modules/llama_cpp_server.py index 3025aa7d..5071c40c 100644 --- a/modules/llama_cpp_server.py +++ b/modules/llama_cpp_server.py @@ -141,24 +141,16 @@ class LlamaServer: print() # Make a direct request with streaming enabled using a context manager - with self.session.post(url, json=payload, stream=True, timeout=(5, 0.1)) as response: + with self.session.post(url, json=payload, stream=True) as response: response.raise_for_status() # Raise an exception for HTTP errors full_text = "" - iterator = response.iter_lines(decode_unicode=True) - while True: + # Process the streaming response + for line in response.iter_lines(decode_unicode=True): if shared.stop_everything: break - try: - line = next(iterator) - except requests.exceptions.Timeout: - # Check stop flag again on timeout - continue - except StopIteration: - break - if line: try: # Check if the line starts with "data: " and remove it