llama.cpp: close the connection immediately on 'Stop'

2025-12-06 07:12:10 +01:00 · 2025-04-19 04:59:24 -07:00 · 2025-04-19 04:59:24 -07:00 · 5fdebc554b
parent 6589ebeca8
commit 5fdebc554b
1 changed file with 11 additions and 3 deletions
--- a/modules/llama_cpp_server.py
+++ b/modules/llama_cpp_server.py
@@ -141,16 +141,24 @@ class LlamaServer:
            print()
        # Make a direct request with streaming enabled using a context manager
-        with self.session.post(url, json=payload, stream=True) as response:
+        with self.session.post(url, json=payload, stream=True, timeout=(5, 0.1)) as response:
            response.raise_for_status()  # Raise an exception for HTTP errors
            full_text = ""
+            iterator = response.iter_lines(decode_unicode=True)
-            # Process the streaming response
+            while True:
-            for line in response.iter_lines(decode_unicode=True):
                if shared.stop_everything:
                    break
+                try:
+                    line = next(iterator)
+                except requests.exceptions.Timeout:
+                    # Check stop flag again on timeout
+                    continue
+                except StopIteration:
+                    break
                if line:
                    try:
                        # Check if the line starts with "data: " and remove it