llama.cpp: close the connection immediately on 'Stop'

2026-04-20 22:13:43 +00:00 · 2025-04-19 04:59:24 -07:00 · 2025-04-19 04:59:24 -07:00 · 5fdebc554b
commit 5fdebc554b
parent 6589ebeca8
1 changed file with 11 additions and 3 deletions
--- a/modules/llama_cpp_server.py
+++ b/modules/llama_cpp_server.py
@@ -141,16 +141,24 @@ class LlamaServer:
            print()

        # Make a direct request with streaming enabled using a context manager
-        with self.session.post(url, json=payload, stream=True) as response:
+        with self.session.post(url, json=payload, stream=True, timeout=(5, 0.1)) as response:
            response.raise_for_status()  # Raise an exception for HTTP errors

            full_text = ""
+            iterator = response.iter_lines(decode_unicode=True)

-            # Process the streaming response
-            for line in response.iter_lines(decode_unicode=True):
+            while True:
                if shared.stop_everything:
                    break

+                try:
+                    line = next(iterator)
+                except requests.exceptions.Timeout:
+                    # Check stop flag again on timeout
+                    continue
+                except StopIteration:
+                    break
+
                if line:
                    try:
                        # Check if the line starts with "data: " and remove it