llama.cpp: close the connection immediately on 'Stop'

This commit is contained in:
oobabooga 2025-04-19 04:59:24 -07:00
parent 6589ebeca8
commit 5fdebc554b

View file

@@ -141,16 +141,24 @@ class LlamaServer:
print()
# Make a direct request with streaming enabled using a context manager
with self.session.post(url, json=payload, stream=True) as response:
with self.session.post(url, json=payload, stream=True, timeout=(5, 0.1)) as response:
response.raise_for_status() # Raise an exception for HTTP errors
full_text = ""
iterator = response.iter_lines(decode_unicode=True)
# Process the streaming response
for line in response.iter_lines(decode_unicode=True):
while True:
if shared.stop_everything:
break
try:
line = next(iterator)
except requests.exceptions.Timeout:
# Check stop flag again on timeout
continue
except StopIteration:
break
if line:
try:
# Check if the line starts with "data: " and remove it