From e4d3f4449d75ea1b1f7f3438dbed8c910a970cec Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 16 May 2025 13:02:27 -0700
Subject: [PATCH] API: Fix a regression

---
 modules/llama_cpp_server.py | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/modules/llama_cpp_server.py b/modules/llama_cpp_server.py
index 3fc7a0cc..d695c74e 100644
--- a/modules/llama_cpp_server.py
+++ b/modules/llama_cpp_server.py
@@ -146,8 +146,9 @@ class LlamaServer:
             pprint.PrettyPrinter(indent=4, sort_dicts=False).pprint(printable_payload)
             print()
 
-        # Make a direct request with streaming enabled using a context manager
-        with self.session.post(url, json=payload, stream=True) as response:
+        # Make the generation request
+        response = self.session.post(url, json=payload, stream=True)
+        try:
             response.raise_for_status()  # Raise an exception for HTTP errors
 
             full_text = ""
@@ -184,6 +185,8 @@ class LlamaServer:
                         print(f"JSON decode error: {e}")
                         print(f"Problematic line: {line}")
                         continue
+        finally:
+            response.close()
 
     def generate(self, prompt, state):
         output = ""
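
For reference, below is the explicit-close pattern the patch switches to, reduced to a minimal self-contained sketch. The stream_tokens helper, the endpoint URL, the payload shape, and the "content" key are illustrative assumptions rather than the project's actual API; the point is only the try/finally shape around a streaming requests response.

import json

import requests


def stream_tokens(url, payload):
    """Yield decoded chunks from a streaming HTTP endpoint.

    Minimal sketch of the patch's pattern: open the response
    explicitly and close it in a finally block so the connection
    is released even if an exception escapes mid-stream or the
    caller stops iterating early.
    """
    response = requests.post(url, json=payload, stream=True)
    try:
        response.raise_for_status()  # Raise an exception for HTTP errors
        for line in response.iter_lines():
            if not line:
                continue
            try:
                data = json.loads(line.decode("utf-8"))
            except json.JSONDecodeError:
                continue  # Skip malformed lines, as the patched code does
            yield data.get("content", "")
    finally:
        response.close()  # Runs on normal exit, on error, and on generator close

Because this is a generator, the finally clause also fires on GeneratorExit, i.e. when the consumer drops the iterator before the stream is exhausted, so the underlying socket is not leaked.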