diff --git a/modules/llama_cpp_server.py b/modules/llama_cpp_server.py index 1046969a..615f29ad 100644 --- a/modules/llama_cpp_server.py +++ b/modules/llama_cpp_server.py @@ -146,8 +146,9 @@ class LlamaServer: pprint.PrettyPrinter(indent=4, sort_dicts=False).pprint(printable_payload) print() - # Make a direct request with streaming enabled using a context manager - with self.session.post(url, json=payload, stream=True) as response: + # Make a request with streaming enabled + response = self.session.post(url, json=payload, stream=True) + try: response.raise_for_status() # Raise an exception for HTTP errors full_text = "" @@ -185,6 +186,9 @@ class LlamaServer: print(f"Problematic line: {line}") continue + finally: + response.close() + def generate(self, prompt, state): output = "" for output in self.generate_with_streaming(prompt, state):