From 4c2e3b168bc1751dbb3f1b222fdd749ad7a5d36e Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Sat, 3 May 2025 06:51:20 -0700
Subject: [PATCH] llama.cpp: Add a retry mechanism when getting the logits
 (sometimes it fails)

---
 modules/llama_cpp_server.py | 15 ++++++++-------
 1 file changed, 8 insertions(+), 7 deletions(-)

diff --git a/modules/llama_cpp_server.py b/modules/llama_cpp_server.py
index d9187db8..2ebeb560 100644
--- a/modules/llama_cpp_server.py
+++ b/modules/llama_cpp_server.py
@@ -210,14 +210,15 @@ class LlamaServer:
             pprint.PrettyPrinter(indent=4, sort_dicts=False).pprint(printable_payload)
             print()
 
-        response = self.session.post(url, json=payload)
-        result = response.json()
+        for retry in range(5):
+            response = self.session.post(url, json=payload)
+            result = response.json()
 
-        if "completion_probabilities" in result:
-            if use_samplers:
-                return result["completion_probabilities"][0]["top_probs"]
-            else:
-                return result["completion_probabilities"][0]["top_logprobs"]
+            if "completion_probabilities" in result:
+                if use_samplers:
+                    return result["completion_probabilities"][0]["top_probs"]
+                else:
+                    return result["completion_probabilities"][0]["top_logprobs"]
         else:
             raise Exception(f"Unexpected response format: 'completion_probabilities' not found in {result}")
 