From d00d713ace9a1e39bb291175c468eea71a42dfa2 Mon Sep 17 00:00:00 2001
From: oobabooga <112222186+oobabooga@users.noreply.github.com>
Date: Fri, 18 Apr 2025 08:14:15 -0700
Subject: [PATCH] Rename get_max_context_length to get_vocabulary_size in the new llama.cpp loader

---
 modules/llama_cpp_server.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/modules/llama_cpp_server.py b/modules/llama_cpp_server.py
index 1965a170..db500c9c 100644
--- a/modules/llama_cpp_server.py
+++ b/modules/llama_cpp_server.py
@@ -24,7 +24,7 @@ class LlamaServer:
         self.server_path = server_path
         self.port = self._find_available_port()
         self.process = None
-        self.max_context_length = None
+        self.vocabulary_size = None
         self.bos_token = ""
 
         # Start the server
@@ -209,7 +209,7 @@ class LlamaServer:
         else:
             raise Exception(f"Unexpected response format: 'completion_probabilities' not found in {result}")
 
-    def _get_max_context_length(self):
+    def _get_vocabulary_size(self):
         """Get and store the model's maximum context length."""
         url = f"http://localhost:{self.port}/v1/models"
         response = requests.get(url).json()
@@ -217,7 +217,7 @@ class LlamaServer:
         if "data" in response and len(response["data"]) > 0:
             model_info = response["data"][0]
             if "meta" in model_info and "n_vocab" in model_info["meta"]:
-                self.max_context_length = model_info["meta"]["n_vocab"]
+                self.vocabulary_size = model_info["meta"]["n_vocab"]
 
     def _get_bos_token(self):
         """Get and store the model's BOS token."""
@@ -311,7 +311,7 @@ class LlamaServer:
             raise TimeoutError(f"Server health check timed out after {timeout} seconds")
 
         # Server is now healthy, get model info
-        self._get_max_context_length()
+        self._get_vocabulary_size()
         self._get_bos_token()
 
         return self.port
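
For reference, the renamed helper queries the llama.cpp server's /v1/models endpoint and reads the vocabulary size from the model metadata. Below is a minimal standalone sketch of that request, assuming the response shape implied by the hunks above ("data" -> first entry -> "meta" -> "n_vocab"); the function name, the port value, and the None fallback are illustrative and not part of the patch.

    import requests

    def get_vocabulary_size(port):
        # Ask the llama.cpp server for its model list (OpenAI-compatible endpoint).
        url = f"http://localhost:{port}/v1/models"
        response = requests.get(url).json()

        # The vocabulary size is reported as meta.n_vocab on the first model entry.
        if "data" in response and len(response["data"]) > 0:
            model_info = response["data"][0]
            if "meta" in model_info and "n_vocab" in model_info["meta"]:
                return model_info["meta"]["n_vocab"]

        # Illustrative fallback when the server does not report n_vocab.
        return None

    # Example usage (assumes a llama.cpp server already running on port 8080):
    # print(get_vocabulary_size(8080))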