Bump llama-cpp-python to 0.2.61 & fix the crash

This commit is contained in:
oobabooga 2024-04-11 14:15:34 -07:00
parent 5f5ceaf025
commit 3e3a7c4250
9 changed files with 68 additions and 66 deletions

View file

@ -39,7 +39,7 @@ def eval_with_progress(self, tokens: Sequence[int]):
progress_bar = range(0, len(tokens), self.n_batch)
for i in progress_bar:
-batch = tokens[i: min(len(tokens), i + self.n_batch)]
+batch = tokens[i : min(len(tokens), i + self.n_batch)]
n_past = self.n_tokens
n_tokens = len(batch)
self._batch.set_batch(
@ -47,16 +47,18 @@ def eval_with_progress(self, tokens: Sequence[int]):
)
self._ctx.decode(self._batch)
# Save tokens
-self.input_ids[n_past: n_past + n_tokens] = batch
+self.input_ids[n_past : n_past + n_tokens] = batch
# Save logits
-rows = n_tokens
-cols = self._n_vocab
-offset = (
-0 if self.context_params.logits_all else n_tokens - 1
-) # NOTE: Only save the last token logits if logits_all is False
-self.scores[n_past + offset: n_past + n_tokens, :].reshape(-1)[
-:
-] = self._ctx.get_logits()[offset * cols: rows * cols]
+if self.context_params.logits_all:
+rows = n_tokens
+cols = self._n_vocab
+logits = self._ctx.get_logits()[: rows * cols]
+self.scores[n_past : n_past + n_tokens, :].reshape(-1)[: :] = logits
+else:
+rows = 1
+cols = self._n_vocab
+logits = self._ctx.get_logits()[: rows * cols]
+self.scores[n_past + n_tokens - 1, :].reshape(-1)[: :] = logits
# Update n_tokens
self.n_tokens += n_tokens