Split by rows instead of layers for llama.cpp multi-gpu (#5435)

This commit is contained in:
Forkoz 2024-02-05 02:36:40 +00:00 committed by GitHub
parent 3df7e151f7
commit 2a45620c85
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 9 additions and 2 deletions

View file

@@ -93,6 +93,7 @@ def list_model_elements():
'numa',
'logits_all',
'no_offload_kqv',
'row_split',
'tensorcores',
'hqq_backend',
]