Add cache_4bit option for ExLlamaV2 (#5645)

This commit is contained in:
oobabooga 2024-03-06 23:02:25 -03:00 committed by GitHub
parent fa0e68cefd
commit 2ec1d96c91
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 28 additions and 16 deletions

View file

@@ -76,6 +76,7 @@ def list_model_elements():
'no_flash_attn',
'num_experts_per_token',
'cache_8bit',
'cache_4bit',
'autosplit',
'threads',
'threads_batch',