Allow more granular KV cache settings (#6561)

This commit is contained in:
Diner Burger 2024-12-17 15:43:48 -05:00 committed by GitHub
parent c43ee5db11
commit addad3c63e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 140 additions and 37 deletions

View file

@@ -130,8 +130,7 @@ def list_model_elements():
'no_xformers',
'no_sdpa',
'num_experts_per_token',
-'cache_8bit',
-'cache_4bit',
+'cache_type',
'autosplit',
'enable_tp',
'threads',