Allow more granular KV cache settings (#6561)

This commit is contained in:
Diner Burger 2024-12-17 15:43:48 -05:00 committed by GitHub
parent c43ee5db11
commit addad3c63e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
8 changed files with 140 additions and 37 deletions

View file

@@ -31,8 +31,7 @@ loaders_and_params = OrderedDict({
'llama.cpp': [
'n_ctx',
'n_gpu_layers',
-'cache_8bit',
-'cache_4bit',
+'cache_type',
'tensor_split',
'n_batch',
'threads',
@@ -54,8 +53,7 @@ loaders_and_params = OrderedDict({
'llamacpp_HF': [
'n_ctx',
'n_gpu_layers',
-'cache_8bit',
-'cache_4bit',
+'cache_type',
'tensor_split',
'n_batch',
'threads',
@@ -87,8 +85,7 @@ loaders_and_params = OrderedDict({
'no_xformers',
'no_sdpa',
'num_experts_per_token',
-'cache_8bit',
-'cache_4bit',
+'cache_type',
'autosplit',
'enable_tp',
'alpha_value',
@@ -103,8 +100,7 @@ loaders_and_params = OrderedDict({
'no_xformers',
'no_sdpa',
'num_experts_per_token',
-'cache_8bit',
-'cache_4bit',
+'cache_type',
'autosplit',
'enable_tp',
'alpha_value',