ExLlamaV3: Add kv cache quantization (#6903)

This commit is contained in:
oobabooga 2025-04-25 21:32:00 -03:00 committed by GitHub
parent d4b1e31c49
commit d4017fbb6d
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
4 changed files with 32 additions and 3 deletions

View file

@@ -13,6 +13,7 @@ loaders_and_params = OrderedDict({
'cache_type',
'tensor_split',
'extra_flags',
'streaming_llm',
'rope_freq_base',
'compress_pos_emb',
'flash_attn',
@@ -49,6 +50,7 @@ loaders_and_params = OrderedDict({
],
'ExLlamav3_HF': [
'ctx_size',
'cache_type',
'gpu_split',
'cfg_cache',
'trust_remote_code',