Add cache_4bit option for ExLlamaV2 (#5645)

This commit is contained in:
oobabooga 2024-03-06 23:02:25 -03:00 committed by GitHub
parent fa0e68cefd
commit 2ec1d96c91
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
11 changed files with 28 additions and 16 deletions

View file

@@ -76,6 +76,7 @@ def list_model_elements():
'no_flash_attn',
'num_experts_per_token',
'cache_8bit',
'cache_4bit',
'autosplit',
'threads',
'threads_batch',