Add StreamingLLM for llamacpp & llamacpp_HF (2nd attempt) (#5669)

2026-04-04 22:27:29 +00:00 · 2024-03-09 00:25:33 -03:00 · 2024-03-09 00:25:33 -03:00 · afb51bd5d6
commit afb51bd5d6
parent 9271e80914
7 changed files with 147 additions and 0 deletions
--- a/modules/loaders.py
+++ b/modules/loaders.py
@@ -46,6 +46,8 @@ loaders_and_params = OrderedDict({
        'no_offload_kqv',
        'row_split',
        'tensorcores',
+        'streaming_llm',
+        'attention_sink_size',
    ],
    'llamacpp_HF': [
        'n_ctx',
@@ -69,6 +71,8 @@ loaders_and_params = OrderedDict({
        'no_offload_kqv',
        'row_split',
        'tensorcores',
+        'streaming_llm',
+        'attention_sink_size',
        'llamacpp_HF_info',
    ],
    'ExLlamav2_HF': [