New llama.cpp loader (#6846)

This commit is contained in:
oobabooga 2025-04-18 09:59:37 -03:00 committed by GitHub
parent 5c2f8d828e
commit ae54d8faaa
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
23 changed files with 471 additions and 999 deletions

View file

@@ -110,7 +110,7 @@ def list_model_elements():
'n_gpu_layers',
'threads',
'threads_batch',
'n_batch',
'batch_size',
'hqq_backend',
'n_ctx',
'max_seq_len',
@@ -122,20 +122,17 @@ def list_model_elements():
'compress_pos_emb',
'compute_dtype',
'quant_type',
'attention_sink_size',
'num_experts_per_token',
'tensorcores',
'load_in_8bit',
'load_in_4bit',
'torch_compile',
'flash_attn',
'use_flash_attention_2',
'streaming_llm',
'auto_devices',
'cpu',
'disk',
'row_split',
'no_offload_kqv',
'no_kv_offload',
'no_mul_mat_q',
'no_mmap',
'mlock',
@@ -150,7 +147,6 @@ def list_model_elements():
'no_sdpa',
'cfg_cache',
'cpp_runner',
'logits_all',
'trust_remote_code',
'no_use_fast',
]