Bump llama-cpp-python to 0.2.69, add --flash-attn option

oobabooga 2024-05-03 04:31:22 -07:00
parent 0476f9fe70
commit e61055253c
15 changed files with 66 additions and 58 deletions

@@ -104,6 +104,7 @@ def list_model_elements():
         'no_offload_kqv',
         'row_split',
         'tensorcores',
+        'flash_attn',
         'streaming_llm',
         'attention_sink_size',
         'hqq_backend',
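
For context, a minimal sketch of how a `--flash-attn` flag like this one typically flows from the CLI into llama-cpp-python. The surrounding plumbing (the parser setup, the model path) is illustrative and not taken from this commit; `flash_attn` itself is a real keyword argument on llama-cpp-python's `Llama` constructor as of this version range.

```python
# Hypothetical sketch: wiring a --flash-attn CLI flag through to llama-cpp-python.
# The flag name matches this commit; everything else is illustrative.
import argparse

from llama_cpp import Llama

parser = argparse.ArgumentParser()
# argparse stores '--flash-attn' as args.flash_attn, which matches the
# underscored 'flash_attn' key added to list_model_elements() above.
parser.add_argument('--flash-attn', action='store_true',
                    help='Use flash attention in the llama.cpp backend.')
args = parser.parse_args()

llm = Llama(
    model_path='models/model.gguf',  # placeholder path, not from this commit
    flash_attn=args.flash_attn,      # real Llama() keyword in llama-cpp-python
)
```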