ExLlamaV2: Add speculative decoding (#6899)

This commit is contained in:
oobabooga 2025-04-25 00:11:04 -03:00 committed by GitHub
parent 8f2493cc60
commit ae1fe87365
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
2 changed files with 53 additions and 2 deletions

View file

@@ -25,7 +25,7 @@ loaders_and_params = OrderedDict({
'gpu_layers_draft',
'device_draft',
'ctx_size_draft',
-'speculative_decoding_accordion'
+'speculative_decoding_accordion',
],
'Transformers': [
'gpu_split',
@@ -82,6 +82,9 @@ loaders_and_params = OrderedDict({
'no_xformers',
'no_sdpa',
'exllamav2_info',
+'model_draft',
+'ctx_size_draft',
+'speculative_decoding_accordion',
],
'HQQ': [
'hqq_backend',