Add ExLlamaV3 support (#6832)

This commit is contained in:
oobabooga 2025-04-09 00:07:08 -03:00 committed by GitHub
parent 0b3503c91f
commit 8b8d39ec4e
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
13 changed files with 322 additions and 54 deletions

View file

@@ -23,7 +23,6 @@ loaders_and_params = OrderedDict({
'use_double_quant',
'use_eager_attention',
'bf16',
'trust_remote_code',
'no_use_fast',
],
@@ -76,6 +75,13 @@ loaders_and_params = OrderedDict({
'no_use_fast',
'llamacpp_HF_info',
],
'ExLlamav3_HF': [
'max_seq_len',
'gpu_split',
'cfg_cache',
'trust_remote_code',
'no_use_fast',
],
'ExLlamav2_HF': [
'max_seq_len',
'cache_type',
@@ -174,30 +180,38 @@ def transformers_samplers():
loaders_samplers = {
'Transformers': transformers_samplers(),
'HQQ': transformers_samplers(),
'ExLlamav2': {
'ExLlamav3_HF': {
'temperature',
'dynatemp_low',
'dynatemp_high',
'dynatemp_exponent',
'smoothing_factor',
'smoothing_curve',
'min_p',
'top_p',
'top_k',
'typical_p',
'xtc_threshold',
'xtc_probability',
'epsilon_cutoff',
'eta_cutoff',
'tfs',
'top_a',
'top_n_sigma',
'dry_multiplier',
'dry_allowed_length',
'dry_base',
'repetition_penalty',
'frequency_penalty',
'presence_penalty',
'encoder_repetition_penalty',
'no_repeat_ngram_size',
'repetition_penalty_range',
'guidance_scale',
'mirostat_mode',
'mirostat_tau',
'mirostat_eta',
'do_sample',
'dynamic_temperature',
'temperature_last',
'auto_max_new_tokens',
@@ -205,8 +219,12 @@ loaders_samplers = {
'add_bos_token',
'skip_special_tokens',
'seed',
'sampler_priority',
'custom_token_bans',
'negative_prompt',
'dry_sequence_breakers',
'grammar_string',
'grammar_file_row',
},
'ExLlamav2_HF': {
'temperature',
@@ -254,6 +272,40 @@ loaders_samplers = {
'grammar_string',
'grammar_file_row',
},
'ExLlamav2': {
'temperature',
'dynatemp_low',
'dynatemp_high',
'dynatemp_exponent',
'smoothing_factor',
'min_p',
'top_p',
'top_k',
'typical_p',
'xtc_threshold',
'xtc_probability',
'tfs',
'top_a',
'dry_multiplier',
'dry_allowed_length',
'dry_base',
'repetition_penalty',
'frequency_penalty',
'presence_penalty',
'repetition_penalty_range',
'mirostat_mode',
'mirostat_tau',
'mirostat_eta',
'dynamic_temperature',
'temperature_last',
'auto_max_new_tokens',
'ban_eos_token',
'add_bos_token',
'skip_special_tokens',
'seed',
'custom_token_bans',
'dry_sequence_breakers',
},
'llama.cpp': {
'temperature',
'min_p',