Organize internals (#6646)

This commit is contained in:
oobabooga 2025-01-10 18:04:32 -03:00 committed by GitHub
parent 17aa97248f
commit 83c426e96b
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
6 changed files with 346 additions and 310 deletions

View file

@ -7,102 +7,103 @@ from modules import shared
loaders_and_params = OrderedDict({
'Transformers': [
'cpu_memory',
'gpu_memory',
'load_in_4bit',
'load_in_8bit',
'torch_compile',
'bf16',
'cpu',
'disk',
'auto_devices',
'use_double_quant',
'quant_type',
'compute_dtype',
'trust_remote_code',
'no_use_fast',
'use_flash_attention_2',
'use_eager_attention',
'cpu_memory',
'alpha_value',
'compress_pos_emb',
],
'llama.cpp': [
'n_ctx',
'n_gpu_layers',
'cache_type',
'tensor_split',
'n_batch',
'threads',
'threads_batch',
'no_mmap',
'mlock',
'no_mul_mat_q',
'rope_freq_base',
'compress_pos_emb',
'compute_dtype',
'quant_type',
'load_in_8bit',
'load_in_4bit',
'torch_compile',
'use_flash_attention_2',
'auto_devices',
'cpu',
'numa',
'no_offload_kqv',
'row_split',
'tensorcores',
'flash_attn',
'streaming_llm',
'attention_sink_size',
],
'llamacpp_HF': [
'n_ctx',
'n_gpu_layers',
'cache_type',
'tensor_split',
'n_batch',
'threads',
'threads_batch',
'no_mmap',
'mlock',
'no_mul_mat_q',
'rope_freq_base',
'compress_pos_emb',
'cpu',
'numa',
'cfg_cache',
'disk',
'use_double_quant',
'use_eager_attention',
'bf16',
'trust_remote_code',
'no_use_fast',
'logits_all',
'no_offload_kqv',
'row_split',
],
'llama.cpp': [
'n_gpu_layers',
'threads',
'threads_batch',
'n_batch',
'n_ctx',
'cache_type',
'tensor_split',
'rope_freq_base',
'compress_pos_emb',
'attention_sink_size',
'tensorcores',
'flash_attn',
'streaming_llm',
'cpu',
'row_split',
'no_offload_kqv',
'no_mul_mat_q',
'no_mmap',
'mlock',
'numa',
],
'llamacpp_HF': [
'n_gpu_layers',
'threads',
'threads_batch',
'n_batch',
'n_ctx',
'cache_type',
'tensor_split',
'rope_freq_base',
'compress_pos_emb',
'attention_sink_size',
'tensorcores',
'flash_attn',
'streaming_llm',
'cpu',
'row_split',
'no_offload_kqv',
'no_mul_mat_q',
'no_mmap',
'mlock',
'numa',
'cfg_cache',
'logits_all',
'trust_remote_code',
'no_use_fast',
'llamacpp_HF_info',
],
'ExLlamav2_HF': [
'gpu_split',
'max_seq_len',
'cfg_cache',
'cache_type',
'gpu_split',
'alpha_value',
'compress_pos_emb',
'num_experts_per_token',
'autosplit',
'enable_tp',
'no_flash_attn',
'no_xformers',
'no_sdpa',
'num_experts_per_token',
'cache_type',
'autosplit',
'enable_tp',
'alpha_value',
'compress_pos_emb',
'cfg_cache',
'trust_remote_code',
'no_use_fast',
],
'ExLlamav2': [
'gpu_split',
'max_seq_len',
'cache_type',
'gpu_split',
'alpha_value',
'compress_pos_emb',
'num_experts_per_token',
'autosplit',
'enable_tp',
'no_flash_attn',
'no_xformers',
'no_sdpa',
'num_experts_per_token',
'cache_type',
'autosplit',
'enable_tp',
'alpha_value',
'compress_pos_emb',
'exllamav2_info',
],
'HQQ': [
@ -121,51 +122,51 @@ loaders_and_params = OrderedDict({
def transformers_samplers():
return {
'temperature',
'temperature_last',
'dynamic_temperature',
'dynatemp_low',
'dynatemp_high',
'dynatemp_exponent',
'smoothing_factor',
'smoothing_curve',
'top_p',
'min_p',
'top_p',
'top_k',
'typical_p',
'xtc_threshold',
'xtc_probability',
'epsilon_cutoff',
'eta_cutoff',
'tfs',
'top_a',
'dry_multiplier',
'dry_allowed_length',
'dry_base',
'repetition_penalty',
'presence_penalty',
'frequency_penalty',
'repetition_penalty_range',
'presence_penalty',
'encoder_repetition_penalty',
'no_repeat_ngram_size',
'dry_multiplier',
'dry_base',
'dry_allowed_length',
'dry_sequence_breakers',
'xtc_threshold',
'xtc_probability',
'seed',
'do_sample',
'repetition_penalty_range',
'penalty_alpha',
'guidance_scale',
'mirostat_mode',
'mirostat_tau',
'mirostat_eta',
'grammar_file_row',
'grammar_string',
'guidance_scale',
'negative_prompt',
'prompt_lookup_num_tokens',
'do_sample',
'dynamic_temperature',
'temperature_last',
'auto_max_new_tokens',
'ban_eos_token',
'custom_token_bans',
'sampler_priority',
'add_bos_token',
'skip_special_tokens',
'auto_max_new_tokens',
'prompt_lookup_num_tokens',
'static_cache',
'seed',
'sampler_priority',
'custom_token_bans',
'negative_prompt',
'dry_sequence_breakers',
'grammar_string',
'grammar_file_row',
}
@ -174,155 +175,156 @@ loaders_samplers = {
'HQQ': transformers_samplers(),
'ExLlamav2': {
'temperature',
'temperature_last',
'smoothing_factor',
'dynatemp_low',
'dynatemp_high',
'dynatemp_exponent',
'top_p',
'smoothing_factor',
'min_p',
'top_p',
'top_k',
'typical_p',
'xtc_threshold',
'xtc_probability',
'tfs',
'top_a',
'dry_multiplier',
'dry_allowed_length',
'dry_base',
'repetition_penalty',
'presence_penalty',
'frequency_penalty',
'presence_penalty',
'repetition_penalty_range',
'mirostat_mode',
'mirostat_tau',
'mirostat_eta',
'dry_multiplier',
'dry_base',
'dry_allowed_length',
'dry_sequence_breakers',
'xtc_threshold',
'xtc_probability',
'seed',
'dynamic_temperature',
'temperature_last',
'auto_max_new_tokens',
'ban_eos_token',
'add_bos_token',
'custom_token_bans',
'skip_special_tokens',
'auto_max_new_tokens',
'seed',
'custom_token_bans',
'dry_sequence_breakers',
},
'ExLlamav2_HF': {
'temperature',
'temperature_last',
'dynamic_temperature',
'dynatemp_low',
'dynatemp_high',
'dynatemp_exponent',
'smoothing_factor',
'smoothing_curve',
'top_p',
'min_p',
'top_p',
'top_k',
'typical_p',
'xtc_threshold',
'xtc_probability',
'epsilon_cutoff',
'eta_cutoff',
'tfs',
'top_a',
'dry_multiplier',
'dry_allowed_length',
'dry_base',
'repetition_penalty',
'presence_penalty',
'frequency_penalty',
'repetition_penalty_range',
'presence_penalty',
'encoder_repetition_penalty',
'no_repeat_ngram_size',
'dry_multiplier',
'dry_base',
'dry_allowed_length',
'dry_sequence_breakers',
'xtc_threshold',
'xtc_probability',
'seed',
'do_sample',
'repetition_penalty_range',
'guidance_scale',
'mirostat_mode',
'mirostat_tau',
'mirostat_eta',
'grammar_file_row',
'grammar_string',
'guidance_scale',
'negative_prompt',
'do_sample',
'dynamic_temperature',
'temperature_last',
'auto_max_new_tokens',
'ban_eos_token',
'custom_token_bans',
'sampler_priority',
'add_bos_token',
'skip_special_tokens',
'auto_max_new_tokens',
'seed',
'sampler_priority',
'custom_token_bans',
'negative_prompt',
'dry_sequence_breakers',
'grammar_string',
'grammar_file_row',
},
'llama.cpp': {
'temperature',
'top_p',
'min_p',
'top_p',
'top_k',
'typical_p',
'tfs',
'repetition_penalty',
'presence_penalty',
'frequency_penalty',
'seed',
'presence_penalty',
'mirostat_mode',
'mirostat_tau',
'mirostat_eta',
'grammar_file_row',
'grammar_string',
'ban_eos_token',
'seed',
'custom_token_bans',
'grammar_string',
'grammar_file_row',
},
'llamacpp_HF': {
'temperature',
'temperature_last',
'dynamic_temperature',
'dynatemp_low',
'dynatemp_high',
'dynatemp_exponent',
'smoothing_factor',
'smoothing_curve',
'top_p',
'min_p',
'top_p',
'top_k',
'typical_p',
'xtc_threshold',
'xtc_probability',
'epsilon_cutoff',
'eta_cutoff',
'tfs',
'top_a',
'dry_multiplier',
'dry_allowed_length',
'dry_base',
'repetition_penalty',
'presence_penalty',
'frequency_penalty',
'repetition_penalty_range',
'presence_penalty',
'encoder_repetition_penalty',
'no_repeat_ngram_size',
'dry_multiplier',
'dry_base',
'dry_allowed_length',
'dry_sequence_breakers',
'xtc_threshold',
'xtc_probability',
'seed',
'do_sample',
'repetition_penalty_range',
'guidance_scale',
'mirostat_mode',
'mirostat_tau',
'mirostat_eta',
'grammar_file_row',
'grammar_string',
'guidance_scale',
'negative_prompt',
'do_sample',
'dynamic_temperature',
'temperature_last',
'auto_max_new_tokens',
'ban_eos_token',
'custom_token_bans',
'sampler_priority',
'add_bos_token',
'skip_special_tokens',
'auto_max_new_tokens',
'seed',
'sampler_priority',
'custom_token_bans',
'negative_prompt',
'dry_sequence_breakers',
'grammar_string',
'grammar_file_row',
},
'TensorRT-LLM': {
'temperature',
'top_p',
'top_k',
'repetition_penalty',
'presence_penalty',
'frequency_penalty',
'ban_eos_token',
'presence_penalty',
'auto_max_new_tokens',
'ban_eos_token',
}
}