Organize internals (#6646)

2026-04-04 22:27:29 +00:00 · 2025-01-10 18:04:32 -03:00 · 2025-01-10 18:04:32 -03:00 · 83c426e96b
commit 83c426e96b
parent 17aa97248f
6 changed files with 346 additions and 310 deletions
--- a/modules/loaders.py
+++ b/modules/loaders.py
@@ -7,102 +7,103 @@ from modules import shared

 loaders_and_params = OrderedDict({
    'Transformers': [
-        'cpu_memory',
        'gpu_memory',
-        'load_in_4bit',
-        'load_in_8bit',
-        'torch_compile',
-        'bf16',
-        'cpu',
-        'disk',
-        'auto_devices',
-        'use_double_quant',
-        'quant_type',
-        'compute_dtype',
-        'trust_remote_code',
-        'no_use_fast',
-        'use_flash_attention_2',
-        'use_eager_attention',
+        'cpu_memory',
        'alpha_value',
        'compress_pos_emb',
-    ],
-    'llama.cpp': [
-        'n_ctx',
-        'n_gpu_layers',
-        'cache_type',
-        'tensor_split',
-        'n_batch',
-        'threads',
-        'threads_batch',
-        'no_mmap',
-        'mlock',
-        'no_mul_mat_q',
-        'rope_freq_base',
-        'compress_pos_emb',
+        'compute_dtype',
+        'quant_type',
+        'load_in_8bit',
+        'load_in_4bit',
+        'torch_compile',
+        'use_flash_attention_2',
+        'auto_devices',
        'cpu',
-        'numa',
-        'no_offload_kqv',
-        'row_split',
-        'tensorcores',
-        'flash_attn',
-        'streaming_llm',
-        'attention_sink_size',
-    ],
-    'llamacpp_HF': [
-        'n_ctx',
-        'n_gpu_layers',
-        'cache_type',
-        'tensor_split',
-        'n_batch',
-        'threads',
-        'threads_batch',
-        'no_mmap',
-        'mlock',
-        'no_mul_mat_q',
-        'rope_freq_base',
-        'compress_pos_emb',
-        'cpu',
-        'numa',
-        'cfg_cache',
+        'disk',
+        'use_double_quant',
+        'use_eager_attention',
+        'bf16',
+
        'trust_remote_code',
        'no_use_fast',
-        'logits_all',
-        'no_offload_kqv',
-        'row_split',
+    ],
+    'llama.cpp': [
+        'n_gpu_layers',
+        'threads',
+        'threads_batch',
+        'n_batch',
+        'n_ctx',
+        'cache_type',
+        'tensor_split',
+        'rope_freq_base',
+        'compress_pos_emb',
+        'attention_sink_size',
        'tensorcores',
        'flash_attn',
        'streaming_llm',
+        'cpu',
+        'row_split',
+        'no_offload_kqv',
+        'no_mul_mat_q',
+        'no_mmap',
+        'mlock',
+        'numa',
+    ],
+    'llamacpp_HF': [
+        'n_gpu_layers',
+        'threads',
+        'threads_batch',
+        'n_batch',
+        'n_ctx',
+        'cache_type',
+        'tensor_split',
+        'rope_freq_base',
+        'compress_pos_emb',
        'attention_sink_size',
+        'tensorcores',
+        'flash_attn',
+        'streaming_llm',
+        'cpu',
+        'row_split',
+        'no_offload_kqv',
+        'no_mul_mat_q',
+        'no_mmap',
+        'mlock',
+        'numa',
+        'cfg_cache',
+        'logits_all',
+        'trust_remote_code',
+        'no_use_fast',
        'llamacpp_HF_info',
    ],
    'ExLlamav2_HF': [
-        'gpu_split',
        'max_seq_len',
-        'cfg_cache',
+        'cache_type',
+        'gpu_split',
+        'alpha_value',
+        'compress_pos_emb',
+        'num_experts_per_token',
+        'autosplit',
+        'enable_tp',
        'no_flash_attn',
        'no_xformers',
        'no_sdpa',
-        'num_experts_per_token',
-        'cache_type',
-        'autosplit',
-        'enable_tp',
-        'alpha_value',
-        'compress_pos_emb',
+        'cfg_cache',
        'trust_remote_code',
        'no_use_fast',
    ],
    'ExLlamav2': [
-        'gpu_split',
        'max_seq_len',
+        'cache_type',
+        'gpu_split',
+        'alpha_value',
+        'compress_pos_emb',
+        'num_experts_per_token',
+        'autosplit',
+        'enable_tp',
        'no_flash_attn',
        'no_xformers',
        'no_sdpa',
-        'num_experts_per_token',
-        'cache_type',
-        'autosplit',
-        'enable_tp',
-        'alpha_value',
-        'compress_pos_emb',
        'exllamav2_info',
    ],
    'HQQ': [
@@ -121,51 +122,51 @@ loaders_and_params = OrderedDict({
 def transformers_samplers():
    return {
        'temperature',
-        'temperature_last',
-        'dynamic_temperature',
        'dynatemp_low',
        'dynatemp_high',
        'dynatemp_exponent',
        'smoothing_factor',
        'smoothing_curve',
-        'top_p',
        'min_p',
+        'top_p',
        'top_k',
        'typical_p',
+        'xtc_threshold',
+        'xtc_probability',
        'epsilon_cutoff',
        'eta_cutoff',
        'tfs',
        'top_a',
+        'dry_multiplier',
+        'dry_allowed_length',
+        'dry_base',
        'repetition_penalty',
-        'presence_penalty',
        'frequency_penalty',
-        'repetition_penalty_range',
+        'presence_penalty',
        'encoder_repetition_penalty',
        'no_repeat_ngram_size',
-        'dry_multiplier',
-        'dry_base',
-        'dry_allowed_length',
-        'dry_sequence_breakers',
-        'xtc_threshold',
-        'xtc_probability',
-        'seed',
-        'do_sample',
+        'repetition_penalty_range',
        'penalty_alpha',
+        'guidance_scale',
        'mirostat_mode',
        'mirostat_tau',
        'mirostat_eta',
-        'grammar_file_row',
-        'grammar_string',
-        'guidance_scale',
-        'negative_prompt',
+        'prompt_lookup_num_tokens',
+        'do_sample',
+        'dynamic_temperature',
+        'temperature_last',
+        'auto_max_new_tokens',
        'ban_eos_token',
-        'custom_token_bans',
-        'sampler_priority',
        'add_bos_token',
        'skip_special_tokens',
-        'auto_max_new_tokens',
-        'prompt_lookup_num_tokens',
        'static_cache',
+        'seed',
+        'sampler_priority',
+        'custom_token_bans',
+        'negative_prompt',
+        'dry_sequence_breakers',
+        'grammar_string',
+        'grammar_file_row',
    }


@@ -174,155 +175,156 @@ loaders_samplers = {
    'HQQ': transformers_samplers(),
    'ExLlamav2': {
        'temperature',
-        'temperature_last',
-        'smoothing_factor',
        'dynatemp_low',
        'dynatemp_high',
        'dynatemp_exponent',
-        'top_p',
+        'smoothing_factor',
        'min_p',
+        'top_p',
        'top_k',
        'typical_p',
+        'xtc_threshold',
+        'xtc_probability',
        'tfs',
        'top_a',
+        'dry_multiplier',
+        'dry_allowed_length',
+        'dry_base',
        'repetition_penalty',
-        'presence_penalty',
        'frequency_penalty',
+        'presence_penalty',
        'repetition_penalty_range',
        'mirostat_mode',
        'mirostat_tau',
        'mirostat_eta',
-        'dry_multiplier',
-        'dry_base',
-        'dry_allowed_length',
-        'dry_sequence_breakers',
-        'xtc_threshold',
-        'xtc_probability',
-        'seed',
+        'dynamic_temperature',
+        'temperature_last',
+        'auto_max_new_tokens',
        'ban_eos_token',
        'add_bos_token',
-        'custom_token_bans',
        'skip_special_tokens',
-        'auto_max_new_tokens',
+        'seed',
+        'custom_token_bans',
+        'dry_sequence_breakers',
    },
    'ExLlamav2_HF': {
        'temperature',
-        'temperature_last',
-        'dynamic_temperature',
        'dynatemp_low',
        'dynatemp_high',
        'dynatemp_exponent',
        'smoothing_factor',
        'smoothing_curve',
-        'top_p',
        'min_p',
+        'top_p',
        'top_k',
        'typical_p',
+        'xtc_threshold',
+        'xtc_probability',
        'epsilon_cutoff',
        'eta_cutoff',
        'tfs',
        'top_a',
+        'dry_multiplier',
+        'dry_allowed_length',
+        'dry_base',
        'repetition_penalty',
-        'presence_penalty',
        'frequency_penalty',
-        'repetition_penalty_range',
+        'presence_penalty',
        'encoder_repetition_penalty',
        'no_repeat_ngram_size',
-        'dry_multiplier',
-        'dry_base',
-        'dry_allowed_length',
-        'dry_sequence_breakers',
-        'xtc_threshold',
-        'xtc_probability',
-        'seed',
-        'do_sample',
+        'repetition_penalty_range',
+        'guidance_scale',
        'mirostat_mode',
        'mirostat_tau',
        'mirostat_eta',
-        'grammar_file_row',
-        'grammar_string',
-        'guidance_scale',
-        'negative_prompt',
+        'do_sample',
+        'dynamic_temperature',
+        'temperature_last',
+        'auto_max_new_tokens',
        'ban_eos_token',
-        'custom_token_bans',
-        'sampler_priority',
        'add_bos_token',
        'skip_special_tokens',
-        'auto_max_new_tokens',
+        'seed',
+        'sampler_priority',
+        'custom_token_bans',
+        'negative_prompt',
+        'dry_sequence_breakers',
+        'grammar_string',
+        'grammar_file_row',
    },
    'llama.cpp': {
        'temperature',
-        'top_p',
        'min_p',
+        'top_p',
        'top_k',
        'typical_p',
        'tfs',
        'repetition_penalty',
-        'presence_penalty',
        'frequency_penalty',
-        'seed',
+        'presence_penalty',
        'mirostat_mode',
        'mirostat_tau',
        'mirostat_eta',
-        'grammar_file_row',
-        'grammar_string',
        'ban_eos_token',
+        'seed',
        'custom_token_bans',
+        'grammar_string',
+        'grammar_file_row',
    },
    'llamacpp_HF': {
        'temperature',
-        'temperature_last',
-        'dynamic_temperature',
        'dynatemp_low',
        'dynatemp_high',
        'dynatemp_exponent',
        'smoothing_factor',
        'smoothing_curve',
-        'top_p',
        'min_p',
+        'top_p',
        'top_k',
        'typical_p',
+        'xtc_threshold',
+        'xtc_probability',
        'epsilon_cutoff',
        'eta_cutoff',
        'tfs',
        'top_a',
+        'dry_multiplier',
+        'dry_allowed_length',
+        'dry_base',
        'repetition_penalty',
-        'presence_penalty',
        'frequency_penalty',
-        'repetition_penalty_range',
+        'presence_penalty',
        'encoder_repetition_penalty',
        'no_repeat_ngram_size',
-        'dry_multiplier',
-        'dry_base',
-        'dry_allowed_length',
-        'dry_sequence_breakers',
-        'xtc_threshold',
-        'xtc_probability',
-        'seed',
-        'do_sample',
+        'repetition_penalty_range',
+        'guidance_scale',
        'mirostat_mode',
        'mirostat_tau',
        'mirostat_eta',
-        'grammar_file_row',
-        'grammar_string',
-        'guidance_scale',
-        'negative_prompt',
+        'do_sample',
+        'dynamic_temperature',
+        'temperature_last',
+        'auto_max_new_tokens',
        'ban_eos_token',
-        'custom_token_bans',
-        'sampler_priority',
        'add_bos_token',
        'skip_special_tokens',
-        'auto_max_new_tokens',
+        'seed',
+        'sampler_priority',
+        'custom_token_bans',
+        'negative_prompt',
+        'dry_sequence_breakers',
+        'grammar_string',
+        'grammar_file_row',
    },
    'TensorRT-LLM': {
        'temperature',
        'top_p',
        'top_k',
        'repetition_penalty',
-        'presence_penalty',
        'frequency_penalty',
-        'ban_eos_token',
+        'presence_penalty',
        'auto_max_new_tokens',
+        'ban_eos_token',
    }
 }