diff --git a/modules/loaders.py b/modules/loaders.py index f69f1720..7d6afe80 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -4,6 +4,23 @@ from collections import OrderedDict import gradio as gr loaders_and_params = OrderedDict({ + 'llama.cpp': [ + 'n_gpu_layers', + 'threads', + 'threads_batch', + 'batch_size', + 'n_ctx', + 'cache_type', + 'tensor_split', + 'rope_freq_base', + 'compress_pos_emb', + 'flash_attn', + 'row_split', + 'no_kv_offload', + 'no_mmap', + 'mlock', + 'numa', + ], 'Transformers': [ 'gpu_split', 'cpu_memory', @@ -23,23 +40,6 @@ loaders_and_params = OrderedDict({ 'trust_remote_code', 'no_use_fast', ], - 'llama.cpp': [ - 'n_gpu_layers', - 'threads', - 'threads_batch', - 'batch_size', - 'n_ctx', - 'cache_type', - 'tensor_split', - 'rope_freq_base', - 'compress_pos_emb', - 'flash_attn', - 'row_split', - 'no_kv_offload', - 'no_mmap', - 'mlock', - 'numa', - ], 'ExLlamav3_HF': [ 'max_seq_len', 'gpu_split', diff --git a/server.py b/server.py index 1d261566..41a5660d 100644 --- a/server.py +++ b/server.py @@ -85,7 +85,7 @@ def create_interface(): # Force some events to be triggered on page load shared.persistent_interface_state.update({ - 'loader': shared.args.loader or 'Transformers', + 'loader': shared.args.loader or 'llama.cpp', 'mode': shared.settings['mode'] if shared.settings['mode'] == 'instruct' else gr.update(), 'character_menu': shared.args.character or shared.settings['character'], 'instruction_template_str': shared.settings['instruction_template_str'],