Mirror of https://github.com/oobabooga/text-generation-webui.git, synced 2026-04-17 12:33:44 +00:00
Revert "Remove non-HF ExLlamaV2 loader (#5431)"
This reverts commit cde000d478.
parent 8c35fefb3b
commit 2a1063eff5
9 changed files with 220 additions and 18 deletions
@@ -142,6 +142,7 @@ def create_ui():
 shared.gradio['disable_exllama'] = gr.Checkbox(label="disable_exllama", value=shared.args.disable_exllama, info='Disable ExLlama kernel for GPTQ models.')
 shared.gradio['disable_exllamav2'] = gr.Checkbox(label="disable_exllamav2", value=shared.args.disable_exllamav2, info='Disable ExLlamav2 kernel for GPTQ models.')
 shared.gradio['gptq_for_llama_info'] = gr.Markdown('Legacy loader for compatibility with older GPUs. ExLlamav2_HF or AutoGPTQ are preferred for GPTQ models when supported.')
+shared.gradio['exllamav2_info'] = gr.Markdown("ExLlamav2_HF is recommended over ExLlamav2 for better integration with extensions and more consistent sampling behavior across loaders.")
 shared.gradio['llamacpp_HF_info'] = gr.Markdown('llamacpp_HF loads llama.cpp as a Transformers model. To use it, you need to download a tokenizer.\n\nOption 1 (recommended): place your .gguf in a subfolder of models/ along with these 4 files: special_tokens_map.json, tokenizer_config.json, tokenizer.json, tokenizer.model.\n\nOption 2: download `oobabooga/llama-tokenizer` under "Download model or LoRA". That\'s a default Llama tokenizer that will work for some (but not all) models.')

 with gr.Column():
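For context, a minimal, self-contained sketch of the Gradio pattern this hunk restores: a loader checkbox with an info tooltip alongside an explanatory Markdown note. The Blocks/Column scaffolding and the default checkbox value here are assumptions for illustration; in the webui these components sit inside the much larger create_ui() layout and read their defaults from shared.args.

import gradio as gr

# Illustrative sketch only -- the real create_ui() nests these components
# inside a larger model-settings layout.
with gr.Blocks() as demo:
    with gr.Column():
        disable_exllamav2 = gr.Checkbox(
            label="disable_exllamav2",
            value=False,  # assumption: the webui seeds this from shared.args.disable_exllamav2
            info='Disable ExLlamav2 kernel for GPTQ models.',
        )
        exllamav2_info = gr.Markdown(
            "ExLlamav2_HF is recommended over ExLlamav2 for better integration "
            "with extensions and more consistent sampling behavior across loaders."
        )

if __name__ == "__main__":
    demo.launch()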
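The llamacpp_HF note in the hunk describes Option 1 as placing the .gguf in a subfolder of models/ together with four tokenizer files. As an illustration only (this helper is not part of the webui; the function name and layout check are assumptions), verifying that layout might look like:

from pathlib import Path

# The four tokenizer files named in the llamacpp_HF note above.
REQUIRED_TOKENIZER_FILES = (
    "special_tokens_map.json",
    "tokenizer_config.json",
    "tokenizer.json",
    "tokenizer.model",
)

def has_llamacpp_hf_tokenizer(model_dir: str) -> bool:
    """Return True if model_dir holds a .gguf plus all four tokenizer files."""
    folder = Path(model_dir)
    has_gguf = any(folder.glob("*.gguf"))
    has_tokenizer = all((folder / name).is_file() for name in REQUIRED_TOKENIZER_FILES)
    return has_gguf and has_tokenizer

# Example: has_llamacpp_hf_tokenizer("models/my-model")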