Add ik_llama.cpp support via ik_llama_cpp_binaries package

This commit is contained in:
oobabooga 2026-03-28 11:49:47 -03:00
parent 9dd04b86ce
commit 4979e87e48
19 changed files with 469 additions and 24 deletions

View file

@@ -51,6 +51,9 @@ def create_ui():
with gr.Column():
shared.gradio['vram_info'] = gr.HTML(value=get_initial_vram_info())
if not shared.args.portable:
shared.gradio['ik'] = gr.Checkbox(label="ik", value=shared.args.ik, info='Use ik_llama.cpp instead of upstream llama.cpp.')
shared.gradio['cpu_moe'] = gr.Checkbox(label="cpu-moe", value=shared.args.cpu_moe, info='Move the experts to the CPU. Saves VRAM on MoE models.')
shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming-llm", value=shared.args.streaming_llm, info='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
shared.gradio['load_in_8bit'] = gr.Checkbox(label="load-in-8bit", value=shared.args.load_in_8bit)