mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2026-04-09 00:23:38 +00:00
llama.cpp: Document the --device-draft syntax
This commit is contained in:
parent
f1b64df8dd
commit
93fd4ad25d
2 changed files with 2 additions and 2 deletions
|
|
@ -100,7 +100,7 @@ def create_ui():
|
|||
|
||||
shared.gradio['draft_max'] = gr.Number(label="draft-max", precision=0, step=1, value=shared.args.draft_max, info='Number of tokens to draft for speculative decoding.')
|
||||
shared.gradio['gpu_layers_draft'] = gr.Slider(label="gpu-layers-draft", minimum=0, maximum=256, value=shared.args.gpu_layers_draft, info='Number of layers to offload to the GPU for the draft model.')
|
||||
shared.gradio['device_draft'] = gr.Textbox(label="device-draft", value=shared.args.device_draft, info='Comma-separated list of devices to use for offloading the draft model.')
|
||||
shared.gradio['device_draft'] = gr.Textbox(label="device-draft", value=shared.args.device_draft, info='Comma-separated list of devices to use for offloading the draft model. Example: CUDA0,CUDA1')
|
||||
shared.gradio['ctx_size_draft'] = gr.Number(label="ctx-size-draft", precision=0, step=256, value=shared.args.ctx_size_draft, info='Size of the prompt context for the draft model. If 0, uses the same as the main model.')
|
||||
|
||||
with gr.Column():
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue