diff --git a/modules/shared.py b/modules/shared.py
index 79925909..eeaeb689 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -134,7 +134,7 @@ group = parser.add_argument_group('Speculative decoding')
 group.add_argument('--model-draft', type=str, default=None, help='Path to the draft model for speculative decoding.')
 group.add_argument('--draft-max', type=int, default=4, help='Number of tokens to draft for speculative decoding.')
 group.add_argument('--gpu-layers-draft', type=int, default=0, help='Number of layers to offload to the GPU for the draft model.')
-group.add_argument('--device-draft', type=str, default=None, help='Comma-separated list of devices to use for offloading the draft model.')
+group.add_argument('--device-draft', type=str, default=None, help='Comma-separated list of devices to use for offloading the draft model. Example: CUDA0,CUDA1')
 group.add_argument('--ctx-size-draft', type=int, default=0, help='Size of the prompt context for the draft model. If 0, uses the same as the main model.')
 
 # ExLlamaV2
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index 1b0c25fa..56b6903f 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -100,7 +100,7 @@ def create_ui():
                             shared.gradio['draft_max'] = gr.Number(label="draft-max", precision=0, step=1, value=shared.args.draft_max, info='Number of tokens to draft for speculative decoding.')
                             shared.gradio['gpu_layers_draft'] = gr.Slider(label="gpu-layers-draft", minimum=0, maximum=256, value=shared.args.gpu_layers_draft, info='Number of layers to offload to the GPU for the draft model.')
-                            shared.gradio['device_draft'] = gr.Textbox(label="device-draft", value=shared.args.device_draft, info='Comma-separated list of devices to use for offloading the draft model.')
+                            shared.gradio['device_draft'] = gr.Textbox(label="device-draft", value=shared.args.device_draft, info='Comma-separated list of devices to use for offloading the draft model. Example: CUDA0,CUDA1')
                             shared.gradio['ctx_size_draft'] = gr.Number(label="ctx-size-draft", precision=0, step=256, value=shared.args.ctx_size_draft, info='Size of the prompt context for the draft model. If 0, uses the same as the main model.')
 
                         with gr.Column():
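
For reference, a minimal usage sketch of how the speculative-decoding flags touched above combine on the command line. It assumes text-generation-webui's server.py entry point; the model filenames and layer count are illustrative placeholders, while the flags and the CUDA0,CUDA1 device syntax come straight from the help text in the diff:

    # Hypothetical model filenames; only the flags themselves are defined by this diff.
    python server.py \
        --model Llama-3.1-70B-Instruct-Q4_K_M.gguf \
        --model-draft Llama-3.2-1B-Instruct-Q4_K_M.gguf \
        --draft-max 4 \
        --gpu-layers-draft 32 \
        --device-draft CUDA0,CUDA1 \
        --ctx-size-draft 0

Per the help strings, --ctx-size-draft 0 makes the draft model reuse the main model's context size, and --device-draft CUDA0,CUDA1 offloads the draft model across the first two CUDA devices.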