mirror of
https://github.com/oobabooga/text-generation-webui.git
synced 2026-04-07 15:43:49 +00:00
Add adaptive-p sampler and n-gram speculative decoding support
This commit is contained in:
parent
f010aa1612
commit
65de4c30c8
10 changed files with 145 additions and 3 deletions
|
|
@@ -76,6 +76,10 @@ def create_ui():
|
|||
shared.gradio['draft_max'] = gr.Number(label="draft-max", precision=0, step=1, value=shared.args.draft_max, info='Number of tokens to draft for speculative decoding. Recommended value: 4.')
|
||||
shared.gradio['device_draft'] = gr.Textbox(label="device-draft", value=shared.args.device_draft, info='Comma-separated list of devices to use for offloading the draft model. Example: CUDA0,CUDA1')
|
||||
shared.gradio['ctx_size_draft'] = gr.Number(label="ctx-size-draft", precision=0, step=256, value=shared.args.ctx_size_draft, info='Size of the prompt context for the draft model. If 0, uses the same as the main model.')
|
||||
shared.gradio['spec_type'] = gr.Dropdown(label="spec-type", choices=['none', 'ngram-cache', 'ngram-simple', 'ngram-map-k', 'ngram-map-k4v', 'ngram-mod'], value=shared.args.spec_type, info='Draftless speculative decoding type. Uses n-gram matching from context.')
|
||||
shared.gradio['spec_ngram_size_n'] = gr.Number(label="spec-ngram-size-n", precision=0, step=1, value=shared.args.spec_ngram_size_n, info='N-gram lookup size for speculative decoding.', visible=shared.args.spec_type != 'none')
|
||||
shared.gradio['spec_ngram_size_m'] = gr.Number(label="spec-ngram-size-m", precision=0, step=1, value=shared.args.spec_ngram_size_m, info='Draft n-gram size for speculative decoding.', visible=shared.args.spec_type != 'none')
|
||||
shared.gradio['spec_ngram_min_hits'] = gr.Number(label="spec-ngram-min-hits", precision=0, step=1, value=shared.args.spec_ngram_min_hits, info='Minimum n-gram hits for ngram-map speculative decoding.', visible=shared.args.spec_type != 'none')
|
||||
|
||||
gr.Markdown("## Other options")
|
||||
with gr.Accordion("See more options", open=False, elem_classes='tgw-accordion'):
|
||||
|
|
@@ -179,6 +183,13 @@ def create_event_handlers():
|
|||
if not shared.args.portable:
|
||||
shared.gradio['lora_menu_apply'].click(load_lora_wrapper, gradio('lora_menu'), gradio('model_status'), show_progress=False)
|
||||
|
||||
shared.gradio['spec_type'].change(
|
||||
lambda x: [gr.update(visible=x != 'none')] * 3,
|
||||
gradio('spec_type'),
|
||||
gradio('spec_ngram_size_n', 'spec_ngram_size_m', 'spec_ngram_min_hits'),
|
||||
show_progress=False
|
||||
)
|
||||
|
||||
shared.gradio['download_model_button'].click(download_model_wrapper, gradio('custom_model_menu', 'download_specific_file'), gradio('model_status'), show_progress=True)
|
||||
shared.gradio['get_file_list'].click(partial(download_model_wrapper, return_links=True), gradio('custom_model_menu', 'download_specific_file'), gradio('model_status'), show_progress=True)
|
||||
shared.gradio['customized_template_submit'].click(save_instruction_template, gradio('model_menu', 'customized_template'), gradio('model_status'), show_progress=True)
|
||||
|
|
|
|||
Loading…
Add table
Add a link
Reference in a new issue