Commit d262526450 by ystartgo, 2026-02-15 20:45:39 +08:00 (parent 910456ba31)
4 changed files with 461 additions and 179 deletions

modules/i18n.py (new file, 279 lines)

@@ -0,0 +1,279 @@
from modules import shared
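# (language code, display label) pairs offered in the UI language dropdown.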
LANG_CHOICES = [
("en", "English"),
("zh_TW", "繁體中文"),
]
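# English source string -> Traditional Chinese translation. Keys must match
# the exact literals wrapped with t() in the ui_* modules; strings without an
# entry here fall back to English.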
_ZH_TW = {
"Text Generation Web UI": "文字生成 Web UI",
"Chat": "聊天",
"Notebook": "筆記本",
"Parameters": "參數",
"Character": "角色",
"Model": "模型",
"Image generation": "圖像生成",
"Training": "訓練",
"Session": "工作階段",
"Settings": "設定",
"Extensions & flags": "擴充與旗標",
"Available extensions": "可用擴充",
"Boolean command-line flags": "布林命令列旗標",
"Toggle light/dark theme 💡": "切換亮/暗主題 💡",
"Show two columns in the Notebook tab": "Notebook 分頁顯示雙欄",
"Turn long pasted text into attachments in the Chat tab": "在聊天分頁將長貼上文字轉為附件",
"Include attachments/search results from previous messages in the chat prompt": "聊天提示包含前訊息的附件/搜尋結果",
"Save extensions settings to user_data/settings.yaml": "儲存擴充設定到 user_data/settings.yaml",
"Apply flags/extensions and restart": "套用旗標/擴充並重新啟動",
"Language": "語言",
"Input": "輸入",
"Output": "輸出",
"Continue": "繼續",
"Stop": "停止",
"Generate": "生成",
"Prompt": "提示",
"New": "新增",
"Rename": "重新命名",
"Delete": "刪除",
"Confirm": "確認",
"Cancel": "取消",
"Raw": "原始",
"Markdown": "Markdown",
"HTML": "HTML",
"Logits": "Logits",
"Tokens": "詞元",
"Get next token probabilities": "取得下一個詞元機率",
"Use samplers": "使用採樣器",
"Previous output": "前一次輸出",
"Get token IDs for the input": "取得輸入的詞元 ID",
"Render": "渲染",
"<|character|> and <|prompt|> get replaced with the bot name and the regular chat prompt respectively.": "<|character|> 與 <|prompt|> 會分別替換為機器人名稱與一般聊天提示。",
"dynatemp_low": "dynatemp_low 低溫",
"dynatemp_high": "dynatemp_high 高溫",
"dynatemp_exponent": "dynatemp_exponent 指數",
"dynamic_temperature": "dynamic_temperature 動態溫度",
"mirostat_tau": "mirostat_tau 目標熵",
"mirostat_eta": "mirostat_eta 學習率",
"Ban the eos_token": "Ban the eos_token 禁用結束詞元",
"threads": "threads 執行緒數",
"threads_batch": "threads_batch 每批執行緒",
"batch_size": "batch_size 批次大小",
"ubatch_size": "ubatch_size 微批次大小",
"extra-flags": "extra-flags 額外旗標",
"rope_freq_base": "rope_freq_base 頻率基準",
"mlock": "mlock 鎖定記憶體",
"numa": "numa NUMA",
"Send a message": "送出訊息",
"Send": "送出",
"no_kv_offload": "no_kv_offload 不卸載 K/V",
"Please enter a model path.": "請輸入模型路徑。",
"Undo": "復原",
"Regenerate": "重新生成",
"Generation": "生成",
"Preset": "預設",
"Restore preset": "還原預設",
"Neutralize samplers": "重置採樣器",
"Filter by loader": "依載入器篩選",
"## Curve shape": "## 曲線形狀",
"## Curve cutoff": "## 曲線截斷",
"## Repetition suppression": "## 重複抑制",
"## Alternative sampling methods": "## 替代採樣方法",
"## Other options": "## 其他選項",
"Truncate the prompt up to this length": "將提示截斷至此長度",
"Seed (-1 for random)": "隨機種子(-1 為隨機)",
"Custom system message": "自訂系統訊息",
"If not empty, will be used instead of the default one.": "若不為空,將取代預設內容。",
"Custom stopping strings": "自訂停止字串",
"Token bans": "禁用詞元",
"Negative prompt": "負面提示",
"Load grammar from file (.gbnf)": "從檔案載入文法(.gbnf",
"Grammar": "文法",
"temperature": "temperature 溫度",
"Instruction template": "指令模板",
"Saved instruction templates": "已儲存的指令模板",
"Load": "載入",
"Send to notebook": "發送到筆記本",
"Send to Notebook": "發送到筆記本",
"Chat template": "聊天模板",
"Send": "傳送",
"Regenerate (Ctrl + Enter)": "重新生成Ctrl + Enter",
"Continue (Alt + Enter)": "繼續Alt + Enter",
"Remove last reply (Ctrl + Shift + Backspace)": "移除上一則回覆Ctrl + Shift + Backspace",
"Impersonate (Ctrl + Shift + M)": "扮演Ctrl + Shift + M",
"Send dummy message": "送出範例訊息",
"Send dummy reply": "送出範例回覆",
"Show controls (Ctrl+S)": "顯示控制列Ctrl+S",
"Start reply with": "以此開頭回覆",
"Reasoning effort": "推理強度",
"Enable thinking": "啟用思考",
"Activate web search": "啟用網頁搜尋",
"Number of pages to download": "下載頁數",
"Mode": "模式",
"Chat style": "聊天風格",
"Command for chat-instruct mode": "chat-instruct 模式指令",
"Count tokens": "計算詞元數",
"New chat": "新聊天",
"Branch": "分支",
"Search chats...": "搜尋聊天…",
"Rename to:": "重新命名為:",
"New name": "新名稱",
"Restore character": "還原角色",
"Character's name": "角色名稱",
"Context": "情境",
"Greeting": "問候語",
"User": "使用者",
"Name": "名稱",
"Description": "描述",
"Here you can optionally write a description of yourself.": "此處可選擇性撰寫自我描述。",
"Chat history": "聊天記錄",
"Save history": "儲存記錄",
"Upload History JSON": "上傳記錄 JSON",
"Upload character": "上傳角色",
"YAML or JSON": "YAML 或 JSON",
"JSON or YAML File": "JSON 或 YAML 檔",
"Profile Picture (optional)": "大頭貼(可選)",
"Submit": "送出",
"TavernAI PNG File": "TavernAI PNG 檔",
"Character picture": "角色圖片",
"Your picture": "你的圖片",
"Save settings": "儲存設定",
"Unload": "卸載",
"Model loader": "模型載入器",
"## Main options": "## 主要選項",
"See more options": "查看更多選項",
"Multimodal (vision)": "多模態(視覺)",
"mmproj file": "mmproj 檔案",
"Speculative decoding": "投機解碼",
"gpu-layers": "GPU 層數",
"ctx-size": "上下文長度",
"cache-type": "快取類型",
"cpu-moe": "將專家移至 CPUcpu-moe",
"streaming-llm": "啟用 StreamingLLM",
"Download": "下載",
"Download model or LoRA": "下載模型或 LoRA",
"Get file list": "取得檔案清單",
"Customize instruction template": "自訂指令模板",
"Select the desired instruction template": "選擇想要的指令模板",
"No model is loaded": "尚未載入模型",
"Ready": "就緒",
"File name (for GGUF models)": "檔名(適用 GGUF 模型)",
"This allows you to set a customized template for the model currently selected in the \"Model loader\" menu. Whenever the model gets loaded, this template will be used in place of the template specified in the model's medatada, which sometimes is wrong.": "這可為「模型載入器」中目前選取的模型設定自訂模板。之後每次載入該模型時,都會使用此模板,取代模型中標註的模板(有時會標註錯誤)。",
"Enter the Hugging Face username/model path, for instance: facebook/galactica-125m. To specify a branch, add it at the end after a \":\" character like this: facebook/galactica-125m:main. To download a single file, enter its name in the second box.": "輸入 Hugging Face 的使用者/模型路徑例如facebook/galactica-125m。若要指定分支請在最後加上冒號例如facebook/galactica-125m:main。若只下載單一檔案請在第二個輸入框填入檔名。",
"Used by GPT-OSS.": "供 GPT-OSS 使用。",
"Used by Seed-OSS and pre-2507 Qwen3.": "供 Seed-OSS 與 2507 之前的 Qwen3 使用。",
"In instruct and chat-instruct modes, the template under Parameters > Instruction template is used.": "在 instruct 與 chat-instruct 模式下,會使用「參數 > 指令模板」中的模板。",
"After selecting the template, click on \"Load\" to load and apply it.": "選擇模板後,點擊「載入」以套用。",
"This gets autodetected; you usually don't need to change it. Used in instruct and chat-instruct modes.": "此值通常會自動偵測,通常不需更改。用於 instruct 與 chat-instruct 模式。",
"Defines how the chat prompt in chat/chat-instruct modes is generated.": "定義 chat 或 chat-instruct 模式下聊天提示的生成方式。",
"Must be greater than 0 for the GPU to be used. ⚠️ Lower this value if you can't load the model.": "必須大於 0 才會使用 GPU。⚠ 如果無法載入模型,請降低此值。",
"Context length. Common values: 4096, 8192, 16384, 32768, 65536, 131072.": "上下文長度。常見值4096、8192、16384、32768、65536、131072。",
"Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7": "每張 GPU 使用的 VRAMGB以逗號分隔。範例20,7,7",
"Attention implementation.": "注意力實作。",
"Valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4; ExLlamaV3 - fp16, q2 to q8. For ExLlamaV3, you can type custom combinations for separate k/v bits (e.g. q4_q8).": "有效選項llama.cpp - fp16、q8_0、q4_0ExLlamaV2 - fp16、fp8、q8、q6、q4ExLlamaV3 - fp16、q2 到 q8。對 ExLlamaV3可為 k/v 分別輸入自訂組合(例如 q4_q8",
"The backend for tensor parallelism.": "張量並行的後端。",
"Move the experts to the CPU. Saves VRAM on MoE models.": "將 experts 移至 CPU可在 MoE 模型節省 VRAM。",
"Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.": "啟用 StreamingLLM移除舊訊息時免重新評估整個提示。",
"Used by load-in-4bit.": "供 load-in-4bit 使用。",
"Automatically split the model tensors across the available GPUs.": "自動將模型張量分散至可用 GPU。",
"Enable tensor parallelism (TP).": "啟用張量並行TP",
"Enable inference with ModelRunnerCpp, which is faster than the default ModelRunner.": "使用 ModelRunnerCpp 進行推論,通常比預設 ModelRunner 更快。",
"Select a file that matches your model. Must be placed in user_data/mmproj/": "選擇與模型相容的檔案,需放在 user_data/mmproj/。",
"Draft model. Speculative decoding only works with models sharing the same vocabulary (e.g., same model family).": "草稿模型。投機解碼僅適用於共享相同詞彙表的模型(例如相同家族)。",
"Number of layers to offload to the GPU for the draft model.": "草稿模型要卸載到 GPU 的層數。",
"Number of tokens to draft for speculative decoding. Recommended value: 4.": "投機解碼的草稿詞元數。建議值4。",
"Comma-separated list of devices to use for offloading the draft model. Example: CUDA0,CUDA1": "卸載草稿模型的裝置列表以逗號分隔。範例CUDA0,CUDA1",
"Size of the prompt context for the draft model. If 0, uses the same as the main model.": "草稿模型的提示上下文大小。若為 0則與主模型相同。",
"* TensorRT-LLM has to be installed manually in a separate Python 3.10 environment at the moment. For a guide, consult the description of [this PR](https://github.com/oobabooga/text-generation-webui/pull/5715). \n\n* `ctx_size` is only used when `cpp-runner` is checked.\n\n* `cpp_runner` does not support streaming at the moment.": "* 目前需要在獨立的 Python 3.10 環境手動安裝 TensorRT-LLM。安裝指南請參考此 PR 的說明。\n\n* 僅在勾選 `cpp-runner` 時使用 `ctx_size`。\n\n* 目前 `cpp_runner` 不支援串流輸出。",
"Use PyTorch in CPU mode.": "以 CPU 模式使用 PyTorch。",
"Split the model by rows across GPUs. This may improve multi-gpu performance.": "將模型依列在多張 GPU 上切分,可能改善多 GPU 效能。",
"Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.": "不要將 K、Q、V 卸載至 GPU可節省 VRAM但會降低效能。",
"NUMA support can help on some systems with non-uniform memory access.": "在非一致性記憶體架構NUMA的系統上啟用可改善效能。",
"Necessary to use CFG with this loader.": "使用此載入器啟用 CFG 時所必需。",
"Set use_fast=False while loading the tokenizer.": "在載入 tokenizer 時設定 use_fast=False。",
"Activates Quadratic Sampling.": "啟用二次取樣Quadratic Sampling",
"Adjusts the dropoff curve of Quadratic Sampling.": "調整二次取樣的衰減曲線。",
"Probability that the removal will actually happen. 0 disables the sampler. 1 makes it always happen.": "實際執行移除的機率。0 代表停用此採樣器1 代表必定執行。",
"For Contrastive Search. do_sample must be unchecked.": "用於對比式搜尋Contrastive Search。需取消勾選 do_sample。",
"For CFG. 1.5 is a good value.": "用於 CFG。建議值1.5。",
"Parameter names separated by new lines or commas.": "以換行或逗號分隔參數名稱。",
"Tokens across which sequence matching is not continued. Specified as a comma-separated list of quoted strings.": "不進行序列比對延續的斷點詞元,以加引號的字串並以逗號分隔。",
"Activates Prompt Lookup Decoding.": "啟用提示查找解碼Prompt Lookup Decoding",
"Expand max_new_tokens to the available context length.": "將 max_new_tokens 擴展至可用的上下文長度。",
"Forces the model to never end the generation prematurely.": "強制模型不要過早結束生成。",
"Only applies to text completion (notebook). In chat mode, templates control BOS tokens.": "僅適用於文字補全Notebook。聊天模式由模板控制 BOS。",
"Some specific models need this unset.": "部分模型需要取消此選項。",
"Activate text streaming": "啟用文字串流",
"Static KV cache": "靜態 KV 快取",
"Use a static cache for improved performance.": "使用靜態快取以提升效能。",
"List of proportions to split the model across multiple GPUs. Example: 60,40": "模型在多張 GPU 間的比例設定。範例60,40",
"tensor_split": "張量切分比例",
"auto_max_new_tokens": "自動調整最大新詞元數",
"Add the bos_token to the beginning of prompts": "在提示開頭加入 BOS 詞元",
"Additional flags to pass to llama-server. Format: \"flag1=value1,flag2,flag3=value3\". Example: \"override-tensor=exps=CPU\"": "傳遞給 llama-server 的額外旗標。格式「flag1=value1,flag2,flag3=value3」。範例「override-tensor=exps=CPU」。",
"Maximum CPU memory in GiB. Use this for CPU offloading.": "最大 CPU 記憶體GiB。用於 CPU 卸載。",
"Positional embeddings alpha factor for NTK RoPE scaling. Recommended values (NTKv1): 1.75 for 1.5x context, 2.5 for 2x context. Use either this or compress_pos_emb, not both.": "NTK RoPE 縮放用的位置嵌入 alpha 因子。建議值NTKv11.75 對應 1.5x 上下文、2.5 對應 2x 上下文。與 compress_pos_emb 擇一使用。",
"Positional embeddings frequency base for NTK RoPE scaling. Related to alpha_value by rope_freq_base = 10000 * alpha_value ^ (64 / 63). 0 = from model.": "NTK RoPE 縮放用的位置嵌入頻率基準。與 alpha_value 的關係rope_freq_base = 10000 * alpha_value ^ (64 / 63)。0 表示使用模型內建值。",
"Positional embeddings compression factor. Should be set to (context length) / (model's original context length). Equal to 1/rope_freq_scale.": "位置嵌入壓縮係數。應設為(新上下文長度)/(模型原始上下文長度)。等同 1/rope_freq_scale。",
"compress_pos_emb": "位置嵌入壓縮係數",
"row_split": "列切分row_split",
"no-mmap": "停用 mmapno-mmap",
"Only applies to MoE models like Mixtral.": "僅適用於 Mixtral 等 MoE 模型。",
"Estimated VRAM to load the model:": "載入模型的預估 VRAM",
"mirostat_mode": "mirostat 模式",
"mode=1 is for llama.cpp only.": "mode=1 只適用於 llama.cpp。",
"Note that some of these extensions may require manually installing Python requirements through the command: pip install -r extensions/extension_name/requirements.txt": "部分擴充可能需要手動安裝 Python 相依套件pip install -r extensions/extension_name/requirements.txt",
"Used in chat and chat-instruct modes.": "用於 chat 與 chat-instruct 模式。",
"max_new_tokens": "最大新詞元數",
"⚠️ Setting this too high can cause prompt truncation.": "⚠️ 設定過高可能導致提示被截斷。",
"The leftmost tokens are removed if the prompt exceeds this length.": "若提示超過此長度,會移除最左側的詞元。",
"Sampler priority": "採樣器優先順序",
"Maximum tokens/second": "每秒最大詞元數",
"To make text readable in real time.": "讓文字即時可讀。",
"min_p": "min_p 最小機率",
"xtc_threshold": "xtc_threshold XTC 臨界值",
"xtc_probability": "xtc_probability XTC 機率",
"dry_base": "dry 基數",
"dry_allowed_length": "dry 允許重複長度",
"dry_sequence_breakers": "dry_sequence_breakers 斷序詞元",
"repetition_penalty": "repetition_penalty 重複懲罰",
"frequency_penalty": "frequency_penalty 頻率懲罰",
"presence_penalty": "presence_penalty 出現懲罰",
"repetition_penalty_range": "repetition_penalty_range 重複懲罰範圍",
"temperature_last": "temperature_last 溫度後置",
"If 2 or more tokens have probability above this threshold, consider removing all but the last one.": "若有 2 個以上詞元的機率高於此門檻,考慮只保留最後一個。",
"Set to greater than 0 to enable DRY. Recommended value: 0.8.": "設為大於 0 以啟用 DRY。建議值0.8。",
"Longest sequence that can be repeated without being penalized.": "不受懲罰可重複的最長序列長度。",
"Controls how fast the penalty grows with increasing sequence length.": "控制懲罰隨序列長度增加的成長速度。",
"Moves temperature/dynamic temperature/quadratic sampling to the end of the sampler stack, ignoring their positions in \"Sampler priority\".": "將溫度/動態溫度/二次取樣移至採樣器堆疊末端,忽略其在「採樣器優先順序」中的位置。",
"top_n_sigma": "top_n_sigma Top-N 標準差",
"top_p": "top_p Top-p",
"top_k": "top_k Top-k",
"typical_p": "typical_p Typical-p",
"dry_multiplier": "dry 乘數",
"dry_allowed_length": "允許重複長度",
"Good morning!": "早安!",
"Good afternoon!": "午安!",
"Good evening!": "晚安!",
"How can I help you today?": "今天我可以怎麼幫你?",
}
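# Language code -> translation table. English has no table and passes through.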
_MAP = {
"zh_TW": _ZH_TW,
}
def lang():
    """Return the active language code, normalizing legacy or mislabeled values."""
    v = shared.settings.get("language", "en")
    if v in ("English", "en_US", "en-GB"):
        return "en"
    if v in ("zh_TW", "zh-TW", "繁體中文"):
        return "zh_TW"
    return v


def t(s: str) -> str:
    """Translate s into the active language, falling back to s itself."""
    d = _MAP.get(lang())
    if not d:
        return s
    return d.get(s, s)
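A quick usage sketch of the lookup-and-fallback behavior (not part of the commit; it assumes shared.settings behaves like a plain dict, which the .get() call above suggests):

    from modules import shared
    from modules.i18n import lang, t

    shared.settings["language"] = "zh_TW"
    lang()           # "zh_TW"
    t("Chat")        # "聊天" (key present in _ZH_TW)
    t("Unmapped")    # "Unmapped" (missing keys fall back to the source string)

    shared.settings["language"] = "en"
    t("Chat")        # "Chat" (no table for "en", so t() is a no-op)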

modules/ui_chat.py

@@ -9,6 +9,7 @@ from modules import chat, shared, ui, utils
from modules.html_generator import chat_html_wrapper
from modules.text_generation import stop_everything_event
from modules.utils import gradio
from modules.i18n import t
inputs = ('Chat input', 'interface_state')
reload_arr = ('history', 'name1', 'name2', 'mode', 'chat_style', 'character_menu')
@@ -21,27 +22,27 @@ def create_ui():
shared.gradio['history'] = gr.State({'internal': [], 'visible': [], 'metadata': {}})
shared.gradio['display'] = gr.JSON(value={}, visible=False) # Hidden buffer
with gr.Tab('Chat', elem_id='chat-tab'):
with gr.Tab(t('Chat'), elem_id='chat-tab'):
with gr.Row(elem_id='past-chats-row', elem_classes=['pretty_scrollbar']):
with gr.Column():
with gr.Row(elem_id='past-chats-buttons'):
shared.gradio['branch_chat'] = gr.Button('Branch', elem_classes=['refresh-button', 'refresh-button-medium'], elem_id='Branch', interactive=not mu)
shared.gradio['rename_chat'] = gr.Button('Rename', elem_classes=['refresh-button', 'refresh-button-medium'], interactive=not mu)
shared.gradio['branch_chat'] = gr.Button(t('Branch'), elem_classes=['refresh-button', 'refresh-button-medium'], elem_id='Branch', interactive=not mu)
shared.gradio['rename_chat'] = gr.Button(t('Rename'), elem_classes=['refresh-button', 'refresh-button-medium'], interactive=not mu)
shared.gradio['delete_chat'] = gr.Button('🗑️', visible=False, elem_classes='refresh-button', interactive=not mu, elem_id='delete_chat')
shared.gradio['Start new chat'] = gr.Button('New chat', elem_classes=['refresh-button', 'refresh-button-medium', 'focus-on-chat-input'])
shared.gradio['Start new chat'] = gr.Button(t('New chat'), elem_classes=['refresh-button', 'refresh-button-medium', 'focus-on-chat-input'])
shared.gradio['branch_index'] = gr.Number(value=-1, precision=0, visible=False, elem_id="Branch-index", interactive=True)
shared.gradio['search_chat'] = gr.Textbox(placeholder='Search chats...', max_lines=1, elem_id='search_chat')
shared.gradio['search_chat'] = gr.Textbox(placeholder=t('Search chats...'), max_lines=1, elem_id='search_chat')
with gr.Row(elem_id='delete-chat-row', visible=False) as shared.gradio['delete-chat-row']:
shared.gradio['delete_chat-cancel'] = gr.Button('Cancel', elem_classes=['refresh-button', 'focus-on-chat-input'], elem_id='delete_chat-cancel')
shared.gradio['delete_chat-confirm'] = gr.Button('Confirm', variant='stop', elem_classes=['refresh-button', 'focus-on-chat-input'], elem_id='delete_chat-confirm')
shared.gradio['delete_chat-cancel'] = gr.Button(t('Cancel'), elem_classes=['refresh-button', 'focus-on-chat-input'], elem_id='delete_chat-cancel')
shared.gradio['delete_chat-confirm'] = gr.Button(t('Confirm'), variant='stop', elem_classes=['refresh-button', 'focus-on-chat-input'], elem_id='delete_chat-confirm')
with gr.Row(elem_id='rename-row', visible=False) as shared.gradio['rename-row']:
shared.gradio['rename_to'] = gr.Textbox(label='Rename to:', placeholder='New name', elem_classes=['no-background'])
shared.gradio['rename_to'] = gr.Textbox(label=t('Rename to:'), placeholder=t('New name'), elem_classes=['no-background'])
with gr.Row():
shared.gradio['rename_to-cancel'] = gr.Button('Cancel', elem_classes=['refresh-button', 'focus-on-chat-input'])
shared.gradio['rename_to-confirm'] = gr.Button('Confirm', elem_classes=['refresh-button', 'focus-on-chat-input'], variant='primary')
shared.gradio['rename_to-cancel'] = gr.Button(t('Cancel'), elem_classes=['refresh-button', 'focus-on-chat-input'])
shared.gradio['rename_to-confirm'] = gr.Button(t('Confirm'), elem_classes=['refresh-button', 'focus-on-chat-input'], variant='primary')
with gr.Row():
shared.gradio['unique_id'] = gr.Radio(label="", elem_classes=['slim-dropdown', 'pretty_scrollbar'], interactive=not mu, elem_id='past-chats')
@@ -54,56 +55,56 @@ def create_ui():
gr.HTML(value='<div class="hover-element" onclick="void(0)"><span style="width: 100px; display: block" id="hover-element-button">&#9776;</span><div class="hover-menu" id="hover-menu"></div>', elem_id='gr-hover')
with gr.Column(scale=10, elem_id='chat-input-container'):
shared.gradio['textbox'] = gr.MultimodalTextbox(label='', placeholder='Send a message', file_types=['text', '.pdf', 'image'], file_count="multiple", elem_id='chat-input', elem_classes=['add_scrollbar'])
shared.gradio['textbox'] = gr.MultimodalTextbox(label='', placeholder=t('Send a message'), file_types=['text', '.pdf', 'image'], file_count="multiple", elem_id='chat-input', elem_classes=['add_scrollbar'])
shared.gradio['typing-dots'] = gr.HTML(value='<div class="typing"><span></span><span class="dot1"></span><span class="dot2"></span></div>', label='typing', elem_id='typing-container')
with gr.Column(scale=1, elem_id='generate-stop-container'):
with gr.Row():
shared.gradio['Stop'] = gr.Button('Stop', elem_id='stop', visible=False)
shared.gradio['Generate'] = gr.Button('Send', elem_id='Generate', variant='primary')
shared.gradio['Stop'] = gr.Button(t('Stop'), elem_id='stop', visible=False)
shared.gradio['Generate'] = gr.Button(t('Send'), elem_id='Generate', variant='primary')
# Hover menu buttons
with gr.Column(elem_id='chat-buttons'):
shared.gradio['Regenerate'] = gr.Button('Regenerate (Ctrl + Enter)', elem_id='Regenerate')
shared.gradio['Continue'] = gr.Button('Continue (Alt + Enter)', elem_id='Continue')
shared.gradio['Remove last'] = gr.Button('Remove last reply (Ctrl + Shift + Backspace)', elem_id='Remove-last')
shared.gradio['Impersonate'] = gr.Button('Impersonate (Ctrl + Shift + M)', elem_id='Impersonate')
shared.gradio['Send dummy message'] = gr.Button('Send dummy message')
shared.gradio['Send dummy reply'] = gr.Button('Send dummy reply')
shared.gradio['send-chat-to-notebook'] = gr.Button('Send to Notebook')
shared.gradio['show_controls'] = gr.Checkbox(value=shared.settings['show_controls'], label='Show controls (Ctrl+S)', elem_id='show-controls')
shared.gradio['Regenerate'] = gr.Button(t('Regenerate (Ctrl + Enter)'), elem_id='Regenerate')
shared.gradio['Continue'] = gr.Button(t('Continue (Alt + Enter)'), elem_id='Continue')
shared.gradio['Remove last'] = gr.Button(t('Remove last reply (Ctrl + Shift + Backspace)'), elem_id='Remove-last')
shared.gradio['Impersonate'] = gr.Button(t('Impersonate (Ctrl + Shift + M)'), elem_id='Impersonate')
shared.gradio['Send dummy message'] = gr.Button(t('Send dummy message'))
shared.gradio['Send dummy reply'] = gr.Button(t('Send dummy reply'))
shared.gradio['send-chat-to-notebook'] = gr.Button(t('Send to Notebook'))
shared.gradio['show_controls'] = gr.Checkbox(value=shared.settings['show_controls'], label=t('Show controls (Ctrl+S)'), elem_id='show-controls')
with gr.Row(elem_id='chat-controls', elem_classes=['pretty_scrollbar']):
with gr.Column():
with gr.Row():
shared.gradio['start_with'] = gr.Textbox(label='Start reply with', placeholder='Sure thing!', value=shared.settings['start_with'], elem_classes=['add_scrollbar'])
shared.gradio['start_with'] = gr.Textbox(label=t('Start reply with'), placeholder='Sure thing!', value=shared.settings['start_with'], elem_classes=['add_scrollbar'])
gr.HTML("<div class='sidebar-vertical-separator'></div>")
shared.gradio['reasoning_effort'] = gr.Dropdown(value=shared.settings['reasoning_effort'], choices=['low', 'medium', 'high'], label='Reasoning effort', info='Used by GPT-OSS.')
shared.gradio['enable_thinking'] = gr.Checkbox(value=shared.settings['enable_thinking'], label='Enable thinking', info='Used by Seed-OSS and pre-2507 Qwen3.')
shared.gradio['reasoning_effort'] = gr.Dropdown(value=shared.settings['reasoning_effort'], choices=['low', 'medium', 'high'], label=t('Reasoning effort'), info=t('Used by GPT-OSS.'))
shared.gradio['enable_thinking'] = gr.Checkbox(value=shared.settings['enable_thinking'], label=t('Enable thinking'), info=t('Used by Seed-OSS and pre-2507 Qwen3.'))
gr.HTML("<div class='sidebar-vertical-separator'></div>")
shared.gradio['enable_web_search'] = gr.Checkbox(value=shared.settings.get('enable_web_search', False), label='Activate web search', elem_id='web-search')
shared.gradio['enable_web_search'] = gr.Checkbox(value=shared.settings.get('enable_web_search', False), label=t('Activate web search'), elem_id='web-search')
with gr.Row(visible=shared.settings.get('enable_web_search', False)) as shared.gradio['web_search_row']:
shared.gradio['web_search_pages'] = gr.Number(value=shared.settings.get('web_search_pages', 3), precision=0, label='Number of pages to download', minimum=1, maximum=10)
shared.gradio['web_search_pages'] = gr.Number(value=shared.settings.get('web_search_pages', 3), precision=0, label=t('Number of pages to download'), minimum=1, maximum=10)
gr.HTML("<div class='sidebar-vertical-separator'></div>")
with gr.Row():
shared.gradio['mode'] = gr.Radio(choices=['instruct', 'chat-instruct', 'chat'], value=None, label='Mode', info='In instruct and chat-instruct modes, the template under Parameters > Instruction template is used.', elem_id='chat-mode')
shared.gradio['mode'] = gr.Radio(choices=['instruct', 'chat-instruct', 'chat'], value=None, label=t('Mode'), info=t('In instruct and chat-instruct modes, the template under Parameters > Instruction template is used.'), elem_id='chat-mode')
with gr.Row():
shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label='Chat style', value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct')
shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label=t('Chat style'), value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct')
with gr.Row():
shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=12, label='Command for chat-instruct mode', info='<|character|> and <|prompt|> get replaced with the bot name and the regular chat prompt respectively.', visible=shared.settings['mode'] == 'chat-instruct', elem_classes=['add_scrollbar'])
shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=12, label=t('Command for chat-instruct mode'), info=t('<|character|> and <|prompt|> get replaced with the bot name and the regular chat prompt respectively.'), visible=shared.settings['mode'] == 'chat-instruct', elem_classes=['add_scrollbar'])
gr.HTML("<div class='sidebar-vertical-separator'></div>")
with gr.Row():
shared.gradio['count_tokens'] = gr.Button('Count tokens', size='sm')
shared.gradio['count_tokens'] = gr.Button(t('Count tokens'), size='sm')
shared.gradio['token_display'] = gr.HTML(value='', elem_classes='token-display')
@@ -121,66 +122,66 @@ def create_ui():
def create_character_settings_ui():
mu = shared.args.multi_user
with gr.Tab('Character', elem_id="character-tab"):
with gr.Tab(t('Character'), elem_id="character-tab"):
with gr.Row():
with gr.Column(scale=8):
with gr.Tab("Character"):
with gr.Tab(t("Character")):
with gr.Row():
shared.gradio['character_menu'] = gr.Dropdown(value=shared.settings['character'], choices=utils.get_available_characters(), label='Character', elem_id='character-menu', info='Used in chat and chat-instruct modes.', elem_classes='slim-dropdown')
shared.gradio['character_menu'] = gr.Dropdown(value=shared.settings['character'], choices=utils.get_available_characters(), label=t('Character'), elem_id='character-menu', info=t('Used in chat and chat-instruct modes.'), elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['character_menu'], lambda: None, lambda: {'choices': utils.get_available_characters()}, 'refresh-button', interactive=not mu)
shared.gradio['save_character'] = gr.Button('💾', elem_classes='refresh-button', elem_id="save-character", interactive=not mu)
shared.gradio['delete_character'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
shared.gradio['restore_character'] = gr.Button('Restore character', elem_classes='refresh-button', interactive=True, elem_id='restore-character')
shared.gradio['restore_character'] = gr.Button(t('Restore character'), elem_classes='refresh-button', interactive=True, elem_id='restore-character')
shared.gradio['name2'] = gr.Textbox(value=shared.settings['name2'], lines=1, label='Character\'s name')
shared.gradio['context'] = gr.Textbox(value=shared.settings['context'], lines=10, label='Context', elem_classes=['add_scrollbar'], elem_id="character-context")
shared.gradio['greeting'] = gr.Textbox(value=shared.settings['greeting'], lines=5, label='Greeting', elem_classes=['add_scrollbar'], elem_id="character-greeting")
shared.gradio['name2'] = gr.Textbox(value=shared.settings['name2'], lines=1, label=t('Character\'s name'))
shared.gradio['context'] = gr.Textbox(value=shared.settings['context'], lines=10, label=t('Context'), elem_classes=['add_scrollbar'], elem_id="character-context")
shared.gradio['greeting'] = gr.Textbox(value=shared.settings['greeting'], lines=5, label=t('Greeting'), elem_classes=['add_scrollbar'], elem_id="character-greeting")
with gr.Tab("User"):
shared.gradio['name1'] = gr.Textbox(value=shared.settings['name1'], lines=1, label='Name')
shared.gradio['user_bio'] = gr.Textbox(value=shared.settings['user_bio'], lines=10, label='Description', info='Here you can optionally write a description of yourself.', placeholder='{{user}}\'s personality: ...', elem_classes=['add_scrollbar'], elem_id="user-description")
with gr.Tab(t("User")):
shared.gradio['name1'] = gr.Textbox(value=shared.settings['name1'], lines=1, label=t('Name'))
shared.gradio['user_bio'] = gr.Textbox(value=shared.settings['user_bio'], lines=10, label=t('Description'), info=t('Here you can optionally write a description of yourself.'), placeholder='{{user}}\'s personality: ...', elem_classes=['add_scrollbar'], elem_id="user-description")
with gr.Tab('Chat history'):
with gr.Tab(t('Chat history')):
with gr.Row():
with gr.Column():
shared.gradio['save_chat_history'] = gr.Button(value='Save history')
shared.gradio['save_chat_history'] = gr.Button(value=t('Save history'))
with gr.Column():
shared.gradio['load_chat_history'] = gr.File(type='binary', file_types=['.json', '.txt'], label='Upload History JSON')
shared.gradio['load_chat_history'] = gr.File(type='binary', file_types=['.json', '.txt'], label=t('Upload History JSON'))
with gr.Tab('Upload character'):
with gr.Tab('YAML or JSON'):
with gr.Tab(t('Upload character')):
with gr.Tab(t('YAML or JSON')):
with gr.Row():
shared.gradio['upload_json'] = gr.File(type='binary', file_types=['.json', '.yaml'], label='JSON or YAML File', interactive=not mu)
shared.gradio['upload_img_bot'] = gr.Image(type='filepath', label='Profile Picture (optional)', interactive=not mu)
shared.gradio['upload_json'] = gr.File(type='binary', file_types=['.json', '.yaml'], label=t('JSON or YAML File'), interactive=not mu)
shared.gradio['upload_img_bot'] = gr.Image(type='filepath', label=t('Profile Picture (optional)'), interactive=not mu)
shared.gradio['Submit character'] = gr.Button(value='Submit', interactive=False)
shared.gradio['Submit character'] = gr.Button(value=t('Submit'), interactive=False)
with gr.Tab('TavernAI PNG'):
with gr.Row():
with gr.Column():
shared.gradio['upload_img_tavern'] = gr.Image(type='filepath', label='TavernAI PNG File', elem_id='upload_img_tavern', interactive=not mu)
shared.gradio['upload_img_tavern'] = gr.Image(type='filepath', label=t('TavernAI PNG File'), elem_id='upload_img_tavern', interactive=not mu)
shared.gradio['tavern_json'] = gr.State()
with gr.Column():
shared.gradio['tavern_name'] = gr.Textbox(value='', lines=1, label='Name', interactive=False)
shared.gradio['tavern_desc'] = gr.Textbox(value='', lines=10, label='Description', interactive=False, elem_classes=['add_scrollbar'])
shared.gradio['tavern_name'] = gr.Textbox(value='', lines=1, label=t('Name'), interactive=False)
shared.gradio['tavern_desc'] = gr.Textbox(value='', lines=10, label=t('Description'), interactive=False, elem_classes=['add_scrollbar'])
shared.gradio['Submit tavern character'] = gr.Button(value='Submit', interactive=False)
shared.gradio['Submit tavern character'] = gr.Button(value=t('Submit'), interactive=False)
with gr.Column(scale=1):
shared.gradio['character_picture'] = gr.Image(label='Character picture', type='filepath', interactive=not mu)
shared.gradio['your_picture'] = gr.Image(label='Your picture', type='filepath', value=Image.open(Path('user_data/cache/pfp_me.png')) if Path('user_data/cache/pfp_me.png').exists() else None, interactive=not mu)
shared.gradio['character_picture'] = gr.Image(label=t('Character picture'), type='filepath', interactive=not mu)
shared.gradio['your_picture'] = gr.Image(label=t('Your picture'), type='filepath', value=Image.open(Path('user_data/cache/pfp_me.png')) if Path('user_data/cache/pfp_me.png').exists() else None, interactive=not mu)
def create_chat_settings_ui():
mu = shared.args.multi_user
with gr.Tab('Instruction template'):
with gr.Tab(t('Instruction template')):
with gr.Row():
with gr.Column():
with gr.Row():
shared.gradio['instruction_template'] = gr.Dropdown(choices=utils.get_available_instruction_templates(), label='Saved instruction templates', info="After selecting the template, click on \"Load\" to load and apply it.", value='None', elem_classes='slim-dropdown')
shared.gradio['instruction_template'] = gr.Dropdown(choices=utils.get_available_instruction_templates(), label=t('Saved instruction templates'), info=t("After selecting the template, click on \"Load\" to load and apply it."), value='None', elem_classes=['slim-dropdown'])
ui.create_refresh_button(shared.gradio['instruction_template'], lambda: None, lambda: {'choices': utils.get_available_instruction_templates()}, 'refresh-button', interactive=not mu)
shared.gradio['load_template'] = gr.Button("Load", elem_classes='refresh-button')
shared.gradio['load_template'] = gr.Button(t("Load"), elem_classes='refresh-button')
shared.gradio['save_template'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
shared.gradio['delete_template'] = gr.Button('🗑️ ', elem_classes='refresh-button', interactive=not mu)
@@ -189,12 +190,12 @@ def create_chat_settings_ui():
with gr.Row():
with gr.Column():
shared.gradio['instruction_template_str'] = gr.Textbox(value=shared.settings['instruction_template_str'], label='Instruction template', lines=24, info='This gets autodetected; you usually don\'t need to change it. Used in instruct and chat-instruct modes.', elem_classes=['add_scrollbar', 'monospace'], elem_id='instruction-template-str')
shared.gradio['instruction_template_str'] = gr.Textbox(value=shared.settings['instruction_template_str'], label=t('Instruction template'), lines=24, info=t("This gets autodetected; you usually don't need to change it. Used in instruct and chat-instruct modes."), elem_classes=['add_scrollbar', 'monospace'], elem_id='instruction-template-str')
with gr.Row():
shared.gradio['send_instruction_to_notebook'] = gr.Button('Send to notebook', elem_classes=['small-button'])
shared.gradio['send_instruction_to_notebook'] = gr.Button(t('Send to notebook'), elem_classes=['small-button'])
with gr.Column():
shared.gradio['chat_template_str'] = gr.Textbox(value=shared.settings['chat_template_str'], label='Chat template', lines=22, elem_classes=['add_scrollbar', 'monospace'], info='Defines how the chat prompt in chat/chat-instruct modes is generated.', elem_id='chat-template-str')
shared.gradio['chat_template_str'] = gr.Textbox(value=shared.settings['chat_template_str'], label=t('Chat template'), lines=22, elem_classes=['add_scrollbar', 'monospace'], info=t('Defines how the chat prompt in chat/chat-instruct modes is generated.'), elem_id='chat-template-str')
def create_event_handlers():

modules/ui_model_menu.py

@@ -21,94 +21,95 @@ from modules.models_settings import (
update_model_parameters
)
from modules.utils import gradio
from modules.i18n import t
def create_ui():
mu = shared.args.multi_user
with gr.Tab("Model", elem_id="model-tab"):
with gr.Tab(t("Model"), elem_id="model-tab"):
with gr.Row():
with gr.Column():
with gr.Row():
shared.gradio['model_menu'] = gr.Dropdown(choices=utils.get_available_models(), value=lambda: shared.model_name, label='Model', elem_classes='slim-dropdown', interactive=not mu)
shared.gradio['model_menu'] = gr.Dropdown(choices=utils.get_available_models(), value=lambda: shared.model_name, label=t('Model'), elem_classes='slim-dropdown', interactive=not mu)
ui.create_refresh_button(shared.gradio['model_menu'], lambda: None, lambda: {'choices': utils.get_available_models()}, 'refresh-button', interactive=not mu)
shared.gradio['load_model'] = gr.Button("Load", elem_classes='refresh-button', interactive=not mu)
shared.gradio['unload_model'] = gr.Button("Unload", elem_classes='refresh-button', interactive=not mu)
shared.gradio['save_model_settings'] = gr.Button("Save settings", elem_classes='refresh-button', interactive=not mu)
shared.gradio['load_model'] = gr.Button(t("Load"), elem_classes='refresh-button', interactive=not mu)
shared.gradio['unload_model'] = gr.Button(t("Unload"), elem_classes='refresh-button', interactive=not mu)
shared.gradio['save_model_settings'] = gr.Button(t("Save settings"), elem_classes='refresh-button', interactive=not mu)
shared.gradio['loader'] = gr.Dropdown(label="Model loader", choices=loaders.loaders_and_params.keys() if not shared.args.portable else ['llama.cpp'], value=None)
shared.gradio['loader'] = gr.Dropdown(label=t("Model loader"), choices=loaders.loaders_and_params.keys() if not shared.args.portable else ['llama.cpp'], value=None)
with gr.Blocks():
gr.Markdown("## Main options")
gr.Markdown(t("## Main options"))
with gr.Row():
with gr.Column():
shared.gradio['gpu_layers'] = gr.Slider(label="gpu-layers", minimum=0, maximum=get_initial_gpu_layers_max(), step=1, value=shared.args.gpu_layers, info='Must be greater than 0 for the GPU to be used. ⚠️ Lower this value if you can\'t load the model.')
shared.gradio['ctx_size'] = gr.Slider(label='ctx-size', minimum=256, maximum=131072, step=256, value=shared.args.ctx_size, info='Context length. Common values: 4096, 8192, 16384, 32768, 65536, 131072.')
shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7')
shared.gradio['attn_implementation'] = gr.Dropdown(label="attn-implementation", choices=['sdpa', 'eager', 'flash_attention_2'], value=shared.args.attn_implementation, info='Attention implementation.')
shared.gradio['cache_type'] = gr.Dropdown(label="cache-type", choices=['fp16', 'q8_0', 'q4_0', 'fp8', 'q8', 'q7', 'q6', 'q5', 'q4', 'q3', 'q2'], value=shared.args.cache_type, allow_custom_value=True, info='Valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4; ExLlamaV3 - fp16, q2 to q8. For ExLlamaV3, you can type custom combinations for separate k/v bits (e.g. q4_q8).')
shared.gradio['tp_backend'] = gr.Dropdown(label="tp-backend", choices=['native', 'nccl'], value=shared.args.tp_backend, info='The backend for tensor parallelism.')
shared.gradio['gpu_layers'] = gr.Slider(label=t("gpu-layers"), minimum=0, maximum=get_initial_gpu_layers_max(), step=1, value=shared.args.gpu_layers, info=t("Must be greater than 0 for the GPU to be used. ⚠️ Lower this value if you can't load the model."))
shared.gradio['ctx_size'] = gr.Slider(label=t('ctx-size'), minimum=256, maximum=131072, step=256, value=shared.args.ctx_size, info=t('Context length. Common values: 4096, 8192, 16384, 32768, 65536, 131072.'))
shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info=t('Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7'))
shared.gradio['attn_implementation'] = gr.Dropdown(label="attn-implementation", choices=['sdpa', 'eager', 'flash_attention_2'], value=shared.args.attn_implementation, info=t('Attention implementation.'))
shared.gradio['cache_type'] = gr.Dropdown(label=t("cache-type"), choices=['fp16', 'q8_0', 'q4_0', 'fp8', 'q8', 'q7', 'q6', 'q5', 'q4', 'q3', 'q2'], value=shared.args.cache_type, allow_custom_value=True, info=t('Valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4; ExLlamaV3 - fp16, q2 to q8. For ExLlamaV3, you can type custom combinations for separate k/v bits (e.g. q4_q8).'))
shared.gradio['tp_backend'] = gr.Dropdown(label="tp-backend", choices=['native', 'nccl'], value=shared.args.tp_backend, info=t('The backend for tensor parallelism.'))
with gr.Column():
shared.gradio['vram_info'] = gr.HTML(value=get_initial_vram_info())
shared.gradio['cpu_moe'] = gr.Checkbox(label="cpu-moe", value=shared.args.cpu_moe, info='Move the experts to the CPU. Saves VRAM on MoE models.')
shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming-llm", value=shared.args.streaming_llm, info='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
shared.gradio['cpu_moe'] = gr.Checkbox(label=t("cpu-moe"), value=shared.args.cpu_moe, info=t('Move the experts to the CPU. Saves VRAM on MoE models.'))
shared.gradio['streaming_llm'] = gr.Checkbox(label=t("streaming-llm"), value=shared.args.streaming_llm, info=t('Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.'))
shared.gradio['load_in_8bit'] = gr.Checkbox(label="load-in-8bit", value=shared.args.load_in_8bit)
shared.gradio['load_in_4bit'] = gr.Checkbox(label="load-in-4bit", value=shared.args.load_in_4bit)
shared.gradio['use_double_quant'] = gr.Checkbox(label="use_double_quant", value=shared.args.use_double_quant, info='Used by load-in-4bit.')
shared.gradio['autosplit'] = gr.Checkbox(label="autosplit", value=shared.args.autosplit, info='Automatically split the model tensors across the available GPUs.')
shared.gradio['enable_tp'] = gr.Checkbox(label="enable_tp", value=shared.args.enable_tp, info='Enable tensor parallelism (TP).')
shared.gradio['cpp_runner'] = gr.Checkbox(label="cpp-runner", value=shared.args.cpp_runner, info='Enable inference with ModelRunnerCpp, which is faster than the default ModelRunner.')
shared.gradio['tensorrt_llm_info'] = gr.Markdown('* TensorRT-LLM has to be installed manually in a separate Python 3.10 environment at the moment. For a guide, consult the description of [this PR](https://github.com/oobabooga/text-generation-webui/pull/5715). \n\n* `ctx_size` is only used when `cpp-runner` is checked.\n\n* `cpp_runner` does not support streaming at the moment.')
shared.gradio['use_double_quant'] = gr.Checkbox(label="use_double_quant", value=shared.args.use_double_quant, info=t('Used by load-in-4bit.'))
shared.gradio['autosplit'] = gr.Checkbox(label="autosplit", value=shared.args.autosplit, info=t('Automatically split the model tensors across the available GPUs.'))
shared.gradio['enable_tp'] = gr.Checkbox(label="enable_tp", value=shared.args.enable_tp, info=t('Enable tensor parallelism (TP).'))
shared.gradio['cpp_runner'] = gr.Checkbox(label="cpp-runner", value=shared.args.cpp_runner, info=t('Enable inference with ModelRunnerCpp, which is faster than the default ModelRunner.'))
shared.gradio['tensorrt_llm_info'] = gr.Markdown(t('* TensorRT-LLM has to be installed manually in a separate Python 3.10 environment at the moment. For a guide, consult the description of [this PR](https://github.com/oobabooga/text-generation-webui/pull/5715). \n\n* `ctx_size` is only used when `cpp-runner` is checked.\n\n* `cpp_runner` does not support streaming at the moment.'))
# Multimodal
with gr.Accordion("Multimodal (vision)", open=False, elem_classes='tgw-accordion') as shared.gradio['mmproj_accordion']:
with gr.Accordion(t("Multimodal (vision)"), open=False, elem_classes='tgw-accordion') as shared.gradio['mmproj_accordion']:
with gr.Row():
shared.gradio['mmproj'] = gr.Dropdown(label="mmproj file", choices=utils.get_available_mmproj(), value=lambda: shared.args.mmproj or 'None', elem_classes='slim-dropdown', info='Select a file that matches your model. Must be placed in user_data/mmproj/', interactive=not mu)
shared.gradio['mmproj'] = gr.Dropdown(label=t("mmproj file"), choices=utils.get_available_mmproj(), value=lambda: shared.args.mmproj or 'None', elem_classes='slim-dropdown', info=t('Select a file that matches your model. Must be placed in user_data/mmproj/'), interactive=not mu)
ui.create_refresh_button(shared.gradio['mmproj'], lambda: None, lambda: {'choices': utils.get_available_mmproj()}, 'refresh-button', interactive=not mu)
# Speculative decoding
with gr.Accordion("Speculative decoding", open=False, elem_classes='tgw-accordion') as shared.gradio['speculative_decoding_accordion']:
with gr.Accordion(t("Speculative decoding"), open=False, elem_classes='tgw-accordion') as shared.gradio['speculative_decoding_accordion']:
with gr.Row():
shared.gradio['model_draft'] = gr.Dropdown(label="model-draft", choices=['None'] + utils.get_available_models(), value=lambda: shared.args.model_draft, elem_classes='slim-dropdown', info='Draft model. Speculative decoding only works with models sharing the same vocabulary (e.g., same model family).', interactive=not mu)
shared.gradio['model_draft'] = gr.Dropdown(label="model-draft", choices=['None'] + utils.get_available_models(), value=lambda: shared.args.model_draft, elem_classes='slim-dropdown', info=t('Draft model. Speculative decoding only works with models sharing the same vocabulary (e.g., same model family).'), interactive=not mu)
ui.create_refresh_button(shared.gradio['model_draft'], lambda: None, lambda: {'choices': ['None'] + utils.get_available_models()}, 'refresh-button', interactive=not mu)
shared.gradio['gpu_layers_draft'] = gr.Slider(label="gpu-layers-draft", minimum=0, maximum=256, value=shared.args.gpu_layers_draft, info='Number of layers to offload to the GPU for the draft model.')
shared.gradio['draft_max'] = gr.Number(label="draft-max", precision=0, step=1, value=shared.args.draft_max, info='Number of tokens to draft for speculative decoding. Recommended value: 4.')
shared.gradio['device_draft'] = gr.Textbox(label="device-draft", value=shared.args.device_draft, info='Comma-separated list of devices to use for offloading the draft model. Example: CUDA0,CUDA1')
shared.gradio['ctx_size_draft'] = gr.Number(label="ctx-size-draft", precision=0, step=256, value=shared.args.ctx_size_draft, info='Size of the prompt context for the draft model. If 0, uses the same as the main model.')
shared.gradio['gpu_layers_draft'] = gr.Slider(label="gpu-layers-draft", minimum=0, maximum=256, value=shared.args.gpu_layers_draft, info=t('Number of layers to offload to the GPU for the draft model.'))
shared.gradio['draft_max'] = gr.Number(label="draft-max", precision=0, step=1, value=shared.args.draft_max, info=t('Number of tokens to draft for speculative decoding. Recommended value: 4.'))
shared.gradio['device_draft'] = gr.Textbox(label="device-draft", value=shared.args.device_draft, info=t('Comma-separated list of devices to use for offloading the draft model. Example: CUDA0,CUDA1'))
shared.gradio['ctx_size_draft'] = gr.Number(label="ctx-size-draft", precision=0, step=256, value=shared.args.ctx_size_draft, info=t('Size of the prompt context for the draft model. If 0, uses the same as the main model.'))
gr.Markdown("## Other options")
with gr.Accordion("See more options", open=False, elem_classes='tgw-accordion'):
gr.Markdown(t("## Other options"))
with gr.Accordion(t("See more options"), open=False, elem_classes='tgw-accordion'):
with gr.Row():
with gr.Column():
shared.gradio['threads'] = gr.Slider(label="threads", minimum=0, step=1, maximum=256, value=shared.args.threads)
shared.gradio['threads_batch'] = gr.Slider(label="threads_batch", minimum=0, step=1, maximum=256, value=shared.args.threads_batch)
shared.gradio['batch_size'] = gr.Slider(label="batch_size", minimum=1, maximum=4096, step=1, value=shared.args.batch_size)
shared.gradio['ubatch_size'] = gr.Slider(label="ubatch_size", minimum=1, maximum=4096, step=1, value=shared.args.ubatch_size)
shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='List of proportions to split the model across multiple GPUs. Example: 60,40')
shared.gradio['extra_flags'] = gr.Textbox(label='extra-flags', info='Additional flags to pass to llama-server. Format: "flag1=value1,flag2,flag3=value3". Example: "override-tensor=exps=CPU"', value=shared.args.extra_flags)
shared.gradio['cpu_memory'] = gr.Number(label="Maximum CPU memory in GiB. Use this for CPU offloading.", value=shared.args.cpu_memory)
shared.gradio['alpha_value'] = gr.Number(label='alpha_value', value=shared.args.alpha_value, precision=2, info='Positional embeddings alpha factor for NTK RoPE scaling. Recommended values (NTKv1): 1.75 for 1.5x context, 2.5 for 2x context. Use either this or compress_pos_emb, not both.')
shared.gradio['rope_freq_base'] = gr.Number(label='rope_freq_base', value=shared.args.rope_freq_base, precision=0, info='Positional embeddings frequency base for NTK RoPE scaling. Related to alpha_value by rope_freq_base = 10000 * alpha_value ^ (64 / 63). 0 = from model.')
shared.gradio['compress_pos_emb'] = gr.Number(label='compress_pos_emb', value=shared.args.compress_pos_emb, precision=2, info='Positional embeddings compression factor. Should be set to (context length) / (model\'s original context length). Equal to 1/rope_freq_scale.')
shared.gradio['compute_dtype'] = gr.Dropdown(label="compute_dtype", choices=["bfloat16", "float16", "float32"], value=shared.args.compute_dtype, info='Used by load-in-4bit.')
shared.gradio['quant_type'] = gr.Dropdown(label="quant_type", choices=["nf4", "fp4"], value=shared.args.quant_type, info='Used by load-in-4bit.')
shared.gradio['num_experts_per_token'] = gr.Number(label="Number of experts per token", value=shared.args.num_experts_per_token, info='Only applies to MoE models like Mixtral.')
shared.gradio['threads'] = gr.Slider(label=t("threads"), minimum=0, step=1, maximum=256, value=shared.args.threads)
shared.gradio['threads_batch'] = gr.Slider(label=t("threads_batch"), minimum=0, step=1, maximum=256, value=shared.args.threads_batch)
shared.gradio['batch_size'] = gr.Slider(label=t("batch_size"), minimum=1, maximum=4096, step=1, value=shared.args.batch_size)
shared.gradio['ubatch_size'] = gr.Slider(label=t("ubatch_size"), minimum=1, maximum=4096, step=1, value=shared.args.ubatch_size)
shared.gradio['tensor_split'] = gr.Textbox(label=t('tensor_split'), info=t('List of proportions to split the model across multiple GPUs. Example: 60,40'))
shared.gradio['extra_flags'] = gr.Textbox(label=t('extra-flags'), info=t('Additional flags to pass to llama-server. Format: "flag1=value1,flag2,flag3=value3". Example: "override-tensor=exps=CPU"'), value=shared.args.extra_flags)
shared.gradio['cpu_memory'] = gr.Number(label=t("Maximum CPU memory in GiB. Use this for CPU offloading."), value=shared.args.cpu_memory)
shared.gradio['alpha_value'] = gr.Number(label='alpha_value', value=shared.args.alpha_value, precision=2, info=t('Positional embeddings alpha factor for NTK RoPE scaling. Recommended values (NTKv1): 1.75 for 1.5x context, 2.5 for 2x context. Use either this or compress_pos_emb, not both.'))
shared.gradio['rope_freq_base'] = gr.Number(label=t('rope_freq_base'), value=shared.args.rope_freq_base, precision=0, info=t('Positional embeddings frequency base for NTK RoPE scaling. Related to alpha_value by rope_freq_base = 10000 * alpha_value ^ (64 / 63). 0 = from model.'))
shared.gradio['compress_pos_emb'] = gr.Number(label=t('compress_pos_emb'), value=shared.args.compress_pos_emb, precision=2, info=t("Positional embeddings compression factor. Should be set to (context length) / (model's original context length). Equal to 1/rope_freq_scale."))
shared.gradio['compute_dtype'] = gr.Dropdown(label="compute_dtype", choices=["bfloat16", "float16", "float32"], value=shared.args.compute_dtype, info=t('Used by load-in-4bit.'))
shared.gradio['quant_type'] = gr.Dropdown(label="quant_type", choices=["nf4", "fp4"], value=shared.args.quant_type, info=t('Used by load-in-4bit.'))
shared.gradio['num_experts_per_token'] = gr.Number(label="Number of experts per token", value=shared.args.num_experts_per_token, info=t('Only applies to MoE models like Mixtral.'))
with gr.Column():
shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='Use PyTorch in CPU mode.')
shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info=t('Use PyTorch in CPU mode.'))
shared.gradio['disk'] = gr.Checkbox(label="disk", value=shared.args.disk)
shared.gradio['row_split'] = gr.Checkbox(label="row_split", value=shared.args.row_split, info='Split the model by rows across GPUs. This may improve multi-gpu performance.')
shared.gradio['no_kv_offload'] = gr.Checkbox(label="no_kv_offload", value=shared.args.no_kv_offload, info='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.')
shared.gradio['no_mmap'] = gr.Checkbox(label="no-mmap", value=shared.args.no_mmap)
shared.gradio['mlock'] = gr.Checkbox(label="mlock", value=shared.args.mlock)
shared.gradio['numa'] = gr.Checkbox(label="numa", value=shared.args.numa, info='NUMA support can help on some systems with non-uniform memory access.')
shared.gradio['row_split'] = gr.Checkbox(label=t("row_split"), value=shared.args.row_split, info=t('Split the model by rows across GPUs. This may improve multi-gpu performance.'))
shared.gradio['no_kv_offload'] = gr.Checkbox(label=t("no_kv_offload"), value=shared.args.no_kv_offload, info=t('Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.'))
shared.gradio['no_mmap'] = gr.Checkbox(label=t("no-mmap"), value=shared.args.no_mmap)
shared.gradio['mlock'] = gr.Checkbox(label=t("mlock"), value=shared.args.mlock)
shared.gradio['numa'] = gr.Checkbox(label=t("numa"), value=shared.args.numa, info=t('NUMA support can help on some systems with non-uniform memory access.'))
shared.gradio['bf16'] = gr.Checkbox(label="bf16", value=shared.args.bf16)
shared.gradio['no_flash_attn'] = gr.Checkbox(label="no_flash_attn", value=shared.args.no_flash_attn)
shared.gradio['no_xformers'] = gr.Checkbox(label="no_xformers", value=shared.args.no_xformers)
shared.gradio['no_sdpa'] = gr.Checkbox(label="no_sdpa", value=shared.args.no_sdpa)
shared.gradio['cfg_cache'] = gr.Checkbox(label="cfg-cache", value=shared.args.cfg_cache, info='Necessary to use CFG with this loader.')
shared.gradio['no_use_fast'] = gr.Checkbox(label="no_use_fast", value=shared.args.no_use_fast, info='Set use_fast=False while loading the tokenizer.')
shared.gradio['cfg_cache'] = gr.Checkbox(label="cfg-cache", value=shared.args.cfg_cache, info=t('Necessary to use CFG with this loader.'))
shared.gradio['no_use_fast'] = gr.Checkbox(label="no_use_fast", value=shared.args.no_use_fast, info=t('Set use_fast=False while loading the tokenizer.'))
if not shared.args.portable:
with gr.Row():
shared.gradio['lora_menu'] = gr.Dropdown(multiselect=True, choices=utils.get_available_loras(), value=shared.lora_names, label='LoRA(s)', elem_classes='slim-dropdown', interactive=not mu)
@@ -116,23 +117,23 @@ def create_ui():
shared.gradio['lora_menu_apply'] = gr.Button(value='Apply LoRAs', elem_classes='refresh-button', interactive=not mu)
with gr.Column():
with gr.Tab("Download"):
shared.gradio['custom_model_menu'] = gr.Textbox(label="Download model or LoRA", info="Enter the Hugging Face username/model path, for instance: facebook/galactica-125m. To specify a branch, add it at the end after a \":\" character like this: facebook/galactica-125m:main. To download a single file, enter its name in the second box.", interactive=not mu)
shared.gradio['download_specific_file'] = gr.Textbox(placeholder="File name (for GGUF models)", show_label=False, max_lines=1, interactive=not mu)
with gr.Tab(t("Download")):
shared.gradio['custom_model_menu'] = gr.Textbox(label=t("Download model or LoRA"), info=t("Enter the Hugging Face username/model path, for instance: facebook/galactica-125m. To specify a branch, add it at the end after a \":\" character like this: facebook/galactica-125m:main. To download a single file, enter its name in the second box."), interactive=not mu)
shared.gradio['download_specific_file'] = gr.Textbox(placeholder=t("File name (for GGUF models)"), show_label=False, max_lines=1, interactive=not mu)
with gr.Row():
shared.gradio['download_model_button'] = gr.Button("Download", variant='primary', interactive=not mu)
shared.gradio['get_file_list'] = gr.Button("Get file list", interactive=not mu)
shared.gradio['download_model_button'] = gr.Button(t("Download"), variant='primary', interactive=not mu)
shared.gradio['get_file_list'] = gr.Button(t("Get file list"), interactive=not mu)
with gr.Tab("Customize instruction template"):
with gr.Tab(t("Customize instruction template")):
with gr.Row():
shared.gradio['customized_template'] = gr.Dropdown(choices=utils.get_available_instruction_templates(), value='None', label='Select the desired instruction template', elem_classes='slim-dropdown')
shared.gradio['customized_template'] = gr.Dropdown(choices=utils.get_available_instruction_templates(), value='None', label=t('Select the desired instruction template'), elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['customized_template'], lambda: None, lambda: {'choices': utils.get_available_instruction_templates()}, 'refresh-button', interactive=not mu)
shared.gradio['customized_template_submit'] = gr.Button("Submit", variant="primary", interactive=not mu)
gr.Markdown("This allows you to set a customized template for the model currently selected in the \"Model loader\" menu. Whenever the model gets loaded, this template will be used in place of the template specified in the model's medatada, which sometimes is wrong.")
shared.gradio['customized_template_submit'] = gr.Button(t("Submit"), variant="primary", interactive=not mu)
gr.Markdown(t("This allows you to set a customized template for the model currently selected in the \"Model loader\" menu. Whenever the model gets loaded, this template will be used in place of the template specified in the model's medatada, which sometimes is wrong."))
with gr.Row():
shared.gradio['model_status'] = gr.Markdown('No model is loaded' if shared.model_name == 'None' else 'Ready')
shared.gradio['model_status'] = gr.Markdown(t('No model is loaded') if shared.model_name == 'None' else t('Ready'))
def create_event_handlers():
@@ -244,7 +245,7 @@ def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), retur
return
if not repo_id:
yield "Please enter a model path."
yield t("Please enter a model path.")
progress(0.0)
return
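The body of `t()` itself is not shown in these hunks. A minimal sketch of what the lookup in `modules/i18n.py` plausibly does, assuming the `_ZH_TW` table defined at the top of that module and a `language` key in `shared.settings` (the settings key and the `_TRANSLATIONS` registry are assumptions, not shown in this diff):

```python
from modules import shared

# Map language codes to their translation tables; English needs no table
# because untranslated strings fall through unchanged.
_TRANSLATIONS = {
    "zh_TW": _ZH_TW,  # table defined earlier in modules/i18n.py
}

def t(text: str) -> str:
    """Translate a UI string for the active language, falling back to the
    original English text when no entry exists."""
    lang = shared.settings.get("language", "en")  # assumed settings key
    return _TRANSLATIONS.get(lang, {}).get(text, text)
```

A fallback of this shape would also explain why identifier-style labels such as `smoothing_factor` or `do_sample` can be left unwrapped without changing the rendered text, as long as no table entry exists for them.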
@@ -4,105 +4,106 @@ import gradio as gr
from modules import loaders, presets, shared, ui, ui_chat, utils
from modules.utils import gradio
from modules.i18n import t
def create_ui():
mu = shared.args.multi_user
with gr.Tab("Parameters", elem_id="parameters"):
with gr.Tab("Generation"):
with gr.Tab(t("Parameters"), elem_id="parameters"):
with gr.Tab(t("Generation")):
with gr.Row():
with gr.Column():
with gr.Row():
shared.gradio['preset_menu'] = gr.Dropdown(choices=utils.get_available_presets(), value=shared.settings['preset'], label='Preset', elem_classes='slim-dropdown')
shared.gradio['preset_menu'] = gr.Dropdown(choices=utils.get_available_presets(), value=shared.settings['preset'], label=t('Preset'), elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['preset_menu'], lambda: None, lambda: {'choices': utils.get_available_presets()}, 'refresh-button', interactive=not mu)
shared.gradio['save_preset'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
shared.gradio['delete_preset'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
shared.gradio['reset_preset'] = gr.Button('Restore preset', elem_classes='refresh-button', interactive=True)
shared.gradio['neutralize_samplers'] = gr.Button('Neutralize samplers', elem_classes='refresh-button', interactive=True)
shared.gradio['reset_preset'] = gr.Button(t('Restore preset'), elem_classes='refresh-button', interactive=True)
shared.gradio['neutralize_samplers'] = gr.Button(t('Neutralize samplers'), elem_classes='refresh-button', interactive=True)
with gr.Column():
shared.gradio['filter_by_loader'] = gr.Dropdown(label="Filter by loader", choices=["All"] + list(loaders.loaders_and_params.keys()) if not shared.args.portable else ['llama.cpp'], value="All", elem_classes='slim-dropdown')
shared.gradio['filter_by_loader'] = gr.Dropdown(label=t("Filter by loader"), choices=["All"] + list(loaders.loaders_and_params.keys()) if not shared.args.portable else ['llama.cpp'], value="All", elem_classes='slim-dropdown')
with gr.Row():
with gr.Column():
with gr.Row():
with gr.Column():
gr.Markdown('## Curve shape')
shared.gradio['temperature'] = gr.Slider(0.01, 5, value=shared.settings['temperature'], step=0.01, label='temperature')
shared.gradio['dynatemp_low'] = gr.Slider(0.01, 5, value=shared.settings['dynatemp_low'], step=0.01, label='dynatemp_low', visible=shared.settings['dynamic_temperature'])
shared.gradio['dynatemp_high'] = gr.Slider(0.01, 5, value=shared.settings['dynatemp_high'], step=0.01, label='dynatemp_high', visible=shared.settings['dynamic_temperature'])
shared.gradio['dynatemp_exponent'] = gr.Slider(0.01, 5, value=shared.settings['dynatemp_exponent'], step=0.01, label='dynatemp_exponent', visible=shared.settings['dynamic_temperature'])
shared.gradio['smoothing_factor'] = gr.Slider(0.0, 10.0, value=shared.settings['smoothing_factor'], step=0.01, label='smoothing_factor', info='Activates Quadratic Sampling.')
shared.gradio['smoothing_curve'] = gr.Slider(1.0, 10.0, value=shared.settings['smoothing_curve'], step=0.01, label='smoothing_curve', info='Adjusts the dropoff curve of Quadratic Sampling.')
shared.gradio['dynamic_temperature'] = gr.Checkbox(value=shared.settings['dynamic_temperature'], label='dynamic_temperature')
gr.Markdown(t('## Curve shape'))
shared.gradio['temperature'] = gr.Slider(0.01, 5, value=shared.settings['temperature'], step=0.01, label=t('temperature'))
shared.gradio['dynatemp_low'] = gr.Slider(0.01, 5, value=shared.settings['dynatemp_low'], step=0.01, label=t('dynatemp_low'), visible=shared.settings['dynamic_temperature'])
shared.gradio['dynatemp_high'] = gr.Slider(0.01, 5, value=shared.settings['dynatemp_high'], step=0.01, label=t('dynatemp_high'), visible=shared.settings['dynamic_temperature'])
shared.gradio['dynatemp_exponent'] = gr.Slider(0.01, 5, value=shared.settings['dynatemp_exponent'], step=0.01, label=t('dynatemp_exponent'), visible=shared.settings['dynamic_temperature'])
shared.gradio['smoothing_factor'] = gr.Slider(0.0, 10.0, value=shared.settings['smoothing_factor'], step=0.01, label='smoothing_factor', info=t('Activates Quadratic Sampling.'))
shared.gradio['smoothing_curve'] = gr.Slider(1.0, 10.0, value=shared.settings['smoothing_curve'], step=0.01, label='smoothing_curve', info=t('Adjusts the dropoff curve of Quadratic Sampling.'))
shared.gradio['dynamic_temperature'] = gr.Checkbox(value=shared.settings['dynamic_temperature'], label=t('dynamic_temperature'))
gr.Markdown('## Curve cutoff')
shared.gradio['min_p'] = gr.Slider(0.0, 1.0, value=shared.settings['min_p'], step=0.01, label='min_p')
shared.gradio['top_n_sigma'] = gr.Slider(0.0, 5.0, value=shared.settings['top_n_sigma'], step=0.01, label='top_n_sigma')
shared.gradio['top_p'] = gr.Slider(0.0, 1.0, value=shared.settings['top_p'], step=0.01, label='top_p')
shared.gradio['top_k'] = gr.Slider(0, 200, value=shared.settings['top_k'], step=1, label='top_k')
shared.gradio['typical_p'] = gr.Slider(0.0, 1.0, value=shared.settings['typical_p'], step=0.01, label='typical_p')
shared.gradio['xtc_threshold'] = gr.Slider(0, 0.5, value=shared.settings['xtc_threshold'], step=0.01, label='xtc_threshold', info='If 2 or more tokens have probability above this threshold, consider removing all but the last one.')
shared.gradio['xtc_probability'] = gr.Slider(0, 1, value=shared.settings['xtc_probability'], step=0.01, label='xtc_probability', info='Probability that the removal will actually happen. 0 disables the sampler. 1 makes it always happen.')
gr.Markdown(t('## Curve cutoff'))
shared.gradio['min_p'] = gr.Slider(0.0, 1.0, value=shared.settings['min_p'], step=0.01, label=t('min_p'))
shared.gradio['top_n_sigma'] = gr.Slider(0.0, 5.0, value=shared.settings['top_n_sigma'], step=0.01, label=t('top_n_sigma'))
shared.gradio['top_p'] = gr.Slider(0.0, 1.0, value=shared.settings['top_p'], step=0.01, label=t('top_p'))
shared.gradio['top_k'] = gr.Slider(0, 200, value=shared.settings['top_k'], step=1, label=t('top_k'))
shared.gradio['typical_p'] = gr.Slider(0.0, 1.0, value=shared.settings['typical_p'], step=0.01, label=t('typical_p'))
shared.gradio['xtc_threshold'] = gr.Slider(0, 0.5, value=shared.settings['xtc_threshold'], step=0.01, label=t('xtc_threshold'), info=t('If 2 or more tokens have probability above this threshold, consider removing all but the last one.'))
shared.gradio['xtc_probability'] = gr.Slider(0, 1, value=shared.settings['xtc_probability'], step=0.01, label=t('xtc_probability'), info=t('Probability that the removal will actually happen. 0 disables the sampler. 1 makes it always happen.'))
shared.gradio['epsilon_cutoff'] = gr.Slider(0, 9, value=shared.settings['epsilon_cutoff'], step=0.01, label='epsilon_cutoff')
shared.gradio['eta_cutoff'] = gr.Slider(0, 20, value=shared.settings['eta_cutoff'], step=0.01, label='eta_cutoff')
shared.gradio['tfs'] = gr.Slider(0.0, 1.0, value=shared.settings['tfs'], step=0.01, label='tfs')
shared.gradio['top_a'] = gr.Slider(0.0, 1.0, value=shared.settings['top_a'], step=0.01, label='top_a')
gr.Markdown('## Repetition suppression')
shared.gradio['dry_multiplier'] = gr.Slider(0, 5, value=shared.settings['dry_multiplier'], step=0.01, label='dry_multiplier', info='Set to greater than 0 to enable DRY. Recommended value: 0.8.')
shared.gradio['dry_allowed_length'] = gr.Slider(1, 20, value=shared.settings['dry_allowed_length'], step=1, label='dry_allowed_length', info='Longest sequence that can be repeated without being penalized.')
shared.gradio['dry_base'] = gr.Slider(1, 4, value=shared.settings['dry_base'], step=0.01, label='dry_base', info='Controls how fast the penalty grows with increasing sequence length.')
shared.gradio['repetition_penalty'] = gr.Slider(1.0, 1.5, value=shared.settings['repetition_penalty'], step=0.01, label='repetition_penalty')
shared.gradio['frequency_penalty'] = gr.Slider(0, 2, value=shared.settings['frequency_penalty'], step=0.05, label='frequency_penalty')
shared.gradio['presence_penalty'] = gr.Slider(0, 2, value=shared.settings['presence_penalty'], step=0.05, label='presence_penalty')
gr.Markdown(t('## Repetition suppression'))
shared.gradio['dry_multiplier'] = gr.Slider(0, 5, value=shared.settings['dry_multiplier'], step=0.01, label=t('dry_multiplier'), info=t('Set to greater than 0 to enable DRY. Recommended value: 0.8.'))
shared.gradio['dry_allowed_length'] = gr.Slider(1, 20, value=shared.settings['dry_allowed_length'], step=1, label=t('dry_allowed_length'), info=t('Longest sequence that can be repeated without being penalized.'))
shared.gradio['dry_base'] = gr.Slider(1, 4, value=shared.settings['dry_base'], step=0.01, label=t('dry_base'), info=t('Controls how fast the penalty grows with increasing sequence length.'))
shared.gradio['repetition_penalty'] = gr.Slider(1.0, 1.5, value=shared.settings['repetition_penalty'], step=0.01, label=t('repetition_penalty'))
shared.gradio['frequency_penalty'] = gr.Slider(0, 2, value=shared.settings['frequency_penalty'], step=0.05, label=t('frequency_penalty'))
shared.gradio['presence_penalty'] = gr.Slider(0, 2, value=shared.settings['presence_penalty'], step=0.05, label=t('presence_penalty'))
shared.gradio['encoder_repetition_penalty'] = gr.Slider(0.8, 1.5, value=shared.settings['encoder_repetition_penalty'], step=0.01, label='encoder_repetition_penalty')
shared.gradio['no_repeat_ngram_size'] = gr.Slider(0, 20, step=1, value=shared.settings['no_repeat_ngram_size'], label='no_repeat_ngram_size')
shared.gradio['repetition_penalty_range'] = gr.Slider(0, 4096, step=64, value=shared.settings['repetition_penalty_range'], label='repetition_penalty_range')
shared.gradio['repetition_penalty_range'] = gr.Slider(0, 4096, step=64, value=shared.settings['repetition_penalty_range'], label=t('repetition_penalty_range'))
with gr.Column():
gr.Markdown('## Alternative sampling methods')
shared.gradio['penalty_alpha'] = gr.Slider(0, 5, value=shared.settings['penalty_alpha'], label='penalty_alpha', info='For Contrastive Search. do_sample must be unchecked.')
shared.gradio['guidance_scale'] = gr.Slider(-0.5, 2.5, step=0.05, value=shared.settings['guidance_scale'], label='guidance_scale', info='For CFG. 1.5 is a good value.')
shared.gradio['mirostat_mode'] = gr.Slider(0, 2, step=1, value=shared.settings['mirostat_mode'], label='mirostat_mode', info='mode=1 is for llama.cpp only.')
shared.gradio['mirostat_tau'] = gr.Slider(0, 10, step=0.01, value=shared.settings['mirostat_tau'], label='mirostat_tau')
shared.gradio['mirostat_eta'] = gr.Slider(0, 1, step=0.01, value=shared.settings['mirostat_eta'], label='mirostat_eta')
gr.Markdown(t('## Alternative sampling methods'))
shared.gradio['penalty_alpha'] = gr.Slider(0, 5, value=shared.settings['penalty_alpha'], label='penalty_alpha', info=t('For Contrastive Search. do_sample must be unchecked.'))
shared.gradio['guidance_scale'] = gr.Slider(-0.5, 2.5, step=0.05, value=shared.settings['guidance_scale'], label='guidance_scale', info=t('For CFG. 1.5 is a good value.'))
shared.gradio['mirostat_mode'] = gr.Slider(0, 2, step=1, value=shared.settings['mirostat_mode'], label=t('mirostat_mode'), info=t('mode=1 is for llama.cpp only.'))
shared.gradio['mirostat_tau'] = gr.Slider(0, 10, step=0.01, value=shared.settings['mirostat_tau'], label=t('mirostat_tau'))
shared.gradio['mirostat_eta'] = gr.Slider(0, 1, step=0.01, value=shared.settings['mirostat_eta'], label=t('mirostat_eta'))
gr.Markdown('## Other options')
gr.Markdown(t('## Other options'))
shared.gradio['do_sample'] = gr.Checkbox(value=shared.settings['do_sample'], label='do_sample')
shared.gradio['temperature_last'] = gr.Checkbox(value=shared.settings['temperature_last'], label='temperature_last', info='Moves temperature/dynamic temperature/quadratic sampling to the end of the sampler stack, ignoring their positions in "Sampler priority".')
shared.gradio['sampler_priority'] = gr.Textbox(value=shared.settings['sampler_priority'], lines=10, label='Sampler priority', info='Parameter names separated by new lines or commas.', elem_classes=['add_scrollbar'])
shared.gradio['dry_sequence_breakers'] = gr.Textbox(value=shared.settings['dry_sequence_breakers'], label='dry_sequence_breakers', info='Tokens across which sequence matching is not continued. Specified as a comma-separated list of quoted strings.')
shared.gradio['temperature_last'] = gr.Checkbox(value=shared.settings['temperature_last'], label=t('temperature_last'), info=t('Moves temperature/dynamic temperature/quadratic sampling to the end of the sampler stack, ignoring their positions in "Sampler priority".'))
shared.gradio['sampler_priority'] = gr.Textbox(value=shared.settings['sampler_priority'], lines=10, label=t('Sampler priority'), info=t('Parameter names separated by new lines or commas.'), elem_classes=['add_scrollbar'])
shared.gradio['dry_sequence_breakers'] = gr.Textbox(value=shared.settings['dry_sequence_breakers'], label=t('dry_sequence_breakers'), info=t('Tokens across which sequence matching is not continued. Specified as a comma-separated list of quoted strings.'))
with gr.Column():
with gr.Row():
with gr.Column():
with gr.Blocks():
shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], value=shared.settings['max_new_tokens'], step=1, label='max_new_tokens', info='⚠️ Setting this too high can cause prompt truncation.')
shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.')
shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.')
shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], value=shared.settings['max_new_tokens'], step=1, label=t('max_new_tokens'), info=t('⚠️ Setting this too high can cause prompt truncation.'))
shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info=t('Activates Prompt Lookup Decoding.'))
shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label=t('Maximum tokens/second'), info=t('To make text readable in real time.'))
shared.gradio['auto_max_new_tokens'] = gr.Checkbox(value=shared.settings['auto_max_new_tokens'], label='auto_max_new_tokens', info='Expand max_new_tokens to the available context length.')
shared.gradio['ban_eos_token'] = gr.Checkbox(value=shared.settings['ban_eos_token'], label='Ban the eos_token', info='Forces the model to never end the generation prematurely.')
shared.gradio['add_bos_token'] = gr.Checkbox(value=shared.settings['add_bos_token'], label='Add the bos_token to the beginning of prompts', info='Only applies to text completion (notebook). In chat mode, templates control BOS tokens.')
shared.gradio['skip_special_tokens'] = gr.Checkbox(value=shared.settings['skip_special_tokens'], label='Skip special tokens', info='Some specific models need this unset.')
shared.gradio['stream'] = gr.Checkbox(value=shared.settings['stream'], label='Activate text streaming')
shared.gradio['static_cache'] = gr.Checkbox(value=shared.settings['static_cache'], label='Static KV cache', info='Use a static cache for improved performance.')
shared.gradio['auto_max_new_tokens'] = gr.Checkbox(value=shared.settings['auto_max_new_tokens'], label=t('auto_max_new_tokens'), info=t('Expand max_new_tokens to the available context length.'))
shared.gradio['ban_eos_token'] = gr.Checkbox(value=shared.settings['ban_eos_token'], label=t('Ban the eos_token'), info=t('Forces the model to never end the generation prematurely.'))
shared.gradio['add_bos_token'] = gr.Checkbox(value=shared.settings['add_bos_token'], label=t('Add the bos_token to the beginning of prompts'), info=t('Only applies to text completion (notebook). In chat mode, templates control BOS tokens.'))
shared.gradio['skip_special_tokens'] = gr.Checkbox(value=shared.settings['skip_special_tokens'], label=t('Skip special tokens'), info=t('Some specific models need this unset.'))
shared.gradio['stream'] = gr.Checkbox(value=shared.settings['stream'], label=t('Activate text streaming'))
shared.gradio['static_cache'] = gr.Checkbox(value=shared.settings['static_cache'], label=t('Static KV cache'), info=t('Use a static cache for improved performance.'))
with gr.Column():
shared.gradio['truncation_length'] = gr.Number(precision=0, step=256, value=get_truncation_length(), label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length.')
shared.gradio['seed'] = gr.Number(value=shared.settings['seed'], label='Seed (-1 for random)')
shared.gradio['custom_system_message'] = gr.Textbox(value=shared.settings['custom_system_message'], lines=2, label='Custom system message', info='If not empty, will be used instead of the default one.', elem_classes=['add_scrollbar'])
shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=2, value=shared.settings["custom_stopping_strings"] or None, label='Custom stopping strings', info='Written between "" and separated by commas.', placeholder='"\\n", "\\nYou:"')
shared.gradio['custom_token_bans'] = gr.Textbox(value=shared.settings['custom_token_bans'] or None, label='Token bans', info='Token IDs to ban, separated by commas. The IDs can be found in the Default or Notebook tab.')
shared.gradio['negative_prompt'] = gr.Textbox(value=shared.settings['negative_prompt'], label='Negative prompt', info='For CFG. Only used when guidance_scale is different than 1.', lines=3, elem_classes=['add_scrollbar'])
shared.gradio['truncation_length'] = gr.Number(precision=0, step=256, value=get_truncation_length(), label=t('Truncate the prompt up to this length'), info=t('The leftmost tokens are removed if the prompt exceeds this length.'))
shared.gradio['seed'] = gr.Number(value=shared.settings['seed'], label=t('Seed (-1 for random)'))
shared.gradio['custom_system_message'] = gr.Textbox(value=shared.settings['custom_system_message'], lines=2, label=t('Custom system message'), info=t('If not empty, will be used instead of the default one.'), elem_classes=['add_scrollbar'])
shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=2, value=shared.settings["custom_stopping_strings"] or None, label=t('Custom stopping strings'), info=t('Written between "" and separated by commas.'), placeholder='"\\n", "\\nYou:"')
shared.gradio['custom_token_bans'] = gr.Textbox(value=shared.settings['custom_token_bans'] or None, label=t('Token bans'), info=t('Token IDs to ban, separated by commas. The IDs can be found in the Default or Notebook tab.'))
shared.gradio['negative_prompt'] = gr.Textbox(value=shared.settings['negative_prompt'], label=t('Negative prompt'), info=t('For CFG. Only used when guidance_scale is different than 1.'), lines=3, elem_classes=['add_scrollbar'])
with gr.Row() as shared.gradio['grammar_file_row']:
shared.gradio['grammar_file'] = gr.Dropdown(value='None', choices=utils.get_available_grammars(), label='Load grammar from file (.gbnf)', elem_classes='slim-dropdown')
shared.gradio['grammar_file'] = gr.Dropdown(value='None', choices=utils.get_available_grammars(), label=t('Load grammar from file (.gbnf)'), elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['grammar_file'], lambda: None, lambda: {'choices': utils.get_available_grammars()}, 'refresh-button', interactive=not mu)
shared.gradio['save_grammar'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
shared.gradio['delete_grammar'] = gr.Button('🗑️ ', elem_classes='refresh-button', interactive=not mu)
shared.gradio['grammar_string'] = gr.Textbox(value=shared.settings['grammar_string'], label='Grammar', lines=16, elem_classes=['add_scrollbar', 'monospace'])
shared.gradio['grammar_string'] = gr.Textbox(value=shared.settings['grammar_string'], label=t('Grammar'), lines=16, elem_classes=['add_scrollbar', 'monospace'])
ui_chat.create_chat_settings_ui()
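Since every user-visible string now routes through `t()`, supporting an additional language becomes a data-only change in `modules/i18n.py`. A hypothetical sketch (the Japanese table and the `_TRANSLATIONS` registry continue the sketch above; neither is part of this commit):

```python
# Hypothetical: extending modules/i18n.py with another language.
# Only the translation data needs to grow; every t() call site in the
# UI modules stays untouched.
_JA = {
    "Chat": "チャット",
    "Parameters": "パラメータ",
}

LANG_CHOICES.append(("ja", "日本語"))
_TRANSLATIONS["ja"] = _JA
```

Because labels are resolved once at component-construction time, switching the active language would still require rebuilding the UI, for example through the existing apply-and-restart flow.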