Mirror of https://github.com/oobabooga/text-generation-webui.git, synced 2025-12-06 07:12:10 +01:00.
Commit 7e80266ae9

README.md (30 lines changed)

@@ -204,17 +204,16 @@ List of command-line flags

usage: server.py [-h] [--multi-user] [--character CHARACTER] [--model MODEL] [--lora LORA [LORA ...]] [--model-dir MODEL_DIR] [--lora-dir LORA_DIR] [--model-menu] [--settings SETTINGS]
[--extensions EXTENSIONS [EXTENSIONS ...]] [--verbose] [--idle-timeout IDLE_TIMEOUT] [--loader LOADER] [--cpu] [--auto-devices] [--gpu-memory GPU_MEMORY [GPU_MEMORY ...]]
[--cpu-memory CPU_MEMORY] [--disk] [--disk-cache-dir DISK_CACHE_DIR] [--load-in-8bit] [--bf16] [--no-cache] [--trust-remote-code] [--force-safetensors] [--no_use_fast]
[--use_flash_attention_2] [--use_eager_attention] [--load-in-4bit] [--use_double_quant] [--compute_dtype COMPUTE_DTYPE] [--quant_type QUANT_TYPE] [--flash-attn] [--tensorcores]
[--n_ctx N_CTX] [--threads THREADS] [--threads-batch THREADS_BATCH] [--no_mul_mat_q] [--n_batch N_BATCH] [--no-mmap] [--mlock] [--n-gpu-layers N_GPU_LAYERS]
[--use_flash_attention_2] [--use_eager_attention] [--torch-compile] [--load-in-4bit] [--use_double_quant] [--compute_dtype COMPUTE_DTYPE] [--quant_type QUANT_TYPE] [--flash-attn]
[--tensorcores] [--n_ctx N_CTX] [--threads THREADS] [--threads-batch THREADS_BATCH] [--no_mul_mat_q] [--n_batch N_BATCH] [--no-mmap] [--mlock] [--n-gpu-layers N_GPU_LAYERS]
[--tensor_split TENSOR_SPLIT] [--numa] [--logits_all] [--no_offload_kqv] [--cache-capacity CACHE_CAPACITY] [--row_split] [--streaming-llm] [--attention-sink-size ATTENTION_SINK_SIZE]
[--tokenizer-dir TOKENIZER_DIR] [--gpu-split GPU_SPLIT] [--autosplit] [--max_seq_len MAX_SEQ_LEN] [--cfg-cache] [--no_flash_attn] [--no_xformers] [--no_sdpa]
[--num_experts_per_token NUM_EXPERTS_PER_TOKEN] [--enable_tp] [--triton] [--no_inject_fused_mlp] [--no_use_cuda_fp16] [--desc_act] [--disable_exllama] [--disable_exllamav2]
[--wbits WBITS] [--groupsize GROUPSIZE] [--hqq-backend HQQ_BACKEND] [--cpp-runner] [--cache_type CACHE_TYPE] [--deepspeed] [--nvme-offload-dir NVME_OFFLOAD_DIR]
[--num_experts_per_token NUM_EXPERTS_PER_TOKEN] [--enable_tp] [--hqq-backend HQQ_BACKEND] [--cpp-runner] [--cache_type CACHE_TYPE] [--deepspeed] [--nvme-offload-dir NVME_OFFLOAD_DIR]
[--local_rank LOCAL_RANK] [--alpha_value ALPHA_VALUE] [--rope_freq_base ROPE_FREQ_BASE] [--compress_pos_emb COMPRESS_POS_EMB] [--listen] [--listen-port LISTEN_PORT]
[--listen-host LISTEN_HOST] [--share] [--auto-launch] [--gradio-auth GRADIO_AUTH] [--gradio-auth-path GRADIO_AUTH_PATH] [--ssl-keyfile SSL_KEYFILE] [--ssl-certfile SSL_CERTFILE]
[--subpath SUBPATH] [--old-colors] [--api] [--public-api] [--public-api-id PUBLIC_API_ID] [--api-port API_PORT] [--api-key API_KEY] [--admin-key ADMIN_KEY] [--nowebui]
[--multimodal-pipeline MULTIMODAL_PIPELINE] [--model_type MODEL_TYPE] [--pre_layer PRE_LAYER [PRE_LAYER ...]] [--checkpoint CHECKPOINT] [--monkey-patch] [--no_inject_fused_attention]
[--cache_4bit] [--cache_8bit] [--chat-buttons]
[--subpath SUBPATH] [--old-colors] [--api] [--public-api] [--public-api-id PUBLIC_API_ID] [--api-port API_PORT] [--api-key API_KEY] [--admin-key ADMIN_KEY] [--api-enable-ipv6]
[--api-disable-ipv4] [--nowebui] [--multimodal-pipeline MULTIMODAL_PIPELINE] [--cache_4bit] [--cache_8bit] [--chat-buttons] [--triton] [--no_inject_fused_mlp] [--no_use_cuda_fp16]
[--desc_act] [--disable_exllama] [--disable_exllamav2] [--wbits WBITS] [--groupsize GROUPSIZE]

Text generation web UI

@@ -237,7 +236,7 @@ Basic settings:

Model loader:
--loader LOADER Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, llamacpp_HF, ExLlamav2_HF, ExLlamav2,
AutoGPTQ.
HQQ, TensorRT-LLM.

Transformers/Accelerate:
--cpu Use the CPU to generate text. Warning: Training on CPU is extremely slow.

@@ -255,6 +254,7 @@ Transformers/Accelerate:
--no_use_fast Set use_fast=False while loading the tokenizer (it's True by default). Use this if you have any problems related to use_fast.
--use_flash_attention_2 Set use_flash_attention_2=True while loading the model.
--use_eager_attention Set attn_implementation= eager while loading the model.
--torch-compile Compile the model with torch.compile for improved performance.

bitsandbytes 4-bit:
--load-in-4bit Load the model with 4-bit precision (using bitsandbytes).

@@ -264,7 +264,7 @@ bitsandbytes 4-bit:

llama.cpp:
--flash-attn Use flash-attention.
--tensorcores NVIDIA only: use llama-cpp-python compiled with tensor cores support. This may increase performance on newer cards.
--tensorcores NVIDIA only: use llama-cpp-python compiled without GGML_CUDA_FORCE_MMQ. This may improve performance on newer cards.
--n_ctx N_CTX Size of the prompt context.
--threads THREADS Number of threads to use.
--threads-batch THREADS_BATCH Number of threads to use for batches/prompt processing.

@@ -294,16 +294,6 @@ ExLlamaV2:
--num_experts_per_token NUM_EXPERTS_PER_TOKEN Number of experts to use for generation. Applies to MoE models like Mixtral.
--enable_tp Enable Tensor Parallelism (TP) in ExLlamaV2.

AutoGPTQ:
--triton Use triton.
--no_inject_fused_mlp Triton mode only: disable the use of fused MLP, which will use less VRAM at the cost of slower inference.
--no_use_cuda_fp16 This can make models faster on some systems.
--desc_act For models that do not have a quantize_config.json, this parameter is used to define whether to set desc_act or not in BaseQuantizeConfig.
--disable_exllama Disable ExLlama kernel, which can improve inference speed on some systems.
--disable_exllamav2 Disable ExLlamav2 kernel.
--wbits WBITS Load a pre-quantized model with specified precision in bits. 2, 3, 4 and 8 are supported.
--groupsize GROUPSIZE Group size.

HQQ:
--hqq-backend HQQ_BACKEND Backend for the HQQ loader. Valid options: PYTORCH, PYTORCH_COMPILE, ATEN.

@@ -343,6 +333,8 @@ API:
--api-port API_PORT The listening port for the API.
--api-key API_KEY API authentication key.
--admin-key ADMIN_KEY API authentication key for admin tasks like loading and unloading models. If not set, will be the same as --api-key.
--api-enable-ipv6 Enable IPv6 for the API
--api-disable-ipv4 Disable IPv4 for the API
--nowebui Do not launch the Gradio UI. Useful for launching the API in standalone mode.

Multimodal:

@@ -48,12 +48,14 @@
.chat .user-message {
background: #f4f4f4;
padding: 1.5rem 1rem;
padding-bottom: 2rem;
border-radius: 0;
border-bottom-right-radius: 0;
}

.chat .assistant-message {
padding: 1.5rem 1rem;
padding-bottom: 2rem;
border-radius: 0;
border: 0;
}

css/main.css (62 lines changed)

@@ -1142,7 +1142,6 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
}

.dark svg {
fill: white;
color: white;
}

@@ -1221,3 +1220,64 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
background: var(--light-theme-gray);
}
}

/* ----------------------------------------------
Copy button for chat messages
---------------------------------------------- */
.message .text,
.message .text-you,
.message .text-bot,
.user-message .text,
.assistant-message .text {
position: relative;
}

.message, .user-message, .assistant-message {
position: relative;
}

.footer-button {
position: absolute;
padding: 0;
margin: 0;
border: none;
border-radius: 3px;
cursor: pointer;
opacity: 0;
display: flex;
align-items: center;
transition: opacity 0.2s;
}

.footer-button.footer-copy-button {
bottom: -23px;
left: 0;
}

.footer-button.footer-refresh-button {
bottom: -23px;
left: 25px;
}

.message:hover .footer-button,
.user-message:hover .footer-button,
.assistant-message:hover .footer-button {
opacity: 1;
}

.footer-button svg {
stroke: rgb(156 163 175);
transition: stroke 0.2s;
}

.footer-button:hover svg {
stroke: rgb(107 114 128);
}

.dark .footer-button svg {
stroke: rgb(156 163 175);
}

.dark .footer-button:hover svg {
stroke: rgb(209 213 219);
}

@@ -5,7 +5,6 @@ import gradio as gr
from modules.html_generator import get_image_cache
from modules.shared import gradio

params = {
'items_per_page': 50,
'open': False,

@@ -93,10 +92,11 @@ def generate_html():

def filter_cards(filter_str=''):
if filter_str == '':
return cards
return gr.Dataset(samples=cards)

filter_upper = filter_str.upper()
return [k for k in cards if filter_upper in k[1].upper()]
filtered = [k for k in cards if filter_upper in k[1].upper()]
return gr.Dataset(samples=filtered)


def select_character(evt: gr.SelectData):

@@ -7,48 +7,48 @@ from pydantic import BaseModel, Field

class GenerationOptions(BaseModel):
preset: str | None = Field(default=None, description="The name of a file under text-generation-webui/presets (without the .yaml extension). The sampling parameters that get overwritten by this option are the keys in the default_preset() function in modules/presets.py.")
min_p: float = 0
dynamic_temperature: bool = False
dynatemp_low: float = 1
dynatemp_high: float = 1
dynatemp_exponent: float = 1
smoothing_factor: float = 0
smoothing_curve: float = 1
min_p: float = 0
top_k: int = 0
repetition_penalty: float = 1
repetition_penalty_range: int = 1024
typical_p: float = 1
tfs: float = 1
top_a: float = 0
xtc_threshold: float = 0.1
xtc_probability: float = 0
epsilon_cutoff: float = 0
eta_cutoff: float = 0
guidance_scale: float = 1
negative_prompt: str = ''
tfs: float = 1
top_a: float = 0
dry_multiplier: float = 0
dry_allowed_length: int = 2
dry_base: float = 1.75
repetition_penalty: float = 1
encoder_repetition_penalty: float = 1
no_repeat_ngram_size: int = 0
repetition_penalty_range: int = 1024
penalty_alpha: float = 0
guidance_scale: float = 1
mirostat_mode: int = 0
mirostat_tau: float = 5
mirostat_eta: float = 0.1
temperature_last: bool = False
do_sample: bool = True
seed: int = -1
encoder_repetition_penalty: float = 1
no_repeat_ngram_size: int = 0
dry_multiplier: float = 0
dry_base: float = 1.75
dry_allowed_length: int = 2
dry_sequence_breakers: str = '"\\n", ":", "\\"", "*"'
xtc_threshold: float = 0.1
xtc_probability: float = 0
truncation_length: int = 0
max_tokens_second: int = 0
prompt_lookup_num_tokens: int = 0
static_cache: bool = False
custom_token_bans: str = ""
sampler_priority: List[str] | str | None = Field(default=None, description="List of samplers where the first items will appear first in the stack. Example: [\"top_k\", \"temperature\", \"top_p\"].")
max_tokens_second: int = 0
do_sample: bool = True
dynamic_temperature: bool = False
temperature_last: bool = False
auto_max_new_tokens: bool = False
ban_eos_token: bool = False
add_bos_token: bool = True
skip_special_tokens: bool = True
static_cache: bool = False
truncation_length: int = 0
seed: int = -1
sampler_priority: List[str] | str | None = Field(default=None, description="List of samplers where the first items will appear first in the stack. Example: [\"top_k\", \"temperature\", \"top_p\"].")
custom_token_bans: str = ""
negative_prompt: str = ''
dry_sequence_breakers: str = '"\\n", ":", "\\"", "*"'
grammar_string: str = ""
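
A side note, not part of the diff: since GenerationOptions is a plain pydantic model, this hunk only reorders and renames nothing — unspecified fields simply keep the defaults listed above when a partial payload is supplied. A minimal sketch of that behaviour, using a trimmed hypothetical stand-in class whose field names are copied from the diff (the real class has many more fields):

    # Hedged illustration only; GenerationOptionsDemo is a stand-in, assuming pydantic v2.
    from pydantic import BaseModel, Field

    class GenerationOptionsDemo(BaseModel):
        preset: str | None = Field(default=None)
        min_p: float = 0
        top_k: int = 0
        seed: int = -1

    opts = GenerationOptionsDemo(min_p=0.05, seed=42)  # other fields keep their defaults
    print(opts.model_dump())  # {'preset': None, 'min_p': 0.05, 'top_k': 0, 'seed': 42}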

js/global_scope_js.js (new file, 47 lines)

@@ -0,0 +1,47 @@
function copyToClipboard(element) {
if (!element) return;

const messageElement = element.closest(".message, .user-message, .assistant-message");
if (!messageElement) return;

const rawText = messageElement.getAttribute("data-raw");
if (!rawText) return;

navigator.clipboard.writeText(rawText).then(function() {
const originalSvg = element.innerHTML;
element.innerHTML = "<svg xmlns=\"http://www.w3.org/2000/svg\" width=\"20\" height=\"20\" viewBox=\"0 0 24 24\" fill=\"none\" stroke=\"currentColor\" stroke-width=\"2\" stroke-linecap=\"round\" stroke-linejoin=\"round\" class=\"text-green-500 dark:text-green-400\"><path d=\"M5 12l5 5l10 -10\"></path></svg>";
setTimeout(() => {
element.innerHTML = originalSvg;
}, 1000);
}).catch(function(err) {
console.error("Failed to copy text: ", err);
});
}

function regenerateClick() {
document.getElementById("Regenerate").click();
}

function handleMorphdomUpdate(text) {
console.log("Morphing!");
morphdom(
document.getElementById("chat").parentNode,
"<div class=\"prose svelte-1ybaih5\">" + text + "</div>",
{
onBeforeElUpdated: function(fromEl, toEl) {
if (fromEl.tagName === "PRE" && fromEl.querySelector("code[data-highlighted]")) {
const fromCode = fromEl.querySelector("code");
const toCode = toEl.querySelector("code");

if (fromCode && toCode && fromCode.textContent === toCode.textContent) {
// If the <code> content is the same, preserve the entire <pre> element
toEl.className = fromEl.className;
toEl.innerHTML = fromEl.innerHTML;
return false; // Skip updating the <pre> element
}
}
return !fromEl.isEqualNode(toEl); // Update only if nodes differ
}
}
);
}

js/main.js (58 lines changed)

@@ -147,10 +147,9 @@ const observer = new MutationObserver(function(mutations) {
doSyntaxHighlighting();

if(!isScrolled) {
if (!isScrolled && targetElement.scrollTop !== targetElement.scrollHeight) {
targetElement.scrollTop = targetElement.scrollHeight;
}
});

// Configure the observer to watch for changes in the subtree and attributes

@@ -178,47 +177,30 @@ function isElementVisibleOnScreen(element) {
);
}

function getVisibleMessagesIndexes() {
const elements = document.querySelectorAll(".message-body");
const visibleIndexes = [];

elements.forEach((element, index) => {
if (isElementVisibleOnScreen(element) && !element.hasAttribute("data-highlighted")) {
visibleIndexes.push(index);
}
});

return visibleIndexes;
}

function doSyntaxHighlighting() {
const indexes = getVisibleMessagesIndexes();
const elements = document.querySelectorAll(".message-body");
const messageBodies = document.querySelectorAll(".message-body");

if (indexes.length > 0) {
if (messageBodies.length > 0) {
observer.disconnect();

indexes.forEach((index) => {
const element = elements[index];
messageBodies.forEach((messageBody) => {
if (isElementVisibleOnScreen(messageBody)) {
// Handle both code and math in a single pass through each message
const codeBlocks = messageBody.querySelectorAll("pre code:not([data-highlighted])");
codeBlocks.forEach((codeBlock) => {
hljs.highlightElement(codeBlock);
codeBlock.setAttribute("data-highlighted", "true");
});

// Tag this element to prevent it from being highlighted twice
element.setAttribute("data-highlighted", "true");

// Perform syntax highlighting
const codeBlocks = element.querySelectorAll("pre code");

codeBlocks.forEach((codeBlock) => {
hljs.highlightElement(codeBlock);
});

renderMathInElement(element, {
delimiters: [
{ left: "$$", right: "$$", display: true },
{ left: "$", right: "$", display: false },
{ left: "\\(", right: "\\)", display: false },
{ left: "\\[", right: "\\]", display: true },
],
});
renderMathInElement(messageBody, {
delimiters: [
{ left: "$$", right: "$$", display: true },
{ left: "$", right: "$", display: false },
{ left: "\\(", right: "\\)", display: false },
{ left: "\\[", right: "\\]", display: true },
],
});
}
});

observer.observe(targetElement, config);

js/morphdom/morphdom-umd.min.js (new vendored file, 1 line; diff suppressed because one or more lines are too long)

@@ -3,7 +3,7 @@ import io

import requests

from modules import shared
from modules import shared, ui
from modules.logging_colors import logger

original_open = open

@@ -55,8 +55,10 @@ def my_open(*args, **kwargs):
'\n        <script src="file/js/katex/auto-render.min.js"></script>'
'\n        <script src="file/js/highlightjs/highlight.min.js"></script>'
'\n        <script src="file/js/highlightjs/highlightjs-copy.min.js"></script>'
'\n        <script src="file/js/morphdom/morphdom-umd.min.js"></script>'
f'\n        <link id="highlight-css" rel="stylesheet" href="file/css/highlightjs/{"github-dark" if shared.settings["dark_theme"] else "github"}.min.css">'
'\n        <script>hljs.addPlugin(new CopyButtonPlugin());</script>'
f'\n        <script>{ui.global_scope_js}</script>'
'\n    </head>'
)
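
For context, not part of the diff: these string fragments are concatenated into a block of script/link tags that ends with the closing </head> tag, which my_open presumably splices into the HTML it serves so that morphdom and the new global-scope script reach the browser. A hedged sketch of that splice pattern, with hypothetical variable names:

    # Illustration only; 'html_text' and 'extra_head' are hypothetical names,
    # not the variables actually used by the webui.
    extra_head = (
        '\n        <script src="file/js/morphdom/morphdom-umd.min.js"></script>'
        '\n    </head>'
    )
    html_text = '<html>\n    <head>\n        <title>demo</title>\n    </head>\n<body></body>\n</html>'
    patched = html_text.replace('\n    </head>', extra_head)
    print(patched)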

@@ -73,7 +73,6 @@ def fix_newlines(string):

def replace_quotes(text):

# Define a list of quote pairs (opening and closing), using HTML entities
quote_pairs = [
('"', '"'), # Double quotes

@@ -84,14 +83,22 @@ def replace_quotes(text):
('‘', '’'), # Alternative single quotes
('“', '”'), # Unicode quotes (numeric entities)
('“', '”'), # Unicode quotes (hex entities)
('\u201C', '\u201D'), # Unicode quotes (literal chars)
]

# Create a regex pattern that matches any of the quote pairs, including newlines
pattern = '|'.join(f'({re.escape(open_q)})(.*?)({re.escape(close_q)})' for open_q, close_q in quote_pairs)

# Replace matched patterns with <q> tags, keeping original quotes
replaced_text = re.sub(pattern, lambda m: f'<q>{m.group(1)}{m.group(2)}{m.group(3)}</q>', text, flags=re.DOTALL)
def replacer(m):
# Find the first non-None group set
for i in range(1, len(m.groups()), 3): # Step through each sub-pattern's groups
if m.group(i): # If this sub-pattern matched
return f'<q>{m.group(i)}{m.group(i + 1)}{m.group(i + 2)}</q>'

return m.group(0) # Fallback (shouldn't happen)

replaced_text = re.sub(pattern, replacer, text, flags=re.DOTALL)
return replaced_text
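
To see why the new replacer steps through the groups three at a time — and why the old lambda that always read groups 1–3 could misbehave once several quote pairs are joined with '|' — here is a small self-contained run of the same pattern-building and replacement logic on sample text. The quote_pairs list is trimmed to two pairs for brevity (an assumption for the demo, not the project's full list):

    import re

    quote_pairs = [('"', '"'), ('\u201C', '\u201D')]  # trimmed list for the demo
    pattern = '|'.join(f'({re.escape(open_q)})(.*?)({re.escape(close_q)})' for open_q, close_q in quote_pairs)

    def replacer(m):
        # Each pair contributes 3 groups; find the alternative that actually matched.
        for i in range(1, len(m.groups()), 3):
            if m.group(i):
                return f'<q>{m.group(i)}{m.group(i + 1)}{m.group(i + 2)}</q>'
        return m.group(0)

    text = 'She said \u201Chello\u201D and then "goodbye".'
    print(re.sub(pattern, replacer, text, flags=re.DOTALL))
    # -> She said <q>“hello”</q> and then <q>"goodbye"</q>.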

@@ -239,6 +246,9 @@ def convert_to_markdown(string):
pattern = re.compile(r'<code[^>]*>(.*?)</code>', re.DOTALL)
html_output = pattern.sub(lambda x: html.unescape(x.group()), html_output)

# Unescape backslashes
html_output = html_output.replace('\\\\', '\\')

# Add "long-list" class to <ul> or <ol> containing a long <li> item
html_output = add_long_list_class(html_output)

@@ -292,24 +302,38 @@ def get_image_cache(path):
return image_cache[path][1]


copy_svg = '''<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="tabler-icon tabler-icon-copy"><path d="M8 8m0 2a2 2 0 0 1 2 -2h8a2 2 0 0 1 2 2v8a2 2 0 0 1 -2 2h-8a2 2 0 0 1 -2 -2z"></path><path d="M16 8v-2a2 2 0 0 0 -2 -2h-8a2 2 0 0 0 -2 2v8a2 2 0 0 0 2 2h2"></path></svg>'''
refresh_svg = '''<svg xmlns="http://www.w3.org/2000/svg" width="20" height="20" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2" stroke-linecap="round" stroke-linejoin="round" class="tabler-icon tabler-icon-repeat"><path d="M4 12v-3a3 3 0 0 1 3 -3h13m-3 -3l3 3l-3 3"></path><path d="M20 12v3a3 3 0 0 1 -3 3h-13m3 3l-3 -3l3 -3"></path></svg>'''
copy_button = f'<button class="footer-button footer-copy-button" onclick="copyToClipboard(this)">{copy_svg}</button>'
refresh_button = f'<button class="footer-button footer-refresh-button" onclick="regenerateClick()">{refresh_svg}</button>'


def generate_instruct_html(history):
output = f'<style>{instruct_css}</style><div class="chat" id="chat"><div class="messages">'
for i, _row in enumerate(history):
row = [convert_to_markdown_wrapped(entry, use_cache=i != len(history) - 1) for entry in _row]

if row[0]: # Don't display empty user messages
for i in range(len(history['visible'])):
row_visible = history['visible'][i]
row_internal = history['internal'][i]
converted_visible = [convert_to_markdown_wrapped(entry, use_cache=i != len(history['visible']) - 1) for entry in row_visible]

if converted_visible[0]: # Don't display empty user messages
output += (
f'<div class="user-message">'
f'<div class="user-message" '
f'data-raw="{html.escape(row_internal[0], quote=True)}">'
f'<div class="text">'
f'<div class="message-body">{row[0]}</div>'
f'<div class="message-body">{converted_visible[0]}</div>'
f'{copy_button}'
f'</div>'
f'</div>'
)

output += (
f'<div class="assistant-message">'
f'<div class="assistant-message" '
f'data-raw="{html.escape(row_internal[1], quote=True)}">'
f'<div class="text">'
f'<div class="message-body">{row[1]}</div>'
f'<div class="message-body">{converted_visible[1]}</div>'
f'{copy_button}'
f'{refresh_button if i == len(history["visible"]) - 1 else ""}'
f'</div>'
f'</div>'
)
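
The data-raw attribute added above carries the internal (un-rendered) message text from row_internal, so the copy button defined in js/global_scope_js.js can read it back verbatim; html.escape(..., quote=True) keeps the attribute well formed even when the message contains quotes or markup. A small hedged illustration of that escaping, with a made-up message:

    import html

    raw_message = 'She said "use <b>bold</b>" & newlines\nsurvive too'
    attr = html.escape(raw_message, quote=True)
    print(f'<div class="user-message" data-raw="{attr}">')
    # <div class="user-message" data-raw="She said &quot;use &lt;b&gt;bold&lt;/b&gt;&quot; &amp; newlines
    # survive too">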

@@ -332,26 +356,33 @@ def generate_cai_chat_html(history, name1, name2, style, character, reset_cache=
if Path("cache/pfp_me.png").exists() else ''
)

for i, _row in enumerate(history):
row = [convert_to_markdown_wrapped(entry, use_cache=i != len(history) - 1) for entry in _row]
for i in range(len(history['visible'])):
row_visible = history['visible'][i]
row_internal = history['internal'][i]
converted_visible = [convert_to_markdown_wrapped(entry, use_cache=i != len(history['visible']) - 1) for entry in row_visible]

if row[0]: # Don't display empty user messages
if converted_visible[0]: # Don't display empty user messages
output += (
f'<div class="message">'
f'<div class="message" '
f'data-raw="{html.escape(row_internal[0], quote=True)}">'
f'<div class="circle-you">{img_me}</div>'
f'<div class="text">'
f'<div class="username">{name1}</div>'
f'<div class="message-body">{row[0]}</div>'
f'<div class="message-body">{converted_visible[0]}</div>'
f'{copy_button}'
f'</div>'
f'</div>'
)

output += (
f'<div class="message">'
f'<div class="message" '
f'data-raw="{html.escape(row_internal[1], quote=True)}">'
f'<div class="circle-bot">{img_bot}</div>'
f'<div class="text">'
f'<div class="username">{name2}</div>'
f'<div class="message-body">{row[1]}</div>'
f'<div class="message-body">{converted_visible[1]}</div>'
f'{copy_button}'
f'{refresh_button if i == len(history["visible"]) - 1 else ""}'
f'</div>'
f'</div>'
)

@@ -363,22 +394,29 @@ def generate_cai_chat_html(history, name1, name2, style, character, reset_cache=

def generate_chat_html(history, name1, name2, reset_cache=False):
output = f'<style>{chat_styles["wpp"]}</style><div class="chat" id="chat"><div class="messages">'

for i, _row in enumerate(history):
row = [convert_to_markdown_wrapped(entry, use_cache=i != len(history) - 1) for entry in _row]
for i in range(len(history['visible'])):
row_visible = history['visible'][i]
row_internal = history['internal'][i]
converted_visible = [convert_to_markdown_wrapped(entry, use_cache=i != len(history['visible']) - 1) for entry in row_visible]

if row[0]: # Don't display empty user messages
if converted_visible[0]: # Don't display empty user messages
output += (
f'<div class="message">'
f'<div class="message" '
f'data-raw="{html.escape(row_internal[0], quote=True)}">'
f'<div class="text-you">'
f'<div class="message-body">{row[0]}</div>'
f'<div class="message-body">{converted_visible[0]}</div>'
f'{copy_button}'
f'</div>'
f'</div>'
)

output += (
f'<div class="message">'
f'<div class="message" '
f'data-raw="{html.escape(row_internal[1], quote=True)}">'
f'<div class="text-bot">'
f'<div class="message-body">{row[1]}</div>'
f'<div class="message-body">{converted_visible[1]}</div>'
f'{copy_button}'
f'{refresh_button if i == len(history["visible"]) - 1 else ""}'
f'</div>'
f'</div>'
)

@@ -389,8 +427,8 @@ def generate_chat_html(history, name1, name2, reset_cache=False):

def chat_html_wrapper(history, name1, name2, mode, style, character, reset_cache=False):
if mode == 'instruct':
return generate_instruct_html(history['visible'])
return generate_instruct_html(history)
elif style == 'wpp':
return generate_chat_html(history['visible'], name1, name2)
return generate_chat_html(history, name1, name2)
else:
return generate_cai_chat_html(history['visible'], name1, name2, style, character, reset_cache)
return generate_cai_chat_html(history, name1, name2, style, character, reset_cache)
|||
|
|
@ -7,102 +7,103 @@ from modules import shared
|
|||
|
||||
loaders_and_params = OrderedDict({
|
||||
'Transformers': [
|
||||
'cpu_memory',
|
||||
'gpu_memory',
|
||||
'load_in_4bit',
|
||||
'load_in_8bit',
|
||||
'torch_compile',
|
||||
'bf16',
|
||||
'cpu',
|
||||
'disk',
|
||||
'auto_devices',
|
||||
'use_double_quant',
|
||||
'quant_type',
|
||||
'compute_dtype',
|
||||
'trust_remote_code',
|
||||
'no_use_fast',
|
||||
'use_flash_attention_2',
|
||||
'use_eager_attention',
|
||||
'cpu_memory',
|
||||
'alpha_value',
|
||||
'compress_pos_emb',
|
||||
],
|
||||
'llama.cpp': [
|
||||
'n_ctx',
|
||||
'n_gpu_layers',
|
||||
'cache_type',
|
||||
'tensor_split',
|
||||
'n_batch',
|
||||
'threads',
|
||||
'threads_batch',
|
||||
'no_mmap',
|
||||
'mlock',
|
||||
'no_mul_mat_q',
|
||||
'rope_freq_base',
|
||||
'compress_pos_emb',
|
||||
'compute_dtype',
|
||||
'quant_type',
|
||||
'load_in_8bit',
|
||||
'load_in_4bit',
|
||||
'torch_compile',
|
||||
'use_flash_attention_2',
|
||||
'auto_devices',
|
||||
'cpu',
|
||||
'numa',
|
||||
'no_offload_kqv',
|
||||
'row_split',
|
||||
'tensorcores',
|
||||
'flash_attn',
|
||||
'streaming_llm',
|
||||
'attention_sink_size',
|
||||
],
|
||||
'llamacpp_HF': [
|
||||
'n_ctx',
|
||||
'n_gpu_layers',
|
||||
'cache_type',
|
||||
'tensor_split',
|
||||
'n_batch',
|
||||
'threads',
|
||||
'threads_batch',
|
||||
'no_mmap',
|
||||
'mlock',
|
||||
'no_mul_mat_q',
|
||||
'rope_freq_base',
|
||||
'compress_pos_emb',
|
||||
'cpu',
|
||||
'numa',
|
||||
'cfg_cache',
|
||||
'disk',
|
||||
'use_double_quant',
|
||||
'use_eager_attention',
|
||||
'bf16',
|
||||
|
||||
'trust_remote_code',
|
||||
'no_use_fast',
|
||||
'logits_all',
|
||||
'no_offload_kqv',
|
||||
'row_split',
|
||||
],
|
||||
'llama.cpp': [
|
||||
'n_gpu_layers',
|
||||
'threads',
|
||||
'threads_batch',
|
||||
'n_batch',
|
||||
'n_ctx',
|
||||
'cache_type',
|
||||
'tensor_split',
|
||||
'rope_freq_base',
|
||||
'compress_pos_emb',
|
||||
'attention_sink_size',
|
||||
'tensorcores',
|
||||
'flash_attn',
|
||||
'streaming_llm',
|
||||
'cpu',
|
||||
'row_split',
|
||||
'no_offload_kqv',
|
||||
'no_mul_mat_q',
|
||||
'no_mmap',
|
||||
'mlock',
|
||||
'numa',
|
||||
],
|
||||
'llamacpp_HF': [
|
||||
'n_gpu_layers',
|
||||
'threads',
|
||||
'threads_batch',
|
||||
'n_batch',
|
||||
'n_ctx',
|
||||
'cache_type',
|
||||
'tensor_split',
|
||||
'rope_freq_base',
|
||||
'compress_pos_emb',
|
||||
'attention_sink_size',
|
||||
'tensorcores',
|
||||
'flash_attn',
|
||||
'streaming_llm',
|
||||
'cpu',
|
||||
'row_split',
|
||||
'no_offload_kqv',
|
||||
'no_mul_mat_q',
|
||||
'no_mmap',
|
||||
'mlock',
|
||||
'numa',
|
||||
'cfg_cache',
|
||||
'logits_all',
|
||||
'trust_remote_code',
|
||||
'no_use_fast',
|
||||
'llamacpp_HF_info',
|
||||
],
|
||||
'ExLlamav2_HF': [
|
||||
'gpu_split',
|
||||
'max_seq_len',
|
||||
'cfg_cache',
|
||||
'cache_type',
|
||||
'gpu_split',
|
||||
'alpha_value',
|
||||
'compress_pos_emb',
|
||||
'num_experts_per_token',
|
||||
'autosplit',
|
||||
'enable_tp',
|
||||
'no_flash_attn',
|
||||
'no_xformers',
|
||||
'no_sdpa',
|
||||
'num_experts_per_token',
|
||||
'cache_type',
|
||||
'autosplit',
|
||||
'enable_tp',
|
||||
'alpha_value',
|
||||
'compress_pos_emb',
|
||||
'cfg_cache',
|
||||
'trust_remote_code',
|
||||
'no_use_fast',
|
||||
],
|
||||
'ExLlamav2': [
|
||||
'gpu_split',
|
||||
'max_seq_len',
|
||||
'cache_type',
|
||||
'gpu_split',
|
||||
'alpha_value',
|
||||
'compress_pos_emb',
|
||||
'num_experts_per_token',
|
||||
'autosplit',
|
||||
'enable_tp',
|
||||
'no_flash_attn',
|
||||
'no_xformers',
|
||||
'no_sdpa',
|
||||
'num_experts_per_token',
|
||||
'cache_type',
|
||||
'autosplit',
|
||||
'enable_tp',
|
||||
'alpha_value',
|
||||
'compress_pos_emb',
|
||||
'exllamav2_info',
|
||||
],
|
||||
'HQQ': [
|
||||
|
|
@ -121,51 +122,51 @@ loaders_and_params = OrderedDict({
|
|||
def transformers_samplers():
|
||||
return {
|
||||
'temperature',
|
||||
'temperature_last',
|
||||
'dynamic_temperature',
|
||||
'dynatemp_low',
|
||||
'dynatemp_high',
|
||||
'dynatemp_exponent',
|
||||
'smoothing_factor',
|
||||
'smoothing_curve',
|
||||
'top_p',
|
||||
'min_p',
|
||||
'top_p',
|
||||
'top_k',
|
||||
'typical_p',
|
||||
'xtc_threshold',
|
||||
'xtc_probability',
|
||||
'epsilon_cutoff',
|
||||
'eta_cutoff',
|
||||
'tfs',
|
||||
'top_a',
|
||||
'dry_multiplier',
|
||||
'dry_allowed_length',
|
||||
'dry_base',
|
||||
'repetition_penalty',
|
||||
'presence_penalty',
|
||||
'frequency_penalty',
|
||||
'repetition_penalty_range',
|
||||
'presence_penalty',
|
||||
'encoder_repetition_penalty',
|
||||
'no_repeat_ngram_size',
|
||||
'dry_multiplier',
|
||||
'dry_base',
|
||||
'dry_allowed_length',
|
||||
'dry_sequence_breakers',
|
||||
'xtc_threshold',
|
||||
'xtc_probability',
|
||||
'seed',
|
||||
'do_sample',
|
||||
'repetition_penalty_range',
|
||||
'penalty_alpha',
|
||||
'guidance_scale',
|
||||
'mirostat_mode',
|
||||
'mirostat_tau',
|
||||
'mirostat_eta',
|
||||
'grammar_file_row',
|
||||
'grammar_string',
|
||||
'guidance_scale',
|
||||
'negative_prompt',
|
||||
'prompt_lookup_num_tokens',
|
||||
'do_sample',
|
||||
'dynamic_temperature',
|
||||
'temperature_last',
|
||||
'auto_max_new_tokens',
|
||||
'ban_eos_token',
|
||||
'custom_token_bans',
|
||||
'sampler_priority',
|
||||
'add_bos_token',
|
||||
'skip_special_tokens',
|
||||
'auto_max_new_tokens',
|
||||
'prompt_lookup_num_tokens',
|
||||
'static_cache',
|
||||
'seed',
|
||||
'sampler_priority',
|
||||
'custom_token_bans',
|
||||
'negative_prompt',
|
||||
'dry_sequence_breakers',
|
||||
'grammar_string',
|
||||
'grammar_file_row',
|
||||
}
|
||||
|
||||
|
||||
|
|
@ -174,155 +175,156 @@ loaders_samplers = {
|
|||
'HQQ': transformers_samplers(),
|
||||
'ExLlamav2': {
|
||||
'temperature',
|
||||
'temperature_last',
|
||||
'smoothing_factor',
|
||||
'dynatemp_low',
|
||||
'dynatemp_high',
|
||||
'dynatemp_exponent',
|
||||
'top_p',
|
||||
'smoothing_factor',
|
||||
'min_p',
|
||||
'top_p',
|
||||
'top_k',
|
||||
'typical_p',
|
||||
'xtc_threshold',
|
||||
'xtc_probability',
|
||||
'tfs',
|
||||
'top_a',
|
||||
'dry_multiplier',
|
||||
'dry_allowed_length',
|
||||
'dry_base',
|
||||
'repetition_penalty',
|
||||
'presence_penalty',
|
||||
'frequency_penalty',
|
||||
'presence_penalty',
|
||||
'repetition_penalty_range',
|
||||
'mirostat_mode',
|
||||
'mirostat_tau',
|
||||
'mirostat_eta',
|
||||
'dry_multiplier',
|
||||
'dry_base',
|
||||
'dry_allowed_length',
|
||||
'dry_sequence_breakers',
|
||||
'xtc_threshold',
|
||||
'xtc_probability',
|
||||
'seed',
|
||||
'dynamic_temperature',
|
||||
'temperature_last',
|
||||
'auto_max_new_tokens',
|
||||
'ban_eos_token',
|
||||
'add_bos_token',
|
||||
'custom_token_bans',
|
||||
'skip_special_tokens',
|
||||
'auto_max_new_tokens',
|
||||
'seed',
|
||||
'custom_token_bans',
|
||||
'dry_sequence_breakers',
|
||||
},
|
||||
'ExLlamav2_HF': {
|
||||
'temperature',
|
||||
'temperature_last',
|
||||
'dynamic_temperature',
|
||||
'dynatemp_low',
|
||||
'dynatemp_high',
|
||||
'dynatemp_exponent',
|
||||
'smoothing_factor',
|
||||
'smoothing_curve',
|
||||
'top_p',
|
||||
'min_p',
|
||||
'top_p',
|
||||
'top_k',
|
||||
'typical_p',
|
||||
'xtc_threshold',
|
||||
'xtc_probability',
|
||||
'epsilon_cutoff',
|
||||
'eta_cutoff',
|
||||
'tfs',
|
||||
'top_a',
|
||||
'dry_multiplier',
|
||||
'dry_allowed_length',
|
||||
'dry_base',
|
||||
'repetition_penalty',
|
||||
'presence_penalty',
|
||||
'frequency_penalty',
|
||||
'repetition_penalty_range',
|
||||
'presence_penalty',
|
||||
'encoder_repetition_penalty',
|
||||
'no_repeat_ngram_size',
|
||||
'dry_multiplier',
|
||||
'dry_base',
|
||||
'dry_allowed_length',
|
||||
'dry_sequence_breakers',
|
||||
'xtc_threshold',
|
||||
'xtc_probability',
|
||||
'seed',
|
||||
'do_sample',
|
||||
'repetition_penalty_range',
|
||||
'guidance_scale',
|
||||
'mirostat_mode',
|
||||
'mirostat_tau',
|
||||
'mirostat_eta',
|
||||
'grammar_file_row',
|
||||
'grammar_string',
|
||||
'guidance_scale',
|
||||
'negative_prompt',
|
||||
'do_sample',
|
||||
'dynamic_temperature',
|
||||
'temperature_last',
|
||||
'auto_max_new_tokens',
|
||||
'ban_eos_token',
|
||||
'custom_token_bans',
|
||||
'sampler_priority',
|
||||
'add_bos_token',
|
||||
'skip_special_tokens',
|
||||
'auto_max_new_tokens',
|
||||
'seed',
|
||||
'sampler_priority',
|
||||
'custom_token_bans',
|
||||
'negative_prompt',
|
||||
'dry_sequence_breakers',
|
||||
'grammar_string',
|
||||
'grammar_file_row',
|
||||
},
|
||||
'llama.cpp': {
|
||||
'temperature',
|
||||
'top_p',
|
||||
'min_p',
|
||||
'top_p',
|
||||
'top_k',
|
||||
'typical_p',
|
||||
'tfs',
|
||||
'repetition_penalty',
|
||||
'presence_penalty',
|
||||
'frequency_penalty',
|
||||
'seed',
|
||||
'presence_penalty',
|
||||
'mirostat_mode',
|
||||
'mirostat_tau',
|
||||
'mirostat_eta',
|
||||
'grammar_file_row',
|
||||
'grammar_string',
|
||||
'ban_eos_token',
|
||||
'seed',
|
||||
'custom_token_bans',
|
||||
'grammar_string',
|
||||
'grammar_file_row',
|
||||
},
|
||||
'llamacpp_HF': {
|
||||
'temperature',
|
||||
'temperature_last',
|
||||
'dynamic_temperature',
|
||||
'dynatemp_low',
|
||||
'dynatemp_high',
|
||||
'dynatemp_exponent',
|
||||
'smoothing_factor',
|
||||
'smoothing_curve',
|
||||
'top_p',
|
||||
'min_p',
|
||||
'top_p',
|
||||
'top_k',
|
||||
'typical_p',
|
||||
'xtc_threshold',
|
||||
'xtc_probability',
|
||||
'epsilon_cutoff',
|
||||
'eta_cutoff',
|
||||
'tfs',
|
||||
'top_a',
|
||||
'dry_multiplier',
|
||||
'dry_allowed_length',
|
||||
'dry_base',
|
||||
'repetition_penalty',
|
||||
'presence_penalty',
|
||||
'frequency_penalty',
|
||||
'repetition_penalty_range',
|
||||
'presence_penalty',
|
||||
'encoder_repetition_penalty',
|
||||
'no_repeat_ngram_size',
|
||||
'dry_multiplier',
|
||||
'dry_base',
|
||||
'dry_allowed_length',
|
||||
'dry_sequence_breakers',
|
||||
'xtc_threshold',
|
||||
'xtc_probability',
|
||||
'seed',
|
||||
'do_sample',
|
||||
'repetition_penalty_range',
|
||||
'guidance_scale',
|
||||
'mirostat_mode',
|
||||
'mirostat_tau',
|
||||
'mirostat_eta',
|
||||
'grammar_file_row',
|
||||
'grammar_string',
|
||||
'guidance_scale',
|
||||
'negative_prompt',
|
||||
'do_sample',
|
||||
'dynamic_temperature',
|
||||
'temperature_last',
|
||||
'auto_max_new_tokens',
|
||||
'ban_eos_token',
|
||||
'custom_token_bans',
|
||||
'sampler_priority',
|
||||
'add_bos_token',
|
||||
'skip_special_tokens',
|
||||
'auto_max_new_tokens',
|
||||
'seed',
|
||||
'sampler_priority',
|
||||
'custom_token_bans',
|
||||
'negative_prompt',
|
||||
'dry_sequence_breakers',
|
||||
'grammar_string',
|
||||
'grammar_file_row',
|
||||
},
|
||||
'TensorRT-LLM': {
|
||||
'temperature',
|
||||
'top_p',
|
||||
'top_k',
|
||||
'repetition_penalty',
|
||||
'presence_penalty',
|
||||
'frequency_penalty',
|
||||
'ban_eos_token',
|
||||
'presence_penalty',
|
||||
'auto_max_new_tokens',
|
||||
'ban_eos_token',
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
|||
|
|
@ -13,40 +13,40 @@ from modules.logging_colors import logger
|
|||
def default_preset():
|
||||
return {
|
||||
'temperature': 1,
|
||||
'temperature_last': False,
|
||||
'dynamic_temperature': False,
|
||||
'dynatemp_low': 1,
|
||||
'dynatemp_high': 1,
|
||||
'dynatemp_exponent': 1,
|
||||
'smoothing_factor': 0,
|
||||
'smoothing_curve': 1,
|
||||
'top_p': 1,
|
||||
'min_p': 0,
|
||||
'top_p': 1,
|
||||
'top_k': 0,
|
||||
'repetition_penalty': 1,
|
||||
'presence_penalty': 0,
|
||||
'frequency_penalty': 0,
|
||||
'repetition_penalty_range': 1024,
|
||||
'typical_p': 1,
|
||||
'tfs': 1,
|
||||
'top_a': 0,
|
||||
'xtc_threshold': 0.1,
|
||||
'xtc_probability': 0,
|
||||
'epsilon_cutoff': 0,
|
||||
'eta_cutoff': 0,
|
||||
'guidance_scale': 1,
|
||||
'tfs': 1,
|
||||
'top_a': 0,
|
||||
'dry_multiplier': 0,
|
||||
'dry_allowed_length': 2,
|
||||
'dry_base': 1.75,
|
||||
'repetition_penalty': 1,
|
||||
'frequency_penalty': 0,
|
||||
'presence_penalty': 0,
|
||||
'encoder_repetition_penalty': 1,
|
||||
'no_repeat_ngram_size': 0,
|
||||
'repetition_penalty_range': 1024,
|
||||
'penalty_alpha': 0,
|
||||
'guidance_scale': 1,
|
||||
'mirostat_mode': 0,
|
||||
'mirostat_tau': 5,
|
||||
'mirostat_eta': 0.1,
|
||||
'do_sample': True,
|
||||
'encoder_repetition_penalty': 1,
|
||||
'no_repeat_ngram_size': 0,
|
||||
'dry_multiplier': 0,
|
||||
'dry_base': 1.75,
|
||||
'dry_allowed_length': 2,
|
||||
'dynamic_temperature': False,
|
||||
'temperature_last': False,
|
||||
'sampler_priority': 'repetition_penalty\npresence_penalty\nfrequency_penalty\ndry\ntemperature\ndynamic_temperature\nquadratic_sampling\ntop_k\ntop_p\ntypical_p\nepsilon_cutoff\neta_cutoff\ntfs\ntop_a\nmin_p\nmirostat\nxtc\nencoder_repetition_penalty\nno_repeat_ngram',
|
||||
'dry_sequence_breakers': '"\\n", ":", "\\"", "*"',
|
||||
'xtc_threshold': 0.1,
|
||||
'xtc_probability': 0,
|
||||
'sampler_priority': 'repetition_penalty\npresence_penalty\nfrequency_penalty\ndry\ntemperature\ndynamic_temperature\nquadratic_sampling\ntop_k\ntop_p\ntypical_p\nepsilon_cutoff\neta_cutoff\ntfs\ntop_a\nmin_p\nmirostat\nxtc\nencoder_repetition_penalty\nno_repeat_ngram'
|
||||
}
|
||||
|
||||
|
||||
|
|
|
|||
|
|
@ -29,40 +29,40 @@ need_restart = False
|
|||
|
||||
# UI defaults
|
||||
settings = {
|
||||
'dark_theme': True,
|
||||
'show_controls': True,
|
||||
'start_with': '',
|
||||
'mode': 'chat-instruct',
|
||||
'chat_style': 'cai-chat',
|
||||
'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
|
||||
'prompt-default': 'QA',
|
||||
'prompt-notebook': 'QA',
|
||||
'preset': 'min_p',
|
||||
'max_new_tokens': 512,
|
||||
'max_new_tokens_min': 1,
|
||||
'max_new_tokens_max': 4096,
|
||||
'negative_prompt': '',
|
||||
'seed': -1,
|
||||
'truncation_length': 2048,
|
||||
'max_tokens_second': 0,
|
||||
'max_updates_second': 0,
|
||||
'prompt_lookup_num_tokens': 0,
|
||||
'static_cache': False,
|
||||
'custom_stopping_strings': '',
|
||||
'custom_token_bans': '',
|
||||
'auto_max_new_tokens': False,
|
||||
'ban_eos_token': False,
|
||||
'add_bos_token': True,
|
||||
'skip_special_tokens': True,
|
||||
'stream': True,
|
||||
'character': 'Assistant',
|
||||
'name1': 'You',
|
||||
'user_bio': '',
|
||||
'custom_system_message': '',
|
||||
'preset': 'min_p',
|
||||
'max_new_tokens': 512,
|
||||
'max_new_tokens_min': 1,
|
||||
'max_new_tokens_max': 4096,
|
||||
'prompt_lookup_num_tokens': 0,
|
||||
'max_tokens_second': 0,
|
||||
'max_updates_second': 0,
|
||||
'auto_max_new_tokens': True,
|
||||
'ban_eos_token': False,
|
||||
'add_bos_token': True,
|
||||
'skip_special_tokens': True,
|
||||
'stream': True,
|
||||
'static_cache': False,
|
||||
'truncation_length': 2048,
|
||||
'seed': -1,
|
||||
'custom_stopping_strings': '',
|
||||
'custom_token_bans': '',
|
||||
'negative_prompt': '',
|
||||
'autoload_model': False,
|
||||
'dark_theme': True,
|
||||
'default_extensions': [],
|
||||
'instruction_template_str': "{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if not ns.found -%}\n {{- '' + 'Below is an instruction that describes a task. Write a response that appropriately completes the request.' + '\\n\\n' -}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {{- '' + message['content'] + '\\n\\n' -}}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{-'### Instruction:\\n' + message['content'] + '\\n\\n'-}}\n {%- else -%}\n {{-'### Response:\\n' + message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{-'### Response:\\n'-}}\n{%- endif -%}",
|
||||
'chat_template_str': "{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {%- if message['content'] -%}\n {{- message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- if user_bio -%}\n {{- user_bio + '\\n\\n' -}}\n {%- endif -%}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{- name1 + ': ' + message['content'] + '\\n'-}}\n {%- else -%}\n {{- name2 + ': ' + message['content'] + '\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}",
|
||||
'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
|
||||
'autoload_model': False,
|
||||
'default_extensions': [],
|
||||
}
|
||||
|
||||
default_settings = copy.deepcopy(settings)
|
||||
|
|

@@ -86,7 +86,7 @@ group.add_argument('--idle-timeout', type=int, default=0, help='Unload model aft

# Model loader
group = parser.add_argument_group('Model loader')
group.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, llamacpp_HF, ExLlamav2_HF, ExLlamav2.')
group.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, llamacpp_HF, ExLlamav2_HF, ExLlamav2, HQQ, TensorRT-LLM.')

# Transformers/Accelerate
group = parser.add_argument_group('Transformers/Accelerate')

@@ -116,7 +116,7 @@ group.add_argument('--quant_type', type=str, default='nf4', help='quant_type for
# llama.cpp
group = parser.add_argument_group('llama.cpp')
group.add_argument('--flash-attn', action='store_true', help='Use flash-attention.')
group.add_argument('--tensorcores', action='store_true', help='NVIDIA only: use llama-cpp-python compiled with tensor cores support. This may increase performance on newer cards.')
group.add_argument('--tensorcores', action='store_true', help='NVIDIA only: use llama-cpp-python compiled without GGML_CUDA_FORCE_MMQ. This may improve performance on newer cards.')
group.add_argument('--n_ctx', type=int, default=2048, help='Size of the prompt context.')
group.add_argument('--threads', type=int, default=0, help='Number of threads to use.')
group.add_argument('--threads-batch', type=int, default=0, help='Number of threads to use for batches/prompt processing.')

@@ -203,11 +203,6 @@ group.add_argument('--multimodal-pipeline', type=str, default=None, help='The mu

# Deprecated parameters
group = parser.add_argument_group('Deprecated')
group.add_argument('--model_type', type=str, help='DEPRECATED')
group.add_argument('--pre_layer', type=int, nargs='+', help='DEPRECATED')
group.add_argument('--checkpoint', type=str, help='DEPRECATED')
group.add_argument('--monkey-patch', action='store_true', help='DEPRECATED')
group.add_argument('--no_inject_fused_attention', action='store_true', help='DEPRECATED')
group.add_argument('--cache_4bit', action='store_true', help='DEPRECATED')
group.add_argument('--cache_8bit', action='store_true', help='DEPRECATED')
group.add_argument('--chat-buttons', action='store_true', help='DEPRECATED')

@@ -228,14 +223,26 @@ for arg in sys.argv[1:]:
if hasattr(args, arg):
provided_arguments.append(arg)

deprecated_args = []
deprecated_args = [
'cache_4bit',
'cache_8bit',
'chat_buttons',
'triton',
'no_inject_fused_mlp',
'no_use_cuda_fp16',
'desc_act',
'disable_exllama',
'disable_exllamav2',
'wbits',
'groupsize'
]


def do_cmd_flags_warnings():

# Deprecation warnings
for k in deprecated_args:
if getattr(args, k):
if k in provided_arguments:
logger.warning(f'The --{k} flag has been deprecated and will be removed soon. Please remove that flag.')

# Security warnings

@@ -287,31 +287,62 @@ def generate_reply_HF(question, original_question, seed, state, stopping_strings
clear_torch_cache()

generate_params = {}
for k in ['max_new_tokens', 'temperature', 'temperature_last', 'dynamic_temperature', 'dynatemp_low', 'dynatemp_high', 'dynatemp_exponent', 'smoothing_factor', 'smoothing_curve', 'top_p', 'min_p', 'top_k', 'repetition_penalty', 'presence_penalty', 'frequency_penalty', 'repetition_penalty_range', 'typical_p', 'tfs', 'top_a', 'guidance_scale', 'penalty_alpha', 'mirostat_mode', 'mirostat_tau', 'mirostat_eta', 'do_sample', 'encoder_repetition_penalty', 'no_repeat_ngram_size', 'dry_multiplier', 'dry_base', 'dry_allowed_length', 'dry_sequence_breakers', 'xtc_threshold', 'xtc_probability']:
for k in [
'temperature',
'dynatemp_low',
'dynatemp_high',
'dynatemp_exponent',
'smoothing_factor',
'smoothing_curve',
'min_p',
'top_p',
'top_k',
'typical_p',
'xtc_threshold',
'xtc_probability',
'tfs',
'top_a',
'dry_multiplier',
'dry_allowed_length',
'dry_base',
'repetition_penalty',
'frequency_penalty',
'presence_penalty',
'encoder_repetition_penalty',
'no_repeat_ngram_size',
'repetition_penalty_range',
'penalty_alpha',
'guidance_scale',
'mirostat_mode',
'mirostat_tau',
'mirostat_eta',
'max_new_tokens',
'do_sample',
'dynamic_temperature',
'temperature_last',
'dry_sequence_breakers',
]:
if k in state:
generate_params[k] = state[k]

if isinstance(state['sampler_priority'], list) and len(state['sampler_priority']) > 0:
generate_params['sampler_priority'] = state['sampler_priority']
elif isinstance(state['sampler_priority'], str) and state['sampler_priority'].strip() != '':
generate_params['sampler_priority'] = [x.strip() for x in state['sampler_priority'].replace('\n', ',').split(',') if x.strip()]

if state['negative_prompt'] != '':
generate_params['negative_prompt_ids'] = encode(state['negative_prompt'])

if state['prompt_lookup_num_tokens'] > 0:
generate_params['prompt_lookup_num_tokens'] = state['prompt_lookup_num_tokens']

if state['static_cache']:
generate_params['cache_implementation'] = 'static'

for k in ['epsilon_cutoff', 'eta_cutoff']:
if state[k] > 0:
generate_params[k] = state[k] * 1e-4

if state['prompt_lookup_num_tokens'] > 0:
generate_params['prompt_lookup_num_tokens'] = state['prompt_lookup_num_tokens']

if state['ban_eos_token']:
generate_params['suppress_tokens'] = [shared.tokenizer.eos_token_id]

if state['static_cache']:
generate_params['cache_implementation'] = 'static'

if isinstance(state['sampler_priority'], list) and len(state['sampler_priority']) > 0:
generate_params['sampler_priority'] = state['sampler_priority']
elif isinstance(state['sampler_priority'], str) and state['sampler_priority'].strip() != '':
generate_params['sampler_priority'] = [x.strip() for x in state['sampler_priority'].replace('\n', ',').split(',') if x.strip()]

if state['custom_token_bans']:
to_ban = [int(x) for x in state['custom_token_bans'].split(',')]
if len(to_ban) > 0:

@@ -320,6 +351,9 @@ def generate_reply_HF(question, original_question, seed, state, stopping_strings
else:
generate_params['suppress_tokens'] = to_ban

if state['negative_prompt'] != '':
generate_params['negative_prompt_ids'] = encode(state['negative_prompt'])

generate_params.update({'use_cache': not shared.args.no_cache})
if shared.args.deepspeed:
generate_params.update({'synced_gpus': True})
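
The sampler_priority handling above accepts either a ready-made list or a free-form string; the string form is split on both newlines and commas before being handed to the sampler stack. A small hedged demonstration of that exact expression (the state value below is a made-up UI input):

    state = {'sampler_priority': 'top_k\ntemperature, top_p\n\n'}  # hypothetical value

    priority = [x.strip() for x in state['sampler_priority'].replace('\n', ',').split(',') if x.strip()]
    print(priority)  # ['top_k', 'temperature', 'top_p']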
|||
164
modules/ui.py
164
modules/ui.py
|
|
@ -19,6 +19,8 @@ with open(Path(__file__).resolve().parent / '../css/highlightjs/highlightjs-copy
|
|||
css += f.read()
|
||||
with open(Path(__file__).resolve().parent / '../js/main.js', 'r') as f:
|
||||
js = f.read()
|
||||
with open(Path(__file__).resolve().parent / '../js/global_scope_js.js', 'r') as f:
|
||||
global_scope_js = f.read()
|
||||
with open(Path(__file__).resolve().parent / '../js/save_files.js', 'r') as f:
|
||||
save_files_js = f.read()
|
||||
with open(Path(__file__).resolve().parent / '../js/switch_tabs.js', 'r') as f:
|
||||
|
|
@ -102,55 +104,55 @@ else:
|
|||
|
||||
def list_model_elements():
|
||||
elements = [
|
||||
'loader',
|
||||
'filter_by_loader',
|
||||
'loader',
|
||||
'cpu_memory',
|
||||
'auto_devices',
|
||||
'disk',
|
||||
'cpu',
|
||||
'bf16',
|
||||
'load_in_4bit',
|
||||
'load_in_8bit',
|
||||
'torch_compile',
|
||||
'trust_remote_code',
|
||||
'no_use_fast',
|
||||
'use_flash_attention_2',
|
||||
'use_eager_attention',
|
||||
'compute_dtype',
|
||||
'quant_type',
|
||||
'use_double_quant',
|
||||
'cfg_cache',
|
||||
'no_flash_attn',
|
||||
'no_xformers',
|
||||
'no_sdpa',
|
||||
'num_experts_per_token',
|
||||
'cache_type',
|
||||
'autosplit',
|
||||
'enable_tp',
|
||||
'n_gpu_layers',
|
||||
'threads',
|
||||
'threads_batch',
|
||||
'n_batch',
|
||||
'no_mmap',
|
||||
'mlock',
|
||||
'no_mul_mat_q',
|
||||
'n_gpu_layers',
|
||||
'tensor_split',
|
||||
'hqq_backend',
|
||||
'n_ctx',
|
||||
'gpu_split',
|
||||
'max_seq_len',
|
||||
'compress_pos_emb',
|
||||
'cache_type',
|
||||
'tensor_split',
|
||||
'gpu_split',
|
||||
'alpha_value',
|
||||
'rope_freq_base',
|
||||
'numa',
|
||||
'logits_all',
|
||||
'no_offload_kqv',
|
||||
'row_split',
|
||||
'tensorcores',
|
||||
'flash_attn',
|
||||
'streaming_llm',
|
||||
'compress_pos_emb',
|
||||
'compute_dtype',
|
||||
'quant_type',
|
||||
'attention_sink_size',
|
||||
'hqq_backend',
|
||||
'num_experts_per_token',
|
||||
'tensorcores',
|
||||
'load_in_8bit',
|
||||
'load_in_4bit',
|
||||
'torch_compile',
|
||||
'flash_attn',
|
||||
'use_flash_attention_2',
|
||||
'streaming_llm',
|
||||
'auto_devices',
|
||||
'cpu',
|
||||
'disk',
|
||||
'row_split',
|
||||
'no_offload_kqv',
|
||||
'no_mul_mat_q',
|
||||
'no_mmap',
|
||||
'mlock',
|
||||
'numa',
|
||||
'use_double_quant',
|
||||
'use_eager_attention',
|
||||
'bf16',
|
||||
'autosplit',
|
||||
'enable_tp',
|
||||
'no_flash_attn',
|
||||
'no_xformers',
|
||||
'no_sdpa',
|
||||
'cfg_cache',
|
||||
'cpp_runner',
|
||||
'logits_all',
|
||||
'trust_remote_code',
|
||||
'no_use_fast',
|
||||
]
|
||||
|
||||
if is_torch_xpu_available():
|
||||
|
|
@ -165,87 +167,87 @@ def list_model_elements():
|
|||
|
||||
def list_interface_input_elements():
|
||||
elements = [
|
||||
'max_new_tokens',
|
||||
'auto_max_new_tokens',
|
||||
'max_tokens_second',
|
||||
'max_updates_second',
|
||||
'prompt_lookup_num_tokens',
|
||||
'seed',
|
||||
'temperature',
|
||||
'temperature_last',
|
||||
'dynamic_temperature',
|
||||
'dynatemp_low',
|
||||
'dynatemp_high',
|
||||
'dynatemp_exponent',
|
||||
'smoothing_factor',
|
||||
'smoothing_curve',
|
||||
'top_p',
|
||||
'min_p',
|
||||
'top_p',
|
||||
'top_k',
|
||||
'typical_p',
|
||||
'epsilon_cutoff',
|
||||
'eta_cutoff',
|
||||
'repetition_penalty',
|
||||
'presence_penalty',
|
||||
'frequency_penalty',
|
||||
'repetition_penalty_range',
|
||||
'encoder_repetition_penalty',
|
||||
'no_repeat_ngram_size',
|
||||
'dry_multiplier',
|
||||
'dry_base',
|
||||
'dry_allowed_length',
|
||||
'dry_sequence_breakers',
|
||||
'xtc_threshold',
|
||||
'xtc_probability',
|
||||
'do_sample',
|
||||
'epsilon_cutoff',
|
||||
'eta_cutoff',
|
||||
'tfs',
|
||||
'top_a',
|
||||
'dry_multiplier',
|
||||
'dry_allowed_length',
|
||||
'dry_base',
|
||||
'repetition_penalty',
|
||||
'frequency_penalty',
|
||||
'presence_penalty',
|
||||
'encoder_repetition_penalty',
|
||||
'no_repeat_ngram_size',
|
||||
'repetition_penalty_range',
|
||||
'penalty_alpha',
|
||||
'guidance_scale',
|
||||
'mirostat_mode',
|
||||
'mirostat_tau',
|
||||
'mirostat_eta',
|
||||
'grammar_string',
|
||||
'negative_prompt',
|
||||
'guidance_scale',
|
||||
'add_bos_token',
|
||||
'max_new_tokens',
|
||||
'prompt_lookup_num_tokens',
|
||||
'max_tokens_second',
|
||||
'max_updates_second',
|
||||
'do_sample',
|
||||
'dynamic_temperature',
|
||||
'temperature_last',
|
||||
'auto_max_new_tokens',
|
||||
'ban_eos_token',
|
||||
'custom_token_bans',
|
||||
'sampler_priority',
|
||||
'truncation_length',
|
||||
'custom_stopping_strings',
|
||||
'add_bos_token',
|
||||
'skip_special_tokens',
|
||||
'stream',
|
||||
'static_cache',
|
||||
'tfs',
|
||||
'top_a',
|
||||
'truncation_length',
|
||||
'seed',
|
||||
'sampler_priority',
|
||||
'custom_stopping_strings',
|
||||
'custom_token_bans',
|
||||
'negative_prompt',
|
||||
'dry_sequence_breakers',
|
||||
'grammar_string',
|
||||
]
|
||||
|
||||
# Chat elements
|
||||
elements += [
|
||||
'textbox',
|
||||
'start_with',
|
||||
'character_menu',
|
||||
'history',
|
||||
'search_chat',
|
||||
'unique_id',
|
||||
'textbox',
|
||||
'start_with',
|
||||
'mode',
|
||||
'chat_style',
|
||||
'chat-instruct_command',
|
||||
'character_menu',
|
||||
'name2',
|
||||
'context',
|
||||
'greeting',
|
||||
'name1',
|
||||
'user_bio',
|
||||
'name2',
|
||||
'greeting',
|
||||
'context',
|
||||
'mode',
|
||||
'custom_system_message',
|
||||
'instruction_template_str',
|
||||
'chat_template_str',
|
||||
'chat_style',
|
||||
'chat-instruct_command',
|
||||
]
|
||||
|
||||
# Notebook/default elements
|
||||
elements += [
|
||||
'textbox-notebook',
|
||||
'textbox-default',
|
||||
'output_textbox',
|
||||
'textbox-notebook',
|
||||
'prompt_menu-default',
|
||||
'prompt_menu-notebook',
|
||||
'output_textbox',
|
||||
]
|
||||
|
||||
# Model elements
|
||||
|
|
|
|||
|

@@ -20,7 +20,7 @@ def create_ui():
shared.gradio['Chat input'] = gr.State()
shared.gradio['history'] = gr.JSON(visible=False)

with gr.Tab('Chat', elem_id='chat-tab'):
with gr.Tab('Chat', id='Chat', elem_id='chat-tab'):
with gr.Row(elem_id='past-chats-row', elem_classes=['pretty_scrollbar']):
with gr.Column():
with gr.Row(elem_id='past-chats-buttons'):

@@ -46,8 +46,8 @@ def create_ui():

with gr.Row():
with gr.Column(elem_id='chat-col'):
shared.gradio['display'] = gr.HTML(value=chat_html_wrapper({'internal': [], 'visible': []}, '', '', 'chat', 'cai-chat', ''))

shared.gradio['html_display'] = gr.HTML(value=chat_html_wrapper({'internal': [], 'visible': []}, '', '', 'chat', 'cai-chat', ''), visible=True)
shared.gradio['display'] = gr.Textbox(value="", visible=False) # Hidden buffer
with gr.Row(elem_id="chat-input-row"):
with gr.Column(scale=1, elem_id='gr-hover-container'):
gr.HTML(value='<div class="hover-element" onclick="void(0)"><span style="width: 100px; display: block" id="hover-element-button">☰</span><div class="hover-menu" id="hover-menu"></div>', elem_id='gr-hover')

@@ -164,7 +164,7 @@ def create_chat_settings_ui():
with gr.Row():
with gr.Column():
shared.gradio['custom_system_message'] = gr.Textbox(value=shared.settings['custom_system_message'], lines=2, label='Custom system message', info='If not empty, will be used instead of the default one.', elem_classes=['add_scrollbar'])
shared.gradio['instruction_template_str'] = gr.Textbox(value='', label='Instruction template', lines=24, info='Change this according to the model/LoRA that you are using. Used in instruct and chat-instruct modes.', elem_classes=['add_scrollbar', 'monospace'])
shared.gradio['instruction_template_str'] = gr.Textbox(value='', label='Instruction template', lines=24, info='This gets autodetected; you usually don\'t need to change it. Used in instruct and chat-instruct modes.', elem_classes=['add_scrollbar', 'monospace'])
with gr.Row():
shared.gradio['send_instruction_to_default'] = gr.Button('Send to default', elem_classes=['small-button'])
shared.gradio['send_instruction_to_notebook'] = gr.Button('Send to notebook', elem_classes=['small-button'])

@@ -180,6 +180,9 @@ def create_event_handlers():
shared.input_params = gradio(inputs)
shared.reload_inputs = gradio(reload_arr)

# Morph HTML updates instead of updating everything
shared.gradio['display'].change(None, gradio('display'), None, js="(text) => handleMorphdomUpdate(text)")

shared.gradio['Generate'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
lambda x: (x, ''), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then(
|
|
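The hunks above introduce a hidden-buffer pattern: the server writes raw chat HTML into an invisible Textbox, and a client-side JS callback attached to that buffer's change event patches the visible chat element (in the webui this is done by handleMorphdomUpdate, which diffs the DOM instead of replacing it). Below is a minimal standalone sketch of the same wiring, assuming Gradio 4.x; the element ids and the trivial innerHTML swap are stand-ins for the real morphdom-based handler.

# Standalone sketch of the hidden-buffer + client-side update pattern.
import gradio as gr

with gr.Blocks() as demo:
    html_display = gr.HTML("<div id='chat'>(empty)</div>")       # visible chat area
    buffer = gr.Textbox(visible=False)                            # hidden buffer the server writes to
    msg = gr.Textbox(label="Message")

    # The Python callback only updates the hidden buffer...
    msg.submit(lambda m: f"<div id='chat'><p>{m}</p></div>", msg, buffer)

    # ...and a JS-only listener moves the buffer's contents into the visible
    # element on the client; a DOM-diffing library could be applied here instead.
    buffer.change(None, buffer, None,
                  js="(t) => { document.getElementById('chat').innerHTML = t; }")

demo.launch()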
@@ -21,7 +21,7 @@ safetensors==0.5.*
scipy
sentencepiece
tensorboard
transformers==4.47.*
transformers==4.48.*
tqdm
wandb
@@ -55,7 +55,7 @@ https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://github.com/oobabooga/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu12torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
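The "; platform_system == ... and python_version == ..." suffixes on the wheel URLs above are PEP 508 environment markers: pip installs a given line only when its marker evaluates to true for the running interpreter, which is how one requirements file serves several OS and Python combinations. A small sketch of how such markers are evaluated, using the packaging library (a pip dependency); the example names are illustrative only.

from packaging.markers import Marker

# Marker expressions taken from the style used in the requirements lines above.
candidates = [
    ("Windows cp311 wheel", 'platform_system == "Windows" and python_version == "3.11"'),
    ("Linux cp310 wheel", 'platform_system == "Linux" and python_version == "3.10"'),
]

for name, marker in candidates:
    applies = Marker(marker).evaluate()  # evaluated against the current machine/interpreter
    print(f"{name}: {'install' if applies else 'skip'}")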
@@ -20,7 +20,7 @@ safetensors==0.5.*
scipy
sentencepiece
tensorboard
transformers==4.47.*
transformers==4.48.*
tqdm
wandb

@@ -20,7 +20,7 @@ safetensors==0.5.*
scipy
sentencepiece
tensorboard
transformers==4.47.*
transformers==4.48.*
tqdm
wandb

@@ -20,7 +20,7 @@ safetensors==0.5.*
scipy
sentencepiece
tensorboard
transformers==4.47.*
transformers==4.48.*
tqdm
wandb

@@ -20,7 +20,7 @@ safetensors==0.5.*
scipy
sentencepiece
tensorboard
transformers==4.47.*
transformers==4.48.*
tqdm
wandb

@@ -20,7 +20,7 @@ safetensors==0.5.*
scipy
sentencepiece
tensorboard
transformers==4.47.*
transformers==4.48.*
tqdm
wandb

@@ -20,7 +20,7 @@ safetensors==0.5.*
scipy
sentencepiece
tensorboard
transformers==4.47.*
transformers==4.48.*
tqdm
wandb

@@ -21,7 +21,7 @@ safetensors==0.5.*
scipy
sentencepiece
tensorboard
transformers==4.47.*
transformers==4.48.*
tqdm
wandb
@@ -55,7 +55,7 @@ https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7+cu121.torch2.4.1-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/exllamav2/releases/download/v0.2.7/exllamav2-0.2.7-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://github.com/oobabooga/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu12torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.2.post1/flash_attn-2.7.2.post1+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"
https://github.com/oobabooga/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu122torch2.4.1cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu122torch2.4.1cxx11abiFALSE-cp310-cp310-win_amd64.whl; platform_system == "Windows" and python_version == "3.10"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.4cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.3/flash_attn-2.7.3+cu12torch2.4cxx11abiFALSE-cp310-cp310-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.10"

@@ -20,7 +20,7 @@ safetensors==0.5.*
scipy
sentencepiece
tensorboard
transformers==4.47.*
transformers==4.48.*
tqdm
wandb
@@ -1,31 +1,38 @@
dark_theme: true
show_controls: true
start_with: ''
mode: chat-instruct
chat_style: cai-chat
chat-instruct_command: |-
  Continue the chat dialogue below. Write a single reply for the character "<|character|>".

  <|prompt|>
prompt-default: QA
prompt-notebook: QA
character: Assistant
name1: You
user_bio: ''
custom_system_message: ''
preset: min_p
max_new_tokens: 512
max_new_tokens_min: 1
max_new_tokens_max: 4096
negative_prompt: ''
seed: -1
truncation_length: 2048
prompt_lookup_num_tokens: 0
max_tokens_second: 0
max_updates_second: 0
prompt_lookup_num_tokens: 0
custom_stopping_strings: ''
custom_token_bans: ''
auto_max_new_tokens: false
auto_max_new_tokens: true
ban_eos_token: false
add_bos_token: true
skip_special_tokens: true
stream: true
static_cache: false
character: Assistant
name1: You
custom_system_message: ''
truncation_length: 2048
seed: -1
custom_stopping_strings: ''
custom_token_bans: ''
negative_prompt: ''
autoload_model: false
dark_theme: true
default_extensions: []
instruction_template_str: |-
  {%- set ns = namespace(found=false) -%}
  {%- for message in messages -%}
@@ -67,11 +74,4 @@ chat_template_str: |-
  {%- endif -%}
  {%- endif -%}
  {%- endfor -%}
chat-instruct_command: |-
  Continue the chat dialogue below. Write a single reply for the character "<|character|>".

  <|prompt|>
autoload_model: false
gallery-items_per_page: 50
gallery-open: false
default_extensions: []
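The template above is the shape of the user-facing settings YAML: flat keys for defaults plus block scalars for the prompt templates. At startup such a file is layered over built-in defaults, with user keys overriding and missing keys falling back. A rough sketch of that layering (not the webui's exact loader); the file name and the small defaults dict are assumptions for illustration.

# Minimal sketch of loading a settings YAML and merging it over defaults.
import yaml  # pip install pyyaml

DEFAULTS = {
    'dark_theme': True,
    'mode': 'chat-instruct',
    'max_new_tokens': 512,
    'auto_max_new_tokens': True,
}

def load_settings(path='settings.yaml'):
    try:
        with open(path, encoding='utf-8') as f:
            user = yaml.safe_load(f) or {}
    except FileNotFoundError:
        user = {}
    merged = dict(DEFAULTS)
    merged.update(user)  # user-provided values win over defaults
    return merged

print(load_settings())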
@@ -19,7 +19,7 @@ esac
INSTALL_DIR="$(pwd)/installer_files"
CONDA_ROOT_PREFIX="$(pwd)/installer_files/conda"
INSTALL_ENV_DIR="$(pwd)/installer_files/env"
MINICONDA_DOWNLOAD_URL="https://repo.anaconda.com/miniconda/Miniconda3-py310_23.3.1-0-Linux-${OS_ARCH}.sh"
MINICONDA_DOWNLOAD_URL="https://repo.anaconda.com/miniconda/Miniconda3-py311_24.11.1-0-Linux-${OS_ARCH}.sh"
conda_exists="F"

# figure out whether git and conda needs to be installed
@@ -19,7 +19,7 @@ esac
INSTALL_DIR="$(pwd)/installer_files"
CONDA_ROOT_PREFIX="$(pwd)/installer_files/conda"
INSTALL_ENV_DIR="$(pwd)/installer_files/env"
MINICONDA_DOWNLOAD_URL="https://repo.anaconda.com/miniconda/Miniconda3-py310_23.3.1-0-MacOSX-${OS_ARCH}.sh"
MINICONDA_DOWNLOAD_URL="https://repo.anaconda.com/miniconda/Miniconda3-py311_24.11.1-0-MacOSX-${OS_ARCH}.sh"
conda_exists="F"

# figure out whether git and conda needs to be installed
@@ -25,8 +25,8 @@ set TEMP=%cd%\installer_files
set INSTALL_DIR=%cd%\installer_files
set CONDA_ROOT_PREFIX=%cd%\installer_files\conda
set INSTALL_ENV_DIR=%cd%\installer_files\env
set MINICONDA_DOWNLOAD_URL=https://repo.anaconda.com/miniconda/Miniconda3-py310_23.3.1-0-Windows-x86_64.exe
set MINICONDA_CHECKSUM=307194e1f12bbeb52b083634e89cc67db4f7980bd542254b43d3309eaf7cb358
set MINICONDA_DOWNLOAD_URL=https://repo.anaconda.com/miniconda/Miniconda3-py311_24.11.1-0-Windows-x86_64.exe
set MINICONDA_CHECKSUM=43dcbcc315ff91edf959e002cd2f1ede38c64b999fefcc951bccf2ed69c9e8bb
set conda_exists=F

@rem figure out whether git and conda needs to be installed

@@ -41,10 +41,18 @@ if "%conda_exists%" == "F" (
mkdir "%INSTALL_DIR%"
call curl -Lk "%MINICONDA_DOWNLOAD_URL%" > "%INSTALL_DIR%\miniconda_installer.exe" || ( echo. && echo Miniconda failed to download. && goto end )

:: Try CertUtil first
for /f %%a in ('CertUtil -hashfile "%INSTALL_DIR%\miniconda_installer.exe" SHA256 ^| find /i /v " " ^| find /i "%MINICONDA_CHECKSUM%"') do (
set "output=%%a"
)

:: If CertUtil fails, try PowerShell
if not defined output (
for /f %%a in ('powershell -Command "if((Get-FileHash \"%INSTALL_DIR%\miniconda_installer.exe\" -Algorithm SHA256).Hash -eq ''%MINICONDA_CHECKSUM%''){echo true}"') do (
set "output=%%a"
)
)

if not defined output (
echo The checksum verification for miniconda_installer.exe has failed.
del "%INSTALL_DIR%\miniconda_installer.exe"
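The batch hunk above pins the installer's SHA-256 and refuses to run it on a mismatch, trying CertUtil first and falling back to PowerShell's Get-FileHash. For reference, the same check expressed as a small Python sketch; the installer path is an assumption, while the expected digest is the one pinned in the script.

# Sketch of SHA-256 verification equivalent to the CertUtil / Get-FileHash check.
import hashlib
from pathlib import Path

EXPECTED = "43dcbcc315ff91edf959e002cd2f1ede38c64b999fefcc951bccf2ed69c9e8bb"

def sha256_of(path: Path, chunk_size: int = 1 << 20) -> str:
    h = hashlib.sha256()
    with open(path, "rb") as f:
        for chunk in iter(lambda: f.read(chunk_size), b""):
            h.update(chunk)
    return h.hexdigest()

installer = Path("installer_files/miniconda_installer.exe")
if installer.exists():
    if sha256_of(installer) == EXPECTED:
        print("checksum OK")
    else:
        print("checksum mismatch - deleting installer")
        installer.unlink()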
@@ -23,4 +23,4 @@ source "$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh" # otherwise conda complains a
conda activate "$INSTALL_ENV_DIR"

# update installer env
python one_click.py --update-wizard && echo -e "\nDone!"
python one_click.py --update-wizard && echo -e "\nHave a great day!"
@@ -23,4 +23,4 @@ source "$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh" # otherwise conda complains a
conda activate "$INSTALL_ENV_DIR"

# update installer env
python one_click.py --update-wizard && echo -e "\nDone!"
python one_click.py --update-wizard && echo -e "\nHave a great day!"
@@ -30,7 +30,7 @@ call "%CONDA_ROOT_PREFIX%\condabin\conda.bat" activate "%INSTALL_ENV_DIR%" || (
@rem update installer env
call python one_click.py --update-wizard && (
echo.
echo Done!
echo Have a great day!
)

:end
wsl.sh
@@ -26,7 +26,7 @@ fi
INSTALL_DIR="$INSTALL_DIR_PREFIX/text-generation-webui"
CONDA_ROOT_PREFIX="$INSTALL_DIR/installer_files/conda"
INSTALL_ENV_DIR="$INSTALL_DIR/installer_files/env"
MINICONDA_DOWNLOAD_URL="https://repo.anaconda.com/miniconda/Miniconda3-py310_23.3.1-0-Linux-x86_64.sh"
MINICONDA_DOWNLOAD_URL="https://repo.anaconda.com/miniconda/Miniconda3-py311_24.11.1-0-Linux-x86_64.sh"
conda_exists="F"

# environment isolation