Merge pull request #7057 from oobabooga/dev

Merge dev branch
This commit is contained in:
oobabooga 2025-06-10 23:08:44 -03:00 committed by GitHub
commit 1e96dcf369
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
55 changed files with 1621 additions and 906 deletions

View file

@@ -160,16 +160,19 @@ jobs:
rm requirements_cuda_temp.txt
fi
# 6. Create ZIP file
# 6. Move up and rename folder to include version
cd ..
VERSION_CLEAN="${VERSION#v}"
mv text-generation-webui text-generation-webui-${VERSION_CLEAN}
# 7. Create ZIP file
ZIP_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}-cuda${CUDA_VERSION}.zip"
echo "Creating archive: $ZIP_NAME"
if [[ "$RUNNER_OS" == "Windows" ]]; then
powershell -Command "Compress-Archive -Path text-generation-webui -DestinationPath $ZIP_NAME"
powershell -Command "Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ZIP_NAME"
else
zip -r "$ZIP_NAME" text-generation-webui
zip -r "$ZIP_NAME" text-generation-webui-${VERSION_CLEAN}
fi
- name: Upload files to a GitHub release

View file

@@ -146,16 +146,19 @@ jobs:
echo "Installing Python packages from $REQ_FILE..."
$PIP_PATH install --target="./$PACKAGES_PATH" -r "$REQ_FILE"
# 6. Create ZIP file
# 5. Move up and rename folder to include version
cd ..
VERSION_CLEAN="${VERSION#v}"
mv text-generation-webui text-generation-webui-${VERSION_CLEAN}
# 6. Create ZIP file
ZIP_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}-vulkan.zip"
echo "Creating archive: $ZIP_NAME"
if [[ "$RUNNER_OS" == "Windows" ]]; then
powershell -Command "Compress-Archive -Path text-generation-webui -DestinationPath $ZIP_NAME"
powershell -Command "Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ZIP_NAME"
else
zip -r "$ZIP_NAME" text-generation-webui
zip -r "$ZIP_NAME" text-generation-webui-${VERSION_CLEAN}
fi
- name: Upload files to a GitHub release

View file

@@ -170,16 +170,19 @@ jobs:
echo "Installing Python packages from $REQ_FILE..."
$PIP_PATH install --target="./$PACKAGES_PATH" -r "$REQ_FILE"
# 5. Create ZIP file
# 5. Move up and rename folder to include version
cd ..
VERSION_CLEAN="${VERSION#v}"
mv text-generation-webui text-generation-webui-${VERSION_CLEAN}
# 6. Create ZIP file
ZIP_NAME="textgen-portable-${VERSION_CLEAN}-${PLATFORM}.zip"
echo "Creating archive: $ZIP_NAME"
if [[ "$RUNNER_OS" == "Windows" ]]; then
powershell -Command "Compress-Archive -Path text-generation-webui -DestinationPath $ZIP_NAME"
powershell -Command "Compress-Archive -Path text-generation-webui-${VERSION_CLEAN} -DestinationPath $ZIP_NAME"
else
zip -r "$ZIP_NAME" text-generation-webui
zip -r "$ZIP_NAME" text-generation-webui-${VERSION_CLEAN}
fi
- name: Upload files to a GitHub release
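
The same three-step pattern now appears in all three portable-build workflows (the CUDA, Vulkan, and plain builds): strip the leading "v" from the tag with the `${VERSION#v}` parameter expansion, rename the source folder so the version lands in the extracted directory name, and only then build the archive. As a rough illustration, the sketch below restates that step in Python; `package_portable` and its arguments are invented for this sketch and are not part of the workflows.

import shutil
from pathlib import Path

def package_portable(version: str, platform: str, flavor: str = "") -> Path:
    version_clean = version.removeprefix("v")  # mirrors VERSION_CLEAN="${VERSION#v}"
    src = Path("text-generation-webui")
    dst = Path(f"text-generation-webui-{version_clean}")
    src.rename(dst)  # the extracted folder now carries the version
    zip_base = f"textgen-portable-{version_clean}-{platform}{flavor}"
    # shutil.make_archive appends ".zip" and recurses like `zip -r`
    shutil.make_archive(zip_base, "zip", root_dir=".", base_dir=dst.name)
    return Path(zip_base + ".zip")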

View file

@@ -16,7 +16,7 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github.
- Easy setup: Choose between **portable builds** (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or the one-click installer that creates a self-contained `installer_files` directory.
- 100% offline and private, with zero telemetry, external resources, or remote update requests.
- Automatic prompt formatting using Jinja2 templates. You never need to worry about prompt formats.
- **File attachments**: Upload text files and PDF documents to talk about their contents.
- **File attachments**: Upload text files, PDF documents, and .docx files to talk about their contents.
- **Web search**: Optionally search the internet with LLM-generated queries to add context to the conversation.
- Aesthetic UI with dark and light themes.
- `instruct` mode for instruction-following (like ChatGPT), and `chat-instruct`/`chat` modes for talking to custom characters.

View file

@@ -17,6 +17,14 @@
color: #d1d5db !important;
}
.chat .message-body :is(th, td) {
border-color: #40404096 !important;
}
.dark .chat .message-body :is(th, td) {
border-color: #ffffff75 !important;
}
.chat .message-body :is(p, ul, ol) {
margin: 1.25em 0 !important;
}

View file

@@ -1,11 +1,11 @@
:root {
--darker-gray: #202123;
--dark-gray: #2A2B32;
--light-gray: #373943;
--darker-gray: #1C1C1D;
--dark-gray: #212125;
--light-gray: #2C2E34;
--light-theme-gray: #f9fbff;
--border-color-dark: #525252;
--header-width: 112px;
--selected-item-color-dark: #2E2F38;
--selected-item-color-dark: #282930;
}
@font-face {
@@ -53,7 +53,7 @@ div.svelte-iyf88w {
}
.refresh-button {
max-width: 4.4em;
max-width: none;
min-width: 2.2em !important;
height: 39.594px;
align-self: end;
@@ -62,6 +62,10 @@ div.svelte-iyf88w {
flex: none;
}
.refresh-button-medium {
max-width: 4.4em;
}
.refresh-button-small {
max-width: 2.2em;
}
@@ -265,7 +269,7 @@ button {
.dark .pretty_scrollbar::-webkit-scrollbar-thumb,
.dark .pretty_scrollbar::-webkit-scrollbar-thumb:hover {
background: rgb(255 255 255 / 10%);
background: rgb(255 255 255 / 6.25%);
border-radius: 10px;
}
@@ -582,7 +586,6 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
#chat-input {
padding: 0;
padding-top: 18px;
background: transparent;
border: none;
}
@@ -661,37 +664,12 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
}
}
#show-controls {
position: absolute;
background-color: transparent;
border: 0 !important;
border-radius: 0;
}
#show-controls label {
z-index: 1000;
position: absolute;
right: 30px;
top: 10px;
white-space: nowrap;
overflow: hidden;
text-overflow: ellipsis;
}
.dark #show-controls span {
color: var(--neutral-400);
}
#show-controls span {
color: var(--neutral-600);
}
#typing-container {
display: none;
position: absolute;
background-color: transparent;
left: -2px;
top: 4px;
left: 23px;
top: -5px;
padding: var(--block-padding);
}
@@ -767,16 +745,13 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
justify-content: space-between;
margin: 0 !important;
height: 36px;
border-color: transparent !important;
}
.hover-menu button:not(#clear-history-confirm) {
border-bottom: 0 !important;
}
.hover-menu button:not(#clear-history-confirm):last-child {
border-bottom: var(--button-border-width) solid var(--border-color-primary) !important;
}
.hover-menu button:hover {
background: #dbeafe !important;
}
@@ -785,6 +760,37 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
background: var(--selected-item-color-dark) !important;
}
#show-controls {
background-color: white;
border-color: transparent !important;
height: 36px;
border-radius: 0;
border-bottom: 0 !important;
padding-top: 3px;
padding-left: 4px;
display: flex;
font-weight: normal;
}
.dark #show-controls {
background-color: var(--darker-gray);
}
#show-controls label {
display: flex;
flex-direction: row-reverse;
justify-content: start;
width: 100%;
padding-right: 12px;
gap: 10px;
font-weight: 600;
color: var(--button-secondary-text-color);
}
#show-controls label input {
margin-top: 4px;
}
.transparent-substring {
opacity: 0.333;
}
@@ -1326,8 +1332,13 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* {
overflow: hidden;
}
.thinking-content:focus, .thinking-header:focus {
outline: 0 !important;
}
.dark .thinking-block {
background-color: var(--darker-gray);
background-color: transparent;
border: 1px solid var(--input-border-color);
}
.thinking-header {
@@ -1555,3 +1566,66 @@ strong {
button:focus {
outline: none;
}
/* Fix extra gaps for hidden elements on the right sidebar */
.svelte-sa48pu.stretch:has(> .hidden:only-child) {
display: none;
}
.delete-container {
position: absolute;
right: 8px;
display: flex;
gap: 6px;
opacity: 0;
transition: opacity 0.2s;
margin-left: 0;
}
.chat-label-with-delete {
position: relative;
padding-right: 60px;
}
.trash-btn {
border: none;
background: none;
cursor: pointer;
padding: 2px;
opacity: 0.7;
}
.cancel-btn {
border: none;
background: #ef4444;
color: white;
cursor: pointer;
width: 20px;
height: 20px;
border-radius: 2px;
font-family: monospace;
font-size: 12px;
align-items: center;
justify-content: center;
display: none;
}
.confirm-btn {
border: none;
background: #22c55e;
color: white;
cursor: pointer;
width: 20px;
height: 20px;
border-radius: 2px;
font-family: monospace;
font-size: 12px;
align-items: center;
justify-content: center;
display: none;
}
/* Disable hover effects while scrolling */
.chat-parent.scrolling * {
pointer-events: none !important;
}

View file

@@ -32,6 +32,7 @@ class ModelDownloader:
self.max_retries = max_retries
self.session = self.get_session()
self._progress_bar_slots = None
self.progress_queue = None
def get_session(self):
session = requests.Session()
@@ -218,33 +219,45 @@ class ModelDownloader:
max_retries = self.max_retries
attempt = 0
file_downloaded_count_for_progress = 0
try:
while attempt < max_retries:
attempt += 1
session = self.session
headers = {}
mode = 'wb'
current_file_size_on_disk = 0
try:
if output_path.exists() and not start_from_scratch:
# Resume download
r = session.get(url, stream=True, timeout=20)
total_size = int(r.headers.get('content-length', 0))
if output_path.stat().st_size >= total_size:
current_file_size_on_disk = output_path.stat().st_size
r_head = session.head(url, timeout=20)
r_head.raise_for_status()
total_size = int(r_head.headers.get('content-length', 0))
if current_file_size_on_disk >= total_size and total_size > 0:
if self.progress_queue is not None and total_size > 0:
self.progress_queue.put((1.0, str(filename)))
return
headers = {'Range': f'bytes={output_path.stat().st_size}-'}
headers = {'Range': f'bytes={current_file_size_on_disk}-'}
mode = 'ab'
with session.get(url, stream=True, headers=headers, timeout=30) as r:
r.raise_for_status() # If status is not 2xx, raise an error
total_size = int(r.headers.get('content-length', 0))
block_size = 1024 * 1024 # 1MB
r.raise_for_status()
total_size_from_stream = int(r.headers.get('content-length', 0))
if mode == 'ab':
effective_total_size = current_file_size_on_disk + total_size_from_stream
else:
effective_total_size = total_size_from_stream
filename_str = str(filename) # Convert PosixPath to string if necessary
block_size = 1024 * 1024
filename_str = str(filename)
tqdm_kwargs = {
'total': total_size,
'total': effective_total_size,
'initial': current_file_size_on_disk if mode == 'ab' else 0,
'unit': 'B',
'unit_scale': True,
'unit_divisor': 1024,
@@ -261,16 +274,20 @@ class ModelDownloader:
})
with open(output_path, mode) as f:
if mode == 'ab':
f.seek(current_file_size_on_disk)
with tqdm.tqdm(**tqdm_kwargs) as t:
count = 0
file_downloaded_count_for_progress = current_file_size_on_disk
for data in r.iter_content(block_size):
f.write(data)
t.update(len(data))
if total_size != 0 and self.progress_bar is not None:
count += len(data)
self.progress_bar(float(count) / float(total_size), f"{filename_str}")
if effective_total_size != 0 and self.progress_queue is not None:
file_downloaded_count_for_progress += len(data)
progress_fraction = float(file_downloaded_count_for_progress) / float(effective_total_size)
self.progress_queue.put((progress_fraction, filename_str))
break
break # Exit loop if successful
except (RequestException, ConnectionError, Timeout) as e:
print(f"Error downloading {filename}: {e}.")
print(f"That was attempt {attempt}/{max_retries}.", end=' ')
@@ -295,10 +312,9 @@ class ModelDownloader:
finally:
print(f"\nDownload of {len(file_list)} files to {output_folder} completed.")
def download_model_files(self, model, branch, links, sha256, output_folder, progress_bar=None, start_from_scratch=False, threads=4, specific_file=None, is_llamacpp=False):
self.progress_bar = progress_bar
def download_model_files(self, model, branch, links, sha256, output_folder, progress_queue=None, start_from_scratch=False, threads=4, specific_file=None, is_llamacpp=False):
self.progress_queue = progress_queue
# Create the folder and write the metadata
output_folder.mkdir(parents=True, exist_ok=True)
if not is_llamacpp:
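
Read together, the changes above replace the old Gradio `progress_bar` callback with a thread-safe queue of `(fraction, filename)` tuples and make resumption size-aware: a HEAD request establishes the total size up front, and when appending, the stream's `content-length` (which only covers the remaining range) is added to the bytes already on disk. Below is a self-contained sketch of that flow, assuming only `requests`; `resume_download` is an invented name, not the class method itself.

import requests
from pathlib import Path

def resume_download(url, output_path: Path, progress_queue=None):
    """Sketch: size the remote file with HEAD, then fetch only the missing
    byte range, reporting (fraction, filename) tuples to an optional queue."""
    on_disk = output_path.stat().st_size if output_path.exists() else 0
    head = requests.head(url, timeout=20)
    head.raise_for_status()
    total = int(head.headers.get('content-length', 0))
    if total and on_disk >= total:
        if progress_queue is not None:
            progress_queue.put((1.0, output_path.name))  # nothing left to do
        return
    headers = {'Range': f'bytes={on_disk}-'} if on_disk else {}
    with requests.get(url, stream=True, headers=headers, timeout=30) as r:
        r.raise_for_status()
        # content-length of a ranged response covers only the remainder
        effective_total = on_disk + int(r.headers.get('content-length', 0))
        done = on_disk
        with open(output_path, 'ab' if on_disk else 'wb') as f:
            for chunk in r.iter_content(1024 * 1024):  # 1 MB blocks
                f.write(chunk)
                done += len(chunk)
                if effective_total and progress_queue is not None:
                    progress_queue.put((done / effective_total, output_path.name))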

View file

@@ -6,4 +6,15 @@ function toggleDarkMode() {
} else {
currentCSS.setAttribute("href", "file/css/highlightjs/github-dark.min.css");
}
// Re-highlight all code blocks once stylesheet loads
currentCSS.onload = function() {
const messageBodies = document.getElementById("chat").querySelectorAll(".message-body");
messageBodies.forEach((messageBody) => {
const codeBlocks = messageBody.querySelectorAll("pre code");
codeBlocks.forEach((codeBlock) => {
hljs.highlightElement(codeBlock);
});
});
};
}

View file

@@ -95,6 +95,21 @@ function startEditing(messageElement, messageBody, isUserMessage) {
editingInterface.textarea.focus();
editingInterface.textarea.setSelectionRange(rawText.length, rawText.length);
// Temporarily mark as scrolled to prevent auto-scroll
const wasScrolled = window.isScrolled;
window.isScrolled = true;
// Scroll the textarea into view
editingInterface.textarea.scrollIntoView({
behavior: "smooth",
block: "center"
});
// Restore the original scroll state after animation
setTimeout(() => {
window.isScrolled = wasScrolled;
}, 500);
// Setup event handlers
setupEditingHandlers(editingInterface.textarea, messageElement, originalHTML, messageBody, isUserMessage);
}
@@ -229,10 +244,23 @@ function removeLastClick() {
document.getElementById("Remove-last").click();
}
function handleMorphdomUpdate(text) {
function handleMorphdomUpdate(data) {
// Determine target element and use it as query scope
var target_element, target_html;
if (data.last_message_only) {
const childNodes = document.getElementsByClassName("messages")[0].childNodes;
target_element = childNodes[childNodes.length - 1];
target_html = data.html;
} else {
target_element = document.getElementById("chat").parentNode;
target_html = "<div class=\"prose svelte-1ybaih5\">" + data.html + "</div>";
}
const queryScope = target_element;
// Track open blocks
const openBlocks = new Set();
document.querySelectorAll(".thinking-block").forEach(block => {
queryScope.querySelectorAll(".thinking-block").forEach(block => {
const blockId = block.getAttribute("data-block-id");
// If block exists and is open, add to open set
if (blockId && block.hasAttribute("open")) {
@@ -242,7 +270,7 @@ function handleMorphdomUpdate(text) {
// Store scroll positions for any open blocks
const scrollPositions = {};
document.querySelectorAll(".thinking-block[open]").forEach(block => {
queryScope.querySelectorAll(".thinking-block[open]").forEach(block => {
const content = block.querySelector(".thinking-content");
const blockId = block.getAttribute("data-block-id");
if (content && blockId) {
@@ -255,8 +283,8 @@ function handleMorphdomUpdate(text) {
});
morphdom(
document.getElementById("chat").parentNode,
"<div class=\"prose svelte-1ybaih5\">" + text + "</div>",
target_element,
target_html,
{
onBeforeElUpdated: function(fromEl, toEl) {
// Preserve code highlighting
@@ -307,7 +335,7 @@ function handleMorphdomUpdate(text) {
);
// Add toggle listeners for new blocks
document.querySelectorAll(".thinking-block").forEach(block => {
queryScope.querySelectorAll(".thinking-block").forEach(block => {
if (!block._hasToggleListener) {
block.addEventListener("toggle", function(e) {
if (this.open) {

View file

@@ -145,17 +145,26 @@ typingSibling.insertBefore(typing, typingSibling.childNodes[2]);
const targetElement = document.getElementById("chat").parentNode.parentNode.parentNode;
targetElement.classList.add("pretty_scrollbar");
targetElement.classList.add("chat-parent");
let isScrolled = false;
window.isScrolled = false;
let scrollTimeout;
targetElement.addEventListener("scroll", function() {
// Add scrolling class to disable hover effects
targetElement.classList.add("scrolling");
let diff = targetElement.scrollHeight - targetElement.clientHeight;
if(Math.abs(targetElement.scrollTop - diff) <= 10 || diff == 0) {
isScrolled = false;
window.isScrolled = false;
} else {
isScrolled = true;
window.isScrolled = true;
}
doSyntaxHighlighting();
// Clear previous timeout and set new one
clearTimeout(scrollTimeout);
scrollTimeout = setTimeout(() => {
targetElement.classList.remove("scrolling");
doSyntaxHighlighting(); // Only run after scrolling stops
}, 150);
});
@@ -173,7 +182,7 @@ const observer = new MutationObserver(function(mutations) {
doSyntaxHighlighting();
if (!isScrolled && targetElement.scrollTop !== targetElement.scrollHeight) {
if (!window.isScrolled && targetElement.scrollTop !== targetElement.scrollHeight) {
targetElement.scrollTop = targetElement.scrollHeight;
}
@@ -184,7 +193,7 @@ const observer = new MutationObserver(function(mutations) {
const prevSibling = lastChild?.previousElementSibling;
if (lastChild && prevSibling) {
lastChild.style.setProperty("margin-bottom",
`max(0px, calc(max(70vh, 100vh - ${prevSibling.offsetHeight}px - 102px) - ${lastChild.offsetHeight}px))`,
`max(0px, calc(max(70vh, 100vh - ${prevSibling.offsetHeight}px - 84px) - ${lastChild.offsetHeight}px))`,
"important"
);
}
@@ -217,7 +226,7 @@ function isElementVisibleOnScreen(element) {
}
function doSyntaxHighlighting() {
const messageBodies = document.querySelectorAll(".message-body");
const messageBodies = document.getElementById("chat").querySelectorAll(".message-body");
if (messageBodies.length > 0) {
observer.disconnect();
@@ -229,6 +238,7 @@ function doSyntaxHighlighting() {
codeBlocks.forEach((codeBlock) => {
hljs.highlightElement(codeBlock);
codeBlock.setAttribute("data-highlighted", "true");
codeBlock.classList.add("pretty_scrollbar");
});
renderMathInElement(messageBody, {
@@ -277,7 +287,7 @@ for (i = 0; i < slimDropdownElements.length; i++) {
// The show/hide events were adapted from:
// https://github.com/SillyTavern/SillyTavern/blob/6c8bd06308c69d51e2eb174541792a870a83d2d6/public/script.js
//------------------------------------------------
var buttonsInChat = document.querySelectorAll("#chat-tab #chat-buttons button");
var buttonsInChat = document.querySelectorAll("#chat-tab #chat-buttons button, #chat-tab #chat-buttons #show-controls");
var button = document.getElementById("hover-element-button");
var menu = document.getElementById("hover-menu");
var istouchscreen = (navigator.maxTouchPoints > 0) || "ontouchstart" in document.documentElement;
@@ -298,18 +308,21 @@ if (buttonsInChat.length > 0) {
const thisButton = buttonsInChat[i];
menu.appendChild(thisButton);
thisButton.addEventListener("click", () => {
hideMenu();
});
// Only apply transformations to button elements
if (thisButton.tagName.toLowerCase() === "button") {
thisButton.addEventListener("click", () => {
hideMenu();
});
const buttonText = thisButton.textContent;
const matches = buttonText.match(/(\(.*?\))/);
const buttonText = thisButton.textContent;
const matches = buttonText.match(/(\(.*?\))/);
if (matches && matches.length > 1) {
// Apply the transparent-substring class to the matched substring
const substring = matches[1];
const newText = buttonText.replace(substring, `&nbsp;<span class="transparent-substring">${substring.slice(1, -1)}</span>`);
thisButton.innerHTML = newText;
if (matches && matches.length > 1) {
// Apply the transparent-substring class to the matched substring
const substring = matches[1];
const newText = buttonText.replace(substring, `&nbsp;<span class="transparent-substring">${substring.slice(1, -1)}</span>`);
thisButton.innerHTML = newText;
}
}
}
}
@@ -382,21 +395,10 @@ document.addEventListener("click", function (event) {
}
});
//------------------------------------------------
// Relocate the "Show controls" checkbox
//------------------------------------------------
var elementToMove = document.getElementById("show-controls");
var parent = elementToMove.parentNode;
for (var i = 0; i < 2; i++) {
parent = parent.parentNode;
}
parent.insertBefore(elementToMove, parent.firstChild);
//------------------------------------------------
// Position the chat input
//------------------------------------------------
document.getElementById("show-controls").parentNode.classList.add("chat-input-positioned");
document.getElementById("chat-input-row").classList.add("chat-input-positioned");
//------------------------------------------------
// Focus on the chat input
@@ -562,6 +564,7 @@ function moveToChatTab() {
newParent.insertBefore(grandParent, newParent.children[newPosition]);
document.getElementById("save-character").style.display = "none";
document.getElementById("restore-character").style.display = "none";
}
function restoreOriginalPosition() {
@@ -573,6 +576,7 @@ function restoreOriginalPosition() {
}
document.getElementById("save-character").style.display = "";
document.getElementById("restore-character").style.display = "";
movedElement.style.display = "";
movedElement.children[0].style.minWidth = "";
}
@@ -872,3 +876,123 @@ function navigateLastAssistantMessage(direction) {
return false;
}
//------------------------------------------------
// Paste Handler for Long Text
//------------------------------------------------
const MAX_PLAIN_TEXT_LENGTH = 2500;
function setupPasteHandler() {
const textbox = document.querySelector("#chat-input textarea[data-testid=\"textbox\"]");
const fileInput = document.querySelector("#chat-input input[data-testid=\"file-upload\"]");
if (!textbox || !fileInput) {
setTimeout(setupPasteHandler, 500);
return;
}
textbox.addEventListener("paste", async (event) => {
const text = event.clipboardData?.getData("text");
if (text && text.length > MAX_PLAIN_TEXT_LENGTH && document.querySelector("#paste_to_attachment input[data-testid=\"checkbox\"]")?.checked) {
event.preventDefault();
const file = new File([text], "pasted_text.txt", {
type: "text/plain",
lastModified: Date.now()
});
const dataTransfer = new DataTransfer();
dataTransfer.items.add(file);
fileInput.files = dataTransfer.files;
fileInput.dispatchEvent(new Event("change", { bubbles: true }));
}
});
}
if (document.readyState === "loading") {
document.addEventListener("DOMContentLoaded", setupPasteHandler);
} else {
setupPasteHandler();
}
//------------------------------------------------
// Tooltips
//------------------------------------------------
// File upload button
document.querySelector("#chat-input .upload-button").title = "Upload text files, PDFs, and DOCX documents";
// Activate web search
document.getElementById("web-search").title = "Search the internet with DuckDuckGo";
//------------------------------------------------
// Inline icons for deleting past chats
//------------------------------------------------
function addMiniDeletes() {
document.querySelectorAll("#past-chats label:not(.has-delete)").forEach(label => {
const container = document.createElement("span");
container.className = "delete-container";
label.classList.add("chat-label-with-delete");
const trashBtn = document.createElement("button");
trashBtn.innerHTML = "🗑️";
trashBtn.className = "trash-btn";
const cancelBtn = document.createElement("button");
cancelBtn.innerHTML = "✕";
cancelBtn.className = "cancel-btn";
const confirmBtn = document.createElement("button");
confirmBtn.innerHTML = "✓";
confirmBtn.className = "confirm-btn";
label.addEventListener("mouseenter", () => {
container.style.opacity = "1";
});
label.addEventListener("mouseleave", () => {
container.style.opacity = "0";
});
trashBtn.onclick = (e) => {
e.stopPropagation();
label.querySelector("input").click();
document.querySelector("#delete_chat").click();
trashBtn.style.display = "none";
cancelBtn.style.display = "flex";
confirmBtn.style.display = "flex";
};
cancelBtn.onclick = (e) => {
e.stopPropagation();
document.querySelector("#delete_chat-cancel").click();
resetButtons();
};
confirmBtn.onclick = (e) => {
e.stopPropagation();
document.querySelector("#delete_chat-confirm").click();
resetButtons();
};
function resetButtons() {
trashBtn.style.display = "inline";
cancelBtn.style.display = "none";
confirmBtn.style.display = "none";
}
container.append(trashBtn, cancelBtn, confirmBtn);
label.appendChild(container);
label.classList.add("has-delete");
});
}
new MutationObserver(() => addMiniDeletes()).observe(
document.querySelector("#past-chats"),
{childList: true, subtree: true}
);
addMiniDeletes();

View file

@@ -223,7 +223,10 @@ def generate_chat_prompt(user_input, state, **kwargs):
for attachment in metadata[user_key]["attachments"]:
filename = attachment.get("name", "file")
content = attachment.get("content", "")
attachments_text += f"\nName: {filename}\nContents:\n\n=====\n{content}\n=====\n\n"
if attachment.get("type") == "text/html" and attachment.get("url"):
attachments_text += f"\nName: {filename}\nURL: {attachment['url']}\nContents:\n\n=====\n{content}\n=====\n\n"
else:
attachments_text += f"\nName: {filename}\nContents:\n\n=====\n{content}\n=====\n\n"
if attachments_text:
enhanced_user_msg = f"{user_msg}\n\nATTACHMENTS:\n{attachments_text}"
@@ -250,7 +253,10 @@ def generate_chat_prompt(user_input, state, **kwargs):
for attachment in metadata[user_key]["attachments"]:
filename = attachment.get("name", "file")
content = attachment.get("content", "")
attachments_text += f"\nName: {filename}\nContents:\n\n=====\n{content}\n=====\n\n"
if attachment.get("type") == "text/html" and attachment.get("url"):
attachments_text += f"\nName: {filename}\nURL: {attachment['url']}\nContents:\n\n=====\n{content}\n=====\n\n"
else:
attachments_text += f"\nName: {filename}\nContents:\n\n=====\n{content}\n=====\n\n"
if attachments_text:
user_input = f"{user_input}\n\nATTACHMENTS:\n{attachments_text}"
@@ -500,6 +506,9 @@ def add_message_attachment(history, row_idx, file_path, is_user=True):
# Process PDF file
content = extract_pdf_text(path)
file_type = "application/pdf"
elif file_extension == '.docx':
content = extract_docx_text(path)
file_type = "application/docx"
else:
# Default handling for text files
with open(path, 'r', encoding='utf-8') as f:
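
Taken together with the two prompt-building hunks above, this dispatch implies an attachment metadata shape along the following lines. The example values are invented, and `render_attachment` merely factors out the shared formatting for illustration.

attachment = {
    "name": "report.docx",               # attachment.get("name", "file")
    "type": "application/docx",
    "content": "Quarterly results ...",  # text produced by extract_docx_text()
}
web_result = {
    "name": "Example page",
    "type": "text/html",
    "url": "https://example.com",        # only web-search results carry a URL
    "content": "Page text ...",
}

def render_attachment(att):
    if att.get("type") == "text/html" and att.get("url"):
        return f"\nName: {att['name']}\nURL: {att['url']}\nContents:\n\n=====\n{att['content']}\n=====\n\n"
    return f"\nName: {att['name']}\nContents:\n\n=====\n{att['content']}\n=====\n\n"
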
@@ -538,6 +547,53 @@ def extract_pdf_text(pdf_path):
return f"[Error extracting PDF text: {str(e)}]"
def extract_docx_text(docx_path):
"""
Extract text from a .docx file, including headers,
body (paragraphs and tables), and footers.
"""
try:
import docx
doc = docx.Document(docx_path)
parts = []
# 1) Extract non-empty header paragraphs from each section
for section in doc.sections:
for para in section.header.paragraphs:
text = para.text.strip()
if text:
parts.append(text)
# 2) Extract body blocks (paragraphs and tables) in document order
parent_elm = doc.element.body
for child in parent_elm.iterchildren():
if isinstance(child, docx.oxml.text.paragraph.CT_P):
para = docx.text.paragraph.Paragraph(child, doc)
text = para.text.strip()
if text:
parts.append(text)
elif isinstance(child, docx.oxml.table.CT_Tbl):
table = docx.table.Table(child, doc)
for row in table.rows:
cells = [cell.text.strip() for cell in row.cells]
parts.append("\t".join(cells))
# 3) Extract non-empty footer paragraphs from each section
for section in doc.sections:
for para in section.footer.paragraphs:
text = para.text.strip()
if text:
parts.append(text)
return "\n".join(parts)
except Exception as e:
logger.error(f"Error extracting text from DOCX: {e}")
return f"[Error extracting DOCX text: {str(e)}]"
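
A note on the design: walking `doc.element.body.iterchildren()` instead of reading `doc.paragraphs` and `doc.tables` separately preserves the original interleaving of paragraphs and tables, which python-docx otherwise exposes as two independent collections. A minimal usage sketch, assuming python-docx is installed and the file path (invented here) exists:

from pathlib import Path

text = extract_docx_text(Path("user_data/example.docx"))  # hypothetical file
print(text.splitlines()[:3])  # header lines first, then body blocks, then footers
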
def generate_search_query(user_message, state):
"""Generate a search query from user message using the LLM"""
# Augment the user message with search instruction
@@ -554,7 +610,12 @@ def generate_search_query(user_message, state):
query = ""
for reply in generate_reply(formatted_prompt, search_state, stopping_strings=[], is_chat=True):
query = reply.strip()
query = reply
# Strip and remove surrounding quotes if present
query = query.strip()
if len(query) >= 2 and query.startswith('"') and query.endswith('"'):
query = query[1:-1]
return query
@@ -660,7 +721,7 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
# Add timestamp for assistant's response at the start of generation
row_idx = len(output['internal']) - 1
update_message_metadata(output['metadata'], "assistant", row_idx, timestamp=get_current_timestamp())
update_message_metadata(output['metadata'], "assistant", row_idx, timestamp=get_current_timestamp(), model_name=shared.model_name)
# Generate
reply = None
@@ -699,7 +760,18 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess
if is_stream:
yield output
output['visible'][-1][1] = apply_extensions('output', output['visible'][-1][1], state, is_chat=True)
if _continue:
# Reprocess the entire internal text for extensions (like translation)
full_internal = output['internal'][-1][1]
if state['mode'] in ['chat', 'chat-instruct']:
full_visible = re.sub("(<USER>|<user>|{{user}})", state['name1'], full_internal)
else:
full_visible = full_internal
full_visible = html.escape(full_visible)
output['visible'][-1][1] = apply_extensions('output', full_visible, state, is_chat=True)
else:
output['visible'][-1][1] = apply_extensions('output', output['visible'][-1][1], state, is_chat=True)
# Final sync for version metadata (in case streaming was disabled)
if regenerate:
@@ -775,7 +847,9 @@ def generate_chat_reply_wrapper(text, state, regenerate=False, _continue=False):
last_save_time = time.monotonic()
save_interval = 8
for i, history in enumerate(generate_chat_reply(text, state, regenerate, _continue, loading_message=True, for_ui=True)):
yield chat_html_wrapper(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']), history
yield chat_html_wrapper(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'], last_message_only=(i > 0)), history
if i == 0:
time.sleep(0.125) # We need this to make sure the first update goes through
current_time = time.monotonic()
# Save on first iteration or if save_interval seconds have passed
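
The shape of this loop is the core of the streaming optimization: only the first yield (`i == 0`) carries the full chat HTML, and every later yield renders just the last message and sets `last_message_only`, which `handleMorphdomUpdate()` on the JavaScript side uses to patch only the final message node. A schematic of the producer side, with `render_full`/`render_last` standing in for `chat_html_wrapper` at the two flag values (names invented for this sketch):

import time

def stream_chat_updates(history_iter, render_full, render_last):
    # render_full / render_last represent chat_html_wrapper with
    # last_message_only=False / True respectively.
    for i, history in enumerate(history_iter):
        if i == 0:
            yield {"html": render_full(history), "last_message_only": False}
            time.sleep(0.125)  # let the first full render reach the client
        else:
            yield {"html": render_last(history), "last_message_only": True}
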
@@ -1163,6 +1237,43 @@ def load_character(character, name1, name2):
return name1, name2, picture, greeting, context
def restore_character_for_ui(state):
"""Reset character fields to the currently loaded character's saved values"""
if state['character_menu'] and state['character_menu'] != 'None':
try:
name1, name2, picture, greeting, context = load_character(state['character_menu'], state['name1'], state['name2'])
state['name2'] = name2
state['greeting'] = greeting
state['context'] = context
state['character_picture'] = picture # This triggers cache update via generate_pfp_cache
return state, name2, context, greeting, picture
except Exception as e:
logger.error(f"Failed to reset character '{state['character_menu']}': {e}")
return clear_character_for_ui(state)
else:
return clear_character_for_ui(state)
def clear_character_for_ui(state):
"""Clear all character fields and picture cache"""
state['name2'] = shared.settings['name2']
state['context'] = shared.settings['context']
state['greeting'] = shared.settings['greeting']
state['character_picture'] = None
# Clear the cache files
cache_folder = Path(shared.args.disk_cache_dir)
for cache_file in ['pfp_character.png', 'pfp_character_thumb.png']:
cache_path = Path(f'{cache_folder}/{cache_file}')
if cache_path.exists():
cache_path.unlink()
return state, state['name2'], state['context'], state['greeting'], None
def load_instruction_template(template):
if template == 'None':
return ''
@@ -1453,7 +1564,10 @@ def handle_start_new_chat_click(state):
def handle_delete_chat_confirm_click(state):
index = str(find_all_histories(state).index(state['unique_id']))
filtered_histories = find_all_histories_with_first_prompts(state)
filtered_ids = [h[1] for h in filtered_histories]
index = str(filtered_ids.index(state['unique_id']))
delete_history(state['unique_id'], state['character_menu'], state['mode'])
history, unique_id = load_history_after_deletion(state, index)
html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu'])
@@ -1466,7 +1580,6 @@ def handle_delete_chat_confirm_click(state):
unique_id,
gr.update(visible=False),
gr.update(visible=True),
gr.update(visible=False)
]
@@ -1653,6 +1766,25 @@ def handle_character_menu_change(state):
]
def handle_character_picture_change(picture):
"""Update or clear cache when character picture changes"""
cache_folder = Path(shared.args.disk_cache_dir)
if not cache_folder.exists():
cache_folder.mkdir()
if picture is not None:
# Save to cache
picture.save(Path(f'{cache_folder}/pfp_character.png'), format='PNG')
thumb = make_thumbnail(picture)
thumb.save(Path(f'{cache_folder}/pfp_character_thumb.png'), format='PNG')
else:
# Remove cache files when picture is cleared
for cache_file in ['pfp_character.png', 'pfp_character_thumb.png']:
cache_path = Path(f'{cache_folder}/{cache_file}')
if cache_path.exists():
cache_path.unlink()
def handle_mode_change(state):
history = load_latest_history(state)
histories = find_all_histories_with_first_prompts(state)

View file

@@ -1,38 +0,0 @@
import subprocess
from pathlib import Path
new_extensions = set()
def clone_or_pull_repository(github_url):
global new_extensions
repository_folder = Path("extensions")
repo_name = github_url.rstrip("/").split("/")[-1].split(".")[0]
# Check if the repository folder exists
if not repository_folder.exists():
repository_folder.mkdir(parents=True)
repo_path = repository_folder / repo_name
# Check if the repository is already cloned
if repo_path.exists():
yield f"Updating {github_url}..."
# Perform a 'git pull' to update the repository
try:
pull_output = subprocess.check_output(["git", "-C", repo_path, "pull"], stderr=subprocess.STDOUT)
yield "Done."
return pull_output.decode()
except subprocess.CalledProcessError as e:
return str(e)
# Clone the repository
try:
yield f"Cloning {github_url}..."
clone_output = subprocess.check_output(["git", "clone", github_url, repo_path], stderr=subprocess.STDOUT)
new_extensions.add(repo_name)
yield f"The extension `{repo_name}` has been downloaded.\n\nPlease close the web UI completely and launch it again to be able to load it."
return clone_output.decode()
except subprocess.CalledProcessError as e:
return str(e)

View file

@@ -39,15 +39,16 @@ def minify_css(css: str) -> str:
return css
with open(Path(__file__).resolve().parent / '../css/html_readable_style.css', 'r') as f:
with open(Path(__file__).resolve().parent / '../css/html_readable_style.css', 'r', encoding='utf-8') as f:
readable_css = f.read()
with open(Path(__file__).resolve().parent / '../css/html_instruct_style.css', 'r') as f:
with open(Path(__file__).resolve().parent / '../css/html_instruct_style.css', 'r', encoding='utf-8') as f:
instruct_css = f.read()
# Custom chat styles
chat_styles = {}
for k in get_available_chat_styles():
chat_styles[k] = open(Path(f'css/chat_style-{k}.css'), 'r').read()
with open(Path(f'css/chat_style-{k}.css'), 'r', encoding='utf-8') as f:
chat_styles[k] = f.read()
# Handle styles that derive from other styles
for k in chat_styles:
@@ -350,12 +351,14 @@ remove_button = f'<button class="footer-button footer-remove-button" title="Remo
info_button = f'<button class="footer-button footer-info-button" title="message">{info_svg}</button>'
def format_message_timestamp(history, role, index):
def format_message_timestamp(history, role, index, tooltip_include_timestamp=True):
"""Get a formatted timestamp HTML span for a message if available"""
key = f"{role}_{index}"
if 'metadata' in history and key in history['metadata'] and history['metadata'][key].get('timestamp'):
timestamp = history['metadata'][key]['timestamp']
return f"<span class='timestamp'>{timestamp}</span>"
tooltip_text = get_message_tooltip(history, role, index, include_timestamp=tooltip_include_timestamp)
title_attr = f' title="{html.escape(tooltip_text)}"' if tooltip_text else ''
return f"<span class='timestamp'{title_attr}>{timestamp}</span>"
return ""
@@ -388,6 +391,23 @@ def format_message_attachments(history, role, index):
return ""
def get_message_tooltip(history, role, index, include_timestamp=True):
"""Get tooltip text combining timestamp and model name for a message"""
key = f"{role}_{index}"
if 'metadata' not in history or key not in history['metadata']:
return ""
meta = history['metadata'][key]
tooltip_parts = []
if include_timestamp and meta.get('timestamp'):
tooltip_parts.append(meta['timestamp'])
if meta.get('model_name'):
tooltip_parts.append(f"Model: {meta['model_name']}")
return " | ".join(tooltip_parts)
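
For concreteness, here is a hypothetical metadata entry and the tooltip it yields; the values are invented:

history = {'metadata': {'assistant_0': {'timestamp': '2025-06-10 23:08',
                                        'model_name': 'Qwen3-8B'}}}
print(get_message_tooltip(history, 'assistant', 0))
# -> "2025-06-10 23:08 | Model: Qwen3-8B"
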
def get_version_navigation_html(history, i, role):
"""Generate simple navigation arrows for message versions"""
key = f"{role}_{i}"
@@ -443,179 +463,193 @@ def actions_html(history, i, role, info_message=""):
f'{version_nav_html}')
def generate_instruct_html(history):
output = f'<style>{instruct_css}</style><div class="chat" id="chat" data-mode="instruct"><div class="messages">'
def generate_instruct_html(history, last_message_only=False):
if not last_message_only:
output = f'<style>{instruct_css}</style><div class="chat" id="chat" data-mode="instruct"><div class="messages">'
else:
output = ""
for i in range(len(history['visible'])):
row_visible = history['visible'][i]
row_internal = history['internal'][i]
converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
def create_message(role, content, raw_content):
"""Inner function that captures variables from outer scope."""
class_name = "user-message" if role == "user" else "assistant-message"
# Get timestamps
user_timestamp = format_message_timestamp(history, "user", i)
assistant_timestamp = format_message_timestamp(history, "assistant", i)
# Get role-specific data
timestamp = format_message_timestamp(history, role, i)
attachments = format_message_attachments(history, role, i)
# Get attachments
user_attachments = format_message_attachments(history, "user", i)
assistant_attachments = format_message_attachments(history, "assistant", i)
# Create info button if timestamp exists
info_message = ""
if timestamp:
tooltip_text = get_message_tooltip(history, role, i)
info_message = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"')
# Create info buttons for timestamps if they exist
info_message_user = ""
if user_timestamp != "":
# Extract the timestamp value from the span
user_timestamp_value = user_timestamp.split('>', 1)[1].split('<', 1)[0]
info_message_user = info_button.replace("message", user_timestamp_value)
info_message_assistant = ""
if assistant_timestamp != "":
# Extract the timestamp value from the span
assistant_timestamp_value = assistant_timestamp.split('>', 1)[1].split('<', 1)[0]
info_message_assistant = info_button.replace("message", assistant_timestamp_value)
if converted_visible[0]: # Don't display empty user messages
output += (
f'<div class="user-message" '
f'data-raw="{html.escape(row_internal[0], quote=True)}"'
f'data-index={i}>'
f'<div class="text">'
f'<div class="message-body">{converted_visible[0]}</div>'
f'{user_attachments}'
f'{actions_html(history, i, "user", info_message_user)}'
f'</div>'
f'</div>'
)
output += (
f'<div class="assistant-message" '
f'data-raw="{html.escape(row_internal[1], quote=True)}"'
return (
f'<div class="{class_name}" '
f'data-raw="{html.escape(raw_content, quote=True)}"'
f'data-index={i}>'
f'<div class="text">'
f'<div class="message-body">{converted_visible[1]}</div>'
f'{assistant_attachments}'
f'{actions_html(history, i, "assistant", info_message_assistant)}'
f'<div class="message-body">{content}</div>'
f'{attachments}'
f'{actions_html(history, i, role, info_message)}'
f'</div>'
f'</div>'
)
output += "</div></div>"
# Determine range
start_idx = len(history['visible']) - 1 if last_message_only else 0
end_idx = len(history['visible'])
for i in range(start_idx, end_idx):
row_visible = history['visible'][i]
row_internal = history['internal'][i]
# Convert content
if last_message_only:
converted_visible = [None, convert_to_markdown_wrapped(row_visible[1], message_id=i, use_cache=i != len(history['visible']) - 1)]
else:
converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
# Generate messages
if not last_message_only and converted_visible[0]:
output += create_message("user", converted_visible[0], row_internal[0])
output += create_message("assistant", converted_visible[1], row_internal[1])
if not last_message_only:
output += "</div></div>"
return output
def generate_cai_chat_html(history, name1, name2, style, character, reset_cache=False):
output = f'<style>{chat_styles[style]}</style><div class="chat" id="chat"><div class="messages">'
def get_character_image_with_cache_buster():
"""Get character image URL with cache busting based on file modification time"""
cache_path = Path("user_data/cache/pfp_character_thumb.png")
if cache_path.exists():
mtime = int(cache_path.stat().st_mtime)
return f'<img src="file/user_data/cache/pfp_character_thumb.png?{mtime}" class="pfp_character">'
# We use ?character and ?time.time() to force the browser to reset caches
img_bot = (
f'<img src="file/user_data/cache/pfp_character_thumb.png?{character}" class="pfp_character">'
if Path("user_data/cache/pfp_character_thumb.png").exists() else ''
)
return ''
img_me = (
f'<img src="file/user_data/cache/pfp_me.png?{time.time() if reset_cache else ""}">'
if Path("user_data/cache/pfp_me.png").exists() else ''
)
for i in range(len(history['visible'])):
row_visible = history['visible'][i]
row_internal = history['internal'][i]
converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
def generate_cai_chat_html(history, name1, name2, style, character, reset_cache=False, last_message_only=False):
if not last_message_only:
output = f'<style>{chat_styles[style]}</style><div class="chat" id="chat"><div class="messages">'
else:
output = ""
# Get timestamps
user_timestamp = format_message_timestamp(history, "user", i)
assistant_timestamp = format_message_timestamp(history, "assistant", i)
img_bot = get_character_image_with_cache_buster()
# Get attachments
user_attachments = format_message_attachments(history, "user", i)
assistant_attachments = format_message_attachments(history, "assistant", i)
def create_message(role, content, raw_content):
"""Inner function for CAI-style messages."""
circle_class = "circle-you" if role == "user" else "circle-bot"
name = name1 if role == "user" else name2
if converted_visible[0]: # Don't display empty user messages
output += (
f'<div class="message" '
f'data-raw="{html.escape(row_internal[0], quote=True)}"'
f'data-index={i}>'
f'<div class="circle-you">{img_me}</div>'
f'<div class="text">'
f'<div class="username">{name1}{user_timestamp}</div>'
f'<div class="message-body">{converted_visible[0]}</div>'
f'{user_attachments}'
f'{actions_html(history, i, "user")}'
f'</div>'
f'</div>'
)
# Get role-specific data
timestamp = format_message_timestamp(history, role, i, tooltip_include_timestamp=False)
attachments = format_message_attachments(history, role, i)
output += (
# Get appropriate image
if role == "user":
img = (f'<img src="file/user_data/cache/pfp_me.png?{time.time() if reset_cache else ""}">'
if Path("user_data/cache/pfp_me.png").exists() else '')
else:
img = img_bot
return (
f'<div class="message" '
f'data-raw="{html.escape(row_internal[1], quote=True)}"'
f'data-raw="{html.escape(raw_content, quote=True)}"'
f'data-index={i}>'
f'<div class="circle-bot">{img_bot}</div>'
f'<div class="{circle_class}">{img}</div>'
f'<div class="text">'
f'<div class="username">{name2}{assistant_timestamp}</div>'
f'<div class="message-body">{converted_visible[1]}</div>'
f'{assistant_attachments}'
f'{actions_html(history, i, "assistant")}'
f'<div class="username">{name}{timestamp}</div>'
f'<div class="message-body">{content}</div>'
f'{attachments}'
f'{actions_html(history, i, role)}'
f'</div>'
f'</div>'
)
output += "</div></div>"
# Determine range
start_idx = len(history['visible']) - 1 if last_message_only else 0
end_idx = len(history['visible'])
for i in range(start_idx, end_idx):
row_visible = history['visible'][i]
row_internal = history['internal'][i]
# Convert content
if last_message_only:
converted_visible = [None, convert_to_markdown_wrapped(row_visible[1], message_id=i, use_cache=i != len(history['visible']) - 1)]
else:
converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
# Generate messages
if not last_message_only and converted_visible[0]:
output += create_message("user", converted_visible[0], row_internal[0])
output += create_message("assistant", converted_visible[1], row_internal[1])
if not last_message_only:
output += "</div></div>"
return output
def generate_chat_html(history, name1, name2, reset_cache=False):
output = f'<style>{chat_styles["wpp"]}</style><div class="chat" id="chat"><div class="messages">'
def generate_chat_html(history, name1, name2, reset_cache=False, last_message_only=False):
if not last_message_only:
output = f'<style>{chat_styles["wpp"]}</style><div class="chat" id="chat"><div class="messages">'
else:
output = ""
for i in range(len(history['visible'])):
row_visible = history['visible'][i]
row_internal = history['internal'][i]
converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
def create_message(role, content, raw_content):
"""Inner function for WPP-style messages."""
text_class = "text-you" if role == "user" else "text-bot"
# Get timestamps
user_timestamp = format_message_timestamp(history, "user", i)
assistant_timestamp = format_message_timestamp(history, "assistant", i)
# Get role-specific data
timestamp = format_message_timestamp(history, role, i)
attachments = format_message_attachments(history, role, i)
# Get attachments
user_attachments = format_message_attachments(history, "user", i)
assistant_attachments = format_message_attachments(history, "assistant", i)
# Create info button if timestamp exists
info_message = ""
if timestamp:
tooltip_text = get_message_tooltip(history, role, i)
info_message = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"')
# Create info buttons for timestamps if they exist
info_message_user = ""
if user_timestamp != "":
# Extract the timestamp value from the span
user_timestamp_value = user_timestamp.split('>', 1)[1].split('<', 1)[0]
info_message_user = info_button.replace("message", user_timestamp_value)
info_message_assistant = ""
if assistant_timestamp != "":
# Extract the timestamp value from the span
assistant_timestamp_value = assistant_timestamp.split('>', 1)[1].split('<', 1)[0]
info_message_assistant = info_button.replace("message", assistant_timestamp_value)
if converted_visible[0]: # Don't display empty user messages
output += (
f'<div class="message" '
f'data-raw="{html.escape(row_internal[0], quote=True)}"'
f'data-index={i}>'
f'<div class="text-you">'
f'<div class="message-body">{converted_visible[0]}</div>'
f'{user_attachments}'
f'{actions_html(history, i, "user", info_message_user)}'
f'</div>'
f'</div>'
)
output += (
return (
f'<div class="message" '
f'data-raw="{html.escape(row_internal[1], quote=True)}"'
f'data-raw="{html.escape(raw_content, quote=True)}"'
f'data-index={i}>'
f'<div class="text-bot">'
f'<div class="message-body">{converted_visible[1]}</div>'
f'{assistant_attachments}'
f'{actions_html(history, i, "assistant", info_message_assistant)}'
f'<div class="{text_class}">'
f'<div class="message-body">{content}</div>'
f'{attachments}'
f'{actions_html(history, i, role, info_message)}'
f'</div>'
f'</div>'
)
output += "</div></div>"
# Determine range
start_idx = len(history['visible']) - 1 if last_message_only else 0
end_idx = len(history['visible'])
for i in range(start_idx, end_idx):
row_visible = history['visible'][i]
row_internal = history['internal'][i]
# Convert content
if last_message_only:
converted_visible = [None, convert_to_markdown_wrapped(row_visible[1], message_id=i, use_cache=i != len(history['visible']) - 1)]
else:
converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
# Generate messages
if not last_message_only and converted_visible[0]:
output += create_message("user", converted_visible[0], row_internal[0])
output += create_message("assistant", converted_visible[1], row_internal[1])
if not last_message_only:
output += "</div></div>"
return output
@@ -629,15 +663,15 @@ def time_greeting():
return "Good evening!"
def chat_html_wrapper(history, name1, name2, mode, style, character, reset_cache=False):
def chat_html_wrapper(history, name1, name2, mode, style, character, reset_cache=False, last_message_only=False):
if len(history['visible']) == 0:
greeting = f"<div class=\"welcome-greeting\">{time_greeting()} How can I help you today?</div>"
result = f'<div class="chat" id="chat">{greeting}</div>'
elif mode == 'instruct':
result = generate_instruct_html(history)
result = generate_instruct_html(history, last_message_only=last_message_only)
elif style == 'wpp':
result = generate_chat_html(history, name1, name2)
result = generate_chat_html(history, name1, name2, last_message_only=last_message_only)
else:
result = generate_cai_chat_html(history, name1, name2, style, character, reset_cache)
result = generate_cai_chat_html(history, name1, name2, style, character, reset_cache=reset_cache, last_message_only=last_message_only)
return {'html': result}
return {'html': result, 'last_message_only': last_message_only}

View file

@@ -408,15 +408,42 @@ class LlamaServer:
def filter_stderr_with_progress(process_stderr):
progress_pattern = re.compile(r'slot update_slots: id.*progress = (\d+\.\d+)')
"""
Reads stderr lines from a process, filters out noise, and displays progress updates
inline (overwriting the same line) until completion.
"""
progress_re = re.compile(r'slot update_slots: id.*progress = (\d+\.\d+)')
last_was_progress = False
try:
for line in iter(process_stderr.readline, ''):
progress_match = progress_pattern.search(line)
if progress_match:
sys.stderr.write(line)
sys.stderr.flush()
elif not line.startswith(('srv ', 'slot ')) and 'log_server_r: request: GET /health' not in line:
sys.stderr.write(line)
sys.stderr.flush()
for raw in iter(process_stderr.readline, ''):
line = raw.rstrip('\r\n')
match = progress_re.search(line)
if match:
progress = float(match.group(1))
# Extract just the part from "prompt processing" onwards
prompt_processing_idx = line.find('prompt processing')
if prompt_processing_idx != -1:
display_line = line[prompt_processing_idx:]
else:
display_line = line # fallback to full line
# choose carriage return for in-progress or newline at completion
end_char = '\r' if progress < 1.0 else '\n'
print(display_line, end=end_char, file=sys.stderr, flush=True)
last_was_progress = (progress < 1.0)
# skip noise lines
elif not (line.startswith(('srv ', 'slot ')) or 'log_server_r: request: GET /health' in line):
# if we were in progress, finish that line first
if last_was_progress:
print(file=sys.stderr)
print(line, file=sys.stderr, flush=True)
last_was_progress = False
except (ValueError, IOError):
# silently ignore broken output or IO errors
pass
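
A plausible way to attach this filter to a running server process is a daemon thread reading the pipe; the launch command below is an assumption for illustration, not the project's actual invocation:

import subprocess
import threading

proc = subprocess.Popen(
    ["llama-server", "--port", "8080"],  # assumed command line
    stderr=subprocess.PIPE,
    text=True,
)
threading.Thread(
    target=filter_stderr_with_progress,
    args=(proc.stderr,),
    daemon=True,  # don't keep the interpreter alive just for log filtering
).start()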

View file

@@ -116,7 +116,7 @@ def unload_model(keep_model_name=False):
return
is_llamacpp = (shared.model.__class__.__name__ == 'LlamaServer')
if shared.args.loader == 'ExLlamav3_HF':
if shared.model.__class__.__name__ == 'Exllamav3HF':
shared.model.unload()
shared.model = shared.tokenizer = None

View file

@@ -329,6 +329,7 @@ def estimate_vram(gguf_file, gpu_layers, ctx_size, cache_type):
# Extract values from metadata
n_layers = None
n_kv_heads = None
n_attention_heads = None # Fallback for models without separate KV heads
embedding_dim = None
for key, value in metadata.items():
@@ -336,9 +337,14 @@ def estimate_vram(gguf_file, gpu_layers, ctx_size, cache_type):
n_layers = value
elif key.endswith('.attention.head_count_kv'):
n_kv_heads = max(value) if isinstance(value, list) else value
elif key.endswith('.attention.head_count'):
n_attention_heads = max(value) if isinstance(value, list) else value
elif key.endswith('.embedding_length'):
embedding_dim = value
if n_kv_heads is None:
n_kv_heads = n_attention_heads
if gpu_layers > n_layers:
gpu_layers = n_layers
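
The fallback exists because GGUF files from plain multi-head-attention models may omit `*.attention.head_count_kv`; in those models every attention head has its own KV head, so the total head count is the correct substitute. Distilled into a standalone sketch (`resolve_kv_heads` is not a function in the codebase):

def resolve_kv_heads(metadata):
    # Grouped-query models store head_count_kv; MHA models may only
    # store head_count, where kv heads == attention heads.
    n_kv_heads = n_attention_heads = None
    for key, value in metadata.items():
        if key.endswith('.attention.head_count_kv'):
            n_kv_heads = max(value) if isinstance(value, list) else value
        elif key.endswith('.attention.head_count'):
            n_attention_heads = max(value) if isinstance(value, list) else value
    return n_kv_heads if n_kv_heads is not None else n_attention_heads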

View file

@@ -1,6 +1,5 @@
import functools
import pprint
import random
from pathlib import Path
import yaml
@@ -93,68 +92,17 @@ def load_preset_for_ui(name, state):
return state, *[generate_params[k] for k in presets_params()]
def random_preset(state):
params_and_values = {
'remove_tail_tokens': {
'top_p': [0.5, 0.8, 0.9, 0.95, 0.99],
'min_p': [0.5, 0.2, 0.1, 0.05, 0.01],
'top_k': [3, 5, 10, 20, 30, 40],
'typical_p': [0.2, 0.575, 0.95],
'tfs': [0.5, 0.8, 0.9, 0.95, 0.99],
'top_a': [0.5, 0.2, 0.1, 0.05, 0.01],
'epsilon_cutoff': [1, 3, 5, 7, 9],
'eta_cutoff': [3, 6, 9, 12, 15, 18],
},
'flatten_distribution': {
'temperature': [0.1, 0.5, 0.7, 0.8, 1, 1.2, 1.5, 2.0, 5.0],
'dynamic_temperature': [
[0.1, 1],
[0.1, 1.5],
[0.1, 2],
[0.1, 5],
[0.5, 1],
[0.5, 1.5],
[0.5, 2],
[0.5, 5],
[0.8, 1],
[0.8, 1.5],
[0.8, 2],
[0.8, 5],
[1, 1.5],
[1, 2],
[1, 5]
],
'smoothing_factor': [0.2, 0.3, 0.6, 1.2],
},
'repetition': {
'repetition_penalty': [1, 1.05, 1.1, 1.15, 1.20, 1.25],
'presence_penalty': [0, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 2.0],
'frequency_penalty': [0, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 2.0],
},
'other': {
'temperature_last': [True, False],
}
}
generate_params = default_preset()
for cat in params_and_values:
choices = list(params_and_values[cat].keys())
if shared.args.loader is not None:
choices = [x for x in choices if loader_contains(x)]
if len(choices) > 0:
choice = random.choice(choices)
value = random.choice(params_and_values[cat][choice])
if choice == 'dynamic_temperature':
generate_params['dynamic_temperature'] = True
generate_params['dynatemp_low'] = value[0]
generate_params['dynatemp_high'] = value[1]
else:
generate_params[choice] = value
def reset_preset_for_ui(name, state):
"""Reset current preset to its saved values from file"""
generate_params = load_preset(name, verbose=True)
state.update(generate_params)
return state, *[generate_params[k] for k in presets_params()]
def neutralize_samplers_for_ui(state):
"""Set all samplers to their default/neutral values"""
generate_params = default_preset()
state.update(generate_params)
logger.info("GENERATED_PRESET=")
pprint.PrettyPrinter(indent=4, width=1, sort_dicts=False).pprint(remove_defaults(state))
return state, *[generate_params[k] for k in presets_params()]

View file

@@ -9,6 +9,7 @@ from pathlib import Path
import yaml
from modules.logging_colors import logger
from modules.presets import default_preset
# Model variables
model = None
@@ -21,60 +22,19 @@ lora_names = []
# Generation variables
stop_everything = False
generation_lock = None
processing_message = '*Is typing...*'
processing_message = ''
# UI variables
gradio = {}
persistent_interface_state = {}
need_restart = False
# UI defaults
settings = {
'show_controls': True,
'start_with': '',
'mode': 'instruct',
'chat_style': 'cai-chat',
'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
'prompt-default': 'QA',
'prompt-notebook': 'QA',
'character': 'Assistant',
'name1': 'You',
'user_bio': '',
'custom_system_message': '',
'preset': 'min_p',
'max_new_tokens': 512,
'max_new_tokens_min': 1,
'max_new_tokens_max': 4096,
'prompt_lookup_num_tokens': 0,
'max_tokens_second': 0,
'max_updates_second': 12,
'auto_max_new_tokens': True,
'ban_eos_token': False,
'add_bos_token': True,
'enable_thinking': True,
'skip_special_tokens': True,
'stream': True,
'static_cache': False,
'truncation_length': 8192,
'seed': -1,
'custom_stopping_strings': '',
'custom_token_bans': '',
'negative_prompt': '',
'dark_theme': True,
'default_extensions': [],
'instruction_template_str': "{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if not ns.found -%}\n {{- '' + 'Below is an instruction that describes a task. Write a response that appropriately completes the request.' + '\\n\\n' -}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {{- '' + message['content'] + '\\n\\n' -}}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{-'### Instruction:\\n' + message['content'] + '\\n\\n'-}}\n {%- else -%}\n {{-'### Response:\\n' + message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{-'### Response:\\n'-}}\n{%- endif -%}",
'chat_template_str': "{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {%- if message['content'] -%}\n {{- message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- if user_bio -%}\n {{- user_bio + '\\n\\n' -}}\n {%- endif -%}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{- name1 + ': ' + message['content'] + '\\n'-}}\n {%- else -%}\n {{- name2 + ': ' + message['content'] + '\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}",
}
default_settings = copy.deepcopy(settings)
# Parser copied from https://github.com/vladmandic/automatic
parser = argparse.ArgumentParser(description="Text generation web UI", conflict_handler='resolve', add_help=True, formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=55, indent_increment=2, width=200))
# Basic settings
group = parser.add_argument_group('Basic settings')
group.add_argument('--multi-user', action='store_true', help='Multi-user mode. Chat histories are not saved or automatically loaded. Warning: this is likely not safe for sharing publicly.')
group.add_argument('--character', type=str, help='The name of the character to load in chat mode by default.')
group.add_argument('--model', type=str, help='Name of the model to load by default.')
group.add_argument('--lora', type=str, nargs='+', help='The list of LoRAs to load. If you want to load more than one LoRA, write the names separated by spaces.')
group.add_argument('--model-dir', type=str, default='user_data/models', help='Path to directory with all the models.')
@@ -230,6 +190,102 @@ for arg in sys.argv[1:]:
elif hasattr(args, arg):
provided_arguments.append(arg)
# Default generation parameters
neutral_samplers = default_preset()
# UI defaults
settings = {
'show_controls': True,
'start_with': '',
'mode': 'instruct',
'chat_style': 'cai-chat',
'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
'enable_web_search': False,
'web_search_pages': 3,
'prompt-default': 'QA',
'prompt-notebook': 'QA',
'preset': 'Qwen3 - Thinking' if Path('user_data/presets/Qwen3 - Thinking.yaml').exists() else None,
'max_new_tokens': 512,
'max_new_tokens_min': 1,
'max_new_tokens_max': 4096,
'prompt_lookup_num_tokens': 0,
'max_tokens_second': 0,
'auto_max_new_tokens': True,
'ban_eos_token': False,
'add_bos_token': True,
'enable_thinking': True,
'skip_special_tokens': True,
'stream': True,
'static_cache': False,
'truncation_length': 8192,
'seed': -1,
'custom_stopping_strings': '',
'custom_token_bans': '',
'negative_prompt': '',
'dark_theme': True,
'paste_to_attachment': False,
# Character settings
'character': 'Assistant',
'name1': 'You',
'name2': 'AI',
'user_bio': '',
'context': 'The following is a conversation with an AI Large Language Model. The AI has been trained to answer questions, provide recommendations, and help with decision making. The AI follows user requests. The AI thinks outside the box.',
'greeting': 'How can I help you today?',
'custom_system_message': '',
'instruction_template_str': "{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if not ns.found -%}\n {{- '' + 'Below is an instruction that describes a task. Write a response that appropriately completes the request.' + '\\n\\n' -}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {{- '' + message['content'] + '\\n\\n' -}}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{-'### Instruction:\\n' + message['content'] + '\\n\\n'-}}\n {%- else -%}\n {{-'### Response:\\n' + message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{-'### Response:\\n'-}}\n{%- endif -%}",
'chat_template_str': "{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {%- if message['content'] -%}\n {{- message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- if user_bio -%}\n {{- user_bio + '\\n\\n' -}}\n {%- endif -%}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{- name1 + ': ' + message['content'] + '\\n'-}}\n {%- else -%}\n {{- name2 + ': ' + message['content'] + '\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}",
# Generation parameters - Curve shape
'temperature': 0.6,
'dynatemp_low': neutral_samplers['dynatemp_low'],
'dynatemp_high': neutral_samplers['dynatemp_high'],
'dynatemp_exponent': neutral_samplers['dynatemp_exponent'],
'smoothing_factor': neutral_samplers['smoothing_factor'],
'smoothing_curve': neutral_samplers['smoothing_curve'],
# Generation parameters - Curve cutoff
'min_p': neutral_samplers['min_p'],
'top_p': 0.95,
'top_k': 20,
'typical_p': neutral_samplers['typical_p'],
'xtc_threshold': neutral_samplers['xtc_threshold'],
'xtc_probability': neutral_samplers['xtc_probability'],
'epsilon_cutoff': neutral_samplers['epsilon_cutoff'],
'eta_cutoff': neutral_samplers['eta_cutoff'],
'tfs': neutral_samplers['tfs'],
'top_a': neutral_samplers['top_a'],
'top_n_sigma': neutral_samplers['top_n_sigma'],
# Generation parameters - Repetition suppression
'dry_multiplier': neutral_samplers['dry_multiplier'],
'dry_allowed_length': neutral_samplers['dry_allowed_length'],
'dry_base': neutral_samplers['dry_base'],
'repetition_penalty': neutral_samplers['repetition_penalty'],
'frequency_penalty': neutral_samplers['frequency_penalty'],
'presence_penalty': neutral_samplers['presence_penalty'],
'encoder_repetition_penalty': neutral_samplers['encoder_repetition_penalty'],
'no_repeat_ngram_size': neutral_samplers['no_repeat_ngram_size'],
'repetition_penalty_range': neutral_samplers['repetition_penalty_range'],
# Generation parameters - Alternative sampling methods
'penalty_alpha': neutral_samplers['penalty_alpha'],
'guidance_scale': neutral_samplers['guidance_scale'],
'mirostat_mode': neutral_samplers['mirostat_mode'],
'mirostat_tau': neutral_samplers['mirostat_tau'],
'mirostat_eta': neutral_samplers['mirostat_eta'],
# Generation parameters - Other options
'do_sample': neutral_samplers['do_sample'],
'dynamic_temperature': neutral_samplers['dynamic_temperature'],
'temperature_last': neutral_samplers['temperature_last'],
'sampler_priority': neutral_samplers['sampler_priority'],
'dry_sequence_breakers': neutral_samplers['dry_sequence_breakers'],
'grammar_string': '',
}
default_settings = copy.deepcopy(settings)
def do_cmd_flags_warnings():
# Security warnings
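The `default_settings = copy.deepcopy(settings)` line above snapshots the defaults before any user overrides land; a deep copy matters because some settings values are mutable. A minimal sketch with made-up keys:

```python
import copy

settings = {'preset': 'min_p', 'custom_stopping_strings': ['\nYou:']}
default_settings = copy.deepcopy(settings)  # pristine baseline

settings['custom_stopping_strings'].append('###')  # runtime edit
# A shallow copy would have shared the inner list; the deep copy keeps
# the baseline intact, so later diffs against the defaults stay correct.
print(default_settings['custom_stopping_strings'])  # ['\nYou:']
```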

View file

@@ -65,41 +65,39 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False, escap
all_stop_strings += st
shared.stop_everything = False
last_update = -1
reply = ''
is_stream = state['stream']
if len(all_stop_strings) > 0 and not state['stream']:
state = copy.deepcopy(state)
state['stream'] = True
min_update_interval = 0
if state.get('max_updates_second', 0) > 0:
min_update_interval = 1 / state['max_updates_second']
# Generate
last_update = -1
latency_threshold = 1 / 1000
for reply in generate_func(question, original_question, state, stopping_strings, is_chat=is_chat):
cur_time = time.monotonic()
reply, stop_found = apply_stopping_strings(reply, all_stop_strings)
if escape_html:
reply = html.escape(reply)
if is_stream:
cur_time = time.time()
# Limit number of tokens/second to make text readable in real time
if state['max_tokens_second'] > 0:
diff = 1 / state['max_tokens_second'] - (cur_time - last_update)
if diff > 0:
time.sleep(diff)
last_update = time.time()
last_update = time.monotonic()
yield reply
# Limit updates to avoid lag in the Gradio UI
# API updates are not limited
else:
if cur_time - last_update > min_update_interval:
last_update = cur_time
# If 'generate_func' takes less than 0.001 seconds to yield the next token
# (equivalent to more than 1000 tok/s), assume that the UI is lagging behind and skip yielding
if (cur_time - last_update) > latency_threshold:
yield reply
last_update = time.monotonic()
if stop_found or (state['max_tokens_second'] > 0 and shared.stop_everything):
break
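The reworked loop applies two throttles with `time.monotonic()`: an optional tokens/second cap, and a fixed ~1 ms latency threshold that drops UI yields when the generator outruns the consumer. A condensed, self-contained restatement of that shape (illustrative only, not the project's function):

```python
import time

def throttled_stream(chunks, max_tokens_second=0, latency_threshold=1 / 1000):
    last_update = -1
    for reply in chunks:
        cur_time = time.monotonic()
        if max_tokens_second > 0:
            # Sleep long enough to cap the stream at max_tokens_second.
            diff = 1 / max_tokens_second - (cur_time - last_update)
            if diff > 0:
                time.sleep(diff)
            last_update = time.monotonic()
            yield reply
        elif (cur_time - last_update) > latency_threshold:
            # Only yield when at least latency_threshold has passed since
            # the last update; faster arrivals (over ~1000 chunks/s) are
            # assumed to mean the UI is lagging and are skipped.
            yield reply
            last_update = time.monotonic()

for piece in throttled_stream(['a', 'ab', 'abc'], max_tokens_second=2):
    print(piece)
```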
@@ -481,6 +479,7 @@ def generate_reply_custom(question, original_question, state, stopping_strings=N
For models that do not use the transformers library for sampling
"""
state = copy.deepcopy(state)
state['seed'] = set_manual_seed(state['seed'])
t0 = time.time()
reply = ''

View file

@@ -1,4 +1,5 @@
import copy
import threading
from pathlib import Path
import gradio as gr
@@ -6,28 +7,39 @@ import yaml
import extensions
from modules import shared
from modules.chat import load_history
from modules.utils import gradio
with open(Path(__file__).resolve().parent / '../css/NotoSans/stylesheet.css', 'r') as f:
# Global state for auto-saving UI settings with debouncing
_auto_save_timer = None
_auto_save_lock = threading.Lock()
_last_interface_state = None
_last_preset = None
_last_extensions = None
_last_show_controls = None
_last_theme_state = None
with open(Path(__file__).resolve().parent / '../css/NotoSans/stylesheet.css', 'r', encoding='utf-8') as f:
css = f.read()
with open(Path(__file__).resolve().parent / '../css/main.css', 'r') as f:
with open(Path(__file__).resolve().parent / '../css/main.css', 'r', encoding='utf-8') as f:
css += f.read()
with open(Path(__file__).resolve().parent / '../css/katex/katex.min.css', 'r') as f:
with open(Path(__file__).resolve().parent / '../css/katex/katex.min.css', 'r', encoding='utf-8') as f:
css += f.read()
with open(Path(__file__).resolve().parent / '../css/highlightjs/highlightjs-copy.min.css', 'r') as f:
with open(Path(__file__).resolve().parent / '../css/highlightjs/highlightjs-copy.min.css', 'r', encoding='utf-8') as f:
css += f.read()
with open(Path(__file__).resolve().parent / '../js/main.js', 'r') as f:
with open(Path(__file__).resolve().parent / '../js/main.js', 'r', encoding='utf-8') as f:
js = f.read()
with open(Path(__file__).resolve().parent / '../js/global_scope_js.js', 'r') as f:
with open(Path(__file__).resolve().parent / '../js/global_scope_js.js', 'r', encoding='utf-8') as f:
global_scope_js = f.read()
with open(Path(__file__).resolve().parent / '../js/save_files.js', 'r') as f:
with open(Path(__file__).resolve().parent / '../js/save_files.js', 'r', encoding='utf-8') as f:
save_files_js = f.read()
with open(Path(__file__).resolve().parent / '../js/switch_tabs.js', 'r') as f:
with open(Path(__file__).resolve().parent / '../js/switch_tabs.js', 'r', encoding='utf-8') as f:
switch_tabs_js = f.read()
with open(Path(__file__).resolve().parent / '../js/show_controls.js', 'r') as f:
with open(Path(__file__).resolve().parent / '../js/show_controls.js', 'r', encoding='utf-8') as f:
show_controls_js = f.read()
with open(Path(__file__).resolve().parent / '../js/update_big_picture.js', 'r') as f:
with open(Path(__file__).resolve().parent / '../js/update_big_picture.js', 'r', encoding='utf-8') as f:
update_big_picture_js = f.read()
with open(Path(__file__).resolve().parent / '../js/dark_theme.js', 'r') as f:
with open(Path(__file__).resolve().parent / '../js/dark_theme.js', 'r', encoding='utf-8') as f:
dark_theme_js = f.read()
refresh_symbol = '🔄'
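The `open()` calls above now pass `encoding='utf-8'` explicitly because Python's default text encoding is platform-dependent (often cp1252 on Windows) and these CSS/JS assets contain non-ASCII characters. A small self-contained demonstration:

```python
import tempfile
from pathlib import Path

# Without an explicit encoding, open() falls back to the platform default
# (often cp1252 on Windows), which cannot decode characters like 🔄.
p = Path(tempfile.mkdtemp()) / 'demo.css'
p.write_text('/* 🔄 */', encoding='utf-8')
print(p.read_text(encoding='utf-8'))  # deterministic on every platform
```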
@@ -62,8 +74,10 @@ if not shared.args.old_colors:
body_background_fill="white",
block_background_fill="transparent",
body_text_color='rgb(64, 64, 64)',
button_secondary_background_fill="#f4f4f4",
button_secondary_background_fill="white",
button_secondary_border_color="var(--border-color-primary)",
input_shadow="none",
button_shadow_hover="none",
# Dark Mode Colors
input_background_fill_dark='var(--darker-gray)',
@@ -95,6 +109,7 @@ if not shared.args.old_colors:
button_large_radius='0.375rem',
button_large_padding='6px 12px',
input_radius='0.375rem',
block_radius='0',
)
if Path("user_data/notification.mp3").exists():
@@ -194,7 +209,6 @@ def list_interface_input_elements():
'max_new_tokens',
'prompt_lookup_num_tokens',
'max_tokens_second',
'max_updates_second',
'do_sample',
'dynamic_temperature',
'temperature_last',
@@ -257,6 +271,11 @@ def list_interface_input_elements():
# Model elements
elements += list_model_elements()
# Other elements
elements += [
'paste_to_attachment'
]
return elements
@@ -270,6 +289,13 @@ def gather_interface_values(*args):
if not shared.args.multi_user:
shared.persistent_interface_state = output
# Remove the chat input, as it gets cleared after this function call
shared.persistent_interface_state.pop('textbox')
# Prevent history loss if backend is restarted but UI is not refreshed
if output['history'] is None and output['unique_id'] is not None:
output['history'] = load_history(output['unique_id'], output['character_menu'], output['mode'])
return output
@@ -292,7 +318,7 @@ def apply_interface_values(state, use_persistent=False):
def save_settings(state, preset, extensions_list, show_controls, theme_state):
output = copy.deepcopy(shared.settings)
exclude = ['name2', 'greeting', 'context', 'truncation_length', 'instruction_template_str']
exclude = []
for k in state:
if k in shared.settings and k not in exclude:
output[k] = state[k]
@@ -301,10 +327,11 @@ def save_settings(state, preset, extensions_list, show_controls, theme_state):
output['prompt-default'] = state['prompt_menu-default']
output['prompt-notebook'] = state['prompt_menu-notebook']
output['character'] = state['character_menu']
output['default_extensions'] = extensions_list
output['seed'] = int(output['seed'])
output['show_controls'] = show_controls
output['dark_theme'] = True if theme_state == 'dark' else False
output.pop('instruction_template_str')
output.pop('truncation_length')
# Save extension values in the UI
for extension_name in extensions_list:
@@ -327,6 +354,143 @@ def save_settings(state, preset, extensions_list, show_controls, theme_state):
return yaml.dump(output, sort_keys=False, width=float("inf"), allow_unicode=True)
def store_current_state_and_debounce(interface_state, preset, extensions, show_controls, theme_state):
"""Store current state and trigger debounced save"""
global _auto_save_timer, _last_interface_state, _last_preset, _last_extensions, _last_show_controls, _last_theme_state
if shared.args.multi_user:
return
# Store the current state in global variables
_last_interface_state = interface_state
_last_preset = preset
_last_extensions = extensions
_last_show_controls = show_controls
_last_theme_state = theme_state
# Reset the debounce timer
with _auto_save_lock:
if _auto_save_timer is not None:
_auto_save_timer.cancel()
_auto_save_timer = threading.Timer(1.0, _perform_debounced_save)
_auto_save_timer.start()
def _perform_debounced_save():
"""Actually perform the save using the stored state"""
global _auto_save_timer
try:
if _last_interface_state is not None:
contents = save_settings(_last_interface_state, _last_preset, _last_extensions, _last_show_controls, _last_theme_state)
settings_path = Path('user_data') / 'settings.yaml'
settings_path.parent.mkdir(exist_ok=True)
with open(settings_path, 'w', encoding='utf-8') as f:
f.write(contents)
except Exception as e:
print(f"Auto-save failed: {e}")
finally:
with _auto_save_lock:
_auto_save_timer = None
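The auto-save above is a classic timer-based debounce: each change cancels the pending `threading.Timer` and arms a fresh one, so the file is written only once things have been quiet for a second. The same pattern as a small reusable sketch:

```python
import threading

class Debouncer:
    def __init__(self, fn, delay=1.0):
        self.fn, self.delay = fn, delay
        self._timer = None
        self._lock = threading.Lock()

    def __call__(self, *args, **kwargs):
        with self._lock:
            if self._timer is not None:
                self._timer.cancel()  # restart the quiet period
            self._timer = threading.Timer(self.delay, self.fn, args, kwargs)
            self._timer.start()

save = Debouncer(lambda: print('saved'), delay=1.0)
for _ in range(5):
    save()  # only the last call fires, about one second later
```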
def setup_auto_save():
"""Attach auto-save to key UI elements"""
if shared.args.multi_user:
return
change_elements = [
# Chat tab (ui_chat.py)
'start_with',
'enable_web_search',
'web_search_pages',
'mode',
'chat_style',
'chat-instruct_command',
'character_menu',
'name1',
'name2',
'context',
'greeting',
'user_bio',
'custom_system_message',
'chat_template_str',
# Parameters tab (ui_parameters.py) - Generation parameters
'preset_menu',
'temperature',
'dynatemp_low',
'dynatemp_high',
'dynatemp_exponent',
'smoothing_factor',
'smoothing_curve',
'min_p',
'top_p',
'top_k',
'typical_p',
'xtc_threshold',
'xtc_probability',
'epsilon_cutoff',
'eta_cutoff',
'tfs',
'top_a',
'top_n_sigma',
'dry_multiplier',
'dry_allowed_length',
'dry_base',
'repetition_penalty',
'frequency_penalty',
'presence_penalty',
'encoder_repetition_penalty',
'no_repeat_ngram_size',
'repetition_penalty_range',
'penalty_alpha',
'guidance_scale',
'mirostat_mode',
'mirostat_tau',
'mirostat_eta',
'max_new_tokens',
'prompt_lookup_num_tokens',
'max_tokens_second',
'do_sample',
'dynamic_temperature',
'temperature_last',
'auto_max_new_tokens',
'ban_eos_token',
'add_bos_token',
'enable_thinking',
'skip_special_tokens',
'stream',
'static_cache',
'truncation_length',
'seed',
'sampler_priority',
'custom_stopping_strings',
'custom_token_bans',
'negative_prompt',
'dry_sequence_breakers',
'grammar_string',
# Default tab (ui_default.py)
'prompt_menu-default',
# Notebook tab (ui_notebook.py)
'prompt_menu-notebook',
# Session tab (ui_session.py)
'show_controls',
'theme_state',
'paste_to_attachment'
]
for element_name in change_elements:
if element_name in shared.gradio:
shared.gradio[element_name].change(
gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
store_current_state_and_debounce, gradio('interface_state', 'preset_menu', 'extensions_menu', 'show_controls', 'theme_state'), None, show_progress=False)
def create_refresh_button(refresh_component, refresh_method, refreshed_args, elem_class, interactive=True):
"""
Copied from https://github.com/AUTOMATIC1111/stable-diffusion-webui

View file

@@ -18,23 +18,23 @@ def create_ui():
mu = shared.args.multi_user
shared.gradio['Chat input'] = gr.State()
shared.gradio['history'] = gr.JSON(visible=False)
shared.gradio['history'] = gr.State({'internal': [], 'visible': [], 'metadata': {}})
with gr.Tab('Chat', id='Chat', elem_id='chat-tab'):
with gr.Row(elem_id='past-chats-row', elem_classes=['pretty_scrollbar']):
with gr.Column():
with gr.Row(elem_id='past-chats-buttons'):
shared.gradio['branch_chat'] = gr.Button('Branch', elem_classes='refresh-button', elem_id='Branch', interactive=not mu)
shared.gradio['branch_chat'] = gr.Button('Branch', elem_classes=['refresh-button', 'refresh-button-medium'], elem_id='Branch', interactive=not mu)
shared.gradio['rename_chat'] = gr.Button('Rename', elem_classes=['refresh-button', 'refresh-button-medium'], interactive=not mu)
shared.gradio['delete_chat'] = gr.Button('🗑️', visible=False, elem_classes='refresh-button', interactive=not mu, elem_id='delete_chat')
shared.gradio['Start new chat'] = gr.Button('New chat', elem_classes=['refresh-button', 'refresh-button-medium', 'focus-on-chat-input'])
shared.gradio['branch_index'] = gr.Number(value=-1, precision=0, visible=False, elem_id="Branch-index", interactive=True)
shared.gradio['rename_chat'] = gr.Button('Rename', elem_classes='refresh-button', interactive=not mu)
shared.gradio['delete_chat'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
shared.gradio['Start new chat'] = gr.Button('New chat', elem_classes=['refresh-button', 'focus-on-chat-input'])
shared.gradio['search_chat'] = gr.Textbox(placeholder='Search chats...', max_lines=1, elem_id='search_chat')
with gr.Row(elem_id='delete-chat-row', visible=False) as shared.gradio['delete-chat-row']:
shared.gradio['delete_chat-cancel'] = gr.Button('Cancel', elem_classes=['refresh-button', 'focus-on-chat-input'])
shared.gradio['delete_chat-confirm'] = gr.Button('Confirm', variant='stop', elem_classes=['refresh-button', 'focus-on-chat-input'])
shared.gradio['delete_chat-cancel'] = gr.Button('Cancel', elem_classes=['refresh-button', 'focus-on-chat-input'], elem_id='delete_chat-cancel')
shared.gradio['delete_chat-confirm'] = gr.Button('Confirm', variant='stop', elem_classes=['refresh-button', 'focus-on-chat-input'], elem_id='delete_chat-confirm')
with gr.Row(elem_id='rename-row', visible=False) as shared.gradio['rename-row']:
shared.gradio['rename_to'] = gr.Textbox(label='Rename to:', placeholder='New name', elem_classes=['no-background'])
@@ -55,7 +55,6 @@ def create_ui():
with gr.Column(scale=10, elem_id='chat-input-container'):
shared.gradio['textbox'] = gr.MultimodalTextbox(label='', placeholder='Send a message', file_types=['text', '.pdf'], file_count="multiple", elem_id='chat-input', elem_classes=['add_scrollbar'])
shared.gradio['show_controls'] = gr.Checkbox(value=shared.settings['show_controls'], label='Show controls (Ctrl+S)', elem_id='show-controls')
shared.gradio['typing-dots'] = gr.HTML(value='<div class="typing"><span></span><span class="dot1"></span><span class="dot2"></span></div>', label='typing', elem_id='typing-container')
with gr.Column(scale=1, elem_id='generate-stop-container'):
@@ -65,21 +64,15 @@ def create_ui():
# Hover menu buttons
with gr.Column(elem_id='chat-buttons'):
with gr.Row():
shared.gradio['Regenerate'] = gr.Button('Regenerate (Ctrl + Enter)', elem_id='Regenerate')
shared.gradio['Continue'] = gr.Button('Continue (Alt + Enter)', elem_id='Continue')
shared.gradio['Remove last'] = gr.Button('Remove last reply (Ctrl + Shift + Backspace)', elem_id='Remove-last')
with gr.Row():
shared.gradio['Impersonate'] = gr.Button('Impersonate (Ctrl + Shift + M)', elem_id='Impersonate')
with gr.Row():
shared.gradio['Send dummy message'] = gr.Button('Send dummy message')
shared.gradio['Send dummy reply'] = gr.Button('Send dummy reply')
with gr.Row():
shared.gradio['send-chat-to-default'] = gr.Button('Send to Default')
shared.gradio['send-chat-to-notebook'] = gr.Button('Send to Notebook')
shared.gradio['Regenerate'] = gr.Button('Regenerate (Ctrl + Enter)', elem_id='Regenerate')
shared.gradio['Continue'] = gr.Button('Continue (Alt + Enter)', elem_id='Continue')
shared.gradio['Remove last'] = gr.Button('Remove last reply (Ctrl + Shift + Backspace)', elem_id='Remove-last')
shared.gradio['Impersonate'] = gr.Button('Impersonate (Ctrl + Shift + M)', elem_id='Impersonate')
shared.gradio['Send dummy message'] = gr.Button('Send dummy message')
shared.gradio['Send dummy reply'] = gr.Button('Send dummy reply')
shared.gradio['send-chat-to-default'] = gr.Button('Send to Default')
shared.gradio['send-chat-to-notebook'] = gr.Button('Send to Notebook')
shared.gradio['show_controls'] = gr.Checkbox(value=shared.settings['show_controls'], label='Show controls (Ctrl+S)', elem_id='show-controls')
with gr.Row(elem_id='chat-controls', elem_classes=['pretty_scrollbar']):
with gr.Column():
@@ -87,13 +80,13 @@ def create_ui():
shared.gradio['start_with'] = gr.Textbox(label='Start reply with', placeholder='Sure thing!', value=shared.settings['start_with'], elem_classes=['add_scrollbar'])
with gr.Row():
shared.gradio['enable_web_search'] = gr.Checkbox(value=shared.settings.get('enable_web_search', False), label='Activate web search')
shared.gradio['enable_web_search'] = gr.Checkbox(value=shared.settings.get('enable_web_search', False), label='Activate web search', elem_id='web-search')
with gr.Row(visible=shared.settings.get('enable_web_search', False)) as shared.gradio['web_search_row']:
shared.gradio['web_search_pages'] = gr.Number(value=shared.settings.get('web_search_pages', 3), precision=0, label='Number of pages to download', minimum=1, maximum=10)
with gr.Row():
shared.gradio['mode'] = gr.Radio(choices=['instruct', 'chat-instruct', 'chat'], value=shared.settings['mode'] if shared.settings['mode'] in ['chat', 'chat-instruct'] else None, label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template Parameters > Instruction template is used.', elem_id='chat-mode')
shared.gradio['mode'] = gr.Radio(choices=['instruct', 'chat-instruct', 'chat'], value=None, label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template Parameters > Instruction template is used.', elem_id='chat-mode')
with gr.Row():
shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label='Chat style', value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct')
@@ -125,14 +118,15 @@ def create_chat_settings_ui():
with gr.Column(scale=8):
with gr.Tab("Character"):
with gr.Row():
shared.gradio['character_menu'] = gr.Dropdown(value=None, choices=utils.get_available_characters(), label='Character', elem_id='character-menu', info='Used in chat and chat-instruct modes.', elem_classes='slim-dropdown')
shared.gradio['character_menu'] = gr.Dropdown(value=shared.settings['character'], choices=utils.get_available_characters(), label='Character', elem_id='character-menu', info='Used in chat and chat-instruct modes.', elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['character_menu'], lambda: None, lambda: {'choices': utils.get_available_characters()}, 'refresh-button', interactive=not mu)
shared.gradio['save_character'] = gr.Button('💾', elem_classes='refresh-button', elem_id="save-character", interactive=not mu)
shared.gradio['delete_character'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
shared.gradio['restore_character'] = gr.Button('Restore character', elem_classes='refresh-button', interactive=True, elem_id='restore-character')
shared.gradio['name2'] = gr.Textbox(value='', lines=1, label='Character\'s name')
shared.gradio['context'] = gr.Textbox(value='', lines=10, label='Context', elem_classes=['add_scrollbar'])
shared.gradio['greeting'] = gr.Textbox(value='', lines=5, label='Greeting', elem_classes=['add_scrollbar'])
shared.gradio['name2'] = gr.Textbox(value=shared.settings['name2'], lines=1, label='Character\'s name')
shared.gradio['context'] = gr.Textbox(value=shared.settings['context'], lines=10, label='Context', elem_classes=['add_scrollbar'])
shared.gradio['greeting'] = gr.Textbox(value=shared.settings['greeting'], lines=5, label='Greeting', elem_classes=['add_scrollbar'])
with gr.Tab("User"):
shared.gradio['name1'] = gr.Textbox(value=shared.settings['name1'], lines=1, label='Name')
@@ -185,7 +179,7 @@ def create_chat_settings_ui():
with gr.Row():
with gr.Column():
shared.gradio['custom_system_message'] = gr.Textbox(value=shared.settings['custom_system_message'], lines=2, label='Custom system message', info='If not empty, will be used instead of the default one.', elem_classes=['add_scrollbar'])
shared.gradio['instruction_template_str'] = gr.Textbox(value='', label='Instruction template', lines=24, info='This gets autodetected; you usually don\'t need to change it. Used in instruct and chat-instruct modes.', elem_classes=['add_scrollbar', 'monospace'])
shared.gradio['instruction_template_str'] = gr.Textbox(value=shared.settings['instruction_template_str'], label='Instruction template', lines=24, info='This gets autodetected; you usually don\'t need to change it. Used in instruct and chat-instruct modes.', elem_classes=['add_scrollbar', 'monospace'])
with gr.Row():
shared.gradio['send_instruction_to_default'] = gr.Button('Send to default', elem_classes=['small-button'])
shared.gradio['send_instruction_to_notebook'] = gr.Button('Send to notebook', elem_classes=['small-button'])
@@ -202,7 +196,7 @@ def create_event_handlers():
shared.reload_inputs = gradio(reload_arr)
# Morph HTML updates instead of updating everything
shared.gradio['display'].change(None, gradio('display'), None, js="(data) => handleMorphdomUpdate(data.html)")
shared.gradio['display'].change(None, gradio('display'), None, js="(data) => handleMorphdomUpdate(data)")
shared.gradio['Generate'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
@@ -267,11 +261,9 @@ def create_event_handlers():
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.handle_start_new_chat_click, gradio('interface_state'), gradio('history', 'display', 'unique_id'), show_progress=False)
shared.gradio['delete_chat'].click(lambda: gr.update(visible=True), None, gradio('delete-chat-row'))
shared.gradio['delete_chat-cancel'].click(lambda: gr.update(visible=False), None, gradio('delete-chat-row'))
shared.gradio['delete_chat-confirm'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.handle_delete_chat_confirm_click, gradio('interface_state'), gradio('history', 'display', 'unique_id', 'delete-chat-row'), show_progress=False)
chat.handle_delete_chat_confirm_click, gradio('interface_state'), gradio('history', 'display', 'unique_id'), show_progress=False)
shared.gradio['branch_chat'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
@@ -301,10 +293,12 @@ def create_event_handlers():
chat.handle_character_menu_change, gradio('interface_state'), gradio('history', 'display', 'name1', 'name2', 'character_picture', 'greeting', 'context', 'unique_id'), show_progress=False).then(
None, None, None, js=f'() => {{{ui.update_big_picture_js}; updateBigPicture()}}')
shared.gradio['character_picture'].change(chat.handle_character_picture_change, gradio('character_picture'), None, show_progress=False)
shared.gradio['mode'].change(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.handle_mode_change, gradio('interface_state'), gradio('history', 'display', 'chat_style', 'chat-instruct_command', 'unique_id'), show_progress=False).then(
None, gradio('mode'), None, js="(mode) => {mode === 'instruct' ? document.getElementById('character-menu').parentNode.parentNode.style.display = 'none' : document.getElementById('character-menu').parentNode.parentNode.style.display = ''}")
None, gradio('mode'), None, js="(mode) => {const characterContainer = document.getElementById('character-menu').parentNode.parentNode; const isInChatTab = document.querySelector('#chat-controls').contains(characterContainer); if (isInChatTab) { characterContainer.style.display = mode === 'instruct' ? 'none' : ''; }}")
shared.gradio['chat_style'].change(chat.redraw_html, gradio(reload_arr), gradio('display'), show_progress=False)
@@ -324,6 +318,10 @@ def create_event_handlers():
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.handle_save_template_click, gradio('instruction_template_str'), gradio('save_filename', 'save_root', 'save_contents', 'file_saver'), show_progress=False)
shared.gradio['restore_character'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.restore_character_for_ui, gradio('interface_state'), gradio('interface_state', 'name2', 'context', 'greeting', 'character_picture'), show_progress=False)
shared.gradio['delete_template'].click(chat.handle_delete_template_click, gradio('instruction_template'), gradio('delete_filename', 'delete_root', 'file_deleter'), show_progress=False)
shared.gradio['save_chat_history'].click(
lambda x: json.dumps(x, indent=4), gradio('history'), gradio('temporary_text')).then(

View file

@@ -19,7 +19,7 @@ def create_ui():
with gr.Row():
with gr.Column():
with gr.Row():
shared.gradio['textbox-default'] = gr.Textbox(value='', lines=27, label='Input', elem_classes=['textbox_default', 'add_scrollbar'])
shared.gradio['textbox-default'] = gr.Textbox(value=load_prompt(shared.settings['prompt-default']), lines=27, label='Input', elem_classes=['textbox_default', 'add_scrollbar'])
shared.gradio['token-counter-default'] = gr.HTML(value="<span>0</span>", elem_id="default-token-counter")
with gr.Row():
@@ -28,7 +28,7 @@ def create_ui():
shared.gradio['Generate-default'] = gr.Button('Generate', variant='primary')
with gr.Row():
shared.gradio['prompt_menu-default'] = gr.Dropdown(choices=utils.get_available_prompts(), value='None', label='Prompt', elem_classes='slim-dropdown')
shared.gradio['prompt_menu-default'] = gr.Dropdown(choices=utils.get_available_prompts(), value=shared.settings['prompt-default'], label='Prompt', elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['prompt_menu-default'], lambda: None, lambda: {'choices': utils.get_available_prompts()}, 'refresh-button', interactive=not mu)
shared.gradio['save_prompt-default'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
shared.gradio['delete_prompt-default'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)

View file

@@ -1,4 +1,6 @@
import importlib
import queue
import threading
import traceback
from functools import partial
from pathlib import Path
@@ -205,48 +207,51 @@ def load_lora_wrapper(selected_loras):
def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), return_links=False, check=False):
downloader_module = importlib.import_module("download-model")
downloader = downloader_module.ModelDownloader()
update_queue = queue.Queue()
try:
# Handle direct GGUF URLs
if repo_id.startswith("https://") and ("huggingface.co" in repo_id) and (repo_id.endswith(".gguf") or repo_id.endswith(".gguf?download=true")):
try:
path = repo_id.split("huggingface.co/")[1]
# Extract the repository ID (first two parts of the path)
parts = path.split("/")
if len(parts) >= 2:
extracted_repo_id = f"{parts[0]}/{parts[1]}"
# Extract the filename (last part of the path)
filename = repo_id.split("/")[-1]
if "?download=true" in filename:
filename = filename.replace("?download=true", "")
filename = repo_id.split("/")[-1].replace("?download=true", "")
repo_id = extracted_repo_id
specific_file = filename
except:
pass
except Exception as e:
yield f"Error parsing GGUF URL: {e}"
progress(0.0)
return
if repo_id == "":
yield ("Please enter a model path")
if not repo_id:
yield "Please enter a model path."
progress(0.0)
return
repo_id = repo_id.strip()
specific_file = specific_file.strip()
downloader = importlib.import_module("download-model").ModelDownloader()
progress(0.0)
progress(0.0, "Preparing download...")
model, branch = downloader.sanitize_model_and_branch_names(repo_id, None)
yield ("Getting the download links from Hugging Face")
yield "Getting download links from Hugging Face..."
links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=False, specific_file=specific_file)
if not links:
yield "No files found to download for the given model/criteria."
progress(0.0)
return
# Check for multiple GGUF files
gguf_files = [link for link in links if link.lower().endswith('.gguf')]
if len(gguf_files) > 1 and not specific_file:
output = "Multiple GGUF files found. Please copy one of the following filenames to the 'File name' field:\n\n```\n"
for link in gguf_files:
output += f"{Path(link).name}\n"
output += "```"
yield output
return
@@ -260,12 +265,9 @@ def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), retur
yield output
return
yield ("Getting the output folder")
yield "Determining output folder..."
output_folder = downloader.get_output_folder(
model,
branch,
is_lora,
is_llamacpp=is_llamacpp,
model, branch, is_lora, is_llamacpp=is_llamacpp,
model_dir=shared.args.model_dir if shared.args.model_dir != shared.args_defaults.model_dir else None
)
@@ -275,19 +277,65 @@ def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), retur
output_folder = Path(shared.args.lora_dir)
if check:
progress(0.5)
yield ("Checking previously downloaded files")
yield "Checking previously downloaded files..."
progress(0.5, "Verifying files...")
downloader.check_model_files(model, branch, links, sha256, output_folder)
progress(1.0)
else:
yield (f"Downloading file{'s' if len(links) > 1 else ''} to `{output_folder}/`")
downloader.download_model_files(model, branch, links, sha256, output_folder, progress_bar=progress, threads=4, is_llamacpp=is_llamacpp)
progress(1.0, "Verification complete.")
yield "File check complete."
return
yield (f"Model successfully saved to `{output_folder}/`.")
except:
progress(1.0)
yield traceback.format_exc().replace('\n', '\n\n')
yield ""
progress(0.0, "Download starting...")
def downloader_thread_target():
try:
downloader.download_model_files(
model, branch, links, sha256, output_folder,
progress_queue=update_queue,
threads=4,
is_llamacpp=is_llamacpp,
specific_file=specific_file
)
update_queue.put(("COMPLETED", f"Model successfully saved to `{output_folder}/`."))
except Exception as e:
tb_str = traceback.format_exc().replace('\n', '\n\n')
update_queue.put(("ERROR", tb_str))
download_thread = threading.Thread(target=downloader_thread_target)
download_thread.start()
while True:
try:
message = update_queue.get(timeout=0.2)
if not isinstance(message, tuple) or len(message) != 2:
continue
msg_identifier, data = message
if msg_identifier == "COMPLETED":
progress(1.0, "Download complete!")
yield data
break
elif msg_identifier == "ERROR":
progress(0.0, "Error occurred")
yield data
break
elif isinstance(msg_identifier, float):
progress_value = msg_identifier
description_str = data
progress(progress_value, f"Downloading: {description_str}")
except queue.Empty:
if not download_thread.is_alive():
yield "Download process finished."
break
download_thread.join()
except Exception as e:
progress(0.0)
tb_str = traceback.format_exc().replace('\n', '\n\n')
yield tb_str
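The rewritten wrapper moves the blocking download into a worker thread and polls a `queue.Queue` for `(progress, description)` tuples plus `COMPLETED`/`ERROR` sentinels, which keeps the Gradio generator responsive. The same producer-consumer shape in isolation (names here are illustrative, not the project's API):

```python
import queue
import threading
import time

def worker(q):
    # Producer: report (fraction, description) tuples, then a sentinel.
    for i in range(1, 4):
        time.sleep(0.1)
        q.put((i / 3, f"part {i}/3"))
    q.put(("COMPLETED", "done"))

q = queue.Queue()
t = threading.Thread(target=worker, args=(q,))
t.start()
while True:
    try:
        tag, data = q.get(timeout=0.2)
    except queue.Empty:
        if not t.is_alive():  # producer died without posting a sentinel
            break
        continue
    if tag == "COMPLETED":
        print(data)
        break
    print(f"{tag:.0%} {data}")
t.join()
```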
def update_truncation_length(current_length, state):

View file

@@ -22,7 +22,7 @@ def create_ui():
with gr.Column(scale=4):
with gr.Tab('Raw'):
with gr.Row():
shared.gradio['textbox-notebook'] = gr.Textbox(value='', lines=27, elem_id='textbox-notebook', elem_classes=['textbox', 'add_scrollbar'])
shared.gradio['textbox-notebook'] = gr.Textbox(value=load_prompt(shared.settings['prompt-notebook']), lines=27, elem_id='textbox-notebook', elem_classes=['textbox', 'add_scrollbar'])
shared.gradio['token-counter-notebook'] = gr.HTML(value="<span>0</span>", elem_id="notebook-token-counter")
with gr.Tab('Markdown'):
@@ -56,7 +56,7 @@ def create_ui():
with gr.Column(scale=1):
gr.HTML('<div style="padding-bottom: 13px"></div>')
with gr.Row():
shared.gradio['prompt_menu-notebook'] = gr.Dropdown(choices=utils.get_available_prompts(), value='None', label='Prompt', elem_classes='slim-dropdown')
shared.gradio['prompt_menu-notebook'] = gr.Dropdown(choices=utils.get_available_prompts(), value=shared.settings['prompt-notebook'], label='Prompt', elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['prompt_menu-notebook'], lambda: None, lambda: {'choices': utils.get_available_prompts()}, ['refresh-button', 'refresh-button-small'], interactive=not mu)
shared.gradio['save_prompt-notebook'] = gr.Button('💾', elem_classes=['refresh-button', 'refresh-button-small'], interactive=not mu)
shared.gradio['delete_prompt-notebook'] = gr.Button('🗑️', elem_classes=['refresh-button', 'refresh-button-small'], interactive=not mu)

View file

@@ -6,19 +6,19 @@ from modules import loaders, presets, shared, ui, ui_chat, utils
from modules.utils import gradio
def create_ui(default_preset):
def create_ui():
mu = shared.args.multi_user
generate_params = presets.load_preset(default_preset)
with gr.Tab("Parameters", elem_id="parameters"):
with gr.Tab("Generation"):
with gr.Row():
with gr.Column():
with gr.Row():
shared.gradio['preset_menu'] = gr.Dropdown(choices=utils.get_available_presets(), value=default_preset, label='Preset', elem_classes='slim-dropdown')
shared.gradio['preset_menu'] = gr.Dropdown(choices=utils.get_available_presets(), value=shared.settings['preset'], label='Preset', elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['preset_menu'], lambda: None, lambda: {'choices': utils.get_available_presets()}, 'refresh-button', interactive=not mu)
shared.gradio['save_preset'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
shared.gradio['delete_preset'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
shared.gradio['random_preset'] = gr.Button('🎲', elem_classes='refresh-button')
shared.gradio['reset_preset'] = gr.Button('Restore preset', elem_classes='refresh-button', interactive=True)
shared.gradio['neutralize_samplers'] = gr.Button('Neutralize samplers', elem_classes='refresh-button', interactive=True)
with gr.Column():
shared.gradio['filter_by_loader'] = gr.Dropdown(label="Filter by loader", choices=["All"] + list(loaders.loaders_and_params.keys()) if not shared.args.portable else ['llama.cpp'], value="All", elem_classes='slim-dropdown')
@@ -28,57 +28,60 @@ def create_ui(default_preset):
with gr.Row():
with gr.Column():
gr.Markdown('## Curve shape')
shared.gradio['temperature'] = gr.Slider(0.01, 5, value=generate_params['temperature'], step=0.01, label='temperature')
shared.gradio['dynatemp_low'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_low'], step=0.01, label='dynatemp_low', visible=generate_params['dynamic_temperature'])
shared.gradio['dynatemp_high'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_high'], step=0.01, label='dynatemp_high', visible=generate_params['dynamic_temperature'])
shared.gradio['dynatemp_exponent'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_exponent'], step=0.01, label='dynatemp_exponent', visible=generate_params['dynamic_temperature'])
shared.gradio['smoothing_factor'] = gr.Slider(0.0, 10.0, value=generate_params['smoothing_factor'], step=0.01, label='smoothing_factor', info='Activates Quadratic Sampling.')
shared.gradio['smoothing_curve'] = gr.Slider(1.0, 10.0, value=generate_params['smoothing_curve'], step=0.01, label='smoothing_curve', info='Adjusts the dropoff curve of Quadratic Sampling.')
shared.gradio['temperature'] = gr.Slider(0.01, 5, value=shared.settings['temperature'], step=0.01, label='temperature')
shared.gradio['dynatemp_low'] = gr.Slider(0.01, 5, value=shared.settings['dynatemp_low'], step=0.01, label='dynatemp_low', visible=shared.settings['dynamic_temperature'])
shared.gradio['dynatemp_high'] = gr.Slider(0.01, 5, value=shared.settings['dynatemp_high'], step=0.01, label='dynatemp_high', visible=shared.settings['dynamic_temperature'])
shared.gradio['dynatemp_exponent'] = gr.Slider(0.01, 5, value=shared.settings['dynatemp_exponent'], step=0.01, label='dynatemp_exponent', visible=shared.settings['dynamic_temperature'])
shared.gradio['smoothing_factor'] = gr.Slider(0.0, 10.0, value=shared.settings['smoothing_factor'], step=0.01, label='smoothing_factor', info='Activates Quadratic Sampling.')
shared.gradio['smoothing_curve'] = gr.Slider(1.0, 10.0, value=shared.settings['smoothing_curve'], step=0.01, label='smoothing_curve', info='Adjusts the dropoff curve of Quadratic Sampling.')
shared.gradio['dynamic_temperature'] = gr.Checkbox(value=shared.settings['dynamic_temperature'], label='dynamic_temperature')
gr.Markdown('## Curve cutoff')
shared.gradio['min_p'] = gr.Slider(0.0, 1.0, value=generate_params['min_p'], step=0.01, label='min_p')
shared.gradio['top_n_sigma'] = gr.Slider(0.0, 5.0, value=generate_params['top_n_sigma'], step=0.01, label='top_n_sigma')
shared.gradio['top_p'] = gr.Slider(0.0, 1.0, value=generate_params['top_p'], step=0.01, label='top_p')
shared.gradio['top_k'] = gr.Slider(0, 200, value=generate_params['top_k'], step=1, label='top_k')
shared.gradio['typical_p'] = gr.Slider(0.0, 1.0, value=generate_params['typical_p'], step=0.01, label='typical_p')
shared.gradio['xtc_threshold'] = gr.Slider(0, 0.5, value=generate_params['xtc_threshold'], step=0.01, label='xtc_threshold', info='If 2 or more tokens have probability above this threshold, consider removing all but the last one.')
shared.gradio['xtc_probability'] = gr.Slider(0, 1, value=generate_params['xtc_probability'], step=0.01, label='xtc_probability', info='Probability that the removal will actually happen. 0 disables the sampler. 1 makes it always happen.')
shared.gradio['epsilon_cutoff'] = gr.Slider(0, 9, value=generate_params['epsilon_cutoff'], step=0.01, label='epsilon_cutoff')
shared.gradio['eta_cutoff'] = gr.Slider(0, 20, value=generate_params['eta_cutoff'], step=0.01, label='eta_cutoff')
shared.gradio['tfs'] = gr.Slider(0.0, 1.0, value=generate_params['tfs'], step=0.01, label='tfs')
shared.gradio['top_a'] = gr.Slider(0.0, 1.0, value=generate_params['top_a'], step=0.01, label='top_a')
shared.gradio['min_p'] = gr.Slider(0.0, 1.0, value=shared.settings['min_p'], step=0.01, label='min_p')
shared.gradio['top_n_sigma'] = gr.Slider(0.0, 5.0, value=shared.settings['top_n_sigma'], step=0.01, label='top_n_sigma')
shared.gradio['top_p'] = gr.Slider(0.0, 1.0, value=shared.settings['top_p'], step=0.01, label='top_p')
shared.gradio['top_k'] = gr.Slider(0, 200, value=shared.settings['top_k'], step=1, label='top_k')
shared.gradio['typical_p'] = gr.Slider(0.0, 1.0, value=shared.settings['typical_p'], step=0.01, label='typical_p')
shared.gradio['xtc_threshold'] = gr.Slider(0, 0.5, value=shared.settings['xtc_threshold'], step=0.01, label='xtc_threshold', info='If 2 or more tokens have probability above this threshold, consider removing all but the last one.')
shared.gradio['xtc_probability'] = gr.Slider(0, 1, value=shared.settings['xtc_probability'], step=0.01, label='xtc_probability', info='Probability that the removal will actually happen. 0 disables the sampler. 1 makes it always happen.')
shared.gradio['epsilon_cutoff'] = gr.Slider(0, 9, value=shared.settings['epsilon_cutoff'], step=0.01, label='epsilon_cutoff')
shared.gradio['eta_cutoff'] = gr.Slider(0, 20, value=shared.settings['eta_cutoff'], step=0.01, label='eta_cutoff')
shared.gradio['tfs'] = gr.Slider(0.0, 1.0, value=shared.settings['tfs'], step=0.01, label='tfs')
shared.gradio['top_a'] = gr.Slider(0.0, 1.0, value=shared.settings['top_a'], step=0.01, label='top_a')
gr.Markdown('## Repetition suppression')
shared.gradio['dry_multiplier'] = gr.Slider(0, 5, value=generate_params['dry_multiplier'], step=0.01, label='dry_multiplier', info='Set to greater than 0 to enable DRY. Recommended value: 0.8.')
shared.gradio['dry_allowed_length'] = gr.Slider(1, 20, value=generate_params['dry_allowed_length'], step=1, label='dry_allowed_length', info='Longest sequence that can be repeated without being penalized.')
shared.gradio['dry_base'] = gr.Slider(1, 4, value=generate_params['dry_base'], step=0.01, label='dry_base', info='Controls how fast the penalty grows with increasing sequence length.')
shared.gradio['repetition_penalty'] = gr.Slider(1.0, 1.5, value=generate_params['repetition_penalty'], step=0.01, label='repetition_penalty')
shared.gradio['frequency_penalty'] = gr.Slider(0, 2, value=generate_params['frequency_penalty'], step=0.05, label='frequency_penalty')
shared.gradio['presence_penalty'] = gr.Slider(0, 2, value=generate_params['presence_penalty'], step=0.05, label='presence_penalty')
shared.gradio['encoder_repetition_penalty'] = gr.Slider(0.8, 1.5, value=generate_params['encoder_repetition_penalty'], step=0.01, label='encoder_repetition_penalty')
shared.gradio['no_repeat_ngram_size'] = gr.Slider(0, 20, step=1, value=generate_params['no_repeat_ngram_size'], label='no_repeat_ngram_size')
shared.gradio['repetition_penalty_range'] = gr.Slider(0, 4096, step=64, value=generate_params['repetition_penalty_range'], label='repetition_penalty_range')
shared.gradio['dry_multiplier'] = gr.Slider(0, 5, value=shared.settings['dry_multiplier'], step=0.01, label='dry_multiplier', info='Set to greater than 0 to enable DRY. Recommended value: 0.8.')
shared.gradio['dry_allowed_length'] = gr.Slider(1, 20, value=shared.settings['dry_allowed_length'], step=1, label='dry_allowed_length', info='Longest sequence that can be repeated without being penalized.')
shared.gradio['dry_base'] = gr.Slider(1, 4, value=shared.settings['dry_base'], step=0.01, label='dry_base', info='Controls how fast the penalty grows with increasing sequence length.')
shared.gradio['repetition_penalty'] = gr.Slider(1.0, 1.5, value=shared.settings['repetition_penalty'], step=0.01, label='repetition_penalty')
shared.gradio['frequency_penalty'] = gr.Slider(0, 2, value=shared.settings['frequency_penalty'], step=0.05, label='frequency_penalty')
shared.gradio['presence_penalty'] = gr.Slider(0, 2, value=shared.settings['presence_penalty'], step=0.05, label='presence_penalty')
shared.gradio['encoder_repetition_penalty'] = gr.Slider(0.8, 1.5, value=shared.settings['encoder_repetition_penalty'], step=0.01, label='encoder_repetition_penalty')
shared.gradio['no_repeat_ngram_size'] = gr.Slider(0, 20, step=1, value=shared.settings['no_repeat_ngram_size'], label='no_repeat_ngram_size')
shared.gradio['repetition_penalty_range'] = gr.Slider(0, 4096, step=64, value=shared.settings['repetition_penalty_range'], label='repetition_penalty_range')
with gr.Column():
gr.Markdown('## Alternative sampling methods')
shared.gradio['penalty_alpha'] = gr.Slider(0, 5, value=generate_params['penalty_alpha'], label='penalty_alpha', info='For Contrastive Search. do_sample must be unchecked.')
shared.gradio['guidance_scale'] = gr.Slider(-0.5, 2.5, step=0.05, value=generate_params['guidance_scale'], label='guidance_scale', info='For CFG. 1.5 is a good value.')
shared.gradio['mirostat_mode'] = gr.Slider(0, 2, step=1, value=generate_params['mirostat_mode'], label='mirostat_mode', info='mode=1 is for llama.cpp only.')
shared.gradio['mirostat_tau'] = gr.Slider(0, 10, step=0.01, value=generate_params['mirostat_tau'], label='mirostat_tau')
shared.gradio['mirostat_eta'] = gr.Slider(0, 1, step=0.01, value=generate_params['mirostat_eta'], label='mirostat_eta')
shared.gradio['penalty_alpha'] = gr.Slider(0, 5, value=shared.settings['penalty_alpha'], label='penalty_alpha', info='For Contrastive Search. do_sample must be unchecked.')
shared.gradio['guidance_scale'] = gr.Slider(-0.5, 2.5, step=0.05, value=shared.settings['guidance_scale'], label='guidance_scale', info='For CFG. 1.5 is a good value.')
shared.gradio['mirostat_mode'] = gr.Slider(0, 2, step=1, value=shared.settings['mirostat_mode'], label='mirostat_mode', info='mode=1 is for llama.cpp only.')
shared.gradio['mirostat_tau'] = gr.Slider(0, 10, step=0.01, value=shared.settings['mirostat_tau'], label='mirostat_tau')
shared.gradio['mirostat_eta'] = gr.Slider(0, 1, step=0.01, value=shared.settings['mirostat_eta'], label='mirostat_eta')
gr.Markdown('## Other options')
shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], value=shared.settings['max_new_tokens'], step=1, label='max_new_tokens', info='⚠️ Setting this too high can cause prompt truncation.')
shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.')
shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.')
shared.gradio['max_updates_second'] = gr.Slider(value=shared.settings['max_updates_second'], minimum=0, maximum=24, step=1, label='Maximum UI updates/second', info='Set this if you experience lag in the UI during streaming.')
shared.gradio['do_sample'] = gr.Checkbox(value=shared.settings['do_sample'], label='do_sample')
shared.gradio['temperature_last'] = gr.Checkbox(value=shared.settings['temperature_last'], label='temperature_last', info='Moves temperature/dynamic temperature/quadratic sampling to the end of the sampler stack, ignoring their positions in "Sampler priority".')
shared.gradio['sampler_priority'] = gr.Textbox(value=shared.settings['sampler_priority'], lines=10, label='Sampler priority', info='Parameter names separated by new lines or commas.', elem_classes=['add_scrollbar'])
shared.gradio['dry_sequence_breakers'] = gr.Textbox(value=shared.settings['dry_sequence_breakers'], label='dry_sequence_breakers', info='Tokens across which sequence matching is not continued. Specified as a comma-separated list of quoted strings.')
with gr.Column():
with gr.Row():
with gr.Column():
shared.gradio['do_sample'] = gr.Checkbox(value=generate_params['do_sample'], label='do_sample')
shared.gradio['dynamic_temperature'] = gr.Checkbox(value=generate_params['dynamic_temperature'], label='dynamic_temperature')
shared.gradio['temperature_last'] = gr.Checkbox(value=generate_params['temperature_last'], label='temperature_last', info='Moves temperature/dynamic temperature/quadratic sampling to the end of the sampler stack, ignoring their positions in "Sampler priority".')
with gr.Blocks():
shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], value=shared.settings['max_new_tokens'], step=1, label='max_new_tokens', info='⚠️ Setting this too high can cause prompt truncation.')
shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.')
shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.')
shared.gradio['auto_max_new_tokens'] = gr.Checkbox(value=shared.settings['auto_max_new_tokens'], label='auto_max_new_tokens', info='Expand max_new_tokens to the available context length.')
shared.gradio['ban_eos_token'] = gr.Checkbox(value=shared.settings['ban_eos_token'], label='Ban the eos_token', info='Forces the model to never end the generation prematurely.')
shared.gradio['add_bos_token'] = gr.Checkbox(value=shared.settings['add_bos_token'], label='Add the bos_token to the beginning of prompts', info='Disabling this can make the replies more creative.')
@@ -91,18 +94,16 @@ def create_ui(default_preset):
shared.gradio['truncation_length'] = gr.Number(precision=0, step=256, value=get_truncation_length(), label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length.')
shared.gradio['seed'] = gr.Number(value=shared.settings['seed'], label='Seed (-1 for random)')
shared.gradio['sampler_priority'] = gr.Textbox(value=generate_params['sampler_priority'], lines=12, label='Sampler priority', info='Parameter names separated by new lines or commas.', elem_classes=['add_scrollbar'])
shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=2, value=shared.settings["custom_stopping_strings"] or None, label='Custom stopping strings', info='Written between "" and separated by commas.', placeholder='"\\n", "\\nYou:"')
shared.gradio['custom_token_bans'] = gr.Textbox(value=shared.settings['custom_token_bans'] or None, label='Token bans', info='Token IDs to ban, separated by commas. The IDs can be found in the Default or Notebook tab.')
shared.gradio['negative_prompt'] = gr.Textbox(value=shared.settings['negative_prompt'], label='Negative prompt', info='For CFG. Only used when guidance_scale is different than 1.', lines=3, elem_classes=['add_scrollbar'])
shared.gradio['dry_sequence_breakers'] = gr.Textbox(value=generate_params['dry_sequence_breakers'], label='dry_sequence_breakers', info='Tokens across which sequence matching is not continued. Specified as a comma-separated list of quoted strings.')
with gr.Row() as shared.gradio['grammar_file_row']:
shared.gradio['grammar_file'] = gr.Dropdown(value='None', choices=utils.get_available_grammars(), label='Load grammar from file (.gbnf)', elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['grammar_file'], lambda: None, lambda: {'choices': utils.get_available_grammars()}, 'refresh-button', interactive=not mu)
shared.gradio['save_grammar'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
shared.gradio['delete_grammar'] = gr.Button('🗑️ ', elem_classes='refresh-button', interactive=not mu)
shared.gradio['grammar_string'] = gr.Textbox(value='', label='Grammar', lines=16, elem_classes=['add_scrollbar', 'monospace'])
shared.gradio['grammar_string'] = gr.Textbox(value=shared.settings['grammar_string'], label='Grammar', lines=16, elem_classes=['add_scrollbar', 'monospace'])
ui_chat.create_chat_settings_ui()
@@ -113,9 +114,13 @@ def create_event_handlers():
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
presets.load_preset_for_ui, gradio('preset_menu', 'interface_state'), gradio('interface_state') + gradio(presets.presets_params()), show_progress=False)
shared.gradio['random_preset'].click(
shared.gradio['reset_preset'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
presets.random_preset, gradio('interface_state'), gradio('interface_state') + gradio(presets.presets_params()), show_progress=False)
presets.reset_preset_for_ui, gradio('preset_menu', 'interface_state'), gradio('interface_state') + gradio(presets.presets_params()), show_progress=False)
shared.gradio['neutralize_samplers'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
presets.neutralize_samplers_for_ui, gradio('interface_state'), gradio('interface_state') + gradio(presets.presets_params()), show_progress=False)
shared.gradio['grammar_file'].change(load_grammar, gradio('grammar_file'), gradio('grammar_string'), show_progress=False)
shared.gradio['dynamic_temperature'].change(lambda x: [gr.update(visible=x)] * 3, gradio('dynamic_temperature'), gradio('dynatemp_low', 'dynatemp_high', 'dynatemp_exponent'), show_progress=False)
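The `lambda x: [gr.update(visible=x)] * 3` handler returns one update per output component, letting a single checkbox show or hide all three dynatemp sliders at once. A minimal standalone sketch (assumes a Gradio version where `gr.update` is available):

```python
import gradio as gr

with gr.Blocks() as demo:
    enable = gr.Checkbox(label='dynamic_temperature')
    low = gr.Slider(0.01, 5, visible=False, label='dynatemp_low')
    high = gr.Slider(0.01, 5, visible=False, label='dynatemp_high')
    exp = gr.Slider(0.01, 5, visible=False, label='dynatemp_exponent')
    # One gr.update per output component, all driven by the checkbox value.
    enable.change(lambda x: [gr.update(visible=x)] * 3, enable, [low, high, exp])

demo.launch()
```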

View file

@@ -1,7 +1,6 @@
import gradio as gr
from modules import shared, ui, utils
from modules.github import clone_or_pull_repository
from modules.utils import gradio
@@ -10,11 +9,14 @@ def create_ui():
with gr.Tab("Session", elem_id="session-tab"):
with gr.Row():
with gr.Column():
shared.gradio['reset_interface'] = gr.Button("Apply flags/extensions and restart", interactive=not mu)
with gr.Row():
shared.gradio['toggle_dark_mode'] = gr.Button('Toggle 💡')
shared.gradio['save_settings'] = gr.Button('Save UI defaults to user_data/settings.yaml', interactive=not mu)
gr.Markdown("## Settings")
shared.gradio['save_settings'] = gr.Button('Save settings to user_data/settings.yaml', elem_classes='refresh-button', interactive=not mu)
shared.gradio['toggle_dark_mode'] = gr.Button('Toggle light/dark theme 💡', elem_classes='refresh-button')
shared.gradio['paste_to_attachment'] = gr.Checkbox(label='Turn long pasted text into attachments in the Chat tab', value=shared.settings['paste_to_attachment'], elem_id='paste_to_attachment')
with gr.Column():
gr.Markdown("## Extensions & flags")
shared.gradio['reset_interface'] = gr.Button("Apply flags/extensions and restart", interactive=not mu)
with gr.Row():
with gr.Column():
shared.gradio['extensions_menu'] = gr.CheckboxGroup(choices=utils.get_available_extensions(), value=shared.args.extensions, label="Available extensions", info='Note that some of these extensions may require manually installing Python requirements through the command: pip install -r extensions/extension_name/requirements.txt', elem_classes='checkboxgroup-table')
@@ -22,30 +24,20 @@ def create_ui():
with gr.Column():
shared.gradio['bool_menu'] = gr.CheckboxGroup(choices=get_boolean_arguments(), value=get_boolean_arguments(active=True), label="Boolean command-line flags", elem_classes='checkboxgroup-table')
with gr.Column():
if not shared.args.portable:
extension_name = gr.Textbox(lines=1, label='Install or update an extension', info='Enter the GitHub URL below and press Enter. For a list of extensions, see: https://github.com/oobabooga/text-generation-webui-extensions ⚠️ WARNING ⚠️ : extensions can execute arbitrary code. Make sure to inspect their source code before activating them.', interactive=not mu)
extension_status = gr.Markdown()
else:
pass
shared.gradio['theme_state'] = gr.Textbox(visible=False, value='dark' if shared.settings['dark_theme'] else 'light')
if not shared.args.portable:
extension_name.submit(clone_or_pull_repository, extension_name, extension_status, show_progress=False)
shared.gradio['save_settings'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
handle_save_settings, gradio('interface_state', 'preset_menu', 'extensions_menu', 'show_controls', 'theme_state'), gradio('save_contents', 'save_filename', 'save_root', 'file_saver'), show_progress=False)
shared.gradio['toggle_dark_mode'].click(
lambda x: 'dark' if x == 'light' else 'light', gradio('theme_state'), gradio('theme_state')).then(
None, None, None, js=f'() => {{{ui.dark_theme_js}; toggleDarkMode(); localStorage.setItem("theme", document.body.classList.contains("dark") ? "dark" : "light")}}')
# Reset interface event
shared.gradio['reset_interface'].click(
set_interface_arguments, gradio('extensions_menu', 'bool_menu'), None).then(
None, None, None, js='() => {document.body.innerHTML=\'<h1 style="font-family:monospace;padding-top:20%;margin:0;height:100vh;color:lightgray;text-align:center;background:var(--body-background-fill)">Reloading...</h1>\'; setTimeout(function(){location.reload()},2500); return []}')
shared.gradio['toggle_dark_mode'].click(
lambda x: 'dark' if x == 'light' else 'light', gradio('theme_state'), gradio('theme_state')).then(
None, None, None, js=f'() => {{{ui.dark_theme_js}; toggleDarkMode()}}')
shared.gradio['save_settings'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
handle_save_settings, gradio('interface_state', 'preset_menu', 'extensions_menu', 'show_controls', 'theme_state'), gradio('save_contents', 'save_filename', 'save_root', 'file_saver'), show_progress=False)
def handle_save_settings(state, preset, extensions, show_controls, theme):
contents = ui.save_settings(state, preset, extensions, show_controls, theme)
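For context on the "Save settings to user_data/settings.yaml" button wired up above: handle_save_settings delegates to ui.save_settings, which serializes the gathered interface state. A rough sketch of that idea, with an illustrative field filter rather than the project's actual selection logic:

import yaml
from pathlib import Path

def save_ui_defaults(state, path='user_data/settings.yaml'):
    # Illustrative: keep plain, user-facing defaults and write them as YAML
    contents = {k: v for k, v in state.items() if not k.startswith('_')}
    out = Path(path)
    out.parent.mkdir(parents=True, exist_ok=True)
    out.write_text(yaml.dump(contents, sort_keys=False))
    return contents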

View file

@@ -3,7 +3,7 @@ import re
from datetime import datetime
from pathlib import Path
from modules import github, shared
from modules import shared
from modules.logging_colors import logger
@@ -182,7 +182,6 @@ def get_available_instruction_templates():
def get_available_extensions():
extensions = sorted(set(map(lambda x: x.parts[1], Path('extensions').glob('*/script.py'))), key=natural_keys)
extensions = [v for v in extensions if v not in github.new_extensions]
return extensions
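The glob above is the whole discovery mechanism: an extension is any directory under extensions/ that contains a script.py. A stripped-down illustration (omitting the natural_keys sort key from the same module):

from pathlib import Path

# extensions/<name>/script.py -> parts are ('extensions', '<name>', 'script.py')
names = sorted({p.parts[1] for p in Path('extensions').glob('*/script.py')})
print(names)  # e.g. ['character_bias', 'gallery', ...]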

View file

@@ -3,8 +3,6 @@ from concurrent.futures import as_completed
from datetime import datetime
import requests
from bs4 import BeautifulSoup
from duckduckgo_search import DDGS
from modules.logging_colors import logger
@@ -14,35 +12,39 @@ def get_current_timestamp():
return datetime.now().strftime('%b %d, %Y %H:%M')
def download_web_page(url, timeout=5):
"""Download and extract text from a web page"""
def download_web_page(url, timeout=10):
"""
Download a web page and convert its HTML content to structured Markdown text.
"""
import html2text
try:
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
response = requests.get(url, headers=headers, timeout=timeout)
response.raise_for_status()
response.raise_for_status() # Raise an exception for bad status codes
soup = BeautifulSoup(response.content, 'html.parser')
# Initialize the HTML to Markdown converter
h = html2text.HTML2Text()
h.body_width = 0
# Remove script and style elements
for script in soup(["script", "style"]):
script.decompose()
# Convert the HTML to Markdown
markdown_text = h.handle(response.text)
# Get text and clean it up
text = soup.get_text()
lines = (line.strip() for line in text.splitlines())
chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
text = ' '.join(chunk for chunk in chunks if chunk)
return text
except Exception as e:
return markdown_text
except requests.exceptions.RequestException as e:
logger.error(f"Error downloading {url}: {e}")
return f"[Error downloading content from {url}: {str(e)}]"
return ""
except Exception as e:
logger.error(f"An unexpected error occurred: {e}")
return ""
def perform_web_search(query, num_pages=3, max_workers=5):
"""Perform web search and return results with content"""
from duckduckgo_search import DDGS
try:
with DDGS() as ddgs:
results = list(ddgs.text(query, max_results=num_pages))
@@ -74,9 +76,7 @@ def perform_web_search(query, num_pages=3, max_workers=5):
'url': url,
'content': content
}
except Exception as e:
logger.error(f"Error downloading {url}: {e}")
# Include failed downloads with empty content
except Exception:
search_results[index] = {
'title': title,
'url': url,
@@ -107,6 +107,13 @@ def add_web_search_attachments(history, row_idx, user_message, search_query, sta
logger.warning("No search results found")
return
# Filter out failed downloads before adding attachments
successful_results = [result for result in search_results if result['content'].strip()]
if not successful_results:
logger.warning("No successful downloads to add as attachments")
return
# Add search results as attachments
key = f"user_{row_idx}"
if key not in history['metadata']:
@@ -114,7 +121,7 @@ def add_web_search_attachments(history, row_idx, user_message, search_query, sta
if "attachments" not in history['metadata'][key]:
history['metadata'][key]["attachments"] = []
for result in search_results:
for result in successful_results:
attachment = {
"name": result['title'],
"type": "text/html",
@@ -123,7 +130,7 @@ def add_web_search_attachments(history, row_idx, user_message, search_query, sta
}
history['metadata'][key]["attachments"].append(attachment)
logger.info(f"Added {len(search_results)} web search results as attachments")
logger.info(f"Added {len(successful_results)} successful web search results as attachments.")
except Exception as e:
logger.error(f"Error in web search: {e}")

View file

@@ -17,8 +17,6 @@ import sys
# Define the required versions
TORCH_VERSION = "2.6.0"
TORCHVISION_VERSION = "0.21.0"
TORCHAUDIO_VERSION = "2.6.0"
PYTHON_VERSION = "3.11"
LIBSTDCXX_VERSION_LINUX = "12.1.0"
@@ -70,12 +68,8 @@ def is_installed():
def cpu_has_avx2():
try:
import cpuinfo
info = cpuinfo.get_cpu_info()
if 'avx2' in info['flags']:
return True
else:
return False
return 'avx2' in info['flags']
except:
return True
@@ -83,30 +77,119 @@ def cpu_has_avx2():
def cpu_has_amx():
try:
import cpuinfo
info = cpuinfo.get_cpu_info()
if 'amx' in info['flags']:
return True
else:
return False
return 'amx' in info['flags']
except:
return True
def torch_version():
site_packages_path = None
for sitedir in site.getsitepackages():
if "site-packages" in sitedir and conda_env_path in sitedir:
site_packages_path = sitedir
break
def load_state():
"""Load installer state from JSON file"""
if os.path.exists(state_file):
try:
with open(state_file, 'r') as f:
return json.load(f)
except:
return {}
return {}
if site_packages_path:
torch_version_file = open(os.path.join(site_packages_path, 'torch', 'version.py')).read().splitlines()
torver = [line for line in torch_version_file if line.startswith('__version__')][0].split('__version__ = ')[1].strip("'")
def save_state(state):
"""Save installer state to JSON file"""
with open(state_file, 'w') as f:
json.dump(state, f)
def get_gpu_choice():
"""Get GPU choice from state file or ask user"""
state = load_state()
gpu_choice = state.get('gpu_choice')
if not gpu_choice:
if "GPU_CHOICE" in os.environ:
choice = os.environ["GPU_CHOICE"].upper()
print_big_message(f"Selected GPU choice \"{choice}\" based on the GPU_CHOICE environment variable.")
else:
choice = get_user_choice(
"What is your GPU?",
{
'A': 'NVIDIA - CUDA 12.4',
'B': 'AMD - Linux/macOS only, requires ROCm 6.2.4',
'C': 'Apple M Series',
'D': 'Intel Arc (beta)',
'E': 'NVIDIA - CUDA 12.8',
'N': 'CPU mode'
},
)
# Convert choice to GPU name
gpu_choice = {"A": "NVIDIA", "B": "AMD", "C": "APPLE", "D": "INTEL", "E": "NVIDIA_CUDA128", "N": "NONE"}[choice]
# Save choice to state
state['gpu_choice'] = gpu_choice
save_state(state)
return gpu_choice
def get_pytorch_install_command(gpu_choice):
"""Get PyTorch installation command based on GPU choice"""
base_cmd = f"python -m pip install torch=={TORCH_VERSION} "
if gpu_choice == "NVIDIA":
return base_cmd + "--index-url https://download.pytorch.org/whl/cu124"
elif gpu_choice == "NVIDIA_CUDA128":
return "python -m pip install torch==2.7.1 --index-url https://download.pytorch.org/whl/cu128"
elif gpu_choice == "AMD":
return base_cmd + "--index-url https://download.pytorch.org/whl/rocm6.2.4"
elif gpu_choice in ["APPLE", "NONE"]:
return base_cmd + "--index-url https://download.pytorch.org/whl/cpu"
elif gpu_choice == "INTEL":
if is_linux():
return "python -m pip install torch==2.1.0a0 intel-extension-for-pytorch==2.1.10+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
else:
return "python -m pip install torch==2.1.0a0 intel-extension-for-pytorch==2.1.10 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
else:
from torch import __version__ as torver
return base_cmd
return torver
def get_pytorch_update_command(gpu_choice):
"""Get PyTorch update command based on GPU choice"""
base_cmd = f"python -m pip install --upgrade torch=={TORCH_VERSION} "
if gpu_choice == "NVIDIA":
return f"{base_cmd} --index-url https://download.pytorch.org/whl/cu124"
elif gpu_choice == "NVIDIA_CUDA128":
return "python -m pip install --upgrade torch==2.7.1 --index-url https://download.pytorch.org/whl/cu128"
elif gpu_choice == "AMD":
return f"{base_cmd} --index-url https://download.pytorch.org/whl/rocm6.2.4"
elif gpu_choice in ["APPLE", "NONE"]:
return f"{base_cmd} --index-url https://download.pytorch.org/whl/cpu"
elif gpu_choice == "INTEL":
intel_extension = "intel-extension-for-pytorch==2.1.10+xpu" if is_linux() else "intel-extension-for-pytorch==2.1.10"
return f"{base_cmd} {intel_extension} --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
else:
return base_cmd
def get_requirements_file(gpu_choice):
"""Get requirements file path based on GPU choice"""
requirements_base = os.path.join("requirements", "full")
if gpu_choice == "AMD":
file_name = f"requirements_amd{'_noavx2' if not cpu_has_avx2() else ''}.txt"
elif gpu_choice == "APPLE":
file_name = f"requirements_apple_{'intel' if is_x86_64() else 'silicon'}.txt"
elif gpu_choice in ["INTEL", "NONE"]:
file_name = f"requirements_cpu_only{'_noavx2' if not cpu_has_avx2() else ''}.txt"
elif gpu_choice == "NVIDIA":
file_name = f"requirements{'_noavx2' if not cpu_has_avx2() else ''}.txt"
elif gpu_choice == "NVIDIA_CUDA128":
file_name = f"requirements_cuda128{'_noavx2' if not cpu_has_avx2() else ''}.txt"
else:
raise ValueError(f"Unknown GPU choice: {gpu_choice}")
return os.path.join(requirements_base, file_name)
def get_current_commit():
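The new load_state/save_state pair replaces the deleted torch_version() probe: instead of inferring the backend from the installed torch build string ("+cu", "+rocm", ...), the installer now remembers the explicit GPU choice in a JSON state file. An illustrative round trip through the helpers above:

state = load_state()            # {} on first run or if the file is unreadable
state['gpu_choice'] = 'NVIDIA'  # persisted so later updates skip the prompt
save_state(state)
assert load_state()['gpu_choice'] == 'NVIDIA'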
@@ -209,28 +292,8 @@ def get_user_choice(question, options_dict):
def update_pytorch_and_python():
print_big_message("Checking for PyTorch updates.")
# Update the Python version. Left here for future reference in case this becomes necessary.
# print_big_message("Checking for PyTorch and Python updates.")
# current_python_version = f"{sys.version_info.major}.{sys.version_info.minor}"
# if current_python_version != PYTHON_VERSION:
# run_cmd(f"conda install -y python={PYTHON_VERSION}", assert_success=True, environment=True)
torver = torch_version()
base_cmd = f"python -m pip install --upgrade torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION}"
if "+cu" in torver:
install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/cu124"
elif "+rocm" in torver:
install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/rocm6.2.4"
elif "+cpu" in torver:
install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/cpu"
elif "+cxx11" in torver:
intel_extension = "intel-extension-for-pytorch==2.1.10+xpu" if is_linux() else "intel-extension-for-pytorch==2.1.10"
install_cmd = f"{base_cmd} {intel_extension} --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
else:
install_cmd = base_cmd
gpu_choice = get_gpu_choice()
install_cmd = get_pytorch_update_command(gpu_choice)
run_cmd(install_cmd, assert_success=True, environment=True)
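update_pytorch_and_python() is now a straight table lookup through get_pytorch_update_command(). An illustrative check of the mapping defined above (output abbreviated):

for choice in ('NVIDIA', 'NVIDIA_CUDA128', 'AMD', 'APPLE', 'NONE'):
    print(choice, '->', get_pytorch_update_command(choice))
# NVIDIA         -> ...torch==2.6.0 --index-url https://download.pytorch.org/whl/cu124
# NVIDIA_CUDA128 -> ...torch==2.7.1 --index-url https://download.pytorch.org/whl/cu128
# AMD            -> ...torch==2.6.0 --index-url https://download.pytorch.org/whl/rocm6.2.4
# APPLE / NONE   -> ...torch==2.6.0 --index-url https://download.pytorch.org/whl/cpu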
@@ -256,43 +319,11 @@ def install_webui():
if os.path.isfile(state_file):
os.remove(state_file)
# Ask the user for the GPU vendor
if "GPU_CHOICE" in os.environ:
choice = os.environ["GPU_CHOICE"].upper()
print_big_message(f"Selected GPU choice \"{choice}\" based on the GPU_CHOICE environment variable.")
# Warn about changed meanings and handle old choices
if choice == "B":
print_big_message("Warning: GPU_CHOICE='B' now means 'AMD' in the new version.")
elif choice == "C":
print_big_message("Warning: GPU_CHOICE='C' now means 'Apple M Series' in the new version.")
elif choice == "D":
print_big_message("Warning: GPU_CHOICE='D' now means 'Intel Arc' in the new version.")
else:
choice = get_user_choice(
"What is your GPU?",
{
'A': 'NVIDIA - CUDA 12.4',
'B': 'AMD - Linux/macOS only, requires ROCm 6.2.4',
'C': 'Apple M Series',
'D': 'Intel Arc (beta)',
'N': 'CPU mode'
},
)
# Convert choices to GPU names for compatibility
gpu_choice_to_name = {
"A": "NVIDIA",
"B": "AMD",
"C": "APPLE",
"D": "INTEL",
"N": "NONE"
}
selected_gpu = gpu_choice_to_name[choice]
# Get GPU choice and save it to state
gpu_choice = get_gpu_choice()
# Write a flag to CMD_FLAGS.txt for CPU mode
if selected_gpu == "NONE":
if gpu_choice == "NONE":
cmd_flags_path = os.path.join(script_dir, "user_data", "CMD_FLAGS.txt")
with open(cmd_flags_path, 'r+') as cmd_flags_file:
if "--cpu" not in cmd_flags_file.read():
@@ -300,34 +331,22 @@ def install_webui():
cmd_flags_file.write("\n--cpu\n")
# Handle CUDA version display
elif any((is_windows(), is_linux())) and selected_gpu == "NVIDIA":
elif any((is_windows(), is_linux())) and gpu_choice == "NVIDIA":
print("CUDA: 12.4")
elif any((is_windows(), is_linux())) and gpu_choice == "NVIDIA_CUDA128":
print("CUDA: 12.8")
# No PyTorch for AMD on Windows (?)
elif is_windows() and selected_gpu == "AMD":
elif is_windows() and gpu_choice == "AMD":
print("PyTorch setup on Windows is not implemented yet. Exiting...")
sys.exit(1)
# Find the Pytorch installation command
install_pytorch = f"python -m pip install torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION} "
if selected_gpu == "NVIDIA":
install_pytorch += "--index-url https://download.pytorch.org/whl/cu124"
elif selected_gpu == "AMD":
install_pytorch += "--index-url https://download.pytorch.org/whl/rocm6.2.4"
elif selected_gpu in ["APPLE", "NONE"]:
install_pytorch += "--index-url https://download.pytorch.org/whl/cpu"
elif selected_gpu == "INTEL":
if is_linux():
install_pytorch = "python -m pip install torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
else:
install_pytorch = "python -m pip install torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
# Install Git and then Pytorch
print_big_message("Installing PyTorch.")
install_pytorch = get_pytorch_install_command(gpu_choice)
run_cmd(f"conda install -y ninja git && {install_pytorch} && python -m pip install py-cpuinfo==9.0.0", assert_success=True, environment=True)
if selected_gpu == "INTEL":
if gpu_choice == "INTEL":
# Install oneAPI dependencies via conda
print_big_message("Installing Intel oneAPI runtime libraries.")
run_cmd("conda install -y -c https://software.repos.intel.com/python/conda/ -c conda-forge dpcpp-cpp-rt=2024.0 mkl-dpcpp=2024.0", environment=True)
@@ -349,31 +368,15 @@ def update_requirements(initial_installation=False, pull=True):
assert_success=True
)
torver = torch_version()
requirements_base = os.path.join("requirements", "full")
if "+rocm" in torver:
file_name = f"requirements_amd{'_noavx2' if not cpu_has_avx2() else ''}.txt"
elif "+cpu" in torver or "+cxx11" in torver:
file_name = f"requirements_cpu_only{'_noavx2' if not cpu_has_avx2() else ''}.txt"
elif is_macos():
file_name = f"requirements_apple_{'intel' if is_x86_64() else 'silicon'}.txt"
else:
file_name = f"requirements{'_noavx2' if not cpu_has_avx2() else ''}.txt"
requirements_file = os.path.join(requirements_base, file_name)
# Load state from JSON file
current_commit = get_current_commit()
wheels_changed = False
if os.path.exists(state_file):
with open(state_file, 'r') as f:
last_state = json.load(f)
if 'wheels_changed' in last_state or last_state.get('last_installed_commit') != current_commit:
wheels_changed = not os.path.exists(state_file)
if not wheels_changed:
state = load_state()
if 'wheels_changed' in state or state.get('last_installed_commit') != current_commit:
wheels_changed = True
else:
wheels_changed = True
gpu_choice = get_gpu_choice()
requirements_file = get_requirements_file(gpu_choice)
if pull:
# Read .whl lines before pulling
@@ -409,19 +412,17 @@ def update_requirements(initial_installation=False, pull=True):
print_big_message(f"File '{file}' was updated during 'git pull'. Please run the script again.")
# Save state before exiting
current_state = {}
state = load_state()
if wheels_changed:
current_state['wheels_changed'] = True
with open(state_file, 'w') as f:
json.dump(current_state, f)
state['wheels_changed'] = True
save_state(state)
sys.exit(1)
# Save current state
current_state = {'last_installed_commit': current_commit}
with open(state_file, 'w') as f:
json.dump(current_state, f)
state = load_state()
state['last_installed_commit'] = current_commit
state.pop('wheels_changed', None) # Remove wheels_changed flag
save_state(state)
if os.environ.get("INSTALL_EXTENSIONS", "").lower() in ("yes", "y", "true", "1", "t", "on"):
install_extensions_requirements()
@@ -432,11 +433,10 @@ def update_requirements(initial_installation=False, pull=True):
# Update PyTorch
if not initial_installation:
update_pytorch_and_python()
torver = torch_version()
clean_outdated_pytorch_cuda_dependencies()
print_big_message(f"Installing webui requirements from file: {requirements_file}")
print(f"TORCH: {torver}\n")
print(f"GPU Choice: {gpu_choice}\n")
# Prepare the requirements file
textgen_requirements = open(requirements_file).read().splitlines()
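Requirements selection now follows the saved GPU choice as well. Illustrative outputs of get_requirements_file() on a Linux machine with an AVX2-capable CPU (the _noavx2 variants are chosen otherwise):

print(get_requirements_file('NVIDIA'))          # requirements/full/requirements.txt
print(get_requirements_file('NVIDIA_CUDA128'))  # requirements/full/requirements_cuda128.txt
print(get_requirements_file('AMD'))             # requirements/full/requirements_amd.txt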

View file

@@ -1,5 +1,4 @@
accelerate==1.5.*
beautifulsoup4==4.13.4
bitsandbytes==0.45.*
colorama
datasets
@@ -7,6 +6,7 @@ duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -16,6 +16,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
@@ -33,12 +34,12 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://github.com/oobabooga/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
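The wheel URLs above are gated by PEP 508 environment markers: pip evaluates the expression after the ";" against the current interpreter and platform, and skips the requirement when it is false. A quick way to test a marker locally, using the packaging library:

from packaging.markers import Marker

m = Marker('platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"')
print(m.evaluate())  # True only on 64-bit Linux under Python 3.11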

View file

@@ -1,11 +1,11 @@
accelerate==1.5.*
beautifulsoup4==4.13.4
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -15,6 +15,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
@@ -32,7 +33,7 @@ sse-starlette==1.6.5
tiktoken
# AMD wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"

View file

@@ -1,11 +1,11 @@
accelerate==1.5.*
beautifulsoup4==4.13.4
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -15,6 +15,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
@@ -32,7 +33,7 @@ sse-starlette==1.6.5
tiktoken
# AMD wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"

View file

@@ -1,11 +1,11 @@
accelerate==1.5.*
beautifulsoup4==4.13.4
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -15,6 +15,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
@@ -32,7 +33,7 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9-py3-none-any.whl
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3-py3-none-any.whl
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl

View file

@@ -1,11 +1,11 @@
accelerate==1.5.*
beautifulsoup4==4.13.4
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -15,6 +15,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
@@ -32,8 +33,8 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9-py3-none-any.whl
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3-py3-none-any.whl
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl

View file

@@ -1,11 +1,11 @@
accelerate==1.5.*
beautifulsoup4==4.13.4
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -15,6 +15,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
@@ -32,5 +33,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only, AVX2)
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"

View file

@@ -1,11 +1,11 @@
accelerate==1.5.*
beautifulsoup4==4.13.4
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -15,6 +15,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
@@ -32,5 +33,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only, no AVX2)
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"

View file

@@ -0,0 +1,45 @@
accelerate==1.5.*
bitsandbytes==0.45.*
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==2.2.*
pandas
peft==0.15.*
Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
safetensors==0.5.*
scipy
sentencepiece
tensorboard
transformers==4.50.*
tqdm
wandb
# API
flask_cloudflared==0.0.14
sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"

View file

@@ -0,0 +1,45 @@
accelerate==1.5.*
bitsandbytes==0.45.*
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==2.2.*
pandas
peft==0.15.*
Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
safetensors==0.5.*
scipy
sentencepiece
tensorboard
transformers==4.50.*
tqdm
wandb
# API
flask_cloudflared==0.0.14
sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"

View file

@@ -1,5 +1,4 @@
accelerate==1.5.*
beautifulsoup4==4.13.4
bitsandbytes==0.45.*
colorama
datasets
@@ -7,6 +6,7 @@ duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -16,6 +16,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
@@ -33,12 +34,12 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://github.com/oobabooga/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"

View file

@@ -1,11 +1,11 @@
accelerate==1.5.*
beautifulsoup4==4.13.4
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -15,6 +15,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich

View file

@@ -1,12 +1,13 @@
beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
@@ -18,5 +19,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

View file

@@ -1,12 +1,13 @@
beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
@@ -18,5 +19,5 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"

View file

@@ -1,12 +1,13 @@
beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
@@ -18,6 +19,6 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0"

View file

@@ -1,12 +1,13 @@
beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
@@ -18,5 +19,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only, AVX2)
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"

View file

@@ -1,12 +1,13 @@
beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
@@ -18,5 +19,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only, no AVX2)
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"

View file

@@ -1,12 +1,13 @@
beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
@@ -18,5 +19,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

View file

@@ -1,12 +1,13 @@
beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich

View file

@@ -1,12 +1,13 @@
beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
@@ -18,5 +19,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

View file

@@ -1,12 +1,13 @@
beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
python-docx==1.1.2
pyyaml
requests
rich
@@ -18,5 +19,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"

View file

@@ -1,12 +1,24 @@
import os
import shutil
import warnings
from pathlib import Path
from modules import shared
from modules.block_requests import OpenMonkeyPatch, RequestBlocker
from modules.logging_colors import logger
os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
os.environ['BITSANDBYTES_NOWELCOME'] = '1'
# Set up Gradio temp directory path
gradio_temp_path = Path('user_data') / 'cache' / 'gradio'
shutil.rmtree(gradio_temp_path, ignore_errors=True)
gradio_temp_path.mkdir(parents=True, exist_ok=True)
# Set environment variables
os.environ.update({
'GRADIO_ANALYTICS_ENABLED': 'False',
'BITSANDBYTES_NOWELCOME': '1',
'GRADIO_TEMP_DIR': str(gradio_temp_path)
})
warnings.filterwarnings('ignore', category=UserWarning, message='TypedStorage is deprecated')
warnings.filterwarnings('ignore', category=UserWarning, message='Using the update method is deprecated')
warnings.filterwarnings('ignore', category=UserWarning, message='Field "model_name" has conflict')
@@ -27,7 +39,6 @@ import signal
import sys
import time
from functools import partial
from pathlib import Path
from threading import Lock, Thread
import yaml
@@ -45,6 +56,7 @@ from modules import (
ui_session,
utils
)
from modules.chat import generate_pfp_cache
from modules.extensions import apply_extensions
from modules.LoRA import add_lora_to_model
from modules.models import load_model, unload_model_if_idle
@@ -60,6 +72,14 @@ from modules.utils import gradio
def signal_handler(sig, frame):
logger.info("Received Ctrl+C. Shutting down Text generation web UI gracefully.")
# Explicitly stop LlamaServer to avoid __del__ cleanup issues during shutdown
if shared.model and shared.model.__class__.__name__ == 'LlamaServer':
try:
shared.model.stop()
except:
pass
sys.exit(0)
@@ -85,17 +105,20 @@ def create_interface():
# Force some events to be triggered on page load
shared.persistent_interface_state.update({
'mode': shared.settings['mode'],
'loader': shared.args.loader or 'llama.cpp',
'mode': shared.settings['mode'] if shared.settings['mode'] == 'instruct' else gr.update(),
'character_menu': shared.args.character or shared.settings['character'],
'instruction_template_str': shared.settings['instruction_template_str'],
'prompt_menu-default': shared.settings['prompt-default'],
'prompt_menu-notebook': shared.settings['prompt-notebook'],
'filter_by_loader': (shared.args.loader or 'All') if not shared.args.portable else 'llama.cpp'
})
if Path("user_data/cache/pfp_character.png").exists():
Path("user_data/cache/pfp_character.png").unlink()
# Clear existing cache files
for cache_file in ['pfp_character.png', 'pfp_character_thumb.png']:
cache_path = Path(f"user_data/cache/{cache_file}")
if cache_path.exists():
cache_path.unlink()
# Regenerate for default character
if shared.settings['mode'] != 'instruct':
generate_pfp_cache(shared.settings['character'])
# css/js strings
css = ui.css
@@ -126,7 +149,7 @@ def create_interface():
ui_default.create_ui()
ui_notebook.create_ui()
ui_parameters.create_ui(shared.settings['preset']) # Parameters tab
ui_parameters.create_ui() # Parameters tab
ui_model_menu.create_ui() # Model tab
if not shared.args.portable:
training.create_ui() # Training tab
@@ -142,17 +165,35 @@ def create_interface():
ui_parameters.create_event_handlers()
ui_model_menu.create_event_handlers()
# UI persistence events
ui.setup_auto_save()
# Interface launch events
shared.gradio['interface'].load(
None,
gradio('show_controls'),
None,
js=f"""(x) => {{
if ({str(shared.settings['dark_theme']).lower()}) {{
document.getElementsByTagName('body')[0].classList.add('dark');
}}
else {{
document.getElementsByTagName('body')[0].classList.remove('dark');
// Check if this is first visit or if localStorage is out of sync
const savedTheme = localStorage.getItem('theme');
const serverTheme = {str(shared.settings['dark_theme']).lower()} ? 'dark' : 'light';
// If no saved theme or mismatch with server on first load, use server setting
if (!savedTheme || !sessionStorage.getItem('theme_synced')) {{
localStorage.setItem('theme', serverTheme);
sessionStorage.setItem('theme_synced', 'true');
if (serverTheme === 'dark') {{
document.getElementsByTagName('body')[0].classList.add('dark');
}} else {{
document.getElementsByTagName('body')[0].classList.remove('dark');
}}
}} else {{
// Use localStorage for subsequent reloads
if (savedTheme === 'dark') {{
document.getElementsByTagName('body')[0].classList.add('dark');
}} else {{
document.getElementsByTagName('body')[0].classList.remove('dark');
}}
}}
{js}
{ui.show_controls_js}
@@ -208,13 +249,7 @@ if __name__ == "__main__":
shared.model_config['.*'] = get_fallback_settings()
shared.model_config.move_to_end('.*', last=False) # Move to the beginning
# Activate the extensions listed on settings.yaml
extensions_module.available_extensions = utils.get_available_extensions()
for extension in shared.settings['default_extensions']:
shared.args.extensions = shared.args.extensions or []
if extension not in shared.args.extensions:
shared.args.extensions.append(extension)
available_models = utils.get_available_models()
# Model defined through --model
@@ -277,8 +312,8 @@ if __name__ == "__main__":
if shared.args.nowebui:
# Start the API in standalone mode
shared.args.extensions = [x for x in shared.args.extensions if x != 'gallery']
if shared.args.extensions is not None and len(shared.args.extensions) > 0:
shared.args.extensions = [x for x in (shared.args.extensions or []) if x != 'gallery']
if shared.args.extensions:
extensions_module.load_extensions()
else:
# Launch the web UI
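The signal handler added above stops a subprocess-backed LlamaServer explicitly instead of relying on __del__ during interpreter shutdown; elsewhere in server.py it is presumably registered with signal.signal. A self-contained analogue of the pattern, using a dummy child process:

import signal
import subprocess
import sys

server = subprocess.Popen([sys.executable, '-c', 'import time; time.sleep(3600)'])

def handler(sig, frame):
    # Terminate the child explicitly rather than trusting garbage collection
    server.terminate()
    server.wait(timeout=5)
    sys.exit(0)

signal.signal(signal.SIGINT, handler)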

View file

@@ -1,3 +0,0 @@
do_sample: false
top_k: 4
penalty_alpha: 0.3

View file

@@ -1 +0,0 @@
temperature: 1

View file

@@ -0,0 +1,3 @@
temperature: 0.7
top_p: 0.8
top_k: 20

View file

@@ -0,0 +1,3 @@
temperature: 0.6
top_p: 0.95
top_k: 20
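The two new presets above are plain YAML key/value overrides for sampler parameters. A minimal sketch of how such a file could be applied; the loader below is illustrative, not the project's presets module, and the file path is hypothetical:

import yaml

def apply_preset(path, params):
    # Parse the preset and overlay it on the current sampler parameters
    with open(path) as f:
        overrides = yaml.safe_load(f) or {}
    params.update(overrides)
    return params

params = {'temperature': 1.0, 'top_p': 1.0, 'top_k': 0}
print(apply_preset('user_data/presets/example.yaml', params))
# -> {'temperature': 0.7, 'top_p': 0.8, 'top_k': 20} for the first file above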

View file

@@ -1,77 +0,0 @@
show_controls: true
start_with: ''
mode: instruct
chat_style: cai-chat
chat-instruct_command: |-
Continue the chat dialogue below. Write a single reply for the character "<|character|>".
<|prompt|>
prompt-default: QA
prompt-notebook: QA
character: Assistant
name1: You
user_bio: ''
custom_system_message: ''
preset: min_p
max_new_tokens: 512
max_new_tokens_min: 1
max_new_tokens_max: 4096
prompt_lookup_num_tokens: 0
max_tokens_second: 0
max_updates_second: 12
auto_max_new_tokens: true
ban_eos_token: false
add_bos_token: true
enable_thinking: true
skip_special_tokens: true
stream: true
static_cache: false
truncation_length: 8192
seed: -1
custom_stopping_strings: ''
custom_token_bans: ''
negative_prompt: ''
dark_theme: true
default_extensions: []
instruction_template_str: |-
{%- set ns = namespace(found=false) -%}
{%- for message in messages -%}
{%- if message['role'] == 'system' -%}
{%- set ns.found = true -%}
{%- endif -%}
{%- endfor -%}
{%- if not ns.found -%}
{{- '' + 'Below is an instruction that describes a task. Write a response that appropriately completes the request.' + '\n\n' -}}
{%- endif %}
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{{- '' + message['content'] + '\n\n' -}}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{-'### Instruction:\n' + message['content'] + '\n\n'-}}
{%- else -%}
{{-'### Response:\n' + message['content'] + '\n\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}
{%- if add_generation_prompt -%}
{{-'### Response:\n'-}}
{%- endif -%}
chat_template_str: |-
{%- for message in messages %}
{%- if message['role'] == 'system' -%}
{%- if message['content'] -%}
{{- message['content'] + '\n\n' -}}
{%- endif -%}
{%- if user_bio -%}
{{- user_bio + '\n\n' -}}
{%- endif -%}
{%- else -%}
{%- if message['role'] == 'user' -%}
{{- name1 + ': ' + message['content'] + '\n'-}}
{%- else -%}
{{- name2 + ': ' + message['content'] + '\n' -}}
{%- endif -%}
{%- endif -%}
{%- endfor -%}