"
- for path in [Path(f"characters/{character}.{extension}") for extension in ['png', 'jpg', 'jpeg']]:
+ for path in [Path(f"user_data/characters/{character}.{extension}") for extension in ['png', 'jpg', 'jpeg']]:
if path.exists():
image_html = f'
'
break
diff --git a/extensions/openai/script.py b/extensions/openai/script.py
index f907cdbb..c2dc337b 100644
--- a/extensions/openai/script.py
+++ b/extensions/openai/script.py
@@ -86,6 +86,20 @@ app.add_middleware(
)
+@app.middleware("http")
+async def validate_host_header(request: Request, call_next):
+ # Be strict about only approving access to localhost by default
+ if not (shared.args.listen or shared.args.public_api):
+ host = request.headers.get("host", "").split(":")[0]
+ if host not in ["localhost", "127.0.0.1"]:
+ return JSONResponse(
+ status_code=400,
+ content={"detail": "Invalid host header"}
+ )
+
+ return await call_next(request)
+
+
@app.options("/", dependencies=check_key)
async def options_route():
return JSONResponse(content="OK")
@@ -236,6 +250,11 @@ async def handle_moderations(request: Request):
return JSONResponse(response)
+@app.get("/v1/internal/health", dependencies=check_key)
+async def handle_health_check():
+ return JSONResponse(content={"status": "ok"})
+
+
@app.post("/v1/internal/encode", response_model=EncodeResponse, dependencies=check_key)
async def handle_token_encode(request_data: EncodeRequest):
response = token_encode(request_data.text)
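The two server-side additions above can be exercised from a short client script. This is a hedged sketch: it assumes the API is running locally on its default port (5000) and that no --api-key was set; adjust both if your setup differs.

```python
import requests

BASE = "http://127.0.0.1:5000"  # assumption: default API port, no --api-key

# New health-check endpoint
r = requests.get(f"{BASE}/v1/internal/health")
print(r.status_code, r.json())  # expected: 200 {'status': 'ok'}

# The new middleware rejects foreign Host headers unless --listen or --public-api is used
r = requests.get(f"{BASE}/v1/internal/health", headers={"Host": "evil.example.com"})
print(r.status_code, r.json())  # expected: 400 {'detail': 'Invalid host header'}
```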
diff --git a/extensions/openai/typing.py b/extensions/openai/typing.py
index ea688897..4d6018f9 100644
--- a/extensions/openai/typing.py
+++ b/extensions/openai/typing.py
@@ -6,7 +6,7 @@ from pydantic import BaseModel, Field
class GenerationOptions(BaseModel):
- preset: str | None = Field(default=None, description="The name of a file under text-generation-webui/presets (without the .yaml extension). The sampling parameters that get overwritten by this option are the keys in the default_preset() function in modules/presets.py.")
+ preset: str | None = Field(default=None, description="The name of a file under text-generation-webui/user_data/presets (without the .yaml extension). The sampling parameters that get overwritten by this option are the keys in the default_preset() function in modules/presets.py.")
dynatemp_low: float = 1
dynatemp_high: float = 1
dynatemp_exponent: float = 1
@@ -103,10 +103,10 @@ class ChatCompletionRequestParams(BaseModel):
mode: str = Field(default='instruct', description="Valid options: instruct, chat, chat-instruct.")
- instruction_template: str | None = Field(default=None, description="An instruction template defined under text-generation-webui/instruction-templates. If not set, the correct template will be automatically obtained from the model metadata.")
+ instruction_template: str | None = Field(default=None, description="An instruction template defined under text-generation-webui/user_data/instruction-templates. If not set, the correct template will be automatically obtained from the model metadata.")
instruction_template_str: str | None = Field(default=None, description="A Jinja2 instruction template. If set, will take precedence over everything else.")
- character: str | None = Field(default=None, description="A character defined under text-generation-webui/characters. If not set, the default \"Assistant\" character will be used.")
+ character: str | None = Field(default=None, description="A character defined under text-generation-webui/user_data/characters. If not set, the default \"Assistant\" character will be used.")
bot_name: str | None = Field(default=None, description="Overwrites the value set by character field.", alias="name2")
context: str | None = Field(default=None, description="Overwrites the value set by character field.")
greeting: str | None = Field(default=None, description="Overwrites the value set by character field.")
diff --git a/extensions/superboogav2/chromadb.py b/extensions/superboogav2/chromadb.py
index c9e450e4..6e93dd92 100644
--- a/extensions/superboogav2/chromadb.py
+++ b/extensions/superboogav2/chromadb.py
@@ -148,7 +148,7 @@ class ChromaCollector():
id_ = new_ids[i]
metadata = metadatas[i] if metadatas is not None else None
embedding = self.embeddings_cache.get(text)
- if embedding is not None and embedding.any():
+ if embedding is not None and any(embedding):
existing_texts.append(text)
existing_embeddings.append(embedding)
existing_ids.append(id_)
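The `embedding.any()` → `any(embedding)` change above matters when the embeddings cache holds plain Python lists rather than NumPy arrays. A small illustration (values invented):

```python
import numpy as np

as_array = np.array([0.1, 0.2, 0.3])
as_list = [0.1, 0.2, 0.3]

print(as_array.any())  # True - ndarray has an .any() method
print(any(as_array))   # True - builtin any() also accepts ndarrays
print(any(as_list))    # True - and plain lists, which .any() would not handle:
# as_list.any() -> AttributeError: 'list' object has no attribute 'any'
```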
diff --git a/js/global_scope_js.js b/js/global_scope_js.js
index f308edb9..e808c473 100644
--- a/js/global_scope_js.js
+++ b/js/global_scope_js.js
@@ -31,24 +31,94 @@ function removeLastClick() {
}
function handleMorphdomUpdate(text) {
+ // Track closed blocks
+ const closedBlocks = new Set();
+ document.querySelectorAll(".thinking-block").forEach(block => {
+ const blockId = block.getAttribute("data-block-id");
+ // If block exists and is not open, add to closed set
+ if (blockId && !block.hasAttribute("open")) {
+ closedBlocks.add(blockId);
+ }
+ });
+
+ // Store scroll positions for any open blocks
+ const scrollPositions = {};
+ document.querySelectorAll(".thinking-block[open]").forEach(block => {
+ const content = block.querySelector(".thinking-content");
+ const blockId = block.getAttribute("data-block-id");
+ if (content && blockId) {
+ const isAtBottom = Math.abs((content.scrollHeight - content.scrollTop) - content.clientHeight) < 5;
+ scrollPositions[blockId] = {
+ position: content.scrollTop,
+ isAtBottom: isAtBottom
+ };
+ }
+ });
+
morphdom(
document.getElementById("chat").parentNode,
"
",
{
onBeforeElUpdated: function(fromEl, toEl) {
+ // Preserve code highlighting
if (fromEl.tagName === "PRE" && fromEl.querySelector("code[data-highlighted]")) {
const fromCode = fromEl.querySelector("code");
const toCode = toEl.querySelector("code");
if (fromCode && toCode && fromCode.textContent === toCode.textContent) {
-                    // If the <code> content is the same, preserve the entire element
toEl.className = fromEl.className;
toEl.innerHTML = fromEl.innerHTML;
- return false; // Skip updating the element
+ return false;
+ }
+ }
+
+ // For thinking blocks, respect closed state
+ if (fromEl.classList && fromEl.classList.contains("thinking-block") &&
+ toEl.classList && toEl.classList.contains("thinking-block")) {
+ const blockId = toEl.getAttribute("data-block-id");
+ // If this block was closed by user, keep it closed
+ if (blockId && closedBlocks.has(blockId)) {
+ toEl.removeAttribute("open");
+ }
+ }
+
+ return !fromEl.isEqualNode(toEl);
+ },
+
+ onElUpdated: function(el) {
+ // Restore scroll positions for open thinking blocks
+ if (el.classList && el.classList.contains("thinking-block") && el.hasAttribute("open")) {
+ const blockId = el.getAttribute("data-block-id");
+ const content = el.querySelector(".thinking-content");
+
+ if (content && blockId && scrollPositions[blockId]) {
+ setTimeout(() => {
+ if (scrollPositions[blockId].isAtBottom) {
+ content.scrollTop = content.scrollHeight;
+ } else {
+ content.scrollTop = scrollPositions[blockId].position;
+ }
+ }, 0);
}
}
- return !fromEl.isEqualNode(toEl); // Update only if nodes differ
}
}
);
+
+ // Add toggle listeners for new blocks
+ document.querySelectorAll(".thinking-block").forEach(block => {
+ if (!block._hasToggleListener) {
+ block.addEventListener("toggle", function(e) {
+ if (this.open) {
+ const content = this.querySelector(".thinking-content");
+ if (content) {
+ setTimeout(() => {
+ content.scrollTop = content.scrollHeight;
+ }, 0);
+ }
+ }
+ });
+ block._hasToggleListener = true;
+ }
+ });
}
diff --git a/js/main.js b/js/main.js
index c5c47d04..33b7d6bd 100644
--- a/js/main.js
+++ b/js/main.js
@@ -395,7 +395,7 @@ let bigPictureVisible = false;
function addBigPicture() {
var imgElement = document.createElement("img");
var timestamp = new Date().getTime();
- imgElement.src = "/file/cache/pfp_character.png?time=" + timestamp;
+ imgElement.src = "/file/user_data/cache/pfp_character.png?time=" + timestamp;
imgElement.classList.add("bigProfilePicture");
imgElement.addEventListener("load", function () {
this.style.visibility = "visible";
diff --git a/js/update_big_picture.js b/js/update_big_picture.js
index 4c094776..ec51d63b 100644
--- a/js/update_big_picture.js
+++ b/js/update_big_picture.js
@@ -2,6 +2,6 @@ function updateBigPicture() {
var existingElement = document.querySelector(".bigProfilePicture");
if (existingElement) {
var timestamp = new Date().getTime();
- existingElement.src = "/file/cache/pfp_character.png?time=" + timestamp;
+ existingElement.src = "/file/user_data/cache/pfp_character.png?time=" + timestamp;
}
}
diff --git a/modules/chat.py b/modules/chat.py
index fd949907..e117e6ee 100644
--- a/modules/chat.py
+++ b/modules/chat.py
@@ -417,16 +417,8 @@ def generate_chat_reply(text, state, regenerate=False, _continue=False, loading_
yield history
return
- show_after = html.escape(state.get("show_after")) if state.get("show_after") else None
for history in chatbot_wrapper(text, state, regenerate=regenerate, _continue=_continue, loading_message=loading_message, for_ui=for_ui):
- if show_after:
- after = history["visible"][-1][1].partition(show_after)[2] or "*Is thinking...*"
- yield {
- 'internal': history['internal'],
- 'visible': history['visible'][:-1] + [[history['visible'][-1][0], after]]
- }
- else:
- yield history
+ yield history
def character_is_loaded(state, raise_exception=False):
@@ -533,9 +525,9 @@ def start_new_chat(state):
def get_history_file_path(unique_id, character, mode):
if mode == 'instruct':
- p = Path(f'logs/instruct/{unique_id}.json')
+ p = Path(f'user_data/logs/instruct/{unique_id}.json')
else:
- p = Path(f'logs/chat/{character}/{unique_id}.json')
+ p = Path(f'user_data/logs/chat/{character}/{unique_id}.json')
return p
@@ -571,13 +563,13 @@ def rename_history(old_id, new_id, character, mode):
def get_paths(state):
if state['mode'] == 'instruct':
- return Path('logs/instruct').glob('*.json')
+ return Path('user_data/logs/instruct').glob('*.json')
else:
character = state['character_menu']
# Handle obsolete filenames and paths
- old_p = Path(f'logs/{character}_persistent.json')
- new_p = Path(f'logs/persistent_{character}.json')
+ old_p = Path(f'user_data/logs/{character}_persistent.json')
+ new_p = Path(f'user_data/logs/persistent_{character}.json')
if old_p.exists():
logger.warning(f"Renaming \"{old_p}\" to \"{new_p}\"")
old_p.rename(new_p)
@@ -589,7 +581,7 @@ def get_paths(state):
p.parent.mkdir(exist_ok=True)
new_p.rename(p)
- return Path(f'logs/chat/{character}').glob('*.json')
+ return Path(f'user_data/logs/chat/{character}').glob('*.json')
def find_all_histories(state):
@@ -740,7 +732,7 @@ def generate_pfp_cache(character):
if not cache_folder.exists():
cache_folder.mkdir()
- for path in [Path(f"characters/{character}.{extension}") for extension in ['png', 'jpg', 'jpeg']]:
+ for path in [Path(f"user_data/characters/{character}.{extension}") for extension in ['png', 'jpg', 'jpeg']]:
if path.exists():
original_img = Image.open(path)
original_img.save(Path(f'{cache_folder}/pfp_character.png'), format='PNG')
@@ -760,12 +752,12 @@ def load_character(character, name1, name2):
filepath = None
for extension in ["yml", "yaml", "json"]:
- filepath = Path(f'characters/{character}.{extension}')
+ filepath = Path(f'user_data/characters/{character}.{extension}')
if filepath.exists():
break
if filepath is None or not filepath.exists():
- logger.error(f"Could not find the character \"{character}\" inside characters/. No character has been loaded.")
+ logger.error(f"Could not find the character \"{character}\" inside user_data/characters. No character has been loaded.")
raise ValueError
file_contents = open(filepath, 'r', encoding='utf-8').read()
@@ -804,7 +796,7 @@ def load_instruction_template(template):
if template == 'None':
return ''
- for filepath in [Path(f'instruction-templates/{template}.yaml'), Path('instruction-templates/Alpaca.yaml')]:
+ for filepath in [Path(f'user_data/instruction-templates/{template}.yaml'), Path('user_data/instruction-templates/Alpaca.yaml')]:
if filepath.exists():
break
else:
@@ -846,17 +838,17 @@ def upload_character(file, img, tavern=False):
outfile_name = name
i = 1
- while Path(f'characters/{outfile_name}.yaml').exists():
+ while Path(f'user_data/characters/{outfile_name}.yaml').exists():
outfile_name = f'{name}_{i:03d}'
i += 1
- with open(Path(f'characters/{outfile_name}.yaml'), 'w', encoding='utf-8') as f:
+ with open(Path(f'user_data/characters/{outfile_name}.yaml'), 'w', encoding='utf-8') as f:
f.write(yaml_data)
if img is not None:
- img.save(Path(f'characters/{outfile_name}.png'))
+ img.save(Path(f'user_data/characters/{outfile_name}.png'))
- logger.info(f'New character saved to "characters/{outfile_name}.yaml".')
+ logger.info(f'New character saved to "user_data/characters/{outfile_name}.yaml".')
return gr.update(value=outfile_name, choices=get_available_characters())
@@ -931,9 +923,9 @@ def save_character(name, greeting, context, picture, filename):
return
data = generate_character_yaml(name, greeting, context)
- filepath = Path(f'characters/{filename}.yaml')
+ filepath = Path(f'user_data/characters/{filename}.yaml')
save_file(filepath, data)
- path_to_img = Path(f'characters/{filename}.png')
+ path_to_img = Path(f'user_data/characters/{filename}.png')
if picture is not None:
picture.save(path_to_img)
logger.info(f'Saved {path_to_img}.')
@@ -941,9 +933,9 @@ def save_character(name, greeting, context, picture, filename):
def delete_character(name, instruct=False):
for extension in ["yml", "yaml", "json"]:
- delete_file(Path(f'characters/{name}.{extension}'))
+ delete_file(Path(f'user_data/characters/{name}.{extension}'))
- delete_file(Path(f'characters/{name}.png'))
+ delete_file(Path(f'user_data/characters/{name}.png'))
def jinja_template_from_old_format(params, verbose=False):
@@ -1246,7 +1238,7 @@ def handle_save_template_click(instruction_template_str):
contents = generate_instruction_template_yaml(instruction_template_str)
return [
"My Template.yaml",
- "instruction-templates/",
+ "user_data/instruction-templates/",
contents,
gr.update(visible=True)
]
@@ -1255,7 +1247,7 @@ def handle_save_template_click(instruction_template_str):
def handle_delete_template_click(template):
return [
f"{template}.yaml",
- "instruction-templates/",
+ "user_data/instruction-templates/",
gr.update(visible=False)
]
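As a sanity check on the new history layout, this is where logs land after the change (the unique_id and character name below are invented, and the import assumes the webui package is on the path):

```python
from modules.chat import get_history_file_path  # assumption: run from the repo root

print(get_history_file_path("20240101-12-00-00", "Assistant", "chat"))
# user_data/logs/chat/Assistant/20240101-12-00-00.json

print(get_history_file_path("20240101-12-00-00", "Assistant", "instruct"))
# user_data/logs/instruct/20240101-12-00-00.json
```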
diff --git a/modules/evaluate.py b/modules/evaluate.py
index ba0de378..4f41c1fc 100644
--- a/modules/evaluate.py
+++ b/modules/evaluate.py
@@ -12,8 +12,8 @@ from modules.text_generation import encode
def load_past_evaluations():
- if Path('logs/evaluations.csv').exists():
- df = pd.read_csv(Path('logs/evaluations.csv'), dtype=str)
+ if Path('user_data/logs/evaluations.csv').exists():
+ df = pd.read_csv(Path('user_data/logs/evaluations.csv'), dtype=str)
df['Perplexity'] = pd.to_numeric(df['Perplexity'])
return df
else:
@@ -26,7 +26,7 @@ past_evaluations = load_past_evaluations()
def save_past_evaluations(df):
global past_evaluations
past_evaluations = df
- filepath = Path('logs/evaluations.csv')
+ filepath = Path('user_data/logs/evaluations.csv')
filepath.parent.mkdir(parents=True, exist_ok=True)
df.to_csv(filepath, index=False)
@@ -69,7 +69,7 @@ def calculate_perplexity(models, input_dataset, stride, _max_length):
data = load_dataset('ptb_text_only', 'penn_treebank', split='test')
text = " ".join(data['sentence'])
else:
- with open(Path(f'training/datasets/{input_dataset}.txt'), 'r', encoding='utf-8') as f:
+ with open(Path(f'user_data/training/datasets/{input_dataset}.txt'), 'r', encoding='utf-8') as f:
text = f.read()
for model in models:
diff --git a/modules/exllamav2.py b/modules/exllamav2.py
index 0289bb21..6bb422ea 100644
--- a/modules/exllamav2.py
+++ b/modules/exllamav2.py
@@ -40,7 +40,7 @@ class Exllamav2Model:
config.model_dir = str(path_to_model)
config.prepare()
- config.max_seq_len = shared.args.max_seq_len
+ config.max_seq_len = shared.args.ctx_size
config.scale_pos_emb = shared.args.compress_pos_emb
config.scale_alpha_value = shared.args.alpha_value
config.no_flash_attn = shared.args.no_flash_attn
@@ -85,7 +85,44 @@ class Exllamav2Model:
model.load_autosplit(cache)
tokenizer = ExLlamaV2Tokenizer(config)
- generator = ExLlamaV2StreamingGenerator(model, cache, tokenizer)
+
+ # Initialize draft model for speculative decoding
+ draft_model = None
+ draft_cache = None
+
+ if shared.args.model_draft and shared.args.model_draft.lower() not in ["none", ""]:
+ logger.info(f"Loading draft model for speculative decoding: {shared.args.model_draft}")
+
+ # Find the draft model path
+ draft_path = Path(shared.args.model_draft)
+ if not draft_path.exists():
+ draft_path = Path(f'{shared.args.model_dir}') / Path(shared.args.model_draft)
+
+ draft_config = ExLlamaV2Config()
+ draft_config.model_dir = str(draft_path)
+ draft_config.prepare()
+ draft_config.arch_compat_overrides()
+
+ # Set context size for draft model
+ if shared.args.ctx_size_draft > 0:
+ draft_config.max_seq_len = shared.args.ctx_size_draft
+ else:
+ draft_config.max_seq_len = config.max_seq_len
+
+ draft_model = ExLlamaV2(draft_config)
+ draft_cache = cache_type(draft_model, lazy=True)
+ draft_model.load_autosplit(draft_cache)
+
+ logger.info(f"Draft model loaded successfully with max_draft={shared.args.draft_max}")
+
+ generator = ExLlamaV2StreamingGenerator(
+ model,
+ cache,
+ tokenizer,
+ draft_model=draft_model,
+ draft_cache=draft_cache,
+ num_speculative_tokens=shared.args.draft_max if draft_model is not None else 0
+ )
result = self()
result.model = model
@@ -93,6 +130,8 @@ class Exllamav2Model:
result.tokenizer = tokenizer
result.generator = generator
result.loras = None
+ result.draft_model = draft_model
+ result.draft_cache = draft_cache
return result, result
def encode(self, string, **kwargs):
@@ -179,6 +218,10 @@ class Exllamav2Model:
else:
max_new_tokens = state['max_new_tokens']
+ # Reset speculative decoding stats if using a draft model
+ if hasattr(self, 'draft_model') and self.draft_model is not None:
+ self.generator.reset_sd_stats()
+
self.generator.begin_stream(ids, settings, loras=self.loras)
decoded_text = ''
@@ -190,6 +233,11 @@ class Exllamav2Model:
decoded_text += chunk
yield decoded_text
+ # Log speculative decoding stats if using draft model
+ if hasattr(self, 'draft_model') and self.draft_model is not None:
+ efficiency, accuracy, total_tokens, total_draft_tokens, accepted_draft_tokens = self.generator.get_sd_stats()
+ logger.info(f"Speculative decoding: accepted={accepted_draft_tokens}/{total_draft_tokens} tokens")
+
def generate(self, prompt, state):
output = ''
for output in self.generate_with_streaming(prompt, state):
diff --git a/modules/exllamav2_hf.py b/modules/exllamav2_hf.py
index b159d9ce..eb801940 100644
--- a/modules/exllamav2_hf.py
+++ b/modules/exllamav2_hf.py
@@ -192,7 +192,7 @@ class Exllamav2HF(PreTrainedModel, GenerationMixin):
config.model_dir = str(pretrained_model_name_or_path)
config.prepare()
- config.max_seq_len = shared.args.max_seq_len
+ config.max_seq_len = shared.args.ctx_size
config.scale_pos_emb = shared.args.compress_pos_emb
config.scale_alpha_value = shared.args.alpha_value
config.no_flash_attn = shared.args.no_flash_attn
diff --git a/modules/exllamav3_hf.py b/modules/exllamav3_hf.py
index 2d9c493a..f15fc0b2 100644
--- a/modules/exllamav3_hf.py
+++ b/modules/exllamav3_hf.py
@@ -5,6 +5,7 @@ from typing import Any, Dict, Optional, Union
import torch
from exllamav3 import Cache, Config, Model
+from exllamav3.cache import CacheLayer_fp16, CacheLayer_quant
from torch.nn import CrossEntropyLoss
from transformers import (
GenerationConfig,
@@ -33,13 +34,39 @@ class Exllamav3HF(PreTrainedModel, GenerationMixin):
self.ex_model = Model.from_config(config)
# Calculate the closest multiple of 256 at or above the chosen value
- max_tokens = shared.args.max_seq_len
+ max_tokens = shared.args.ctx_size
if max_tokens % 256 != 0:
adjusted_tokens = ((max_tokens // 256) + 1) * 256
logger.warning(f"max_num_tokens must be a multiple of 256. Adjusting from {max_tokens} to {adjusted_tokens}")
max_tokens = adjusted_tokens
- self.ex_cache = Cache(self.ex_model, max_num_tokens=max_tokens)
+ # Parse cache type
+ cache_type = shared.args.cache_type.lower()
+ cache_kwargs = {}
+ if cache_type == 'fp16':
+ layer_type = CacheLayer_fp16
+ elif cache_type.startswith('q'):
+ layer_type = CacheLayer_quant
+ if '_' in cache_type:
+ # Different bits for k and v (e.g., q4_q8)
+ k_part, v_part = cache_type.split('_')
+ k_bits = int(k_part[1:])
+ v_bits = int(v_part[1:])
+ else:
+ # Same bits for k and v (e.g., q4)
+ k_bits = v_bits = int(cache_type[1:])
+
+ # Validate bit ranges
+ if not (2 <= k_bits <= 8 and 2 <= v_bits <= 8):
+ logger.warning(f"Invalid quantization bits: k_bits={k_bits}, v_bits={v_bits}. Must be between 2 and 8. Falling back to fp16.")
+ layer_type = CacheLayer_fp16
+ else:
+ cache_kwargs = {'k_bits': k_bits, 'v_bits': v_bits}
+ else:
+ logger.warning(f"Unrecognized cache type: {cache_type}. Falling back to fp16.")
+ layer_type = CacheLayer_fp16
+
+ self.ex_cache = Cache(self.ex_model, max_num_tokens=max_tokens, layer_type=layer_type, **cache_kwargs)
# Create load parameters dictionary
load_params = {'progressbar': True}
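For clarity, a standalone sketch that re-traces the cache-type parsing above (separate from the patch itself; warnings are omitted and only the bit extraction is shown):

```python
def parse_cache_type(cache_type: str):
    """Return ('fp16', None) or ('quant', (k_bits, v_bits)), mirroring the logic above."""
    cache_type = cache_type.lower()
    if cache_type == 'fp16':
        return 'fp16', None
    if cache_type.startswith('q'):
        if '_' in cache_type:
            # Different bits for k and v, e.g. q4_q8
            k_part, v_part = cache_type.split('_')
            k_bits, v_bits = int(k_part[1:]), int(v_part[1:])
        else:
            # Same bits for k and v, e.g. q4
            k_bits = v_bits = int(cache_type[1:])
        if 2 <= k_bits <= 8 and 2 <= v_bits <= 8:
            return 'quant', (k_bits, v_bits)
    return 'fp16', None  # unrecognized or out-of-range -> fp16 fallback

print(parse_cache_type('q4'))     # ('quant', (4, 4))
print(parse_cache_type('q4_q8'))  # ('quant', (4, 8))
print(parse_cache_type('q9'))     # ('fp16', None)
```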
diff --git a/modules/gradio_hijack.py b/modules/gradio_hijack.py
index 2ddd983a..817da40c 100644
--- a/modules/gradio_hijack.py
+++ b/modules/gradio_hijack.py
@@ -1,5 +1,6 @@
'''
-Copied from: https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14184
+Most of the code here was adapted from:
+https://github.com/AUTOMATIC1111/stable-diffusion-webui/pull/14184
'''
import inspect
@@ -7,6 +8,30 @@ import warnings
from functools import wraps
import gradio as gr
+import gradio.routes
+import gradio.utils
+from starlette.middleware.trustedhost import TrustedHostMiddleware
+
+from modules import shared
+
+orig_create_app = gradio.routes.App.create_app
+
+
+# Be strict about only approving access to localhost by default
+def create_app_with_trustedhost(*args, **kwargs):
+ app = orig_create_app(*args, **kwargs)
+
+ if not (shared.args.listen or shared.args.share):
+ app.add_middleware(
+ TrustedHostMiddleware,
+ allowed_hosts=["localhost", "127.0.0.1"]
+ )
+
+ return app
+
+
+gradio.routes.App.create_app = create_app_with_trustedhost
+gradio.utils.launch_counter = lambda: None
class GradioDeprecationWarning(DeprecationWarning):
diff --git a/modules/html_generator.py b/modules/html_generator.py
index 144f2593..c5252c26 100644
--- a/modules/html_generator.py
+++ b/modules/html_generator.py
@@ -1,3 +1,4 @@
+import datetime
import functools
import html
import os
@@ -106,8 +107,87 @@ def replace_blockquote(m):
return m.group().replace('\n', '\n> ').replace('\\begin{blockquote}', '').replace('\\end{blockquote}', '')
+def extract_thinking_block(string):
+ """Extract thinking blocks from the beginning of a string."""
+ if not string:
+ return None, string
+
+ THINK_START_TAG = "<think>"
+ THINK_END_TAG = "</think>"
+
+ # Look for opening tag
+ start_pos = string.lstrip().find(THINK_START_TAG)
+ if start_pos == -1:
+ return None, string
+
+ # Adjust start position to account for any leading whitespace
+ start_pos = string.find(THINK_START_TAG)
+
+ # Find the content after the opening tag
+ content_start = start_pos + len(THINK_START_TAG)
+
+ # Look for closing tag
+ end_pos = string.find(THINK_END_TAG, content_start)
+
+ if end_pos != -1:
+ # Both tags found - extract content between them
+ thinking_content = string[content_start:end_pos]
+ remaining_content = string[end_pos + len(THINK_END_TAG):]
+ return thinking_content, remaining_content
+ else:
+ # Only opening tag found - everything else is thinking content
+ thinking_content = string[content_start:]
+ return thinking_content, ""
+
+
@functools.lru_cache(maxsize=None)
-def convert_to_markdown(string):
+def convert_to_markdown(string, message_id=None):
+ if not string:
+ return ""
+
+ # Use a default message ID if none provided
+ if message_id is None:
+ message_id = "unknown"
+
+ # Extract thinking block if present
+ thinking_content, remaining_content = extract_thinking_block(string)
+
+ # Process the main content
+ html_output = process_markdown_content(remaining_content)
+
+ # If thinking content was found, process it using the same function
+ if thinking_content is not None:
+ thinking_html = process_markdown_content(thinking_content)
+
+ # Generate unique ID for the thinking block
+ block_id = f"thinking-{message_id}-0"
+
+ # Check if thinking is complete or still in progress
+ is_streaming = not remaining_content
+ title_text = "Thinking..." if is_streaming else "Thought"
+
+ thinking_block = f'''
+
+
+ {thinking_html}
+
+ '''
+
+ # Prepend the thinking block to the message HTML
+ html_output = thinking_block + html_output
+
+ return html_output
+
+
+def process_markdown_content(string):
+ """Process a string through the markdown conversion pipeline."""
if not string:
return ""
@@ -208,15 +288,15 @@ def convert_to_markdown(string):
return html_output
-def convert_to_markdown_wrapped(string, use_cache=True):
+def convert_to_markdown_wrapped(string, message_id=None, use_cache=True):
'''
Used to avoid caching convert_to_markdown calls during streaming.
'''
if use_cache:
- return convert_to_markdown(string)
+ return convert_to_markdown(string, message_id=message_id)
- return convert_to_markdown.__wrapped__(string)
+ return convert_to_markdown.__wrapped__(string, message_id=message_id)
def generate_basic_html(string):
@@ -272,7 +352,7 @@ def generate_instruct_html(history):
for i in range(len(history['visible'])):
row_visible = history['visible'][i]
row_internal = history['internal'][i]
- converted_visible = [convert_to_markdown_wrapped(entry, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
+ converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
if converted_visible[0]: # Don't display empty user messages
output += (
@@ -307,19 +387,19 @@ def generate_cai_chat_html(history, name1, name2, style, character, reset_cache=
# We use ?character and ?time.time() to force the browser to reset caches
img_bot = (
- f'
'
- if Path("cache/pfp_character_thumb.png").exists() else ''
+ f'
'
+ if Path("user_data/cache/pfp_character_thumb.png").exists() else ''
)
img_me = (
- f'
'
- if Path("cache/pfp_me.png").exists() else ''
+ f'
'
+ if Path("user_data/cache/pfp_me.png").exists() else ''
)
for i in range(len(history['visible'])):
row_visible = history['visible'][i]
row_internal = history['internal'][i]
- converted_visible = [convert_to_markdown_wrapped(entry, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
+ converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
if converted_visible[0]: # Don't display empty user messages
output += (
@@ -359,7 +439,7 @@ def generate_chat_html(history, name1, name2, reset_cache=False):
for i in range(len(history['visible'])):
row_visible = history['visible'][i]
row_internal = history['internal'][i]
- converted_visible = [convert_to_markdown_wrapped(entry, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
+ converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
if converted_visible[0]: # Don't display empty user messages
output += (
@@ -389,8 +469,21 @@ def generate_chat_html(history, name1, name2, reset_cache=False):
return output
+def time_greeting():
+ current_hour = datetime.datetime.now().hour
+ if 5 <= current_hour < 12:
+ return "Good morning!"
+ elif 12 <= current_hour < 18:
+ return "Good afternoon!"
+ else:
+ return "Good evening!"
+
+
def chat_html_wrapper(history, name1, name2, mode, style, character, reset_cache=False):
- if mode == 'instruct':
+ if len(history['visible']) == 0:
+ greeting = f"{time_greeting()} How can I help you today?
"
+ result = f'{greeting}
'
+ elif mode == 'instruct':
result = generate_instruct_html(history)
elif style == 'wpp':
result = generate_chat_html(history, name1, name2)
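To make the new reasoning-block handling concrete, here is how extract_thinking_block splits a message before convert_to_markdown renders the two halves (example strings invented; the import assumes the webui package is on the path):

```python
from modules.html_generator import extract_thinking_block  # assumption: run from the repo root

# Closed block: text between the tags becomes the thinking half
print(extract_thinking_block("<think>Add 2 and 2.</think>The answer is 4."))
# ('Add 2 and 2.', 'The answer is 4.')

# Still streaming: no closing tag yet, so everything after <think> is thinking content
print(extract_thinking_block("<think>Let me work through this"))
# ('Let me work through this', '')

# No tag at all
print(extract_thinking_block("Just a normal reply"))
# (None, 'Just a normal reply')
```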
diff --git a/modules/llama_cpp_server.py b/modules/llama_cpp_server.py
index c88f945d..9572d5aa 100644
--- a/modules/llama_cpp_server.py
+++ b/modules/llama_cpp_server.py
@@ -1,11 +1,13 @@
import json
import os
import pprint
+import re
import socket
import subprocess
import sys
import threading
import time
+from pathlib import Path
import llama_cpp_binaries
import requests
@@ -251,7 +253,7 @@ class LlamaServer:
cmd = [
self.server_path,
"--model", self.model_path,
- "--ctx-size", str(shared.args.n_ctx),
+ "--ctx-size", str(shared.args.ctx_size),
"--n-gpu-layers", str(shared.args.n_gpu_layers),
"--batch-size", str(shared.args.batch_size),
"--port", str(self.port),
@@ -281,6 +283,41 @@ class LlamaServer:
cmd += ["--rope-freq-scale", str(1.0 / shared.args.compress_pos_emb)]
if shared.args.rope_freq_base > 0:
cmd += ["--rope-freq-base", str(shared.args.rope_freq_base)]
+ if shared.args.model_draft not in [None, 'None']:
+ path = Path(shared.args.model_draft)
+ if not path.exists():
+ path = Path(f'{shared.args.model_dir}/{shared.args.model_draft}')
+
+ if path.is_file():
+ model_file = path
+ else:
+ model_file = sorted(Path(f'{shared.args.model_dir}/{shared.args.model_draft}').glob('*.gguf'))[0]
+
+ cmd += ["--model-draft", model_file]
+ if shared.args.draft_max > 0:
+ cmd += ["--draft-max", str(shared.args.draft_max)]
+ if shared.args.gpu_layers_draft > 0:
+ cmd += ["--gpu-layers-draft", str(shared.args.gpu_layers_draft)]
+ if shared.args.device_draft:
+ cmd += ["--device-draft", shared.args.device_draft]
+ if shared.args.ctx_size_draft > 0:
+ cmd += ["--ctx-size-draft", str(shared.args.ctx_size_draft)]
+ if shared.args.streaming_llm:
+ cmd += ["--cache-reuse", "1"]
+ if shared.args.extra_flags:
+ # Clean up the input
+ extra_flags = shared.args.extra_flags.strip()
+ if extra_flags.startswith('"') and extra_flags.endswith('"'):
+ extra_flags = extra_flags[1:-1].strip()
+ elif extra_flags.startswith("'") and extra_flags.endswith("'"):
+ extra_flags = extra_flags[1:-1].strip()
+
+ for flag_item in extra_flags.split(','):
+ if '=' in flag_item:
+ flag, value = flag_item.split('=', 1)
+ cmd += [f"--{flag}", value]
+ else:
+ cmd.append(f"--{flag_item}")
env = os.environ.copy()
if os.name == 'posix':
@@ -299,17 +336,7 @@ class LlamaServer:
env=env
)
- def filter_stderr(process_stderr):
- try:
- for line in iter(process_stderr.readline, ''):
- if not line.startswith(('srv ', 'slot ')) and 'log_server_r: request: GET /health' not in line:
- sys.stderr.write(line)
- sys.stderr.flush()
- except (ValueError, IOError):
- # Handle pipe closed exceptions
- pass
-
- threading.Thread(target=filter_stderr, args=(self.process.stderr,), daemon=True).start()
+ threading.Thread(target=filter_stderr_with_progress, args=(self.process.stderr,), daemon=True).start()
# Wait for server to be healthy
health_url = f"http://127.0.0.1:{self.port}/health"
@@ -360,3 +387,18 @@ class LlamaServer:
self.process.kill()
self.process = None
+
+
+def filter_stderr_with_progress(process_stderr):
+ progress_pattern = re.compile(r'slot update_slots: id.*progress = (\d+\.\d+)')
+ try:
+ for line in iter(process_stderr.readline, ''):
+ progress_match = progress_pattern.search(line)
+ if progress_match:
+ sys.stderr.write(line)
+ sys.stderr.flush()
+ elif not line.startswith(('srv ', 'slot ')) and 'log_server_r: request: GET /health' not in line:
+ sys.stderr.write(line)
+ sys.stderr.flush()
+ except (ValueError, IOError):
+ pass
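A worked example of the --extra-flags parsing added above; the helper below only re-traces that loop for illustration, and the flag names are examples taken from the help text:

```python
def parse_extra_flags(extra_flags: str):
    """Turn 'flag1=value1,flag2' into llama-server arguments, as in the loop above."""
    cmd = []
    extra_flags = extra_flags.strip()
    # Strip one matching pair of surrounding quotes, if present
    if extra_flags[:1] in ('"', "'") and extra_flags.endswith(extra_flags[:1]):
        extra_flags = extra_flags[1:-1].strip()

    for flag_item in extra_flags.split(','):
        if '=' in flag_item:
            flag, value = flag_item.split('=', 1)
            cmd += [f"--{flag}", value]
        else:
            cmd.append(f"--{flag_item}")
    return cmd

print(parse_extra_flags('"override-tensor=exps=CPU,no-mmap"'))
# ['--override-tensor', 'exps=CPU', '--no-mmap']
```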
diff --git a/modules/loaders.py b/modules/loaders.py
index 7d6afe80..b8ae82d7 100644
--- a/modules/loaders.py
+++ b/modules/loaders.py
@@ -9,9 +9,11 @@ loaders_and_params = OrderedDict({
'threads',
'threads_batch',
'batch_size',
- 'n_ctx',
+ 'ctx_size',
'cache_type',
'tensor_split',
+ 'extra_flags',
+ 'streaming_llm',
'rope_freq_base',
'compress_pos_emb',
'flash_attn',
@@ -20,6 +22,12 @@ loaders_and_params = OrderedDict({
'no_mmap',
'mlock',
'numa',
+ 'model_draft',
+ 'draft_max',
+ 'gpu_layers_draft',
+ 'device_draft',
+ 'ctx_size_draft',
+ 'speculative_decoding_accordion',
],
'Transformers': [
'gpu_split',
@@ -41,14 +49,15 @@ loaders_and_params = OrderedDict({
'no_use_fast',
],
'ExLlamav3_HF': [
- 'max_seq_len',
+ 'ctx_size',
+ 'cache_type',
'gpu_split',
'cfg_cache',
'trust_remote_code',
'no_use_fast',
],
'ExLlamav2_HF': [
- 'max_seq_len',
+ 'ctx_size',
'cache_type',
'gpu_split',
'alpha_value',
@@ -64,7 +73,7 @@ loaders_and_params = OrderedDict({
'no_use_fast',
],
'ExLlamav2': [
- 'max_seq_len',
+ 'ctx_size',
'cache_type',
'gpu_split',
'alpha_value',
@@ -76,6 +85,10 @@ loaders_and_params = OrderedDict({
'no_xformers',
'no_sdpa',
'exllamav2_info',
+ 'model_draft',
+ 'draft_max',
+ 'ctx_size_draft',
+ 'speculative_decoding_accordion',
],
'HQQ': [
'hqq_backend',
@@ -83,7 +96,7 @@ loaders_and_params = OrderedDict({
'no_use_fast',
],
'TensorRT-LLM': [
- 'max_seq_len',
+ 'ctx_size',
'cpp_runner',
'tensorrt_llm_info',
]
diff --git a/modules/models.py b/modules/models.py
index 99b068aa..d0b0402a 100644
--- a/modules/models.py
+++ b/modules/models.py
@@ -52,10 +52,8 @@ def load_model(model_name, loader=None):
tokenizer = load_tokenizer(model_name)
shared.settings.update({k: v for k, v in metadata.items() if k in shared.settings})
- if loader.lower().startswith('exllama') or loader.lower().startswith('tensorrt'):
- shared.settings['truncation_length'] = shared.args.max_seq_len
- elif loader == 'llama.cpp':
- shared.settings['truncation_length'] = shared.args.n_ctx
+ if loader.lower().startswith('exllama') or loader.lower().startswith('tensorrt') or loader == 'llama.cpp':
+ shared.settings['truncation_length'] = shared.args.ctx_size
logger.info(f"Loaded \"{model_name}\" in {(time.time()-t0):.2f} seconds.")
logger.info(f"LOADER: \"{loader}\"")
diff --git a/modules/models_settings.py b/modules/models_settings.py
index ee2ed71b..ae589bb3 100644
--- a/modules/models_settings.py
+++ b/modules/models_settings.py
@@ -11,8 +11,7 @@ def get_fallback_settings():
return {
'bf16': False,
'use_eager_attention': False,
- 'max_seq_len': 2048,
- 'n_ctx': 2048,
+ 'ctx_size': 2048,
'rope_freq_base': 0,
'compress_pos_emb': 1,
'alpha_value': 1,
@@ -26,7 +25,7 @@ def get_fallback_settings():
def get_model_metadata(model):
model_settings = {}
- # Get settings from models/config.yaml and models/config-user.yaml
+ # Get settings from user_data/models/config.yaml and user_data/models/config-user.yaml
settings = shared.model_config
for pat in settings:
if re.match(pat.lower(), Path(model).name.lower()):
@@ -59,7 +58,7 @@ def get_model_metadata(model):
for k in metadata:
if k.endswith('context_length'):
- model_settings['n_ctx'] = min(metadata[k], 8192)
+ model_settings['ctx_size'] = min(metadata[k], 8192)
model_settings['truncation_length_info'] = metadata[k]
elif k.endswith('rope.freq_base'):
model_settings['rope_freq_base'] = metadata[k]
@@ -97,7 +96,7 @@ def get_model_metadata(model):
if k in metadata:
model_settings['truncation_length'] = metadata[k]
model_settings['truncation_length_info'] = metadata[k]
- model_settings['max_seq_len'] = min(metadata[k], 8192)
+ model_settings['ctx_size'] = min(metadata[k], 8192)
if 'rope_theta' in metadata:
model_settings['rope_freq_base'] = metadata['rope_theta']
@@ -145,7 +144,7 @@ def get_model_metadata(model):
if 'rope_freq_base' in model_settings and model_settings['rope_freq_base'] == 10000:
model_settings.pop('rope_freq_base')
- # Apply user settings from models/config-user.yaml
+ # Apply user settings from user_data/models/config-user.yaml
settings = shared.user_config
for pat in settings:
if re.match(pat.lower(), Path(model).name.lower()):
@@ -224,7 +223,7 @@ def apply_model_settings_to_state(model, state):
def save_model_settings(model, state):
'''
- Save the settings for this model to models/config-user.yaml
+ Save the settings for this model to user_data/models/config-user.yaml
'''
if model == 'None':
yield ("Not saving the settings because no model is selected in the menu.")
diff --git a/modules/one_click_installer_check.py b/modules/one_click_installer_check.py
deleted file mode 100644
index 4bde8600..00000000
--- a/modules/one_click_installer_check.py
+++ /dev/null
@@ -1,9 +0,0 @@
-from pathlib import Path
-
-from modules.logging_colors import logger
-
-if Path('../webui.py').exists():
- logger.warning('\nIt looks like you are running an outdated version of '
- 'the one-click-installers.\n'
- 'Please migrate your installation following the instructions here:\n'
- 'https://github.com/oobabooga/text-generation-webui/wiki/Migrating-an-old-one%E2%80%90click-install')
diff --git a/modules/presets.py b/modules/presets.py
index 7cab2af0..a432bf52 100644
--- a/modules/presets.py
+++ b/modules/presets.py
@@ -58,7 +58,7 @@ def presets_params():
def load_preset(name, verbose=False):
generate_params = default_preset()
if name not in ['None', None, '']:
- path = Path(f'presets/{name}.yaml')
+ path = Path(f'user_data/presets/{name}.yaml')
if path.exists():
with open(path, 'r') as infile:
preset = yaml.safe_load(infile)
diff --git a/modules/prompts.py b/modules/prompts.py
index 565c2450..8f00cac2 100644
--- a/modules/prompts.py
+++ b/modules/prompts.py
@@ -7,7 +7,7 @@ def load_prompt(fname):
if fname in ['None', '']:
return ''
else:
- file_path = Path(f'prompts/{fname}.txt')
+ file_path = Path(f'user_data/prompts/{fname}.txt')
if not file_path.exists():
return ''
diff --git a/modules/shared.py b/modules/shared.py
index 08268ae0..5d9dd362 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -1,6 +1,7 @@
import argparse
import copy
import os
+import shlex
import sys
from collections import OrderedDict
from pathlib import Path
@@ -31,7 +32,7 @@ need_restart = False
settings = {
'show_controls': True,
'start_with': '',
- 'mode': 'chat-instruct',
+ 'mode': 'instruct',
'chat_style': 'cai-chat',
'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
'prompt-default': 'QA',
@@ -57,7 +58,6 @@ settings = {
'seed': -1,
'custom_stopping_strings': '',
'custom_token_bans': '',
- 'show_after': '',
'negative_prompt': '',
'autoload_model': False,
'dark_theme': True,
@@ -77,10 +77,10 @@ group.add_argument('--multi-user', action='store_true', help='Multi-user mode. C
group.add_argument('--character', type=str, help='The name of the character to load in chat mode by default.')
group.add_argument('--model', type=str, help='Name of the model to load by default.')
group.add_argument('--lora', type=str, nargs='+', help='The list of LoRAs to load. If you want to load more than one LoRA, write the names separated by spaces.')
-group.add_argument('--model-dir', type=str, default='models/', help='Path to directory with all the models.')
-group.add_argument('--lora-dir', type=str, default='loras/', help='Path to directory with all the loras.')
+group.add_argument('--model-dir', type=str, default='user_data/models', help='Path to directory with all the models.')
+group.add_argument('--lora-dir', type=str, default='user_data/loras', help='Path to directory with all the loras.')
group.add_argument('--model-menu', action='store_true', help='Show a model menu in the terminal when the web UI is first launched.')
-group.add_argument('--settings', type=str, help='Load the default interface settings from this yaml file. See settings-template.yaml for an example. If you create a file called settings.yaml, this file will be loaded by default without the need to use the --settings flag.')
+group.add_argument('--settings', type=str, help='Load the default interface settings from this yaml file. See user_data/settings-template.yaml for an example. If you create a file called user_data/settings.yaml, this file will be loaded by default without the need to use the --settings flag.')
group.add_argument('--extensions', type=str, nargs='+', help='The list of extensions to load. If you want to load more than one extension, write the names separated by spaces.')
group.add_argument('--verbose', action='store_true', help='Print the prompts to the terminal.')
group.add_argument('--idle-timeout', type=int, default=0, help='Unload model after this many minutes of inactivity. It will be automatically reloaded when you try to use it again.')
@@ -94,7 +94,7 @@ group = parser.add_argument_group('Transformers/Accelerate')
group.add_argument('--cpu', action='store_true', help='Use the CPU to generate text. Warning: Training on CPU is extremely slow.')
group.add_argument('--cpu-memory', type=float, default=0, help='Maximum CPU memory in GiB. Use this for CPU offloading.')
group.add_argument('--disk', action='store_true', help='If the model is too large for your GPU(s) and CPU combined, send the remaining layers to the disk.')
-group.add_argument('--disk-cache-dir', type=str, default='cache', help='Directory to save the disk cache to. Defaults to "cache".')
+group.add_argument('--disk-cache-dir', type=str, default='user_data/cache', help='Directory to save the disk cache to. Defaults to "user_data/cache".')
group.add_argument('--load-in-8bit', action='store_true', help='Load the model with 8-bit precision (using bitsandbytes).')
group.add_argument('--bf16', action='store_true', help='Load the model with bfloat16 precision. Requires NVIDIA Ampere GPU.')
group.add_argument('--no-cache', action='store_true', help='Set use_cache to False while generating text. This reduces VRAM usage slightly, but it comes at a performance cost.')
@@ -115,10 +115,9 @@ group.add_argument('--quant_type', type=str, default='nf4', help='quant_type for
# llama.cpp
group = parser.add_argument_group('llama.cpp')
group.add_argument('--flash-attn', action='store_true', help='Use flash-attention.')
-group.add_argument('--n_ctx', type=int, default=8192, help='Size of the prompt context.')
group.add_argument('--threads', type=int, default=0, help='Number of threads to use.')
group.add_argument('--threads-batch', type=int, default=0, help='Number of threads to use for batches/prompt processing.')
-group.add_argument('--batch-size', type=int, default=2048, help='Maximum number of prompt tokens to batch together when calling llama_eval.')
+group.add_argument('--batch-size', type=int, default=256, help='Maximum number of prompt tokens to batch together when calling llama_eval.')
group.add_argument('--no-mmap', action='store_true', help='Prevent mmap from being used.')
group.add_argument('--mlock', action='store_true', help='Force the system to keep the model in RAM.')
group.add_argument('--n-gpu-layers', type=int, default=0, help='Number of layers to offload to the GPU.')
@@ -126,17 +125,31 @@ group.add_argument('--tensor-split', type=str, default=None, help='Split the mod
group.add_argument('--numa', action='store_true', help='Activate NUMA task allocation for llama.cpp.')
group.add_argument('--no-kv-offload', action='store_true', help='Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance.')
group.add_argument('--row-split', action='store_true', help='Split the model by rows across GPUs. This may improve multi-gpu performance.')
+group.add_argument('--extra-flags', type=str, default=None, help='Extra flags to pass to llama-server. Format: "flag1=value1,flag2,flag3=value3". Example: "override-tensor=exps=CPU"')
+group.add_argument('--streaming-llm', action='store_true', help='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
+
+# Cache
+group = parser.add_argument_group('Context and cache management')
+group.add_argument('--ctx-size', '--n_ctx', '--max_seq_len', type=int, default=8192, metavar='N', help='Context size in tokens.')
+group.add_argument('--cache_type', type=str, default='fp16', help='KV cache type; valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4; ExLlamaV3 - fp16, q2 to q8 (can specify k_bits and v_bits separately, e.g. q4_q8).')
+
+# Speculative decoding
+group = parser.add_argument_group('Speculative decoding')
+group.add_argument('--model-draft', type=str, default=None, help='Path to the draft model for speculative decoding.')
+group.add_argument('--draft-max', type=int, default=4, help='Number of tokens to draft for speculative decoding.')
+group.add_argument('--gpu-layers-draft', type=int, default=256, help='Number of layers to offload to the GPU for the draft model.')
+group.add_argument('--device-draft', type=str, default=None, help='Comma-separated list of devices to use for offloading the draft model. Example: CUDA0,CUDA1')
+group.add_argument('--ctx-size-draft', type=int, default=0, help='Size of the prompt context for the draft model. If 0, uses the same as the main model.')
# ExLlamaV2
group = parser.add_argument_group('ExLlamaV2')
group.add_argument('--gpu-split', type=str, help='Comma-separated list of VRAM (in GB) to use per GPU device for model layers. Example: 20,7,7.')
group.add_argument('--autosplit', action='store_true', help='Autosplit the model tensors across the available GPUs. This causes --gpu-split to be ignored.')
-group.add_argument('--max_seq_len', type=int, default=8192, help='Maximum sequence length.')
group.add_argument('--cfg-cache', action='store_true', help='ExLlamav2_HF: Create an additional cache for CFG negative prompts. Necessary to use CFG with that loader.')
group.add_argument('--no_flash_attn', action='store_true', help='Force flash-attention to not be used.')
group.add_argument('--no_xformers', action='store_true', help='Force xformers to not be used.')
group.add_argument('--no_sdpa', action='store_true', help='Force Torch SDPA to not be used.')
-group.add_argument('--num_experts_per_token', type=int, default=2, help='Number of experts to use for generation. Applies to MoE models like Mixtral.')
+group.add_argument('--num_experts_per_token', type=int, default=2, metavar='N', help='Number of experts to use for generation. Applies to MoE models like Mixtral.')
group.add_argument('--enable_tp', action='store_true', help='Enable Tensor Parallelism (TP) in ExLlamaV2.')
# HQQ
@@ -192,12 +205,36 @@ group.add_argument('--nowebui', action='store_true', help='Do not launch the Gra
# Deprecated parameters
group = parser.add_argument_group('Deprecated')
+# Handle CMD_FLAGS.txt
+cmd_flags_path = Path(__file__).parent.parent / "user_data" / "CMD_FLAGS.txt"
+if cmd_flags_path.exists():
+ with cmd_flags_path.open('r', encoding='utf-8') as f:
+ cmd_flags = ' '.join(
+ line.strip().rstrip('\\').strip()
+ for line in f
+ if line.strip().rstrip('\\').strip() and not line.strip().startswith('#')
+ )
+
+ if cmd_flags:
+ # Command-line takes precedence over CMD_FLAGS.txt
+ sys.argv = [sys.argv[0]] + shlex.split(cmd_flags) + sys.argv[1:]
+
+
args = parser.parse_args()
args_defaults = parser.parse_args([])
+
+# Create a mapping of all argument aliases to their canonical names
+alias_to_dest = {}
+for action in parser._actions:
+ for opt in action.option_strings:
+ alias_to_dest[opt.lstrip('-').replace('-', '_')] = action.dest
+
provided_arguments = []
for arg in sys.argv[1:]:
arg = arg.lstrip('-').replace('-', '_')
- if hasattr(args, arg):
+ if arg in alias_to_dest:
+ provided_arguments.append(alias_to_dest[arg])
+ elif hasattr(args, arg):
provided_arguments.append(arg)
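Two behavioral notes on the block above: the old --n_ctx and --max_seq_len spellings remain valid aliases of --ctx-size thanks to the shared dest, and user_data/CMD_FLAGS.txt is folded into sys.argv before parsing, with real command-line arguments appended last so they take precedence. A minimal sketch of the file handling (file contents invented):

```python
import shlex

# Hypothetical contents of user_data/CMD_FLAGS.txt: a comment, a continuation, a flag
sample = "# extra launch flags\n--ctx-size 16384 \\\n--flash-attn\n"

cmd_flags = ' '.join(
    line.strip().rstrip('\\').strip()
    for line in sample.splitlines()
    if line.strip().rstrip('\\').strip() and not line.strip().startswith('#')
)

print(shlex.split(cmd_flags))  # ['--ctx-size', '16384', '--flash-attn']
```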
diff --git a/modules/tensorrt_llm.py b/modules/tensorrt_llm.py
index c2685b75..73178c39 100644
--- a/modules/tensorrt_llm.py
+++ b/modules/tensorrt_llm.py
@@ -1,15 +1,15 @@
from pathlib import Path
-import tensorrt_llm
import torch
-from tensorrt_llm.runtime import ModelRunner, ModelRunnerCpp
+import tensorrt_llm
from modules import shared
from modules.logging_colors import logger
from modules.text_generation import (
get_max_prompt_length,
get_reply_from_output_ids
)
+from tensorrt_llm.runtime import ModelRunner, ModelRunnerCpp
class TensorRTLLMModel:
@@ -35,7 +35,7 @@ class TensorRTLLMModel:
logger.info("TensorRT-LLM: Using \"ModelRunnerCpp\"")
runner_kwargs.update(
max_batch_size=1,
- max_input_len=shared.args.max_seq_len - 512,
+ max_input_len=shared.args.ctx_size - 512,
max_output_len=512,
max_beam_width=1,
max_attention_window_size=None,
diff --git a/modules/text_generation.py b/modules/text_generation.py
index 40046eb2..4e3d1d7a 100644
--- a/modules/text_generation.py
+++ b/modules/text_generation.py
@@ -264,6 +264,11 @@ def apply_stopping_strings(reply, all_stop_strings):
def get_reply_from_output_ids(output_ids, state=None, starting_from=0):
+ import torch
+
+ if torch.cuda.is_available():
+ torch.cuda.synchronize()
+
reply = decode(output_ids[starting_from:], state['skip_special_tokens'] if state else True)
# Handle tokenizers that do not add the leading space for the first token
diff --git a/modules/training.py b/modules/training.py
index c6c380a3..2354c39d 100644
--- a/modules/training.py
+++ b/modules/training.py
@@ -52,7 +52,7 @@ def create_ui():
with gr.Column():
always_override = gr.Checkbox(label='Override Existing Files', value=False, info='If the name is the same, checking will replace the existing file, and unchecking will load and continue from it (the rank must be the same).', elem_classes=['no-background'])
- with gr.Accordion(label='Target Modules', open=False):
+ with gr.Accordion(label='Target Modules', open=False, elem_classes='tgw-accordion'):
gr.Markdown("Selects which modules to target in training. Targeting more modules is closer to a full fine-tune at the cost of increased VRAM requirements and adapter size.\nNOTE: Only works for model_id='llama', other types will retain default training behavior and not use these settings.")
with gr.Row():
with gr.Column():
@@ -86,7 +86,7 @@ def create_ui():
with gr.Row():
lr_scheduler_type = gr.Dropdown(label='LR Scheduler', value='linear', choices=['linear', 'constant', 'constant_with_warmup', 'cosine', 'cosine_with_restarts', 'polynomial', 'inverse_sqrt'], info='Learning rate scheduler - defines how the learning rate changes over time. "Constant" means never change, "linear" means to go in a straight line from the learning rate down to 0, cosine follows a curve, etc.', elem_classes=['slim-dropdown'])
- with gr.Accordion(label='Advanced Options', open=False):
+ with gr.Accordion(label='Advanced Options', open=False, elem_classes='tgw-accordion'):
with gr.Row():
with gr.Column():
lora_dropout = gr.Slider(label='LoRA Dropout', minimum=0.0, maximum=1.0, step=0.025, value=0.05, info='Percentage probability for dropout of LoRA layers. This can help reduce overfitting. Most users should leave at default.')
@@ -106,23 +106,23 @@ def create_ui():
with gr.Column():
with gr.Tab(label='Formatted Dataset'):
with gr.Row():
- format = gr.Dropdown(choices=utils.get_datasets('training/formats', 'json'), value='None', label='Data Format', info='The format file used to decide how to format the dataset input.', elem_classes=['slim-dropdown'], interactive=not mu)
- ui.create_refresh_button(format, lambda: None, lambda: {'choices': utils.get_datasets('training/formats', 'json')}, 'refresh-button', interactive=not mu)
+ format = gr.Dropdown(choices=utils.get_datasets('user_data/training/formats', 'json'), value='None', label='Data Format', info='The format file used to decide how to format the dataset input.', elem_classes=['slim-dropdown'], interactive=not mu)
+ ui.create_refresh_button(format, lambda: None, lambda: {'choices': utils.get_datasets('user_data/training/formats', 'json')}, 'refresh-button', interactive=not mu)
with gr.Row():
- dataset = gr.Dropdown(choices=utils.get_datasets('training/datasets', 'json'), value='None', label='Dataset', info='The dataset file to use for training.', elem_classes=['slim-dropdown'], interactive=not mu)
- ui.create_refresh_button(dataset, lambda: None, lambda: {'choices': utils.get_datasets('training/datasets', 'json')}, 'refresh-button', interactive=not mu)
+ dataset = gr.Dropdown(choices=utils.get_datasets('user_data/training/datasets', 'json'), value='None', label='Dataset', info='The dataset file to use for training.', elem_classes=['slim-dropdown'], interactive=not mu)
+ ui.create_refresh_button(dataset, lambda: None, lambda: {'choices': utils.get_datasets('user_data/training/datasets', 'json')}, 'refresh-button', interactive=not mu)
with gr.Row():
- eval_dataset = gr.Dropdown(choices=utils.get_datasets('training/datasets', 'json'), value='None', label='Evaluation Dataset', info='The (optional) dataset file used to evaluate the model after training.', elem_classes=['slim-dropdown'], interactive=not mu)
- ui.create_refresh_button(eval_dataset, lambda: None, lambda: {'choices': utils.get_datasets('training/datasets', 'json')}, 'refresh-button', interactive=not mu)
+ eval_dataset = gr.Dropdown(choices=utils.get_datasets('user_data/training/datasets', 'json'), value='None', label='Evaluation Dataset', info='The (optional) dataset file used to evaluate the model after training.', elem_classes=['slim-dropdown'], interactive=not mu)
+ ui.create_refresh_button(eval_dataset, lambda: None, lambda: {'choices': utils.get_datasets('user_data/training/datasets', 'json')}, 'refresh-button', interactive=not mu)
eval_steps = gr.Number(label='Evaluate every n steps', value=100, info='If an evaluation dataset is given, test it every time this many steps pass.')
with gr.Tab(label="Raw text file"):
with gr.Row():
- raw_text_file = gr.Dropdown(choices=utils.get_datasets('training/datasets', 'txt'), value='None', label='Text file', info='The raw text file to use for training.', elem_classes=['slim-dropdown'], interactive=not mu)
- ui.create_refresh_button(raw_text_file, lambda: None, lambda: {'choices': utils.get_datasets('training/datasets', 'txt')}, 'refresh-button', interactive=not mu)
+ raw_text_file = gr.Dropdown(choices=utils.get_datasets('user_data/training/datasets', 'txt'), value='None', label='Text file', info='The raw text file to use for training.', elem_classes=['slim-dropdown'], interactive=not mu)
+ ui.create_refresh_button(raw_text_file, lambda: None, lambda: {'choices': utils.get_datasets('user_data/training/datasets', 'txt')}, 'refresh-button', interactive=not mu)
with gr.Row():
with gr.Column():
@@ -143,7 +143,7 @@ def create_ui():
with gr.Row():
with gr.Column():
models = gr.Dropdown(utils.get_available_models(), label='Models', multiselect=True, interactive=not mu)
- evaluate_text_file = gr.Dropdown(choices=['wikitext', 'ptb', 'ptb_new'] + utils.get_datasets('training/datasets', 'txt')[1:], value='wikitext', label='Input dataset', info='The raw text file on which the model will be evaluated. The first options are automatically downloaded: wikitext, ptb, and ptb_new. The next options are your local text files under training/datasets.', interactive=not mu)
+ evaluate_text_file = gr.Dropdown(choices=['wikitext', 'ptb', 'ptb_new'] + utils.get_datasets('user_data/training/datasets', 'txt')[1:], value='wikitext', label='Input dataset', info='The raw text file on which the model will be evaluated. The first options are automatically downloaded: wikitext, ptb, and ptb_new. The next options are your local text files under user_data/training/datasets.', interactive=not mu)
with gr.Row():
with gr.Column():
stride_length = gr.Slider(label='Stride', minimum=0, maximum=32768, value=512, step=256, info='Used to make the evaluation faster at the cost of accuracy. 1 = slowest but most accurate. 512 is a common value.')
@@ -402,7 +402,7 @@ def do_train(lora_name: str, always_override: bool, q_proj_en: bool, v_proj_en:
if raw_text_file not in ['None', '']:
train_template["template_type"] = "raw_text"
logger.info("Loading raw text file dataset")
- fullpath = clean_path('training/datasets', f'{raw_text_file}')
+ fullpath = clean_path('user_data/training/datasets', f'{raw_text_file}')
fullpath = Path(fullpath)
if fullpath.is_dir():
logger.info('Training path directory {}'.format(raw_text_file))
@@ -415,7 +415,7 @@ def do_train(lora_name: str, always_override: bool, q_proj_en: bool, v_proj_en:
logger.info(f"Loaded training file: {file_path.name}")
else:
- with open(clean_path('training/datasets', f'{raw_text_file}.txt'), 'r', encoding='utf-8') as file:
+ with open(clean_path('user_data/training/datasets', f'{raw_text_file}.txt'), 'r', encoding='utf-8') as file:
raw_text = file.read().replace('\r', '')
cut_string = hard_cut_string.replace('\\n', '\n')
@@ -460,7 +460,7 @@ def do_train(lora_name: str, always_override: bool, q_proj_en: bool, v_proj_en:
train_template["template_type"] = "dataset"
- with open(clean_path('training/formats', f'{format}.json'), 'r', encoding='utf-8-sig') as formatFile:
+ with open(clean_path('user_data/training/formats', f'{format}.json'), 'r', encoding='utf-8-sig') as formatFile:
format_data: dict[str, str] = json.load(formatFile)
# == store training prompt ==
@@ -482,13 +482,13 @@ def do_train(lora_name: str, always_override: bool, q_proj_en: bool, v_proj_en:
return tokenize(prompt, add_eos_token)
logger.info("Loading JSON datasets")
- data = load_dataset("json", data_files=clean_path('training/datasets', f'{dataset}.json'))
+ data = load_dataset("json", data_files=clean_path('user_data/training/datasets', f'{dataset}.json'))
train_data = data['train'].map(generate_and_tokenize_prompt, new_fingerprint='%030x' % random.randrange(16**30))
if eval_dataset == 'None':
eval_data = None
else:
- eval_data = load_dataset("json", data_files=clean_path('training/datasets', f'{eval_dataset}.json'))
+ eval_data = load_dataset("json", data_files=clean_path('user_data/training/datasets', f'{eval_dataset}.json'))
eval_data = eval_data['train'].map(generate_and_tokenize_prompt, new_fingerprint='%030x' % random.randrange(16**30))
# == We MUST reload model if it went through any previous training, even failed one ==
@@ -676,11 +676,11 @@ def do_train(lora_name: str, always_override: bool, q_proj_en: bool, v_proj_en:
decoded_entries.append({"value": decoded_text})
# Write the log file
- Path('logs').mkdir(exist_ok=True)
- with open(Path('logs/train_dataset_sample.json'), 'w') as json_file:
+ Path('user_data/logs').mkdir(exist_ok=True)
+ with open(Path('user_data/logs/train_dataset_sample.json'), 'w') as json_file:
json.dump(decoded_entries, json_file, indent=4)
- logger.info("Log file 'train_dataset_sample.json' created in the 'logs' directory.")
+ logger.info("Log file 'train_dataset_sample.json' created in the 'user_data/logs' directory.")
except Exception as e:
logger.error(f"Failed to create log file due to error: {e}")
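Illustrative aside (not part of the diff): the training hunks above relocate datasets from training/datasets to user_data/training/datasets. A minimal standalone sketch that mirrors the load_dataset call shown above; "my_dataset" is a placeholder filename, not a file shipped with the project:

from pathlib import Path

from datasets import load_dataset

# Hypothetical dataset name; point this at a JSON file that actually exists under the new location.
dataset_path = Path('user_data/training/datasets') / 'my_dataset.json'
if dataset_path.exists():
    data = load_dataset("json", data_files=str(dataset_path))
    print(data['train'][0])
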
diff --git a/modules/transformers_loader.py b/modules/transformers_loader.py
index add3be66..905f5c47 100644
--- a/modules/transformers_loader.py
+++ b/modules/transformers_loader.py
@@ -249,7 +249,7 @@ def load_model_HF(model_name):
)
if shared.args.disk:
- params['offload_folder'] = shared.args.disk_cache_dir
+ params['offload_folder'] = str(Path(shared.args.disk_cache_dir))
if shared.args.compress_pos_emb > 1:
params['rope_scaling'] = {'type': 'linear', 'factor': shared.args.compress_pos_emb}
diff --git a/modules/ui.py b/modules/ui.py
index d5caaeaa..f137e62d 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -94,7 +94,7 @@ if not shared.args.old_colors:
input_radius='0.375rem',
)
-if Path("notification.mp3").exists():
+if Path("user_data/notification.mp3").exists():
audio_notification_js = "document.querySelector('#audio_notification audio')?.play();"
else:
audio_notification_js = ""
@@ -110,10 +110,10 @@ def list_model_elements():
'threads_batch',
'batch_size',
'hqq_backend',
- 'n_ctx',
- 'max_seq_len',
+ 'ctx_size',
'cache_type',
'tensor_split',
+ 'extra_flags',
'gpu_split',
'alpha_value',
'rope_freq_base',
@@ -145,6 +145,11 @@ def list_model_elements():
'cpp_runner',
'trust_remote_code',
'no_use_fast',
+ 'model_draft',
+ 'draft_max',
+ 'gpu_layers_draft',
+ 'device_draft',
+ 'ctx_size_draft',
]
return elements
@@ -201,7 +206,6 @@ def list_interface_input_elements():
'sampler_priority',
'custom_stopping_strings',
'custom_token_bans',
- 'show_after',
'negative_prompt',
'dry_sequence_breakers',
'grammar_string',
@@ -262,7 +266,7 @@ def apply_interface_values(state, use_persistent=False):
if 'textbox-default' in state and 'prompt_menu-default' in state:
state.pop('prompt_menu-default')
- if 'textbox-notebook' and 'prompt_menu-notebook' in state:
+ if 'textbox-notebook' in state and 'prompt_menu-notebook' in state:
state.pop('prompt_menu-notebook')
elements = list_interface_input_elements()
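Illustrative aside (not part of the diff): the apply_interface_values change above fixes a Python truthiness bug. In the removed condition, the string literal 'textbox-notebook' is always truthy, so only the second membership test was ever evaluated; the replacement checks both keys. A minimal sketch with a hypothetical state dict:

state = {'prompt_menu-notebook': 'x'}  # 'textbox-notebook' key deliberately absent

old_condition = bool('textbox-notebook' and 'prompt_menu-notebook' in state)    # True: the literal is truthy
new_condition = 'textbox-notebook' in state and 'prompt_menu-notebook' in state  # False: both keys are checked

print(old_condition, new_condition)  # True False
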
diff --git a/modules/ui_chat.py b/modules/ui_chat.py
index a830abfb..0d588549 100644
--- a/modules/ui_chat.py
+++ b/modules/ui_chat.py
@@ -88,7 +88,7 @@ def create_ui():
shared.gradio['start_with'] = gr.Textbox(label='Start reply with', placeholder='Sure thing!', value=shared.settings['start_with'], elem_classes=['add_scrollbar'])
with gr.Row():
- shared.gradio['mode'] = gr.Radio(choices=['chat', 'chat-instruct', 'instruct'], value=shared.settings['mode'] if shared.settings['mode'] in ['chat', 'chat-instruct'] else None, label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template Parameters > Instruction template is used.', elem_id='chat-mode')
+ shared.gradio['mode'] = gr.Radio(choices=['instruct', 'chat-instruct', 'chat'], value=shared.settings['mode'] if shared.settings['mode'] in ['chat', 'chat-instruct'] else None, label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template Parameters > Instruction template is used.', elem_id='chat-mode')
with gr.Row():
shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label='Chat style', value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct')
@@ -146,7 +146,7 @@ def create_chat_settings_ui():
with gr.Column(scale=1):
shared.gradio['character_picture'] = gr.Image(label='Character picture', type='pil', interactive=not mu)
- shared.gradio['your_picture'] = gr.Image(label='Your picture', type='pil', value=Image.open(Path('cache/pfp_me.png')) if Path('cache/pfp_me.png').exists() else None, interactive=not mu)
+ shared.gradio['your_picture'] = gr.Image(label='Your picture', type='pil', value=Image.open(Path('user_data/cache/pfp_me.png')) if Path('user_data/cache/pfp_me.png').exists() else None, interactive=not mu)
with gr.Tab('Instruction template'):
with gr.Row():
diff --git a/modules/ui_default.py b/modules/ui_default.py
index ccae9a5e..c2946b37 100644
--- a/modules/ui_default.py
+++ b/modules/ui_default.py
@@ -102,7 +102,7 @@ def handle_save_prompt(text):
return [
text,
utils.current_time() + ".txt",
- "prompts/",
+ "user_data/prompts/",
gr.update(visible=True)
]
@@ -110,6 +110,6 @@ def handle_save_prompt(text):
def handle_delete_prompt(prompt):
return [
prompt + ".txt",
- "prompts/",
+ "user_data/prompts/",
gr.update(visible=True)
]
diff --git a/modules/ui_file_saving.py b/modules/ui_file_saving.py
index 3a27e1b9..d1f9379b 100644
--- a/modules/ui_file_saving.py
+++ b/modules/ui_file_saving.py
@@ -28,7 +28,7 @@ def create_ui():
# Character saver/deleter
with gr.Group(visible=False, elem_classes='file-saver') as shared.gradio['character_saver']:
- shared.gradio['save_character_filename'] = gr.Textbox(lines=1, label='File name', info='The character will be saved to your characters/ folder with this base filename.')
+ shared.gradio['save_character_filename'] = gr.Textbox(lines=1, label='File name', info='The character will be saved to your user_data/characters folder with this base filename.')
with gr.Row():
shared.gradio['save_character_cancel'] = gr.Button('Cancel', elem_classes="small-button")
shared.gradio['save_character_confirm'] = gr.Button('Save', elem_classes="small-button", variant='primary', interactive=not mu)
@@ -41,7 +41,7 @@ def create_ui():
# Preset saver
with gr.Group(visible=False, elem_classes='file-saver') as shared.gradio['preset_saver']:
- shared.gradio['save_preset_filename'] = gr.Textbox(lines=1, label='File name', info='The preset will be saved to your presets/ folder with this base filename.')
+ shared.gradio['save_preset_filename'] = gr.Textbox(lines=1, label='File name', info='The preset will be saved to your user_data/presets folder with this base filename.')
shared.gradio['save_preset_contents'] = gr.Textbox(lines=10, label='File contents')
with gr.Row():
shared.gradio['save_preset_cancel'] = gr.Button('Cancel', elem_classes="small-button")
@@ -72,7 +72,7 @@ def create_event_handlers():
def handle_save_preset_confirm_click(filename, contents):
try:
- utils.save_file(f"presets/{filename}.yaml", contents)
+ utils.save_file(f"user_data/presets/{filename}.yaml", contents)
available_presets = utils.get_available_presets()
output = gr.update(choices=available_presets, value=filename)
except Exception:
@@ -145,7 +145,7 @@ def handle_save_preset_click(state):
def handle_delete_preset_click(preset):
return [
f"{preset}.yaml",
- "presets/",
+ "user_data/presets/",
gr.update(visible=True)
]
@@ -154,7 +154,7 @@ def handle_save_grammar_click(grammar_string):
return [
grammar_string,
"My Fancy Grammar.gbnf",
- "grammars/",
+ "user_data/grammars/",
gr.update(visible=True)
]
@@ -162,6 +162,6 @@ def handle_save_grammar_click(grammar_string):
def handle_delete_grammar_click(grammar_file):
return [
grammar_file,
- "grammars/",
+ "user_data/grammars/",
gr.update(visible=True)
]
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index b4af771c..e3cf2ba6 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -51,11 +51,11 @@ def create_ui():
shared.gradio['threads_batch'] = gr.Slider(label="threads_batch", minimum=0, step=1, maximum=256, value=shared.args.threads_batch)
shared.gradio['batch_size'] = gr.Slider(label="batch_size", minimum=1, maximum=4096, step=1, value=shared.args.batch_size)
shared.gradio['hqq_backend'] = gr.Dropdown(label="hqq_backend", choices=["PYTORCH", "PYTORCH_COMPILE", "ATEN"], value=shared.args.hqq_backend)
- shared.gradio['n_ctx'] = gr.Number(label="n_ctx", precision=0, step=256, value=shared.args.n_ctx, info='Context length. ⚠️ Lower this value if you can\'t load the model. Common values: 2048, 4096, 8192, 16384, 32768.')
- shared.gradio['max_seq_len'] = gr.Number(label='max_seq_len', precision=0, step=256, value=shared.args.max_seq_len, info='Context length. ⚠️ Lower this value if you can\'t load the model. Common values: 2048, 4096, 8192, 16384, 32768.')
- shared.gradio['cache_type'] = gr.Dropdown(label="cache_type", choices=['fp16', 'q8_0', 'q4_0', 'fp8', 'q8', 'q6', 'q4'], value=shared.args.cache_type, info='Valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4.')
+ shared.gradio['ctx_size'] = gr.Number(label='ctx_size', precision=0, step=256, value=shared.args.ctx_size, info='Context length. ⚠️ Lower this value if you can\'t load the model. Common values: 2048, 4096, 8192, 16384, 32768, 65536.')
+ shared.gradio['cache_type'] = gr.Dropdown(label="cache_type", choices=['fp16', 'q8_0', 'q4_0', 'fp8', 'q8', 'q7', 'q6', 'q5', 'q4', 'q3', 'q2'], value=shared.args.cache_type, allow_custom_value=True, info='Valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4; ExLlamaV3 - fp16, q2 to q8. For ExLlamaV3, you can type custom combinations for separate k/v bits (e.g. q4_q8).')
shared.gradio['tensor_split'] = gr.Textbox(label='tensor_split', info='List of proportions to split the model across multiple GPUs. Example: 60,40')
shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7')
+ shared.gradio['extra_flags'] = gr.Textbox(label='extra-flags', info='Additional flags to pass to llama-server. Format: "flag1=value1,flag2,flag3=value3". Example: "override-tensor=exps=CPU"', value=shared.args.extra_flags)
shared.gradio['cpu_memory'] = gr.Number(label="Maximum CPU memory in GiB. Use this for CPU offloading.", value=shared.args.cpu_memory)
shared.gradio['alpha_value'] = gr.Number(label='alpha_value', value=shared.args.alpha_value, precision=2, info='Positional embeddings alpha factor for NTK RoPE scaling. Recommended values (NTKv1): 1.75 for 1.5x context, 2.5 for 2x context. Use either this or compress_pos_emb, not both.')
shared.gradio['rope_freq_base'] = gr.Number(label='rope_freq_base', value=shared.args.rope_freq_base, precision=0, info='Positional embeddings frequency base for NTK RoPE scaling. Related to alpha_value by rope_freq_base = 10000 * alpha_value ^ (64 / 63). 0 = from model.')
@@ -70,6 +70,7 @@ def create_ui():
shared.gradio['torch_compile'] = gr.Checkbox(label="torch-compile", value=shared.args.torch_compile, info='Compile the model with torch.compile for improved performance.')
shared.gradio['flash_attn'] = gr.Checkbox(label="flash_attn", value=shared.args.flash_attn, info='Use flash-attention.')
shared.gradio['use_flash_attention_2'] = gr.Checkbox(label="use_flash_attention_2", value=shared.args.use_flash_attention_2, info='Set use_flash_attention_2=True while loading the model.')
+ shared.gradio['streaming_llm'] = gr.Checkbox(label="streaming_llm", value=shared.args.streaming_llm, info='Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed.')
shared.gradio['cpu'] = gr.Checkbox(label="cpu", value=shared.args.cpu, info='llama.cpp: Use llama-cpp-python compiled without GPU acceleration. Transformers: use PyTorch in CPU mode.')
shared.gradio['disk'] = gr.Checkbox(label="disk", value=shared.args.disk)
shared.gradio['row_split'] = gr.Checkbox(label="row_split", value=shared.args.row_split, info='Split the model by rows across GPUs. This may improve multi-gpu performance.')
@@ -90,7 +91,18 @@ def create_ui():
shared.gradio['trust_remote_code'] = gr.Checkbox(label="trust-remote-code", value=shared.args.trust_remote_code, info='Set trust_remote_code=True while loading the tokenizer/model. To enable this option, start the web UI with the --trust-remote-code flag.', interactive=shared.args.trust_remote_code)
shared.gradio['no_use_fast'] = gr.Checkbox(label="no_use_fast", value=shared.args.no_use_fast, info='Set use_fast=False while loading the tokenizer.')
shared.gradio['exllamav2_info'] = gr.Markdown("ExLlamav2_HF is recommended over ExLlamav2 for better integration with extensions and more consistent sampling behavior across loaders.")
- shared.gradio['tensorrt_llm_info'] = gr.Markdown('* TensorRT-LLM has to be installed manually in a separate Python 3.10 environment at the moment. For a guide, consult the description of [this PR](https://github.com/oobabooga/text-generation-webui/pull/5715). \n\n* `max_seq_len` is only used when `cpp-runner` is checked.\n\n* `cpp_runner` does not support streaming at the moment.')
+ shared.gradio['tensorrt_llm_info'] = gr.Markdown('* TensorRT-LLM has to be installed manually in a separate Python 3.10 environment at the moment. For a guide, consult the description of [this PR](https://github.com/oobabooga/text-generation-webui/pull/5715). \n\n* `ctx_size` is only used when `cpp-runner` is checked.\n\n* `cpp_runner` does not support streaming at the moment.')
+
+ # Speculative decoding
+ with gr.Accordion("Speculative decoding", open=False, elem_classes='tgw-accordion') as shared.gradio['speculative_decoding_accordion']:
+ with gr.Row():
+ shared.gradio['model_draft'] = gr.Dropdown(label="model-draft", choices=utils.get_available_models(), value=lambda: shared.args.model_draft, elem_classes='slim-dropdown', interactive=not mu)
+ ui.create_refresh_button(shared.gradio['model_draft'], lambda: None, lambda: {'choices': utils.get_available_models()}, 'refresh-button', interactive=not mu)
+
+ shared.gradio['draft_max'] = gr.Number(label="draft-max", precision=0, step=1, value=shared.args.draft_max, info='Number of tokens to draft for speculative decoding.')
+ shared.gradio['gpu_layers_draft'] = gr.Slider(label="gpu-layers-draft", minimum=0, maximum=256, value=shared.args.gpu_layers_draft, info='Number of layers to offload to the GPU for the draft model.')
+ shared.gradio['device_draft'] = gr.Textbox(label="device-draft", value=shared.args.device_draft, info='Comma-separated list of devices to use for offloading the draft model. Example: CUDA0,CUDA1')
+ shared.gradio['ctx_size_draft'] = gr.Number(label="ctx-size-draft", precision=0, step=256, value=shared.args.ctx_size_draft, info='Size of the prompt context for the draft model. If 0, uses the same as the main model.')
with gr.Column():
with gr.Row():
@@ -211,9 +223,9 @@ def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), retur
model_dir=shared.args.model_dir if shared.args.model_dir != shared.args_defaults.model_dir else None
)
- if output_folder == Path("models"):
+ if output_folder == Path("user_data/models"):
output_folder = Path(shared.args.model_dir)
- elif output_folder == Path("loras"):
+ elif output_folder == Path("user_data/loras"):
output_folder = Path(shared.args.lora_dir)
if check:
@@ -234,10 +246,8 @@ def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), retur
def update_truncation_length(current_length, state):
if 'loader' in state:
- if state['loader'].lower().startswith('exllama'):
- return state['max_seq_len']
- elif state['loader'] == 'llama.cpp':
- return state['n_ctx']
+ if state['loader'].lower().startswith('exllama') or state['loader'] == 'llama.cpp':
+ return state['ctx_size']
return current_length
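Illustrative aside (not part of the diff): the new extra-flags textbox above documents a "flag1=value1,flag2,flag3=value3" format for passing options through to llama-server. The sketch below parses only that documented string format; it is a hypothetical helper, not the project's actual flag handling, and splitting on the first '=' is an assumption chosen so a value like "exps=CPU" survives intact:

def parse_extra_flags(extra_flags: str) -> dict:
    """Hypothetical helper: split "flag1=value1,flag2" into {'flag1': 'value1', 'flag2': None}."""
    result = {}
    for item in filter(None, (part.strip() for part in extra_flags.split(','))):
        key, sep, value = item.partition('=')  # partition at the first '=' only
        result[key] = value if sep else None
    return result

print(parse_extra_flags("override-tensor=exps=CPU"))  # {'override-tensor': 'exps=CPU'}
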
diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py
index c3245a9d..6c2715af 100644
--- a/modules/ui_parameters.py
+++ b/modules/ui_parameters.py
@@ -93,7 +93,6 @@ def create_ui(default_preset):
shared.gradio['sampler_priority'] = gr.Textbox(value=generate_params['sampler_priority'], lines=12, label='Sampler priority', info='Parameter names separated by new lines or commas.', elem_classes=['add_scrollbar'])
shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=2, value=shared.settings["custom_stopping_strings"] or None, label='Custom stopping strings', info='Written between "" and separated by commas.', placeholder='"\\n", "\\nYou:"')
shared.gradio['custom_token_bans'] = gr.Textbox(value=shared.settings['custom_token_bans'] or None, label='Token bans', info='Token IDs to ban, separated by commas. The IDs can be found in the Default or Notebook tab.')
- shared.gradio['show_after'] = gr.Textbox(value=shared.settings['show_after'] or None, label='Show after', info='Hide the reply before this text.', placeholder="")
shared.gradio['negative_prompt'] = gr.Textbox(value=shared.settings['negative_prompt'], label='Negative prompt', info='For CFG. Only used when guidance_scale is different than 1.', lines=3, elem_classes=['add_scrollbar'])
shared.gradio['dry_sequence_breakers'] = gr.Textbox(value=generate_params['dry_sequence_breakers'], label='dry_sequence_breakers', info='Tokens across which sequence matching is not continued. Specified as a comma-separated list of quoted strings.')
with gr.Row() as shared.gradio['grammar_file_row']:
@@ -122,16 +121,14 @@ def create_event_handlers():
def get_truncation_length():
- if 'max_seq_len' in shared.provided_arguments or shared.args.max_seq_len != shared.args_defaults.max_seq_len:
- return shared.args.max_seq_len
- elif 'n_ctx' in shared.provided_arguments or shared.args.n_ctx != shared.args_defaults.n_ctx:
- return shared.args.n_ctx
+ if 'ctx_size' in shared.provided_arguments or shared.args.ctx_size != shared.args_defaults.ctx_size:
+ return shared.args.ctx_size
else:
return shared.settings['truncation_length']
def load_grammar(name):
- p = Path(f'grammars/{name}')
+ p = Path(f'user_data/grammars/{name}')
if p.exists():
return open(p, 'r', encoding='utf-8').read()
else:
diff --git a/modules/ui_session.py b/modules/ui_session.py
index 66386d12..7cf9f6e6 100644
--- a/modules/ui_session.py
+++ b/modules/ui_session.py
@@ -13,7 +13,7 @@ def create_ui():
shared.gradio['reset_interface'] = gr.Button("Apply flags/extensions and restart", interactive=not mu)
with gr.Row():
shared.gradio['toggle_dark_mode'] = gr.Button('Toggle 💡')
- shared.gradio['save_settings'] = gr.Button('Save UI defaults to settings.yaml', interactive=not mu)
+ shared.gradio['save_settings'] = gr.Button('Save UI defaults to user_data/settings.yaml', interactive=not mu)
with gr.Row():
with gr.Column():
@@ -48,7 +48,7 @@ def handle_save_settings(state, preset, extensions, show_controls, theme):
return [
contents,
"settings.yaml",
- "./",
+ "user_data/",
gr.update(visible=True)
]
diff --git a/modules/utils.py b/modules/utils.py
index f6be7541..77324139 100644
--- a/modules/utils.py
+++ b/modules/utils.py
@@ -76,44 +76,54 @@ def get_available_models():
# Get all GGUF files
gguf_files = get_available_ggufs()
+ # Filter out non-first parts of multipart GGUF files
+ filtered_gguf_files = []
+ for gguf_path in gguf_files:
+ filename = os.path.basename(gguf_path)
+
+ match = re.search(r'-(\d+)-of-\d+\.gguf$', filename)
+
+ if match:
+ part_number = match.group(1)
+ # Keep only if it's part 1
+ if part_number.lstrip("0") == "1":
+ filtered_gguf_files.append(gguf_path)
+ else:
+ # Not a multi-part file
+ filtered_gguf_files.append(gguf_path)
+
model_dir = Path(shared.args.model_dir)
# Find top-level directories containing GGUF files
dirs_with_gguf = set()
for gguf_path in gguf_files:
path = Path(gguf_path)
- if path.parts: # If in a subdirectory
- dirs_with_gguf.add(path.parts[0]) # Add top-level directory
+ if len(path.parts) > 0:
+ dirs_with_gguf.add(path.parts[0])
- # Find directories with safetensors files directly under them
+ # Find directories with safetensors files
dirs_with_safetensors = set()
for item in os.listdir(model_dir):
item_path = model_dir / item
if item_path.is_dir():
- # Check if there are safetensors files directly under this directory
if any(file.lower().endswith(('.safetensors', '.pt')) for file in os.listdir(item_path) if (item_path / file).is_file()):
dirs_with_safetensors.add(item)
# Find valid model directories
model_dirs = []
-
for item in os.listdir(model_dir):
item_path = model_dir / item
-
- # Skip if not a directory
if not item_path.is_dir():
continue
- # Include directory if it either:
- # 1. Doesn't contain GGUF files, OR
- # 2. Contains both GGUF and safetensors files
+ # Include directory if it either doesn't contain GGUF files
+ # or contains both GGUF and safetensors files
if item not in dirs_with_gguf or item in dirs_with_safetensors:
model_dirs.append(item)
model_dirs = sorted(model_dirs, key=natural_keys)
- # Combine all models
- return ['None'] + gguf_files + model_dirs
+ return ['None'] + filtered_gguf_files + model_dirs
def get_available_ggufs():
@@ -131,11 +141,11 @@ def get_available_ggufs():
def get_available_presets():
- return sorted(set((k.stem for k in Path('presets').glob('*.yaml'))), key=natural_keys)
+ return sorted(set((k.stem for k in Path('user_data/presets').glob('*.yaml'))), key=natural_keys)
def get_available_prompts():
- prompt_files = list(Path('prompts').glob('*.txt'))
+ prompt_files = list(Path('user_data/prompts').glob('*.txt'))
sorted_files = sorted(prompt_files, key=lambda x: x.stat().st_mtime, reverse=True)
prompts = [file.stem for file in sorted_files]
prompts.append('None')
@@ -143,12 +153,12 @@ def get_available_prompts():
def get_available_characters():
- paths = (x for x in Path('characters').iterdir() if x.suffix in ('.json', '.yaml', '.yml'))
+ paths = (x for x in Path('user_data/characters').iterdir() if x.suffix in ('.json', '.yaml', '.yml'))
return sorted(set((k.stem for k in paths)), key=natural_keys)
def get_available_instruction_templates():
- path = "instruction-templates"
+ path = "user_data/instruction-templates"
paths = []
if os.path.exists(path):
paths = (x for x in Path(path).iterdir() if x.suffix in ('.json', '.yaml', '.yml'))
@@ -179,4 +189,4 @@ def get_available_chat_styles():
def get_available_grammars():
- return ['None'] + sorted([item.name for item in list(Path('grammars').glob('*.gbnf'))], key=natural_keys)
+ return ['None'] + sorted([item.name for item in list(Path('user_data/grammars').glob('*.gbnf'))], key=natural_keys)
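Illustrative aside (not part of the diff): the filter added to get_available_models() above lists only the first shard of multipart GGUF files. A minimal sketch of that same regex against example filenames (the names are made up):

import re

# Same pattern as in the hunk above: match "-NNNNN-of-MMMMM.gguf" suffixes.
pattern = re.compile(r'-(\d+)-of-\d+\.gguf$')

for name in ['model-00001-of-00003.gguf', 'model-00002-of-00003.gguf', 'single-file.gguf']:
    match = pattern.search(name)
    keep = match is None or match.group(1).lstrip("0") == "1"
    print(name, keep)  # keeps part 1 and non-split files; drops later parts
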
diff --git a/one_click.py b/one_click.py
index 04b729eb..065afd99 100644
--- a/one_click.py
+++ b/one_click.py
@@ -28,14 +28,7 @@ conda_env_path = os.path.join(script_dir, "installer_files", "env")
state_file = '.installer_state.json'
# Command-line flags
-cmd_flags_path = os.path.join(script_dir, "CMD_FLAGS.txt")
-if os.path.exists(cmd_flags_path):
- with open(cmd_flags_path, 'r') as f:
- CMD_FLAGS = ' '.join(line.strip().rstrip('\\').strip() for line in f if line.strip().rstrip('\\').strip() and not line.strip().startswith('#'))
-else:
- CMD_FLAGS = ''
-
-flags = f"{' '.join([flag for flag in sys.argv[1:] if flag != '--update-wizard'])} {CMD_FLAGS}"
+flags = f"{' '.join([flag for flag in sys.argv[1:] if flag != '--update-wizard'])}"
def signal_handler(sig, frame):
@@ -300,9 +293,10 @@ def install_webui():
# Write a flag to CMD_FLAGS.txt for CPU mode
if selected_gpu == "NONE":
+ cmd_flags_path = os.path.join(script_dir, "user_data", "CMD_FLAGS.txt")
with open(cmd_flags_path, 'r+') as cmd_flags_file:
if "--cpu" not in cmd_flags_file.read():
- print_big_message("Adding the --cpu flag to CMD_FLAGS.txt.")
+ print_big_message("Adding the --cpu flag to user_data/CMD_FLAGS.txt.")
cmd_flags_file.write("\n--cpu\n")
# Handle CUDA version display
@@ -538,7 +532,7 @@ if __name__ == "__main__":
flags_list = re.split(' +(?=(?:[^\"]*\"[^\"]*\")*[^\"]*$)|=', flags)
model_dir = [flags_list[(flags_list.index(flag) + 1)] for flag in flags_list if flag == '--model-dir'][0].strip('"\'')
else:
- model_dir = 'models'
+ model_dir = 'user_data/models'
if len([item for item in glob.glob(f'{model_dir}/*') if not item.endswith(('.txt', '.yaml'))]) == 0:
print_big_message("You haven't downloaded any model yet.\nOnce the web UI launches, head over to the \"Model\" tab and download one.")
diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt
index b9afaa07..c20c161e 100644
--- a/requirements/full/requirements.txt
+++ b/requirements/full/requirements.txt
@@ -30,12 +30,12 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+cu124-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+cu124-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a3/exllamav3-0.0.1a3+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a3/exllamav3-0.0.1a3+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a4/exllamav3-0.0.1a4+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a4/exllamav3-0.0.1a4+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://github.com/oobabooga/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt
index 96cb299d..437da5b5 100644
--- a/requirements/full/requirements_amd.txt
+++ b/requirements/full/requirements_amd.txt
@@ -29,6 +29,6 @@ sse-starlette==1.6.5
tiktoken
# AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+rocm6.1.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+rocm6.1.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+rocm6.1.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+rocm6.1.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt
index 0f1a4fc2..b1c87990 100644
--- a/requirements/full/requirements_amd_noavx2.txt
+++ b/requirements/full/requirements_amd_noavx2.txt
@@ -29,6 +29,6 @@ sse-starlette==1.6.5
tiktoken
# AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+rocm6.1.2avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+rocm6.1.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+rocm6.1.2avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+rocm6.1.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt
index 8d1e5294..e62987b0 100644
--- a/requirements/full/requirements_apple_intel.txt
+++ b/requirements/full/requirements_apple_intel.txt
@@ -29,7 +29,7 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0-cp311-cp311-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a3/exllamav3-0.0.1a3-py3-none-any.whl
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a4/exllamav3-0.0.1a4-py3-none-any.whl
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl
diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt
index a44ff3cb..f7a9f114 100644
--- a/requirements/full/requirements_apple_silicon.txt
+++ b/requirements/full/requirements_apple_silicon.txt
@@ -29,8 +29,8 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0-cp311-cp311-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
-https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a3/exllamav3-0.0.1a3-py3-none-any.whl
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
+https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a4/exllamav3-0.0.1a4-py3-none-any.whl
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl
diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt
index 35855162..b8cd8390 100644
--- a/requirements/full/requirements_cpu_only.txt
+++ b/requirements/full/requirements_cpu_only.txt
@@ -29,5 +29,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt
index 0716455e..3b52d59b 100644
--- a/requirements/full/requirements_cpu_only_noavx2.txt
+++ b/requirements/full/requirements_cpu_only_noavx2.txt
@@ -29,5 +29,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt
index 98c43b88..a04e8979 100644
--- a/requirements/full/requirements_noavx2.txt
+++ b/requirements/full/requirements_noavx2.txt
@@ -30,12 +30,12 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+cu124avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+cu124avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a3/exllamav3-0.0.1a3+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a3/exllamav3-0.0.1a3+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/exllamav2/releases/download/v0.2.8/exllamav2-0.2.8-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a4/exllamav3-0.0.1a4+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a4/exllamav3-0.0.1a4+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
https://github.com/oobabooga/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt
index c3336fc7..5c717343 100644
--- a/requirements/portable/requirements.txt
+++ b/requirements/portable/requirements.txt
@@ -15,5 +15,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+cu124-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+cu124-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_amd.txt b/requirements/portable/requirements_amd.txt
index 4855225f..b616193d 100644
--- a/requirements/portable/requirements_amd.txt
+++ b/requirements/portable/requirements_amd.txt
@@ -15,4 +15,4 @@ sse-starlette==1.6.5
tiktoken
# AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+rocm6.1.2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+rocm6.1.2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_amd_noavx2.txt b/requirements/portable/requirements_amd_noavx2.txt
index f40daa8a..de4740c9 100644
--- a/requirements/portable/requirements_amd_noavx2.txt
+++ b/requirements/portable/requirements_amd_noavx2.txt
@@ -15,4 +15,4 @@ sse-starlette==1.6.5
tiktoken
# AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+rocm6.1.2avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+rocm6.1.2avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt
index 1ede251e..6310327d 100644
--- a/requirements/portable/requirements_apple_intel.txt
+++ b/requirements/portable/requirements_apple_intel.txt
@@ -15,5 +15,5 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0-cp311-cp311-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0-cp311-cp311-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt
index 26b68bff..f69b58e7 100644
--- a/requirements/portable/requirements_apple_silicon.txt
+++ b/requirements/portable/requirements_apple_silicon.txt
@@ -15,6 +15,6 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0-cp311-cp311-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0-cp311-cp311-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0-cp311-cp311-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0"
diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt
index 456a0499..dafa6bbe 100644
--- a/requirements/portable/requirements_cpu_only.txt
+++ b/requirements/portable/requirements_cpu_only.txt
@@ -15,5 +15,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+cpuavx2-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+cpuavx2-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"
diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt
index 7cd2dd34..c02191eb 100644
--- a/requirements/portable/requirements_cpu_only_noavx2.txt
+++ b/requirements/portable/requirements_cpu_only_noavx2.txt
@@ -15,5 +15,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+cpuavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+cpuavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"
diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt
index b47b8bbc..456188b4 100644
--- a/requirements/portable/requirements_noavx2.txt
+++ b/requirements/portable/requirements_noavx2.txt
@@ -15,5 +15,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+cu124avx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+cu124avx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt
index 15834f89..7e733967 100644
--- a/requirements/portable/requirements_vulkan.txt
+++ b/requirements/portable/requirements_vulkan.txt
@@ -15,5 +15,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+vulkan-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+vulkan-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt
index afb9e90f..0329a598 100644
--- a/requirements/portable/requirements_vulkan_noavx2.txt
+++ b/requirements/portable/requirements_vulkan_noavx2.txt
@@ -15,5 +15,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+vulkanavx-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.3.0/llama_cpp_binaries-0.3.0+vulkanavx-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.6.0/llama_cpp_binaries-0.6.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/server.py b/server.py
index 41a5660d..169578a5 100644
--- a/server.py
+++ b/server.py
@@ -1,7 +1,6 @@
import os
import warnings
-import modules.one_click_installer_check
from modules import shared
from modules.block_requests import OpenMonkeyPatch, RequestBlocker
from modules.logging_colors import logger
@@ -94,8 +93,8 @@ def create_interface():
'filter_by_loader': shared.args.loader or 'All'
})
- if Path("cache/pfp_character.png").exists():
- Path("cache/pfp_character.png").unlink()
+ if Path("user_data/cache/pfp_character.png").exists():
+ Path("user_data/cache/pfp_character.png").unlink()
# css/js strings
css = ui.css
@@ -112,8 +111,8 @@ def create_interface():
shared.gradio['interface_state'] = gr.State({k: None for k in shared.input_elements})
# Audio notification
- if Path("notification.mp3").exists():
- shared.gradio['audio_notification'] = gr.Audio(interactive=False, value="notification.mp3", elem_id="audio_notification", visible=False)
+ if Path("user_data/notification.mp3").exists():
+ shared.gradio['audio_notification'] = gr.Audio(interactive=False, value="user_data/notification.mp3", elem_id="audio_notification", visible=False)
# Floating menus for saving/deleting files
ui_file_saving.create_ui()
@@ -179,7 +178,7 @@ def create_interface():
ssl_keyfile=shared.args.ssl_keyfile,
ssl_certfile=shared.args.ssl_certfile,
root_path=shared.args.subpath,
- allowed_paths=["cache", "css", "extensions", "js"]
+ allowed_paths=["css", "js", "extensions", "user_data/cache"]
)
@@ -192,10 +191,10 @@ if __name__ == "__main__":
settings_file = None
if shared.args.settings is not None and Path(shared.args.settings).exists():
settings_file = Path(shared.args.settings)
- elif Path('settings.yaml').exists():
- settings_file = Path('settings.yaml')
- elif Path('settings.json').exists():
- settings_file = Path('settings.json')
+ elif Path('user_data/settings.yaml').exists():
+ settings_file = Path('user_data/settings.yaml')
+ elif Path('user_data/settings.json').exists():
+ settings_file = Path('user_data/settings.json')
if settings_file is not None:
logger.info(f"Loading settings from \"{settings_file}\"")
diff --git a/start_wsl.bat b/start_wsl.bat
deleted file mode 100755
index d7bacead..00000000
--- a/start_wsl.bat
+++ /dev/null
@@ -1,11 +0,0 @@
-@echo off
-
-cd /D "%~dp0"
-
-set PATH=%PATH%;%SystemRoot%\system32
-
-@rem sed -i 's/\x0D$//' ./wsl.sh converts newlines to unix format in the wsl script
-call wsl -e bash -lic "sed -i 's/\x0D$//' ./wsl.sh; source ./wsl.sh %*"
-
-:end
-pause
diff --git a/update_wizard_wsl.bat b/update_wizard_wsl.bat
deleted file mode 100755
index 35f0a349..00000000
--- a/update_wizard_wsl.bat
+++ /dev/null
@@ -1,11 +0,0 @@
-@echo off
-
-cd /D "%~dp0"
-
-set PATH=%PATH%;%SystemRoot%\system32
-
-@rem sed -i 's/\x0D$//' ./wsl.sh converts newlines to unix format in the wsl script calling wsl.sh with 'update' will run updater
-call wsl -e bash -lic "sed -i 's/\x0D$//' ./wsl.sh; source ./wsl.sh update-wizard"
-
-:end
-pause
diff --git a/user_data/CMD_FLAGS.txt b/user_data/CMD_FLAGS.txt
new file mode 100644
index 00000000..b0f667b0
--- /dev/null
+++ b/user_data/CMD_FLAGS.txt
@@ -0,0 +1,3 @@
+# Add persistent flags here to use every time you launch the web UI.
+# Example:
+# --listen --api
diff --git a/characters/Assistant.yaml b/user_data/characters/Assistant.yaml
similarity index 100%
rename from characters/Assistant.yaml
rename to user_data/characters/Assistant.yaml
diff --git a/characters/Example.png b/user_data/characters/Example.png
similarity index 100%
rename from characters/Example.png
rename to user_data/characters/Example.png
diff --git a/characters/Example.yaml b/user_data/characters/Example.yaml
similarity index 100%
rename from characters/Example.yaml
rename to user_data/characters/Example.yaml
diff --git a/grammars/arithmetic.gbnf b/user_data/grammars/arithmetic.gbnf
similarity index 100%
rename from grammars/arithmetic.gbnf
rename to user_data/grammars/arithmetic.gbnf
diff --git a/grammars/c.gbnf b/user_data/grammars/c.gbnf
similarity index 100%
rename from grammars/c.gbnf
rename to user_data/grammars/c.gbnf
diff --git a/grammars/chess.gbnf b/user_data/grammars/chess.gbnf
similarity index 100%
rename from grammars/chess.gbnf
rename to user_data/grammars/chess.gbnf
diff --git a/grammars/json.gbnf b/user_data/grammars/json.gbnf
similarity index 100%
rename from grammars/json.gbnf
rename to user_data/grammars/json.gbnf
diff --git a/grammars/json_w_trailing_space.gbnf b/user_data/grammars/json_w_trailing_space.gbnf
similarity index 100%
rename from grammars/json_w_trailing_space.gbnf
rename to user_data/grammars/json_w_trailing_space.gbnf
diff --git a/grammars/list.gbnf b/user_data/grammars/list.gbnf
similarity index 100%
rename from grammars/list.gbnf
rename to user_data/grammars/list.gbnf
diff --git a/grammars/roleplay.gbnf b/user_data/grammars/roleplay.gbnf
similarity index 100%
rename from grammars/roleplay.gbnf
rename to user_data/grammars/roleplay.gbnf
diff --git a/grammars/simple_arithmetic.gbnf b/user_data/grammars/simple_arithmetic.gbnf
similarity index 100%
rename from grammars/simple_arithmetic.gbnf
rename to user_data/grammars/simple_arithmetic.gbnf
diff --git a/instruction-templates/Airoboros-v1.2.yaml b/user_data/instruction-templates/Airoboros-v1.2.yaml
similarity index 100%
rename from instruction-templates/Airoboros-v1.2.yaml
rename to user_data/instruction-templates/Airoboros-v1.2.yaml
diff --git a/instruction-templates/Alpaca.yaml b/user_data/instruction-templates/Alpaca.yaml
similarity index 100%
rename from instruction-templates/Alpaca.yaml
rename to user_data/instruction-templates/Alpaca.yaml
diff --git a/instruction-templates/Bactrian.yaml b/user_data/instruction-templates/Bactrian.yaml
similarity index 100%
rename from instruction-templates/Bactrian.yaml
rename to user_data/instruction-templates/Bactrian.yaml
diff --git a/instruction-templates/Baichuan Chat.yaml b/user_data/instruction-templates/Baichuan Chat.yaml
similarity index 100%
rename from instruction-templates/Baichuan Chat.yaml
rename to user_data/instruction-templates/Baichuan Chat.yaml
diff --git a/instruction-templates/Baize.yaml b/user_data/instruction-templates/Baize.yaml
similarity index 100%
rename from instruction-templates/Baize.yaml
rename to user_data/instruction-templates/Baize.yaml
diff --git a/instruction-templates/Bluemoon.yaml b/user_data/instruction-templates/Bluemoon.yaml
similarity index 100%
rename from instruction-templates/Bluemoon.yaml
rename to user_data/instruction-templates/Bluemoon.yaml
diff --git a/instruction-templates/ChatGLM.yaml b/user_data/instruction-templates/ChatGLM.yaml
similarity index 100%
rename from instruction-templates/ChatGLM.yaml
rename to user_data/instruction-templates/ChatGLM.yaml
diff --git a/instruction-templates/ChatML.yaml b/user_data/instruction-templates/ChatML.yaml
similarity index 100%
rename from instruction-templates/ChatML.yaml
rename to user_data/instruction-templates/ChatML.yaml
diff --git a/instruction-templates/Chinese-Vicuna-Chat.yaml b/user_data/instruction-templates/Chinese-Vicuna-Chat.yaml
similarity index 100%
rename from instruction-templates/Chinese-Vicuna-Chat.yaml
rename to user_data/instruction-templates/Chinese-Vicuna-Chat.yaml
diff --git a/instruction-templates/Command-R.yaml b/user_data/instruction-templates/Command-R.yaml
similarity index 100%
rename from instruction-templates/Command-R.yaml
rename to user_data/instruction-templates/Command-R.yaml
diff --git a/instruction-templates/Galactica Cite.yaml b/user_data/instruction-templates/Galactica Cite.yaml
similarity index 100%
rename from instruction-templates/Galactica Cite.yaml
rename to user_data/instruction-templates/Galactica Cite.yaml
diff --git a/instruction-templates/Galactica Finetuned.yaml b/user_data/instruction-templates/Galactica Finetuned.yaml
similarity index 100%
rename from instruction-templates/Galactica Finetuned.yaml
rename to user_data/instruction-templates/Galactica Finetuned.yaml
diff --git a/instruction-templates/Galactica Q.yaml b/user_data/instruction-templates/Galactica Q.yaml
similarity index 100%
rename from instruction-templates/Galactica Q.yaml
rename to user_data/instruction-templates/Galactica Q.yaml
diff --git a/instruction-templates/Galactica Summary.yaml b/user_data/instruction-templates/Galactica Summary.yaml
similarity index 100%
rename from instruction-templates/Galactica Summary.yaml
rename to user_data/instruction-templates/Galactica Summary.yaml
diff --git a/instruction-templates/Galactica Work.yaml b/user_data/instruction-templates/Galactica Work.yaml
similarity index 100%
rename from instruction-templates/Galactica Work.yaml
rename to user_data/instruction-templates/Galactica Work.yaml
diff --git a/instruction-templates/Galactica v2.yaml b/user_data/instruction-templates/Galactica v2.yaml
similarity index 100%
rename from instruction-templates/Galactica v2.yaml
rename to user_data/instruction-templates/Galactica v2.yaml
diff --git a/instruction-templates/Galactica.yaml b/user_data/instruction-templates/Galactica.yaml
similarity index 100%
rename from instruction-templates/Galactica.yaml
rename to user_data/instruction-templates/Galactica.yaml
diff --git a/instruction-templates/Gorilla.yaml b/user_data/instruction-templates/Gorilla.yaml
similarity index 100%
rename from instruction-templates/Gorilla.yaml
rename to user_data/instruction-templates/Gorilla.yaml
diff --git a/instruction-templates/Guanaco non-chat.yaml b/user_data/instruction-templates/Guanaco non-chat.yaml
similarity index 100%
rename from instruction-templates/Guanaco non-chat.yaml
rename to user_data/instruction-templates/Guanaco non-chat.yaml
diff --git a/instruction-templates/Guanaco-QLoRA.yaml b/user_data/instruction-templates/Guanaco-QLoRA.yaml
similarity index 100%
rename from instruction-templates/Guanaco-QLoRA.yaml
rename to user_data/instruction-templates/Guanaco-QLoRA.yaml
diff --git a/instruction-templates/H2O-prompt_answer.yaml b/user_data/instruction-templates/H2O-prompt_answer.yaml
similarity index 100%
rename from instruction-templates/H2O-prompt_answer.yaml
rename to user_data/instruction-templates/H2O-prompt_answer.yaml
diff --git a/instruction-templates/Hippogriff.yaml b/user_data/instruction-templates/Hippogriff.yaml
similarity index 100%
rename from instruction-templates/Hippogriff.yaml
rename to user_data/instruction-templates/Hippogriff.yaml
diff --git a/instruction-templates/INCITE-Chat.yaml b/user_data/instruction-templates/INCITE-Chat.yaml
similarity index 100%
rename from instruction-templates/INCITE-Chat.yaml
rename to user_data/instruction-templates/INCITE-Chat.yaml
diff --git a/instruction-templates/INCITE-Instruct.yaml b/user_data/instruction-templates/INCITE-Instruct.yaml
similarity index 100%
rename from instruction-templates/INCITE-Instruct.yaml
rename to user_data/instruction-templates/INCITE-Instruct.yaml
diff --git a/instruction-templates/KoAlpaca.yaml b/user_data/instruction-templates/KoAlpaca.yaml
similarity index 100%
rename from instruction-templates/KoAlpaca.yaml
rename to user_data/instruction-templates/KoAlpaca.yaml
diff --git a/instruction-templates/Koala.yaml b/user_data/instruction-templates/Koala.yaml
similarity index 100%
rename from instruction-templates/Koala.yaml
rename to user_data/instruction-templates/Koala.yaml
diff --git a/instruction-templates/LLaVA.yaml b/user_data/instruction-templates/LLaVA.yaml
similarity index 100%
rename from instruction-templates/LLaVA.yaml
rename to user_data/instruction-templates/LLaVA.yaml
diff --git a/instruction-templates/Llama-v2.yaml b/user_data/instruction-templates/Llama-v2.yaml
similarity index 100%
rename from instruction-templates/Llama-v2.yaml
rename to user_data/instruction-templates/Llama-v2.yaml
diff --git a/instruction-templates/Llama-v3.yaml b/user_data/instruction-templates/Llama-v3.yaml
similarity index 100%
rename from instruction-templates/Llama-v3.yaml
rename to user_data/instruction-templates/Llama-v3.yaml
diff --git a/instruction-templates/MOSS.yaml b/user_data/instruction-templates/MOSS.yaml
similarity index 100%
rename from instruction-templates/MOSS.yaml
rename to user_data/instruction-templates/MOSS.yaml
diff --git a/instruction-templates/Manticore Chat.yaml b/user_data/instruction-templates/Manticore Chat.yaml
similarity index 100%
rename from instruction-templates/Manticore Chat.yaml
rename to user_data/instruction-templates/Manticore Chat.yaml
diff --git a/instruction-templates/Metharme.yaml b/user_data/instruction-templates/Metharme.yaml
similarity index 100%
rename from instruction-templates/Metharme.yaml
rename to user_data/instruction-templates/Metharme.yaml
diff --git a/instruction-templates/Mistral.yaml b/user_data/instruction-templates/Mistral.yaml
similarity index 100%
rename from instruction-templates/Mistral.yaml
rename to user_data/instruction-templates/Mistral.yaml
diff --git a/instruction-templates/NVIDIA-ChatQA.yaml b/user_data/instruction-templates/NVIDIA-ChatQA.yaml
similarity index 100%
rename from instruction-templates/NVIDIA-ChatQA.yaml
rename to user_data/instruction-templates/NVIDIA-ChatQA.yaml
diff --git a/instruction-templates/NewHope.yaml b/user_data/instruction-templates/NewHope.yaml
similarity index 100%
rename from instruction-templates/NewHope.yaml
rename to user_data/instruction-templates/NewHope.yaml
diff --git a/instruction-templates/Open Assistant.yaml b/user_data/instruction-templates/Open Assistant.yaml
similarity index 100%
rename from instruction-templates/Open Assistant.yaml
rename to user_data/instruction-templates/Open Assistant.yaml
diff --git a/instruction-templates/OpenBuddy.yaml b/user_data/instruction-templates/OpenBuddy.yaml
similarity index 100%
rename from instruction-templates/OpenBuddy.yaml
rename to user_data/instruction-templates/OpenBuddy.yaml
diff --git a/instruction-templates/OpenChat.yaml b/user_data/instruction-templates/OpenChat.yaml
similarity index 100%
rename from instruction-templates/OpenChat.yaml
rename to user_data/instruction-templates/OpenChat.yaml
diff --git a/instruction-templates/OpenOrca-Platypus2.yaml b/user_data/instruction-templates/OpenOrca-Platypus2.yaml
similarity index 100%
rename from instruction-templates/OpenOrca-Platypus2.yaml
rename to user_data/instruction-templates/OpenOrca-Platypus2.yaml
diff --git a/instruction-templates/Orca Mini.yaml b/user_data/instruction-templates/Orca Mini.yaml
similarity index 100%
rename from instruction-templates/Orca Mini.yaml
rename to user_data/instruction-templates/Orca Mini.yaml
diff --git a/instruction-templates/Orca-Vicuna.yaml b/user_data/instruction-templates/Orca-Vicuna.yaml
similarity index 100%
rename from instruction-templates/Orca-Vicuna.yaml
rename to user_data/instruction-templates/Orca-Vicuna.yaml
diff --git a/instruction-templates/RWKV-Raven.yaml b/user_data/instruction-templates/RWKV-Raven.yaml
similarity index 100%
rename from instruction-templates/RWKV-Raven.yaml
rename to user_data/instruction-templates/RWKV-Raven.yaml
diff --git a/instruction-templates/RWKV-World.yaml b/user_data/instruction-templates/RWKV-World.yaml
similarity index 100%
rename from instruction-templates/RWKV-World.yaml
rename to user_data/instruction-templates/RWKV-World.yaml
diff --git a/instruction-templates/Samantha.yaml b/user_data/instruction-templates/Samantha.yaml
similarity index 100%
rename from instruction-templates/Samantha.yaml
rename to user_data/instruction-templates/Samantha.yaml
diff --git a/instruction-templates/StableBeluga2.yaml b/user_data/instruction-templates/StableBeluga2.yaml
similarity index 100%
rename from instruction-templates/StableBeluga2.yaml
rename to user_data/instruction-templates/StableBeluga2.yaml
diff --git a/instruction-templates/StableLM.yaml b/user_data/instruction-templates/StableLM.yaml
similarity index 100%
rename from instruction-templates/StableLM.yaml
rename to user_data/instruction-templates/StableLM.yaml
diff --git a/instruction-templates/StableVicuna.yaml b/user_data/instruction-templates/StableVicuna.yaml
similarity index 100%
rename from instruction-templates/StableVicuna.yaml
rename to user_data/instruction-templates/StableVicuna.yaml
diff --git a/instruction-templates/Starchat-Beta.yaml b/user_data/instruction-templates/Starchat-Beta.yaml
similarity index 100%
rename from instruction-templates/Starchat-Beta.yaml
rename to user_data/instruction-templates/Starchat-Beta.yaml
diff --git a/instruction-templates/Synthia-CoT.yaml b/user_data/instruction-templates/Synthia-CoT.yaml
similarity index 100%
rename from instruction-templates/Synthia-CoT.yaml
rename to user_data/instruction-templates/Synthia-CoT.yaml
diff --git a/instruction-templates/Synthia.yaml b/user_data/instruction-templates/Synthia.yaml
similarity index 100%
rename from instruction-templates/Synthia.yaml
rename to user_data/instruction-templates/Synthia.yaml
diff --git a/instruction-templates/Tulu.yaml b/user_data/instruction-templates/Tulu.yaml
similarity index 100%
rename from instruction-templates/Tulu.yaml
rename to user_data/instruction-templates/Tulu.yaml
diff --git a/instruction-templates/Vicuna-v0.yaml b/user_data/instruction-templates/Vicuna-v0.yaml
similarity index 100%
rename from instruction-templates/Vicuna-v0.yaml
rename to user_data/instruction-templates/Vicuna-v0.yaml
diff --git a/instruction-templates/Vicuna-v1.1.yaml b/user_data/instruction-templates/Vicuna-v1.1.yaml
similarity index 100%
rename from instruction-templates/Vicuna-v1.1.yaml
rename to user_data/instruction-templates/Vicuna-v1.1.yaml
diff --git a/instruction-templates/Vigogne-Chat.yaml b/user_data/instruction-templates/Vigogne-Chat.yaml
similarity index 100%
rename from instruction-templates/Vigogne-Chat.yaml
rename to user_data/instruction-templates/Vigogne-Chat.yaml
diff --git a/instruction-templates/Vigogne-Instruct.yaml b/user_data/instruction-templates/Vigogne-Instruct.yaml
similarity index 100%
rename from instruction-templates/Vigogne-Instruct.yaml
rename to user_data/instruction-templates/Vigogne-Instruct.yaml
diff --git a/instruction-templates/Wizard-Mega ShareGPT.yaml b/user_data/instruction-templates/Wizard-Mega ShareGPT.yaml
similarity index 100%
rename from instruction-templates/Wizard-Mega ShareGPT.yaml
rename to user_data/instruction-templates/Wizard-Mega ShareGPT.yaml
diff --git a/instruction-templates/Wizard-Mega.yaml b/user_data/instruction-templates/Wizard-Mega.yaml
similarity index 100%
rename from instruction-templates/Wizard-Mega.yaml
rename to user_data/instruction-templates/Wizard-Mega.yaml
diff --git a/instruction-templates/Ziya.yaml b/user_data/instruction-templates/Ziya.yaml
similarity index 100%
rename from instruction-templates/Ziya.yaml
rename to user_data/instruction-templates/Ziya.yaml
diff --git a/loras/place-your-loras-here.txt b/user_data/loras/place-your-loras-here.txt
similarity index 100%
rename from loras/place-your-loras-here.txt
rename to user_data/loras/place-your-loras-here.txt
diff --git a/models/config.yaml b/user_data/models/config.yaml
similarity index 100%
rename from models/config.yaml
rename to user_data/models/config.yaml
diff --git a/models/place-your-models-here.txt b/user_data/models/place-your-models-here.txt
similarity index 100%
rename from models/place-your-models-here.txt
rename to user_data/models/place-your-models-here.txt
diff --git a/presets/Contrastive Search.yaml b/user_data/presets/Contrastive Search.yaml
similarity index 100%
rename from presets/Contrastive Search.yaml
rename to user_data/presets/Contrastive Search.yaml
diff --git a/presets/Creative.yaml b/user_data/presets/Creative.yaml
similarity index 100%
rename from presets/Creative.yaml
rename to user_data/presets/Creative.yaml
diff --git a/presets/Deterministic.yaml b/user_data/presets/Deterministic.yaml
similarity index 100%
rename from presets/Deterministic.yaml
rename to user_data/presets/Deterministic.yaml
diff --git a/presets/Instruct.yaml b/user_data/presets/Instruct.yaml
similarity index 100%
rename from presets/Instruct.yaml
rename to user_data/presets/Instruct.yaml
diff --git a/presets/Null preset.yaml b/user_data/presets/Null preset.yaml
similarity index 100%
rename from presets/Null preset.yaml
rename to user_data/presets/Null preset.yaml
diff --git a/presets/min_p.yaml b/user_data/presets/min_p.yaml
similarity index 100%
rename from presets/min_p.yaml
rename to user_data/presets/min_p.yaml
diff --git a/prompts/Alpaca-with-Input.txt b/user_data/prompts/Alpaca-with-Input.txt
similarity index 100%
rename from prompts/Alpaca-with-Input.txt
rename to user_data/prompts/Alpaca-with-Input.txt
diff --git a/prompts/QA.txt b/user_data/prompts/QA.txt
similarity index 100%
rename from prompts/QA.txt
rename to user_data/prompts/QA.txt
diff --git a/settings-template.yaml b/user_data/settings-template.yaml
similarity index 98%
rename from settings-template.yaml
rename to user_data/settings-template.yaml
index 0343df0a..83764f97 100644
--- a/settings-template.yaml
+++ b/user_data/settings-template.yaml
@@ -1,6 +1,6 @@
show_controls: true
start_with: ''
-mode: chat-instruct
+mode: instruct
chat_style: cai-chat
chat-instruct_command: |-
Continue the chat dialogue below. Write a single reply for the character "<|character|>".
@@ -29,7 +29,6 @@ truncation_length: 8192
seed: -1
custom_stopping_strings: ''
custom_token_bans: ''
-show_after: ''
negative_prompt: ''
autoload_model: false
dark_theme: true
diff --git a/training/datasets/put-trainer-datasets-here.txt b/user_data/training/datasets/put-trainer-datasets-here.txt
similarity index 100%
rename from training/datasets/put-trainer-datasets-here.txt
rename to user_data/training/datasets/put-trainer-datasets-here.txt
diff --git a/training/formats/ChatML-format.json b/user_data/training/formats/ChatML-format.json
similarity index 100%
rename from training/formats/ChatML-format.json
rename to user_data/training/formats/ChatML-format.json
diff --git a/training/formats/alpaca-chatbot-format.json b/user_data/training/formats/alpaca-chatbot-format.json
similarity index 100%
rename from training/formats/alpaca-chatbot-format.json
rename to user_data/training/formats/alpaca-chatbot-format.json
diff --git a/training/formats/alpaca-format.json b/user_data/training/formats/alpaca-format.json
similarity index 100%
rename from training/formats/alpaca-format.json
rename to user_data/training/formats/alpaca-format.json
diff --git a/training/formats/llama2-chat-format.json b/user_data/training/formats/llama2-chat-format.json
similarity index 100%
rename from training/formats/llama2-chat-format.json
rename to user_data/training/formats/llama2-chat-format.json
diff --git a/training/formats/vicuna-format.json b/user_data/training/formats/vicuna-format.json
similarity index 100%
rename from training/formats/vicuna-format.json
rename to user_data/training/formats/vicuna-format.json
diff --git a/wsl.sh b/wsl.sh
deleted file mode 100755
index c5d28b16..00000000
--- a/wsl.sh
+++ /dev/null
@@ -1,115 +0,0 @@
-#!/bin/bash
-
-# detect if build-essential is missing or broken
-if ! dpkg-query -W -f'${Status}' "build-essential" 2>/dev/null | grep -q "ok installed"; then
-echo "build-essential not found or broken!
-
-A C++ compiler is required to build needed Python packages!
-To install one, run cmd_wsl.bat and enter these commands:
-
-sudo apt-get update
-sudo apt-get install build-essential
-"
-read -n1 -p "Continue the installer anyway? [y,n]" EXIT_PROMPT
-# only continue if user inputs 'y' else exit
-if ! [[ $EXIT_PROMPT == "Y" || $EXIT_PROMPT == "y" ]]; then exit; fi
-fi
-
-# deactivate existing conda envs as needed to avoid conflicts
-{ conda deactivate && conda deactivate && conda deactivate; } 2> /dev/null
-
-# config unlike other scripts, can't use current directory due to file IO bug in WSL, needs to be in virtual drive
-INSTALL_DIR_PREFIX="$HOME/text-gen-install"
-if [[ ! $(realpath "$(pwd)/..") = /mnt/* ]]; then
- INSTALL_DIR_PREFIX="$(realpath "$(pwd)/..")" && INSTALL_INPLACE=1
-fi
-INSTALL_DIR="$INSTALL_DIR_PREFIX/text-generation-webui"
-CONDA_ROOT_PREFIX="$INSTALL_DIR/installer_files/conda"
-INSTALL_ENV_DIR="$INSTALL_DIR/installer_files/env"
-MINICONDA_DOWNLOAD_URL="https://repo.anaconda.com/miniconda/Miniconda3-py311_24.11.1-0-Linux-x86_64.sh"
-conda_exists="F"
-
-# environment isolation
-export PYTHONNOUSERSITE=1
-unset PYTHONPATH
-unset PYTHONHOME
-export CUDA_PATH="$INSTALL_ENV_DIR"
-export CUDA_HOME="$CUDA_PATH"
-
-# /usr/lib/wsl/lib needs to be added to LD_LIBRARY_PATH to fix years-old bug in WSL where GPU drivers aren't linked properly
-export LD_LIBRARY_PATH="$CUDA_HOME/lib:/usr/lib/wsl/lib:$LD_LIBRARY_PATH"
-
-# open bash cli if called with 'wsl.sh cmd' with workarounds for existing conda
-if [ "$1" == "cmd" ]; then
- exec bash --init-file <(echo ". ~/.bashrc; conda deactivate 2> /dev/null; cd $INSTALL_DIR || cd $HOME; source $CONDA_ROOT_PREFIX/etc/profile.d/conda.sh; conda activate $INSTALL_ENV_DIR")
- exit
-fi
-
-if [[ "$INSTALL_DIR" =~ " " ]]; then echo This script relies on Miniconda which can not be silently installed under a path with spaces. && exit; fi
-
-# create install dir if missing
-if [ ! -d "$INSTALL_DIR" ]; then mkdir -p "$INSTALL_DIR" || exit; fi
-
-# figure out whether git and conda needs to be installed
-if "$CONDA_ROOT_PREFIX/bin/conda" --version &>/dev/null; then conda_exists="T"; fi
-
-# (if necessary) install git and conda into a contained environment
-# download miniconda
-if [ "$conda_exists" == "F" ]; then
- echo "Downloading Miniconda from $MINICONDA_DOWNLOAD_URL to $INSTALL_DIR/miniconda_installer.sh"
-
- curl -L "$MINICONDA_DOWNLOAD_URL" > "$INSTALL_DIR/miniconda_installer.sh"
-
- chmod u+x "$INSTALL_DIR/miniconda_installer.sh"
- bash "$INSTALL_DIR/miniconda_installer.sh" -b -p $CONDA_ROOT_PREFIX
-
- # test the conda binary
- echo "Miniconda version:"
- "$CONDA_ROOT_PREFIX/bin/conda" --version
-
- # delete the Miniconda installer
- rm "$INSTALL_DIR/miniconda_installer.sh"
-fi
-
-# create the installer env
-if [ ! -e "$INSTALL_ENV_DIR" ]; then
- "$CONDA_ROOT_PREFIX/bin/conda" create -y -k --prefix "$INSTALL_ENV_DIR" python=3.11 git
-fi
-
-# check if conda environment was actually created
-if [ ! -e "$INSTALL_ENV_DIR/bin/python" ]; then
- echo "Conda environment is empty."
- exit
-fi
-
-# activate installer env
-source "$CONDA_ROOT_PREFIX/etc/profile.d/conda.sh" # otherwise conda complains about 'shell not initialized' (needed when running in a script)
-conda activate "$INSTALL_ENV_DIR"
-
-pushd $INSTALL_DIR 1> /dev/null || exit
-
-if [ ! -f "./server.py" ]; then
- git init -b main
- git remote add origin https://github.com/oobabooga/text-generation-webui
- git fetch
- git remote set-head origin -a
- git reset origin/HEAD --hard
- git branch --set-upstream-to=origin/HEAD
- git restore -- . :!./CMD_FLAGS.txt
-fi
-
-# copy CMD_FLAGS.txt to install dir to allow edits within Windows
-if [[ $INSTALL_INPLACE != 1 ]]; then
- # workaround for old install migration
- if [ ! -f "./wsl.sh" ]; then
- git pull || exit
- [ -f "../webui.py" ] && mv "../webui.py" "../webui-old.py"
- fi
- if [ -f "$(dirs +1)/CMD_FLAGS.txt" ] && [ -f "./CMD_FLAGS.txt" ]; then cp -u "$(dirs +1)/CMD_FLAGS.txt" "$INSTALL_DIR"; fi
-fi
-
-# setup installer env update env if called with 'wsl.sh update'
-case "$1" in
-("update-wizard") python one_click.py --update-wizard;;
-(*) python one_click.py $@;;
-esac