+def generate_chat_html(history, name1, name2, reset_cache=False, last_message_only=False):
+    if not last_message_only:
+        output = f'<style>{chat_styles["wpp"]}</style><div class="chat" id="chat"><div class="messages">'
+    else:
+        output = ""
- for i in range(len(history['visible'])):
- row_visible = history['visible'][i]
- row_internal = history['internal'][i]
- converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
+ def create_message(role, content, raw_content):
+ """Inner function for WPP-style messages."""
+ text_class = "text-you" if role == "user" else "text-bot"
- # Get timestamps
- user_timestamp = format_message_timestamp(history, "user", i)
- assistant_timestamp = format_message_timestamp(history, "assistant", i)
+ # Get role-specific data
+ timestamp = format_message_timestamp(history, role, i)
+ attachments = format_message_attachments(history, role, i)
- # Get attachments
- user_attachments = format_message_attachments(history, "user", i)
- assistant_attachments = format_message_attachments(history, "assistant", i)
+ # Create info button if timestamp exists
+ info_message = ""
+ if timestamp:
+ tooltip_text = get_message_tooltip(history, role, i)
+ info_message = info_button.replace('title="message"', f'title="{html.escape(tooltip_text)}"')
- # Create info buttons for timestamps if they exist
- info_message_user = ""
- if user_timestamp != "":
- # Extract the timestamp value from the span
- user_timestamp_value = user_timestamp.split('>', 1)[1].split('<', 1)[0]
- info_message_user = info_button.replace("message", user_timestamp_value)
-
- info_message_assistant = ""
- if assistant_timestamp != "":
- # Extract the timestamp value from the span
- assistant_timestamp_value = assistant_timestamp.split('>', 1)[1].split('<', 1)[0]
- info_message_assistant = info_button.replace("message", assistant_timestamp_value)
-
-        if converted_visible[0]:  # Don't display empty user messages
-            output += (
-                f'<div class="message" '
-                f'data-raw="{html.escape(row_internal[0], quote=True)}">'
-                f'<div class="text-you">'
-                f'<div class="message-body">{converted_visible[0]}</div>'
-                f'{user_attachments}'
-                f'{actions_html(history, i, "user", info_message_user)}'
-                f'</div>'
-                f'</div>'
-            )
-
-        output += (
+        return (
             f'<div class="message" '
-            f'data-raw="{html.escape(row_internal[1], quote=True)}">'
-            f'<div class="text-bot">'
-            f'<div class="message-body">{converted_visible[1]}</div>'
-            f'{assistant_attachments}'
-            f'{actions_html(history, i, "assistant", info_message_assistant)}'
+            f'data-raw="{html.escape(raw_content, quote=True)}">'
+            f'<div class="{text_class}">'
+            f'<div class="message-body">{content}</div>'
+            f'{attachments}'
+            f'{actions_html(history, i, role, info_message)}'
             f'</div>'
             f'</div>'
         )
- output += "
"
+ # Determine range
+ start_idx = len(history['visible']) - 1 if last_message_only else 0
+ end_idx = len(history['visible'])
+
+ for i in range(start_idx, end_idx):
+ row_visible = history['visible'][i]
+ row_internal = history['internal'][i]
+
+ # Convert content
+ if last_message_only:
+ converted_visible = [None, convert_to_markdown_wrapped(row_visible[1], message_id=i, use_cache=i != len(history['visible']) - 1)]
+ else:
+ converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible]
+
+ # Generate messages
+ if not last_message_only and converted_visible[0]:
+ output += create_message("user", converted_visible[0], row_internal[0])
+
+ output += create_message("assistant", converted_visible[1], row_internal[1])
+
+    if not last_message_only:
+        output += "</div></div>"
+
return output
@@ -629,15 +663,15 @@ def time_greeting():
return "Good evening!"
-def chat_html_wrapper(history, name1, name2, mode, style, character, reset_cache=False):
+def chat_html_wrapper(history, name1, name2, mode, style, character, reset_cache=False, last_message_only=False):
if len(history['visible']) == 0:
greeting = f"
{time_greeting()} How can I help you today?
"
result = f'
{greeting}
'
elif mode == 'instruct':
- result = generate_instruct_html(history)
+ result = generate_instruct_html(history, last_message_only=last_message_only)
elif style == 'wpp':
- result = generate_chat_html(history, name1, name2)
+ result = generate_chat_html(history, name1, name2, last_message_only=last_message_only)
else:
- result = generate_cai_chat_html(history, name1, name2, style, character, reset_cache)
+ result = generate_cai_chat_html(history, name1, name2, style, character, reset_cache=reset_cache, last_message_only=last_message_only)
- return {'html': result}
+ return {'html': result, 'last_message_only': last_message_only}
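
For context, a minimal standalone sketch of the idea behind the new `last_message_only` flag (simplified stand-ins, not the project's renderer): a full redraw walks every history row, while a streaming update re-renders only the final row and skips the user half of that row.

```python
def render_history(history, last_message_only=False):
    def render_row(i, row):
        user, assistant = row
        parts = []
        if not last_message_only and user:          # user turn is skipped on partial updates
            parts.append(f"[user #{i}] {user}")
        parts.append(f"[assistant #{i}] {assistant}")
        return "\n".join(parts)

    # Same range selection as the patched generator: last row only, or everything
    start = len(history['visible']) - 1 if last_message_only else 0
    return "\n".join(
        render_row(i, history['visible'][i]) for i in range(start, len(history['visible']))
    )


history = {'visible': [["hi", "hello!"], ["2+2?", "4"]]}
print(render_history(history))                          # full redraw: both rows
print(render_history(history, last_message_only=True))  # streaming update: last row only
```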
diff --git a/modules/llama_cpp_server.py b/modules/llama_cpp_server.py
index d695c74e..a79e24e4 100644
--- a/modules/llama_cpp_server.py
+++ b/modules/llama_cpp_server.py
@@ -408,15 +408,42 @@ class LlamaServer:
def filter_stderr_with_progress(process_stderr):
- progress_pattern = re.compile(r'slot update_slots: id.*progress = (\d+\.\d+)')
+ """
+ Reads stderr lines from a process, filters out noise, and displays progress updates
+ inline (overwriting the same line) until completion.
+ """
+ progress_re = re.compile(r'slot update_slots: id.*progress = (\d+\.\d+)')
+ last_was_progress = False
+
try:
- for line in iter(process_stderr.readline, ''):
- progress_match = progress_pattern.search(line)
- if progress_match:
- sys.stderr.write(line)
- sys.stderr.flush()
- elif not line.startswith(('srv ', 'slot ')) and 'log_server_r: request: GET /health' not in line:
- sys.stderr.write(line)
- sys.stderr.flush()
+ for raw in iter(process_stderr.readline, ''):
+ line = raw.rstrip('\r\n')
+ match = progress_re.search(line)
+
+ if match:
+ progress = float(match.group(1))
+
+ # Extract just the part from "prompt processing" onwards
+ prompt_processing_idx = line.find('prompt processing')
+ if prompt_processing_idx != -1:
+ display_line = line[prompt_processing_idx:]
+ else:
+ display_line = line # fallback to full line
+
+ # choose carriage return for in-progress or newline at completion
+ end_char = '\r' if progress < 1.0 else '\n'
+ print(display_line, end=end_char, file=sys.stderr, flush=True)
+ last_was_progress = (progress < 1.0)
+
+ # skip noise lines
+ elif not (line.startswith(('srv ', 'slot ')) or 'log_server_r: request: GET /health' in line):
+ # if we were in progress, finish that line first
+ if last_was_progress:
+ print(file=sys.stderr)
+
+ print(line, file=sys.stderr, flush=True)
+ last_was_progress = False
+
except (ValueError, IOError):
+ # silently ignore broken output or IO errors
pass
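
As an aside, a tiny self-contained demo of the carriage-return technique used above: in-progress updates are printed with `end='\r'` so they overwrite one another on the same line, and a newline is emitted once progress reaches 1.0. The loop and the message text are invented for illustration.

```python
import sys
import time


def show_progress(steps=20):
    for n in range(steps + 1):
        progress = n / steps
        # Overwrite the line while in progress; finish it with a newline at 100%
        end_char = '\r' if progress < 1.0 else '\n'
        print(f"prompt processing: {progress:.0%}", end=end_char, file=sys.stderr, flush=True)
        time.sleep(0.05)


if __name__ == "__main__":
    show_progress()
```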
diff --git a/modules/models.py b/modules/models.py
index d329ae3c..c1e7fb56 100644
--- a/modules/models.py
+++ b/modules/models.py
@@ -116,7 +116,7 @@ def unload_model(keep_model_name=False):
return
is_llamacpp = (shared.model.__class__.__name__ == 'LlamaServer')
- if shared.args.loader == 'ExLlamav3_HF':
+ if shared.model.__class__.__name__ == 'Exllamav3HF':
shared.model.unload()
shared.model = shared.tokenizer = None
diff --git a/modules/models_settings.py b/modules/models_settings.py
index c914bdea..283a9744 100644
--- a/modules/models_settings.py
+++ b/modules/models_settings.py
@@ -329,6 +329,7 @@ def estimate_vram(gguf_file, gpu_layers, ctx_size, cache_type):
# Extract values from metadata
n_layers = None
n_kv_heads = None
+ n_attention_heads = None # Fallback for models without separate KV heads
embedding_dim = None
for key, value in metadata.items():
@@ -336,9 +337,14 @@ def estimate_vram(gguf_file, gpu_layers, ctx_size, cache_type):
n_layers = value
elif key.endswith('.attention.head_count_kv'):
n_kv_heads = max(value) if isinstance(value, list) else value
+ elif key.endswith('.attention.head_count'):
+ n_attention_heads = max(value) if isinstance(value, list) else value
elif key.endswith('.embedding_length'):
embedding_dim = value
+ if n_kv_heads is None:
+ n_kv_heads = n_attention_heads
+
if gpu_layers > n_layers:
gpu_layers = n_layers
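
For a sense of why the fallback matters, here is a hedged back-of-the-envelope sketch: the standard 2 × layers × ctx × kv_heads × head_dim × bytes KV-cache formula applied to GGUF-style metadata, with the same head-count fallback as above. The keys and numbers are illustrative, and this is not necessarily the exact arithmetic inside `estimate_vram`.

```python
def rough_kv_cache_bytes(metadata, ctx_size, bytes_per_element=2):
    n_layers = n_kv_heads = n_heads = embedding_dim = None
    for key, value in metadata.items():
        if key.endswith('.block_count'):
            n_layers = value
        elif key.endswith('.attention.head_count_kv'):
            n_kv_heads = max(value) if isinstance(value, list) else value
        elif key.endswith('.attention.head_count'):
            n_heads = max(value) if isinstance(value, list) else value
        elif key.endswith('.embedding_length'):
            embedding_dim = value

    if n_kv_heads is None:        # MHA model: every attention head has its own KV head
        n_kv_heads = n_heads

    head_dim = embedding_dim // n_heads
    return 2 * n_layers * ctx_size * n_kv_heads * head_dim * bytes_per_element


# Illustrative metadata for a model that only publishes head_count (fallback path)
metadata = {
    'llama.block_count': 32,
    'llama.attention.head_count': 32,
    'llama.embedding_length': 4096,
}
print(rough_kv_cache_bytes(metadata, ctx_size=8192) / 1024**3, "GiB")  # 4.0 GiB at fp16
```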
diff --git a/modules/presets.py b/modules/presets.py
index cf706605..3eb1f5fc 100644
--- a/modules/presets.py
+++ b/modules/presets.py
@@ -1,6 +1,5 @@
import functools
import pprint
-import random
from pathlib import Path
import yaml
@@ -93,68 +92,17 @@ def load_preset_for_ui(name, state):
return state, *[generate_params[k] for k in presets_params()]
-def random_preset(state):
- params_and_values = {
- 'remove_tail_tokens': {
- 'top_p': [0.5, 0.8, 0.9, 0.95, 0.99],
- 'min_p': [0.5, 0.2, 0.1, 0.05, 0.01],
- 'top_k': [3, 5, 10, 20, 30, 40],
- 'typical_p': [0.2, 0.575, 0.95],
- 'tfs': [0.5, 0.8, 0.9, 0.95, 0.99],
- 'top_a': [0.5, 0.2, 0.1, 0.05, 0.01],
- 'epsilon_cutoff': [1, 3, 5, 7, 9],
- 'eta_cutoff': [3, 6, 9, 12, 15, 18],
- },
- 'flatten_distribution': {
- 'temperature': [0.1, 0.5, 0.7, 0.8, 1, 1.2, 1.5, 2.0, 5.0],
- 'dynamic_temperature': [
- [0.1, 1],
- [0.1, 1.5],
- [0.1, 2],
- [0.1, 5],
- [0.5, 1],
- [0.5, 1.5],
- [0.5, 2],
- [0.5, 5],
- [0.8, 1],
- [0.8, 1.5],
- [0.8, 2],
- [0.8, 5],
- [1, 1.5],
- [1, 2],
- [1, 5]
- ],
- 'smoothing_factor': [0.2, 0.3, 0.6, 1.2],
- },
- 'repetition': {
- 'repetition_penalty': [1, 1.05, 1.1, 1.15, 1.20, 1.25],
- 'presence_penalty': [0, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 2.0],
- 'frequency_penalty': [0, 0.1, 0.2, 0.4, 0.6, 0.8, 1.0, 2.0],
- },
- 'other': {
- 'temperature_last': [True, False],
- }
- }
-
- generate_params = default_preset()
- for cat in params_and_values:
- choices = list(params_and_values[cat].keys())
- if shared.args.loader is not None:
- choices = [x for x in choices if loader_contains(x)]
-
- if len(choices) > 0:
- choice = random.choice(choices)
- value = random.choice(params_and_values[cat][choice])
- if choice == 'dynamic_temperature':
- generate_params['dynamic_temperature'] = True
- generate_params['dynatemp_low'] = value[0]
- generate_params['dynatemp_high'] = value[1]
- else:
- generate_params[choice] = value
-
+def reset_preset_for_ui(name, state):
+ """Reset current preset to its saved values from file"""
+ generate_params = load_preset(name, verbose=True)
+ state.update(generate_params)
+ return state, *[generate_params[k] for k in presets_params()]
+
+
+def neutralize_samplers_for_ui(state):
+ """Set all samplers to their default/neutral values"""
+ generate_params = default_preset()
state.update(generate_params)
- logger.info("GENERATED_PRESET=")
- pprint.PrettyPrinter(indent=4, width=1, sort_dicts=False).pprint(remove_defaults(state))
return state, *[generate_params[k] for k in presets_params()]
diff --git a/modules/shared.py b/modules/shared.py
index d2305f30..b8ab2426 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -9,6 +9,7 @@ from pathlib import Path
import yaml
from modules.logging_colors import logger
+from modules.presets import default_preset
# Model variables
model = None
@@ -21,60 +22,19 @@ lora_names = []
# Generation variables
stop_everything = False
generation_lock = None
-processing_message = '*Is typing...*'
+processing_message = ''
# UI variables
gradio = {}
persistent_interface_state = {}
need_restart = False
-# UI defaults
-settings = {
- 'show_controls': True,
- 'start_with': '',
- 'mode': 'instruct',
- 'chat_style': 'cai-chat',
- 'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
- 'prompt-default': 'QA',
- 'prompt-notebook': 'QA',
- 'character': 'Assistant',
- 'name1': 'You',
- 'user_bio': '',
- 'custom_system_message': '',
- 'preset': 'min_p',
- 'max_new_tokens': 512,
- 'max_new_tokens_min': 1,
- 'max_new_tokens_max': 4096,
- 'prompt_lookup_num_tokens': 0,
- 'max_tokens_second': 0,
- 'max_updates_second': 12,
- 'auto_max_new_tokens': True,
- 'ban_eos_token': False,
- 'add_bos_token': True,
- 'enable_thinking': True,
- 'skip_special_tokens': True,
- 'stream': True,
- 'static_cache': False,
- 'truncation_length': 8192,
- 'seed': -1,
- 'custom_stopping_strings': '',
- 'custom_token_bans': '',
- 'negative_prompt': '',
- 'dark_theme': True,
- 'default_extensions': [],
- 'instruction_template_str': "{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if not ns.found -%}\n {{- '' + 'Below is an instruction that describes a task. Write a response that appropriately completes the request.' + '\\n\\n' -}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {{- '' + message['content'] + '\\n\\n' -}}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{-'### Instruction:\\n' + message['content'] + '\\n\\n'-}}\n {%- else -%}\n {{-'### Response:\\n' + message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{-'### Response:\\n'-}}\n{%- endif -%}",
- 'chat_template_str': "{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {%- if message['content'] -%}\n {{- message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- if user_bio -%}\n {{- user_bio + '\\n\\n' -}}\n {%- endif -%}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{- name1 + ': ' + message['content'] + '\\n'-}}\n {%- else -%}\n {{- name2 + ': ' + message['content'] + '\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}",
-}
-
-default_settings = copy.deepcopy(settings)
-
# Parser copied from https://github.com/vladmandic/automatic
parser = argparse.ArgumentParser(description="Text generation web UI", conflict_handler='resolve', add_help=True, formatter_class=lambda prog: argparse.HelpFormatter(prog, max_help_position=55, indent_increment=2, width=200))
# Basic settings
group = parser.add_argument_group('Basic settings')
group.add_argument('--multi-user', action='store_true', help='Multi-user mode. Chat histories are not saved or automatically loaded. Warning: this is likely not safe for sharing publicly.')
-group.add_argument('--character', type=str, help='The name of the character to load in chat mode by default.')
group.add_argument('--model', type=str, help='Name of the model to load by default.')
group.add_argument('--lora', type=str, nargs='+', help='The list of LoRAs to load. If you want to load more than one LoRA, write the names separated by spaces.')
group.add_argument('--model-dir', type=str, default='user_data/models', help='Path to directory with all the models.')
@@ -230,6 +190,102 @@ for arg in sys.argv[1:]:
elif hasattr(args, arg):
provided_arguments.append(arg)
+# Default generation parameters
+neutral_samplers = default_preset()
+
+# UI defaults
+settings = {
+ 'show_controls': True,
+ 'start_with': '',
+ 'mode': 'instruct',
+ 'chat_style': 'cai-chat',
+ 'chat-instruct_command': 'Continue the chat dialogue below. Write a single reply for the character "<|character|>".\n\n<|prompt|>',
+ 'enable_web_search': False,
+ 'web_search_pages': 3,
+ 'prompt-default': 'QA',
+ 'prompt-notebook': 'QA',
+ 'preset': 'Qwen3 - Thinking' if Path('user_data/presets/Qwen3 - Thinking.yaml').exists() else None,
+ 'max_new_tokens': 512,
+ 'max_new_tokens_min': 1,
+ 'max_new_tokens_max': 4096,
+ 'prompt_lookup_num_tokens': 0,
+ 'max_tokens_second': 0,
+ 'auto_max_new_tokens': True,
+ 'ban_eos_token': False,
+ 'add_bos_token': True,
+ 'enable_thinking': True,
+ 'skip_special_tokens': True,
+ 'stream': True,
+ 'static_cache': False,
+ 'truncation_length': 8192,
+ 'seed': -1,
+ 'custom_stopping_strings': '',
+ 'custom_token_bans': '',
+ 'negative_prompt': '',
+ 'dark_theme': True,
+ 'paste_to_attachment': False,
+
+ # Character settings
+ 'character': 'Assistant',
+ 'name1': 'You',
+ 'name2': 'AI',
+ 'user_bio': '',
+ 'context': 'The following is a conversation with an AI Large Language Model. The AI has been trained to answer questions, provide recommendations, and help with decision making. The AI follows user requests. The AI thinks outside the box.',
+ 'greeting': 'How can I help you today?',
+ 'custom_system_message': '',
+ 'instruction_template_str': "{%- set ns = namespace(found=false) -%}\n{%- for message in messages -%}\n {%- if message['role'] == 'system' -%}\n {%- set ns.found = true -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if not ns.found -%}\n {{- '' + 'Below is an instruction that describes a task. Write a response that appropriately completes the request.' + '\\n\\n' -}}\n{%- endif %}\n{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {{- '' + message['content'] + '\\n\\n' -}}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{-'### Instruction:\\n' + message['content'] + '\\n\\n'-}}\n {%- else -%}\n {{-'### Response:\\n' + message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}\n{%- if add_generation_prompt -%}\n {{-'### Response:\\n'-}}\n{%- endif -%}",
+ 'chat_template_str': "{%- for message in messages %}\n {%- if message['role'] == 'system' -%}\n {%- if message['content'] -%}\n {{- message['content'] + '\\n\\n' -}}\n {%- endif -%}\n {%- if user_bio -%}\n {{- user_bio + '\\n\\n' -}}\n {%- endif -%}\n {%- else -%}\n {%- if message['role'] == 'user' -%}\n {{- name1 + ': ' + message['content'] + '\\n'-}}\n {%- else -%}\n {{- name2 + ': ' + message['content'] + '\\n' -}}\n {%- endif -%}\n {%- endif -%}\n{%- endfor -%}",
+
+ # Generation parameters - Curve shape
+ 'temperature': 0.6,
+ 'dynatemp_low': neutral_samplers['dynatemp_low'],
+ 'dynatemp_high': neutral_samplers['dynatemp_high'],
+ 'dynatemp_exponent': neutral_samplers['dynatemp_exponent'],
+ 'smoothing_factor': neutral_samplers['smoothing_factor'],
+ 'smoothing_curve': neutral_samplers['smoothing_curve'],
+
+ # Generation parameters - Curve cutoff
+ 'min_p': neutral_samplers['min_p'],
+ 'top_p': 0.95,
+ 'top_k': 20,
+ 'typical_p': neutral_samplers['typical_p'],
+ 'xtc_threshold': neutral_samplers['xtc_threshold'],
+ 'xtc_probability': neutral_samplers['xtc_probability'],
+ 'epsilon_cutoff': neutral_samplers['epsilon_cutoff'],
+ 'eta_cutoff': neutral_samplers['eta_cutoff'],
+ 'tfs': neutral_samplers['tfs'],
+ 'top_a': neutral_samplers['top_a'],
+ 'top_n_sigma': neutral_samplers['top_n_sigma'],
+
+ # Generation parameters - Repetition suppression
+ 'dry_multiplier': neutral_samplers['dry_multiplier'],
+ 'dry_allowed_length': neutral_samplers['dry_allowed_length'],
+ 'dry_base': neutral_samplers['dry_base'],
+ 'repetition_penalty': neutral_samplers['repetition_penalty'],
+ 'frequency_penalty': neutral_samplers['frequency_penalty'],
+ 'presence_penalty': neutral_samplers['presence_penalty'],
+ 'encoder_repetition_penalty': neutral_samplers['encoder_repetition_penalty'],
+ 'no_repeat_ngram_size': neutral_samplers['no_repeat_ngram_size'],
+ 'repetition_penalty_range': neutral_samplers['repetition_penalty_range'],
+
+ # Generation parameters - Alternative sampling methods
+ 'penalty_alpha': neutral_samplers['penalty_alpha'],
+ 'guidance_scale': neutral_samplers['guidance_scale'],
+ 'mirostat_mode': neutral_samplers['mirostat_mode'],
+ 'mirostat_tau': neutral_samplers['mirostat_tau'],
+ 'mirostat_eta': neutral_samplers['mirostat_eta'],
+
+ # Generation parameters - Other options
+ 'do_sample': neutral_samplers['do_sample'],
+ 'dynamic_temperature': neutral_samplers['dynamic_temperature'],
+ 'temperature_last': neutral_samplers['temperature_last'],
+ 'sampler_priority': neutral_samplers['sampler_priority'],
+ 'dry_sequence_breakers': neutral_samplers['dry_sequence_breakers'],
+ 'grammar_string': '',
+}
+
+default_settings = copy.deepcopy(settings)
+
def do_cmd_flags_warnings():
# Security warnings
diff --git a/modules/text_generation.py b/modules/text_generation.py
index 1fd6d810..55b538b0 100644
--- a/modules/text_generation.py
+++ b/modules/text_generation.py
@@ -65,41 +65,39 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False, escap
all_stop_strings += st
shared.stop_everything = False
- last_update = -1
reply = ''
is_stream = state['stream']
if len(all_stop_strings) > 0 and not state['stream']:
state = copy.deepcopy(state)
state['stream'] = True
- min_update_interval = 0
- if state.get('max_updates_second', 0) > 0:
- min_update_interval = 1 / state['max_updates_second']
-
# Generate
+ last_update = -1
+ latency_threshold = 1 / 1000
for reply in generate_func(question, original_question, state, stopping_strings, is_chat=is_chat):
+ cur_time = time.monotonic()
reply, stop_found = apply_stopping_strings(reply, all_stop_strings)
if escape_html:
reply = html.escape(reply)
if is_stream:
- cur_time = time.time()
-
# Limit number of tokens/second to make text readable in real time
if state['max_tokens_second'] > 0:
diff = 1 / state['max_tokens_second'] - (cur_time - last_update)
if diff > 0:
time.sleep(diff)
- last_update = time.time()
+ last_update = time.monotonic()
yield reply
# Limit updates to avoid lag in the Gradio UI
# API updates are not limited
else:
- if cur_time - last_update > min_update_interval:
- last_update = cur_time
+ # If 'generate_func' takes less than 0.001 seconds to yield the next token
+ # (equivalent to more than 1000 tok/s), assume that the UI is lagging behind and skip yielding
+ if (cur_time - last_update) > latency_threshold:
yield reply
+ last_update = time.monotonic()
if stop_found or (state['max_tokens_second'] > 0 and shared.stop_everything):
break
@@ -481,6 +479,7 @@ def generate_reply_custom(question, original_question, state, stopping_strings=N
For models that do not use the transformers library for sampling
"""
+ state = copy.deepcopy(state)
state['seed'] = set_manual_seed(state['seed'])
t0 = time.time()
reply = ''
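
A standalone sketch of the new throttling rule (fake token source; `max_tokens_second` handling omitted): a yield is skipped whenever the previous UI update happened less than `latency_threshold` seconds ago, so a backend producing more than ~1000 tok/s no longer floods the frontend with updates.

```python
import time


def throttled_stream(token_iter, latency_threshold=1 / 1000):
    reply = ''
    last_update = -1
    for token in token_iter:
        cur_time = time.monotonic()
        reply += token
        if (cur_time - last_update) > latency_threshold:
            yield reply                      # slow enough: push an update to the UI
            last_update = time.monotonic()
    yield reply                              # always emit the final, complete reply


if __name__ == "__main__":
    fake_tokens = (f"tok{i} " for i in range(5000))
    updates = sum(1 for _ in throttled_stream(fake_tokens))
    print(f"{updates} UI updates for 5000 tokens")
```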
diff --git a/modules/ui.py b/modules/ui.py
index 9f4d67cb..59da5118 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -1,4 +1,5 @@
import copy
+import threading
from pathlib import Path
import gradio as gr
@@ -6,28 +7,39 @@ import yaml
import extensions
from modules import shared
+from modules.chat import load_history
+from modules.utils import gradio
-with open(Path(__file__).resolve().parent / '../css/NotoSans/stylesheet.css', 'r') as f:
+# Global state for auto-saving UI settings with debouncing
+_auto_save_timer = None
+_auto_save_lock = threading.Lock()
+_last_interface_state = None
+_last_preset = None
+_last_extensions = None
+_last_show_controls = None
+_last_theme_state = None
+
+with open(Path(__file__).resolve().parent / '../css/NotoSans/stylesheet.css', 'r', encoding='utf-8') as f:
css = f.read()
-with open(Path(__file__).resolve().parent / '../css/main.css', 'r') as f:
+with open(Path(__file__).resolve().parent / '../css/main.css', 'r', encoding='utf-8') as f:
css += f.read()
-with open(Path(__file__).resolve().parent / '../css/katex/katex.min.css', 'r') as f:
+with open(Path(__file__).resolve().parent / '../css/katex/katex.min.css', 'r', encoding='utf-8') as f:
css += f.read()
-with open(Path(__file__).resolve().parent / '../css/highlightjs/highlightjs-copy.min.css', 'r') as f:
+with open(Path(__file__).resolve().parent / '../css/highlightjs/highlightjs-copy.min.css', 'r', encoding='utf-8') as f:
css += f.read()
-with open(Path(__file__).resolve().parent / '../js/main.js', 'r') as f:
+with open(Path(__file__).resolve().parent / '../js/main.js', 'r', encoding='utf-8') as f:
js = f.read()
-with open(Path(__file__).resolve().parent / '../js/global_scope_js.js', 'r') as f:
+with open(Path(__file__).resolve().parent / '../js/global_scope_js.js', 'r', encoding='utf-8') as f:
global_scope_js = f.read()
-with open(Path(__file__).resolve().parent / '../js/save_files.js', 'r') as f:
+with open(Path(__file__).resolve().parent / '../js/save_files.js', 'r', encoding='utf-8') as f:
save_files_js = f.read()
-with open(Path(__file__).resolve().parent / '../js/switch_tabs.js', 'r') as f:
+with open(Path(__file__).resolve().parent / '../js/switch_tabs.js', 'r', encoding='utf-8') as f:
switch_tabs_js = f.read()
-with open(Path(__file__).resolve().parent / '../js/show_controls.js', 'r') as f:
+with open(Path(__file__).resolve().parent / '../js/show_controls.js', 'r', encoding='utf-8') as f:
show_controls_js = f.read()
-with open(Path(__file__).resolve().parent / '../js/update_big_picture.js', 'r') as f:
+with open(Path(__file__).resolve().parent / '../js/update_big_picture.js', 'r', encoding='utf-8') as f:
update_big_picture_js = f.read()
-with open(Path(__file__).resolve().parent / '../js/dark_theme.js', 'r') as f:
+with open(Path(__file__).resolve().parent / '../js/dark_theme.js', 'r', encoding='utf-8') as f:
dark_theme_js = f.read()
refresh_symbol = '🔄'
@@ -62,8 +74,10 @@ if not shared.args.old_colors:
body_background_fill="white",
block_background_fill="transparent",
body_text_color='rgb(64, 64, 64)',
- button_secondary_background_fill="#f4f4f4",
+ button_secondary_background_fill="white",
button_secondary_border_color="var(--border-color-primary)",
+ input_shadow="none",
+ button_shadow_hover="none",
# Dark Mode Colors
input_background_fill_dark='var(--darker-gray)',
@@ -95,6 +109,7 @@ if not shared.args.old_colors:
button_large_radius='0.375rem',
button_large_padding='6px 12px',
input_radius='0.375rem',
+ block_radius='0',
)
if Path("user_data/notification.mp3").exists():
@@ -194,7 +209,6 @@ def list_interface_input_elements():
'max_new_tokens',
'prompt_lookup_num_tokens',
'max_tokens_second',
- 'max_updates_second',
'do_sample',
'dynamic_temperature',
'temperature_last',
@@ -257,6 +271,11 @@ def list_interface_input_elements():
# Model elements
elements += list_model_elements()
+ # Other elements
+ elements += [
+ 'paste_to_attachment'
+ ]
+
return elements
@@ -270,6 +289,13 @@ def gather_interface_values(*args):
if not shared.args.multi_user:
shared.persistent_interface_state = output
+ # Remove the chat input, as it gets cleared after this function call
+ shared.persistent_interface_state.pop('textbox')
+
+ # Prevent history loss if backend is restarted but UI is not refreshed
+ if output['history'] is None and output['unique_id'] is not None:
+ output['history'] = load_history(output['unique_id'], output['character_menu'], output['mode'])
+
return output
@@ -292,7 +318,7 @@ def apply_interface_values(state, use_persistent=False):
def save_settings(state, preset, extensions_list, show_controls, theme_state):
output = copy.deepcopy(shared.settings)
- exclude = ['name2', 'greeting', 'context', 'truncation_length', 'instruction_template_str']
+ exclude = []
for k in state:
if k in shared.settings and k not in exclude:
output[k] = state[k]
@@ -301,10 +327,11 @@ def save_settings(state, preset, extensions_list, show_controls, theme_state):
output['prompt-default'] = state['prompt_menu-default']
output['prompt-notebook'] = state['prompt_menu-notebook']
output['character'] = state['character_menu']
- output['default_extensions'] = extensions_list
output['seed'] = int(output['seed'])
output['show_controls'] = show_controls
output['dark_theme'] = True if theme_state == 'dark' else False
+ output.pop('instruction_template_str')
+ output.pop('truncation_length')
# Save extension values in the UI
for extension_name in extensions_list:
@@ -327,6 +354,143 @@ def save_settings(state, preset, extensions_list, show_controls, theme_state):
return yaml.dump(output, sort_keys=False, width=float("inf"), allow_unicode=True)
+def store_current_state_and_debounce(interface_state, preset, extensions, show_controls, theme_state):
+ """Store current state and trigger debounced save"""
+ global _auto_save_timer, _last_interface_state, _last_preset, _last_extensions, _last_show_controls, _last_theme_state
+
+ if shared.args.multi_user:
+ return
+
+ # Store the current state in global variables
+ _last_interface_state = interface_state
+ _last_preset = preset
+ _last_extensions = extensions
+ _last_show_controls = show_controls
+ _last_theme_state = theme_state
+
+ # Reset the debounce timer
+ with _auto_save_lock:
+ if _auto_save_timer is not None:
+ _auto_save_timer.cancel()
+
+ _auto_save_timer = threading.Timer(1.0, _perform_debounced_save)
+ _auto_save_timer.start()
+
+
+def _perform_debounced_save():
+ """Actually perform the save using the stored state"""
+ global _auto_save_timer
+
+ try:
+ if _last_interface_state is not None:
+ contents = save_settings(_last_interface_state, _last_preset, _last_extensions, _last_show_controls, _last_theme_state)
+ settings_path = Path('user_data') / 'settings.yaml'
+ settings_path.parent.mkdir(exist_ok=True)
+ with open(settings_path, 'w', encoding='utf-8') as f:
+ f.write(contents)
+ except Exception as e:
+ print(f"Auto-save failed: {e}")
+ finally:
+ with _auto_save_lock:
+ _auto_save_timer = None
+
+
+def setup_auto_save():
+ """Attach auto-save to key UI elements"""
+ if shared.args.multi_user:
+ return
+
+ change_elements = [
+ # Chat tab (ui_chat.py)
+ 'start_with',
+ 'enable_web_search',
+ 'web_search_pages',
+ 'mode',
+ 'chat_style',
+ 'chat-instruct_command',
+ 'character_menu',
+ 'name1',
+ 'name2',
+ 'context',
+ 'greeting',
+ 'user_bio',
+ 'custom_system_message',
+ 'chat_template_str',
+
+ # Parameters tab (ui_parameters.py) - Generation parameters
+ 'preset_menu',
+ 'temperature',
+ 'dynatemp_low',
+ 'dynatemp_high',
+ 'dynatemp_exponent',
+ 'smoothing_factor',
+ 'smoothing_curve',
+ 'min_p',
+ 'top_p',
+ 'top_k',
+ 'typical_p',
+ 'xtc_threshold',
+ 'xtc_probability',
+ 'epsilon_cutoff',
+ 'eta_cutoff',
+ 'tfs',
+ 'top_a',
+ 'top_n_sigma',
+ 'dry_multiplier',
+ 'dry_allowed_length',
+ 'dry_base',
+ 'repetition_penalty',
+ 'frequency_penalty',
+ 'presence_penalty',
+ 'encoder_repetition_penalty',
+ 'no_repeat_ngram_size',
+ 'repetition_penalty_range',
+ 'penalty_alpha',
+ 'guidance_scale',
+ 'mirostat_mode',
+ 'mirostat_tau',
+ 'mirostat_eta',
+ 'max_new_tokens',
+ 'prompt_lookup_num_tokens',
+ 'max_tokens_second',
+ 'do_sample',
+ 'dynamic_temperature',
+ 'temperature_last',
+ 'auto_max_new_tokens',
+ 'ban_eos_token',
+ 'add_bos_token',
+ 'enable_thinking',
+ 'skip_special_tokens',
+ 'stream',
+ 'static_cache',
+ 'truncation_length',
+ 'seed',
+ 'sampler_priority',
+ 'custom_stopping_strings',
+ 'custom_token_bans',
+ 'negative_prompt',
+ 'dry_sequence_breakers',
+ 'grammar_string',
+
+ # Default tab (ui_default.py)
+ 'prompt_menu-default',
+
+ # Notebook tab (ui_notebook.py)
+ 'prompt_menu-notebook',
+
+ # Session tab (ui_session.py)
+ 'show_controls',
+ 'theme_state',
+ 'paste_to_attachment'
+ ]
+
+ for element_name in change_elements:
+ if element_name in shared.gradio:
+ shared.gradio[element_name].change(
+ gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ store_current_state_and_debounce, gradio('interface_state', 'preset_menu', 'extensions_menu', 'show_controls', 'theme_state'), None, show_progress=False)
+
+
def create_refresh_button(refresh_component, refresh_method, refreshed_args, elem_class, interactive=True):
"""
Copied from https://github.com/AUTOMATIC1111/stable-diffusion-webui
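
The auto-save logic above boils down to a timer-based debounce: every change cancels the pending `threading.Timer` and arms a new one, so the write only happens once the UI has been quiet for a full second. A minimal, self-contained sketch of that pattern (the `save` callback is a stand-in, not `save_settings`):

```python
import threading


class Debouncer:
    def __init__(self, delay, fn):
        self.delay, self.fn = delay, fn
        self._timer = None
        self._lock = threading.Lock()

    def trigger(self, *args, **kwargs):
        with self._lock:
            if self._timer is not None:
                self._timer.cancel()          # drop the previously scheduled call
            self._timer = threading.Timer(self.delay, self.fn, args, kwargs)
            self._timer.start()


if __name__ == "__main__":
    saver = Debouncer(1.0, lambda state: print("saved:", state))
    for value in range(5):
        saver.trigger({"temperature": value})  # only the last call survives the debounce
```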
diff --git a/modules/ui_chat.py b/modules/ui_chat.py
index d79aa523..3b841b8b 100644
--- a/modules/ui_chat.py
+++ b/modules/ui_chat.py
@@ -18,23 +18,23 @@ def create_ui():
mu = shared.args.multi_user
shared.gradio['Chat input'] = gr.State()
- shared.gradio['history'] = gr.JSON(visible=False)
+ shared.gradio['history'] = gr.State({'internal': [], 'visible': [], 'metadata': {}})
with gr.Tab('Chat', id='Chat', elem_id='chat-tab'):
with gr.Row(elem_id='past-chats-row', elem_classes=['pretty_scrollbar']):
with gr.Column():
with gr.Row(elem_id='past-chats-buttons'):
- shared.gradio['branch_chat'] = gr.Button('Branch', elem_classes='refresh-button', elem_id='Branch', interactive=not mu)
+ shared.gradio['branch_chat'] = gr.Button('Branch', elem_classes=['refresh-button', 'refresh-button-medium'], elem_id='Branch', interactive=not mu)
+ shared.gradio['rename_chat'] = gr.Button('Rename', elem_classes=['refresh-button', 'refresh-button-medium'], interactive=not mu)
+ shared.gradio['delete_chat'] = gr.Button('🗑️', visible=False, elem_classes='refresh-button', interactive=not mu, elem_id='delete_chat')
+ shared.gradio['Start new chat'] = gr.Button('New chat', elem_classes=['refresh-button', 'refresh-button-medium', 'focus-on-chat-input'])
shared.gradio['branch_index'] = gr.Number(value=-1, precision=0, visible=False, elem_id="Branch-index", interactive=True)
- shared.gradio['rename_chat'] = gr.Button('Rename', elem_classes='refresh-button', interactive=not mu)
- shared.gradio['delete_chat'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
- shared.gradio['Start new chat'] = gr.Button('New chat', elem_classes=['refresh-button', 'focus-on-chat-input'])
shared.gradio['search_chat'] = gr.Textbox(placeholder='Search chats...', max_lines=1, elem_id='search_chat')
with gr.Row(elem_id='delete-chat-row', visible=False) as shared.gradio['delete-chat-row']:
- shared.gradio['delete_chat-cancel'] = gr.Button('Cancel', elem_classes=['refresh-button', 'focus-on-chat-input'])
- shared.gradio['delete_chat-confirm'] = gr.Button('Confirm', variant='stop', elem_classes=['refresh-button', 'focus-on-chat-input'])
+ shared.gradio['delete_chat-cancel'] = gr.Button('Cancel', elem_classes=['refresh-button', 'focus-on-chat-input'], elem_id='delete_chat-cancel')
+ shared.gradio['delete_chat-confirm'] = gr.Button('Confirm', variant='stop', elem_classes=['refresh-button', 'focus-on-chat-input'], elem_id='delete_chat-confirm')
with gr.Row(elem_id='rename-row', visible=False) as shared.gradio['rename-row']:
shared.gradio['rename_to'] = gr.Textbox(label='Rename to:', placeholder='New name', elem_classes=['no-background'])
@@ -55,7 +55,6 @@ def create_ui():
with gr.Column(scale=10, elem_id='chat-input-container'):
shared.gradio['textbox'] = gr.MultimodalTextbox(label='', placeholder='Send a message', file_types=['text', '.pdf'], file_count="multiple", elem_id='chat-input', elem_classes=['add_scrollbar'])
- shared.gradio['show_controls'] = gr.Checkbox(value=shared.settings['show_controls'], label='Show controls (Ctrl+S)', elem_id='show-controls')
shared.gradio['typing-dots'] = gr.HTML(value='<div class="typing"><span></span><span class="dot1"></span><span class="dot2"></span></div>', label='typing', elem_id='typing-container')
with gr.Column(scale=1, elem_id='generate-stop-container'):
@@ -65,21 +64,15 @@ def create_ui():
# Hover menu buttons
with gr.Column(elem_id='chat-buttons'):
- with gr.Row():
- shared.gradio['Regenerate'] = gr.Button('Regenerate (Ctrl + Enter)', elem_id='Regenerate')
- shared.gradio['Continue'] = gr.Button('Continue (Alt + Enter)', elem_id='Continue')
- shared.gradio['Remove last'] = gr.Button('Remove last reply (Ctrl + Shift + Backspace)', elem_id='Remove-last')
-
- with gr.Row():
- shared.gradio['Impersonate'] = gr.Button('Impersonate (Ctrl + Shift + M)', elem_id='Impersonate')
-
- with gr.Row():
- shared.gradio['Send dummy message'] = gr.Button('Send dummy message')
- shared.gradio['Send dummy reply'] = gr.Button('Send dummy reply')
-
- with gr.Row():
- shared.gradio['send-chat-to-default'] = gr.Button('Send to Default')
- shared.gradio['send-chat-to-notebook'] = gr.Button('Send to Notebook')
+ shared.gradio['Regenerate'] = gr.Button('Regenerate (Ctrl + Enter)', elem_id='Regenerate')
+ shared.gradio['Continue'] = gr.Button('Continue (Alt + Enter)', elem_id='Continue')
+ shared.gradio['Remove last'] = gr.Button('Remove last reply (Ctrl + Shift + Backspace)', elem_id='Remove-last')
+ shared.gradio['Impersonate'] = gr.Button('Impersonate (Ctrl + Shift + M)', elem_id='Impersonate')
+ shared.gradio['Send dummy message'] = gr.Button('Send dummy message')
+ shared.gradio['Send dummy reply'] = gr.Button('Send dummy reply')
+ shared.gradio['send-chat-to-default'] = gr.Button('Send to Default')
+ shared.gradio['send-chat-to-notebook'] = gr.Button('Send to Notebook')
+ shared.gradio['show_controls'] = gr.Checkbox(value=shared.settings['show_controls'], label='Show controls (Ctrl+S)', elem_id='show-controls')
with gr.Row(elem_id='chat-controls', elem_classes=['pretty_scrollbar']):
with gr.Column():
@@ -87,13 +80,13 @@ def create_ui():
shared.gradio['start_with'] = gr.Textbox(label='Start reply with', placeholder='Sure thing!', value=shared.settings['start_with'], elem_classes=['add_scrollbar'])
with gr.Row():
- shared.gradio['enable_web_search'] = gr.Checkbox(value=shared.settings.get('enable_web_search', False), label='Activate web search')
+ shared.gradio['enable_web_search'] = gr.Checkbox(value=shared.settings.get('enable_web_search', False), label='Activate web search', elem_id='web-search')
with gr.Row(visible=shared.settings.get('enable_web_search', False)) as shared.gradio['web_search_row']:
shared.gradio['web_search_pages'] = gr.Number(value=shared.settings.get('web_search_pages', 3), precision=0, label='Number of pages to download', minimum=1, maximum=10)
with gr.Row():
- shared.gradio['mode'] = gr.Radio(choices=['instruct', 'chat-instruct', 'chat'], value=shared.settings['mode'] if shared.settings['mode'] in ['chat', 'chat-instruct'] else None, label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template Parameters > Instruction template is used.', elem_id='chat-mode')
+ shared.gradio['mode'] = gr.Radio(choices=['instruct', 'chat-instruct', 'chat'], value=None, label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template Parameters > Instruction template is used.', elem_id='chat-mode')
with gr.Row():
shared.gradio['chat_style'] = gr.Dropdown(choices=utils.get_available_chat_styles(), label='Chat style', value=shared.settings['chat_style'], visible=shared.settings['mode'] != 'instruct')
@@ -125,14 +118,15 @@ def create_chat_settings_ui():
with gr.Column(scale=8):
with gr.Tab("Character"):
with gr.Row():
- shared.gradio['character_menu'] = gr.Dropdown(value=None, choices=utils.get_available_characters(), label='Character', elem_id='character-menu', info='Used in chat and chat-instruct modes.', elem_classes='slim-dropdown')
+ shared.gradio['character_menu'] = gr.Dropdown(value=shared.settings['character'], choices=utils.get_available_characters(), label='Character', elem_id='character-menu', info='Used in chat and chat-instruct modes.', elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['character_menu'], lambda: None, lambda: {'choices': utils.get_available_characters()}, 'refresh-button', interactive=not mu)
shared.gradio['save_character'] = gr.Button('💾', elem_classes='refresh-button', elem_id="save-character", interactive=not mu)
shared.gradio['delete_character'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
+ shared.gradio['restore_character'] = gr.Button('Restore character', elem_classes='refresh-button', interactive=True, elem_id='restore-character')
- shared.gradio['name2'] = gr.Textbox(value='', lines=1, label='Character\'s name')
- shared.gradio['context'] = gr.Textbox(value='', lines=10, label='Context', elem_classes=['add_scrollbar'])
- shared.gradio['greeting'] = gr.Textbox(value='', lines=5, label='Greeting', elem_classes=['add_scrollbar'])
+ shared.gradio['name2'] = gr.Textbox(value=shared.settings['name2'], lines=1, label='Character\'s name')
+ shared.gradio['context'] = gr.Textbox(value=shared.settings['context'], lines=10, label='Context', elem_classes=['add_scrollbar'])
+ shared.gradio['greeting'] = gr.Textbox(value=shared.settings['greeting'], lines=5, label='Greeting', elem_classes=['add_scrollbar'])
with gr.Tab("User"):
shared.gradio['name1'] = gr.Textbox(value=shared.settings['name1'], lines=1, label='Name')
@@ -185,7 +179,7 @@ def create_chat_settings_ui():
with gr.Row():
with gr.Column():
shared.gradio['custom_system_message'] = gr.Textbox(value=shared.settings['custom_system_message'], lines=2, label='Custom system message', info='If not empty, will be used instead of the default one.', elem_classes=['add_scrollbar'])
- shared.gradio['instruction_template_str'] = gr.Textbox(value='', label='Instruction template', lines=24, info='This gets autodetected; you usually don\'t need to change it. Used in instruct and chat-instruct modes.', elem_classes=['add_scrollbar', 'monospace'])
+ shared.gradio['instruction_template_str'] = gr.Textbox(value=shared.settings['instruction_template_str'], label='Instruction template', lines=24, info='This gets autodetected; you usually don\'t need to change it. Used in instruct and chat-instruct modes.', elem_classes=['add_scrollbar', 'monospace'])
with gr.Row():
shared.gradio['send_instruction_to_default'] = gr.Button('Send to default', elem_classes=['small-button'])
shared.gradio['send_instruction_to_notebook'] = gr.Button('Send to notebook', elem_classes=['small-button'])
@@ -202,7 +196,7 @@ def create_event_handlers():
shared.reload_inputs = gradio(reload_arr)
# Morph HTML updates instead of updating everything
- shared.gradio['display'].change(None, gradio('display'), None, js="(data) => handleMorphdomUpdate(data.html)")
+ shared.gradio['display'].change(None, gradio('display'), None, js="(data) => handleMorphdomUpdate(data)")
shared.gradio['Generate'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
@@ -267,11 +261,9 @@ def create_event_handlers():
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.handle_start_new_chat_click, gradio('interface_state'), gradio('history', 'display', 'unique_id'), show_progress=False)
- shared.gradio['delete_chat'].click(lambda: gr.update(visible=True), None, gradio('delete-chat-row'))
- shared.gradio['delete_chat-cancel'].click(lambda: gr.update(visible=False), None, gradio('delete-chat-row'))
shared.gradio['delete_chat-confirm'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
- chat.handle_delete_chat_confirm_click, gradio('interface_state'), gradio('history', 'display', 'unique_id', 'delete-chat-row'), show_progress=False)
+ chat.handle_delete_chat_confirm_click, gradio('interface_state'), gradio('history', 'display', 'unique_id'), show_progress=False)
shared.gradio['branch_chat'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
@@ -301,10 +293,12 @@ def create_event_handlers():
chat.handle_character_menu_change, gradio('interface_state'), gradio('history', 'display', 'name1', 'name2', 'character_picture', 'greeting', 'context', 'unique_id'), show_progress=False).then(
None, None, None, js=f'() => {{{ui.update_big_picture_js}; updateBigPicture()}}')
+ shared.gradio['character_picture'].change(chat.handle_character_picture_change, gradio('character_picture'), None, show_progress=False)
+
shared.gradio['mode'].change(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.handle_mode_change, gradio('interface_state'), gradio('history', 'display', 'chat_style', 'chat-instruct_command', 'unique_id'), show_progress=False).then(
- None, gradio('mode'), None, js="(mode) => {mode === 'instruct' ? document.getElementById('character-menu').parentNode.parentNode.style.display = 'none' : document.getElementById('character-menu').parentNode.parentNode.style.display = ''}")
+ None, gradio('mode'), None, js="(mode) => {const characterContainer = document.getElementById('character-menu').parentNode.parentNode; const isInChatTab = document.querySelector('#chat-controls').contains(characterContainer); if (isInChatTab) { characterContainer.style.display = mode === 'instruct' ? 'none' : ''; }}")
shared.gradio['chat_style'].change(chat.redraw_html, gradio(reload_arr), gradio('display'), show_progress=False)
@@ -324,6 +318,10 @@ def create_event_handlers():
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.handle_save_template_click, gradio('instruction_template_str'), gradio('save_filename', 'save_root', 'save_contents', 'file_saver'), show_progress=False)
+ shared.gradio['restore_character'].click(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ chat.restore_character_for_ui, gradio('interface_state'), gradio('interface_state', 'name2', 'context', 'greeting', 'character_picture'), show_progress=False)
+
shared.gradio['delete_template'].click(chat.handle_delete_template_click, gradio('instruction_template'), gradio('delete_filename', 'delete_root', 'file_deleter'), show_progress=False)
shared.gradio['save_chat_history'].click(
lambda x: json.dumps(x, indent=4), gradio('history'), gradio('temporary_text')).then(
diff --git a/modules/ui_default.py b/modules/ui_default.py
index c2946b37..8acc4b10 100644
--- a/modules/ui_default.py
+++ b/modules/ui_default.py
@@ -19,7 +19,7 @@ def create_ui():
with gr.Row():
with gr.Column():
with gr.Row():
- shared.gradio['textbox-default'] = gr.Textbox(value='', lines=27, label='Input', elem_classes=['textbox_default', 'add_scrollbar'])
+ shared.gradio['textbox-default'] = gr.Textbox(value=load_prompt(shared.settings['prompt-default']), lines=27, label='Input', elem_classes=['textbox_default', 'add_scrollbar'])
shared.gradio['token-counter-default'] = gr.HTML(value="
0", elem_id="default-token-counter")
with gr.Row():
@@ -28,7 +28,7 @@ def create_ui():
shared.gradio['Generate-default'] = gr.Button('Generate', variant='primary')
with gr.Row():
- shared.gradio['prompt_menu-default'] = gr.Dropdown(choices=utils.get_available_prompts(), value='None', label='Prompt', elem_classes='slim-dropdown')
+ shared.gradio['prompt_menu-default'] = gr.Dropdown(choices=utils.get_available_prompts(), value=shared.settings['prompt-default'], label='Prompt', elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['prompt_menu-default'], lambda: None, lambda: {'choices': utils.get_available_prompts()}, 'refresh-button', interactive=not mu)
shared.gradio['save_prompt-default'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
shared.gradio['delete_prompt-default'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index 862b3893..9e982f0e 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -1,4 +1,6 @@
import importlib
+import queue
+import threading
import traceback
from functools import partial
from pathlib import Path
@@ -205,48 +207,51 @@ def load_lora_wrapper(selected_loras):
def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), return_links=False, check=False):
+ downloader_module = importlib.import_module("download-model")
+ downloader = downloader_module.ModelDownloader()
+ update_queue = queue.Queue()
+
try:
# Handle direct GGUF URLs
if repo_id.startswith("https://") and ("huggingface.co" in repo_id) and (repo_id.endswith(".gguf") or repo_id.endswith(".gguf?download=true")):
try:
path = repo_id.split("huggingface.co/")[1]
-
- # Extract the repository ID (first two parts of the path)
parts = path.split("/")
if len(parts) >= 2:
extracted_repo_id = f"{parts[0]}/{parts[1]}"
-
- # Extract the filename (last part of the path)
- filename = repo_id.split("/")[-1]
- if "?download=true" in filename:
- filename = filename.replace("?download=true", "")
-
+ filename = repo_id.split("/")[-1].replace("?download=true", "")
repo_id = extracted_repo_id
specific_file = filename
- except:
- pass
+ except Exception as e:
+ yield f"Error parsing GGUF URL: {e}"
+ progress(0.0)
+ return
- if repo_id == "":
- yield ("Please enter a model path")
+ if not repo_id:
+ yield "Please enter a model path."
+ progress(0.0)
return
repo_id = repo_id.strip()
specific_file = specific_file.strip()
- downloader = importlib.import_module("download-model").ModelDownloader()
- progress(0.0)
+ progress(0.0, "Preparing download...")
+
model, branch = downloader.sanitize_model_and_branch_names(repo_id, None)
-
- yield ("Getting the download links from Hugging Face")
+ yield "Getting download links from Hugging Face..."
links, sha256, is_lora, is_llamacpp = downloader.get_download_links_from_huggingface(model, branch, text_only=False, specific_file=specific_file)
+ if not links:
+ yield "No files found to download for the given model/criteria."
+ progress(0.0)
+ return
+
# Check for multiple GGUF files
gguf_files = [link for link in links if link.lower().endswith('.gguf')]
if len(gguf_files) > 1 and not specific_file:
output = "Multiple GGUF files found. Please copy one of the following filenames to the 'File name' field:\n\n```\n"
for link in gguf_files:
output += f"{Path(link).name}\n"
-
output += "```"
yield output
return
@@ -260,12 +265,9 @@ def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), retur
yield output
return
- yield ("Getting the output folder")
+ yield "Determining output folder..."
output_folder = downloader.get_output_folder(
- model,
- branch,
- is_lora,
- is_llamacpp=is_llamacpp,
+ model, branch, is_lora, is_llamacpp=is_llamacpp,
model_dir=shared.args.model_dir if shared.args.model_dir != shared.args_defaults.model_dir else None
)
@@ -275,19 +277,65 @@ def download_model_wrapper(repo_id, specific_file, progress=gr.Progress(), retur
output_folder = Path(shared.args.lora_dir)
if check:
- progress(0.5)
-
- yield ("Checking previously downloaded files")
+ yield "Checking previously downloaded files..."
+ progress(0.5, "Verifying files...")
downloader.check_model_files(model, branch, links, sha256, output_folder)
- progress(1.0)
- else:
- yield (f"Downloading file{'s' if len(links) > 1 else ''} to `{output_folder}/`")
- downloader.download_model_files(model, branch, links, sha256, output_folder, progress_bar=progress, threads=4, is_llamacpp=is_llamacpp)
+ progress(1.0, "Verification complete.")
+ yield "File check complete."
+ return
- yield (f"Model successfully saved to `{output_folder}/`.")
- except:
- progress(1.0)
- yield traceback.format_exc().replace('\n', '\n\n')
+ yield ""
+ progress(0.0, "Download starting...")
+
+ def downloader_thread_target():
+ try:
+ downloader.download_model_files(
+ model, branch, links, sha256, output_folder,
+ progress_queue=update_queue,
+ threads=4,
+ is_llamacpp=is_llamacpp,
+ specific_file=specific_file
+ )
+ update_queue.put(("COMPLETED", f"Model successfully saved to `{output_folder}/`."))
+ except Exception as e:
+ tb_str = traceback.format_exc().replace('\n', '\n\n')
+ update_queue.put(("ERROR", tb_str))
+
+ download_thread = threading.Thread(target=downloader_thread_target)
+ download_thread.start()
+
+ while True:
+ try:
+ message = update_queue.get(timeout=0.2)
+ if not isinstance(message, tuple) or len(message) != 2:
+ continue
+
+ msg_identifier, data = message
+
+ if msg_identifier == "COMPLETED":
+ progress(1.0, "Download complete!")
+ yield data
+ break
+ elif msg_identifier == "ERROR":
+ progress(0.0, "Error occurred")
+ yield data
+ break
+ elif isinstance(msg_identifier, float):
+ progress_value = msg_identifier
+ description_str = data
+ progress(progress_value, f"Downloading: {description_str}")
+
+ except queue.Empty:
+ if not download_thread.is_alive():
+ yield "Download process finished."
+ break
+
+ download_thread.join()
+
+ except Exception as e:
+ progress(0.0)
+ tb_str = traceback.format_exc().replace('\n', '\n\n')
+ yield tb_str
def update_truncation_length(current_length, state):
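
The download rewrite follows a producer/consumer shape: a worker thread pushes progress tuples and `COMPLETED`/`ERROR` sentinels into a queue, and the UI-side generator drains it with a timeout so it can also notice a thread that died without reporting. A simplified, self-contained sketch of that pattern (not the project's downloader):

```python
import queue
import threading
import time


def run_with_progress(work_steps=5):
    updates = queue.Queue()

    def worker():
        try:
            for step in range(work_steps):
                time.sleep(0.1)                                   # pretend to download a chunk
                updates.put(((step + 1) / work_steps, f"file part {step + 1}"))
            updates.put(("COMPLETED", "all files saved"))
        except Exception as exc:
            updates.put(("ERROR", str(exc)))

    thread = threading.Thread(target=worker)
    thread.start()

    while True:
        try:
            kind, data = updates.get(timeout=0.2)
        except queue.Empty:
            if not thread.is_alive():
                yield "worker finished without a final message"
                break
            continue
        if kind in ("COMPLETED", "ERROR"):
            yield f"{kind}: {data}"
            break
        yield f"progress {kind:.0%}: {data}"       # kind is a float for normal updates

    thread.join()


if __name__ == "__main__":
    for message in run_with_progress():
        print(message)
```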
diff --git a/modules/ui_notebook.py b/modules/ui_notebook.py
index b234ac57..3f79a93c 100644
--- a/modules/ui_notebook.py
+++ b/modules/ui_notebook.py
@@ -22,7 +22,7 @@ def create_ui():
with gr.Column(scale=4):
with gr.Tab('Raw'):
with gr.Row():
- shared.gradio['textbox-notebook'] = gr.Textbox(value='', lines=27, elem_id='textbox-notebook', elem_classes=['textbox', 'add_scrollbar'])
+ shared.gradio['textbox-notebook'] = gr.Textbox(value=load_prompt(shared.settings['prompt-notebook']), lines=27, elem_id='textbox-notebook', elem_classes=['textbox', 'add_scrollbar'])
shared.gradio['token-counter-notebook'] = gr.HTML(value="
0", elem_id="notebook-token-counter")
with gr.Tab('Markdown'):
@@ -56,7 +56,7 @@ def create_ui():
with gr.Column(scale=1):
gr.HTML('<div style="padding-bottom: 13px"></div>')
with gr.Row():
- shared.gradio['prompt_menu-notebook'] = gr.Dropdown(choices=utils.get_available_prompts(), value='None', label='Prompt', elem_classes='slim-dropdown')
+ shared.gradio['prompt_menu-notebook'] = gr.Dropdown(choices=utils.get_available_prompts(), value=shared.settings['prompt-notebook'], label='Prompt', elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['prompt_menu-notebook'], lambda: None, lambda: {'choices': utils.get_available_prompts()}, ['refresh-button', 'refresh-button-small'], interactive=not mu)
shared.gradio['save_prompt-notebook'] = gr.Button('💾', elem_classes=['refresh-button', 'refresh-button-small'], interactive=not mu)
shared.gradio['delete_prompt-notebook'] = gr.Button('🗑️', elem_classes=['refresh-button', 'refresh-button-small'], interactive=not mu)
diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py
index 733d0901..e2b10554 100644
--- a/modules/ui_parameters.py
+++ b/modules/ui_parameters.py
@@ -6,19 +6,19 @@ from modules import loaders, presets, shared, ui, ui_chat, utils
from modules.utils import gradio
-def create_ui(default_preset):
+def create_ui():
mu = shared.args.multi_user
- generate_params = presets.load_preset(default_preset)
with gr.Tab("Parameters", elem_id="parameters"):
with gr.Tab("Generation"):
with gr.Row():
with gr.Column():
with gr.Row():
- shared.gradio['preset_menu'] = gr.Dropdown(choices=utils.get_available_presets(), value=default_preset, label='Preset', elem_classes='slim-dropdown')
+ shared.gradio['preset_menu'] = gr.Dropdown(choices=utils.get_available_presets(), value=shared.settings['preset'], label='Preset', elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['preset_menu'], lambda: None, lambda: {'choices': utils.get_available_presets()}, 'refresh-button', interactive=not mu)
shared.gradio['save_preset'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
shared.gradio['delete_preset'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
- shared.gradio['random_preset'] = gr.Button('🎲', elem_classes='refresh-button')
+ shared.gradio['reset_preset'] = gr.Button('Restore preset', elem_classes='refresh-button', interactive=True)
+ shared.gradio['neutralize_samplers'] = gr.Button('Neutralize samplers', elem_classes='refresh-button', interactive=True)
with gr.Column():
shared.gradio['filter_by_loader'] = gr.Dropdown(label="Filter by loader", choices=["All"] + list(loaders.loaders_and_params.keys()) if not shared.args.portable else ['llama.cpp'], value="All", elem_classes='slim-dropdown')
@@ -28,57 +28,60 @@ def create_ui(default_preset):
with gr.Row():
with gr.Column():
gr.Markdown('## Curve shape')
- shared.gradio['temperature'] = gr.Slider(0.01, 5, value=generate_params['temperature'], step=0.01, label='temperature')
- shared.gradio['dynatemp_low'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_low'], step=0.01, label='dynatemp_low', visible=generate_params['dynamic_temperature'])
- shared.gradio['dynatemp_high'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_high'], step=0.01, label='dynatemp_high', visible=generate_params['dynamic_temperature'])
- shared.gradio['dynatemp_exponent'] = gr.Slider(0.01, 5, value=generate_params['dynatemp_exponent'], step=0.01, label='dynatemp_exponent', visible=generate_params['dynamic_temperature'])
- shared.gradio['smoothing_factor'] = gr.Slider(0.0, 10.0, value=generate_params['smoothing_factor'], step=0.01, label='smoothing_factor', info='Activates Quadratic Sampling.')
- shared.gradio['smoothing_curve'] = gr.Slider(1.0, 10.0, value=generate_params['smoothing_curve'], step=0.01, label='smoothing_curve', info='Adjusts the dropoff curve of Quadratic Sampling.')
+ shared.gradio['temperature'] = gr.Slider(0.01, 5, value=shared.settings['temperature'], step=0.01, label='temperature')
+ shared.gradio['dynatemp_low'] = gr.Slider(0.01, 5, value=shared.settings['dynatemp_low'], step=0.01, label='dynatemp_low', visible=shared.settings['dynamic_temperature'])
+ shared.gradio['dynatemp_high'] = gr.Slider(0.01, 5, value=shared.settings['dynatemp_high'], step=0.01, label='dynatemp_high', visible=shared.settings['dynamic_temperature'])
+ shared.gradio['dynatemp_exponent'] = gr.Slider(0.01, 5, value=shared.settings['dynatemp_exponent'], step=0.01, label='dynatemp_exponent', visible=shared.settings['dynamic_temperature'])
+ shared.gradio['smoothing_factor'] = gr.Slider(0.0, 10.0, value=shared.settings['smoothing_factor'], step=0.01, label='smoothing_factor', info='Activates Quadratic Sampling.')
+ shared.gradio['smoothing_curve'] = gr.Slider(1.0, 10.0, value=shared.settings['smoothing_curve'], step=0.01, label='smoothing_curve', info='Adjusts the dropoff curve of Quadratic Sampling.')
+ shared.gradio['dynamic_temperature'] = gr.Checkbox(value=shared.settings['dynamic_temperature'], label='dynamic_temperature')
gr.Markdown('## Curve cutoff')
- shared.gradio['min_p'] = gr.Slider(0.0, 1.0, value=generate_params['min_p'], step=0.01, label='min_p')
- shared.gradio['top_n_sigma'] = gr.Slider(0.0, 5.0, value=generate_params['top_n_sigma'], step=0.01, label='top_n_sigma')
- shared.gradio['top_p'] = gr.Slider(0.0, 1.0, value=generate_params['top_p'], step=0.01, label='top_p')
- shared.gradio['top_k'] = gr.Slider(0, 200, value=generate_params['top_k'], step=1, label='top_k')
- shared.gradio['typical_p'] = gr.Slider(0.0, 1.0, value=generate_params['typical_p'], step=0.01, label='typical_p')
- shared.gradio['xtc_threshold'] = gr.Slider(0, 0.5, value=generate_params['xtc_threshold'], step=0.01, label='xtc_threshold', info='If 2 or more tokens have probability above this threshold, consider removing all but the last one.')
- shared.gradio['xtc_probability'] = gr.Slider(0, 1, value=generate_params['xtc_probability'], step=0.01, label='xtc_probability', info='Probability that the removal will actually happen. 0 disables the sampler. 1 makes it always happen.')
- shared.gradio['epsilon_cutoff'] = gr.Slider(0, 9, value=generate_params['epsilon_cutoff'], step=0.01, label='epsilon_cutoff')
- shared.gradio['eta_cutoff'] = gr.Slider(0, 20, value=generate_params['eta_cutoff'], step=0.01, label='eta_cutoff')
- shared.gradio['tfs'] = gr.Slider(0.0, 1.0, value=generate_params['tfs'], step=0.01, label='tfs')
- shared.gradio['top_a'] = gr.Slider(0.0, 1.0, value=generate_params['top_a'], step=0.01, label='top_a')
+ shared.gradio['min_p'] = gr.Slider(0.0, 1.0, value=shared.settings['min_p'], step=0.01, label='min_p')
+ shared.gradio['top_n_sigma'] = gr.Slider(0.0, 5.0, value=shared.settings['top_n_sigma'], step=0.01, label='top_n_sigma')
+ shared.gradio['top_p'] = gr.Slider(0.0, 1.0, value=shared.settings['top_p'], step=0.01, label='top_p')
+ shared.gradio['top_k'] = gr.Slider(0, 200, value=shared.settings['top_k'], step=1, label='top_k')
+ shared.gradio['typical_p'] = gr.Slider(0.0, 1.0, value=shared.settings['typical_p'], step=0.01, label='typical_p')
+ shared.gradio['xtc_threshold'] = gr.Slider(0, 0.5, value=shared.settings['xtc_threshold'], step=0.01, label='xtc_threshold', info='If 2 or more tokens have probability above this threshold, consider removing all but the last one.')
+ shared.gradio['xtc_probability'] = gr.Slider(0, 1, value=shared.settings['xtc_probability'], step=0.01, label='xtc_probability', info='Probability that the removal will actually happen. 0 disables the sampler. 1 makes it always happen.')
+ shared.gradio['epsilon_cutoff'] = gr.Slider(0, 9, value=shared.settings['epsilon_cutoff'], step=0.01, label='epsilon_cutoff')
+ shared.gradio['eta_cutoff'] = gr.Slider(0, 20, value=shared.settings['eta_cutoff'], step=0.01, label='eta_cutoff')
+ shared.gradio['tfs'] = gr.Slider(0.0, 1.0, value=shared.settings['tfs'], step=0.01, label='tfs')
+ shared.gradio['top_a'] = gr.Slider(0.0, 1.0, value=shared.settings['top_a'], step=0.01, label='top_a')
gr.Markdown('## Repetition suppression')
- shared.gradio['dry_multiplier'] = gr.Slider(0, 5, value=generate_params['dry_multiplier'], step=0.01, label='dry_multiplier', info='Set to greater than 0 to enable DRY. Recommended value: 0.8.')
- shared.gradio['dry_allowed_length'] = gr.Slider(1, 20, value=generate_params['dry_allowed_length'], step=1, label='dry_allowed_length', info='Longest sequence that can be repeated without being penalized.')
- shared.gradio['dry_base'] = gr.Slider(1, 4, value=generate_params['dry_base'], step=0.01, label='dry_base', info='Controls how fast the penalty grows with increasing sequence length.')
- shared.gradio['repetition_penalty'] = gr.Slider(1.0, 1.5, value=generate_params['repetition_penalty'], step=0.01, label='repetition_penalty')
- shared.gradio['frequency_penalty'] = gr.Slider(0, 2, value=generate_params['frequency_penalty'], step=0.05, label='frequency_penalty')
- shared.gradio['presence_penalty'] = gr.Slider(0, 2, value=generate_params['presence_penalty'], step=0.05, label='presence_penalty')
- shared.gradio['encoder_repetition_penalty'] = gr.Slider(0.8, 1.5, value=generate_params['encoder_repetition_penalty'], step=0.01, label='encoder_repetition_penalty')
- shared.gradio['no_repeat_ngram_size'] = gr.Slider(0, 20, step=1, value=generate_params['no_repeat_ngram_size'], label='no_repeat_ngram_size')
- shared.gradio['repetition_penalty_range'] = gr.Slider(0, 4096, step=64, value=generate_params['repetition_penalty_range'], label='repetition_penalty_range')
+ shared.gradio['dry_multiplier'] = gr.Slider(0, 5, value=shared.settings['dry_multiplier'], step=0.01, label='dry_multiplier', info='Set to greater than 0 to enable DRY. Recommended value: 0.8.')
+ shared.gradio['dry_allowed_length'] = gr.Slider(1, 20, value=shared.settings['dry_allowed_length'], step=1, label='dry_allowed_length', info='Longest sequence that can be repeated without being penalized.')
+ shared.gradio['dry_base'] = gr.Slider(1, 4, value=shared.settings['dry_base'], step=0.01, label='dry_base', info='Controls how fast the penalty grows with increasing sequence length.')
+ shared.gradio['repetition_penalty'] = gr.Slider(1.0, 1.5, value=shared.settings['repetition_penalty'], step=0.01, label='repetition_penalty')
+ shared.gradio['frequency_penalty'] = gr.Slider(0, 2, value=shared.settings['frequency_penalty'], step=0.05, label='frequency_penalty')
+ shared.gradio['presence_penalty'] = gr.Slider(0, 2, value=shared.settings['presence_penalty'], step=0.05, label='presence_penalty')
+ shared.gradio['encoder_repetition_penalty'] = gr.Slider(0.8, 1.5, value=shared.settings['encoder_repetition_penalty'], step=0.01, label='encoder_repetition_penalty')
+ shared.gradio['no_repeat_ngram_size'] = gr.Slider(0, 20, step=1, value=shared.settings['no_repeat_ngram_size'], label='no_repeat_ngram_size')
+ shared.gradio['repetition_penalty_range'] = gr.Slider(0, 4096, step=64, value=shared.settings['repetition_penalty_range'], label='repetition_penalty_range')
with gr.Column():
gr.Markdown('## Alternative sampling methods')
- shared.gradio['penalty_alpha'] = gr.Slider(0, 5, value=generate_params['penalty_alpha'], label='penalty_alpha', info='For Contrastive Search. do_sample must be unchecked.')
- shared.gradio['guidance_scale'] = gr.Slider(-0.5, 2.5, step=0.05, value=generate_params['guidance_scale'], label='guidance_scale', info='For CFG. 1.5 is a good value.')
- shared.gradio['mirostat_mode'] = gr.Slider(0, 2, step=1, value=generate_params['mirostat_mode'], label='mirostat_mode', info='mode=1 is for llama.cpp only.')
- shared.gradio['mirostat_tau'] = gr.Slider(0, 10, step=0.01, value=generate_params['mirostat_tau'], label='mirostat_tau')
- shared.gradio['mirostat_eta'] = gr.Slider(0, 1, step=0.01, value=generate_params['mirostat_eta'], label='mirostat_eta')
+ shared.gradio['penalty_alpha'] = gr.Slider(0, 5, value=shared.settings['penalty_alpha'], label='penalty_alpha', info='For Contrastive Search. do_sample must be unchecked.')
+ shared.gradio['guidance_scale'] = gr.Slider(-0.5, 2.5, step=0.05, value=shared.settings['guidance_scale'], label='guidance_scale', info='For CFG. 1.5 is a good value.')
+ shared.gradio['mirostat_mode'] = gr.Slider(0, 2, step=1, value=shared.settings['mirostat_mode'], label='mirostat_mode', info='mode=1 is for llama.cpp only.')
+ shared.gradio['mirostat_tau'] = gr.Slider(0, 10, step=0.01, value=shared.settings['mirostat_tau'], label='mirostat_tau')
+ shared.gradio['mirostat_eta'] = gr.Slider(0, 1, step=0.01, value=shared.settings['mirostat_eta'], label='mirostat_eta')
gr.Markdown('## Other options')
- shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], value=shared.settings['max_new_tokens'], step=1, label='max_new_tokens', info='⚠️ Setting this too high can cause prompt truncation.')
- shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.')
- shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.')
- shared.gradio['max_updates_second'] = gr.Slider(value=shared.settings['max_updates_second'], minimum=0, maximum=24, step=1, label='Maximum UI updates/second', info='Set this if you experience lag in the UI during streaming.')
+ shared.gradio['do_sample'] = gr.Checkbox(value=shared.settings['do_sample'], label='do_sample')
+ shared.gradio['temperature_last'] = gr.Checkbox(value=shared.settings['temperature_last'], label='temperature_last', info='Moves temperature/dynamic temperature/quadratic sampling to the end of the sampler stack, ignoring their positions in "Sampler priority".')
+ shared.gradio['sampler_priority'] = gr.Textbox(value=shared.settings['sampler_priority'], lines=10, label='Sampler priority', info='Parameter names separated by new lines or commas.', elem_classes=['add_scrollbar'])
+ shared.gradio['dry_sequence_breakers'] = gr.Textbox(value=shared.settings['dry_sequence_breakers'], label='dry_sequence_breakers', info='Tokens across which sequence matching is not continued. Specified as a comma-separated list of quoted strings.')
with gr.Column():
with gr.Row():
with gr.Column():
- shared.gradio['do_sample'] = gr.Checkbox(value=generate_params['do_sample'], label='do_sample')
- shared.gradio['dynamic_temperature'] = gr.Checkbox(value=generate_params['dynamic_temperature'], label='dynamic_temperature')
- shared.gradio['temperature_last'] = gr.Checkbox(value=generate_params['temperature_last'], label='temperature_last', info='Moves temperature/dynamic temperature/quadratic sampling to the end of the sampler stack, ignoring their positions in "Sampler priority".')
+ with gr.Blocks():
+ shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], value=shared.settings['max_new_tokens'], step=1, label='max_new_tokens', info='⚠️ Setting this too high can cause prompt truncation.')
+ shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.')
+ shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.')
+
shared.gradio['auto_max_new_tokens'] = gr.Checkbox(value=shared.settings['auto_max_new_tokens'], label='auto_max_new_tokens', info='Expand max_new_tokens to the available context length.')
shared.gradio['ban_eos_token'] = gr.Checkbox(value=shared.settings['ban_eos_token'], label='Ban the eos_token', info='Forces the model to never end the generation prematurely.')
shared.gradio['add_bos_token'] = gr.Checkbox(value=shared.settings['add_bos_token'], label='Add the bos_token to the beginning of prompts', info='Disabling this can make the replies more creative.')
@@ -91,18 +94,16 @@ def create_ui(default_preset):
shared.gradio['truncation_length'] = gr.Number(precision=0, step=256, value=get_truncation_length(), label='Truncate the prompt up to this length', info='The leftmost tokens are removed if the prompt exceeds this length.')
shared.gradio['seed'] = gr.Number(value=shared.settings['seed'], label='Seed (-1 for random)')
- shared.gradio['sampler_priority'] = gr.Textbox(value=generate_params['sampler_priority'], lines=12, label='Sampler priority', info='Parameter names separated by new lines or commas.', elem_classes=['add_scrollbar'])
shared.gradio['custom_stopping_strings'] = gr.Textbox(lines=2, value=shared.settings["custom_stopping_strings"] or None, label='Custom stopping strings', info='Written between "" and separated by commas.', placeholder='"\\n", "\\nYou:"')
shared.gradio['custom_token_bans'] = gr.Textbox(value=shared.settings['custom_token_bans'] or None, label='Token bans', info='Token IDs to ban, separated by commas. The IDs can be found in the Default or Notebook tab.')
shared.gradio['negative_prompt'] = gr.Textbox(value=shared.settings['negative_prompt'], label='Negative prompt', info='For CFG. Only used when guidance_scale is different than 1.', lines=3, elem_classes=['add_scrollbar'])
- shared.gradio['dry_sequence_breakers'] = gr.Textbox(value=generate_params['dry_sequence_breakers'], label='dry_sequence_breakers', info='Tokens across which sequence matching is not continued. Specified as a comma-separated list of quoted strings.')
with gr.Row() as shared.gradio['grammar_file_row']:
shared.gradio['grammar_file'] = gr.Dropdown(value='None', choices=utils.get_available_grammars(), label='Load grammar from file (.gbnf)', elem_classes='slim-dropdown')
ui.create_refresh_button(shared.gradio['grammar_file'], lambda: None, lambda: {'choices': utils.get_available_grammars()}, 'refresh-button', interactive=not mu)
shared.gradio['save_grammar'] = gr.Button('💾', elem_classes='refresh-button', interactive=not mu)
shared.gradio['delete_grammar'] = gr.Button('🗑️ ', elem_classes='refresh-button', interactive=not mu)
- shared.gradio['grammar_string'] = gr.Textbox(value='', label='Grammar', lines=16, elem_classes=['add_scrollbar', 'monospace'])
+ shared.gradio['grammar_string'] = gr.Textbox(value=shared.settings['grammar_string'], label='Grammar', lines=16, elem_classes=['add_scrollbar', 'monospace'])
ui_chat.create_chat_settings_ui()
@@ -113,9 +114,13 @@ def create_event_handlers():
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
presets.load_preset_for_ui, gradio('preset_menu', 'interface_state'), gradio('interface_state') + gradio(presets.presets_params()), show_progress=False)
- shared.gradio['random_preset'].click(
+ shared.gradio['reset_preset'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
- presets.random_preset, gradio('interface_state'), gradio('interface_state') + gradio(presets.presets_params()), show_progress=False)
+ presets.reset_preset_for_ui, gradio('preset_menu', 'interface_state'), gradio('interface_state') + gradio(presets.presets_params()), show_progress=False)
+
+ shared.gradio['neutralize_samplers'].click(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ presets.neutralize_samplers_for_ui, gradio('interface_state'), gradio('interface_state') + gradio(presets.presets_params()), show_progress=False)
shared.gradio['grammar_file'].change(load_grammar, gradio('grammar_file'), gradio('grammar_string'), show_progress=False)
shared.gradio['dynamic_temperature'].change(lambda x: [gr.update(visible=x)] * 3, gradio('dynamic_temperature'), gradio('dynatemp_low', 'dynatemp_high', 'dynatemp_exponent'), show_progress=False)
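Note: the `dynamic_temperature` checkbox above drives the visibility of the three dynatemp sliders through a single `.change()` handler. A minimal standalone sketch of that Gradio pattern, using illustrative component names rather than the webui's actual `shared.gradio` entries:

```python
# Hedged sketch: a checkbox that shows/hides dependent sliders, mirroring the
# dynamic_temperature -> dynatemp_* wiring in the diff above.
import gradio as gr

with gr.Blocks() as demo:
    enable = gr.Checkbox(value=False, label='dynamic_temperature')
    low = gr.Slider(0.01, 5, value=0.1, label='dynatemp_low', visible=False)
    high = gr.Slider(0.01, 5, value=1.0, label='dynatemp_high', visible=False)
    exponent = gr.Slider(0.01, 5, value=1.0, label='dynatemp_exponent', visible=False)

    # One gr.update(visible=x) per output component, as in the event handler above.
    enable.change(lambda x: [gr.update(visible=x)] * 3, enable, [low, high, exponent])

demo.launch()
```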
diff --git a/modules/ui_session.py b/modules/ui_session.py
index a4eba667..33d7dcb7 100644
--- a/modules/ui_session.py
+++ b/modules/ui_session.py
@@ -1,7 +1,6 @@
import gradio as gr
from modules import shared, ui, utils
-from modules.github import clone_or_pull_repository
from modules.utils import gradio
@@ -10,11 +9,14 @@ def create_ui():
with gr.Tab("Session", elem_id="session-tab"):
with gr.Row():
with gr.Column():
- shared.gradio['reset_interface'] = gr.Button("Apply flags/extensions and restart", interactive=not mu)
- with gr.Row():
- shared.gradio['toggle_dark_mode'] = gr.Button('Toggle 💡')
- shared.gradio['save_settings'] = gr.Button('Save UI defaults to user_data/settings.yaml', interactive=not mu)
+ gr.Markdown("## Settings")
+ shared.gradio['save_settings'] = gr.Button('Save settings to user_data/settings.yaml', elem_classes='refresh-button', interactive=not mu)
+ shared.gradio['toggle_dark_mode'] = gr.Button('Toggle light/dark theme 💡', elem_classes='refresh-button')
+ shared.gradio['paste_to_attachment'] = gr.Checkbox(label='Turn long pasted text into attachments in the Chat tab', value=shared.settings['paste_to_attachment'], elem_id='paste_to_attachment')
+ with gr.Column():
+ gr.Markdown("## Extensions & flags")
+ shared.gradio['reset_interface'] = gr.Button("Apply flags/extensions and restart", interactive=not mu)
with gr.Row():
with gr.Column():
shared.gradio['extensions_menu'] = gr.CheckboxGroup(choices=utils.get_available_extensions(), value=shared.args.extensions, label="Available extensions", info='Note that some of these extensions may require manually installing Python requirements through the command: pip install -r extensions/extension_name/requirements.txt', elem_classes='checkboxgroup-table')
@@ -22,30 +24,20 @@ def create_ui():
with gr.Column():
shared.gradio['bool_menu'] = gr.CheckboxGroup(choices=get_boolean_arguments(), value=get_boolean_arguments(active=True), label="Boolean command-line flags", elem_classes='checkboxgroup-table')
- with gr.Column():
- if not shared.args.portable:
- extension_name = gr.Textbox(lines=1, label='Install or update an extension', info='Enter the GitHub URL below and press Enter. For a list of extensions, see: https://github.com/oobabooga/text-generation-webui-extensions ⚠️ WARNING ⚠️ : extensions can execute arbitrary code. Make sure to inspect their source code before activating them.', interactive=not mu)
- extension_status = gr.Markdown()
- else:
- pass
-
shared.gradio['theme_state'] = gr.Textbox(visible=False, value='dark' if shared.settings['dark_theme'] else 'light')
- if not shared.args.portable:
- extension_name.submit(clone_or_pull_repository, extension_name, extension_status, show_progress=False)
+ shared.gradio['save_settings'].click(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ handle_save_settings, gradio('interface_state', 'preset_menu', 'extensions_menu', 'show_controls', 'theme_state'), gradio('save_contents', 'save_filename', 'save_root', 'file_saver'), show_progress=False)
+
+ shared.gradio['toggle_dark_mode'].click(
+ lambda x: 'dark' if x == 'light' else 'light', gradio('theme_state'), gradio('theme_state')).then(
+ None, None, None, js=f'() => {{{ui.dark_theme_js}; toggleDarkMode(); localStorage.setItem("theme", document.body.classList.contains("dark") ? "dark" : "light")}}')
# Reset interface event
shared.gradio['reset_interface'].click(
set_interface_arguments, gradio('extensions_menu', 'bool_menu'), None).then(
            None, None, None, js='() => {document.body.innerHTML=\'<h1 style="font-family:monospace;margin-top:20%;color:lightgray;text-align:center;">Reloading...</h1>\'; setTimeout(function(){location.reload()},2500); return []}')
- shared.gradio['toggle_dark_mode'].click(
- lambda x: 'dark' if x == 'light' else 'light', gradio('theme_state'), gradio('theme_state')).then(
- None, None, None, js=f'() => {{{ui.dark_theme_js}; toggleDarkMode()}}')
-
- shared.gradio['save_settings'].click(
- ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
- handle_save_settings, gradio('interface_state', 'preset_menu', 'extensions_menu', 'show_controls', 'theme_state'), gradio('save_contents', 'save_filename', 'save_root', 'file_saver'), show_progress=False)
-
def handle_save_settings(state, preset, extensions, show_controls, theme):
contents = ui.save_settings(state, preset, extensions, show_controls, theme)
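Note: the theme toggle above chains a Python lambda that flips a hidden `theme_state` textbox with a JS callback that now also persists the choice to `localStorage`. A simplified sketch of that chain, assuming plain Gradio and an illustrative JS body in place of `ui.dark_theme_js`:

```python
# Hedged sketch of the toggle_dark_mode chain: flip the state server-side,
# then apply and persist the theme client-side.
import gradio as gr

with gr.Blocks() as demo:
    theme_state = gr.Textbox(visible=False, value='dark')
    toggle = gr.Button('Toggle light/dark theme 💡')

    toggle.click(
        lambda x: 'dark' if x == 'light' else 'light', theme_state, theme_state).then(
        None, None, None,
        js='() => { document.body.classList.toggle("dark"); '
           'localStorage.setItem("theme", document.body.classList.contains("dark") ? "dark" : "light"); }')

demo.launch()
```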
diff --git a/modules/utils.py b/modules/utils.py
index 577c55b8..21873541 100644
--- a/modules/utils.py
+++ b/modules/utils.py
@@ -3,7 +3,7 @@ import re
from datetime import datetime
from pathlib import Path
-from modules import github, shared
+from modules import shared
from modules.logging_colors import logger
@@ -182,7 +182,6 @@ def get_available_instruction_templates():
def get_available_extensions():
extensions = sorted(set(map(lambda x: x.parts[1], Path('extensions').glob('*/script.py'))), key=natural_keys)
- extensions = [v for v in extensions if v not in github.new_extensions]
return extensions
diff --git a/modules/web_search.py b/modules/web_search.py
index 1f670349..ffd7e483 100644
--- a/modules/web_search.py
+++ b/modules/web_search.py
@@ -3,8 +3,6 @@ from concurrent.futures import as_completed
from datetime import datetime
import requests
-from bs4 import BeautifulSoup
-from duckduckgo_search import DDGS
from modules.logging_colors import logger
@@ -14,35 +12,39 @@ def get_current_timestamp():
return datetime.now().strftime('%b %d, %Y %H:%M')
-def download_web_page(url, timeout=5):
- """Download and extract text from a web page"""
+def download_web_page(url, timeout=10):
+ """
+ Download a web page and convert its HTML content to structured Markdown text.
+ """
+ import html2text
+
try:
headers = {
'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
}
response = requests.get(url, headers=headers, timeout=timeout)
- response.raise_for_status()
+ response.raise_for_status() # Raise an exception for bad status codes
- soup = BeautifulSoup(response.content, 'html.parser')
+ # Initialize the HTML to Markdown converter
+ h = html2text.HTML2Text()
+ h.body_width = 0
- # Remove script and style elements
- for script in soup(["script", "style"]):
- script.decompose()
+ # Convert the HTML to Markdown
+ markdown_text = h.handle(response.text)
- # Get text and clean it up
- text = soup.get_text()
- lines = (line.strip() for line in text.splitlines())
- chunks = (phrase.strip() for line in lines for phrase in line.split(" "))
- text = ' '.join(chunk for chunk in chunks if chunk)
-
- return text
- except Exception as e:
+ return markdown_text
+ except requests.exceptions.RequestException as e:
logger.error(f"Error downloading {url}: {e}")
- return f"[Error downloading content from {url}: {str(e)}]"
+ return ""
+ except Exception as e:
+ logger.error(f"An unexpected error occurred: {e}")
+ return ""
def perform_web_search(query, num_pages=3, max_workers=5):
"""Perform web search and return results with content"""
+ from duckduckgo_search import DDGS
+
try:
with DDGS() as ddgs:
results = list(ddgs.text(query, max_results=num_pages))
@@ -74,9 +76,7 @@ def perform_web_search(query, num_pages=3, max_workers=5):
'url': url,
'content': content
}
- except Exception as e:
- logger.error(f"Error downloading {url}: {e}")
- # Include failed downloads with empty content
+ except Exception:
search_results[index] = {
'title': title,
'url': url,
@@ -107,6 +107,13 @@ def add_web_search_attachments(history, row_idx, user_message, search_query, sta
logger.warning("No search results found")
return
+ # Filter out failed downloads before adding attachments
+ successful_results = [result for result in search_results if result['content'].strip()]
+
+ if not successful_results:
+ logger.warning("No successful downloads to add as attachments")
+ return
+
# Add search results as attachments
key = f"user_{row_idx}"
if key not in history['metadata']:
@@ -114,7 +121,7 @@ def add_web_search_attachments(history, row_idx, user_message, search_query, sta
if "attachments" not in history['metadata'][key]:
history['metadata'][key]["attachments"] = []
- for result in search_results:
+ for result in successful_results:
attachment = {
"name": result['title'],
"type": "text/html",
@@ -123,7 +130,7 @@ def add_web_search_attachments(history, row_idx, user_message, search_query, sta
}
history['metadata'][key]["attachments"].append(attachment)
- logger.info(f"Added {len(search_results)} web search results as attachments")
+ logger.info(f"Added {len(successful_results)} successful web search results as attachments.")
except Exception as e:
logger.error(f"Error in web search: {e}")
diff --git a/one_click.py b/one_click.py
index 482a6aa9..94f2aab0 100644
--- a/one_click.py
+++ b/one_click.py
@@ -17,8 +17,6 @@ import sys
# Define the required versions
TORCH_VERSION = "2.6.0"
-TORCHVISION_VERSION = "0.21.0"
-TORCHAUDIO_VERSION = "2.6.0"
PYTHON_VERSION = "3.11"
LIBSTDCXX_VERSION_LINUX = "12.1.0"
@@ -70,12 +68,8 @@ def is_installed():
def cpu_has_avx2():
try:
import cpuinfo
-
info = cpuinfo.get_cpu_info()
- if 'avx2' in info['flags']:
- return True
- else:
- return False
+ return 'avx2' in info['flags']
except:
return True
@@ -83,30 +77,119 @@ def cpu_has_avx2():
def cpu_has_amx():
try:
import cpuinfo
-
info = cpuinfo.get_cpu_info()
- if 'amx' in info['flags']:
- return True
- else:
- return False
+ return 'amx' in info['flags']
except:
return True
-def torch_version():
- site_packages_path = None
- for sitedir in site.getsitepackages():
- if "site-packages" in sitedir and conda_env_path in sitedir:
- site_packages_path = sitedir
- break
+def load_state():
+ """Load installer state from JSON file"""
+ if os.path.exists(state_file):
+ try:
+ with open(state_file, 'r') as f:
+ return json.load(f)
+ except:
+ return {}
+ return {}
- if site_packages_path:
- torch_version_file = open(os.path.join(site_packages_path, 'torch', 'version.py')).read().splitlines()
- torver = [line for line in torch_version_file if line.startswith('__version__')][0].split('__version__ = ')[1].strip("'")
+
+def save_state(state):
+ """Save installer state to JSON file"""
+ with open(state_file, 'w') as f:
+ json.dump(state, f)
+
+
+def get_gpu_choice():
+ """Get GPU choice from state file or ask user"""
+ state = load_state()
+ gpu_choice = state.get('gpu_choice')
+
+ if not gpu_choice:
+ if "GPU_CHOICE" in os.environ:
+ choice = os.environ["GPU_CHOICE"].upper()
+ print_big_message(f"Selected GPU choice \"{choice}\" based on the GPU_CHOICE environment variable.")
+ else:
+ choice = get_user_choice(
+ "What is your GPU?",
+ {
+ 'A': 'NVIDIA - CUDA 12.4',
+ 'B': 'AMD - Linux/macOS only, requires ROCm 6.2.4',
+ 'C': 'Apple M Series',
+ 'D': 'Intel Arc (beta)',
+ 'E': 'NVIDIA - CUDA 12.8',
+ 'N': 'CPU mode'
+ },
+ )
+
+ # Convert choice to GPU name
+ gpu_choice = {"A": "NVIDIA", "B": "AMD", "C": "APPLE", "D": "INTEL", "E": "NVIDIA_CUDA128", "N": "NONE"}[choice]
+
+ # Save choice to state
+ state['gpu_choice'] = gpu_choice
+ save_state(state)
+
+ return gpu_choice
+
+
+def get_pytorch_install_command(gpu_choice):
+ """Get PyTorch installation command based on GPU choice"""
+ base_cmd = f"python -m pip install torch=={TORCH_VERSION} "
+
+ if gpu_choice == "NVIDIA":
+ return base_cmd + "--index-url https://download.pytorch.org/whl/cu124"
+ elif gpu_choice == "NVIDIA_CUDA128":
+ return "python -m pip install torch==2.7.1 --index-url https://download.pytorch.org/whl/cu128"
+ elif gpu_choice == "AMD":
+ return base_cmd + "--index-url https://download.pytorch.org/whl/rocm6.2.4"
+ elif gpu_choice in ["APPLE", "NONE"]:
+ return base_cmd + "--index-url https://download.pytorch.org/whl/cpu"
+ elif gpu_choice == "INTEL":
+ if is_linux():
+ return "python -m pip install torch==2.1.0a0 intel-extension-for-pytorch==2.1.10+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
+ else:
+ return "python -m pip install torch==2.1.0a0 intel-extension-for-pytorch==2.1.10 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
else:
- from torch import __version__ as torver
+ return base_cmd
- return torver
+
+def get_pytorch_update_command(gpu_choice):
+ """Get PyTorch update command based on GPU choice"""
+ base_cmd = f"python -m pip install --upgrade torch=={TORCH_VERSION} "
+
+ if gpu_choice == "NVIDIA":
+ return f"{base_cmd} --index-url https://download.pytorch.org/whl/cu124"
+ elif gpu_choice == "NVIDIA_CUDA128":
+ return "python -m pip install --upgrade torch==2.7.1 --index-url https://download.pytorch.org/whl/cu128"
+ elif gpu_choice == "AMD":
+ return f"{base_cmd} --index-url https://download.pytorch.org/whl/rocm6.2.4"
+ elif gpu_choice in ["APPLE", "NONE"]:
+ return f"{base_cmd} --index-url https://download.pytorch.org/whl/cpu"
+ elif gpu_choice == "INTEL":
+ intel_extension = "intel-extension-for-pytorch==2.1.10+xpu" if is_linux() else "intel-extension-for-pytorch==2.1.10"
+ return f"{base_cmd} {intel_extension} --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
+ else:
+ return base_cmd
+
+
+def get_requirements_file(gpu_choice):
+ """Get requirements file path based on GPU choice"""
+ requirements_base = os.path.join("requirements", "full")
+
+ if gpu_choice == "AMD":
+ file_name = f"requirements_amd{'_noavx2' if not cpu_has_avx2() else ''}.txt"
+ elif gpu_choice == "APPLE":
+ file_name = f"requirements_apple_{'intel' if is_x86_64() else 'silicon'}.txt"
+ elif gpu_choice in ["INTEL", "NONE"]:
+ file_name = f"requirements_cpu_only{'_noavx2' if not cpu_has_avx2() else ''}.txt"
+ elif gpu_choice == "NVIDIA":
+ file_name = f"requirements{'_noavx2' if not cpu_has_avx2() else ''}.txt"
+ elif gpu_choice == "NVIDIA_CUDA128":
+ file_name = f"requirements_cuda128{'_noavx2' if not cpu_has_avx2() else ''}.txt"
+ else:
+ raise ValueError(f"Unknown GPU choice: {gpu_choice}")
+
+ return os.path.join(requirements_base, file_name)
def get_current_commit():
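Note: with the helpers above, the installer resolves the GPU choice once (environment variable, saved state, or interactive prompt) and derives everything else from it. A hedged usage sketch, assuming these functions are called from within one_click.py where `state_file` and the platform checks are defined:

```python
# Hedged usage sketch of the new helpers defined above.
gpu_choice = get_gpu_choice()                   # e.g. "NVIDIA" or "NVIDIA_CUDA128", cached in the state file
print(get_pytorch_install_command(gpu_choice))  # pip command with the matching index URL
print(get_requirements_file(gpu_choice))        # e.g. requirements/full/requirements_cuda128.txt
```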
@@ -209,28 +292,8 @@ def get_user_choice(question, options_dict):
def update_pytorch_and_python():
print_big_message("Checking for PyTorch updates.")
-
- # Update the Python version. Left here for future reference in case this becomes necessary.
- # print_big_message("Checking for PyTorch and Python updates.")
- # current_python_version = f"{sys.version_info.major}.{sys.version_info.minor}"
- # if current_python_version != PYTHON_VERSION:
- # run_cmd(f"conda install -y python={PYTHON_VERSION}", assert_success=True, environment=True)
-
- torver = torch_version()
- base_cmd = f"python -m pip install --upgrade torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION}"
-
- if "+cu" in torver:
- install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/cu124"
- elif "+rocm" in torver:
- install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/rocm6.2.4"
- elif "+cpu" in torver:
- install_cmd = f"{base_cmd} --index-url https://download.pytorch.org/whl/cpu"
- elif "+cxx11" in torver:
- intel_extension = "intel-extension-for-pytorch==2.1.10+xpu" if is_linux() else "intel-extension-for-pytorch==2.1.10"
- install_cmd = f"{base_cmd} {intel_extension} --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
- else:
- install_cmd = base_cmd
-
+ gpu_choice = get_gpu_choice()
+ install_cmd = get_pytorch_update_command(gpu_choice)
run_cmd(install_cmd, assert_success=True, environment=True)
@@ -256,43 +319,11 @@ def install_webui():
if os.path.isfile(state_file):
os.remove(state_file)
- # Ask the user for the GPU vendor
- if "GPU_CHOICE" in os.environ:
- choice = os.environ["GPU_CHOICE"].upper()
- print_big_message(f"Selected GPU choice \"{choice}\" based on the GPU_CHOICE environment variable.")
-
- # Warn about changed meanings and handle old choices
- if choice == "B":
- print_big_message("Warning: GPU_CHOICE='B' now means 'AMD' in the new version.")
- elif choice == "C":
- print_big_message("Warning: GPU_CHOICE='C' now means 'Apple M Series' in the new version.")
- elif choice == "D":
- print_big_message("Warning: GPU_CHOICE='D' now means 'Intel Arc' in the new version.")
- else:
- choice = get_user_choice(
- "What is your GPU?",
- {
- 'A': 'NVIDIA - CUDA 12.4',
- 'B': 'AMD - Linux/macOS only, requires ROCm 6.2.4',
- 'C': 'Apple M Series',
- 'D': 'Intel Arc (beta)',
- 'N': 'CPU mode'
- },
- )
-
- # Convert choices to GPU names for compatibility
- gpu_choice_to_name = {
- "A": "NVIDIA",
- "B": "AMD",
- "C": "APPLE",
- "D": "INTEL",
- "N": "NONE"
- }
-
- selected_gpu = gpu_choice_to_name[choice]
+ # Get GPU choice and save it to state
+ gpu_choice = get_gpu_choice()
# Write a flag to CMD_FLAGS.txt for CPU mode
- if selected_gpu == "NONE":
+ if gpu_choice == "NONE":
cmd_flags_path = os.path.join(script_dir, "user_data", "CMD_FLAGS.txt")
with open(cmd_flags_path, 'r+') as cmd_flags_file:
if "--cpu" not in cmd_flags_file.read():
@@ -300,34 +331,22 @@ def install_webui():
cmd_flags_file.write("\n--cpu\n")
# Handle CUDA version display
- elif any((is_windows(), is_linux())) and selected_gpu == "NVIDIA":
+ elif any((is_windows(), is_linux())) and gpu_choice == "NVIDIA":
print("CUDA: 12.4")
+ elif any((is_windows(), is_linux())) and gpu_choice == "NVIDIA_CUDA128":
+ print("CUDA: 12.8")
# No PyTorch for AMD on Windows (?)
- elif is_windows() and selected_gpu == "AMD":
+ elif is_windows() and gpu_choice == "AMD":
print("PyTorch setup on Windows is not implemented yet. Exiting...")
sys.exit(1)
- # Find the Pytorch installation command
- install_pytorch = f"python -m pip install torch=={TORCH_VERSION} torchvision=={TORCHVISION_VERSION} torchaudio=={TORCHAUDIO_VERSION} "
-
- if selected_gpu == "NVIDIA":
- install_pytorch += "--index-url https://download.pytorch.org/whl/cu124"
- elif selected_gpu == "AMD":
- install_pytorch += "--index-url https://download.pytorch.org/whl/rocm6.2.4"
- elif selected_gpu in ["APPLE", "NONE"]:
- install_pytorch += "--index-url https://download.pytorch.org/whl/cpu"
- elif selected_gpu == "INTEL":
- if is_linux():
- install_pytorch = "python -m pip install torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10+xpu --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
- else:
- install_pytorch = "python -m pip install torch==2.1.0a0 torchvision==0.16.0a0 torchaudio==2.1.0a0 intel-extension-for-pytorch==2.1.10 --extra-index-url https://pytorch-extension.intel.com/release-whl/stable/xpu/us/"
-
# Install Git and then Pytorch
print_big_message("Installing PyTorch.")
+ install_pytorch = get_pytorch_install_command(gpu_choice)
run_cmd(f"conda install -y ninja git && {install_pytorch} && python -m pip install py-cpuinfo==9.0.0", assert_success=True, environment=True)
- if selected_gpu == "INTEL":
+ if gpu_choice == "INTEL":
# Install oneAPI dependencies via conda
print_big_message("Installing Intel oneAPI runtime libraries.")
run_cmd("conda install -y -c https://software.repos.intel.com/python/conda/ -c conda-forge dpcpp-cpp-rt=2024.0 mkl-dpcpp=2024.0", environment=True)
@@ -349,31 +368,15 @@ def update_requirements(initial_installation=False, pull=True):
assert_success=True
)
- torver = torch_version()
- requirements_base = os.path.join("requirements", "full")
-
- if "+rocm" in torver:
- file_name = f"requirements_amd{'_noavx2' if not cpu_has_avx2() else ''}.txt"
- elif "+cpu" in torver or "+cxx11" in torver:
- file_name = f"requirements_cpu_only{'_noavx2' if not cpu_has_avx2() else ''}.txt"
- elif is_macos():
- file_name = f"requirements_apple_{'intel' if is_x86_64() else 'silicon'}.txt"
- else:
- file_name = f"requirements{'_noavx2' if not cpu_has_avx2() else ''}.txt"
-
- requirements_file = os.path.join(requirements_base, file_name)
-
- # Load state from JSON file
current_commit = get_current_commit()
- wheels_changed = False
- if os.path.exists(state_file):
- with open(state_file, 'r') as f:
- last_state = json.load(f)
-
- if 'wheels_changed' in last_state or last_state.get('last_installed_commit') != current_commit:
+ wheels_changed = not os.path.exists(state_file)
+ if not wheels_changed:
+ state = load_state()
+ if 'wheels_changed' in state or state.get('last_installed_commit') != current_commit:
wheels_changed = True
- else:
- wheels_changed = True
+
+ gpu_choice = get_gpu_choice()
+ requirements_file = get_requirements_file(gpu_choice)
if pull:
# Read .whl lines before pulling
@@ -409,19 +412,17 @@ def update_requirements(initial_installation=False, pull=True):
print_big_message(f"File '{file}' was updated during 'git pull'. Please run the script again.")
# Save state before exiting
- current_state = {}
+ state = load_state()
if wheels_changed:
- current_state['wheels_changed'] = True
-
- with open(state_file, 'w') as f:
- json.dump(current_state, f)
-
+ state['wheels_changed'] = True
+ save_state(state)
sys.exit(1)
# Save current state
- current_state = {'last_installed_commit': current_commit}
- with open(state_file, 'w') as f:
- json.dump(current_state, f)
+ state = load_state()
+ state['last_installed_commit'] = current_commit
+ state.pop('wheels_changed', None) # Remove wheels_changed flag
+ save_state(state)
if os.environ.get("INSTALL_EXTENSIONS", "").lower() in ("yes", "y", "true", "1", "t", "on"):
install_extensions_requirements()
@@ -432,11 +433,10 @@ def update_requirements(initial_installation=False, pull=True):
# Update PyTorch
if not initial_installation:
update_pytorch_and_python()
- torver = torch_version()
clean_outdated_pytorch_cuda_dependencies()
print_big_message(f"Installing webui requirements from file: {requirements_file}")
- print(f"TORCH: {torver}\n")
+ print(f"GPU Choice: {gpu_choice}\n")
# Prepare the requirements file
textgen_requirements = open(requirements_file).read().splitlines()
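Note: the installer state is now a small JSON dict shared by `load_state()`/`save_state()`, carrying `gpu_choice`, `last_installed_commit`, and a transient `wheels_changed` flag that forces requirement reinstalls on the next run. A self-contained sketch of that round trip, with an illustrative file path and a placeholder commit value:

```python
# Hedged sketch of the installer-state round trip used above.
import json
import os

state_file = "installer_state.json"  # illustrative; one_click.py defines its own path


def load_state():
    if os.path.exists(state_file):
        try:
            with open(state_file, 'r') as f:
                return json.load(f)
        except (OSError, json.JSONDecodeError):
            return {}
    return {}


def save_state(state):
    with open(state_file, 'w') as f:
        json.dump(state, f)


# After a successful install: record the commit and clear the transient flag.
state = load_state()
state['last_installed_commit'] = "abc1234"  # placeholder; the real value comes from get_current_commit()
state.pop('wheels_changed', None)
save_state(state)
```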
diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt
index 2c322715..a71e5240 100644
--- a/requirements/full/requirements.txt
+++ b/requirements/full/requirements.txt
@@ -1,5 +1,4 @@
accelerate==1.5.*
-beautifulsoup4==4.13.4
bitsandbytes==0.45.*
colorama
datasets
@@ -7,6 +6,7 @@ duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -16,6 +16,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
@@ -33,12 +34,12 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
-https://github.com/oobabooga/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
+https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt
index 6aeb325e..db1ead1a 100644
--- a/requirements/full/requirements_amd.txt
+++ b/requirements/full/requirements_amd.txt
@@ -1,11 +1,11 @@
accelerate==1.5.*
-beautifulsoup4==4.13.4
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -15,6 +15,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
@@ -32,7 +33,7 @@ sse-starlette==1.6.5
tiktoken
# AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt
index 3b052423..a08aa392 100644
--- a/requirements/full/requirements_amd_noavx2.txt
+++ b/requirements/full/requirements_amd_noavx2.txt
@@ -1,11 +1,11 @@
accelerate==1.5.*
-beautifulsoup4==4.13.4
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -15,6 +15,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
@@ -32,7 +33,7 @@ sse-starlette==1.6.5
tiktoken
# AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt
index 8c51459e..fa217c3e 100644
--- a/requirements/full/requirements_apple_intel.txt
+++ b/requirements/full/requirements_apple_intel.txt
@@ -1,11 +1,11 @@
accelerate==1.5.*
-beautifulsoup4==4.13.4
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -15,6 +15,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
@@ -32,7 +33,7 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9-py3-none-any.whl
-https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3-py3-none-any.whl
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl
diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt
index b9f15d45..52581f1a 100644
--- a/requirements/full/requirements_apple_silicon.txt
+++ b/requirements/full/requirements_apple_silicon.txt
@@ -1,11 +1,11 @@
accelerate==1.5.*
-beautifulsoup4==4.13.4
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -15,6 +15,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
@@ -32,8 +33,8 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
-https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9-py3-none-any.whl
-https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
+https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3-py3-none-any.whl
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl
diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt
index 0877d968..b72f22aa 100644
--- a/requirements/full/requirements_cpu_only.txt
+++ b/requirements/full/requirements_cpu_only.txt
@@ -1,11 +1,11 @@
accelerate==1.5.*
-beautifulsoup4==4.13.4
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -15,6 +15,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
@@ -32,5 +33,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt
index cab78237..e8de6057 100644
--- a/requirements/full/requirements_cpu_only_noavx2.txt
+++ b/requirements/full/requirements_cpu_only_noavx2.txt
@@ -1,11 +1,11 @@
accelerate==1.5.*
-beautifulsoup4==4.13.4
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -15,6 +15,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
@@ -32,5 +33,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
diff --git a/requirements/full/requirements_cuda128.txt b/requirements/full/requirements_cuda128.txt
new file mode 100644
index 00000000..7851041f
--- /dev/null
+++ b/requirements/full/requirements_cuda128.txt
@@ -0,0 +1,45 @@
+accelerate==1.5.*
+bitsandbytes==0.45.*
+colorama
+datasets
+duckduckgo_search==8.0.2
+einops
+fastapi==0.112.4
+gradio==4.37.*
+html2text==2025.4.15
+jinja2==3.1.6
+markdown
+numpy==2.2.*
+pandas
+peft==0.15.*
+Pillow>=9.5.0
+psutil
+pydantic==2.8.2
+PyPDF2==3.0.1
+python-docx==1.1.2
+pyyaml
+requests
+rich
+safetensors==0.5.*
+scipy
+sentencepiece
+tensorboard
+transformers==4.50.*
+tqdm
+wandb
+
+# API
+flask_cloudflared==0.0.14
+sse-starlette==1.6.5
+tiktoken
+
+# CUDA wheels
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
+https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
diff --git a/requirements/full/requirements_cuda128_noavx2.txt b/requirements/full/requirements_cuda128_noavx2.txt
new file mode 100644
index 00000000..c8015166
--- /dev/null
+++ b/requirements/full/requirements_cuda128_noavx2.txt
@@ -0,0 +1,45 @@
+accelerate==1.5.*
+bitsandbytes==0.45.*
+colorama
+datasets
+duckduckgo_search==8.0.2
+einops
+fastapi==0.112.4
+gradio==4.37.*
+html2text==2025.4.15
+jinja2==3.1.6
+markdown
+numpy==2.2.*
+pandas
+peft==0.15.*
+Pillow>=9.5.0
+psutil
+pydantic==2.8.2
+PyPDF2==3.0.1
+python-docx==1.1.2
+pyyaml
+requests
+rich
+safetensors==0.5.*
+scipy
+sentencepiece
+tensorboard
+transformers==4.50.*
+tqdm
+wandb
+
+# API
+flask_cloudflared==0.0.14
+sse-starlette==1.6.5
+tiktoken
+
+# CUDA wheels
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu128.torch2.7.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
+https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu128torch2.7.0cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt
index dfd42577..5e81ce1f 100644
--- a/requirements/full/requirements_noavx2.txt
+++ b/requirements/full/requirements_noavx2.txt
@@ -1,5 +1,4 @@
accelerate==1.5.*
-beautifulsoup4==4.13.4
bitsandbytes==0.45.*
colorama
datasets
@@ -7,6 +6,7 @@ duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -16,6 +16,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
@@ -33,12 +34,12 @@ sse-starlette==1.6.5
tiktoken
 
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
-https://github.com/oobabooga/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/exllamav3/releases/download/v0.0.3/exllamav3-0.0.3+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/turboderp-org/exllamav2/releases/download/v0.3.1/exllamav2-0.3.1-py3-none-any.whl; platform_system == "Linux" and platform_machine != "x86_64"
+https://github.com/kingbri1/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu124torch2.6.0cxx11abiFALSE-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/Dao-AILab/flash-attention/releases/download/v2.7.4.post1/flash_attn-2.7.4.post1+cu12torch2.6cxx11abiFALSE-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt
index 5d9f84ce..d26663a7 100644
--- a/requirements/full/requirements_nowheels.txt
+++ b/requirements/full/requirements_nowheels.txt
@@ -1,11 +1,11 @@
accelerate==1.5.*
-beautifulsoup4==4.13.4
colorama
datasets
duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
@@ -15,6 +15,7 @@ Pillow>=9.5.0
psutil
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt
index fdae681d..4ddcf43f 100644
--- a/requirements/portable/requirements.txt
+++ b/requirements/portable/requirements.txt
@@ -1,12 +1,13 @@
-beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
@@ -18,5 +19,5 @@ sse-starlette==1.6.5
tiktoken
 
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt
index a58f39f7..38a21618 100644
--- a/requirements/portable/requirements_apple_intel.txt
+++ b/requirements/portable/requirements_apple_intel.txt
@@ -1,12 +1,13 @@
-beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
@@ -18,5 +19,5 @@ sse-starlette==1.6.5
tiktoken
 
# Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt
index 91ea3a6d..0b70c800 100644
--- a/requirements/portable/requirements_apple_silicon.txt
+++ b/requirements/portable/requirements_apple_silicon.txt
@@ -1,12 +1,13 @@
-beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
@@ -18,6 +19,6 @@ sse-starlette==1.6.5
tiktoken
 
# Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0"
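The platform_release ranges in the Mac wheel markers above refer to the Darwin kernel version rather than the macOS marketing version: Darwin 24.x, 23.x and 22.x correspond to macOS 15, 14 and 13. A small illustrative sketch of that mapping (the function name and fallback string are made up for the example):

# Illustrative only: map platform.release() (the Darwin kernel version on macOS)
# to the macosx_*_0 wheel families targeted by the markers above.
import platform

def macos_wheel_family() -> str:
    darwin_major = int(platform.release().split(".")[0])  # e.g. "24.3.0" -> 24
    families = {24: "macosx_15_0", 23: "macosx_14_0", 22: "macosx_13_0"}
    return families.get(darwin_major, "no prebuilt wheel for this release")

if platform.system() == "Darwin":
    print(macos_wheel_family())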
diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt
index 37e5aa40..510a20f4 100644
--- a/requirements/portable/requirements_cpu_only.txt
+++ b/requirements/portable/requirements_cpu_only.txt
@@ -1,12 +1,13 @@
-beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
@@ -18,5 +19,5 @@ sse-starlette==1.6.5
tiktoken
 
# llama.cpp (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"
diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt
index dcb2884b..e6d9f0c5 100644
--- a/requirements/portable/requirements_cpu_only_noavx2.txt
+++ b/requirements/portable/requirements_cpu_only_noavx2.txt
@@ -1,12 +1,13 @@
-beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
@@ -18,5 +19,5 @@ sse-starlette==1.6.5
tiktoken
 
# llama.cpp (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"
diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt
index 8f1295bb..48f92e0a 100644
--- a/requirements/portable/requirements_noavx2.txt
+++ b/requirements/portable/requirements_noavx2.txt
@@ -1,12 +1,13 @@
-beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
@@ -18,5 +19,5 @@ sse-starlette==1.6.5
tiktoken
 
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_nowheels.txt b/requirements/portable/requirements_nowheels.txt
index 21805fe2..3d30e6d6 100644
--- a/requirements/portable/requirements_nowheels.txt
+++ b/requirements/portable/requirements_nowheels.txt
@@ -1,12 +1,13 @@
-beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt
index 858b4488..9f93424f 100644
--- a/requirements/portable/requirements_vulkan.txt
+++ b/requirements/portable/requirements_vulkan.txt
@@ -1,12 +1,13 @@
-beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
@@ -18,5 +19,5 @@ sse-starlette==1.6.5
tiktoken
 
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt
index 569bae99..9070b9a6 100644
--- a/requirements/portable/requirements_vulkan_noavx2.txt
+++ b/requirements/portable/requirements_vulkan_noavx2.txt
@@ -1,12 +1,13 @@
-beautifulsoup4==4.13.4
duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
+html2text==2025.4.15
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
PyPDF2==3.0.1
+python-docx==1.1.2
pyyaml
requests
rich
@@ -18,5 +19,5 @@ sse-starlette==1.6.5
tiktoken
 
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.18.0/llama_cpp_binaries-0.18.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/server.py b/server.py
index c22ed1f1..e0e3fbe5 100644
--- a/server.py
+++ b/server.py
@@ -1,12 +1,24 @@
import os
+import shutil
import warnings
+from pathlib import Path
from modules import shared
from modules.block_requests import OpenMonkeyPatch, RequestBlocker
from modules.logging_colors import logger
-os.environ['GRADIO_ANALYTICS_ENABLED'] = 'False'
-os.environ['BITSANDBYTES_NOWELCOME'] = '1'
+# Set up Gradio temp directory path
+gradio_temp_path = Path('user_data') / 'cache' / 'gradio'
+shutil.rmtree(gradio_temp_path, ignore_errors=True)
+gradio_temp_path.mkdir(parents=True, exist_ok=True)
+
+# Set environment variables
+os.environ.update({
+ 'GRADIO_ANALYTICS_ENABLED': 'False',
+ 'BITSANDBYTES_NOWELCOME': '1',
+ 'GRADIO_TEMP_DIR': str(gradio_temp_path)
+})
+
warnings.filterwarnings('ignore', category=UserWarning, message='TypedStorage is deprecated')
warnings.filterwarnings('ignore', category=UserWarning, message='Using the update method is deprecated')
warnings.filterwarnings('ignore', category=UserWarning, message='Field "model_name" has conflict')
@@ -27,7 +39,6 @@ import signal
import sys
import time
from functools import partial
-from pathlib import Path
from threading import Lock, Thread
 
import yaml
@@ -45,6 +56,7 @@ from modules import (
ui_session,
utils
)
+from modules.chat import generate_pfp_cache
from modules.extensions import apply_extensions
from modules.LoRA import add_lora_to_model
from modules.models import load_model, unload_model_if_idle
@@ -60,6 +72,14 @@ from modules.utils import gradio
def signal_handler(sig, frame):
logger.info("Received Ctrl+C. Shutting down Text generation web UI gracefully.")
+
+ # Explicitly stop LlamaServer to avoid __del__ cleanup issues during shutdown
+ if shared.model and shared.model.__class__.__name__ == 'LlamaServer':
+ try:
+ shared.model.stop()
+ except:
+ pass
+
sys.exit(0)
@@ -85,17 +105,20 @@ def create_interface():
# Force some events to be triggered on page load
shared.persistent_interface_state.update({
+ 'mode': shared.settings['mode'],
'loader': shared.args.loader or 'llama.cpp',
- 'mode': shared.settings['mode'] if shared.settings['mode'] == 'instruct' else gr.update(),
- 'character_menu': shared.args.character or shared.settings['character'],
- 'instruction_template_str': shared.settings['instruction_template_str'],
- 'prompt_menu-default': shared.settings['prompt-default'],
- 'prompt_menu-notebook': shared.settings['prompt-notebook'],
'filter_by_loader': (shared.args.loader or 'All') if not shared.args.portable else 'llama.cpp'
})
- if Path("user_data/cache/pfp_character.png").exists():
- Path("user_data/cache/pfp_character.png").unlink()
+ # Clear existing cache files
+ for cache_file in ['pfp_character.png', 'pfp_character_thumb.png']:
+ cache_path = Path(f"user_data/cache/{cache_file}")
+ if cache_path.exists():
+ cache_path.unlink()
+
+ # Regenerate for default character
+ if shared.settings['mode'] != 'instruct':
+ generate_pfp_cache(shared.settings['character'])
# css/js strings
css = ui.css
@@ -126,7 +149,7 @@ def create_interface():
ui_default.create_ui()
ui_notebook.create_ui()
- ui_parameters.create_ui(shared.settings['preset']) # Parameters tab
+ ui_parameters.create_ui() # Parameters tab
ui_model_menu.create_ui() # Model tab
if not shared.args.portable:
training.create_ui() # Training tab
@@ -142,17 +165,35 @@ def create_interface():
ui_parameters.create_event_handlers()
ui_model_menu.create_event_handlers()
 
+ # UI persistence events
+ ui.setup_auto_save()
+
# Interface launch events
shared.gradio['interface'].load(
None,
gradio('show_controls'),
None,
js=f"""(x) => {{
- if ({str(shared.settings['dark_theme']).lower()}) {{
- document.getElementsByTagName('body')[0].classList.add('dark');
- }}
- else {{
- document.getElementsByTagName('body')[0].classList.remove('dark');
+ // Check if this is first visit or if localStorage is out of sync
+ const savedTheme = localStorage.getItem('theme');
+ const serverTheme = {str(shared.settings['dark_theme']).lower()} ? 'dark' : 'light';
+
+ // If no saved theme or mismatch with server on first load, use server setting
+ if (!savedTheme || !sessionStorage.getItem('theme_synced')) {{
+ localStorage.setItem('theme', serverTheme);
+ sessionStorage.setItem('theme_synced', 'true');
+ if (serverTheme === 'dark') {{
+ document.getElementsByTagName('body')[0].classList.add('dark');
+ }} else {{
+ document.getElementsByTagName('body')[0].classList.remove('dark');
+ }}
+ }} else {{
+ // Use localStorage for subsequent reloads
+ if (savedTheme === 'dark') {{
+ document.getElementsByTagName('body')[0].classList.add('dark');
+ }} else {{
+ document.getElementsByTagName('body')[0].classList.remove('dark');
+ }}
}}
{js}
{ui.show_controls_js}
@@ -208,13 +249,7 @@ if __name__ == "__main__":
shared.model_config['.*'] = get_fallback_settings()
shared.model_config.move_to_end('.*', last=False) # Move to the beginning
- # Activate the extensions listed on settings.yaml
extensions_module.available_extensions = utils.get_available_extensions()
- for extension in shared.settings['default_extensions']:
- shared.args.extensions = shared.args.extensions or []
- if extension not in shared.args.extensions:
- shared.args.extensions.append(extension)
-
available_models = utils.get_available_models()
# Model defined through --model
@@ -277,8 +312,8 @@ if __name__ == "__main__":
if shared.args.nowebui:
# Start the API in standalone mode
- shared.args.extensions = [x for x in shared.args.extensions if x != 'gallery']
- if shared.args.extensions is not None and len(shared.args.extensions) > 0:
+ shared.args.extensions = [x for x in (shared.args.extensions or []) if x != 'gallery']
+ if shared.args.extensions:
extensions_module.load_extensions()
else:
# Launch the web UI
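The signal_handler change above stops a running LlamaServer explicitly on Ctrl+C instead of leaving cleanup to __del__ during interpreter teardown. A standalone sketch of the same pattern on a Unix-like system, using a generic child process as a stand-in for the model server (the process and handler names here are illustrative, not the project's API):

# Standalone sketch: terminate a child server process explicitly on Ctrl+C
# rather than relying on __del__ at interpreter shutdown.
import signal
import subprocess
import sys
import time

server = subprocess.Popen(["sleep", "3600"])  # stand-in for a model server process

def handle_sigint(sig, frame):
    try:
        server.terminate()
        server.wait(timeout=5)
    except Exception:
        pass  # cleanup errors must not block shutdown
    sys.exit(0)

signal.signal(signal.SIGINT, handle_sigint)

while True:
    time.sleep(1)  # main loop; Ctrl+C triggers handle_sigint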
diff --git a/user_data/presets/Contrastive Search.yaml b/user_data/presets/Contrastive Search.yaml
deleted file mode 100644
index d9a47a9f..00000000
--- a/user_data/presets/Contrastive Search.yaml
+++ /dev/null
@@ -1,3 +0,0 @@
-do_sample: false
-top_k: 4
-penalty_alpha: 0.3
diff --git a/user_data/presets/Null preset.yaml b/user_data/presets/Null preset.yaml
deleted file mode 100644
index 714aa9a3..00000000
--- a/user_data/presets/Null preset.yaml
+++ /dev/null
@@ -1 +0,0 @@
-temperature: 1
diff --git a/user_data/presets/Qwen3 - No Thinking.yaml b/user_data/presets/Qwen3 - No Thinking.yaml
new file mode 100644
index 00000000..b1c1e03c
--- /dev/null
+++ b/user_data/presets/Qwen3 - No Thinking.yaml
@@ -0,0 +1,3 @@
+temperature: 0.7
+top_p: 0.8
+top_k: 20
diff --git a/user_data/presets/Qwen3 - Thinking.yaml b/user_data/presets/Qwen3 - Thinking.yaml
new file mode 100644
index 00000000..cb2942f9
--- /dev/null
+++ b/user_data/presets/Qwen3 - Thinking.yaml
@@ -0,0 +1,3 @@
+temperature: 0.6
+top_p: 0.95
+top_k: 20
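The two new Qwen3 presets pin only temperature, top_p and top_k, leaving every other sampling parameter at its default. A hypothetical loader showing how such a preset file could be merged over defaults (the defaults dict and function below are assumptions for illustration; the project's actual preset handling is not shown in this diff):

# Illustrative preset loader: merge a preset YAML like the ones added above
# into a dictionary of default sampling parameters.
import yaml

DEFAULTS = {"temperature": 1.0, "top_p": 1.0, "top_k": 0, "min_p": 0.0}

def load_preset(path: str) -> dict:
    params = dict(DEFAULTS)
    with open(path, encoding="utf-8") as f:
        params.update(yaml.safe_load(f) or {})
    return params

print(load_preset("user_data/presets/Qwen3 - Thinking.yaml"))
# e.g. {'temperature': 0.6, 'top_p': 0.95, 'top_k': 20, 'min_p': 0.0}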
diff --git a/user_data/settings-template.yaml b/user_data/settings-template.yaml
deleted file mode 100644
index ce0f77e1..00000000
--- a/user_data/settings-template.yaml
+++ /dev/null
@@ -1,77 +0,0 @@
-show_controls: true
-start_with: ''
-mode: instruct
-chat_style: cai-chat
-chat-instruct_command: |-
- Continue the chat dialogue below. Write a single reply for the character "<|character|>".
-
- <|prompt|>
-prompt-default: QA
-prompt-notebook: QA
-character: Assistant
-name1: You
-user_bio: ''
-custom_system_message: ''
-preset: min_p
-max_new_tokens: 512
-max_new_tokens_min: 1
-max_new_tokens_max: 4096
-prompt_lookup_num_tokens: 0
-max_tokens_second: 0
-max_updates_second: 12
-auto_max_new_tokens: true
-ban_eos_token: false
-add_bos_token: true
-enable_thinking: true
-skip_special_tokens: true
-stream: true
-static_cache: false
-truncation_length: 8192
-seed: -1
-custom_stopping_strings: ''
-custom_token_bans: ''
-negative_prompt: ''
-dark_theme: true
-default_extensions: []
-instruction_template_str: |-
- {%- set ns = namespace(found=false) -%}
- {%- for message in messages -%}
- {%- if message['role'] == 'system' -%}
- {%- set ns.found = true -%}
- {%- endif -%}
- {%- endfor -%}
- {%- if not ns.found -%}
- {{- '' + 'Below is an instruction that describes a task. Write a response that appropriately completes the request.' + '\n\n' -}}
- {%- endif %}
- {%- for message in messages %}
- {%- if message['role'] == 'system' -%}
- {{- '' + message['content'] + '\n\n' -}}
- {%- else -%}
- {%- if message['role'] == 'user' -%}
- {{-'### Instruction:\n' + message['content'] + '\n\n'-}}
- {%- else -%}
- {{-'### Response:\n' + message['content'] + '\n\n' -}}
- {%- endif -%}
- {%- endif -%}
- {%- endfor -%}
- {%- if add_generation_prompt -%}
- {{-'### Response:\n'-}}
- {%- endif -%}
-chat_template_str: |-
- {%- for message in messages %}
- {%- if message['role'] == 'system' -%}
- {%- if message['content'] -%}
- {{- message['content'] + '\n\n' -}}
- {%- endif -%}
- {%- if user_bio -%}
- {{- user_bio + '\n\n' -}}
- {%- endif -%}
- {%- else -%}
- {%- if message['role'] == 'user' -%}
- {{- name1 + ': ' + message['content'] + '\n'-}}
- {%- else -%}
- {{- name2 + ': ' + message['content'] + '\n' -}}
- {%- endif -%}
- {%- endif -%}
- {%- endfor -%}
-