"
if auto_adjust:
return vram_info, gr.update(value=current_layers, maximum=max_layers)
else:
diff --git a/modules/shared.py b/modules/shared.py
index 4e0a20db..d2305f30 100644
--- a/modules/shared.py
+++ b/modules/shared.py
@@ -47,6 +47,7 @@ settings = {
'max_new_tokens_max': 4096,
'prompt_lookup_num_tokens': 0,
'max_tokens_second': 0,
+ 'max_updates_second': 12,
'auto_max_new_tokens': True,
'ban_eos_token': False,
'add_bos_token': True,
@@ -86,7 +87,7 @@ group.add_argument('--idle-timeout', type=int, default=0, help='Unload model aft
# Model loader
group = parser.add_argument_group('Model loader')
-group.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, ExLlamav3_HF, ExLlamav2_HF, ExLlamav2, HQQ, TensorRT-LLM.')
+group.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, ExLlamav3_HF, ExLlamav2_HF, ExLlamav2, TensorRT-LLM.')
# Transformers/Accelerate
group = parser.add_argument_group('Transformers/Accelerate')
@@ -151,10 +152,6 @@ group.add_argument('--no_sdpa', action='store_true', help='Force Torch SDPA to n
group.add_argument('--num_experts_per_token', type=int, default=2, metavar='N', help='Number of experts to use for generation. Applies to MoE models like Mixtral.')
group.add_argument('--enable_tp', action='store_true', help='Enable Tensor Parallelism (TP) in ExLlamaV2.')
-# HQQ
-group = parser.add_argument_group('HQQ')
-group.add_argument('--hqq-backend', type=str, default='PYTORCH_COMPILE', help='Backend for the HQQ loader. Valid options: PYTORCH, PYTORCH_COMPILE, ATEN.')
-
# TensorRT-LLM
group = parser.add_argument_group('TensorRT-LLM')
group.add_argument('--cpp-runner', action='store_true', help='Use the ModelRunnerCpp runner, which is faster than the default ModelRunner but doesn\'t support streaming yet.')
@@ -262,8 +259,6 @@ def fix_loader_name(name):
return 'ExLlamav2_HF'
elif name in ['exllamav3-hf', 'exllamav3_hf', 'exllama-v3-hf', 'exllama_v3_hf', 'exllama-v3_hf', 'exllama3-hf', 'exllama3_hf', 'exllama-3-hf', 'exllama_3_hf', 'exllama-3_hf']:
return 'ExLlamav3_HF'
- elif name in ['hqq']:
- return 'HQQ'
elif name in ['tensorrt', 'tensorrtllm', 'tensorrt_llm', 'tensorrt-llm', 'tensort', 'tensortllm']:
return 'TensorRT-LLM'
diff --git a/modules/text_generation.py b/modules/text_generation.py
index 00b9275a..1fd6d810 100644
--- a/modules/text_generation.py
+++ b/modules/text_generation.py
@@ -65,39 +65,41 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False, escap
all_stop_strings += st
shared.stop_everything = False
+ last_update = -1
reply = ''
is_stream = state['stream']
if len(all_stop_strings) > 0 and not state['stream']:
state = copy.deepcopy(state)
state['stream'] = True
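+        # Derive the minimum interval between UI updates from the user-set cap;
+        # a max_updates_second of 0 leaves streaming updates unthrottled.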
+ min_update_interval = 0
+ if state.get('max_updates_second', 0) > 0:
+ min_update_interval = 1 / state['max_updates_second']
+
# Generate
- last_update = -1
- latency_threshold = 1 / 1000
for reply in generate_func(question, original_question, state, stopping_strings, is_chat=is_chat):
- cur_time = time.monotonic()
reply, stop_found = apply_stopping_strings(reply, all_stop_strings)
if escape_html:
reply = html.escape(reply)
if is_stream:
+ cur_time = time.time()
+
# Limit number of tokens/second to make text readable in real time
if state['max_tokens_second'] > 0:
diff = 1 / state['max_tokens_second'] - (cur_time - last_update)
if diff > 0:
time.sleep(diff)
- last_update = time.monotonic()
+ last_update = time.time()
yield reply
# Limit updates to avoid lag in the Gradio UI
# API updates are not limited
else:
- # If 'generate_func' takes less than 0.001 seconds to yield the next token
- # (equivalent to more than 1000 tok/s), assume that the UI is lagging behind and skip yielding
- if (cur_time - last_update) > latency_threshold:
+ if cur_time - last_update > min_update_interval:
+ last_update = cur_time
yield reply
- last_update = time.monotonic()
if stop_found or (state['max_tokens_second'] > 0 and shared.stop_everything):
break
@@ -503,11 +505,11 @@ def generate_reply_custom(question, original_question, state, stopping_strings=N
return
-def print_prompt(prompt, max_chars=2000):
+def print_prompt(prompt, max_chars=-1):
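+    # A non-positive max_chars disables truncation; the default now prints the full prompt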
DARK_YELLOW = "\033[38;5;3m"
RESET = "\033[0m"
- if len(prompt) > max_chars:
+ if max_chars > 0 and len(prompt) > max_chars:
half_chars = max_chars // 2
hidden_len = len(prompt[half_chars:-half_chars])
hidden_msg = f"{DARK_YELLOW}[...{hidden_len} characters hidden...]{RESET}"
diff --git a/modules/ui.py b/modules/ui.py
index eeb6ce92..a2662e14 100644
--- a/modules/ui.py
+++ b/modules/ui.py
@@ -109,7 +109,6 @@ def list_model_elements():
'threads',
'threads_batch',
'batch_size',
- 'hqq_backend',
'ctx_size',
'cache_type',
'tensor_split',
@@ -192,6 +191,7 @@ def list_interface_input_elements():
'max_new_tokens',
'prompt_lookup_num_tokens',
'max_tokens_second',
+ 'max_updates_second',
'do_sample',
'dynamic_temperature',
'temperature_last',
@@ -210,6 +210,15 @@ def list_interface_input_elements():
'negative_prompt',
'dry_sequence_breakers',
'grammar_string',
+ 'navigate_message_index',
+ 'navigate_direction',
+ 'navigate_message_role',
+ 'edit_message_index',
+ 'edit_message_text',
+ 'edit_message_role',
+ 'branch_index',
+ 'enable_web_search',
+ 'web_search_pages',
]
# Chat elements
diff --git a/modules/ui_chat.py b/modules/ui_chat.py
index 0856cfab..d79aa523 100644
--- a/modules/ui_chat.py
+++ b/modules/ui_chat.py
@@ -24,7 +24,8 @@ def create_ui():
with gr.Row(elem_id='past-chats-row', elem_classes=['pretty_scrollbar']):
with gr.Column():
with gr.Row(elem_id='past-chats-buttons'):
- shared.gradio['branch_chat'] = gr.Button('Branch', elem_classes='refresh-button', interactive=not mu)
+ shared.gradio['branch_chat'] = gr.Button('Branch', elem_classes='refresh-button', elem_id='Branch', interactive=not mu)
+ shared.gradio['branch_index'] = gr.Number(value=-1, precision=0, visible=False, elem_id="Branch-index", interactive=True)
shared.gradio['rename_chat'] = gr.Button('Rename', elem_classes='refresh-button', interactive=not mu)
shared.gradio['delete_chat'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu)
shared.gradio['Start new chat'] = gr.Button('New chat', elem_classes=['refresh-button', 'focus-on-chat-input'])
@@ -47,13 +48,13 @@ def create_ui():
with gr.Row():
with gr.Column(elem_id='chat-col'):
shared.gradio['display'] = gr.JSON(value={}, visible=False) # Hidden buffer
- shared.gradio['html_display'] = gr.HTML(value=chat_html_wrapper({'internal': [], 'visible': []}, '', '', 'chat', 'cai-chat', '')['html'], visible=True)
+ shared.gradio['html_display'] = gr.HTML(value=chat_html_wrapper({'internal': [], 'visible': [], 'metadata': {}}, '', '', 'chat', 'cai-chat', '')['html'], visible=True)
with gr.Row(elem_id="chat-input-row"):
with gr.Column(scale=1, elem_id='gr-hover-container'):
gr.HTML(value='☰', elem_id='gr-hover')
with gr.Column(scale=10, elem_id='chat-input-container'):
- shared.gradio['textbox'] = gr.Textbox(label='', placeholder='Send a message', elem_id='chat-input', elem_classes=['add_scrollbar'])
+ shared.gradio['textbox'] = gr.MultimodalTextbox(label='', placeholder='Send a message', file_types=['text', '.pdf'], file_count="multiple", elem_id='chat-input', elem_classes=['add_scrollbar'])
shared.gradio['show_controls'] = gr.Checkbox(value=shared.settings['show_controls'], label='Show controls (Ctrl+S)', elem_id='show-controls')
shared.gradio['typing-dots'] = gr.HTML(value='', label='typing', elem_id='typing-container')
@@ -70,8 +71,6 @@ def create_ui():
shared.gradio['Remove last'] = gr.Button('Remove last reply (Ctrl + Shift + Backspace)', elem_id='Remove-last')
with gr.Row():
- shared.gradio['Replace last reply'] = gr.Button('Replace last reply (Ctrl + Shift + L)', elem_id='Replace-last')
- shared.gradio['Copy last reply'] = gr.Button('Copy last reply (Ctrl + Shift + K)', elem_id='Copy-last')
shared.gradio['Impersonate'] = gr.Button('Impersonate (Ctrl + Shift + M)', elem_id='Impersonate')
with gr.Row():
@@ -79,14 +78,20 @@ def create_ui():
shared.gradio['Send dummy reply'] = gr.Button('Send dummy reply')
with gr.Row():
- shared.gradio['send-chat-to-default'] = gr.Button('Send to default')
- shared.gradio['send-chat-to-notebook'] = gr.Button('Send to notebook')
+ shared.gradio['send-chat-to-default'] = gr.Button('Send to Default')
+ shared.gradio['send-chat-to-notebook'] = gr.Button('Send to Notebook')
with gr.Row(elem_id='chat-controls', elem_classes=['pretty_scrollbar']):
with gr.Column():
with gr.Row():
shared.gradio['start_with'] = gr.Textbox(label='Start reply with', placeholder='Sure thing!', value=shared.settings['start_with'], elem_classes=['add_scrollbar'])
+ with gr.Row():
+ shared.gradio['enable_web_search'] = gr.Checkbox(value=shared.settings.get('enable_web_search', False), label='Activate web search')
+
+ with gr.Row(visible=shared.settings.get('enable_web_search', False)) as shared.gradio['web_search_row']:
+ shared.gradio['web_search_pages'] = gr.Number(value=shared.settings.get('web_search_pages', 3), precision=0, label='Number of pages to download', minimum=1, maximum=10)
+
with gr.Row():
shared.gradio['mode'] = gr.Radio(choices=['instruct', 'chat-instruct', 'chat'], value=shared.settings['mode'] if shared.settings['mode'] in ['chat', 'chat-instruct'] else None, label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template Parameters > Instruction template is used.', elem_id='chat-mode')
@@ -96,6 +101,22 @@ def create_ui():
with gr.Row():
shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=12, label='Command for chat-instruct mode', info='<|character|> and <|prompt|> get replaced with the bot name and the regular chat prompt respectively.', visible=shared.settings['mode'] == 'chat-instruct', elem_classes=['add_scrollbar'])
+ with gr.Row():
+ shared.gradio['count_tokens'] = gr.Button('Count tokens', size='sm')
+
+ shared.gradio['token_display'] = gr.HTML(value='', elem_classes='token-display')
+
+ # Hidden elements for version navigation and editing
+ with gr.Row(visible=False):
+ shared.gradio['navigate_message_index'] = gr.Number(value=-1, precision=0, elem_id="Navigate-message-index")
+ shared.gradio['navigate_direction'] = gr.Textbox(value="", elem_id="Navigate-direction")
+ shared.gradio['navigate_message_role'] = gr.Textbox(value="", elem_id="Navigate-message-role")
+ shared.gradio['navigate_version'] = gr.Button(elem_id="Navigate-version")
+ shared.gradio['edit_message_index'] = gr.Number(value=-1, precision=0, elem_id="Edit-message-index")
+ shared.gradio['edit_message_text'] = gr.Textbox(value="", elem_id="Edit-message-text")
+ shared.gradio['edit_message_role'] = gr.Textbox(value="", elem_id="Edit-message-role")
+ shared.gradio['edit_message'] = gr.Button(elem_id="Edit-message")
+
def create_chat_settings_ui():
mu = shared.args.multi_user
@@ -185,7 +206,7 @@ def create_event_handlers():
shared.gradio['Generate'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
- lambda x: (x, ''), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then(
+ lambda x: (x, {"text": "", "files": []}), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then(
lambda: None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.add("_generating")').then(
chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then(
None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.remove("_generating")').then(
@@ -193,7 +214,7 @@ def create_event_handlers():
shared.gradio['textbox'].submit(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
- lambda x: (x, ''), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then(
+ lambda x: (x, {"text": "", "files": []}), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then(
lambda: None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.add("_generating")').then(
chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then(
None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.remove("_generating")').then(
@@ -221,10 +242,6 @@ def create_event_handlers():
None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.remove("_generating")').then(
None, None, None, js=f'() => {{{ui.audio_notification_js}}}')
- shared.gradio['Replace last reply'].click(
- ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
- chat.handle_replace_last_reply_click, gradio('textbox', 'interface_state'), gradio('history', 'display', 'textbox'), show_progress=False)
-
shared.gradio['Send dummy message'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
chat.handle_send_dummy_message_click, gradio('textbox', 'interface_state'), gradio('history', 'display', 'textbox'), show_progress=False)
@@ -258,7 +275,7 @@ def create_event_handlers():
shared.gradio['branch_chat'].click(
ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
- chat.handle_branch_chat_click, gradio('interface_state'), gradio('history', 'display', 'unique_id'), show_progress=False)
+ chat.handle_branch_chat_click, gradio('interface_state'), gradio('history', 'display', 'unique_id', 'branch_index'), show_progress=False)
shared.gradio['rename_chat'].click(chat.handle_rename_chat_click, None, gradio('rename_to', 'rename-row'), show_progress=False)
shared.gradio['rename_to-cancel'].click(lambda: gr.update(visible=False), None, gradio('rename-row'), show_progress=False)
@@ -290,7 +307,14 @@ def create_event_handlers():
None, gradio('mode'), None, js="(mode) => {mode === 'instruct' ? document.getElementById('character-menu').parentNode.parentNode.style.display = 'none' : document.getElementById('character-menu').parentNode.parentNode.style.display = ''}")
shared.gradio['chat_style'].change(chat.redraw_html, gradio(reload_arr), gradio('display'), show_progress=False)
- shared.gradio['Copy last reply'].click(chat.send_last_reply_to_input, gradio('history'), gradio('textbox'), show_progress=False)
+
+ shared.gradio['navigate_version'].click(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ chat.handle_navigate_version_click, gradio('interface_state'), gradio('history', 'display'), show_progress=False)
+
+ shared.gradio['edit_message'].click(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ chat.handle_edit_message_click, gradio('interface_state'), gradio('history', 'display'), show_progress=False)
# Save/delete a character
shared.gradio['save_character'].click(chat.handle_save_character_click, gradio('name2'), gradio('save_character_filename', 'character_saver'), show_progress=False)
@@ -347,3 +371,13 @@ def create_event_handlers():
None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}')
shared.gradio['show_controls'].change(None, gradio('show_controls'), None, js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}')
+
+ shared.gradio['count_tokens'].click(
+ ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then(
+ chat.count_prompt_tokens, gradio('textbox', 'interface_state'), gradio('token_display'), show_progress=False)
+
+ shared.gradio['enable_web_search'].change(
+ lambda x: gr.update(visible=x),
+ gradio('enable_web_search'),
+ gradio('web_search_row')
+ )
diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py
index 5b7dfdd8..862b3893 100644
--- a/modules/ui_model_menu.py
+++ b/modules/ui_model_menu.py
@@ -39,11 +39,9 @@ def create_ui():
with gr.Row():
with gr.Column():
shared.gradio['gpu_layers'] = gr.Slider(label="gpu-layers", minimum=0, maximum=get_initial_gpu_layers_max(), step=1, value=shared.args.gpu_layers, info='Must be greater than 0 for the GPU to be used. ⚠️ Lower this value if you can\'t load the model.')
- shared.gradio['ctx_size'] = gr.Slider(label='ctx-size', minimum=256, maximum=131072, step=256, value=shared.args.ctx_size, info='Context length. ⚠️ Lower this value if you can\'t load the model.')
+ shared.gradio['ctx_size'] = gr.Slider(label='ctx-size', minimum=256, maximum=131072, step=256, value=shared.args.ctx_size, info='Context length. Common values: 4096, 8192, 16384, 32768, 65536, 131072. ⚠️ Lower this value if you can\'t load the model.')
shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7')
shared.gradio['cache_type'] = gr.Dropdown(label="cache-type", choices=['fp16', 'q8_0', 'q4_0', 'fp8', 'q8', 'q7', 'q6', 'q5', 'q4', 'q3', 'q2'], value=shared.args.cache_type, allow_custom_value=True, info='Valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4; ExLlamaV3 - fp16, q2 to q8. For ExLlamaV3, you can type custom combinations for separate k/v bits (e.g. q4_q8).')
- shared.gradio['hqq_backend'] = gr.Dropdown(label="hqq_backend", choices=["PYTORCH", "PYTORCH_COMPILE", "ATEN"], value=shared.args.hqq_backend)
-
with gr.Column():
shared.gradio['vram_info'] = gr.HTML(value=get_initial_vram_info())
shared.gradio['flash_attn'] = gr.Checkbox(label="flash-attn", value=shared.args.flash_attn, info='Use flash-attention.')
@@ -312,7 +310,7 @@ def get_initial_vram_info():
for_ui=True
)
- return "
Estimated VRAM to load the model:"
+ return "
Estimated VRAM to load the model:
"
def get_initial_gpu_layers_max():
diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py
index 84f9fbfc..733d0901 100644
--- a/modules/ui_parameters.py
+++ b/modules/ui_parameters.py
@@ -71,6 +71,8 @@ def create_ui(default_preset):
shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], value=shared.settings['max_new_tokens'], step=1, label='max_new_tokens', info='⚠️ Setting this too high can cause prompt truncation.')
shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.')
shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.')
+ shared.gradio['max_updates_second'] = gr.Slider(value=shared.settings['max_updates_second'], minimum=0, maximum=24, step=1, label='Maximum UI updates/second', info='Set this if you experience lag in the UI during streaming.')
+
with gr.Column():
with gr.Row():
with gr.Column():
diff --git a/modules/utils.py b/modules/utils.py
index 0e8bdd18..577c55b8 100644
--- a/modules/utils.py
+++ b/modules/utils.py
@@ -74,7 +74,7 @@ def natural_keys(text):
def check_model_loaded():
if shared.model_name == 'None' or shared.model is None:
- if len(get_available_models()) <= 1:
+ if len(get_available_models()) == 0:
error_msg = "No model is loaded.\n\nTo get started:\n1) Place a GGUF file in your user_data/models folder\n2) Go to the Model tab and select it"
logger.error(error_msg)
return False, error_msg
diff --git a/modules/web_search.py b/modules/web_search.py
new file mode 100644
index 00000000..1f670349
--- /dev/null
+++ b/modules/web_search.py
@@ -0,0 +1,129 @@
+import concurrent.futures
+from concurrent.futures import as_completed
+from datetime import datetime
+
+import requests
+from bs4 import BeautifulSoup
+from duckduckgo_search import DDGS
+
+from modules.logging_colors import logger
+
+
+def get_current_timestamp():
+ """Returns the current time in 24-hour format"""
+ return datetime.now().strftime('%b %d, %Y %H:%M')
+
+
+def download_web_page(url, timeout=5):
+ """Download and extract text from a web page"""
+ try:
+ headers = {
+ 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36'
+ }
+ response = requests.get(url, headers=headers, timeout=timeout)
+ response.raise_for_status()
+
+ soup = BeautifulSoup(response.content, 'html.parser')
+
+ # Remove script and style elements
+ for script in soup(["script", "style"]):
+ script.decompose()
+
+ # Get text and clean it up
+ text = soup.get_text()
+ lines = (line.strip() for line in text.splitlines())
+        chunks = (phrase.strip() for line in lines for phrase in line.split("  "))
+ text = ' '.join(chunk for chunk in chunks if chunk)
+
+ return text
+ except Exception as e:
+ logger.error(f"Error downloading {url}: {e}")
+ return f"[Error downloading content from {url}: {str(e)}]"
+
+
+def perform_web_search(query, num_pages=3, max_workers=5):
+ """Perform web search and return results with content"""
+ try:
+ with DDGS() as ddgs:
+ results = list(ddgs.text(query, max_results=num_pages))
+
+ # Prepare download tasks
+ download_tasks = []
+ for i, result in enumerate(results):
+ url = result.get('href', '')
+ title = result.get('title', f'Search Result {i+1}')
+ download_tasks.append((url, title, i))
+
+ search_results = [None] * len(download_tasks) # Pre-allocate to maintain order
+
+ # Download pages in parallel
+ with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor:
+ # Submit all download tasks
+ future_to_task = {
+ executor.submit(download_web_page, task[0]): task
+ for task in download_tasks
+ }
+
+ # Collect results as they complete
+ for future in as_completed(future_to_task):
+ url, title, index = future_to_task[future]
+ try:
+ content = future.result()
+ search_results[index] = {
+ 'title': title,
+ 'url': url,
+ 'content': content
+ }
+ except Exception as e:
+ logger.error(f"Error downloading {url}: {e}")
+ # Include failed downloads with empty content
+ search_results[index] = {
+ 'title': title,
+ 'url': url,
+ 'content': ''
+ }
+
+ return search_results
+
+ except Exception as e:
+ logger.error(f"Error performing web search: {e}")
+ return []
+
+
+def add_web_search_attachments(history, row_idx, user_message, search_query, state):
+ """Perform web search and add results as attachments"""
+ if not search_query:
+ logger.warning("No search query provided")
+ return
+
+ try:
+ logger.info(f"Using search query: {search_query}")
+
+ # Perform web search
+ num_pages = int(state.get('web_search_pages', 3))
+ search_results = perform_web_search(search_query, num_pages)
+
+ if not search_results:
+ logger.warning("No search results found")
+ return
+
+ # Add search results as attachments
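+        # History metadata entries are keyed by role and row index, e.g. "user_3"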
+ key = f"user_{row_idx}"
+ if key not in history['metadata']:
+ history['metadata'][key] = {"timestamp": get_current_timestamp()}
+ if "attachments" not in history['metadata'][key]:
+ history['metadata'][key]["attachments"] = []
+
+ for result in search_results:
+ attachment = {
+ "name": result['title'],
+ "type": "text/html",
+ "url": result['url'],
+ "content": result['content']
+ }
+ history['metadata'][key]["attachments"].append(attachment)
+
+ logger.info(f"Added {len(search_results)} web search results as attachments")
+
+ except Exception as e:
+ logger.error(f"Error in web search: {e}")
diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt
index af5f7d8a..2c322715 100644
--- a/requirements/full/requirements.txt
+++ b/requirements/full/requirements.txt
@@ -1,7 +1,9 @@
accelerate==1.5.*
+beautifulsoup4==4.13.4
bitsandbytes==0.45.*
colorama
datasets
+duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
@@ -13,6 +15,7 @@ peft==0.15.*
Pillow>=9.5.0
psutil
pydantic==2.8.2
+PyPDF2==3.0.1
pyyaml
requests
rich
@@ -30,8 +33,8 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt
index 4e011989..6aeb325e 100644
--- a/requirements/full/requirements_amd.txt
+++ b/requirements/full/requirements_amd.txt
@@ -1,6 +1,8 @@
accelerate==1.5.*
+beautifulsoup4==4.13.4
colorama
datasets
+duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
@@ -12,6 +14,7 @@ peft==0.15.*
Pillow>=9.5.0
psutil
pydantic==2.8.2
+PyPDF2==3.0.1
pyyaml
requests
rich
@@ -29,7 +32,7 @@ sse-starlette==1.6.5
tiktoken
# AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt
index a3bd1350..3b052423 100644
--- a/requirements/full/requirements_amd_noavx2.txt
+++ b/requirements/full/requirements_amd_noavx2.txt
@@ -1,6 +1,8 @@
accelerate==1.5.*
+beautifulsoup4==4.13.4
colorama
datasets
+duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
@@ -12,6 +14,7 @@ peft==0.15.*
Pillow>=9.5.0
psutil
pydantic==2.8.2
+PyPDF2==3.0.1
pyyaml
requests
rich
@@ -29,7 +32,7 @@ sse-starlette==1.6.5
tiktoken
# AMD wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64"
diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt
index 363365bf..8c51459e 100644
--- a/requirements/full/requirements_apple_intel.txt
+++ b/requirements/full/requirements_apple_intel.txt
@@ -1,6 +1,8 @@
accelerate==1.5.*
+beautifulsoup4==4.13.4
colorama
datasets
+duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
@@ -12,6 +14,7 @@ peft==0.15.*
Pillow>=9.5.0
psutil
pydantic==2.8.2
+PyPDF2==3.0.1
pyyaml
requests
rich
@@ -29,7 +32,7 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9-py3-none-any.whl
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl
diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt
index 2843fed2..b9f15d45 100644
--- a/requirements/full/requirements_apple_silicon.txt
+++ b/requirements/full/requirements_apple_silicon.txt
@@ -1,6 +1,8 @@
accelerate==1.5.*
+beautifulsoup4==4.13.4
colorama
datasets
+duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
@@ -12,6 +14,7 @@ peft==0.15.*
Pillow>=9.5.0
psutil
pydantic==2.8.2
+PyPDF2==3.0.1
pyyaml
requests
rich
@@ -29,8 +32,8 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9-py3-none-any.whl
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl
diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt
index bd7c4a4f..0877d968 100644
--- a/requirements/full/requirements_cpu_only.txt
+++ b/requirements/full/requirements_cpu_only.txt
@@ -1,6 +1,8 @@
accelerate==1.5.*
+beautifulsoup4==4.13.4
colorama
datasets
+duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
@@ -12,6 +14,7 @@ peft==0.15.*
Pillow>=9.5.0
psutil
pydantic==2.8.2
+PyPDF2==3.0.1
pyyaml
requests
rich
@@ -29,5 +32,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt
index b5aa1cf7..cab78237 100644
--- a/requirements/full/requirements_cpu_only_noavx2.txt
+++ b/requirements/full/requirements_cpu_only_noavx2.txt
@@ -1,6 +1,8 @@
accelerate==1.5.*
+beautifulsoup4==4.13.4
colorama
datasets
+duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
@@ -12,6 +14,7 @@ peft==0.15.*
Pillow>=9.5.0
psutil
pydantic==2.8.2
+PyPDF2==3.0.1
pyyaml
requests
rich
@@ -29,5 +32,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt
index 89947cbe..dfd42577 100644
--- a/requirements/full/requirements_noavx2.txt
+++ b/requirements/full/requirements_noavx2.txt
@@ -1,7 +1,9 @@
accelerate==1.5.*
+beautifulsoup4==4.13.4
bitsandbytes==0.45.*
colorama
datasets
+duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
@@ -13,6 +15,7 @@ peft==0.15.*
Pillow>=9.5.0
psutil
pydantic==2.8.2
+PyPDF2==3.0.1
pyyaml
requests
rich
@@ -30,8 +33,8 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"
https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11"
diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt
index 2e631bf0..5d9f84ce 100644
--- a/requirements/full/requirements_nowheels.txt
+++ b/requirements/full/requirements_nowheels.txt
@@ -1,6 +1,8 @@
accelerate==1.5.*
+beautifulsoup4==4.13.4
colorama
datasets
+duckduckgo_search==8.0.2
einops
fastapi==0.112.4
gradio==4.37.*
@@ -12,6 +14,7 @@ peft==0.15.*
Pillow>=9.5.0
psutil
pydantic==2.8.2
+PyPDF2==3.0.1
pyyaml
requests
rich
diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt
index 79959398..fdae681d 100644
--- a/requirements/portable/requirements.txt
+++ b/requirements/portable/requirements.txt
@@ -1,9 +1,12 @@
+beautifulsoup4==4.13.4
+duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
+PyPDF2==3.0.1
pyyaml
requests
rich
@@ -15,5 +18,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt
index ca16e4c7..a58f39f7 100644
--- a/requirements/portable/requirements_apple_intel.txt
+++ b/requirements/portable/requirements_apple_intel.txt
@@ -1,9 +1,12 @@
+beautifulsoup4==4.13.4
+duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
+PyPDF2==3.0.1
pyyaml
requests
rich
@@ -15,5 +18,5 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt
index 18e1c506..91ea3a6d 100644
--- a/requirements/portable/requirements_apple_silicon.txt
+++ b/requirements/portable/requirements_apple_silicon.txt
@@ -1,9 +1,12 @@
+beautifulsoup4==4.13.4
+duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
+PyPDF2==3.0.1
pyyaml
requests
rich
@@ -15,6 +18,6 @@ sse-starlette==1.6.5
tiktoken
# Mac wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0"
diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt
index 693f4712..37e5aa40 100644
--- a/requirements/portable/requirements_cpu_only.txt
+++ b/requirements/portable/requirements_cpu_only.txt
@@ -1,9 +1,12 @@
+beautifulsoup4==4.13.4
+duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
+PyPDF2==3.0.1
pyyaml
requests
rich
@@ -15,5 +18,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only, AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows"
diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt
index 8635d11e..dcb2884b 100644
--- a/requirements/portable/requirements_cpu_only_noavx2.txt
+++ b/requirements/portable/requirements_cpu_only_noavx2.txt
@@ -1,9 +1,12 @@
+beautifulsoup4==4.13.4
+duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
+PyPDF2==3.0.1
pyyaml
requests
rich
@@ -15,5 +18,5 @@ sse-starlette==1.6.5
tiktoken
# llama.cpp (CPU only, no AVX2)
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows"
diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt
index e844596e..8f1295bb 100644
--- a/requirements/portable/requirements_noavx2.txt
+++ b/requirements/portable/requirements_noavx2.txt
@@ -1,9 +1,12 @@
+beautifulsoup4==4.13.4
+duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
+PyPDF2==3.0.1
pyyaml
requests
rich
@@ -15,5 +18,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_nowheels.txt b/requirements/portable/requirements_nowheels.txt
index 6f9566ba..21805fe2 100644
--- a/requirements/portable/requirements_nowheels.txt
+++ b/requirements/portable/requirements_nowheels.txt
@@ -1,9 +1,12 @@
+beautifulsoup4==4.13.4
+duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
+PyPDF2==3.0.1
pyyaml
requests
rich
diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt
index 9b7435d1..858b4488 100644
--- a/requirements/portable/requirements_vulkan.txt
+++ b/requirements/portable/requirements_vulkan.txt
@@ -1,9 +1,12 @@
+beautifulsoup4==4.13.4
+duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
+PyPDF2==3.0.1
pyyaml
requests
rich
@@ -15,5 +18,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt
index 513b7a15..569bae99 100644
--- a/requirements/portable/requirements_vulkan_noavx2.txt
+++ b/requirements/portable/requirements_vulkan_noavx2.txt
@@ -1,9 +1,12 @@
+beautifulsoup4==4.13.4
+duckduckgo_search==8.0.2
fastapi==0.112.4
gradio==4.37.*
jinja2==3.1.6
markdown
numpy==1.26.*
pydantic==2.8.2
+PyPDF2==3.0.1
pyyaml
requests
rich
@@ -15,5 +18,5 @@ sse-starlette==1.6.5
tiktoken
# CUDA wheels
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows"
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64"
diff --git a/user_data/settings-template.yaml b/user_data/settings-template.yaml
index db481e84..ce0f77e1 100644
--- a/user_data/settings-template.yaml
+++ b/user_data/settings-template.yaml
@@ -18,6 +18,7 @@ max_new_tokens_min: 1
max_new_tokens_max: 4096
prompt_lookup_num_tokens: 0
max_tokens_second: 0
+max_updates_second: 12
auto_max_new_tokens: true
ban_eos_token: false
add_bos_token: true