From 366de4b5611a344c708d76aeebb3b2d8c42a55c7 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 17 May 2025 17:11:38 -0700 Subject: [PATCH 01/61] UI: Fix the chat area height when "Show controls" is unchecked --- css/main.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/css/main.css b/css/main.css index 3fec7bb0..1ad70122 100644 --- a/css/main.css +++ b/css/main.css @@ -827,7 +827,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { } #chat-col.bigchat { - padding-bottom: 80px !important; + padding-bottom: 15px !important; } .message-body ol, .message-body ul { From 076aa67963cd080837679662d79cb73326efb2ba Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 17 May 2025 22:14:14 -0700 Subject: [PATCH 02/61] Fix API issues --- extensions/openai/script.py | 34 ++++++++++++++++++++-------------- 1 file changed, 20 insertions(+), 14 deletions(-) diff --git a/extensions/openai/script.py b/extensions/openai/script.py index 2c98ee78..b7394bc5 100644 --- a/extensions/openai/script.py +++ b/extensions/openai/script.py @@ -114,14 +114,17 @@ async def openai_completions(request: Request, request_data: CompletionRequest): if request_data.stream: async def generator(): - async with streaming_semaphore: - response = OAIcompletions.stream_completions(to_dict(request_data), is_legacy=is_legacy) - async for resp in iterate_in_threadpool(response): - disconnected = await request.is_disconnected() - if disconnected: - break + try: + async with streaming_semaphore: + response = OAIcompletions.stream_completions(to_dict(request_data), is_legacy=is_legacy) + async for resp in iterate_in_threadpool(response): + disconnected = await request.is_disconnected() + if disconnected: + break - yield {"data": json.dumps(resp)} + yield {"data": json.dumps(resp)} + finally: + return return EventSourceResponse(generator()) # SSE streaming @@ -142,14 +145,17 @@ async def openai_chat_completions(request: Request, request_data: ChatCompletion if request_data.stream: async def generator(): - async with streaming_semaphore: - response = OAIcompletions.stream_chat_completions(to_dict(request_data), is_legacy=is_legacy) - async for resp in iterate_in_threadpool(response): - disconnected = await request.is_disconnected() - if disconnected: - break + try: + async with streaming_semaphore: + response = OAIcompletions.stream_chat_completions(to_dict(request_data), is_legacy=is_legacy) + async for resp in iterate_in_threadpool(response): + disconnected = await request.is_disconnected() + if disconnected: + break - yield {"data": json.dumps(resp)} + yield {"data": json.dumps(resp)} + finally: + return return EventSourceResponse(generator()) # SSE streaming From bd13a8f255ce7b637ee9e00fa7077752a6e56ca4 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sat, 17 May 2025 22:31:55 -0700 Subject: [PATCH 03/61] UI: Light theme improvement --- css/main.css | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/css/main.css b/css/main.css index 1ad70122..dc0ba817 100644 --- a/css/main.css +++ b/css/main.css @@ -131,7 +131,7 @@ gradio-app > :first-child { } .header_bar { - box-shadow: 0 0 3px rgba(22 22 22 / 35%); + border-right: var(--input-border-width) solid var(--input-border-color); margin-bottom: 0; overflow-x: scroll; text-wrap: nowrap; @@ -1171,11 +1171,11 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { background-color: var(--light-theme-gray); } 
-#chat-controls { +.dark #chat-controls { border-left: 1px solid #d9d9d0; } -#past-chats-row { +.dark #past-chats-row { border-right: 1px solid #d9d9d0; } From f1ec6c8662c0c8a744c827aa8d99036983aca8cc Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 18 May 2025 09:04:51 -0700 Subject: [PATCH 04/61] Minor label changes --- modules/ui_chat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 0856cfab..a0c37dad 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -79,8 +79,8 @@ def create_ui(): shared.gradio['Send dummy reply'] = gr.Button('Send dummy reply') with gr.Row(): - shared.gradio['send-chat-to-default'] = gr.Button('Send to default') - shared.gradio['send-chat-to-notebook'] = gr.Button('Send to notebook') + shared.gradio['send-chat-to-default'] = gr.Button('Send to Default') + shared.gradio['send-chat-to-notebook'] = gr.Button('Send to Notebook') with gr.Row(elem_id='chat-controls', elem_classes=['pretty_scrollbar']): with gr.Column(): From 2faaf18f1f9f4d29933017add849f8579021618c Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 18 May 2025 09:06:20 -0700 Subject: [PATCH 05/61] Add back the "Common values" to the ctx-size slider --- modules/ui_model_menu.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index 5b7dfdd8..85cf4189 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -39,7 +39,7 @@ def create_ui(): with gr.Row(): with gr.Column(): shared.gradio['gpu_layers'] = gr.Slider(label="gpu-layers", minimum=0, maximum=get_initial_gpu_layers_max(), step=1, value=shared.args.gpu_layers, info='Must be greater than 0 for the GPU to be used. ⚠️ Lower this value if you can\'t load the model.') - shared.gradio['ctx_size'] = gr.Slider(label='ctx-size', minimum=256, maximum=131072, step=256, value=shared.args.ctx_size, info='Context length. ⚠️ Lower this value if you can\'t load the model.') + shared.gradio['ctx_size'] = gr.Slider(label='ctx-size', minimum=256, maximum=131072, step=256, value=shared.args.ctx_size, info='Context length. Common values: 4096, 8192, 16384, 32768, 65536, 131072. ⚠️ Lower this value if you can\'t load the model.') shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7') shared.gradio['cache_type'] = gr.Dropdown(label="cache-type", choices=['fp16', 'q8_0', 'q4_0', 'fp8', 'q8', 'q7', 'q6', 'q5', 'q4', 'q3', 'q2'], value=shared.args.cache_type, allow_custom_value=True, info='Valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4; ExLlamaV3 - fp16, q2 to q8. For ExLlamaV3, you can type custom combinations for separate k/v bits (e.g. 
q4_q8).') shared.gradio['hqq_backend'] = gr.Dropdown(label="hqq_backend", choices=["PYTORCH", "PYTORCH_COMPILE", "ATEN"], value=shared.args.hqq_backend) From 9d7a36356d7de6b7557928d199ccc94aa9c8f99e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 18 May 2025 10:56:16 -0700 Subject: [PATCH 06/61] Remove unnecessary js that was causing scrolling issues --- js/main.js | 28 ---------------------------- 1 file changed, 28 deletions(-) diff --git a/js/main.js b/js/main.js index 6cecd341..01c346a7 100644 --- a/js/main.js +++ b/js/main.js @@ -132,8 +132,6 @@ targetElement.addEventListener("scroll", function() { // Create a MutationObserver instance const observer = new MutationObserver(function(mutations) { - updateCssProperties(); - if (targetElement.classList.contains("_generating")) { typing.parentNode.classList.add("visible-dots"); document.getElementById("stop").style.display = "flex"; @@ -446,32 +444,6 @@ const chatInput = document.querySelector("#chat-input textarea"); // Variables to store current dimensions let currentChatInputHeight = chatInput.clientHeight; -// Update chat layout based on chat and input dimensions -function updateCssProperties() { - const chatInputHeight = chatInput.clientHeight; - - // Check if the chat container is visible - if (chatContainer.clientHeight > 0) { - // Adjust scrollTop based on input height change - if (chatInputHeight !== currentChatInputHeight) { - const deltaHeight = chatInputHeight - currentChatInputHeight; - if (!isScrolled && deltaHeight < 0) { - chatContainer.scrollTop = chatContainer.scrollHeight; - } else { - chatContainer.scrollTop += deltaHeight; - } - - currentChatInputHeight = chatInputHeight; - } - } -} - -// Observe textarea size changes and call update function -new ResizeObserver(updateCssProperties).observe(document.querySelector("#chat-input textarea")); - -// Handle changes in window size -window.addEventListener("resize", updateCssProperties); - //------------------------------------------------ // Focus on the rename text area when it becomes visible //------------------------------------------------ From 126b3a768fa9af7f5318dbfd70b7e6ad00defc68 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 18 May 2025 12:38:36 -0700 Subject: [PATCH 07/61] Revert "Dynamic Chat Message UI Update Speed (#6952)" (for now) This reverts commit 8137eb8ef46ac6950cb96094e3cc30b0a72dee76. 
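For context: this restores the fixed-rate UI update throttle driven by the
max_updates_second setting (and its slider in modules/ui_parameters.py), in
place of the latency-threshold heuristic removed in the diff below. A minimal
sketch of the restored throttling idea, assuming a generator of cumulative
partial replies; the function and variable names here are illustrative only
and are not part of this patch:

    import time

    def throttle(chunks, max_updates_second=12):
        # Illustrative sketch, not code from this repository.
        # Yield at most max_updates_second chunks per second; 0 disables the limit.
        min_interval = 1 / max_updates_second if max_updates_second > 0 else 0
        last_update = -1.0
        latest, yielded_last = None, False
        for latest in chunks:
            now = time.time()
            if now - last_update > min_interval:
                last_update = now
                yielded_last = True
                yield latest
            else:
                yielded_last = False
        if latest is not None and not yielded_last:
            yield latest  # always emit the final, complete chunk

    # Example: list(throttle(iter(["H", "He", "Hel", "Hello"]), max_updates_second=2))
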
--- modules/shared.py | 1 + modules/text_generation.py | 18 ++++++++++-------- modules/ui.py | 1 + modules/ui_parameters.py | 2 ++ user_data/settings-template.yaml | 1 + 5 files changed, 15 insertions(+), 8 deletions(-) diff --git a/modules/shared.py b/modules/shared.py index 4e0a20db..a6c0cbe9 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -47,6 +47,7 @@ settings = { 'max_new_tokens_max': 4096, 'prompt_lookup_num_tokens': 0, 'max_tokens_second': 0, + 'max_updates_second': 12, 'auto_max_new_tokens': True, 'ban_eos_token': False, 'add_bos_token': True, diff --git a/modules/text_generation.py b/modules/text_generation.py index 00b9275a..962311df 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -65,39 +65,41 @@ def _generate_reply(question, state, stopping_strings=None, is_chat=False, escap all_stop_strings += st shared.stop_everything = False + last_update = -1 reply = '' is_stream = state['stream'] if len(all_stop_strings) > 0 and not state['stream']: state = copy.deepcopy(state) state['stream'] = True + min_update_interval = 0 + if state.get('max_updates_second', 0) > 0: + min_update_interval = 1 / state['max_updates_second'] + # Generate - last_update = -1 - latency_threshold = 1 / 1000 for reply in generate_func(question, original_question, state, stopping_strings, is_chat=is_chat): - cur_time = time.monotonic() reply, stop_found = apply_stopping_strings(reply, all_stop_strings) if escape_html: reply = html.escape(reply) if is_stream: + cur_time = time.time() + # Limit number of tokens/second to make text readable in real time if state['max_tokens_second'] > 0: diff = 1 / state['max_tokens_second'] - (cur_time - last_update) if diff > 0: time.sleep(diff) - last_update = time.monotonic() + last_update = time.time() yield reply # Limit updates to avoid lag in the Gradio UI # API updates are not limited else: - # If 'generate_func' takes less than 0.001 seconds to yield the next token - # (equivalent to more than 1000 tok/s), assume that the UI is lagging behind and skip yielding - if (cur_time - last_update) > latency_threshold: + if cur_time - last_update > min_update_interval: + last_update = cur_time yield reply - last_update = time.monotonic() if stop_found or (state['max_tokens_second'] > 0 and shared.stop_everything): break diff --git a/modules/ui.py b/modules/ui.py index eeb6ce92..25f93612 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -192,6 +192,7 @@ def list_interface_input_elements(): 'max_new_tokens', 'prompt_lookup_num_tokens', 'max_tokens_second', + 'max_updates_second', 'do_sample', 'dynamic_temperature', 'temperature_last', diff --git a/modules/ui_parameters.py b/modules/ui_parameters.py index 84f9fbfc..733d0901 100644 --- a/modules/ui_parameters.py +++ b/modules/ui_parameters.py @@ -71,6 +71,8 @@ def create_ui(default_preset): shared.gradio['max_new_tokens'] = gr.Slider(minimum=shared.settings['max_new_tokens_min'], maximum=shared.settings['max_new_tokens_max'], value=shared.settings['max_new_tokens'], step=1, label='max_new_tokens', info='⚠️ Setting this too high can cause prompt truncation.') shared.gradio['prompt_lookup_num_tokens'] = gr.Slider(value=shared.settings['prompt_lookup_num_tokens'], minimum=0, maximum=10, step=1, label='prompt_lookup_num_tokens', info='Activates Prompt Lookup Decoding.') shared.gradio['max_tokens_second'] = gr.Slider(value=shared.settings['max_tokens_second'], minimum=0, maximum=20, step=1, label='Maximum tokens/second', info='To make text readable in real time.') + shared.gradio['max_updates_second'] 
= gr.Slider(value=shared.settings['max_updates_second'], minimum=0, maximum=24, step=1, label='Maximum UI updates/second', info='Set this if you experience lag in the UI during streaming.') + with gr.Column(): with gr.Row(): with gr.Column(): diff --git a/user_data/settings-template.yaml b/user_data/settings-template.yaml index db481e84..ce0f77e1 100644 --- a/user_data/settings-template.yaml +++ b/user_data/settings-template.yaml @@ -18,6 +18,7 @@ max_new_tokens_min: 1 max_new_tokens_max: 4096 prompt_lookup_num_tokens: 0 max_tokens_second: 0 +max_updates_second: 12 auto_max_new_tokens: true ban_eos_token: false add_bos_token: true From 83bfd5c64b44e9eada63963e8aff05a608a7e90c Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 18 May 2025 12:45:01 -0700 Subject: [PATCH 08/61] Fix API issues --- extensions/openai/script.py | 18 ++++++++++-------- 1 file changed, 10 insertions(+), 8 deletions(-) diff --git a/extensions/openai/script.py b/extensions/openai/script.py index b7394bc5..b6abae20 100644 --- a/extensions/openai/script.py +++ b/extensions/openai/script.py @@ -114,8 +114,8 @@ async def openai_completions(request: Request, request_data: CompletionRequest): if request_data.stream: async def generator(): - try: - async with streaming_semaphore: + async with streaming_semaphore: + try: response = OAIcompletions.stream_completions(to_dict(request_data), is_legacy=is_legacy) async for resp in iterate_in_threadpool(response): disconnected = await request.is_disconnected() @@ -123,8 +123,9 @@ async def openai_completions(request: Request, request_data: CompletionRequest): break yield {"data": json.dumps(resp)} - finally: - return + finally: + stop_everything_event() + return return EventSourceResponse(generator()) # SSE streaming @@ -145,8 +146,8 @@ async def openai_chat_completions(request: Request, request_data: ChatCompletion if request_data.stream: async def generator(): - try: - async with streaming_semaphore: + async with streaming_semaphore: + try: response = OAIcompletions.stream_chat_completions(to_dict(request_data), is_legacy=is_legacy) async for resp in iterate_in_threadpool(response): disconnected = await request.is_disconnected() @@ -154,8 +155,9 @@ async def openai_chat_completions(request: Request, request_data: ChatCompletion break yield {"data": json.dumps(resp)} - finally: - return + finally: + stop_everything_event() + return return EventSourceResponse(generator()) # SSE streaming From 9cd6ea6c0b4b8fb99cf73d8fc0d1064db64fc2e8 Mon Sep 17 00:00:00 2001 From: Tiago Silva Date: Sun, 18 May 2025 22:07:16 +0100 Subject: [PATCH 09/61] Fix Dockerfile in AMD and Intel (#6995) --- README.md | 4 ++-- docker/amd/Dockerfile | 2 +- docker/amd/docker-compose.yml | 12 +----------- docker/intel/Dockerfile | 2 +- docker/intel/docker-compose.yml | 10 +--------- 5 files changed, 6 insertions(+), 24 deletions(-) diff --git a/README.md b/README.md index 0833f9b0..041513ac 100644 --- a/README.md +++ b/README.md @@ -146,14 +146,14 @@ The `requirements*.txt` above contain various wheels precompiled through GitHub For NVIDIA GPU: ln -s docker/{nvidia/Dockerfile,nvidia/docker-compose.yml,.dockerignore} . For AMD GPU: -ln -s docker/{amd/Dockerfile,intel/docker-compose.yml,.dockerignore} . +ln -s docker/{amd/Dockerfile,amd/docker-compose.yml,.dockerignore} . For Intel GPU: ln -s docker/{intel/Dockerfile,amd/docker-compose.yml,.dockerignore} . For CPU only ln -s docker/{cpu/Dockerfile,cpu/docker-compose.yml,.dockerignore} . 
cp docker/.env.example .env #Create logs/cache dir : -mkdir -p logs cache +mkdir -p user_data/logs user_data/cache # Edit .env and set: # TORCH_CUDA_ARCH_LIST based on your GPU model # APP_RUNTIME_GID your host user's group id (run `id -g` in a terminal) diff --git a/docker/amd/Dockerfile b/docker/amd/Dockerfile index 66e5863c..c23083f7 100644 --- a/docker/amd/Dockerfile +++ b/docker/amd/Dockerfile @@ -14,7 +14,7 @@ WORKDIR /home/app/ RUN git clone https://github.com/oobabooga/text-generation-webui.git WORKDIR /home/app/text-generation-webui RUN GPU_CHOICE=B LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose -COPY CMD_FLAGS.txt /home/app/text-generation-webui/ +COPY /user_data/CMD_FLAGS.txt /home/app/text-generation-webui/user_data EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005} WORKDIR /home/app/text-generation-webui # set umask to ensure group read / write at runtime diff --git a/docker/amd/docker-compose.yml b/docker/amd/docker-compose.yml index 8866e9ed..a727ca3e 100644 --- a/docker/amd/docker-compose.yml +++ b/docker/amd/docker-compose.yml @@ -41,14 +41,4 @@ services: security_opt: - seccomp=unconfined volumes: - - ./cache:/home/app/text-generation-webui/cache - - ./characters:/home/app/text-generation-webui/characters - - ./extensions:/home/app/text-generation-webui/extensions - - ./loras:/home/app/text-generation-webui/loras - - ./logs:/home/app/text-generation-webui/logs - - ./models:/home/app/text-generation-webui/models - - ./presets:/home/app/text-generation-webui/presets - - ./prompts:/home/app/text-generation-webui/prompts - - ./softprompts:/home/app/text-generation-webui/softprompts - - ./training:/home/app/text-generation-webui/training - - ./cloudflared:/etc/cloudflared + - ./user_data:/home/app/text-generation-webui/user_data diff --git a/docker/intel/Dockerfile b/docker/intel/Dockerfile index cab62442..4a709803 100644 --- a/docker/intel/Dockerfile +++ b/docker/intel/Dockerfile @@ -14,7 +14,7 @@ WORKDIR /home/app/ RUN git clone https://github.com/oobabooga/text-generation-webui.git WORKDIR /home/app/text-generation-webui RUN GPU_CHOICE=D LAUNCH_AFTER_INSTALL=FALSE INSTALL_EXTENSIONS=TRUE ./start_linux.sh --verbose -COPY CMD_FLAGS.txt /home/app/text-generation-webui/ +COPY /user_data/CMD_FLAGS.txt /home/app/text-generation-webui/user_data EXPOSE ${CONTAINER_PORT:-7860} ${CONTAINER_API_PORT:-5000} ${CONTAINER_API_STREAM_PORT:-5005} # set umask to ensure group read / write at runtime WORKDIR /home/app/text-generation-webui diff --git a/docker/intel/docker-compose.yml b/docker/intel/docker-compose.yml index 78e06698..bb48dd22 100644 --- a/docker/intel/docker-compose.yml +++ b/docker/intel/docker-compose.yml @@ -41,12 +41,4 @@ services: security_opt: - seccomp=unconfined volumes: - - ./characters:/home/app/text-generation-webui/characters - - ./extensions:/home/app/text-generation-webui/extensions - - ./loras:/home/app/text-generation-webui/loras - - ./models:/home/app/text-generation-webui/models - - ./presets:/home/app/text-generation-webui/presets - - ./prompts:/home/app/text-generation-webui/prompts - - ./softprompts:/home/app/text-generation-webui/softprompts - - ./training:/home/app/text-generation-webui/training - - ./cloudflared:/etc/cloudflared + - ./user_data:/home/app/text-generation-webui/user_data From 0c1bc6d1d07559e6518786948e728e5899a3471e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 18 May 2025 14:08:54 -0700 Subject: [PATCH 10/61] Bump 
llama.cpp --- requirements/full/requirements.txt | 4 ++-- requirements/full/requirements_amd.txt | 4 ++-- requirements/full/requirements_amd_noavx2.txt | 4 ++-- requirements/full/requirements_apple_intel.txt | 4 ++-- requirements/full/requirements_apple_silicon.txt | 6 +++--- requirements/full/requirements_cpu_only.txt | 4 ++-- requirements/full/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/full/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements.txt | 4 ++-- requirements/portable/requirements_apple_intel.txt | 4 ++-- requirements/portable/requirements_apple_silicon.txt | 6 +++--- requirements/portable/requirements_cpu_only.txt | 4 ++-- requirements/portable/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/portable/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements_vulkan.txt | 4 ++-- requirements/portable/requirements_vulkan_noavx2.txt | 4 ++-- 16 files changed, 34 insertions(+), 34 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index af5f7d8a..1dcf8c93 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -30,8 +30,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index 4e011989..4a1702e9 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -29,7 +29,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" 
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index a3bd1350..0caca631 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -29,7 +29,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index 363365bf..9a439798 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -29,7 +29,7 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9-py3-none-any.whl https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl diff --git 
a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index 2843fed2..16e77264 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -29,8 +29,8 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9-py3-none-any.whl https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index bd7c4a4f..468f97fa 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -29,5 +29,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index b5aa1cf7..eb7872ed 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -29,5 +29,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) 
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index 89947cbe..3ba42c0b 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -30,8 +30,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt index 79959398..6831c461 100644 --- a/requirements/portable/requirements.txt +++ b/requirements/portable/requirements.txt @@ -15,5 +15,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git 
a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt index ca16e4c7..fbb77ec0 100644 --- a/requirements/portable/requirements_apple_intel.txt +++ b/requirements/portable/requirements_apple_intel.txt @@ -15,5 +15,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt index 18e1c506..71575b28 100644 --- a/requirements/portable/requirements_apple_silicon.txt +++ b/requirements/portable/requirements_apple_silicon.txt @@ -15,6 +15,6 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt index 693f4712..d093ab14 100644 --- a/requirements/portable/requirements_cpu_only.txt +++ b/requirements/portable/requirements_cpu_only.txt @@ -15,5 +15,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" 
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt index 8635d11e..064d8e6c 100644 --- a/requirements/portable/requirements_cpu_only_noavx2.txt +++ b/requirements/portable/requirements_cpu_only_noavx2.txt @@ -15,5 +15,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt index e844596e..342239e8 100644 --- a/requirements/portable/requirements_noavx2.txt +++ b/requirements/portable/requirements_noavx2.txt @@ -15,5 +15,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt index 9b7435d1..4ef3e97b 100644 --- a/requirements/portable/requirements_vulkan.txt +++ b/requirements/portable/requirements_vulkan.txt @@ -15,5 +15,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" 
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt index 513b7a15..7b39feb1 100644 --- a/requirements/portable/requirements_vulkan_noavx2.txt +++ b/requirements/portable/requirements_vulkan_noavx2.txt @@ -15,5 +15,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.12.0/llama_cpp_binaries-0.12.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" From 0c7237e4b7c3de52f1de279134c12dcd0a41dcc9 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 18 May 2025 20:01:29 -0700 Subject: [PATCH 11/61] Update README --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 041513ac..ee5a04bf 100644 --- a/README.md +++ b/README.md @@ -14,8 +14,8 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. - Supports multiple text generation backends in one UI/API, including [llama.cpp](https://github.com/ggerganov/llama.cpp), [Transformers](https://github.com/huggingface/transformers), [ExLlamaV3](https://github.com/turboderp-org/exllamav3), and [ExLlamaV2](https://github.com/turboderp-org/exllamav2). - [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) is also supported via its own [Dockerfile](https://github.com/oobabooga/text-generation-webui/blob/main/docker/TensorRT-LLM/Dockerfile). - - Additional quantization libraries like [AutoAWQ](https://github.com/casper-hansen/AutoAWQ), [AutoGPTQ](https://github.com/PanQiWei/AutoGPTQ), [HQQ](https://github.com/mobiusml/hqq), and [AQLM](https://github.com/Vahe1994/AQLM) can be used with the Transformers loader if you install them manually. -- Easy setup: Choose between **portable builds** (zero setup, just unzip and run) for llama.cpp GGUF models on Windows/Linux/macOS, or the one-click installer that creates a self-contained `installer_files` directory that doesn't interfere with your system environment. + - Additional quantization libraries like [HQQ](https://github.com/mobiusml/hqq) and [AQLM](https://github.com/Vahe1994/AQLM) can be used with the Transformers loader if you install them manually. +- Easy setup: Choose between **portable builds** (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or the one-click installer that creates a self-contained `installer_files` directory that doesn't interfere with your system environment. - UI that resembles the original ChatGPT style. - Automatic prompt formatting using Jinja2 templates. You don't need to ever worry about prompt formats. - Three chat modes: `instruct`, `chat-instruct`, and `chat`, with automatic prompt templates in `chat-instruct`. 
From 9ec46b8c4485742140cdbe118354155b88b99019 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Mon, 19 May 2025 09:23:24 -0700 Subject: [PATCH 12/61] Remove the HQQ loader (HQQ models can be loaded through Transformers) --- README.md | 4 +--- modules/loaders.py | 6 ------ modules/models.py | 16 ---------------- modules/models_settings.py | 2 -- modules/shared.py | 8 +------- modules/ui.py | 1 - modules/ui_model_menu.py | 2 -- 7 files changed, 2 insertions(+), 37 deletions(-) diff --git a/README.md b/README.md index ee5a04bf..7105ce23 100644 --- a/README.md +++ b/README.md @@ -12,9 +12,7 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. ## Features -- Supports multiple text generation backends in one UI/API, including [llama.cpp](https://github.com/ggerganov/llama.cpp), [Transformers](https://github.com/huggingface/transformers), [ExLlamaV3](https://github.com/turboderp-org/exllamav3), and [ExLlamaV2](https://github.com/turboderp-org/exllamav2). - - [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) is also supported via its own [Dockerfile](https://github.com/oobabooga/text-generation-webui/blob/main/docker/TensorRT-LLM/Dockerfile). - - Additional quantization libraries like [HQQ](https://github.com/mobiusml/hqq) and [AQLM](https://github.com/Vahe1994/AQLM) can be used with the Transformers loader if you install them manually. +- Supports multiple text generation backends in one UI/API, including [llama.cpp](https://github.com/ggerganov/llama.cpp), [Transformers](https://github.com/huggingface/transformers), [ExLlamaV3](https://github.com/turboderp-org/exllamav3), [ExLlamaV2](https://github.com/turboderp-org/exllamav2), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) (the latter via its own [Dockerfile](https://github.com/oobabooga/text-generation-webui/blob/main/docker/TensorRT-LLM/Dockerfile)). - Easy setup: Choose between **portable builds** (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or the one-click installer that creates a self-contained `installer_files` directory that doesn't interfere with your system environment. - UI that resembles the original ChatGPT style. - Automatic prompt formatting using Jinja2 templates. You don't need to ever worry about prompt formats. diff --git a/modules/loaders.py b/modules/loaders.py index 79a7a4a3..6fbd2198 100644 --- a/modules/loaders.py +++ b/modules/loaders.py @@ -90,11 +90,6 @@ loaders_and_params = OrderedDict({ 'ctx_size_draft', 'speculative_decoding_accordion', ], - 'HQQ': [ - 'hqq_backend', - 'trust_remote_code', - 'no_use_fast', - ], 'TensorRT-LLM': [ 'ctx_size', 'cpp_runner', @@ -158,7 +153,6 @@ def transformers_samplers(): loaders_samplers = { 'Transformers': transformers_samplers(), - 'HQQ': transformers_samplers(), 'ExLlamav3_HF': { 'temperature', 'dynatemp_low', diff --git a/modules/models.py b/modules/models.py index 9ecee803..4218d58c 100644 --- a/modules/models.py +++ b/modules/models.py @@ -21,7 +21,6 @@ def load_model(model_name, loader=None): 'ExLlamav3_HF': ExLlamav3_HF_loader, 'ExLlamav2_HF': ExLlamav2_HF_loader, 'ExLlamav2': ExLlamav2_loader, - 'HQQ': HQQ_loader, 'TensorRT-LLM': TensorRT_LLM_loader, } @@ -102,21 +101,6 @@ def ExLlamav2_loader(model_name): return model, tokenizer -def HQQ_loader(model_name): - try: - from hqq.core.quantize import HQQBackend, HQQLinear - from hqq.models.hf.base import AutoHQQHFModel - except ModuleNotFoundError: - raise ModuleNotFoundError("Failed to import 'hqq'. 
Please install it manually following the instructions in the HQQ GitHub repository.") - - logger.info(f"Loading HQQ model with backend: \"{shared.args.hqq_backend}\"") - - model_dir = Path(f'{shared.args.model_dir}/{model_name}') - model = AutoHQQHFModel.from_quantized(str(model_dir)) - HQQLinear.set_backend(getattr(HQQBackend, shared.args.hqq_backend)) - return model - - def TensorRT_LLM_loader(model_name): try: from modules.tensorrt_llm import TensorRTLLMModel diff --git a/modules/models_settings.py b/modules/models_settings.py index 47dbc020..e742e0d8 100644 --- a/modules/models_settings.py +++ b/modules/models_settings.py @@ -183,8 +183,6 @@ def infer_loader(model_name, model_settings, hf_quant_method=None): loader = 'ExLlamav3_HF' elif re.match(r'.*exl2', model_name.lower()): loader = 'ExLlamav2_HF' - elif re.match(r'.*-hqq', model_name.lower()): - return 'HQQ' else: loader = 'Transformers' diff --git a/modules/shared.py b/modules/shared.py index a6c0cbe9..d2305f30 100644 --- a/modules/shared.py +++ b/modules/shared.py @@ -87,7 +87,7 @@ group.add_argument('--idle-timeout', type=int, default=0, help='Unload model aft # Model loader group = parser.add_argument_group('Model loader') -group.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, ExLlamav3_HF, ExLlamav2_HF, ExLlamav2, HQQ, TensorRT-LLM.') +group.add_argument('--loader', type=str, help='Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, ExLlamav3_HF, ExLlamav2_HF, ExLlamav2, TensorRT-LLM.') # Transformers/Accelerate group = parser.add_argument_group('Transformers/Accelerate') @@ -152,10 +152,6 @@ group.add_argument('--no_sdpa', action='store_true', help='Force Torch SDPA to n group.add_argument('--num_experts_per_token', type=int, default=2, metavar='N', help='Number of experts to use for generation. Applies to MoE models like Mixtral.') group.add_argument('--enable_tp', action='store_true', help='Enable Tensor Parallelism (TP) in ExLlamaV2.') -# HQQ -group = parser.add_argument_group('HQQ') -group.add_argument('--hqq-backend', type=str, default='PYTORCH_COMPILE', help='Backend for the HQQ loader. 
Valid options: PYTORCH, PYTORCH_COMPILE, ATEN.') - # TensorRT-LLM group = parser.add_argument_group('TensorRT-LLM') group.add_argument('--cpp-runner', action='store_true', help='Use the ModelRunnerCpp runner, which is faster than the default ModelRunner but doesn\'t support streaming yet.') @@ -263,8 +259,6 @@ def fix_loader_name(name): return 'ExLlamav2_HF' elif name in ['exllamav3-hf', 'exllamav3_hf', 'exllama-v3-hf', 'exllama_v3_hf', 'exllama-v3_hf', 'exllama3-hf', 'exllama3_hf', 'exllama-3-hf', 'exllama_3_hf', 'exllama-3_hf']: return 'ExLlamav3_HF' - elif name in ['hqq']: - return 'HQQ' elif name in ['tensorrt', 'tensorrtllm', 'tensorrt_llm', 'tensorrt-llm', 'tensort', 'tensortllm']: return 'TensorRT-LLM' diff --git a/modules/ui.py b/modules/ui.py index 25f93612..f5dc0632 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -109,7 +109,6 @@ def list_model_elements(): 'threads', 'threads_batch', 'batch_size', - 'hqq_backend', 'ctx_size', 'cache_type', 'tensor_split', diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index 85cf4189..d361f692 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -42,8 +42,6 @@ def create_ui(): shared.gradio['ctx_size'] = gr.Slider(label='ctx-size', minimum=256, maximum=131072, step=256, value=shared.args.ctx_size, info='Context length. Common values: 4096, 8192, 16384, 32768, 65536, 131072. ⚠️ Lower this value if you can\'t load the model.') shared.gradio['gpu_split'] = gr.Textbox(label='gpu-split', info='Comma-separated list of VRAM (in GB) to use per GPU. Example: 20,7,7') shared.gradio['cache_type'] = gr.Dropdown(label="cache-type", choices=['fp16', 'q8_0', 'q4_0', 'fp8', 'q8', 'q7', 'q6', 'q5', 'q4', 'q3', 'q2'], value=shared.args.cache_type, allow_custom_value=True, info='Valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4; ExLlamaV3 - fp16, q2 to q8. For ExLlamaV3, you can type custom combinations for separate k/v bits (e.g. 
q4_q8).') - shared.gradio['hqq_backend'] = gr.Dropdown(label="hqq_backend", choices=["PYTORCH", "PYTORCH_COMPILE", "ATEN"], value=shared.args.hqq_backend) - with gr.Column(): shared.gradio['vram_info'] = gr.HTML(value=get_initial_vram_info()) shared.gradio['flash_attn'] = gr.Checkbox(label="flash-attn", value=shared.args.flash_attn, info='Use flash-attention.') From 8e10f9894aee28088b176adc5bed91f80cc11b5c Mon Sep 17 00:00:00 2001 From: oobabooga Date: Tue, 20 May 2025 10:48:46 -0300 Subject: [PATCH 13/61] Add a metadata field to the chat history & add date/time to chat messages (#7003) --- css/main.css | 33 +++++++++++ modules/chat.py | 114 ++++++++++++++++++++++++++++++++++++-- modules/html_generator.py | 65 +++++++++++++++++++--- modules/ui_chat.py | 2 +- 4 files changed, 200 insertions(+), 14 deletions(-) diff --git a/css/main.css b/css/main.css index dc0ba817..319c1778 100644 --- a/css/main.css +++ b/css/main.css @@ -419,6 +419,14 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { padding-right: 1rem; } +.chat .message .timestamp { + font-size: 0.7em; + display: inline-block; + font-weight: normal; + opacity: 0.7; + margin-left: 5px; +} + .chat-parent.bigchat { flex: 1; } @@ -1269,6 +1277,31 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { left: 75px; } +.footer-button.footer-info-button { + bottom: -23px; +} + +.user-message .footer-button.footer-info-button { + left: 25px; +} + +.assistant-message:not(:last-child) .footer-button.footer-info-button { + left: 25px; +} + +.assistant-message:last-child .footer-button.footer-info-button { + left: 100px; +} + +.message:not(:last-child) .text-bot .footer-button.footer-info-button, +.message .text-you .footer-button.footer-info-button { + left: 25px; +} + +.message:last-child .text-bot .footer-button.footer-info-button { + left: 100px; +} + .message:hover .footer-button, .user-message:hover .footer-button, .assistant-message:hover .footer-button { diff --git a/modules/chat.py b/modules/chat.py index b83c4bfe..cbcde212 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -37,6 +37,30 @@ def strftime_now(format): return datetime.now().strftime(format) +def get_current_timestamp(): + """Returns the current time in 24-hour format""" + return datetime.now().strftime('%b %d, %Y %H:%M') + + +def update_message_metadata(metadata_dict, role, index, **fields): + """ + Updates or adds metadata fields for a specific message. 
+ + Args: + metadata_dict: The metadata dictionary + role: The role (user, assistant, etc) + index: The message index + **fields: Arbitrary metadata fields to update/add + """ + key = f"{role}_{index}" + if key not in metadata_dict: + metadata_dict[key] = {} + + # Update with provided fields + for field_name, field_value in fields.items(): + metadata_dict[key][field_name] = field_value + + jinja_env = ImmutableSandboxedEnvironment( trim_blocks=True, lstrip_blocks=True, @@ -347,6 +371,10 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess output = apply_extensions('history', output) state = apply_extensions('state', state) + # Initialize metadata if not present + if 'metadata' not in output: + output['metadata'] = {} + visible_text = None stopping_strings = get_stopping_strings(state) is_stream = state['stream'] @@ -359,40 +387,56 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess text, visible_text = apply_extensions('chat_input', text, visible_text, state) text = apply_extensions('input', text, state, is_chat=True) + # Current row index + row_idx = len(output['internal']) output['internal'].append([text, '']) output['visible'].append([visible_text, '']) + # Add metadata with timestamp + update_message_metadata(output['metadata'], "user", row_idx, timestamp=get_current_timestamp()) # *Is typing...* if loading_message: yield { 'visible': output['visible'][:-1] + [[output['visible'][-1][0], shared.processing_message]], - 'internal': output['internal'] + 'internal': output['internal'], + 'metadata': output['metadata'] } else: text, visible_text = output['internal'][-1][0], output['visible'][-1][0] if regenerate: + row_idx = len(output['internal']) - 1 if loading_message: yield { 'visible': output['visible'][:-1] + [[visible_text, shared.processing_message]], - 'internal': output['internal'][:-1] + [[text, '']] + 'internal': output['internal'][:-1] + [[text, '']], + 'metadata': output['metadata'] } elif _continue: last_reply = [output['internal'][-1][1], output['visible'][-1][1]] if loading_message: yield { 'visible': output['visible'][:-1] + [[visible_text, last_reply[1] + '...']], - 'internal': output['internal'] + 'internal': output['internal'], + 'metadata': output['metadata'] } # Generate the prompt kwargs = { '_continue': _continue, - 'history': output if _continue else {k: v[:-1] for k, v in output.items()} + 'history': output if _continue else { + k: (v[:-1] if k in ['internal', 'visible'] else v) + for k, v in output.items() + } } + prompt = apply_extensions('custom_generate_chat_prompt', text, state, **kwargs) if prompt is None: prompt = generate_chat_prompt(text, state, **kwargs) + # Add timestamp for assistant's response at the start of generation + row_idx = len(output['internal']) - 1 + update_message_metadata(output['metadata'], "assistant", row_idx, timestamp=get_current_timestamp()) + # Generate reply = None for j, reply in enumerate(generate_reply(prompt, state, stopping_strings=stopping_strings, is_chat=True, for_ui=for_ui)): @@ -495,9 +539,19 @@ def generate_chat_reply_wrapper(text, state, regenerate=False, _continue=False): def remove_last_message(history): + if 'metadata' not in history: + history['metadata'] = {} + if len(history['visible']) > 0 and history['internal'][-1][0] != '<|BEGIN-VISIBLE-CHAT|>': + row_idx = len(history['internal']) - 1 last = history['visible'].pop() history['internal'].pop() + + # Remove metadata directly by known keys + if f"user_{row_idx}" in history['metadata']: + del 
history['metadata'][f"user_{row_idx}"] + if f"assistant_{row_idx}" in history['metadata']: + del history['metadata'][f"assistant_{row_idx}"] else: last = ['', ''] @@ -514,30 +568,54 @@ def send_last_reply_to_input(history): def replace_last_reply(text, state): history = state['history'] + # Initialize metadata if not present + if 'metadata' not in history: + history['metadata'] = {} + if len(text.strip()) == 0: return history elif len(history['visible']) > 0: + row_idx = len(history['internal']) - 1 history['visible'][-1][1] = html.escape(text) history['internal'][-1][1] = apply_extensions('input', text, state, is_chat=True) + update_message_metadata(history['metadata'], "assistant", row_idx, timestamp=get_current_timestamp()) return history def send_dummy_message(text, state): history = state['history'] + + # Initialize metadata if not present + if 'metadata' not in history: + history['metadata'] = {} + + row_idx = len(history['internal']) history['visible'].append([html.escape(text), '']) history['internal'].append([apply_extensions('input', text, state, is_chat=True), '']) + update_message_metadata(history['metadata'], "user", row_idx, timestamp=get_current_timestamp()) + return history def send_dummy_reply(text, state): history = state['history'] + + # Initialize metadata if not present + if 'metadata' not in history: + history['metadata'] = {} + if len(history['visible']) > 0 and not history['visible'][-1][1] == '': + row_idx = len(history['internal']) history['visible'].append(['', '']) history['internal'].append(['', '']) + # We don't need to add system metadata + row_idx = len(history['internal']) - 1 history['visible'][-1][1] = html.escape(text) history['internal'][-1][1] = apply_extensions('input', text, state, is_chat=True) + update_message_metadata(history['metadata'], "assistant", row_idx, timestamp=get_current_timestamp()) + return history @@ -547,7 +625,8 @@ def redraw_html(history, name1, name2, mode, style, character, reset_cache=False def start_new_chat(state): mode = state['mode'] - history = {'internal': [], 'visible': []} + # Initialize with empty metadata dictionary + history = {'internal': [], 'visible': [], 'metadata': {}} if mode != 'instruct': greeting = replace_character_names(state['greeting'], state['name1'], state['name2']) @@ -555,6 +634,9 @@ def start_new_chat(state): history['internal'] += [['<|BEGIN-VISIBLE-CHAT|>', greeting]] history['visible'] += [['', apply_extensions('output', html.escape(greeting), state, is_chat=True)]] + # Add timestamp for assistant's greeting + update_message_metadata(history['metadata'], "assistant", 0, timestamp=get_current_timestamp()) + unique_id = datetime.now().strftime('%Y%m%d-%H-%M-%S') save_history(history, unique_id, state['character_menu'], state['mode']) @@ -735,6 +817,16 @@ def load_history(unique_id, character, mode): 'visible': f['data_visible'] } + # Add metadata if it doesn't exist + if 'metadata' not in history: + history['metadata'] = {} + # Add placeholder timestamps for existing messages + for i, (user_msg, asst_msg) in enumerate(history['internal']): + if user_msg and user_msg != '<|BEGIN-VISIBLE-CHAT|>': + update_message_metadata(history['metadata'], "user", i, timestamp="") + if asst_msg: + update_message_metadata(history['metadata'], "assistant", i, timestamp="") + return history @@ -750,6 +842,16 @@ def load_history_json(file, history): 'visible': f['data_visible'] } + # Add metadata if it doesn't exist + if 'metadata' not in history: + history['metadata'] = {} + # Add placeholder timestamps + for i, 
(user_msg, asst_msg) in enumerate(history['internal']): + if user_msg and user_msg != '<|BEGIN-VISIBLE-CHAT|>': + update_message_metadata(history['metadata'], "user", i, timestamp="") + if asst_msg: + update_message_metadata(history['metadata'], "assistant", i, timestamp="") + return history except: return history @@ -1299,7 +1401,7 @@ def handle_your_picture_change(picture, state): def handle_send_instruction_click(state): state['mode'] = 'instruct' - state['history'] = {'internal': [], 'visible': []} + state['history'] = {'internal': [], 'visible': [], 'metadata': {}} output = generate_chat_prompt("Input", state) diff --git a/modules/html_generator.py b/modules/html_generator.py index 39659476..5dbde6da 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -169,11 +169,7 @@ def convert_to_markdown(string, message_id=None): thinking_block = f'''
- - - - - + {info_svg_small} {title_text}
{thinking_html}
@@ -339,11 +335,24 @@ copy_svg = '''''' continue_svg = '''''' remove_svg = '''''' +info_svg = '''''' +info_svg_small = '''''' copy_button = f'' refresh_button = f'' continue_button = f'' remove_button = f'' +info_button = f'' + + +def format_message_timestamp(history, role, index): + """Get a formatted timestamp HTML span for a message if available""" + key = f"{role}_{index}" + if 'metadata' in history and key in history['metadata'] and history['metadata'][key].get('timestamp'): + timestamp = history['metadata'][key]['timestamp'] + return f"{timestamp}" + + return "" def generate_instruct_html(history): @@ -354,6 +363,23 @@ def generate_instruct_html(history): row_internal = history['internal'][i] converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible] + # Get timestamps + user_timestamp = format_message_timestamp(history, "user", i) + assistant_timestamp = format_message_timestamp(history, "assistant", i) + + # Create info buttons for timestamps if they exist + info_message_user = "" + if user_timestamp != "": + # Extract the timestamp value from the span + user_timestamp_value = user_timestamp.split('>', 1)[1].split('<', 1)[0] + info_message_user = info_button.replace("message", user_timestamp_value) + + info_message_assistant = "" + if assistant_timestamp != "": + # Extract the timestamp value from the span + assistant_timestamp_value = assistant_timestamp.split('>', 1)[1].split('<', 1)[0] + info_message_assistant = info_button.replace("message", assistant_timestamp_value) + if converted_visible[0]: # Don't display empty user messages output += ( f'
' f'
{converted_visible[0]}
' f'{copy_button}' + f'{info_message_user}' f'
' f'' ) @@ -374,6 +401,7 @@ def generate_instruct_html(history): f'{refresh_button if i == len(history["visible"]) - 1 else ""}' f'{continue_button if i == len(history["visible"]) - 1 else ""}' f'{remove_button if i == len(history["visible"]) - 1 else ""}' + f'{info_message_assistant}' f'' f'' ) @@ -401,13 +429,17 @@ def generate_cai_chat_html(history, name1, name2, style, character, reset_cache= row_internal = history['internal'][i] converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible] + # Get timestamps + user_timestamp = format_message_timestamp(history, "user", i) + assistant_timestamp = format_message_timestamp(history, "assistant", i) + if converted_visible[0]: # Don't display empty user messages output += ( f'
' f'
{img_me}
' f'
' - f'
{name1}
' + f'
{name1}{user_timestamp}
' f'
{converted_visible[0]}
' f'{copy_button}' f'
' @@ -419,7 +451,7 @@ def generate_cai_chat_html(history, name1, name2, style, character, reset_cache= f'data-raw="{html.escape(row_internal[1], quote=True)}">' f'
{img_bot}
' f'
' - f'
{name2}
' + f'
{name2}{assistant_timestamp}
' f'
{converted_visible[1]}
' f'{copy_button}' f'{refresh_button if i == len(history["visible"]) - 1 else ""}' @@ -441,6 +473,23 @@ def generate_chat_html(history, name1, name2, reset_cache=False): row_internal = history['internal'][i] converted_visible = [convert_to_markdown_wrapped(entry, message_id=i, use_cache=i != len(history['visible']) - 1) for entry in row_visible] + # Get timestamps + user_timestamp = format_message_timestamp(history, "user", i) + assistant_timestamp = format_message_timestamp(history, "assistant", i) + + # Create info buttons for timestamps if they exist + info_message_user = "" + if user_timestamp != "": + # Extract the timestamp value from the span + user_timestamp_value = user_timestamp.split('>', 1)[1].split('<', 1)[0] + info_message_user = info_button.replace("message", user_timestamp_value) + + info_message_assistant = "" + if assistant_timestamp != "": + # Extract the timestamp value from the span + assistant_timestamp_value = assistant_timestamp.split('>', 1)[1].split('<', 1)[0] + info_message_assistant = info_button.replace("message", assistant_timestamp_value) + if converted_visible[0]: # Don't display empty user messages output += ( f'
' f'
{converted_visible[0]}
' f'{copy_button}' + f'{info_message_user}' f'
' f'
' ) @@ -461,6 +511,7 @@ def generate_chat_html(history, name1, name2, reset_cache=False): f'{refresh_button if i == len(history["visible"]) - 1 else ""}' f'{continue_button if i == len(history["visible"]) - 1 else ""}' f'{remove_button if i == len(history["visible"]) - 1 else ""}' + f'{info_message_assistant}' f'
' f'' ) diff --git a/modules/ui_chat.py b/modules/ui_chat.py index a0c37dad..7a5430ca 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -47,7 +47,7 @@ def create_ui(): with gr.Row(): with gr.Column(elem_id='chat-col'): shared.gradio['display'] = gr.JSON(value={}, visible=False) # Hidden buffer - shared.gradio['html_display'] = gr.HTML(value=chat_html_wrapper({'internal': [], 'visible': []}, '', '', 'chat', 'cai-chat', '')['html'], visible=True) + shared.gradio['html_display'] = gr.HTML(value=chat_html_wrapper({'internal': [], 'visible': [], 'metadata': {}}, '', '', 'chat', 'cai-chat', '')['html'], visible=True) with gr.Row(elem_id="chat-input-row"): with gr.Column(scale=1, elem_id='gr-hover-container'): gr.HTML(value='
', elem_id='gr-hover') From c25a381540eb8c40e945730b058ca3e83fe0674c Mon Sep 17 00:00:00 2001 From: Daniel Dengler Date: Tue, 20 May 2025 16:07:40 +0200 Subject: [PATCH 14/61] Add a "Branch here" footer button to chat messages (#6967) --- css/main.css | 66 +++++++++------------------------------ js/global_scope_js.js | 31 ++++++++++++++++++ modules/chat.py | 10 ++++-- modules/html_generator.py | 47 +++++++++++++++------------- modules/ui.py | 1 + modules/ui_chat.py | 5 +-- 6 files changed, 83 insertions(+), 77 deletions(-) diff --git a/css/main.css b/css/main.css index 319c1778..d7142336 100644 --- a/css/main.css +++ b/css/main.css @@ -1244,67 +1244,31 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { position: relative; } -.footer-button { +/* New container for the buttons */ +.message-actions { position: absolute; + bottom: -23px; + left: 0; + display: flex; + gap: 5px; + opacity: 0; + transition: opacity 0.2s; +} + +.footer-button { padding: 0; margin: 0; border: none; border-radius: 3px; cursor: pointer; - opacity: 0; display: flex; align-items: center; - transition: opacity 0.2s; + justify-content: center; } -.footer-button.footer-copy-button { - bottom: -23px; - left: 0; -} - -.footer-button.footer-refresh-button { - bottom: -23px; - left: 25px; -} - -.footer-button.footer-continue-button { - bottom: -23px; - left: 50px; -} - -.footer-button.footer-remove-button { - bottom: -23px; - left: 75px; -} - -.footer-button.footer-info-button { - bottom: -23px; -} - -.user-message .footer-button.footer-info-button { - left: 25px; -} - -.assistant-message:not(:last-child) .footer-button.footer-info-button { - left: 25px; -} - -.assistant-message:last-child .footer-button.footer-info-button { - left: 100px; -} - -.message:not(:last-child) .text-bot .footer-button.footer-info-button, -.message .text-you .footer-button.footer-info-button { - left: 25px; -} - -.message:last-child .text-bot .footer-button.footer-info-button { - left: 100px; -} - -.message:hover .footer-button, -.user-message:hover .footer-button, -.assistant-message:hover .footer-button { +.message:hover .message-actions, +.user-message:hover .message-actions, +.assistant-message:hover .message-actions { opacity: 1; } diff --git a/js/global_scope_js.js b/js/global_scope_js.js index 29d2d8bd..285d82f9 100644 --- a/js/global_scope_js.js +++ b/js/global_scope_js.js @@ -18,6 +18,37 @@ function copyToClipboard(element) { }); } +function branchHere(element) { + if (!element) return; + + const messageElement = element.closest(".message, .user-message, .assistant-message"); + if (!messageElement) return; + + const index = messageElement.getAttribute("data-index"); + if (!index) return; + + const branchIndexInput = document.getElementById("Branch-index").querySelector("input"); + if (!branchIndexInput) { + console.error("Element with ID 'Branch-index' not found."); + return; + } + const branchButton = document.getElementById("Branch"); + + if (!branchButton) { + console.error("Required element 'Branch' not found."); + return; + } + + branchIndexInput.value = index; + + // Trigger any 'change' or 'input' events Gradio might be listening for + const event = new Event("input", { bubbles: true }); // 'change' might also work + branchIndexInput.dispatchEvent(event); + + branchButton.click(); // Gradio will now pick up the 'index' + +} + function regenerateClick() { document.getElementById("Regenerate").click(); } diff --git a/modules/chat.py b/modules/chat.py index cbcde212..13f733e9 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ 
-1248,7 +1248,13 @@ def handle_delete_chat_confirm_click(state): def handle_branch_chat_click(state): - history = state['history'] + branch_from_index = state['branch_index'] + if branch_from_index == -1: + history = state['history'] + else: + history = state['history'] + history['visible'] = history['visible'][:branch_from_index + 1] + history['internal'] = history['internal'][:branch_from_index + 1] new_unique_id = datetime.now().strftime('%Y%m%d-%H-%M-%S') save_history(history, new_unique_id, state['character_menu'], state['mode']) @@ -1259,7 +1265,7 @@ def handle_branch_chat_click(state): past_chats_update = gr.update(choices=histories, value=new_unique_id) - return [history, html, past_chats_update] + return [history, html, past_chats_update, -1] def handle_rename_chat_click(): diff --git a/modules/html_generator.py b/modules/html_generator.py index 5dbde6da..36b31ac5 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -335,10 +335,12 @@ copy_svg = '''''' continue_svg = '''''' remove_svg = '''''' +branch_svg = '''''' info_svg = '''''' info_svg_small = '''''' copy_button = f'' +branch_button = f'' refresh_button = f'' continue_button = f'' remove_button = f'' @@ -355,6 +357,17 @@ def format_message_timestamp(history, role, index): return "" +def actions_html(history, i, info_message=""): + return (f'
' + f'{copy_button}' + f'{refresh_button if i == len(history["visible"]) - 1 else ""}' + f'{continue_button if i == len(history["visible"]) - 1 else ""}' + f'{remove_button if i == len(history["visible"]) - 1 else ""}' + f'{branch_button}' + f'{info_message}' + f'
') + + def generate_instruct_html(history): output = f'
' @@ -386,22 +399,18 @@ def generate_instruct_html(history): f'data-raw="{html.escape(row_internal[0], quote=True)}">' f'
' f'
{converted_visible[0]}
' - f'{copy_button}' - f'{info_message_user}' + f'
{copy_button}{info_message_user}
' f'
' f'
' ) output += ( f'
' + f'data-raw="{html.escape(row_internal[1], quote=True)}"' + f'data-index={i}>' f'
' f'
{converted_visible[1]}
' - f'{copy_button}' - f'{refresh_button if i == len(history["visible"]) - 1 else ""}' - f'{continue_button if i == len(history["visible"]) - 1 else ""}' - f'{remove_button if i == len(history["visible"]) - 1 else ""}' - f'{info_message_assistant}' + f'{actions_html(history, i, info_message_assistant)}' f'
' f'
' ) @@ -441,22 +450,20 @@ def generate_cai_chat_html(history, name1, name2, style, character, reset_cache= f'
' f'
{name1}{user_timestamp}
' f'
{converted_visible[0]}
' - f'{copy_button}' + f'
{copy_button}
' f'
' f'
' ) output += ( f'
' + f'data-raw="{html.escape(row_internal[1], quote=True)}"' + f'data-index={i}>' f'
{img_bot}
' f'
' f'
{name2}{assistant_timestamp}
' f'
{converted_visible[1]}
' - f'{copy_button}' - f'{refresh_button if i == len(history["visible"]) - 1 else ""}' - f'{continue_button if i == len(history["visible"]) - 1 else ""}' - f'{remove_button if i == len(history["visible"]) - 1 else ""}' + f'{actions_html(history, i)}' f'
' f'
' ) @@ -496,22 +503,18 @@ def generate_chat_html(history, name1, name2, reset_cache=False): f'data-raw="{html.escape(row_internal[0], quote=True)}">' f'
' f'
{converted_visible[0]}
' - f'{copy_button}' - f'{info_message_user}' + f'
{copy_button}{info_message_user}
' f'
' f'
' ) output += ( f'
' + f'data-raw="{html.escape(row_internal[1], quote=True)}"' + f'data-index={i}>' f'
' f'
{converted_visible[1]}
' - f'{copy_button}' - f'{refresh_button if i == len(history["visible"]) - 1 else ""}' - f'{continue_button if i == len(history["visible"]) - 1 else ""}' - f'{remove_button if i == len(history["visible"]) - 1 else ""}' - f'{info_message_assistant}' + f'{actions_html(history, i, info_message_assistant)}' f'
' f'
' ) diff --git a/modules/ui.py b/modules/ui.py index f5dc0632..5e8fa14e 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -210,6 +210,7 @@ def list_interface_input_elements(): 'negative_prompt', 'dry_sequence_breakers', 'grammar_string', + 'branch_index' ] # Chat elements diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 7a5430ca..513a632b 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -24,7 +24,8 @@ def create_ui(): with gr.Row(elem_id='past-chats-row', elem_classes=['pretty_scrollbar']): with gr.Column(): with gr.Row(elem_id='past-chats-buttons'): - shared.gradio['branch_chat'] = gr.Button('Branch', elem_classes='refresh-button', interactive=not mu) + shared.gradio['branch_chat'] = gr.Button('Branch', elem_classes='refresh-button', elem_id='Branch', interactive=not mu) + shared.gradio['branch_index'] = gr.Number(value=-1, precision=0, visible=False, elem_id="Branch-index", interactive=True) shared.gradio['rename_chat'] = gr.Button('Rename', elem_classes='refresh-button', interactive=not mu) shared.gradio['delete_chat'] = gr.Button('🗑️', elem_classes='refresh-button', interactive=not mu) shared.gradio['Start new chat'] = gr.Button('New chat', elem_classes=['refresh-button', 'focus-on-chat-input']) @@ -258,7 +259,7 @@ def create_event_handlers(): shared.gradio['branch_chat'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.handle_branch_chat_click, gradio('interface_state'), gradio('history', 'display', 'unique_id'), show_progress=False) + chat.handle_branch_chat_click, gradio('interface_state'), gradio('history', 'display', 'unique_id', 'branch_index'), show_progress=False) shared.gradio['rename_chat'].click(chat.handle_rename_chat_click, None, gradio('rename_to', 'rename-row'), show_progress=False) shared.gradio['rename_to-cancel'].click(lambda: gr.update(visible=False), None, gradio('rename-row'), show_progress=False) From 616ea6966d4821357076ff0c3b0a37967b736dd1 Mon Sep 17 00:00:00 2001 From: oobabooga Date: Tue, 20 May 2025 12:51:28 -0300 Subject: [PATCH 15/61] Store previous reply versions on regenerate (#7004) --- modules/chat.py | 37 +++++++++++++++++++++++++++++++++++++ 1 file changed, 37 insertions(+) diff --git a/modules/chat.py b/modules/chat.py index 13f733e9..3efc55db 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -365,6 +365,34 @@ def get_stopping_strings(state): return result +def add_message_version(history, row_idx, is_current=True): + """Add the current message as a version in the history metadata""" + if 'metadata' not in history: + history['metadata'] = {} + + if row_idx >= len(history['internal']) or not history['internal'][row_idx][1].strip(): + return # Skip if row doesn't exist or message is empty + + key = f"assistant_{row_idx}" + + # Initialize metadata structures if needed + if key not in history['metadata']: + history['metadata'][key] = {"timestamp": get_current_timestamp()} + if "versions" not in history['metadata'][key]: + history['metadata'][key]["versions"] = [] + + # Add current message as a version + history['metadata'][key]["versions"].append({ + "content": history['internal'][row_idx][1], + "visible_content": history['visible'][row_idx][1], + "timestamp": get_current_timestamp() + }) + + # Update index if this is the current version + if is_current: + history['metadata'][key]["current_version_index"] = len(history['metadata'][key]["versions"]) - 1 + + def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_message=True, for_ui=False): history = 
state['history'] output = copy.deepcopy(history) @@ -405,6 +433,10 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess text, visible_text = output['internal'][-1][0], output['visible'][-1][0] if regenerate: row_idx = len(output['internal']) - 1 + + # Store the existing response as a version before regenerating + add_message_version(output, row_idx, is_current=False) + if loading_message: yield { 'visible': output['visible'][:-1] + [[visible_text, shared.processing_message]], @@ -465,6 +497,11 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess if is_stream: yield output + # Add the newly generated response as a version (only for regeneration) + if regenerate: + row_idx = len(output['internal']) - 1 + add_message_version(output, row_idx, is_current=True) + output['visible'][-1][1] = apply_extensions('output', output['visible'][-1][1], state, is_chat=True) yield output From 51c50b265d50a46b345b1b1d4afa55b5c94d5063 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 20 May 2025 11:15:38 -0700 Subject: [PATCH 16/61] Update llama.cpp to https://github.com/ggml-org/llama.cpp/commit/b7a17463ec190aeee7b9077c606c910fb4688b84 --- requirements/full/requirements.txt | 4 ++-- requirements/full/requirements_amd.txt | 4 ++-- requirements/full/requirements_amd_noavx2.txt | 4 ++-- requirements/full/requirements_apple_intel.txt | 4 ++-- requirements/full/requirements_apple_silicon.txt | 6 +++--- requirements/full/requirements_cpu_only.txt | 4 ++-- requirements/full/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/full/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements.txt | 4 ++-- requirements/portable/requirements_apple_intel.txt | 4 ++-- requirements/portable/requirements_apple_silicon.txt | 6 +++--- requirements/portable/requirements_cpu_only.txt | 4 ++-- requirements/portable/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/portable/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements_vulkan.txt | 4 ++-- requirements/portable/requirements_vulkan_noavx2.txt | 4 ++-- 16 files changed, 34 insertions(+), 34 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index 1dcf8c93..c65ab8a2 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -30,8 +30,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" 
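The trailing "; platform_system == ..." clauses on these wheel URLs are standard PEP 508 environment markers, so pip only installs the lines whose conditions match the running system. A minimal way to check a marker locally is sketched below using the packaging library (which pip itself vendors); the marker string is copied from one of the Linux lines above:

    from packaging.markers import Marker

    # Evaluate a marker against the current interpreter and OS
    marker = Marker('platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11"')
    print(marker.evaluate())  # True only on a matching system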
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index 4a1702e9..3da16d3e 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -29,7 +29,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index 0caca631..271b4bd0 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -29,7 +29,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index 9a439798..15df937c 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -29,7 +29,7 @@ sse-starlette==1.6.5 tiktoken # Mac wheels 
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9-py3-none-any.whl https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index 16e77264..bd2f8339 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -29,8 +29,8 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9-py3-none-any.whl https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index 468f97fa..98c25649 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -29,5 +29,5 @@ sse-starlette==1.6.5 
tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index eb7872ed..6e13c1d2 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -29,5 +29,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index 3ba42c0b..67a5cb73 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -30,8 +30,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and 
python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt index 6831c461..409252f6 100644 --- a/requirements/portable/requirements.txt +++ b/requirements/portable/requirements.txt @@ -15,5 +15,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt index fbb77ec0..89adbabf 100644 --- a/requirements/portable/requirements_apple_intel.txt +++ b/requirements/portable/requirements_apple_intel.txt @@ -15,5 +15,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt index 71575b28..0b1c03fa 100644 --- a/requirements/portable/requirements_apple_silicon.txt +++ b/requirements/portable/requirements_apple_silicon.txt @@ -15,6 +15,6 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" 
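On macOS, platform_release in these markers is the Darwin kernel version rather than the marketing version, which is why the version ranges pair with the wheel tags as they do: Darwin 22.x corresponds to macOS 13, 23.x to macOS 14, and 24.x to macOS 15. A quick local check is sketched below (the sample output is roughly what an Apple Silicon machine on macOS 15 would report):

    import platform

    # Inspect the values the environment markers above are tested against
    print(platform.system(), platform.release(), platform.machine())
    # e.g. "Darwin", "24.4.0", "arm64" on macOS 15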
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt index d093ab14..eb4319b7 100644 --- a/requirements/portable/requirements_cpu_only.txt +++ b/requirements/portable/requirements_cpu_only.txt @@ -15,5 +15,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt index 064d8e6c..0a60d4de 100644 --- a/requirements/portable/requirements_cpu_only_noavx2.txt +++ b/requirements/portable/requirements_cpu_only_noavx2.txt @@ -15,5 +15,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt index 342239e8..652e9900 100644 --- a/requirements/portable/requirements_noavx2.txt +++ b/requirements/portable/requirements_noavx2.txt @@ -15,5 +15,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" 
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt index 4ef3e97b..c83d61c7 100644 --- a/requirements/portable/requirements_vulkan.txt +++ b/requirements/portable/requirements_vulkan.txt @@ -15,5 +15,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt index 7b39feb1..e69f3bdf 100644 --- a/requirements/portable/requirements_vulkan_noavx2.txt +++ b/requirements/portable/requirements_vulkan_noavx2.txt @@ -15,5 +15,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.13.0/llama_cpp_binaries-0.13.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" From 5d00574a566ac8c66af16f76c9cbda6696e46e00 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 20 May 2025 16:20:49 -0700 Subject: [PATCH 17/61] Minor UI fixes --- modules/models_settings.py | 4 ++-- modules/ui_model_menu.py | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/modules/models_settings.py b/modules/models_settings.py index e742e0d8..df5a8e8d 100644 --- a/modules/models_settings.py +++ b/modules/models_settings.py @@ -438,7 +438,7 @@ def update_gpu_layers_and_vram(loader, model, gpu_layers, ctx_size, cache_type, - If for_ui=False: (vram_usage, adjusted_layers) or just vram_usage """ if loader != 'llama.cpp' or model in ["None", None] or not model.endswith(".gguf"): - vram_info = "
Estimated VRAM to load the model:" + vram_info = "
Estimated VRAM to load the model:
" if for_ui: return (vram_info, gr.update()) if auto_adjust else vram_info else: @@ -480,7 +480,7 @@ def update_gpu_layers_and_vram(loader, model, gpu_layers, ctx_size, cache_type, vram_usage = estimate_vram(model, current_layers, ctx_size, cache_type) if for_ui: - vram_info = f"
Estimated VRAM to load the model: {vram_usage:.0f} MiB" + vram_info = f"
Estimated VRAM to load the model: {vram_usage:.0f} MiB
" if auto_adjust: return vram_info, gr.update(value=current_layers, maximum=max_layers) else: diff --git a/modules/ui_model_menu.py b/modules/ui_model_menu.py index d361f692..862b3893 100644 --- a/modules/ui_model_menu.py +++ b/modules/ui_model_menu.py @@ -310,7 +310,7 @@ def get_initial_vram_info(): for_ui=True ) - return "
Estimated VRAM to load the model:" + return "
Estimated VRAM to load the model:
" def get_initial_gpu_layers_max(): From 409a48d6bdd0f2bc861fc459cdd701d697bdd188 Mon Sep 17 00:00:00 2001 From: oobabooga Date: Wed, 21 May 2025 00:36:20 -0300 Subject: [PATCH 18/61] Add attachments support (text files, PDF documents) (#7005) --- css/main.css | 56 ++++++++ modules/chat.py | 124 ++++++++++++++++-- modules/html_generator.py | 41 ++++++ modules/ui_chat.py | 6 +- requirements/full/requirements.txt | 1 + requirements/full/requirements_amd.txt | 1 + requirements/full/requirements_amd_noavx2.txt | 1 + .../full/requirements_apple_intel.txt | 1 + .../full/requirements_apple_silicon.txt | 1 + requirements/full/requirements_cpu_only.txt | 1 + .../full/requirements_cpu_only_noavx2.txt | 1 + requirements/full/requirements_noavx2.txt | 1 + requirements/full/requirements_nowheels.txt | 1 + requirements/portable/requirements.txt | 1 + .../portable/requirements_apple_intel.txt | 1 + .../portable/requirements_apple_silicon.txt | 1 + .../portable/requirements_cpu_only.txt | 1 + .../portable/requirements_cpu_only_noavx2.txt | 1 + requirements/portable/requirements_noavx2.txt | 1 + .../portable/requirements_nowheels.txt | 1 + requirements/portable/requirements_vulkan.txt | 1 + .../portable/requirements_vulkan_noavx2.txt | 1 + 22 files changed, 233 insertions(+), 12 deletions(-) diff --git a/css/main.css b/css/main.css index d7142336..6cb99fc3 100644 --- a/css/main.css +++ b/css/main.css @@ -592,6 +592,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { padding: 0.65rem 2.5rem; border: 0; box-shadow: 0; + border-radius: 8px; } #chat-input textarea::placeholder { @@ -611,6 +612,16 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { display: none; } +#chat-input .submit-button { + display: none; +} + +#chat-input .upload-button { + margin-right: 16px; + margin-bottom: 7px; + background: transparent; +} + .chat-input-positioned { max-width: 54rem; left: 50%; @@ -1395,3 +1406,48 @@ strong { .dark #vram-info .value { color: #07ff07; } + +.message-attachments { + display: flex; + flex-wrap: wrap; + gap: 8px; + margin-top: 8px; +} + +.attachment-box { + display: flex; + flex-direction: column; + align-items: center; + justify-content: center; + padding: 8px; + background: rgb(0 0 0 / 5%); + border-radius: 6px; + border: 1px solid rgb(0 0 0 / 10%); + min-width: 80px; + max-width: 120px; +} + +.attachment-icon { + margin-bottom: 4px; + color: #555; +} + +.attachment-name { + font-size: 0.8em; + text-align: center; + word-break: break-word; + overflow: hidden; + text-overflow: ellipsis; + display: -webkit-box; + -webkit-line-clamp: 2; + -webkit-box-orient: vertical; +} + +.dark .attachment-box { + background: rgb(255 255 255 / 5%); + border: 1px solid rgb(255 255 255 / 10%); +} + +.dark .attachment-icon { + color: #ccc; +} diff --git a/modules/chat.py b/modules/chat.py index 3efc55db..cdd50c92 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -157,7 +157,9 @@ def generate_chat_prompt(user_input, state, **kwargs): impersonate = kwargs.get('impersonate', False) _continue = kwargs.get('_continue', False) also_return_rows = kwargs.get('also_return_rows', False) - history = kwargs.get('history', state['history'])['internal'] + history_data = kwargs.get('history', state['history']) + history = history_data['internal'] + metadata = history_data.get('metadata', {}) # Templates chat_template_str = state['chat_template_str'] @@ -196,11 +198,13 @@ def generate_chat_prompt(user_input, state, **kwargs): messages.append({"role": "system", "content": context}) insert_pos = len(messages) - for entry in 
reversed(history): + for i, entry in enumerate(reversed(history)): user_msg = entry[0].strip() assistant_msg = entry[1].strip() tool_msg = entry[2].strip() if len(entry) > 2 else '' + row_idx = len(history) - i - 1 + if tool_msg: messages.insert(insert_pos, {"role": "tool", "content": tool_msg}) @@ -208,10 +212,40 @@ def generate_chat_prompt(user_input, state, **kwargs): messages.insert(insert_pos, {"role": "assistant", "content": assistant_msg}) if user_msg not in ['', '<|BEGIN-VISIBLE-CHAT|>']: - messages.insert(insert_pos, {"role": "user", "content": user_msg}) + # Check for user message attachments in metadata + user_key = f"user_{row_idx}" + enhanced_user_msg = user_msg + + # Add attachment content if present + if user_key in metadata and "attachments" in metadata[user_key]: + attachments_text = "" + for attachment in metadata[user_key]["attachments"]: + filename = attachment.get("name", "file") + content = attachment.get("content", "") + attachments_text += f"\nName: {filename}\nContents:\n\n=====\n{content}\n=====\n\n" + + if attachments_text: + enhanced_user_msg = f"{user_msg}\n\nATTACHMENTS:{attachments_text}" + + messages.insert(insert_pos, {"role": "user", "content": enhanced_user_msg}) user_input = user_input.strip() if user_input and not impersonate and not _continue: + # For the current user input being processed, check if we need to add attachments + if not impersonate and not _continue and len(history_data.get('metadata', {})) > 0: + current_row_idx = len(history) + user_key = f"user_{current_row_idx}" + + if user_key in metadata and "attachments" in metadata[user_key]: + attachments_text = "" + for attachment in metadata[user_key]["attachments"]: + filename = attachment.get("name", "file") + content = attachment.get("content", "") + attachments_text += f"\nName: {filename}\nContents:\n\n=====\n{content}\n=====\n\n" + + if attachments_text: + user_input = f"{user_input}\n\nATTACHMENTS:{attachments_text}" + messages.append({"role": "user", "content": user_input}) def make_prompt(messages): @@ -280,7 +314,6 @@ def generate_chat_prompt(user_input, state, **kwargs): # Resort to truncating the user input else: - user_message = messages[-1]['content'] # Bisect the truncation point @@ -393,7 +426,74 @@ def add_message_version(history, row_idx, is_current=True): history['metadata'][key]["current_version_index"] = len(history['metadata'][key]["versions"]) - 1 +def add_message_attachment(history, row_idx, file_path, is_user=True): + """Add a file attachment to a message in history metadata""" + if 'metadata' not in history: + history['metadata'] = {} + + key = f"{'user' if is_user else 'assistant'}_{row_idx}" + + if key not in history['metadata']: + history['metadata'][key] = {"timestamp": get_current_timestamp()} + if "attachments" not in history['metadata'][key]: + history['metadata'][key]["attachments"] = [] + + # Get file info using pathlib + path = Path(file_path) + filename = path.name + file_extension = path.suffix.lower() + + try: + # Handle different file types + if file_extension == '.pdf': + # Process PDF file + content = extract_pdf_text(path) + file_type = "application/pdf" + else: + # Default handling for text files + with open(path, 'r', encoding='utf-8') as f: + content = f.read() + file_type = "text/plain" + + # Add attachment + attachment = { + "name": filename, + "type": file_type, + "content": content, + } + + history['metadata'][key]["attachments"].append(attachment) + return content # Return the content for reuse + except Exception as e: + logger.error(f"Error 
processing attachment {filename}: {e}") + return None + + +def extract_pdf_text(pdf_path): + """Extract text from a PDF file""" + import PyPDF2 + + text = "" + try: + with open(pdf_path, 'rb') as file: + pdf_reader = PyPDF2.PdfReader(file) + for page_num in range(len(pdf_reader.pages)): + page = pdf_reader.pages[page_num] + text += page.extract_text() + "\n\n" + + return text.strip() + except Exception as e: + logger.error(f"Error extracting text from PDF: {e}") + return f"[Error extracting PDF text: {str(e)}]" + + def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_message=True, for_ui=False): + # Handle dict format with text and files + files = [] + if isinstance(text, dict): + files = text.get('files', []) + text = text.get('text', '') + history = state['history'] output = copy.deepcopy(history) output = apply_extensions('history', output) @@ -411,12 +511,18 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess if not (regenerate or _continue): visible_text = html.escape(text) + # Process file attachments and store in metadata + row_idx = len(output['internal']) + + # Add attachments to metadata only, not modifying the message text + for file_path in files: + add_message_attachment(output, row_idx, file_path, is_user=True) + # Apply extensions text, visible_text = apply_extensions('chat_input', text, visible_text, state) text = apply_extensions('input', text, state, is_chat=True) # Current row index - row_idx = len(output['internal']) output['internal'].append([text, '']) output['visible'].append([visible_text, '']) # Add metadata with timestamp @@ -1215,7 +1321,7 @@ def handle_replace_last_reply_click(text, state): save_history(history, state['unique_id'], state['character_menu'], state['mode']) html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) - return [history, html, ""] + return [history, html, {"text": "", "files": []}] def handle_send_dummy_message_click(text, state): @@ -1223,7 +1329,7 @@ def handle_send_dummy_message_click(text, state): save_history(history, state['unique_id'], state['character_menu'], state['mode']) html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) - return [history, html, ""] + return [history, html, {"text": "", "files": []}] def handle_send_dummy_reply_click(text, state): @@ -1231,7 +1337,7 @@ def handle_send_dummy_reply_click(text, state): save_history(history, state['unique_id'], state['character_menu'], state['mode']) html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) - return [history, html, ""] + return [history, html, {"text": "", "files": []}] def handle_remove_last_click(state): @@ -1239,7 +1345,7 @@ def handle_remove_last_click(state): save_history(history, state['unique_id'], state['character_menu'], state['mode']) html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) - return [history, html, last_input] + return [history, html, {"text": last_input, "files": []}] def handle_unique_id_select(state): diff --git a/modules/html_generator.py b/modules/html_generator.py index 36b31ac5..f5e0b28f 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -338,6 +338,7 @@ remove_svg = '''''' info_svg = '''''' info_svg_small = '''''' +attachment_svg = '''''' copy_button = f'' branch_button = f'' @@ -357,6 +358,28 @@ 
def format_message_timestamp(history, role, index): return "" +def format_message_attachments(history, role, index): + """Get formatted HTML for message attachments if available""" + key = f"{role}_{index}" + if 'metadata' in history and key in history['metadata'] and 'attachments' in history['metadata'][key]: + attachments = history['metadata'][key]['attachments'] + if not attachments: + return "" + + attachments_html = '
' + for attachment in attachments: + attachments_html += ( + f'
' + f'
{attachment_svg}
' + f'
{html.escape(attachment["name"])}
' + f'
' + ) + attachments_html += '
' + return attachments_html + + return "" + + def actions_html(history, i, info_message=""): return (f'
' f'{copy_button}' @@ -380,6 +403,10 @@ def generate_instruct_html(history): user_timestamp = format_message_timestamp(history, "user", i) assistant_timestamp = format_message_timestamp(history, "assistant", i) + # Get attachments + user_attachments = format_message_attachments(history, "user", i) + assistant_attachments = format_message_attachments(history, "assistant", i) + # Create info buttons for timestamps if they exist info_message_user = "" if user_timestamp != "": @@ -399,6 +426,7 @@ def generate_instruct_html(history): f'data-raw="{html.escape(row_internal[0], quote=True)}">' f'
' f'
{converted_visible[0]}
' + f'{user_attachments}' f'
{copy_button}{info_message_user}
' f'
' f'
' @@ -410,6 +438,7 @@ def generate_instruct_html(history): f'data-index={i}>' f'
' f'
{converted_visible[1]}
' + f'{assistant_attachments}' f'{actions_html(history, i, info_message_assistant)}' f'
' f'
' @@ -442,6 +471,10 @@ def generate_cai_chat_html(history, name1, name2, style, character, reset_cache= user_timestamp = format_message_timestamp(history, "user", i) assistant_timestamp = format_message_timestamp(history, "assistant", i) + # Get attachments + user_attachments = format_message_attachments(history, "user", i) + assistant_attachments = format_message_attachments(history, "assistant", i) + if converted_visible[0]: # Don't display empty user messages output += ( f'
' f'
{name1}{user_timestamp}
' f'
{converted_visible[0]}
' + f'{user_attachments}' f'
{copy_button}
' f'
' f'
' @@ -463,6 +497,7 @@ def generate_cai_chat_html(history, name1, name2, style, character, reset_cache= f'
' f'
{name2}{assistant_timestamp}
' f'
{converted_visible[1]}
' + f'{assistant_attachments}' f'{actions_html(history, i)}' f'
' f'
' @@ -484,6 +519,10 @@ def generate_chat_html(history, name1, name2, reset_cache=False): user_timestamp = format_message_timestamp(history, "user", i) assistant_timestamp = format_message_timestamp(history, "assistant", i) + # Get attachments + user_attachments = format_message_attachments(history, "user", i) + assistant_attachments = format_message_attachments(history, "assistant", i) + # Create info buttons for timestamps if they exist info_message_user = "" if user_timestamp != "": @@ -503,6 +542,7 @@ def generate_chat_html(history, name1, name2, reset_cache=False): f'data-raw="{html.escape(row_internal[0], quote=True)}">' f'
' f'
{converted_visible[0]}
' + f'{user_attachments}' f'
{copy_button}{info_message_user}
' f'
' f'' @@ -514,6 +554,7 @@ def generate_chat_html(history, name1, name2, reset_cache=False): f'data-index={i}>' f'
' f'
{converted_visible[1]}
' + f'{assistant_attachments}' f'{actions_html(history, i, info_message_assistant)}' f'
' f'' diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 513a632b..f244113c 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -54,7 +54,7 @@ def create_ui(): gr.HTML(value='
', elem_id='gr-hover') with gr.Column(scale=10, elem_id='chat-input-container'): - shared.gradio['textbox'] = gr.Textbox(label='', placeholder='Send a message', elem_id='chat-input', elem_classes=['add_scrollbar']) + shared.gradio['textbox'] = gr.MultimodalTextbox(label='', placeholder='Send a message', file_types=['text', '.pdf'], elem_id='chat-input', elem_classes=['add_scrollbar']) shared.gradio['show_controls'] = gr.Checkbox(value=shared.settings['show_controls'], label='Show controls (Ctrl+S)', elem_id='show-controls') shared.gradio['typing-dots'] = gr.HTML(value='
', label='typing', elem_id='typing-container') @@ -186,7 +186,7 @@ def create_event_handlers(): shared.gradio['Generate'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda x: (x, ''), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then( + lambda x: (x, {"text": "", "files": []}), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then( lambda: None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.add("_generating")').then( chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then( None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.remove("_generating")').then( @@ -194,7 +194,7 @@ def create_event_handlers(): shared.gradio['textbox'].submit( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - lambda x: (x, ''), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then( + lambda x: (x, {"text": "", "files": []}), gradio('textbox'), gradio('Chat input', 'textbox'), show_progress=False).then( lambda: None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.add("_generating")').then( chat.generate_chat_reply_wrapper, gradio(inputs), gradio('display', 'history'), show_progress=False).then( None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.remove("_generating")').then( diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index c65ab8a2..afb5f9d4 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -13,6 +13,7 @@ peft==0.15.* Pillow>=9.5.0 psutil pydantic==2.8.2 +PyPDF2==3.0.1 pyyaml requests rich diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index 3da16d3e..46c33034 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -12,6 +12,7 @@ peft==0.15.* Pillow>=9.5.0 psutil pydantic==2.8.2 +PyPDF2==3.0.1 pyyaml requests rich diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index 271b4bd0..c8e94cbd 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -12,6 +12,7 @@ peft==0.15.* Pillow>=9.5.0 psutil pydantic==2.8.2 +PyPDF2==3.0.1 pyyaml requests rich diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index 15df937c..dc403ae2 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -12,6 +12,7 @@ peft==0.15.* Pillow>=9.5.0 psutil pydantic==2.8.2 +PyPDF2==3.0.1 pyyaml requests rich diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index bd2f8339..5c643c4c 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -12,6 +12,7 @@ peft==0.15.* Pillow>=9.5.0 psutil pydantic==2.8.2 +PyPDF2==3.0.1 pyyaml requests rich diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index 98c25649..ccabea84 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -12,6 +12,7 @@ peft==0.15.* 
Pillow>=9.5.0 psutil pydantic==2.8.2 +PyPDF2==3.0.1 pyyaml requests rich diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index 6e13c1d2..7e9da47f 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -12,6 +12,7 @@ peft==0.15.* Pillow>=9.5.0 psutil pydantic==2.8.2 +PyPDF2==3.0.1 pyyaml requests rich diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index 67a5cb73..fdf5cd0e 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -13,6 +13,7 @@ peft==0.15.* Pillow>=9.5.0 psutil pydantic==2.8.2 +PyPDF2==3.0.1 pyyaml requests rich diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt index 2e631bf0..22d39ded 100644 --- a/requirements/full/requirements_nowheels.txt +++ b/requirements/full/requirements_nowheels.txt @@ -12,6 +12,7 @@ peft==0.15.* Pillow>=9.5.0 psutil pydantic==2.8.2 +PyPDF2==3.0.1 pyyaml requests rich diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt index 409252f6..ec9bafc6 100644 --- a/requirements/portable/requirements.txt +++ b/requirements/portable/requirements.txt @@ -4,6 +4,7 @@ jinja2==3.1.6 markdown numpy==1.26.* pydantic==2.8.2 +PyPDF2==3.0.1 pyyaml requests rich diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt index 89adbabf..025a737e 100644 --- a/requirements/portable/requirements_apple_intel.txt +++ b/requirements/portable/requirements_apple_intel.txt @@ -4,6 +4,7 @@ jinja2==3.1.6 markdown numpy==1.26.* pydantic==2.8.2 +PyPDF2==3.0.1 pyyaml requests rich diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt index 0b1c03fa..32644e87 100644 --- a/requirements/portable/requirements_apple_silicon.txt +++ b/requirements/portable/requirements_apple_silicon.txt @@ -4,6 +4,7 @@ jinja2==3.1.6 markdown numpy==1.26.* pydantic==2.8.2 +PyPDF2==3.0.1 pyyaml requests rich diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt index eb4319b7..bd5c1d9b 100644 --- a/requirements/portable/requirements_cpu_only.txt +++ b/requirements/portable/requirements_cpu_only.txt @@ -4,6 +4,7 @@ jinja2==3.1.6 markdown numpy==1.26.* pydantic==2.8.2 +PyPDF2==3.0.1 pyyaml requests rich diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt index 0a60d4de..51f2b7d9 100644 --- a/requirements/portable/requirements_cpu_only_noavx2.txt +++ b/requirements/portable/requirements_cpu_only_noavx2.txt @@ -4,6 +4,7 @@ jinja2==3.1.6 markdown numpy==1.26.* pydantic==2.8.2 +PyPDF2==3.0.1 pyyaml requests rich diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt index 652e9900..aad6bf5a 100644 --- a/requirements/portable/requirements_noavx2.txt +++ b/requirements/portable/requirements_noavx2.txt @@ -4,6 +4,7 @@ jinja2==3.1.6 markdown numpy==1.26.* pydantic==2.8.2 +PyPDF2==3.0.1 pyyaml requests rich diff --git a/requirements/portable/requirements_nowheels.txt b/requirements/portable/requirements_nowheels.txt index 6f9566ba..4c055426 100644 --- a/requirements/portable/requirements_nowheels.txt +++ b/requirements/portable/requirements_nowheels.txt @@ -4,6 +4,7 @@ jinja2==3.1.6 markdown 
numpy==1.26.* pydantic==2.8.2 +PyPDF2==3.0.1 pyyaml requests rich diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt index c83d61c7..3d98d1b0 100644 --- a/requirements/portable/requirements_vulkan.txt +++ b/requirements/portable/requirements_vulkan.txt @@ -4,6 +4,7 @@ jinja2==3.1.6 markdown numpy==1.26.* pydantic==2.8.2 +PyPDF2==3.0.1 pyyaml requests rich diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt index e69f3bdf..f954b8d2 100644 --- a/requirements/portable/requirements_vulkan_noavx2.txt +++ b/requirements/portable/requirements_vulkan_noavx2.txt @@ -4,6 +4,7 @@ jinja2==3.1.6 markdown numpy==1.26.* pydantic==2.8.2 +PyPDF2==3.0.1 pyyaml requests rich From cc8a4fdcb114bfd068c42cea267e34daaf901a30 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 20 May 2025 21:31:18 -0700 Subject: [PATCH 19/61] Minor improvement to attachments prompt format --- modules/chat.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index cdd50c92..715f4327 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -225,7 +225,7 @@ def generate_chat_prompt(user_input, state, **kwargs): attachments_text += f"\nName: {filename}\nContents:\n\n=====\n{content}\n=====\n\n" if attachments_text: - enhanced_user_msg = f"{user_msg}\n\nATTACHMENTS:{attachments_text}" + enhanced_user_msg = f"{user_msg}\n\nATTACHMENTS:\n{attachments_text}" messages.insert(insert_pos, {"role": "user", "content": enhanced_user_msg}) @@ -244,7 +244,7 @@ def generate_chat_prompt(user_input, state, **kwargs): attachments_text += f"\nName: {filename}\nContents:\n\n=====\n{content}\n=====\n\n" if attachments_text: - user_input = f"{user_input}\n\nATTACHMENTS:{attachments_text}" + user_input = f"{user_input}\n\nATTACHMENTS:\n{attachments_text}" messages.append({"role": "user", "content": user_input}) From 8620d6ffe73048932594494752f82cc4a20f8f92 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 20 May 2025 21:34:07 -0700 Subject: [PATCH 20/61] Make it possible to upload multiple text files/pdfs at once --- modules/ui_chat.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/ui_chat.py b/modules/ui_chat.py index f244113c..ab4b4e60 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -54,7 +54,7 @@ def create_ui(): gr.HTML(value='
', elem_id='gr-hover') with gr.Column(scale=10, elem_id='chat-input-container'): - shared.gradio['textbox'] = gr.MultimodalTextbox(label='', placeholder='Send a message', file_types=['text', '.pdf'], elem_id='chat-input', elem_classes=['add_scrollbar']) + shared.gradio['textbox'] = gr.MultimodalTextbox(label='', placeholder='Send a message', file_types=['text', '.pdf'], file_count="multiple", elem_id='chat-input', elem_classes=['add_scrollbar']) shared.gradio['show_controls'] = gr.Checkbox(value=shared.settings['show_controls'], label='Show controls (Ctrl+S)', elem_id='show-controls') shared.gradio['typing-dots'] = gr.HTML(value='
', label='typing', elem_id='typing-container') From 0d3f85477897c2999f456713ce998b59b26a6a22 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 20 May 2025 21:40:42 -0700 Subject: [PATCH 21/61] Improve the style of thinking blocks --- css/main.css | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/css/main.css b/css/main.css index 6cb99fc3..8444cae8 100644 --- a/css/main.css +++ b/css/main.css @@ -1370,6 +1370,11 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { contain: layout; } +.chat .message-body .thinking-content p, +.chat .message-body .thinking-content li { + font-size: 14px !important; +} + /* Animation for opening thinking blocks */ @keyframes fadeIn { from { opacity: 0; } From 7f6579ab20d8fd215e81f3b766f3aa9d83066bdb Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 20 May 2025 21:49:44 -0700 Subject: [PATCH 22/61] Minor style change --- css/main.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/css/main.css b/css/main.css index 8444cae8..d1be8eb1 100644 --- a/css/main.css +++ b/css/main.css @@ -1372,7 +1372,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { .chat .message-body .thinking-content p, .chat .message-body .thinking-content li { - font-size: 14px !important; + font-size: 15px !important; } /* Animation for opening thinking blocks */ From bae1aa34aa020aa749f942708b96e28e2b85c4a4 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Sun, 25 May 2025 17:19:26 -0700 Subject: [PATCH 23/61] Fix loading `Llama-3_3-Nemotron-Super-49B-v1` and similar models (closes #7012) --- modules/models_settings.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/models_settings.py b/modules/models_settings.py index df5a8e8d..c914bdea 100644 --- a/modules/models_settings.py +++ b/modules/models_settings.py @@ -335,7 +335,7 @@ def estimate_vram(gguf_file, gpu_layers, ctx_size, cache_type): if key.endswith('.block_count'): n_layers = value elif key.endswith('.attention.head_count_kv'): - n_kv_heads = value + n_kv_heads = max(value) if isinstance(value, list) else value elif key.endswith('.embedding_length'): embedding_dim = value From 73bfc936a078ce428cc10b590a83e0391b6aed58 Mon Sep 17 00:00:00 2001 From: djholtby Date: Mon, 26 May 2025 21:39:03 -0400 Subject: [PATCH 24/61] Close response generator when stopping API generation (#7014) --- extensions/openai/script.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/extensions/openai/script.py b/extensions/openai/script.py index b6abae20..24bcd69d 100644 --- a/extensions/openai/script.py +++ b/extensions/openai/script.py @@ -125,6 +125,7 @@ async def openai_completions(request: Request, request_data: CompletionRequest): yield {"data": json.dumps(resp)} finally: stop_everything_event() + response.close() return return EventSourceResponse(generator()) # SSE streaming @@ -157,6 +158,7 @@ async def openai_chat_completions(request: Request, request_data: ChatCompletion yield {"data": json.dumps(resp)} finally: stop_everything_event() + response.close() return return EventSourceResponse(generator()) # SSE streaming From 8531100109ecc4a5bed41cc2f3adaddf9d7157f8 Mon Sep 17 00:00:00 2001 From: Underscore <47636331+Th-Underscore@users.noreply.github.com> Date: Mon, 26 May 2025 21:40:09 -0400 Subject: [PATCH 25/61] Fix textbox text usage in methods (#7009) --- modules/chat.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git 
a/modules/chat.py b/modules/chat.py index 715f4327..36a07836 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -708,8 +708,9 @@ def send_last_reply_to_input(history): return '' -def replace_last_reply(text, state): +def replace_last_reply(textbox, state): history = state['history'] + text = textbox['text'] # Initialize metadata if not present if 'metadata' not in history: @@ -726,8 +727,9 @@ def replace_last_reply(text, state): return history -def send_dummy_message(text, state): +def send_dummy_message(textbox, state): history = state['history'] + text = textbox['text'] # Initialize metadata if not present if 'metadata' not in history: @@ -741,8 +743,9 @@ def send_dummy_message(text, state): return history -def send_dummy_reply(text, state): +def send_dummy_reply(textbox, state): history = state['history'] + text = textbox['text'] # Initialize metadata if not present if 'metadata' not in history: From cc9b7253c1216e5340da85cba9b65a13cf3526e9 Mon Sep 17 00:00:00 2001 From: "dependabot[bot]" <49699333+dependabot[bot]@users.noreply.github.com> Date: Mon, 26 May 2025 23:13:10 -0300 Subject: [PATCH 26/61] Update transformers requirement in /requirements/full (#7017) --- requirements/full/requirements.txt | 2 +- requirements/full/requirements_amd.txt | 2 +- requirements/full/requirements_amd_noavx2.txt | 2 +- requirements/full/requirements_apple_intel.txt | 2 +- requirements/full/requirements_apple_silicon.txt | 2 +- requirements/full/requirements_cpu_only.txt | 2 +- requirements/full/requirements_cpu_only_noavx2.txt | 2 +- requirements/full/requirements_noavx2.txt | 2 +- requirements/full/requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index afb5f9d4..3d18f5fd 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -21,7 +21,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.52.* tqdm wandb diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index 46c33034..82b19964 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.52.* tqdm wandb diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index c8e94cbd..a8b03014 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.52.* tqdm wandb diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index dc403ae2..5a61ac7d 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.52.* tqdm wandb diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index 5c643c4c..6862c3b4 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.52.* tqdm wandb diff --git a/requirements/full/requirements_cpu_only.txt 
b/requirements/full/requirements_cpu_only.txt index ccabea84..e6982779 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.52.* tqdm wandb diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index 7e9da47f..97bff786 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.52.* tqdm wandb diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index fdf5cd0e..17c7e246 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -21,7 +21,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.52.* tqdm wandb diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt index 22d39ded..89b32caf 100644 --- a/requirements/full/requirements_nowheels.txt +++ b/requirements/full/requirements_nowheels.txt @@ -20,7 +20,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.50.* +transformers==4.52.* tqdm wandb From 355b5f6c8b5552ccdae1aa363931724306bdbb16 Mon Sep 17 00:00:00 2001 From: Underscore <47636331+Th-Underscore@users.noreply.github.com> Date: Tue, 27 May 2025 21:54:18 -0400 Subject: [PATCH 27/61] UI: Add message version navigation (#6947) --------- Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> --- css/main.css | 41 ++++++++++++++++- js/global_scope_js.js | 38 ++++++++++++++++ js/main.js | 93 ++++++++++++++++++++++++++++++++++++++- modules/chat.py | 59 +++++++++++++++++++++++-- modules/html_generator.py | 27 +++++++++++- modules/ui.py | 2 + modules/ui_chat.py | 10 +++++ 7 files changed, 262 insertions(+), 8 deletions(-) diff --git a/css/main.css b/css/main.css index d1be8eb1..be27544c 100644 --- a/css/main.css +++ b/css/main.css @@ -1260,7 +1260,7 @@ div.svelte-362y77>*, div.svelte-362y77>.form>* { position: absolute; bottom: -23px; left: 0; - display: flex; + display: flex; gap: 5px; opacity: 0; transition: opacity 0.2s; @@ -1456,3 +1456,42 @@ strong { .dark .attachment-icon { color: #ccc; } + +/* --- Simple Version Navigation --- */ +.version-navigation { + position: absolute; + bottom: -23px; + right: 0; + display: flex; + align-items: center; + gap: 5px; + opacity: 0; + transition: opacity 0.2s; +} + +.message:hover .version-navigation, +.user-message:hover .version-navigation, +.assistant-message:hover .version-navigation { + opacity: 1; +} + +.version-nav-button { + padding: 2px 6px; + font-size: 12px; + min-width: auto; +} + +.version-nav-button[disabled] { + opacity: 0.3; + cursor: not-allowed; +} + +.version-position { + font-size: 11px; + color: currentColor; + font-family: monospace; + min-width: 35px; + text-align: center; + opacity: 0.8; + user-select: none; +} diff --git a/js/global_scope_js.js b/js/global_scope_js.js index 285d82f9..9174622e 100644 --- a/js/global_scope_js.js +++ b/js/global_scope_js.js @@ -49,6 +49,44 @@ function branchHere(element) { } +function navigateVersion(element, direction) { + if (!element) return; + + const messageElement = element.closest(".message, .user-message, .assistant-message"); + if (!messageElement) return; + + const index = 
messageElement.getAttribute("data-index"); + if (!index) return; + + const indexInput = document.getElementById("Navigate-message-index").querySelector("input"); + if (!indexInput) { + console.error("Element with ID 'Navigate-message-index' not found."); + return; + } + + const directionInput = document.getElementById("Navigate-direction").querySelector("textarea"); + if (!directionInput) { + console.error("Element with ID 'Navigate-direction' not found."); + return; + } + + const navigateButton = document.getElementById("Navigate-version"); + if (!navigateButton) { + console.error("Required element 'Navigate-version' not found."); + return; + } + + indexInput.value = index; + directionInput.value = direction; + + // Trigger any 'change' or 'input' events Gradio might be listening for + const event = new Event("input", { bubbles: true }); + indexInput.dispatchEvent(event); + directionInput.dispatchEvent(event); + + navigateButton.click(); +} + function regenerateClick() { document.getElementById("Regenerate").click(); } diff --git a/js/main.js b/js/main.js index 01c346a7..d90e8ade 100644 --- a/js/main.js +++ b/js/main.js @@ -39,9 +39,24 @@ document.querySelector(".header_bar").addEventListener("click", function(event) //------------------------------------------------ // Keyboard shortcuts //------------------------------------------------ + +// --- Helper functions --- // +function isModifiedKeyboardEvent() { + return (event instanceof KeyboardEvent && + event.shiftKey || + event.ctrlKey || + event.altKey || + event.metaKey); +} + +function isFocusedOnEditableTextbox() { + if (event.target.tagName === "INPUT" || event.target.tagName === "TEXTAREA") { + return !!event.target.value; + } +} + let previousTabId = "chat-tab-button"; document.addEventListener("keydown", function(event) { - // Stop generation on Esc pressed if (event.key === "Escape") { // Find the element with id 'stop' and click it @@ -49,10 +64,15 @@ document.addEventListener("keydown", function(event) { if (stopButton) { stopButton.click(); } + return; + } + + if (!document.querySelector("#chat-tab").checkVisibility() ) { + return; } // Show chat controls on Ctrl + S - else if (event.ctrlKey && event.key == "s") { + if (event.ctrlKey && event.key == "s") { event.preventDefault(); var showControlsElement = document.getElementById("show-controls"); @@ -100,6 +120,23 @@ document.addEventListener("keydown", function(event) { document.getElementById("Impersonate").click(); } + // --- Simple version navigation --- // + if (!isFocusedOnEditableTextbox()) { + // Version navigation on Arrow keys (horizontal) + if (!isModifiedKeyboardEvent() && event.key === "ArrowLeft") { + event.preventDefault(); + navigateLastAssistantMessage("left"); + } + + else if (!isModifiedKeyboardEvent() && event.key === "ArrowRight") { + event.preventDefault(); + if (!navigateLastAssistantMessage("right")) { + // If can't navigate right (last version), regenerate + document.getElementById("Regenerate").click(); + } + } + } + }); //------------------------------------------------ @@ -789,3 +826,55 @@ function createMobileTopBar() { } createMobileTopBar(); + +//------------------------------------------------ +// Simple Navigation Functions +//------------------------------------------------ + +function navigateLastAssistantMessage(direction) { + const chat = document.querySelector("#chat"); + if (!chat) return false; + + const messages = chat.querySelectorAll("[data-index]"); + if (messages.length === 0) return false; + + // Find the last assistant message 
(starting from the end) + let lastAssistantMessage = null; + for (let i = messages.length - 1; i >= 0; i--) { + const msg = messages[i]; + if ( + msg.classList.contains("assistant-message") || + msg.querySelector(".circle-bot") || + msg.querySelector(".text-bot") + ) { + lastAssistantMessage = msg; + break; + } + } + + if (!lastAssistantMessage) return false; + + const buttons = lastAssistantMessage.querySelectorAll(".version-nav-button"); + + for (let i = 0; i < buttons.length; i++) { + const button = buttons[i]; + const onclick = button.getAttribute("onclick"); + const disabled = button.hasAttribute("disabled"); + + const isLeft = onclick && onclick.includes("'left'"); + const isRight = onclick && onclick.includes("'right'"); + + if (!disabled) { + if (direction === "left" && isLeft) { + navigateVersion(button, direction); + return true; + } + if (direction === "right" && isRight) { + navigateVersion(button, direction); + return true; + } + } + } + + return false; +} diff --git a/modules/chat.py b/modules/chat.py index 36a07836..6eed47ee 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -414,10 +414,20 @@ def add_message_version(history, row_idx, is_current=True): if "versions" not in history['metadata'][key]: history['metadata'][key]["versions"] = [] + # Check if this version already exists + current_content = history['internal'][row_idx][1] + current_visible = history['visible'][row_idx][1] + + for i, version in enumerate(history['metadata'][key]["versions"]): + if version['content'] == current_content and version['visible_content'] == current_visible: + if is_current: + history['metadata'][key]["current_version_index"] = i + return + # Add current message as a version history['metadata'][key]["versions"].append({ - "content": history['internal'][row_idx][1], - "visible_content": history['visible'][row_idx][1], + "content": current_content, + "visible_content": current_visible, "timestamp": get_current_timestamp() }) @@ -540,8 +550,9 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess if regenerate: row_idx = len(output['internal']) - 1 - # Store the existing response as a version before regenerating - add_message_version(output, row_idx, is_current=False) + # Store the first response as a version before regenerating + if not output['metadata'].get(f"assistant_{row_idx}", {}).get('versions'): + add_message_version(output, row_idx, is_current=False) if loading_message: yield { @@ -1414,6 +1425,46 @@ def handle_branch_chat_click(state): return [history, html, past_chats_update, -1] +def handle_navigate_version_click(state): + history = state['history'] + message_index = int(state['navigate_message_index']) + direction = state['navigate_direction'] + + # Get assistant message metadata + key = f"assistant_{message_index}" + if key not in history['metadata'] or 'versions' not in history['metadata'][key]: + # No versions to navigate + html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + return [history, html] + + metadata = history['metadata'][key] + current_idx = metadata.get('current_version_index', 0) + versions = metadata['versions'] + + # Calculate new index + if direction == 'left': + new_idx = max(0, current_idx - 1) + else: # right + new_idx = min(len(versions) - 1, current_idx + 1) + + if new_idx == current_idx: + # No change needed + html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + return [history, html] 
+ + # Update history with new version + version = versions[new_idx] + history['internal'][message_index][1] = version['content'] + history['visible'][message_index][1] = version['visible_content'] + metadata['current_version_index'] = new_idx + + # Redraw and save + html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + save_history(history, state['unique_id'], state['character_menu'], state['mode']) + + return [history, html] + + def handle_rename_chat_click(): return [ gr.update(value="My New Chat"), diff --git a/modules/html_generator.py b/modules/html_generator.py index f5e0b28f..1dfeb445 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -380,6 +380,30 @@ def format_message_attachments(history, role, index): return "" +def get_version_navigation_html(history, i): + """Generate simple navigation arrows for message versions""" + key = f"assistant_{i}" + metadata = history.get('metadata', {}) + + if key not in metadata or 'versions' not in metadata[key]: + return "" + + versions = metadata[key]['versions'] + current_idx = metadata[key].get('current_version_index', 0) + + if len(versions) <= 1: + return "" + + left_disabled = ' disabled' if current_idx == 0 else '' + right_disabled = ' disabled' if current_idx >= len(versions) - 1 else '' + + left_arrow = f'' + right_arrow = f'' + position = f'{current_idx + 1}/{len(versions)}' + + return f'
{left_arrow}{position}{right_arrow}
' + + def actions_html(history, i, info_message=""): return (f'
' f'{copy_button}' @@ -388,7 +412,8 @@ def actions_html(history, i, info_message=""): f'{remove_button if i == len(history["visible"]) - 1 else ""}' f'{branch_button}' f'{info_message}' - f'
') + f'
' + f'{get_version_navigation_html(history, i)}') def generate_instruct_html(history): diff --git a/modules/ui.py b/modules/ui.py index 5e8fa14e..52c095a2 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -157,6 +157,8 @@ def list_model_elements(): def list_interface_input_elements(): elements = [ + 'navigate_message_index', + 'navigate_direction', 'temperature', 'dynatemp_low', 'dynatemp_high', diff --git a/modules/ui_chat.py b/modules/ui_chat.py index ab4b4e60..7a9f6f76 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -97,6 +97,12 @@ def create_ui(): with gr.Row(): shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=12, label='Command for chat-instruct mode', info='<|character|> and <|prompt|> get replaced with the bot name and the regular chat prompt respectively.', visible=shared.settings['mode'] == 'chat-instruct', elem_classes=['add_scrollbar']) + # Hidden elements for version navigation (similar to branch) + with gr.Row(visible=False): + shared.gradio['navigate_message_index'] = gr.Number(value=-1, precision=0, elem_id="Navigate-message-index") + shared.gradio['navigate_direction'] = gr.Textbox(value="", elem_id="Navigate-direction") + shared.gradio['navigate_version'] = gr.Button(elem_id="Navigate-version") + def create_chat_settings_ui(): mu = shared.args.multi_user @@ -293,6 +299,10 @@ def create_event_handlers(): shared.gradio['chat_style'].change(chat.redraw_html, gradio(reload_arr), gradio('display'), show_progress=False) shared.gradio['Copy last reply'].click(chat.send_last_reply_to_input, gradio('history'), gradio('textbox'), show_progress=False) + shared.gradio['navigate_version'].click( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + chat.handle_navigate_version_click, gradio('interface_state'), gradio('history', 'display'), show_progress=False) + # Save/delete a character shared.gradio['save_character'].click(chat.handle_save_character_click, gradio('name2'), gradio('save_character_filename', 'character_saver'), show_progress=False) shared.gradio['delete_character'].click(lambda: gr.update(visible=True), None, gradio('character_deleter'), show_progress=False) From 5028480ebabf26ec44778588b4fbd019cd9456ed Mon Sep 17 00:00:00 2001 From: Underscore <47636331+Th-Underscore@users.noreply.github.com> Date: Tue, 27 May 2025 23:55:27 -0400 Subject: [PATCH 28/61] UI: Add footer buttons for editing messages (#7019) --------- Co-authored-by: oobabooga <112222186+oobabooga@users.noreply.github.com> --- css/main.css | 49 +++++++++++++- js/global_scope_js.js | 132 +++++++++++++++++++++++++++++++++++++- js/main.js | 26 ++++---- modules/chat.py | 80 +++++++++++++---------- modules/html_generator.py | 44 ++++++++----- modules/ui.py | 7 +- modules/ui_chat.py | 18 +++--- 7 files changed, 282 insertions(+), 74 deletions(-) diff --git a/css/main.css b/css/main.css index be27544c..9d68ba02 100644 --- a/css/main.css +++ b/css/main.css @@ -1457,6 +1457,53 @@ strong { color: #ccc; } +/* Message Editing Styles */ +.editing-textarea { + width: 100%; + min-height: 200px; + padding: 10px; + border-radius: 5px; + border: 1px solid #ccc; + background-color: var(--light-theme-gray); + font-family: inherit; + font-size: inherit; + resize: vertical; +} + +.dark .editing-textarea { + border: 1px solid var(--border-color-dark); + background-color: var(--darker-gray); +} + +.editing-textarea:focus { + outline: none; + border-color: var(--selected-item-color-dark); +} + +.edit-controls-container { 
+ margin-top: 0; + display: flex; + gap: 8px; + padding-bottom: 8px; +} + +.edit-control-button { + padding: 6px 12px; + border: 1px solid #ccc; + border-radius: 4px; + cursor: pointer; + background-color: #f8f9fa; + color: #212529; + font-size: 12px; + margin: 0; +} + +.dark .edit-control-button { + border: 1px solid var(--border-color-dark); + background-color: var(--light-gray); + color: #efefef; +} + /* --- Simple Version Navigation --- */ .version-navigation { position: absolute; @@ -1488,7 +1535,7 @@ strong { .version-position { font-size: 11px; - color: currentColor; + color: currentcolor; font-family: monospace; min-width: 35px; text-align: center; diff --git a/js/global_scope_js.js b/js/global_scope_js.js index 9174622e..0e86d450 100644 --- a/js/global_scope_js.js +++ b/js/global_scope_js.js @@ -1,3 +1,7 @@ +// ------------------------------------------------- +// Event handlers +// ------------------------------------------------- + function copyToClipboard(element) { if (!element) return; @@ -42,11 +46,135 @@ function branchHere(element) { branchIndexInput.value = index; // Trigger any 'change' or 'input' events Gradio might be listening for - const event = new Event("input", { bubbles: true }); // 'change' might also work + const event = new Event("input", { bubbles: true }); branchIndexInput.dispatchEvent(event); - branchButton.click(); // Gradio will now pick up the 'index' + branchButton.click(); +} +// ------------------------------------------------- +// Message Editing Functions +// ------------------------------------------------- + +function editHere(buttonElement) { + if (!buttonElement) return; + + const messageElement = buttonElement.closest(".message, .user-message, .assistant-message"); + if (!messageElement) return; + + const messageBody = messageElement.querySelector(".message-body"); + if (!messageBody) return; + + // If already editing, focus the textarea + const existingTextarea = messageBody.querySelector(".editing-textarea"); + if (existingTextarea) { + existingTextarea.focus(); + return; + } + + // Determine role based on message element - handle different chat modes + const isUserMessage = messageElement.classList.contains("user-message") || + messageElement.querySelector(".text-you") !== null || + messageElement.querySelector(".circle-you") !== null; + + startEditing(messageElement, messageBody, isUserMessage); +} + +function startEditing(messageElement, messageBody, isUserMessage) { + const rawText = messageElement.getAttribute("data-raw") || messageBody.textContent; + const originalHTML = messageBody.innerHTML; + + // Create editing interface + const editingInterface = createEditingInterface(rawText); + + // Replace message content + messageBody.innerHTML = ""; + messageBody.appendChild(editingInterface.textarea); + messageBody.appendChild(editingInterface.controls); + + editingInterface.textarea.focus(); + editingInterface.textarea.setSelectionRange(rawText.length, rawText.length); + + // Setup event handlers + setupEditingHandlers(editingInterface.textarea, messageElement, originalHTML, messageBody, isUserMessage); +} + +function createEditingInterface(text) { + const textarea = document.createElement("textarea"); + textarea.value = text; + textarea.className = "editing-textarea"; + textarea.rows = Math.max(3, text.split("\n").length); + + const controls = document.createElement("div"); + controls.className = "edit-controls-container"; + + const saveButton = document.createElement("button"); + saveButton.textContent = "Save"; + saveButton.className = 
"edit-control-button"; + saveButton.type = "button"; + + const cancelButton = document.createElement("button"); + cancelButton.textContent = "Cancel"; + cancelButton.className = "edit-control-button edit-cancel-button"; + cancelButton.type = "button"; + + controls.appendChild(saveButton); + controls.appendChild(cancelButton); + + return { textarea, controls, saveButton, cancelButton }; +} + +function setupEditingHandlers(textarea, messageElement, originalHTML, messageBody, isUserMessage) { + const saveButton = messageBody.querySelector(".edit-control-button:not(.edit-cancel-button)"); + const cancelButton = messageBody.querySelector(".edit-cancel-button"); + + const submitEdit = () => { + const index = messageElement.getAttribute("data-index"); + if (!index || !submitMessageEdit(index, textarea.value, isUserMessage)) { + cancelEdit(); + } + }; + + const cancelEdit = () => { + messageBody.innerHTML = originalHTML; + }; + + // Event handlers + saveButton.onclick = submitEdit; + cancelButton.onclick = cancelEdit; + + textarea.onkeydown = (e) => { + if (e.key === "Enter" && !e.shiftKey) { + e.preventDefault(); + submitEdit(); + } else if (e.key === "Escape") { + e.preventDefault(); + cancelEdit(); + } + }; +} + +function submitMessageEdit(index, newText, isUserMessage) { + const editIndexInput = document.getElementById("Edit-message-index")?.querySelector("input"); + const editTextInput = document.getElementById("Edit-message-text")?.querySelector("textarea"); + const editRoleInput = document.getElementById("Edit-message-role")?.querySelector("textarea"); + const editButton = document.getElementById("Edit-message"); + + if (!editIndexInput || !editTextInput || !editRoleInput || !editButton) { + console.error("Edit elements not found"); + return false; + } + + editIndexInput.value = index; + editTextInput.value = newText; + editRoleInput.value = isUserMessage ? 
"user" : "assistant"; + + editIndexInput.dispatchEvent(new Event("input", { bubbles: true })); + editTextInput.dispatchEvent(new Event("input", { bubbles: true })); + editRoleInput.dispatchEvent(new Event("input", { bubbles: true })); + + editButton.click(); + return true; } function navigateVersion(element, direction) { diff --git a/js/main.js b/js/main.js index d90e8ade..fc014f66 100644 --- a/js/main.js +++ b/js/main.js @@ -1,3 +1,7 @@ +// ------------------------------------------------ +// Main +// ------------------------------------------------ + let main_parent = document.getElementById("chat-tab").parentNode; let extensions = document.getElementById("extensions"); @@ -102,18 +106,6 @@ document.addEventListener("keydown", function(event) { document.getElementById("Remove-last").click(); } - // Copy last on Ctrl + Shift + K - else if (event.ctrlKey && event.shiftKey && event.key === "K") { - event.preventDefault(); - document.getElementById("Copy-last").click(); - } - - // Replace last on Ctrl + Shift + L - else if (event.ctrlKey && event.shiftKey && event.key === "L") { - event.preventDefault(); - document.getElementById("Replace-last").click(); - } - // Impersonate on Ctrl + Shift + M else if (event.ctrlKey && event.shiftKey && event.key === "M") { event.preventDefault(); @@ -388,6 +380,16 @@ document.addEventListener("click", function (event) { } }); +document.addEventListener("dblclick", (event) => { + const messageElement = event.target.closest(".message, .user-message, .assistant-message"); + if (!messageElement) return; + + const editButton = messageElement.querySelector(".footer-edit-button"); + if (editButton) { + editButton.click(); + } +}); + //------------------------------------------------ // Relocate the "Show controls" checkbox //------------------------------------------------ diff --git a/modules/chat.py b/modules/chat.py index 6eed47ee..9598efa7 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -712,32 +712,6 @@ def remove_last_message(history): return html.unescape(last[0]), history -def send_last_reply_to_input(history): - if len(history['visible']) > 0: - return html.unescape(history['visible'][-1][1]) - else: - return '' - - -def replace_last_reply(textbox, state): - history = state['history'] - text = textbox['text'] - - # Initialize metadata if not present - if 'metadata' not in history: - history['metadata'] = {} - - if len(text.strip()) == 0: - return history - elif len(history['visible']) > 0: - row_idx = len(history['internal']) - 1 - history['visible'][-1][1] = html.escape(text) - history['internal'][-1][1] = apply_extensions('input', text, state, is_chat=True) - update_message_metadata(history['metadata'], "assistant", row_idx, timestamp=get_current_timestamp()) - - return history - - def send_dummy_message(textbox, state): history = state['history'] text = textbox['text'] @@ -1330,14 +1304,6 @@ def my_yaml_output(data): return result -def handle_replace_last_reply_click(text, state): - history = replace_last_reply(text, state) - save_history(history, state['unique_id'], state['character_menu'], state['mode']) - html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) - - return [history, html, {"text": "", "files": []}] - - def handle_send_dummy_message_click(text, state): history = send_dummy_message(text, state) save_history(history, state['unique_id'], state['character_menu'], state['mode']) @@ -1425,6 +1391,52 @@ def handle_branch_chat_click(state): return [history, html, 
past_chats_update, -1] +def handle_edit_message_click(state): + history = state['history'] + message_index = int(state['edit_message_index']) + new_text = state['edit_message_text'] + role = state['edit_message_role'] # "user" or "assistant" + + if message_index >= len(history['internal']): + html_output = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + return [history, html_output, gr.update()] + + # Use the role passed from frontend + is_user_msg = (role == "user") + role_idx = 0 if is_user_msg else 1 + + # For assistant messages, save the original version BEFORE updating content + if not is_user_msg: + if not history['metadata'].get(f"assistant_{message_index}", {}).get('versions'): + add_message_version(history, message_index, is_current=False) + + # NOW update the message content + history['internal'][message_index][role_idx] = apply_extensions('input', new_text, state, is_chat=True) + history['visible'][message_index][role_idx] = html.escape(new_text) + + # Branch if editing user message, add version if editing assistant message + if is_user_msg: + # Branch like branch-here + history['visible'] = history['visible'][:message_index + 1] + history['internal'] = history['internal'][:message_index + 1] + new_unique_id = datetime.now().strftime('%Y%m%d-%H-%M-%S') + save_history(history, new_unique_id, state['character_menu'], state['mode']) + histories = find_all_histories_with_first_prompts(state) + past_chats_update = gr.update(choices=histories, value=new_unique_id) + state['unique_id'] = new_unique_id + elif not is_user_msg: + # Add the new version as current + add_message_version(history, message_index, is_current=True) + past_chats_update = gr.update() + else: + past_chats_update = gr.update() + + save_history(history, state['unique_id'], state['character_menu'], state['mode']) + html_output = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + + return [history, html_output, past_chats_update] + + def handle_navigate_version_click(state): history = state['history'] message_index = int(state['navigate_message_index']) diff --git a/modules/html_generator.py b/modules/html_generator.py index 1dfeb445..9a93555f 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -336,12 +336,14 @@ refresh_svg = '''''' remove_svg = '''''' branch_svg = '''''' +edit_svg = '''''' info_svg = '''''' info_svg_small = '''''' attachment_svg = '''''' copy_button = f'' branch_button = f'' +edit_button = f'' refresh_button = f'' continue_button = f'' remove_button = f'' @@ -404,16 +406,23 @@ def get_version_navigation_html(history, i): return f'
{left_arrow}{position}{right_arrow}
' -def actions_html(history, i, info_message=""): +def actions_html(history, i, role, info_message=""): + if role == "assistant": + return (f'
' + f'{copy_button}' + f'{edit_button}' + f'{refresh_button if i == len(history["visible"]) - 1 else ""}' + f'{continue_button if i == len(history["visible"]) - 1 else ""}' + f'{remove_button if i == len(history["visible"]) - 1 else ""}' + f'{branch_button}' + f'{info_message}' + f'
' + f'{get_version_navigation_html(history, i)}') return (f'
' f'{copy_button}' - f'{refresh_button if i == len(history["visible"]) - 1 else ""}' - f'{continue_button if i == len(history["visible"]) - 1 else ""}' - f'{remove_button if i == len(history["visible"]) - 1 else ""}' - f'{branch_button}' + f'{edit_button}' f'{info_message}' - f'
' - f'{get_version_navigation_html(history, i)}') + f'
') def generate_instruct_html(history): @@ -448,11 +457,12 @@ def generate_instruct_html(history): if converted_visible[0]: # Don't display empty user messages output += ( f'
' + f'data-raw="{html.escape(row_internal[0], quote=True)}"' + f'data-index={i}>' f'
' f'
{converted_visible[0]}
' f'{user_attachments}' - f'
{copy_button}{info_message_user}
' + f'{actions_html(history, i, "user", info_message_user)}' f'
' f'
' ) @@ -464,7 +474,7 @@ def generate_instruct_html(history): f'
' f'
{converted_visible[1]}
' f'{assistant_attachments}' - f'{actions_html(history, i, info_message_assistant)}' + f'{actions_html(history, i, "assistant", info_message_assistant)}' f'
' f'' ) @@ -503,13 +513,14 @@ def generate_cai_chat_html(history, name1, name2, style, character, reset_cache= if converted_visible[0]: # Don't display empty user messages output += ( f'
' + f'data-raw="{html.escape(row_internal[0], quote=True)}"' + f'data-index={i}>' f'
{img_me}
' f'
' f'
{name1}{user_timestamp}
' f'
{converted_visible[0]}
' f'{user_attachments}' - f'
{copy_button}
' + f'{actions_html(history, i, "user")}' f'
' f'
' ) @@ -523,7 +534,7 @@ def generate_cai_chat_html(history, name1, name2, style, character, reset_cache= f'
{name2}{assistant_timestamp}
' f'
{converted_visible[1]}
' f'{assistant_attachments}' - f'{actions_html(history, i)}' + f'{actions_html(history, i, "assistant")}' f'' f'' ) @@ -564,11 +575,12 @@ def generate_chat_html(history, name1, name2, reset_cache=False): if converted_visible[0]: # Don't display empty user messages output += ( f'
' + f'data-raw="{html.escape(row_internal[0], quote=True)}"' + f'data-index={i}>' f'
' f'
{converted_visible[0]}
' f'{user_attachments}' - f'
{copy_button}{info_message_user}
' + f'{actions_html(history, i, "user", info_message_user)}' f'
' f'
' ) @@ -580,7 +592,7 @@ def generate_chat_html(history, name1, name2, reset_cache=False): f'
' f'
{converted_visible[1]}
' f'{assistant_attachments}' - f'{actions_html(history, i, info_message_assistant)}' + f'{actions_html(history, i, "assistant", info_message_assistant)}' f'
' f'' ) diff --git a/modules/ui.py b/modules/ui.py index 52c095a2..00393b53 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -212,7 +212,12 @@ def list_interface_input_elements(): 'negative_prompt', 'dry_sequence_breakers', 'grammar_string', - 'branch_index' + 'navigate_message_index', + 'navigate_direction', + 'edit_message_index', + 'edit_message_text', + 'edit_message_role', + 'branch_index', ] # Chat elements diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 7a9f6f76..2856ce1f 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -71,8 +71,6 @@ def create_ui(): shared.gradio['Remove last'] = gr.Button('Remove last reply (Ctrl + Shift + Backspace)', elem_id='Remove-last') with gr.Row(): - shared.gradio['Replace last reply'] = gr.Button('Replace last reply (Ctrl + Shift + L)', elem_id='Replace-last') - shared.gradio['Copy last reply'] = gr.Button('Copy last reply (Ctrl + Shift + K)', elem_id='Copy-last') shared.gradio['Impersonate'] = gr.Button('Impersonate (Ctrl + Shift + M)', elem_id='Impersonate') with gr.Row(): @@ -97,11 +95,15 @@ def create_ui(): with gr.Row(): shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=12, label='Command for chat-instruct mode', info='<|character|> and <|prompt|> get replaced with the bot name and the regular chat prompt respectively.', visible=shared.settings['mode'] == 'chat-instruct', elem_classes=['add_scrollbar']) - # Hidden elements for version navigation (similar to branch) + # Hidden elements for version navigation and editing with gr.Row(visible=False): shared.gradio['navigate_message_index'] = gr.Number(value=-1, precision=0, elem_id="Navigate-message-index") shared.gradio['navigate_direction'] = gr.Textbox(value="", elem_id="Navigate-direction") shared.gradio['navigate_version'] = gr.Button(elem_id="Navigate-version") + shared.gradio['edit_message_index'] = gr.Number(value=-1, precision=0, elem_id="Edit-message-index") + shared.gradio['edit_message_text'] = gr.Textbox(value="", elem_id="Edit-message-text") + shared.gradio['edit_message_role'] = gr.Textbox(value="", elem_id="Edit-message-role") + shared.gradio['edit_message'] = gr.Button(elem_id="Edit-message") def create_chat_settings_ui(): @@ -228,10 +230,6 @@ def create_event_handlers(): None, None, None, js='() => document.getElementById("chat").parentNode.parentNode.parentNode.classList.remove("_generating")').then( None, None, None, js=f'() => {{{ui.audio_notification_js}}}') - shared.gradio['Replace last reply'].click( - ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.handle_replace_last_reply_click, gradio('textbox', 'interface_state'), gradio('history', 'display', 'textbox'), show_progress=False) - shared.gradio['Send dummy message'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.handle_send_dummy_message_click, gradio('textbox', 'interface_state'), gradio('history', 'display', 'textbox'), show_progress=False) @@ -297,12 +295,16 @@ def create_event_handlers(): None, gradio('mode'), None, js="(mode) => {mode === 'instruct' ? 
document.getElementById('character-menu').parentNode.parentNode.style.display = 'none' : document.getElementById('character-menu').parentNode.parentNode.style.display = ''}") shared.gradio['chat_style'].change(chat.redraw_html, gradio(reload_arr), gradio('display'), show_progress=False) - shared.gradio['Copy last reply'].click(chat.send_last_reply_to_input, gradio('history'), gradio('textbox'), show_progress=False) shared.gradio['navigate_version'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.handle_navigate_version_click, gradio('interface_state'), gradio('history', 'display'), show_progress=False) + shared.gradio['edit_message'].click( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + chat.handle_edit_message_click, gradio('interface_state'), gradio('history', 'display', 'unique_id'), show_progress=False).then( + lambda: None, None, None, js='() => { const role = document.getElementById("Edit-message-role").querySelector("textarea").value; if (role === "user") document.getElementById("Regenerate").click(); }') + # Save/delete a character shared.gradio['save_character'].click(chat.handle_save_character_click, gradio('name2'), gradio('save_character_filename', 'character_saver'), show_progress=False) shared.gradio['delete_character'].click(lambda: gr.update(visible=True), None, gradio('character_deleter'), show_progress=False) From 2db36da979b539263deacbd3ac8b3f6dbba7f97f Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 27 May 2025 21:00:11 -0700 Subject: [PATCH 29/61] UI: Make scrollbars more discrete in dark mode --- css/main.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/css/main.css b/css/main.css index 9d68ba02..90dd51bc 100644 --- a/css/main.css +++ b/css/main.css @@ -265,7 +265,7 @@ button { .dark .pretty_scrollbar::-webkit-scrollbar-thumb, .dark .pretty_scrollbar::-webkit-scrollbar-thumb:hover { - background: #ccc; + background: rgba(255, 255, 255, 0.2); border-radius: 10px; } From f6ca0ee0727bceac867d5a5bbea0c6d61fea35ea Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 27 May 2025 21:20:51 -0700 Subject: [PATCH 30/61] Fix regenerate sometimes not creating a new message version --- modules/chat.py | 24 +++++------------------- 1 file changed, 5 insertions(+), 19 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index 9598efa7..59ca4d34 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -399,40 +399,26 @@ def get_stopping_strings(state): def add_message_version(history, row_idx, is_current=True): - """Add the current message as a version in the history metadata""" - if 'metadata' not in history: - history['metadata'] = {} - - if row_idx >= len(history['internal']) or not history['internal'][row_idx][1].strip(): - return # Skip if row doesn't exist or message is empty - key = f"assistant_{row_idx}" - - # Initialize metadata structures if needed if key not in history['metadata']: - history['metadata'][key] = {"timestamp": get_current_timestamp()} + history['metadata'][key] = {} + if "versions" not in history['metadata'][key]: history['metadata'][key]["versions"] = [] - # Check if this version already exists current_content = history['internal'][row_idx][1] current_visible = history['visible'][row_idx][1] - for i, version in enumerate(history['metadata'][key]["versions"]): - if version['content'] == current_content and version['visible_content'] == 
current_visible: - if is_current: - history['metadata'][key]["current_version_index"] = i - return - - # Add current message as a version + # Always add the current message as a new version entry. + # The timestamp will differentiate it even if content is identical to a previous version. history['metadata'][key]["versions"].append({ "content": current_content, "visible_content": current_visible, "timestamp": get_current_timestamp() }) - # Update index if this is the current version if is_current: + # Set the current_version_index to the newly added version (which is now the last one). history['metadata'][key]["current_version_index"] = len(history['metadata'][key]["versions"]) - 1 From 1b0e2d8750ee315086acb2738fab76ad28abadb8 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Tue, 27 May 2025 22:36:24 -0700 Subject: [PATCH 31/61] UI: Add a token counter to the chat tab (counts input + history) --- css/main.css | 7 ++++++ modules/chat.py | 54 +++++++++++++++++++++++++++++++++++++++++++++- modules/ui_chat.py | 9 ++++++++ 3 files changed, 69 insertions(+), 1 deletion(-) diff --git a/css/main.css b/css/main.css index 90dd51bc..6e030453 100644 --- a/css/main.css +++ b/css/main.css @@ -1542,3 +1542,10 @@ strong { opacity: 0.8; user-select: none; } + +.token-display { + font-family: monospace; + font-size: 13px; + color: var(--body-text-color-subdued); + margin-top: 4px; +} diff --git a/modules/chat.py b/modules/chat.py index 59ca4d34..498c0d88 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -230,7 +230,15 @@ def generate_chat_prompt(user_input, state, **kwargs): messages.insert(insert_pos, {"role": "user", "content": enhanced_user_msg}) user_input = user_input.strip() - if user_input and not impersonate and not _continue: + + # Check if we have attachments even with empty input + has_attachments = False + if not impersonate and not _continue and len(history_data.get('metadata', {})) > 0: + current_row_idx = len(history) + user_key = f"user_{current_row_idx}" + has_attachments = user_key in metadata and "attachments" in metadata[user_key] + + if (user_input or has_attachments) and not impersonate and not _continue: # For the current user input being processed, check if we need to add attachments if not impersonate and not _continue and len(history_data.get('metadata', {})) > 0: current_row_idx = len(history) @@ -350,6 +358,50 @@ def generate_chat_prompt(user_input, state, **kwargs): return prompt +def count_prompt_tokens(text_input, state): + """Count tokens for current history + input including attachments""" + if shared.tokenizer is None: + return "Tokenizer not available" + + try: + # Handle dict format with text and files + files = [] + if isinstance(text_input, dict): + files = text_input.get('files', []) + text = text_input.get('text', '') + else: + text = text_input + files = [] + + # Create temporary history copy to add attachments + temp_history = copy.deepcopy(state['history']) + if 'metadata' not in temp_history: + temp_history['metadata'] = {} + + # Process attachments if any + if files: + row_idx = len(temp_history['internal']) + for file_path in files: + add_message_attachment(temp_history, row_idx, file_path, is_user=True) + + # Create temp state with modified history + temp_state = copy.deepcopy(state) + temp_state['history'] = temp_history + + # Build prompt using existing logic + prompt = generate_chat_prompt(text, temp_state) + current_tokens = get_encoded_length(prompt) + max_tokens = temp_state['truncation_length'] + + 
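
The helper above is a dry run: it deep-copies the state and history, attaches any pending files to the copy, builds the same prompt that generation would build, and only then measures it, so the live chat is never mutated. A minimal sketch of that pattern, using a hypothetical prompt_usage helper with build_prompt and count_tokens as stand-ins for generate_chat_prompt and get_encoded_length:

    import copy

    def prompt_usage(text, state, build_prompt, count_tokens):
        # Work on copies so the dry run never touches the real history
        tmp_state = copy.deepcopy(state)
        prompt = build_prompt(text, tmp_state)   # same prompt generation would see
        used = count_tokens(prompt)
        limit = tmp_state['truncation_length']
        pct = (used / limit) * 100 if limit > 0 else 0
        return f"History + Input: {used:,} / {limit:,} tokens ({pct:.1f}%)"
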
percentage = (current_tokens / max_tokens) * 100 if max_tokens > 0 else 0 + + return f"History + Input:
{current_tokens:,} / {max_tokens:,} tokens ({percentage:.1f}%)" + + except Exception as e: + logger.error(f"Error counting tokens: {e}") + return f"Error: {str(e)}" + + def get_stopping_strings(state): stopping_strings = [] renderers = [] diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 2856ce1f..952a40a5 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -95,6 +95,11 @@ def create_ui(): with gr.Row(): shared.gradio['chat-instruct_command'] = gr.Textbox(value=shared.settings['chat-instruct_command'], lines=12, label='Command for chat-instruct mode', info='<|character|> and <|prompt|> get replaced with the bot name and the regular chat prompt respectively.', visible=shared.settings['mode'] == 'chat-instruct', elem_classes=['add_scrollbar']) + with gr.Row(): + shared.gradio['count_tokens'] = gr.Button('Count tokens', size='sm') + + shared.gradio['token_display'] = gr.HTML(value='', elem_classes='token-display') + # Hidden elements for version navigation and editing with gr.Row(visible=False): shared.gradio['navigate_message_index'] = gr.Number(value=-1, precision=0, elem_id="Navigate-message-index") @@ -360,3 +365,7 @@ def create_event_handlers(): None, None, None, js=f'() => {{{ui.switch_tabs_js}; switch_to_notebook()}}') shared.gradio['show_controls'].change(None, gradio('show_controls'), None, js=f'(x) => {{{ui.show_controls_js}; toggle_controls(x)}}') + + shared.gradio['count_tokens'].click( + ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( + chat.count_prompt_tokens, gradio('textbox', 'interface_state'), gradio('token_display'), show_progress=False) From 077bbc6b101f8f6045b95369bc82373187741d12 Mon Sep 17 00:00:00 2001 From: oobabooga Date: Wed, 28 May 2025 04:27:28 -0300 Subject: [PATCH 32/61] Add web search support (#7023) --- modules/chat.py | 4 + modules/ui.py | 6 +- modules/ui_chat.py | 12 ++ modules/web_search.py | 125 ++++++++++++++++++ requirements/full/requirements.txt | 2 + requirements/full/requirements_amd.txt | 2 + requirements/full/requirements_amd_noavx2.txt | 2 + .../full/requirements_apple_intel.txt | 2 + .../full/requirements_apple_silicon.txt | 2 + requirements/full/requirements_cpu_only.txt | 2 + .../full/requirements_cpu_only_noavx2.txt | 2 + requirements/full/requirements_noavx2.txt | 2 + requirements/full/requirements_nowheels.txt | 2 + requirements/portable/requirements.txt | 2 + .../portable/requirements_apple_intel.txt | 2 + .../portable/requirements_apple_silicon.txt | 2 + .../portable/requirements_cpu_only.txt | 2 + .../portable/requirements_cpu_only_noavx2.txt | 2 + requirements/portable/requirements_noavx2.txt | 2 + .../portable/requirements_nowheels.txt | 2 + requirements/portable/requirements_vulkan.txt | 2 + .../portable/requirements_vulkan_noavx2.txt | 2 + 22 files changed, 181 insertions(+), 2 deletions(-) create mode 100644 modules/web_search.py diff --git a/modules/chat.py b/modules/chat.py index 498c0d88..b2aacd5c 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -31,6 +31,7 @@ from modules.text_generation import ( get_max_prompt_length ) from modules.utils import delete_file, get_available_characters, save_file +from modules.web_search import add_web_search_attachments def strftime_now(format): @@ -566,6 +567,9 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess for file_path in files: add_message_attachment(output, row_idx, file_path, is_user=True) + # Add web search results as attachments if enabled + add_web_search_attachments(output, row_idx, 
text, state) + # Apply extensions text, visible_text = apply_extensions('chat_input', text, visible_text, state) text = apply_extensions('input', text, state, is_chat=True) diff --git a/modules/ui.py b/modules/ui.py index 00393b53..e24e6402 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -157,8 +157,6 @@ def list_model_elements(): def list_interface_input_elements(): elements = [ - 'navigate_message_index', - 'navigate_direction', 'temperature', 'dynatemp_low', 'dynatemp_high', @@ -218,6 +216,10 @@ def list_interface_input_elements(): 'edit_message_text', 'edit_message_role', 'branch_index', + 'enable_web_search', + 'web_search_pages', + 'navigate_message_index', + 'navigate_direction', ] # Chat elements diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 952a40a5..719af85a 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -86,6 +86,12 @@ def create_ui(): with gr.Row(): shared.gradio['start_with'] = gr.Textbox(label='Start reply with', placeholder='Sure thing!', value=shared.settings['start_with'], elem_classes=['add_scrollbar']) + with gr.Row(): + shared.gradio['enable_web_search'] = gr.Checkbox(value=shared.settings.get('enable_web_search', False), label='Activate web search') + + with gr.Row(visible=shared.settings.get('enable_web_search', False)) as shared.gradio['web_search_row']: + shared.gradio['web_search_pages'] = gr.Number(value=shared.settings.get('web_search_pages', 3), precision=0, label='Number of pages to download', minimum=1, maximum=10) + with gr.Row(): shared.gradio['mode'] = gr.Radio(choices=['instruct', 'chat-instruct', 'chat'], value=shared.settings['mode'] if shared.settings['mode'] in ['chat', 'chat-instruct'] else None, label='Mode', info='Defines how the chat prompt is generated. In instruct and chat-instruct modes, the instruction template Parameters > Instruction template is used.', elem_id='chat-mode') @@ -369,3 +375,9 @@ def create_event_handlers(): shared.gradio['count_tokens'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( chat.count_prompt_tokens, gradio('textbox', 'interface_state'), gradio('token_display'), show_progress=False) + + shared.gradio['enable_web_search'].change( + lambda x: gr.update(visible=x), + gradio('enable_web_search'), + gradio('web_search_row') + ) diff --git a/modules/web_search.py b/modules/web_search.py new file mode 100644 index 00000000..e7688ba4 --- /dev/null +++ b/modules/web_search.py @@ -0,0 +1,125 @@ +from datetime import datetime + +import requests +from bs4 import BeautifulSoup +from duckduckgo_search import DDGS + +from modules.logging_colors import logger +from modules.text_generation import generate_reply + + +def get_current_timestamp(): + """Returns the current time in 24-hour format""" + return datetime.now().strftime('%b %d, %Y %H:%M') + + +def generate_search_query(user_message, state): + """Generate a search query from user message using the LLM""" + search_prompt = f"{user_message}\n\n=====\n\nPlease turn the message above into a short web search query in the same language as the message. Respond with only the search query, nothing else." 
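
To make the intent of that prompt concrete: the user message comes first, a delimiter follows, and the instruction asks for a query in the same language as the message, so the model's short reply can be handed directly to the search backend. A small illustration, with the message and the expected query invented for the example:

    # Illustrative only; the input message and the expected completion are made up.
    user_message = "Qual foi a última versão do llama.cpp lançada?"
    search_prompt = (
        f"{user_message}\n\n=====\n\n"
        "Please turn the message above into a short web search query in the same "
        "language as the message. Respond with only the search query, nothing else."
    )
    # A short, low-temperature completion of search_prompt (the settings follow just
    # below) is used verbatim as the DuckDuckGo query, e.g. "última versão llama.cpp".
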
+ + # Use a minimal state for search query generation + search_state = state.copy() + search_state['max_new_tokens'] = 64 + search_state['temperature'] = 0.1 + + query = "" + for reply in generate_reply(search_prompt, search_state, stopping_strings=[], is_chat=False): + query = reply.strip() + + return query + + +def download_web_page(url, timeout=10): + """Download and extract text from a web page""" + try: + headers = { + 'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/91.0.4472.124 Safari/537.36' + } + response = requests.get(url, headers=headers, timeout=timeout) + response.raise_for_status() + + soup = BeautifulSoup(response.content, 'html.parser') + + # Remove script and style elements + for script in soup(["script", "style"]): + script.decompose() + + # Get text and clean it up + text = soup.get_text() + lines = (line.strip() for line in text.splitlines()) + chunks = (phrase.strip() for line in lines for phrase in line.split(" ")) + text = ' '.join(chunk for chunk in chunks if chunk) + + return text + except Exception as e: + logger.error(f"Error downloading {url}: {e}") + return f"[Error downloading content from {url}: {str(e)}]" + + +def perform_web_search(query, num_pages=3): + """Perform web search and return results with content""" + try: + with DDGS() as ddgs: + results = list(ddgs.text(query, max_results=num_pages)) + + search_results = [] + for i, result in enumerate(results): + url = result.get('href', '') + title = result.get('title', f'Search Result {i+1}') + + # Download page content + content = download_web_page(url) + + search_results.append({ + 'title': title, + 'url': url, + 'content': content + }) + + return search_results + except Exception as e: + logger.error(f"Error performing web search: {e}") + return [] + + +def add_web_search_attachments(history, row_idx, user_message, state): + """Perform web search and add results as attachments""" + if not state.get('enable_web_search', False): + return + + try: + # Generate search query + search_query = generate_search_query(user_message, state) + if not search_query: + logger.warning("Failed to generate search query") + return + + logger.info(f"Generated search query: {search_query}") + + # Perform web search + num_pages = int(state.get('web_search_pages', 3)) + search_results = perform_web_search(search_query, num_pages) + + if not search_results: + logger.warning("No search results found") + return + + # Add search results as attachments + key = f"user_{row_idx}" + if key not in history['metadata']: + history['metadata'][key] = {"timestamp": get_current_timestamp()} + if "attachments" not in history['metadata'][key]: + history['metadata'][key]["attachments"] = [] + + for result in search_results: + attachment = { + "name": f"{result['title']}", + "type": "text/html", + "content": f"URL: {result['url']}\n\n{result['content']}" + } + history['metadata'][key]["attachments"].append(attachment) + + logger.info(f"Added {len(search_results)} web search results as attachments") + + except Exception as e: + logger.error(f"Error in web search: {e}") diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index 3d18f5fd..0eaf10da 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -1,7 +1,9 @@ accelerate==1.5.* +beautifulsoup4==4.13.4 bitsandbytes==0.45.* colorama datasets +duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* diff --git a/requirements/full/requirements_amd.txt 
b/requirements/full/requirements_amd.txt index 82b19964..65f184bf 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -1,6 +1,8 @@ accelerate==1.5.* +beautifulsoup4==4.13.4 colorama datasets +duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index a8b03014..d20b2ec3 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -1,6 +1,8 @@ accelerate==1.5.* +beautifulsoup4==4.13.4 colorama datasets +duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index 5a61ac7d..2613d787 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -1,6 +1,8 @@ accelerate==1.5.* +beautifulsoup4==4.13.4 colorama datasets +duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index 6862c3b4..af583b00 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -1,6 +1,8 @@ accelerate==1.5.* +beautifulsoup4==4.13.4 colorama datasets +duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index e6982779..9bf2a37d 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -1,6 +1,8 @@ accelerate==1.5.* +beautifulsoup4==4.13.4 colorama datasets +duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index 97bff786..1731448e 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -1,6 +1,8 @@ accelerate==1.5.* +beautifulsoup4==4.13.4 colorama datasets +duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index 17c7e246..fc481a1a 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -1,7 +1,9 @@ accelerate==1.5.* +beautifulsoup4==4.13.4 bitsandbytes==0.45.* colorama datasets +duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt index 89b32caf..2ed8affa 100644 --- a/requirements/full/requirements_nowheels.txt +++ b/requirements/full/requirements_nowheels.txt @@ -1,6 +1,8 @@ accelerate==1.5.* +beautifulsoup4==4.13.4 colorama datasets +duckduckgo_search==8.0.2 einops fastapi==0.112.4 gradio==4.37.* diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt index ec9bafc6..fdae681d 100644 --- a/requirements/portable/requirements.txt +++ b/requirements/portable/requirements.txt @@ -1,3 +1,5 @@ +beautifulsoup4==4.13.4 +duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* jinja2==3.1.6 diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt index 025a737e..a58f39f7 100644 --- a/requirements/portable/requirements_apple_intel.txt +++ 
b/requirements/portable/requirements_apple_intel.txt @@ -1,3 +1,5 @@ +beautifulsoup4==4.13.4 +duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* jinja2==3.1.6 diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt index 32644e87..91ea3a6d 100644 --- a/requirements/portable/requirements_apple_silicon.txt +++ b/requirements/portable/requirements_apple_silicon.txt @@ -1,3 +1,5 @@ +beautifulsoup4==4.13.4 +duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* jinja2==3.1.6 diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt index bd5c1d9b..37e5aa40 100644 --- a/requirements/portable/requirements_cpu_only.txt +++ b/requirements/portable/requirements_cpu_only.txt @@ -1,3 +1,5 @@ +beautifulsoup4==4.13.4 +duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* jinja2==3.1.6 diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt index 51f2b7d9..dcb2884b 100644 --- a/requirements/portable/requirements_cpu_only_noavx2.txt +++ b/requirements/portable/requirements_cpu_only_noavx2.txt @@ -1,3 +1,5 @@ +beautifulsoup4==4.13.4 +duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* jinja2==3.1.6 diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt index aad6bf5a..8f1295bb 100644 --- a/requirements/portable/requirements_noavx2.txt +++ b/requirements/portable/requirements_noavx2.txt @@ -1,3 +1,5 @@ +beautifulsoup4==4.13.4 +duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* jinja2==3.1.6 diff --git a/requirements/portable/requirements_nowheels.txt b/requirements/portable/requirements_nowheels.txt index 4c055426..21805fe2 100644 --- a/requirements/portable/requirements_nowheels.txt +++ b/requirements/portable/requirements_nowheels.txt @@ -1,3 +1,5 @@ +beautifulsoup4==4.13.4 +duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* jinja2==3.1.6 diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt index 3d98d1b0..858b4488 100644 --- a/requirements/portable/requirements_vulkan.txt +++ b/requirements/portable/requirements_vulkan.txt @@ -1,3 +1,5 @@ +beautifulsoup4==4.13.4 +duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* jinja2==3.1.6 diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt index f954b8d2..569bae99 100644 --- a/requirements/portable/requirements_vulkan_noavx2.txt +++ b/requirements/portable/requirements_vulkan_noavx2.txt @@ -1,3 +1,5 @@ +beautifulsoup4==4.13.4 +duckduckgo_search==8.0.2 fastapi==0.112.4 gradio==4.37.* jinja2==3.1.6 From 75c6ae8502cae60bd8dabef1e2af4aec5766ca35 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 28 May 2025 00:29:17 -0700 Subject: [PATCH 33/61] UI: Don't edit messages on double click --- js/main.js | 10 ---------- 1 file changed, 10 deletions(-) diff --git a/js/main.js b/js/main.js index fc014f66..48bb8632 100644 --- a/js/main.js +++ b/js/main.js @@ -380,16 +380,6 @@ document.addEventListener("click", function (event) { } }); -document.addEventListener("dblclick", (event) => { - const messageElement = event.target.closest(".message, .user-message, .assistant-message"); - if (!messageElement) return; - - const editButton = messageElement.querySelector(".footer-edit-button"); - if (editButton) { - editButton.click(); - } -}); - 
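
Stepping back to the web_search module added in PATCH 32 above: its two new dependencies, duckduckgo_search and beautifulsoup4, can be exercised on their own to reproduce the search-then-scrape flow. A minimal standalone sketch under the same assumptions as that module (the query string is only an example):

    import requests
    from bs4 import BeautifulSoup
    from duckduckgo_search import DDGS

    query = "text-generation-webui web search"  # example query

    with DDGS() as ddgs:
        hits = list(ddgs.text(query, max_results=3))  # each hit has 'title' and 'href'

    for hit in hits:
        resp = requests.get(hit['href'], timeout=10,
                            headers={'User-Agent': 'Mozilla/5.0'})
        resp.raise_for_status()
        soup = BeautifulSoup(resp.content, 'html.parser')
        for tag in soup(['script', 'style']):
            tag.decompose()                        # drop non-content markup
        text = ' '.join(soup.get_text().split())   # collapse whitespace
        print(hit['title'], hit['href'], text[:200])
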
//------------------------------------------------ // Relocate the "Show controls" checkbox //------------------------------------------------ From 0aedb8992165b386dac244baeb5fb5967513869e Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 28 May 2025 00:35:20 -0700 Subject: [PATCH 34/61] UI: Small style improvement to attachments --- css/main.css | 1 + 1 file changed, 1 insertion(+) diff --git a/css/main.css b/css/main.css index 6e030453..181a19b8 100644 --- a/css/main.css +++ b/css/main.css @@ -1417,6 +1417,7 @@ strong { flex-wrap: wrap; gap: 8px; margin-top: 8px; + padding-bottom: 6px; } .attachment-box { From 6c3590ba9ab0bd540097a50986a59f0099d11d92 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 28 May 2025 05:28:15 -0700 Subject: [PATCH 35/61] Make web search attachments clickable --- modules/html_generator.py | 8 +++++++- modules/web_search.py | 5 +++-- 2 files changed, 10 insertions(+), 3 deletions(-) diff --git a/modules/html_generator.py b/modules/html_generator.py index 9a93555f..bfb278cd 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -370,10 +370,16 @@ def format_message_attachments(history, role, index): attachments_html = '
' for attachment in attachments: + name = html.escape(attachment["name"]) + + # Make clickable if URL exists + if "url" in attachment: + name = f'{name}' + attachments_html += ( f'
' f'
{attachment_svg}
' - f'
{html.escape(attachment["name"])}
' + f'
{name}
' f'
' ) attachments_html += '
' diff --git a/modules/web_search.py b/modules/web_search.py index e7688ba4..d3387ac9 100644 --- a/modules/web_search.py +++ b/modules/web_search.py @@ -113,9 +113,10 @@ def add_web_search_attachments(history, row_idx, user_message, state): for result in search_results: attachment = { - "name": f"{result['title']}", + "name": result['title'], "type": "text/html", - "content": f"URL: {result['url']}\n\n{result['content']}" + "url": result['url'], + "content": result['content'] } history['metadata'][key]["attachments"].append(attachment) From 27641ac1823751165615a1a53b62ae24977e37a0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 28 May 2025 17:09:05 -0700 Subject: [PATCH 36/61] UI: Make message editing work the same for user and assistant messages --- js/global_scope_js.js | 28 ++++++------ modules/chat.py | 94 ++++++++++++++++++++------------------- modules/html_generator.py | 42 ++++++++++------- modules/ui.py | 3 +- modules/ui_chat.py | 4 +- 5 files changed, 94 insertions(+), 77 deletions(-) diff --git a/js/global_scope_js.js b/js/global_scope_js.js index 0e86d450..3274f47e 100644 --- a/js/global_scope_js.js +++ b/js/global_scope_js.js @@ -186,31 +186,33 @@ function navigateVersion(element, direction) { const index = messageElement.getAttribute("data-index"); if (!index) return; - const indexInput = document.getElementById("Navigate-message-index").querySelector("input"); - if (!indexInput) { - console.error("Element with ID 'Navigate-message-index' not found."); - return; - } - - const directionInput = document.getElementById("Navigate-direction").querySelector("textarea"); - if (!directionInput) { - console.error("Element with ID 'Navigate-direction' not found."); - return; + // Determine role based on message element classes + let role = "assistant"; // Default role + if (messageElement.classList.contains("user-message") || + messageElement.querySelector(".text-you") || + messageElement.querySelector(".circle-you")) { + role = "user"; } + const indexInput = document.getElementById("Navigate-message-index")?.querySelector("input"); + const directionInput = document.getElementById("Navigate-direction")?.querySelector("textarea"); + const roleInput = document.getElementById("Navigate-message-role")?.querySelector("textarea"); const navigateButton = document.getElementById("Navigate-version"); - if (!navigateButton) { - console.error("Required element 'Navigate-version' not found."); + + if (!indexInput || !directionInput || !roleInput || !navigateButton) { + console.error("Navigation control elements (index, direction, role, or button) not found."); return; } indexInput.value = index; directionInput.value = direction; + roleInput.value = role; - // Trigger any 'change' or 'input' events Gradio might be listening for + // Trigger 'input' events for Gradio to pick up changes const event = new Event("input", { bubbles: true }); indexInput.dispatchEvent(event); directionInput.dispatchEvent(event); + roleInput.dispatchEvent(event); navigateButton.click(); } diff --git a/modules/chat.py b/modules/chat.py index b2aacd5c..8bac680c 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -451,19 +451,21 @@ def get_stopping_strings(state): return result -def add_message_version(history, row_idx, is_current=True): - key = f"assistant_{row_idx}" +def add_message_version(history, role, row_idx, is_current=True): + key = f"{role}_{row_idx}" + if 'metadata' not in history: + history['metadata'] = {} if key not in history['metadata']: 
history['metadata'][key] = {} if "versions" not in history['metadata'][key]: history['metadata'][key]["versions"] = [] - current_content = history['internal'][row_idx][1] - current_visible = history['visible'][row_idx][1] + # Determine which index to use for content based on role + content_idx = 0 if role == 'user' else 1 + current_content = history['internal'][row_idx][content_idx] + current_visible = history['visible'][row_idx][content_idx] - # Always add the current message as a new version entry. - # The timestamp will differentiate it even if content is identical to a previous version. history['metadata'][key]["versions"].append({ "content": current_content, "visible_content": current_visible, @@ -594,7 +596,7 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess # Store the first response as a version before regenerating if not output['metadata'].get(f"assistant_{row_idx}", {}).get('versions'): - add_message_version(output, row_idx, is_current=False) + add_message_version(output, "assistant", row_idx, is_current=False) if loading_message: yield { @@ -656,12 +658,13 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess if is_stream: yield output + output['visible'][-1][1] = apply_extensions('output', output['visible'][-1][1], state, is_chat=True) + # Add the newly generated response as a version (only for regeneration) if regenerate: row_idx = len(output['internal']) - 1 - add_message_version(output, row_idx, is_current=True) + add_message_version(output, "assistant", row_idx, is_current=True) - output['visible'][-1][1] = apply_extensions('output', output['visible'][-1][1], state, is_chat=True) yield output @@ -1441,37 +1444,35 @@ def handle_edit_message_click(state): if message_index >= len(history['internal']): html_output = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) - return [history, html_output, gr.update()] + return [history, html_output, gr.update()] # No unique_id change - # Use the role passed from frontend - is_user_msg = (role == "user") - role_idx = 0 if is_user_msg else 1 + role_idx = 0 if role == "user" else 1 - # For assistant messages, save the original version BEFORE updating content - if not is_user_msg: - if not history['metadata'].get(f"assistant_{message_index}", {}).get('versions'): - add_message_version(history, message_index, is_current=False) + if 'metadata' not in history: + history['metadata'] = {} + + key = f"{role}_{message_index}" + if key not in history['metadata']: + history['metadata'][key] = {} + + # If no versions exist yet for this message, store the current (pre-edit) content as the first version. 
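
Concretely, after the message at row 3 of the conversation has been edited once, the metadata entry that this code maintains has roughly the following shape (the wording and timestamps are invented for illustration; the keys mirror add_message_version above):

    example_entry = {  # shape of history['metadata']["user_3"] after one edit
        "versions": [
            {"content": "original wording", "visible_content": "original wording",
             "timestamp": "May 28, 2025 17:05"},
            {"content": "edited wording", "visible_content": "edited wording",
             "timestamp": "May 28, 2025 17:09"},
        ],
        # index of the version currently shown; the navigation arrows move it
        "current_version_index": 1,
    }
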
+ if "versions" not in history['metadata'][key] or not history['metadata'][key]["versions"]: + original_content = history['internal'][message_index][role_idx] + original_visible = history['visible'][message_index][role_idx] + + history['metadata'][key]["versions"] = [{ + "content": original_content, + "visible_content": original_visible, + "timestamp": get_current_timestamp() + }] - # NOW update the message content history['internal'][message_index][role_idx] = apply_extensions('input', new_text, state, is_chat=True) history['visible'][message_index][role_idx] = html.escape(new_text) - # Branch if editing user message, add version if editing assistant message - if is_user_msg: - # Branch like branch-here - history['visible'] = history['visible'][:message_index + 1] - history['internal'] = history['internal'][:message_index + 1] - new_unique_id = datetime.now().strftime('%Y%m%d-%H-%M-%S') - save_history(history, new_unique_id, state['character_menu'], state['mode']) - histories = find_all_histories_with_first_prompts(state) - past_chats_update = gr.update(choices=histories, value=new_unique_id) - state['unique_id'] = new_unique_id - elif not is_user_msg: - # Add the new version as current - add_message_version(history, message_index, is_current=True) - past_chats_update = gr.update() - else: - past_chats_update = gr.update() + add_message_version(history, role, message_index, is_current=True) + + # Since we are not branching, unique_id does not change. + past_chats_update = gr.update() save_history(history, state['unique_id'], state['character_menu'], state['mode']) html_output = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) @@ -1483,33 +1484,36 @@ def handle_navigate_version_click(state): history = state['history'] message_index = int(state['navigate_message_index']) direction = state['navigate_direction'] + role = state['navigate_message_role'] - # Get assistant message metadata - key = f"assistant_{message_index}" - if key not in history['metadata'] or 'versions' not in history['metadata'][key]: - # No versions to navigate + if not role: + logger.error("Role not provided for version navigation.") + html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) + return [history, html] + + key = f"{role}_{message_index}" + if 'metadata' not in history or key not in history['metadata'] or 'versions' not in history['metadata'][key]: html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) return [history, html] metadata = history['metadata'][key] - current_idx = metadata.get('current_version_index', 0) versions = metadata['versions'] + # Default to the last version if current_version_index is not set + current_idx = metadata.get('current_version_index', len(versions) - 1 if versions else 0) - # Calculate new index if direction == 'left': new_idx = max(0, current_idx - 1) else: # right new_idx = min(len(versions) - 1, current_idx + 1) if new_idx == current_idx: - # No change needed html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) return [history, html] - # Update history with new version - version = versions[new_idx] - history['internal'][message_index][1] = version['content'] - history['visible'][message_index][1] = version['visible_content'] + msg_content_idx = 0 if role == 'user' else 1 # 0 for user content, 1 for assistant content 
in the pair + version_to_load = versions[new_idx] + history['internal'][message_index][msg_content_idx] = version_to_load['content'] + history['visible'][message_index][msg_content_idx] = version_to_load['visible_content'] metadata['current_version_index'] = new_idx # Redraw and save diff --git a/modules/html_generator.py b/modules/html_generator.py index bfb278cd..cbf3e19c 100644 --- a/modules/html_generator.py +++ b/modules/html_generator.py @@ -388,16 +388,17 @@ def format_message_attachments(history, role, index): return "" -def get_version_navigation_html(history, i): +def get_version_navigation_html(history, i, role): """Generate simple navigation arrows for message versions""" - key = f"assistant_{i}" + key = f"{role}_{i}" metadata = history.get('metadata', {}) if key not in metadata or 'versions' not in metadata[key]: return "" versions = metadata[key]['versions'] - current_idx = metadata[key].get('current_version_index', 0) + # Default to the last version if current_version_index isn't set in metadata + current_idx = metadata[key].get('current_version_index', len(versions) - 1 if versions else 0) if len(versions) <= 1: return "" @@ -413,22 +414,33 @@ def get_version_navigation_html(history, i): def actions_html(history, i, role, info_message=""): + action_buttons = "" + version_nav_html = "" + if role == "assistant": - return (f'
' - f'{copy_button}' - f'{edit_button}' - f'{refresh_button if i == len(history["visible"]) - 1 else ""}' - f'{continue_button if i == len(history["visible"]) - 1 else ""}' - f'{remove_button if i == len(history["visible"]) - 1 else ""}' - f'{branch_button}' - f'{info_message}' - f'
' - f'{get_version_navigation_html(history, i)}') - return (f'
' + action_buttons = ( f'{copy_button}' f'{edit_button}' + f'{refresh_button if i == len(history["visible"]) - 1 else ""}' + f'{continue_button if i == len(history["visible"]) - 1 else ""}' + f'{remove_button if i == len(history["visible"]) - 1 else ""}' + f'{branch_button}' + ) + + version_nav_html = get_version_navigation_html(history, i, "assistant") + elif role == "user": + action_buttons = ( + f'{copy_button}' + f'{edit_button}' + ) + + version_nav_html = get_version_navigation_html(history, i, "user") + + return (f'
' + f'{action_buttons}' f'{info_message}' - f'
') + f'
' + f'{version_nav_html}') def generate_instruct_html(history): diff --git a/modules/ui.py b/modules/ui.py index e24e6402..a2662e14 100644 --- a/modules/ui.py +++ b/modules/ui.py @@ -212,14 +212,13 @@ def list_interface_input_elements(): 'grammar_string', 'navigate_message_index', 'navigate_direction', + 'navigate_message_role', 'edit_message_index', 'edit_message_text', 'edit_message_role', 'branch_index', 'enable_web_search', 'web_search_pages', - 'navigate_message_index', - 'navigate_direction', ] # Chat elements diff --git a/modules/ui_chat.py b/modules/ui_chat.py index 719af85a..df3d3929 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -110,6 +110,7 @@ def create_ui(): with gr.Row(visible=False): shared.gradio['navigate_message_index'] = gr.Number(value=-1, precision=0, elem_id="Navigate-message-index") shared.gradio['navigate_direction'] = gr.Textbox(value="", elem_id="Navigate-direction") + shared.gradio['navigate_message_role'] = gr.Textbox(value="", elem_id="Navigate-message-role") shared.gradio['navigate_version'] = gr.Button(elem_id="Navigate-version") shared.gradio['edit_message_index'] = gr.Number(value=-1, precision=0, elem_id="Edit-message-index") shared.gradio['edit_message_text'] = gr.Textbox(value="", elem_id="Edit-message-text") @@ -313,8 +314,7 @@ def create_event_handlers(): shared.gradio['edit_message'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.handle_edit_message_click, gradio('interface_state'), gradio('history', 'display', 'unique_id'), show_progress=False).then( - lambda: None, None, None, js='() => { const role = document.getElementById("Edit-message-role").querySelector("textarea").value; if (role === "user") document.getElementById("Regenerate").click(); }') + chat.handle_edit_message_click, gradio('interface_state'), gradio('history', 'display', 'unique_id'), show_progress=False) # Save/delete a character shared.gradio['save_character'].click(chat.handle_save_character_click, gradio('name2'), gradio('save_character_filename', 'character_saver'), show_progress=False) From 3eb0b77427ad7b87c128999fd915f97b22104819 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 28 May 2025 18:14:51 -0700 Subject: [PATCH 37/61] Improve the web search query generation --- modules/chat.py | 25 ++++++++++++++++++++++++- modules/web_search.py | 29 ++++------------------------- 2 files changed, 28 insertions(+), 26 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index 8bac680c..495fe934 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -538,6 +538,27 @@ def extract_pdf_text(pdf_path): return f"[Error extracting PDF text: {str(e)}]" +def generate_search_query(user_message, state): + """Generate a search query from user message using the LLM""" + # Augment the user message with search instruction + augmented_message = f"{user_message}\n\n=====\n\nPlease turn the message above into a short web search query in the same language as the message. Respond with only the search query, nothing else." 
+ + # Use a minimal state for search query generation but keep the full history + search_state = state.copy() + search_state['max_new_tokens'] = 64 + search_state['auto_max_new_tokens'] = False + search_state['enable_thinking'] = False + + # Generate the full prompt using existing history + augmented message + formatted_prompt = generate_chat_prompt(augmented_message, search_state) + + query = "" + for reply in generate_reply(formatted_prompt, search_state, stopping_strings=[], is_chat=True): + query = reply.strip() + + return query + + def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_message=True, for_ui=False): # Handle dict format with text and files files = [] @@ -570,7 +591,9 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess add_message_attachment(output, row_idx, file_path, is_user=True) # Add web search results as attachments if enabled - add_web_search_attachments(output, row_idx, text, state) + if state.get('enable_web_search', False): + search_query = generate_search_query(text, state) + add_web_search_attachments(output, row_idx, text, search_query, state) # Apply extensions text, visible_text = apply_extensions('chat_input', text, visible_text, state) diff --git a/modules/web_search.py b/modules/web_search.py index d3387ac9..667178c5 100644 --- a/modules/web_search.py +++ b/modules/web_search.py @@ -13,22 +13,6 @@ def get_current_timestamp(): return datetime.now().strftime('%b %d, %Y %H:%M') -def generate_search_query(user_message, state): - """Generate a search query from user message using the LLM""" - search_prompt = f"{user_message}\n\n=====\n\nPlease turn the message above into a short web search query in the same language as the message. Respond with only the search query, nothing else." 
- - # Use a minimal state for search query generation - search_state = state.copy() - search_state['max_new_tokens'] = 64 - search_state['temperature'] = 0.1 - - query = "" - for reply in generate_reply(search_prompt, search_state, stopping_strings=[], is_chat=False): - query = reply.strip() - - return query - - def download_web_page(url, timeout=10): """Download and extract text from a web page""" try: @@ -82,19 +66,14 @@ def perform_web_search(query, num_pages=3): return [] -def add_web_search_attachments(history, row_idx, user_message, state): +def add_web_search_attachments(history, row_idx, user_message, search_query, state): """Perform web search and add results as attachments""" - if not state.get('enable_web_search', False): + if not search_query: + logger.warning("No search query provided") return try: - # Generate search query - search_query = generate_search_query(user_message, state) - if not search_query: - logger.warning("Failed to generate search query") - return - - logger.info(f"Generated search query: {search_query}") + logger.info(f"Using search query: {search_query}") # Perform web search num_pages = int(state.get('web_search_pages', 3)) From 7080a02252b9949297950ef3669361d21f4a6bcf Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 28 May 2025 18:15:21 -0700 Subject: [PATCH 38/61] Reduce the timeout for downloading web pages --- modules/web_search.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/web_search.py b/modules/web_search.py index 667178c5..070f850c 100644 --- a/modules/web_search.py +++ b/modules/web_search.py @@ -13,7 +13,7 @@ def get_current_timestamp(): return datetime.now().strftime('%b %d, %Y %H:%M') -def download_web_page(url, timeout=10): +def download_web_page(url, timeout=5): """Download and extract text from a web page""" try: headers = { From 75d6cfd14d1aed5ba19bd747479794cbd34212d0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Wed, 28 May 2025 20:34:14 -0700 Subject: [PATCH 39/61] Download fetched web search results in parallel --- modules/web_search.py | 44 +++++++++++++++++++++++++++++++++---------- 1 file changed, 34 insertions(+), 10 deletions(-) diff --git a/modules/web_search.py b/modules/web_search.py index 070f850c..1f670349 100644 --- a/modules/web_search.py +++ b/modules/web_search.py @@ -1,3 +1,5 @@ +import concurrent.futures +from concurrent.futures import as_completed from datetime import datetime import requests @@ -5,7 +7,6 @@ from bs4 import BeautifulSoup from duckduckgo_search import DDGS from modules.logging_colors import logger -from modules.text_generation import generate_reply def get_current_timestamp(): @@ -40,27 +41,50 @@ def download_web_page(url, timeout=5): return f"[Error downloading content from {url}: {str(e)}]" -def perform_web_search(query, num_pages=3): +def perform_web_search(query, num_pages=3, max_workers=5): """Perform web search and return results with content""" try: with DDGS() as ddgs: results = list(ddgs.text(query, max_results=num_pages)) - search_results = [] + # Prepare download tasks + download_tasks = [] for i, result in enumerate(results): url = result.get('href', '') title = result.get('title', f'Search Result {i+1}') + download_tasks.append((url, title, i)) - # Download page content - content = download_web_page(url) + search_results = [None] * len(download_tasks) # Pre-allocate to maintain order - search_results.append({ - 'title': title, - 'url': url, - 'content': content - 
}) + # Download pages in parallel + with concurrent.futures.ThreadPoolExecutor(max_workers=max_workers) as executor: + # Submit all download tasks + future_to_task = { + executor.submit(download_web_page, task[0]): task + for task in download_tasks + } + + # Collect results as they complete + for future in as_completed(future_to_task): + url, title, index = future_to_task[future] + try: + content = future.result() + search_results[index] = { + 'title': title, + 'url': url, + 'content': content + } + except Exception as e: + logger.error(f"Error downloading {url}: {e}") + # Include failed downloads with empty content + search_results[index] = { + 'title': title, + 'url': url, + 'content': '' + } return search_results + except Exception as e: logger.error(f"Error performing web search: {e}") return [] From 63234b9b6f60ec4f276480b4e7f9d4cd1395dcaf Mon Sep 17 00:00:00 2001 From: Underscore <47636331+Th-Underscore@users.noreply.github.com> Date: Thu, 29 May 2025 07:22:03 -0400 Subject: [PATCH 40/61] UI: Fix impersonate (#7025) --- modules/chat.py | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index 495fe934..7afd906d 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -691,16 +691,19 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess yield output -def impersonate_wrapper(text, state): +def impersonate_wrapper(textbox, state): + text = textbox['text'] static_output = chat_html_wrapper(state['history'], state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) prompt = generate_chat_prompt('', state, impersonate=True) stopping_strings = get_stopping_strings(state) - yield text + '...', static_output + textbox['text'] = text + '...' 
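
This change reflects the fact that the chat input is no longer a plain string: the wrapper now receives a dict that carries the typed text together with any attached files, so streaming partial impersonation output back means rewriting its 'text' field and yielding the whole structure. The shape involved is simply (values invented for illustration):

    textbox = {"text": "Tell me about", "files": []}  # what the UI hands to the wrapper
    textbox["text"] += "..."                          # shown first, while generation starts
    # each streamed chunk then replaces textbox["text"] with (text + partial_reply).lstrip(' ')
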
+ yield textbox, static_output reply = None for reply in generate_reply(prompt + text, state, stopping_strings=stopping_strings, is_chat=True): - yield (text + reply).lstrip(' '), static_output + textbox['text'] = (text + reply).lstrip(' ') + yield textbox, static_output if shared.stop_everything: return From a8d02dec8f5e6a054a153b3b09425b51e090ae11 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 04:24:21 -0700 Subject: [PATCH 41/61] Bump llama.cpp --- requirements/full/requirements.txt | 4 ++-- requirements/full/requirements_amd.txt | 4 ++-- requirements/full/requirements_amd_noavx2.txt | 4 ++-- requirements/full/requirements_apple_intel.txt | 4 ++-- requirements/full/requirements_apple_silicon.txt | 6 +++--- requirements/full/requirements_cpu_only.txt | 4 ++-- requirements/full/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/full/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements.txt | 4 ++-- requirements/portable/requirements_apple_intel.txt | 4 ++-- requirements/portable/requirements_apple_silicon.txt | 6 +++--- requirements/portable/requirements_cpu_only.txt | 4 ++-- requirements/portable/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/portable/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements_vulkan.txt | 4 ++-- requirements/portable/requirements_vulkan_noavx2.txt | 4 ++-- 16 files changed, 34 insertions(+), 34 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index 0eaf10da..5f61aff9 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -33,8 +33,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index 65f184bf..a718b6ca 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -32,7 +32,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-win_amd64.whl; 
platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index d20b2ec3..5fddc623 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -32,7 +32,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index 2613d787..8e014445 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -32,7 +32,7 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" 
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9-py3-none-any.whl https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index af583b00..77779f3d 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -32,8 +32,8 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9-py3-none-any.whl https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index 9bf2a37d..79efc607 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -32,5 +32,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" 
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index 1731448e..8b29453e 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -32,5 +32,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index fc481a1a..f1f4a02e 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -33,8 +33,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt index fdae681d..adf50d9a 100644 --- a/requirements/portable/requirements.txt +++ b/requirements/portable/requirements.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" 
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt index a58f39f7..46b36791 100644 --- a/requirements/portable/requirements_apple_intel.txt +++ b/requirements/portable/requirements_apple_intel.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt index 91ea3a6d..66052711 100644 --- a/requirements/portable/requirements_apple_silicon.txt +++ b/requirements/portable/requirements_apple_silicon.txt @@ -18,6 +18,6 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" diff --git a/requirements/portable/requirements_cpu_only.txt 
b/requirements/portable/requirements_cpu_only.txt index 37e5aa40..4013abcc 100644 --- a/requirements/portable/requirements_cpu_only.txt +++ b/requirements/portable/requirements_cpu_only.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt index dcb2884b..41808854 100644 --- a/requirements/portable/requirements_cpu_only_noavx2.txt +++ b/requirements/portable/requirements_cpu_only_noavx2.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt index 8f1295bb..cff79ec6 100644 --- a/requirements/portable/requirements_noavx2.txt +++ b/requirements/portable/requirements_noavx2.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt index 858b4488..762b3fa3 100644 --- a/requirements/portable/requirements_vulkan.txt +++ b/requirements/portable/requirements_vulkan.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" 
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt index 569bae99..b425d305 100644 --- a/requirements/portable/requirements_vulkan_noavx2.txt +++ b/requirements/portable/requirements_vulkan_noavx2.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" From 685cfe254036111711de027f6d3a8198d02e7545 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 04:26:43 -0700 Subject: [PATCH 42/61] Lint --- css/main.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/css/main.css b/css/main.css index 181a19b8..8af87b42 100644 --- a/css/main.css +++ b/css/main.css @@ -265,7 +265,7 @@ button { .dark .pretty_scrollbar::-webkit-scrollbar-thumb, .dark .pretty_scrollbar::-webkit-scrollbar-thumb:hover { - background: rgba(255, 255, 255, 0.2); + background: rgb(255 255 255 / 20%); border-radius: 10px; } From f2ee917d4f600ebbc5fa9d5fcf65cf5feef27fc1 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 04:55:05 -0700 Subject: [PATCH 43/61] Update README --- README.md | 7 +++++-- 1 file changed, 5 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index 7105ce23..afb21cb0 100644 --- a/README.md +++ b/README.md @@ -14,14 +14,17 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. - Supports multiple text generation backends in one UI/API, including [llama.cpp](https://github.com/ggerganov/llama.cpp), [Transformers](https://github.com/huggingface/transformers), [ExLlamaV3](https://github.com/turboderp-org/exllamav3), [ExLlamaV2](https://github.com/turboderp-org/exllamav2), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) (the latter via its own [Dockerfile](https://github.com/oobabooga/text-generation-webui/blob/main/docker/TensorRT-LLM/Dockerfile)). - Easy setup: Choose between **portable builds** (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or the one-click installer that creates a self-contained `installer_files` directory that doesn't interfere with your system environment. 
+- **File attachments**: Upload text files and PDF documents directly in conversations to talk about their contents. +- **Web search**: Optionally search the internet with LLM-generated queries based on your input to add context to the conversation. +- **Advanced chat management**: Edit messages, navigate between message versions (like "swipes"), and branch conversations at any point. +- **Automatic prompt formatting** using Jinja2 templates. You don't need to ever worry about prompt formats. - UI that resembles the original ChatGPT style. -- Automatic prompt formatting using Jinja2 templates. You don't need to ever worry about prompt formats. - Three chat modes: `instruct`, `chat-instruct`, and `chat`, with automatic prompt templates in `chat-instruct`. - Free-form text generation in the Default/Notebook tabs without being limited to chat turns. You can send formatted conversations from the Chat tab to these. - Multiple sampling parameters and generation options for sophisticated text generation control. - Switch between different models easily in the UI without restarting, with fine control over settings. - OpenAI-compatible API with Chat and Completions endpoints, including tool-calling support – see [examples](https://github.com/oobabooga/text-generation-webui/wiki/12-%E2%80%90-OpenAI-API#examples). -- 100% offline and private, with zero telemetry, external resources, or remote update requests. +- 100% offline and private, with zero telemetry, external resources, or remote update requests. Web search is optional and user-controlled. - Extension support, with numerous built-in and user-contributed extensions available. See the [wiki](https://github.com/oobabooga/text-generation-webui/wiki/07-%E2%80%90-Extensions) and [extensions directory](https://github.com/oobabooga/text-generation-webui-extensions) for details. ## How to install From 2a9699033d90f4ffedfb22cbba7003c6441d08dc Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 04:55:59 -0700 Subject: [PATCH 44/61] Update README --- README.md | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/README.md b/README.md index afb21cb0..05809436 100644 --- a/README.md +++ b/README.md @@ -16,8 +16,8 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. - Easy setup: Choose between **portable builds** (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or the one-click installer that creates a self-contained `installer_files` directory that doesn't interfere with your system environment. - **File attachments**: Upload text files and PDF documents directly in conversations to talk about their contents. - **Web search**: Optionally search the internet with LLM-generated queries based on your input to add context to the conversation. -- **Advanced chat management**: Edit messages, navigate between message versions (like "swipes"), and branch conversations at any point. -- **Automatic prompt formatting** using Jinja2 templates. You don't need to ever worry about prompt formats. +- Advanced chat management: Edit messages, navigate between message versions (like "swipes"), and branch conversations at any point. +- Automatic prompt formatting using Jinja2 templates. You don't need to ever worry about prompt formats. - UI that resembles the original ChatGPT style. - Three chat modes: `instruct`, `chat-instruct`, and `chat`, with automatic prompt templates in `chat-instruct`. 
- Free-form text generation in the Default/Notebook tabs without being limited to chat turns. You can send formatted conversations from the Chat tab to these. From 9a94d7b4f6ae95b6b4b2fc521b5b25c300915dc9 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 05:02:52 -0700 Subject: [PATCH 45/61] Update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 05809436..900d5fbd 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. ## Features - Supports multiple text generation backends in one UI/API, including [llama.cpp](https://github.com/ggerganov/llama.cpp), [Transformers](https://github.com/huggingface/transformers), [ExLlamaV3](https://github.com/turboderp-org/exllamav3), [ExLlamaV2](https://github.com/turboderp-org/exllamav2), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) (the latter via its own [Dockerfile](https://github.com/oobabooga/text-generation-webui/blob/main/docker/TensorRT-LLM/Dockerfile)). -- Easy setup: Choose between **portable builds** (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or the one-click installer that creates a self-contained `installer_files` directory that doesn't interfere with your system environment. +- Easy setup: Choose between **portable builds** (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or the one-click installer that creates a self-contained `installer_files` directory without affecting your system. - **File attachments**: Upload text files and PDF documents directly in conversations to talk about their contents. - **Web search**: Optionally search the internet with LLM-generated queries based on your input to add context to the conversation. - Advanced chat management: Edit messages, navigate between message versions (like "swipes"), and branch conversations at any point. From 0986d075fb22dc5aa582bbefdfdb0ebdb6ee92c8 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 05:03:59 -0700 Subject: [PATCH 46/61] Update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 900d5fbd..ec01c0aa 100644 --- a/README.md +++ b/README.md @@ -16,7 +16,7 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. - Easy setup: Choose between **portable builds** (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or the one-click installer that creates a self-contained `installer_files` directory without affecting your system. - **File attachments**: Upload text files and PDF documents directly in conversations to talk about their contents. - **Web search**: Optionally search the internet with LLM-generated queries based on your input to add context to the conversation. -- Advanced chat management: Edit messages, navigate between message versions (like "swipes"), and branch conversations at any point. +- Advanced chat management: Edit messages, navigate between message versions, and branch conversations at any point. - Automatic prompt formatting using Jinja2 templates. You don't need to ever worry about prompt formats. - UI that resembles the original ChatGPT style. - Three chat modes: `instruct`, `chat-instruct`, and `chat`, with automatic prompt templates in `chat-instruct`. 
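The README bullets edited above advertise the OpenAI-compatible API with Chat and Completions endpoints. A minimal sketch of a request against the Chat Completions endpoint, assuming the server was started with --api, a model is already loaded, and the default API port 5000 is in use (adjust the host/port if you overrode --api-port):

```python
import requests

# Assumes the web UI was launched with --api and a model is loaded.
# Default API port is 5000; change the URL if --api-port was overridden.
url = "http://127.0.0.1:5000/v1/chat/completions"

payload = {
    "messages": [
        {"role": "user", "content": "Summarize what speculative decoding does in one sentence."}
    ],
    "max_tokens": 200,
    "temperature": 0.7,
}

response = requests.post(url, json=payload, timeout=120)
response.raise_for_status()

# The response follows the OpenAI chat completion schema.
print(response.json()["choices"][0]["message"]["content"])
```

Setting "stream": true in the payload switches the endpoint to server-sent events, with each chunk following the same OpenAI-style schema.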
From 36bc2760058ed4e6998f4c55176c7311b0facabe Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 05:39:26 -0700 Subject: [PATCH 47/61] Update README --- README.md | 1 + 1 file changed, 1 insertion(+) diff --git a/README.md b/README.md index ec01c0aa..9accffb7 100644 --- a/README.md +++ b/README.md @@ -18,6 +18,7 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. - **Web search**: Optionally search the internet with LLM-generated queries based on your input to add context to the conversation. - Advanced chat management: Edit messages, navigate between message versions, and branch conversations at any point. - Automatic prompt formatting using Jinja2 templates. You don't need to ever worry about prompt formats. +- Automatic GPU layers for GGUF models (on NVIDIA GPUs). - UI that resembles the original ChatGPT style. - Three chat modes: `instruct`, `chat-instruct`, and `chat`, with automatic prompt templates in `chat-instruct`. - Free-form text generation in the Default/Notebook tabs without being limited to chat turns. You can send formatted conversations from the Chat tab to these. From 81794692ab6fbc0ef24c7484b6571de090984dde Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 08:07:14 -0700 Subject: [PATCH 48/61] UI: Make the dark theme darker --- css/main.css | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/css/main.css b/css/main.css index 8af87b42..0d0a13cf 100644 --- a/css/main.css +++ b/css/main.css @@ -1,11 +1,11 @@ :root { --darker-gray: #202123; - --dark-gray: #343541; - --light-gray: #444654; + --dark-gray: #2A2B32; + --light-gray: #373943; --light-theme-gray: #f9fbff; --border-color-dark: #525252; --header-width: 112px; - --selected-item-color-dark: #32333e; + --selected-item-color-dark: #2E2F38; } @font-face { From c970c5f1665c3966c84ba50a05a45d2598038ea6 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 08:15:13 -0700 Subject: [PATCH 49/61] Make scrollbars darker in dark theme --- css/main.css | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/css/main.css b/css/main.css index 0d0a13cf..7f9d4618 100644 --- a/css/main.css +++ b/css/main.css @@ -265,7 +265,7 @@ button { .dark .pretty_scrollbar::-webkit-scrollbar-thumb, .dark .pretty_scrollbar::-webkit-scrollbar-thumb:hover { - background: rgb(255 255 255 / 20%); + background: rgb(255 255 255 / 10%); border-radius: 10px; } From 3f37a2e915a31b273caddd12a80412a199d753a7 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 08:49:31 -0700 Subject: [PATCH 50/61] Update README --- README.md | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/README.md b/README.md index 9accffb7..361584f8 100644 --- a/README.md +++ b/README.md @@ -13,7 +13,7 @@ Its goal is to become the [AUTOMATIC1111/stable-diffusion-webui](https://github. ## Features - Supports multiple text generation backends in one UI/API, including [llama.cpp](https://github.com/ggerganov/llama.cpp), [Transformers](https://github.com/huggingface/transformers), [ExLlamaV3](https://github.com/turboderp-org/exllamav3), [ExLlamaV2](https://github.com/turboderp-org/exllamav2), and [TensorRT-LLM](https://github.com/NVIDIA/TensorRT-LLM) (the latter via its own [Dockerfile](https://github.com/oobabooga/text-generation-webui/blob/main/docker/TensorRT-LLM/Dockerfile)). 
-- Easy setup: Choose between **portable builds** (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or the one-click installer that creates a self-contained `installer_files` directory without affecting your system. +- Easy setup: Choose between **portable builds** (zero setup, just unzip and run) for GGUF models on Windows/Linux/macOS, or the one-click installer that creates a self-contained `installer_files` directory. - **File attachments**: Upload text files and PDF documents directly in conversations to talk about their contents. - **Web search**: Optionally search the internet with LLM-generated queries based on your input to add context to the conversation. - Advanced chat management: Edit messages, navigate between message versions, and branch conversations at any point. From faa5c82c64e2036762ed3ff60a38fc5b37dac36d Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 09:02:34 -0700 Subject: [PATCH 51/61] Fix message version count not updating during regeneration streaming --- modules/chat.py | 37 ++++++++++++++++++++++++++++++------- 1 file changed, 30 insertions(+), 7 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index 7afd906d..90d66687 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -617,10 +617,19 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess if regenerate: row_idx = len(output['internal']) - 1 - # Store the first response as a version before regenerating + # Store the old response as a version before regenerating if not output['metadata'].get(f"assistant_{row_idx}", {}).get('versions'): add_message_version(output, "assistant", row_idx, is_current=False) + # Add new empty version (will be filled during streaming) + key = f"assistant_{row_idx}" + output['metadata'][key]["versions"].append({ + "content": "", + "visible_content": "", + "timestamp": get_current_timestamp() + }) + output['metadata'][key]["current_version_index"] = len(output['metadata'][key]["versions"]) - 1 + if loading_message: yield { 'visible': output['visible'][:-1] + [[visible_text, shared.processing_message]], @@ -673,20 +682,34 @@ def chatbot_wrapper(text, state, regenerate=False, _continue=False, loading_mess if _continue: output['internal'][-1] = [text, last_reply[0] + reply] output['visible'][-1] = [visible_text, last_reply[1] + visible_reply] - if is_stream: - yield output elif not (j == 0 and visible_reply.strip() == ''): output['internal'][-1] = [text, reply.lstrip(' ')] output['visible'][-1] = [visible_text, visible_reply.lstrip(' ')] - if is_stream: - yield output + + # Keep version metadata in sync during streaming (for regeneration) + if regenerate: + row_idx = len(output['internal']) - 1 + key = f"assistant_{row_idx}" + current_idx = output['metadata'][key]['current_version_index'] + output['metadata'][key]['versions'][current_idx].update({ + 'content': output['internal'][row_idx][1], + 'visible_content': output['visible'][row_idx][1] + }) + + if is_stream: + yield output output['visible'][-1][1] = apply_extensions('output', output['visible'][-1][1], state, is_chat=True) - # Add the newly generated response as a version (only for regeneration) + # Final sync for version metadata (in case streaming was disabled) if regenerate: row_idx = len(output['internal']) - 1 - add_message_version(output, "assistant", row_idx, is_current=True) + key = f"assistant_{row_idx}" + current_idx = output['metadata'][key]['current_version_index'] + 
output['metadata'][key]['versions'][current_idx].update({ + 'content': output['internal'][row_idx][1], + 'visible_content': output['visible'][row_idx][1] + }) yield output From 724147ffabce95b5d20528b83b6e44c1523d58f0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 10:49:29 -0700 Subject: [PATCH 52/61] Better detect when no model is available --- modules/utils.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/modules/utils.py b/modules/utils.py index 0e8bdd18..577c55b8 100644 --- a/modules/utils.py +++ b/modules/utils.py @@ -74,7 +74,7 @@ def natural_keys(text): def check_model_loaded(): if shared.model_name == 'None' or shared.model is None: - if len(get_available_models()) <= 1: + if len(get_available_models()) == 0: error_msg = "No model is loaded.\n\nTo get started:\n1) Place a GGUF file in your user_data/models folder\n2) Go to the Model tab and select it" logger.error(error_msg) return False, error_msg From e7129f9dbefbe87fa4c425b5873f80cbddaf7cf0 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 12:45:53 -0700 Subject: [PATCH 53/61] Prevent footer buttons below last assistant message from always appearing --- js/main.js | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/js/main.js b/js/main.js index 48bb8632..ea3ff46a 100644 --- a/js/main.js +++ b/js/main.js @@ -171,7 +171,6 @@ const observer = new MutationObserver(function(mutations) { document.getElementById("Generate").style.display = "flex"; } - doSyntaxHighlighting(); if (!isScrolled && targetElement.scrollTop !== targetElement.scrollHeight) { @@ -184,7 +183,7 @@ const observer = new MutationObserver(function(mutations) { const lastChild = messagesContainer?.lastElementChild; const prevSibling = lastChild?.previousElementSibling; if (lastChild && prevSibling) { - lastChild.style.minHeight = `calc(max(70vh, 100vh - ${prevSibling.offsetHeight}px - 102px))`; + lastChild.style.setProperty("margin-bottom", `calc(max(70vh, 100vh - ${prevSibling.offsetHeight}px - 102px) - ${lastChild.offsetHeight}px)`, "important"); } } }); From aff41f3482bc7045334b0d81ac514723fdbd4f97 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 12:53:41 -0700 Subject: [PATCH 54/61] Update README --- README.md | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) diff --git a/README.md b/README.md index 361584f8..daf409d0 100644 --- a/README.md +++ b/README.md @@ -189,13 +189,13 @@ usage: server.py [-h] [--multi-user] [--character CHARACTER] [--model MODEL] [-- [--extensions EXTENSIONS [EXTENSIONS ...]] [--verbose] [--idle-timeout IDLE_TIMEOUT] [--loader LOADER] [--cpu] [--cpu-memory CPU_MEMORY] [--disk] [--disk-cache-dir DISK_CACHE_DIR] [--load-in-8bit] [--bf16] [--no-cache] [--trust-remote-code] [--force-safetensors] [--no_use_fast] [--use_flash_attention_2] [--use_eager_attention] [--torch-compile] [--load-in-4bit] [--use_double_quant] [--compute_dtype COMPUTE_DTYPE] [--quant_type QUANT_TYPE] [--flash-attn] [--threads THREADS] [--threads-batch THREADS_BATCH] [--batch-size BATCH_SIZE] [--no-mmap] - [--mlock] [--n-gpu-layers N_GPU_LAYERS] [--tensor-split TENSOR_SPLIT] [--numa] [--no-kv-offload] [--row-split] [--extra-flags EXTRA_FLAGS] [--streaming-llm] [--ctx-size N] + [--mlock] [--gpu-layers N] [--tensor-split TENSOR_SPLIT] [--numa] [--no-kv-offload] [--row-split] [--extra-flags EXTRA_FLAGS] [--streaming-llm] 
[--ctx-size N] [--cache-type N] [--model-draft MODEL_DRAFT] [--draft-max DRAFT_MAX] [--gpu-layers-draft GPU_LAYERS_DRAFT] [--device-draft DEVICE_DRAFT] [--ctx-size-draft CTX_SIZE_DRAFT] [--gpu-split GPU_SPLIT] - [--autosplit] [--cfg-cache] [--no_flash_attn] [--no_xformers] [--no_sdpa] [--num_experts_per_token N] [--enable_tp] [--hqq-backend HQQ_BACKEND] [--cpp-runner] - [--cache_type CACHE_TYPE] [--deepspeed] [--nvme-offload-dir NVME_OFFLOAD_DIR] [--local_rank LOCAL_RANK] [--alpha_value ALPHA_VALUE] [--rope_freq_base ROPE_FREQ_BASE] - [--compress_pos_emb COMPRESS_POS_EMB] [--listen] [--listen-port LISTEN_PORT] [--listen-host LISTEN_HOST] [--share] [--auto-launch] [--gradio-auth GRADIO_AUTH] - [--gradio-auth-path GRADIO_AUTH_PATH] [--ssl-keyfile SSL_KEYFILE] [--ssl-certfile SSL_CERTFILE] [--subpath SUBPATH] [--old-colors] [--api] [--public-api] - [--public-api-id PUBLIC_API_ID] [--api-port API_PORT] [--api-key API_KEY] [--admin-key ADMIN_KEY] [--api-enable-ipv6] [--api-disable-ipv4] [--nowebui] + [--autosplit] [--cfg-cache] [--no_flash_attn] [--no_xformers] [--no_sdpa] [--num_experts_per_token N] [--enable_tp] [--cpp-runner] [--deepspeed] [--nvme-offload-dir NVME_OFFLOAD_DIR] + [--local_rank LOCAL_RANK] [--alpha_value ALPHA_VALUE] [--rope_freq_base ROPE_FREQ_BASE] [--compress_pos_emb COMPRESS_POS_EMB] [--listen] [--listen-port LISTEN_PORT] + [--listen-host LISTEN_HOST] [--share] [--auto-launch] [--gradio-auth GRADIO_AUTH] [--gradio-auth-path GRADIO_AUTH_PATH] [--ssl-keyfile SSL_KEYFILE] [--ssl-certfile SSL_CERTFILE] + [--subpath SUBPATH] [--old-colors] [--portable] [--api] [--public-api] [--public-api-id PUBLIC_API_ID] [--api-port API_PORT] [--api-key API_KEY] [--admin-key ADMIN_KEY] + [--api-enable-ipv6] [--api-disable-ipv4] [--nowebui] Text generation web UI @@ -217,7 +217,7 @@ Basic settings: --idle-timeout IDLE_TIMEOUT Unload model after this many minutes of inactivity. It will be automatically reloaded when you try to use it again. Model loader: - --loader LOADER Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, ExLlamav3_HF, ExLlamav2_HF, ExLlamav2, HQQ, + --loader LOADER Choose the model loader manually, otherwise, it will get autodetected. Valid options: Transformers, llama.cpp, ExLlamav3_HF, ExLlamav2_HF, ExLlamav2, TensorRT-LLM. Transformers/Accelerate: @@ -248,16 +248,18 @@ llama.cpp: --batch-size BATCH_SIZE Maximum number of prompt tokens to batch together when calling llama_eval. --no-mmap Prevent mmap from being used. --mlock Force the system to keep the model in RAM. - --n-gpu-layers N_GPU_LAYERS Number of layers to offload to the GPU. + --gpu-layers N, --n-gpu-layers N Number of layers to offload to the GPU. --tensor-split TENSOR_SPLIT Split the model across multiple GPUs. Comma-separated list of proportions. Example: 60,40. --numa Activate NUMA task allocation for llama.cpp. --no-kv-offload Do not offload the K, Q, V to the GPU. This saves VRAM but reduces the performance. --row-split Split the model by rows across GPUs. This may improve multi-gpu performance. - --extra-flags EXTRA_FLAGS Extra flags to pass to llama-server. Format: "flag1=value1;flag2;flag3=value3". Example: "override-tensor=exps=CPU" + --extra-flags EXTRA_FLAGS Extra flags to pass to llama-server. Format: "flag1=value1,flag2,flag3=value3". Example: "override-tensor=exps=CPU" --streaming-llm Activate StreamingLLM to avoid re-evaluating the entire prompt when old messages are removed. 
-Context and cache management: +Context and cache: --ctx-size N, --n_ctx N, --max_seq_len N Context size in tokens. + --cache-type N, --cache_type N KV cache type; valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4; ExLlamaV3 - fp16, q2 to q8 (can specify k_bits and v_bits + separately, e.g. q4_q8). Speculative decoding: --model-draft MODEL_DRAFT Path to the draft model for speculative decoding. @@ -276,15 +278,9 @@ ExLlamaV2: --num_experts_per_token N Number of experts to use for generation. Applies to MoE models like Mixtral. --enable_tp Enable Tensor Parallelism (TP) in ExLlamaV2. -HQQ: - --hqq-backend HQQ_BACKEND Backend for the HQQ loader. Valid options: PYTORCH, PYTORCH_COMPILE, ATEN. - TensorRT-LLM: --cpp-runner Use the ModelRunnerCpp runner, which is faster than the default ModelRunner but doesn't support streaming yet. -Cache: - --cache_type CACHE_TYPE KV cache type; valid options: llama.cpp - fp16, q8_0, q4_0; ExLlamaV2 - fp16, fp8, q8, q6, q4. - DeepSpeed: --deepspeed Enable the use of DeepSpeed ZeRO-3 for inference via the Transformers integration. --nvme-offload-dir NVME_OFFLOAD_DIR DeepSpeed: Directory to use for ZeRO-3 NVME offloading. @@ -307,6 +303,7 @@ Gradio: --ssl-certfile SSL_CERTFILE The path to the SSL certificate cert file. --subpath SUBPATH Customize the subpath for gradio, use with reverse proxy --old-colors Use the legacy Gradio colors, before the December/2024 update. + --portable Hide features not available in portable mode like training. API: --api Enable the API extension. From f59998d2680f346038320b536617c4738c393947 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 13:08:48 -0700 Subject: [PATCH 55/61] Don't limit the number of prompt characters printed with --verbose --- modules/text_generation.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/modules/text_generation.py b/modules/text_generation.py index 962311df..1fd6d810 100644 --- a/modules/text_generation.py +++ b/modules/text_generation.py @@ -505,11 +505,11 @@ def generate_reply_custom(question, original_question, state, stopping_strings=N return -def print_prompt(prompt, max_chars=2000): +def print_prompt(prompt, max_chars=-1): DARK_YELLOW = "\033[38;5;3m" RESET = "\033[0m" - if len(prompt) > max_chars: + if max_chars > 0 and len(prompt) > max_chars: half_chars = max_chars // 2 hidden_len = len(prompt[half_chars:-half_chars]) hidden_msg = f"{DARK_YELLOW}[...{hidden_len} characters hidden...]{RESET}" From a45a65213052dad02d696ed54af1b9f2ea82cd4a Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 13:28:51 -0700 Subject: [PATCH 56/61] CSS fix --- js/main.js | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/js/main.js b/js/main.js index ea3ff46a..f23dc246 100644 --- a/js/main.js +++ b/js/main.js @@ -183,7 +183,10 @@ const observer = new MutationObserver(function(mutations) { const lastChild = messagesContainer?.lastElementChild; const prevSibling = lastChild?.previousElementSibling; if (lastChild && prevSibling) { - lastChild.style.setProperty("margin-bottom", `calc(max(70vh, 100vh - ${prevSibling.offsetHeight}px - 102px) - ${lastChild.offsetHeight}px)`, "important"); + lastChild.style.setProperty("margin-bottom", + `max(0px, calc(max(70vh, 100vh - ${prevSibling.offsetHeight}px - 102px) - ${lastChild.offsetHeight}px))`, + "important" + ); } } }); From 8078c41ec67b96656d7e96128d915290b319e4f5 Mon Sep 17 00:00:00 
2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 13:32:19 -0700 Subject: [PATCH 57/61] Revert "Bump llama.cpp" This reverts commit a8d02dec8f5e6a054a153b3b09425b51e090ae11. --- requirements/full/requirements.txt | 4 ++-- requirements/full/requirements_amd.txt | 4 ++-- requirements/full/requirements_amd_noavx2.txt | 4 ++-- requirements/full/requirements_apple_intel.txt | 4 ++-- requirements/full/requirements_apple_silicon.txt | 6 +++--- requirements/full/requirements_cpu_only.txt | 4 ++-- requirements/full/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/full/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements.txt | 4 ++-- requirements/portable/requirements_apple_intel.txt | 4 ++-- requirements/portable/requirements_apple_silicon.txt | 6 +++--- requirements/portable/requirements_cpu_only.txt | 4 ++-- requirements/portable/requirements_cpu_only_noavx2.txt | 4 ++-- requirements/portable/requirements_noavx2.txt | 4 ++-- requirements/portable/requirements_vulkan.txt | 4 ++-- requirements/portable/requirements_vulkan_noavx2.txt | 4 ++-- 16 files changed, 34 insertions(+), 34 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index 5f61aff9..0eaf10da 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -33,8 +33,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index a718b6ca..65f184bf 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -32,7 +32,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" 
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index 5fddc623..d20b2ec3 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -32,7 +32,7 @@ sse-starlette==1.6.5 tiktoken # AMD wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+rocm6.2.4.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl; platform_system != "Darwin" and platform_machine != "x86_64" diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index 8e014445..2613d787 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -32,7 +32,7 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" 
https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9-py3-none-any.whl https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index 77779f3d..af583b00 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -32,8 +32,8 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9-py3-none-any.whl https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9-py3-none-any.whl diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index 79efc607..9bf2a37d 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -32,5 +32,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index 8b29453e..1731448e 100644 --- 
a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -32,5 +32,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index f1f4a02e..fc481a1a 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -33,8 +33,8 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" https://github.com/oobabooga/exllamav3/releases/download/v0.0.1a9/exllamav3-0.0.1a9+cu124.torch2.6.0-cp311-cp311-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" and python_version == "3.11" https://github.com/turboderp-org/exllamav2/releases/download/v0.2.9/exllamav2-0.2.9+cu124.torch2.6.0-cp311-cp311-win_amd64.whl; platform_system == "Windows" and python_version == "3.11" diff --git a/requirements/portable/requirements.txt b/requirements/portable/requirements.txt index adf50d9a..fdae681d 100644 --- a/requirements/portable/requirements.txt +++ b/requirements/portable/requirements.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-win_amd64.whl; platform_system == "Windows" 
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_apple_intel.txt b/requirements/portable/requirements_apple_intel.txt index 46b36791..a58f39f7 100644 --- a/requirements/portable/requirements_apple_intel.txt +++ b/requirements/portable/requirements_apple_intel.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_x86_64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" diff --git a/requirements/portable/requirements_apple_silicon.txt b/requirements/portable/requirements_apple_silicon.txt index 66052711..91ea3a6d 100644 --- a/requirements/portable/requirements_apple_silicon.txt +++ b/requirements/portable/requirements_apple_silicon.txt @@ -18,6 +18,6 @@ sse-starlette==1.6.5 tiktoken # Mac wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_15_0_arm64.whl; platform_system == "Darwin" and platform_release >= "24.0.0" and platform_release < "25.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_14_0_arm64.whl; platform_system == "Darwin" and platform_release >= "23.0.0" and platform_release < "24.0.0" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0-py3-none-macosx_13_0_arm64.whl; platform_system == "Darwin" and platform_release >= "22.0.0" and platform_release < "23.0.0" diff --git a/requirements/portable/requirements_cpu_only.txt b/requirements/portable/requirements_cpu_only.txt index 4013abcc..37e5aa40 100644 --- a/requirements/portable/requirements_cpu_only.txt +++ b/requirements/portable/requirements_cpu_only.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, AVX2) 
-https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx2-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_cpu_only_noavx2.txt b/requirements/portable/requirements_cpu_only_noavx2.txt index 41808854..dcb2884b 100644 --- a/requirements/portable/requirements_cpu_only_noavx2.txt +++ b/requirements/portable/requirements_cpu_only_noavx2.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # llama.cpp (CPU only, no AVX2) -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cpuavx-py3-none-win_amd64.whl; platform_system == "Windows" diff --git a/requirements/portable/requirements_noavx2.txt b/requirements/portable/requirements_noavx2.txt index cff79ec6..8f1295bb 100644 --- a/requirements/portable/requirements_noavx2.txt +++ b/requirements/portable/requirements_noavx2.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+cu124avx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan.txt b/requirements/portable/requirements_vulkan.txt index 762b3fa3..858b4488 100644 --- a/requirements/portable/requirements_vulkan.txt +++ b/requirements/portable/requirements_vulkan.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" 
+https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkan-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" diff --git a/requirements/portable/requirements_vulkan_noavx2.txt b/requirements/portable/requirements_vulkan_noavx2.txt index b425d305..569bae99 100644 --- a/requirements/portable/requirements_vulkan_noavx2.txt +++ b/requirements/portable/requirements_vulkan_noavx2.txt @@ -18,5 +18,5 @@ sse-starlette==1.6.5 tiktoken # CUDA wheels -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" -https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.15.0/llama_cpp_binaries-0.15.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-win_amd64.whl; platform_system == "Windows" +https://github.com/oobabooga/llama-cpp-binaries/releases/download/v0.14.0/llama_cpp_binaries-0.14.0+vulkanavx-py3-none-linux_x86_64.whl; platform_system == "Linux" and platform_machine == "x86_64" From dce02732a4caef16157ffbc288dfe079053e0bb4 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 14:08:48 -0700 Subject: [PATCH 58/61] Fix timestamp issues when editing/swiping messages --- modules/chat.py | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/modules/chat.py b/modules/chat.py index 90d66687..6b3ff4fc 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -1508,11 +1508,12 @@ def handle_edit_message_click(state): if "versions" not in history['metadata'][key] or not history['metadata'][key]["versions"]: original_content = history['internal'][message_index][role_idx] original_visible = history['visible'][message_index][role_idx] + original_timestamp = history['metadata'][key].get('timestamp', get_current_timestamp()) history['metadata'][key]["versions"] = [{ "content": original_content, "visible_content": original_visible, - "timestamp": get_current_timestamp() + "timestamp": original_timestamp }] history['internal'][message_index][role_idx] = apply_extensions('input', new_text, state, is_chat=True) @@ -1564,6 +1565,7 @@ def handle_navigate_version_click(state): history['internal'][message_index][msg_content_idx] = version_to_load['content'] history['visible'][message_index][msg_content_idx] = version_to_load['visible_content'] metadata['current_version_index'] = new_idx + update_message_metadata(history['metadata'], role, message_index, timestamp=version_to_load['timestamp']) # Redraw and save html = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) From acbcc12e7b19cc9f540d32b8d601ceefde77b7a1 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 14:11:21 -0700 Subject: [PATCH 59/61] Clean up --- modules/chat.py | 7 ++----- modules/ui_chat.py | 2 +- 2 files changed, 3 insertions(+), 6 deletions(-) diff --git a/modules/chat.py b/modules/chat.py index 6b3ff4fc..e526a9a0 100644 --- a/modules/chat.py +++ b/modules/chat.py @@ -1493,7 +1493,7 @@ def handle_edit_message_click(state): if message_index >= 
len(history['internal']): html_output = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) - return [history, html_output, gr.update()] # No unique_id change + return [history, html_output] role_idx = 0 if role == "user" else 1 @@ -1521,13 +1521,10 @@ def handle_edit_message_click(state): add_message_version(history, role, message_index, is_current=True) - # Since we are not branching, unique_id does not change. - past_chats_update = gr.update() - save_history(history, state['unique_id'], state['character_menu'], state['mode']) html_output = redraw_html(history, state['name1'], state['name2'], state['mode'], state['chat_style'], state['character_menu']) - return [history, html_output, past_chats_update] + return [history, html_output] def handle_navigate_version_click(state): diff --git a/modules/ui_chat.py b/modules/ui_chat.py index df3d3929..d79aa523 100644 --- a/modules/ui_chat.py +++ b/modules/ui_chat.py @@ -314,7 +314,7 @@ def create_event_handlers(): shared.gradio['edit_message'].click( ui.gather_interface_values, gradio(shared.input_elements), gradio('interface_state')).then( - chat.handle_edit_message_click, gradio('interface_state'), gradio('history', 'display', 'unique_id'), show_progress=False) + chat.handle_edit_message_click, gradio('interface_state'), gradio('history', 'display'), show_progress=False) # Save/delete a character shared.gradio['save_character'].click(chat.handle_save_character_click, gradio('name2'), gradio('save_character_filename', 'character_saver'), show_progress=False) From d1bfb08e8d4bab174e6b4467eff20f8a01a2a613 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 14:27:47 -0700 Subject: [PATCH 60/61] Improve the style of message editing --- css/main.css | 1 + 1 file changed, 1 insertion(+) diff --git a/css/main.css b/css/main.css index 7f9d4618..9685c863 100644 --- a/css/main.css +++ b/css/main.css @@ -1462,6 +1462,7 @@ strong { .editing-textarea { width: 100%; min-height: 200px; + max-height: 65vh; padding: 10px; border-radius: 5px; border: 1px solid #ccc; From 28e6bd4fcd8cd385cc92cc56c0c49fc474006147 Mon Sep 17 00:00:00 2001 From: oobabooga <112222186+oobabooga@users.noreply.github.com> Date: Thu, 29 May 2025 14:49:07 -0700 Subject: [PATCH 61/61] Revert "Update transformers requirement in /requirements/full (#7017)" This reverts commit cc9b7253c1216e5340da85cba9b65a13cf3526e9. 
--- requirements/full/requirements.txt | 2 +- requirements/full/requirements_amd.txt | 2 +- requirements/full/requirements_amd_noavx2.txt | 2 +- requirements/full/requirements_apple_intel.txt | 2 +- requirements/full/requirements_apple_silicon.txt | 2 +- requirements/full/requirements_cpu_only.txt | 2 +- requirements/full/requirements_cpu_only_noavx2.txt | 2 +- requirements/full/requirements_noavx2.txt | 2 +- requirements/full/requirements_nowheels.txt | 2 +- 9 files changed, 9 insertions(+), 9 deletions(-) diff --git a/requirements/full/requirements.txt b/requirements/full/requirements.txt index 0eaf10da..2c322715 100644 --- a/requirements/full/requirements.txt +++ b/requirements/full/requirements.txt @@ -23,7 +23,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.52.* +transformers==4.50.* tqdm wandb diff --git a/requirements/full/requirements_amd.txt b/requirements/full/requirements_amd.txt index 65f184bf..6aeb325e 100644 --- a/requirements/full/requirements_amd.txt +++ b/requirements/full/requirements_amd.txt @@ -22,7 +22,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.52.* +transformers==4.50.* tqdm wandb diff --git a/requirements/full/requirements_amd_noavx2.txt b/requirements/full/requirements_amd_noavx2.txt index d20b2ec3..3b052423 100644 --- a/requirements/full/requirements_amd_noavx2.txt +++ b/requirements/full/requirements_amd_noavx2.txt @@ -22,7 +22,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.52.* +transformers==4.50.* tqdm wandb diff --git a/requirements/full/requirements_apple_intel.txt b/requirements/full/requirements_apple_intel.txt index 2613d787..8c51459e 100644 --- a/requirements/full/requirements_apple_intel.txt +++ b/requirements/full/requirements_apple_intel.txt @@ -22,7 +22,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.52.* +transformers==4.50.* tqdm wandb diff --git a/requirements/full/requirements_apple_silicon.txt b/requirements/full/requirements_apple_silicon.txt index af583b00..b9f15d45 100644 --- a/requirements/full/requirements_apple_silicon.txt +++ b/requirements/full/requirements_apple_silicon.txt @@ -22,7 +22,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.52.* +transformers==4.50.* tqdm wandb diff --git a/requirements/full/requirements_cpu_only.txt b/requirements/full/requirements_cpu_only.txt index 9bf2a37d..0877d968 100644 --- a/requirements/full/requirements_cpu_only.txt +++ b/requirements/full/requirements_cpu_only.txt @@ -22,7 +22,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.52.* +transformers==4.50.* tqdm wandb diff --git a/requirements/full/requirements_cpu_only_noavx2.txt b/requirements/full/requirements_cpu_only_noavx2.txt index 1731448e..cab78237 100644 --- a/requirements/full/requirements_cpu_only_noavx2.txt +++ b/requirements/full/requirements_cpu_only_noavx2.txt @@ -22,7 +22,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.52.* +transformers==4.50.* tqdm wandb diff --git a/requirements/full/requirements_noavx2.txt b/requirements/full/requirements_noavx2.txt index fc481a1a..dfd42577 100644 --- a/requirements/full/requirements_noavx2.txt +++ b/requirements/full/requirements_noavx2.txt @@ -23,7 +23,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.52.* +transformers==4.50.* tqdm wandb diff --git a/requirements/full/requirements_nowheels.txt b/requirements/full/requirements_nowheels.txt index 2ed8affa..5d9f84ce 100644 --- 
a/requirements/full/requirements_nowheels.txt +++ b/requirements/full/requirements_nowheels.txt @@ -22,7 +22,7 @@ safetensors==0.5.* scipy sentencepiece tensorboard -transformers==4.52.* +transformers==4.50.* tqdm wandb
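
Note on patch 58 above ("Fix timestamp issues when editing/swiping messages"): the modules/chat.py changes preserve a message's original timestamp when its first version snapshot is created, and restore the stored timestamp when navigating between versions, rather than re-stamping the message with the current time. The standalone Python sketch below mirrors that behavior under simplified assumptions: the flat history layout, make_key(), edit_message() and navigate_version() are illustrative stand-ins, not the webui's actual helpers. Keeping the timestamp inside each version entry means swiping or editing never falsifies when a message was originally generated; only newly created versions get a fresh stamp.

# Minimal sketch (assumptions noted above) of the timestamp-preserving
# edit / version-navigation flow from patch 58.
import time


def get_current_timestamp():
    return time.strftime("%Y-%m-%d %H:%M:%S")


def make_key(role, message_index):
    # Hypothetical key scheme for the metadata dict; the real module
    # derives its own keys.
    return f"{role}_{message_index}"


def edit_message(history, role, message_index, new_text):
    key = make_key(role, message_index)
    role_idx = 0 if role == "user" else 1
    meta = history['metadata'].setdefault(key, {})

    # First edit: snapshot the original content with its ORIGINAL
    # timestamp, falling back to "now" only if none was ever recorded.
    if not meta.get("versions"):
        meta["versions"] = [{
            "content": history['internal'][message_index][role_idx],
            "visible_content": history['visible'][message_index][role_idx],
            "timestamp": meta.get("timestamp", get_current_timestamp()),
        }]

    history['internal'][message_index][role_idx] = new_text
    history['visible'][message_index][role_idx] = new_text
    meta["versions"].append({
        "content": new_text,
        "visible_content": new_text,
        "timestamp": get_current_timestamp(),
    })
    meta["current_version_index"] = len(meta["versions"]) - 1
    meta["timestamp"] = meta["versions"][-1]["timestamp"]


def navigate_version(history, role, message_index, new_idx):
    key = make_key(role, message_index)
    role_idx = 0 if role == "user" else 1
    meta = history['metadata'][key]
    version = meta["versions"][new_idx]

    history['internal'][message_index][role_idx] = version["content"]
    history['visible'][message_index][role_idx] = version["visible_content"]
    meta["current_version_index"] = new_idx
    # Restore the timestamp stored with the selected version instead of
    # stamping the current time -- the core of the patch-58 fix.
    meta["timestamp"] = version["timestamp"]


if __name__ == "__main__":
    history = {
        'internal': [["hi", "hello there"]],
        'visible': [["hi", "hello there"]],
        'metadata': {"assistant_0": {"timestamp": "2025-05-29 10:00:00"}},
    }
    edit_message(history, "assistant", 0, "hello again")
    navigate_version(history, "assistant", 0, 0)
    # The original reply and its original timestamp are both restored.
    assert history['visible'][0][1] == "hello there"
    assert history['metadata']["assistant_0"]["timestamp"] == "2025-05-29 10:00:00"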