def minify_css(css: str) -> str:
    r"""Return *css* compacted onto a single line.

    Comments, indentation, blank lines and the whitespace that follows
    ``{``, ``:``, ``;``, ``,`` or precedes ``{`` are removed.

    A line break that separates two tokens (a selector or a value wrapped
    over two lines) is turned into one space instead of being deleted,
    because deleting it fuses the tokens and changes the meaning of the
    stylesheet: ``.a\n.b`` must not become ``.a.b`` and ``0\nauto`` must not
    become ``0auto``.
    """
    # Step 1: Remove comments
    css = re.sub(r'/\*.*?\*/', '', css, flags=re.DOTALL)

    # Step 2: Remove leading and trailing whitespace
    css = re.sub(r'^[ \t]*|[ \t]*$', '', css, flags=re.MULTILINE)

    # Step 3: Remove spaces after specific characters ({ : ; ,})
    css = re.sub(r'([:{;,])\s+', r'\1', css)

    # Step 4: Remove spaces before `{`
    css = re.sub(r'\s+{', '{', css)

    # Step 5: Remove empty lines
    css = re.sub(r'^\s*$', '', css, flags=re.MULTILINE)

    # Step 6: Collapse all lines into one. Line breaks that sit between two
    # ordinary tokens become a single space; every other line break (next to
    # a delimiter, or at the very start/end of the text) is simply dropped.
    css = re.sub(r'(?<=[^\n{};,(])\n+(?=[^\n{};,)])', ' ', css)
    css = css.replace('\n', '')

    return css


def fix_newlines(string):
    """Normalise line breaks so that every break is exactly one blank line.

    Each newline is doubled, runs of three or more newlines are collapsed to
    two, and leading/trailing whitespace is stripped.
    """
    string = string.replace('\n', '\n\n')
    string = re.sub(r"\n{3,}", "\n\n", string)
    return string.strip()


# Opening/closing delimiters recognised by replace_quotes(). The text handed
# to it is HTML-escaped, so most delimiters are spelled as entities.
_QUOTE_PAIRS = (
    ('&quot;', '&quot;'),      # Double quotes
    ('&ldquo;', '&rdquo;'),    # Left and right double quotation marks
    ('&lsquo;', '&rsquo;'),    # Left and right single quotation marks
    ('&laquo;', '&raquo;'),    # French quotes
    ('&bdquo;', '&ldquo;'),    # German quotes
    ('&#8220;', '&#8221;'),    # Double quotation marks (numeric entities)
    ('&#x201C;', '&#x201D;'),  # Double quotation marks (hex entities)
    ('\u201C', '\u201D'),      # Double quotation marks (literal chars)
)

# One alternative per pair, tried in the order listed above. DOTALL lets a
# quotation span several lines; `.*?` keeps every match as short as possible.
_QUOTE_PATTERN = re.compile(
    '|'.join(
        f'{re.escape(open_q)}.*?{re.escape(close_q)}'
        for open_q, close_q in _QUOTE_PAIRS
    ),
    re.DOTALL,
)


def replace_quotes(text):
    """Wrap every quoted span of *text* in ``<q>...</q>``.

    The original quote delimiters are kept inside the tag. Text without a
    complete opening/closing pair is returned unchanged.
    """
    return _QUOTE_PATTERN.sub(lambda m: f'<q>{m.group(0)}</q>', text)


def replace_blockquote(m):
    """Turn a matched ``\\begin{blockquote}...\\end{blockquote}`` span into ``>`` lines.

    *m* is a regex match object. Every newline inside the match is followed
    by ``> `` and the two LaTeX markers are removed.
    """
    quoted = m.group().replace('\n', '\n> ')
    quoted = quoted.replace('\\begin{blockquote}', '')
    return quoted.replace('\\end{blockquote}', '')


def extract_thinking_block(string):
    """Extract thinking blocks from the beginning of an HTML-escaped string.

    Thin wrapper that forwards to ``extract_reasoning`` with
    ``html_escaped=True`` and returns its result unchanged.
    """
    return extract_reasoning(string, html_escaped=True)


def build_thinking_block(thinking_content, message_id, has_remaining_content):
    """Build HTML for a thinking block.

    Returns ``None`` when *thinking_content* is ``None``. Otherwise the
    content is rendered with ``process_markdown_content`` and placed in the
    template below, titled "Thinking..." while no content follows the
    thinking block yet (*has_remaining_content* is falsy) and "Thought" once
    it does.
    """
    if thinking_content is None:
        return None

    # Process the thinking content through markdown
    thinking_html = process_markdown_content(thinking_content)

    # Generate unique ID for the thinking block
    # NOTE(review): block_id is not referenced by the template as it appears
    # here -- confirm that the template markup is intact.
    block_id = f"thinking-{message_id}-0"

    # Check if thinking is complete or still in progress
    is_streaming = not has_remaining_content
    title_text = "Thinking..." if is_streaming else "Thought"

    # `info_svg_small` is a module-level name defined elsewhere in this file.
    return f'''
{info_svg_small} {title_text}
{thinking_html}
'''


def build_main_content_block(content):
    """Build HTML for the main content block.

    Empty or missing *content* yields an empty string; anything else is
    rendered with ``process_markdown_content``.
    """
    if not content:
        return ""

    return process_markdown_content(content)
pattern.sub(replace_blockquote, string) # Code block standardization string = string.replace('\\begin{code}', '```') string = string.replace('\\end{code}', '```') string = string.replace('\\begin{align*}', '$$') string = string.replace('\\end{align*}', '$$') string = string.replace('\\begin{align}', '$$') string = string.replace('\\end{align}', '$$') string = string.replace('\\begin{equation}', '$$') string = string.replace('\\end{equation}', '$$') string = string.replace('\\begin{equation*}', '$$') string = string.replace('\\end{equation*}', '$$') string = re.sub(r"(.)```", r"\1\n```", string) # Protect asterisks and underscores within all LaTeX blocks before markdown conversion latex_pattern = re.compile(r'((?:^|[\r\n\s])\$\$[^`]*?\$\$)|\\\[(.*?)\\\]|\\\((.*?)\\\)', re.DOTALL) string = latex_pattern.sub(protect_asterisks_underscores_in_latex, string) result = '' is_code = False is_latex = False # Manual line iteration for robust structure parsing for line in string.split('\n'): stripped_line = line.strip() if stripped_line.startswith('```'): is_code = not is_code elif stripped_line.startswith('$$') and (stripped_line == "$$" or not stripped_line.endswith('$$')): is_latex = not is_latex elif stripped_line.endswith('$$'): is_latex = False elif stripped_line.startswith('\\\\[') and not stripped_line.endswith('\\\\]'): is_latex = True elif stripped_line.startswith('\\\\]'): is_latex = False elif stripped_line.endswith('\\\\]'): is_latex = False result += line # Don't add an extra \n for code, LaTeX, or tables if is_code or is_latex or line.startswith('|'): result += '\n' # Also don't add an extra \n for lists elif stripped_line.startswith('-') or stripped_line.startswith('*') or stripped_line.startswith('+') or stripped_line.startswith('>') or re.match(r'\d+\.', stripped_line): result += ' \n' else: result += ' \n' result = result.strip() if is_code: result += '\n```' # Unfinished code block # Unfinished list, like "\n1.". 
A |delete| string is added and then # removed to force a
    or