UI: Fix inline LaTeX rendering by protecting $...$ from markdown (closes #7423)

This commit is contained in:
oobabooga 2026-04-04 19:33:05 -07:00
parent 8cb7fe9c47
commit 1b403a4ffa

View file

@ -185,28 +185,29 @@ def process_markdown_content(string):
if not string: if not string:
return "" return ""
# Define unique placeholders for LaTeX asterisks and underscores # Define unique placeholders for LaTeX characters that conflict with markdown
LATEX_ASTERISK_PLACEHOLDER = "LATEXASTERISKPLACEHOLDER" LATEX_ASTERISK_PLACEHOLDER = "LATEXASTERISKPLACEHOLDER"
LATEX_UNDERSCORE_PLACEHOLDER = "LATEXUNDERSCOREPLACEHOLDER" LATEX_UNDERSCORE_PLACEHOLDER = "LATEXUNDERSCOREPLACEHOLDER"
LATEX_PIPE_PLACEHOLDER = "LATEXPIPEPLACEHOLDER"
def protect_latex_content(content):
"""Protect markdown-sensitive characters inside LaTeX."""
content = content.replace('*', LATEX_ASTERISK_PLACEHOLDER)
content = content.replace('_', LATEX_UNDERSCORE_PLACEHOLDER)
content = content.replace('|', LATEX_PIPE_PLACEHOLDER)
return content
def protect_asterisks_underscores_in_latex(match): def protect_asterisks_underscores_in_latex(match):
"""A replacer function for re.sub to protect asterisks and underscores in multiple LaTeX formats.""" """A replacer function for re.sub to protect markdown-sensitive characters in multiple LaTeX formats."""
# Check which delimiter group was captured # Check which delimiter group was captured
if match.group(1) is not None: # Content from $$...$$ if match.group(1) is not None: # Content from $$...$$
content = match.group(1) return protect_latex_content(match.group(1))
modified_content = content.replace('*', LATEX_ASTERISK_PLACEHOLDER)
modified_content = modified_content.replace('_', LATEX_UNDERSCORE_PLACEHOLDER)
return f'{modified_content}'
elif match.group(2) is not None: # Content from \[...\] elif match.group(2) is not None: # Content from \[...\]
content = match.group(2) return f'\\[{protect_latex_content(match.group(2))}\\]'
modified_content = content.replace('*', LATEX_ASTERISK_PLACEHOLDER)
modified_content = modified_content.replace('_', LATEX_UNDERSCORE_PLACEHOLDER)
return f'\\[{modified_content}\\]'
elif match.group(3) is not None: # Content from \(...\) elif match.group(3) is not None: # Content from \(...\)
content = match.group(3) return f'\\({protect_latex_content(match.group(3))}\\)'
modified_content = content.replace('*', LATEX_ASTERISK_PLACEHOLDER) elif match.group(4) is not None: # Content from $...$
modified_content = modified_content.replace('_', LATEX_UNDERSCORE_PLACEHOLDER) return f'${protect_latex_content(match.group(4).strip())}$'
return f'\\({modified_content}\\)'
return match.group(0) # Fallback return match.group(0) # Fallback
@ -240,7 +241,7 @@ def process_markdown_content(string):
string = re.sub(r"(.)```", r"\1\n```", string) string = re.sub(r"(.)```", r"\1\n```", string)
# Protect asterisks and underscores within all LaTeX blocks before markdown conversion # Protect asterisks and underscores within all LaTeX blocks before markdown conversion
latex_pattern = re.compile(r'((?:^|[\r\n\s])\$\$[^`]*?\$\$)|\\\[(.*?)\\\]|\\\((.*?)\\\)', latex_pattern = re.compile(r'((?:^|[\r\n\s])\$\$[^`]*?\$\$)|\\\[(.*?)\\\]|\\\((.*?)\\\)|(?<!\$)\$(?!\$)([^\$\n]*\\\\[^\$\n]*?)\$(?!\$)',
re.DOTALL) re.DOTALL)
string = latex_pattern.sub(protect_asterisks_underscores_in_latex, string) string = latex_pattern.sub(protect_asterisks_underscores_in_latex, string)
@ -306,6 +307,7 @@ def process_markdown_content(string):
# Restore the LaTeX asterisks and underscores after markdown conversion # Restore the LaTeX asterisks and underscores after markdown conversion
html_output = html_output.replace(LATEX_ASTERISK_PLACEHOLDER, '*') html_output = html_output.replace(LATEX_ASTERISK_PLACEHOLDER, '*')
html_output = html_output.replace(LATEX_UNDERSCORE_PLACEHOLDER, '_') html_output = html_output.replace(LATEX_UNDERSCORE_PLACEHOLDER, '_')
html_output = html_output.replace(LATEX_PIPE_PLACEHOLDER, '|')
# Remove extra newlines before </code> # Remove extra newlines before </code>
html_output = re.sub(r'\s*</code>', '</code>', html_output) html_output = re.sub(r'\s*</code>', '</code>', html_output)