Fix LaTeX rendering for equations with asterisks

This commit is contained in:
oobabooga 2025-08-30 10:13:32 -07:00
parent a3eb67e466
commit 96136ea760

View file

@ -243,6 +243,27 @@ def process_markdown_content(string):
if not string:
return ""
# Stand-in text written in place of every '*' found inside a LaTeX span;
# it is swapped back to '*' once the markdown conversion has produced HTML.
LATEX_ASTERISK_PLACEHOLDER = "LATEXASTERISKPLACEHOLDER"

def protect_asterisks_in_latex(match):
    """Rebuild a matched LaTeX span with its asterisks replaced by the placeholder.

    Meant to be passed to ``re.sub`` as the replacement callable for a
    pattern with three alternative capture groups: group 1 holds the content
    of ``$$...$$``, group 2 the content of ``\\[...\\]`` and group 3 the
    content of ``\\(...\\)``. The first group that captured something is
    re-wrapped in its own delimiters; if none did, the matched text is
    returned unchanged.
    """
    # (capture group index, opening delimiter, closing delimiter)
    delimiters_by_group = (
        (1, '$$', '$$'),
        (2, '\\[', '\\]'),
        (3, '\\(', '\\)'),
    )
    for group_index, opener, closer in delimiters_by_group:
        inner = match.group(group_index)
        if inner is None:
            continue
        return opener + inner.replace('*', LATEX_ASTERISK_PLACEHOLDER) + closer
    # No alternative captured anything: hand the text back untouched.
    return match.group(0)
# Make \[ \] LaTeX equations inline
pattern = r'^\s*\\\[\s*\n([\s\S]*?)\n\s*\\\]\s*$'
replacement = r'\\[ \1 \\]'
@ -272,6 +293,10 @@ def process_markdown_content(string):
string = string.replace('\\end{equation*}', '$$')
string = re.sub(r"(.)```", r"\1\n```", string)
# Protect asterisks within all LaTeX blocks before markdown conversion
latex_pattern = re.compile(r'\$\$(.*?)\$\$|\\\[(.*?)\\\]|\\\((.*?)\\\)', re.DOTALL)
string = latex_pattern.sub(protect_asterisks_in_latex, string)
result = ''
is_code = False
is_latex = False
@ -330,6 +355,9 @@ def process_markdown_content(string):
# Convert to HTML using markdown
html_output = markdown.markdown(result, extensions=['fenced_code', 'tables', SaneListExtension()])
# Restore the LaTeX asterisks after markdown conversion
html_output = html_output.replace(LATEX_ASTERISK_PLACEHOLDER, '*')
# Remove extra newlines before </code>
html_output = re.sub(r'\s*</code>', '</code>', html_output)