diff --git a/modules/html_generator.py b/modules/html_generator.py
index 312b66ad..667a64d6 100644
--- a/modules/html_generator.py
+++ b/modules/html_generator.py
@@ -211,23 +211,27 @@ def process_markdown_content(string):
if not string:
return ""
- # Define a unique placeholder for LaTeX asterisks
+ # Define unique placeholders for LaTeX asterisks and underscores
LATEX_ASTERISK_PLACEHOLDER = "LATEXASTERISKPLACEHOLDER"
+ LATEX_UNDERSCORE_PLACEHOLDER = "LATEXUNDERSCOREPLACEHOLDER"
- def protect_asterisks_in_latex(match):
- """A replacer function for re.sub to protect asterisks in multiple LaTeX formats."""
+ def protect_asterisks_underscores_in_latex(match):
+ """A replacer function for re.sub to protect asterisks and underscores in multiple LaTeX formats."""
# Check which delimiter group was captured
if match.group(1) is not None: # Content from $$...$$
content = match.group(1)
modified_content = content.replace('*', LATEX_ASTERISK_PLACEHOLDER)
- return f'$${modified_content}$$'
+ modified_content = modified_content.replace('_', LATEX_UNDERSCORE_PLACEHOLDER)
+ return modified_content  # group(1) already holds the leading whitespace and both $$ delimiters; do not re-wrap
elif match.group(2) is not None: # Content from \[...\]
content = match.group(2)
modified_content = content.replace('*', LATEX_ASTERISK_PLACEHOLDER)
+ modified_content = modified_content.replace('_', LATEX_UNDERSCORE_PLACEHOLDER)
return f'\\[{modified_content}\\]'
elif match.group(3) is not None: # Content from \(...\)
content = match.group(3)
modified_content = content.replace('*', LATEX_ASTERISK_PLACEHOLDER)
+ modified_content = modified_content.replace('_', LATEX_UNDERSCORE_PLACEHOLDER)
return f'\\({modified_content}\\)'
return match.group(0) # Fallback
@@ -261,9 +265,10 @@ def process_markdown_content(string):
string = string.replace('\\end{equation*}', '$$')
string = re.sub(r"(.)```", r"\1\n```", string)
- # Protect asterisks within all LaTeX blocks before markdown conversion
- latex_pattern = re.compile(r'\$\$(.*?)\$\$|\\\[(.*?)\\\]|\\\((.*?)\\\)', re.DOTALL)
- string = latex_pattern.sub(protect_asterisks_in_latex, string)
+ # Protect asterisks and underscores within all LaTeX blocks before markdown conversion
+ latex_pattern = re.compile(r'((?:^|[\r\n\s])\$\$[^`]*?\$\$)|\\\[(.*?)\\\]|\\\((.*?)\\\)',
+ re.DOTALL)
+ string = latex_pattern.sub(protect_asterisks_underscores_in_latex, string)
result = ''
is_code = False
@@ -275,11 +280,11 @@ def process_markdown_content(string):
if stripped_line.startswith('```'):
is_code = not is_code
- elif stripped_line.startswith('$$'):
+ elif stripped_line.startswith('$$') and (stripped_line == "$$" or not stripped_line.endswith('$$')):
is_latex = not is_latex
elif stripped_line.endswith('$$'):
is_latex = False
- elif stripped_line.startswith('\\\\['):
+ elif stripped_line.startswith('\\\\[') and not stripped_line.endswith('\\\\]'):
is_latex = True
elif stripped_line.startswith('\\\\]'):
is_latex = False
@@ -324,8 +329,9 @@ def process_markdown_content(string):
# Convert to HTML using markdown
html_output = markdown.markdown(result, extensions=['fenced_code', 'tables', SaneListExtension()])
- # Restore the LaTeX asterisks after markdown conversion
+ # Restore the LaTeX asterisks and underscores after markdown conversion
html_output = html_output.replace(LATEX_ASTERISK_PLACEHOLDER, '*')
+ html_output = html_output.replace(LATEX_UNDERSCORE_PLACEHOLDER, '_')
# Remove extra newlines before </code>
html_output = re.sub(r'\s*</code>', '</code>', html_output)