From 5bcd4d335e0db3c84df7616ea7dbab6c4ef37dfd Mon Sep 17 00:00:00 2001 From: xunbu Date: Fri, 22 Aug 2025 14:26:18 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8Dmd=E8=BD=AChtml=E7=9A=84?= =?UTF-8?q?=E6=B8=B2=E6=9F=93=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docutranslate/exporter/md/md2html_exporter.py | 25 +++++++++++-------- 1 file changed, 15 insertions(+), 10 deletions(-) diff --git a/docutranslate/exporter/md/md2html_exporter.py b/docutranslate/exporter/md/md2html_exporter.py index 43e6ba4..d081d6e 100644 --- a/docutranslate/exporter/md/md2html_exporter.py +++ b/docutranslate/exporter/md/md2html_exporter.py @@ -1,3 +1,4 @@ +import re # <--- 步骤 1: 导入 re 模块 from dataclasses import dataclass import jinja2 import markdown @@ -6,6 +7,7 @@ from docutranslate.ir.document import Document from docutranslate.ir.markdown_document import MarkdownDocument from docutranslate.utils.resource_utils import resource_path + @dataclass class MD2HTMLExporterConfig(MDExporterConfig): cdn: bool = True @@ -26,15 +28,14 @@ class MD2HTMLExporter(MDExporter): katex_js = f'' if not cdn else r"""""" auto_render = f'' if not cdn else r"""""" - # 修改 JavaScript 渲染配置,增加更多选项 + # 这是正确且推荐的 JS 配置,它与 pymdownx.arithmatex 配合工作 + # 它只寻找 arithmatex 生成的 \(...\) 和 \[...\] render_math_in_element = r""" ' - # 修改扩展配置 + # 扩展配置保持不变,我们仍然使用 arithmatex extensions = [ 'markdown.extensions.tables', 'pymdownx.arithmatex', @@ -80,9 +81,16 @@ class MD2HTMLExporter(MDExporter): } } - # 预处理 markdown 内容,确保数学公式周围有正确的空行 content = document.content.decode() + # ================================================================= + # 步骤 2: 预处理 markdown 内容,确保数学公式块周围有正确的空行 + # 正则表达式 r'(\$\$[\s\S]*?\$\$)' 匹配一个完整的 $$...$$ 块。 + # [\s\S]*? 匹配包括换行符在内的任何字符,并且是非贪婪模式。 + # re.sub 将找到的每个匹配项替换为 `\n\n<匹配项>\n\n`,从而强制添加空行。 + content = re.sub(r'(\$\$[\s\S]*?\$\$)', r'\n\n\1\n\n', content) + # ================================================================= + html_content = markdown.markdown( content, extensions=extensions, @@ -100,14 +108,11 @@ class MD2HTMLExporter(MDExporter): mermaid=mermaid, ) return Document.from_bytes(content=render.encode("utf-8"), suffix=".html", stem=document.stem) - - if __name__ == '__main__': from pathlib import Path - d = Document.from_path(r"C:\Users\jxgm\Desktop\mcp文件夹\学习笔记\互联网认证授权机制\互联网认证授权机制.md" -) - # d = Document.from_path(r"C:\Users\jxgm\Desktop\matrixcookbook_translated.md") + # d = Document.from_path(r"C:\Users\jxgm\Desktop\mcp文件夹\学习笔记\互联网认证授权机制\互联网认证授权机制.md") + d = Document.from_path(r"C:\Users\jxgm\Desktop\matrixcookbook_translated.md") exporter = MD2HTMLExporter() d1 = exporter.export(d)