From c61055de4374227f7c755501219eba337ee44f6a Mon Sep 17 00:00:00 2001 From: xunbu Date: Wed, 20 Aug 2025 17:49:20 +0800 Subject: [PATCH] =?UTF-8?q?=E6=96=B0=E7=9A=84markdown=E8=BD=AChtml?= =?UTF-8?q?=E5=AE=9E=E7=8E=B0?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docutranslate/exporter/md/md2html_exporter.py | 50 +++++++++++++++++-- docutranslate/template/markdown.html | 3 +- pyproject.toml | 3 +- uv.lock | 37 ++++++++++---- 4 files changed, 75 insertions(+), 18 deletions(-) diff --git a/docutranslate/exporter/md/md2html_exporter.py b/docutranslate/exporter/md/md2html_exporter.py index 229cb66..f964e41 100644 --- a/docutranslate/exporter/md/md2html_exporter.py +++ b/docutranslate/exporter/md/md2html_exporter.py @@ -1,7 +1,7 @@ from dataclasses import dataclass import jinja2 -import markdown2 +import markdown from docutranslate.exporter.md.base import MDExporter, MDExporterConfig from docutranslate.ir.document import Document @@ -20,7 +20,6 @@ class MD2HTMLExporter(MDExporter): def export(self, document: MarkdownDocument) -> Document: cdn = self.cdn - markdowner = markdown2.Markdown(extras=['tables', 'fenced-code-blocks', 'mermaid', "code-friendly"]) # language=html pico = f'' if not cdn else r'' html_template = resource_path("template/markdown.html").read_text(encoding="utf-8") @@ -58,16 +57,57 @@ class MD2HTMLExporter(MDExporter): }); """ mermaid = f'' - content = markdowner.convert(document.content.decode().replace("\\", "\\\\")) - # TODO:实现MathJax本地化 + + # 使用 python-markdown 和 pymdown-extensions + # Arithmatex 扩展专门用于处理 KaTeX/MathJax 公式 + # 它能智能识别 $...$, $$...$$, \(...\), \[...\] 等,并保护它们不受干扰 + extensions = [ + 'markdown.extensions.tables', + 'pymdownx.arithmatex', + 'pymdownx.superfences' # 使用 superfences + ] + + extension_configs = { + 'pymdownx.arithmatex': { + 'generic': True + }, + 'pymdownx.superfences': { + 'custom_fences': [ + { + 'name': 'mermaid', + 'class': 'mermaid', + # 这个 format 函数确保输出的 HTML 结构是 Mermaid.js 期望的 + 'format': lambda source, language, css_class, options, md, + **kwargs: f'
{source}
' + } + ] + } + } + + # 注意:这里不再需要 .replace("\\", "\\\\") + html_content = markdown.markdown( + document.content.decode(), + extensions=extensions, + extension_configs=extension_configs + ) + render = jinja2.Template(html_template).render( title=document.stem, pico=pico, katexCss=katex_css, katexJs=katex_js, autoRender=auto_render, - markdown=content, + markdown=html_content, # 使用新的 html_content renderMathInElement=render_math_in_element, mermaid=mermaid, ) return Document.from_bytes(content=render.encode("utf-8"), suffix=".html", stem=document.stem) + +if __name__ == '__main__': + from pathlib import Path + # d=Document.from_path(r"C:\Users\jxgm\Desktop\A_Survey_on_Decentralized_Identifiers_and_Verifiable_Credentials_translated.md") + d=Document.from_path(r"C:\Users\jxgm\Desktop\mcp文件夹\学习笔记\互联网认证授权机制\互联网认证授权机制.md") + exporter=MD2HTMLExporter() + d1=exporter.export(d) + path=Path(r"C:\Users\jxgm\Desktop\a.html") + path.write_bytes(d1.content) \ No newline at end of file diff --git a/docutranslate/template/markdown.html b/docutranslate/template/markdown.html index 1f5037b..b2d2162 100644 --- a/docutranslate/template/markdown.html +++ b/docutranslate/template/markdown.html @@ -31,7 +31,8 @@ mutation.target.style.visibility = "visible"; } }); - document.querySelectorAll("pre.mermaid-pre div.mermaid").forEach(item => { + // ***** 修改这里的选择器 ***** + document.querySelectorAll("pre.mermaid").forEach(item => { observer.observe(item, { attributes: true, attributeFilter: ['data-processed'] diff --git a/pyproject.toml b/pyproject.toml index bd936e8..be2b610 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -5,7 +5,6 @@ readme = "README.md" requires-python = ">=3.11" dependencies = [ "httpx==0.27.2", - "markdown2>=2.5.3", "fastapi[standard]>=0.115.12", "jsonpath-ng>=1.7.0", "openpyxl>=3.1.5", @@ -16,6 +15,8 @@ dependencies = [ "lxml>=5.4.0", "python-docx>=1.2.0", "beautifulsoup4>=4.13.4", + "markdown>=3.8.2", + "pymdown-extensions>=10.16.1", ] dynamic = ["version"] diff --git a/uv.lock b/uv.lock index 970e75a..11ee74a 100644 --- a/uv.lock +++ b/uv.lock @@ -322,8 +322,9 @@ dependencies = [ { name = "jsonpath-ng" }, { name = "lxml" }, { name = "mammoth" }, - { name = "markdown2" }, + { name = "markdown" }, { name = "openpyxl" }, + { name = "pymdown-extensions" }, { name = "python-docx" }, { name = "srt" }, { name = "xlsx2html" }, @@ -352,9 +353,10 @@ requires-dist = [ { name = "jsonpath-ng", specifier = ">=1.7.0" }, { name = "lxml", specifier = ">=5.4.0" }, { name = "mammoth", specifier = ">=1.10.0" }, - { name = "markdown2", specifier = ">=2.5.3" }, + { name = "markdown", specifier = ">=3.8.2" }, { name = "opencv-python", marker = "extra == 'docling'", specifier = ">=4.11.0.86" }, { name = "openpyxl", specifier = ">=3.1.5" }, + { name = "pymdown-extensions", specifier = ">=10.16.1" }, { name = "python-docx", specifier = ">=1.2.0" }, { name = "srt", specifier = ">=3.5.3" }, { name = "xlsx2html", specifier = ">=0.6.2" }, @@ -809,6 +811,15 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/a6/67/36eeb3a8726df3b282ba99ec126323871cffdbcf3b7a1db64ca9bbe4abc1/mammoth-1.10.0-py2.py3-none-any.whl", hash = "sha256:a1c87d5b98ca30230394267f98614b58b14b50f8031dc33ac9a535c6ab04eb99", size = 53823 }, ] +[[package]] +name = "markdown" +version = "3.8.2" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/d7/c2/4ab49206c17f75cb08d6311171f2d65798988db4360c4d1485bd0eedd67c/markdown-3.8.2.tar.gz", hash = "sha256:247b9a70dd12e27f67431ce62523e675b866d254f900c4fe75ce3dda62237c45", size = 362071 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/96/2b/34cc11786bc00d0f04d0f5fdc3a2b1ae0b6239eef72d3d345805f9ad92a1/markdown-3.8.2-py3-none-any.whl", hash = "sha256:5c83764dbd4e00bdd94d85a19b8d55ccca20fe35b2e678a1422b380324dd5f24", size = 106827 }, +] + [[package]] name = "markdown-it-py" version = "3.0.0" @@ -821,15 +832,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528 }, ] -[[package]] -name = "markdown2" -version = "2.5.4" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/42/f8/b2ae8bf5f28f9b510ae097415e6e4cb63226bb28d7ee01aec03a755ba03b/markdown2-2.5.4.tar.gz", hash = "sha256:a09873f0b3c23dbfae589b0080587df52ad75bb09a5fa6559147554736676889", size = 145652 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/b8/06/2697b5043c3ecb720ce0d243fc7cf5024c0b5b1e450506e9b21939019963/markdown2-2.5.4-py3-none-any.whl", hash = "sha256:3c4b2934e677be7fec0e6f2de4410e116681f4ad50ec8e5ba7557be506d3f439", size = 49954 }, -] - [[package]] name = "marko" version = "2.1.4" @@ -1561,6 +1563,19 @@ version = "2.10" source = { registry = "https://pypi.org/simple" } sdist = { url = "https://files.pythonhosted.org/packages/5d/ab/34ec41718af73c00119d0351b7a2531d2ebddb51833a36448fc7b862be60/pylatexenc-2.10.tar.gz", hash = "sha256:3dd8fd84eb46dc30bee1e23eaab8d8fb5a7f507347b23e5f38ad9675c84f40d3", size = 162597 } +[[package]] +name = "pymdown-extensions" +version = "10.16.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "markdown" }, + { name = "pyyaml" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/55/b3/6d2b3f149bc5413b0a29761c2c5832d8ce904a1d7f621e86616d96f505cc/pymdown_extensions-10.16.1.tar.gz", hash = "sha256:aace82bcccba3efc03e25d584e6a22d27a8e17caa3f4dd9f207e49b787aa9a91", size = 853277 } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e4/06/43084e6cbd4b3bc0e80f6be743b2e79fbc6eed8de9ad8c629939fa55d972/pymdown_extensions-10.16.1-py3-none-any.whl", hash = "sha256:d6ba157a6c03146a7fb122b2b9a121300056384eafeec9c9f9e584adfdb2a32d", size = 266178 }, +] + [[package]] name = "pypdfium2" version = "4.30.0"