diff --git a/docutranslate/exporter/md/md2html_exporter.py b/docutranslate/exporter/md/md2html_exporter.py index 229cb66..f964e41 100644 --- a/docutranslate/exporter/md/md2html_exporter.py +++ b/docutranslate/exporter/md/md2html_exporter.py @@ -1,7 +1,7 @@ from dataclasses import dataclass import jinja2 -import markdown2 +import markdown from docutranslate.exporter.md.base import MDExporter, MDExporterConfig from docutranslate.ir.document import Document @@ -20,7 +20,6 @@ class MD2HTMLExporter(MDExporter): def export(self, document: MarkdownDocument) -> Document: cdn = self.cdn - markdowner = markdown2.Markdown(extras=['tables', 'fenced-code-blocks', 'mermaid', "code-friendly"]) # language=html pico = f'' if not cdn else r'' html_template = resource_path("template/markdown.html").read_text(encoding="utf-8") @@ -58,16 +57,57 @@ class MD2HTMLExporter(MDExporter): }); """ mermaid = f'' - content = markdowner.convert(document.content.decode().replace("\\", "\\\\")) - # TODO:实现MathJax本地化 + + # 使用 python-markdown 和 pymdown-extensions + # Arithmatex 扩展专门用于处理 KaTeX/MathJax 公式 + # 它能智能识别 $...$, $$...$$, \(...\), \[...\] 等,并保护它们不受干扰 + extensions = [ + 'markdown.extensions.tables', + 'pymdownx.arithmatex', + 'pymdownx.superfences' # 使用 superfences + ] + + extension_configs = { + 'pymdownx.arithmatex': { + 'generic': True + }, + 'pymdownx.superfences': { + 'custom_fences': [ + { + 'name': 'mermaid', + 'class': 'mermaid', + # 这个 format 函数确保输出的 HTML 结构是 Mermaid.js 期望的 + 'format': lambda source, language, css_class, options, md, + **kwargs: f'
{source}'
+ }
+ ]
+ }
+ }
+
+ # Note: .replace("\\", "\\\\") is no longer needed here; arithmatex protects the math delimiters
+ html_content = markdown.markdown(
+ document.content.decode(),
+ extensions=extensions,
+ extension_configs=extension_configs
+ )
+
render = jinja2.Template(html_template).render(
title=document.stem,
pico=pico,
katexCss=katex_css,
katexJs=katex_js,
autoRender=auto_render,
- markdown=content,
+ markdown=html_content, # 使用新的 html_content
renderMathInElement=render_math_in_element,
mermaid=mermaid,
)
return Document.from_bytes(content=render.encode("utf-8"), suffix=".html", stem=document.stem)
+
+if __name__ == '__main__':
+    # Manual smoke test; usage: python md2html_exporter.py INPUT.md OUTPUT.html
+    import sys
+    from pathlib import Path
+    if len(sys.argv) != 3:
+        sys.exit("usage: md2html_exporter.py INPUT.md OUTPUT.html")
+    html_doc = MD2HTMLExporter().export(Document.from_path(sys.argv[1]))
+    Path(sys.argv[2]).write_bytes(html_doc.content)
\ No newline at end of file
diff --git a/docutranslate/template/markdown.html b/docutranslate/template/markdown.html
index 1f5037b..b2d2162 100644
--- a/docutranslate/template/markdown.html
+++ b/docutranslate/template/markdown.html
@@ -31,7 +31,8 @@
mutation.target.style.visibility = "visible";
}
});
- document.querySelectorAll("pre.mermaid-pre div.mermaid").forEach(item => {
+ // Selector updated: Mermaid blocks are now matched as pre.mermaid
+ document.querySelectorAll("pre.mermaid").forEach(item => {
observer.observe(item, {
attributes: true,
attributeFilter: ['data-processed']
diff --git a/pyproject.toml b/pyproject.toml
index bd936e8..be2b610 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -5,7 +5,6 @@ readme = "README.md"
requires-python = ">=3.11"
dependencies = [
"httpx==0.27.2",
- "markdown2>=2.5.3",
"fastapi[standard]>=0.115.12",
"jsonpath-ng>=1.7.0",
"openpyxl>=3.1.5",
@@ -16,6 +15,8 @@ dependencies = [
"lxml>=5.4.0",
"python-docx>=1.2.0",
"beautifulsoup4>=4.13.4",
+ "markdown>=3.8.2",
+ "pymdown-extensions>=10.16.1",
]
dynamic = ["version"]
diff --git a/uv.lock b/uv.lock
index 970e75a..11ee74a 100644
--- a/uv.lock
+++ b/uv.lock
@@ -322,8 +322,9 @@ dependencies = [
{ name = "jsonpath-ng" },
{ name = "lxml" },
{ name = "mammoth" },
- { name = "markdown2" },
+ { name = "markdown" },
{ name = "openpyxl" },
+ { name = "pymdown-extensions" },
{ name = "python-docx" },
{ name = "srt" },
{ name = "xlsx2html" },
@@ -352,9 +353,10 @@ requires-dist = [
{ name = "jsonpath-ng", specifier = ">=1.7.0" },
{ name = "lxml", specifier = ">=5.4.0" },
{ name = "mammoth", specifier = ">=1.10.0" },
- { name = "markdown2", specifier = ">=2.5.3" },
+ { name = "markdown", specifier = ">=3.8.2" },
{ name = "opencv-python", marker = "extra == 'docling'", specifier = ">=4.11.0.86" },
{ name = "openpyxl", specifier = ">=3.1.5" },
+ { name = "pymdown-extensions", specifier = ">=10.16.1" },
{ name = "python-docx", specifier = ">=1.2.0" },
{ name = "srt", specifier = ">=3.5.3" },
{ name = "xlsx2html", specifier = ">=0.6.2" },
@@ -809,6 +811,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/a6/67/36eeb3a8726df3b282ba99ec126323871cffdbcf3b7a1db64ca9bbe4abc1/mammoth-1.10.0-py2.py3-none-any.whl", hash = "sha256:a1c87d5b98ca30230394267f98614b58b14b50f8031dc33ac9a535c6ab04eb99", size = 53823 },
]
+[[package]]
+name = "markdown"
+version = "3.8.2"
+source = { registry = "https://pypi.org/simple" }
+sdist = { url = "https://files.pythonhosted.org/packages/d7/c2/4ab49206c17f75cb08d6311171f2d65798988db4360c4d1485bd0eedd67c/markdown-3.8.2.tar.gz", hash = "sha256:247b9a70dd12e27f67431ce62523e675b866d254f900c4fe75ce3dda62237c45", size = 362071 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/96/2b/34cc11786bc00d0f04d0f5fdc3a2b1ae0b6239eef72d3d345805f9ad92a1/markdown-3.8.2-py3-none-any.whl", hash = "sha256:5c83764dbd4e00bdd94d85a19b8d55ccca20fe35b2e678a1422b380324dd5f24", size = 106827 },
+]
+
[[package]]
name = "markdown-it-py"
version = "3.0.0"
@@ -821,15 +832,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528 },
]
-[[package]]
-name = "markdown2"
-version = "2.5.4"
-source = { registry = "https://pypi.org/simple" }
-sdist = { url = "https://files.pythonhosted.org/packages/42/f8/b2ae8bf5f28f9b510ae097415e6e4cb63226bb28d7ee01aec03a755ba03b/markdown2-2.5.4.tar.gz", hash = "sha256:a09873f0b3c23dbfae589b0080587df52ad75bb09a5fa6559147554736676889", size = 145652 }
-wheels = [
- { url = "https://files.pythonhosted.org/packages/b8/06/2697b5043c3ecb720ce0d243fc7cf5024c0b5b1e450506e9b21939019963/markdown2-2.5.4-py3-none-any.whl", hash = "sha256:3c4b2934e677be7fec0e6f2de4410e116681f4ad50ec8e5ba7557be506d3f439", size = 49954 },
-]
-
[[package]]
name = "marko"
version = "2.1.4"
@@ -1561,6 +1563,19 @@ version = "2.10"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/5d/ab/34ec41718af73c00119d0351b7a2531d2ebddb51833a36448fc7b862be60/pylatexenc-2.10.tar.gz", hash = "sha256:3dd8fd84eb46dc30bee1e23eaab8d8fb5a7f507347b23e5f38ad9675c84f40d3", size = 162597 }
+[[package]]
+name = "pymdown-extensions"
+version = "10.16.1"
+source = { registry = "https://pypi.org/simple" }
+dependencies = [
+ { name = "markdown" },
+ { name = "pyyaml" },
+]
+sdist = { url = "https://files.pythonhosted.org/packages/55/b3/6d2b3f149bc5413b0a29761c2c5832d8ce904a1d7f621e86616d96f505cc/pymdown_extensions-10.16.1.tar.gz", hash = "sha256:aace82bcccba3efc03e25d584e6a22d27a8e17caa3f4dd9f207e49b787aa9a91", size = 853277 }
+wheels = [
+ { url = "https://files.pythonhosted.org/packages/e4/06/43084e6cbd4b3bc0e80f6be743b2e79fbc6eed8de9ad8c629939fa55d972/pymdown_extensions-10.16.1-py3-none-any.whl", hash = "sha256:d6ba157a6c03146a7fb122b2b9a121300056384eafeec9c9f9e584adfdb2a32d", size = 266178 },
+]
+
[[package]]
name = "pypdfium2"
version = "4.30.0"