新的markdown转html实现

This commit is contained in:
xunbu
2025-08-20 17:49:20 +08:00
parent 48c519657e
commit c61055de43
4 changed files with 75 additions and 18 deletions

View File

@@ -1,7 +1,7 @@
from dataclasses import dataclass
import jinja2
import markdown2
import markdown
from docutranslate.exporter.md.base import MDExporter, MDExporterConfig
from docutranslate.ir.document import Document
@@ -20,7 +20,6 @@ class MD2HTMLExporter(MDExporter):
def export(self, document: MarkdownDocument) -> Document:
cdn = self.cdn
markdowner = markdown2.Markdown(extras=['tables', 'fenced-code-blocks', 'mermaid', "code-friendly"])
# language=html
pico = f'<style>{resource_path("static/pico.css").read_text(encoding="utf-8")}</style>' if not cdn else r'<link rel="stylesheet" href="https://s4.zstatic.net/ajax/libs/picocss/2.1.1/pico.min.css" integrity="sha512-+4kjFgVD0n6H3xt19Ox84B56MoS7srFn60tgdWFuO4hemtjhySKyW4LnftYZn46k3THUEiTTsbVjrHai+0MOFw==" crossorigin="anonymous" referrerpolicy="no-referrer" />'
html_template = resource_path("template/markdown.html").read_text(encoding="utf-8")
@@ -58,16 +57,57 @@ class MD2HTMLExporter(MDExporter):
});
</script>"""
mermaid = f'<script>{resource_path("static/mermaid.js").read_text(encoding="utf-8")}</script>'
content = markdowner.convert(document.content.decode().replace("\\", "\\\\"))
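# TODO: support a locally bundled MathJax
# Convert with python-markdown plus pymdown-extensions.
# The Arithmatex extension is dedicated to KaTeX/MathJax formulas: it
# recognizes $...$, $$...$$, \(...\), \[...\] and similar delimiters and
# shields them from the rest of the Markdown processing.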
extensions = [
'markdown.extensions.tables',
'pymdownx.arithmatex',
'pymdownx.superfences' # 使用 superfences
]
extension_configs = {
'pymdownx.arithmatex': {
'generic': True
},
'pymdownx.superfences': {
'custom_fences': [
{
'name': 'mermaid',
'class': 'mermaid',
# This format function emits the HTML structure that Mermaid.js expects.
'format': lambda source, language, css_class, options, md,
**kwargs: f'<pre class="{css_class}">{source}</pre>'
}
]
}
}
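# Note: no manual backslash escaping (.replace("\\", "\\\\")) is needed here,
# because Arithmatex already protects the math spans during conversion.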
html_content = markdown.markdown(
document.content.decode(),
extensions=extensions,
extension_configs=extension_configs
)
render = jinja2.Template(html_template).render(
title=document.stem,
pico=pico,
katexCss=katex_css,
katexJs=katex_js,
autoRender=auto_render,
markdown=content,
markdown=html_content, # 使用新的 html_content
renderMathInElement=render_math_in_element,
mermaid=mermaid,
)
return Document.from_bytes(content=render.encode("utf-8"), suffix=".html", stem=document.stem)
if __name__ == '__main__':
    # Manual smoke test: render one local Markdown file to HTML and write
    # the result to disk. The paths are specific to the author's machine.
    from pathlib import Path

    source_document = Document.from_path(r"C:\Users\jxgm\Desktop\mcp文件夹\学习笔记\互联网认证授权机制\互联网认证授权机制.md")
    rendered_document = MD2HTMLExporter().export(source_document)
    Path(r"C:\Users\jxgm\Desktop\a.html").write_bytes(rendered_document.content)

View File

@@ -31,7 +31,8 @@
mutation.target.style.visibility = "visible";
}
});
document.querySelectorAll("pre.mermaid-pre div.mermaid").forEach(item => {
// Observe the <pre class="mermaid"> blocks that hold the Mermaid diagrams.
document.querySelectorAll("pre.mermaid").forEach(item => {
observer.observe(item, {
attributes: true,
attributeFilter: ['data-processed']

View File

@@ -5,7 +5,6 @@ readme = "README.md"
requires-python = ">=3.11"
dependencies = [
"httpx==0.27.2",
"markdown2>=2.5.3",
"fastapi[standard]>=0.115.12",
"jsonpath-ng>=1.7.0",
"openpyxl>=3.1.5",
@@ -16,6 +15,8 @@ dependencies = [
"lxml>=5.4.0",
"python-docx>=1.2.0",
"beautifulsoup4>=4.13.4",
"markdown>=3.8.2",
"pymdown-extensions>=10.16.1",
]
dynamic = ["version"]

37
uv.lock generated
View File

@@ -322,8 +322,9 @@ dependencies = [
{ name = "jsonpath-ng" },
{ name = "lxml" },
{ name = "mammoth" },
{ name = "markdown2" },
{ name = "markdown" },
{ name = "openpyxl" },
{ name = "pymdown-extensions" },
{ name = "python-docx" },
{ name = "srt" },
{ name = "xlsx2html" },
@@ -352,9 +353,10 @@ requires-dist = [
{ name = "jsonpath-ng", specifier = ">=1.7.0" },
{ name = "lxml", specifier = ">=5.4.0" },
{ name = "mammoth", specifier = ">=1.10.0" },
{ name = "markdown2", specifier = ">=2.5.3" },
{ name = "markdown", specifier = ">=3.8.2" },
{ name = "opencv-python", marker = "extra == 'docling'", specifier = ">=4.11.0.86" },
{ name = "openpyxl", specifier = ">=3.1.5" },
{ name = "pymdown-extensions", specifier = ">=10.16.1" },
{ name = "python-docx", specifier = ">=1.2.0" },
{ name = "srt", specifier = ">=3.5.3" },
{ name = "xlsx2html", specifier = ">=0.6.2" },
@@ -809,6 +811,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/a6/67/36eeb3a8726df3b282ba99ec126323871cffdbcf3b7a1db64ca9bbe4abc1/mammoth-1.10.0-py2.py3-none-any.whl", hash = "sha256:a1c87d5b98ca30230394267f98614b58b14b50f8031dc33ac9a535c6ab04eb99", size = 53823 },
]
[[package]]
name = "markdown"
version = "3.8.2"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/d7/c2/4ab49206c17f75cb08d6311171f2d65798988db4360c4d1485bd0eedd67c/markdown-3.8.2.tar.gz", hash = "sha256:247b9a70dd12e27f67431ce62523e675b866d254f900c4fe75ce3dda62237c45", size = 362071 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/96/2b/34cc11786bc00d0f04d0f5fdc3a2b1ae0b6239eef72d3d345805f9ad92a1/markdown-3.8.2-py3-none-any.whl", hash = "sha256:5c83764dbd4e00bdd94d85a19b8d55ccca20fe35b2e678a1422b380324dd5f24", size = 106827 },
]
[[package]]
name = "markdown-it-py"
version = "3.0.0"
@@ -821,15 +832,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528 },
]
[[package]]
name = "markdown2"
version = "2.5.4"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/42/f8/b2ae8bf5f28f9b510ae097415e6e4cb63226bb28d7ee01aec03a755ba03b/markdown2-2.5.4.tar.gz", hash = "sha256:a09873f0b3c23dbfae589b0080587df52ad75bb09a5fa6559147554736676889", size = 145652 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/b8/06/2697b5043c3ecb720ce0d243fc7cf5024c0b5b1e450506e9b21939019963/markdown2-2.5.4-py3-none-any.whl", hash = "sha256:3c4b2934e677be7fec0e6f2de4410e116681f4ad50ec8e5ba7557be506d3f439", size = 49954 },
]
[[package]]
name = "marko"
version = "2.1.4"
@@ -1561,6 +1563,19 @@ version = "2.10"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/5d/ab/34ec41718af73c00119d0351b7a2531d2ebddb51833a36448fc7b862be60/pylatexenc-2.10.tar.gz", hash = "sha256:3dd8fd84eb46dc30bee1e23eaab8d8fb5a7f507347b23e5f38ad9675c84f40d3", size = 162597 }
[[package]]
name = "pymdown-extensions"
version = "10.16.1"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "markdown" },
{ name = "pyyaml" },
]
sdist = { url = "https://files.pythonhosted.org/packages/55/b3/6d2b3f149bc5413b0a29761c2c5832d8ce904a1d7f621e86616d96f505cc/pymdown_extensions-10.16.1.tar.gz", hash = "sha256:aace82bcccba3efc03e25d584e6a22d27a8e17caa3f4dd9f207e49b787aa9a91", size = 853277 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/e4/06/43084e6cbd4b3bc0e80f6be743b2e79fbc6eed8de9ad8c629939fa55d972/pymdown_extensions-10.16.1-py3-none-any.whl", hash = "sha256:d6ba157a6c03146a7fb122b2b9a121300056384eafeec9c9f9e584adfdb2a32d", size = 266178 },
]
[[package]]
name = "pypdfium2"
version = "4.30.0"