新的markdown转html实现
This commit is contained in:
@@ -1,7 +1,7 @@
|
|||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
|
||||||
import jinja2
|
import jinja2
|
||||||
import markdown2
|
import markdown
|
||||||
|
|
||||||
from docutranslate.exporter.md.base import MDExporter, MDExporterConfig
|
from docutranslate.exporter.md.base import MDExporter, MDExporterConfig
|
||||||
from docutranslate.ir.document import Document
|
from docutranslate.ir.document import Document
|
||||||
@@ -20,7 +20,6 @@ class MD2HTMLExporter(MDExporter):
|
|||||||
|
|
||||||
def export(self, document: MarkdownDocument) -> Document:
|
def export(self, document: MarkdownDocument) -> Document:
|
||||||
cdn = self.cdn
|
cdn = self.cdn
|
||||||
markdowner = markdown2.Markdown(extras=['tables', 'fenced-code-blocks', 'mermaid', "code-friendly"])
|
|
||||||
# language=html
|
# language=html
|
||||||
pico = f'<style>{resource_path("static/pico.css").read_text(encoding="utf-8")}</style>' if not cdn else r'<link rel="stylesheet" href="https://s4.zstatic.net/ajax/libs/picocss/2.1.1/pico.min.css" integrity="sha512-+4kjFgVD0n6H3xt19Ox84B56MoS7srFn60tgdWFuO4hemtjhySKyW4LnftYZn46k3THUEiTTsbVjrHai+0MOFw==" crossorigin="anonymous" referrerpolicy="no-referrer" />'
|
pico = f'<style>{resource_path("static/pico.css").read_text(encoding="utf-8")}</style>' if not cdn else r'<link rel="stylesheet" href="https://s4.zstatic.net/ajax/libs/picocss/2.1.1/pico.min.css" integrity="sha512-+4kjFgVD0n6H3xt19Ox84B56MoS7srFn60tgdWFuO4hemtjhySKyW4LnftYZn46k3THUEiTTsbVjrHai+0MOFw==" crossorigin="anonymous" referrerpolicy="no-referrer" />'
|
||||||
html_template = resource_path("template/markdown.html").read_text(encoding="utf-8")
|
html_template = resource_path("template/markdown.html").read_text(encoding="utf-8")
|
||||||
@@ -58,16 +57,57 @@ class MD2HTMLExporter(MDExporter):
|
|||||||
});
|
});
|
||||||
</script>"""
|
</script>"""
|
||||||
mermaid = f'<script>{resource_path("static/mermaid.js").read_text(encoding="utf-8")}</script>'
|
mermaid = f'<script>{resource_path("static/mermaid.js").read_text(encoding="utf-8")}</script>'
|
||||||
content = markdowner.convert(document.content.decode().replace("\\", "\\\\"))
|
|
||||||
# TODO:实现MathJax本地化
|
# 使用 python-markdown 和 pymdown-extensions
|
||||||
|
# Arithmatex 扩展专门用于处理 KaTeX/MathJax 公式
|
||||||
|
# 它能智能识别 $...$, $$...$$, \(...\), \[...\] 等,并保护它们不受干扰
|
||||||
|
extensions = [
|
||||||
|
'markdown.extensions.tables',
|
||||||
|
'pymdownx.arithmatex',
|
||||||
|
'pymdownx.superfences' # 使用 superfences
|
||||||
|
]
|
||||||
|
|
||||||
|
extension_configs = {
|
||||||
|
'pymdownx.arithmatex': {
|
||||||
|
'generic': True
|
||||||
|
},
|
||||||
|
'pymdownx.superfences': {
|
||||||
|
'custom_fences': [
|
||||||
|
{
|
||||||
|
'name': 'mermaid',
|
||||||
|
'class': 'mermaid',
|
||||||
|
# 这个 format 函数确保输出的 HTML 结构是 Mermaid.js 期望的
|
||||||
|
'format': lambda source, language, css_class, options, md,
|
||||||
|
**kwargs: f'<pre class="{css_class}">{source}</pre>'
|
||||||
|
}
|
||||||
|
]
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
# 注意:这里不再需要 .replace("\\", "\\\\")
|
||||||
|
html_content = markdown.markdown(
|
||||||
|
document.content.decode(),
|
||||||
|
extensions=extensions,
|
||||||
|
extension_configs=extension_configs
|
||||||
|
)
|
||||||
|
|
||||||
render = jinja2.Template(html_template).render(
|
render = jinja2.Template(html_template).render(
|
||||||
title=document.stem,
|
title=document.stem,
|
||||||
pico=pico,
|
pico=pico,
|
||||||
katexCss=katex_css,
|
katexCss=katex_css,
|
||||||
katexJs=katex_js,
|
katexJs=katex_js,
|
||||||
autoRender=auto_render,
|
autoRender=auto_render,
|
||||||
markdown=content,
|
markdown=html_content, # 使用新的 html_content
|
||||||
renderMathInElement=render_math_in_element,
|
renderMathInElement=render_math_in_element,
|
||||||
mermaid=mermaid,
|
mermaid=mermaid,
|
||||||
)
|
)
|
||||||
return Document.from_bytes(content=render.encode("utf-8"), suffix=".html", stem=document.stem)
|
return Document.from_bytes(content=render.encode("utf-8"), suffix=".html", stem=document.stem)
|
||||||
|
|
||||||
|
if __name__ == '__main__':
|
||||||
|
from pathlib import Path
|
||||||
|
# d=Document.from_path(r"C:\Users\jxgm\Desktop\A_Survey_on_Decentralized_Identifiers_and_Verifiable_Credentials_translated.md")
|
||||||
|
d=Document.from_path(r"C:\Users\jxgm\Desktop\mcp文件夹\学习笔记\互联网认证授权机制\互联网认证授权机制.md")
|
||||||
|
exporter=MD2HTMLExporter()
|
||||||
|
d1=exporter.export(d)
|
||||||
|
path=Path(r"C:\Users\jxgm\Desktop\a.html")
|
||||||
|
path.write_bytes(d1.content)
|
||||||
@@ -31,7 +31,8 @@
|
|||||||
mutation.target.style.visibility = "visible";
|
mutation.target.style.visibility = "visible";
|
||||||
}
|
}
|
||||||
});
|
});
|
||||||
document.querySelectorAll("pre.mermaid-pre div.mermaid").forEach(item => {
|
// ***** 修改这里的选择器 *****
|
||||||
|
document.querySelectorAll("pre.mermaid").forEach(item => {
|
||||||
observer.observe(item, {
|
observer.observe(item, {
|
||||||
attributes: true,
|
attributes: true,
|
||||||
attributeFilter: ['data-processed']
|
attributeFilter: ['data-processed']
|
||||||
|
|||||||
@@ -5,7 +5,6 @@ readme = "README.md"
|
|||||||
requires-python = ">=3.11"
|
requires-python = ">=3.11"
|
||||||
dependencies = [
|
dependencies = [
|
||||||
"httpx==0.27.2",
|
"httpx==0.27.2",
|
||||||
"markdown2>=2.5.3",
|
|
||||||
"fastapi[standard]>=0.115.12",
|
"fastapi[standard]>=0.115.12",
|
||||||
"jsonpath-ng>=1.7.0",
|
"jsonpath-ng>=1.7.0",
|
||||||
"openpyxl>=3.1.5",
|
"openpyxl>=3.1.5",
|
||||||
@@ -16,6 +15,8 @@ dependencies = [
|
|||||||
"lxml>=5.4.0",
|
"lxml>=5.4.0",
|
||||||
"python-docx>=1.2.0",
|
"python-docx>=1.2.0",
|
||||||
"beautifulsoup4>=4.13.4",
|
"beautifulsoup4>=4.13.4",
|
||||||
|
"markdown>=3.8.2",
|
||||||
|
"pymdown-extensions>=10.16.1",
|
||||||
]
|
]
|
||||||
dynamic = ["version"]
|
dynamic = ["version"]
|
||||||
|
|
||||||
|
|||||||
37
uv.lock
generated
37
uv.lock
generated
@@ -322,8 +322,9 @@ dependencies = [
|
|||||||
{ name = "jsonpath-ng" },
|
{ name = "jsonpath-ng" },
|
||||||
{ name = "lxml" },
|
{ name = "lxml" },
|
||||||
{ name = "mammoth" },
|
{ name = "mammoth" },
|
||||||
{ name = "markdown2" },
|
{ name = "markdown" },
|
||||||
{ name = "openpyxl" },
|
{ name = "openpyxl" },
|
||||||
|
{ name = "pymdown-extensions" },
|
||||||
{ name = "python-docx" },
|
{ name = "python-docx" },
|
||||||
{ name = "srt" },
|
{ name = "srt" },
|
||||||
{ name = "xlsx2html" },
|
{ name = "xlsx2html" },
|
||||||
@@ -352,9 +353,10 @@ requires-dist = [
|
|||||||
{ name = "jsonpath-ng", specifier = ">=1.7.0" },
|
{ name = "jsonpath-ng", specifier = ">=1.7.0" },
|
||||||
{ name = "lxml", specifier = ">=5.4.0" },
|
{ name = "lxml", specifier = ">=5.4.0" },
|
||||||
{ name = "mammoth", specifier = ">=1.10.0" },
|
{ name = "mammoth", specifier = ">=1.10.0" },
|
||||||
{ name = "markdown2", specifier = ">=2.5.3" },
|
{ name = "markdown", specifier = ">=3.8.2" },
|
||||||
{ name = "opencv-python", marker = "extra == 'docling'", specifier = ">=4.11.0.86" },
|
{ name = "opencv-python", marker = "extra == 'docling'", specifier = ">=4.11.0.86" },
|
||||||
{ name = "openpyxl", specifier = ">=3.1.5" },
|
{ name = "openpyxl", specifier = ">=3.1.5" },
|
||||||
|
{ name = "pymdown-extensions", specifier = ">=10.16.1" },
|
||||||
{ name = "python-docx", specifier = ">=1.2.0" },
|
{ name = "python-docx", specifier = ">=1.2.0" },
|
||||||
{ name = "srt", specifier = ">=3.5.3" },
|
{ name = "srt", specifier = ">=3.5.3" },
|
||||||
{ name = "xlsx2html", specifier = ">=0.6.2" },
|
{ name = "xlsx2html", specifier = ">=0.6.2" },
|
||||||
@@ -809,6 +811,15 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/a6/67/36eeb3a8726df3b282ba99ec126323871cffdbcf3b7a1db64ca9bbe4abc1/mammoth-1.10.0-py2.py3-none-any.whl", hash = "sha256:a1c87d5b98ca30230394267f98614b58b14b50f8031dc33ac9a535c6ab04eb99", size = 53823 },
|
{ url = "https://files.pythonhosted.org/packages/a6/67/36eeb3a8726df3b282ba99ec126323871cffdbcf3b7a1db64ca9bbe4abc1/mammoth-1.10.0-py2.py3-none-any.whl", hash = "sha256:a1c87d5b98ca30230394267f98614b58b14b50f8031dc33ac9a535c6ab04eb99", size = 53823 },
|
||||||
]
|
]
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "markdown"
|
||||||
|
version = "3.8.2"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/d7/c2/4ab49206c17f75cb08d6311171f2d65798988db4360c4d1485bd0eedd67c/markdown-3.8.2.tar.gz", hash = "sha256:247b9a70dd12e27f67431ce62523e675b866d254f900c4fe75ce3dda62237c45", size = 362071 }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/96/2b/34cc11786bc00d0f04d0f5fdc3a2b1ae0b6239eef72d3d345805f9ad92a1/markdown-3.8.2-py3-none-any.whl", hash = "sha256:5c83764dbd4e00bdd94d85a19b8d55ccca20fe35b2e678a1422b380324dd5f24", size = 106827 },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "markdown-it-py"
|
name = "markdown-it-py"
|
||||||
version = "3.0.0"
|
version = "3.0.0"
|
||||||
@@ -821,15 +832,6 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528 },
|
{ url = "https://files.pythonhosted.org/packages/42/d7/1ec15b46af6af88f19b8e5ffea08fa375d433c998b8a7639e76935c14f1f/markdown_it_py-3.0.0-py3-none-any.whl", hash = "sha256:355216845c60bd96232cd8d8c40e8f9765cc86f46880e43a8fd22dc1a1a8cab1", size = 87528 },
|
||||||
]
|
]
|
||||||
|
|
||||||
[[package]]
|
|
||||||
name = "markdown2"
|
|
||||||
version = "2.5.4"
|
|
||||||
source = { registry = "https://pypi.org/simple" }
|
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/42/f8/b2ae8bf5f28f9b510ae097415e6e4cb63226bb28d7ee01aec03a755ba03b/markdown2-2.5.4.tar.gz", hash = "sha256:a09873f0b3c23dbfae589b0080587df52ad75bb09a5fa6559147554736676889", size = 145652 }
|
|
||||||
wheels = [
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/b8/06/2697b5043c3ecb720ce0d243fc7cf5024c0b5b1e450506e9b21939019963/markdown2-2.5.4-py3-none-any.whl", hash = "sha256:3c4b2934e677be7fec0e6f2de4410e116681f4ad50ec8e5ba7557be506d3f439", size = 49954 },
|
|
||||||
]
|
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "marko"
|
name = "marko"
|
||||||
version = "2.1.4"
|
version = "2.1.4"
|
||||||
@@ -1561,6 +1563,19 @@ version = "2.10"
|
|||||||
source = { registry = "https://pypi.org/simple" }
|
source = { registry = "https://pypi.org/simple" }
|
||||||
sdist = { url = "https://files.pythonhosted.org/packages/5d/ab/34ec41718af73c00119d0351b7a2531d2ebddb51833a36448fc7b862be60/pylatexenc-2.10.tar.gz", hash = "sha256:3dd8fd84eb46dc30bee1e23eaab8d8fb5a7f507347b23e5f38ad9675c84f40d3", size = 162597 }
|
sdist = { url = "https://files.pythonhosted.org/packages/5d/ab/34ec41718af73c00119d0351b7a2531d2ebddb51833a36448fc7b862be60/pylatexenc-2.10.tar.gz", hash = "sha256:3dd8fd84eb46dc30bee1e23eaab8d8fb5a7f507347b23e5f38ad9675c84f40d3", size = 162597 }
|
||||||
|
|
||||||
|
[[package]]
|
||||||
|
name = "pymdown-extensions"
|
||||||
|
version = "10.16.1"
|
||||||
|
source = { registry = "https://pypi.org/simple" }
|
||||||
|
dependencies = [
|
||||||
|
{ name = "markdown" },
|
||||||
|
{ name = "pyyaml" },
|
||||||
|
]
|
||||||
|
sdist = { url = "https://files.pythonhosted.org/packages/55/b3/6d2b3f149bc5413b0a29761c2c5832d8ce904a1d7f621e86616d96f505cc/pymdown_extensions-10.16.1.tar.gz", hash = "sha256:aace82bcccba3efc03e25d584e6a22d27a8e17caa3f4dd9f207e49b787aa9a91", size = 853277 }
|
||||||
|
wheels = [
|
||||||
|
{ url = "https://files.pythonhosted.org/packages/e4/06/43084e6cbd4b3bc0e80f6be743b2e79fbc6eed8de9ad8c629939fa55d972/pymdown_extensions-10.16.1-py3-none-any.whl", hash = "sha256:d6ba157a6c03146a7fb122b2b9a121300056384eafeec9c9f9e584adfdb2a32d", size = 266178 },
|
||||||
|
]
|
||||||
|
|
||||||
[[package]]
|
[[package]]
|
||||||
name = "pypdfium2"
|
name = "pypdfium2"
|
||||||
version = "4.30.0"
|
version = "4.30.0"
|
||||||
|
|||||||
Reference in New Issue
Block a user