diff --git a/docutranslate/exporter/md/md2html_exporter.py b/docutranslate/exporter/md/md2html_exporter.py index af42ddb..76770c3 100644 --- a/docutranslate/exporter/md/md2html_exporter.py +++ b/docutranslate/exporter/md/md2html_exporter.py @@ -1,22 +1,22 @@ from dataclasses import dataclass - import jinja2 import markdown - from docutranslate.exporter.md.base import MDExporter, MDExporterConfig from docutranslate.ir.document import Document from docutranslate.ir.markdown_document import MarkdownDocument from docutranslate.utils.resource_utils import resource_path + @dataclass class MD2HTMLExporterConfig(MDExporterConfig): cdn: bool = True + class MD2HTMLExporter(MDExporter): def __init__(self, config: MD2HTMLExporterConfig = None): config = config or MD2HTMLExporterConfig() super().__init__(config=config) - self.cdn=config.cdn + self.cdn = config.cdn def export(self, document: MarkdownDocument) -> Document: cdn = self.cdn @@ -26,41 +26,32 @@ class MD2HTMLExporter(MDExporter): katex_css = f'' if not cdn else r"""""" katex_js = f'' if not cdn else r"""""" auto_render = f'' if not cdn else r"""""" - # language=javascript + + # 修改 JavaScript 渲染配置,增加更多选项 render_math_in_element = r""" - """ if cdn else r""" - """ + """ + mermaid = f'' - # 使用 python-markdown 和 pymdown-extensions - # Arithmatex 扩展专门用于处理 KaTeX/MathJax 公式 - # 它能智能识别 $...$, $$...$$, \(...\), \[...\] 等,并保护它们不受干扰 + # 修改扩展配置 extensions = [ 'markdown.extensions.tables', 'pymdownx.arithmatex', @@ -69,14 +60,20 @@ class MD2HTMLExporter(MDExporter): extension_configs = { 'pymdownx.arithmatex': { - 'generic': True + 'generic': True, + 'block_tag': 'div', + 'inline_tag': 'span', + 'block_syntax': ['dollar', 'square'], + 'inline_syntax': ['dollar', 'round'], + 'tex_inline_wrap': ['\\(', '\\)'], + 'tex_block_wrap': ['\\[', '\\]'], + 'smart_dollar': True }, 'pymdownx.superfences': { 'custom_fences': [ { 'name': 'mermaid', 'class': 'mermaid', - # 这个 format 函数确保输出的 HTML 结构是 Mermaid.js 期望的 'format': lambda source, 
language, css_class, options, md, **kwargs: f'
{source}'
}
@@ -84,9 +81,21 @@ class MD2HTMLExporter(MDExporter):
}
}
- # 注意:这里不再需要 .replace("\\", "\\\\")
+ # Pre-process the markdown content so math formulas are surrounded by the proper blank lines
+ content = document.content.decode()
+
+ # Handle $$ block formulas, making sure there is a blank line before and after each one
+ import re
+ # Replacement callback for each matched $$ block formula
+ def fix_block_math(match):
+ formula = match.group(1)
+ return f'\n\n$$\n{formula}\n$$\n\n'
+
+ # Use a regular expression to normalize the formatting of block formulas
+ content = re.sub(r'\$\$\s*\n?(.*?)\n?\s*\$\$', fix_block_math, content, flags=re.DOTALL)
+
html_content = markdown.markdown(
- document.content.decode(),
+ content,
extensions=extensions,
extension_configs=extension_configs
)
@@ -97,17 +106,18 @@ class MD2HTMLExporter(MDExporter):
katexCss=katex_css,
katexJs=katex_js,
autoRender=auto_render,
- markdown=html_content, # 使用新的 html_content
+ markdown=html_content,
renderMathInElement=render_math_in_element,
mermaid=mermaid,
)
return Document.from_bytes(content=render.encode("utf-8"), suffix=".html", stem=document.stem)
+
if __name__ == '__main__':
from pathlib import Path
- # d=Document.from_path(r"C:\Users\jxgm\Desktop\A_Survey_on_Decentralized_Identifiers_and_Verifiable_Credentials_translated.md")
- d=Document.from_path(r"C:\Users\jxgm\Desktop\mcp文件夹\学习笔记\互联网认证授权机制\互联网认证授权机制.md")
- exporter=MD2HTMLExporter()
- d1=exporter.export(d)
- path=Path(r"C:\Users\jxgm\Desktop\a.html")
+
+ d = Document.from_path(r"C:\Users\jxgm\Desktop\a2f9907d-6d49-4e87-9075-126218336b1e_origin_translated.md")
+ exporter = MD2HTMLExporter()
+ d1 = exporter.export(d)
+ path = Path(r"C:\Users\jxgm\Desktop\a.html")
path.write_bytes(d1.content)
\ No newline at end of file