diff --git a/docutranslate/exporter/md/md2html_exporter.py b/docutranslate/exporter/md/md2html_exporter.py index af42ddb..76770c3 100644 --- a/docutranslate/exporter/md/md2html_exporter.py +++ b/docutranslate/exporter/md/md2html_exporter.py @@ -1,22 +1,22 @@ from dataclasses import dataclass - import jinja2 import markdown - from docutranslate.exporter.md.base import MDExporter, MDExporterConfig from docutranslate.ir.document import Document from docutranslate.ir.markdown_document import MarkdownDocument from docutranslate.utils.resource_utils import resource_path + @dataclass class MD2HTMLExporterConfig(MDExporterConfig): cdn: bool = True + class MD2HTMLExporter(MDExporter): def __init__(self, config: MD2HTMLExporterConfig = None): config = config or MD2HTMLExporterConfig() super().__init__(config=config) - self.cdn=config.cdn + self.cdn = config.cdn def export(self, document: MarkdownDocument) -> Document: cdn = self.cdn @@ -26,41 +26,32 @@ class MD2HTMLExporter(MDExporter): katex_css = f'' if not cdn else r"""""" katex_js = f'' if not cdn else r"""""" auto_render = f'' if not cdn else r"""""" - # language=javascript + + # 修改 JavaScript 渲染配置,增加更多选项 render_math_in_element = r""" - """ if cdn else r""" - """ + """ + mermaid = f'' - # 使用 python-markdown 和 pymdown-extensions - # Arithmatex 扩展专门用于处理 KaTeX/MathJax 公式 - # 它能智能识别 $...$, $$...$$, \(...\), \[...\] 等,并保护它们不受干扰 + # 修改扩展配置 extensions = [ 'markdown.extensions.tables', 'pymdownx.arithmatex', @@ -69,14 +60,20 @@ class MD2HTMLExporter(MDExporter): extension_configs = { 'pymdownx.arithmatex': { - 'generic': True + 'generic': True, + 'block_tag': 'div', + 'inline_tag': 'span', + 'block_syntax': ['dollar', 'square'], + 'inline_syntax': ['dollar', 'round'], + 'tex_inline_wrap': ['\\(', '\\)'], + 'tex_block_wrap': ['\\[', '\\]'], + 'smart_dollar': True }, 'pymdownx.superfences': { 'custom_fences': [ { 'name': 'mermaid', 'class': 'mermaid', - # 这个 format 函数确保输出的 HTML 结构是 Mermaid.js 期望的 'format': lambda source, language, css_class, options, md, **kwargs: f'
{source}
' } @@ -84,9 +81,21 @@ class MD2HTMLExporter(MDExporter): } } - # 注意:这里不再需要 .replace("\\", "\\\\") + # 预处理 markdown 内容,确保数学公式周围有正确的空行 + content = document.content.decode() + + # 处理 $$ 块公式,确保前后有空行 + import re + # 匹配 $$ 块公式 + def fix_block_math(match): + formula = match.group(1) + return f'\n\n$$\n{formula}\n$$\n\n' + + # 使用正则表达式修复块公式格式 + content = re.sub(r'\$\$\s*\n?(.*?)\n?\s*\$\$', fix_block_math, content, flags=re.DOTALL) + html_content = markdown.markdown( - document.content.decode(), + content, extensions=extensions, extension_configs=extension_configs ) @@ -97,17 +106,18 @@ class MD2HTMLExporter(MDExporter): katexCss=katex_css, katexJs=katex_js, autoRender=auto_render, - markdown=html_content, # 使用新的 html_content + markdown=html_content, renderMathInElement=render_math_in_element, mermaid=mermaid, ) return Document.from_bytes(content=render.encode("utf-8"), suffix=".html", stem=document.stem) + if __name__ == '__main__': from pathlib import Path - # d=Document.from_path(r"C:\Users\jxgm\Desktop\A_Survey_on_Decentralized_Identifiers_and_Verifiable_Credentials_translated.md") - d=Document.from_path(r"C:\Users\jxgm\Desktop\mcp文件夹\学习笔记\互联网认证授权机制\互联网认证授权机制.md") - exporter=MD2HTMLExporter() - d1=exporter.export(d) - path=Path(r"C:\Users\jxgm\Desktop\a.html") + + d = Document.from_path(r"C:\Users\jxgm\Desktop\a2f9907d-6d49-4e87-9075-126218336b1e_origin_translated.md") + exporter = MD2HTMLExporter() + d1 = exporter.export(d) + path = Path(r"C:\Users\jxgm\Desktop\a.html") path.write_bytes(d1.content) \ No newline at end of file