重构workflow
This commit is contained in:
0
docutranslate/exporter/md/__init__.py
Normal file
0
docutranslate/exporter/md/__init__.py
Normal file
18
docutranslate/exporter/md/base.py
Normal file
18
docutranslate/exporter/md/base.py
Normal file
@@ -0,0 +1,18 @@
|
||||
from dataclasses import dataclass
|
||||
|
||||
from docutranslate.exporter.base import Exporter, ExporterConfig
|
||||
from docutranslate.ir.document import Document
|
||||
from docutranslate.ir.markdown_document import MarkdownDocument
|
||||
|
||||
|
||||
@dataclass(kw_only=True)
|
||||
class MDExporterConfig(ExporterConfig):
|
||||
...
|
||||
|
||||
|
||||
class MDExporter(Exporter):
|
||||
def __init__(self, config: MDExporterConfig|None=None):
|
||||
super().__init__(config=config)
|
||||
|
||||
def export(self, document: MarkdownDocument) -> Document:
|
||||
...
|
||||
73
docutranslate/exporter/md/md2html_exporter.py
Normal file
73
docutranslate/exporter/md/md2html_exporter.py
Normal file
@@ -0,0 +1,73 @@
|
||||
from dataclasses import dataclass
|
||||
|
||||
import jinja2
|
||||
import markdown2
|
||||
|
||||
from docutranslate.exporter.md.base import MDExporter, MDExporterConfig
|
||||
from docutranslate.ir.document import Document
|
||||
from docutranslate.ir.markdown_document import MarkdownDocument
|
||||
from docutranslate.utils.resource_utils import resource_path
|
||||
|
||||
@dataclass
|
||||
class MD2HTMLExporterConfig(MDExporterConfig):
|
||||
cdn: bool = True
|
||||
|
||||
class MD2HTMLExporter(MDExporter):
|
||||
def __init__(self, config: MD2HTMLExporterConfig = None):
|
||||
config = config or MD2HTMLExporterConfig()
|
||||
super().__init__(config=config)
|
||||
self.cdn=config.cdn
|
||||
|
||||
def export(self, document: MarkdownDocument) -> Document:
|
||||
cdn = self.cdn
|
||||
markdowner = markdown2.Markdown(extras=['tables', 'fenced-code-blocks', 'mermaid', "code-friendly"])
|
||||
# language=html
|
||||
pico = f'<style>{resource_path("static/pico.css").read_text(encoding="utf-8")}</style>' if not cdn else r'<link rel="stylesheet" href="https://s4.zstatic.net/ajax/libs/picocss/2.1.1/pico.min.css" integrity="sha512-+4kjFgVD0n6H3xt19Ox84B56MoS7srFn60tgdWFuO4hemtjhySKyW4LnftYZn46k3THUEiTTsbVjrHai+0MOFw==" crossorigin="anonymous" referrerpolicy="no-referrer" />'
|
||||
html_template = resource_path("template/markdown.html").read_text(encoding="utf-8")
|
||||
katex_css = f'<style>{resource_path("static/katex.css").read_text(encoding="utf-8")}</style>' if not cdn else r"""<link rel="stylesheet" href="https://s4.zstatic.net/ajax/libs/KaTeX/0.16.9/katex.min.css" integrity="sha512-fHwaWebuwA7NSF5Qg/af4UeDx9XqUpYpOGgubo3yWu+b2IQR4UeQwbb42Ti7gVAjNtVoI/I9TEoYeu9omwcC6g==" crossorigin="anonymous" referrerpolicy="no-referrer" />"""
|
||||
katex_js = f'<script>{resource_path("static/katex.js").read_text(encoding="utf-8")}</script>' if not cdn else r"""<script defer src="https://s4.zstatic.net/ajax/libs/KaTeX/0.16.9/katex.min.js" integrity="sha512-LQNxIMR5rXv7o+b1l8+N1EZMfhG7iFZ9HhnbJkTp4zjNr5Wvst75AqUeFDxeRUa7l5vEDyUiAip//r+EFLLCyA==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>"""
|
||||
auto_render = f'<script>{resource_path("static/autoRender.js").read_text(encoding="utf-8")}</script>' if not cdn else r"""<script defer src="https://s4.zstatic.net/ajax/libs/KaTeX/0.16.9/contrib/auto-render.min.js" integrity="sha512-iWiuBS5nt6r60fCz26Nd0Zqe0nbk1ZTIQbl3Kv7kYsX+yKMUFHzjaH2+AnM6vp2Xs+gNmaBAVWJjSmuPw76Efg==" crossorigin="anonymous" referrerpolicy="no-referrer"></script>"""
|
||||
# language=javascript
|
||||
render_math_in_element = r"""
|
||||
<script>
|
||||
document.addEventListener("DOMContentLoaded", function () {
|
||||
renderMathInElement(document.body, {
|
||||
delimiters: [
|
||||
{left: '$$', right: '$$', display: true},
|
||||
{left: '\\[', right: '\\]', display: true},
|
||||
{left: '$', right: '$', display: false},
|
||||
{left: '\\(', right: '\\)', display: false}
|
||||
],
|
||||
throwOnError: false
|
||||
})
|
||||
});
|
||||
</script>""" if cdn else r"""
|
||||
<script>
|
||||
document.addEventListener("DOMContentLoaded", function
|
||||
() {
|
||||
renderMathInElement(document.body, {
|
||||
delimiters: [
|
||||
{left: '$$', right: '$$', display: true},
|
||||
{left: '\\[', right: '\\]', display: true},
|
||||
{left: '$', right: '$', display: false},
|
||||
{left: '\\(', right: '\\)', display: false}
|
||||
],
|
||||
fonts: false,
|
||||
throwOnError: false
|
||||
})
|
||||
});
|
||||
</script>"""
|
||||
mermaid = f'<script>{resource_path("static/mermaid.js").read_text(encoding="utf-8")}</script>'
|
||||
content = markdowner.convert(document.content.decode().replace("\\", "\\\\"))
|
||||
# TODO:实现MathJax本地化
|
||||
render = jinja2.Template(html_template).render(
|
||||
title=document.stem,
|
||||
pico=pico,
|
||||
katexCss=katex_css,
|
||||
katexJs=katex_js,
|
||||
autoRender=auto_render,
|
||||
markdown=content,
|
||||
renderMathInElement=render_math_in_element,
|
||||
mermaid=mermaid,
|
||||
)
|
||||
return Document.from_bytes(content=render.encode("utf-8"), suffix=".html", stem=document.stem)
|
||||
8
docutranslate/exporter/md/md2md_exporter.py
Normal file
8
docutranslate/exporter/md/md2md_exporter.py
Normal file
@@ -0,0 +1,8 @@
|
||||
from docutranslate.exporter.md.base import MDExporter
|
||||
from docutranslate.ir.markdown_document import MarkdownDocument, Document
|
||||
|
||||
|
||||
class MD2MDExporter(MDExporter):
|
||||
|
||||
def export(self, document: MarkdownDocument) -> Document:
|
||||
return Document.from_bytes(suffix=".md", content=document.content, stem=document.stem)
|
||||
11
docutranslate/exporter/md/md2mdzip_exporter.py
Normal file
11
docutranslate/exporter/md/md2mdzip_exporter.py
Normal file
@@ -0,0 +1,11 @@
|
||||
from docutranslate.exporter.md.base import MDExporter
|
||||
from docutranslate.ir.markdown_document import MarkdownDocument, Document
|
||||
from docutranslate.utils.markdown_utils import unembed_base64_images_to_zip
|
||||
|
||||
|
||||
class MD2MDZipExporter(MDExporter):
|
||||
|
||||
def export(self, document: MarkdownDocument) -> Document:
|
||||
return Document.from_bytes(suffix=".zip", content=unembed_base64_images_to_zip(document.content.decode(),
|
||||
markdown_name=document.name),
|
||||
stem=document.stem)
|
||||
3
docutranslate/exporter/md/types.py
Normal file
3
docutranslate/exporter/md/types.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from typing import Literal
|
||||
|
||||
ConvertEnginType = Literal["mineru", "docling"]
|
||||
Reference in New Issue
Block a user