From b41daddd4f77af649df94348e004cc16583ce9c9 Mon Sep 17 00:00:00 2001
From: xunbu
Date: Sun, 3 Aug 2025 18:08:06 +0800
Subject: [PATCH] =?UTF-8?q?=E5=90=8E=E7=AB=AF=E5=AE=8C=E6=88=90json?=
 =?UTF-8?q?=E7=BF=BB=E8=AF=91=E5=B7=A5=E4=BD=9C=E6=B5=81=EF=BC=88=E5=BE=85?=
 =?UTF-8?q?=E6=A3=80=E6=9F=A5=EF=BC=89?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 docutranslate/exporter/js/__init__.py         |  0
 docutranslate/exporter/js/base.py             |  8 +++
 .../exporter/js/json2html_exporter.py         | 37 +++++++++++
 .../exporter/js/json2json_exporter.py         |  7 ++
 docutranslate/template/json.html              | 62 ++++++++++++++++++
 .../ai_translator/json_translator.py          | 36 +---------
 docutranslate/workflow/interfaces.py          |  8 +++
 docutranslate/workflow/json_workflow.py       | 65 +++++++++++++++++++
 8 files changed, 189 insertions(+), 34 deletions(-)
 create mode 100644 docutranslate/exporter/js/__init__.py
 create mode 100644 docutranslate/exporter/js/base.py
 create mode 100644 docutranslate/exporter/js/json2html_exporter.py
 create mode 100644 docutranslate/exporter/js/json2json_exporter.py
 create mode 100644 docutranslate/template/json.html
 create mode 100644 docutranslate/workflow/json_workflow.py

diff --git a/docutranslate/exporter/js/__init__.py b/docutranslate/exporter/js/__init__.py
new file mode 100644
index 0000000..e69de29
diff --git a/docutranslate/exporter/js/base.py b/docutranslate/exporter/js/base.py
new file mode 100644
index 0000000..c716ede
--- /dev/null
+++ b/docutranslate/exporter/js/base.py
@@ -0,0 +1,8 @@
+from docutranslate.exporter.base import Exporter
+from docutranslate.ir.document import Document
+
+# TODO: decide whether JSON needs its own dedicated document type
+class JsonExporter(Exporter[Document]):
+
+    def export(self,document:Document)->Document:
+        ...  # placeholder body; Json2HTMLExporter supplies a real export()
\ No newline at end of file
diff --git a/docutranslate/exporter/js/json2html_exporter.py b/docutranslate/exporter/js/json2html_exporter.py
new file mode 100644
index 0000000..45c5d50
--- /dev/null
+++ b/docutranslate/exporter/js/json2html_exporter.py
@@ -0,0 +1,61 @@
+import json
+from dataclasses import dataclass
+
+import jinja2
+
+from docutranslate.exporter.base import ExporterConfig
+from docutranslate.exporter.js.base import JsonExporter
+from docutranslate.ir.document import Document
+from docutranslate.utils.resource_utils import resource_path
+
+
+@dataclass
+class Json2HTMLExporterConfig(ExporterConfig):
+    """Options for rendering a JSON document as an HTML page."""
+
+    # Selects which of the two `pico` snippets is handed to the template.
+    cdn: bool = True
+
+
+class Json2HTMLExporter(JsonExporter):
+    """Render a JSON document through the ``template/json.html`` template."""
+
+    def __init__(self, config: Json2HTMLExporterConfig | None = None):
+        """Store the exporter options, falling back to the defaults.
+
+        Args:
+            config: Exporter options; a default ``Json2HTMLExporterConfig``
+                is created when this is omitted or falsy.
+        """
+        config = config or Json2HTMLExporterConfig()
+        super().__init__(config=config)
+        self.cdn = config.cdn
+
+    def export(self, document: Document) -> Document:
+        """Build an ``.html`` document from the content of *document*.
+
+        Args:
+            document: Source document; its ``content`` bytes are decoded
+                and passed to the template as ``jsonData``.
+
+        Returns:
+            A new document with suffix ``.html`` and the same stem.
+        """
+        html_template = resource_path("template/json.html").read_text(encoding="utf-8")
+
+        # NOTE(review): both literals are empty in this patch text; presumably
+        # they carried the inline / CDN stylesheet markup - confirm against
+        # the repository before relying on `cdn`.
+        # language=html
+        pico = f'' if not self.cdn else r''
+
+        # The decoded text is serialised as a single JSON string literal; it
+        # is not parsed here.
+        json_data = json.dumps(document.content.decode(), ensure_ascii=False)
+
+        render = jinja2.Template(html_template).render(
+            title=document.stem,
+            pico=pico,
+            jsonData=json_data,
+        )
+        return Document.from_bytes(content=render.encode("utf-8"), suffix=".html", stem=document.stem)
diff --git a/docutranslate/exporter/js/json2json_exporter.py b/docutranslate/exporter/js/json2json_exporter.py
new file mode 100644
index 0000000..bb7cd4e
--- /dev/null
+++ b/docutranslate/exporter/js/json2json_exporter.py
@@ -0,0 +1,12 @@
+from docutranslate.exporter.txt.base import TXTExporter
+from docutranslate.ir.document import Document
+
+
+# NOTE(review): this derives from TXTExporter rather than the JsonExporter
+# base added in docutranslate/exporter/js/base.py - confirm that is intended.
+class Json2JsonExporter(TXTExporter):
+    """Exporter that hands back a copy of the document it is given."""
+
+    def export(self, document: Document) -> Document:
+        """Return ``document.copy()``."""
+        return document.copy()
diff --git a/docutranslate/template/json.html b/docutranslate/template/json.html
new file mode 100644
index 0000000..e7b3149
--- /dev/null
+++ b/docutranslate/template/json.html
@@ -0,0 +1,62 @@
+ + + + + {{title}} + {{pico}} + + + + + + +
+ + + + + + + + + \ No newline at end of file diff --git a/docutranslate/translator/ai_translator/json_translator.py b/docutranslate/translator/ai_translator/json_translator.py index dda0092..6eb23c5 100644 --- a/docutranslate/translator/ai_translator/json_translator.py +++ b/docutranslate/translator/ai_translator/json_translator.py @@ -13,7 +13,7 @@ from docutranslate.utils.json_utils import flat_json_split @dataclass class JsonTranslatorConfig(AiTranslatorConfig): - jsonpaths: list[str] + json_paths: list[str] class JsonTranslator(Translator): @@ -32,7 +32,7 @@ class JsonTranslator(Translator): timeout=config.timeout, logger=self.logger) self.translate_agent = JsonTranslateAgent(agent_config) - self.jsonpaths = config.jsonpaths + self.jsonpaths = config.json_paths def _extract_matches(self, content: dict) -> list[Any]: """ @@ -141,38 +141,6 @@ class JsonTranslator(Translator): return self - # def translate(self, document: Document) -> Self: - # content = json.loads(document.content.decode()) - # path_vals: dict[str:list[str]] = {} - # for jsonpath in self.jsonpaths: - # jsonpath_expr = parse(jsonpath) - # matches = [match.value for match in jsonpath_expr.find(content)] - # path_vals[jsonpath] = matches - # vals_all = [] - # for vals in path_vals.values(): - # vals_all.extend(vals) - # vals_dict = {str(i): val for i, val in enumerate(vals_all)}#{"1":"val1","2":"val2"} - # chunks=flat_json_split(vals_dict,self.chunk_size) - # chunks_translated=self.translate_agent.send_prompts([json.dumps(chunk) for chunk in chunks]) - # for chunk in chunks_translated: - # vals_dict.update(json.loads(chunk)) - # - # #翻译后的{path1:[val1',val2']} - # reconstructed_dict = {} - # start_index = 0 - # for key in path_vals.keys(): - # length = len(path_vals[key]) - # sub_list = list(vals_dict.values())[start_index: start_index + length] - # reconstructed_dict[key] = sub_list - # start_index += length - # - # for path in reconstructed_dict: - # path_expression = parse(path) - # matches = 
path_expression.find(content) - # for idx,match in enumerate(matches): - # match.full_path.update(content, reconstructed_dict[path][idx]) - # return self - async def translate_async(self, document: Document) -> Self: content = json.loads(document.content.decode()) diff --git a/docutranslate/workflow/interfaces.py b/docutranslate/workflow/interfaces.py index 813ef50..4f0a394 100644 --- a/docutranslate/workflow/interfaces.py +++ b/docutranslate/workflow/interfaces.py @@ -47,3 +47,11 @@ class TXTExportable(Protocol[T_ExporterConfig]): def save_as_txt(self, name: str, output_dir: Path | str, config: T_ExporterConfig | None = None) -> Self: ... + +@runtime_checkable +class JsonExportable(Protocol[T_ExporterConfig]): + def export_to_json(self, config: T_ExporterConfig | None = None) -> str: + ... + + def save_as_json(self, name: str, output_dir: Path | str, config: T_ExporterConfig | None = None) -> Self: + ...
diff --git a/docutranslate/workflow/json_workflow.py b/docutranslate/workflow/json_workflow.py
new file mode 100644
index 0000000..469e686
--- /dev/null
+++ b/docutranslate/workflow/json_workflow.py
@@ -0,0 +1,109 @@
+from dataclasses import dataclass
+from pathlib import Path
+from typing import Self
+
+from docutranslate.exporter.base import ExporterConfig
+from docutranslate.exporter.js.json2html_exporter import Json2HTMLExporterConfig, Json2HTMLExporter
+from docutranslate.exporter.js.json2json_exporter import Json2JsonExporter
+from docutranslate.ir.document import Document
+from docutranslate.translator.ai_translator.json_translator import JsonTranslatorConfig, JsonTranslator
+from docutranslate.workflow.base import Workflow, WorkflowConfig
+from docutranslate.workflow.interfaces import HTMLExportable, JsonExportable
+
+
+@dataclass(kw_only=True)
+class JsonWorkflowConfig(WorkflowConfig):
+    """Settings consumed by ``JsonWorkflow``."""
+
+    # Passed to JsonTranslator each time a translation run starts.
+    translator_config: JsonTranslatorConfig
+    # Used by the HTML export methods when no per-call config is given.
+    html_exporter_config: Json2HTMLExporterConfig
+
+
+class JsonWorkflow(Workflow[JsonWorkflowConfig, Document, Document], HTMLExportable[Json2HTMLExporterConfig],
+                   JsonExportable[ExporterConfig]):
+    """Workflow that translates a JSON document and exposes HTML/JSON export."""
+
+    def __init__(self, config: JsonWorkflowConfig):
+        """Create the workflow and share its logger with the translator config.
+
+        Args:
+            config: Workflow settings; when ``config.logger`` is truthy it is
+                also assigned to ``translator_config.logger``.
+        """
+        super().__init__(config=config)
+        if config.logger:
+            for sub_config in [self.config.translator_config]:
+                if sub_config:
+                    sub_config.logger = config.logger
+
+    def _pre_translate(self, document_original: Document):
+        """Return a copy of *document_original* and a new ``JsonTranslator``."""
+        document = document_original.copy()
+        translator = JsonTranslator(self.config.translator_config)
+        return document, translator
+
+    def translate(self) -> Self:
+        """Run ``JsonTranslator.translate`` on a copy of the original document.
+
+        Returns:
+            This workflow, with the copy stored in ``document_translated``.
+        """
+        document, translator = self._pre_translate(self.document_original)
+        translator.translate(document)
+        self.document_translated = document
+        return self
+
+    async def translate_async(self) -> Self:
+        """Awaitable counterpart of ``translate`` using ``translate_async``."""
+        document, translator = self._pre_translate(self.document_original)
+        await translator.translate_async(document)
+        self.document_translated = document
+        return self
+
+    def export_to_html(self, config: Json2HTMLExporterConfig | None = None) -> str:
+        """Return the decoded content produced by the HTML exporter.
+
+        Args:
+            config: Per-call exporter options; defaults to
+                ``self.config.html_exporter_config``.
+        """
+        config = config or self.config.html_exporter_config
+        docu = self._export(Json2HTMLExporter(config))
+        return docu.content.decode()
+
+    def export_to_json(self, config: ExporterConfig | None = None) -> str:
+        """Return the decoded content produced by the JSON exporter.
+
+        Args:
+            config: Accepted to match ``JsonExportable``; not used.
+        """
+        docu = self._export(Json2JsonExporter())
+        return docu.content.decode()
+
+    def save_as_html(self, name: str | None = None, output_dir: Path | str = "./output",
+                     config: Json2HTMLExporterConfig | None = None) -> Self:
+        """Save the HTML export via ``self._save`` and return this workflow.
+
+        Args:
+            name: Forwarded to ``self._save``.
+            output_dir: Forwarded to ``self._save``.
+            config: Per-call exporter options; defaults to
+                ``self.config.html_exporter_config``.
+        """
+        config = config or self.config.html_exporter_config
+        self._save(exporter=Json2HTMLExporter(config), name=name, output_dir=output_dir)
+        return self
+
+    def save_as_json(self, name: str | None = None, output_dir: Path | str = "./output",
+                     config: ExporterConfig | None = None) -> Self:
+        """Save the JSON export via ``self._save`` and return this workflow.
+
+        Args:
+            name: Forwarded to ``self._save``.
+            output_dir: Forwarded to ``self._save``.
+            config: Accepted to match ``JsonExportable``; not used.
+        """
+        self._save(exporter=Json2JsonExporter(), name=name, output_dir=output_dir)
+        return self