后端完成json翻译工作流(待检查)

This commit is contained in:
xunbu
2025-08-03 18:08:06 +08:00
parent f054a1e0a9
commit b41daddd4f
8 changed files with 189 additions and 34 deletions

View File

View File

@@ -0,0 +1,8 @@
from docutranslate.exporter.base import Exporter
from docutranslate.ir.document import Document
# TODO: decide whether JSON needs its own dedicated document type.
class JsonExporter(Exporter[Document]):
    """Common base class for the exporters that operate on JSON documents."""

    def export(self, document: Document) -> Document:
        """Export ``document``.

        This base implementation is an empty stub; concrete exporters
        override it.
        """

View File

@@ -0,0 +1,37 @@
import json
from dataclasses import dataclass
import jinja2
from docutranslate.exporter.base import ExporterConfig
from docutranslate.exporter.js.base import JsonExporter
from docutranslate.ir.document import Document
from docutranslate.utils.resource_utils import resource_path
@dataclass
class Json2HTMLExporterConfig(ExporterConfig):
    """Options for :class:`Json2HTMLExporter`."""

    # True: reference the Pico stylesheet through a CDN <link> tag.
    # False: inline the bundled ``static/pico.css`` into the page instead.
    cdn: bool = True
class Json2HTMLExporter(JsonExporter):
    """Render a JSON document into a standalone HTML page.

    The page is produced from the ``template/json.html`` Jinja2 template.
    """

    def __init__(self, config: Json2HTMLExporterConfig = None):
        """Store the exporter options, using defaults when ``config`` is falsy."""
        if not config:
            config = Json2HTMLExporterConfig()
        super().__init__(config=config)
        self.cdn = config.cdn

    def export(self, document: Document) -> Document:
        """Build the HTML representation of ``document``.

        Args:
            document: Source document; its ``content`` bytes are decoded as text.

        Returns:
            A new ``Document`` with suffix ``.html`` and the same stem, holding
            the rendered page encoded as UTF-8.
        """
        use_cdn = self.cdn
        template_source = resource_path("template/json.html").read_text(encoding="utf-8")
        if use_cdn:
            # language=html
            pico = r'<link rel="stylesheet" href="https://s4.zstatic.net/ajax/libs/picocss/2.1.1/pico.min.css" integrity="sha512-+4kjFgVD0n6H3xt19Ox84B56MoS7srFn60tgdWFuO4hemtjhySKyW4LnftYZn46k3THUEiTTsbVjrHai+0MOFw==" crossorigin="anonymous" referrerpolicy="no-referrer" />'
        else:
            # Offline mode: embed the bundled stylesheet directly in the page.
            pico = f'<style>{resource_path("static/pico.css").read_text(encoding="utf-8")}</style>'
        # The decoded document text is JSON-encoded once more, so the template
        # receives it as a quoted string literal.
        json_data = json.dumps(document.content.decode(), ensure_ascii=False)
        template = jinja2.Template(template_source)
        page = template.render(
            title=document.stem,
            pico=pico,
            jsonData=json_data,
        )
        return Document.from_bytes(content=page.encode("utf-8"), suffix=".html", stem=document.stem)

View File

@@ -0,0 +1,7 @@
from docutranslate.exporter.txt.base import TXTExporter
from docutranslate.ir.document import Document
# NOTE(review): this derives from TXTExporter rather than JsonExporter even
# though it is the JSON pass-through exporter -- confirm this is intentional.
class Json2JsonExporter(TXTExporter):
    """Exporter whose output is simply a copy of the input document."""

    def export(self, document: Document) -> Document:
        """Return ``document.copy()`` without transforming the content."""
        duplicate = document.copy()
        return duplicate

View File

@@ -0,0 +1,62 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>{{title}}</title>
    {{pico}}
    <!-- Load the json-viewer stylesheet from the CDN -->
    <link href="https://cdn.jsdelivr.net/npm/jquery.json-viewer@1.4.0/json-viewer/jquery.json-viewer.css" rel="stylesheet">
    <style>
        /* Light page styling */
        body { font-family: sans-serif; padding: 20px; }
        #json-container { border: 1px solid #ccc; padding: 10px; border-radius: 5px; }
    </style>
</head>
<body>
<div id="json-container"></div>
<script src="https://cdn.jsdelivr.net/npm/jquery@3.6.0/dist/jquery.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/jquery.json-viewer@1.4.0/json-viewer/jquery.json-viewer.js"></script>
<script>
    // The exporter renders the jsonData variable as a JSON-encoded string
    // literal holding the document text. Any "</" in it is rewritten to the
    // equivalent escape "<\/" so the payload can never close this script
    // element early.
    const rawJson = {{ jsonData | replace("</", "<\\/") }};

    // Parse the document text; if it is not valid JSON, show the raw text
    // instead of leaving the page empty.
    let myData;
    try {
        myData = JSON.parse(rawJson);
    } catch (err) {
        myData = rawJson;
    }

    // Initialise the JSON viewer
    $(document).ready(function() {
        $('#json-container').jsonViewer(myData, {
            collapsed: false, // whether every node starts collapsed
            withQuotes: true  // whether keys and string values are quoted
        });
    });
</script>
</body>
</html>

View File

@@ -13,7 +13,7 @@ from docutranslate.utils.json_utils import flat_json_split
@dataclass
class JsonTranslatorConfig(AiTranslatorConfig):
jsonpaths: list[str]
json_paths: list[str]
class JsonTranslator(Translator):
@@ -32,7 +32,7 @@ class JsonTranslator(Translator):
timeout=config.timeout,
logger=self.logger)
self.translate_agent = JsonTranslateAgent(agent_config)
self.jsonpaths = config.jsonpaths
self.jsonpaths = config.json_paths
def _extract_matches(self, content: dict) -> list[Any]:
"""
@@ -141,38 +141,6 @@ class JsonTranslator(Translator):
return self
# def translate(self, document: Document) -> Self:
# content = json.loads(document.content.decode())
# path_vals: dict[str:list[str]] = {}
# for jsonpath in self.jsonpaths:
# jsonpath_expr = parse(jsonpath)
# matches = [match.value for match in jsonpath_expr.find(content)]
# path_vals[jsonpath] = matches
# vals_all = []
# for vals in path_vals.values():
# vals_all.extend(vals)
# vals_dict = {str(i): val for i, val in enumerate(vals_all)}#{"1":"val1","2":"val2"}
# chunks=flat_json_split(vals_dict,self.chunk_size)
# chunks_translated=self.translate_agent.send_prompts([json.dumps(chunk) for chunk in chunks])
# for chunk in chunks_translated:
# vals_dict.update(json.loads(chunk))
#
# #翻译后的{path1:[val1',val2']}
# reconstructed_dict = {}
# start_index = 0
# for key in path_vals.keys():
# length = len(path_vals[key])
# sub_list = list(vals_dict.values())[start_index: start_index + length]
# reconstructed_dict[key] = sub_list
# start_index += length
#
# for path in reconstructed_dict:
# path_expression = parse(path)
# matches = path_expression.find(content)
# for idx,match in enumerate(matches):
# match.full_path.update(content, reconstructed_dict[path][idx])
# return self
async def translate_async(self, document: Document) -> Self:
content = json.loads(document.content.decode())

View File

@@ -47,3 +47,11 @@ class TXTExportable(Protocol[T_ExporterConfig]):
def save_as_txt(self, name: str, output_dir: Path | str, config: T_ExporterConfig | None = None) -> Self:
...
@runtime_checkable
class JsonExportable(Protocol[T_ExporterConfig]):
    """Structural interface for objects that can produce a JSON export."""

    def export_to_json(self, config: T_ExporterConfig | None = None) -> str:
        """Return the JSON export as a string."""

    def save_as_json(self, name: str, output_dir: Path | str, config: T_ExporterConfig | None = None) -> Self:
        """Save the JSON export as ``name`` under ``output_dir``; returns ``Self``."""

View File

@@ -0,0 +1,65 @@
from dataclasses import dataclass
from pathlib import Path
from typing import Self
from docutranslate.exporter.base import ExporterConfig
from docutranslate.exporter.js.json2html_exporter import Json2HTMLExporterConfig, Json2HTMLExporter
from docutranslate.exporter.js.json2json_exporter import Json2JsonExporter
from docutranslate.ir.document import Document
from docutranslate.translator.ai_translator.json_translator import JsonTranslatorConfig, JsonTranslator
from docutranslate.workflow.base import Workflow, WorkflowConfig
from docutranslate.workflow.interfaces import HTMLExportable, JsonExportable
@dataclass(kw_only=True)
class JsonWorkflowConfig(WorkflowConfig):
    """Keyword-only settings bundle consumed by ``JsonWorkflow``."""

    # Passed to JsonTranslator when a translation is started.
    translator_config: JsonTranslatorConfig
    # Used for HTML export whenever the caller supplies no explicit config.
    html_exporter_config: Json2HTMLExporterConfig
class JsonWorkflow(Workflow[JsonWorkflowConfig, Document, Document], HTMLExportable[Json2HTMLExporterConfig],
                   JsonExportable[ExporterConfig]):
    """Workflow that translates a JSON document and exports it as HTML or JSON."""

    def __init__(self, config: JsonWorkflowConfig):
        """Initialise the workflow and share its logger with the translator config.

        Args:
            config: Workflow settings; when ``config.logger`` is set it is also
                assigned to ``translator_config.logger``.
        """
        super().__init__(config=config)
        translator_config = self.config.translator_config
        if config.logger and translator_config:
            translator_config.logger = config.logger

    def _pre_translate(self, document_original: Document):
        """Return a working copy of the document and a freshly built translator."""
        document = document_original.copy()
        translator = JsonTranslator(self.config.translator_config)
        return document, translator

    def translate(self) -> Self:
        """Translate a copy of the original document synchronously.

        Returns:
            This workflow, with ``document_translated`` set to the working copy.
        """
        document, translator = self._pre_translate(self.document_original)
        # The translator's return value is ignored; the working copy is
        # presumably updated in place -- TODO confirm against JsonTranslator.
        translator.translate(document)
        self.document_translated = document
        return self

    async def translate_async(self) -> Self:
        """Asynchronous counterpart of :meth:`translate`."""
        document, translator = self._pre_translate(self.document_original)
        await translator.translate_async(document)
        self.document_translated = document
        return self

    def export_to_html(self, config: Json2HTMLExporterConfig | None = None) -> str:
        """Return the HTML export as text.

        Args:
            config: Exporter options; defaults to ``html_exporter_config`` from
                the workflow configuration.
        """
        config = config or self.config.html_exporter_config
        docu = self._export(Json2HTMLExporter(config))
        return docu.content.decode()

    def export_to_json(self, _: ExporterConfig | None = None) -> str:
        """Return the JSON export as text.

        The argument is accepted but not used.
        NOTE(review): ``JsonExportable`` names this parameter ``config``, so a
        keyword call with ``config=`` is rejected here -- confirm whether the
        placeholder name should be aligned with the protocol.
        """
        docu = self._export(Json2JsonExporter())
        return docu.content.decode()

    def save_as_html(self, name: str | None = None, output_dir: Path | str = "./output",
                     config: Json2HTMLExporterConfig | None = None) -> Self:
        """Save the HTML export through the workflow's ``_save`` helper.

        Args:
            name: Output name forwarded to ``_save``; may be ``None``.
            output_dir: Target directory forwarded to ``_save``.
            config: Exporter options; defaults to ``html_exporter_config`` from
                the workflow configuration.

        Returns:
            This workflow, to allow call chaining.
        """
        config = config or self.config.html_exporter_config
        self._save(exporter=Json2HTMLExporter(config), name=name, output_dir=output_dir)
        return self

    def save_as_json(self, name: str | None = None, output_dir: Path | str = "./output",
                     _: ExporterConfig | None = None) -> Self:
        """Save the JSON export through the workflow's ``_save`` helper.

        Args:
            name: Output name forwarded to ``_save``; may be ``None``.
            output_dir: Target directory forwarded to ``_save``.
            _: Accepted but not used.

        Returns:
            This workflow, to allow call chaining.
        """
        self._save(exporter=Json2JsonExporter(), name=name, output_dir=output_dir)
        return self