后端完成json翻译工作流(待检查)
This commit is contained in:
0
docutranslate/exporter/js/__init__.py
Normal file
0
docutranslate/exporter/js/__init__.py
Normal file
8
docutranslate/exporter/js/base.py
Normal file
8
docutranslate/exporter/js/base.py
Normal file
@@ -0,0 +1,8 @@
|
||||
from docutranslate.exporter.base import Exporter
|
||||
from docutranslate.ir.document import Document
|
||||
|
||||
# TODO: decide whether JSON needs its own dedicated document type
|
||||
class JsonExporter(Exporter[Document]):
    """Exporter base parameterised on ``Document``.

    ``export`` is only a stub here; concrete exporters such as
    ``Json2HTMLExporter`` override it.
    """

    def export(self, document: Document) -> Document:
        """Stub with no implementation; subclasses provide the real export."""
|
||||
37
docutranslate/exporter/js/json2html_exporter.py
Normal file
37
docutranslate/exporter/js/json2html_exporter.py
Normal file
@@ -0,0 +1,37 @@
|
||||
import json
|
||||
from dataclasses import dataclass
|
||||
|
||||
import jinja2
|
||||
|
||||
from docutranslate.exporter.base import ExporterConfig
|
||||
from docutranslate.exporter.js.base import JsonExporter
|
||||
from docutranslate.ir.document import Document
|
||||
from docutranslate.utils.resource_utils import resource_path
|
||||
|
||||
|
||||
@dataclass
class Json2HTMLExporterConfig(ExporterConfig):
    """Options consumed by ``Json2HTMLExporter``."""

    # True: link the Pico stylesheet from a CDN.
    # False: inline the bundled static/pico.css into the page.
    cdn: bool = True
|
||||
|
||||
|
||||
class Json2HTMLExporter(JsonExporter):
    """Render a JSON document into an HTML page using the ``template/json.html`` template."""

    def __init__(self, config: Json2HTMLExporterConfig | None = None):
        """Create the exporter.

        Args:
            config: Exporter options. A default ``Json2HTMLExporterConfig`` is
                used when omitted.
        """
        config = config or Json2HTMLExporterConfig()
        super().__init__(config=config)
        self.cdn = config.cdn

    def export(self, document: Document) -> Document:
        """Build the HTML page for ``document``.

        Args:
            document: Document whose ``content`` bytes hold the JSON text.

        Returns:
            A new ``.html`` document with the same stem as ``document``.
        """
        html_template = resource_path("template/json.html").read_text(encoding="utf-8")

        if self.cdn:
            # language=html
            pico = r'<link rel="stylesheet" href="https://s4.zstatic.net/ajax/libs/picocss/2.1.1/pico.min.css" integrity="sha512-+4kjFgVD0n6H3xt19Ox84B56MoS7srFn60tgdWFuO4hemtjhySKyW4LnftYZn46k3THUEiTTsbVjrHai+0MOFw==" crossorigin="anonymous" referrerpolicy="no-referrer" />'
        else:
            # Offline mode: inline the bundled stylesheet instead of linking to the CDN.
            pico = f'<style>{resource_path("static/pico.css").read_text(encoding="utf-8")}</style>'

        # The document text is passed to the template as a JSON-encoded string
        # literal. Every "<" is written as the escape \u003c (valid in both JSON
        # and JavaScript strings) so that text such as "</script>" inside the
        # data cannot terminate an enclosing <script> element.
        json_data = json.dumps(document.content.decode(), ensure_ascii=False).replace("<", "\\u003c")

        render = jinja2.Template(html_template).render(
            title=document.stem,
            pico=pico,
            jsonData=json_data,
        )
        return Document.from_bytes(content=render.encode("utf-8"), suffix=".html", stem=document.stem)
|
||||
7
docutranslate/exporter/js/json2json_exporter.py
Normal file
7
docutranslate/exporter/js/json2json_exporter.py
Normal file
@@ -0,0 +1,7 @@
|
||||
from docutranslate.exporter.txt.base import TXTExporter
|
||||
from docutranslate.ir.document import Document
|
||||
|
||||
|
||||
class Json2JsonExporter(TXTExporter):
    """Pass-through exporter: the exported document is a copy of the input.

    NOTE(review): this derives from ``TXTExporter`` rather than ``JsonExporter``;
    confirm that is intentional.
    """

    def export(self, document: Document) -> Document:
        """Return a copy of ``document`` with no transformation applied."""
        duplicate = document.copy()
        return duplicate
|
||||
62
docutranslate/template/json.html
Normal file
62
docutranslate/template/json.html
Normal file
@@ -0,0 +1,62 @@
|
||||
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>{{title}}</title>
    {{pico}}
    <!-- Load the json-viewer stylesheet from the CDN -->
    <link href="https://cdn.jsdelivr.net/npm/jquery.json-viewer@1.4.0/json-viewer/jquery.json-viewer.css" rel="stylesheet">
    <style>
        /* Basic page styling */
        body { font-family: sans-serif; padding: 20px; }
        #json-container { border: 1px solid #ccc; padding: 10px; border-radius: 5px; }
    </style>
</head>
<body>

<div id="json-container"></div>

<script src="https://cdn.jsdelivr.net/npm/jquery@3.6.0/dist/jquery.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/jquery.json-viewer@1.4.0/json-viewer/jquery.json-viewer.js"></script>

<script>
    // jsonData is supplied by the exporter as a JSON-encoded string literal
    // (json.dumps of the document text), so parse it to obtain the real value.
    // This replaces the hard-coded sample object that was rendered before.
    const myData = JSON.parse({{ jsonData | safe }});

    // Initialise the JSON viewer once the DOM is ready
    $(document).ready(function() {
        $('#json-container').jsonViewer(myData, {
            collapsed: false, // whether all nodes start collapsed
            withQuotes: true  // wrap keys in double quotes
        });
    });
</script>

</body>
</html>
|
||||
@@ -13,7 +13,7 @@ from docutranslate.utils.json_utils import flat_json_split
|
||||
|
||||
@dataclass
|
||||
class JsonTranslatorConfig(AiTranslatorConfig):
|
||||
jsonpaths: list[str]
|
||||
json_paths: list[str]
|
||||
|
||||
|
||||
class JsonTranslator(Translator):
|
||||
@@ -32,7 +32,7 @@ class JsonTranslator(Translator):
|
||||
timeout=config.timeout,
|
||||
logger=self.logger)
|
||||
self.translate_agent = JsonTranslateAgent(agent_config)
|
||||
self.jsonpaths = config.jsonpaths
|
||||
self.jsonpaths = config.json_paths
|
||||
|
||||
def _extract_matches(self, content: dict) -> list[Any]:
|
||||
"""
|
||||
@@ -141,38 +141,6 @@ class JsonTranslator(Translator):
|
||||
|
||||
return self
|
||||
|
||||
# def translate(self, document: Document) -> Self:
|
||||
# content = json.loads(document.content.decode())
|
||||
# path_vals: dict[str:list[str]] = {}
|
||||
# for jsonpath in self.jsonpaths:
|
||||
# jsonpath_expr = parse(jsonpath)
|
||||
# matches = [match.value for match in jsonpath_expr.find(content)]
|
||||
# path_vals[jsonpath] = matches
|
||||
# vals_all = []
|
||||
# for vals in path_vals.values():
|
||||
# vals_all.extend(vals)
|
||||
# vals_dict = {str(i): val for i, val in enumerate(vals_all)}#{"1":"val1","2":"val2"}
|
||||
# chunks=flat_json_split(vals_dict,self.chunk_size)
|
||||
# chunks_translated=self.translate_agent.send_prompts([json.dumps(chunk) for chunk in chunks])
|
||||
# for chunk in chunks_translated:
|
||||
# vals_dict.update(json.loads(chunk))
|
||||
#
|
||||
# #翻译后的{path1:[val1',val2']}
|
||||
# reconstructed_dict = {}
|
||||
# start_index = 0
|
||||
# for key in path_vals.keys():
|
||||
# length = len(path_vals[key])
|
||||
# sub_list = list(vals_dict.values())[start_index: start_index + length]
|
||||
# reconstructed_dict[key] = sub_list
|
||||
# start_index += length
|
||||
#
|
||||
# for path in reconstructed_dict:
|
||||
# path_expression = parse(path)
|
||||
# matches = path_expression.find(content)
|
||||
# for idx,match in enumerate(matches):
|
||||
# match.full_path.update(content, reconstructed_dict[path][idx])
|
||||
# return self
|
||||
|
||||
async def translate_async(self, document: Document) -> Self:
|
||||
content = json.loads(document.content.decode())
|
||||
|
||||
|
||||
@@ -47,3 +47,11 @@ class TXTExportable(Protocol[T_ExporterConfig]):
|
||||
|
||||
def save_as_txt(self, name: str, output_dir: Path | str, config: T_ExporterConfig | None = None) -> Self:
|
||||
...
|
||||
|
||||
@runtime_checkable
class JsonExportable(Protocol[T_ExporterConfig]):
    """Structural interface for objects that can export their result as JSON."""

    def export_to_json(self, config: T_ExporterConfig | None = None) -> str:
        """Return the JSON export as a string; ``config`` is optional."""

    def save_as_json(self, name: str, output_dir: Path | str, config: T_ExporterConfig | None = None) -> Self:
        """Save the JSON export using ``name`` and ``output_dir``; returns ``Self`` for chaining."""
|
||||
|
||||
65
docutranslate/workflow/json_workflow.py
Normal file
65
docutranslate/workflow/json_workflow.py
Normal file
@@ -0,0 +1,65 @@
|
||||
from dataclasses import dataclass
|
||||
from pathlib import Path
|
||||
from typing import Self
|
||||
|
||||
from docutranslate.exporter.base import ExporterConfig
|
||||
from docutranslate.exporter.js.json2html_exporter import Json2HTMLExporterConfig, Json2HTMLExporter
|
||||
from docutranslate.exporter.js.json2json_exporter import Json2JsonExporter
|
||||
from docutranslate.ir.document import Document
|
||||
from docutranslate.translator.ai_translator.json_translator import JsonTranslatorConfig, JsonTranslator
|
||||
from docutranslate.workflow.base import Workflow, WorkflowConfig
|
||||
from docutranslate.workflow.interfaces import HTMLExportable, JsonExportable
|
||||
|
||||
|
||||
@dataclass(kw_only=True)
class JsonWorkflowConfig(WorkflowConfig):
    """Configuration bundle for ``JsonWorkflow`` (all fields are keyword-only)."""

    # Settings handed to JsonTranslator when translating.
    translator_config: JsonTranslatorConfig
    # Default settings for HTML export, used when a call supplies no config.
    html_exporter_config: Json2HTMLExporterConfig
|
||||
|
||||
|
||||
class JsonWorkflow(Workflow[JsonWorkflowConfig, Document, Document], HTMLExportable[Json2HTMLExporterConfig],
                   JsonExportable[ExporterConfig]):
    """Workflow that translates a JSON document and exports it as JSON or HTML."""

    def __init__(self, config: JsonWorkflowConfig):
        """Store the configuration and share the workflow logger with sub-configs.

        Args:
            config: Workflow configuration, including translator and HTML exporter settings.
        """
        super().__init__(config=config)
        if config.logger:
            # Propagate the workflow-level logger to each sub-config that is set.
            for sub_config in [self.config.translator_config]:
                if sub_config:
                    sub_config.logger = config.logger

    def _pre_translate(self, document_original: Document):
        """Return a working copy of the document and a translator built from the config."""
        document = document_original.copy()
        translate_config = self.config.translator_config
        translator = JsonTranslator(translate_config)
        return document, translator

    def translate(self) -> Self:
        """Translate a copy of the original document and store it as ``document_translated``.

        The translator's return value is not used, so the translator is
        expected to update the passed document in place.
        """
        document, translator = self._pre_translate(self.document_original)
        translator.translate(document)
        self.document_translated = document
        return self

    async def translate_async(self) -> Self:
        """Asynchronous counterpart of ``translate``."""
        document, translator = self._pre_translate(self.document_original)
        await translator.translate_async(document)
        self.document_translated = document
        return self

    def export_to_html(self, config: Json2HTMLExporterConfig | None = None) -> str:
        """Export as HTML text.

        Args:
            config: Exporter options; falls back to ``self.config.html_exporter_config``.
        """
        config = config or self.config.html_exporter_config
        docu = self._export(Json2HTMLExporter(config))
        return docu.content.decode()

    def export_to_json(self, _: ExporterConfig | None = None) -> str:
        """Export as JSON text. The config argument is accepted but ignored."""
        docu = self._export(Json2JsonExporter())
        return docu.content.decode()

    def save_as_html(self, name: str | None = None, output_dir: Path | str = "./output",
                     config: Json2HTMLExporterConfig | None = None) -> Self:
        """Save the HTML export via ``self._save``.

        Args:
            name: Output name, forwarded to ``_save``.
            output_dir: Target directory, forwarded to ``_save``.
            config: Exporter options; falls back to ``self.config.html_exporter_config``.
        """
        config = config or self.config.html_exporter_config
        self._save(exporter=Json2HTMLExporter(config), name=name, output_dir=output_dir)
        return self

    def save_as_json(self, name: str | None = None, output_dir: Path | str = "./output",
                     _: ExporterConfig | None = None) -> Self:
        """Save the JSON export via ``self._save``. The config argument is accepted but ignored.

        Args:
            name: Output name, forwarded to ``_save``.
            output_dir: Target directory, forwarded to ``_save``.
        """
        self._save(exporter=Json2JsonExporter(), name=name, output_dir=output_dir)
        return self
|
||||
Reference in New Issue
Block a user