后端完成json翻译工作流(待检查)
This commit is contained in:
0
docutranslate/exporter/js/__init__.py
Normal file
0
docutranslate/exporter/js/__init__.py
Normal file
8
docutranslate/exporter/js/base.py
Normal file
8
docutranslate/exporter/js/base.py
Normal file
@@ -0,0 +1,8 @@
|
|||||||
|
from docutranslate.exporter.base import Exporter
|
||||||
|
from docutranslate.ir.document import Document
|
||||||
|
|
||||||
|
# TODO: decide whether JSON needs its own dedicated document type.
class JsonExporter(Exporter[Document]):
    """Common base class for the JSON exporters.

    Subclasses override :meth:`export` to produce their output document.
    """

    def export(self, document: Document) -> Document:
        """Turn ``document`` into the exporter's output document.

        This base implementation is an empty placeholder.
        """
        ...
|
||||||
37
docutranslate/exporter/js/json2html_exporter.py
Normal file
37
docutranslate/exporter/js/json2html_exporter.py
Normal file
@@ -0,0 +1,37 @@
|
|||||||
|
import json
|
||||||
|
from dataclasses import dataclass
|
||||||
|
|
||||||
|
import jinja2
|
||||||
|
|
||||||
|
from docutranslate.exporter.base import ExporterConfig
|
||||||
|
from docutranslate.exporter.js.base import JsonExporter
|
||||||
|
from docutranslate.ir.document import Document
|
||||||
|
from docutranslate.utils.resource_utils import resource_path
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass
class Json2HTMLExporterConfig(ExporterConfig):
    """Options for the JSON-to-HTML exporter."""

    # True: reference the Pico stylesheet through a CDN <link>;
    # False: inline the bundled ``static/pico.css`` into the page.
    cdn: bool = True
|
||||||
|
|
||||||
|
|
||||||
|
class Json2HTMLExporter(JsonExporter):
    """Render a JSON document as a standalone HTML page.

    The page is produced from the ``template/json.html`` Jinja template, which
    is rendered with the variables ``title``, ``pico`` and ``jsonData``.
    """

    # Stylesheet tag used when the Pico CSS is loaded from the CDN.
    # language=html
    _PICO_CDN_LINK = r'<link rel="stylesheet" href="https://s4.zstatic.net/ajax/libs/picocss/2.1.1/pico.min.css" integrity="sha512-+4kjFgVD0n6H3xt19Ox84B56MoS7srFn60tgdWFuO4hemtjhySKyW4LnftYZn46k3THUEiTTsbVjrHai+0MOFw==" crossorigin="anonymous" referrerpolicy="no-referrer" />'

    # Characters that must not appear literally in text inlined into a
    # <script> element. Each is replaced by its \uXXXX escape, which both JSON
    # and JavaScript decode back to the same character inside a string literal.
    _SCRIPT_ESCAPES = (
        ("<", "\\u003c"),
        (">", "\\u003e"),
        ("&", "\\u0026"),
        ("\u2028", "\\u2028"),
        ("\u2029", "\\u2029"),
    )

    def __init__(self, config: Json2HTMLExporterConfig | None = None):
        """Create the exporter.

        Args:
            config: Export options; a default ``Json2HTMLExporterConfig`` is
                used when omitted.
        """
        if config is None:
            config = Json2HTMLExporterConfig()
        super().__init__(config=config)
        self.cdn = config.cdn

    def export(self, document: Document) -> Document:
        """Build the HTML page for ``document``.

        Args:
            document: Document whose ``content`` bytes hold the JSON text.

        Returns:
            A new ``.html`` document with the same stem as ``document``.
        """
        template_text = resource_path("template/json.html").read_text(encoding="utf-8")

        if self.cdn:
            pico = self._PICO_CDN_LINK
        else:
            pico_css = resource_path("static/pico.css").read_text(encoding="utf-8")
            pico = f"<style>{pico_css}</style>"

        # The payload stays a JSON string literal holding the document text,
        # but is made safe to inline in <script>: a literal "</script>" in the
        # document would otherwise terminate the script element early.
        json_data = json.dumps(document.content.decode(), ensure_ascii=False)
        for char, escaped in self._SCRIPT_ESCAPES:
            json_data = json_data.replace(char, escaped)

        rendered = jinja2.Template(template_text).render(
            title=document.stem,
            pico=pico,
            jsonData=json_data,
        )
        return Document.from_bytes(content=rendered.encode("utf-8"), suffix=".html", stem=document.stem)
|
||||||
7
docutranslate/exporter/js/json2json_exporter.py
Normal file
7
docutranslate/exporter/js/json2json_exporter.py
Normal file
@@ -0,0 +1,7 @@
|
|||||||
|
from docutranslate.exporter.txt.base import TXTExporter
|
||||||
|
from docutranslate.ir.document import Document
|
||||||
|
|
||||||
|
|
||||||
|
class Json2JsonExporter(TXTExporter):
    """Pass-through exporter: the output is a copy of the input document."""

    # NOTE(review): this derives from TXTExporter although a JsonExporter base
    # exists in docutranslate.exporter.js.base -- confirm which base is intended.
    def export(self, document: Document) -> Document:
        """Return the result of ``document.copy()`` without altering it."""
        duplicate = document.copy()
        return duplicate
|
||||||
62
docutranslate/template/json.html
Normal file
62
docutranslate/template/json.html
Normal file
@@ -0,0 +1,62 @@
|
|||||||
|
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>{{ title | e }}</title>
    {{ pico }}
    <!-- json-viewer stylesheet, loaded from the CDN -->
    <link href="https://cdn.jsdelivr.net/npm/jquery.json-viewer@1.4.0/json-viewer/jquery.json-viewer.css" rel="stylesheet">
    <style>
        /* Basic page styling */
        body { font-family: sans-serif; padding: 20px; }
        #json-container { border: 1px solid #ccc; padding: 10px; border-radius: 5px; }
    </style>
</head>
<body>

<div id="json-container"></div>

<script src="https://cdn.jsdelivr.net/npm/jquery@3.6.0/dist/jquery.min.js"></script>
<script src="https://cdn.jsdelivr.net/npm/jquery.json-viewer@1.4.0/json-viewer/jquery.json-viewer.js"></script>

<script>
    // `jsonData` is the text passed in by the renderer. The `tojson` filter
    // turns it into a JavaScript string literal that is safe inside <script>.
    const rawJson = {{ jsonData | tojson }};

    // The text may be the document's JSON itself or that JSON encoded once
    // more as a string, so decode at most twice and stop as soon as the value
    // is no longer a string or no longer parses.
    function decodeJson(value) {
        let decoded = value;
        for (let depth = 0; depth < 2 && typeof decoded === "string"; depth++) {
            try {
                decoded = JSON.parse(decoded);
            } catch (err) {
                break;
            }
        }
        return decoded;
    }

    const myData = decodeJson(rawJson);

    // Initialise the JSON viewer once the DOM is ready
    $(document).ready(function () {
        $('#json-container').jsonViewer(myData, {
            collapsed: false, // whether all nodes start collapsed
            withQuotes: true  // whether keys and string values are shown in quotes
        });
    });
</script>

</body>
</html>
|
||||||
@@ -13,7 +13,7 @@ from docutranslate.utils.json_utils import flat_json_split
|
|||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
class JsonTranslatorConfig(AiTranslatorConfig):
|
class JsonTranslatorConfig(AiTranslatorConfig):
|
||||||
jsonpaths: list[str]
|
json_paths: list[str]
|
||||||
|
|
||||||
|
|
||||||
class JsonTranslator(Translator):
|
class JsonTranslator(Translator):
|
||||||
@@ -32,7 +32,7 @@ class JsonTranslator(Translator):
|
|||||||
timeout=config.timeout,
|
timeout=config.timeout,
|
||||||
logger=self.logger)
|
logger=self.logger)
|
||||||
self.translate_agent = JsonTranslateAgent(agent_config)
|
self.translate_agent = JsonTranslateAgent(agent_config)
|
||||||
self.jsonpaths = config.jsonpaths
|
self.jsonpaths = config.json_paths
|
||||||
|
|
||||||
def _extract_matches(self, content: dict) -> list[Any]:
|
def _extract_matches(self, content: dict) -> list[Any]:
|
||||||
"""
|
"""
|
||||||
@@ -141,38 +141,6 @@ class JsonTranslator(Translator):
|
|||||||
|
|
||||||
return self
|
return self
|
||||||
|
|
||||||
# def translate(self, document: Document) -> Self:
|
|
||||||
# content = json.loads(document.content.decode())
|
|
||||||
# path_vals: dict[str:list[str]] = {}
|
|
||||||
# for jsonpath in self.jsonpaths:
|
|
||||||
# jsonpath_expr = parse(jsonpath)
|
|
||||||
# matches = [match.value for match in jsonpath_expr.find(content)]
|
|
||||||
# path_vals[jsonpath] = matches
|
|
||||||
# vals_all = []
|
|
||||||
# for vals in path_vals.values():
|
|
||||||
# vals_all.extend(vals)
|
|
||||||
# vals_dict = {str(i): val for i, val in enumerate(vals_all)}#{"1":"val1","2":"val2"}
|
|
||||||
# chunks=flat_json_split(vals_dict,self.chunk_size)
|
|
||||||
# chunks_translated=self.translate_agent.send_prompts([json.dumps(chunk) for chunk in chunks])
|
|
||||||
# for chunk in chunks_translated:
|
|
||||||
# vals_dict.update(json.loads(chunk))
|
|
||||||
#
|
|
||||||
# #翻译后的{path1:[val1',val2']}
|
|
||||||
# reconstructed_dict = {}
|
|
||||||
# start_index = 0
|
|
||||||
# for key in path_vals.keys():
|
|
||||||
# length = len(path_vals[key])
|
|
||||||
# sub_list = list(vals_dict.values())[start_index: start_index + length]
|
|
||||||
# reconstructed_dict[key] = sub_list
|
|
||||||
# start_index += length
|
|
||||||
#
|
|
||||||
# for path in reconstructed_dict:
|
|
||||||
# path_expression = parse(path)
|
|
||||||
# matches = path_expression.find(content)
|
|
||||||
# for idx,match in enumerate(matches):
|
|
||||||
# match.full_path.update(content, reconstructed_dict[path][idx])
|
|
||||||
# return self
|
|
||||||
|
|
||||||
async def translate_async(self, document: Document) -> Self:
|
async def translate_async(self, document: Document) -> Self:
|
||||||
content = json.loads(document.content.decode())
|
content = json.loads(document.content.decode())
|
||||||
|
|
||||||
|
|||||||
@@ -47,3 +47,11 @@ class TXTExportable(Protocol[T_ExporterConfig]):
|
|||||||
|
|
||||||
def save_as_txt(self, name: str, output_dir: Path | str, config: T_ExporterConfig | None = None) -> Self:
|
def save_as_txt(self, name: str, output_dir: Path | str, config: T_ExporterConfig | None = None) -> Self:
|
||||||
...
|
...
|
||||||
|
|
||||||
|
@runtime_checkable
class JsonExportable(Protocol[T_ExporterConfig]):
    """Structural interface for objects that can export their result as JSON."""

    def export_to_json(self, config: T_ExporterConfig | None = None) -> str:
        """Return the JSON export as a string.

        Args:
            config: Optional exporter configuration.
        """
        ...

    def save_as_json(self, name: str, output_dir: Path | str, config: T_ExporterConfig | None = None) -> Self:
        """Save the JSON export under ``output_dir`` and return ``self``.

        Args:
            name: Name for the saved output.
            output_dir: Destination directory.
            config: Optional exporter configuration.
        """
        ...
|
||||||
|
|||||||
65
docutranslate/workflow/json_workflow.py
Normal file
65
docutranslate/workflow/json_workflow.py
Normal file
@@ -0,0 +1,65 @@
|
|||||||
|
from dataclasses import dataclass
|
||||||
|
from pathlib import Path
|
||||||
|
from typing import Self
|
||||||
|
|
||||||
|
from docutranslate.exporter.base import ExporterConfig
|
||||||
|
from docutranslate.exporter.js.json2html_exporter import Json2HTMLExporterConfig, Json2HTMLExporter
|
||||||
|
from docutranslate.exporter.js.json2json_exporter import Json2JsonExporter
|
||||||
|
from docutranslate.ir.document import Document
|
||||||
|
from docutranslate.translator.ai_translator.json_translator import JsonTranslatorConfig, JsonTranslator
|
||||||
|
from docutranslate.workflow.base import Workflow, WorkflowConfig
|
||||||
|
from docutranslate.workflow.interfaces import HTMLExportable, JsonExportable
|
||||||
|
|
||||||
|
|
||||||
|
@dataclass(kw_only=True)
class JsonWorkflowConfig(WorkflowConfig):
    """Configuration bundle for the JSON translation workflow."""

    # Settings handed to the JsonTranslator that performs the translation.
    translator_config: JsonTranslatorConfig
    # Default HTML export settings, used when a call supplies no config.
    html_exporter_config: Json2HTMLExporterConfig
|
||||||
|
|
||||||
|
|
||||||
|
class JsonWorkflow(Workflow[JsonWorkflowConfig, Document, Document], HTMLExportable[Json2HTMLExporterConfig],
                   JsonExportable[ExporterConfig]):
    """Workflow that translates a JSON document and exports it as HTML or JSON.

    Relies on ``self.config``, ``self.document_original``, ``self._export`` and
    ``self._save``, none of which are defined in this class (presumably they
    come from ``Workflow`` -- confirm).
    """

    def __init__(self, config: JsonWorkflowConfig):
        """Store the configuration and share its logger with the translator config."""
        super().__init__(config=config)
        if config.logger:
            translator_config = self.config.translator_config
            if translator_config:
                translator_config.logger = config.logger

    def _pre_translate(self, document_original: Document) -> tuple[Document, JsonTranslator]:
        """Return a working copy of the document and a translator for it.

        The original document is copied so that it is not the object handed to
        the translator.
        """
        document = document_original.copy()
        translator = JsonTranslator(self.config.translator_config)
        return document, translator

    def translate(self) -> Self:
        """Translate the original document synchronously and store the result."""
        document, translator = self._pre_translate(self.document_original)
        translator.translate(document)
        self.document_translated = document
        return self

    async def translate_async(self) -> Self:
        """Translate the original document asynchronously and store the result."""
        document, translator = self._pre_translate(self.document_original)
        await translator.translate_async(document)
        self.document_translated = document
        return self

    def export_to_html(self, config: Json2HTMLExporterConfig | None = None) -> str:
        """Return the HTML export as a string.

        Args:
            config: HTML export options; falls back to the workflow's
                ``html_exporter_config`` when omitted.
        """
        config = config or self.config.html_exporter_config
        exported = self._export(Json2HTMLExporter(config))
        return exported.content.decode()

    def export_to_json(self, config: ExporterConfig | None = None) -> str:
        """Return the JSON export as a string.

        Args:
            config: Accepted so the signature matches ``JsonExportable``;
                the JSON exporter takes no options, so it is ignored.
        """
        exported = self._export(Json2JsonExporter())
        return exported.content.decode()

    def save_as_html(self, name: str | None = None, output_dir: Path | str = "./output",
                     config: Json2HTMLExporterConfig | None = None) -> Self:
        """Save the HTML export and return ``self``.

        Args:
            name: Output name, forwarded to ``_save``.
            output_dir: Destination directory.
            config: HTML export options; falls back to the workflow's
                ``html_exporter_config`` when omitted.
        """
        config = config or self.config.html_exporter_config
        self._save(exporter=Json2HTMLExporter(config), name=name, output_dir=output_dir)
        return self

    def save_as_json(self, name: str | None = None, output_dir: Path | str = "./output",
                     config: ExporterConfig | None = None) -> Self:
        """Save the JSON export and return ``self``.

        Args:
            name: Output name, forwarded to ``_save``.
            output_dir: Destination directory.
            config: Accepted so the signature matches ``JsonExportable``;
                the JSON exporter takes no options, so it is ignored.
        """
        self._save(exporter=Json2JsonExporter(), name=name, output_dir=output_dir)
        return self
|
||||||
Reference in New Issue
Block a user