添加xlsxworkflow

This commit is contained in:
xunbu
2025-08-04 18:35:26 +08:00
parent 141de321db
commit c364752b2d
4 changed files with 79 additions and 7 deletions

View File

@@ -1,6 +1,5 @@
from abc import abstractmethod from abc import abstractmethod
from dataclasses import dataclass from dataclasses import dataclass
from logging import Logger
from typing import TypeVar from typing import TypeVar
from docutranslate.agents.agent import ThinkingMode from docutranslate.agents.agent import ThinkingMode

View File

@@ -12,7 +12,7 @@ from docutranslate.translator.base import Translator
@dataclass @dataclass
class XlsxTranslatorConfig(AiTranslatorConfig): class XlsxTranslatorConfig(AiTranslatorConfig):
position: Literal["replace", "append", "prepend"] = "replace" insert_mode: Literal["replace", "append", "prepend"] = "replace"
separator: str = "\n" separator: str = "\n"
@@ -32,7 +32,7 @@ class XlsxTranslator(Translator):
timeout=config.timeout, timeout=config.timeout,
logger=self.logger) logger=self.logger)
self.translate_agent = SegmentsTranslateAgent(agent_config) self.translate_agent = SegmentsTranslateAgent(agent_config)
self.position = config.position self.insert_mode = config.insert_mode
self.separator = config.separator self.separator = config.separator
def _pre_translate(self, document: Document): def _pre_translate(self, document: Document):
@@ -66,11 +66,11 @@ class XlsxTranslator(Translator):
# 定位到工作表和单元格 # 定位到工作表和单元格
sheet = workbook[sheet_name] sheet = workbook[sheet_name]
if self.position == "replace": if self.insert_mode == "replace":
sheet[coordinate] = translated_text sheet[coordinate] = translated_text
elif self.position == "append": elif self.insert_mode == "append":
sheet[coordinate] = original_text + self.separator + translated_text sheet[coordinate] = original_text + self.separator + translated_text
elif self.position == "prepend": elif self.insert_mode == "prepend":
sheet[coordinate] = translated_text + self.separator + original_text sheet[coordinate] = translated_text + self.separator + original_text
else: else:
self.logger.error("不正确的XlsxTranslatorConfig参数") self.logger.error("不正确的XlsxTranslatorConfig参数")
@@ -120,7 +120,7 @@ if __name__ == '__main__':
api_key=r"969ba51b61914cc2b710d1393dca1a3c.hSuATex5IoNVZNGu", api_key=r"969ba51b61914cc2b710d1393dca1a3c.hSuATex5IoNVZNGu",
model_id=r"glm-4-flash", model_id=r"glm-4-flash",
to_lang="英文", to_lang="英文",
position="append" insert_mode="append"
) )
translator = XlsxTranslator(config) translator = XlsxTranslator(config)
document = Document.from_path(r"C:\Users\jxgm\Desktop\translate\docutranslate\tests\files\工业互联分组表.xlsx") document = Document.from_path(r"C:\Users\jxgm\Desktop\translate\docutranslate\tests\files\工业互联分组表.xlsx")

View File

@@ -55,3 +55,11 @@ class JsonExportable(Protocol[T_ExporterConfig]):
def save_as_json(self, name: str, output_dir: Path | str, config: T_ExporterConfig | None = None) -> Self: def save_as_json(self, name: str, output_dir: Path | str, config: T_ExporterConfig | None = None) -> Self:
... ...
@runtime_checkable
class XlsxExportable(Protocol[T_ExporterConfig]):
    """Structural interface for objects that can emit their result as XLSX.

    The protocol is generic over the exporter configuration type accepted by
    both methods. It only declares signatures; behaviour is defined by the
    implementing class.
    """

    def export_to_xlsx(self, config: T_ExporterConfig | None = None) -> bytes:
        """Return the XLSX output as bytes.

        Args:
            config: Optional exporter configuration.
        """
        ...

    def save_as_xlsx(self, name: str, output_dir: Path | str, config: T_ExporterConfig | None = None) -> Self:
        """Persist the XLSX output and return the implementing object.

        Args:
            name: Name for the saved output.
            output_dir: Destination directory, as a ``Path`` or a string.
            config: Optional exporter configuration.
        """
        ...

View File

@@ -0,0 +1,65 @@
from dataclasses import dataclass
from pathlib import Path
from typing import Self
from docutranslate.exporter.base import ExporterConfig
from docutranslate.exporter.xlsx.xlsx2html_exporter import Xlsx2HTMLExporterConfig, Xlsx2HTMLExporter
from docutranslate.exporter.xlsx.xlsx2xlsx_exporter import Xlsx2XlsxExporter
from docutranslate.ir.document import Document
from docutranslate.translator.ai_translator.xlsx_translator import XlsxTranslatorConfig, XlsxTranslator
from docutranslate.workflow.base import Workflow, WorkflowConfig
from docutranslate.workflow.interfaces import HTMLExportable, XlsxExportable
@dataclass(kw_only=True)
class XlsxWorkflowConfig(WorkflowConfig):
    """Keyword-only configuration consumed by ``XlsxWorkflow``.

    Extends ``WorkflowConfig`` with the settings of the two stages the
    workflow drives: translation and HTML export.
    """

    # Passed to XlsxTranslator when a translation run is prepared.
    translator_config: XlsxTranslatorConfig
    # Fallback HTML exporter settings, used when a call supplies no config.
    html_exporter_config: Xlsx2HTMLExporterConfig
class XlsxWorkflow(Workflow[XlsxWorkflowConfig, Document, Document], HTMLExportable[Xlsx2HTMLExporterConfig],
                   XlsxExportable[ExporterConfig]):
    """Workflow that translates an XLSX document and exports it as HTML or XLSX.

    Translation is delegated to ``XlsxTranslator``; export is delegated to
    ``Xlsx2HTMLExporter`` and ``Xlsx2XlsxExporter`` through the ``_export`` and
    ``_save`` helpers, which are not defined in this class (presumably
    inherited from ``Workflow`` -- confirm against the base class).
    """

    def __init__(self, config: XlsxWorkflowConfig):
        """Initialise the workflow and share its logger with the translator config.

        Args:
            config: Workflow configuration. When ``config.logger`` is truthy it
                is assigned to ``translator_config.logger`` as well.
        """
        super().__init__(config=config)
        if config.logger:
            translator_config = self.config.translator_config
            if translator_config:
                translator_config.logger = config.logger

    def _pre_translate(self, document_original: Document):
        """Build the inputs of a translation run.

        Args:
            document_original: Source document; it is copied, not modified here.

        Returns:
            A ``(document, translator)`` tuple: the copy to translate and a
            fresh ``XlsxTranslator`` built from ``translator_config``.
        """
        document = document_original.copy()
        translator = XlsxTranslator(self.config.translator_config)
        return document, translator

    def translate(self) -> Self:
        """Translate a copy of ``self.document_original`` synchronously.

        The translated copy is stored in ``self.document_translated``.

        Returns:
            This workflow, to allow call chaining.
        """
        document, translator = self._pre_translate(self.document_original)
        translator.translate(document)
        self.document_translated = document
        return self

    async def translate_async(self) -> Self:
        """Asynchronous counterpart of :meth:`translate`.

        Returns:
            This workflow, to allow call chaining.
        """
        document, translator = self._pre_translate(self.document_original)
        await translator.translate_async(document)
        self.document_translated = document
        return self

    def export_to_html(self, config: Xlsx2HTMLExporterConfig | None = None) -> str:
        """Export through ``Xlsx2HTMLExporter`` and return the result as text.

        Args:
            config: Exporter configuration; falls back to
                ``self.config.html_exporter_config`` when falsy.

        Returns:
            The exported content decoded with the default codec of ``decode()``.
        """
        config = config or self.config.html_exporter_config
        docu = self._export(Xlsx2HTMLExporter(config))
        return docu.content.decode()

    # NOTE(review): the config parameter is named ``_`` here and in
    # save_as_xlsx because it is ignored. Protocols that name it ``config``
    # cannot be satisfied by keyword; the name is kept to preserve the
    # existing signature.
    def export_to_xlsx(self, _: ExporterConfig | None = None) -> bytes:
        """Export through ``Xlsx2XlsxExporter`` and return the raw content.

        Args:
            _: Ignored; the XLSX exporter is constructed without a configuration.
        """
        docu = self._export(Xlsx2XlsxExporter())
        return docu.content

    def save_as_html(self, name: str | None = None, output_dir: Path | str = "./output",
                     config: Xlsx2HTMLExporterConfig | None = None) -> Self:
        """Save the HTML export via ``_save``.

        Args:
            name: Output name forwarded to ``_save``; ``None`` leaves the choice
                to ``_save``.
            output_dir: Destination directory forwarded to ``_save``.
            config: Exporter configuration; falls back to
                ``self.config.html_exporter_config`` when falsy.

        Returns:
            This workflow, to allow call chaining.
        """
        config = config or self.config.html_exporter_config
        self._save(exporter=Xlsx2HTMLExporter(config), name=name, output_dir=output_dir)
        return self

    def save_as_xlsx(self, name: str | None = None, output_dir: Path | str = "./output",
                     _: ExporterConfig | None = None) -> Self:
        """Save the XLSX export via ``_save``.

        Args:
            name: Output name forwarded to ``_save``; ``None`` leaves the choice
                to ``_save``.
            output_dir: Destination directory forwarded to ``_save``.
            _: Ignored; the XLSX exporter is constructed without a configuration.

        Returns:
            This workflow, to allow call chaining.
        """
        self._save(exporter=Xlsx2XlsxExporter(), name=name, output_dir=output_dir)
        return self