From b484ba60bcc51e3c8c0bd814a276c7a3ce67444a Mon Sep 17 00:00:00 2001 From: xunbu Date: Thu, 31 Jul 2025 08:47:52 +0800 Subject: [PATCH] =?UTF-8?q?=E5=AE=8C=E6=88=90txtworkflow=E9=87=8D=E6=9E=84?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docutranslate/workflow/interfaces.py | 24 ++++++++++---------- docutranslate/workflow/md_based_workflow.py | 11 ++++----- docutranslate/workflow/txt_workflow.py | 25 ++++++++++++++++----- 3 files changed, 36 insertions(+), 24 deletions(-) diff --git a/docutranslate/workflow/interfaces.py b/docutranslate/workflow/interfaces.py index 38dd920..df35624 100644 --- a/docutranslate/workflow/interfaces.py +++ b/docutranslate/workflow/interfaces.py @@ -3,40 +3,40 @@ from typing import Protocol, Self, TypeVar, runtime_checkable from docutranslate.exporter.base import ExporterConfig -T = TypeVar("T", bound=ExporterConfig) +T_ExporterConfig = TypeVar("T_ExporterConfig", bound=ExporterConfig) @runtime_checkable -class HTMLExportable(Protocol[T]): - def export_to_html(self, export_config: T | None = None) -> str: +class HTMLExportable(Protocol[T_ExporterConfig]): + def export_to_html(self, config: T_ExporterConfig | None = None) -> str: ... - def save_as_html(self, name: str, output_dir: Path | str, export_config: T | None = None) -> Self: + def save_as_html(self, name: str, output_dir: Path | str, config: T_ExporterConfig | None = None) -> Self: ... @runtime_checkable -class MDExportable(Protocol[T]): +class MDExportable(Protocol[T_ExporterConfig]): - def export_to_markdown(self, export_config: T | None = None) -> str: + def export_to_markdown(self, config: T_ExporterConfig | None = None) -> str: ... - def save_as_markdown(self, name: str, output_dir: Path | str, export_config: T | None = None) -> Self: + def save_as_markdown(self, name: str, output_dir: Path | str, config: T_ExporterConfig | None = None) -> Self: ... @runtime_checkable -class MDZIPExportable(Protocol[T]): +class MDZIPExportable(Protocol[T_ExporterConfig]): - def export_to_markdown_zip(self, export_config: T | None = None) -> bytes: + def export_to_markdown_zip(self, config: T_ExporterConfig | None = None) -> bytes: ... - def save_as_markdown_zip(self, name: str, output_dir: Path | str, export_config: T | None = None) -> Self: + def save_as_markdown_zip(self, name: str, output_dir: Path | str, config: T_ExporterConfig | None = None) -> Self: ... @runtime_checkable -class MDFormatsExportable(MDZIPExportable[T], MDExportable[T], Protocol): +class MDFormatsExportable(MDZIPExportable[T_ExporterConfig], MDExportable[T_ExporterConfig]): ... @runtime_checkable -class TXTExportable(Protocol[T]): +class TXTExportable(Protocol[T_ExporterConfig]): def export_to_txt(self) -> str: ... diff --git a/docutranslate/workflow/md_based_workflow.py b/docutranslate/workflow/md_based_workflow.py index 03b21ac..a71b03d 100644 --- a/docutranslate/workflow/md_based_workflow.py +++ b/docutranslate/workflow/md_based_workflow.py @@ -1,6 +1,5 @@ import asyncio from dataclasses import dataclass -from logging import Logger from pathlib import Path from typing import Self, Tuple, Type @@ -26,7 +25,6 @@ from docutranslate.translator.ai_translator.md_translator import MDTranslatorCon @dataclass(kw_only=True) class MarkdownBasedWorkflowConfig(WorkflowConfig): - logger: Logger | None = None convert_engine: ConvertEnginType converter_config: X2MarkdownConverterConfig | None translator_config: MDTranslatorConfig @@ -45,11 +43,10 @@ class MarkdownBasedWorkflow(Workflow[MarkdownBasedWorkflowConfig, Document, Mark if DOCLING_EXIST: self._converter_factory["docling"] = (ConverterDocling, ConverterDoclingConfig) self.convert_engine = config.convert_engine - self.logger = config.logger - if self.logger: - for config in [self.config.converter_config, self.config.translator_config, self.config.html_exporter_config]: - if config is not None: - config.logger = self.logger + if config.logger: + for sub_config in [self.config.converter_config, self.config.translator_config, self.config.html_exporter_config]: + if sub_config and sub_config.logger is not None: + sub_config.logger = config.logger def _get_document_md(self, convert_engin: ConvertEnginType, convert_config: X2MarkdownConverterConfig): if self.document_original is None: diff --git a/docutranslate/workflow/txt_workflow.py b/docutranslate/workflow/txt_workflow.py index 4def08e..7b348df 100644 --- a/docutranslate/workflow/txt_workflow.py +++ b/docutranslate/workflow/txt_workflow.py @@ -1,17 +1,29 @@ +from dataclasses import dataclass from pathlib import Path from typing import Self from docutranslate.exporter.txt.txt2html_exporter import TXT2HTMLExporterConfig, TXT2HTMLExporter from docutranslate.exporter.txt.txt2txt_exporter import TXT2TXTExporter -from docutranslate.workflow.base import Workflow +from docutranslate.ir.document import Document +from docutranslate.workflow.base import Workflow, WorkflowConfig from docutranslate.workflow.interfaces import HTMLExportable, TXTExportable from docutranslate.translator.ai_translator.txt_translator import TXTTranslatorConfig, TXTTranslator +@dataclass(kw_only=True) +class TXTWorkflowConfig(WorkflowConfig): + translator_config: TXTTranslatorConfig + html_exporter_config: TXT2HTMLExporterConfig +class TXTWorkflow(Workflow[TXTWorkflowConfig,Document,Document], HTMLExportable, TXTExportable): + def __init__(self,config:TXTWorkflowConfig): + super().__init__(config=config) + if config.logger: + for sub_config in [self.config.translator_config]: + if sub_config and sub_config.logger is not None: + sub_config.logger=config.logger -class TXTWorkflow(Workflow, HTMLExportable, TXTExportable): - - def translate(self, translate_config: TXTTranslatorConfig) -> Self: + def translate(self) -> Self: + translate_config=self.config.translator_config document = self.document_original.copy() # 翻译解析后文件 translator = TXTTranslator(translate_config) @@ -19,7 +31,8 @@ class TXTWorkflow(Workflow, HTMLExportable, TXTExportable): self.document_translated = document return self - async def translate_async(self, translate_config: TXTTranslatorConfig) -> Self: + async def translate_async(self) -> Self: + translate_config=self.config.translator_config document = self.document_original.copy() # 翻译解析后文件 translator = TXTTranslator(translate_config) @@ -28,6 +41,7 @@ class TXTWorkflow(Workflow, HTMLExportable, TXTExportable): return self def export_to_html(self, export_config: TXT2HTMLExporterConfig=None) -> str: + export_config=export_config or self.config.html_exporter_config docu = self._export(TXT2HTMLExporter(export_config)) return docu.content.decode() @@ -37,6 +51,7 @@ class TXTWorkflow(Workflow, HTMLExportable, TXTExportable): def save_as_html(self, name: str = None, output_dir: Path | str = "./output", export_config: TXT2HTMLExporterConfig | None = None) -> Self: + export_config=export_config or self.config.html_exporter_config self._save(exporter=TXT2HTMLExporter(export_config), name=name, output_dir=output_dir) return self