完成txtworkflow重构

This commit is contained in:
xunbu
2025-07-31 08:47:52 +08:00
parent f4b3432f45
commit b484ba60bc
3 changed files with 36 additions and 24 deletions

View File

@@ -3,40 +3,40 @@ from typing import Protocol, Self, TypeVar, runtime_checkable
from docutranslate.exporter.base import ExporterConfig from docutranslate.exporter.base import ExporterConfig
T = TypeVar("T", bound=ExporterConfig) T_ExporterConfig = TypeVar("T_ExporterConfig", bound=ExporterConfig)
@runtime_checkable @runtime_checkable
class HTMLExportable(Protocol[T]): class HTMLExportable(Protocol[T_ExporterConfig]):
def export_to_html(self, export_config: T | None = None) -> str: def export_to_html(self, config: T_ExporterConfig | None = None) -> str:
... ...
def save_as_html(self, name: str, output_dir: Path | str, export_config: T | None = None) -> Self: def save_as_html(self, name: str, output_dir: Path | str, config: T_ExporterConfig | None = None) -> Self:
... ...
@runtime_checkable @runtime_checkable
class MDExportable(Protocol[T]): class MDExportable(Protocol[T_ExporterConfig]):
def export_to_markdown(self, export_config: T | None = None) -> str: def export_to_markdown(self, config: T_ExporterConfig | None = None) -> str:
... ...
def save_as_markdown(self, name: str, output_dir: Path | str, export_config: T | None = None) -> Self: def save_as_markdown(self, name: str, output_dir: Path | str, config: T_ExporterConfig | None = None) -> Self:
... ...
@runtime_checkable @runtime_checkable
class MDZIPExportable(Protocol[T]): class MDZIPExportable(Protocol[T_ExporterConfig]):
def export_to_markdown_zip(self, export_config: T | None = None) -> bytes: def export_to_markdown_zip(self, config: T_ExporterConfig | None = None) -> bytes:
... ...
def save_as_markdown_zip(self, name: str, output_dir: Path | str, export_config: T | None = None) -> Self: def save_as_markdown_zip(self, name: str, output_dir: Path | str, config: T_ExporterConfig | None = None) -> Self:
... ...
@runtime_checkable @runtime_checkable
class MDFormatsExportable(MDZIPExportable[T], MDExportable[T], Protocol): class MDFormatsExportable(MDZIPExportable[T_ExporterConfig], MDExportable[T_ExporterConfig]):
... ...
@runtime_checkable @runtime_checkable
class TXTExportable(Protocol[T]): class TXTExportable(Protocol[T_ExporterConfig]):
def export_to_txt(self) -> str: def export_to_txt(self) -> str:
... ...

View File

@@ -1,6 +1,5 @@
import asyncio import asyncio
from dataclasses import dataclass from dataclasses import dataclass
from logging import Logger
from pathlib import Path from pathlib import Path
from typing import Self, Tuple, Type from typing import Self, Tuple, Type
@@ -26,7 +25,6 @@ from docutranslate.translator.ai_translator.md_translator import MDTranslatorCon
@dataclass(kw_only=True) @dataclass(kw_only=True)
class MarkdownBasedWorkflowConfig(WorkflowConfig): class MarkdownBasedWorkflowConfig(WorkflowConfig):
logger: Logger | None = None
convert_engine: ConvertEnginType convert_engine: ConvertEnginType
converter_config: X2MarkdownConverterConfig | None converter_config: X2MarkdownConverterConfig | None
translator_config: MDTranslatorConfig translator_config: MDTranslatorConfig
@@ -45,11 +43,10 @@ class MarkdownBasedWorkflow(Workflow[MarkdownBasedWorkflowConfig, Document, Mark
if DOCLING_EXIST: if DOCLING_EXIST:
self._converter_factory["docling"] = (ConverterDocling, ConverterDoclingConfig) self._converter_factory["docling"] = (ConverterDocling, ConverterDoclingConfig)
self.convert_engine = config.convert_engine self.convert_engine = config.convert_engine
self.logger = config.logger if config.logger:
if self.logger: for sub_config in [self.config.converter_config, self.config.translator_config, self.config.html_exporter_config]:
for config in [self.config.converter_config, self.config.translator_config, self.config.html_exporter_config]: if sub_config and sub_config.logger is not None:
if config is not None: sub_config.logger = config.logger
config.logger = self.logger
def _get_document_md(self, convert_engin: ConvertEnginType, convert_config: X2MarkdownConverterConfig): def _get_document_md(self, convert_engin: ConvertEnginType, convert_config: X2MarkdownConverterConfig):
if self.document_original is None: if self.document_original is None:

View File

@@ -1,17 +1,29 @@
from dataclasses import dataclass
from pathlib import Path from pathlib import Path
from typing import Self from typing import Self
from docutranslate.exporter.txt.txt2html_exporter import TXT2HTMLExporterConfig, TXT2HTMLExporter from docutranslate.exporter.txt.txt2html_exporter import TXT2HTMLExporterConfig, TXT2HTMLExporter
from docutranslate.exporter.txt.txt2txt_exporter import TXT2TXTExporter from docutranslate.exporter.txt.txt2txt_exporter import TXT2TXTExporter
from docutranslate.workflow.base import Workflow from docutranslate.ir.document import Document
from docutranslate.workflow.base import Workflow, WorkflowConfig
from docutranslate.workflow.interfaces import HTMLExportable, TXTExportable from docutranslate.workflow.interfaces import HTMLExportable, TXTExportable
from docutranslate.translator.ai_translator.txt_translator import TXTTranslatorConfig, TXTTranslator from docutranslate.translator.ai_translator.txt_translator import TXTTranslatorConfig, TXTTranslator
@dataclass(kw_only=True)
class TXTWorkflowConfig(WorkflowConfig):
translator_config: TXTTranslatorConfig
html_exporter_config: TXT2HTMLExporterConfig
class TXTWorkflow(Workflow[TXTWorkflowConfig,Document,Document], HTMLExportable, TXTExportable):
def __init__(self,config:TXTWorkflowConfig):
super().__init__(config=config)
if config.logger:
for sub_config in [self.config.translator_config]:
if sub_config and sub_config.logger is not None:
sub_config.logger=config.logger
class TXTWorkflow(Workflow, HTMLExportable, TXTExportable): def translate(self) -> Self:
translate_config=self.config.translator_config
def translate(self, translate_config: TXTTranslatorConfig) -> Self:
document = self.document_original.copy() document = self.document_original.copy()
# 翻译解析后文件 # 翻译解析后文件
translator = TXTTranslator(translate_config) translator = TXTTranslator(translate_config)
@@ -19,7 +31,8 @@ class TXTWorkflow(Workflow, HTMLExportable, TXTExportable):
self.document_translated = document self.document_translated = document
return self return self
async def translate_async(self, translate_config: TXTTranslatorConfig) -> Self: async def translate_async(self) -> Self:
translate_config=self.config.translator_config
document = self.document_original.copy() document = self.document_original.copy()
# 翻译解析后文件 # 翻译解析后文件
translator = TXTTranslator(translate_config) translator = TXTTranslator(translate_config)
@@ -28,6 +41,7 @@ class TXTWorkflow(Workflow, HTMLExportable, TXTExportable):
return self return self
def export_to_html(self, export_config: TXT2HTMLExporterConfig=None) -> str: def export_to_html(self, export_config: TXT2HTMLExporterConfig=None) -> str:
export_config=export_config or self.config.html_exporter_config
docu = self._export(TXT2HTMLExporter(export_config)) docu = self._export(TXT2HTMLExporter(export_config))
return docu.content.decode() return docu.content.decode()
@@ -37,6 +51,7 @@ class TXTWorkflow(Workflow, HTMLExportable, TXTExportable):
def save_as_html(self, name: str = None, output_dir: Path | str = "./output", def save_as_html(self, name: str = None, output_dir: Path | str = "./output",
export_config: TXT2HTMLExporterConfig | None = None) -> Self: export_config: TXT2HTMLExporterConfig | None = None) -> Self:
export_config=export_config or self.config.html_exporter_config
self._save(exporter=TXT2HTMLExporter(export_config), name=name, output_dir=output_dir) self._save(exporter=TXT2HTMLExporter(export_config), name=name, output_dir=output_dir)
return self return self