完成txtworkflow重构
This commit is contained in:
@@ -3,40 +3,40 @@ from typing import Protocol, Self, TypeVar, runtime_checkable
|
|||||||
|
|
||||||
from docutranslate.exporter.base import ExporterConfig
|
from docutranslate.exporter.base import ExporterConfig
|
||||||
|
|
||||||
T = TypeVar("T", bound=ExporterConfig)
|
T_ExporterConfig = TypeVar("T_ExporterConfig", bound=ExporterConfig)
|
||||||
|
|
||||||
@runtime_checkable
|
@runtime_checkable
|
||||||
class HTMLExportable(Protocol[T]):
|
class HTMLExportable(Protocol[T_ExporterConfig]):
|
||||||
def export_to_html(self, export_config: T | None = None) -> str:
|
def export_to_html(self, config: T_ExporterConfig | None = None) -> str:
|
||||||
...
|
...
|
||||||
|
|
||||||
def save_as_html(self, name: str, output_dir: Path | str, export_config: T | None = None) -> Self:
|
def save_as_html(self, name: str, output_dir: Path | str, config: T_ExporterConfig | None = None) -> Self:
|
||||||
...
|
...
|
||||||
|
|
||||||
@runtime_checkable
|
@runtime_checkable
|
||||||
class MDExportable(Protocol[T]):
|
class MDExportable(Protocol[T_ExporterConfig]):
|
||||||
|
|
||||||
def export_to_markdown(self, export_config: T | None = None) -> str:
|
def export_to_markdown(self, config: T_ExporterConfig | None = None) -> str:
|
||||||
...
|
...
|
||||||
|
|
||||||
def save_as_markdown(self, name: str, output_dir: Path | str, export_config: T | None = None) -> Self:
|
def save_as_markdown(self, name: str, output_dir: Path | str, config: T_ExporterConfig | None = None) -> Self:
|
||||||
...
|
...
|
||||||
|
|
||||||
@runtime_checkable
|
@runtime_checkable
|
||||||
class MDZIPExportable(Protocol[T]):
|
class MDZIPExportable(Protocol[T_ExporterConfig]):
|
||||||
|
|
||||||
def export_to_markdown_zip(self, export_config: T | None = None) -> bytes:
|
def export_to_markdown_zip(self, config: T_ExporterConfig | None = None) -> bytes:
|
||||||
...
|
...
|
||||||
|
|
||||||
def save_as_markdown_zip(self, name: str, output_dir: Path | str, export_config: T | None = None) -> Self:
|
def save_as_markdown_zip(self, name: str, output_dir: Path | str, config: T_ExporterConfig | None = None) -> Self:
|
||||||
...
|
...
|
||||||
|
|
||||||
@runtime_checkable
|
@runtime_checkable
|
||||||
class MDFormatsExportable(MDZIPExportable[T], MDExportable[T], Protocol):
|
class MDFormatsExportable(MDZIPExportable[T_ExporterConfig], MDExportable[T_ExporterConfig]):
|
||||||
...
|
...
|
||||||
|
|
||||||
@runtime_checkable
|
@runtime_checkable
|
||||||
class TXTExportable(Protocol[T]):
|
class TXTExportable(Protocol[T_ExporterConfig]):
|
||||||
def export_to_txt(self) -> str:
|
def export_to_txt(self) -> str:
|
||||||
...
|
...
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,5 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from logging import Logger
|
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Self, Tuple, Type
|
from typing import Self, Tuple, Type
|
||||||
|
|
||||||
@@ -26,7 +25,6 @@ from docutranslate.translator.ai_translator.md_translator import MDTranslatorCon
|
|||||||
|
|
||||||
@dataclass(kw_only=True)
|
@dataclass(kw_only=True)
|
||||||
class MarkdownBasedWorkflowConfig(WorkflowConfig):
|
class MarkdownBasedWorkflowConfig(WorkflowConfig):
|
||||||
logger: Logger | None = None
|
|
||||||
convert_engine: ConvertEnginType
|
convert_engine: ConvertEnginType
|
||||||
converter_config: X2MarkdownConverterConfig | None
|
converter_config: X2MarkdownConverterConfig | None
|
||||||
translator_config: MDTranslatorConfig
|
translator_config: MDTranslatorConfig
|
||||||
@@ -45,11 +43,10 @@ class MarkdownBasedWorkflow(Workflow[MarkdownBasedWorkflowConfig, Document, Mark
|
|||||||
if DOCLING_EXIST:
|
if DOCLING_EXIST:
|
||||||
self._converter_factory["docling"] = (ConverterDocling, ConverterDoclingConfig)
|
self._converter_factory["docling"] = (ConverterDocling, ConverterDoclingConfig)
|
||||||
self.convert_engine = config.convert_engine
|
self.convert_engine = config.convert_engine
|
||||||
self.logger = config.logger
|
if config.logger:
|
||||||
if self.logger:
|
for sub_config in [self.config.converter_config, self.config.translator_config, self.config.html_exporter_config]:
|
||||||
for config in [self.config.converter_config, self.config.translator_config, self.config.html_exporter_config]:
|
if sub_config and sub_config.logger is not None:
|
||||||
if config is not None:
|
sub_config.logger = config.logger
|
||||||
config.logger = self.logger
|
|
||||||
|
|
||||||
def _get_document_md(self, convert_engin: ConvertEnginType, convert_config: X2MarkdownConverterConfig):
|
def _get_document_md(self, convert_engin: ConvertEnginType, convert_config: X2MarkdownConverterConfig):
|
||||||
if self.document_original is None:
|
if self.document_original is None:
|
||||||
|
|||||||
@@ -1,17 +1,29 @@
|
|||||||
|
from dataclasses import dataclass
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
from typing import Self
|
from typing import Self
|
||||||
|
|
||||||
from docutranslate.exporter.txt.txt2html_exporter import TXT2HTMLExporterConfig, TXT2HTMLExporter
|
from docutranslate.exporter.txt.txt2html_exporter import TXT2HTMLExporterConfig, TXT2HTMLExporter
|
||||||
from docutranslate.exporter.txt.txt2txt_exporter import TXT2TXTExporter
|
from docutranslate.exporter.txt.txt2txt_exporter import TXT2TXTExporter
|
||||||
from docutranslate.workflow.base import Workflow
|
from docutranslate.ir.document import Document
|
||||||
|
from docutranslate.workflow.base import Workflow, WorkflowConfig
|
||||||
from docutranslate.workflow.interfaces import HTMLExportable, TXTExportable
|
from docutranslate.workflow.interfaces import HTMLExportable, TXTExportable
|
||||||
from docutranslate.translator.ai_translator.txt_translator import TXTTranslatorConfig, TXTTranslator
|
from docutranslate.translator.ai_translator.txt_translator import TXTTranslatorConfig, TXTTranslator
|
||||||
|
|
||||||
|
@dataclass(kw_only=True)
|
||||||
|
class TXTWorkflowConfig(WorkflowConfig):
|
||||||
|
translator_config: TXTTranslatorConfig
|
||||||
|
html_exporter_config: TXT2HTMLExporterConfig
|
||||||
|
|
||||||
|
class TXTWorkflow(Workflow[TXTWorkflowConfig,Document,Document], HTMLExportable, TXTExportable):
|
||||||
|
def __init__(self,config:TXTWorkflowConfig):
|
||||||
|
super().__init__(config=config)
|
||||||
|
if config.logger:
|
||||||
|
for sub_config in [self.config.translator_config]:
|
||||||
|
if sub_config and sub_config.logger is not None:
|
||||||
|
sub_config.logger=config.logger
|
||||||
|
|
||||||
class TXTWorkflow(Workflow, HTMLExportable, TXTExportable):
|
def translate(self) -> Self:
|
||||||
|
translate_config=self.config.translator_config
|
||||||
def translate(self, translate_config: TXTTranslatorConfig) -> Self:
|
|
||||||
document = self.document_original.copy()
|
document = self.document_original.copy()
|
||||||
# 翻译解析后文件
|
# 翻译解析后文件
|
||||||
translator = TXTTranslator(translate_config)
|
translator = TXTTranslator(translate_config)
|
||||||
@@ -19,7 +31,8 @@ class TXTWorkflow(Workflow, HTMLExportable, TXTExportable):
|
|||||||
self.document_translated = document
|
self.document_translated = document
|
||||||
return self
|
return self
|
||||||
|
|
||||||
async def translate_async(self, translate_config: TXTTranslatorConfig) -> Self:
|
async def translate_async(self) -> Self:
|
||||||
|
translate_config=self.config.translator_config
|
||||||
document = self.document_original.copy()
|
document = self.document_original.copy()
|
||||||
# 翻译解析后文件
|
# 翻译解析后文件
|
||||||
translator = TXTTranslator(translate_config)
|
translator = TXTTranslator(translate_config)
|
||||||
@@ -28,6 +41,7 @@ class TXTWorkflow(Workflow, HTMLExportable, TXTExportable):
|
|||||||
return self
|
return self
|
||||||
|
|
||||||
def export_to_html(self, export_config: TXT2HTMLExporterConfig=None) -> str:
|
def export_to_html(self, export_config: TXT2HTMLExporterConfig=None) -> str:
|
||||||
|
export_config=export_config or self.config.html_exporter_config
|
||||||
docu = self._export(TXT2HTMLExporter(export_config))
|
docu = self._export(TXT2HTMLExporter(export_config))
|
||||||
return docu.content.decode()
|
return docu.content.decode()
|
||||||
|
|
||||||
@@ -37,6 +51,7 @@ class TXTWorkflow(Workflow, HTMLExportable, TXTExportable):
|
|||||||
|
|
||||||
def save_as_html(self, name: str = None, output_dir: Path | str = "./output",
|
def save_as_html(self, name: str = None, output_dir: Path | str = "./output",
|
||||||
export_config: TXT2HTMLExporterConfig | None = None) -> Self:
|
export_config: TXT2HTMLExporterConfig | None = None) -> Self:
|
||||||
|
export_config=export_config or self.config.html_exporter_config
|
||||||
self._save(exporter=TXT2HTMLExporter(export_config), name=name, output_dir=output_dir)
|
self._save(exporter=TXT2HTMLExporter(export_config), name=name, output_dir=output_dir)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user