重构workflow
This commit is contained in:
19
README.md
19
README.md
@@ -4,7 +4,7 @@
|
||||
[](https://github.com/xunbu/docutranslate/releases)
|
||||
[](https://pypi.org/project/docutranslate/)
|
||||
[](https://www.python.org/)
|
||||
[](./LICENSE)
|
||||
[](./LICENSE)
|
||||
|
||||
文件翻译工具,借助[docling](https://github.com/docling-project/docling)、[minerU](https://mineru.net/)与大语言模型实现多种格式文件的翻译
|
||||
|
||||
@@ -43,15 +43,14 @@
|
||||
2. `pip install -e .`
|
||||
3. `uv pip install -e .`#使用uv
|
||||
|
||||
# 支持的文件格式
|
||||
# 翻译工作流
|
||||
|
||||
| 输入格式 | 输出格式 |
|
||||
|----------------|--------------|
|
||||
| PDF | Markdown(推荐) |
|
||||
| Markdown | HTML |
|
||||
| HTML、XHTML | PDF(仅交互界面支持) |
|
||||
| CSV | |
|
||||
| DOC、DOCX(部分支持) | |
|
||||
| 工作流 | 代码 | 输入格式 | 输出格式 |
|
||||
|-------------------------|------------------|----------------------------------------|----------------------|
|
||||
| `MarkdownBasedWorkflow` | `markdown_based` | `.pdf` `.md` `.png` `.jpeg` `.docx`等 | `.md` `.html` `.pdf` |
|
||||
| `TXTWorkflow` | `txt` | `.txt` | `.txt` `.html` `.pdf` |
|
||||
|
||||
> 所有.pdf的输出只能通过交互式界面获取
|
||||
|
||||
> 如果想不使用交互界面获取pdf,可以先下载HTML文件,用浏览器打开并打印
|
||||
|
||||
@@ -143,7 +142,7 @@ docutranslate -i -p 8011
|
||||
## 翻译文件
|
||||
|
||||
```python
|
||||
from docutranslate.translater import FileTranslater
|
||||
from docutranslate.translator import FileTranslater
|
||||
|
||||
translater = FileTranslater(base_url="<baseurl>", # 大模型的baseurl
|
||||
key="<api-key>", # 大模型的api-key
|
||||
|
||||
@@ -22,7 +22,7 @@ from pydantic import BaseModel, Field
|
||||
|
||||
from docutranslate.global_values.conditional_import import DOCLING_EXIST
|
||||
# --- 核心代码重构后的新 Imports ---
|
||||
from docutranslate.workflow.base_workflow import BaseWorkflow
|
||||
from docutranslate.workflow.base import Workflow
|
||||
from docutranslate.workflow.interfaces import HTMLExportable, MDFormatsExportable, TXTExportable
|
||||
from docutranslate.workflow.md_based_workflow import MarkdownBasedWorkflow
|
||||
from docutranslate.workflow.txt_workflow import TXTWorkflow
|
||||
@@ -30,16 +30,16 @@ from docutranslate.workflow.txt_workflow import TXTWorkflow
|
||||
if DOCLING_EXIST or TYPE_CHECKING:
|
||||
from docutranslate.converter.x2md.converter_docling import ConverterDoclingConfig
|
||||
from docutranslate.converter.x2md.converter_mineru import ConverterMineruConfig
|
||||
from docutranslate.exporter.md2x.md2html_exporter import MD2HTMLExportConfig
|
||||
from docutranslate.exporter.txt2x.txt2html_exporter import TXT2HTMLExportConfig
|
||||
from docutranslate.translater.base import AiTranslateConfig
|
||||
from docutranslate.translater.md_translator import MDTranslateConfig
|
||||
from docutranslate.translater.txt_translator import TXTTranslateConfig
|
||||
from docutranslate.exporter.md.md2html_exporter import MD2HTMLExporterConfig
|
||||
from docutranslate.exporter.txt.txt2html_exporter import TXT2HTMLExporterConfig
|
||||
from docutranslate.translator.base import AiTranslateConfig
|
||||
from docutranslate.translator.ai_translator.md_translator import MDTranslatorConfig
|
||||
from docutranslate.translator.ai_translator.txt_translator import TXTTranslatorConfig
|
||||
# ------------------------------------
|
||||
|
||||
from docutranslate import __version__
|
||||
from docutranslate.logger import global_logger
|
||||
from docutranslate.translater import default_params
|
||||
from docutranslate.translator import default_params
|
||||
from docutranslate.utils.resource_utils import resource_path
|
||||
|
||||
# --- 全局配置 (MODIFIED) ---
|
||||
@@ -50,7 +50,7 @@ MAX_LOG_HISTORY = 200
|
||||
httpx_client: httpx.AsyncClient
|
||||
|
||||
# --- [NEW] Workflow字典 ---
|
||||
WORKFLOW_DICT: Dict[str, type[BaseWorkflow]] = {
|
||||
WORKFLOW_DICT: Dict[str, type[Workflow]] = {
|
||||
"markdown_based": MarkdownBasedWorkflow,
|
||||
"txt": TXTWorkflow,
|
||||
}
|
||||
@@ -70,7 +70,7 @@ def _create_default_task_state() -> Dict[str, Any]:
|
||||
|
||||
|
||||
# --- [KEPT FOR TEMP ENDPOINT] Workflow 工厂函数 (旧逻辑,仅为临时接口保留) ---
|
||||
def _get_workflow_for_file(filename: str, logger: logging.Logger) -> BaseWorkflow:
|
||||
def _get_workflow_for_file(filename: str, logger: logging.Logger) -> Workflow:
|
||||
"""根据文件名后缀选择并返回合适的 Workflow 实例。这是扩展点。"""
|
||||
suffix = Path(filename).suffix.lower()
|
||||
if suffix == '.txt':
|
||||
@@ -299,7 +299,7 @@ async def _perform_translation(
|
||||
# 4. 根据 payload 的具体类型执行不同的翻译流程 (类型安全!)
|
||||
if isinstance(payload, MarkdownWorkflowParams) and isinstance(workflow, MarkdownBasedWorkflow):
|
||||
task_logger.info("执行 MarkdownBased 翻译流程。")
|
||||
translate_config = MDTranslateConfig(**ai_config.__dict__)
|
||||
translate_config = MDTranslatorConfig(**ai_config.__dict__)
|
||||
|
||||
convert_config = None
|
||||
if payload.convert_engin == 'mineru':
|
||||
@@ -323,7 +323,7 @@ async def _perform_translation(
|
||||
|
||||
elif isinstance(payload, TextWorkflowParams) and isinstance(workflow, TXTWorkflow):
|
||||
task_logger.info("执行 TXT 翻译流程。")
|
||||
translate_config = TXTTranslateConfig(**ai_config.__dict__)
|
||||
translate_config = TXTTranslatorConfig(**ai_config.__dict__)
|
||||
await workflow.translate_async(translate_config=translate_config)
|
||||
|
||||
else:
|
||||
@@ -750,7 +750,7 @@ async def _get_content_from_workflow(task_id: str, file_type: FileType) -> tuple
|
||||
if not task_state.get("download_ready") or not task_state.get("workflow_instance"):
|
||||
raise HTTPException(status_code=404, detail="内容尚未准备好。")
|
||||
|
||||
workflow: BaseWorkflow = task_state["workflow_instance"]
|
||||
workflow: Workflow = task_state["workflow_instance"]
|
||||
filename_stem = task_state['original_filename_stem']
|
||||
|
||||
try:
|
||||
@@ -759,8 +759,8 @@ async def _get_content_from_workflow(task_id: str, file_type: FileType) -> tuple
|
||||
filename: str
|
||||
|
||||
if file_type == 'html' and isinstance(workflow, HTMLExportable):
|
||||
config = MD2HTMLExportConfig(cdn=True) if isinstance(workflow,
|
||||
MarkdownBasedWorkflow) else TXT2HTMLExportConfig(
|
||||
config = MD2HTMLExporterConfig(cdn=True) if isinstance(workflow,
|
||||
MarkdownBasedWorkflow) else TXT2HTMLExporterConfig(
|
||||
cdn=True)
|
||||
try:
|
||||
await httpx_client.head("https://s4.zstatic.net/ajax/libs/KaTeX/0.16.9/contrib/auto-render.min.js",
|
||||
@@ -1073,14 +1073,14 @@ async def temp_translate(
|
||||
workflow.read_bytes(decoded_content, Path(file_name).stem, Path(file_name).suffix)
|
||||
|
||||
if isinstance(workflow, MarkdownBasedWorkflow):
|
||||
translate_config = MDTranslateConfig(**ai_config.__dict__)
|
||||
translate_config = MDTranslatorConfig(**ai_config.__dict__)
|
||||
convert_config = ConverterMineruConfig(mineru_token=mineru_token) if mineru_token else None
|
||||
convert_engin = 'mineru' if mineru_token else None
|
||||
await workflow.translate_async(convert_engin, convert_config, translate_config)
|
||||
return {"success": True, "content": workflow.export_to_markdown()}
|
||||
|
||||
elif isinstance(workflow, TXTWorkflow):
|
||||
translate_config = TXTTranslateConfig(**ai_config.__dict__)
|
||||
translate_config = TXTTranslatorConfig(**ai_config.__dict__)
|
||||
await workflow.translate_async(translate_config)
|
||||
return {"success": True, "content": workflow.export_to_txt()}
|
||||
|
||||
|
||||
@@ -1,7 +1,7 @@
|
||||
import os
|
||||
from collections import OrderedDict
|
||||
|
||||
from docutranslate.exporter.md2x.types import x2md_convert_config_type
|
||||
from docutranslate.converter.base import ConverterConfig
|
||||
from docutranslate.ir.document import Document
|
||||
from docutranslate.ir.markdown_document import MarkdownDocument
|
||||
|
||||
@@ -13,17 +13,17 @@ class MDBasedCovertCacher:
|
||||
self.cache_dict = OrderedDict()
|
||||
|
||||
@staticmethod
|
||||
def _get_hashcode(document: Document, convert_engin: str, convert_config: x2md_convert_config_type) -> str:
|
||||
obj = (document.suffix, document.content, convert_engin, convert_config)
|
||||
def _get_hashcode(document: Document, convert_engin: str, convert_config: ConverterConfig) -> str:
|
||||
obj = (document.suffix, document.content, convert_engin, convert_config.gethash())
|
||||
return str(hash(obj))
|
||||
|
||||
def get_cached_result(self, document: Document, convert_engin: str,
                      convert_config: ConverterConfig) -> MarkdownDocument | None:
    """Look up a previously cached conversion result.

    Args:
        document: Source document that would be converted.
        convert_engin: Name of the conversion engine.
        convert_config: Converter configuration used for the conversion.

    Returns:
        The cached result stored under the computed key, or ``None`` when
        there is no entry for it.
    """
    # Pass the config object itself: _get_hashcode() calls gethash() on it.
    # Passing convert_config.gethash() here made _get_hashcode() call
    # gethash() a second time on the returned value and fail.
    hash_code = self._get_hashcode(document, convert_engin, convert_config)
    return self.cache_dict.get(hash_code)
|
||||
|
||||
def cache_result(self, convert_result: MarkdownDocument, document: Document, convert_engin: str,
|
||||
convert_config: x2md_convert_config_type) -> MarkdownDocument:
|
||||
hash_code = self._get_hashcode(document, convert_engin, convert_config)
|
||||
convert_config: ConverterConfig) -> MarkdownDocument:
|
||||
hash_code = self._get_hashcode(document, convert_engin, convert_config.gethash())
|
||||
if len(self.cache_dict) > int(CACHE_NUM):
|
||||
self.cache_dict.popitem(last=False)
|
||||
self.cache_dict[hash_code] = convert_result
|
||||
|
||||
29
docutranslate/converter/base.py
Normal file
29
docutranslate/converter/base.py
Normal file
@@ -0,0 +1,29 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from logging import Logger
|
||||
from typing import Hashable
|
||||
|
||||
from docutranslate.ir.document import Document
|
||||
from docutranslate.logger import global_logger
|
||||
|
||||
|
||||
@dataclass(kw_only=True)
|
||||
class ConverterConfig(ABC):
|
||||
logger: Logger | None = None
|
||||
@abstractmethod
|
||||
def gethash(self)->Hashable:
|
||||
...
|
||||
|
||||
|
||||
|
||||
class Converter(ABC):
    """Abstract base class for document converters.

    Stores the optional configuration and picks the logger to use: the
    configuration's logger when one is set, otherwise ``global_logger``.
    """

    def __init__(self, config: ConverterConfig | None = None):
        """Initialise the converter.

        Args:
            config: Optional converter configuration. When omitted, or when
                its ``logger`` is unset, ``global_logger`` is used.
        """
        self.config = config
        # ``config`` defaults to None, so its logger may only be read when a
        # config was actually supplied (previously this raised AttributeError).
        config_logger = config.logger if config is not None else None
        self.logger = config_logger or global_logger

    @abstractmethod
    def convert(self, document: Document) -> Document:
        """Convert ``document`` and return the converted document."""
        ...

    async def convert_async(self, document: Document) -> Document:
        """Asynchronous counterpart of :meth:`convert`.

        Not declared abstract here; this base body is a placeholder that
        returns ``None``, so subclasses are expected to override it.
        """
        ...
|
||||
@@ -1,11 +0,0 @@
|
||||
from typing import Protocol
|
||||
|
||||
from docutranslate.ir.document import Document
|
||||
|
||||
|
||||
class Converter(Protocol):
|
||||
def convert(self, document: Document) -> Document:
|
||||
...
|
||||
|
||||
async def convert_async(self, document: Document) -> Document:
|
||||
...
|
||||
@@ -1,19 +1,28 @@
|
||||
from typing import Protocol
|
||||
from docutranslate.converter.interfaces import Converter
|
||||
from abc import abstractmethod
|
||||
from dataclasses import dataclass
|
||||
|
||||
from docutranslate.converter.base import Converter, ConverterConfig
|
||||
from docutranslate.ir.document import Document
|
||||
from docutranslate.ir.markdown_document import MarkdownDocument
|
||||
|
||||
@dataclass(kw_only=True)
|
||||
class X2MarkdownConverterConfig(ConverterConfig):
|
||||
...
|
||||
|
||||
|
||||
class X2MarkdownConverter(Converter,Protocol):
|
||||
class X2MarkdownConverter(Converter):
|
||||
"""
|
||||
负责将其它格式的文件转换为markdown
|
||||
"""
|
||||
|
||||
@abstractmethod
|
||||
def convert(self, document: Document) -> MarkdownDocument:
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
async def convert_async(self, document: Document) -> MarkdownDocument:
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def support_format(self)->list[str]:
|
||||
...
|
||||
@@ -3,7 +3,6 @@ import os
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from io import BytesIO
|
||||
from logging import Logger
|
||||
from pathlib import Path
|
||||
|
||||
from docling.datamodel.base_models import InputFormat
|
||||
@@ -14,34 +13,34 @@ from docling.document_converter import DocumentConverter, PdfFormatOption
|
||||
from docling_core.types.doc import ImageRefMode
|
||||
from huggingface_hub.errors import LocalEntryNotFoundError
|
||||
|
||||
from docutranslate.converter.x2md.interfaces import X2MarkdownConverter
|
||||
from docutranslate.converter.x2md.base import X2MarkdownConverter, X2MarkdownConverterConfig
|
||||
from docutranslate.ir.document import Document
|
||||
from docutranslate.ir.markdown_document import MarkdownDocument
|
||||
from docutranslate.logger import global_logger
|
||||
|
||||
IMAGE_RESOLUTION_SCALE = 4
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ConverterDoclingConfig:
|
||||
@dataclass(kw_only=True)
|
||||
class ConverterDoclingConfig(X2MarkdownConverterConfig):
|
||||
code: bool = True
|
||||
formula: bool = True
|
||||
artifact: Path | None = None
|
||||
|
||||
def gethash(self):
|
||||
return self.code,self.formula
|
||||
|
||||
|
||||
class ConverterDocling(X2MarkdownConverter):
|
||||
def __init__(self, config: ConverterDoclingConfig, logger: Logger = global_logger):
|
||||
self.logger = logger
|
||||
self.config = config
|
||||
def __init__(self, config: ConverterDoclingConfig):
|
||||
super().__init__(config=config)
|
||||
self.code = config.code
|
||||
self.formula = config.formula
|
||||
artifact=Path("./docling_artifact")
|
||||
artifact = Path("./docling_artifact")
|
||||
if artifact.is_dir():
|
||||
self.logger.info("使用./docling_artifact的本地模型")
|
||||
self.artifact=artifact
|
||||
self.artifact = artifact
|
||||
else:
|
||||
self.artifact=config.artifact
|
||||
|
||||
self.artifact = config.artifact
|
||||
|
||||
def convert(self, document) -> MarkdownDocument:
|
||||
assert isinstance(document.name, str)
|
||||
|
||||
@@ -1,10 +1,10 @@
|
||||
from docutranslate.converter.x2md.interfaces import X2MarkdownConverter
|
||||
from docutranslate.converter.x2md.base import X2MarkdownConverter
|
||||
from docutranslate.ir.document import Document
|
||||
from docutranslate.ir.markdown_document import MarkdownDocument
|
||||
|
||||
|
||||
class ConverterIdentity(X2MarkdownConverter):
|
||||
|
||||
#TODO:支持markdown_zip格式输入
|
||||
def convert(self, document: Document) -> MarkdownDocument:
|
||||
return MarkdownDocument.from_bytes(content=document.content, suffix=".md", stem=document.stem)
|
||||
|
||||
|
||||
@@ -3,10 +3,11 @@ import time
|
||||
import zipfile
|
||||
from dataclasses import dataclass
|
||||
from logging import Logger
|
||||
from typing import Hashable
|
||||
|
||||
import httpx
|
||||
|
||||
from docutranslate.converter.x2md.interfaces import X2MarkdownConverter
|
||||
from docutranslate.converter.x2md.base import X2MarkdownConverter, X2MarkdownConverterConfig
|
||||
from docutranslate.ir.document import Document
|
||||
from docutranslate.ir.markdown_document import MarkdownDocument
|
||||
from docutranslate.logger import global_logger
|
||||
@@ -15,11 +16,14 @@ from docutranslate.utils.markdown_utils import embed_inline_image_from_zip
|
||||
URL = 'https://mineru.net/api/v4/file-urls/batch'
|
||||
|
||||
|
||||
@dataclass(frozen=True)
|
||||
class ConverterMineruConfig:
|
||||
@dataclass(kw_only=True)
|
||||
class ConverterMineruConfig(X2MarkdownConverterConfig):
|
||||
mineru_token: str
|
||||
formula: bool = True
|
||||
|
||||
def gethash(self) ->Hashable:
|
||||
return self.formula
|
||||
|
||||
|
||||
timeout = httpx.Timeout(
|
||||
connect=5.0, # 连接超时 (建立连接的最长时间)
|
||||
@@ -34,7 +38,7 @@ client_async = httpx.AsyncClient(trust_env=False, timeout=timeout, proxy=None, v
|
||||
|
||||
class ConverterMineru(X2MarkdownConverter):
|
||||
def __init__(self, config: ConverterMineruConfig, logger: Logger = global_logger):
|
||||
self.config = config
|
||||
super().__init__(config=config)
|
||||
self.mineru_token = config.mineru_token.strip()
|
||||
self.formula = config.formula
|
||||
self.logger = logger
|
||||
|
||||
20
docutranslate/exporter/base.py
Normal file
20
docutranslate/exporter/base.py
Normal file
@@ -0,0 +1,20 @@
|
||||
from abc import ABC,abstractmethod
|
||||
from typing import Generic,TypeVar, Any
|
||||
|
||||
from dataclasses import dataclass
|
||||
|
||||
from docutranslate.ir.document import Document
|
||||
|
||||
D_in = TypeVar('D_in', bound=Document)
|
||||
|
||||
@dataclass(kw_only=True)
|
||||
class ExporterConfig:
|
||||
...
|
||||
|
||||
class Exporter(ABC,Generic[D_in]):
|
||||
def __init__(self,config:ExporterConfig|None=None):
|
||||
self.config=config
|
||||
|
||||
@abstractmethod
|
||||
def export(self, document: D_in) -> Any:
|
||||
...
|
||||
@@ -1,8 +0,0 @@
|
||||
from dataclasses import dataclass
|
||||
|
||||
@dataclass
|
||||
class ExportConfig:
|
||||
pass
|
||||
|
||||
|
||||
|
||||
@@ -1,15 +0,0 @@
|
||||
from typing import Protocol, TypeVar, Any, Self
|
||||
|
||||
from docutranslate.exporter.export_config import ExportConfig
|
||||
from docutranslate.ir.document import Document
|
||||
|
||||
D_in = TypeVar('D_in', bound=Document)
|
||||
|
||||
|
||||
class Exporter(Protocol[D_in]):
|
||||
@classmethod
|
||||
def from_config(cls, export_config: ExportConfig | None = None) -> Self:
|
||||
...
|
||||
|
||||
def export(self, document: D_in) -> Any:
|
||||
...
|
||||
18
docutranslate/exporter/md/base.py
Normal file
18
docutranslate/exporter/md/base.py
Normal file
@@ -0,0 +1,18 @@
|
||||
from dataclasses import dataclass
|
||||
|
||||
from docutranslate.exporter.base import Exporter, ExporterConfig
|
||||
from docutranslate.ir.document import Document
|
||||
from docutranslate.ir.markdown_document import MarkdownDocument
|
||||
|
||||
|
||||
@dataclass(kw_only=True)
|
||||
class MDExporterConfig(ExporterConfig):
|
||||
...
|
||||
|
||||
|
||||
class MDExporter(Exporter):
|
||||
def __init__(self, config: MDExporterConfig|None=None):
|
||||
super().__init__(config=config)
|
||||
|
||||
def export(self, document: MarkdownDocument) -> Document:
|
||||
...
|
||||
@@ -3,20 +3,20 @@ from dataclasses import dataclass
|
||||
import jinja2
|
||||
import markdown2
|
||||
|
||||
from docutranslate.exporter.export_config import ExportConfig
|
||||
from docutranslate.exporter.md2x.interfaces import MDExporter
|
||||
from docutranslate.exporter.md.base import MDExporter, MDExporterConfig
|
||||
from docutranslate.ir.document import Document
|
||||
from docutranslate.ir.markdown_document import MarkdownDocument
|
||||
from docutranslate.utils.resource_utils import resource_path
|
||||
|
||||
@dataclass
|
||||
class MD2HTMLExportConfig(ExportConfig):
|
||||
class MD2HTMLExporterConfig(MDExporterConfig):
|
||||
cdn: bool = True
|
||||
|
||||
class MD2HTMLExporter(MDExporter):
|
||||
def __init__(self, export_config: MD2HTMLExportConfig = None):
|
||||
export_config = export_config or MD2HTMLExportConfig()
|
||||
self.cdn=export_config.cdn
|
||||
def __init__(self, config: MD2HTMLExporterConfig = None):
|
||||
config = config or MD2HTMLExporterConfig()
|
||||
super().__init__(config=config)
|
||||
self.cdn=config.cdn
|
||||
|
||||
def export(self, document: MarkdownDocument) -> Document:
|
||||
cdn = self.cdn
|
||||
8
docutranslate/exporter/md/md2md_exporter.py
Normal file
8
docutranslate/exporter/md/md2md_exporter.py
Normal file
@@ -0,0 +1,8 @@
|
||||
from docutranslate.exporter.md.base import MDExporter
|
||||
from docutranslate.ir.markdown_document import MarkdownDocument, Document
|
||||
|
||||
|
||||
class MD2MDExporter(MDExporter):
|
||||
|
||||
def export(self, document: MarkdownDocument) -> Document:
|
||||
return Document.from_bytes(suffix=".md", content=document.content, stem=document.stem)
|
||||
11
docutranslate/exporter/md/md2mdzip_exporter.py
Normal file
11
docutranslate/exporter/md/md2mdzip_exporter.py
Normal file
@@ -0,0 +1,11 @@
|
||||
from docutranslate.exporter.md.base import MDExporter
|
||||
from docutranslate.ir.markdown_document import MarkdownDocument, Document
|
||||
from docutranslate.utils.markdown_utils import unembed_base64_images_to_zip
|
||||
|
||||
|
||||
class MD2MDZipExporter(MDExporter):
|
||||
|
||||
def export(self, document: MarkdownDocument) -> Document:
|
||||
return Document.from_bytes(suffix=".zip", content=unembed_base64_images_to_zip(document.content.decode(),
|
||||
markdown_name=document.name),
|
||||
stem=document.stem)
|
||||
3
docutranslate/exporter/md/types.py
Normal file
3
docutranslate/exporter/md/types.py
Normal file
@@ -0,0 +1,3 @@
|
||||
from typing import Literal
|
||||
|
||||
ConvertEnginType = Literal["mineru", "docling"]
|
||||
@@ -1,9 +0,0 @@
|
||||
from docutranslate.exporter.interfaces import Exporter
|
||||
from docutranslate.ir.document import Document
|
||||
from docutranslate.ir.markdown_document import MarkdownDocument
|
||||
|
||||
|
||||
class MDExporter(Exporter):
|
||||
|
||||
def export(self,document:MarkdownDocument)->Document:
|
||||
...
|
||||
@@ -1,18 +0,0 @@
|
||||
from dataclasses import dataclass
|
||||
|
||||
from docutranslate.exporter.export_config import ExportConfig
|
||||
from docutranslate.exporter.md2x.interfaces import MDExporter
|
||||
from docutranslate.ir.markdown_document import MarkdownDocument,Document
|
||||
|
||||
|
||||
@dataclass
|
||||
class MD2MDExportConfig(ExportConfig):
|
||||
pass
|
||||
|
||||
|
||||
class MD2MDExporter(MDExporter):
|
||||
def __init__(self, export_config: MD2MDExportConfig | None=None):
|
||||
pass
|
||||
|
||||
def export(self,document:MarkdownDocument)->Document:
|
||||
return Document.from_bytes(suffix=".md",content=document.content,stem=document.stem)
|
||||
@@ -1,21 +0,0 @@
|
||||
from dataclasses import dataclass
|
||||
|
||||
from docutranslate.exporter.export_config import ExportConfig
|
||||
from docutranslate.exporter.md2x.interfaces import MDExporter
|
||||
from docutranslate.ir.markdown_document import MarkdownDocument,Document
|
||||
from docutranslate.utils.markdown_utils import unembed_base64_images_to_zip
|
||||
|
||||
|
||||
@dataclass
|
||||
class MD2MDZIPExportConfig(ExportConfig):
|
||||
pass
|
||||
|
||||
|
||||
class MD2MDZipExporter(MDExporter):
|
||||
def __init__(self, export_config: MD2MDZIPExportConfig | None=None):
|
||||
pass
|
||||
|
||||
def export(self,document:MarkdownDocument)->Document:
|
||||
return Document.from_bytes(suffix=".zip",content=unembed_base64_images_to_zip(document.content.decode(), markdown_name=document.name),stem=document.stem)
|
||||
|
||||
|
||||
@@ -1,14 +0,0 @@
|
||||
from typing import Literal, TYPE_CHECKING
|
||||
|
||||
from docutranslate.converter.x2md.converter_mineru import ConverterMineruConfig
|
||||
from docutranslate.global_values.conditional_import import DOCLING_EXIST
|
||||
|
||||
if DOCLING_EXIST or TYPE_CHECKING:
|
||||
from docutranslate.converter.x2md.converter_docling import ConverterDoclingConfig
|
||||
|
||||
convert_engin_type = Literal["mineru", "docling"]
|
||||
|
||||
if DOCLING_EXIST or TYPE_CHECKING:
|
||||
x2md_convert_config_type = ConverterDoclingConfig | ConverterMineruConfig
|
||||
else:
|
||||
x2md_convert_config_type = ConverterMineruConfig
|
||||
@@ -1,8 +1,8 @@
|
||||
from docutranslate.exporter.interfaces import Exporter
|
||||
from docutranslate.exporter.base import Exporter
|
||||
from docutranslate.ir.document import Document
|
||||
|
||||
#TODO:看情况是否需要为TXT单独写一个document类型
|
||||
class TXTExporter(Exporter):
|
||||
class TXTExporter(Exporter[Document]):
|
||||
|
||||
def export(self,document:Document)->Document:
|
||||
...
|
||||
@@ -2,21 +2,22 @@ from dataclasses import dataclass
|
||||
|
||||
import jinja2
|
||||
|
||||
from docutranslate.exporter.export_config import ExportConfig
|
||||
from docutranslate.exporter.txt2x.interfaces import TXTExporter
|
||||
from docutranslate.exporter.base import ExporterConfig
|
||||
from docutranslate.exporter.txt.base import TXTExporter
|
||||
from docutranslate.ir.document import Document
|
||||
from docutranslate.utils.resource_utils import resource_path
|
||||
|
||||
|
||||
@dataclass
|
||||
class TXT2HTMLExportConfig(ExportConfig):
|
||||
class TXT2HTMLExporterConfig(ExporterConfig):
|
||||
cdn: bool = True
|
||||
|
||||
|
||||
class TXT2HTMLExporter(TXTExporter):
|
||||
def __init__(self, export_config: TXT2HTMLExportConfig = None):
|
||||
export_config = export_config or TXT2HTMLExportConfig()
|
||||
self.cdn = export_config.cdn
|
||||
def __init__(self, config: TXT2HTMLExporterConfig = None):
|
||||
config = config or TXT2HTMLExporterConfig()
|
||||
super().__init__(config=config)
|
||||
self.cdn = config.cdn
|
||||
|
||||
def export(self, document: Document) -> Document:
|
||||
cdn = self.cdn
|
||||
@@ -1,10 +1,7 @@
|
||||
from docutranslate.exporter.txt2x.interfaces import TXTExporter
|
||||
from docutranslate.exporter.txt.base import TXTExporter
|
||||
from docutranslate.ir.document import Document
|
||||
|
||||
|
||||
|
||||
|
||||
|
||||
class TXT2TXTExporter(TXTExporter):
|
||||
def export(self, document: Document) -> Document:
|
||||
return document.copy()
|
||||
return document.copy()
|
||||
@@ -1,16 +0,0 @@
|
||||
from dataclasses import dataclass
|
||||
from logging import Logger
|
||||
|
||||
|
||||
@dataclass
|
||||
class AiTranslateConfig:
|
||||
base_url: str
|
||||
api_key: str
|
||||
model_id: str
|
||||
to_lang: str
|
||||
custom_prompt: str | None = None
|
||||
temperature: float = 0.7
|
||||
timeout: int = 2000
|
||||
chunk_size: int = 3000
|
||||
concurrent: int = 30
|
||||
logger: Logger | None = None
|
||||
@@ -1,20 +0,0 @@
|
||||
from typing import Protocol, TypeVar
|
||||
|
||||
from docutranslate.agents import Agent
|
||||
from docutranslate.ir.document import Document
|
||||
|
||||
T=TypeVar('T',bound=Document)
|
||||
V=TypeVar('V',bound=Agent)
|
||||
|
||||
class Translator(Protocol[T,V]):
|
||||
"""
|
||||
翻译中间文本(原地替换),Translator不做格式转换
|
||||
"""
|
||||
def translate(self, document:T) -> Document:
|
||||
...
|
||||
|
||||
async def translate_async(self, document: T) -> Document:
|
||||
...
|
||||
|
||||
def log(self,info:str):
|
||||
...
|
||||
0
docutranslate/translator/ai_translator/__init__.py
Normal file
0
docutranslate/translator/ai_translator/__init__.py
Normal file
35
docutranslate/translator/ai_translator/base.py
Normal file
35
docutranslate/translator/ai_translator/base.py
Normal file
@@ -0,0 +1,35 @@
|
||||
from abc import abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from logging import Logger
|
||||
from typing import TypeVar
|
||||
|
||||
from docutranslate.ir.document import Document
|
||||
from docutranslate.translator.base import Translator, TranslatorConfig
|
||||
|
||||
|
||||
@dataclass(kw_only=True)
|
||||
class AiTranslatorConfig(TranslatorConfig):
|
||||
base_url: str
|
||||
api_key: str
|
||||
model_id: str
|
||||
to_lang: str
|
||||
custom_prompt: str | None = None
|
||||
temperature: float = 0.7
|
||||
timeout: int = 2000
|
||||
chunk_size: int = 3000
|
||||
concurrent: int = 30
|
||||
|
||||
T=TypeVar('T',bound=Document)
|
||||
|
||||
class AiTranslator(Translator[T]):
    """Base class for AI-backed translators.

    Translates the intermediate text (in-place replacement); a translator
    performs no format conversion.
    """

    def __init__(self, config: AiTranslatorConfig, logger: Logger | None = None):
        """Initialise the translator.

        Args:
            config: AI translator configuration.
            logger: Optional logger. When given, it takes precedence over the
                logger selected by the base class from ``config``.
        """
        # Translator.__init__ only accepts ``config``; forwarding ``logger``
        # to it raised TypeError on every instantiation.
        super().__init__(config=config)
        if logger is not None:
            self.logger = logger

    @abstractmethod
    def translate(self, document: T) -> Document:
        """Translate ``document`` and return the translated document."""
        ...

    @abstractmethod
    async def translate_async(self, document: T) -> Document:
        """Asynchronous counterpart of :meth:`translate`."""
        ...
|
||||
@@ -5,22 +5,21 @@ from typing import Self
|
||||
from docutranslate.agents import MDTranslateAgent
|
||||
from docutranslate.context.md_mask_context import MDMaskUrisContext
|
||||
from docutranslate.ir.markdown_document import MarkdownDocument
|
||||
from docutranslate.logger import global_logger
|
||||
from docutranslate.translater.base import AiTranslateConfig
|
||||
from docutranslate.translater.interfaces import Translator
|
||||
from docutranslate.translator.ai_translator.base import AiTranslatorConfig
|
||||
from docutranslate.translator.base import Translator
|
||||
from docutranslate.utils.markdown_splitter import split_markdown_text, join_markdown_texts
|
||||
from docutranslate.utils.markdown_utils import clean_markdown_math_block
|
||||
|
||||
|
||||
@dataclass
|
||||
class MDTranslateConfig(AiTranslateConfig):
|
||||
class MDTranslatorConfig(AiTranslatorConfig):
|
||||
...
|
||||
|
||||
|
||||
|
||||
class MDTranslator(Translator):
|
||||
def __init__(self, config: MDTranslateConfig):
|
||||
self.logger = config.logger or global_logger
|
||||
def __init__(self, config: MDTranslatorConfig):
|
||||
super().__init__(config=config)
|
||||
self.chunk_size = config.chunk_size
|
||||
self.translate_agent = MDTranslateAgent(custom_prompt=config.custom_prompt,
|
||||
to_lang=config.to_lang,
|
||||
@@ -3,20 +3,19 @@ from typing import Self
|
||||
|
||||
from docutranslate.agents.txt_agent import TXTTranslateAgent
|
||||
from docutranslate.ir.document import Document
|
||||
from docutranslate.logger import global_logger
|
||||
from docutranslate.translater.base import AiTranslateConfig
|
||||
from docutranslate.translater.interfaces import Translator
|
||||
from docutranslate.translator.ai_translator.base import AiTranslatorConfig
|
||||
from docutranslate.translator.base import Translator
|
||||
from docutranslate.utils.markdown_splitter import split_markdown_text
|
||||
|
||||
|
||||
@dataclass
|
||||
class TXTTranslateConfig(AiTranslateConfig):
|
||||
class TXTTranslatorConfig(AiTranslatorConfig):
|
||||
...
|
||||
|
||||
|
||||
class TXTTranslator(Translator):
|
||||
def __init__(self, config: TXTTranslateConfig):
|
||||
self.logger = config.logger or global_logger
|
||||
def __init__(self, config: TXTTranslatorConfig):
|
||||
super().__init__(config=config)
|
||||
self.chunk_size = config.chunk_size
|
||||
self.translate_agent = TXTTranslateAgent(custom_prompt=config.custom_prompt,
|
||||
to_lang=config.to_lang,
|
||||
27
docutranslate/translator/base.py
Normal file
27
docutranslate/translator/base.py
Normal file
@@ -0,0 +1,27 @@
|
||||
from dataclasses import dataclass
|
||||
from logging import Logger
|
||||
from typing import TypeVar,Generic
|
||||
from abc import ABC,abstractmethod
|
||||
from docutranslate.ir.document import Document
|
||||
from docutranslate.logger import global_logger
|
||||
|
||||
|
||||
@dataclass(kw_only=True)
|
||||
class TranslatorConfig:
|
||||
logger:Logger|None=None
|
||||
|
||||
T=TypeVar('T',bound=Document)
|
||||
|
||||
class Translator(ABC, Generic[T]):
    """Abstract base class for translators.

    Translates the intermediate text (in-place replacement); a translator
    performs no format conversion.
    """

    def __init__(self, config: TranslatorConfig | None = None):
        """Initialise the translator.

        Args:
            config: Optional translator configuration. When omitted, or when
                its ``logger`` is unset, ``global_logger`` is used.
        """
        self.config = config
        # ``config`` defaults to None, so its logger may only be read when a
        # config was actually supplied (previously this raised AttributeError).
        config_logger = config.logger if config is not None else None
        self.logger = config_logger or global_logger

    @abstractmethod
    def translate(self, document: T) -> Document:
        """Translate ``document`` and return the translated document."""
        ...

    @abstractmethod
    async def translate_async(self, document: T) -> Document:
        """Asynchronous counterpart of :meth:`translate`."""
        ...
|
||||
@@ -1,19 +1,27 @@
|
||||
from abc import ABC, abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from logging import Logger
|
||||
from pathlib import Path
|
||||
from typing import Self, Generic, TypeVar
|
||||
|
||||
from docutranslate.exporter.interfaces import Exporter
|
||||
from docutranslate.exporter.base import Exporter
|
||||
from docutranslate.ir.document import Document
|
||||
from docutranslate.logger import global_logger
|
||||
|
||||
|
||||
@dataclass(kw_only=True)
|
||||
class WorkflowConfig:
|
||||
logger: Logger | None = None
|
||||
|
||||
|
||||
T_original = TypeVar('T_original', bound=Document)
|
||||
T_Translated = TypeVar('T_Translated', bound=Document)
|
||||
|
||||
|
||||
class BaseWorkflow(ABC, Generic[T_Translated]):
|
||||
class Workflow(ABC, Generic[T_original, T_Translated]):
|
||||
def __init__(self, logger: Logger = global_logger):
|
||||
self.logger = logger
|
||||
self.document_original: Document | None = None
|
||||
self.document_original: T_original | None = None
|
||||
self.document_translated: T_Translated | None = None
|
||||
|
||||
def read_path(self, path: Path | str) -> Self:
|
||||
@@ -1,9 +1,9 @@
|
||||
from pathlib import Path
|
||||
from typing import Protocol, Self, TypeVar, runtime_checkable
|
||||
|
||||
from docutranslate.exporter.export_config import ExportConfig
|
||||
from docutranslate.exporter.export_config import ExporterConfig
|
||||
|
||||
T = TypeVar("T", bound=ExportConfig)
|
||||
T = TypeVar("T", bound=ExporterConfig)
|
||||
|
||||
@runtime_checkable
|
||||
class HTMLExportable(Protocol[T]):
|
||||
|
||||
@@ -1,40 +1,84 @@
|
||||
import asyncio
|
||||
from dataclasses import dataclass
|
||||
from logging import Logger
|
||||
from pathlib import Path
|
||||
from typing import Self, Literal, overload, TYPE_CHECKING
|
||||
from typing import Self, Tuple, Any
|
||||
|
||||
from docutranslate.cacher import md_based_convert_cacher
|
||||
from docutranslate.global_values.conditional_import import DOCLING_EXIST
|
||||
|
||||
if DOCLING_EXIST or TYPE_CHECKING:
|
||||
if DOCLING_EXIST:
|
||||
from docutranslate.converter.x2md.converter_docling import ConverterDoclingConfig, ConverterDocling
|
||||
from docutranslate.converter.x2md.converter_identity import ConverterIdentity
|
||||
from docutranslate.converter.x2md.converter_mineru import ConverterMineruConfig, ConverterMineru
|
||||
from docutranslate.converter.x2md.interfaces import X2MarkdownConverter
|
||||
from docutranslate.exporter.md2x.md2html_exporter import MD2HTMLExportConfig, MD2HTMLExporter
|
||||
from docutranslate.exporter.md2x.md2md_exporter import MD2MDExportConfig, MD2MDExporter
|
||||
from docutranslate.exporter.md2x.md2mdzip_exporter import MD2MDZIPExportConfig, MD2MDZipExporter
|
||||
from docutranslate.exporter.md2x.types import x2md_convert_config_type, convert_engin_type
|
||||
from docutranslate.workflow.base_workflow import BaseWorkflow
|
||||
from docutranslate.converter.x2md.base import X2MarkdownConverterConfig
|
||||
from docutranslate.exporter.md.md2html_exporter import MD2HTMLExporterConfig, MD2HTMLExporter
|
||||
from docutranslate.exporter.md.md2md_exporter import MD2MDExporter
|
||||
from docutranslate.exporter.md.md2mdzip_exporter import MD2MDZipExporter
|
||||
from docutranslate.exporter.md.types import ConvertEnginType
|
||||
from docutranslate.workflow.base import Workflow, WorkflowConfig
|
||||
from docutranslate.workflow.interfaces import MDFormatsExportable, HTMLExportable
|
||||
from docutranslate.translater.md_translator import MDTranslateConfig, MDTranslator
|
||||
from docutranslate.translator.ai_translator.md_translator import MDTranslatorConfig, MDTranslator
|
||||
|
||||
|
||||
class MarkdownBasedWorkflow(BaseWorkflow, HTMLExportable, MDFormatsExportable):
|
||||
def __init__(self, *args, **kwargs):
|
||||
super().__init__(*args, **kwargs)
|
||||
@dataclass(kw_only=True)
class MarkdownBasedWorkflowConfig(WorkflowConfig):
    """Flat, keyword-only settings consumed by MarkdownBasedWorkflow.

    The section comments name the component config that each group of fields
    is forwarded to in MarkdownBasedWorkflow.__init__. Fields without a
    default (convert_engine, mineru_token, base_url, api_key, model_id,
    to_lang) must always be passed.
    """

    # X2MarkdownConverterConfig
    convert_engine: ConvertEnginType | None  # None: no converter config is built
    formula: bool = True
    # ConverterDoclingConfig
    code: bool = True
    artifact: Path | None = None
    # ConverterMineruConfig
    # NOTE(review): required even when convert_engine is not "mineru" - confirm this is intended.
    mineru_token: str
    # MDTranslatorConfig
    base_url: str
    api_key: str
    model_id: str
    to_lang: str
    custom_prompt: str | None = None
    temperature: float = 0.7
    timeout: int = 2000
    chunk_size: int = 3000
    concurrent: int = 30
    # MD2HTMLExporterConfig
    cdn: bool = True
    # general
    logger: Logger | None = None  # redeclares the field inherited from WorkflowConfig
|
||||
|
||||
if DOCLING_EXIST or TYPE_CHECKING:
|
||||
self._converter_factory: dict[str:tuple[X2MarkdownConverter, x2md_convert_config_type]] = {
|
||||
"mineru": (ConverterMineru, ConverterMineruConfig),
|
||||
"docling": (ConverterDocling, ConverterDoclingConfig)
|
||||
}
|
||||
else:
|
||||
self._converter_factory: dict[str:tuple[X2MarkdownConverter, x2md_convert_config_type]] = {
|
||||
"mineru": (ConverterMineru, ConverterMineruConfig),
|
||||
}
|
||||
|
||||
def _get_document_md(self, convert_engin: convert_engin_type | None,
|
||||
convert_config: x2md_convert_config_type | None):
|
||||
class MarkdownBasedWorkflow(Workflow, HTMLExportable, MDFormatsExportable):
|
||||
def __init__(self, config: MarkdownBasedWorkflowConfig):
    """Split the flat workflow settings into per-component config objects.

    Args:
        config: Workflow settings. Its fields are distributed into the
            converter, translator and HTML-exporter configs that are stored
            on the instance for later use by translate/export/save methods.
    """
    # Workflow.__init__ accepts a ``logger`` keyword (not ``config``), so
    # forward the configured logger and keep the base-class default when
    # none was supplied.
    if config.logger is not None:
        super().__init__(logger=config.logger)
    else:
        super().__init__()

    # Engine name -> (converter class, matching config class).
    self._converter_factory: dict[ConvertEnginType, Tuple[Any, Any]] = {
        "mineru": (ConverterMineru, ConverterMineruConfig),
    }
    if DOCLING_EXIST:
        self._converter_factory["docling"] = (ConverterDocling, ConverterDoclingConfig)

    # Always bind ``converter_config``: previously it stayed undefined when
    # the engine was "docling" without docling installed (or any other
    # unrecognised value), which surfaced later as an AttributeError.
    self.converter_config: X2MarkdownConverterConfig | None = None
    if config.convert_engine == "mineru":
        self.converter_config = ConverterMineruConfig(
            formula=config.formula,
            mineru_token=config.mineru_token,
        )
    elif DOCLING_EXIST and config.convert_engine == "docling":
        self.converter_config = ConverterDoclingConfig(
            code=config.code,
            formula=config.formula,
            artifact=config.artifact,
        )

    self.translator_config = MDTranslatorConfig(
        base_url=config.base_url,
        api_key=config.api_key,
        model_id=config.model_id,
        to_lang=config.to_lang,
        custom_prompt=config.custom_prompt,
        temperature=config.temperature,
        timeout=config.timeout,
        chunk_size=config.chunk_size,
        concurrent=config.concurrent,
    )
    self.md2html_exporter_config = MD2HTMLExporterConfig(cdn=config.cdn)
    self.convert_engine = config.convert_engine
|
||||
|
||||
def _get_document_md(self,convert_engin:ConvertEnginType|None,convert_config:X2MarkdownConverterConfig):
|
||||
if self.document_original is None:
|
||||
raise RuntimeError("file has not been read yet. Call read_path or read_bytes first.")
|
||||
# 获取缓存的解析后文件
|
||||
@@ -51,7 +95,7 @@ class MarkdownBasedWorkflow(BaseWorkflow, HTMLExportable, MDFormatsExportable):
|
||||
if not isinstance(convert_config, config_class):
|
||||
raise TypeError(
|
||||
f"未传入正确的convert_config,应传入{config_class.__name__}类型,现为{type(convert_config).__name__}类型")
|
||||
converter = converter_class(convert_config, logger=self.logger)
|
||||
converter = converter_class(convert_config)
|
||||
else:
|
||||
raise ValueError(f"不存在{convert_engin}解析引擎")
|
||||
document_md = converter.convert(self.document_original)
|
||||
@@ -59,67 +103,54 @@ class MarkdownBasedWorkflow(BaseWorkflow, HTMLExportable, MDFormatsExportable):
|
||||
md_based_convert_cacher.cache_result(document_md, self.document_original, convert_engin, convert_config)
|
||||
return document_md
|
||||
|
||||
@overload
|
||||
def translate(self, convert_engin: None,
|
||||
convert_config: x2md_convert_config_type | None, translate_config: MDTranslateConfig) -> Self:
|
||||
...
|
||||
|
||||
@overload
|
||||
def translate(self, convert_engin: Literal["docling"],
|
||||
convert_config: "ConverterDoclingConfig", translate_config: MDTranslateConfig) -> Self:
|
||||
...
|
||||
|
||||
@overload
|
||||
def translate(self, convert_engin: Literal["mineru"],
|
||||
convert_config: ConverterMineruConfig, translate_config: MDTranslateConfig) -> Self:
|
||||
...
|
||||
|
||||
def translate(self, convert_engin: convert_engin_type | None,
|
||||
convert_config: x2md_convert_config_type | None,
|
||||
translate_config: MDTranslateConfig) -> Self:
|
||||
document_md = self._get_document_md(convert_engin, convert_config)
|
||||
def translate(self) -> Self:
|
||||
convert_engin,convert_config=self.convert_engine,self.converter_config
|
||||
translator_config=self.translator_config
|
||||
document_md = self._get_document_md(convert_engin,convert_config)
|
||||
# 翻译解析后文件
|
||||
translator = MDTranslator(translate_config)
|
||||
translator = MDTranslator(translator_config)
|
||||
translator.translate(document_md)
|
||||
self.document_translated = document_md
|
||||
return self
|
||||
|
||||
async def translate_async(self, convert_engin: Literal["mineru", "docling"] | None,
|
||||
convert_config: x2md_convert_config_type | None,
|
||||
translate_config: MDTranslateConfig) -> Self:
|
||||
|
||||
async def translate_async(self) -> Self:
|
||||
convert_engin,convert_config=self.convert_engine,self.converter_config
|
||||
translator_config=self.translator_config
|
||||
document_md = await asyncio.to_thread(self._get_document_md, convert_engin, convert_config)
|
||||
# 翻译解析后文件
|
||||
translator = MDTranslator(translate_config)
|
||||
translator = MDTranslator(translator_config)
|
||||
await translator.translate_async(document_md)
|
||||
self.document_translated = document_md
|
||||
return self
|
||||
|
||||
def export_to_html(self, export_config: MD2HTMLExportConfig | None = None) -> str:
|
||||
def export_to_html(self, export_config: MD2HTMLExporterConfig | None = None) -> str:
    """Export through MD2HTMLExporter and return the decoded content.

    Args:
        export_config: Exporter settings; when falsy, the config built in
            ``__init__`` (``self.md2html_exporter_config``) is used instead.
    """
    effective_config = export_config if export_config else self.md2html_exporter_config
    exporter = MD2HTMLExporter(effective_config)
    return self._export(exporter).content.decode()
|
||||
|
||||
def export_to_markdown(self, export_config: MD2MDExportConfig | None = None) -> str:
|
||||
def export_to_markdown(self, export_config: X2MarkdownConverterConfig | None = None) -> str:
    """Export through MD2MDExporter and return the decoded content.

    NOTE(review): ``export_config`` is never read here, and its annotation
    names a converter config rather than an exporter config - confirm the
    intended type.
    """
    docu = self._export(MD2MDExporter())
    return docu.content.decode()
|
||||
|
||||
def export_to_markdown_zip(self, export_config: MD2MDZIPExportConfig | None = None) -> bytes:
|
||||
def export_to_markdown_zip(self, export_config: X2MarkdownConverterConfig | None = None) -> bytes:
    """Export through MD2MDZipExporter and return the exported content as is.

    NOTE(review): ``export_config`` is never read here, and its annotation
    names a converter config rather than an exporter config - confirm the
    intended type.
    """
    docu = self._export(MD2MDZipExporter())
    return docu.content
|
||||
|
||||
def save_as_html(self, name: str = None, output_dir: Path | str = "./output",
|
||||
export_config: MD2HTMLExportConfig | None = None) -> Self:
|
||||
self._save(exporter=MD2HTMLExporter(), name=name, output_dir=output_dir)
|
||||
export_config: MD2HTMLExporterConfig | None = None) -> Self:
|
||||
export_config = export_config or self.md2html_exporter_config
|
||||
self._save(exporter=MD2HTMLExporter(config=export_config), name=name, output_dir=output_dir)
|
||||
return self
|
||||
|
||||
def save_as_markdown(self, name: str = None, output_dir: Path | str = "./output",
|
||||
export_config: MD2MDExportConfig | None = None) -> Self:
|
||||
export_config=None) -> Self:
|
||||
|
||||
self._save(exporter=MD2MDExporter(), name=name, output_dir=output_dir)
|
||||
return self
|
||||
|
||||
def save_as_markdown_zip(self, name: str = None, output_dir: Path | str = "./output",
|
||||
export_config: MD2MDZIPExportConfig | None = None) -> Self:
|
||||
export_config=None) -> Self:
|
||||
|
||||
self._save(exporter=MD2MDZipExporter(), name=name, output_dir=output_dir)
|
||||
return self
|
||||
|
||||
@@ -1,17 +1,17 @@
|
||||
from pathlib import Path
|
||||
from typing import Self
|
||||
|
||||
from docutranslate.exporter.txt2x.txt2html_exporter import TXT2HTMLExportConfig, TXT2HTMLExporter
|
||||
from docutranslate.exporter.txt2x.txt2txt_exporter import TXT2TXTExporter
|
||||
from docutranslate.workflow.base_workflow import BaseWorkflow
|
||||
from docutranslate.exporter.txt.txt2html_exporter import TXT2HTMLExporterConfig, TXT2HTMLExporter
|
||||
from docutranslate.exporter.txt.txt2txt_exporter import TXT2TXTExporter
|
||||
from docutranslate.workflow.base import Workflow
|
||||
from docutranslate.workflow.interfaces import HTMLExportable, TXTExportable
|
||||
from docutranslate.translater.txt_translator import TXTTranslateConfig, TXTTranslator
|
||||
from docutranslate.translator.ai_translator.txt_translator import TXTTranslatorConfig, TXTTranslator
|
||||
|
||||
|
||||
|
||||
class TXTWorkflow(BaseWorkflow, HTMLExportable, TXTExportable):
|
||||
class TXTWorkflow(Workflow, HTMLExportable, TXTExportable):
|
||||
|
||||
def translate(self, translate_config: TXTTranslateConfig) -> Self:
|
||||
def translate(self, translate_config: TXTTranslatorConfig) -> Self:
|
||||
document = self.document_original.copy()
|
||||
# 翻译解析后文件
|
||||
translator = TXTTranslator(translate_config)
|
||||
@@ -19,7 +19,7 @@ class TXTWorkflow(BaseWorkflow, HTMLExportable, TXTExportable):
|
||||
self.document_translated = document
|
||||
return self
|
||||
|
||||
async def translate_async(self, translate_config: TXTTranslateConfig) -> Self:
|
||||
async def translate_async(self, translate_config: TXTTranslatorConfig) -> Self:
|
||||
document = self.document_original.copy()
|
||||
# 翻译解析后文件
|
||||
translator = TXTTranslator(translate_config)
|
||||
@@ -27,7 +27,7 @@ class TXTWorkflow(BaseWorkflow, HTMLExportable, TXTExportable):
|
||||
self.document_translated = document
|
||||
return self
|
||||
|
||||
def export_to_html(self, export_config: TXT2HTMLExportConfig=None) -> str:
|
||||
def export_to_html(self, export_config: TXT2HTMLExporterConfig | None = None) -> str:
    """Export through TXT2HTMLExporter and return the decoded content.

    Args:
        export_config: Passed straight to TXT2HTMLExporter; may be None.
    """
    docu = self._export(TXT2HTMLExporter(export_config))
    return docu.content.decode()
|
||||
|
||||
@@ -36,7 +36,7 @@ class TXTWorkflow(BaseWorkflow, HTMLExportable, TXTExportable):
|
||||
return docu.content.decode()
|
||||
|
||||
def save_as_html(self, name: str = None, output_dir: Path | str = "./output",
|
||||
export_config: TXT2HTMLExportConfig | None = None) -> Self:
|
||||
export_config: TXT2HTMLExporterConfig | None = None) -> Self:
|
||||
self._save(exporter=TXT2HTMLExporter(export_config), name=name, output_dir=output_dir)
|
||||
return self
|
||||
|
||||
|
||||
Reference in New Issue
Block a user