diff --git a/docutranslate/app.py b/docutranslate/app.py index b5450b1..e49a329 100644 --- a/docutranslate/app.py +++ b/docutranslate/app.py @@ -307,12 +307,12 @@ async def _perform_translation( raise ValueError("使用 'mineru' 引擎需要提供 'mineru_token'。") convert_config = ConverterMineruConfig( mineru_token=payload.mineru_token, - formula=payload.formula_ocr + formula_ocr=payload.formula_ocr ) elif payload.convert_engin == 'docling': convert_config = ConverterDoclingConfig( - code=payload.code_ocr, - formula=payload.formula_ocr + code_ocr=payload.code_ocr, + formula_ocr=payload.formula_ocr ) await workflow.translate_async( diff --git a/docutranslate/converter/x2md/base.py b/docutranslate/converter/x2md/base.py index 658e7fc..efc2749 100644 --- a/docutranslate/converter/x2md/base.py +++ b/docutranslate/converter/x2md/base.py @@ -1,5 +1,6 @@ from abc import abstractmethod from dataclasses import dataclass +from typing import Hashable from docutranslate.converter.base import Converter, ConverterConfig from docutranslate.ir.document import Document @@ -8,7 +9,9 @@ from docutranslate.ir.markdown_document import MarkdownDocument @dataclass(kw_only=True) class X2MarkdownConverterConfig(ConverterConfig): ... - + @abstractmethod + def gethash(self) ->Hashable: + ... class X2MarkdownConverter(Converter): """ diff --git a/docutranslate/converter/x2md/converter_docling.py b/docutranslate/converter/x2md/converter_docling.py index 4a64cf7..52e90d7 100644 --- a/docutranslate/converter/x2md/converter_docling.py +++ b/docutranslate/converter/x2md/converter_docling.py @@ -22,19 +22,19 @@ IMAGE_RESOLUTION_SCALE = 4 @dataclass(kw_only=True) class ConverterDoclingConfig(X2MarkdownConverterConfig): - code: bool = True - formula: bool = True + code_ocr: bool = True + formula_ocr: bool = True artifact: Path | None = None def gethash(self): - return self.code,self.formula + return self.code_ocr,self.formula_ocr class ConverterDocling(X2MarkdownConverter): def __init__(self, config: ConverterDoclingConfig): super().__init__(config=config) - self.code = config.code - self.formula = config.formula + self.code = config.code_ocr + self.formula = config.formula_ocr artifact = Path("./docling_artifact") if artifact.is_dir(): self.logger.info("使用./docling_artifact的本地模型") diff --git a/docutranslate/converter/x2md/converter_mineru.py b/docutranslate/converter/x2md/converter_mineru.py index 820d89b..fd670de 100644 --- a/docutranslate/converter/x2md/converter_mineru.py +++ b/docutranslate/converter/x2md/converter_mineru.py @@ -19,10 +19,10 @@ URL = 'https://mineru.net/api/v4/file-urls/batch' @dataclass(kw_only=True) class ConverterMineruConfig(X2MarkdownConverterConfig): mineru_token: str - formula: bool = True + formula_ocr: bool = True def gethash(self) ->Hashable: - return self.formula + return self.formula_ocr timeout = httpx.Timeout( @@ -40,7 +40,7 @@ class ConverterMineru(X2MarkdownConverter): def __init__(self, config: ConverterMineruConfig, logger: Logger = global_logger): super().__init__(config=config) self.mineru_token = config.mineru_token.strip() - self.formula = config.formula + self.formula = config.formula_ocr self.logger = logger def _get_header(self): diff --git a/docutranslate/workflow/txt_workflow.py b/docutranslate/workflow/txt_workflow.py index 874e774..da524d0 100644 --- a/docutranslate/workflow/txt_workflow.py +++ b/docutranslate/workflow/txt_workflow.py @@ -26,20 +26,21 @@ class TXTWorkflow(Workflow[TXTWorkflowConfig, Document, Document], HTMLExportabl if sub_config: sub_config.logger = config.logger - def translate(self) -> Self: + def _pre_translate(self,document_original:Document): + document = document_original.copy() translate_config = self.config.translator_config - document = self.document_original.copy() - # 翻译解析后文件 translator = TXTTranslator(translate_config) + return document,translator + + + def translate(self) -> Self: + document, translator=self._pre_translate(self.document_original) translator.translate(document) self.document_translated = document return self async def translate_async(self) -> Self: - translate_config = self.config.translator_config - document = self.document_original.copy() - # 翻译解析后文件 - translator = TXTTranslator(translate_config) + document, translator = self._pre_translate(self.document_original) await translator.translate_async(document) self.document_translated = document return self