From 9775bf479babe648126708e210a03615f273f296 Mon Sep 17 00:00:00 2001 From: xunbu Date: Sat, 9 Aug 2025 21:15:43 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E5=A4=8D=E8=BD=AC=E6=8D=A2markdown?= =?UTF-8?q?=E6=97=B6=E9=83=A8=E5=88=86=E6=97=A5=E5=BF=97=E6=9C=AA=E6=98=BE?= =?UTF-8?q?=E7=A4=BA=E3=80=81=E4=B8=8D=E4=BD=BF=E7=94=A8markdown=E8=A7=A3?= =?UTF-8?q?=E6=9E=90=E5=BC=95=E6=93=8E=E6=97=B6=E5=87=BA=E7=8E=B0=E7=9A=84?= =?UTF-8?q?=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docutranslate/app.py | 7 ++++--- docutranslate/cacher/md_based_convert_cacher.py | 9 +++++++-- docutranslate/converter/base.py | 11 +++++++---- docutranslate/converter/x2md/converter_mineru.py | 3 +-- docutranslate/static/index.html | 2 +- 5 files changed, 20 insertions(+), 12 deletions(-) diff --git a/docutranslate/app.py b/docutranslate/app.py index 03b0f55..9977096 100644 --- a/docutranslate/app.py +++ b/docutranslate/app.py @@ -441,10 +441,11 @@ async def _perform_translation( ) converter_config = None if payload.convert_engine == 'mineru': - converter_config = ConverterMineruConfig(mineru_token=payload.mineru_token, + converter_config = ConverterMineruConfig(logger=task_logger, mineru_token=payload.mineru_token, formula_ocr=payload.formula_ocr) elif payload.convert_engine == 'docling' and DOCLING_EXIST: - converter_config = ConverterDoclingConfig(code_ocr=payload.code_ocr, formula_ocr=payload.formula_ocr) + converter_config = ConverterDoclingConfig(logger=task_logger, code_ocr=payload.code_ocr, + formula_ocr=payload.formula_ocr) html_exporter_config = MD2HTMLExporterConfig(cdn=True) workflow_config = MarkdownBasedWorkflowConfig( convert_engine=payload.convert_engine, converter_config=converter_config, @@ -1192,4 +1193,4 @@ def run_app(port: int | None = None): if __name__ == "__main__": - run_app() \ No newline at end of file + run_app() diff --git a/docutranslate/cacher/md_based_convert_cacher.py b/docutranslate/cacher/md_based_convert_cacher.py index 213bf2a..a1c4948 100644 --- a/docutranslate/cacher/md_based_convert_cacher.py +++ b/docutranslate/cacher/md_based_convert_cacher.py @@ -13,8 +13,13 @@ class MDBasedCovertCacher: self.cache_dict = OrderedDict() @staticmethod - def _get_hashcode(document: Document, convert_engin: str, convert_config: ConverterConfig) -> str: - obj = (document.suffix, document.content, convert_engin, convert_config.gethash()) + def _get_hashcode(document: Document, convert_engin: str, convert_config: ConverterConfig|None) -> str: + if convert_config : + convert_config_hash=convert_config.gethash() + else: + convert_config_hash=None + + obj = (document.suffix, document.content, convert_engin, convert_config_hash) return str(hash(obj)) def get_cached_result(self, document: Document, convert_engin: str, diff --git a/docutranslate/converter/base.py b/docutranslate/converter/base.py index 1c32396..80e1117 100644 --- a/docutranslate/converter/base.py +++ b/docutranslate/converter/base.py @@ -10,16 +10,19 @@ from docutranslate.logger import global_logger @dataclass(kw_only=True) class ConverterConfig(ABC): logger: Logger | None = None - @abstractmethod - def gethash(self)->Hashable: - ... + @abstractmethod + def gethash(self) -> Hashable: + ... class Converter(ABC): def __init__(self, config: ConverterConfig | None = None): self.config = config - self.logger = config.logger or global_logger + if config: + self.logger = config.logger or global_logger + else: + self.logger = global_logger @abstractmethod def convert(self, document: Document) -> Document: diff --git a/docutranslate/converter/x2md/converter_mineru.py b/docutranslate/converter/x2md/converter_mineru.py index fd670de..6d3ac21 100644 --- a/docutranslate/converter/x2md/converter_mineru.py +++ b/docutranslate/converter/x2md/converter_mineru.py @@ -37,11 +37,10 @@ client_async = httpx.AsyncClient(trust_env=False, timeout=timeout, proxy=None, v class ConverterMineru(X2MarkdownConverter): - def __init__(self, config: ConverterMineruConfig, logger: Logger = global_logger): + def __init__(self, config: ConverterMineruConfig): super().__init__(config=config) self.mineru_token = config.mineru_token.strip() self.formula = config.formula_ocr - self.logger = logger def _get_header(self): return { diff --git a/docutranslate/static/index.html b/docutranslate/static/index.html index 99a3a8e..b491080 100644 --- a/docutranslate/static/index.html +++ b/docutranslate/static/index.html @@ -1 +1 @@ - DocuTranslate - 交互式文档翻译

DocuTranslate

如果上传的文件本身是.md格式,此项可不选。

GitHub主页(欢迎star❤):
https://github.com/xunbu/docutranslate

交流QQ群: 1047781902

任务列表

当前没有任务,点击“新建任务”开始吧!

预览
原文
译文
\ No newline at end of file + DocuTranslate - 交互式文档翻译

DocuTranslate

如果上传的文件本身是.md格式,此项可不选。

GitHub主页(欢迎star❤):
https://github.com/xunbu/docutranslate

交流QQ群: 1047781902

任务列表

当前没有任务,点击“新建任务”开始吧!

预览
原文
译文
\ No newline at end of file