修改code\formula为code_ocr\formula_ocr

This commit is contained in:
xunbu
2025-07-31 10:23:10 +08:00
parent cc8a340461
commit fe2c199ee7
5 changed files with 23 additions and 19 deletions

View File

@@ -307,12 +307,12 @@ async def _perform_translation(
raise ValueError("使用 'mineru' 引擎需要提供 'mineru_token'")
convert_config = ConverterMineruConfig(
mineru_token=payload.mineru_token,
formula=payload.formula_ocr
formula_ocr=payload.formula_ocr
)
elif payload.convert_engin == 'docling':
convert_config = ConverterDoclingConfig(
code=payload.code_ocr,
formula=payload.formula_ocr
code_ocr=payload.code_ocr,
formula_ocr=payload.formula_ocr
)
await workflow.translate_async(

View File

@@ -1,5 +1,6 @@
from abc import abstractmethod
from dataclasses import dataclass
from typing import Hashable
from docutranslate.converter.base import Converter, ConverterConfig
from docutranslate.ir.document import Document
@@ -8,7 +9,9 @@ from docutranslate.ir.markdown_document import MarkdownDocument
@dataclass(kw_only=True)
class X2MarkdownConverterConfig(ConverterConfig):
...
@abstractmethod
def gethash(self) ->Hashable:
...
class X2MarkdownConverter(Converter):
"""

View File

@@ -22,19 +22,19 @@ IMAGE_RESOLUTION_SCALE = 4
@dataclass(kw_only=True)
class ConverterDoclingConfig(X2MarkdownConverterConfig):
code: bool = True
formula: bool = True
code_ocr: bool = True
formula_ocr: bool = True
artifact: Path | None = None
def gethash(self):
return self.code,self.formula
return self.code_ocr,self.formula_ocr
class ConverterDocling(X2MarkdownConverter):
def __init__(self, config: ConverterDoclingConfig):
super().__init__(config=config)
self.code = config.code
self.formula = config.formula
self.code = config.code_ocr
self.formula = config.formula_ocr
artifact = Path("./docling_artifact")
if artifact.is_dir():
self.logger.info("使用./docling_artifact的本地模型")

View File

@@ -19,10 +19,10 @@ URL = 'https://mineru.net/api/v4/file-urls/batch'
@dataclass(kw_only=True)
class ConverterMineruConfig(X2MarkdownConverterConfig):
mineru_token: str
formula: bool = True
formula_ocr: bool = True
def gethash(self) ->Hashable:
return self.formula
return self.formula_ocr
timeout = httpx.Timeout(
@@ -40,7 +40,7 @@ class ConverterMineru(X2MarkdownConverter):
def __init__(self, config: ConverterMineruConfig, logger: Logger = global_logger):
super().__init__(config=config)
self.mineru_token = config.mineru_token.strip()
self.formula = config.formula
self.formula = config.formula_ocr
self.logger = logger
def _get_header(self):

View File

@@ -26,20 +26,21 @@ class TXTWorkflow(Workflow[TXTWorkflowConfig, Document, Document], HTMLExportabl
if sub_config:
sub_config.logger = config.logger
def translate(self) -> Self:
def _pre_translate(self,document_original:Document):
document = document_original.copy()
translate_config = self.config.translator_config
document = self.document_original.copy()
# 翻译解析后文件
translator = TXTTranslator(translate_config)
return document,translator
def translate(self) -> Self:
document, translator=self._pre_translate(self.document_original)
translator.translate(document)
self.document_translated = document
return self
async def translate_async(self) -> Self:
translate_config = self.config.translator_config
document = self.document_original.copy()
# 翻译解析后文件
translator = TXTTranslator(translate_config)
document, translator = self._pre_translate(self.document_original)
await translator.translate_async(document)
self.document_translated = document
return self