修改code\formula为code_ocr\formula_ocr
This commit is contained in:
@@ -307,12 +307,12 @@ async def _perform_translation(
|
||||
raise ValueError("使用 'mineru' 引擎需要提供 'mineru_token'。")
|
||||
convert_config = ConverterMineruConfig(
|
||||
mineru_token=payload.mineru_token,
|
||||
formula=payload.formula_ocr
|
||||
formula_ocr=payload.formula_ocr
|
||||
)
|
||||
elif payload.convert_engin == 'docling':
|
||||
convert_config = ConverterDoclingConfig(
|
||||
code=payload.code_ocr,
|
||||
formula=payload.formula_ocr
|
||||
code_ocr=payload.code_ocr,
|
||||
formula_ocr=payload.formula_ocr
|
||||
)
|
||||
|
||||
await workflow.translate_async(
|
||||
|
||||
@@ -1,5 +1,6 @@
|
||||
from abc import abstractmethod
|
||||
from dataclasses import dataclass
|
||||
from typing import Hashable
|
||||
|
||||
from docutranslate.converter.base import Converter, ConverterConfig
|
||||
from docutranslate.ir.document import Document
|
||||
@@ -8,7 +9,9 @@ from docutranslate.ir.markdown_document import MarkdownDocument
|
||||
@dataclass(kw_only=True)
|
||||
class X2MarkdownConverterConfig(ConverterConfig):
|
||||
...
|
||||
|
||||
@abstractmethod
|
||||
def gethash(self) ->Hashable:
|
||||
...
|
||||
|
||||
class X2MarkdownConverter(Converter):
|
||||
"""
|
||||
|
||||
@@ -22,19 +22,19 @@ IMAGE_RESOLUTION_SCALE = 4
|
||||
|
||||
@dataclass(kw_only=True)
|
||||
class ConverterDoclingConfig(X2MarkdownConverterConfig):
|
||||
code: bool = True
|
||||
formula: bool = True
|
||||
code_ocr: bool = True
|
||||
formula_ocr: bool = True
|
||||
artifact: Path | None = None
|
||||
|
||||
def gethash(self):
|
||||
return self.code,self.formula
|
||||
return self.code_ocr,self.formula_ocr
|
||||
|
||||
|
||||
class ConverterDocling(X2MarkdownConverter):
|
||||
def __init__(self, config: ConverterDoclingConfig):
|
||||
super().__init__(config=config)
|
||||
self.code = config.code
|
||||
self.formula = config.formula
|
||||
self.code = config.code_ocr
|
||||
self.formula = config.formula_ocr
|
||||
artifact = Path("./docling_artifact")
|
||||
if artifact.is_dir():
|
||||
self.logger.info("使用./docling_artifact的本地模型")
|
||||
|
||||
@@ -19,10 +19,10 @@ URL = 'https://mineru.net/api/v4/file-urls/batch'
|
||||
@dataclass(kw_only=True)
|
||||
class ConverterMineruConfig(X2MarkdownConverterConfig):
|
||||
mineru_token: str
|
||||
formula: bool = True
|
||||
formula_ocr: bool = True
|
||||
|
||||
def gethash(self) ->Hashable:
|
||||
return self.formula
|
||||
return self.formula_ocr
|
||||
|
||||
|
||||
timeout = httpx.Timeout(
|
||||
@@ -40,7 +40,7 @@ class ConverterMineru(X2MarkdownConverter):
|
||||
def __init__(self, config: ConverterMineruConfig, logger: Logger = global_logger):
|
||||
super().__init__(config=config)
|
||||
self.mineru_token = config.mineru_token.strip()
|
||||
self.formula = config.formula
|
||||
self.formula = config.formula_ocr
|
||||
self.logger = logger
|
||||
|
||||
def _get_header(self):
|
||||
|
||||
@@ -26,20 +26,21 @@ class TXTWorkflow(Workflow[TXTWorkflowConfig, Document, Document], HTMLExportabl
|
||||
if sub_config:
|
||||
sub_config.logger = config.logger
|
||||
|
||||
def translate(self) -> Self:
|
||||
def _pre_translate(self,document_original:Document):
|
||||
document = document_original.copy()
|
||||
translate_config = self.config.translator_config
|
||||
document = self.document_original.copy()
|
||||
# 翻译解析后文件
|
||||
translator = TXTTranslator(translate_config)
|
||||
return document,translator
|
||||
|
||||
|
||||
def translate(self) -> Self:
|
||||
document, translator=self._pre_translate(self.document_original)
|
||||
translator.translate(document)
|
||||
self.document_translated = document
|
||||
return self
|
||||
|
||||
async def translate_async(self) -> Self:
|
||||
translate_config = self.config.translator_config
|
||||
document = self.document_original.copy()
|
||||
# 翻译解析后文件
|
||||
translator = TXTTranslator(translate_config)
|
||||
document, translator = self._pre_translate(self.document_original)
|
||||
await translator.translate_async(document)
|
||||
self.document_translated = document
|
||||
return self
|
||||
|
||||
Reference in New Issue
Block a user