修改code\formula为code_ocr\formula_ocr
This commit is contained in:
@@ -307,12 +307,12 @@ async def _perform_translation(
|
|||||||
raise ValueError("使用 'mineru' 引擎需要提供 'mineru_token'。")
|
raise ValueError("使用 'mineru' 引擎需要提供 'mineru_token'。")
|
||||||
convert_config = ConverterMineruConfig(
|
convert_config = ConverterMineruConfig(
|
||||||
mineru_token=payload.mineru_token,
|
mineru_token=payload.mineru_token,
|
||||||
formula=payload.formula_ocr
|
formula_ocr=payload.formula_ocr
|
||||||
)
|
)
|
||||||
elif payload.convert_engin == 'docling':
|
elif payload.convert_engin == 'docling':
|
||||||
convert_config = ConverterDoclingConfig(
|
convert_config = ConverterDoclingConfig(
|
||||||
code=payload.code_ocr,
|
code_ocr=payload.code_ocr,
|
||||||
formula=payload.formula_ocr
|
formula_ocr=payload.formula_ocr
|
||||||
)
|
)
|
||||||
|
|
||||||
await workflow.translate_async(
|
await workflow.translate_async(
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
from abc import abstractmethod
|
from abc import abstractmethod
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
|
from typing import Hashable
|
||||||
|
|
||||||
from docutranslate.converter.base import Converter, ConverterConfig
|
from docutranslate.converter.base import Converter, ConverterConfig
|
||||||
from docutranslate.ir.document import Document
|
from docutranslate.ir.document import Document
|
||||||
@@ -8,7 +9,9 @@ from docutranslate.ir.markdown_document import MarkdownDocument
|
|||||||
@dataclass(kw_only=True)
|
@dataclass(kw_only=True)
|
||||||
class X2MarkdownConverterConfig(ConverterConfig):
|
class X2MarkdownConverterConfig(ConverterConfig):
|
||||||
...
|
...
|
||||||
|
@abstractmethod
|
||||||
|
def gethash(self) ->Hashable:
|
||||||
|
...
|
||||||
|
|
||||||
class X2MarkdownConverter(Converter):
|
class X2MarkdownConverter(Converter):
|
||||||
"""
|
"""
|
||||||
|
|||||||
@@ -22,19 +22,19 @@ IMAGE_RESOLUTION_SCALE = 4
|
|||||||
|
|
||||||
@dataclass(kw_only=True)
|
@dataclass(kw_only=True)
|
||||||
class ConverterDoclingConfig(X2MarkdownConverterConfig):
|
class ConverterDoclingConfig(X2MarkdownConverterConfig):
|
||||||
code: bool = True
|
code_ocr: bool = True
|
||||||
formula: bool = True
|
formula_ocr: bool = True
|
||||||
artifact: Path | None = None
|
artifact: Path | None = None
|
||||||
|
|
||||||
def gethash(self):
|
def gethash(self):
|
||||||
return self.code,self.formula
|
return self.code_ocr,self.formula_ocr
|
||||||
|
|
||||||
|
|
||||||
class ConverterDocling(X2MarkdownConverter):
|
class ConverterDocling(X2MarkdownConverter):
|
||||||
def __init__(self, config: ConverterDoclingConfig):
|
def __init__(self, config: ConverterDoclingConfig):
|
||||||
super().__init__(config=config)
|
super().__init__(config=config)
|
||||||
self.code = config.code
|
self.code = config.code_ocr
|
||||||
self.formula = config.formula
|
self.formula = config.formula_ocr
|
||||||
artifact = Path("./docling_artifact")
|
artifact = Path("./docling_artifact")
|
||||||
if artifact.is_dir():
|
if artifact.is_dir():
|
||||||
self.logger.info("使用./docling_artifact的本地模型")
|
self.logger.info("使用./docling_artifact的本地模型")
|
||||||
|
|||||||
@@ -19,10 +19,10 @@ URL = 'https://mineru.net/api/v4/file-urls/batch'
|
|||||||
@dataclass(kw_only=True)
|
@dataclass(kw_only=True)
|
||||||
class ConverterMineruConfig(X2MarkdownConverterConfig):
|
class ConverterMineruConfig(X2MarkdownConverterConfig):
|
||||||
mineru_token: str
|
mineru_token: str
|
||||||
formula: bool = True
|
formula_ocr: bool = True
|
||||||
|
|
||||||
def gethash(self) ->Hashable:
|
def gethash(self) ->Hashable:
|
||||||
return self.formula
|
return self.formula_ocr
|
||||||
|
|
||||||
|
|
||||||
timeout = httpx.Timeout(
|
timeout = httpx.Timeout(
|
||||||
@@ -40,7 +40,7 @@ class ConverterMineru(X2MarkdownConverter):
|
|||||||
def __init__(self, config: ConverterMineruConfig, logger: Logger = global_logger):
|
def __init__(self, config: ConverterMineruConfig, logger: Logger = global_logger):
|
||||||
super().__init__(config=config)
|
super().__init__(config=config)
|
||||||
self.mineru_token = config.mineru_token.strip()
|
self.mineru_token = config.mineru_token.strip()
|
||||||
self.formula = config.formula
|
self.formula = config.formula_ocr
|
||||||
self.logger = logger
|
self.logger = logger
|
||||||
|
|
||||||
def _get_header(self):
|
def _get_header(self):
|
||||||
|
|||||||
@@ -26,20 +26,21 @@ class TXTWorkflow(Workflow[TXTWorkflowConfig, Document, Document], HTMLExportabl
|
|||||||
if sub_config:
|
if sub_config:
|
||||||
sub_config.logger = config.logger
|
sub_config.logger = config.logger
|
||||||
|
|
||||||
def translate(self) -> Self:
|
def _pre_translate(self,document_original:Document):
|
||||||
|
document = document_original.copy()
|
||||||
translate_config = self.config.translator_config
|
translate_config = self.config.translator_config
|
||||||
document = self.document_original.copy()
|
|
||||||
# 翻译解析后文件
|
|
||||||
translator = TXTTranslator(translate_config)
|
translator = TXTTranslator(translate_config)
|
||||||
|
return document,translator
|
||||||
|
|
||||||
|
|
||||||
|
def translate(self) -> Self:
|
||||||
|
document, translator=self._pre_translate(self.document_original)
|
||||||
translator.translate(document)
|
translator.translate(document)
|
||||||
self.document_translated = document
|
self.document_translated = document
|
||||||
return self
|
return self
|
||||||
|
|
||||||
async def translate_async(self) -> Self:
|
async def translate_async(self) -> Self:
|
||||||
translate_config = self.config.translator_config
|
document, translator = self._pre_translate(self.document_original)
|
||||||
document = self.document_original.copy()
|
|
||||||
# 翻译解析后文件
|
|
||||||
translator = TXTTranslator(translate_config)
|
|
||||||
await translator.translate_async(document)
|
await translator.translate_async(document)
|
||||||
self.document_translated = document
|
self.document_translated = document
|
||||||
return self
|
return self
|
||||||
|
|||||||
Reference in New Issue
Block a user