修改code\formula为code_ocr\formula_ocr

This commit is contained in:
xunbu
2025-07-31 10:23:10 +08:00
parent cc8a340461
commit fe2c199ee7
5 changed files with 23 additions and 19 deletions

View File

@@ -1,5 +1,6 @@
from abc import abstractmethod
from dataclasses import dataclass
from typing import Hashable
from docutranslate.converter.base import Converter, ConverterConfig
from docutranslate.ir.document import Document
@@ -8,7 +9,9 @@ from docutranslate.ir.markdown_document import MarkdownDocument
@dataclass(kw_only=True)
class X2MarkdownConverterConfig(ConverterConfig):
...
@abstractmethod
def gethash(self) ->Hashable:
...
class X2MarkdownConverter(Converter):
"""

View File

@@ -22,19 +22,19 @@ IMAGE_RESOLUTION_SCALE = 4
@dataclass(kw_only=True)
class ConverterDoclingConfig(X2MarkdownConverterConfig):
code: bool = True
formula: bool = True
code_ocr: bool = True
formula_ocr: bool = True
artifact: Path | None = None
def gethash(self):
return self.code,self.formula
return self.code_ocr,self.formula_ocr
class ConverterDocling(X2MarkdownConverter):
def __init__(self, config: ConverterDoclingConfig):
super().__init__(config=config)
self.code = config.code
self.formula = config.formula
self.code = config.code_ocr
self.formula = config.formula_ocr
artifact = Path("./docling_artifact")
if artifact.is_dir():
self.logger.info("使用./docling_artifact的本地模型")

View File

@@ -19,10 +19,10 @@ URL = 'https://mineru.net/api/v4/file-urls/batch'
@dataclass(kw_only=True)
class ConverterMineruConfig(X2MarkdownConverterConfig):
mineru_token: str
formula: bool = True
formula_ocr: bool = True
def gethash(self) ->Hashable:
return self.formula
return self.formula_ocr
timeout = httpx.Timeout(
@@ -40,7 +40,7 @@ class ConverterMineru(X2MarkdownConverter):
def __init__(self, config: ConverterMineruConfig, logger: Logger = global_logger):
super().__init__(config=config)
self.mineru_token = config.mineru_token.strip()
self.formula = config.formula
self.formula = config.formula_ocr
self.logger = logger
def _get_header(self):