修复转换markdown时部分日志未显示、不使用markdown解析引擎时出现的问题
This commit is contained in:
@@ -441,10 +441,11 @@ async def _perform_translation(
|
||||
)
|
||||
converter_config = None
|
||||
if payload.convert_engine == 'mineru':
|
||||
converter_config = ConverterMineruConfig(mineru_token=payload.mineru_token,
|
||||
converter_config = ConverterMineruConfig(logger=task_logger, mineru_token=payload.mineru_token,
|
||||
formula_ocr=payload.formula_ocr)
|
||||
elif payload.convert_engine == 'docling' and DOCLING_EXIST:
|
||||
converter_config = ConverterDoclingConfig(code_ocr=payload.code_ocr, formula_ocr=payload.formula_ocr)
|
||||
converter_config = ConverterDoclingConfig(logger=task_logger, code_ocr=payload.code_ocr,
|
||||
formula_ocr=payload.formula_ocr)
|
||||
html_exporter_config = MD2HTMLExporterConfig(cdn=True)
|
||||
workflow_config = MarkdownBasedWorkflowConfig(
|
||||
convert_engine=payload.convert_engine, converter_config=converter_config,
|
||||
|
||||
@@ -13,8 +13,13 @@ class MDBasedCovertCacher:
|
||||
self.cache_dict = OrderedDict()
|
||||
|
||||
@staticmethod
|
||||
def _get_hashcode(document: Document, convert_engin: str, convert_config: ConverterConfig) -> str:
|
||||
obj = (document.suffix, document.content, convert_engin, convert_config.gethash())
|
||||
def _get_hashcode(document: Document, convert_engin: str, convert_config: ConverterConfig|None) -> str:
|
||||
if convert_config :
|
||||
convert_config_hash=convert_config.gethash()
|
||||
else:
|
||||
convert_config_hash=None
|
||||
|
||||
obj = (document.suffix, document.content, convert_engin, convert_config_hash)
|
||||
return str(hash(obj))
|
||||
|
||||
def get_cached_result(self, document: Document, convert_engin: str,
|
||||
|
||||
@@ -10,16 +10,19 @@ from docutranslate.logger import global_logger
|
||||
@dataclass(kw_only=True)
|
||||
class ConverterConfig(ABC):
|
||||
logger: Logger | None = None
|
||||
|
||||
@abstractmethod
|
||||
def gethash(self) -> Hashable:
|
||||
...
|
||||
|
||||
|
||||
|
||||
class Converter(ABC):
|
||||
def __init__(self, config: ConverterConfig | None = None):
    """Store the configuration and choose the logger for this converter.

    The logger is ``config.logger`` when a config is given and its logger
    is set; in every other case it is ``global_logger``.

    Args:
        config: Converter settings; may be omitted.
    """
    self.config = config
    # Explicit None check: a config object that happens to be falsy must
    # still have its logger honoured.
    configured_logger = config.logger if config is not None else None
    self.logger = configured_logger or global_logger
|
||||
|
||||
@abstractmethod
|
||||
def convert(self, document: Document) -> Document:
|
||||
|
||||
@@ -37,11 +37,10 @@ client_async = httpx.AsyncClient(trust_env=False, timeout=timeout, proxy=None, v
|
||||
|
||||
|
||||
class ConverterMineru(X2MarkdownConverter):
|
||||
def __init__(self, config: ConverterMineruConfig):
    """Initialise the converter from its configuration.

    Args:
        config: Supplies ``mineru_token`` and ``formula_ocr``; it is also
            forwarded to the parent initialiser.
    """
    # The logger is no longer assigned here. Presumably the parent
    # initialiser derives self.logger from config.logger -- confirm
    # against X2MarkdownConverter.
    super().__init__(config=config)
    # Surrounding whitespace is stripped from the token before it is stored.
    self.mineru_token = config.mineru_token.strip()
    self.formula = config.formula_ocr
|
||||
|
||||
def _get_header(self):
|
||||
return {
|
||||
|
||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user