修复转换 Markdown 时部分日志未显示的问题,以及不使用 Markdown 解析引擎时出现的问题

This commit is contained in:
xunbu
2025-08-09 21:15:43 +08:00
parent 31eb711687
commit 9775bf479b
5 changed files with 20 additions and 12 deletions

View File

@@ -441,10 +441,11 @@ async def _perform_translation(
) )
converter_config = None converter_config = None
if payload.convert_engine == 'mineru': if payload.convert_engine == 'mineru':
converter_config = ConverterMineruConfig(mineru_token=payload.mineru_token, converter_config = ConverterMineruConfig(logger=task_logger, mineru_token=payload.mineru_token,
formula_ocr=payload.formula_ocr) formula_ocr=payload.formula_ocr)
elif payload.convert_engine == 'docling' and DOCLING_EXIST: elif payload.convert_engine == 'docling' and DOCLING_EXIST:
converter_config = ConverterDoclingConfig(code_ocr=payload.code_ocr, formula_ocr=payload.formula_ocr) converter_config = ConverterDoclingConfig(logger=task_logger, code_ocr=payload.code_ocr,
formula_ocr=payload.formula_ocr)
html_exporter_config = MD2HTMLExporterConfig(cdn=True) html_exporter_config = MD2HTMLExporterConfig(cdn=True)
workflow_config = MarkdownBasedWorkflowConfig( workflow_config = MarkdownBasedWorkflowConfig(
convert_engine=payload.convert_engine, converter_config=converter_config, convert_engine=payload.convert_engine, converter_config=converter_config,

View File

@@ -13,8 +13,13 @@ class MDBasedCovertCacher:
self.cache_dict = OrderedDict() self.cache_dict = OrderedDict()
@staticmethod @staticmethod
def _get_hashcode(document: Document, convert_engin: str, convert_config: ConverterConfig) -> str: def _get_hashcode(document: Document, convert_engin: str, convert_config: ConverterConfig|None) -> str:
obj = (document.suffix, document.content, convert_engin, convert_config.gethash()) if convert_config :
convert_config_hash=convert_config.gethash()
else:
convert_config_hash=None
obj = (document.suffix, document.content, convert_engin, convert_config_hash)
return str(hash(obj)) return str(hash(obj))
def get_cached_result(self, document: Document, convert_engin: str, def get_cached_result(self, document: Document, convert_engin: str,

View File

@@ -10,16 +10,19 @@ from docutranslate.logger import global_logger
@dataclass(kw_only=True) @dataclass(kw_only=True)
class ConverterConfig(ABC): class ConverterConfig(ABC):
logger: Logger | None = None logger: Logger | None = None
@abstractmethod
def gethash(self)->Hashable:
...
@abstractmethod
def gethash(self) -> Hashable:
...
class Converter(ABC): class Converter(ABC):
def __init__(self, config: ConverterConfig | None = None): def __init__(self, config: ConverterConfig | None = None):
self.config = config self.config = config
if config:
self.logger = config.logger or global_logger self.logger = config.logger or global_logger
else:
self.logger = global_logger
@abstractmethod @abstractmethod
def convert(self, document: Document) -> Document: def convert(self, document: Document) -> Document:

View File

@@ -37,11 +37,10 @@ client_async = httpx.AsyncClient(trust_env=False, timeout=timeout, proxy=None, v
class ConverterMineru(X2MarkdownConverter): class ConverterMineru(X2MarkdownConverter):
def __init__(self, config: ConverterMineruConfig, logger: Logger = global_logger): def __init__(self, config: ConverterMineruConfig):
super().__init__(config=config) super().__init__(config=config)
self.mineru_token = config.mineru_token.strip() self.mineru_token = config.mineru_token.strip()
self.formula = config.formula_ocr self.formula = config.formula_ocr
self.logger = logger
def _get_header(self): def _get_header(self):
return { return {

File diff suppressed because one or more lines are too long