From 8f68eddeb175a482c1a237e36301d31711f1ce84 Mon Sep 17 00:00:00 2001
From: xunbu <xunbu3@qq.com>
Date: Wed, 16 Jul 2025 11:52:19 +0800
Subject: [PATCH] =?UTF-8?q?=E6=9B=B4=E9=AB=98=E7=B2=92=E5=BA=A6=E7=9A=84?=
 =?UTF-8?q?=E5=8D=8F=E7=A8=8B?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

---
 docutranslate/converter/converter_mineru.py | 41 ++++++++++++++++++++-
 docutranslate/translater.py                 |  9 +----
 2 files changed, 42 insertions(+), 8 deletions(-)

diff --git a/docutranslate/converter/converter_mineru.py b/docutranslate/converter/converter_mineru.py
index 4b0135d..2207b4b 100644
--- a/docutranslate/converter/converter_mineru.py
+++ b/docutranslate/converter/converter_mineru.py
@@ -120,7 +120,7 @@ class ConverterMineru(Converter):
         time1 = time.time()
         batch_id = await self.upload_async(document)
         file_url = await self.get_file_url_async(batch_id)
-        result = await asyncio.to_thread(get_md_from_zip_url_with_inline_images,file_url)
+        result = await get_md_from_zip_url_with_inline_images_async(file_url)
         self.logger.info(f"已转换为markdown，耗时{time.time() - time1}秒")
         return result
 
@@ -171,5 +171,44 @@ def get_md_from_zip_url_with_inline_images(
         raise Exception(f"发生未知错误: {e}")
 
 
+async def get_md_from_zip_url_with_inline_images_async(
+        zip_url: str,
+        filename_in_zip: str = "full.md",
+        encoding: str = "utf-8"
+) -> str | None:
+    """
+    从给定的ZIP文件URL中下载并提取指定文件的内容，
+    并将Markdown文件中的相对路径图片转换为内联Base64图片。
+
+    Args:
+        zip_url (str): ZIP文件的下载链接。
+        filename_in_zip (str): ZIP压缩包内目标Markdown文件的名称（包括路径）。
+                               默认为 "full.md"。
+        encoding (str): 目标文件的预期编码。默认为 "utf-8"。
+
+    Returns:
+        str | None: 如果成功，返回处理后的Markdown文本内容；否则返回 None。
+    """
+    try:
+        print(f"正在从 {zip_url} 下载ZIP文件 (使用 httpx.get)...")
+        response = await client_async.get(zip_url)  # 增加超时
+        response.raise_for_status()
+        print("ZIP文件下载完成。")
+        return await asyncio.to_thread(embed_inline_image_from_zip(response.content, filename_in_zip=filename_in_zip, encoding=encoding))
+
+
+    except httpx.HTTPStatusError as e:
+        raise Exception(f"HTTP 错误 (httpx): {e.response.status_code} - {e.request.url}\n响应内容: {e.response.text[:200]}...")
+    except httpx.RequestError as e:
+        raise Exception(f"下载ZIP文件时发生错误 (httpx): {e}")
+    except zipfile.BadZipFile:
+        raise Exception("错误: 下载的文件不是一个有效的ZIP压缩文件或已损坏。")
+    except UnicodeDecodeError:
+        raise Exception(f"错误: 无法使用 '{encoding}' 编码解码文件 '{filename_in_zip}' 的内容。")
+    except Exception as e:
+        import traceback
+        traceback.print_exc()  # 打印完整的堆栈跟踪，便于调试
+        raise Exception(f"发生未知错误: {e}")
+
 if __name__ == '__main__':
     pass
diff --git a/docutranslate/translater.py b/docutranslate/translater.py
index d0ab63f..7ff5334 100644
--- a/docutranslate/translater.py
+++ b/docutranslate/translater.py
@@ -1,4 +1,3 @@
-import asyncio
 import html
 import io
 import logging
@@ -428,12 +427,8 @@ class FileTranslater:
                                    code=True, output_format: Literal["markdown", "html"] = "markdown",
                                    custom_prompt_translate=None, refine=False,
                                    refine_agent: Agent | None = None, translate_agent: Agent | None = None, save=True):
-        await asyncio.to_thread(
-            self.read_file,
-            file_path,
-            formula=formula,
-            code=code
-        )
+
+        await self.read_file_async(file_path, formula=formula, code=code)
         if refine:
             await self.refine_markdown_by_agent_async(refine_agent)
         await self.translate_markdown_by_agent_async(translate_agent, to_lang=to_lang,