diff --git a/.idea/workspace.xml b/.idea/workspace.xml
index 8c7b3ca..afea7d0 100644
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
@@ -7,7 +7,6 @@
-
@@ -37,62 +36,62 @@
- {
- "keyToString": {
- "DefaultHtmlFileTemplate": "HTML File",
- "JavaScript 调试.output.html (1).executor": "Run",
- "JavaScript 调试.output.html.executor": "Run",
- "JavaScript 调试.regex.md_中文.html.executor": "Run",
- "JavaScript 调试.regex_中文.html.executor": "Run",
- "JavaScript 调试.test.html.executor": "Run",
- "JavaScript 调试.test2.html.executor": "Run",
- "JavaScript 调试.test2_英文.html.executor": "Run",
- "JavaScript 调试.test4-1_中文.html.executor": "Run",
- "JavaScript 调试.互联网认证授权机制.html.executor": "Run",
- "JavaScript 调试.互联网认证授权机制_英文.html.executor": "Run",
- "JavaScript 调试.毕业论文_英文.html.executor": "Run",
- "ModuleVcsDetector.initialDetectionPerformed": "true",
- "Python 测试.Python 测试 (markdown_mask.py 内).executor": "Run",
- "Python 测试.markdown_mask.Test.test_basic_link_masking 的 Python 测试.executor": "Run",
- "Python 测试.pytest (test_html.py 内).executor": "Run",
- "Python.1test.executor": "Run",
- "Python.2test2 (1).executor": "Run",
- "Python.PDFtranslater (1).executor": "Run",
- "Python.PDFtranslater (2).executor": "Run",
- "Python.agent.executor": "Debug",
- "Python.agent_utils.executor": "Run",
- "Python.app (1).executor": "Run",
- "Python.app.executor": "Run",
- "Python.app2.executor": "Run",
- "Python.app_test (1).executor": "Run",
- "Python.convert.executor": "Run",
- "Python.converter_docling.executor": "Run",
- "Python.converter_mineru.executor": "Run",
- "Python.markdown_splitter.executor": "Debug",
- "Python.markdown_utils.executor": "Run",
- "Python.test.executor": "Run",
- "Python.test1.executor": "Run",
- "Python.test2.executor": "Run",
- "Python.test3.executor": "Run",
- "Python.test4.executor": "Run",
- "Python.testhtml.executor": "Run",
- "Python.translater.executor": "Run",
- "Python.切分测试.executor": "Run",
- "RunOnceActivity.ShowReadmeOnStart": "true",
- "RunOnceActivity.TerminalTabsStorage.copyFrom.TerminalArrangementManager": "true",
- "RunOnceActivity.git.unshallow": "true",
- "git-widget-placeholder": "dev",
- "last_opened_file_path": "C:/Users/jxgm/Desktop/translate/docutranslate",
- "list.type.of.created.stylesheet": "CSS",
- "node.js.detected.package.eslint": "true",
- "node.js.detected.package.tslint": "true",
- "node.js.selected.package.eslint": "(autodetect)",
- "node.js.selected.package.tslint": "(autodetect)",
- "nodejs_package_manager_path": "npm",
- "settings.editor.selected.configurable": "preferences.pluginManager",
- "vue.rearranger.settings.migration": "true"
+
+}]]>
diff --git a/docutranslate/converter/converter_mineru.py b/docutranslate/converter/converter_mineru.py
index 138def7..9a10730 100644
--- a/docutranslate/converter/converter_mineru.py
+++ b/docutranslate/converter/converter_mineru.py
@@ -8,14 +8,22 @@ from docutranslate.utils.markdown_utils import embed_inline_image_from_zip
URL = 'https://mineru.net/api/v4/file-urls/batch'
-client = httpx.Client(trust_env=False)
+timeout = httpx.Timeout(
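+ connect=5.0, # connect timeout: max seconds to establish a connection
+ read=120.0, # read timeout: max seconds to wait for each chunk of response data
+ write=120.0, # write timeout: max seconds to wait for each chunk of request data to be sent
+ pool=1.0 # pool timeout: max seconds to wait for a free connection from the pool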
+)
+
+
+client = httpx.Client(trust_env=False,timeout=timeout)
# TODO: 提供更详细的logger
class ConverterMineru(Converter):
def __init__(self, token: str, formula=True):
self.mineru_token = token.strip()
- self.client_async = httpx.AsyncClient()
+ self.client_async = httpx.AsyncClient(timeout=timeout)
self.formula = formula
def _get_header(self):
@@ -36,7 +44,7 @@ class ConverterMineru(Converter):
def upload(self, document: Document):
# 获取上传链接
- response = client.post(URL, headers=self._get_header(), json=self._get_upload_data(document),timeout=120)
+ response = client.post(URL, headers=self._get_header(), json=self._get_upload_data(document))
response.raise_for_status()
result = response.json()
# print('response success. result:{}'.format(result))
@@ -45,7 +53,7 @@ class ConverterMineru(Converter):
urls = result["data"]["file_urls"]
# print('batch_id:{},urls:{}'.format(batch_id, urls))
# 获取
- res_upload = client.put(urls[0], content=document.filebytes,timeout=120)
+ res_upload = client.put(urls[0], content=document.filebytes)
res_upload.raise_for_status()
# print(f"{urls[0]} upload success")
return batch_id
@@ -109,7 +117,7 @@ def get_md_from_zip_url_with_inline_images(
"""
try:
print(f"正在从 {zip_url} 下载ZIP文件 (使用 httpx.get)...")
- response = client.get(zip_url, timeout=120.0) # 增加超时
+ response = client.get(zip_url) # timeout is inherited from the shared client's httpx.Timeout configuration
response.raise_for_status()
print("ZIP文件下载完成。")
return embed_inline_image_from_zip(response.content, filename_in_zip=filename_in_zip, encoding=encoding)