优化minerU的超时设置
This commit is contained in:
111
.idea/workspace.xml
generated
111
.idea/workspace.xml
generated
@@ -7,7 +7,6 @@
|
|||||||
<list default="true" id="6b18b44a-df57-4212-a857-9e291ebe5dd2" name="更改" comment="">
|
<list default="true" id="6b18b44a-df57-4212-a857-9e291ebe5dd2" name="更改" comment="">
|
||||||
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
|
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
|
||||||
<change beforePath="$PROJECT_DIR$/docutranslate/converter/converter_mineru.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/converter/converter_mineru.py" afterDir="false" />
|
<change beforePath="$PROJECT_DIR$/docutranslate/converter/converter_mineru.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/converter/converter_mineru.py" afterDir="false" />
|
||||||
<change beforePath="$PROJECT_DIR$/pyproject.toml" beforeDir="false" afterPath="$PROJECT_DIR$/pyproject.toml" afterDir="false" />
|
|
||||||
</list>
|
</list>
|
||||||
<option name="SHOW_DIALOG" value="false" />
|
<option name="SHOW_DIALOG" value="false" />
|
||||||
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
||||||
@@ -37,62 +36,62 @@
|
|||||||
<option name="hideEmptyMiddlePackages" value="true" />
|
<option name="hideEmptyMiddlePackages" value="true" />
|
||||||
<option name="showLibraryContents" value="true" />
|
<option name="showLibraryContents" value="true" />
|
||||||
</component>
|
</component>
|
||||||
<component name="PropertiesComponent">{
|
<component name="PropertiesComponent"><![CDATA[{
|
||||||
"keyToString": {
|
"keyToString": {
|
||||||
"DefaultHtmlFileTemplate": "HTML File",
|
"DefaultHtmlFileTemplate": "HTML File",
|
||||||
"JavaScript 调试.output.html (1).executor": "Run",
|
"JavaScript 调试.output.html (1).executor": "Run",
|
||||||
"JavaScript 调试.output.html.executor": "Run",
|
"JavaScript 调试.output.html.executor": "Run",
|
||||||
"JavaScript 调试.regex.md_中文.html.executor": "Run",
|
"JavaScript 调试.regex.md_中文.html.executor": "Run",
|
||||||
"JavaScript 调试.regex_中文.html.executor": "Run",
|
"JavaScript 调试.regex_中文.html.executor": "Run",
|
||||||
"JavaScript 调试.test.html.executor": "Run",
|
"JavaScript 调试.test.html.executor": "Run",
|
||||||
"JavaScript 调试.test2.html.executor": "Run",
|
"JavaScript 调试.test2.html.executor": "Run",
|
||||||
"JavaScript 调试.test2_英文.html.executor": "Run",
|
"JavaScript 调试.test2_英文.html.executor": "Run",
|
||||||
"JavaScript 调试.test4-1_中文.html.executor": "Run",
|
"JavaScript 调试.test4-1_中文.html.executor": "Run",
|
||||||
"JavaScript 调试.互联网认证授权机制.html.executor": "Run",
|
"JavaScript 调试.互联网认证授权机制.html.executor": "Run",
|
||||||
"JavaScript 调试.互联网认证授权机制_英文.html.executor": "Run",
|
"JavaScript 调试.互联网认证授权机制_英文.html.executor": "Run",
|
||||||
"JavaScript 调试.毕业论文_英文.html.executor": "Run",
|
"JavaScript 调试.毕业论文_英文.html.executor": "Run",
|
||||||
"ModuleVcsDetector.initialDetectionPerformed": "true",
|
"ModuleVcsDetector.initialDetectionPerformed": "true",
|
||||||
"Python 测试.Python 测试 (markdown_mask.py 内).executor": "Run",
|
"Python 测试.Python 测试 (markdown_mask.py 内).executor": "Run",
|
||||||
"Python 测试.markdown_mask.Test.test_basic_link_masking 的 Python 测试.executor": "Run",
|
"Python 测试.markdown_mask.Test.test_basic_link_masking 的 Python 测试.executor": "Run",
|
||||||
"Python 测试.pytest (test_html.py 内).executor": "Run",
|
"Python 测试.pytest (test_html.py 内).executor": "Run",
|
||||||
"Python.1test.executor": "Run",
|
"Python.1test.executor": "Run",
|
||||||
"Python.2test2 (1).executor": "Run",
|
"Python.2test2 (1).executor": "Run",
|
||||||
"Python.PDFtranslater (1).executor": "Run",
|
"Python.PDFtranslater (1).executor": "Run",
|
||||||
"Python.PDFtranslater (2).executor": "Run",
|
"Python.PDFtranslater (2).executor": "Run",
|
||||||
"Python.agent.executor": "Debug",
|
"Python.agent.executor": "Debug",
|
||||||
"Python.agent_utils.executor": "Run",
|
"Python.agent_utils.executor": "Run",
|
||||||
"Python.app (1).executor": "Run",
|
"Python.app (1).executor": "Run",
|
||||||
"Python.app.executor": "Run",
|
"Python.app.executor": "Run",
|
||||||
"Python.app2.executor": "Run",
|
"Python.app2.executor": "Run",
|
||||||
"Python.app_test (1).executor": "Run",
|
"Python.app_test (1).executor": "Run",
|
||||||
"Python.convert.executor": "Run",
|
"Python.convert.executor": "Run",
|
||||||
"Python.converter_docling.executor": "Run",
|
"Python.converter_docling.executor": "Run",
|
||||||
"Python.converter_mineru.executor": "Run",
|
"Python.converter_mineru.executor": "Run",
|
||||||
"Python.markdown_splitter.executor": "Debug",
|
"Python.markdown_splitter.executor": "Debug",
|
||||||
"Python.markdown_utils.executor": "Run",
|
"Python.markdown_utils.executor": "Run",
|
||||||
"Python.test.executor": "Run",
|
"Python.test.executor": "Run",
|
||||||
"Python.test1.executor": "Run",
|
"Python.test1.executor": "Run",
|
||||||
"Python.test2.executor": "Run",
|
"Python.test2.executor": "Run",
|
||||||
"Python.test3.executor": "Run",
|
"Python.test3.executor": "Run",
|
||||||
"Python.test4.executor": "Run",
|
"Python.test4.executor": "Run",
|
||||||
"Python.testhtml.executor": "Run",
|
"Python.testhtml.executor": "Run",
|
||||||
"Python.translater.executor": "Run",
|
"Python.translater.executor": "Run",
|
||||||
"Python.切分测试.executor": "Run",
|
"Python.切分测试.executor": "Run",
|
||||||
"RunOnceActivity.ShowReadmeOnStart": "true",
|
"RunOnceActivity.ShowReadmeOnStart": "true",
|
||||||
"RunOnceActivity.TerminalTabsStorage.copyFrom.TerminalArrangementManager": "true",
|
"RunOnceActivity.TerminalTabsStorage.copyFrom.TerminalArrangementManager": "true",
|
||||||
"RunOnceActivity.git.unshallow": "true",
|
"RunOnceActivity.git.unshallow": "true",
|
||||||
"git-widget-placeholder": "dev",
|
"git-widget-placeholder": "main",
|
||||||
"last_opened_file_path": "C:/Users/jxgm/Desktop/translate/docutranslate",
|
"last_opened_file_path": "C:/Users/jxgm/Desktop/translate/docutranslate",
|
||||||
"list.type.of.created.stylesheet": "CSS",
|
"list.type.of.created.stylesheet": "CSS",
|
||||||
"node.js.detected.package.eslint": "true",
|
"node.js.detected.package.eslint": "true",
|
||||||
"node.js.detected.package.tslint": "true",
|
"node.js.detected.package.tslint": "true",
|
||||||
"node.js.selected.package.eslint": "(autodetect)",
|
"node.js.selected.package.eslint": "(autodetect)",
|
||||||
"node.js.selected.package.tslint": "(autodetect)",
|
"node.js.selected.package.tslint": "(autodetect)",
|
||||||
"nodejs_package_manager_path": "npm",
|
"nodejs_package_manager_path": "npm",
|
||||||
"settings.editor.selected.configurable": "preferences.pluginManager",
|
"settings.editor.selected.configurable": "preferences.pluginManager",
|
||||||
"vue.rearranger.settings.migration": "true"
|
"vue.rearranger.settings.migration": "true"
|
||||||
}
|
}
|
||||||
}</component>
|
}]]></component>
|
||||||
<component name="RecentsManager">
|
<component name="RecentsManager">
|
||||||
<key name="CopyFile.RECENT_KEYS">
|
<key name="CopyFile.RECENT_KEYS">
|
||||||
<recent name="C:\Users\jxgm\Desktop\translate\docutranslate" />
|
<recent name="C:\Users\jxgm\Desktop\translate\docutranslate" />
|
||||||
|
|||||||
@@ -8,14 +8,22 @@ from docutranslate.utils.markdown_utils import embed_inline_image_from_zip
|
|||||||
|
|
||||||
URL = 'https://mineru.net/api/v4/file-urls/batch'
|
URL = 'https://mineru.net/api/v4/file-urls/batch'
|
||||||
|
|
||||||
client = httpx.Client(trust_env=False)
|
timeout = httpx.Timeout(
|
||||||
|
connect=5.0, # 连接超时 (建立连接的最长时间)
|
||||||
|
read=120.0, # 读取超时 (等待服务器响应的最长时间)
|
||||||
|
write=120.0, # 写入超时 (发送数据的最长时间)
|
||||||
|
pool=1.0 # 从连接池获取连接的超时时间
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
|
client = httpx.Client(trust_env=False,timeout=timeout)
|
||||||
|
|
||||||
|
|
||||||
# TODO: 提供更详细的logger
|
# TODO: 提供更详细的logger
|
||||||
class ConverterMineru(Converter):
|
class ConverterMineru(Converter):
|
||||||
def __init__(self, token: str, formula=True):
|
def __init__(self, token: str, formula=True):
|
||||||
self.mineru_token = token.strip()
|
self.mineru_token = token.strip()
|
||||||
self.client_async = httpx.AsyncClient()
|
self.client_async = httpx.AsyncClient(timeout=timeout)
|
||||||
self.formula = formula
|
self.formula = formula
|
||||||
|
|
||||||
def _get_header(self):
|
def _get_header(self):
|
||||||
@@ -36,7 +44,7 @@ class ConverterMineru(Converter):
|
|||||||
|
|
||||||
def upload(self, document: Document):
|
def upload(self, document: Document):
|
||||||
# 获取上传链接
|
# 获取上传链接
|
||||||
response = client.post(URL, headers=self._get_header(), json=self._get_upload_data(document),timeout=120)
|
response = client.post(URL, headers=self._get_header(), json=self._get_upload_data(document))
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
result = response.json()
|
result = response.json()
|
||||||
# print('response success. result:{}'.format(result))
|
# print('response success. result:{}'.format(result))
|
||||||
@@ -45,7 +53,7 @@ class ConverterMineru(Converter):
|
|||||||
urls = result["data"]["file_urls"]
|
urls = result["data"]["file_urls"]
|
||||||
# print('batch_id:{},urls:{}'.format(batch_id, urls))
|
# print('batch_id:{},urls:{}'.format(batch_id, urls))
|
||||||
# 获取
|
# 获取
|
||||||
res_upload = client.put(urls[0], content=document.filebytes,timeout=120)
|
res_upload = client.put(urls[0], content=document.filebytes)
|
||||||
res_upload.raise_for_status()
|
res_upload.raise_for_status()
|
||||||
# print(f"{urls[0]} upload success")
|
# print(f"{urls[0]} upload success")
|
||||||
return batch_id
|
return batch_id
|
||||||
@@ -109,7 +117,7 @@ def get_md_from_zip_url_with_inline_images(
|
|||||||
"""
|
"""
|
||||||
try:
|
try:
|
||||||
print(f"正在从 {zip_url} 下载ZIP文件 (使用 httpx.get)...")
|
print(f"正在从 {zip_url} 下载ZIP文件 (使用 httpx.get)...")
|
||||||
response = client.get(zip_url, timeout=120.0) # 增加超时
|
response = client.get(zip_url) # 增加超时
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
print("ZIP文件下载完成。")
|
print("ZIP文件下载完成。")
|
||||||
return embed_inline_image_from_zip(response.content, filename_in_zip=filename_in_zip, encoding=encoding)
|
return embed_inline_image_from_zip(response.content, filename_in_zip=filename_in_zip, encoding=encoding)
|
||||||
|
|||||||
Reference in New Issue
Block a user