修复了mineru上传文件过大时超时的问题

This commit is contained in:
xunbu
2025-05-24 22:06:39 +08:00
parent de49ade3d4
commit 3c92d8d25f
3 changed files with 19 additions and 16 deletions

21
.idea/workspace.xml generated
View File

@@ -6,13 +6,8 @@
<component name="ChangeListManager">
<list default="true" id="6b18b44a-df57-4212-a857-9e291ebe5dd2" name="更改" comment="">
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/docutranslate/app.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/app.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/docutranslate/converter/converter.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/converter/converter.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/docutranslate/converter/converter_docling.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/converter/converter_docling.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/docutranslate/converter/converter_mineru.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/converter/converter_mineru.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/docutranslate/utils/markdown_utils.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/utils/markdown_utils.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/pyproject.toml" beforeDir="false" afterPath="$PROJECT_DIR$/pyproject.toml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/uv.lock" beforeDir="false" afterPath="$PROJECT_DIR$/uv.lock" afterDir="false" />
</list>
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
@@ -86,7 +81,7 @@
&quot;RunOnceActivity.ShowReadmeOnStart&quot;: &quot;true&quot;,
&quot;RunOnceActivity.TerminalTabsStorage.copyFrom.TerminalArrangementManager&quot;: &quot;true&quot;,
&quot;RunOnceActivity.git.unshallow&quot;: &quot;true&quot;,
&quot;git-widget-placeholder&quot;: &quot;main&quot;,
&quot;git-widget-placeholder&quot;: &quot;dev&quot;,
&quot;last_opened_file_path&quot;: &quot;C:/Users/jxgm/Desktop/translate/docutranslate&quot;,
&quot;list.type.of.created.stylesheet&quot;: &quot;CSS&quot;,
&quot;node.js.detected.package.eslint&quot;: &quot;true&quot;,
@@ -610,6 +605,9 @@
<workItem from="1747754618316" duration="16566000" />
<workItem from="1747828300140" duration="4611000" />
<workItem from="1747836914405" duration="2380000" />
<workItem from="1747972866946" duration="205000" />
<workItem from="1748093422441" duration="1038000" />
<workItem from="1748094483009" duration="843000" />
</task>
<servers />
</component>
@@ -627,6 +625,17 @@
</map>
</option>
</component>
<component name="XDebuggerManager">
<breakpoint-manager>
<breakpoints>
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
<url>file://$PROJECT_DIR$/docutranslate/translater.py</url>
<line>247</line>
<option name="timeStamp" value="2" />
</line-breakpoint>
</breakpoints>
</breakpoint-manager>
</component>
<component name="com.intellij.coverage.CoverageDataManagerImpl">
<SUITE FILE_PATH="coverage/filetranslate$app_test__1_.coverage" NAME="app_test (1) 覆盖结果" MODIFIED="1747894597097" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
<SUITE FILE_PATH="coverage/filetranslate$test.coverage" NAME="test 覆盖结果" MODIFIED="1747472297913" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />

View File

@@ -1,9 +1,4 @@
import asyncio
import base64
import io
import mimetypes
import os
import re
import time
import zipfile
import httpx
@@ -41,7 +36,7 @@ class ConverterMineru(Converter):
def upload(self, document: Document):
# 获取上传链接
response = client.post(URL, headers=self._get_header(), json=self._get_upload_data(document))
response = client.post(URL, headers=self._get_header(), json=self._get_upload_data(document),timeout=120)
response.raise_for_status()
result = response.json()
# print('response success. result:{}'.format(result))
@@ -50,7 +45,7 @@ class ConverterMineru(Converter):
urls = result["data"]["file_urls"]
# print('batch_id:{},urls:{}'.format(batch_id, urls))
# 获取
res_upload = client.put(urls[0], content=document.filebytes)
res_upload = client.put(urls[0], content=document.filebytes,timeout=120)
res_upload.raise_for_status()
# print(f"{urls[0]} upload success")
return batch_id
@@ -114,7 +109,7 @@ def get_md_from_zip_url_with_inline_images(
"""
try:
print(f"正在从 {zip_url} 下载ZIP文件 (使用 httpx.get)...")
response = client.get(zip_url, timeout=60.0) # 增加超时
response = client.get(zip_url, timeout=120.0) # 增加超时
response.raise_for_status()
print("ZIP文件下载完成。")
return embed_inline_image_from_zip(response.content, filename_in_zip=filename_in_zip, encoding=encoding)

View File

@@ -1,6 +1,6 @@
[project]
name = "docutranslate"
version = "0.2.23"
version = "0.2.24"
description = "文件翻译工具"
readme = "README.md"
requires-python = ">=3.10"
@@ -8,7 +8,6 @@ dependencies = [
"httpx>=0.28.1",
"markdown2>=2.5.3",
"fastapi[standard]>=0.115.12",
]