修复了mineru上传文件过大时超时的问题

This commit is contained in:
xunbu
2025-05-24 22:06:39 +08:00
parent de49ade3d4
commit 3c92d8d25f
3 changed files with 19 additions and 16 deletions

21
.idea/workspace.xml generated
View File

@@ -6,13 +6,8 @@
<component name="ChangeListManager"> <component name="ChangeListManager">
<list default="true" id="6b18b44a-df57-4212-a857-9e291ebe5dd2" name="更改" comment=""> <list default="true" id="6b18b44a-df57-4212-a857-9e291ebe5dd2" name="更改" comment="">
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" /> <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/docutranslate/app.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/app.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/docutranslate/converter/converter.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/converter/converter.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/docutranslate/converter/converter_docling.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/converter/converter_docling.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/docutranslate/converter/converter_mineru.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/converter/converter_mineru.py" afterDir="false" /> <change beforePath="$PROJECT_DIR$/docutranslate/converter/converter_mineru.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/converter/converter_mineru.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/docutranslate/utils/markdown_utils.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/utils/markdown_utils.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/pyproject.toml" beforeDir="false" afterPath="$PROJECT_DIR$/pyproject.toml" afterDir="false" /> <change beforePath="$PROJECT_DIR$/pyproject.toml" beforeDir="false" afterPath="$PROJECT_DIR$/pyproject.toml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/uv.lock" beforeDir="false" afterPath="$PROJECT_DIR$/uv.lock" afterDir="false" />
</list> </list>
<option name="SHOW_DIALOG" value="false" /> <option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" /> <option name="HIGHLIGHT_CONFLICTS" value="true" />
@@ -86,7 +81,7 @@
&quot;RunOnceActivity.ShowReadmeOnStart&quot;: &quot;true&quot;, &quot;RunOnceActivity.ShowReadmeOnStart&quot;: &quot;true&quot;,
&quot;RunOnceActivity.TerminalTabsStorage.copyFrom.TerminalArrangementManager&quot;: &quot;true&quot;, &quot;RunOnceActivity.TerminalTabsStorage.copyFrom.TerminalArrangementManager&quot;: &quot;true&quot;,
&quot;RunOnceActivity.git.unshallow&quot;: &quot;true&quot;, &quot;RunOnceActivity.git.unshallow&quot;: &quot;true&quot;,
&quot;git-widget-placeholder&quot;: &quot;main&quot;, &quot;git-widget-placeholder&quot;: &quot;dev&quot;,
&quot;last_opened_file_path&quot;: &quot;C:/Users/jxgm/Desktop/translate/docutranslate&quot;, &quot;last_opened_file_path&quot;: &quot;C:/Users/jxgm/Desktop/translate/docutranslate&quot;,
&quot;list.type.of.created.stylesheet&quot;: &quot;CSS&quot;, &quot;list.type.of.created.stylesheet&quot;: &quot;CSS&quot;,
&quot;node.js.detected.package.eslint&quot;: &quot;true&quot;, &quot;node.js.detected.package.eslint&quot;: &quot;true&quot;,
@@ -610,6 +605,9 @@
<workItem from="1747754618316" duration="16566000" /> <workItem from="1747754618316" duration="16566000" />
<workItem from="1747828300140" duration="4611000" /> <workItem from="1747828300140" duration="4611000" />
<workItem from="1747836914405" duration="2380000" /> <workItem from="1747836914405" duration="2380000" />
<workItem from="1747972866946" duration="205000" />
<workItem from="1748093422441" duration="1038000" />
<workItem from="1748094483009" duration="843000" />
</task> </task>
<servers /> <servers />
</component> </component>
@@ -627,6 +625,17 @@
</map> </map>
</option> </option>
</component> </component>
<component name="XDebuggerManager">
<breakpoint-manager>
<breakpoints>
<line-breakpoint enabled="true" suspend="THREAD" type="python-line">
<url>file://$PROJECT_DIR$/docutranslate/translater.py</url>
<line>247</line>
<option name="timeStamp" value="2" />
</line-breakpoint>
</breakpoints>
</breakpoint-manager>
</component>
<component name="com.intellij.coverage.CoverageDataManagerImpl"> <component name="com.intellij.coverage.CoverageDataManagerImpl">
<SUITE FILE_PATH="coverage/filetranslate$app_test__1_.coverage" NAME="app_test (1) 覆盖结果" MODIFIED="1747894597097" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" /> <SUITE FILE_PATH="coverage/filetranslate$app_test__1_.coverage" NAME="app_test (1) 覆盖结果" MODIFIED="1747894597097" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
<SUITE FILE_PATH="coverage/filetranslate$test.coverage" NAME="test 覆盖结果" MODIFIED="1747472297913" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" /> <SUITE FILE_PATH="coverage/filetranslate$test.coverage" NAME="test 覆盖结果" MODIFIED="1747472297913" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />

View File

@@ -1,9 +1,4 @@
import asyncio import asyncio
import base64
import io
import mimetypes
import os
import re
import time import time
import zipfile import zipfile
import httpx import httpx
@@ -41,7 +36,7 @@ class ConverterMineru(Converter):
def upload(self, document: Document): def upload(self, document: Document):
# 获取上传链接 # 获取上传链接
response = client.post(URL, headers=self._get_header(), json=self._get_upload_data(document)) response = client.post(URL, headers=self._get_header(), json=self._get_upload_data(document),timeout=120)
response.raise_for_status() response.raise_for_status()
result = response.json() result = response.json()
# print('response success. result:{}'.format(result)) # print('response success. result:{}'.format(result))
@@ -50,7 +45,7 @@ class ConverterMineru(Converter):
urls = result["data"]["file_urls"] urls = result["data"]["file_urls"]
# print('batch_id:{},urls:{}'.format(batch_id, urls)) # print('batch_id:{},urls:{}'.format(batch_id, urls))
# 获取 # 获取
res_upload = client.put(urls[0], content=document.filebytes) res_upload = client.put(urls[0], content=document.filebytes,timeout=120)
res_upload.raise_for_status() res_upload.raise_for_status()
# print(f"{urls[0]} upload success") # print(f"{urls[0]} upload success")
return batch_id return batch_id
@@ -114,7 +109,7 @@ def get_md_from_zip_url_with_inline_images(
""" """
try: try:
print(f"正在从 {zip_url} 下载ZIP文件 (使用 httpx.get)...") print(f"正在从 {zip_url} 下载ZIP文件 (使用 httpx.get)...")
response = client.get(zip_url, timeout=60.0) # 增加超时 response = client.get(zip_url, timeout=120.0) # 增加超时
response.raise_for_status() response.raise_for_status()
print("ZIP文件下载完成。") print("ZIP文件下载完成。")
return embed_inline_image_from_zip(response.content, filename_in_zip=filename_in_zip, encoding=encoding) return embed_inline_image_from_zip(response.content, filename_in_zip=filename_in_zip, encoding=encoding)

View File

@@ -1,6 +1,6 @@
[project] [project]
name = "docutranslate" name = "docutranslate"
version = "0.2.23" version = "0.2.24"
description = "文件翻译工具" description = "文件翻译工具"
readme = "README.md" readme = "README.md"
requires-python = ">=3.10" requires-python = ">=3.10"
@@ -8,7 +8,6 @@ dependencies = [
"httpx>=0.28.1", "httpx>=0.28.1",
"markdown2>=2.5.3", "markdown2>=2.5.3",
"fastapi[standard]>=0.115.12", "fastapi[standard]>=0.115.12",
] ]