增加async的完全支持

This commit is contained in:
xunbu
2025-05-16 18:15:12 +08:00
parent 9c45a673f9
commit a1d6725321
11 changed files with 933 additions and 723 deletions

168
.idea/workspace.xml generated
View File

@@ -5,9 +5,17 @@
</component> </component>
<component name="ChangeListManager"> <component name="ChangeListManager">
<list default="true" id="6b18b44a-df57-4212-a857-9e291ebe5dd2" name="更改" comment=""> <list default="true" id="6b18b44a-df57-4212-a857-9e291ebe5dd2" name="更改" comment="">
<change afterPath="$PROJECT_DIR$/LICENSE" afterDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" /> <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/README.md" beforeDir="false" afterPath="$PROJECT_DIR$/README.md" afterDir="false" /> <change beforePath="$PROJECT_DIR$/docutranslate/agents/__init__.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/agents/__init__.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/docutranslate/agents/agent_async.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/agents/agent.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/docutranslate/agents/agent_sync.py" beforeDir="false" />
<change beforePath="$PROJECT_DIR$/docutranslate/agents/agent_thread.py" beforeDir="false" />
<change beforePath="$PROJECT_DIR$/docutranslate/agents/markdown_agent.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/agents/markdown_agent.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/docutranslate/app.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/app.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/docutranslate/translater.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/translater.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/docutranslate/utils/convert.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/utils/convert.py" afterDir="false" /> <change beforePath="$PROJECT_DIR$/docutranslate/utils/convert.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/utils/convert.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/pyproject.toml" beforeDir="false" afterPath="$PROJECT_DIR$/pyproject.toml" afterDir="false" />
</list> </list>
<option name="SHOW_DIALOG" value="false" /> <option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" /> <option name="HIGHLIGHT_CONFLICTS" value="true" />
@@ -33,59 +41,59 @@
<option name="hideEmptyMiddlePackages" value="true" /> <option name="hideEmptyMiddlePackages" value="true" />
<option name="showLibraryContents" value="true" /> <option name="showLibraryContents" value="true" />
</component> </component>
<component name="PropertiesComponent">{ <component name="PropertiesComponent"><![CDATA[{
&quot;keyToString&quot;: { "keyToString": {
&quot;DefaultHtmlFileTemplate&quot;: &quot;HTML File&quot;, "DefaultHtmlFileTemplate": "HTML File",
&quot;JavaScript 调试.output.html (1).executor&quot;: &quot;Run&quot;, "JavaScript 调试.output.html (1).executor": "Run",
&quot;JavaScript 调试.output.html.executor&quot;: &quot;Run&quot;, "JavaScript 调试.output.html.executor": "Run",
&quot;JavaScript 调试.regex.md_中文.html.executor&quot;: &quot;Run&quot;, "JavaScript 调试.regex.md_中文.html.executor": "Run",
&quot;JavaScript 调试.regex_中文.html.executor&quot;: &quot;Run&quot;, "JavaScript 调试.regex_中文.html.executor": "Run",
&quot;JavaScript 调试.test2.html.executor&quot;: &quot;Run&quot;, "JavaScript 调试.test2.html.executor": "Run",
&quot;JavaScript 调试.test2_英文.html.executor&quot;: &quot;Run&quot;, "JavaScript 调试.test2_英文.html.executor": "Run",
&quot;JavaScript 调试.test4-1_中文.html.executor&quot;: &quot;Run&quot;, "JavaScript 调试.test4-1_中文.html.executor": "Run",
&quot;JavaScript 调试.互联网认证授权机制.html.executor&quot;: &quot;Run&quot;, "JavaScript 调试.互联网认证授权机制.html.executor": "Run",
&quot;JavaScript 调试.互联网认证授权机制_英文.html.executor&quot;: &quot;Run&quot;, "JavaScript 调试.互联网认证授权机制_英文.html.executor": "Run",
&quot;JavaScript 调试.毕业论文_英文.html.executor&quot;: &quot;Run&quot;, "JavaScript 调试.毕业论文_英文.html.executor": "Run",
&quot;ModuleVcsDetector.initialDetectionPerformed&quot;: &quot;true&quot;, "ModuleVcsDetector.initialDetectionPerformed": "true",
&quot;Python 测试.Python 测试 (markdown_mask.py 内).executor&quot;: &quot;Run&quot;, "Python 测试.Python 测试 (markdown_mask.py 内).executor": "Run",
&quot;Python 测试.markdown_mask.Test.test_basic_link_masking 的 Python 测试.executor&quot;: &quot;Run&quot;, "Python 测试.markdown_mask.Test.test_basic_link_masking 的 Python 测试.executor": "Run",
&quot;Python.PDFtranslater (1).executor&quot;: &quot;Run&quot;, "Python.PDFtranslater (1).executor": "Run",
&quot;Python.PDFtranslater (2).executor&quot;: &quot;Run&quot;, "Python.PDFtranslater (2).executor": "Run",
&quot;Python.agent.executor&quot;: &quot;Debug&quot;, "Python.agent.executor": "Debug",
&quot;Python.agent_utils.executor&quot;: &quot;Run&quot;, "Python.agent_utils.executor": "Run",
&quot;Python.app (1).executor&quot;: &quot;Run&quot;, "Python.app (1).executor": "Run",
&quot;Python.app.executor&quot;: &quot;Run&quot;, "Python.app.executor": "Run",
&quot;Python.app2.executor&quot;: &quot;Run&quot;, "Python.app2.executor": "Run",
&quot;Python.app_test (1).executor&quot;: &quot;Run&quot;, "Python.app_test (1).executor": "Run",
&quot;Python.convert.executor&quot;: &quot;Run&quot;, "Python.convert.executor": "Run",
&quot;Python.markdown_splitter.executor&quot;: &quot;Debug&quot;, "Python.markdown_splitter.executor": "Debug",
&quot;Python.markdown_utils.executor&quot;: &quot;Run&quot;, "Python.markdown_utils.executor": "Run",
&quot;Python.test.executor&quot;: &quot;Run&quot;, "Python.test.executor": "Run",
&quot;Python.test1.executor&quot;: &quot;Run&quot;, "Python.test1.executor": "Run",
&quot;Python.test2.executor&quot;: &quot;Run&quot;, "Python.test2.executor": "Run",
&quot;Python.test3.executor&quot;: &quot;Run&quot;, "Python.test3.executor": "Run",
&quot;Python.test4.executor&quot;: &quot;Run&quot;, "Python.test4.executor": "Run",
&quot;Python.translater.executor&quot;: &quot;Run&quot;, "Python.translater.executor": "Run",
&quot;Python.切分测试.executor&quot;: &quot;Run&quot;, "Python.切分测试.executor": "Run",
&quot;RunOnceActivity.ShowReadmeOnStart&quot;: &quot;true&quot;, "RunOnceActivity.ShowReadmeOnStart": "true",
&quot;RunOnceActivity.TerminalTabsStorage.copyFrom.TerminalArrangementManager&quot;: &quot;true&quot;, "RunOnceActivity.TerminalTabsStorage.copyFrom.TerminalArrangementManager": "true",
&quot;RunOnceActivity.git.unshallow&quot;: &quot;true&quot;, "RunOnceActivity.git.unshallow": "true",
&quot;git-widget-placeholder&quot;: &quot;main&quot;, "git-widget-placeholder": "main",
&quot;last_opened_file_path&quot;: &quot;C:/Users/jxgm/Desktop/FileTranslate/dist/DocuTranslate&quot;, "last_opened_file_path": "C:/Users/jxgm/Desktop/FileTranslate/docutranslate/agents",
&quot;node.js.detected.package.eslint&quot;: &quot;true&quot;, "node.js.detected.package.eslint": "true",
&quot;node.js.detected.package.tslint&quot;: &quot;true&quot;, "node.js.detected.package.tslint": "true",
&quot;node.js.selected.package.eslint&quot;: &quot;(autodetect)&quot;, "node.js.selected.package.eslint": "(autodetect)",
&quot;node.js.selected.package.tslint&quot;: &quot;(autodetect)&quot;, "node.js.selected.package.tslint": "(autodetect)",
&quot;nodejs_package_manager_path&quot;: &quot;npm&quot;, "nodejs_package_manager_path": "npm",
&quot;settings.editor.selected.configurable&quot;: &quot;preferences.pluginManager&quot;, "settings.editor.selected.configurable": "preferences.pluginManager",
&quot;vue.rearranger.settings.migration&quot;: &quot;true&quot; "vue.rearranger.settings.migration": "true"
} }
}</component> }]]></component>
<component name="RecentsManager"> <component name="RecentsManager">
<key name="CopyFile.RECENT_KEYS"> <key name="CopyFile.RECENT_KEYS">
<recent name="C:\Users\jxgm\Desktop\FileTranslate\docutranslate\agents" />
<recent name="C:\Users\jxgm\Desktop\FileTranslate\dist\DocuTranslate" /> <recent name="C:\Users\jxgm\Desktop\FileTranslate\dist\DocuTranslate" />
<recent name="C:\Users\jxgm\Desktop\FileTranslate\dist" /> <recent name="C:\Users\jxgm\Desktop\FileTranslate\dist" />
<recent name="C:\Users\jxgm\Desktop\FileTranslate\docutranslate\agents" />
<recent name="C:\Users\jxgm\Desktop\FileTranslate\dist\app" /> <recent name="C:\Users\jxgm\Desktop\FileTranslate\dist\app" />
<recent name="C:\Users\jxgm\Desktop\FileTranslate\tests\files" /> <recent name="C:\Users\jxgm\Desktop\FileTranslate\tests\files" />
</key> </key>
@@ -97,7 +105,7 @@
<recent name="C:\Users\jxgm\Desktop\FileTranslate\tests\resource" /> <recent name="C:\Users\jxgm\Desktop\FileTranslate\tests\resource" />
</key> </key>
</component> </component>
<component name="RunManager" selected="Python.test"> <component name="RunManager" selected="Python.app_test (1)">
<configuration default="true" type="DjangoTestsConfigurationType"> <configuration default="true" type="DjangoTestsConfigurationType">
<module name="filetranslate" /> <module name="filetranslate" />
<option name="ENV_FILES" value="" /> <option name="ENV_FILES" value="" />
@@ -329,29 +337,6 @@
<option name="INPUT_FILE" value="" /> <option name="INPUT_FILE" value="" />
<method v="2" /> <method v="2" />
</configuration> </configuration>
<configuration name="app" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
<module name="FileTranslate" />
<option name="ENV_FILES" value="" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/docutranslate" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/docutranslate/app.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<option name="MODULE_MODE" value="false" />
<option name="REDIRECT_INPUT" value="false" />
<option name="INPUT_FILE" value="" />
<method v="2" />
</configuration>
<configuration name="app_test (1)" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true"> <configuration name="app_test (1)" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
<module name="FileTranslate" /> <module name="FileTranslate" />
<option name="ENV_FILES" value="" /> <option name="ENV_FILES" value="" />
@@ -421,6 +406,29 @@
<option name="INPUT_FILE" value="" /> <option name="INPUT_FILE" value="" />
<method v="2" /> <method v="2" />
</configuration> </configuration>
<configuration name="test2" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
<module name="FileTranslate" />
<option name="ENV_FILES" value="" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/tests" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/tests/test2.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<option name="MODULE_MODE" value="false" />
<option name="REDIRECT_INPUT" value="false" />
<option name="INPUT_FILE" value="" />
<method v="2" />
</configuration>
<configuration name="test" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true"> <configuration name="test" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
<module name="FileTranslate" /> <module name="FileTranslate" />
<option name="ENV_FILES" value="" /> <option name="ENV_FILES" value="" />
@@ -543,11 +551,11 @@
</configuration> </configuration>
<recent_temporary> <recent_temporary>
<list> <list>
<item itemvalue="Python.test" />
<item itemvalue="Python.app_test (1)" /> <item itemvalue="Python.app_test (1)" />
<item itemvalue="Python.test2" />
<item itemvalue="Python.test" />
<item itemvalue="Python.切分测试" /> <item itemvalue="Python.切分测试" />
<item itemvalue="Python.app_test" /> <item itemvalue="Python.app_test" />
<item itemvalue="Python.app" />
</list> </list>
</recent_temporary> </recent_temporary>
</component> </component>
@@ -616,7 +624,9 @@
<workItem from="1747146670281" duration="64000" /> <workItem from="1747146670281" duration="64000" />
<workItem from="1747185217844" duration="6194000" /> <workItem from="1747185217844" duration="6194000" />
<workItem from="1747297470216" duration="347000" /> <workItem from="1747297470216" duration="347000" />
<workItem from="1747299661166" duration="1977000" /> <workItem from="1747299661166" duration="4649000" />
<workItem from="1747311432043" duration="2883000" />
<workItem from="1747380029603" duration="10381000" />
</task> </task>
<servers /> <servers />
</component> </component>
@@ -624,8 +634,8 @@
<option name="version" value="3" /> <option name="version" value="3" />
</component> </component>
<component name="com.intellij.coverage.CoverageDataManagerImpl"> <component name="com.intellij.coverage.CoverageDataManagerImpl">
<SUITE FILE_PATH="coverage/filetranslate$app_test__1_.coverage" NAME="app_test (1) 覆盖结果" MODIFIED="1747189112668" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" /> <SUITE FILE_PATH="coverage/filetranslate$app_test__1_.coverage" NAME="app_test (1) 覆盖结果" MODIFIED="1747390450384" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
<SUITE FILE_PATH="coverage/filetranslate$test.coverage" NAME="test 覆盖结果" MODIFIED="1747300796373" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" /> <SUITE FILE_PATH="coverage/filetranslate$test.coverage" NAME="test 覆盖结果" MODIFIED="1747301959211" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
<SUITE FILE_PATH="coverage/filetranslate$convert.coverage" NAME="convert 覆盖结果" MODIFIED="1746963490689" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate/utils" /> <SUITE FILE_PATH="coverage/filetranslate$convert.coverage" NAME="convert 覆盖结果" MODIFIED="1746963490689" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate/utils" />
<SUITE FILE_PATH="coverage/PDFtranslate$PDFtranslater__1_.coverage" NAME="PDFtranslater (1) 覆盖结果" MODIFIED="1746633258205" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages" /> <SUITE FILE_PATH="coverage/PDFtranslate$PDFtranslater__1_.coverage" NAME="PDFtranslater (1) 覆盖结果" MODIFIED="1746633258205" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages" />
<SUITE FILE_PATH="coverage/PDFtranslate$agent_utils.coverage" NAME="agent_utils 覆盖结果" MODIFIED="1746617703678" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages/utils" /> <SUITE FILE_PATH="coverage/PDFtranslate$agent_utils.coverage" NAME="agent_utils 覆盖结果" MODIFIED="1746617703678" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages/utils" />
@@ -645,7 +655,7 @@
<SUITE FILE_PATH="coverage/filetranslate$test4.coverage" NAME="test4 覆盖结果" MODIFIED="1746887036353" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" /> <SUITE FILE_PATH="coverage/filetranslate$test4.coverage" NAME="test4 覆盖结果" MODIFIED="1746887036353" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
<SUITE FILE_PATH="coverage/PDFtranslate$translater.coverage" NAME="translater 覆盖结果" MODIFIED="1746600434803" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages" /> <SUITE FILE_PATH="coverage/PDFtranslate$translater.coverage" NAME="translater 覆盖结果" MODIFIED="1746600434803" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages" />
<SUITE FILE_PATH="coverage/PDFtranslate$markdown_utils.coverage" NAME="markdown_utils 覆盖结果" MODIFIED="1746598797872" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages/utils" /> <SUITE FILE_PATH="coverage/PDFtranslate$markdown_utils.coverage" NAME="markdown_utils 覆盖结果" MODIFIED="1746598797872" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages/utils" />
<SUITE FILE_PATH="coverage/filetranslate$test2.coverage" NAME="test2 覆盖结果" MODIFIED="1747008834523" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" /> <SUITE FILE_PATH="coverage/filetranslate$test2.coverage" NAME="test2 覆盖结果" MODIFIED="1747383231002" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
<SUITE FILE_PATH="coverage/filetranslate$translater.coverage" NAME="translater 覆盖结果" MODIFIED="1746843159560" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate" /> <SUITE FILE_PATH="coverage/filetranslate$translater.coverage" NAME="translater 覆盖结果" MODIFIED="1746843159560" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate" />
<SUITE FILE_PATH="coverage/PDFtranslate$.coverage" NAME=" 覆盖结果" MODIFIED="1746588350286" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/decorator" /> <SUITE FILE_PATH="coverage/PDFtranslate$.coverage" NAME=" 覆盖结果" MODIFIED="1746588350286" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/decorator" />
</component> </component>

21
LICENSE Normal file
View File

@@ -0,0 +1,21 @@
MIT License
Copyright (c) 2024 International Business Machines
Permission is hereby granted, free of charge, to any person obtaining a copy
of this software and associated documentation files (the "Software"), to deal
in the Software without restriction, including without limitation the rights
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
copies of the Software, and to permit persons to whom the Software is
furnished to do so, subject to the following conditions:
The above copyright notice and this permission notice shall be included in all
copies or substantial portions of the Software.
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
SOFTWARE.

View File

@@ -1,2 +1,2 @@
from .agent_async import Agent, AgentArgs from .agent import Agent, AgentArgs
from .markdown_agent import MDRefineAgent, MDTranslateAgent from .markdown_agent import MDRefineAgent, MDTranslateAgent

View File

@@ -1,4 +1,6 @@
import asyncio import asyncio
from concurrent.futures import ThreadPoolExecutor
from threading import Lock
from typing import TypedDict from typing import TypedDict
import httpx import httpx
@@ -16,17 +18,32 @@ class AgentArgs(TypedDict, total=False):
timeout: int timeout: int
class PromptsCount:
    """Progress counter shared by the worker threads of one prompt batch.

    Per the original note on this class, it is only used when prompts are
    sent with multiple threads.

    Args:
        total: Number of prompts in the batch; only used in the log line.
    """

    def __init__(self, total: int):
        self.lock = Lock()  # guards ``count`` and keeps the log lines ordered
        self.count = 0  # prompts completed so far
        self.total = total

    def add(self):
        """Record one completed prompt and log the running progress."""
        # ``with`` guarantees the lock is released even if the logging call
        # raises; a bare acquire()/release() pair would leave it held and
        # block every other worker thread forever.
        with self.lock:
            self.count += 1
            translater_logger.info(f"多线程-已完成:{self.count}/{self.total}")
TIMEOUT = 500 TIMEOUT = 500
class Agent: class Agent:
def __init__(self, baseurl: str = "", key: str = "xx", model_id: str = "", system_prompt: str = "", temperature=0.7, def __init__(self, baseurl: str = "", key: str = "xx", model_id: str = "", system_prompt: str = "", temperature=0.7,
max_concurrent=6, timeout: int = TIMEOUT): max_concurrent=15, timeout: int = TIMEOUT):
self.baseurl = baseurl.strip() self.baseurl = baseurl.strip()
self.key = key.strip() self.key = key.strip()
self.model_id = model_id.strip() self.model_id = model_id.strip()
self.system_prompt = system_prompt self.system_prompt = system_prompt
self.temperature = temperature self.temperature = temperature
self.client = httpx.Client()
self.client_async = httpx.AsyncClient() self.client_async = httpx.AsyncClient()
self.max_concurrent = max_concurrent self.max_concurrent = max_concurrent
self.timeout = timeout self.timeout = timeout
@@ -65,12 +82,6 @@ class Agent:
) )
response.raise_for_status() response.raise_for_status()
result = response.json()["choices"][0]["message"]["content"] result = response.json()["choices"][0]["message"]["content"]
# pattern = r".*【SSS】(.*)"
# match = re.search(pattern, result, re.DOTALL)
# if match is None:
# print("检测开头`【SSS】`失败")
# else:
# result = match.group(1)
return result return result
except httpx.HTTPStatusError as e: except httpx.HTTPStatusError as e:
raise Exception(f"AI请求错误 (async): {e.response.status_code} - {e.response.text}") from e raise Exception(f"AI请求错误 (async): {e.response.status_code} - {e.response.text}") from e
@@ -83,8 +94,9 @@ class Agent:
self, self,
prompts: list[str], prompts: list[str],
system_prompt: str | None = None, system_prompt: str | None = None,
max_concurrent: int = 5 # 新增参数默认并发数为5 max_concurrent: int | None = None # 新增参数默认并发数为5
) -> list[str]: ) -> list[str]:
max_concurrent = self.max_concurrent if max_concurrent is None else max_concurrent
total = len(prompts) total = len(prompts)
count = 0 count = 0
semaphore = asyncio.Semaphore(max_concurrent) semaphore = asyncio.Semaphore(max_concurrent)
@@ -109,14 +121,48 @@ class Agent:
results = await asyncio.gather(*tasks, return_exceptions=False) results = await asyncio.gather(*tasks, return_exceptions=False)
return results return results
def send(self, prompt: str, system_prompt: None | str = None) -> str:
if system_prompt is None:
system_prompt = self.system_prompt
"""Sends a single prompt asynchronously."""
headers, data = self._prepare_request_data(prompt, system_prompt)
if self.baseurl.endswith("/"):
self.baseurl = self.baseurl[:-1]
try:
response = self.client.post(
f"{self.baseurl}/chat/completions",
json=data,
headers=headers,
timeout=self.timeout
)
response.raise_for_status()
result = response.json()["choices"][0]["message"]["content"]
return result
except httpx.HTTPStatusError as e:
raise Exception(f"AI请求错误 (async): {e.response.status_code} - {e.response.text}") from e
except httpx.RequestError as e:
raise Exception(f"AI请求连接错误 (async): {e}") from e
except (KeyError, IndexError) as e:
raise Exception(f"AI响应格式错误 (async): {e}") from e
def _send_prompt_count(self, prompt: str, system_prompt: None | str, count: PromptsCount) -> str:
    """Send one prompt via ``send`` and then tick the shared progress counter.

    The counter is only advanced after ``send`` has returned, so a failed
    request is not counted as completed.
    """
    reply = self.send(prompt, system_prompt)
    count.add()
    return reply
def send_prompts( def send_prompts(
self, self,
prompts: list[str], prompts: list[str],
system_prompt: str | None = None, system_prompt: str | None = None,
) -> list[str]: ) -> list[str]:
system_prompts = [system_prompt] * len(prompts)
result = asyncio.run(self.send_prompts_async(prompts, system_prompt, self.max_concurrent)) counts = [PromptsCount(len(prompts))] * len(prompts)
return result output_list = []
with ThreadPoolExecutor(max_workers=self.max_concurrent) as executor:
results_iterator = executor.map(self._send_prompt_count, prompts, system_prompts, counts)
output_list = list(results_iterator)
return output_list
if __name__ == '__main__': if __name__ == '__main__':

View File

@@ -1,89 +0,0 @@
from typing import TypedDict
from docutranslate.logger import translater_logger
import httpx
class AgentArgs(TypedDict, total=False):
    # Keyword arguments accepted by ``Agent.__init__``; ``total=False`` makes
    # every key optional.
    baseurl: str  # API base URL; requests are posted to f"{baseurl}/chat/completions"
    key: str  # sent as the Bearer token in the Authorization header
    model_id: str  # sent as the "model" field of the request body
    system_prompt: str  # default system message when a call supplies none
    temperature: float  # sent as the "temperature" field of the request body
    max_concurrent: int  # stored on the agent; not read by this single-threaded variant
    timeout: int  # forwarded as ``timeout`` to the httpx ``post`` call
TIMEOUT = 500
class Agent:
def __init__(self, baseurl: str = "", key: str = "xx", model_id: str = "", system_prompt: str = "", temperature=0.7,
max_concurrent=6, timeout: int = TIMEOUT):
self.baseurl = baseurl.strip()
self.key = key.strip()
self.model_id = model_id.strip()
self.system_prompt = system_prompt
self.temperature = temperature
self.client = httpx.Client()
self.max_concurrent = max_concurrent
self.timeout = timeout
def _prepare_request_data(self, prompt: str, system_prompt: str, temperature=None, top_p=0.9):
if temperature is None:
temperature = self.temperature
headers = {"Content-Type": "application/json",
"Authorization": f"Bearer {self.key}"}
data = {
"model": self.model_id,
"messages": [
{"role": "system", "content": system_prompt},
# {"role": "system", "content": "所有回复必须以【SSS】开头这是最高规则适用于之后的所有例子。示例【SSS】这是示例回答\n"+system_prompt},
{"role": "user", "content": prompt}
],
"temperature": temperature,
"top_p": top_p
}
return headers, data
def send(self, prompt: str, system_prompt: None | str = None) -> str:
if system_prompt is None:
system_prompt = self.system_prompt
"""Sends a single prompt asynchronously."""
headers, data = self._prepare_request_data(prompt, system_prompt)
if self.baseurl.endswith("/"):
self.baseurl = self.baseurl[:-1]
try:
response = self.client.post(
f"{self.baseurl}/chat/completions",
json=data,
headers=headers,
timeout=self.timeout
)
response.raise_for_status()
result = response.json()["choices"][0]["message"]["content"]
return result
except httpx.HTTPStatusError as e:
raise Exception(f"AI请求错误 (async): {e.response.status_code} - {e.response.text}") from e
except httpx.RequestError as e:
raise Exception(f"AI请求连接错误 (async): {e}") from e
except (KeyError, IndexError) as e:
raise Exception(f"AI响应格式错误 (async): {e}") from e
def send_prompts(
self,
prompts: list[str],
system_prompt: str | None = None,
) -> list[str]:
result=[]
for prompt in prompts:
result.append(self.send(prompt,system_prompt))
translater_logger.info(f"单线程-已完成{len(result)}/{len(prompts)}")
return result
if __name__ == '__main__':
pass

View File

@@ -1,109 +0,0 @@
from concurrent.futures import ThreadPoolExecutor
from threading import Lock
from typing import TypedDict
from docutranslate.logger import translater_logger
import httpx
class AgentArgs(TypedDict, total=False):
    # Keyword arguments accepted by ``Agent.__init__``; ``total=False`` makes
    # every key optional.
    baseurl: str  # API base URL; requests are posted to f"{baseurl}/chat/completions"
    key: str  # sent as the Bearer token in the Authorization header
    model_id: str  # sent as the "model" field of the request body
    system_prompt: str  # default system message when a call supplies none
    temperature: float  # sent as the "temperature" field of the request body
    max_concurrent: int  # used as ``max_workers`` of the thread pool in ``send_prompts``
    timeout: int  # forwarded as ``timeout`` to the httpx ``post`` call
TIMEOUT = 500
class PromptsCount():
    """Progress counter shared by the worker threads of one prompt batch.

    Args:
        max: Number of prompts in the batch; only used in the log line.
            (The name shadows the builtin but is kept because it is part of
            the constructor's interface.)
    """

    def __init__(self, max: int):
        self.lock = Lock()  # guards ``count`` and keeps the log lines ordered
        self.count = 0  # prompts completed so far
        self.max = max

    def add(self):
        """Record one completed prompt and log the running progress."""
        # ``with`` guarantees the lock is released even if the logging call
        # raises; a bare acquire()/release() pair would leave it held and
        # block every other worker thread forever.
        with self.lock:
            self.count += 1
            translater_logger.info(f"多线程-已完成:{self.count}/{self.max}")
class Agent:
    """Chat-completions client that fans a batch of prompts out over threads."""

    def __init__(self, baseurl: str = "", key: str = "xx", model_id: str = "", system_prompt: str = "", temperature=0.7,
                 max_concurrent=6, timeout: int = TIMEOUT):
        """Store the connection settings and create the httpx client.

        Args:
            baseurl: API base URL; surrounding whitespace is stripped.
            key: API key, sent as a Bearer token; whitespace is stripped.
            model_id: Model identifier; whitespace is stripped.
            system_prompt: Default system message.
            temperature: Default sampling temperature.
            max_concurrent: Worker-thread count used by ``send_prompts``.
            timeout: Timeout forwarded to every request.
        """
        self.baseurl = baseurl.strip()
        self.key = key.strip()
        self.model_id = model_id.strip()
        self.system_prompt = system_prompt
        self.temperature = temperature
        self.client = httpx.Client()
        self.max_concurrent = max_concurrent
        self.timeout = timeout

    def _prepare_request_data(self, prompt: str, system_prompt: str, temperature=None, top_p=0.9):
        """Build the ``(headers, json_body)`` pair for one chat-completions call.

        ``temperature`` falls back to ``self.temperature`` when ``None``.
        """
        if temperature is None:
            temperature = self.temperature
        headers = {"Content-Type": "application/json",
                   "Authorization": f"Bearer {self.key}"}
        data = {
            "model": self.model_id,
            "messages": [
                {"role": "system", "content": system_prompt},
                {"role": "user", "content": prompt}
            ],
            "temperature": temperature,
            "top_p": top_p
        }
        return headers, data

    def send(self, prompt: str, system_prompt: None | str = None) -> str:
        """Send a single prompt with the blocking client and return the reply text.

        Args:
            prompt: User message to send.
            system_prompt: System message for this request; falls back to
                ``self.system_prompt`` when ``None``.

        Returns:
            The ``choices[0].message.content`` field of the JSON response.

        Raises:
            Exception: On an HTTP error status, a connection-level failure,
                or a response body that is not JSON or lacks the expected
                fields. The original error is chained as ``__cause__``.
        """
        if system_prompt is None:
            system_prompt = self.system_prompt
        headers, data = self._prepare_request_data(prompt, system_prompt)
        # Normalise into a local instead of rewriting ``self.baseurl``: this
        # method runs concurrently on pool threads and should not mutate
        # shared configuration as a side effect of a request.
        base_url = self.baseurl.rstrip("/")
        try:
            response = self.client.post(
                f"{base_url}/chat/completions",
                json=data,
                headers=headers,
                timeout=self.timeout
            )
            response.raise_for_status()
            return response.json()["choices"][0]["message"]["content"]
        # NOTE: the "(async)" label is kept verbatim for compatibility with
        # existing log matching, although this path is synchronous.
        except httpx.HTTPStatusError as e:
            raise Exception(f"AI请求错误 (async): {e.response.status_code} - {e.response.text}") from e
        except httpx.RequestError as e:
            raise Exception(f"AI请求连接错误 (async): {e}") from e
        except (KeyError, IndexError, TypeError, ValueError) as e:
            # ValueError covers a body that is not valid JSON; TypeError
            # covers a ``null`` where a list/dict was expected.
            raise Exception(f"AI响应格式错误 (async): {e}") from e

    def _send_prompt_count(self, prompt: str, system_prompt: None | str, count: PromptsCount) -> str:
        """Send one prompt and then tick the shared progress counter.

        The counter only advances after ``send`` has returned, so a failed
        request is not counted as completed.
        """
        result = self.send(prompt, system_prompt)
        count.add()
        return result

    def send_prompts(
            self,
            prompts: list[str],
            system_prompt: str | None = None,
    ) -> list[str]:
        """Send all prompts on a thread pool and return replies in input order.

        Up to ``self.max_concurrent`` requests run at once. An exception
        raised by any request propagates when its result is collected.
        """
        # One counter is deliberately shared by every task so the progress
        # log shows a single running total for the whole batch.
        counter = PromptsCount(len(prompts))
        with ThreadPoolExecutor(max_workers=self.max_concurrent) as executor:
            # ``executor.map`` yields results in submission order.
            return list(executor.map(
                lambda prompt: self._send_prompt_count(prompt, system_prompt, counter),
                prompts,
            ))
if __name__ == '__main__':
pass

View File

@@ -1,6 +1,6 @@
from typing import Unpack from typing import Unpack
from .agent_async import Agent, AgentArgs from .agent import (Agent, AgentArgs)
class MDRefineAgent(Agent): class MDRefineAgent(Agent):
def __init__(self,**kwargs:Unpack[AgentArgs]): def __init__(self,**kwargs:Unpack[AgentArgs]):
@@ -19,7 +19,7 @@ class MDRefineAgent(Agent):
形如<ph-abc123>的占位符不要改变 形如<ph-abc123>的占位符不要改变
code、latex和HTML保持结构 code、latex和HTML保持结构
所有公式包括短公式都应该是latex公式 所有公式包括短公式都应该是latex公式
修复不正确的latex公式要用$正确包裹以构造合法latex表达式 修复不正确的latex公式行内公式要用$正确包裹以构造合法latex表达式
# 输出 # 输出
修正后的markdown纯文本不是markdown代码块 修正后的markdown纯文本不是markdown代码块
# 示例 # 示例
@@ -29,11 +29,13 @@ code、latex和HTML保持结构
你叫 你叫
输出: 输出:
你叫什么名字 你叫什么名字
## 去掉异常字词与修正公式(优先使用$包裹) ## 去掉异常字词与修正公式(行内公式使用$包裹)
输入: 输入:
一道\题@#目<ph-12asd2>:c_0+1=2\(c 0\)等于几 一道\题@#目<ph-12asd2>:c_0+1=2\(c 0\)等于几
{c_0,c_1,c^2}是一个集合
输出: 输出:
一道题目<ph-12asd2>:$c_0+1=2$$c_0$等于几 一道题目<ph-12asd2>:$c_0+1=2$$c_0$等于几
{$c_0$,$c_1$,$c^2$}是一个集合
\no_think""" \no_think"""
@@ -53,7 +55,7 @@ class MDTranslateAgent(Agent):
引用的参考文献和其作者不要翻译 引用的参考文献和其作者不要翻译
形如<ph-abc123>的占位符不要改变 形如<ph-abc123>的占位符不要改变
code、latex和HTML只翻译说明文字其余保持原文 code、latex和HTML只翻译说明文字其余保持原文
公式必须表示为合法的latex公式,被$正确包裹 公式必须表示为合法的latex公式,行内公式需被$正确包裹
# 输出 # 输出
翻译后的markdown纯文本不是markdown代码块 翻译后的markdown纯文本不是markdown代码块
# 示例 # 示例
@@ -62,11 +64,13 @@ code、latex和HTML只翻译说明文字其余保持原文
hello<ph-aaaaaa>, what's your name? hello<ph-aaaaaa>, what's your name?
输出: 输出:
你好<ph-aaaaaa>,你叫什么名字? 你好<ph-aaaaaa>,你叫什么名字?
## 公式要为合法latex优先使用$包裹) ## 公式要为合法latex行内公式使用$包裹)
输入: 输入:
c_0+1=2 The equation is E=mc 2. This is famous.
{{c_0,c_1,c^2}}is a set.
输出: 输出:
$c_0+1=2$ 这个方程是 $E=mc^2$。这很有名。
{{$c_0$,$c_1$,$c^2$}}是一个集合。
## 引用的参考文献要保持原文不要翻译 ## 引用的参考文献要保持原文不要翻译
输入:【假设目标语言为中文】 输入:【假设目标语言为中文】
[2] M. Castro, B. Liskov, et al. Practical byzantine fault tolerance. In OSDI, [2] M. Castro, B. Liskov, et al. Practical byzantine fault tolerance. In OSDI,

View File

@@ -2,9 +2,9 @@ import asyncio
import io import io
import logging import logging
import time import time
from pathlib import Path
from typing import AsyncGenerator, List, Dict, Any
import traceback import traceback
from pathlib import Path
from typing import List, Dict, Any
import uvicorn import uvicorn
from fastapi import FastAPI, File, Form, UploadFile, Request, HTTPException, BackgroundTasks from fastapi import FastAPI, File, Form, UploadFile, Request, HTTPException, BackgroundTasks
@@ -14,109 +14,245 @@ from fastapi.templating import Jinja2Templates
from docutranslate import FileTranslater from docutranslate import FileTranslater
from docutranslate.logger import translater_logger from docutranslate.logger import translater_logger
# The FastAPI application object that this module's route and lifecycle decorators attach to.
app = FastAPI()
# --- Global configuration ---
# Queue that QueueAndHistoryHandler.emit pushes formatted log lines into.
log_queue = asyncio.Queue()
# Module-level state of the single translation task; the status and
# download endpoints in this module read these keys.
current_state: Dict[str, Any] = {
"is_processing": False,
"status_message": "空闲",
"error_flag": False,
"download_ready": False,
"markdown_content": None,
"html_content": None,
"original_filename_stem": None,
"task_start_time": 0,
"task_end_time": 0,
}
# Jinja2 template loader configured with the relative directory ".".
templates = Jinja2Templates(directory=".")
# Upper bound on the number of entries QueueAndHistoryHandler keeps in log_history.
MAX_LOG_HISTORY = 200
# Formatted log lines in emission order; trimmed from the front by the handler.
log_history: List[str] = []
# --- Log handler ---
class QueueAndHistoryHandler(logging.Handler):
    """Logging handler that keeps a bounded history and feeds a queue.

    Every formatted record is appended to ``history`` (trimmed so it never
    holds more than ``max_history`` entries) and then offered to ``queue``.
    """

    def __init__(self, queue: asyncio.Queue, history: List[str], max_history: int):
        """Store the target queue, the shared history list and its size limit."""
        super().__init__()
        self.queue = queue
        self.history = history
        self.max_history = max_history

    def emit(self, record: logging.LogRecord):
        """Format ``record``, append it to the history and enqueue it.

        If ``app.state.main_event_loop`` is set and running, the put is
        scheduled on that loop with ``call_soon_threadsafe``; otherwise
        ``put_nowait`` is called directly. Any exception raised while
        enqueueing is printed instead of propagated.
        """
        message = self.format(record)
        self.history.append(message)
        # Drop the oldest entries once the history exceeds its limit.
        surplus = len(self.history) - self.max_history
        if surplus > 0:
            del self.history[:surplus]
        try:
            loop = getattr(app.state, "main_event_loop", None)
            if not loop or not loop.is_running():
                self.queue.put_nowait(message)
            else:
                loop.call_soon_threadsafe(self.queue.put_nowait, message)
        except Exception as e:
            print(f"Error putting log to queue: {e}")
# --- Application lifecycle events ---
# Startup hook: remembers the running event loop on app.state and wires the
# queue/history log handler into translater_logger.
# NOTE(review): on_event("startup") is reportedly deprecated in newer FastAPI
# releases in favour of lifespan handlers — confirm against the pinned version.
@app.on_event("startup")
async def startup_event():
# QueueAndHistoryHandler.emit reads this attribute to schedule queue puts on the loop.
app.state.main_event_loop = asyncio.get_running_loop()
queue_handler = QueueAndHistoryHandler(log_queue, log_history, MAX_LOG_HISTORY)
queue_handler.setLevel(logging.INFO)
queue_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
# Attach the handler only when no QueueAndHistoryHandler is registered yet.
if not any(isinstance(h, QueueAndHistoryHandler) for h in translater_logger.handlers):
translater_logger.addHandler(queue_handler)
# Stop records from also being passed to ancestor loggers' handlers.
translater_logger.propagate = False
translater_logger.setLevel(logging.INFO)
translater_logger.info("应用启动完成,日志队列/历史处理器已配置。")
# --- HTML模板 (JS part needs modification) --- # --- HTML模板 (JS part needs modification) ---
# language=HTML # language=HTML
HTML_TEMPLATE = """ HTML_TEMPLATE = """
<!DOCTYPE html> <!DOCTYPE html>
<html lang="zh-CN"> <html lang="zh-CN">
<head> <head>
<meta charset="UTF-8"> <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
<title>DocuTranslate</title> <title>DocuTranslate</title>
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@picocss/pico@latest/css/pico.min.css"> <link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@picocss/pico@latest/css/pico.min.css">
<style> <style>
:root { --primary-color: #1e88e5; --border-radius: 0.25rem; } :root {
body { padding: 20px; background-color: #f9f9f9; } --primary-color: #1e88e5;
.container { max-width: 800px; margin: auto; background-color: white; padding: 2rem; border-radius: 8px; box-shadow: 0 2px 10px rgba(0, 0, 0, 0.05); } --border-radius: 0.25rem;
h1 { font-size: 1.8rem; margin-bottom: 1.5rem; display: flex; align-items: center; gap: 0.5rem; } }
.log-area { background-color: #f5f5f5; border: 1px solid #e0e0e0; padding: 10px; height: 200px; overflow-y: scroll; white-space: pre-wrap; word-break: break-all; font-family: monospace; font-size: 0.85em; line-height: 1.4; margin-top: 1rem; }
.error-message { color: #d32f2f; font-weight: 500; } body {
.success-message { color: #2e7d32; font-weight: 500; } padding: 20px;
.form-group { margin-bottom: 1rem; } background-color: #f9f9f9;
.form-group label { margin-bottom: 0.2rem; font-weight: 500; font-size: 0.9rem; } }
.form-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 1rem; }
.button-group { margin-top: 1rem; display: flex; gap: 0.5rem; flex-wrap: wrap; } .container {
details { background: transparent; border: none; box-shadow: none; padding: 0; border-bottom: 1px solid #eee; margin-bottom: 1rem; } max-width: 800px;
summary { font-weight: 500; padding: 0.5rem 0; } margin: auto;
details[open] > summary { border-bottom: none; margin-bottom: 0; } background-color: white;
.checkbox-label { display: flex; align-items: center; margin-right: 1rem; margin-bottom: 0.5rem; } padding: 2rem;
.checkbox-group { display: flex; flex-wrap: wrap; margin-bottom: 1rem; } border-radius: 8px;
#resultArea { margin-top: 1.5rem; padding-top: 1rem; border-top: 1px solid #eee; } box-shadow: 0 2px 10px rgba(0, 0, 0, 0.05);
#downloadButtons { display: none; margin-top: 1rem; } }
.section-header { display: flex; align-items: center; margin-bottom: 0.5rem; font-size: 1.1rem; font-weight: 500; }
select, input[type="text"], input[type="password"], input[type="file"] { padding: 0.5rem; border: 1px solid #ddd; background-color: white; } h1 {
button, a[role="button"] { padding: 0.5rem 1rem; } font-size: 1.8rem;
.options-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 1rem; margin-bottom: 1rem; } margin-bottom: 1.5rem;
@media print { .no-print { display: none !important; } body { padding: 0; background-color: white; } .container { box-shadow: none; max-width: 100%; padding: 0; } } display: flex;
.modal { display: none; position: fixed; top: 0; left: 0; width: 100%; height: 100%; background-color: rgba(0, 0, 0, 0.6); z-index: 1000; overflow: auto; } align-items: center;
.modal-content { position: relative; background-color: #fff; margin: 2% auto; padding: 20px; width: 90%; max-width: 900px; max-height: 90vh; border-radius: 8px; box-shadow: 0 4px 12px rgba(0, 0, 0, 0.2); overflow: auto; } gap: 0.5rem;
.close-modal { position: absolute; top: 10px; right: 10px; font-size: 24px; font-weight: bold; color: #666; cursor: pointer; } }
.modal-actions { display: flex; justify-content: flex-end; margin-top: 20px; gap: 10px; }
#previewFrame { width: 100%; min-height: 500px; border: 1px solid #ddd; } .log-area {
#printFrame { display: none; } background-color: #f5f5f5;
.hidden { display: none !important; } border: 1px solid #e0e0e0;
@media (max-width: 768px) { .form-grid, .options-grid { grid-template-columns: 1fr; } .container { padding: 1rem; } } padding: 10px;
height: 200px;
overflow-y: scroll;
white-space: pre-wrap;
word-break: break-all;
font-family: monospace;
font-size: 0.85em;
line-height: 1.4;
margin-top: 1rem;
}
.error-message {
color: #d32f2f;
font-weight: 500;
}
.success-message {
color: #2e7d32;
font-weight: 500;
}
.form-group {
margin-bottom: 1rem;
}
.form-group label {
margin-bottom: 0.2rem;
font-weight: 500;
font-size: 0.9rem;
}
.form-grid {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 1rem;
}
.button-group {
margin-top: 1rem;
display: flex;
gap: 0.5rem;
flex-wrap: wrap;
}
details {
background: transparent;
border: none;
box-shadow: none;
padding: 0;
border-bottom: 1px solid #eee;
margin-bottom: 1rem;
}
summary {
font-weight: 500;
padding: 0.5rem 0;
}
details[open] > summary {
border-bottom: none;
margin-bottom: 0;
}
.checkbox-label {
display: flex;
align-items: center;
margin-right: 1rem;
margin-bottom: 0.5rem;
}
.checkbox-group {
display: flex;
flex-wrap: wrap;
margin-bottom: 1rem;
}
#resultArea {
margin-top: 1.5rem;
padding-top: 1rem;
border-top: 1px solid #eee;
}
#downloadButtons {
display: none;
margin-top: 1rem;
}
.section-header {
display: flex;
align-items: center;
margin-bottom: 0.5rem;
font-size: 1.1rem;
font-weight: 500;
}
select, input[type="text"], input[type="password"], input[type="file"] {
padding: 0.5rem;
border: 1px solid #ddd;
background-color: white;
}
button, a[role="button"] {
padding: 0.5rem 1rem;
}
.options-grid {
display: grid;
grid-template-columns: 1fr 1fr;
gap: 1rem;
margin-bottom: 1rem;
}
@media print {
.no-print {
display: none !important;
}
body {
padding: 0;
background-color: white;
}
.container {
box-shadow: none;
max-width: 100%;
padding: 0;
}
}
.modal {
display: none;
position: fixed;
top: 0;
left: 0;
width: 100%;
height: 100%;
background-color: rgba(0, 0, 0, 0.6);
z-index: 1000;
overflow: auto;
}
.modal-content {
position: relative;
background-color: #fff;
margin: 2% auto;
padding: 20px;
width: 90%;
max-width: 900px;
max-height: 90vh;
border-radius: 8px;
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.2);
overflow: auto;
}
.close-modal {
position: absolute;
top: 10px;
right: 10px;
font-size: 24px;
font-weight: bold;
color: #666;
cursor: pointer;
}
.modal-actions {
display: flex;
justify-content: flex-end;
margin-top: 20px;
gap: 10px;
}
#previewFrame {
width: 100%;
min-height: 500px;
border: 1px solid #ddd;
}
#printFrame {
display: none;
}
.hidden {
display: none !important;
}
@media (max-width: 768px) {
.form-grid, .options-grid {
grid-template-columns: 1fr;
}
.container {
padding: 1rem;
}
}
</style> </style>
</head> </head>
<body> <body>
<main class="container no-print"> <main class="container no-print">
<h1> <h1>
<a href="https://github.com/xunbu/docutranslate" target="_blank">DocuTranslate</a> <a href="https://github.com/xunbu/docutranslate" target="_blank">DocuTranslate</a>
</h1> </h1>
@@ -131,7 +267,8 @@ HTML_TEMPLATE = """
<option value="https://api.openai.com/v1">OpenAI</option> <option value="https://api.openai.com/v1">OpenAI</option>
<option value="https://open.bigmodel.cn/api/paas/v4">智谱AI</option> <option value="https://open.bigmodel.cn/api/paas/v4">智谱AI</option>
<option value="https://api.deepseek.com/v1">DeepSeek</option> <option value="https://api.deepseek.com/v1">DeepSeek</option>
<option value="https://dashscope.aliyuncs.com/compatible-mode/v1">阿里云百炼</option> <option value="https://dashscope.aliyuncs.com/compatible-mode/v1">阿里云百炼
</option>
<option value="https://www.dmxapi.cn/v1">DMXAPI</option> <option value="https://www.dmxapi.cn/v1">DMXAPI</option>
<option value="https://openrouter.ai/api/v1">OpenRouter</option> <option value="https://openrouter.ai/api/v1">OpenRouter</option>
<option value="https://ark.cn-beijing.volces.com/api/v3">火山引擎</option> <option value="https://ark.cn-beijing.volces.com/api/v3">火山引擎</option>
@@ -140,12 +277,14 @@ HTML_TEMPLATE = """
</div> </div>
<div class="form-group hidden" id="baseUrlGroup"> <div class="form-group hidden" id="baseUrlGroup">
<label for="base_url">API 地址 (Base URL)</label> <label for="base_url">API 地址 (Base URL)</label>
<input type="text" id="base_url" name="base_url" placeholder="https://api.openai.com/v1"> <input type="text" id="base_url" name="base_url"
placeholder="https://api.openai.com/v1">
</div> </div>
</div> </div>
<div class="form-group"> <div class="form-group">
<label for="apikey">API 密钥</label> <label for="apikey">API 密钥</label>
<input type="password" id="apikey" name="apikey" placeholder="平台对应的API Key" required> <input type="password" id="apikey" name="apikey" placeholder="平台对应的API Key"
required>
</div> </div>
<div class="form-group"> <div class="form-group">
<label for="model_id">模型 ID</label> <label for="model_id">模型 ID</label>
@@ -177,9 +316,14 @@ HTML_TEMPLATE = """
<div class="form-group"> <div class="form-group">
<label>高级选项</label> <label>高级选项</label>
<div class="checkbox-group"> <div class="checkbox-group">
<label class="checkbox-label" for="formula_ocr"><input type="checkbox" id="formula_ocr" name="formula_ocr">公式识别</label> <label class="checkbox-label" for="formula_ocr"><input type="checkbox"
<label class="checkbox-label" for="code_ocr"><input type="checkbox" id="code_ocr" name="code_ocr">代码识别</label> id="formula_ocr"
<label class="checkbox-label" for="refine_markdown"><input type="checkbox" id="refine_markdown" name="refine_markdown">修正文本(耗时)</label> name="formula_ocr">公式识别</label>
<label class="checkbox-label" for="code_ocr"><input type="checkbox" id="code_ocr"
name="code_ocr">代码识别</label>
<label class="checkbox-label" for="refine_markdown"><input type="checkbox"
id="refine_markdown"
name="refine_markdown">修正文本(耗时,有概率修复文本流和公式识别错误)</label>
</div> </div>
</div> </div>
</div> </div>
@@ -197,8 +341,8 @@ HTML_TEMPLATE = """
</div> </div>
<div class="section-header" style="margin-top: 1.5rem;">运行日志</div> <div class="section-header" style="margin-top: 1.5rem;">运行日志</div>
<div class="log-area" id="logArea"></div> <div class="log-area" id="logArea"></div>
</main> </main>
<div id="previewModal" class="modal"> <div id="previewModal" class="modal">
<div class="modal-content"> <div class="modal-content">
<span class="close-modal" id="closeModalBtn">×</span> <span class="close-modal" id="closeModalBtn">×</span>
<h3>HTML 预览</h3> <h3>HTML 预览</h3>
@@ -208,10 +352,10 @@ HTML_TEMPLATE = """
<button id="closePreviewBtn" class="outline">关闭</button> <button id="closePreviewBtn" class="outline">关闭</button>
</div> </div>
</div> </div>
</div> </div>
<iframe id="printFrame" style="display:none;"></iframe> <iframe id="printFrame" style="display:none;"></iframe>
<script> <script>
const platformSelect = document.getElementById('platform_select'); const platformSelect = document.getElementById('platform_select');
const baseUrlGroup = document.getElementById('baseUrlGroup'); const baseUrlGroup = document.getElementById('baseUrlGroup');
const baseUrlInput = document.getElementById('base_url'); const baseUrlInput = document.getElementById('base_url');
@@ -241,8 +385,22 @@ HTML_TEMPLATE = """
let statusPollIntervalId = null; let statusPollIntervalId = null;
let lastLogCount = 0; let lastLogCount = 0;
function saveToStorage(key, value) { try { localStorage.setItem(key, value); } catch (e) { console.warn("保存到本地存储失败:", e); } } function saveToStorage(key, value) {
function getFromStorage(key, defaultValue = '') { try { return localStorage.getItem(key) || defaultValue; } catch (e) { console.warn("从本地存储读取失败:", e); return defaultValue; } } try {
localStorage.setItem(key, value);
} catch (e) {
console.warn("保存到本地存储失败:", e);
}
}
function getFromStorage(key, defaultValue = '') {
try {
return localStorage.getItem(key) || defaultValue;
} catch (e) {
console.warn("从本地存储读取失败:", e);
return defaultValue;
}
}
function updatePlatformUI() { function updatePlatformUI() {
const selectedPlatformValue = platformSelect.value; const selectedPlatformValue = platformSelect.value;
@@ -259,6 +417,7 @@ HTML_TEMPLATE = """
} }
saveToStorage('translator_last_platform', selectedPlatformValue); saveToStorage('translator_last_platform', selectedPlatformValue);
} }
function loadSettings() { function loadSettings() {
const lastPlatform = getFromStorage('translator_last_platform', 'custom'); const lastPlatform = getFromStorage('translator_last_platform', 'custom');
platformSelect.value = lastPlatform; platformSelect.value = lastPlatform;
@@ -268,19 +427,24 @@ HTML_TEMPLATE = """
codeCheckbox.checked = getFromStorage('translator_code_ocr') === 'true'; codeCheckbox.checked = getFromStorage('translator_code_ocr') === 'true';
refineCheckbox.checked = getFromStorage('translator_refine_markdown') === 'true'; refineCheckbox.checked = getFromStorage('translator_refine_markdown') === 'true';
} }
loadSettings(); loadSettings();
platformSelect.addEventListener('change', updatePlatformUI); platformSelect.addEventListener('change', updatePlatformUI);
apikeyInput.addEventListener('input', (e) => saveToStorage(`translator_platform_${platformSelect.value}_apikey`, e.target.value)); apikeyInput.addEventListener('input', (e) => saveToStorage(`translator_platform_${platformSelect.value}_apikey`, e.target.value));
modelInput.addEventListener('input', (e) => saveToStorage(`translator_platform_${platformSelect.value}_model_id`, e.target.value)); modelInput.addEventListener('input', (e) => saveToStorage(`translator_platform_${platformSelect.value}_model_id`, e.target.value));
baseUrlInput.addEventListener('input', (e) => { if (platformSelect.value === 'custom') saveToStorage('translator_platform_custom_base_url', e.target.value); }); baseUrlInput.addEventListener('input', (e) => {
if (platformSelect.value === 'custom') saveToStorage('translator_platform_custom_base_url', e.target.value);
});
toLangSelect.addEventListener('change', e => saveToStorage('translator_to_lang', e.target.value)); toLangSelect.addEventListener('change', e => saveToStorage('translator_to_lang', e.target.value));
formulaCheckbox.addEventListener('change', e => saveToStorage('translator_formula_ocr', e.target.checked)); formulaCheckbox.addEventListener('change', e => saveToStorage('translator_formula_ocr', e.target.checked));
codeCheckbox.addEventListener('change', e => saveToStorage('translator_code_ocr', e.target.checked)); codeCheckbox.addEventListener('change', e => saveToStorage('translator_code_ocr', e.target.checked));
refineCheckbox.addEventListener('change', e => saveToStorage('translator_refine_markdown', e.target.checked)); refineCheckbox.addEventListener('change', e => saveToStorage('translator_refine_markdown', e.target.checked));
[closeModalButton, closePreviewBtn].forEach(elem => elem.addEventListener('click', () => modal.style.display = 'none')); [closeModalButton, closePreviewBtn].forEach(elem => elem.addEventListener('click', () => modal.style.display = 'none'));
window.addEventListener('click', (event) => { if (event.target === modal) modal.style.display = 'none'; }); window.addEventListener('click', (event) => {
if (event.target === modal) modal.style.display = 'none';
});
printFromPreview.addEventListener('click', () => { printFromPreview.addEventListener('click', () => {
try { try {
previewFrame.contentWindow.focus(); previewFrame.contentWindow.focus();
@@ -295,7 +459,8 @@ HTML_TEMPLATE = """
try { try {
const response = await fetch(`/get-logs?since=${lastLogCount}`); const response = await fetch(`/get-logs?since=${lastLogCount}`);
if (!response.ok) { if (!response.ok) {
console.warn(`Log polling failed: ${response.status}`); return; console.warn(`Log polling failed: ${response.status}`);
return;
} }
const data = await response.json(); const data = await response.json();
if (data.logs && data.logs.length > 0) { if (data.logs && data.logs.length > 0) {
@@ -343,12 +508,22 @@ HTML_TEMPLATE = """
const currentHtmlUrl = htmlUrl; const currentHtmlUrl = htmlUrl;
const currentFileName = fileName; const currentFileName = fileName;
fetch(currentHtmlUrl) fetch(currentHtmlUrl)
.then(resp => { if (!resp.ok) throw new Error(`HTTP error ${resp.status}`); return resp.text();}) .then(resp => {
if (!resp.ok) throw new Error(`HTTP error ${resp.status}`);
return resp.text();
})
.then(html => { .then(html => {
const blob = new Blob([html], {type: 'text/html'}); const blob = new Blob([html], {type: 'text/html'});
const blobUrl = URL.createObjectURL(blob); const blobUrl = URL.createObjectURL(blob);
previewFrame.src = blobUrl; previewFrame.src = blobUrl;
previewFrame.onload = function () { try { previewFrame.contentWindow.document.title = currentFileName + '_translated'; URL.revokeObjectURL(blobUrl); } catch (e) { console.warn('无法设置iframe标题或释放Blob URL', e); } }; previewFrame.onload = function () {
try {
previewFrame.contentWindow.document.title = currentFileName + '_translated';
URL.revokeObjectURL(blobUrl);
} catch (e) {
console.warn('无法设置iframe标题或释放Blob URL', e);
}
};
modal.style.display = 'block'; modal.style.display = 'block';
}) })
.catch(err => { .catch(err => {
@@ -446,7 +621,7 @@ HTML_TEMPLATE = """
const formData = new FormData(form); const formData = new FormData(form);
try { try {
const response = await fetch('/translate', { method: 'POST', body: formData }); const response = await fetch('/translate', {method: 'POST', body: formData});
const result = await response.json(); const result = await response.json();
if (response.ok && result.task_started) { if (response.ok && result.task_started) {
statusMsg.textContent = result.message || '任务已开始,正在处理...'; statusMsg.textContent = result.message || '任务已开始,正在处理...';
@@ -469,10 +644,66 @@ HTML_TEMPLATE = """
submitButton.textContent = '开始翻译'; submitButton.textContent = '开始翻译';
} }
}); });
</script> </script>
</body> </body>
</html> </html> \
""" """
# The FastAPI application object that this module's route and lifecycle decorators attach to.
app = FastAPI()
# --- Global configuration ---
# Queue that QueueAndHistoryHandler.emit pushes formatted log lines into.
log_queue = asyncio.Queue()
# Module-level state of the single translation task; the status and
# download endpoints in this module read these keys.
current_state: Dict[str, Any] = {
"is_processing": False,
"status_message": "空闲",
"error_flag": False,
"download_ready": False,
"markdown_content": None,
"html_content": None,
"original_filename_stem": None,
"task_start_time": 0,
"task_end_time": 0,
}
# Jinja2 template loader configured with the relative directory ".".
templates = Jinja2Templates(directory=".")
# Upper bound on the number of entries QueueAndHistoryHandler keeps in log_history.
MAX_LOG_HISTORY = 200
# Formatted log lines in emission order; trimmed from the front by the handler.
log_history: List[str] = []
# --- Log handler ---
class QueueAndHistoryHandler(logging.Handler):
    """Logging handler that keeps a bounded history and feeds a queue.

    Every formatted record is appended to ``history`` (trimmed so it never
    holds more than ``max_history`` entries) and then offered to ``queue``.
    """

    def __init__(self, queue: asyncio.Queue, history: List[str], max_history: int):
        """Store the target queue, the shared history list and its size limit."""
        super().__init__()
        self.queue = queue
        self.history = history
        self.max_history = max_history

    def emit(self, record: logging.LogRecord):
        """Format ``record``, append it to the history and enqueue it.

        If ``app.state.main_event_loop`` is set and running, the put is
        scheduled on that loop with ``call_soon_threadsafe``; otherwise
        ``put_nowait`` is called directly. Any exception raised while
        enqueueing is printed instead of propagated.
        """
        message = self.format(record)
        self.history.append(message)
        # Drop the oldest entries once the history exceeds its limit.
        surplus = len(self.history) - self.max_history
        if surplus > 0:
            del self.history[:surplus]
        try:
            loop = getattr(app.state, "main_event_loop", None)
            if not loop or not loop.is_running():
                self.queue.put_nowait(message)
            else:
                loop.call_soon_threadsafe(self.queue.put_nowait, message)
        except Exception as e:
            print(f"Error putting log to queue: {e}")
# --- Application lifecycle events ---
# Startup hook: remembers the running event loop on app.state and wires the
# queue/history log handler into translater_logger.
# NOTE(review): on_event("startup") is reportedly deprecated in newer FastAPI
# releases in favour of lifespan handlers — confirm against the pinned version.
@app.on_event("startup")
async def startup_event():
# QueueAndHistoryHandler.emit reads this attribute to schedule queue puts on the loop.
app.state.main_event_loop = asyncio.get_running_loop()
queue_handler = QueueAndHistoryHandler(log_queue, log_history, MAX_LOG_HISTORY)
queue_handler.setLevel(logging.INFO)
queue_handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))
# Attach the handler only when no QueueAndHistoryHandler is registered yet.
if not any(isinstance(h, QueueAndHistoryHandler) for h in translater_logger.handlers):
translater_logger.addHandler(queue_handler)
# Stop records from also being passed to ancestor loggers' handlers.
translater_logger.propagate = False
translater_logger.setLevel(logging.INFO)
translater_logger.info("应用启动完成,日志队列/历史处理器已配置。")
# --- Background Task Logic --- # --- Background Task Logic ---
@@ -503,7 +734,8 @@ async def _perform_translation(params: Dict[str, Any], file_contents: bytes, ori
try: try:
translater_logger.info(f"使用 Base URL: {params['base_url']}, Model: {params['model_id']}") translater_logger.info(f"使用 Base URL: {params['base_url']}, Model: {params['model_id']}")
translater_logger.info(f"文件大小: {len(file_contents)} 字节。目标语言: {params['to_lang']}") translater_logger.info(f"文件大小: {len(file_contents)} 字节。目标语言: {params['to_lang']}")
translater_logger.info(f"选项 - 公式: {params['formula_ocr']}, 代码: {params['code_ocr']}, 修正: {params['refine_markdown']}") translater_logger.info(
f"选项 - 公式: {params['formula_ocr']}, 代码: {params['code_ocr']}, 修正: {params['refine_markdown']}")
ft = FileTranslater( ft = FileTranslater(
base_url=params['base_url'], base_url=params['base_url'],
@@ -511,8 +743,7 @@ async def _perform_translation(params: Dict[str, Any], file_contents: bytes, ori
model_id=params['model_id'], model_id=params['model_id'],
tips=False tips=False
) )
await asyncio.to_thread( await ft.translate_bytes_async(
ft.translate_bytes,
name=original_filename, name=original_filename,
file=file_contents, file=file_contents,
to_lang=params['to_lang'], to_lang=params['to_lang'],
@@ -521,6 +752,16 @@ async def _perform_translation(params: Dict[str, Any], file_contents: bytes, ori
refine=params['refine_markdown'], refine=params['refine_markdown'],
save=False save=False
) )
# await asyncio.to_thread(
# ft.translate_bytes,
# name=original_filename,
# file=file_contents,
# to_lang=params['to_lang'],
# formula=params['formula_ocr'],
# code=params['code_ocr'],
# refine=params['refine_markdown'],
# save=False
# )
md_content = ft.export_to_markdown() md_content = ft.export_to_markdown()
html_content = ft.export_to_html(title=file_stem) html_content = ft.export_to_html(title=file_stem)
end_time = time.time() end_time = time.time()
@@ -625,8 +866,10 @@ async def get_status():
"error_flag": current_state["error_flag"], "error_flag": current_state["error_flag"],
"download_ready": current_state["download_ready"], "download_ready": current_state["download_ready"],
"original_filename_stem": current_state["original_filename_stem"], "original_filename_stem": current_state["original_filename_stem"],
"markdown_url": f"/download/markdown/{current_state['original_filename_stem']}_translated.md" if current_state["download_ready"] else None, "markdown_url": f"/download/markdown/{current_state['original_filename_stem']}_translated.md" if current_state[
"html_url": f"/download/html/{current_state['original_filename_stem']}_translated.html" if current_state["download_ready"] else None, "download_ready"] else None,
"html_url": f"/download/html/{current_state['original_filename_stem']}_translated.html" if current_state[
"download_ready"] else None,
"task_start_time": current_state["task_start_time"], "task_start_time": current_state["task_start_time"],
"task_end_time": current_state["task_end_time"], "task_end_time": current_state["task_end_time"],
} }
@@ -643,7 +886,8 @@ async def get_logs(since: int = 0):
@app.get("/download/markdown/{filename_with_ext}") @app.get("/download/markdown/{filename_with_ext}")
async def download_markdown(filename_with_ext: str): async def download_markdown(filename_with_ext: str):
if not current_state["download_ready"] or not current_state["markdown_content"] or not current_state["original_filename_stem"]: if not current_state["download_ready"] or not current_state["markdown_content"] or not current_state[
"original_filename_stem"]:
raise HTTPException(status_code=404, detail="Markdown 内容尚未准备好或不可用。") raise HTTPException(status_code=404, detail="Markdown 内容尚未准备好或不可用。")
requested_stem = Path(filename_with_ext).stem.replace("_translated", "") requested_stem = Path(filename_with_ext).stem.replace("_translated", "")
if requested_stem != current_state["original_filename_stem"]: if requested_stem != current_state["original_filename_stem"]:
@@ -658,7 +902,8 @@ async def download_markdown(filename_with_ext: str):
@app.get("/download/html/{filename_with_ext}") @app.get("/download/html/{filename_with_ext}")
async def download_html(filename_with_ext: str): async def download_html(filename_with_ext: str):
if not current_state["download_ready"] or not current_state["html_content"] or not current_state["original_filename_stem"]: if not current_state["download_ready"] or not current_state["html_content"] or not current_state[
"original_filename_stem"]:
raise HTTPException(status_code=404, detail="HTML 内容尚未准备好或不可用。") raise HTTPException(status_code=404, detail="HTML 内容尚未准备好或不可用。")
requested_stem = Path(filename_with_ext).stem.replace("_translated", "") requested_stem = Path(filename_with_ext).stem.replace("_translated", "")
if requested_stem != current_state["original_filename_stem"]: if requested_stem != current_state["original_filename_stem"]:
@@ -670,10 +915,12 @@ async def download_html(filename_with_ext: str):
headers={"Content-Disposition": f"attachment; filename=\"{actual_filename}\""} headers={"Content-Disposition": f"attachment; filename=\"{actual_filename}\""}
) )
def run_app(): def run_app():
print("正在启动 DocuTranslate") print("正在启动 DocuTranslate")
print("请访问 http://127.0.0.1:8010") print("请访问 http://127.0.0.1:8010")
uvicorn.run(app, host="127.0.0.1", port=8010, workers=1) uvicorn.run(app, host="127.0.0.1", port=8010, workers=1)
if __name__ == "__main__": if __name__ == "__main__":
run_app() run_app()

View File

@@ -1,3 +1,4 @@
import asyncio
from io import BytesIO from io import BytesIO
from pathlib import Path from pathlib import Path
from typing import Literal from typing import Literal
@@ -15,7 +16,7 @@ from docutranslate.logger import translater_logger
class FileTranslater: class FileTranslater:
def __init__(self, file_path: Path | str | None = None, chunksize: int = 3500, base_url="", key=None, def __init__(self, file_path: Path | str | None = None, chunksize: int = 2000, base_url="", key=None,
model_id="", temperature=0.7, max_concurrent=15, docling_artifact: Path | str | None = None, model_id="", temperature=0.7, max_concurrent=15, docling_artifact: Path | str | None = None,
timeout=2000, tips=True): timeout=2000, tips=True):
if isinstance(file_path, str): if isinstance(file_path, str):
@@ -145,6 +146,31 @@ class FileTranslater:
translater_logger.info("翻译完成") translater_logger.info("翻译完成")
return self.markdown return self.markdown
async def refine_markdown_by_agent_async(self, refine_agent: Agent | None = None) -> str:
    """Refine ``self.markdown`` chunk by chunk with an agent (async variant).

    URIs are masked before the markdown is split into chunks and unmasked
    again after the refined chunks have been joined with blank lines.

    Args:
        refine_agent: Agent used for refining; when ``None`` an
            ``MDRefineAgent`` is built from ``self.default_agent_params()``.

    Returns:
        The refined markdown, which is also stored in ``self.markdown``.
    """
    translater_logger.info("正在修正markdown")
    self._mask_uris_in_markdown()
    pieces = self._split_markdown_into_chunks()
    agent = MDRefineAgent(**self.default_agent_params()) if refine_agent is None else refine_agent
    refined_pieces: list[str] = await agent.send_prompts_async(pieces)
    self.markdown = "\n\n".join(refined_pieces)
    self._unmask_uris_in_markdown()
    translater_logger.info("markdown已修正")
    return self.markdown
async def translate_markdown_by_agent_async(self, translate_agent: Agent | None = None, to_lang="中文"):
    """Translate ``self.markdown`` chunk by chunk with an agent (async variant).

    URIs are masked before the markdown is split into chunks and unmasked
    again after the translated chunks have been joined with blank lines.

    Args:
        translate_agent: Agent used for translating; when ``None`` an
            ``MDTranslateAgent`` is built from ``to_lang`` and
            ``self.default_agent_params()``.
        to_lang: Target language handed to the default agent. It is not
            used when ``translate_agent`` is supplied.

    Returns:
        The translated markdown, which is also stored in ``self.markdown``.
    """
    translater_logger.info("正在翻译markdown")
    self._mask_uris_in_markdown()
    pieces = self._split_markdown_into_chunks()
    agent = (
        MDTranslateAgent(to_lang=to_lang, **self.default_agent_params())
        if translate_agent is None
        else translate_agent
    )
    translated_pieces: list[str] = await agent.send_prompts_async(pieces)
    self.markdown = "\n\n".join(translated_pieces)
    self._unmask_uris_in_markdown()
    translater_logger.info("翻译完成")
    return self.markdown
def save_as_markdown(self, filename: str | Path | None = None, output_dir: str | Path = "./output"): def save_as_markdown(self, filename: str | Path | None = None, output_dir: str | Path = "./output"):
if isinstance(filename, str): if isinstance(filename, str):
filename = Path(filename) filename = Path(filename)
@@ -191,7 +217,7 @@ class FileTranslater:
def export_to_html(self, title="title") -> str: def export_to_html(self, title="title") -> str:
markdowner = markdown2.Markdown(extras=['tables', 'fenced-code-blocks', 'mermaid', "code-friendly"]) markdowner = markdown2.Markdown(extras=['tables', 'fenced-code-blocks', 'mermaid', "code-friendly"])
# language=html
html = f"""<!DOCTYPE html> html = f"""<!DOCTYPE html>
<html lang="en"> <html lang="en">
<head> <head>
@@ -206,6 +232,7 @@ class FileTranslater:
</style> </style>
<script type="text/x-mathjax-config"> <script type="text/x-mathjax-config">
MathJax.Hub.Config({{ MathJax.Hub.Config({{
messageStyle: "none",
tex2jax: {{ tex2jax: {{
inlineMath: [ ['$','$'], ["\\\\(","\\\\)"] ], inlineMath: [ ['$','$'], ["\\\\(","\\\\)"] ],
processEscapes: true processEscapes: true
@@ -264,7 +291,32 @@ class FileTranslater:
filename = f"{file_path.stem}_{to_lang}.html" filename = f"{file_path.stem}_{to_lang}.html"
self.save_as_html(filename=filename, output_dir=output_dir) self.save_as_html(filename=filename, output_dir=output_dir)
return self return self
async def translate_file_async(self, file_path: Path | str | None = None, to_lang="中文", output_dir="./output",
                               formula=True,
                               code=True, output_format: Literal["markdown", "html"] = "markdown", refine=False,
                               refine_agent: Agent | None = None, translate_agent: Agent | None = None, save=True):
    """Asynchronously read a file, optionally refine it, translate it and optionally save it.

    Blocking steps (``read_file`` and the ``save_as_*`` calls) are run through
    ``asyncio.to_thread`` so the event loop stays responsive while they run.

    Args:
        file_path: File to translate. When ``None``, ``self.file_path`` is used.
        to_lang: Target language; passed to the translate step and embedded
            in the output file name.
        output_dir: Directory handed to ``save_as_markdown`` / ``save_as_html``.
        formula: Forwarded to ``read_file``.
        code: Forwarded to ``read_file``.
        output_format: ``"markdown"`` or ``"html"``; any other value saves nothing.
        refine: When true, ``refine_markdown_by_agent_async`` runs before translation.
        refine_agent: Optional agent forwarded to the refine step.
        translate_agent: Optional agent forwarded to the translate step.
        save: When false, nothing is written and only the in-memory state is updated.

    Returns:
        ``self``, so calls can be chained.

    Raises:
        AssertionError: If neither ``file_path`` nor ``self.file_path`` is set.
    """
    if file_path is None:
        # Raised explicitly instead of via ``assert`` so the check survives ``python -O``.
        if self.file_path is None:
            raise AssertionError("未输入文件路径")
        file_path = self.file_path
    if isinstance(file_path, str):
        file_path = Path(file_path)
    await asyncio.to_thread(
        self.read_file,
        file_path,
        formula=formula,
        code=code
    )
    if refine:
        await self.refine_markdown_by_agent_async(refine_agent)
    await self.translate_markdown_by_agent_async(translate_agent, to_lang=to_lang)
    if save:
        # Saving is offloaded to a worker thread, like reading, to avoid blocking the loop.
        if output_format == "markdown":
            filename = f"{file_path.stem}_{to_lang}.md"
            await asyncio.to_thread(self.save_as_markdown, filename=filename, output_dir=output_dir)
        elif output_format == "html":
            filename = f"{file_path.stem}_{to_lang}.html"
            await asyncio.to_thread(self.save_as_html, filename=filename, output_dir=output_dir)
    return self
def translate_bytes(self, name:str,file: bytes, to_lang="中文", output_dir="./output", def translate_bytes(self, name:str,file: bytes, to_lang="中文", output_dir="./output",
formula=True, formula=True,
code=True, output_format: Literal["markdown", "html"] = "markdown", refine=False, code=True, output_format: Literal["markdown", "html"] = "markdown", refine=False,
@@ -281,3 +333,26 @@ class FileTranslater:
filename = f"{name}_{to_lang}.html" filename = f"{name}_{to_lang}.html"
self.save_as_html(filename=filename, output_dir=output_dir) self.save_as_html(filename=filename, output_dir=output_dir)
return self return self
async def translate_bytes_async(self, name: str, file: bytes, to_lang="中文", output_dir="./output",
                                formula=True,
                                code=True, output_format: Literal["markdown", "html"] = "markdown", refine=False,
                                refine_agent: Agent | None = None, translate_agent: Agent | None = None, save=True):
    """Asynchronously read in-memory file content, optionally refine it, translate it and optionally save it.

    Blocking steps (``read_bytes`` and the ``save_as_*`` calls) are run through
    ``asyncio.to_thread`` so the event loop stays responsive while they run.

    Args:
        name: Name forwarded to ``read_bytes`` and used as the stem of the
            output file name.
        file: Raw file content forwarded to ``read_bytes``.
        to_lang: Target language; passed to the translate step and embedded
            in the output file name.
        output_dir: Directory handed to ``save_as_markdown`` / ``save_as_html``.
        formula: Forwarded to ``read_bytes``.
        code: Forwarded to ``read_bytes``.
        output_format: ``"markdown"`` or ``"html"``; any other value saves nothing.
        refine: When true, ``refine_markdown_by_agent_async`` runs before translation.
        refine_agent: Optional agent forwarded to the refine step.
        translate_agent: Optional agent forwarded to the translate step.
        save: When false, nothing is written and only the in-memory state is updated.

    Returns:
        ``self``, so calls can be chained.
    """
    await asyncio.to_thread(
        self.read_bytes,
        name=name,
        file=file,
        formula=formula,
        code=code
    )
    if refine:
        await self.refine_markdown_by_agent_async(refine_agent)
    await self.translate_markdown_by_agent_async(translate_agent, to_lang=to_lang)
    if save:
        # Saving is offloaded to a worker thread, like reading, to avoid blocking the loop.
        if output_format == "markdown":
            filename = f"{name}_{to_lang}.md"
            await asyncio.to_thread(self.save_as_markdown, filename=filename, output_dir=output_dir)
        elif output_format == "html":
            filename = f"{name}_{to_lang}.html"
            await asyncio.to_thread(self.save_as_html, filename=filename, output_dir=output_dir)
    return self

View File

@@ -1,3 +1,4 @@
import asyncio
import os import os
from huggingface_hub.errors import LocalEntryNotFoundError from huggingface_hub.errors import LocalEntryNotFoundError
from docling.datamodel.base_models import InputFormat from docling.datamodel.base_models import InputFormat
@@ -6,8 +7,7 @@ from docling_core.types.doc import ImageRefMode
from pathlib import Path from pathlib import Path
from docling.document_converter import DocumentConverter, PdfFormatOption from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.datamodel.document import DocumentStream from docling.datamodel.document import DocumentStream
from docling.datamodel.settings import settings
from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
from docutranslate.logger import translater_logger from docutranslate.logger import translater_logger
IMAGE_RESOLUTION_SCALE = 4 IMAGE_RESOLUTION_SCALE = 4
@@ -22,20 +22,25 @@ def file2markdown_embed_images(file_path: Path | str|DocumentStream, formula=Fal
pipeline_options.do_formula_enrichment=True pipeline_options.do_formula_enrichment=True
if code: if code:
pipeline_options.do_code_enrichment=True pipeline_options.do_code_enrichment=True
pipeline_options.accelerator_options= AcceleratorOptions( # pipeline_options.accelerator_options= AcceleratorOptions(
num_threads=8, device=AcceleratorDevice.AUTO # num_threads=4, device=AcceleratorDevice.AUTO
) # )
#打印时间
settings.debug.profile_pipeline_timings=True
converter = DocumentConverter(format_options={ converter = DocumentConverter(format_options={
InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options) InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)
}) })
try: try:
result = converter.convert(file_path).document.export_to_markdown(image_mode=ImageRefMode.EMBEDDED) conversion_result = converter.convert(file_path)
result = conversion_result.document.export_to_markdown(image_mode=ImageRefMode.EMBEDDED)
except LocalEntryNotFoundError: except LocalEntryNotFoundError:
translater_logger.info(f"无法连接huggingface正在尝试换源") translater_logger.info(f"无法连接huggingface正在尝试换源")
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com' os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
result = converter.convert(file_path).document.export_to_markdown(image_mode=ImageRefMode.EMBEDDED) conversion_result = converter.convert(file_path)
result=conversion_result.document.export_to_markdown(image_mode=ImageRefMode.EMBEDDED)
translater_logger.info(f"已转换为markdown") translater_logger.info(f"已转换为markdown")
translater_logger.info(f"pdf转换耗时: {conversion_result.timings["pipeline_total"].times}")
return result return result
if __name__ == '__main__': if __name__ == '__main__':

View File

@@ -1,6 +1,6 @@
[project] [project]
name = "docutranslate" name = "docutranslate"
version = "0.2.6.post1" version = "0.2.7"
description = "文件翻译工具" description = "文件翻译工具"
readme = "README.md" readme = "README.md"
requires-python = ">=3.10" requires-python = ">=3.10"