增加async的完全支持
This commit is contained in:
168
.idea/workspace.xml
generated
168
.idea/workspace.xml
generated
@@ -5,9 +5,17 @@
|
||||
</component>
|
||||
<component name="ChangeListManager">
|
||||
<list default="true" id="6b18b44a-df57-4212-a857-9e291ebe5dd2" name="更改" comment="">
|
||||
<change afterPath="$PROJECT_DIR$/LICENSE" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/README.md" beforeDir="false" afterPath="$PROJECT_DIR$/README.md" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/docutranslate/agents/__init__.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/agents/__init__.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/docutranslate/agents/agent_async.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/agents/agent.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/docutranslate/agents/agent_sync.py" beforeDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/docutranslate/agents/agent_thread.py" beforeDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/docutranslate/agents/markdown_agent.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/agents/markdown_agent.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/docutranslate/app.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/app.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/docutranslate/translater.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/translater.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/docutranslate/utils/convert.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/utils/convert.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/pyproject.toml" beforeDir="false" afterPath="$PROJECT_DIR$/pyproject.toml" afterDir="false" />
|
||||
</list>
|
||||
<option name="SHOW_DIALOG" value="false" />
|
||||
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
||||
@@ -33,59 +41,59 @@
|
||||
<option name="hideEmptyMiddlePackages" value="true" />
|
||||
<option name="showLibraryContents" value="true" />
|
||||
</component>
|
||||
<component name="PropertiesComponent">{
|
||||
"keyToString": {
|
||||
"DefaultHtmlFileTemplate": "HTML File",
|
||||
"JavaScript 调试.output.html (1).executor": "Run",
|
||||
"JavaScript 调试.output.html.executor": "Run",
|
||||
"JavaScript 调试.regex.md_中文.html.executor": "Run",
|
||||
"JavaScript 调试.regex_中文.html.executor": "Run",
|
||||
"JavaScript 调试.test2.html.executor": "Run",
|
||||
"JavaScript 调试.test2_英文.html.executor": "Run",
|
||||
"JavaScript 调试.test4-1_中文.html.executor": "Run",
|
||||
"JavaScript 调试.互联网认证授权机制.html.executor": "Run",
|
||||
"JavaScript 调试.互联网认证授权机制_英文.html.executor": "Run",
|
||||
"JavaScript 调试.毕业论文_英文.html.executor": "Run",
|
||||
"ModuleVcsDetector.initialDetectionPerformed": "true",
|
||||
"Python 测试.Python 测试 (markdown_mask.py 内).executor": "Run",
|
||||
"Python 测试.markdown_mask.Test.test_basic_link_masking 的 Python 测试.executor": "Run",
|
||||
"Python.PDFtranslater (1).executor": "Run",
|
||||
"Python.PDFtranslater (2).executor": "Run",
|
||||
"Python.agent.executor": "Debug",
|
||||
"Python.agent_utils.executor": "Run",
|
||||
"Python.app (1).executor": "Run",
|
||||
"Python.app.executor": "Run",
|
||||
"Python.app2.executor": "Run",
|
||||
"Python.app_test (1).executor": "Run",
|
||||
"Python.convert.executor": "Run",
|
||||
"Python.markdown_splitter.executor": "Debug",
|
||||
"Python.markdown_utils.executor": "Run",
|
||||
"Python.test.executor": "Run",
|
||||
"Python.test1.executor": "Run",
|
||||
"Python.test2.executor": "Run",
|
||||
"Python.test3.executor": "Run",
|
||||
"Python.test4.executor": "Run",
|
||||
"Python.translater.executor": "Run",
|
||||
"Python.切分测试.executor": "Run",
|
||||
"RunOnceActivity.ShowReadmeOnStart": "true",
|
||||
"RunOnceActivity.TerminalTabsStorage.copyFrom.TerminalArrangementManager": "true",
|
||||
"RunOnceActivity.git.unshallow": "true",
|
||||
"git-widget-placeholder": "main",
|
||||
"last_opened_file_path": "C:/Users/jxgm/Desktop/FileTranslate/dist/DocuTranslate",
|
||||
"node.js.detected.package.eslint": "true",
|
||||
"node.js.detected.package.tslint": "true",
|
||||
"node.js.selected.package.eslint": "(autodetect)",
|
||||
"node.js.selected.package.tslint": "(autodetect)",
|
||||
"nodejs_package_manager_path": "npm",
|
||||
"settings.editor.selected.configurable": "preferences.pluginManager",
|
||||
"vue.rearranger.settings.migration": "true"
|
||||
<component name="PropertiesComponent"><![CDATA[{
|
||||
"keyToString": {
|
||||
"DefaultHtmlFileTemplate": "HTML File",
|
||||
"JavaScript 调试.output.html (1).executor": "Run",
|
||||
"JavaScript 调试.output.html.executor": "Run",
|
||||
"JavaScript 调试.regex.md_中文.html.executor": "Run",
|
||||
"JavaScript 调试.regex_中文.html.executor": "Run",
|
||||
"JavaScript 调试.test2.html.executor": "Run",
|
||||
"JavaScript 调试.test2_英文.html.executor": "Run",
|
||||
"JavaScript 调试.test4-1_中文.html.executor": "Run",
|
||||
"JavaScript 调试.互联网认证授权机制.html.executor": "Run",
|
||||
"JavaScript 调试.互联网认证授权机制_英文.html.executor": "Run",
|
||||
"JavaScript 调试.毕业论文_英文.html.executor": "Run",
|
||||
"ModuleVcsDetector.initialDetectionPerformed": "true",
|
||||
"Python 测试.Python 测试 (markdown_mask.py 内).executor": "Run",
|
||||
"Python 测试.markdown_mask.Test.test_basic_link_masking 的 Python 测试.executor": "Run",
|
||||
"Python.PDFtranslater (1).executor": "Run",
|
||||
"Python.PDFtranslater (2).executor": "Run",
|
||||
"Python.agent.executor": "Debug",
|
||||
"Python.agent_utils.executor": "Run",
|
||||
"Python.app (1).executor": "Run",
|
||||
"Python.app.executor": "Run",
|
||||
"Python.app2.executor": "Run",
|
||||
"Python.app_test (1).executor": "Run",
|
||||
"Python.convert.executor": "Run",
|
||||
"Python.markdown_splitter.executor": "Debug",
|
||||
"Python.markdown_utils.executor": "Run",
|
||||
"Python.test.executor": "Run",
|
||||
"Python.test1.executor": "Run",
|
||||
"Python.test2.executor": "Run",
|
||||
"Python.test3.executor": "Run",
|
||||
"Python.test4.executor": "Run",
|
||||
"Python.translater.executor": "Run",
|
||||
"Python.切分测试.executor": "Run",
|
||||
"RunOnceActivity.ShowReadmeOnStart": "true",
|
||||
"RunOnceActivity.TerminalTabsStorage.copyFrom.TerminalArrangementManager": "true",
|
||||
"RunOnceActivity.git.unshallow": "true",
|
||||
"git-widget-placeholder": "main",
|
||||
"last_opened_file_path": "C:/Users/jxgm/Desktop/FileTranslate/docutranslate/agents",
|
||||
"node.js.detected.package.eslint": "true",
|
||||
"node.js.detected.package.tslint": "true",
|
||||
"node.js.selected.package.eslint": "(autodetect)",
|
||||
"node.js.selected.package.tslint": "(autodetect)",
|
||||
"nodejs_package_manager_path": "npm",
|
||||
"settings.editor.selected.configurable": "preferences.pluginManager",
|
||||
"vue.rearranger.settings.migration": "true"
|
||||
}
|
||||
}</component>
|
||||
}]]></component>
|
||||
<component name="RecentsManager">
|
||||
<key name="CopyFile.RECENT_KEYS">
|
||||
<recent name="C:\Users\jxgm\Desktop\FileTranslate\docutranslate\agents" />
|
||||
<recent name="C:\Users\jxgm\Desktop\FileTranslate\dist\DocuTranslate" />
|
||||
<recent name="C:\Users\jxgm\Desktop\FileTranslate\dist" />
|
||||
<recent name="C:\Users\jxgm\Desktop\FileTranslate\docutranslate\agents" />
|
||||
<recent name="C:\Users\jxgm\Desktop\FileTranslate\dist\app" />
|
||||
<recent name="C:\Users\jxgm\Desktop\FileTranslate\tests\files" />
|
||||
</key>
|
||||
@@ -97,7 +105,7 @@
|
||||
<recent name="C:\Users\jxgm\Desktop\FileTranslate\tests\resource" />
|
||||
</key>
|
||||
</component>
|
||||
<component name="RunManager" selected="Python.test">
|
||||
<component name="RunManager" selected="Python.app_test (1)">
|
||||
<configuration default="true" type="DjangoTestsConfigurationType">
|
||||
<module name="filetranslate" />
|
||||
<option name="ENV_FILES" value="" />
|
||||
@@ -329,29 +337,6 @@
|
||||
<option name="INPUT_FILE" value="" />
|
||||
<method v="2" />
|
||||
</configuration>
|
||||
<configuration name="app" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
|
||||
<module name="FileTranslate" />
|
||||
<option name="ENV_FILES" value="" />
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
<envs>
|
||||
<env name="PYTHONUNBUFFERED" value="1" />
|
||||
</envs>
|
||||
<option name="SDK_HOME" value="" />
|
||||
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/docutranslate" />
|
||||
<option name="IS_MODULE_SDK" value="true" />
|
||||
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
|
||||
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/docutranslate/app.py" />
|
||||
<option name="PARAMETERS" value="" />
|
||||
<option name="SHOW_COMMAND_LINE" value="false" />
|
||||
<option name="EMULATE_TERMINAL" value="false" />
|
||||
<option name="MODULE_MODE" value="false" />
|
||||
<option name="REDIRECT_INPUT" value="false" />
|
||||
<option name="INPUT_FILE" value="" />
|
||||
<method v="2" />
|
||||
</configuration>
|
||||
<configuration name="app_test (1)" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
|
||||
<module name="FileTranslate" />
|
||||
<option name="ENV_FILES" value="" />
|
||||
@@ -421,6 +406,29 @@
|
||||
<option name="INPUT_FILE" value="" />
|
||||
<method v="2" />
|
||||
</configuration>
|
||||
<configuration name="test2" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
|
||||
<module name="FileTranslate" />
|
||||
<option name="ENV_FILES" value="" />
|
||||
<option name="INTERPRETER_OPTIONS" value="" />
|
||||
<option name="PARENT_ENVS" value="true" />
|
||||
<envs>
|
||||
<env name="PYTHONUNBUFFERED" value="1" />
|
||||
</envs>
|
||||
<option name="SDK_HOME" value="" />
|
||||
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/tests" />
|
||||
<option name="IS_MODULE_SDK" value="true" />
|
||||
<option name="ADD_CONTENT_ROOTS" value="true" />
|
||||
<option name="ADD_SOURCE_ROOTS" value="true" />
|
||||
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
|
||||
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/tests/test2.py" />
|
||||
<option name="PARAMETERS" value="" />
|
||||
<option name="SHOW_COMMAND_LINE" value="false" />
|
||||
<option name="EMULATE_TERMINAL" value="false" />
|
||||
<option name="MODULE_MODE" value="false" />
|
||||
<option name="REDIRECT_INPUT" value="false" />
|
||||
<option name="INPUT_FILE" value="" />
|
||||
<method v="2" />
|
||||
</configuration>
|
||||
<configuration name="test" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
|
||||
<module name="FileTranslate" />
|
||||
<option name="ENV_FILES" value="" />
|
||||
@@ -543,11 +551,11 @@
|
||||
</configuration>
|
||||
<recent_temporary>
|
||||
<list>
|
||||
<item itemvalue="Python.test" />
|
||||
<item itemvalue="Python.app_test (1)" />
|
||||
<item itemvalue="Python.test2" />
|
||||
<item itemvalue="Python.test" />
|
||||
<item itemvalue="Python.切分测试" />
|
||||
<item itemvalue="Python.app_test" />
|
||||
<item itemvalue="Python.app" />
|
||||
</list>
|
||||
</recent_temporary>
|
||||
</component>
|
||||
@@ -616,7 +624,9 @@
|
||||
<workItem from="1747146670281" duration="64000" />
|
||||
<workItem from="1747185217844" duration="6194000" />
|
||||
<workItem from="1747297470216" duration="347000" />
|
||||
<workItem from="1747299661166" duration="1977000" />
|
||||
<workItem from="1747299661166" duration="4649000" />
|
||||
<workItem from="1747311432043" duration="2883000" />
|
||||
<workItem from="1747380029603" duration="10381000" />
|
||||
</task>
|
||||
<servers />
|
||||
</component>
|
||||
@@ -624,8 +634,8 @@
|
||||
<option name="version" value="3" />
|
||||
</component>
|
||||
<component name="com.intellij.coverage.CoverageDataManagerImpl">
|
||||
<SUITE FILE_PATH="coverage/filetranslate$app_test__1_.coverage" NAME="app_test (1) 覆盖结果" MODIFIED="1747189112668" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
|
||||
<SUITE FILE_PATH="coverage/filetranslate$test.coverage" NAME="test 覆盖结果" MODIFIED="1747300796373" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
|
||||
<SUITE FILE_PATH="coverage/filetranslate$app_test__1_.coverage" NAME="app_test (1) 覆盖结果" MODIFIED="1747390450384" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
|
||||
<SUITE FILE_PATH="coverage/filetranslate$test.coverage" NAME="test 覆盖结果" MODIFIED="1747301959211" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
|
||||
<SUITE FILE_PATH="coverage/filetranslate$convert.coverage" NAME="convert 覆盖结果" MODIFIED="1746963490689" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate/utils" />
|
||||
<SUITE FILE_PATH="coverage/PDFtranslate$PDFtranslater__1_.coverage" NAME="PDFtranslater (1) 覆盖结果" MODIFIED="1746633258205" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages" />
|
||||
<SUITE FILE_PATH="coverage/PDFtranslate$agent_utils.coverage" NAME="agent_utils 覆盖结果" MODIFIED="1746617703678" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages/utils" />
|
||||
@@ -645,7 +655,7 @@
|
||||
<SUITE FILE_PATH="coverage/filetranslate$test4.coverage" NAME="test4 覆盖结果" MODIFIED="1746887036353" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
|
||||
<SUITE FILE_PATH="coverage/PDFtranslate$translater.coverage" NAME="translater 覆盖结果" MODIFIED="1746600434803" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages" />
|
||||
<SUITE FILE_PATH="coverage/PDFtranslate$markdown_utils.coverage" NAME="markdown_utils 覆盖结果" MODIFIED="1746598797872" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages/utils" />
|
||||
<SUITE FILE_PATH="coverage/filetranslate$test2.coverage" NAME="test2 覆盖结果" MODIFIED="1747008834523" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
|
||||
<SUITE FILE_PATH="coverage/filetranslate$test2.coverage" NAME="test2 覆盖结果" MODIFIED="1747383231002" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
|
||||
<SUITE FILE_PATH="coverage/filetranslate$translater.coverage" NAME="translater 覆盖结果" MODIFIED="1746843159560" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate" />
|
||||
<SUITE FILE_PATH="coverage/PDFtranslate$.coverage" NAME=" 覆盖结果" MODIFIED="1746588350286" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/decorator" />
|
||||
</component>
|
||||
|
||||
21
LICENSE
Normal file
21
LICENSE
Normal file
@@ -0,0 +1,21 @@
|
||||
MIT License
|
||||
|
||||
Copyright (c) 2024 International Business Machines
|
||||
|
||||
Permission is hereby granted, free of charge, to any person obtaining a copy
|
||||
of this software and associated documentation files (the "Software"), to deal
|
||||
in the Software without restriction, including without limitation the rights
|
||||
to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
|
||||
copies of the Software, and to permit persons to whom the Software is
|
||||
furnished to do so, subject to the following conditions:
|
||||
|
||||
The above copyright notice and this permission notice shall be included in all
|
||||
copies or substantial portions of the Software.
|
||||
|
||||
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
|
||||
IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
|
||||
FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
|
||||
AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
|
||||
LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
|
||||
OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
|
||||
SOFTWARE.
|
||||
@@ -1,2 +1,2 @@
|
||||
from .agent_async import Agent, AgentArgs
|
||||
from .agent import Agent, AgentArgs
|
||||
from .markdown_agent import MDRefineAgent, MDTranslateAgent
|
||||
|
||||
@@ -1,4 +1,6 @@
|
||||
import asyncio
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from threading import Lock
|
||||
from typing import TypedDict
|
||||
|
||||
import httpx
|
||||
@@ -16,17 +18,32 @@ class AgentArgs(TypedDict, total=False):
|
||||
timeout: int
|
||||
|
||||
|
||||
# 仅使用多线程时用以计数
|
||||
class PromptsCount:
    """Progress counter shared by the worker threads of one prompt batch.

    Every call to ``add`` increments the number of finished prompts and logs
    ``count/total`` through ``translater_logger``.

    Args:
        total: Number of prompts in the batch; used only for the log line.
    """

    def __init__(self, total: int):
        # Serialises the increment and the log line across worker threads.
        self.lock = Lock()
        self.count = 0  # prompts finished so far
        self.total = total

    def add(self):
        """Record one finished prompt and log the running total."""
        # ``with`` releases the lock even when the logging call raises. The
        # previous acquire()/release() pair would have left the lock held in
        # that case, blocking every other worker on its next add().
        with self.lock:
            self.count += 1
            translater_logger.info(f"多线程-已完成:{self.count}/{self.total}")
|
||||
|
||||
|
||||
TIMEOUT = 500
|
||||
|
||||
|
||||
class Agent:
|
||||
def __init__(self, baseurl: str = "", key: str = "xx", model_id: str = "", system_prompt: str = "", temperature=0.7,
|
||||
max_concurrent=6, timeout: int = TIMEOUT):
|
||||
max_concurrent=15, timeout: int = TIMEOUT):
|
||||
self.baseurl = baseurl.strip()
|
||||
self.key = key.strip()
|
||||
self.model_id = model_id.strip()
|
||||
self.system_prompt = system_prompt
|
||||
self.temperature = temperature
|
||||
self.client = httpx.Client()
|
||||
self.client_async = httpx.AsyncClient()
|
||||
self.max_concurrent = max_concurrent
|
||||
self.timeout = timeout
|
||||
@@ -65,12 +82,6 @@ class Agent:
|
||||
)
|
||||
response.raise_for_status()
|
||||
result = response.json()["choices"][0]["message"]["content"]
|
||||
# pattern = r".*【SSS】(.*)"
|
||||
# match = re.search(pattern, result, re.DOTALL)
|
||||
# if match is None:
|
||||
# print("检测开头`【SSS】`失败")
|
||||
# else:
|
||||
# result = match.group(1)
|
||||
return result
|
||||
except httpx.HTTPStatusError as e:
|
||||
raise Exception(f"AI请求错误 (async): {e.response.status_code} - {e.response.text}") from e
|
||||
@@ -83,8 +94,9 @@ class Agent:
|
||||
self,
|
||||
prompts: list[str],
|
||||
system_prompt: str | None = None,
|
||||
max_concurrent: int = 5 # 新增参数,默认并发数为5
|
||||
max_concurrent: int | None = None # 新增参数,默认并发数为5
|
||||
) -> list[str]:
|
||||
max_concurrent = self.max_concurrent if max_concurrent is None else max_concurrent
|
||||
total = len(prompts)
|
||||
count = 0
|
||||
semaphore = asyncio.Semaphore(max_concurrent)
|
||||
@@ -109,14 +121,48 @@ class Agent:
|
||||
results = await asyncio.gather(*tasks, return_exceptions=False)
|
||||
return results
|
||||
|
||||
def send(self, prompt: str, system_prompt: None | str = None) -> str:
|
||||
if system_prompt is None:
|
||||
system_prompt = self.system_prompt
|
||||
|
||||
"""Sends a single prompt asynchronously."""
|
||||
headers, data = self._prepare_request_data(prompt, system_prompt)
|
||||
if self.baseurl.endswith("/"):
|
||||
self.baseurl = self.baseurl[:-1]
|
||||
try:
|
||||
response = self.client.post(
|
||||
f"{self.baseurl}/chat/completions",
|
||||
json=data,
|
||||
headers=headers,
|
||||
timeout=self.timeout
|
||||
)
|
||||
response.raise_for_status()
|
||||
result = response.json()["choices"][0]["message"]["content"]
|
||||
return result
|
||||
except httpx.HTTPStatusError as e:
|
||||
raise Exception(f"AI请求错误 (async): {e.response.status_code} - {e.response.text}") from e
|
||||
except httpx.RequestError as e:
|
||||
raise Exception(f"AI请求连接错误 (async): {e}") from e
|
||||
except (KeyError, IndexError) as e:
|
||||
raise Exception(f"AI响应格式错误 (async): {e}") from e
|
||||
|
||||
def _send_prompt_count(self, prompt: str, system_prompt: None | str, count: PromptsCount) -> str:
    """Send ``prompt`` via ``send`` and then tick the shared progress counter.

    The counter is only advanced after ``send`` has returned, so a prompt whose
    request raises is not counted as finished.
    """
    reply = self.send(prompt, system_prompt)
    count.add()
    return reply
|
||||
|
||||
def send_prompts(
|
||||
self,
|
||||
prompts: list[str],
|
||||
system_prompt: str | None = None,
|
||||
) -> list[str]:
|
||||
|
||||
result = asyncio.run(self.send_prompts_async(prompts, system_prompt, self.max_concurrent))
|
||||
return result
|
||||
system_prompts = [system_prompt] * len(prompts)
|
||||
counts = [PromptsCount(len(prompts))] * len(prompts)
|
||||
output_list = []
|
||||
with ThreadPoolExecutor(max_workers=self.max_concurrent) as executor:
|
||||
results_iterator = executor.map(self._send_prompt_count, prompts, system_prompts, counts)
|
||||
output_list = list(results_iterator)
|
||||
return output_list
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
@@ -1,89 +0,0 @@
|
||||
from typing import TypedDict
|
||||
from docutranslate.logger import translater_logger
|
||||
import httpx
|
||||
|
||||
|
||||
class AgentArgs(TypedDict, total=False):
    """Keyword arguments mirroring the parameters of ``Agent.__init__``.

    ``total=False`` makes every key optional.
    """

    baseurl: str  # API root; "/chat/completions" is appended when sending
    key: str  # sent as the Bearer token in the Authorization header
    model_id: str  # value of the "model" field in the request body
    system_prompt: str  # default system message
    temperature: float  # default "temperature" field of the request body
    max_concurrent: int  # stored on the agent as max_concurrent
    timeout: int  # per-request timeout handed to httpx
|
||||
|
||||
|
||||
TIMEOUT = 500
|
||||
|
||||
|
||||
|
||||
class Agent:
|
||||
def __init__(self, baseurl: str = "", key: str = "xx", model_id: str = "", system_prompt: str = "", temperature=0.7,
|
||||
max_concurrent=6, timeout: int = TIMEOUT):
|
||||
self.baseurl = baseurl.strip()
|
||||
self.key = key.strip()
|
||||
self.model_id = model_id.strip()
|
||||
self.system_prompt = system_prompt
|
||||
self.temperature = temperature
|
||||
self.client = httpx.Client()
|
||||
self.max_concurrent = max_concurrent
|
||||
self.timeout = timeout
|
||||
|
||||
def _prepare_request_data(self, prompt: str, system_prompt: str, temperature=None, top_p=0.9):
|
||||
if temperature is None:
|
||||
temperature = self.temperature
|
||||
headers = {"Content-Type": "application/json",
|
||||
"Authorization": f"Bearer {self.key}"}
|
||||
data = {
|
||||
"model": self.model_id,
|
||||
"messages": [
|
||||
{"role": "system", "content": system_prompt},
|
||||
# {"role": "system", "content": "所有回复必须以【SSS】开头(这是最高规则,适用于之后的所有例子)。示例:【SSS】这是示例回答\n"+system_prompt},
|
||||
{"role": "user", "content": prompt}
|
||||
],
|
||||
"temperature": temperature,
|
||||
"top_p": top_p
|
||||
}
|
||||
return headers, data
|
||||
|
||||
def send(self, prompt: str, system_prompt: None | str = None) -> str:
|
||||
if system_prompt is None:
|
||||
system_prompt = self.system_prompt
|
||||
|
||||
"""Sends a single prompt asynchronously."""
|
||||
headers, data = self._prepare_request_data(prompt, system_prompt)
|
||||
if self.baseurl.endswith("/"):
|
||||
self.baseurl = self.baseurl[:-1]
|
||||
try:
|
||||
response = self.client.post(
|
||||
f"{self.baseurl}/chat/completions",
|
||||
json=data,
|
||||
headers=headers,
|
||||
timeout=self.timeout
|
||||
)
|
||||
response.raise_for_status()
|
||||
result = response.json()["choices"][0]["message"]["content"]
|
||||
return result
|
||||
except httpx.HTTPStatusError as e:
|
||||
raise Exception(f"AI请求错误 (async): {e.response.status_code} - {e.response.text}") from e
|
||||
except httpx.RequestError as e:
|
||||
raise Exception(f"AI请求连接错误 (async): {e}") from e
|
||||
except (KeyError, IndexError) as e:
|
||||
raise Exception(f"AI响应格式错误 (async): {e}") from e
|
||||
|
||||
|
||||
def send_prompts(
|
||||
self,
|
||||
prompts: list[str],
|
||||
system_prompt: str | None = None,
|
||||
) -> list[str]:
|
||||
result=[]
|
||||
for prompt in prompts:
|
||||
result.append(self.send(prompt,system_prompt))
|
||||
translater_logger.info(f"单线程-已完成{len(result)}/{len(prompts)}")
|
||||
return result
|
||||
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pass
|
||||
@@ -1,109 +0,0 @@
|
||||
from concurrent.futures import ThreadPoolExecutor
|
||||
from threading import Lock
|
||||
from typing import TypedDict
|
||||
from docutranslate.logger import translater_logger
|
||||
import httpx
|
||||
|
||||
|
||||
class AgentArgs(TypedDict, total=False):
    """Keyword arguments mirroring the parameters of ``Agent.__init__``.

    ``total=False`` makes every key optional.
    """

    baseurl: str  # API root; "/chat/completions" is appended when sending
    key: str  # sent as the Bearer token in the Authorization header
    model_id: str  # value of the "model" field in the request body
    system_prompt: str  # default system message
    temperature: float  # default "temperature" field of the request body
    max_concurrent: int  # size of the worker-thread pool in send_prompts
    timeout: int  # per-request timeout handed to httpx
|
||||
|
||||
|
||||
TIMEOUT = 500
|
||||
|
||||
|
||||
class PromptsCount():
|
||||
def __init__(self,max:int):
|
||||
self.lock=Lock()
|
||||
self.count=0
|
||||
self.max=max
|
||||
|
||||
def add(self):
|
||||
self.lock.acquire()
|
||||
self.count+=1
|
||||
translater_logger.info(f"多线程-已完成:{self.count}/{self.max}")
|
||||
self.lock.release()
|
||||
|
||||
|
||||
class Agent:
|
||||
def __init__(self, baseurl: str = "", key: str = "xx", model_id: str = "", system_prompt: str = "", temperature=0.7,
|
||||
max_concurrent=6, timeout: int = TIMEOUT):
|
||||
self.baseurl = baseurl.strip()
|
||||
self.key = key.strip()
|
||||
self.model_id = model_id.strip()
|
||||
self.system_prompt = system_prompt
|
||||
self.temperature = temperature
|
||||
self.client = httpx.Client()
|
||||
self.max_concurrent = max_concurrent
|
||||
self.timeout = timeout
|
||||
|
||||
def _prepare_request_data(self, prompt: str, system_prompt: str, temperature=None, top_p=0.9):
|
||||
if temperature is None:
|
||||
temperature = self.temperature
|
||||
headers = {"Content-Type": "application/json",
|
||||
"Authorization": f"Bearer {self.key}"}
|
||||
data = {
|
||||
"model": self.model_id,
|
||||
"messages": [
|
||||
{"role": "system", "content": system_prompt},
|
||||
# {"role": "system", "content": "所有回复必须以【SSS】开头(这是最高规则,适用于之后的所有例子)。示例:【SSS】这是示例回答\n"+system_prompt},
|
||||
{"role": "user", "content": prompt}
|
||||
],
|
||||
"temperature": temperature,
|
||||
"top_p": top_p
|
||||
}
|
||||
return headers, data
|
||||
|
||||
def send(self, prompt: str, system_prompt: None | str = None) -> str:
|
||||
if system_prompt is None:
|
||||
system_prompt = self.system_prompt
|
||||
|
||||
"""Sends a single prompt asynchronously."""
|
||||
headers, data = self._prepare_request_data(prompt, system_prompt)
|
||||
if self.baseurl.endswith("/"):
|
||||
self.baseurl = self.baseurl[:-1]
|
||||
try:
|
||||
response = self.client.post(
|
||||
f"{self.baseurl}/chat/completions",
|
||||
json=data,
|
||||
headers=headers,
|
||||
timeout=self.timeout
|
||||
)
|
||||
response.raise_for_status()
|
||||
result = response.json()["choices"][0]["message"]["content"]
|
||||
return result
|
||||
except httpx.HTTPStatusError as e:
|
||||
raise Exception(f"AI请求错误 (async): {e.response.status_code} - {e.response.text}") from e
|
||||
except httpx.RequestError as e:
|
||||
raise Exception(f"AI请求连接错误 (async): {e}") from e
|
||||
except (KeyError, IndexError) as e:
|
||||
raise Exception(f"AI响应格式错误 (async): {e}") from e
|
||||
|
||||
def _send_prompt_count(self,prompt: str, system_prompt:None | str,count:PromptsCount)->str:
|
||||
result=self.send(prompt,system_prompt)
|
||||
count.add()
|
||||
return result
|
||||
|
||||
|
||||
def send_prompts(
|
||||
self,
|
||||
prompts: list[str],
|
||||
system_prompt: str | None = None,
|
||||
) -> list[str]:
|
||||
system_prompts = [system_prompt] * len(prompts)
|
||||
counts=[PromptsCount(len(prompts))]* len(prompts)
|
||||
output_list = []
|
||||
with ThreadPoolExecutor(max_workers=self.max_concurrent) as executor:
|
||||
results_iterator = executor.map(self._send_prompt_count, prompts, system_prompts,counts)
|
||||
output_list = list(results_iterator)
|
||||
return output_list
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pass
|
||||
@@ -1,6 +1,6 @@
|
||||
from typing import Unpack
|
||||
|
||||
from .agent_async import Agent, AgentArgs
|
||||
from .agent import (Agent, AgentArgs)
|
||||
|
||||
class MDRefineAgent(Agent):
|
||||
def __init__(self,**kwargs:Unpack[AgentArgs]):
|
||||
@@ -19,7 +19,7 @@ class MDRefineAgent(Agent):
|
||||
形如<ph-abc123>的占位符不要改变
|
||||
code、latex和HTML保持结构
|
||||
所有公式(包括短公式)都应该是latex公式
|
||||
修复不正确的latex公式,要用$正确包裹以构造合法latex表达式
|
||||
修复不正确的latex公式,行内公式要用$正确包裹以构造合法latex表达式
|
||||
# 输出
|
||||
修正后的markdown纯文本(不是markdown代码块)
|
||||
# 示例
|
||||
@@ -29,11 +29,13 @@ code、latex和HTML保持结构
|
||||
你叫
|
||||
输出:
|
||||
你叫什么名字
|
||||
## 去掉异常字词与修正公式(优先使用$包裹)
|
||||
## 去掉异常字词与修正公式(行内公式使用$包裹)
|
||||
输入:
|
||||
一道\题@#目<ph-12asd2>:c_0+1=2,\(c 0\)等于几
|
||||
{c_0,c_1,c^2}是一个集合
|
||||
输出:
|
||||
一道题目<ph-12asd2>:$c_0+1=2$,$c_0$等于几
|
||||
{$c_0$,$c_1$,$c^2$}是一个集合
|
||||
\no_think"""
|
||||
|
||||
|
||||
@@ -53,7 +55,7 @@ class MDTranslateAgent(Agent):
|
||||
引用的参考文献和其作者不要翻译
|
||||
形如<ph-abc123>的占位符不要改变
|
||||
code、latex和HTML只翻译说明文字,其余保持原文
|
||||
公式必须表示为合法的latex公式,且被$正确包裹
|
||||
公式必须表示为合法的latex公式,行内公式需被$正确包裹
|
||||
# 输出
|
||||
翻译后的markdown纯文本(不是markdown代码块)
|
||||
# 示例
|
||||
@@ -62,11 +64,13 @@ code、latex和HTML只翻译说明文字,其余保持原文
|
||||
hello<ph-aaaaaa>, what's your name?
|
||||
输出:
|
||||
你好<ph-aaaaaa>,你叫什么名字?
|
||||
## 公式要为合法latex(优先使用$包裹)
|
||||
## 公式要为合法latex(行内公式使用$包裹)
|
||||
输入:
|
||||
c_0+1=2
|
||||
The equation is E=mc 2. This is famous.
|
||||
{{c_0,c_1,c^2}}is a set.
|
||||
输出:
|
||||
$c_0+1=2$
|
||||
这个方程是 $E=mc^2$。这很有名。
|
||||
{{$c_0$,$c_1$,$c^2$}}是一个集合。
|
||||
## 引用的参考文献要保持原文不要翻译
|
||||
输入:【假设目标语言为中文】
|
||||
[2] M. Castro, B. Liskov, et al. Practical byzantine fault tolerance. In OSDI,
|
||||
|
||||
@@ -2,9 +2,9 @@ import asyncio
|
||||
import io
|
||||
import logging
|
||||
import time
|
||||
from pathlib import Path
|
||||
from typing import AsyncGenerator, List, Dict, Any
|
||||
import traceback
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Any
|
||||
|
||||
import uvicorn
|
||||
from fastapi import FastAPI, File, Form, UploadFile, Request, HTTPException, BackgroundTasks
|
||||
@@ -14,109 +14,245 @@ from fastapi.templating import Jinja2Templates
|
||||
from docutranslate import FileTranslater
|
||||
from docutranslate.logger import translater_logger
|
||||
|
||||
app = FastAPI()
|
||||
|
||||
# --- 全局配置 ---
|
||||
log_queue = asyncio.Queue()
|
||||
current_state: Dict[str, Any] = {
|
||||
"is_processing": False,
|
||||
"status_message": "空闲",
|
||||
"error_flag": False,
|
||||
"download_ready": False,
|
||||
"markdown_content": None,
|
||||
"html_content": None,
|
||||
"original_filename_stem": None,
|
||||
"task_start_time": 0,
|
||||
"task_end_time": 0,
|
||||
}
|
||||
templates = Jinja2Templates(directory=".")
|
||||
MAX_LOG_HISTORY = 200
|
||||
log_history: List[str] = []
|
||||
|
||||
|
||||
# --- Log handler ---
class QueueAndHistoryHandler(logging.Handler):
    """Logging handler that records each formatted message twice.

    Every record is appended to a shared ``history`` list, which is trimmed so
    it never holds more than ``max_history`` entries (oldest dropped first),
    and is also put on an ``asyncio.Queue``.
    """

    def __init__(self, queue: asyncio.Queue, history: List[str], max_history: int):
        """Store the target queue, the shared history list and its size cap."""
        super().__init__()
        self.queue = queue
        self.history = history
        self.max_history = max_history

    def emit(self, record: logging.LogRecord):
        """Format ``record``, append it to the history and enqueue it.

        If ``app.state.main_event_loop`` is set and running, the enqueue is
        scheduled on that loop with ``call_soon_threadsafe``; otherwise
        ``put_nowait`` is called directly. Any exception raised while
        enqueueing is printed rather than propagated.
        """
        message = self.format(record)
        self.history.append(message)

        # Drop the oldest entries once the cap is exceeded.
        overflow = len(self.history) - self.max_history
        if overflow > 0:
            del self.history[:overflow]

        try:
            loop = getattr(app.state, "main_event_loop", None)
            if not (loop and loop.is_running()):
                self.queue.put_nowait(message)
            else:
                loop.call_soon_threadsafe(self.queue.put_nowait, message)
        except Exception as exc:
            print(f"Error putting log to queue: {exc}")
|
||||
|
||||
|
||||
# --- Application lifecycle events ---
@app.on_event("startup")
async def startup_event():
    """Remember the running event loop and wire up the queue/history log handler.

    The running loop is stored on ``app.state.main_event_loop`` (read back by
    ``QueueAndHistoryHandler.emit``). A handler is attached to
    ``translater_logger`` only if no ``QueueAndHistoryHandler`` is attached yet.
    """
    app.state.main_event_loop = asyncio.get_running_loop()

    handler = QueueAndHistoryHandler(log_queue, log_history, MAX_LOG_HISTORY)
    handler.setLevel(logging.INFO)
    handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))

    # Avoid stacking a second handler of the same type on the logger.
    already_attached = False
    for existing in translater_logger.handlers:
        if isinstance(existing, QueueAndHistoryHandler):
            already_attached = True
            break
    if not already_attached:
        translater_logger.addHandler(handler)

    translater_logger.propagate = False
    translater_logger.setLevel(logging.INFO)
    translater_logger.info("应用启动完成,日志队列/历史处理器已配置。")
|
||||
|
||||
|
||||
# --- HTML模板 (JS part needs modification) ---
|
||||
# language=HTML
|
||||
HTML_TEMPLATE = """
|
||||
<!DOCTYPE html>
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<!DOCTYPE html>
|
||||
<html lang="zh-CN">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
<title>DocuTranslate</title>
|
||||
<link rel="stylesheet" href="https://cdn.jsdelivr.net/npm/@picocss/pico@latest/css/pico.min.css">
|
||||
<style>
|
||||
:root { --primary-color: #1e88e5; --border-radius: 0.25rem; }
|
||||
body { padding: 20px; background-color: #f9f9f9; }
|
||||
.container { max-width: 800px; margin: auto; background-color: white; padding: 2rem; border-radius: 8px; box-shadow: 0 2px 10px rgba(0, 0, 0, 0.05); }
|
||||
h1 { font-size: 1.8rem; margin-bottom: 1.5rem; display: flex; align-items: center; gap: 0.5rem; }
|
||||
.log-area { background-color: #f5f5f5; border: 1px solid #e0e0e0; padding: 10px; height: 200px; overflow-y: scroll; white-space: pre-wrap; word-break: break-all; font-family: monospace; font-size: 0.85em; line-height: 1.4; margin-top: 1rem; }
|
||||
.error-message { color: #d32f2f; font-weight: 500; }
|
||||
.success-message { color: #2e7d32; font-weight: 500; }
|
||||
.form-group { margin-bottom: 1rem; }
|
||||
.form-group label { margin-bottom: 0.2rem; font-weight: 500; font-size: 0.9rem; }
|
||||
.form-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 1rem; }
|
||||
.button-group { margin-top: 1rem; display: flex; gap: 0.5rem; flex-wrap: wrap; }
|
||||
details { background: transparent; border: none; box-shadow: none; padding: 0; border-bottom: 1px solid #eee; margin-bottom: 1rem; }
|
||||
summary { font-weight: 500; padding: 0.5rem 0; }
|
||||
details[open] > summary { border-bottom: none; margin-bottom: 0; }
|
||||
.checkbox-label { display: flex; align-items: center; margin-right: 1rem; margin-bottom: 0.5rem; }
|
||||
.checkbox-group { display: flex; flex-wrap: wrap; margin-bottom: 1rem; }
|
||||
#resultArea { margin-top: 1.5rem; padding-top: 1rem; border-top: 1px solid #eee; }
|
||||
#downloadButtons { display: none; margin-top: 1rem; }
|
||||
.section-header { display: flex; align-items: center; margin-bottom: 0.5rem; font-size: 1.1rem; font-weight: 500; }
|
||||
select, input[type="text"], input[type="password"], input[type="file"] { padding: 0.5rem; border: 1px solid #ddd; background-color: white; }
|
||||
button, a[role="button"] { padding: 0.5rem 1rem; }
|
||||
.options-grid { display: grid; grid-template-columns: 1fr 1fr; gap: 1rem; margin-bottom: 1rem; }
|
||||
@media print { .no-print { display: none !important; } body { padding: 0; background-color: white; } .container { box-shadow: none; max-width: 100%; padding: 0; } }
|
||||
.modal { display: none; position: fixed; top: 0; left: 0; width: 100%; height: 100%; background-color: rgba(0, 0, 0, 0.6); z-index: 1000; overflow: auto; }
|
||||
.modal-content { position: relative; background-color: #fff; margin: 2% auto; padding: 20px; width: 90%; max-width: 900px; max-height: 90vh; border-radius: 8px; box-shadow: 0 4px 12px rgba(0, 0, 0, 0.2); overflow: auto; }
|
||||
.close-modal { position: absolute; top: 10px; right: 10px; font-size: 24px; font-weight: bold; color: #666; cursor: pointer; }
|
||||
.modal-actions { display: flex; justify-content: flex-end; margin-top: 20px; gap: 10px; }
|
||||
#previewFrame { width: 100%; min-height: 500px; border: 1px solid #ddd; }
|
||||
#printFrame { display: none; }
|
||||
.hidden { display: none !important; }
|
||||
@media (max-width: 768px) { .form-grid, .options-grid { grid-template-columns: 1fr; } .container { padding: 1rem; } }
|
||||
:root {
|
||||
--primary-color: #1e88e5;
|
||||
--border-radius: 0.25rem;
|
||||
}
|
||||
|
||||
body {
|
||||
padding: 20px;
|
||||
background-color: #f9f9f9;
|
||||
}
|
||||
|
||||
.container {
|
||||
max-width: 800px;
|
||||
margin: auto;
|
||||
background-color: white;
|
||||
padding: 2rem;
|
||||
border-radius: 8px;
|
||||
box-shadow: 0 2px 10px rgba(0, 0, 0, 0.05);
|
||||
}
|
||||
|
||||
h1 {
|
||||
font-size: 1.8rem;
|
||||
margin-bottom: 1.5rem;
|
||||
display: flex;
|
||||
align-items: center;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
|
||||
.log-area {
|
||||
background-color: #f5f5f5;
|
||||
border: 1px solid #e0e0e0;
|
||||
padding: 10px;
|
||||
height: 200px;
|
||||
overflow-y: scroll;
|
||||
white-space: pre-wrap;
|
||||
word-break: break-all;
|
||||
font-family: monospace;
|
||||
font-size: 0.85em;
|
||||
line-height: 1.4;
|
||||
margin-top: 1rem;
|
||||
}
|
||||
|
||||
.error-message {
|
||||
color: #d32f2f;
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.success-message {
|
||||
color: #2e7d32;
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
.form-group {
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
.form-group label {
|
||||
margin-bottom: 0.2rem;
|
||||
font-weight: 500;
|
||||
font-size: 0.9rem;
|
||||
}
|
||||
|
||||
.form-grid {
|
||||
display: grid;
|
||||
grid-template-columns: 1fr 1fr;
|
||||
gap: 1rem;
|
||||
}
|
||||
|
||||
.button-group {
|
||||
margin-top: 1rem;
|
||||
display: flex;
|
||||
gap: 0.5rem;
|
||||
flex-wrap: wrap;
|
||||
}
|
||||
|
||||
details {
|
||||
background: transparent;
|
||||
border: none;
|
||||
box-shadow: none;
|
||||
padding: 0;
|
||||
border-bottom: 1px solid #eee;
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
summary {
|
||||
font-weight: 500;
|
||||
padding: 0.5rem 0;
|
||||
}
|
||||
|
||||
details[open] > summary {
|
||||
border-bottom: none;
|
||||
margin-bottom: 0;
|
||||
}
|
||||
|
||||
.checkbox-label {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
margin-right: 1rem;
|
||||
margin-bottom: 0.5rem;
|
||||
}
|
||||
|
||||
.checkbox-group {
|
||||
display: flex;
|
||||
flex-wrap: wrap;
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
#resultArea {
|
||||
margin-top: 1.5rem;
|
||||
padding-top: 1rem;
|
||||
border-top: 1px solid #eee;
|
||||
}
|
||||
|
||||
#downloadButtons {
|
||||
display: none;
|
||||
margin-top: 1rem;
|
||||
}
|
||||
|
||||
.section-header {
|
||||
display: flex;
|
||||
align-items: center;
|
||||
margin-bottom: 0.5rem;
|
||||
font-size: 1.1rem;
|
||||
font-weight: 500;
|
||||
}
|
||||
|
||||
select, input[type="text"], input[type="password"], input[type="file"] {
|
||||
padding: 0.5rem;
|
||||
border: 1px solid #ddd;
|
||||
background-color: white;
|
||||
}
|
||||
|
||||
button, a[role="button"] {
|
||||
padding: 0.5rem 1rem;
|
||||
}
|
||||
|
||||
.options-grid {
|
||||
display: grid;
|
||||
grid-template-columns: 1fr 1fr;
|
||||
gap: 1rem;
|
||||
margin-bottom: 1rem;
|
||||
}
|
||||
|
||||
@media print {
|
||||
.no-print {
|
||||
display: none !important;
|
||||
}
|
||||
|
||||
body {
|
||||
padding: 0;
|
||||
background-color: white;
|
||||
}
|
||||
|
||||
.container {
|
||||
box-shadow: none;
|
||||
max-width: 100%;
|
||||
padding: 0;
|
||||
}
|
||||
}
|
||||
|
||||
.modal {
|
||||
display: none;
|
||||
position: fixed;
|
||||
top: 0;
|
||||
left: 0;
|
||||
width: 100%;
|
||||
height: 100%;
|
||||
background-color: rgba(0, 0, 0, 0.6);
|
||||
z-index: 1000;
|
||||
overflow: auto;
|
||||
}
|
||||
|
||||
.modal-content {
|
||||
position: relative;
|
||||
background-color: #fff;
|
||||
margin: 2% auto;
|
||||
padding: 20px;
|
||||
width: 90%;
|
||||
max-width: 900px;
|
||||
max-height: 90vh;
|
||||
border-radius: 8px;
|
||||
box-shadow: 0 4px 12px rgba(0, 0, 0, 0.2);
|
||||
overflow: auto;
|
||||
}
|
||||
|
||||
.close-modal {
|
||||
position: absolute;
|
||||
top: 10px;
|
||||
right: 10px;
|
||||
font-size: 24px;
|
||||
font-weight: bold;
|
||||
color: #666;
|
||||
cursor: pointer;
|
||||
}
|
||||
|
||||
.modal-actions {
|
||||
display: flex;
|
||||
justify-content: flex-end;
|
||||
margin-top: 20px;
|
||||
gap: 10px;
|
||||
}
|
||||
|
||||
#previewFrame {
|
||||
width: 100%;
|
||||
min-height: 500px;
|
||||
border: 1px solid #ddd;
|
||||
}
|
||||
|
||||
#printFrame {
|
||||
display: none;
|
||||
}
|
||||
|
||||
.hidden {
|
||||
display: none !important;
|
||||
}
|
||||
|
||||
@media (max-width: 768px) {
|
||||
.form-grid, .options-grid {
|
||||
grid-template-columns: 1fr;
|
||||
}
|
||||
|
||||
.container {
|
||||
padding: 1rem;
|
||||
}
|
||||
}
|
||||
</style>
|
||||
</head>
|
||||
<body>
|
||||
<main class="container no-print">
|
||||
</head>
|
||||
<body>
|
||||
<main class="container no-print">
|
||||
<h1>
|
||||
<a href="https://github.com/xunbu/docutranslate" target="_blank">DocuTranslate</a>
|
||||
</h1>
|
||||
@@ -131,7 +267,8 @@ HTML_TEMPLATE = """
|
||||
<option value="https://api.openai.com/v1">OpenAI</option>
|
||||
<option value="https://open.bigmodel.cn/api/paas/v4">智谱AI</option>
|
||||
<option value="https://api.deepseek.com/v1">DeepSeek</option>
|
||||
<option value="https://dashscope.aliyuncs.com/compatible-mode/v1">阿里云百炼</option>
|
||||
<option value="https://dashscope.aliyuncs.com/compatible-mode/v1">阿里云百炼
|
||||
</option>
|
||||
<option value="https://www.dmxapi.cn/v1">DMXAPI</option>
|
||||
<option value="https://openrouter.ai/api/v1">OpenRouter</option>
|
||||
<option value="https://ark.cn-beijing.volces.com/api/v3">火山引擎</option>
|
||||
@@ -140,12 +277,14 @@ HTML_TEMPLATE = """
|
||||
</div>
|
||||
<div class="form-group hidden" id="baseUrlGroup">
|
||||
<label for="base_url">API 地址 (Base URL)</label>
|
||||
<input type="text" id="base_url" name="base_url" placeholder="https://api.openai.com/v1">
|
||||
<input type="text" id="base_url" name="base_url"
|
||||
placeholder="https://api.openai.com/v1">
|
||||
</div>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="apikey">API 密钥</label>
|
||||
<input type="password" id="apikey" name="apikey" placeholder="平台对应的API Key" required>
|
||||
<input type="password" id="apikey" name="apikey" placeholder="平台对应的API Key"
|
||||
required>
|
||||
</div>
|
||||
<div class="form-group">
|
||||
<label for="model_id">模型 ID</label>
|
||||
@@ -177,9 +316,14 @@ HTML_TEMPLATE = """
|
||||
<div class="form-group">
|
||||
<label>高级选项</label>
|
||||
<div class="checkbox-group">
|
||||
<label class="checkbox-label" for="formula_ocr"><input type="checkbox" id="formula_ocr" name="formula_ocr">公式识别</label>
|
||||
<label class="checkbox-label" for="code_ocr"><input type="checkbox" id="code_ocr" name="code_ocr">代码识别</label>
|
||||
<label class="checkbox-label" for="refine_markdown"><input type="checkbox" id="refine_markdown" name="refine_markdown">修正文本(耗时)</label>
|
||||
<label class="checkbox-label" for="formula_ocr"><input type="checkbox"
|
||||
id="formula_ocr"
|
||||
name="formula_ocr">公式识别</label>
|
||||
<label class="checkbox-label" for="code_ocr"><input type="checkbox" id="code_ocr"
|
||||
name="code_ocr">代码识别</label>
|
||||
<label class="checkbox-label" for="refine_markdown"><input type="checkbox"
|
||||
id="refine_markdown"
|
||||
name="refine_markdown">修正文本(耗时,有概率修复文本流和公式识别错误)</label>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
@@ -197,8 +341,8 @@ HTML_TEMPLATE = """
|
||||
</div>
|
||||
<div class="section-header" style="margin-top: 1.5rem;">运行日志</div>
|
||||
<div class="log-area" id="logArea"></div>
|
||||
</main>
|
||||
<div id="previewModal" class="modal">
|
||||
</main>
|
||||
<div id="previewModal" class="modal">
|
||||
<div class="modal-content">
|
||||
<span class="close-modal" id="closeModalBtn">×</span>
|
||||
<h3>HTML 预览</h3>
|
||||
@@ -208,10 +352,10 @@ HTML_TEMPLATE = """
|
||||
<button id="closePreviewBtn" class="outline">关闭</button>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
<iframe id="printFrame" style="display:none;"></iframe>
|
||||
</div>
|
||||
<iframe id="printFrame" style="display:none;"></iframe>
|
||||
|
||||
<script>
|
||||
<script>
|
||||
const platformSelect = document.getElementById('platform_select');
|
||||
const baseUrlGroup = document.getElementById('baseUrlGroup');
|
||||
const baseUrlInput = document.getElementById('base_url');
|
||||
@@ -241,8 +385,22 @@ HTML_TEMPLATE = """
|
||||
let statusPollIntervalId = null;
|
||||
let lastLogCount = 0;
|
||||
|
||||
function saveToStorage(key, value) { try { localStorage.setItem(key, value); } catch (e) { console.warn("保存到本地存储失败:", e); } }
|
||||
function getFromStorage(key, defaultValue = '') { try { return localStorage.getItem(key) || defaultValue; } catch (e) { console.warn("从本地存储读取失败:", e); return defaultValue; } }
|
||||
function saveToStorage(key, value) {
|
||||
try {
|
||||
localStorage.setItem(key, value);
|
||||
} catch (e) {
|
||||
console.warn("保存到本地存储失败:", e);
|
||||
}
|
||||
}
|
||||
|
||||
function getFromStorage(key, defaultValue = '') {
|
||||
try {
|
||||
return localStorage.getItem(key) || defaultValue;
|
||||
} catch (e) {
|
||||
console.warn("从本地存储读取失败:", e);
|
||||
return defaultValue;
|
||||
}
|
||||
}
|
||||
|
||||
function updatePlatformUI() {
|
||||
const selectedPlatformValue = platformSelect.value;
|
||||
@@ -259,6 +417,7 @@ HTML_TEMPLATE = """
|
||||
}
|
||||
saveToStorage('translator_last_platform', selectedPlatformValue);
|
||||
}
|
||||
|
||||
function loadSettings() {
|
||||
const lastPlatform = getFromStorage('translator_last_platform', 'custom');
|
||||
platformSelect.value = lastPlatform;
|
||||
@@ -268,19 +427,24 @@ HTML_TEMPLATE = """
|
||||
codeCheckbox.checked = getFromStorage('translator_code_ocr') === 'true';
|
||||
refineCheckbox.checked = getFromStorage('translator_refine_markdown') === 'true';
|
||||
}
|
||||
|
||||
loadSettings();
|
||||
|
||||
platformSelect.addEventListener('change', updatePlatformUI);
|
||||
apikeyInput.addEventListener('input', (e) => saveToStorage(`translator_platform_${platformSelect.value}_apikey`, e.target.value));
|
||||
modelInput.addEventListener('input', (e) => saveToStorage(`translator_platform_${platformSelect.value}_model_id`, e.target.value));
|
||||
baseUrlInput.addEventListener('input', (e) => { if (platformSelect.value === 'custom') saveToStorage('translator_platform_custom_base_url', e.target.value); });
|
||||
baseUrlInput.addEventListener('input', (e) => {
|
||||
if (platformSelect.value === 'custom') saveToStorage('translator_platform_custom_base_url', e.target.value);
|
||||
});
|
||||
toLangSelect.addEventListener('change', e => saveToStorage('translator_to_lang', e.target.value));
|
||||
formulaCheckbox.addEventListener('change', e => saveToStorage('translator_formula_ocr', e.target.checked));
|
||||
codeCheckbox.addEventListener('change', e => saveToStorage('translator_code_ocr', e.target.checked));
|
||||
refineCheckbox.addEventListener('change', e => saveToStorage('translator_refine_markdown', e.target.checked));
|
||||
|
||||
[closeModalButton, closePreviewBtn].forEach(elem => elem.addEventListener('click', () => modal.style.display = 'none'));
|
||||
window.addEventListener('click', (event) => { if (event.target === modal) modal.style.display = 'none'; });
|
||||
window.addEventListener('click', (event) => {
|
||||
if (event.target === modal) modal.style.display = 'none';
|
||||
});
|
||||
printFromPreview.addEventListener('click', () => {
|
||||
try {
|
||||
previewFrame.contentWindow.focus();
|
||||
@@ -295,7 +459,8 @@ HTML_TEMPLATE = """
|
||||
try {
|
||||
const response = await fetch(`/get-logs?since=${lastLogCount}`);
|
||||
if (!response.ok) {
|
||||
console.warn(`Log polling failed: ${response.status}`); return;
|
||||
console.warn(`Log polling failed: ${response.status}`);
|
||||
return;
|
||||
}
|
||||
const data = await response.json();
|
||||
if (data.logs && data.logs.length > 0) {
|
||||
@@ -343,12 +508,22 @@ HTML_TEMPLATE = """
|
||||
const currentHtmlUrl = htmlUrl;
|
||||
const currentFileName = fileName;
|
||||
fetch(currentHtmlUrl)
|
||||
.then(resp => { if (!resp.ok) throw new Error(`HTTP error ${resp.status}`); return resp.text();})
|
||||
.then(resp => {
|
||||
if (!resp.ok) throw new Error(`HTTP error ${resp.status}`);
|
||||
return resp.text();
|
||||
})
|
||||
.then(html => {
|
||||
const blob = new Blob([html], {type: 'text/html'});
|
||||
const blobUrl = URL.createObjectURL(blob);
|
||||
previewFrame.src = blobUrl;
|
||||
previewFrame.onload = function () { try { previewFrame.contentWindow.document.title = currentFileName + '_translated'; URL.revokeObjectURL(blobUrl); } catch (e) { console.warn('无法设置iframe标题或释放Blob URL', e); } };
|
||||
previewFrame.onload = function () {
|
||||
try {
|
||||
previewFrame.contentWindow.document.title = currentFileName + '_translated';
|
||||
URL.revokeObjectURL(blobUrl);
|
||||
} catch (e) {
|
||||
console.warn('无法设置iframe标题或释放Blob URL', e);
|
||||
}
|
||||
};
|
||||
modal.style.display = 'block';
|
||||
})
|
||||
.catch(err => {
|
||||
@@ -446,7 +621,7 @@ HTML_TEMPLATE = """
|
||||
|
||||
const formData = new FormData(form);
|
||||
try {
|
||||
const response = await fetch('/translate', { method: 'POST', body: formData });
|
||||
const response = await fetch('/translate', {method: 'POST', body: formData});
|
||||
const result = await response.json();
|
||||
if (response.ok && result.task_started) {
|
||||
statusMsg.textContent = result.message || '任务已开始,正在处理...';
|
||||
@@ -469,10 +644,66 @@ HTML_TEMPLATE = """
|
||||
submitButton.textContent = '开始翻译';
|
||||
}
|
||||
});
|
||||
</script>
|
||||
</body>
|
||||
</html>
|
||||
"""
|
||||
</script>
|
||||
</body>
|
||||
</html> \
|
||||
"""
|
||||
|
||||
app = FastAPI()

# --- Global configuration ---
# Queue that QueueAndHistoryHandler pushes formatted log lines onto.
log_queue = asyncio.Queue()

# Process-wide record of the translation task; the status and download
# endpoints read from this mapping.
current_state: Dict[str, Any] = dict(
    is_processing=False,
    status_message="空闲",
    error_flag=False,
    download_ready=False,
    markdown_content=None,
    html_content=None,
    original_filename_stem=None,
    task_start_time=0,
    task_end_time=0,
)

templates = Jinja2Templates(directory=".")

# Upper bound on the number of log lines retained in log_history.
MAX_LOG_HISTORY = 200
log_history: List[str] = []
|
||||
|
||||
|
||||
# --- Log handler ---
class QueueAndHistoryHandler(logging.Handler):
    """Logging handler that records each formatted message twice.

    Every record is appended to a shared ``history`` list, which is trimmed so
    it never holds more than ``max_history`` entries (oldest dropped first),
    and is also put on an ``asyncio.Queue``.
    """

    def __init__(self, queue: asyncio.Queue, history: List[str], max_history: int):
        """Store the target queue, the shared history list and its size cap."""
        super().__init__()
        self.queue = queue
        self.history = history
        self.max_history = max_history

    def emit(self, record: logging.LogRecord):
        """Format ``record``, append it to the history and enqueue it.

        If ``app.state.main_event_loop`` is set and running, the enqueue is
        scheduled on that loop with ``call_soon_threadsafe``; otherwise
        ``put_nowait`` is called directly. Any exception raised while
        enqueueing is printed rather than propagated.
        """
        message = self.format(record)
        self.history.append(message)

        # Drop the oldest entries once the cap is exceeded.
        overflow = len(self.history) - self.max_history
        if overflow > 0:
            del self.history[:overflow]

        try:
            loop = getattr(app.state, "main_event_loop", None)
            if not (loop and loop.is_running()):
                self.queue.put_nowait(message)
            else:
                loop.call_soon_threadsafe(self.queue.put_nowait, message)
        except Exception as exc:
            print(f"Error putting log to queue: {exc}")
|
||||
|
||||
|
||||
# --- Application lifecycle events ---
@app.on_event("startup")
async def startup_event():
    """Remember the running event loop and wire up the queue/history log handler.

    The running loop is stored on ``app.state.main_event_loop`` (read back by
    ``QueueAndHistoryHandler.emit``). A handler is attached to
    ``translater_logger`` only if no ``QueueAndHistoryHandler`` is attached yet.
    """
    app.state.main_event_loop = asyncio.get_running_loop()

    handler = QueueAndHistoryHandler(log_queue, log_history, MAX_LOG_HISTORY)
    handler.setLevel(logging.INFO)
    handler.setFormatter(logging.Formatter('%(asctime)s - %(levelname)s - %(message)s'))

    # Avoid stacking a second handler of the same type on the logger.
    already_attached = False
    for existing in translater_logger.handlers:
        if isinstance(existing, QueueAndHistoryHandler):
            already_attached = True
            break
    if not already_attached:
        translater_logger.addHandler(handler)

    translater_logger.propagate = False
    translater_logger.setLevel(logging.INFO)
    translater_logger.info("应用启动完成,日志队列/历史处理器已配置。")
|
||||
|
||||
|
||||
# --- Background Task Logic ---
|
||||
@@ -503,7 +734,8 @@ async def _perform_translation(params: Dict[str, Any], file_contents: bytes, ori
|
||||
try:
|
||||
translater_logger.info(f"使用 Base URL: {params['base_url']}, Model: {params['model_id']}")
|
||||
translater_logger.info(f"文件大小: {len(file_contents)} 字节。目标语言: {params['to_lang']}")
|
||||
translater_logger.info(f"选项 - 公式: {params['formula_ocr']}, 代码: {params['code_ocr']}, 修正: {params['refine_markdown']}")
|
||||
translater_logger.info(
|
||||
f"选项 - 公式: {params['formula_ocr']}, 代码: {params['code_ocr']}, 修正: {params['refine_markdown']}")
|
||||
|
||||
ft = FileTranslater(
|
||||
base_url=params['base_url'],
|
||||
@@ -511,8 +743,7 @@ async def _perform_translation(params: Dict[str, Any], file_contents: bytes, ori
|
||||
model_id=params['model_id'],
|
||||
tips=False
|
||||
)
|
||||
await asyncio.to_thread(
|
||||
ft.translate_bytes,
|
||||
await ft.translate_bytes_async(
|
||||
name=original_filename,
|
||||
file=file_contents,
|
||||
to_lang=params['to_lang'],
|
||||
@@ -521,6 +752,16 @@ async def _perform_translation(params: Dict[str, Any], file_contents: bytes, ori
|
||||
refine=params['refine_markdown'],
|
||||
save=False
|
||||
)
|
||||
# await asyncio.to_thread(
|
||||
# ft.translate_bytes,
|
||||
# name=original_filename,
|
||||
# file=file_contents,
|
||||
# to_lang=params['to_lang'],
|
||||
# formula=params['formula_ocr'],
|
||||
# code=params['code_ocr'],
|
||||
# refine=params['refine_markdown'],
|
||||
# save=False
|
||||
# )
|
||||
md_content = ft.export_to_markdown()
|
||||
html_content = ft.export_to_html(title=file_stem)
|
||||
end_time = time.time()
|
||||
@@ -625,8 +866,10 @@ async def get_status():
|
||||
"error_flag": current_state["error_flag"],
|
||||
"download_ready": current_state["download_ready"],
|
||||
"original_filename_stem": current_state["original_filename_stem"],
|
||||
"markdown_url": f"/download/markdown/{current_state['original_filename_stem']}_translated.md" if current_state["download_ready"] else None,
|
||||
"html_url": f"/download/html/{current_state['original_filename_stem']}_translated.html" if current_state["download_ready"] else None,
|
||||
"markdown_url": f"/download/markdown/{current_state['original_filename_stem']}_translated.md" if current_state[
|
||||
"download_ready"] else None,
|
||||
"html_url": f"/download/html/{current_state['original_filename_stem']}_translated.html" if current_state[
|
||||
"download_ready"] else None,
|
||||
"task_start_time": current_state["task_start_time"],
|
||||
"task_end_time": current_state["task_end_time"],
|
||||
}
|
||||
@@ -643,7 +886,8 @@ async def get_logs(since: int = 0):
|
||||
|
||||
@app.get("/download/markdown/{filename_with_ext}")
|
||||
async def download_markdown(filename_with_ext: str):
|
||||
if not current_state["download_ready"] or not current_state["markdown_content"] or not current_state["original_filename_stem"]:
|
||||
if not current_state["download_ready"] or not current_state["markdown_content"] or not current_state[
|
||||
"original_filename_stem"]:
|
||||
raise HTTPException(status_code=404, detail="Markdown 内容尚未准备好或不可用。")
|
||||
requested_stem = Path(filename_with_ext).stem.replace("_translated", "")
|
||||
if requested_stem != current_state["original_filename_stem"]:
|
||||
@@ -658,7 +902,8 @@ async def download_markdown(filename_with_ext: str):
|
||||
|
||||
@app.get("/download/html/{filename_with_ext}")
|
||||
async def download_html(filename_with_ext: str):
|
||||
if not current_state["download_ready"] or not current_state["html_content"] or not current_state["original_filename_stem"]:
|
||||
if not current_state["download_ready"] or not current_state["html_content"] or not current_state[
|
||||
"original_filename_stem"]:
|
||||
raise HTTPException(status_code=404, detail="HTML 内容尚未准备好或不可用。")
|
||||
requested_stem = Path(filename_with_ext).stem.replace("_translated", "")
|
||||
if requested_stem != current_state["original_filename_stem"]:
|
||||
@@ -670,10 +915,12 @@ async def download_html(filename_with_ext: str):
|
||||
headers={"Content-Disposition": f"attachment; filename=\"{actual_filename}\""}
|
||||
)
|
||||
|
||||
|
||||
def run_app(host: str = "127.0.0.1", port: int = 8010):
    """Start the DocuTranslate web UI under uvicorn and block until the server exits.

    Args:
        host: Interface to bind. Defaults to ``"127.0.0.1"``, the value that
            was previously hard-coded.
        port: TCP port to listen on. Defaults to ``8010``, the value that was
            previously hard-coded.

    Calling ``run_app()`` with no arguments behaves exactly as before; the
    parameters only make it possible to pick another address, for example when
    port 8010 is already taken.
    """
    print("正在启动 DocuTranslate")
    # Build the hint from the actual bind address so it cannot drift from it.
    print(f"请访问 http://{host}:{port}")
    # workers=1 is kept from the original: task state (current_state,
    # log_history) lives in module-level globals of this process.
    uvicorn.run(app, host=host, port=port, workers=1)


if __name__ == "__main__":
    run_app()
|
||||
@@ -1,3 +1,4 @@
|
||||
import asyncio
|
||||
from io import BytesIO
|
||||
from pathlib import Path
|
||||
from typing import Literal
|
||||
@@ -15,7 +16,7 @@ from docutranslate.logger import translater_logger
|
||||
|
||||
|
||||
class FileTranslater:
|
||||
def __init__(self, file_path: Path | str | None = None, chunksize: int = 3500, base_url="", key=None,
|
||||
def __init__(self, file_path: Path | str | None = None, chunksize: int = 2000, base_url="", key=None,
|
||||
model_id="", temperature=0.7, max_concurrent=15, docling_artifact: Path | str | None = None,
|
||||
timeout=2000, tips=True):
|
||||
if isinstance(file_path, str):
|
||||
@@ -145,6 +146,31 @@ class FileTranslater:
|
||||
translater_logger.info("翻译完成")
|
||||
return self.markdown
|
||||
|
||||
|
||||
async def refine_markdown_by_agent_async(self, refine_agent: Agent | None = None) -> str:
    """Asynchronously run the refine agent over the current markdown, chunk by chunk.

    Calls ``_mask_uris_in_markdown()`` before splitting and
    ``_unmask_uris_in_markdown()`` after the results are re-joined.

    Args:
        refine_agent: Agent whose ``send_prompts_async`` processes the chunks.
            When ``None``, an ``MDRefineAgent`` is built from
            ``self.default_agent_params()``.

    Returns:
        The updated ``self.markdown``.
    """
    translater_logger.info("正在修正markdown")
    self._mask_uris_in_markdown()
    chunks = self._split_markdown_into_chunks()

    agent = refine_agent
    if agent is None:
        agent = MDRefineAgent(**self.default_agent_params())

    refined_parts: list[str] = await agent.send_prompts_async(chunks)
    # Chunks are stitched back together with a blank line between them.
    self.markdown = "\n\n".join(refined_parts)
    self._unmask_uris_in_markdown()
    translater_logger.info("markdown已修正")
    return self.markdown
|
||||
|
||||
async def translate_markdown_by_agent_async(self, translate_agent: Agent | None = None, to_lang="中文"):
    """Asynchronously translate the current markdown, chunk by chunk.

    Calls ``_mask_uris_in_markdown()`` before splitting and
    ``_unmask_uris_in_markdown()`` after the results are re-joined.

    Args:
        translate_agent: Agent whose ``send_prompts_async`` processes the
            chunks. When ``None``, an ``MDTranslateAgent`` is built from
            ``to_lang`` and ``self.default_agent_params()``.
        to_lang: Target language; only used when the default agent is built.

    Returns:
        The updated ``self.markdown``.
    """
    translater_logger.info("正在翻译markdown")
    self._mask_uris_in_markdown()
    chunks = self._split_markdown_into_chunks()

    agent = translate_agent
    if agent is None:
        agent = MDTranslateAgent(to_lang=to_lang, **self.default_agent_params())

    translated_parts: list[str] = await agent.send_prompts_async(chunks)
    # Chunks are stitched back together with a blank line between them.
    self.markdown = "\n\n".join(translated_parts)
    self._unmask_uris_in_markdown()
    translater_logger.info("翻译完成")
    return self.markdown
|
||||
|
||||
def save_as_markdown(self, filename: str | Path | None = None, output_dir: str | Path = "./output"):
|
||||
if isinstance(filename, str):
|
||||
filename = Path(filename)
|
||||
@@ -191,7 +217,7 @@ class FileTranslater:
|
||||
|
||||
def export_to_html(self, title="title") -> str:
|
||||
markdowner = markdown2.Markdown(extras=['tables', 'fenced-code-blocks', 'mermaid', "code-friendly"])
|
||||
|
||||
# language=html
|
||||
html = f"""<!DOCTYPE html>
|
||||
<html lang="en">
|
||||
<head>
|
||||
@@ -206,6 +232,7 @@ class FileTranslater:
|
||||
</style>
|
||||
<script type="text/x-mathjax-config">
|
||||
MathJax.Hub.Config({{
|
||||
messageStyle: "none",
|
||||
tex2jax: {{
|
||||
inlineMath: [ ['$','$'], ["\\\\(","\\\\)"] ],
|
||||
processEscapes: true
|
||||
@@ -264,7 +291,32 @@ class FileTranslater:
|
||||
filename = f"{file_path.stem}_{to_lang}.html"
|
||||
self.save_as_html(filename=filename, output_dir=output_dir)
|
||||
return self
|
||||
|
||||
async def translate_file_async(self, file_path: Path | str | None = None, to_lang="中文", output_dir="./output",
                               formula=True,
                               code=True, output_format: Literal["markdown", "html"] = "markdown", refine=False,
                               refine_agent: Agent | None = None, translate_agent: Agent | None = None, save=True):
    """Asynchronously read a file, optionally refine it, translate it and optionally save it.

    ``read_file`` is run through ``asyncio.to_thread``; the refine and
    translate steps are awaited directly.

    Args:
        file_path: File to translate. When ``None``, ``self.file_path`` is
            used and must not be ``None`` (checked with ``assert``).
        to_lang: Target language; also embedded in the output file name.
        output_dir: Directory handed to the save method.
        formula: Forwarded to ``read_file``.
        code: Forwarded to ``read_file``.
        output_format: ``"markdown"`` or ``"html"``; any other value saves nothing.
        refine: When true, run ``refine_markdown_by_agent_async`` before translating.
        refine_agent: Forwarded to ``refine_markdown_by_agent_async``.
        translate_agent: Forwarded to ``translate_markdown_by_agent_async``.
        save: When false, nothing is written.

    Returns:
        ``self``.
    """
    source = file_path
    if source is None:
        source = self.file_path
        assert source is not None, "未输入文件路径"
    if isinstance(source, str):
        source = Path(source)

    await asyncio.to_thread(self.read_file, source, formula=formula, code=code)

    if refine:
        await self.refine_markdown_by_agent_async(refine_agent)
    await self.translate_markdown_by_agent_async(translate_agent, to_lang=to_lang)

    if not save:
        return self

    # Output name is "<input stem>_<target language>.<ext>".
    if output_format == "markdown":
        self.save_as_markdown(filename=f"{source.stem}_{to_lang}.md", output_dir=output_dir)
    elif output_format == "html":
        self.save_as_html(filename=f"{source.stem}_{to_lang}.html", output_dir=output_dir)
    return self
|
||||
def translate_bytes(self, name:str,file: bytes, to_lang="中文", output_dir="./output",
|
||||
formula=True,
|
||||
code=True, output_format: Literal["markdown", "html"] = "markdown", refine=False,
|
||||
@@ -281,3 +333,26 @@ class FileTranslater:
|
||||
filename = f"{name}_{to_lang}.html"
|
||||
self.save_as_html(filename=filename, output_dir=output_dir)
|
||||
return self
|
||||
|
||||
async def translate_bytes_async(self, name: str, file: bytes, to_lang="中文", output_dir="./output",
                                formula=True,
                                code=True, output_format: Literal["markdown", "html"] = "markdown", refine=False,
                                refine_agent: Agent | None = None, translate_agent: Agent | None = None, save=True):
    """Asynchronously read in-memory file bytes, optionally refine, translate and optionally save.

    ``read_bytes`` is run through ``asyncio.to_thread``; the refine and
    translate steps are awaited directly.

    Args:
        name: Forwarded to ``read_bytes`` and used as the prefix of the output file name.
        file: Raw file content, forwarded to ``read_bytes``.
        to_lang: Target language; also embedded in the output file name.
        output_dir: Directory handed to the save method.
        formula: Forwarded to ``read_bytes``.
        code: Forwarded to ``read_bytes``.
        output_format: ``"markdown"`` or ``"html"``; any other value saves nothing.
        refine: When true, run ``refine_markdown_by_agent_async`` before translating.
        refine_agent: Forwarded to ``refine_markdown_by_agent_async``.
        translate_agent: Forwarded to ``translate_markdown_by_agent_async``.
        save: When false, nothing is written.

    Returns:
        ``self``.
    """
    await asyncio.to_thread(self.read_bytes, name=name, file=file, formula=formula, code=code)

    if refine:
        await self.refine_markdown_by_agent_async(refine_agent)
    await self.translate_markdown_by_agent_async(translate_agent, to_lang=to_lang)

    if not save:
        return self

    # Output name is "<name>_<target language>.<ext>".
    if output_format == "markdown":
        self.save_as_markdown(filename=f"{name}_{to_lang}.md", output_dir=output_dir)
    elif output_format == "html":
        self.save_as_html(filename=f"{name}_{to_lang}.html", output_dir=output_dir)
    return self
|
||||
|
||||
@@ -1,3 +1,4 @@
|
||||
import asyncio
|
||||
import os
|
||||
from huggingface_hub.errors import LocalEntryNotFoundError
|
||||
from docling.datamodel.base_models import InputFormat
|
||||
@@ -6,8 +7,7 @@ from docling_core.types.doc import ImageRefMode
|
||||
from pathlib import Path
|
||||
from docling.document_converter import DocumentConverter, PdfFormatOption
|
||||
from docling.datamodel.document import DocumentStream
|
||||
|
||||
from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
|
||||
from docling.datamodel.settings import settings
|
||||
from docutranslate.logger import translater_logger
|
||||
IMAGE_RESOLUTION_SCALE = 4
|
||||
|
||||
@@ -22,20 +22,25 @@ def file2markdown_embed_images(file_path: Path | str|DocumentStream, formula=Fal
|
||||
pipeline_options.do_formula_enrichment=True
|
||||
if code:
|
||||
pipeline_options.do_code_enrichment=True
|
||||
pipeline_options.accelerator_options= AcceleratorOptions(
|
||||
num_threads=8, device=AcceleratorDevice.AUTO
|
||||
)
|
||||
# pipeline_options.accelerator_options= AcceleratorOptions(
|
||||
# num_threads=4, device=AcceleratorDevice.AUTO
|
||||
# )
|
||||
#打印时间
|
||||
settings.debug.profile_pipeline_timings=True
|
||||
converter = DocumentConverter(format_options={
|
||||
InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)
|
||||
|
||||
})
|
||||
try:
|
||||
result = converter.convert(file_path).document.export_to_markdown(image_mode=ImageRefMode.EMBEDDED)
|
||||
conversion_result = converter.convert(file_path)
|
||||
result = conversion_result.document.export_to_markdown(image_mode=ImageRefMode.EMBEDDED)
|
||||
except LocalEntryNotFoundError:
|
||||
translater_logger.info(f"无法连接huggingface,正在尝试换源")
|
||||
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
|
||||
result = converter.convert(file_path).document.export_to_markdown(image_mode=ImageRefMode.EMBEDDED)
|
||||
conversion_result = converter.convert(file_path)
|
||||
result=conversion_result.document.export_to_markdown(image_mode=ImageRefMode.EMBEDDED)
|
||||
translater_logger.info(f"已转换为markdown")
|
||||
# Single quotes inside the replacement field: reusing the enclosing double
# quote there is a SyntaxError before Python 3.12 (PEP 701), and this
# project declares requires-python >=3.10.
translater_logger.info(f"pdf转换耗时: {conversion_result.timings['pipeline_total'].times}")
|
||||
return result
|
||||
|
||||
if __name__ == '__main__':
|
||||
|
||||
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "docutranslate"
|
||||
version = "0.2.6.post1"
|
||||
version = "0.2.7"
|
||||
description = "文件翻译工具"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
|
||||
Reference in New Issue
Block a user