优化markdown块拼接方法

This commit is contained in:
xunbu
2025-05-18 23:07:10 +08:00
parent 8a5d85af8b
commit afd3bf8226
6 changed files with 83 additions and 90 deletions

145
.idea/workspace.xml generated
View File

@@ -6,7 +6,10 @@
<component name="ChangeListManager"> <component name="ChangeListManager">
<list default="true" id="6b18b44a-df57-4212-a857-9e291ebe5dd2" name="更改" comment=""> <list default="true" id="6b18b44a-df57-4212-a857-9e291ebe5dd2" name="更改" comment="">
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" /> <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/README.md" beforeDir="false" afterPath="$PROJECT_DIR$/README.md" afterDir="false" />
<change beforePath="$PROJECT_DIR$/docutranslate/translater.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/translater.py" afterDir="false" /> <change beforePath="$PROJECT_DIR$/docutranslate/translater.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/translater.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/docutranslate/utils/convert.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/utils/convert.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/docutranslate/utils/markdown_splitter.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/utils/markdown_splitter.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/pyproject.toml" beforeDir="false" afterPath="$PROJECT_DIR$/pyproject.toml" afterDir="false" /> <change beforePath="$PROJECT_DIR$/pyproject.toml" beforeDir="false" afterPath="$PROJECT_DIR$/pyproject.toml" afterDir="false" />
</list> </list>
<option name="SHOW_DIALOG" value="false" /> <option name="SHOW_DIALOG" value="false" />
@@ -54,6 +57,7 @@
"Python 测试.Python 测试 (markdown_mask.py 内).executor": "Run", "Python 测试.Python 测试 (markdown_mask.py 内).executor": "Run",
"Python 测试.markdown_mask.Test.test_basic_link_masking 的 Python 测试.executor": "Run", "Python 测试.markdown_mask.Test.test_basic_link_masking 的 Python 测试.executor": "Run",
"Python 测试.pytest (test_html.py 内).executor": "Run", "Python 测试.pytest (test_html.py 内).executor": "Run",
"Python.2test2 (1).executor": "Run",
"Python.PDFtranslater (1).executor": "Run", "Python.PDFtranslater (1).executor": "Run",
"Python.PDFtranslater (2).executor": "Run", "Python.PDFtranslater (2).executor": "Run",
"Python.agent.executor": "Debug", "Python.agent.executor": "Debug",
@@ -77,7 +81,7 @@
"RunOnceActivity.TerminalTabsStorage.copyFrom.TerminalArrangementManager": "true", "RunOnceActivity.TerminalTabsStorage.copyFrom.TerminalArrangementManager": "true",
"RunOnceActivity.git.unshallow": "true", "RunOnceActivity.git.unshallow": "true",
"git-widget-placeholder": "main", "git-widget-placeholder": "main",
"last_opened_file_path": "C:/Users/jxgm/Desktop/FileTranslate/dist/DocuTranslate", "last_opened_file_path": "C:/Users/jxgm/Desktop/FileTranslate/tests/files",
"node.js.detected.package.eslint": "true", "node.js.detected.package.eslint": "true",
"node.js.detected.package.tslint": "true", "node.js.detected.package.tslint": "true",
"node.js.selected.package.eslint": "(autodetect)", "node.js.selected.package.eslint": "(autodetect)",
@@ -89,11 +93,11 @@
}]]></component> }]]></component>
<component name="RecentsManager"> <component name="RecentsManager">
<key name="CopyFile.RECENT_KEYS"> <key name="CopyFile.RECENT_KEYS">
<recent name="C:\Users\jxgm\Desktop\FileTranslate\tests\files" />
<recent name="C:\Users\jxgm\Desktop\FileTranslate\dist\DocuTranslate" /> <recent name="C:\Users\jxgm\Desktop\FileTranslate\dist\DocuTranslate" />
<recent name="C:\Users\jxgm\Desktop\FileTranslate\docutranslate\agents" /> <recent name="C:\Users\jxgm\Desktop\FileTranslate\docutranslate\agents" />
<recent name="C:\Users\jxgm\Desktop\FileTranslate\dist" /> <recent name="C:\Users\jxgm\Desktop\FileTranslate\dist" />
<recent name="C:\Users\jxgm\Desktop\FileTranslate\dist\app" /> <recent name="C:\Users\jxgm\Desktop\FileTranslate\dist\app" />
<recent name="C:\Users\jxgm\Desktop\FileTranslate\tests\files" />
</key> </key>
<key name="MoveFile.RECENT_KEYS"> <key name="MoveFile.RECENT_KEYS">
<recent name="C:\Users\jxgm\Desktop\FileTranslate" /> <recent name="C:\Users\jxgm\Desktop\FileTranslate" />
@@ -269,6 +273,52 @@
<configuration name="test.html" type="JavascriptDebugType" temporary="true" nameIsGenerated="true" uri="http://localhost:63342/filetranslate/tests/test.html" useBuiltInWebServerPort="true"> <configuration name="test.html" type="JavascriptDebugType" temporary="true" nameIsGenerated="true" uri="http://localhost:63342/filetranslate/tests/test.html" useBuiltInWebServerPort="true">
<method v="2" /> <method v="2" />
</configuration> </configuration>
<configuration name="2test2 (1)" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
<module name="FileTranslate" />
<option name="ENV_FILES" value="" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/tests" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/tests/2test2.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<option name="MODULE_MODE" value="false" />
<option name="REDIRECT_INPUT" value="false" />
<option name="INPUT_FILE" value="" />
<method v="2" />
</configuration>
<configuration name="3testhtml" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
<module name="FileTranslate" />
<option name="ENV_FILES" value="" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/tests" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
<option name="SCRIPT_NAME" value="C:\Users\jxgm\Desktop\FileTranslate\tests\3testhtml.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<option name="MODULE_MODE" value="false" />
<option name="REDIRECT_INPUT" value="false" />
<option name="INPUT_FILE" value="" />
<method v="2" />
</configuration>
<configuration default="true" type="PythonConfigurationType" factoryName="Python"> <configuration default="true" type="PythonConfigurationType" factoryName="Python">
<module name="filetranslate" /> <module name="filetranslate" />
<option name="ENV_FILES" value="" /> <option name="ENV_FILES" value="" />
@@ -315,75 +365,6 @@
<option name="INPUT_FILE" value="" /> <option name="INPUT_FILE" value="" />
<method v="2" /> <method v="2" />
</configuration> </configuration>
<configuration name="test2" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
<module name="FileTranslate" />
<option name="ENV_FILES" value="" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/tests" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/tests/test2.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<option name="MODULE_MODE" value="false" />
<option name="REDIRECT_INPUT" value="false" />
<option name="INPUT_FILE" value="" />
<method v="2" />
</configuration>
<configuration name="test" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
<module name="FileTranslate" />
<option name="ENV_FILES" value="" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/tests" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/tests/test.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<option name="MODULE_MODE" value="false" />
<option name="REDIRECT_INPUT" value="false" />
<option name="INPUT_FILE" value="" />
<method v="2" />
</configuration>
<configuration name="testhtml" type="PythonConfigurationType" factoryName="Python" temporary="true" nameIsGenerated="true">
<module name="FileTranslate" />
<option name="ENV_FILES" value="" />
<option name="INTERPRETER_OPTIONS" value="" />
<option name="PARENT_ENVS" value="true" />
<envs>
<env name="PYTHONUNBUFFERED" value="1" />
</envs>
<option name="SDK_HOME" value="" />
<option name="WORKING_DIRECTORY" value="$PROJECT_DIR$/tests" />
<option name="IS_MODULE_SDK" value="true" />
<option name="ADD_CONTENT_ROOTS" value="true" />
<option name="ADD_SOURCE_ROOTS" value="true" />
<EXTENSION ID="PythonCoverageRunConfigurationExtension" runner="coverage.py" />
<option name="SCRIPT_NAME" value="$PROJECT_DIR$/tests/testhtml.py" />
<option name="PARAMETERS" value="" />
<option name="SHOW_COMMAND_LINE" value="false" />
<option name="EMULATE_TERMINAL" value="false" />
<option name="MODULE_MODE" value="false" />
<option name="REDIRECT_INPUT" value="false" />
<option name="INPUT_FILE" value="" />
<method v="2" />
</configuration>
<configuration default="true" type="Python.FlaskServer"> <configuration default="true" type="Python.FlaskServer">
<module name="filetranslate" /> <module name="filetranslate" />
<option name="ENV_FILES" value="" /> <option name="ENV_FILES" value="" />
@@ -481,7 +462,7 @@
<option name="USE_PATTERN" value="false" /> <option name="USE_PATTERN" value="false" />
<method v="2" /> <method v="2" />
</configuration> </configuration>
<configuration name="pytest (testhtml.py 内)" type="tests" factoryName="py.test" temporary="true" nameIsGenerated="true"> <configuration name="pytest (3testhtml.py 内)" type="tests" factoryName="py.test" temporary="true" nameIsGenerated="true">
<module name="FileTranslate" /> <module name="FileTranslate" />
<option name="ENV_FILES" value="" /> <option name="ENV_FILES" value="" />
<option name="INTERPRETER_OPTIONS" value="" /> <option name="INTERPRETER_OPTIONS" value="" />
@@ -495,17 +476,17 @@
<option name="_new_keywords" value="&quot;&quot;" /> <option name="_new_keywords" value="&quot;&quot;" />
<option name="_new_parameters" value="&quot;&quot;" /> <option name="_new_parameters" value="&quot;&quot;" />
<option name="_new_additionalArguments" value="&quot;&quot;" /> <option name="_new_additionalArguments" value="&quot;&quot;" />
<option name="_new_target" value="&quot;$PROJECT_DIR$/tests/testhtml.py&quot;" /> <option name="_new_target" value="&quot;$PROJECT_DIR$/tests/3testhtml.py&quot;" />
<option name="_new_targetType" value="&quot;PATH&quot;" /> <option name="_new_targetType" value="&quot;PATH&quot;" />
<method v="2" /> <method v="2" />
</configuration> </configuration>
<recent_temporary> <recent_temporary>
<list> <list>
<item itemvalue="Python.app_test (1)" /> <item itemvalue="Python.app_test (1)" />
<item itemvalue="Python.testhtml" /> <item itemvalue="Python.2test2 (1)" />
<item itemvalue="Python.3testhtml" />
<item itemvalue="JavaScript 调试.test.html" /> <item itemvalue="JavaScript 调试.test.html" />
<item itemvalue="Python 测试.pytest (testhtml.py 内)" /> <item itemvalue="Python 测试.pytest (3testhtml.py 内)" />
<item itemvalue="Python.test2" />
</list> </list>
</recent_temporary> </recent_temporary>
</component> </component>
@@ -580,7 +561,8 @@
<workItem from="1747403732304" duration="6270000" /> <workItem from="1747403732304" duration="6270000" />
<workItem from="1747452556852" duration="1474000" /> <workItem from="1747452556852" duration="1474000" />
<workItem from="1747478362092" duration="1735000" /> <workItem from="1747478362092" duration="1735000" />
<workItem from="1747553452592" duration="2260000" /> <workItem from="1747553452592" duration="4624000" />
<workItem from="1747578049178" duration="2689000" />
</task> </task>
<servers /> <servers />
</component> </component>
@@ -588,11 +570,11 @@
<option name="version" value="3" /> <option name="version" value="3" />
</component> </component>
<component name="com.intellij.coverage.CoverageDataManagerImpl"> <component name="com.intellij.coverage.CoverageDataManagerImpl">
<SUITE FILE_PATH="coverage/filetranslate$app_test__1_.coverage" NAME="app_test (1) 覆盖结果" MODIFIED="1747555275808" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" /> <SUITE FILE_PATH="coverage/filetranslate$app_test__1_.coverage" NAME="app_test (1) 覆盖结果" MODIFIED="1747579997941" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
<SUITE FILE_PATH="coverage/filetranslate$test.coverage" NAME="test 覆盖结果" MODIFIED="1747472297913" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" /> <SUITE FILE_PATH="coverage/filetranslate$test.coverage" NAME="test 覆盖结果" MODIFIED="1747472297913" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
<SUITE FILE_PATH="coverage/filetranslate$convert.coverage" NAME="convert 覆盖结果" MODIFIED="1746963490689" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate/utils" /> <SUITE FILE_PATH="coverage/filetranslate$convert.coverage" NAME="convert 覆盖结果" MODIFIED="1746963490689" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate/utils" />
<SUITE FILE_PATH="coverage/PDFtranslate$PDFtranslater__1_.coverage" NAME="PDFtranslater (1) 覆盖结果" MODIFIED="1746633258205" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages" />
<SUITE FILE_PATH="coverage/filetranslate$pytest__test_html_py__.coverage" NAME="pytest (test_html.py 内) 覆盖结果" MODIFIED="1747554037236" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" /> <SUITE FILE_PATH="coverage/filetranslate$pytest__test_html_py__.coverage" NAME="pytest (test_html.py 内) 覆盖结果" MODIFIED="1747554037236" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
<SUITE FILE_PATH="coverage/PDFtranslate$PDFtranslater__1_.coverage" NAME="PDFtranslater (1) 覆盖结果" MODIFIED="1746633258205" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages" />
<SUITE FILE_PATH="coverage/PDFtranslate$agent_utils.coverage" NAME="agent_utils 覆盖结果" MODIFIED="1746617703678" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages/utils" /> <SUITE FILE_PATH="coverage/PDFtranslate$agent_utils.coverage" NAME="agent_utils 覆盖结果" MODIFIED="1746617703678" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages/utils" />
<SUITE FILE_PATH="coverage/filetranslate$app2.coverage" NAME="app2 覆盖结果" MODIFIED="1747108180309" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate" /> <SUITE FILE_PATH="coverage/filetranslate$app2.coverage" NAME="app2 覆盖结果" MODIFIED="1747108180309" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate" />
<SUITE FILE_PATH="coverage/filetranslate$app.coverage" NAME="app 覆盖结果" MODIFIED="1747448464521" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate" /> <SUITE FILE_PATH="coverage/filetranslate$app.coverage" NAME="app 覆盖结果" MODIFIED="1747448464521" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate" />
@@ -600,9 +582,10 @@
<SUITE FILE_PATH="coverage/filetranslate$test3.coverage" NAME="test3 覆盖结果" MODIFIED="1746884110572" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" /> <SUITE FILE_PATH="coverage/filetranslate$test3.coverage" NAME="test3 覆盖结果" MODIFIED="1746884110572" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
<SUITE FILE_PATH="coverage/filetranslate$app__1_.coverage" NAME="app (1) 覆盖结果" MODIFIED="1747136094477" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" /> <SUITE FILE_PATH="coverage/filetranslate$app__1_.coverage" NAME="app (1) 覆盖结果" MODIFIED="1747136094477" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
<SUITE FILE_PATH="coverage/filetranslate$markdown_splitter.coverage" NAME="markdown_splitter 覆盖结果" MODIFIED="1746805063874" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate/utils" /> <SUITE FILE_PATH="coverage/filetranslate$markdown_splitter.coverage" NAME="markdown_splitter 覆盖结果" MODIFIED="1746805063874" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate/utils" />
<SUITE FILE_PATH="coverage/filetranslate$agent.coverage" NAME="agent 覆盖结果" MODIFIED="1746805293987" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate/Agents" />
<SUITE FILE_PATH="coverage/filetranslate$PDFtranslater__2_.coverage" NAME="PDFtranslater (2) 覆盖结果" MODIFIED="1746679546680" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/filetranslate_packages" />
<SUITE FILE_PATH="coverage/PDFtranslate$test.coverage" NAME="test 覆盖结果" MODIFIED="1746629433597" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" /> <SUITE FILE_PATH="coverage/PDFtranslate$test.coverage" NAME="test 覆盖结果" MODIFIED="1746629433597" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
<SUITE FILE_PATH="coverage/filetranslate$PDFtranslater__2_.coverage" NAME="PDFtranslater (2) 覆盖结果" MODIFIED="1746679546680" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/filetranslate_packages" />
<SUITE FILE_PATH="coverage/filetranslate$agent.coverage" NAME="agent 覆盖结果" MODIFIED="1746805293987" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate/Agents" />
<SUITE FILE_PATH="coverage/filetranslate$2test2__1_.coverage" NAME="2test2 (1) 覆盖结果" MODIFIED="1747579915531" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
<SUITE FILE_PATH="coverage/filetranslate$agent_utils.coverage" NAME="agent_utils 覆盖结果" MODIFIED="1746708534311" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate/utils" /> <SUITE FILE_PATH="coverage/filetranslate$agent_utils.coverage" NAME="agent_utils 覆盖结果" MODIFIED="1746708534311" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate/utils" />
<SUITE FILE_PATH="coverage/filetranslate$.coverage" NAME="切分测试 覆盖结果" MODIFIED="1747187128847" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" /> <SUITE FILE_PATH="coverage/filetranslate$.coverage" NAME="切分测试 覆盖结果" MODIFIED="1747187128847" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
<SUITE FILE_PATH="coverage/filetranslate$test1.coverage" NAME="test1 覆盖结果" MODIFIED="1746936018440" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" /> <SUITE FILE_PATH="coverage/filetranslate$test1.coverage" NAME="test1 覆盖结果" MODIFIED="1746936018440" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />

View File

@@ -144,7 +144,7 @@ translater = FileTranslater(base_url="<baseurl>", # 默认的模型baseurl
key="<key>", # 默认的模型api-key key="<key>", # 默认的模型api-key
model_id="<model-id>", # 默认的模型id model_id="<model-id>", # 默认的模型id
chunksize=2000, # markdown分块长度单位byte分块越大效果越好也越慢不建议超过8000 chunksize=2000, # markdown分块长度单位byte分块越大效果越好也越慢不建议超过8000
max_concurrent=15, # 并发数受到ai平台并发量限制如果文章很长建议适当加大到20以上 max_concurrent=20, # 并发数受到ai平台并发量限制如果文章很长建议适当加大到20以上
docling_artifact=None, # 使用提前下载好的docling模型 docling_artifact=None, # 使用提前下载好的docling模型
timeout=2000, # 调用api的超时时间 timeout=2000, # 调用api的超时时间
tips=True # 开场提示 tips=True # 开场提示

View File

@@ -9,7 +9,7 @@ from docling.datamodel.document import DocumentStream
from docutranslate.agents import Agent, AgentArgs from docutranslate.agents import Agent, AgentArgs
from docutranslate.agents import MDRefineAgent, MDTranslateAgent from docutranslate.agents import MDRefineAgent, MDTranslateAgent
from docutranslate.utils.convert import file2markdown_embed_images from docutranslate.utils.convert import file2markdown_embed_images
from docutranslate.utils.markdown_splitter import split_markdown_text from docutranslate.utils.markdown_splitter import split_markdown_text,join_markdown_texts
from docutranslate.utils.markdown_utils import uris2placeholder, placeholder2_uris, MaskDict from docutranslate.utils.markdown_utils import uris2placeholder, placeholder2_uris, MaskDict
from docutranslate.logger import translater_logger from docutranslate.logger import translater_logger
@@ -17,7 +17,7 @@ from docutranslate.logger import translater_logger
class FileTranslater: class FileTranslater:
def __init__(self, file_path: Path | str | None = None, chunksize: int = 2000, base_url="", key=None, def __init__(self, file_path: Path | str | None = None, chunksize: int = 2000, base_url="", key=None,
model_id="", temperature=0.7, max_concurrent=15, docling_artifact: Path | str | None = None, model_id="", temperature=0.7, max_concurrent=20, docling_artifact: Path | str | None = None,
timeout=2000, tips=True): timeout=2000, tips=True):
if isinstance(file_path, str): if isinstance(file_path, str):
file_path = Path(file_path) file_path = Path(file_path)
@@ -129,7 +129,7 @@ class FileTranslater:
if refine_agent is None: if refine_agent is None:
refine_agent = MDRefineAgent(**self.default_agent_params()) refine_agent = MDRefineAgent(**self.default_agent_params())
result: list[str] = refine_agent.send_prompts(chuncks) result: list[str] = refine_agent.send_prompts(chuncks)
self.markdown = "\n\n".join(result) self.markdown=join_markdown_texts(result)
self._unmask_uris_in_markdown() self._unmask_uris_in_markdown()
translater_logger.info("markdown已修正") translater_logger.info("markdown已修正")
return self.markdown return self.markdown
@@ -141,7 +141,7 @@ class FileTranslater:
if translate_agent is None: if translate_agent is None:
translate_agent = MDTranslateAgent(to_lang=to_lang, **self.default_agent_params()) translate_agent = MDTranslateAgent(to_lang=to_lang, **self.default_agent_params())
result: list[str] = translate_agent.send_prompts(chuncks) result: list[str] = translate_agent.send_prompts(chuncks)
self.markdown = "\n\n".join(result) self.markdown=join_markdown_texts(result)
self._unmask_uris_in_markdown() self._unmask_uris_in_markdown()
translater_logger.info("翻译完成") translater_logger.info("翻译完成")
return self.markdown return self.markdown
@@ -154,7 +154,7 @@ class FileTranslater:
if refine_agent is None: if refine_agent is None:
refine_agent = MDRefineAgent(**self.default_agent_params()) refine_agent = MDRefineAgent(**self.default_agent_params())
result: list[str] = await refine_agent.send_prompts_async(chuncks) result: list[str] = await refine_agent.send_prompts_async(chuncks)
self.markdown = "\n\n".join(result) self.markdown=join_markdown_texts(result)
self._unmask_uris_in_markdown() self._unmask_uris_in_markdown()
translater_logger.info("markdown已修正") translater_logger.info("markdown已修正")
return self.markdown return self.markdown
@@ -166,7 +166,7 @@ class FileTranslater:
if translate_agent is None: if translate_agent is None:
translate_agent = MDTranslateAgent(to_lang=to_lang, **self.default_agent_params()) translate_agent = MDTranslateAgent(to_lang=to_lang, **self.default_agent_params())
result: list[str] = await translate_agent.send_prompts_async(chuncks) result: list[str] = await translate_agent.send_prompts_async(chuncks)
self.markdown = "\n\n".join(result) self.markdown=join_markdown_texts(result)
self._unmask_uris_in_markdown() self._unmask_uris_in_markdown()
translater_logger.info("翻译完成") translater_logger.info("翻译完成")
return self.markdown return self.markdown

View File

@@ -17,6 +17,7 @@ def file2markdown_embed_images(file_path: Path | str|DocumentStream, formula=Fal
pipeline_options.do_ocr=False pipeline_options.do_ocr=False
pipeline_options.images_scale = IMAGE_RESOLUTION_SCALE pipeline_options.images_scale = IMAGE_RESOLUTION_SCALE
pipeline_options.generate_picture_images = True pipeline_options.generate_picture_images = True
pipeline_options.table_structure_options.do_cell_matching=False
if formula: if formula:
pipeline_options.do_formula_enrichment=True pipeline_options.do_formula_enrichment=True
if code: if code:

View File

@@ -233,5 +233,15 @@ def split_markdown_text(markdown_text, max_block_size=5000):
return splitter.split_markdown(markdown_text) return splitter.split_markdown(markdown_text)
def join_markdown_texts(markdown_texts:list[str])->str:
result=""
for text in markdown_texts:
#只有表格会收到多余空行的影响
if text.lstrip().startswith("|"):
result=result+"\n"+text
else:
result+="\n\n"+text
return result
if __name__ == '__main__': if __name__ == '__main__':
pass pass

View File

@@ -1,6 +1,6 @@
[project] [project]
name = "docutranslate" name = "docutranslate"
version = "0.2.15" version = "0.2.16"
description = "文件翻译工具" description = "文件翻译工具"
readme = "README.md" readme = "README.md"
requires-python = ">=3.10" requires-python = ">=3.10"
@@ -10,6 +10,5 @@ dependencies = [
"markdown2", "markdown2",
"fastapi[standard]>=0.115.12", "fastapi[standard]>=0.115.12",
] ]
[project.scripts] [project.scripts]
docutranslate="docutranslate.cli:main" docutranslate="docutranslate.cli:main"