更新readme

This commit is contained in:
xunbu
2025-05-15 17:35:08 +08:00
parent 77c4bf6ceb
commit 9c45a673f9
4 changed files with 80 additions and 62 deletions

110
.idea/workspace.xml generated
View File

@@ -6,6 +6,8 @@
<component name="ChangeListManager">
<list default="true" id="6b18b44a-df57-4212-a857-9e291ebe5dd2" name="更改" comment="">
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/README.md" beforeDir="false" afterPath="$PROJECT_DIR$/README.md" afterDir="false" />
<change beforePath="$PROJECT_DIR$/docutranslate/utils/convert.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/utils/convert.py" afterDir="false" />
</list>
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
@@ -31,54 +33,54 @@
<option name="hideEmptyMiddlePackages" value="true" />
<option name="showLibraryContents" value="true" />
</component>
<component name="PropertiesComponent"><![CDATA[{
"keyToString": {
"DefaultHtmlFileTemplate": "HTML File",
"JavaScript 调试.output.html (1).executor": "Run",
"JavaScript 调试.output.html.executor": "Run",
"JavaScript 调试.regex.md_中文.html.executor": "Run",
"JavaScript 调试.regex_中文.html.executor": "Run",
"JavaScript 调试.test2.html.executor": "Run",
"JavaScript 调试.test2_英文.html.executor": "Run",
"JavaScript 调试.test4-1_中文.html.executor": "Run",
"JavaScript 调试.互联网认证授权机制.html.executor": "Run",
"JavaScript 调试.互联网认证授权机制_英文.html.executor": "Run",
"JavaScript 调试.毕业论文_英文.html.executor": "Run",
"ModuleVcsDetector.initialDetectionPerformed": "true",
"Python 测试.Python 测试 (markdown_mask.py 内).executor": "Run",
"Python 测试.markdown_mask.Test.test_basic_link_masking 的 Python 测试.executor": "Run",
"Python.PDFtranslater (1).executor": "Run",
"Python.PDFtranslater (2).executor": "Run",
"Python.agent.executor": "Debug",
"Python.agent_utils.executor": "Run",
"Python.app (1).executor": "Run",
"Python.app.executor": "Run",
"Python.app2.executor": "Run",
"Python.app_test (1).executor": "Run",
"Python.convert.executor": "Run",
"Python.markdown_splitter.executor": "Debug",
"Python.markdown_utils.executor": "Run",
"Python.test.executor": "Run",
"Python.test1.executor": "Run",
"Python.test2.executor": "Run",
"Python.test3.executor": "Run",
"Python.test4.executor": "Run",
"Python.translater.executor": "Run",
"Python.切分测试.executor": "Run",
"RunOnceActivity.ShowReadmeOnStart": "true",
"RunOnceActivity.TerminalTabsStorage.copyFrom.TerminalArrangementManager": "true",
"RunOnceActivity.git.unshallow": "true",
"git-widget-placeholder": "main",
"last_opened_file_path": "C:/Users/jxgm/Desktop/FileTranslate/dist/DocuTranslate",
"node.js.detected.package.eslint": "true",
"node.js.detected.package.tslint": "true",
"node.js.selected.package.eslint": "(autodetect)",
"node.js.selected.package.tslint": "(autodetect)",
"nodejs_package_manager_path": "npm",
"settings.editor.selected.configurable": "preferences.pluginManager",
"vue.rearranger.settings.migration": "true"
<component name="PropertiesComponent">{
&quot;keyToString&quot;: {
&quot;DefaultHtmlFileTemplate&quot;: &quot;HTML File&quot;,
&quot;JavaScript 调试.output.html (1).executor&quot;: &quot;Run&quot;,
&quot;JavaScript 调试.output.html.executor&quot;: &quot;Run&quot;,
&quot;JavaScript 调试.regex.md_中文.html.executor&quot;: &quot;Run&quot;,
&quot;JavaScript 调试.regex_中文.html.executor&quot;: &quot;Run&quot;,
&quot;JavaScript 调试.test2.html.executor&quot;: &quot;Run&quot;,
&quot;JavaScript 调试.test2_英文.html.executor&quot;: &quot;Run&quot;,
&quot;JavaScript 调试.test4-1_中文.html.executor&quot;: &quot;Run&quot;,
&quot;JavaScript 调试.互联网认证授权机制.html.executor&quot;: &quot;Run&quot;,
&quot;JavaScript 调试.互联网认证授权机制_英文.html.executor&quot;: &quot;Run&quot;,
&quot;JavaScript 调试.毕业论文_英文.html.executor&quot;: &quot;Run&quot;,
&quot;ModuleVcsDetector.initialDetectionPerformed&quot;: &quot;true&quot;,
&quot;Python 测试.Python 测试 (markdown_mask.py 内).executor&quot;: &quot;Run&quot;,
&quot;Python 测试.markdown_mask.Test.test_basic_link_masking 的 Python 测试.executor&quot;: &quot;Run&quot;,
&quot;Python.PDFtranslater (1).executor&quot;: &quot;Run&quot;,
&quot;Python.PDFtranslater (2).executor&quot;: &quot;Run&quot;,
&quot;Python.agent.executor&quot;: &quot;Debug&quot;,
&quot;Python.agent_utils.executor&quot;: &quot;Run&quot;,
&quot;Python.app (1).executor&quot;: &quot;Run&quot;,
&quot;Python.app.executor&quot;: &quot;Run&quot;,
&quot;Python.app2.executor&quot;: &quot;Run&quot;,
&quot;Python.app_test (1).executor&quot;: &quot;Run&quot;,
&quot;Python.convert.executor&quot;: &quot;Run&quot;,
&quot;Python.markdown_splitter.executor&quot;: &quot;Debug&quot;,
&quot;Python.markdown_utils.executor&quot;: &quot;Run&quot;,
&quot;Python.test.executor&quot;: &quot;Run&quot;,
&quot;Python.test1.executor&quot;: &quot;Run&quot;,
&quot;Python.test2.executor&quot;: &quot;Run&quot;,
&quot;Python.test3.executor&quot;: &quot;Run&quot;,
&quot;Python.test4.executor&quot;: &quot;Run&quot;,
&quot;Python.translater.executor&quot;: &quot;Run&quot;,
&quot;Python.切分测试.executor&quot;: &quot;Run&quot;,
&quot;RunOnceActivity.ShowReadmeOnStart&quot;: &quot;true&quot;,
&quot;RunOnceActivity.TerminalTabsStorage.copyFrom.TerminalArrangementManager&quot;: &quot;true&quot;,
&quot;RunOnceActivity.git.unshallow&quot;: &quot;true&quot;,
&quot;git-widget-placeholder&quot;: &quot;main&quot;,
&quot;last_opened_file_path&quot;: &quot;C:/Users/jxgm/Desktop/FileTranslate/dist/DocuTranslate&quot;,
&quot;node.js.detected.package.eslint&quot;: &quot;true&quot;,
&quot;node.js.detected.package.tslint&quot;: &quot;true&quot;,
&quot;node.js.selected.package.eslint&quot;: &quot;(autodetect)&quot;,
&quot;node.js.selected.package.tslint&quot;: &quot;(autodetect)&quot;,
&quot;nodejs_package_manager_path&quot;: &quot;npm&quot;,
&quot;settings.editor.selected.configurable&quot;: &quot;preferences.pluginManager&quot;,
&quot;vue.rearranger.settings.migration&quot;: &quot;true&quot;
}
}]]></component>
}</component>
<component name="RecentsManager">
<key name="CopyFile.RECENT_KEYS">
<recent name="C:\Users\jxgm\Desktop\FileTranslate\dist\DocuTranslate" />
@@ -95,7 +97,7 @@
<recent name="C:\Users\jxgm\Desktop\FileTranslate\tests\resource" />
</key>
</component>
<component name="RunManager" selected="Python.app_test (1)">
<component name="RunManager" selected="Python.test">
<configuration default="true" type="DjangoTestsConfigurationType">
<module name="filetranslate" />
<option name="ENV_FILES" value="" />
@@ -541,11 +543,11 @@
</configuration>
<recent_temporary>
<list>
<item itemvalue="Python.test" />
<item itemvalue="Python.app_test (1)" />
<item itemvalue="Python.切分测试" />
<item itemvalue="Python.app_test" />
<item itemvalue="Python.app" />
<item itemvalue="Python.test" />
</list>
</recent_temporary>
</component>
@@ -612,7 +614,9 @@
<workItem from="1747050264594" duration="2135000" />
<workItem from="1747134000963" duration="3065000" />
<workItem from="1747146670281" duration="64000" />
<workItem from="1747185217844" duration="5868000" />
<workItem from="1747185217844" duration="6194000" />
<workItem from="1747297470216" duration="347000" />
<workItem from="1747299661166" duration="1977000" />
</task>
<servers />
</component>
@@ -621,7 +625,7 @@
</component>
<component name="com.intellij.coverage.CoverageDataManagerImpl">
<SUITE FILE_PATH="coverage/filetranslate$app_test__1_.coverage" NAME="app_test (1) 覆盖结果" MODIFIED="1747189112668" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
<SUITE FILE_PATH="coverage/filetranslate$test.coverage" NAME="test 覆盖结果" MODIFIED="1747125597841" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
<SUITE FILE_PATH="coverage/filetranslate$test.coverage" NAME="test 覆盖结果" MODIFIED="1747300796373" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
<SUITE FILE_PATH="coverage/filetranslate$convert.coverage" NAME="convert 覆盖结果" MODIFIED="1746963490689" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate/utils" />
<SUITE FILE_PATH="coverage/PDFtranslate$PDFtranslater__1_.coverage" NAME="PDFtranslater (1) 覆盖结果" MODIFIED="1746633258205" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages" />
<SUITE FILE_PATH="coverage/PDFtranslate$agent_utils.coverage" NAME="agent_utils 覆盖结果" MODIFIED="1746617703678" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages/utils" />
@@ -631,9 +635,9 @@
<SUITE FILE_PATH="coverage/filetranslate$test3.coverage" NAME="test3 覆盖结果" MODIFIED="1746884110572" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
<SUITE FILE_PATH="coverage/filetranslate$app__1_.coverage" NAME="app (1) 覆盖结果" MODIFIED="1747136094477" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
<SUITE FILE_PATH="coverage/filetranslate$markdown_splitter.coverage" NAME="markdown_splitter 覆盖结果" MODIFIED="1746805063874" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate/utils" />
<SUITE FILE_PATH="coverage/PDFtranslate$test.coverage" NAME="test 覆盖结果" MODIFIED="1746629433597" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
<SUITE FILE_PATH="coverage/filetranslate$PDFtranslater__2_.coverage" NAME="PDFtranslater (2) 覆盖结果" MODIFIED="1746679546680" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/filetranslate_packages" />
<SUITE FILE_PATH="coverage/filetranslate$agent.coverage" NAME="agent 覆盖结果" MODIFIED="1746805293987" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate/Agents" />
<SUITE FILE_PATH="coverage/filetranslate$PDFtranslater__2_.coverage" NAME="PDFtranslater (2) 覆盖结果" MODIFIED="1746679546680" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/filetranslate_packages" />
<SUITE FILE_PATH="coverage/PDFtranslate$test.coverage" NAME="test 覆盖结果" MODIFIED="1746629433597" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
<SUITE FILE_PATH="coverage/filetranslate$agent_utils.coverage" NAME="agent_utils 覆盖结果" MODIFIED="1746708534311" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate/utils" />
<SUITE FILE_PATH="coverage/filetranslate$.coverage" NAME="切分测试 覆盖结果" MODIFIED="1747187128847" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
<SUITE FILE_PATH="coverage/filetranslate$test1.coverage" NAME="test1 覆盖结果" MODIFIED="1746936018440" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />

View File

@@ -6,6 +6,11 @@
文件翻译工具,借助[docling](https://github.com/docling-project/docling)与大语言模型实现多种格式文件的翻译
# 整合包
对于只使用基本翻译功能的用户,可以在[github releases](https://github.com/xunbu/docutranslate/releases)
上下载最新的整合包该整合包点击即用您所需的只是获取某个ai平台的api-key。
# 安装
使用pip
@@ -18,11 +23,14 @@
# 支持的文件格式
| 输入格式 | 输出格式 |
|------------|--------------|
|----------------|--------------|
| PDF非扫描版 | Markdown推荐 |
| Markdown | HTML |
| HTML、XHTML | |
| HTML、XHTML | PDF(仅交互界面支持) |
| CSV | |
| DOC、DOCX部分支持 | |
> 如果想不使用交互界面获取pdf可以先下载HTML文件用浏览器打开并打印
# 前置条件

View File

@@ -1,11 +1,13 @@
import os
from huggingface_hub.errors import LocalEntryNotFoundError
from docling.datamodel.base_models import InputFormat
from docling.datamodel.pipeline_options import PdfPipelineOptions
from docling.datamodel.pipeline_options import PdfPipelineOptions, AcceleratorOptions, AcceleratorDevice
from docling_core.types.doc import ImageRefMode
from pathlib import Path
from docling.document_converter import DocumentConverter, PdfFormatOption
from docling.datamodel.document import DocumentStream
from docling.backend.docling_parse_v2_backend import DoclingParseV2DocumentBackend
from docutranslate.logger import translater_logger
IMAGE_RESOLUTION_SCALE = 4
@@ -13,15 +15,19 @@ IMAGE_RESOLUTION_SCALE = 4
def file2markdown_embed_images(file_path: Path | str|DocumentStream, formula=False, code=False,artifacts_path:Path|str|None=None) -> str:
translater_logger.info(f"正在将文档转换为markdown")
pipeline_options = PdfPipelineOptions(artifacts_path=artifacts_path)
# pipeline_options.do_ocr=False
pipeline_options.do_ocr=False
pipeline_options.images_scale = IMAGE_RESOLUTION_SCALE
pipeline_options.generate_picture_images = True
if formula:
pipeline_options.do_formula_enrichment=True
if code:
pipeline_options.do_code_enrichment=True
pipeline_options.accelerator_options= AcceleratorOptions(
num_threads=8, device=AcceleratorDevice.AUTO
)
converter = DocumentConverter(format_options={
InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options)
})
try:
result = converter.convert(file_path).document.export_to_markdown(image_mode=ImageRefMode.EMBEDDED)

View File

@@ -1,6 +1,6 @@
[project]
name = "docutranslate"
version = "0.2.6"
version = "0.2.6.post1"
description = "文件翻译工具"
readme = "README.md"
requires-python = ">=3.10"