This commit is contained in:
xunbu
2025-05-08 16:13:22 +08:00
parent 204c325209
commit 2798172528
14 changed files with 74 additions and 57 deletions

2
.gitignore vendored
View File

@@ -7,6 +7,6 @@ wheels/
*.egg-info
tests/resource/
tests/
filetranslate/output/
docutranslate/output/
# Virtual environments
.venv

76
.idea/workspace.xml generated
View File

@@ -5,8 +5,20 @@
</component>
<component name="ChangeListManager">
<list default="true" id="6b18b44a-df57-4212-a857-9e291ebe5dd2" name="更改" comment="">
<change beforePath="$PROJECT_DIR$/.gitignore" beforeDir="false" afterPath="$PROJECT_DIR$/.gitignore" afterDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/README.md" beforeDir="false" afterPath="$PROJECT_DIR$/README.md" afterDir="false" />
<change beforePath="$PROJECT_DIR$/filetranslate/__init__.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/__init__.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/filetranslate/decorator/__init__.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/decorator/__init__.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/filetranslate/decorator/markdown_mask.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/decorator/markdown_mask.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/filetranslate/decorator/time.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/decorator/time.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/filetranslate/translater.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/translater.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/filetranslate/utils/__init__.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/utils/__init__.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/filetranslate/utils/agent_utils.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/utils/agent_utils.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/filetranslate/utils/convert.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/utils/convert.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/filetranslate/utils/markdown_splitter.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/utils/markdown_splitter.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/filetranslate/utils/markdown_utils.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/utils/markdown_utils.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/pyproject.toml" beforeDir="false" afterPath="$PROJECT_DIR$/pyproject.toml" afterDir="false" />
</list>
<option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" />
@@ -32,38 +44,38 @@
<option name="hideEmptyMiddlePackages" value="true" />
<option name="showLibraryContents" value="true" />
</component>
<component name="PropertiesComponent"><![CDATA[{
"keyToString": {
"DefaultHtmlFileTemplate": "HTML File",
"JavaScript 调试.output.html (1).executor": "Run",
"JavaScript 调试.output.html.executor": "Run",
"JavaScript 调试.regex_中文.html.executor": "Run",
"JavaScript 调试.test2_英文.html.executor": "Run",
"ModuleVcsDetector.initialDetectionPerformed": "true",
"Python 测试.Python 测试 (markdown_mask.py 内).executor": "Run",
"Python 测试.markdown_mask.Test.test_basic_link_masking 的 Python 测试.executor": "Run",
"Python.PDFtranslater (1).executor": "Run",
"Python.PDFtranslater (2).executor": "Run",
"Python.agent_utils.executor": "Run",
"Python.convert.executor": "Run",
"Python.markdown_splitter.executor": "Run",
"Python.markdown_utils.executor": "Run",
"Python.test.executor": "Run",
"Python.test1.executor": "Run",
"Python.translater.executor": "Debug",
"RunOnceActivity.ShowReadmeOnStart": "true",
"RunOnceActivity.git.unshallow": "true",
"git-widget-placeholder": "master",
"last_opened_file_path": "C:/Users/jxgm/Desktop/FileTranslate/tests/resource",
"node.js.detected.package.eslint": "true",
"node.js.detected.package.tslint": "true",
"node.js.selected.package.eslint": "(autodetect)",
"node.js.selected.package.tslint": "(autodetect)",
"nodejs_package_manager_path": "npm",
"settings.editor.selected.configurable": "Errors",
"vue.rearranger.settings.migration": "true"
<component name="PropertiesComponent">{
&quot;keyToString&quot;: {
&quot;DefaultHtmlFileTemplate&quot;: &quot;HTML File&quot;,
&quot;JavaScript 调试.output.html (1).executor&quot;: &quot;Run&quot;,
&quot;JavaScript 调试.output.html.executor&quot;: &quot;Run&quot;,
&quot;JavaScript 调试.regex_中文.html.executor&quot;: &quot;Run&quot;,
&quot;JavaScript 调试.test2_英文.html.executor&quot;: &quot;Run&quot;,
&quot;ModuleVcsDetector.initialDetectionPerformed&quot;: &quot;true&quot;,
&quot;Python 测试.Python 测试 (markdown_mask.py 内).executor&quot;: &quot;Run&quot;,
&quot;Python 测试.markdown_mask.Test.test_basic_link_masking 的 Python 测试.executor&quot;: &quot;Run&quot;,
&quot;Python.PDFtranslater (1).executor&quot;: &quot;Run&quot;,
&quot;Python.PDFtranslater (2).executor&quot;: &quot;Run&quot;,
&quot;Python.agent_utils.executor&quot;: &quot;Run&quot;,
&quot;Python.convert.executor&quot;: &quot;Run&quot;,
&quot;Python.markdown_splitter.executor&quot;: &quot;Run&quot;,
&quot;Python.markdown_utils.executor&quot;: &quot;Run&quot;,
&quot;Python.test.executor&quot;: &quot;Run&quot;,
&quot;Python.test1.executor&quot;: &quot;Run&quot;,
&quot;Python.translater.executor&quot;: &quot;Debug&quot;,
&quot;RunOnceActivity.ShowReadmeOnStart&quot;: &quot;true&quot;,
&quot;RunOnceActivity.git.unshallow&quot;: &quot;true&quot;,
&quot;git-widget-placeholder&quot;: &quot;master&quot;,
&quot;last_opened_file_path&quot;: &quot;C:/Users/jxgm/Desktop/FileTranslate/tests/resource&quot;,
&quot;node.js.detected.package.eslint&quot;: &quot;true&quot;,
&quot;node.js.detected.package.tslint&quot;: &quot;true&quot;,
&quot;node.js.selected.package.eslint&quot;: &quot;(autodetect)&quot;,
&quot;node.js.selected.package.tslint&quot;: &quot;(autodetect)&quot;,
&quot;nodejs_package_manager_path&quot;: &quot;npm&quot;,
&quot;settings.editor.selected.configurable&quot;: &quot;Errors&quot;,
&quot;vue.rearranger.settings.migration&quot;: &quot;true&quot;
}
}]]></component>
}</component>
<component name="RecentsManager">
<key name="CopyFile.RECENT_KEYS">
<recent name="C:\Users\jxgm\Desktop\FileTranslate\tests\resource" />
@@ -302,7 +314,7 @@
<workItem from="1746588383790" duration="2614000" />
<workItem from="1746593417117" duration="25924000" />
<workItem from="1746626070703" duration="7931000" />
<workItem from="1746669839816" duration="17865000" />
<workItem from="1746669839816" duration="20170000" />
</task>
<servers />
</component>

View File

@@ -1,15 +1,15 @@
# 简介
## FileTranslate
## DocuTranslate
一个使用大语言模型(llm)翻译pdf和markdown的包
[github主页](https://github.com/xunbu/filetranslate)
[github主页](https://github.com/xunbu/docutranslate)
# 安装
使用pip
`pip install filetranslate`
`pip install docutranslate`
使用uv
`uv add filetranslate`
`uv add docutranslate`
# 前置条件:获取大模型平台的baseurl、key、model-id
由于需要使用大语言模型进行markdown调整与翻译,所以需要预先获取模型的baseurl、key、model-id
@@ -17,40 +17,45 @@
# 使用方式
## 使用默认参数翻译pdf
```python
from filetranslate.translater import FileTranslater
from docutranslate.translater import FileTranslater
#不开启公式、代码识别
FileTranslater(base_url="<baseurl>",key="<key>",model_id="<model-id>").translate_pdf_file("<pdf路径>",to_lang="中文")
# 不开启公式、代码识别
FileTranslater(base_url="<baseurl>", key="<key>", model_id="<model-id>").translate_pdf_file("<pdf路径>", to_lang="中文")
#开启公式、代码识别(需要下载更多模型)
FileTranslater(base_url="<baseurl>",key="<key>",model_id="<model-id>").translate_pdf_file("<pdf路径>",to_lang="中文",formula=True,code=True)
# 开启公式、代码识别(需要下载更多模型)
FileTranslater(base_url="<baseurl>", key="<key>", model_id="<model-id>").translate_pdf_file("<pdf路径>", to_lang="中文",
formula=True, code=True)
```
> 第一次使用时需要下载模型(约1G,使用公式、代码识别需要多约0.5G),请稍作等待
> 输出文件默认放在`./output`中
## 使用不同的agent分别进行文本修正和翻译
```python
from filetranslate.translater import FileTranslater
from docutranslate.translater import FileTranslater
translater = FileTranslater()
refine_agent=translater.create_refine_agent(baseurl="<baseurl-1>",key="<key-1>",model_id="<model-id-1>")
translate_agent=translater.create_translate_agent(baseurl="<baseurl-2>",key="<key-2>",model_id="<model-id-2>")
refine_agent = translater.create_refine_agent(baseurl="<baseurl-1>", key="<key-1>", model_id="<model-id-1>")
translate_agent = translater.create_translate_agent(baseurl="<baseurl-2>", key="<key-2>", model_id="<model-id-2>")
translater.translate_pdf_file(pdf_path="<pdf路径>",to_lang="中文",refine_agent=refine_agent,translate_agent=translate_agent)
translater.translate_pdf_file(pdf_path="<pdf路径>", to_lang="中文", refine_agent=refine_agent,
translate_agent=translate_agent)
```
## 参数说明
### 创建FileTranslater
```python
from filetranslate.translater import FileTranslater
from docutranslate.translater import FileTranslater
translater = FileTranslater(base_url="<baseurl>",
key="<key>",
model_id="<model-id>",#使用的模型id
chunksize=4000,#【可选】markdown分块长度分块越大效果越好不建议超过4096
max_concurrent=6#【可选】并发数受到ai平台并发量限制
model_id="<model-id>", # 使用的模型id
chunksize=4000, # 【可选】markdown分块长度分块越大效果越好不建议超过4096
max_concurrent=6 # 【可选】并发数受到ai平台并发量限制
)
```
### 翻译pdf文件

View File

@@ -2,7 +2,7 @@ from functools import wraps
from typing import Concatenate, ParamSpec, Callable
import re
from filetranslate.utils.markdown_utils import MaskDict
from docutranslate.utils.markdown_utils import MaskDict
P=ParamSpec("P")
def mask_uris_temp(func:Callable[Concatenate[str, P], str]) -> Callable[Concatenate[str, P], str]:

View File

@@ -3,11 +3,11 @@ from typing import Literal
import markdown2
from filetranslate.decorator.markdown_mask import MaskDict
from filetranslate.utils.agent_utils import Agent
from filetranslate.utils.convert import pdf2markdown_embed_images
from filetranslate.utils.markdown_splitter import split_markdown_text
from filetranslate.utils.markdown_utils import uris2placeholder, placeholder2_uris
from docutranslate.decorator.markdown_mask import MaskDict
from docutranslate.utils.agent_utils import Agent
from docutranslate.utils.convert import pdf2markdown_embed_images
from docutranslate.utils.markdown_splitter import split_markdown_text
from docutranslate.utils.markdown_utils import uris2placeholder, placeholder2_uris
class FileTranslater:

View File

@@ -1,5 +1,5 @@
[project]
name = "filetranslate"
name = "docutranslate"
version = "0.0.1"
description = "能翻译pdf和markdown的软件"
readme = "README.md"