From 279817252887021a5936ac5f3a2acb2ba142ac3d Mon Sep 17 00:00:00 2001 From: xunbu Date: Thu, 8 May 2025 16:13:22 +0800 Subject: [PATCH] =?UTF-8?q?=E6=94=B9=E5=90=8D?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .gitignore | 2 +- .idea/workspace.xml | 76 +++++++++++-------- README.md | 39 +++++----- {filetranslate => docutranslate}/__init__.py | 0 .../decorator/__init__.py | 0 .../decorator/markdown_mask.py | 2 +- .../decorator/time.py | 0 .../translater.py | 10 +-- .../utils/__init__.py | 0 .../utils/agent_utils.py | 0 .../utils/convert.py | 0 .../utils/markdown_splitter.py | 0 .../utils/markdown_utils.py | 0 pyproject.toml | 2 +- 14 files changed, 74 insertions(+), 57 deletions(-) rename {filetranslate => docutranslate}/__init__.py (100%) rename {filetranslate => docutranslate}/decorator/__init__.py (100%) rename {filetranslate => docutranslate}/decorator/markdown_mask.py (94%) rename {filetranslate => docutranslate}/decorator/time.py (100%) rename {filetranslate => docutranslate}/translater.py (97%) rename {filetranslate => docutranslate}/utils/__init__.py (100%) rename {filetranslate => docutranslate}/utils/agent_utils.py (100%) rename {filetranslate => docutranslate}/utils/convert.py (100%) rename {filetranslate => docutranslate}/utils/markdown_splitter.py (100%) rename {filetranslate => docutranslate}/utils/markdown_utils.py (100%) diff --git a/.gitignore b/.gitignore index a5628ae..8491508 100644 --- a/.gitignore +++ b/.gitignore @@ -7,6 +7,6 @@ wheels/ *.egg-info tests/resource/ tests/ -filetranslate/output/ +docutranslate/output/ # Virtual environments .venv diff --git a/.idea/workspace.xml b/.idea/workspace.xml index 39e19fd..bbed93a 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -5,8 +5,20 @@ + + + + + + + + + + + + - { + "keyToString": { + "DefaultHtmlFileTemplate": "HTML File", + "JavaScript 调试.output.html (1).executor": "Run", + "JavaScript 调试.output.html.executor": "Run", + "JavaScript 
调试.regex_中文.html.executor": "Run", + "JavaScript 调试.test2_英文.html.executor": "Run", + "ModuleVcsDetector.initialDetectionPerformed": "true", + "Python 测试.Python 测试 (markdown_mask.py 内).executor": "Run", + "Python 测试.markdown_mask.Test.test_basic_link_masking 的 Python 测试.executor": "Run", + "Python.PDFtranslater (1).executor": "Run", + "Python.PDFtranslater (2).executor": "Run", + "Python.agent_utils.executor": "Run", + "Python.convert.executor": "Run", + "Python.markdown_splitter.executor": "Run", + "Python.markdown_utils.executor": "Run", + "Python.test.executor": "Run", + "Python.test1.executor": "Run", + "Python.translater.executor": "Debug", + "RunOnceActivity.ShowReadmeOnStart": "true", + "RunOnceActivity.git.unshallow": "true", + "git-widget-placeholder": "master", + "last_opened_file_path": "C:/Users/jxgm/Desktop/FileTranslate/tests/resource", + "node.js.detected.package.eslint": "true", + "node.js.detected.package.tslint": "true", + "node.js.selected.package.eslint": "(autodetect)", + "node.js.selected.package.tslint": "(autodetect)", + "nodejs_package_manager_path": "npm", + "settings.editor.selected.configurable": "Errors", + "vue.rearranger.settings.migration": "true" } -}]]> +} @@ -302,7 +314,7 @@ - + diff --git a/README.md b/README.md index 01b1f3e..7a93a9b 100644 --- a/README.md +++ b/README.md @@ -1,15 +1,15 @@ # 简介 -## FileTranslate +## DocuTranslate 一个使用大预言模型(llm)翻译pdf和markdown的包 -[github主页](https://github.com/xunbu/filetranslate) +[github主页](https://github.com/xunbu/docutranslate) # 安装 使用pip -`pip install filetranslate` +`pip install docutranslate` 使用uv -`uv add filetranslate` +`uv add docutranslate` # 前置条件(获取大模型平台的baseurl、key、model-id) 由于需要使用大语言模型进行markdown调整与翻译,所以需要预先获取模型的baseurl、key、model-id @@ -17,40 +17,45 @@ # 使用方式 ## 使用默认参数翻译pdf + ```python -from filetranslate.translater import FileTranslater +from docutranslate.translater import FileTranslater -#不开启公式、代码识别 -FileTranslater(base_url="",key="",model_id="").translate_pdf_file("",to_lang="中文") +# 
不开启公式、代码识别 +FileTranslater(base_url="", key="", model_id="").translate_pdf_file("", to_lang="中文") -#开启公式、代码识别(需要下载更多模型) -FileTranslater(base_url="",key="",model_id="").translate_pdf_file("",to_lang="中文",formula=True,code=True) +# 开启公式、代码识别(需要下载更多模型) +FileTranslater(base_url="", key="", model_id="").translate_pdf_file("", to_lang="中文", + formula=True, code=True) ``` > 第一次使用时需要下载模型(约1G、使用公式、代码识别需要多约0.5G),请稍作等待 > 输出文件默认放在`./output`中 ## 使用不同的agent分别进行文本修正和翻译 + ```python -from filetranslate.translater import FileTranslater +from docutranslate.translater import FileTranslater translater = FileTranslater() -refine_agent=translater.create_refine_agent(baseurl="",key="",model_id="") -translate_agent=translater.create_translate_agent(baseurl="",key="",model_id="") +refine_agent = translater.create_refine_agent(baseurl="", key="", model_id="") +translate_agent = translater.create_translate_agent(baseurl="", key="", model_id="") -translater.translate_pdf_file(pdf_path="",to_lang="中文",refine_agent=refine_agent,translate_agent=translate_agent) +translater.translate_pdf_file(pdf_path="", to_lang="中文", refine_agent=refine_agent, + translate_agent=translate_agent) ``` ## 参数说明 ### 创建FileTranslate + ```python -from filetranslate.translater import FileTranslater +from docutranslate.translater import FileTranslater translater = FileTranslater(base_url="", key="", - model_id="",#使用的模型id - chunksize=4000,#【可选】markdown分块长度,分块越大效果越好,不建议超过4096 - max_concurrent=6#【可选】并发数,受到ai平台并发量限制 + model_id="", # 使用的模型id + chunksize=4000, # 【可选】markdown分块长度,分块越大效果越好,不建议超过4096 + max_concurrent=6 # 【可选】并发数,受到ai平台并发量限制 ) ``` ### 翻译pdf文件 diff --git a/filetranslate/__init__.py b/docutranslate/__init__.py similarity index 100% rename from filetranslate/__init__.py rename to docutranslate/__init__.py diff --git a/filetranslate/decorator/__init__.py b/docutranslate/decorator/__init__.py similarity index 100% rename from filetranslate/decorator/__init__.py rename to docutranslate/decorator/__init__.py diff --git 
a/filetranslate/decorator/markdown_mask.py b/docutranslate/decorator/markdown_mask.py similarity index 94% rename from filetranslate/decorator/markdown_mask.py rename to docutranslate/decorator/markdown_mask.py index 57b2a9d..a532a72 100644 --- a/filetranslate/decorator/markdown_mask.py +++ b/docutranslate/decorator/markdown_mask.py @@ -2,7 +2,7 @@ from functools import wraps from typing import Concatenate, ParamSpec, Callable import re -from filetranslate.utils.markdown_utils import MaskDict +from docutranslate.utils.markdown_utils import MaskDict P=ParamSpec("P") def mask_uris_temp(func:Callable[Concatenate[str, P], str]) -> Callable[Concatenate[str, P], str]: diff --git a/filetranslate/decorator/time.py b/docutranslate/decorator/time.py similarity index 100% rename from filetranslate/decorator/time.py rename to docutranslate/decorator/time.py diff --git a/filetranslate/translater.py b/docutranslate/translater.py similarity index 97% rename from filetranslate/translater.py rename to docutranslate/translater.py index fb66bf7..5e7d2cc 100644 --- a/filetranslate/translater.py +++ b/docutranslate/translater.py @@ -3,11 +3,11 @@ from typing import Literal import markdown2 -from filetranslate.decorator.markdown_mask import MaskDict -from filetranslate.utils.agent_utils import Agent -from filetranslate.utils.convert import pdf2markdown_embed_images -from filetranslate.utils.markdown_splitter import split_markdown_text -from filetranslate.utils.markdown_utils import uris2placeholder, placeholder2_uris +from docutranslate.decorator.markdown_mask import MaskDict +from docutranslate.utils.agent_utils import Agent +from docutranslate.utils.convert import pdf2markdown_embed_images +from docutranslate.utils.markdown_splitter import split_markdown_text +from docutranslate.utils.markdown_utils import uris2placeholder, placeholder2_uris class FileTranslater: diff --git a/filetranslate/utils/__init__.py b/docutranslate/utils/__init__.py similarity index 100% rename from 
filetranslate/utils/__init__.py rename to docutranslate/utils/__init__.py diff --git a/filetranslate/utils/agent_utils.py b/docutranslate/utils/agent_utils.py similarity index 100% rename from filetranslate/utils/agent_utils.py rename to docutranslate/utils/agent_utils.py diff --git a/filetranslate/utils/convert.py b/docutranslate/utils/convert.py similarity index 100% rename from filetranslate/utils/convert.py rename to docutranslate/utils/convert.py diff --git a/filetranslate/utils/markdown_splitter.py b/docutranslate/utils/markdown_splitter.py similarity index 100% rename from filetranslate/utils/markdown_splitter.py rename to docutranslate/utils/markdown_splitter.py diff --git a/filetranslate/utils/markdown_utils.py b/docutranslate/utils/markdown_utils.py similarity index 100% rename from filetranslate/utils/markdown_utils.py rename to docutranslate/utils/markdown_utils.py diff --git a/pyproject.toml b/pyproject.toml index fc72b5b..7b38cbd 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -1,5 +1,5 @@ [project] -name = "filetranslate" +name = "docutranslate" version = "0.0.1" description = "能翻译pdf和markdown的软件" readme = "README.md"