This commit is contained in:
xunbu
2025-06-10 17:05:51 +08:00
parent bb724e8c29
commit 8a59cd9e4b
6 changed files with 22 additions and 16 deletions

View File

@@ -19,7 +19,7 @@
# 安装 # 安装
使用pip 使用pip
`pip install docutranslate` `pip install docutranslate`
`pip install docutranslate[docling]`#如果需要使用docling进行文档解析 `pip install docutranslate[docling]`#如果需要使用docling进行文档解析
@@ -122,9 +122,9 @@ docutranslate -i
```python ```python
from docutranslate.translater import FileTranslater from docutranslate.translater import FileTranslater
translater = FileTranslater(base_url="<baseurl>",#大模型的baseurl translater = FileTranslater(base_url="<baseurl>", # 大模型的baseurl
key="<api-key>",#大模型的api-key key="<api-key>", # 大模型的api-key
model_id="<model-id>",#大模型的model-id model_id="<model-id>", # 大模型的model-id
convert_engin="mineru", # 使用mineru解析文档 convert_engin="mineru", # 使用mineru解析文档
mineru_token="<申请的mineru_token>" # 使用mineru时必填 mineru_token="<申请的mineru_token>" # 使用mineru时必填
# convert_engin="docling" # 使用docling解析文档 # convert_engin="docling" # 使用docling解析文档
@@ -163,13 +163,16 @@ translater.translate_file("<文件路径>", to_lang="中文", refine_agent=refin
```python ```python
from docutranslate import FileTranslater from docutranslate import FileTranslater
translater = FileTranslater(base_url="<baseurl>", translater = FileTranslater(convert_engin="mineru", # 使用mineru解析文档
key="<key>", mineru_token="<申请的mineru_token>" # 使用mineru时必填
model_id="<model-id>") # convert_engin="docling" # 使用docling解析文档
)
# 文件转html # 文件转html
translater.read_file("<文件路径>").save_as_html() translater.read_file("<文件路径>").save_as_html()#保存
translater.read_file("<文件路径>").export_to_html()#输出字符串
# 文件转markdown # 文件转markdown
translater.read_file("<文件路径>").save_as_markdown() translater.read_file("<文件路径>").save_as_markdown()#保存
translater.read_file("<文件路径>").export_to_markdown()#输出字符串
``` ```
## 参数说明 ## 参数说明
@@ -182,8 +185,8 @@ from docutranslate import FileTranslater
translater = FileTranslater(base_url="<baseurl>", # 默认的模型baseurl translater = FileTranslater(base_url="<baseurl>", # 默认的模型baseurl
key="<api-key>", # 默认的大语言模型平台api-key key="<api-key>", # 默认的大语言模型平台api-key
model_id="<model-id>", # 默认的模型id model_id="<model-id>", # 默认的模型id
chunksize=2000, # markdown分块长度(单位byte),分块越大效果越好,也越慢,不建议超过8000 chunksize=3000, # markdown分块长度(单位byte),分块越大效果越好,也越慢,不建议超过8000
max_concurrent=20, # 并发数,受到ai平台并发量限制;如果文章很长,建议适当加大到20以上 max_concurrent=30, # 并发数,受到ai平台并发量限制;如果文章很长,建议适当加大到20以上
timeout=2000, # 调用api的超时时间 timeout=2000, # 调用api的超时时间
docling_artifact=None, # 使用提前下载好的docling模型 docling_artifact=None, # 使用提前下载好的docling模型
convert_engin="mineru", # 可选minerU或docling convert_engin="mineru", # 可选minerU或docling

View File

@@ -1,4 +1,4 @@
__version__="0.2.35" __version__="0.2.36"

View File

@@ -6,7 +6,7 @@ import markdown2
import jinja2 import jinja2
from docutranslate.agents import Agent, AgentArgs from docutranslate.agents import Agent, AgentArgs
from docutranslate.agents import MDRefineAgent, MDTranslateAgent from docutranslate.agents import MDRefineAgent, MDTranslateAgent
from docutranslate.cache import document_cacher_global from docutranslate.cacher import document_cacher_global
from docutranslate.converter import Document, ConverterMineru from docutranslate.converter import Document, ConverterMineru
from docutranslate.utils.markdown_splitter import split_markdown_text, join_markdown_texts from docutranslate.utils.markdown_splitter import split_markdown_text, join_markdown_texts
from docutranslate.utils.markdown_utils import uris2placeholder, placeholder2_uris, MaskDict, clean_markdown_math_block from docutranslate.utils.markdown_utils import uris2placeholder, placeholder2_uris, MaskDict, clean_markdown_math_block
@@ -308,7 +308,7 @@ class FileTranslater:
# 确保输出目录存在 # 确保输出目录存在
output_dir.mkdir(parents=True, exist_ok=True) output_dir.mkdir(parents=True, exist_ok=True)
full_name = output_dir / filename full_name = output_dir / filename
html_content = self.export_to_html(str(filename.resolve().stem)) html_content = self.export_to_html(title=str(full_name.resolve().stem))
with open(full_name, "w") as file: with open(full_name, "w") as file:
file.write(html_content) file.write(html_content)
translater_logger.info(f"文件已写入{full_name.resolve()}") translater_logger.info(f"文件已写入{full_name.resolve()}")

View File

@@ -8,7 +8,7 @@ dependencies = [
"markdown2>=2.5.3", "markdown2>=2.5.3",
"fastapi[standard]>=0.115.12", "fastapi[standard]>=0.115.12",
] ]
dynamic=["version"] dynamic = ["version"]
[project.optional-dependencies] [project.optional-dependencies]
docling = [ docling = [
@@ -32,7 +32,10 @@ requires = ["setuptools>=61.0"] # 或者你需要的 setuptools 版本
build-backend = "setuptools.build_meta" build-backend = "setuptools.build_meta"
backend-path = ["."] backend-path = ["."]
[tool.setuptools] [tool.setuptools]
packages = ["docutranslate"] #packages = ["docutranslate"]
[tool.setuptools.packages.find]
where = ["."]
include = ["docutranslate*"]
[tool.setuptools.package-data] [tool.setuptools.package-data]
docutranslate = ["static/**", "template/**"] docutranslate = ["static/**", "template/**"]