small fix

This commit is contained in:
xunbu
2025-05-09 18:28:20 +08:00
parent 63abf7daac
commit f5502d73d5
3 changed files with 23 additions and 22 deletions

17
.idea/workspace.xml generated
View File

@@ -5,18 +5,9 @@
</component> </component>
<component name="ChangeListManager"> <component name="ChangeListManager">
<list default="true" id="6b18b44a-df57-4212-a857-9e291ebe5dd2" name="更改" comment=""> <list default="true" id="6b18b44a-df57-4212-a857-9e291ebe5dd2" name="更改" comment="">
<change afterPath="$PROJECT_DIR$/docutranslate/Agents/__init__.py" afterDir="false" />
<change afterPath="$PROJECT_DIR$/docutranslate/Agents/markdown_agent.py" afterDir="false" />
<change afterPath="$PROJECT_DIR$/docutranslate/utils/docling_utils.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" /> <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/README.md" beforeDir="false" afterPath="$PROJECT_DIR$/README.md" afterDir="false" /> <change beforePath="$PROJECT_DIR$/README.md" beforeDir="false" afterPath="$PROJECT_DIR$/README.md" afterDir="false" />
<change beforePath="$PROJECT_DIR$/docutranslate/decorator/__init__.py" beforeDir="false" />
<change beforePath="$PROJECT_DIR$/docutranslate/decorator/markdown_mask.py" beforeDir="false" />
<change beforePath="$PROJECT_DIR$/docutranslate/decorator/time.py" beforeDir="false" />
<change beforePath="$PROJECT_DIR$/docutranslate/translater.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/translater.py" afterDir="false" /> <change beforePath="$PROJECT_DIR$/docutranslate/translater.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/translater.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/docutranslate/utils/agent_utils.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/Agents/agent.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/docutranslate/utils/convert.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/utils/convert.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/pyproject.toml" beforeDir="false" afterPath="$PROJECT_DIR$/pyproject.toml" afterDir="false" />
</list> </list>
<option name="SHOW_DIALOG" value="false" /> <option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" /> <option name="HIGHLIGHT_CONFLICTS" value="true" />
@@ -93,7 +84,7 @@
<recent name="C:\Users\jxgm\Desktop\PDFtranslate\pdf" /> <recent name="C:\Users\jxgm\Desktop\PDFtranslate\pdf" />
</key> </key>
</component> </component>
<component name="RunManager" selected="Python.test3"> <component name="RunManager" selected="Python.test1">
<configuration default="true" type="DjangoTestsConfigurationType"> <configuration default="true" type="DjangoTestsConfigurationType">
<module name="filetranslate" /> <module name="filetranslate" />
<option name="ENV_FILES" value="" /> <option name="ENV_FILES" value="" />
@@ -335,8 +326,8 @@
</configuration> </configuration>
<recent_temporary> <recent_temporary>
<list> <list>
<item itemvalue="Python.test3" />
<item itemvalue="Python.test1" /> <item itemvalue="Python.test1" />
<item itemvalue="Python.test3" />
<item itemvalue="Python.convert" /> <item itemvalue="Python.convert" />
<item itemvalue="Python.translater" /> <item itemvalue="Python.translater" />
<item itemvalue="JavaScript 调试.毕业论文_英文.html" /> <item itemvalue="JavaScript 调试.毕业论文_英文.html" />
@@ -377,7 +368,7 @@
<workItem from="1746779030113" duration="1102000" /> <workItem from="1746779030113" duration="1102000" />
<workItem from="1746780247620" duration="1776000" /> <workItem from="1746780247620" duration="1776000" />
<workItem from="1746782039257" duration="307000" /> <workItem from="1746782039257" duration="307000" />
<workItem from="1746782370978" duration="3228000" /> <workItem from="1746782370978" duration="3979000" />
</task> </task>
<servers /> <servers />
</component> </component>
@@ -388,7 +379,7 @@
<SUITE FILE_PATH="coverage/filetranslate$test.coverage" NAME="test 覆盖结果" MODIFIED="1746677277745" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" /> <SUITE FILE_PATH="coverage/filetranslate$test.coverage" NAME="test 覆盖结果" MODIFIED="1746677277745" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
<SUITE FILE_PATH="coverage/filetranslate$agent_utils.coverage" NAME="agent_utils 覆盖结果" MODIFIED="1746708534311" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate/utils" /> <SUITE FILE_PATH="coverage/filetranslate$agent_utils.coverage" NAME="agent_utils 覆盖结果" MODIFIED="1746708534311" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate/utils" />
<SUITE FILE_PATH="coverage/filetranslate$convert.coverage" NAME="convert 覆盖结果" MODIFIED="1746780691113" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate/utils" /> <SUITE FILE_PATH="coverage/filetranslate$convert.coverage" NAME="convert 覆盖结果" MODIFIED="1746780691113" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate/utils" />
<SUITE FILE_PATH="coverage/filetranslate$test1.coverage" NAME="test1 覆盖结果" MODIFIED="1746782563450" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" /> <SUITE FILE_PATH="coverage/filetranslate$test1.coverage" NAME="test1 覆盖结果" MODIFIED="1746785955738" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
<SUITE FILE_PATH="coverage/PDFtranslate$PDFtranslater__1_.coverage" NAME="PDFtranslater (1) 覆盖结果" MODIFIED="1746633258205" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages" /> <SUITE FILE_PATH="coverage/PDFtranslate$PDFtranslater__1_.coverage" NAME="PDFtranslater (1) 覆盖结果" MODIFIED="1746633258205" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages" />
<SUITE FILE_PATH="coverage/PDFtranslate$convert.coverage" NAME="convert 覆盖结果" MODIFIED="1746596984213" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages/utils" /> <SUITE FILE_PATH="coverage/PDFtranslate$convert.coverage" NAME="convert 覆盖结果" MODIFIED="1746596984213" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages/utils" />
<SUITE FILE_PATH="coverage/PDFtranslate$agent_utils.coverage" NAME="agent_utils 覆盖结果" MODIFIED="1746617703678" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages/utils" /> <SUITE FILE_PATH="coverage/PDFtranslate$agent_utils.coverage" NAME="agent_utils 覆盖结果" MODIFIED="1746617703678" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages/utils" />

View File

@@ -60,8 +60,8 @@ os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
以下操作会自动从[huggingface](https://huggingface.co)下载模型，windows需要使用**管理员模式**打开IDE运行脚本，并按需换源 以下操作会自动从[huggingface](https://huggingface.co)下载模型，windows需要使用**管理员模式**打开IDE运行脚本，并按需换源
- 第一次读取非markdown文本 - 第一次使用该库读取、翻译非markdown文本
- 第一次使用公式识别或代码识别功能 - 第一次使用该库的公式识别或代码识别功能
## 翻译文件 ## 翻译文件
@@ -120,11 +120,13 @@ translater.read_file("<文件路径>").save_as_markdown()
```python ```python
from docutranslate import FileTranslater from docutranslate import FileTranslater
translater = FileTranslater(base_url="<baseurl>", translater = FileTranslater(base_url="<baseurl>",# 默认的模型baseurl
key="<key>", key="<key>",#默认的模型api-key
model_id="<model-id>", # 使用的模型id model_id="<model-id>", # 默认的模型id
chunksize=4000, # 【可选】markdown分块长度，分块越大效果越好，不建议超过4096 chunksize=4000, # markdown分块长度，分块越大效果越好，不建议超过4096
max_concurrent=6 # 【可选】并发数受到ai平台并发量限制，如果文章很长建议适当加大到20以上 max_concurrent=6, # 并发数受到ai平台并发量限制，如果文章很长建议适当加大到20以上
docling_artifact=None, #使用提前下载好的docling模型
tips=True#开场提示
) )
``` ```

View File

@@ -12,7 +12,7 @@ from docutranslate.utils.markdown_utils import uris2placeholder, placeholder2_ur
class FileTranslater: class FileTranslater:
def __init__(self, file_path: Path | str | None = None, chunksize: int = 4096, base_url="", key=None, def __init__(self, file_path: Path | str | None = None, chunksize: int = 4096, base_url="", key=None,
model_id="", temperature=0.7, max_concurrent=6, docling_artifact: Path | str | None = None): model_id="", temperature=0.7, max_concurrent=6, docling_artifact: Path | str | None = None,tips=True):
if isinstance(file_path, str): if isinstance(file_path, str):
file_path = Path(file_path) file_path = Path(file_path)
self.file_path: Path = file_path self.file_path: Path = file_path
@@ -26,7 +26,15 @@ class FileTranslater:
self.model_id: str = model_id self.model_id: str = model_id
self.temperature = temperature self.temperature = temperature
self.docling_artifact=docling_artifact self.docling_artifact=docling_artifact
if tips:
print("""
=======
[docutranslate](https://github.com/xunbu/docutranslate)
>以下操作会自动从[huggingface](https://huggingface.co)下载模型windows需要使用**管理员模式**打开IDE运行脚本并按需换源
- 第一次使用该库读取、翻译非markdown文本
- 第一次使用该库的公式识别或代码识别功能
=======
""")
def _mask_uris_in_markdown(self): def _mask_uris_in_markdown(self):
self.markdown = uris2placeholder(self.markdown, self._mask_dict) self.markdown = uris2placeholder(self.markdown, self._mask_dict)
return self return self