fix
This commit is contained in:
75
.idea/workspace.xml
generated
75
.idea/workspace.xml
generated
@@ -5,7 +5,11 @@
|
|||||||
</component>
|
</component>
|
||||||
<component name="ChangeListManager">
|
<component name="ChangeListManager">
|
||||||
<list default="true" id="6b18b44a-df57-4212-a857-9e291ebe5dd2" name="更改" comment="">
|
<list default="true" id="6b18b44a-df57-4212-a857-9e291ebe5dd2" name="更改" comment="">
|
||||||
|
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
|
||||||
<change beforePath="$PROJECT_DIR$/README.md" beforeDir="false" afterPath="$PROJECT_DIR$/README.md" afterDir="false" />
|
<change beforePath="$PROJECT_DIR$/README.md" beforeDir="false" afterPath="$PROJECT_DIR$/README.md" afterDir="false" />
|
||||||
|
<change beforePath="$PROJECT_DIR$/docutranslate/__init__.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/__init__.py" afterDir="false" />
|
||||||
|
<change beforePath="$PROJECT_DIR$/docutranslate/utils/agent_utils.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/utils/agent_utils.py" afterDir="false" />
|
||||||
|
<change beforePath="$PROJECT_DIR$/pyproject.toml" beforeDir="false" afterPath="$PROJECT_DIR$/pyproject.toml" afterDir="false" />
|
||||||
</list>
|
</list>
|
||||||
<option name="SHOW_DIALOG" value="false" />
|
<option name="SHOW_DIALOG" value="false" />
|
||||||
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
||||||
@@ -31,38 +35,38 @@
|
|||||||
<option name="hideEmptyMiddlePackages" value="true" />
|
<option name="hideEmptyMiddlePackages" value="true" />
|
||||||
<option name="showLibraryContents" value="true" />
|
<option name="showLibraryContents" value="true" />
|
||||||
</component>
|
</component>
|
||||||
<component name="PropertiesComponent">{
|
<component name="PropertiesComponent"><![CDATA[{
|
||||||
"keyToString": {
|
"keyToString": {
|
||||||
"DefaultHtmlFileTemplate": "HTML File",
|
"DefaultHtmlFileTemplate": "HTML File",
|
||||||
"JavaScript 调试.output.html (1).executor": "Run",
|
"JavaScript 调试.output.html (1).executor": "Run",
|
||||||
"JavaScript 调试.output.html.executor": "Run",
|
"JavaScript 调试.output.html.executor": "Run",
|
||||||
"JavaScript 调试.regex_中文.html.executor": "Run",
|
"JavaScript 调试.regex_中文.html.executor": "Run",
|
||||||
"JavaScript 调试.test2_英文.html.executor": "Run",
|
"JavaScript 调试.test2_英文.html.executor": "Run",
|
||||||
"ModuleVcsDetector.initialDetectionPerformed": "true",
|
"ModuleVcsDetector.initialDetectionPerformed": "true",
|
||||||
"Python 测试.Python 测试 (markdown_mask.py 内).executor": "Run",
|
"Python 测试.Python 测试 (markdown_mask.py 内).executor": "Run",
|
||||||
"Python 测试.markdown_mask.Test.test_basic_link_masking 的 Python 测试.executor": "Run",
|
"Python 测试.markdown_mask.Test.test_basic_link_masking 的 Python 测试.executor": "Run",
|
||||||
"Python.PDFtranslater (1).executor": "Run",
|
"Python.PDFtranslater (1).executor": "Run",
|
||||||
"Python.PDFtranslater (2).executor": "Run",
|
"Python.PDFtranslater (2).executor": "Run",
|
||||||
"Python.agent_utils.executor": "Run",
|
"Python.agent_utils.executor": "Run",
|
||||||
"Python.convert.executor": "Run",
|
"Python.convert.executor": "Run",
|
||||||
"Python.markdown_splitter.executor": "Run",
|
"Python.markdown_splitter.executor": "Run",
|
||||||
"Python.markdown_utils.executor": "Run",
|
"Python.markdown_utils.executor": "Run",
|
||||||
"Python.test.executor": "Run",
|
"Python.test.executor": "Run",
|
||||||
"Python.test1.executor": "Run",
|
"Python.test1.executor": "Run",
|
||||||
"Python.translater.executor": "Debug",
|
"Python.translater.executor": "Debug",
|
||||||
"RunOnceActivity.ShowReadmeOnStart": "true",
|
"RunOnceActivity.ShowReadmeOnStart": "true",
|
||||||
"RunOnceActivity.git.unshallow": "true",
|
"RunOnceActivity.git.unshallow": "true",
|
||||||
"git-widget-placeholder": "master",
|
"git-widget-placeholder": "main",
|
||||||
"last_opened_file_path": "C:/Users/jxgm/Desktop/FileTranslate/tests/resource",
|
"last_opened_file_path": "C:/Users/jxgm/Desktop/FileTranslate/tests/resource",
|
||||||
"node.js.detected.package.eslint": "true",
|
"node.js.detected.package.eslint": "true",
|
||||||
"node.js.detected.package.tslint": "true",
|
"node.js.detected.package.tslint": "true",
|
||||||
"node.js.selected.package.eslint": "(autodetect)",
|
"node.js.selected.package.eslint": "(autodetect)",
|
||||||
"node.js.selected.package.tslint": "(autodetect)",
|
"node.js.selected.package.tslint": "(autodetect)",
|
||||||
"nodejs_package_manager_path": "npm",
|
"nodejs_package_manager_path": "npm",
|
||||||
"settings.editor.selected.configurable": "Errors",
|
"settings.editor.selected.configurable": "Errors",
|
||||||
"vue.rearranger.settings.migration": "true"
|
"vue.rearranger.settings.migration": "true"
|
||||||
}
|
}
|
||||||
}</component>
|
}]]></component>
|
||||||
<component name="RecentsManager">
|
<component name="RecentsManager">
|
||||||
<key name="CopyFile.RECENT_KEYS">
|
<key name="CopyFile.RECENT_KEYS">
|
||||||
<recent name="C:\Users\jxgm\Desktop\FileTranslate\tests\resource" />
|
<recent name="C:\Users\jxgm\Desktop\FileTranslate\tests\resource" />
|
||||||
@@ -75,7 +79,7 @@
|
|||||||
<recent name="C:\Users\jxgm\Desktop\PDFtranslate\pdf" />
|
<recent name="C:\Users\jxgm\Desktop\PDFtranslate\pdf" />
|
||||||
</key>
|
</key>
|
||||||
</component>
|
</component>
|
||||||
<component name="RunManager" selected="JavaScript 调试.regex_中文.html">
|
<component name="RunManager" selected="Python.test1">
|
||||||
<configuration default="true" type="DjangoTestsConfigurationType">
|
<configuration default="true" type="DjangoTestsConfigurationType">
|
||||||
<module name="filetranslate" />
|
<module name="filetranslate" />
|
||||||
<option name="ENV_FILES" value="" />
|
<option name="ENV_FILES" value="" />
|
||||||
@@ -274,8 +278,8 @@
|
|||||||
</configuration>
|
</configuration>
|
||||||
<recent_temporary>
|
<recent_temporary>
|
||||||
<list>
|
<list>
|
||||||
<item itemvalue="JavaScript 调试.regex_中文.html" />
|
|
||||||
<item itemvalue="Python.test1" />
|
<item itemvalue="Python.test1" />
|
||||||
|
<item itemvalue="JavaScript 调试.regex_中文.html" />
|
||||||
<item itemvalue="JavaScript 调试.output.html (1)" />
|
<item itemvalue="JavaScript 调试.output.html (1)" />
|
||||||
<item itemvalue="JavaScript 调试.output.html" />
|
<item itemvalue="JavaScript 调试.output.html" />
|
||||||
<item itemvalue="Python.PDFtranslater (2)" />
|
<item itemvalue="Python.PDFtranslater (2)" />
|
||||||
@@ -301,7 +305,8 @@
|
|||||||
<workItem from="1746588383790" duration="2614000" />
|
<workItem from="1746588383790" duration="2614000" />
|
||||||
<workItem from="1746593417117" duration="25924000" />
|
<workItem from="1746593417117" duration="25924000" />
|
||||||
<workItem from="1746626070703" duration="7931000" />
|
<workItem from="1746626070703" duration="7931000" />
|
||||||
<workItem from="1746669839816" duration="20338000" />
|
<workItem from="1746669839816" duration="20657000" />
|
||||||
|
<workItem from="1746693297445" duration="538000" />
|
||||||
</task>
|
</task>
|
||||||
<servers />
|
<servers />
|
||||||
</component>
|
</component>
|
||||||
@@ -311,7 +316,7 @@
|
|||||||
<component name="com.intellij.coverage.CoverageDataManagerImpl">
|
<component name="com.intellij.coverage.CoverageDataManagerImpl">
|
||||||
<SUITE FILE_PATH="coverage/filetranslate$test.coverage" NAME="test 覆盖结果" MODIFIED="1746677277745" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
|
<SUITE FILE_PATH="coverage/filetranslate$test.coverage" NAME="test 覆盖结果" MODIFIED="1746677277745" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
|
||||||
<SUITE FILE_PATH="coverage/PDFtranslate$translater.coverage" NAME="translater 覆盖结果" MODIFIED="1746600434803" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages" />
|
<SUITE FILE_PATH="coverage/PDFtranslate$translater.coverage" NAME="translater 覆盖结果" MODIFIED="1746600434803" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages" />
|
||||||
<SUITE FILE_PATH="coverage/filetranslate$test1.coverage" NAME="test1 覆盖结果" MODIFIED="1746683792124" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
|
<SUITE FILE_PATH="coverage/filetranslate$test1.coverage" NAME="test1 覆盖结果" MODIFIED="1746693789697" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
|
||||||
<SUITE FILE_PATH="coverage/PDFtranslate$PDFtranslater__1_.coverage" NAME="PDFtranslater (1) 覆盖结果" MODIFIED="1746633258205" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages" />
|
<SUITE FILE_PATH="coverage/PDFtranslate$PDFtranslater__1_.coverage" NAME="PDFtranslater (1) 覆盖结果" MODIFIED="1746633258205" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages" />
|
||||||
<SUITE FILE_PATH="coverage/PDFtranslate$convert.coverage" NAME="convert 覆盖结果" MODIFIED="1746596984213" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages/utils" />
|
<SUITE FILE_PATH="coverage/PDFtranslate$convert.coverage" NAME="convert 覆盖结果" MODIFIED="1746596984213" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages/utils" />
|
||||||
<SUITE FILE_PATH="coverage/PDFtranslate$agent_utils.coverage" NAME="agent_utils 覆盖结果" MODIFIED="1746617703678" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages/utils" />
|
<SUITE FILE_PATH="coverage/PDFtranslate$agent_utils.coverage" NAME="agent_utils 覆盖结果" MODIFIED="1746617703678" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages/utils" />
|
||||||
|
|||||||
15
README.md
15
README.md
@@ -21,12 +21,15 @@
|
|||||||
```python
|
```python
|
||||||
from docutranslate.translater import FileTranslater
|
from docutranslate.translater import FileTranslater
|
||||||
|
|
||||||
|
|
||||||
|
translater=FileTranslater(base_url="<baseurl>",
|
||||||
|
key="<key>",
|
||||||
|
model_id="<model-id>")
|
||||||
# 不开启公式、代码识别
|
# 不开启公式、代码识别
|
||||||
FileTranslater(base_url="<baseurl>", key="<key>", model_id="<model-id>").translate_pdf_file("<pdf路径>", to_lang="中文")
|
translater.translate_pdf_file("<pdf路径>", to_lang="中文")
|
||||||
|
|
||||||
# 开启公式、代码识别(需要下载更多模型)
|
# 开启公式、代码识别(需要下载更多模型)
|
||||||
FileTranslater(base_url="<baseurl>", key="<key>", model_id="<model-id>").translate_pdf_file("<pdf路径>", to_lang="中文",
|
translater.translate_pdf_file("<pdf路径>", to_lang="中文",formula=True, code=True)
|
||||||
formula=True, code=True)
|
|
||||||
```
|
```
|
||||||
> 第一次使用时需要下载模型(约1G、使用公式、代码识别需要多约0.5G),请稍作等待
|
> 第一次使用时需要下载模型(约1G、使用公式、代码识别需要多约0.5G),请稍作等待
|
||||||
> 输出文件默认放在`./output`中
|
> 输出文件默认放在`./output`中
|
||||||
@@ -34,7 +37,7 @@ FileTranslater(base_url="<baseurl>", key="<key>", model_id="<model-id>").transla
|
|||||||
## 使用不同的agent分别进行文本修正和翻译
|
## 使用不同的agent分别进行文本修正和翻译
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from docutranslate.translater import FileTranslater
|
from docutranslate import FileTranslater
|
||||||
|
|
||||||
translater = FileTranslater()
|
translater = FileTranslater()
|
||||||
|
|
||||||
@@ -49,7 +52,7 @@ translater.translate_pdf_file(pdf_path="<pdf路径>", to_lang="中文", refine_a
|
|||||||
### 创建FileTranslate
|
### 创建FileTranslate
|
||||||
|
|
||||||
```python
|
```python
|
||||||
from docutranslate.translater import FileTranslater
|
from docutranslate import FileTranslater
|
||||||
|
|
||||||
translater = FileTranslater(base_url="<baseurl>",
|
translater = FileTranslater(base_url="<baseurl>",
|
||||||
key="<key>",
|
key="<key>",
|
||||||
@@ -84,7 +87,7 @@ translater.translate_markdown_file(r"<要翻译的markdown路径>",
|
|||||||
|
|
||||||
# 常用baseurl
|
# 常用baseurl
|
||||||
| 平台名称 | baseurl |
|
| 平台名称 | baseurl |
|
||||||
|-----------|--------------------------------------|
|
|----------|--------------------------------------|
|
||||||
| ollama | http://127.0.0.1:11434/v1 |
|
| ollama | http://127.0.0.1:11434/v1 |
|
||||||
| lm studio | http://127.0.0.1:1234/v1 |
|
| lm studio | http://127.0.0.1:1234/v1 |
|
||||||
| openai | https://api.openai.com/v1/ |
|
| openai | https://api.openai.com/v1/ |
|
||||||
|
|||||||
@@ -0,0 +1 @@
|
|||||||
|
from .translater import FileTranslater
|
||||||
@@ -2,6 +2,7 @@ import asyncio
|
|||||||
|
|
||||||
import httpx
|
import httpx
|
||||||
|
|
||||||
|
TIMEOUT=250
|
||||||
|
|
||||||
class Agent:
|
class Agent:
|
||||||
def __init__(self, baseurl="", key="", model_id="", system_prompt="", temperature=0.7, max_concurrent=5):
|
def __init__(self, baseurl="", key="", model_id="", system_prompt="", temperature=0.7, max_concurrent=5):
|
||||||
@@ -30,7 +31,7 @@ class Agent:
|
|||||||
}
|
}
|
||||||
return headers, data
|
return headers, data
|
||||||
|
|
||||||
# def send_prompt(self,prompt,system_prompt=None,timeout=50):
|
# def send_prompt(self,prompt,system_prompt=None,timeout=TIMEOUT):
|
||||||
# if system_prompt is None:
|
# if system_prompt is None:
|
||||||
# system_prompt=self.system_prompt
|
# system_prompt=self.system_prompt
|
||||||
# headers,data=self._prepare_request_data(prompt,system_prompt)
|
# headers,data=self._prepare_request_data(prompt,system_prompt)
|
||||||
@@ -38,7 +39,7 @@ class Agent:
|
|||||||
# response.raise_for_status()
|
# response.raise_for_status()
|
||||||
# return response.json()["choices"][0]["message"]["content"].lstrip()
|
# return response.json()["choices"][0]["message"]["content"].lstrip()
|
||||||
|
|
||||||
async def send_async(self, prompt: str, system_prompt: None | str = None, timeout: int = 200) -> str:
|
async def send_async(self, prompt: str, system_prompt: None | str = None, timeout: int = TIMEOUT) -> str:
|
||||||
if system_prompt is None:
|
if system_prompt is None:
|
||||||
system_prompt = self.system_prompt
|
system_prompt = self.system_prompt
|
||||||
"""Sends a single prompt asynchronously."""
|
"""Sends a single prompt asynchronously."""
|
||||||
@@ -64,7 +65,7 @@ class Agent:
|
|||||||
self,
|
self,
|
||||||
prompts: list[str],
|
prompts: list[str],
|
||||||
system_prompt: str | None = None,
|
system_prompt: str | None = None,
|
||||||
timeout: int = 50,
|
timeout: int = TIMEOUT,
|
||||||
max_concurrent: int = 5 # 新增参数,默认并发数为5
|
max_concurrent: int = 5 # 新增参数,默认并发数为5
|
||||||
) -> list[str]:
|
) -> list[str]:
|
||||||
total = len(prompts)
|
total = len(prompts)
|
||||||
@@ -81,7 +82,7 @@ class Agent:
|
|||||||
result = await self.send_async(
|
result = await self.send_async(
|
||||||
prompt=p_text,
|
prompt=p_text,
|
||||||
system_prompt=system_prompt,
|
system_prompt=system_prompt,
|
||||||
timeout=timeout
|
timeout=TIMEOUT
|
||||||
)
|
)
|
||||||
nonlocal count
|
nonlocal count
|
||||||
count += 1
|
count += 1
|
||||||
@@ -99,7 +100,7 @@ class Agent:
|
|||||||
self,
|
self,
|
||||||
prompts: list[str],
|
prompts: list[str],
|
||||||
system_prompt: str | None = None,
|
system_prompt: str | None = None,
|
||||||
timeout: int = 50,
|
timeout: int = TIMEOUT,
|
||||||
) -> list[str]:
|
) -> list[str]:
|
||||||
result = asyncio.run(self.send_prompts_async(prompts, system_prompt, timeout, self.max_concurrent))
|
result = asyncio.run(self.send_prompts_async(prompts, system_prompt, timeout, self.max_concurrent))
|
||||||
return result
|
return result
|
||||||
|
|||||||
@@ -8,5 +8,6 @@ dependencies = [
|
|||||||
"docling>=2.31.0",
|
"docling>=2.31.0",
|
||||||
"httpx>=0.28.1",
|
"httpx>=0.28.1",
|
||||||
"markdown2>=2.5.3",
|
"markdown2>=2.5.3",
|
||||||
|
"transformers>=4.42.4",
|
||||||
]
|
]
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user