diff --git a/.idea/workspace.xml b/.idea/workspace.xml
index 43121af..3077ad6 100644
--- a/.idea/workspace.xml
+++ b/.idea/workspace.xml
@@ -7,7 +7,10 @@
+
+
+
@@ -33,43 +36,43 @@
- {
- "keyToString": {
- "DefaultHtmlFileTemplate": "HTML File",
- "JavaScript 调试.output.html (1).executor": "Run",
- "JavaScript 调试.output.html.executor": "Run",
- "JavaScript 调试.regex_中文.html.executor": "Run",
- "JavaScript 调试.test2_英文.html.executor": "Run",
- "JavaScript 调试.test4-1_中文.html.executor": "Run",
- "JavaScript 调试.互联网认证授权机制_英文.html.executor": "Run",
- "JavaScript 调试.毕业论文_英文.html.executor": "Run",
- "ModuleVcsDetector.initialDetectionPerformed": "true",
- "Python 测试.Python 测试 (markdown_mask.py 内).executor": "Run",
- "Python 测试.markdown_mask.Test.test_basic_link_masking 的 Python 测试.executor": "Run",
- "Python.PDFtranslater (1).executor": "Run",
- "Python.PDFtranslater (2).executor": "Run",
- "Python.agent_utils.executor": "Run",
- "Python.convert.executor": "Run",
- "Python.markdown_splitter.executor": "Run",
- "Python.markdown_utils.executor": "Run",
- "Python.test.executor": "Run",
- "Python.test1.executor": "Run",
- "Python.test2.executor": "Run",
- "Python.test3.executor": "Run",
- "Python.translater.executor": "Run",
- "RunOnceActivity.ShowReadmeOnStart": "true",
- "RunOnceActivity.git.unshallow": "true",
- "git-widget-placeholder": "main",
- "last_opened_file_path": "C:/Users/jxgm/Desktop/FileTranslate/tests/resource",
- "node.js.detected.package.eslint": "true",
- "node.js.detected.package.tslint": "true",
- "node.js.selected.package.eslint": "(autodetect)",
- "node.js.selected.package.tslint": "(autodetect)",
- "nodejs_package_manager_path": "npm",
- "settings.editor.selected.configurable": "Errors",
- "vue.rearranger.settings.migration": "true"
+
+}]]>
@@ -369,6 +372,13 @@
+
+
+
+
+
+
+
@@ -379,7 +389,7 @@
-
+
diff --git a/README.md b/README.md
index a20094d..d822f4a 100644
--- a/README.md
+++ b/README.md
@@ -52,7 +52,8 @@ os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
由于需要使用大语言模型进行markdown调整与翻译,所以需要预先获取模型的baseurl、key、model-id
常见的大模型平台baseurl与api获取方式可见[常用ai平台](#常用ai平台)
-> 比较推荐的模型有阿里云的qwen-plus、智谱的glm-4-air、glm-z1-flash等。免费的智谱glm-4-flash能用但效果欠佳(2025.5)
+> 比较推荐的模型有阿里云的qwen-plus、智谱的glm-4-air等。免费的智谱glm-4-flash也能用(2025.5)。
+> 推理模型需要支持api请求响应中区分`reasoning_content`和`content`(详见平台开发手册,ollama、lmstudio需开启对应选项)
# 使用方式
@@ -138,7 +139,7 @@ translater.translate_file(r"<要翻译的文件路径>",
to_lang="中文",
formula=False, # 是否启用公式识别
code=False, # 是否启用代码识别
- refine=True, # 是否在翻译前先修正markdown文本(较耗时)
+ refine=False, # 是否在翻译前先修正一遍markdown文本(较耗时)
output_format="markdown", # "markdown"与"html"两种输出格式
output_dir="./output", # 默认输出文件夹
refine_agent=None, # 修正Agent
diff --git a/docutranslate/Agents/agent.py b/docutranslate/Agents/agent.py
index a00d2be..6b9377c 100644
--- a/docutranslate/Agents/agent.py
+++ b/docutranslate/Agents/agent.py
@@ -1,20 +1,22 @@
import asyncio
-import re
+# import re
from typing import TypedDict
import httpx
+
class AgentArgs(TypedDict, total=False):
- baseurl:str
- key :str
- model_id:str
- system_prompt:str
- temperature:float
- max_concurrent:int
+ baseurl: str
+ key: str
+ model_id: str
+ system_prompt: str
+ temperature: float
+ max_concurrent: int
-TIMEOUT=250
+TIMEOUT = 250
+
class Agent:
def __init__(self, baseurl="", key="", model_id="", system_prompt="", temperature=0.7, max_concurrent=6):
@@ -26,7 +28,7 @@ class Agent:
self.client_async = httpx.AsyncClient()
self.max_concurrent = max_concurrent
- def _prepare_request_data(self, prompt:str, system_prompt:str, temperature=None, top_p=0.9):
+ def _prepare_request_data(self, prompt: str, system_prompt: str, temperature=None, top_p=0.9):
if temperature is None:
temperature = self.temperature
headers = {"Content-Type": "application/json",
@@ -34,7 +36,8 @@ class Agent:
data = {
"model": self.model_id,
"messages": [
- {"role": "system", "content": "重要:所有回复必须以【SSS】开头(该规则适用于之后的所有例子)。示例:【SSS】这是示例回答\n"+system_prompt},
+ {"role": "system", "content": system_prompt},
+ # {"role": "system", "content": "所有回复必须以【SSS】开头(这是最高规则,适用于之后的所有例子)。示例:【SSS】这是示例回答\n"+system_prompt},
{"role": "user", "content": prompt}
],
"temperature": temperature,
@@ -48,7 +51,7 @@ class Agent:
"""Sends a single prompt asynchronously."""
headers, data = self._prepare_request_data(prompt, system_prompt)
if self.baseurl.endswith("/"):
- self.baseurl=self.baseurl[:-1]
+ self.baseurl = self.baseurl[:-1]
try:
response = await self.client_async.post(
f"{self.baseurl}/chat/completions",
@@ -57,13 +60,13 @@ class Agent:
timeout=timeout
)
response.raise_for_status()
- result=response.json()["choices"][0]["message"]["content"]
- pattern=r".*【SSS】(.*)"
- match= re.search(pattern,result, re.DOTALL)
- if match is None:
- print("检测开头`【SSS】`失败")
- else:
- result=match.group(1)
+ result = response.json()["choices"][0]["message"]["content"]
+ # pattern = r".*【SSS】(.*)"
+ # match = re.search(pattern, result, re.DOTALL)
+ # if match is None:
+ # print("检测开头`【SSS】`失败")
+ # else:
+ # result = match.group(1)
return result
except httpx.HTTPStatusError as e:
raise Exception(f"AI请求错误 (async): {e.response.status_code} - {e.response.text}") from e
@@ -81,9 +84,6 @@ class Agent:
) -> list[str]:
total = len(prompts)
count = 0
- """
- Sends multiple prompts asynchronously, limiting concurrent requests.
- """
semaphore = asyncio.Semaphore(max_concurrent)
tasks = []
diff --git a/docutranslate/Agents/markdown_agent.py b/docutranslate/Agents/markdown_agent.py
index 1112120..417368d 100644
--- a/docutranslate/Agents/markdown_agent.py
+++ b/docutranslate/Agents/markdown_agent.py
@@ -10,9 +10,9 @@ class MDRefineAgent(Agent):
你是一个修正markdown文本的专家。
# 工作
找到markdown片段的不合理之处,对于缺失的句子,应该查看缺失的语句是否可能被错误的放在了其他位置,并通过重组段落、去掉异常字词修复不合理之处。
-尽量忠实于原文。形如的占位符不要改变。code和latex保持原文。
+尽量忠实于原文。输入文本开头和结尾如有空行请保留,形如的占位符不要改变。code和latex保持原文。
# 输出
-修正后的markdown纯文本
+修正后的markdown纯文本(不能有多余文字)
# 示例
## 调整顺序
输入:
@@ -35,11 +35,12 @@ class MDTranslateAgent(Agent):
你是一个翻译markdown文本的专家。
# 工作
将输入的markdown文本翻译成{to_lang}。
-尽量忠实于原文(如空行)。
+尽量忠实于原文,修改明显错误的字符。
+输入文本开头和结尾如有空行请保留。
形如的占位符不要改变。
code和latex保持原文。
# 输出
-翻译后的markdown纯文本
+翻译后的markdown纯文本(不能有多余文字)
# 示例
## 英文翻译为中文:
输入:
diff --git a/docutranslate/translater.py b/docutranslate/translater.py
index bf3802d..1be2e97 100644
--- a/docutranslate/translater.py
+++ b/docutranslate/translater.py
@@ -190,7 +190,7 @@ class FileTranslater:
def translate_file(self, file_path: Path | str | None = None, to_lang="中文", output_dir="./output",
formula=False,
- code=False, output_format: Literal["markdown", "html"] = "markdown", refine=True,
+ code=False, output_format: Literal["markdown", "html"] = "markdown", refine=False,
refine_agent: Agent | None = None, translate_agent: Agent | None = None):
if file_path is None:
assert self.file_path is not None, "未输入文件路径"
diff --git a/pyproject.toml b/pyproject.toml
index 434da1a..cad0378 100644
--- a/pyproject.toml
+++ b/pyproject.toml
@@ -1,6 +1,6 @@
[project]
name = "docutranslate"
-version = "0.1.0"
+version = "0.1.1"
description = "文件翻译工具"
readme = "README.md"
requires-python = ">=3.10"