取消回复前置符

This commit is contained in:
xunbu
2025-05-09 20:38:16 +08:00
parent f5502d73d5
commit dadab6057f
6 changed files with 78 additions and 66 deletions

84
.idea/workspace.xml generated
View File

@@ -7,7 +7,10 @@
<list default="true" id="6b18b44a-df57-4212-a857-9e291ebe5dd2" name="更改" comment=""> <list default="true" id="6b18b44a-df57-4212-a857-9e291ebe5dd2" name="更改" comment="">
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" /> <change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
<change beforePath="$PROJECT_DIR$/README.md" beforeDir="false" afterPath="$PROJECT_DIR$/README.md" afterDir="false" /> <change beforePath="$PROJECT_DIR$/README.md" beforeDir="false" afterPath="$PROJECT_DIR$/README.md" afterDir="false" />
<change beforePath="$PROJECT_DIR$/docutranslate/Agents/agent.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/Agents/agent.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/docutranslate/Agents/markdown_agent.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/Agents/markdown_agent.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/docutranslate/translater.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/translater.py" afterDir="false" /> <change beforePath="$PROJECT_DIR$/docutranslate/translater.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/translater.py" afterDir="false" />
<change beforePath="$PROJECT_DIR$/pyproject.toml" beforeDir="false" afterPath="$PROJECT_DIR$/pyproject.toml" afterDir="false" />
</list> </list>
<option name="SHOW_DIALOG" value="false" /> <option name="SHOW_DIALOG" value="false" />
<option name="HIGHLIGHT_CONFLICTS" value="true" /> <option name="HIGHLIGHT_CONFLICTS" value="true" />
@@ -33,43 +36,43 @@
<option name="hideEmptyMiddlePackages" value="true" /> <option name="hideEmptyMiddlePackages" value="true" />
<option name="showLibraryContents" value="true" /> <option name="showLibraryContents" value="true" />
</component> </component>
<component name="PropertiesComponent">{ <component name="PropertiesComponent"><![CDATA[{
&quot;keyToString&quot;: { "keyToString": {
&quot;DefaultHtmlFileTemplate&quot;: &quot;HTML File&quot;, "DefaultHtmlFileTemplate": "HTML File",
&quot;JavaScript 调试.output.html (1).executor&quot;: &quot;Run&quot;, "JavaScript 调试.output.html (1).executor": "Run",
&quot;JavaScript 调试.output.html.executor&quot;: &quot;Run&quot;, "JavaScript 调试.output.html.executor": "Run",
&quot;JavaScript 调试.regex_中文.html.executor&quot;: &quot;Run&quot;, "JavaScript 调试.regex_中文.html.executor": "Run",
&quot;JavaScript 调试.test2_英文.html.executor&quot;: &quot;Run&quot;, "JavaScript 调试.test2_英文.html.executor": "Run",
&quot;JavaScript 调试.test4-1_中文.html.executor&quot;: &quot;Run&quot;, "JavaScript 调试.test4-1_中文.html.executor": "Run",
&quot;JavaScript 调试.互联网认证授权机制_英文.html.executor&quot;: &quot;Run&quot;, "JavaScript 调试.互联网认证授权机制_英文.html.executor": "Run",
&quot;JavaScript 调试.毕业论文_英文.html.executor&quot;: &quot;Run&quot;, "JavaScript 调试.毕业论文_英文.html.executor": "Run",
&quot;ModuleVcsDetector.initialDetectionPerformed&quot;: &quot;true&quot;, "ModuleVcsDetector.initialDetectionPerformed": "true",
&quot;Python 测试.Python 测试 (markdown_mask.py 内).executor&quot;: &quot;Run&quot;, "Python 测试.Python 测试 (markdown_mask.py 内).executor": "Run",
&quot;Python 测试.markdown_mask.Test.test_basic_link_masking 的 Python 测试.executor&quot;: &quot;Run&quot;, "Python 测试.markdown_mask.Test.test_basic_link_masking 的 Python 测试.executor": "Run",
&quot;Python.PDFtranslater (1).executor&quot;: &quot;Run&quot;, "Python.PDFtranslater (1).executor": "Run",
&quot;Python.PDFtranslater (2).executor&quot;: &quot;Run&quot;, "Python.PDFtranslater (2).executor": "Run",
&quot;Python.agent_utils.executor&quot;: &quot;Run&quot;, "Python.agent_utils.executor": "Run",
&quot;Python.convert.executor&quot;: &quot;Run&quot;, "Python.convert.executor": "Run",
&quot;Python.markdown_splitter.executor&quot;: &quot;Run&quot;, "Python.markdown_splitter.executor": "Run",
&quot;Python.markdown_utils.executor&quot;: &quot;Run&quot;, "Python.markdown_utils.executor": "Run",
&quot;Python.test.executor&quot;: &quot;Run&quot;, "Python.test.executor": "Run",
&quot;Python.test1.executor&quot;: &quot;Run&quot;, "Python.test1.executor": "Run",
&quot;Python.test2.executor&quot;: &quot;Run&quot;, "Python.test2.executor": "Run",
&quot;Python.test3.executor&quot;: &quot;Run&quot;, "Python.test3.executor": "Run",
&quot;Python.translater.executor&quot;: &quot;Run&quot;, "Python.translater.executor": "Run",
&quot;RunOnceActivity.ShowReadmeOnStart&quot;: &quot;true&quot;, "RunOnceActivity.ShowReadmeOnStart": "true",
&quot;RunOnceActivity.git.unshallow&quot;: &quot;true&quot;, "RunOnceActivity.git.unshallow": "true",
&quot;git-widget-placeholder&quot;: &quot;main&quot;, "git-widget-placeholder": "main",
&quot;last_opened_file_path&quot;: &quot;C:/Users/jxgm/Desktop/FileTranslate/tests/resource&quot;, "last_opened_file_path": "C:/Users/jxgm/Desktop/FileTranslate/tests/resource",
&quot;node.js.detected.package.eslint&quot;: &quot;true&quot;, "node.js.detected.package.eslint": "true",
&quot;node.js.detected.package.tslint&quot;: &quot;true&quot;, "node.js.detected.package.tslint": "true",
&quot;node.js.selected.package.eslint&quot;: &quot;(autodetect)&quot;, "node.js.selected.package.eslint": "(autodetect)",
&quot;node.js.selected.package.tslint&quot;: &quot;(autodetect)&quot;, "node.js.selected.package.tslint": "(autodetect)",
&quot;nodejs_package_manager_path&quot;: &quot;npm&quot;, "nodejs_package_manager_path": "npm",
&quot;settings.editor.selected.configurable&quot;: &quot;Errors&quot;, "settings.editor.selected.configurable": "preferences.general",
&quot;vue.rearranger.settings.migration&quot;: &quot;true&quot; "vue.rearranger.settings.migration": "true"
} }
}</component> }]]></component>
<component name="RecentsManager"> <component name="RecentsManager">
<key name="CopyFile.RECENT_KEYS"> <key name="CopyFile.RECENT_KEYS">
<recent name="C:\Users\jxgm\Desktop\FileTranslate\tests\resource" /> <recent name="C:\Users\jxgm\Desktop\FileTranslate\tests\resource" />
@@ -369,6 +372,13 @@
<workItem from="1746780247620" duration="1776000" /> <workItem from="1746780247620" duration="1776000" />
<workItem from="1746782039257" duration="307000" /> <workItem from="1746782039257" duration="307000" />
<workItem from="1746782370978" duration="3979000" /> <workItem from="1746782370978" duration="3979000" />
<workItem from="1746786481394" duration="90000" />
<workItem from="1746787384154" duration="19000" />
<workItem from="1746787486668" duration="68000" />
<workItem from="1746787566021" duration="77000" />
<workItem from="1746787698072" duration="24000" />
<workItem from="1746788668813" duration="25000" />
<workItem from="1746791230782" duration="2932000" />
</task> </task>
<servers /> <servers />
</component> </component>
@@ -379,7 +389,7 @@
<SUITE FILE_PATH="coverage/filetranslate$test.coverage" NAME="test 覆盖结果" MODIFIED="1746677277745" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" /> <SUITE FILE_PATH="coverage/filetranslate$test.coverage" NAME="test 覆盖结果" MODIFIED="1746677277745" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
<SUITE FILE_PATH="coverage/filetranslate$agent_utils.coverage" NAME="agent_utils 覆盖结果" MODIFIED="1746708534311" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate/utils" /> <SUITE FILE_PATH="coverage/filetranslate$agent_utils.coverage" NAME="agent_utils 覆盖结果" MODIFIED="1746708534311" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate/utils" />
<SUITE FILE_PATH="coverage/filetranslate$convert.coverage" NAME="convert 覆盖结果" MODIFIED="1746780691113" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate/utils" /> <SUITE FILE_PATH="coverage/filetranslate$convert.coverage" NAME="convert 覆盖结果" MODIFIED="1746780691113" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate/utils" />
<SUITE FILE_PATH="coverage/filetranslate$test1.coverage" NAME="test1 覆盖结果" MODIFIED="1746785955738" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" /> <SUITE FILE_PATH="coverage/filetranslate$test1.coverage" NAME="test1 覆盖结果" MODIFIED="1746793348041" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
<SUITE FILE_PATH="coverage/PDFtranslate$PDFtranslater__1_.coverage" NAME="PDFtranslater (1) 覆盖结果" MODIFIED="1746633258205" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages" /> <SUITE FILE_PATH="coverage/PDFtranslate$PDFtranslater__1_.coverage" NAME="PDFtranslater (1) 覆盖结果" MODIFIED="1746633258205" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages" />
<SUITE FILE_PATH="coverage/PDFtranslate$convert.coverage" NAME="convert 覆盖结果" MODIFIED="1746596984213" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages/utils" /> <SUITE FILE_PATH="coverage/PDFtranslate$convert.coverage" NAME="convert 覆盖结果" MODIFIED="1746596984213" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages/utils" />
<SUITE FILE_PATH="coverage/PDFtranslate$agent_utils.coverage" NAME="agent_utils 覆盖结果" MODIFIED="1746617703678" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages/utils" /> <SUITE FILE_PATH="coverage/PDFtranslate$agent_utils.coverage" NAME="agent_utils 覆盖结果" MODIFIED="1746617703678" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages/utils" />

View File

@@ -52,7 +52,8 @@ os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
由于需要使用大语言模型进行markdown调整与翻译，所以需要预先获取模型的baseurl、key、model-id 由于需要使用大语言模型进行markdown调整与翻译，所以需要预先获取模型的baseurl、key、model-id
常见的大模型平台baseurl与api获取方式可见[常用ai平台](#常用ai平台) 常见的大模型平台baseurl与api获取方式可见[常用ai平台](#常用ai平台)
> 比较推荐的模型有阿里云的qwen-plus、智谱的glm-4-air、glm-z1-flash等。免费的智谱glm-4-flash能用但效果欠佳(2025.5) > 比较推荐的模型有阿里云的qwen-plus、智谱的glm-4-air等。免费的智谱glm-4-flash能用(2025.5)
> 推理模型需要支持api请求响应中区分`reasoning_content`和`content`，详见平台开发手册；ollama、lmstudio需开启对应选项
# 使用方式 # 使用方式
@@ -138,7 +139,7 @@ translater.translate_file(r"<要翻译的文件路径>",
to_lang="中文", to_lang="中文",
formula=False, # 是否启用公式识别 formula=False, # 是否启用公式识别
code=False, # 是否启用代码识别 code=False, # 是否启用代码识别
refine=True, # 是否在翻译前先修正markdown文本较耗时 refine=False, # 是否在翻译前先修正一遍markdown文本较耗时
output_format="markdown", # "markdown"与"html"两种输出格式 output_format="markdown", # "markdown"与"html"两种输出格式
output_dir="./output", # 默认输出文件夹 output_dir="./output", # 默认输出文件夹
refine_agent=None, # 修正Agent refine_agent=None, # 修正Agent

View File

@@ -1,20 +1,22 @@
import asyncio import asyncio
import re # import re
from typing import TypedDict from typing import TypedDict
import httpx import httpx
class AgentArgs(TypedDict, total=False): class AgentArgs(TypedDict, total=False):
baseurl:str baseurl: str
key :str key: str
model_id:str model_id: str
system_prompt:str system_prompt: str
temperature:float temperature: float
max_concurrent:int max_concurrent: int
TIMEOUT=250 TIMEOUT = 250
class Agent: class Agent:
def __init__(self, baseurl="", key="", model_id="", system_prompt="", temperature=0.7, max_concurrent=6): def __init__(self, baseurl="", key="", model_id="", system_prompt="", temperature=0.7, max_concurrent=6):
@@ -26,7 +28,7 @@ class Agent:
self.client_async = httpx.AsyncClient() self.client_async = httpx.AsyncClient()
self.max_concurrent = max_concurrent self.max_concurrent = max_concurrent
def _prepare_request_data(self, prompt:str, system_prompt:str, temperature=None, top_p=0.9): def _prepare_request_data(self, prompt: str, system_prompt: str, temperature=None, top_p=0.9):
if temperature is None: if temperature is None:
temperature = self.temperature temperature = self.temperature
headers = {"Content-Type": "application/json", headers = {"Content-Type": "application/json",
@@ -34,7 +36,8 @@ class Agent:
data = { data = {
"model": self.model_id, "model": self.model_id,
"messages": [ "messages": [
{"role": "system", "content": "重要所有回复必须以【SSS】开头该规则适用于之后的所有例子。示例【SSS】这是示例回答\n"+system_prompt}, {"role": "system", "content": system_prompt},
# {"role": "system", "content": "所有回复必须以【SSS】开头这是最高规则适用于之后的所有例子。示例【SSS】这是示例回答\n"+system_prompt},
{"role": "user", "content": prompt} {"role": "user", "content": prompt}
], ],
"temperature": temperature, "temperature": temperature,
@@ -48,7 +51,7 @@ class Agent:
"""Sends a single prompt asynchronously.""" """Sends a single prompt asynchronously."""
headers, data = self._prepare_request_data(prompt, system_prompt) headers, data = self._prepare_request_data(prompt, system_prompt)
if self.baseurl.endswith("/"): if self.baseurl.endswith("/"):
self.baseurl=self.baseurl[:-1] self.baseurl = self.baseurl[:-1]
try: try:
response = await self.client_async.post( response = await self.client_async.post(
f"{self.baseurl}/chat/completions", f"{self.baseurl}/chat/completions",
@@ -57,13 +60,13 @@ class Agent:
timeout=timeout timeout=timeout
) )
response.raise_for_status() response.raise_for_status()
result=response.json()["choices"][0]["message"]["content"] result = response.json()["choices"][0]["message"]["content"]
pattern=r".*【SSS】(.*)" # pattern = r".*【SSS】(.*)"
match= re.search(pattern,result, re.DOTALL) # match = re.search(pattern, result, re.DOTALL)
if match is None: # if match is None:
print("检测开头`【SSS】`失败") # print("检测开头`【SSS】`失败")
else: # else:
result=match.group(1) # result = match.group(1)
return result return result
except httpx.HTTPStatusError as e: except httpx.HTTPStatusError as e:
raise Exception(f"AI请求错误 (async): {e.response.status_code} - {e.response.text}") from e raise Exception(f"AI请求错误 (async): {e.response.status_code} - {e.response.text}") from e
@@ -81,9 +84,6 @@ class Agent:
) -> list[str]: ) -> list[str]:
total = len(prompts) total = len(prompts)
count = 0 count = 0
"""
Sends multiple prompts asynchronously, limiting concurrent requests.
"""
semaphore = asyncio.Semaphore(max_concurrent) semaphore = asyncio.Semaphore(max_concurrent)
tasks = [] tasks = []

View File

@@ -10,9 +10,9 @@ class MDRefineAgent(Agent):
你是一个修正markdown文本的专家。 你是一个修正markdown文本的专家。
# 工作 # 工作
找到markdown片段的不合理之处对于缺失的句子应该查看缺失的语句是否可能被错误的放在了其他位置并通过重组段落、去掉异常字词修复不合理之处。 找到markdown片段的不合理之处对于缺失的句子应该查看缺失的语句是否可能被错误的放在了其他位置并通过重组段落、去掉异常字词修复不合理之处。
尽量忠实于原文。形如<ph-abc123>的占位符不要改变。code和latex保持原文。 尽量忠实于原文。输入文本开头和结尾如有空行请保留,形如<ph-abc123>的占位符不要改变。code和latex保持原文。
# 输出 # 输出
修正后的markdown纯文本 修正后的markdown纯文本(不能有多余文字)
# 示例 # 示例
## 调整顺序 ## 调整顺序
输入: 输入:
@@ -35,11 +35,12 @@ class MDTranslateAgent(Agent):
你是一个翻译markdown文本的专家。 你是一个翻译markdown文本的专家。
# 工作 # 工作
将输入的markdown文本翻译成{to_lang} 将输入的markdown文本翻译成{to_lang}
尽量忠实于原文(如空行) 尽量忠实于原文,修改明显错误的字符
输入文本开头和结尾如有空行请保留。
形如<ph-abc123>的占位符不要改变。 形如<ph-abc123>的占位符不要改变。
code和latex保持原文。 code和latex保持原文。
# 输出 # 输出
翻译后的markdown纯文本 翻译后的markdown纯文本(不能有多余文字)
# 示例 # 示例
## 英文翻译为中文: ## 英文翻译为中文:
输入: 输入:

View File

@@ -190,7 +190,7 @@ class FileTranslater:
def translate_file(self, file_path: Path | str | None = None, to_lang="中文", output_dir="./output", def translate_file(self, file_path: Path | str | None = None, to_lang="中文", output_dir="./output",
formula=False, formula=False,
code=False, output_format: Literal["markdown", "html"] = "markdown", refine=True, code=False, output_format: Literal["markdown", "html"] = "markdown", refine=False,
refine_agent: Agent | None = None, translate_agent: Agent | None = None): refine_agent: Agent | None = None, translate_agent: Agent | None = None):
if file_path is None: if file_path is None:
assert self.file_path is not None, "未输入文件路径" assert self.file_path is not None, "未输入文件路径"

View File

@@ -1,6 +1,6 @@
[project] [project]
name = "docutranslate" name = "docutranslate"
version = "0.1.0" version = "0.1.1"
description = "文件翻译工具" description = "文件翻译工具"
readme = "README.md" readme = "README.md"
requires-python = ">=3.10" requires-python = ">=3.10"