From bdf73212197eef029eca7188c35c98fffd76d8f9 Mon Sep 17 00:00:00 2001 From: xunbu Date: Tue, 2 Sep 2025 12:24:01 +0800 Subject: [PATCH] =?UTF-8?q?=E6=8F=90=E7=A4=BA=E8=AF=8D=E4=BC=98=E5=8C=96?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docutranslate/agents/agent.py | 2 +- docutranslate/agents/glossary_agent.py | 9 +++++---- docutranslate/ir/attachment_manager.py | 2 ++ docutranslate/static/index.html | 2 +- 4 files changed, 9 insertions(+), 6 deletions(-) diff --git a/docutranslate/agents/agent.py b/docutranslate/agents/agent.py index ce8a8ab..b6e7d83 100644 --- a/docutranslate/agents/agent.py +++ b/docutranslate/agents/agent.py @@ -18,7 +18,7 @@ from docutranslate.logger import global_logger from docutranslate.utils.utils import get_httpx_proxies MAX_RETRY_COUNT = 2 -MAX_REQUESTS_PER_ERROR = 30 +MAX_REQUESTS_PER_ERROR = 20 ThinkingMode = Literal["enable", "disable", "default"] diff --git a/docutranslate/agents/glossary_agent.py b/docutranslate/agents/glossary_agent.py index bbca2dc..ffb4d29 100644 --- a/docutranslate/agents/glossary_agent.py +++ b/docutranslate/agents/glossary_agent.py @@ -28,22 +28,23 @@ You are a professional glossary extractor # Task You will receive a JSON-formatted list of paragraphs where keys are paragraph numbers and values are paragraph contents. -You need to extract **person names** and **location names** from these paragraphs and translate these terms into {self.to_lang}. +You need to extract person names and location names from these paragraphs and translate these terms into {self.to_lang}. Finally, output a glossary of original terms:translated terms # Requirements - Do not include special tags or tags formatted as `` in the glossary - The src in the output glossary must exactly match the original term, while dst is the {self.to_lang} translation of the term - The same src should only appear once in the glossary without repetition +-Do not include common nouns in the glossary. # Output The output format should be plain JSON text in a list format {[{"src": "", "dst": ""}]} # Example -## Input (translating to Chinese): +## Input {{"0":"Jobs likes apples","1":"Bill Gates is sunbathing in Shanghai."}} -## Output +## Output(Assuming the target language is Chinese) {r'[{"src": "Jobs", "dst": "乔布斯"}, {"src": "Bill Gates", "dst": "比尔盖茨"}, {"src": "Shanghai", "dst": "上海"}]'} """ @@ -106,6 +107,6 @@ The output format should be plain JSON text in a list format self.logger.info(f"json解析错误,解析文本:{chunk},错误:{e.__repr__()}") except Exception as e: self.logger.info(f"send_segments发生错误:{e.__repr__()}") - print(f"术语表:\n{result}") + # print(f"术语表:\n{result}") self.logger.info("术语表提取完成") return result diff --git a/docutranslate/ir/attachment_manager.py b/docutranslate/ir/attachment_manager.py index 650f4b1..0a5218a 100644 --- a/docutranslate/ir/attachment_manager.py +++ b/docutranslate/ir/attachment_manager.py @@ -12,6 +12,8 @@ class AttachMent: self.identifier = identifier self.document = document + def __repr__(self): + return self.document.name class AttachMentManager: def __init__(self): diff --git a/docutranslate/static/index.html b/docutranslate/static/index.html index da6ee2a..863dc72 100644 --- a/docutranslate/static/index.html +++ b/docutranslate/static/index.html @@ -1 +1 @@ - DocuTranslate - 交互式文档翻译

DocuTranslate

如果上传的文件本身是.md格式,此项可不选。
mineru VLM是更新的内测模型。

选择一个或多个CSV文件。文件需包含'src'和'dst'两列标题,分别代表原文和译文。

GitHub主页(欢迎star❤):
https://github.com/xunbu/docutranslate

交流QQ群: 1047781902

version:

任务列表

LOGO

当前没有任务,点击“新建任务”开始吧!

预览
原文
译文
\ No newline at end of file + DocuTranslate - 交互式文档翻译

DocuTranslate

如果上传的文件本身是.md格式,此项可不选。
mineru VLM是更新的内测模型。

选择一个或多个CSV文件。文件需包含'src'和'dst'两列标题,分别代表原文和译文。

GitHub主页(欢迎star❤):
https://github.com/xunbu/docutranslate

交流QQ群: 1047781902

version:

任务列表

LOGO

当前没有任务,点击“新建任务”开始吧!

预览
原文
译文
\ No newline at end of file