From 1ffed8f280a994fe43eb64bf93e21e9b9fede8d3 Mon Sep 17 00:00:00 2001 From: xunbu Date: Wed, 27 Aug 2025 21:11:02 +0800 Subject: [PATCH] =?UTF-8?q?translator=E5=A2=9E=E5=8A=A0glossary=5Fdict?= =?UTF-8?q?=E7=9A=84=E9=85=8D=E7=BD=AE?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docutranslate/translator/ai_translator/base.py | 2 ++ docutranslate/translator/ai_translator/docx_translator.py | 3 ++- docutranslate/translator/ai_translator/epub_translator.py | 3 ++- docutranslate/translator/ai_translator/html_translator.py | 3 ++- docutranslate/translator/ai_translator/json_translator.py | 3 ++- docutranslate/translator/ai_translator/md_translator.py | 7 ++++--- docutranslate/translator/ai_translator/srt_translator.py | 3 ++- docutranslate/translator/ai_translator/txt_translator.py | 7 ++++--- docutranslate/translator/ai_translator/xlsx_translator.py | 3 ++- 9 files changed, 22 insertions(+), 12 deletions(-) diff --git a/docutranslate/translator/ai_translator/base.py b/docutranslate/translator/ai_translator/base.py index bba4799..5a85c4b 100644 --- a/docutranslate/translator/ai_translator/base.py +++ b/docutranslate/translator/ai_translator/base.py @@ -19,6 +19,7 @@ class AiTranslatorConfig(TranslatorConfig): timeout: int = 2000 chunk_size: int = 3000 concurrent: int = 30 + glossary_dict: dict[str:str] | None = None T = TypeVar('T', bound=Document) @@ -32,6 +33,7 @@ class AiTranslator(Translator[T]): def __init__(self, config: AiTranslatorConfig): super().__init__(config=config) + @abstractmethod def translate(self, document: T) -> Document: ... diff --git a/docutranslate/translator/ai_translator/docx_translator.py b/docutranslate/translator/ai_translator/docx_translator.py index 5050efb..36c615b 100644 --- a/docutranslate/translator/ai_translator/docx_translator.py +++ b/docutranslate/translator/ai_translator/docx_translator.py @@ -48,7 +48,8 @@ class DocxTranslator(AiTranslator): thinking=config.thinking, max_concurrent=config.concurrent, timeout=config.timeout, - logger=self.logger + logger=self.logger, + glossary_dict=config.glossary_dict ) self.translate_agent = SegmentsTranslateAgent(agent_config) self.insert_mode = config.insert_mode diff --git a/docutranslate/translator/ai_translator/epub_translator.py b/docutranslate/translator/ai_translator/epub_translator.py index 3381f96..39b0190 100644 --- a/docutranslate/translator/ai_translator/epub_translator.py +++ b/docutranslate/translator/ai_translator/epub_translator.py @@ -32,7 +32,8 @@ class EpubTranslator(AiTranslator): custom_prompt=config.custom_prompt, to_lang=config.to_lang, baseurl=config.base_url, key=config.api_key, model_id=config.model_id, system_prompt=None, temperature=config.temperature, thinking=config.thinking, - max_concurrent=config.concurrent, timeout=config.timeout, logger=self.logger + max_concurrent=config.concurrent, timeout=config.timeout, logger=self.logger, + glossary_dict=config.glossary_dict ) self.translate_agent = SegmentsTranslateAgent(agent_config) self.insert_mode = config.insert_mode diff --git a/docutranslate/translator/ai_translator/html_translator.py b/docutranslate/translator/ai_translator/html_translator.py index 804746b..5f8af48 100644 --- a/docutranslate/translator/ai_translator/html_translator.py +++ b/docutranslate/translator/ai_translator/html_translator.py @@ -96,7 +96,8 @@ class HtmlTranslator(AiTranslator): thinking=config.thinking, max_concurrent=config.concurrent, timeout=config.timeout, - logger=self.logger + logger=self.logger, + glossary_dict=config.glossary_dict ) self.translate_agent = SegmentsTranslateAgent(agent_config) self.insert_mode = config.insert_mode diff --git a/docutranslate/translator/ai_translator/json_translator.py b/docutranslate/translator/ai_translator/json_translator.py index e1fa455..57c016f 100644 --- a/docutranslate/translator/ai_translator/json_translator.py +++ b/docutranslate/translator/ai_translator/json_translator.py @@ -28,7 +28,8 @@ class JsonTranslator(AiTranslator): thinking=config.thinking, max_concurrent=config.concurrent, timeout=config.timeout, - logger=self.logger) + logger=self.logger, + glossary_dict=config.glossary_dict) self.translate_agent = SegmentsTranslateAgent(agent_config) self.jsonpaths = config.json_paths diff --git a/docutranslate/translator/ai_translator/md_translator.py b/docutranslate/translator/ai_translator/md_translator.py index fbbe4cc..af0de34 100644 --- a/docutranslate/translator/ai_translator/md_translator.py +++ b/docutranslate/translator/ai_translator/md_translator.py @@ -29,7 +29,8 @@ class MDTranslator(AiTranslator): thinking=config.thinking, max_concurrent=config.concurrent, timeout=config.timeout, - logger=self.logger) + logger=self.logger, + glossary_dict=config.glossary_dict) self.translate_agent = MDTranslateAgent(agent_config) def translate(self, document: MarkdownDocument) -> Self: @@ -37,7 +38,7 @@ class MDTranslator(AiTranslator): with MDMaskUrisContext(document): chunks: list[str] = split_markdown_text(document.content.decode(), self.chunk_size) self.logger.info(f"markdown分为{len(chunks)}块") - result: list[str] = self.translate_agent.send_prompts(chunks) + result: list[str] = self.translate_agent.send_chunks(chunks) content = join_markdown_texts(result) # 做一些加强鲁棒性的操作 content = content.replace(r'\(', r'\(') @@ -52,7 +53,7 @@ class MDTranslator(AiTranslator): with MDMaskUrisContext(document): chunks: list[str] = split_markdown_text(document.content.decode(), self.chunk_size) self.logger.info(f"markdown分为{len(chunks)}块") - result: list[str] = await self.translate_agent.send_prompts_async(chunks) + result: list[str] = await self.translate_agent.send_chunks_async(chunks) def run(): content = join_markdown_texts(result) diff --git a/docutranslate/translator/ai_translator/srt_translator.py b/docutranslate/translator/ai_translator/srt_translator.py index 6d12a7f..de8a24b 100644 --- a/docutranslate/translator/ai_translator/srt_translator.py +++ b/docutranslate/translator/ai_translator/srt_translator.py @@ -35,7 +35,8 @@ class SrtTranslator(AiTranslator): thinking=config.thinking, max_concurrent=config.concurrent, timeout=config.timeout, - logger=self.logger + logger=self.logger, + glossary_dict=config.glossary_dict ) self.translate_agent = SegmentsTranslateAgent(agent_config) self.insert_mode = config.insert_mode diff --git a/docutranslate/translator/ai_translator/txt_translator.py b/docutranslate/translator/ai_translator/txt_translator.py index 07a143e..e162a1d 100644 --- a/docutranslate/translator/ai_translator/txt_translator.py +++ b/docutranslate/translator/ai_translator/txt_translator.py @@ -26,14 +26,15 @@ class TXTTranslator(AiTranslator): thinking=config.thinking, max_concurrent=config.concurrent, timeout=config.timeout, - logger=self.logger) + logger=self.logger, + glossary_dict=config.glossary_dict) self.translate_agent = TXTTranslateAgent(agent_config) def translate(self, document: Document) -> Self: self.logger.info("正在翻译txt") chunks: list[str] = split_markdown_text(document.content.decode(), max_block_size=self.chunk_size) self.logger.info(f"txt分为{len(chunks)}块") - result: list[str] = self.translate_agent.send_prompts(chunks) + result: list[str] = self.translate_agent.send_chunks(chunks) content = "\n".join(result) document.content = content.encode() self.logger.info("翻译完成") @@ -43,7 +44,7 @@ class TXTTranslator(AiTranslator): self.logger.info("正在翻译txt") chunks: list[str] = split_markdown_text(document.content.decode(), max_block_size=self.chunk_size) self.logger.info(f"txt分为{len(chunks)}块") - result: list[str] = await self.translate_agent.send_prompts_async(chunks) + result: list[str] = await self.translate_agent.send_chunks_async(chunks) content = "\n".join(result) document.content = content.encode() self.logger.info("翻译完成") diff --git a/docutranslate/translator/ai_translator/xlsx_translator.py b/docutranslate/translator/ai_translator/xlsx_translator.py index e768da4..7015bde 100644 --- a/docutranslate/translator/ai_translator/xlsx_translator.py +++ b/docutranslate/translator/ai_translator/xlsx_translator.py @@ -36,7 +36,8 @@ class XlsxTranslator(AiTranslator): thinking=config.thinking, max_concurrent=config.concurrent, timeout=config.timeout, - logger=self.logger) + logger=self.logger, + glossary_dict=config.glossary_dict) self.translate_agent = SegmentsTranslateAgent(agent_config) self.insert_mode = config.insert_mode self.separator = config.separator