AiTranslatorConfig添加skip_translate参数
This commit is contained in:
@@ -52,7 +52,7 @@
|
|||||||
"chunkSizeLabel": "分块大小",
|
"chunkSizeLabel": "分块大小",
|
||||||
"resetBtn": "重置",
|
"resetBtn": "重置",
|
||||||
"concurrentLabel": "并发数",
|
"concurrentLabel": "并发数",
|
||||||
"glossaryGenTitle": "5. 术语表",
|
"glossaryGenTitle": "术语表",
|
||||||
"glossaryLabel": "术语表 (可选)",
|
"glossaryLabel": "术语表 (可选)",
|
||||||
"glossaryHelp": "选择一个或多个CSV文件。文件需包含'src'和'dst'两列标题,分别代表原文和译文。",
|
"glossaryHelp": "选择一个或多个CSV文件。文件需包含'src'和'dst'两列标题,分别代表原文和译文。",
|
||||||
"viewGlossaryBtn": "查看术语表",
|
"viewGlossaryBtn": "查看术语表",
|
||||||
@@ -206,7 +206,7 @@
|
|||||||
"chunkSizeLabel": "Chunk Size",
|
"chunkSizeLabel": "Chunk Size",
|
||||||
"resetBtn": "Reset",
|
"resetBtn": "Reset",
|
||||||
"concurrentLabel": "Concurrency",
|
"concurrentLabel": "Concurrency",
|
||||||
"glossaryGenTitle": "5. Glossary",
|
"glossaryGenTitle": "Glossary",
|
||||||
"glossaryLabel": "Glossary (Optional)",
|
"glossaryLabel": "Glossary (Optional)",
|
||||||
"glossaryHelp": "Select one or more CSV files. Files must have 'src' and 'dst' headers, representing source and destination text respectively.",
|
"glossaryHelp": "Select one or more CSV files. Files must have 'src' and 'dst' headers, representing source and destination text respectively.",
|
||||||
"viewGlossaryBtn": "View Glossary",
|
"viewGlossaryBtn": "View Glossary",
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
@@ -1,5 +1,5 @@
|
|||||||
from abc import abstractmethod
|
from abc import abstractmethod
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass, field
|
||||||
from typing import TypeVar
|
from typing import TypeVar
|
||||||
|
|
||||||
from docutranslate.agents.agent import ThinkingMode
|
from docutranslate.agents.agent import ThinkingMode
|
||||||
@@ -10,10 +10,10 @@ from docutranslate.translator.base import Translator, TranslatorConfig
|
|||||||
|
|
||||||
@dataclass(kw_only=True)
|
@dataclass(kw_only=True)
|
||||||
class AiTranslatorConfig(TranslatorConfig):
|
class AiTranslatorConfig(TranslatorConfig):
|
||||||
base_url: str
|
base_url: str | None = field(default=None,metadata={"description": "OpenAI兼容地址,当skip_translate为False时为必填项"})
|
||||||
api_key: str
|
api_key: str | None = field(default=None,metadata={"description": "当skip_translate为False时为必填项"})
|
||||||
model_id: str
|
model_id: str | None = field(default=None,metadata={"description": "当skip_translate为False时为必填项"})
|
||||||
to_lang: str
|
to_lang: str = "简体中文"
|
||||||
custom_prompt: str | None = None
|
custom_prompt: str | None = None
|
||||||
temperature: float = 0.7
|
temperature: float = 0.7
|
||||||
thinking: ThinkingMode = "default"
|
thinking: ThinkingMode = "default"
|
||||||
@@ -23,6 +23,7 @@ class AiTranslatorConfig(TranslatorConfig):
|
|||||||
glossary_dict: dict[str:str] | None = None
|
glossary_dict: dict[str:str] | None = None
|
||||||
glossary_generate_enable: bool = False
|
glossary_generate_enable: bool = False
|
||||||
glossary_agent_config: GlossaryAgentConfig | None = None
|
glossary_agent_config: GlossaryAgentConfig | None = None
|
||||||
|
skip_translate: bool = False
|
||||||
|
|
||||||
|
|
||||||
T = TypeVar('T', bound=Document)
|
T = TypeVar('T', bound=Document)
|
||||||
@@ -35,8 +36,12 @@ class AiTranslator(Translator[T]):
|
|||||||
|
|
||||||
def __init__(self, config: AiTranslatorConfig):
|
def __init__(self, config: AiTranslatorConfig):
|
||||||
super().__init__(config=config)
|
super().__init__(config=config)
|
||||||
|
self.skip_translate = config.skip_translate
|
||||||
self.glossary_agent = None
|
self.glossary_agent = None
|
||||||
self.glossary_dict_gen = None
|
self.glossary_dict_gen = None
|
||||||
|
if not self.skip_translate and (config.base_url is None or config.api_key is None or config.model_id is None):
|
||||||
|
raise ValueError("skip_translate不为false时,base_url、api_key、model_id为必填项")
|
||||||
|
|
||||||
if config.glossary_generate_enable:
|
if config.glossary_generate_enable:
|
||||||
if config.glossary_agent_config:
|
if config.glossary_agent_config:
|
||||||
self.glossary_agent = GlossaryAgent(config.glossary_agent_config)
|
self.glossary_agent = GlossaryAgent(config.glossary_agent_config)
|
||||||
@@ -53,6 +58,7 @@ class AiTranslator(Translator[T]):
|
|||||||
logger=self.logger,
|
logger=self.logger,
|
||||||
)
|
)
|
||||||
self.glossary_agent = GlossaryAgent(glossary_agent_config)
|
self.glossary_agent = GlossaryAgent(glossary_agent_config)
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def translate(self, document: T) -> Document:
|
def translate(self, document: T) -> Document:
|
||||||
...
|
...
|
||||||
|
|||||||
@@ -37,20 +37,22 @@ class DocxTranslator(AiTranslator):
|
|||||||
def __init__(self, config: DocxTranslatorConfig):
|
def __init__(self, config: DocxTranslatorConfig):
|
||||||
super().__init__(config=config)
|
super().__init__(config=config)
|
||||||
self.chunk_size = config.chunk_size
|
self.chunk_size = config.chunk_size
|
||||||
agent_config = SegmentsTranslateAgentConfig(
|
self.translate_agent = None
|
||||||
custom_prompt=config.custom_prompt,
|
if not self.skip_translate:
|
||||||
to_lang=config.to_lang,
|
agent_config = SegmentsTranslateAgentConfig(
|
||||||
baseurl=config.base_url,
|
custom_prompt=config.custom_prompt,
|
||||||
key=config.api_key,
|
to_lang=config.to_lang,
|
||||||
model_id=config.model_id,
|
baseurl=config.base_url,
|
||||||
temperature=config.temperature,
|
key=config.api_key,
|
||||||
thinking=config.thinking,
|
model_id=config.model_id,
|
||||||
max_concurrent=config.concurrent,
|
temperature=config.temperature,
|
||||||
timeout=config.timeout,
|
thinking=config.thinking,
|
||||||
logger=self.logger,
|
max_concurrent=config.concurrent,
|
||||||
glossary_dict=config.glossary_dict
|
timeout=config.timeout,
|
||||||
)
|
logger=self.logger,
|
||||||
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
glossary_dict=config.glossary_dict
|
||||||
|
)
|
||||||
|
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
||||||
self.insert_mode = config.insert_mode
|
self.insert_mode = config.insert_mode
|
||||||
self.separator = config.separator
|
self.separator = config.separator
|
||||||
|
|
||||||
@@ -160,10 +162,14 @@ class DocxTranslator(AiTranslator):
|
|||||||
|
|
||||||
if self.glossary_agent:
|
if self.glossary_agent:
|
||||||
self.glossary_dict_gen = self.glossary_agent.send_segments(original_texts, self.chunk_size)
|
self.glossary_dict_gen = self.glossary_agent.send_segments(original_texts, self.chunk_size)
|
||||||
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
if self.translate_agent:
|
||||||
|
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
||||||
|
|
||||||
# 调用翻译 agent
|
# 调用翻译 agent
|
||||||
translated_texts = self.translate_agent.send_segments(original_texts, self.chunk_size)
|
if self.translate_agent:
|
||||||
|
translated_texts = self.translate_agent.send_segments(original_texts, self.chunk_size)
|
||||||
|
else:
|
||||||
|
translated_texts = original_texts
|
||||||
|
|
||||||
# 将翻译结果写回文档
|
# 将翻译结果写回文档
|
||||||
document.content = self._after_translate(doc, elements_to_translate, translated_texts, original_texts)
|
document.content = self._after_translate(doc, elements_to_translate, translated_texts, original_texts)
|
||||||
@@ -184,11 +190,14 @@ class DocxTranslator(AiTranslator):
|
|||||||
|
|
||||||
if self.glossary_agent:
|
if self.glossary_agent:
|
||||||
self.glossary_dict_gen = await self.glossary_agent.send_segments_async(original_texts, self.chunk_size)
|
self.glossary_dict_gen = await self.glossary_agent.send_segments_async(original_texts, self.chunk_size)
|
||||||
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
if self.translate_agent:
|
||||||
|
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
||||||
|
|
||||||
# 异步调用翻译 agent
|
# 异步调用翻译 agent
|
||||||
translated_texts = await self.translate_agent.send_segments_async(original_texts, self.chunk_size)
|
if self.translate_agent:
|
||||||
|
translated_texts = await self.translate_agent.send_segments_async(original_texts, self.chunk_size)
|
||||||
|
else:
|
||||||
|
translated_texts = original_texts
|
||||||
# 将翻译结果写回文档
|
# 将翻译结果写回文档
|
||||||
document.content = await asyncio.to_thread(self._after_translate, doc, elements_to_translate, translated_texts,
|
document.content = await asyncio.to_thread(self._after_translate, doc, elements_to_translate, translated_texts,
|
||||||
original_texts)
|
original_texts)
|
||||||
|
|||||||
@@ -28,14 +28,22 @@ class EpubTranslator(AiTranslator):
|
|||||||
def __init__(self, config: EpubTranslatorConfig):
|
def __init__(self, config: EpubTranslatorConfig):
|
||||||
super().__init__(config=config)
|
super().__init__(config=config)
|
||||||
self.chunk_size = config.chunk_size
|
self.chunk_size = config.chunk_size
|
||||||
agent_config = SegmentsTranslateAgentConfig(
|
self.translate_agent = None
|
||||||
custom_prompt=config.custom_prompt, to_lang=config.to_lang,
|
if not self.skip_translate:
|
||||||
baseurl=config.base_url, key=config.api_key, model_id=config.model_id,
|
agent_config = SegmentsTranslateAgentConfig(
|
||||||
temperature=config.temperature, thinking=config.thinking,
|
custom_prompt=config.custom_prompt,
|
||||||
max_concurrent=config.concurrent, timeout=config.timeout, logger=self.logger,
|
to_lang=config.to_lang,
|
||||||
glossary_dict=config.glossary_dict
|
baseurl=config.base_url,
|
||||||
)
|
key=config.api_key,
|
||||||
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
model_id=config.model_id,
|
||||||
|
temperature=config.temperature,
|
||||||
|
thinking=config.thinking,
|
||||||
|
max_concurrent=config.concurrent,
|
||||||
|
timeout=config.timeout,
|
||||||
|
logger=self.logger,
|
||||||
|
glossary_dict=config.glossary_dict
|
||||||
|
)
|
||||||
|
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
||||||
self.insert_mode = config.insert_mode
|
self.insert_mode = config.insert_mode
|
||||||
self.separator = config.separator
|
self.separator = config.separator
|
||||||
|
|
||||||
@@ -175,8 +183,12 @@ class EpubTranslator(AiTranslator):
|
|||||||
return self
|
return self
|
||||||
if self.glossary_agent:
|
if self.glossary_agent:
|
||||||
self.glossary_dict_gen = self.glossary_agent.send_segments(original_texts, self.chunk_size)
|
self.glossary_dict_gen = self.glossary_agent.send_segments(original_texts, self.chunk_size)
|
||||||
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
if self.translate_agent:
|
||||||
translated_texts = self.translate_agent.send_segments(original_texts, self.chunk_size)
|
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
||||||
|
if self.translate_agent:
|
||||||
|
translated_texts = self.translate_agent.send_segments(original_texts, self.chunk_size)
|
||||||
|
else:
|
||||||
|
translated_texts = original_texts
|
||||||
document.content = self._after_translate(
|
document.content = self._after_translate(
|
||||||
all_files, items_to_translate, translated_texts, original_texts
|
all_files, items_to_translate, translated_texts, original_texts
|
||||||
)
|
)
|
||||||
@@ -195,11 +207,14 @@ class EpubTranslator(AiTranslator):
|
|||||||
|
|
||||||
if self.glossary_agent:
|
if self.glossary_agent:
|
||||||
self.glossary_dict_gen = await self.glossary_agent.send_segments_async(original_texts, self.chunk_size)
|
self.glossary_dict_gen = await self.glossary_agent.send_segments_async(original_texts, self.chunk_size)
|
||||||
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
if self.translate_agent:
|
||||||
|
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
||||||
translated_texts = await self.translate_agent.send_segments_async(
|
if self.translate_agent:
|
||||||
original_texts, self.chunk_size
|
translated_texts = await self.translate_agent.send_segments_async(
|
||||||
)
|
original_texts, self.chunk_size
|
||||||
|
)
|
||||||
|
else:
|
||||||
|
translated_texts = original_texts
|
||||||
document.content = await asyncio.to_thread(
|
document.content = await asyncio.to_thread(
|
||||||
self._after_translate, all_files, items_to_translate, translated_texts, original_texts
|
self._after_translate, all_files, items_to_translate, translated_texts, original_texts
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -85,20 +85,22 @@ class HtmlTranslator(AiTranslator):
|
|||||||
def __init__(self, config: HtmlTranslatorConfig):
|
def __init__(self, config: HtmlTranslatorConfig):
|
||||||
super().__init__(config=config)
|
super().__init__(config=config)
|
||||||
self.chunk_size = config.chunk_size
|
self.chunk_size = config.chunk_size
|
||||||
agent_config = SegmentsTranslateAgentConfig(
|
self.translate_agent = None
|
||||||
custom_prompt=config.custom_prompt,
|
if not self.skip_translate:
|
||||||
to_lang=config.to_lang,
|
agent_config = SegmentsTranslateAgentConfig(
|
||||||
baseurl=config.base_url,
|
custom_prompt=config.custom_prompt,
|
||||||
key=config.api_key,
|
to_lang=config.to_lang,
|
||||||
model_id=config.model_id,
|
baseurl=config.base_url,
|
||||||
temperature=config.temperature,
|
key=config.api_key,
|
||||||
thinking=config.thinking,
|
model_id=config.model_id,
|
||||||
max_concurrent=config.concurrent,
|
temperature=config.temperature,
|
||||||
timeout=config.timeout,
|
thinking=config.thinking,
|
||||||
logger=self.logger,
|
max_concurrent=config.concurrent,
|
||||||
glossary_dict=config.glossary_dict
|
timeout=config.timeout,
|
||||||
)
|
logger=self.logger,
|
||||||
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
glossary_dict=config.glossary_dict
|
||||||
|
)
|
||||||
|
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
||||||
self.insert_mode = config.insert_mode
|
self.insert_mode = config.insert_mode
|
||||||
self.separator = config.separator
|
self.separator = config.separator
|
||||||
|
|
||||||
@@ -199,9 +201,12 @@ class HtmlTranslator(AiTranslator):
|
|||||||
|
|
||||||
if self.glossary_agent:
|
if self.glossary_agent:
|
||||||
self.glossary_dict_gen = self.glossary_agent.send_segments(original_texts, self.chunk_size)
|
self.glossary_dict_gen = self.glossary_agent.send_segments(original_texts, self.chunk_size)
|
||||||
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
if self.translate_agent:
|
||||||
|
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
||||||
translated_texts = self.translate_agent.send_segments(original_texts, self.chunk_size)
|
if self.translate_agent:
|
||||||
|
translated_texts = self.translate_agent.send_segments(original_texts, self.chunk_size)
|
||||||
|
else:
|
||||||
|
translated_texts = original_texts
|
||||||
document.content = self._after_translate(soup, translatable_items, translated_texts, original_texts)
|
document.content = self._after_translate(soup, translatable_items, translated_texts, original_texts)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
@@ -218,9 +223,12 @@ class HtmlTranslator(AiTranslator):
|
|||||||
|
|
||||||
if self.glossary_agent:
|
if self.glossary_agent:
|
||||||
self.glossary_dict_gen = await self.glossary_agent.send_segments_async(original_texts, self.chunk_size)
|
self.glossary_dict_gen = await self.glossary_agent.send_segments_async(original_texts, self.chunk_size)
|
||||||
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
if self.translate_agent:
|
||||||
|
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
||||||
translated_texts = await self.translate_agent.send_segments_async(original_texts, self.chunk_size)
|
if self.translate_agent:
|
||||||
|
translated_texts = await self.translate_agent.send_segments_async(original_texts, self.chunk_size)
|
||||||
|
else:
|
||||||
|
translated_texts = original_texts
|
||||||
document.content = await asyncio.to_thread(
|
document.content = await asyncio.to_thread(
|
||||||
self._after_translate, soup, translatable_items, translated_texts, original_texts
|
self._after_translate, soup, translatable_items, translated_texts, original_texts
|
||||||
)
|
)
|
||||||
|
|||||||
@@ -18,18 +18,22 @@ class JsonTranslator(AiTranslator):
|
|||||||
def __init__(self, config: JsonTranslatorConfig):
|
def __init__(self, config: JsonTranslatorConfig):
|
||||||
super().__init__(config=config)
|
super().__init__(config=config)
|
||||||
self.chunk_size = config.chunk_size
|
self.chunk_size = config.chunk_size
|
||||||
agent_config = SegmentsTranslateAgentConfig(custom_prompt=config.custom_prompt,
|
self.translate_agent = None
|
||||||
to_lang=config.to_lang,
|
if not self.skip_translate:
|
||||||
baseurl=config.base_url,
|
agent_config = SegmentsTranslateAgentConfig(
|
||||||
key=config.api_key,
|
custom_prompt=config.custom_prompt,
|
||||||
model_id=config.model_id,
|
to_lang=config.to_lang,
|
||||||
temperature=config.temperature,
|
baseurl=config.base_url,
|
||||||
thinking=config.thinking,
|
key=config.api_key,
|
||||||
max_concurrent=config.concurrent,
|
model_id=config.model_id,
|
||||||
timeout=config.timeout,
|
temperature=config.temperature,
|
||||||
logger=self.logger,
|
thinking=config.thinking,
|
||||||
glossary_dict=config.glossary_dict)
|
max_concurrent=config.concurrent,
|
||||||
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
timeout=config.timeout,
|
||||||
|
logger=self.logger,
|
||||||
|
glossary_dict=config.glossary_dict
|
||||||
|
)
|
||||||
|
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
||||||
self.jsonpaths = config.json_paths
|
self.jsonpaths = config.json_paths
|
||||||
|
|
||||||
def _extract_matches(self, content: dict) -> list[Any]:
|
def _extract_matches(self, content: dict) -> list[Any]:
|
||||||
@@ -76,10 +80,13 @@ class JsonTranslator(AiTranslator):
|
|||||||
original_texts = [match.value for match in all_matches]
|
original_texts = [match.value for match in all_matches]
|
||||||
if self.glossary_agent:
|
if self.glossary_agent:
|
||||||
self.glossary_dict_gen = self.glossary_agent.send_segments(original_texts, self.chunk_size)
|
self.glossary_dict_gen = self.glossary_agent.send_segments(original_texts, self.chunk_size)
|
||||||
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
if self.translate_agent:
|
||||||
|
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
||||||
# 步骤 2: 批量翻译提取出的文本
|
# 步骤 2: 批量翻译提取出的文本
|
||||||
translated_texts = self.translate_agent.send_segments(original_texts, self.chunk_size)
|
if self.translate_agent:
|
||||||
|
translated_texts = self.translate_agent.send_segments(original_texts, self.chunk_size)
|
||||||
|
else:
|
||||||
|
translated_texts = original_texts
|
||||||
# 健壮性检查:确保翻译回来的项目数量与发送的一致
|
# 健壮性检查:确保翻译回来的项目数量与发送的一致
|
||||||
if len(original_texts) != len(translated_texts):
|
if len(original_texts) != len(translated_texts):
|
||||||
raise ValueError("翻译服务返回的项目数量与发送的数量不匹配。")
|
raise ValueError("翻译服务返回的项目数量与发送的数量不匹配。")
|
||||||
@@ -107,11 +114,14 @@ class JsonTranslator(AiTranslator):
|
|||||||
|
|
||||||
if self.glossary_agent:
|
if self.glossary_agent:
|
||||||
self.glossary_dict_gen = await self.glossary_agent.send_segments_async(original_texts, self.chunk_size)
|
self.glossary_dict_gen = await self.glossary_agent.send_segments_async(original_texts, self.chunk_size)
|
||||||
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
if self.translate_agent:
|
||||||
|
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
||||||
# 步骤 2: 批量翻译提取出的文本
|
|
||||||
translated_texts = await self.translate_agent.send_segments_async(original_texts, self.chunk_size)
|
|
||||||
|
|
||||||
|
# 步骤 2: 批量翻译提取出的文本
|
||||||
|
if self.translate_agent:
|
||||||
|
translated_texts = await self.translate_agent.send_segments_async(original_texts, self.chunk_size)
|
||||||
|
else:
|
||||||
|
translated_texts = original_texts
|
||||||
# 健壮性检查:确保翻译回来的项目数量与发送的一致
|
# 健壮性检查:确保翻译回来的项目数量与发送的一致
|
||||||
if len(original_texts) != len(translated_texts):
|
if len(original_texts) != len(translated_texts):
|
||||||
raise ValueError("翻译服务返回的项目数量与发送的数量不匹配。")
|
raise ValueError("翻译服务返回的项目数量与发送的数量不匹配。")
|
||||||
|
|||||||
@@ -19,18 +19,20 @@ class MDTranslator(AiTranslator):
|
|||||||
def __init__(self, config: MDTranslatorConfig):
|
def __init__(self, config: MDTranslatorConfig):
|
||||||
super().__init__(config=config)
|
super().__init__(config=config)
|
||||||
self.chunk_size = config.chunk_size
|
self.chunk_size = config.chunk_size
|
||||||
agent_config = MDTranslateAgentConfig(custom_prompt=config.custom_prompt,
|
self.translate_agent = None
|
||||||
to_lang=config.to_lang,
|
if not self.skip_translate:
|
||||||
baseurl=config.base_url,
|
agent_config = MDTranslateAgentConfig(custom_prompt=config.custom_prompt,
|
||||||
key=config.api_key,
|
to_lang=config.to_lang,
|
||||||
model_id=config.model_id,
|
baseurl=config.base_url,
|
||||||
temperature=config.temperature,
|
key=config.api_key,
|
||||||
thinking=config.thinking,
|
model_id=config.model_id,
|
||||||
max_concurrent=config.concurrent,
|
temperature=config.temperature,
|
||||||
timeout=config.timeout,
|
thinking=config.thinking,
|
||||||
logger=self.logger,
|
max_concurrent=config.concurrent,
|
||||||
glossary_dict=config.glossary_dict)
|
timeout=config.timeout,
|
||||||
self.translate_agent = MDTranslateAgent(agent_config)
|
logger=self.logger,
|
||||||
|
glossary_dict=config.glossary_dict)
|
||||||
|
self.translate_agent = MDTranslateAgent(agent_config)
|
||||||
|
|
||||||
def translate(self, document: MarkdownDocument) -> Self:
|
def translate(self, document: MarkdownDocument) -> Self:
|
||||||
self.logger.info("正在翻译markdown")
|
self.logger.info("正在翻译markdown")
|
||||||
@@ -38,9 +40,13 @@ class MDTranslator(AiTranslator):
|
|||||||
chunks: list[str] = split_markdown_text(document.content.decode(), self.chunk_size)
|
chunks: list[str] = split_markdown_text(document.content.decode(), self.chunk_size)
|
||||||
if self.glossary_agent:
|
if self.glossary_agent:
|
||||||
self.glossary_dict_gen = self.glossary_agent.send_segments(chunks, self.chunk_size)
|
self.glossary_dict_gen = self.glossary_agent.send_segments(chunks, self.chunk_size)
|
||||||
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
if self.translate_agent:
|
||||||
|
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
||||||
self.logger.info(f"markdown分为{len(chunks)}块")
|
self.logger.info(f"markdown分为{len(chunks)}块")
|
||||||
result: list[str] = self.translate_agent.send_chunks(chunks)
|
if self.translate_agent:
|
||||||
|
result: list[str] = self.translate_agent.send_chunks(chunks)
|
||||||
|
else:
|
||||||
|
result = chunks
|
||||||
content = join_markdown_texts(result)
|
content = join_markdown_texts(result)
|
||||||
# 做一些加强鲁棒性的操作
|
# 做一些加强鲁棒性的操作
|
||||||
content = content.replace(r'\(', r'\(')
|
content = content.replace(r'\(', r'\(')
|
||||||
@@ -57,10 +63,14 @@ class MDTranslator(AiTranslator):
|
|||||||
|
|
||||||
if self.glossary_agent:
|
if self.glossary_agent:
|
||||||
self.glossary_dict_gen = await self.glossary_agent.send_segments_async(chunks, self.chunk_size)
|
self.glossary_dict_gen = await self.glossary_agent.send_segments_async(chunks, self.chunk_size)
|
||||||
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
if self.translate_agent:
|
||||||
|
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
||||||
|
|
||||||
self.logger.info(f"markdown分为{len(chunks)}块")
|
self.logger.info(f"markdown分为{len(chunks)}块")
|
||||||
result: list[str] = await self.translate_agent.send_chunks_async(chunks)
|
if self.translate_agent:
|
||||||
|
result: list[str] = await self.translate_agent.send_chunks_async(chunks)
|
||||||
|
else:
|
||||||
|
result = chunks
|
||||||
|
|
||||||
def run():
|
def run():
|
||||||
content = join_markdown_texts(result)
|
content = join_markdown_texts(result)
|
||||||
|
|||||||
@@ -24,20 +24,22 @@ class SrtTranslator(AiTranslator):
|
|||||||
def __init__(self, config: SrtTranslatorConfig):
|
def __init__(self, config: SrtTranslatorConfig):
|
||||||
super().__init__(config=config)
|
super().__init__(config=config)
|
||||||
self.chunk_size = config.chunk_size
|
self.chunk_size = config.chunk_size
|
||||||
agent_config = SegmentsTranslateAgentConfig(
|
self.translate_agent = None
|
||||||
custom_prompt=config.custom_prompt,
|
if not self.skip_translate:
|
||||||
to_lang=config.to_lang,
|
agent_config = SegmentsTranslateAgentConfig(
|
||||||
baseurl=config.base_url,
|
custom_prompt=config.custom_prompt,
|
||||||
key=config.api_key,
|
to_lang=config.to_lang,
|
||||||
model_id=config.model_id,
|
baseurl=config.base_url,
|
||||||
temperature=config.temperature,
|
key=config.api_key,
|
||||||
thinking=config.thinking,
|
model_id=config.model_id,
|
||||||
max_concurrent=config.concurrent,
|
temperature=config.temperature,
|
||||||
timeout=config.timeout,
|
thinking=config.thinking,
|
||||||
logger=self.logger,
|
max_concurrent=config.concurrent,
|
||||||
glossary_dict=config.glossary_dict
|
timeout=config.timeout,
|
||||||
)
|
logger=self.logger,
|
||||||
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
glossary_dict=config.glossary_dict
|
||||||
|
)
|
||||||
|
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
||||||
self.insert_mode = config.insert_mode
|
self.insert_mode = config.insert_mode
|
||||||
self.separator = config.separator
|
self.separator = config.separator
|
||||||
|
|
||||||
@@ -109,10 +111,13 @@ class SrtTranslator(AiTranslator):
|
|||||||
return self
|
return self
|
||||||
if self.glossary_agent:
|
if self.glossary_agent:
|
||||||
self.glossary_dict_gen = self.glossary_agent.send_segments(original_texts, self.chunk_size)
|
self.glossary_dict_gen = self.glossary_agent.send_segments(original_texts, self.chunk_size)
|
||||||
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
if self.translate_agent:
|
||||||
|
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
||||||
# --- 步骤 2: 调用翻译Agent ---
|
# --- 步骤 2: 调用翻译Agent ---
|
||||||
translated_texts = self.translate_agent.send_segments(original_texts, self.chunk_size)
|
if self.translate_agent:
|
||||||
|
translated_texts = self.translate_agent.send_segments(original_texts, self.chunk_size)
|
||||||
|
else:
|
||||||
|
translated_texts = original_texts
|
||||||
# --- 步骤 3: 后处理并更新文档内容 ---
|
# --- 步骤 3: 后处理并更新文档内容 ---
|
||||||
document.content = self._after_translate(subtitles, translated_texts, original_texts)
|
document.content = self._after_translate(subtitles, translated_texts, original_texts)
|
||||||
return self
|
return self
|
||||||
@@ -130,11 +135,14 @@ class SrtTranslator(AiTranslator):
|
|||||||
|
|
||||||
if self.glossary_agent:
|
if self.glossary_agent:
|
||||||
self.glossary_dict_gen = await self.glossary_agent.send_segments_async(original_texts, self.chunk_size)
|
self.glossary_dict_gen = await self.glossary_agent.send_segments_async(original_texts, self.chunk_size)
|
||||||
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
if self.translate_agent:
|
||||||
|
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
||||||
|
|
||||||
# --- 步骤 2: 调用翻译Agent (异步) ---
|
# --- 步骤 2: 调用翻译Agent (异步) ---
|
||||||
translated_texts = await self.translate_agent.send_segments_async(original_texts, self.chunk_size)
|
if self.translate_agent:
|
||||||
|
translated_texts = await self.translate_agent.send_segments_async(original_texts, self.chunk_size)
|
||||||
|
else:
|
||||||
|
translated_texts = original_texts
|
||||||
# --- 步骤 3: 后处理并更新文档内容 (I/O密集型) ---
|
# --- 步骤 3: 后处理并更新文档内容 (I/O密集型) ---
|
||||||
document.content = await asyncio.to_thread(
|
document.content = await asyncio.to_thread(
|
||||||
self._after_translate, subtitles, translated_texts, original_texts
|
self._after_translate, subtitles, translated_texts, original_texts
|
||||||
|
|||||||
@@ -16,27 +16,33 @@ class TXTTranslator(AiTranslator):
|
|||||||
def __init__(self, config: TXTTranslatorConfig):
|
def __init__(self, config: TXTTranslatorConfig):
|
||||||
super().__init__(config=config)
|
super().__init__(config=config)
|
||||||
self.chunk_size = config.chunk_size
|
self.chunk_size = config.chunk_size
|
||||||
agent_config = TXTTranslateAgentConfig(custom_prompt=config.custom_prompt,
|
self.translate_agent =None
|
||||||
to_lang=config.to_lang,
|
if not self.skip_translate:
|
||||||
baseurl=config.base_url,
|
agent_config = TXTTranslateAgentConfig(custom_prompt=config.custom_prompt,
|
||||||
key=config.api_key,
|
to_lang=config.to_lang,
|
||||||
model_id=config.model_id,
|
baseurl=config.base_url,
|
||||||
temperature=config.temperature,
|
key=config.api_key,
|
||||||
thinking=config.thinking,
|
model_id=config.model_id,
|
||||||
max_concurrent=config.concurrent,
|
temperature=config.temperature,
|
||||||
timeout=config.timeout,
|
thinking=config.thinking,
|
||||||
logger=self.logger,
|
max_concurrent=config.concurrent,
|
||||||
glossary_dict=config.glossary_dict)
|
timeout=config.timeout,
|
||||||
self.translate_agent = TXTTranslateAgent(agent_config)
|
logger=self.logger,
|
||||||
|
glossary_dict=config.glossary_dict)
|
||||||
|
self.translate_agent = TXTTranslateAgent(agent_config)
|
||||||
|
|
||||||
def translate(self, document: Document) -> Self:
|
def translate(self, document: Document) -> Self:
|
||||||
self.logger.info("正在翻译txt")
|
self.logger.info("正在翻译txt")
|
||||||
chunks: list[str] = split_markdown_text(document.content.decode(), max_block_size=self.chunk_size)
|
chunks: list[str] = split_markdown_text(document.content.decode(), max_block_size=self.chunk_size)
|
||||||
if self.glossary_agent:
|
if self.glossary_agent:
|
||||||
self.glossary_dict_gen = self.glossary_agent.send_segments(chunks, self.chunk_size)
|
self.glossary_dict_gen = self.glossary_agent.send_segments(chunks, self.chunk_size)
|
||||||
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
if self.translate_agent:
|
||||||
|
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
||||||
self.logger.info(f"txt分为{len(chunks)}块")
|
self.logger.info(f"txt分为{len(chunks)}块")
|
||||||
result: list[str] = self.translate_agent.send_chunks(chunks)
|
if self.translate_agent:
|
||||||
|
result: list[str] = self.translate_agent.send_chunks(chunks)
|
||||||
|
else:
|
||||||
|
result=chunks
|
||||||
content = "\n".join(result)
|
content = "\n".join(result)
|
||||||
document.content = content.encode()
|
document.content = content.encode()
|
||||||
self.logger.info("翻译完成")
|
self.logger.info("翻译完成")
|
||||||
@@ -48,10 +54,14 @@ class TXTTranslator(AiTranslator):
|
|||||||
|
|
||||||
if self.glossary_agent:
|
if self.glossary_agent:
|
||||||
self.glossary_dict_gen = await self.glossary_agent.send_segments_async(chunks, self.chunk_size)
|
self.glossary_dict_gen = await self.glossary_agent.send_segments_async(chunks, self.chunk_size)
|
||||||
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
if self.translate_agent:
|
||||||
|
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
||||||
|
|
||||||
self.logger.info(f"txt分为{len(chunks)}块")
|
self.logger.info(f"txt分为{len(chunks)}块")
|
||||||
result: list[str] = await self.translate_agent.send_chunks_async(chunks)
|
if self.translate_agent:
|
||||||
|
result: list[str] = await self.translate_agent.send_chunks_async(chunks)
|
||||||
|
else:
|
||||||
|
result=chunks
|
||||||
content = "\n".join(result)
|
content = "\n".join(result)
|
||||||
document.content = content.encode()
|
document.content = content.encode()
|
||||||
self.logger.info("翻译完成")
|
self.logger.info("翻译完成")
|
||||||
|
|||||||
@@ -26,18 +26,22 @@ class XlsxTranslator(AiTranslator):
|
|||||||
def __init__(self, config: XlsxTranslatorConfig):
|
def __init__(self, config: XlsxTranslatorConfig):
|
||||||
super().__init__(config=config)
|
super().__init__(config=config)
|
||||||
self.chunk_size = config.chunk_size
|
self.chunk_size = config.chunk_size
|
||||||
agent_config = SegmentsTranslateAgentConfig(custom_prompt=config.custom_prompt,
|
self.translate_agent = None
|
||||||
to_lang=config.to_lang,
|
if not self.skip_translate:
|
||||||
baseurl=config.base_url,
|
agent_config = SegmentsTranslateAgentConfig(
|
||||||
key=config.api_key,
|
custom_prompt=config.custom_prompt,
|
||||||
model_id=config.model_id,
|
to_lang=config.to_lang,
|
||||||
temperature=config.temperature,
|
baseurl=config.base_url,
|
||||||
thinking=config.thinking,
|
key=config.api_key,
|
||||||
max_concurrent=config.concurrent,
|
model_id=config.model_id,
|
||||||
timeout=config.timeout,
|
temperature=config.temperature,
|
||||||
logger=self.logger,
|
thinking=config.thinking,
|
||||||
glossary_dict=config.glossary_dict)
|
max_concurrent=config.concurrent,
|
||||||
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
timeout=config.timeout,
|
||||||
|
logger=self.logger,
|
||||||
|
glossary_dict=config.glossary_dict
|
||||||
|
)
|
||||||
|
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
||||||
self.insert_mode = config.insert_mode
|
self.insert_mode = config.insert_mode
|
||||||
self.separator = config.separator
|
self.separator = config.separator
|
||||||
# --- 新增功能 ---
|
# --- 新增功能 ---
|
||||||
@@ -156,9 +160,13 @@ class XlsxTranslator(AiTranslator):
|
|||||||
return self
|
return self
|
||||||
if self.glossary_agent:
|
if self.glossary_agent:
|
||||||
self.glossary_dict_gen = self.glossary_agent.send_segments(original_texts, self.chunk_size)
|
self.glossary_dict_gen = self.glossary_agent.send_segments(original_texts, self.chunk_size)
|
||||||
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
if self.translate_agent:
|
||||||
|
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
||||||
# --- 步骤 2: 调用翻译函数 ---
|
# --- 步骤 2: 调用翻译函数 ---
|
||||||
translated_texts = self.translate_agent.send_segments(original_texts, self.chunk_size)
|
if self.translate_agent:
|
||||||
|
translated_texts = self.translate_agent.send_segments(original_texts, self.chunk_size)
|
||||||
|
else:
|
||||||
|
translated_texts = original_texts
|
||||||
|
|
||||||
document.content = self._after_translate(workbook, cells_to_translate, translated_texts, original_texts)
|
document.content = self._after_translate(workbook, cells_to_translate, translated_texts, original_texts)
|
||||||
return self
|
return self
|
||||||
@@ -173,11 +181,14 @@ class XlsxTranslator(AiTranslator):
|
|||||||
|
|
||||||
if self.glossary_agent:
|
if self.glossary_agent:
|
||||||
self.glossary_dict_gen = await self.glossary_agent.send_segments_async(original_texts, self.chunk_size)
|
self.glossary_dict_gen = await self.glossary_agent.send_segments_async(original_texts, self.chunk_size)
|
||||||
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
if self.translate_agent:
|
||||||
|
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
||||||
|
|
||||||
# --- 步骤 2: 调用翻译函数 ---
|
# --- 步骤 2: 调用翻译函数 ---
|
||||||
translated_texts = await self.translate_agent.send_segments_async(original_texts, self.chunk_size)
|
if self.translate_agent:
|
||||||
|
translated_texts = await self.translate_agent.send_segments_async(original_texts, self.chunk_size)
|
||||||
|
else:
|
||||||
|
translated_texts = original_texts
|
||||||
document.content = await asyncio.to_thread(self._after_translate, workbook, cells_to_translate,
|
document.content = await asyncio.to_thread(self._after_translate, workbook, cells_to_translate,
|
||||||
translated_texts, original_texts)
|
translated_texts, original_texts)
|
||||||
return self
|
return self
|
||||||
|
|||||||
Reference in New Issue
Block a user