translator增加glossary_dict的配置

This commit is contained in:
xunbu
2025-08-27 21:11:02 +08:00
parent 78a4525108
commit 1ffed8f280
9 changed files with 22 additions and 12 deletions

View File

@@ -19,6 +19,7 @@ class AiTranslatorConfig(TranslatorConfig):
timeout: int = 2000
chunk_size: int = 3000
concurrent: int = 30
glossary_dict: dict[str:str] | None = None
T = TypeVar('T', bound=Document)
@@ -32,6 +33,7 @@ class AiTranslator(Translator[T]):
def __init__(self, config: AiTranslatorConfig):
super().__init__(config=config)
@abstractmethod
def translate(self, document: T) -> Document:
...

View File

@@ -48,7 +48,8 @@ class DocxTranslator(AiTranslator):
thinking=config.thinking,
max_concurrent=config.concurrent,
timeout=config.timeout,
logger=self.logger
logger=self.logger,
glossary_dict=config.glossary_dict
)
self.translate_agent = SegmentsTranslateAgent(agent_config)
self.insert_mode = config.insert_mode

View File

@@ -32,7 +32,8 @@ class EpubTranslator(AiTranslator):
custom_prompt=config.custom_prompt, to_lang=config.to_lang,
baseurl=config.base_url, key=config.api_key, model_id=config.model_id,
system_prompt=None, temperature=config.temperature, thinking=config.thinking,
max_concurrent=config.concurrent, timeout=config.timeout, logger=self.logger
max_concurrent=config.concurrent, timeout=config.timeout, logger=self.logger,
glossary_dict=config.glossary_dict
)
self.translate_agent = SegmentsTranslateAgent(agent_config)
self.insert_mode = config.insert_mode

View File

@@ -96,7 +96,8 @@ class HtmlTranslator(AiTranslator):
thinking=config.thinking,
max_concurrent=config.concurrent,
timeout=config.timeout,
logger=self.logger
logger=self.logger,
glossary_dict=config.glossary_dict
)
self.translate_agent = SegmentsTranslateAgent(agent_config)
self.insert_mode = config.insert_mode

View File

@@ -28,7 +28,8 @@ class JsonTranslator(AiTranslator):
thinking=config.thinking,
max_concurrent=config.concurrent,
timeout=config.timeout,
logger=self.logger)
logger=self.logger,
glossary_dict=config.glossary_dict)
self.translate_agent = SegmentsTranslateAgent(agent_config)
self.jsonpaths = config.json_paths

View File

@@ -29,7 +29,8 @@ class MDTranslator(AiTranslator):
thinking=config.thinking,
max_concurrent=config.concurrent,
timeout=config.timeout,
logger=self.logger)
logger=self.logger,
glossary_dict=config.glossary_dict)
self.translate_agent = MDTranslateAgent(agent_config)
def translate(self, document: MarkdownDocument) -> Self:
@@ -37,7 +38,7 @@ class MDTranslator(AiTranslator):
with MDMaskUrisContext(document):
chunks: list[str] = split_markdown_text(document.content.decode(), self.chunk_size)
self.logger.info(f"markdown分为{len(chunks)}")
result: list[str] = self.translate_agent.send_prompts(chunks)
result: list[str] = self.translate_agent.send_chunks(chunks)
content = join_markdown_texts(result)
# 做一些加强鲁棒性的操作
content = content.replace(r'\', r'\(')
@@ -52,7 +53,7 @@ class MDTranslator(AiTranslator):
with MDMaskUrisContext(document):
chunks: list[str] = split_markdown_text(document.content.decode(), self.chunk_size)
self.logger.info(f"markdown分为{len(chunks)}")
result: list[str] = await self.translate_agent.send_prompts_async(chunks)
result: list[str] = await self.translate_agent.send_chunks_async(chunks)
def run():
content = join_markdown_texts(result)

View File

@@ -35,7 +35,8 @@ class SrtTranslator(AiTranslator):
thinking=config.thinking,
max_concurrent=config.concurrent,
timeout=config.timeout,
logger=self.logger
logger=self.logger,
glossary_dict=config.glossary_dict
)
self.translate_agent = SegmentsTranslateAgent(agent_config)
self.insert_mode = config.insert_mode

View File

@@ -26,14 +26,15 @@ class TXTTranslator(AiTranslator):
thinking=config.thinking,
max_concurrent=config.concurrent,
timeout=config.timeout,
logger=self.logger)
logger=self.logger,
glossary_dict=config.glossary_dict)
self.translate_agent = TXTTranslateAgent(agent_config)
def translate(self, document: Document) -> Self:
self.logger.info("正在翻译txt")
chunks: list[str] = split_markdown_text(document.content.decode(), max_block_size=self.chunk_size)
self.logger.info(f"txt分为{len(chunks)}")
result: list[str] = self.translate_agent.send_prompts(chunks)
result: list[str] = self.translate_agent.send_chunks(chunks)
content = "\n".join(result)
document.content = content.encode()
self.logger.info("翻译完成")
@@ -43,7 +44,7 @@ class TXTTranslator(AiTranslator):
self.logger.info("正在翻译txt")
chunks: list[str] = split_markdown_text(document.content.decode(), max_block_size=self.chunk_size)
self.logger.info(f"txt分为{len(chunks)}")
result: list[str] = await self.translate_agent.send_prompts_async(chunks)
result: list[str] = await self.translate_agent.send_chunks_async(chunks)
content = "\n".join(result)
document.content = content.encode()
self.logger.info("翻译完成")

View File

@@ -36,7 +36,8 @@ class XlsxTranslator(AiTranslator):
thinking=config.thinking,
max_concurrent=config.concurrent,
timeout=config.timeout,
logger=self.logger)
logger=self.logger,
glossary_dict=config.glossary_dict)
self.translate_agent = SegmentsTranslateAgent(agent_config)
self.insert_mode = config.insert_mode
self.separator = config.separator