translator增加glossary_dict的配置
This commit is contained in:
@@ -19,6 +19,7 @@ class AiTranslatorConfig(TranslatorConfig):
|
||||
timeout: int = 2000
|
||||
chunk_size: int = 3000
|
||||
concurrent: int = 30
|
||||
glossary_dict: dict[str:str] | None = None
|
||||
|
||||
|
||||
T = TypeVar('T', bound=Document)
|
||||
@@ -32,6 +33,7 @@ class AiTranslator(Translator[T]):
|
||||
def __init__(self, config: AiTranslatorConfig):
|
||||
super().__init__(config=config)
|
||||
|
||||
|
||||
@abstractmethod
|
||||
def translate(self, document: T) -> Document:
|
||||
...
|
||||
|
||||
@@ -48,7 +48,8 @@ class DocxTranslator(AiTranslator):
|
||||
thinking=config.thinking,
|
||||
max_concurrent=config.concurrent,
|
||||
timeout=config.timeout,
|
||||
logger=self.logger
|
||||
logger=self.logger,
|
||||
glossary_dict=config.glossary_dict
|
||||
)
|
||||
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
||||
self.insert_mode = config.insert_mode
|
||||
|
||||
@@ -32,7 +32,8 @@ class EpubTranslator(AiTranslator):
|
||||
custom_prompt=config.custom_prompt, to_lang=config.to_lang,
|
||||
baseurl=config.base_url, key=config.api_key, model_id=config.model_id,
|
||||
system_prompt=None, temperature=config.temperature, thinking=config.thinking,
|
||||
max_concurrent=config.concurrent, timeout=config.timeout, logger=self.logger
|
||||
max_concurrent=config.concurrent, timeout=config.timeout, logger=self.logger,
|
||||
glossary_dict=config.glossary_dict
|
||||
)
|
||||
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
||||
self.insert_mode = config.insert_mode
|
||||
|
||||
@@ -96,7 +96,8 @@ class HtmlTranslator(AiTranslator):
|
||||
thinking=config.thinking,
|
||||
max_concurrent=config.concurrent,
|
||||
timeout=config.timeout,
|
||||
logger=self.logger
|
||||
logger=self.logger,
|
||||
glossary_dict=config.glossary_dict
|
||||
)
|
||||
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
||||
self.insert_mode = config.insert_mode
|
||||
|
||||
@@ -28,7 +28,8 @@ class JsonTranslator(AiTranslator):
|
||||
thinking=config.thinking,
|
||||
max_concurrent=config.concurrent,
|
||||
timeout=config.timeout,
|
||||
logger=self.logger)
|
||||
logger=self.logger,
|
||||
glossary_dict=config.glossary_dict)
|
||||
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
||||
self.jsonpaths = config.json_paths
|
||||
|
||||
|
||||
@@ -29,7 +29,8 @@ class MDTranslator(AiTranslator):
|
||||
thinking=config.thinking,
|
||||
max_concurrent=config.concurrent,
|
||||
timeout=config.timeout,
|
||||
logger=self.logger)
|
||||
logger=self.logger,
|
||||
glossary_dict=config.glossary_dict)
|
||||
self.translate_agent = MDTranslateAgent(agent_config)
|
||||
|
||||
def translate(self, document: MarkdownDocument) -> Self:
|
||||
@@ -37,7 +38,7 @@ class MDTranslator(AiTranslator):
|
||||
with MDMaskUrisContext(document):
|
||||
chunks: list[str] = split_markdown_text(document.content.decode(), self.chunk_size)
|
||||
self.logger.info(f"markdown分为{len(chunks)}块")
|
||||
result: list[str] = self.translate_agent.send_prompts(chunks)
|
||||
result: list[str] = self.translate_agent.send_chunks(chunks)
|
||||
content = join_markdown_texts(result)
|
||||
# 做一些加强鲁棒性的操作
|
||||
content = content.replace(r'\(', r'\(')
|
||||
@@ -52,7 +53,7 @@ class MDTranslator(AiTranslator):
|
||||
with MDMaskUrisContext(document):
|
||||
chunks: list[str] = split_markdown_text(document.content.decode(), self.chunk_size)
|
||||
self.logger.info(f"markdown分为{len(chunks)}块")
|
||||
result: list[str] = await self.translate_agent.send_prompts_async(chunks)
|
||||
result: list[str] = await self.translate_agent.send_chunks_async(chunks)
|
||||
|
||||
def run():
|
||||
content = join_markdown_texts(result)
|
||||
|
||||
@@ -35,7 +35,8 @@ class SrtTranslator(AiTranslator):
|
||||
thinking=config.thinking,
|
||||
max_concurrent=config.concurrent,
|
||||
timeout=config.timeout,
|
||||
logger=self.logger
|
||||
logger=self.logger,
|
||||
glossary_dict=config.glossary_dict
|
||||
)
|
||||
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
||||
self.insert_mode = config.insert_mode
|
||||
|
||||
@@ -26,14 +26,15 @@ class TXTTranslator(AiTranslator):
|
||||
thinking=config.thinking,
|
||||
max_concurrent=config.concurrent,
|
||||
timeout=config.timeout,
|
||||
logger=self.logger)
|
||||
logger=self.logger,
|
||||
glossary_dict=config.glossary_dict)
|
||||
self.translate_agent = TXTTranslateAgent(agent_config)
|
||||
|
||||
def translate(self, document: Document) -> Self:
|
||||
self.logger.info("正在翻译txt")
|
||||
chunks: list[str] = split_markdown_text(document.content.decode(), max_block_size=self.chunk_size)
|
||||
self.logger.info(f"txt分为{len(chunks)}块")
|
||||
result: list[str] = self.translate_agent.send_prompts(chunks)
|
||||
result: list[str] = self.translate_agent.send_chunks(chunks)
|
||||
content = "\n".join(result)
|
||||
document.content = content.encode()
|
||||
self.logger.info("翻译完成")
|
||||
@@ -43,7 +44,7 @@ class TXTTranslator(AiTranslator):
|
||||
self.logger.info("正在翻译txt")
|
||||
chunks: list[str] = split_markdown_text(document.content.decode(), max_block_size=self.chunk_size)
|
||||
self.logger.info(f"txt分为{len(chunks)}块")
|
||||
result: list[str] = await self.translate_agent.send_prompts_async(chunks)
|
||||
result: list[str] = await self.translate_agent.send_chunks_async(chunks)
|
||||
content = "\n".join(result)
|
||||
document.content = content.encode()
|
||||
self.logger.info("翻译完成")
|
||||
|
||||
@@ -36,7 +36,8 @@ class XlsxTranslator(AiTranslator):
|
||||
thinking=config.thinking,
|
||||
max_concurrent=config.concurrent,
|
||||
timeout=config.timeout,
|
||||
logger=self.logger)
|
||||
logger=self.logger,
|
||||
glossary_dict=config.glossary_dict)
|
||||
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
||||
self.insert_mode = config.insert_mode
|
||||
self.separator = config.separator
|
||||
|
||||
Reference in New Issue
Block a user