translator增加glossary_dict的配置
This commit is contained in:
@@ -19,6 +19,7 @@ class AiTranslatorConfig(TranslatorConfig):
|
|||||||
timeout: int = 2000
|
timeout: int = 2000
|
||||||
chunk_size: int = 3000
|
chunk_size: int = 3000
|
||||||
concurrent: int = 30
|
concurrent: int = 30
|
||||||
|
glossary_dict: dict[str:str] | None = None
|
||||||
|
|
||||||
|
|
||||||
T = TypeVar('T', bound=Document)
|
T = TypeVar('T', bound=Document)
|
||||||
@@ -32,6 +33,7 @@ class AiTranslator(Translator[T]):
|
|||||||
def __init__(self, config: AiTranslatorConfig):
|
def __init__(self, config: AiTranslatorConfig):
|
||||||
super().__init__(config=config)
|
super().__init__(config=config)
|
||||||
|
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def translate(self, document: T) -> Document:
|
def translate(self, document: T) -> Document:
|
||||||
...
|
...
|
||||||
|
|||||||
@@ -48,7 +48,8 @@ class DocxTranslator(AiTranslator):
|
|||||||
thinking=config.thinking,
|
thinking=config.thinking,
|
||||||
max_concurrent=config.concurrent,
|
max_concurrent=config.concurrent,
|
||||||
timeout=config.timeout,
|
timeout=config.timeout,
|
||||||
logger=self.logger
|
logger=self.logger,
|
||||||
|
glossary_dict=config.glossary_dict
|
||||||
)
|
)
|
||||||
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
||||||
self.insert_mode = config.insert_mode
|
self.insert_mode = config.insert_mode
|
||||||
|
|||||||
@@ -32,7 +32,8 @@ class EpubTranslator(AiTranslator):
|
|||||||
custom_prompt=config.custom_prompt, to_lang=config.to_lang,
|
custom_prompt=config.custom_prompt, to_lang=config.to_lang,
|
||||||
baseurl=config.base_url, key=config.api_key, model_id=config.model_id,
|
baseurl=config.base_url, key=config.api_key, model_id=config.model_id,
|
||||||
system_prompt=None, temperature=config.temperature, thinking=config.thinking,
|
system_prompt=None, temperature=config.temperature, thinking=config.thinking,
|
||||||
max_concurrent=config.concurrent, timeout=config.timeout, logger=self.logger
|
max_concurrent=config.concurrent, timeout=config.timeout, logger=self.logger,
|
||||||
|
glossary_dict=config.glossary_dict
|
||||||
)
|
)
|
||||||
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
||||||
self.insert_mode = config.insert_mode
|
self.insert_mode = config.insert_mode
|
||||||
|
|||||||
@@ -96,7 +96,8 @@ class HtmlTranslator(AiTranslator):
|
|||||||
thinking=config.thinking,
|
thinking=config.thinking,
|
||||||
max_concurrent=config.concurrent,
|
max_concurrent=config.concurrent,
|
||||||
timeout=config.timeout,
|
timeout=config.timeout,
|
||||||
logger=self.logger
|
logger=self.logger,
|
||||||
|
glossary_dict=config.glossary_dict
|
||||||
)
|
)
|
||||||
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
||||||
self.insert_mode = config.insert_mode
|
self.insert_mode = config.insert_mode
|
||||||
|
|||||||
@@ -28,7 +28,8 @@ class JsonTranslator(AiTranslator):
|
|||||||
thinking=config.thinking,
|
thinking=config.thinking,
|
||||||
max_concurrent=config.concurrent,
|
max_concurrent=config.concurrent,
|
||||||
timeout=config.timeout,
|
timeout=config.timeout,
|
||||||
logger=self.logger)
|
logger=self.logger,
|
||||||
|
glossary_dict=config.glossary_dict)
|
||||||
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
||||||
self.jsonpaths = config.json_paths
|
self.jsonpaths = config.json_paths
|
||||||
|
|
||||||
|
|||||||
@@ -29,7 +29,8 @@ class MDTranslator(AiTranslator):
|
|||||||
thinking=config.thinking,
|
thinking=config.thinking,
|
||||||
max_concurrent=config.concurrent,
|
max_concurrent=config.concurrent,
|
||||||
timeout=config.timeout,
|
timeout=config.timeout,
|
||||||
logger=self.logger)
|
logger=self.logger,
|
||||||
|
glossary_dict=config.glossary_dict)
|
||||||
self.translate_agent = MDTranslateAgent(agent_config)
|
self.translate_agent = MDTranslateAgent(agent_config)
|
||||||
|
|
||||||
def translate(self, document: MarkdownDocument) -> Self:
|
def translate(self, document: MarkdownDocument) -> Self:
|
||||||
@@ -37,7 +38,7 @@ class MDTranslator(AiTranslator):
|
|||||||
with MDMaskUrisContext(document):
|
with MDMaskUrisContext(document):
|
||||||
chunks: list[str] = split_markdown_text(document.content.decode(), self.chunk_size)
|
chunks: list[str] = split_markdown_text(document.content.decode(), self.chunk_size)
|
||||||
self.logger.info(f"markdown分为{len(chunks)}块")
|
self.logger.info(f"markdown分为{len(chunks)}块")
|
||||||
result: list[str] = self.translate_agent.send_prompts(chunks)
|
result: list[str] = self.translate_agent.send_chunks(chunks)
|
||||||
content = join_markdown_texts(result)
|
content = join_markdown_texts(result)
|
||||||
# 做一些加强鲁棒性的操作
|
# 做一些加强鲁棒性的操作
|
||||||
content = content.replace(r'\(', r'\(')
|
content = content.replace(r'\(', r'\(')
|
||||||
@@ -52,7 +53,7 @@ class MDTranslator(AiTranslator):
|
|||||||
with MDMaskUrisContext(document):
|
with MDMaskUrisContext(document):
|
||||||
chunks: list[str] = split_markdown_text(document.content.decode(), self.chunk_size)
|
chunks: list[str] = split_markdown_text(document.content.decode(), self.chunk_size)
|
||||||
self.logger.info(f"markdown分为{len(chunks)}块")
|
self.logger.info(f"markdown分为{len(chunks)}块")
|
||||||
result: list[str] = await self.translate_agent.send_prompts_async(chunks)
|
result: list[str] = await self.translate_agent.send_chunks_async(chunks)
|
||||||
|
|
||||||
def run():
|
def run():
|
||||||
content = join_markdown_texts(result)
|
content = join_markdown_texts(result)
|
||||||
|
|||||||
@@ -35,7 +35,8 @@ class SrtTranslator(AiTranslator):
|
|||||||
thinking=config.thinking,
|
thinking=config.thinking,
|
||||||
max_concurrent=config.concurrent,
|
max_concurrent=config.concurrent,
|
||||||
timeout=config.timeout,
|
timeout=config.timeout,
|
||||||
logger=self.logger
|
logger=self.logger,
|
||||||
|
glossary_dict=config.glossary_dict
|
||||||
)
|
)
|
||||||
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
||||||
self.insert_mode = config.insert_mode
|
self.insert_mode = config.insert_mode
|
||||||
|
|||||||
@@ -26,14 +26,15 @@ class TXTTranslator(AiTranslator):
|
|||||||
thinking=config.thinking,
|
thinking=config.thinking,
|
||||||
max_concurrent=config.concurrent,
|
max_concurrent=config.concurrent,
|
||||||
timeout=config.timeout,
|
timeout=config.timeout,
|
||||||
logger=self.logger)
|
logger=self.logger,
|
||||||
|
glossary_dict=config.glossary_dict)
|
||||||
self.translate_agent = TXTTranslateAgent(agent_config)
|
self.translate_agent = TXTTranslateAgent(agent_config)
|
||||||
|
|
||||||
def translate(self, document: Document) -> Self:
|
def translate(self, document: Document) -> Self:
|
||||||
self.logger.info("正在翻译txt")
|
self.logger.info("正在翻译txt")
|
||||||
chunks: list[str] = split_markdown_text(document.content.decode(), max_block_size=self.chunk_size)
|
chunks: list[str] = split_markdown_text(document.content.decode(), max_block_size=self.chunk_size)
|
||||||
self.logger.info(f"txt分为{len(chunks)}块")
|
self.logger.info(f"txt分为{len(chunks)}块")
|
||||||
result: list[str] = self.translate_agent.send_prompts(chunks)
|
result: list[str] = self.translate_agent.send_chunks(chunks)
|
||||||
content = "\n".join(result)
|
content = "\n".join(result)
|
||||||
document.content = content.encode()
|
document.content = content.encode()
|
||||||
self.logger.info("翻译完成")
|
self.logger.info("翻译完成")
|
||||||
@@ -43,7 +44,7 @@ class TXTTranslator(AiTranslator):
|
|||||||
self.logger.info("正在翻译txt")
|
self.logger.info("正在翻译txt")
|
||||||
chunks: list[str] = split_markdown_text(document.content.decode(), max_block_size=self.chunk_size)
|
chunks: list[str] = split_markdown_text(document.content.decode(), max_block_size=self.chunk_size)
|
||||||
self.logger.info(f"txt分为{len(chunks)}块")
|
self.logger.info(f"txt分为{len(chunks)}块")
|
||||||
result: list[str] = await self.translate_agent.send_prompts_async(chunks)
|
result: list[str] = await self.translate_agent.send_chunks_async(chunks)
|
||||||
content = "\n".join(result)
|
content = "\n".join(result)
|
||||||
document.content = content.encode()
|
document.content = content.encode()
|
||||||
self.logger.info("翻译完成")
|
self.logger.info("翻译完成")
|
||||||
|
|||||||
@@ -36,7 +36,8 @@ class XlsxTranslator(AiTranslator):
|
|||||||
thinking=config.thinking,
|
thinking=config.thinking,
|
||||||
max_concurrent=config.concurrent,
|
max_concurrent=config.concurrent,
|
||||||
timeout=config.timeout,
|
timeout=config.timeout,
|
||||||
logger=self.logger)
|
logger=self.logger,
|
||||||
|
glossary_dict=config.glossary_dict)
|
||||||
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
||||||
self.insert_mode = config.insert_mode
|
self.insert_mode = config.insert_mode
|
||||||
self.separator = config.separator
|
self.separator = config.separator
|
||||||
|
|||||||
Reference in New Issue
Block a user