重构代码,新增了MarkdownBasedManager和TXTManager实现
This commit is contained in:
0
docutranslate/translater/__init__.py
Normal file
0
docutranslate/translater/__init__.py
Normal file
16
docutranslate/translater/base.py
Normal file
16
docutranslate/translater/base.py
Normal file
@@ -0,0 +1,16 @@
|
||||
from dataclasses import dataclass
|
||||
from logging import Logger
|
||||
|
||||
|
||||
@dataclass
|
||||
class AiTranslateConfig:
|
||||
base_url: str
|
||||
api_key: str
|
||||
model_id: str
|
||||
to_lang: str
|
||||
custom_prompt: str | None = None
|
||||
temperature: float = 0.7
|
||||
timeout: int = 2000
|
||||
chunk_size: int = 3000
|
||||
concurrent: int = 30
|
||||
logger: Logger | None = None
|
||||
21
docutranslate/translater/interfaces.py
Normal file
21
docutranslate/translater/interfaces.py
Normal file
@@ -0,0 +1,21 @@
|
||||
from typing import runtime_checkable, Protocol, TypeVar
|
||||
|
||||
from docutranslate.agents import Agent
|
||||
from docutranslate.ir.document import Document
|
||||
|
||||
T=TypeVar('T',bound=Document)
|
||||
V=TypeVar('V',bound=Agent)
|
||||
|
||||
@runtime_checkable
class Translator(Protocol[T, V]):
    """Translate the intermediate text of a document, replacing it in place.

    A Translator performs no format conversion.
    """

    def translate(self, document: T) -> Document:
        """Synchronous translation entry point for ``document``."""

    async def translate_async(self, document: T) -> Document:
        """Coroutine counterpart of ``translate``."""

    def log(self, info: str):
        """Hook that receives an informational message ``info``."""
|
||||
70
docutranslate/translater/md_translator.py
Normal file
70
docutranslate/translater/md_translator.py
Normal file
@@ -0,0 +1,70 @@
|
||||
import asyncio
|
||||
from dataclasses import dataclass
|
||||
from logging import Logger
|
||||
from typing import Self
|
||||
|
||||
from docutranslate.agents import MDTranslateAgent
|
||||
from docutranslate.document_context.md_mask_context import MDMaskUrisContext
|
||||
from docutranslate.ir.markdown_document import MarkdownDocument
|
||||
from docutranslate.logger import global_logger
|
||||
from docutranslate.translater.base import AiTranslateConfig
|
||||
from docutranslate.translater.interfaces import Translator
|
||||
from docutranslate.utils.markdown_splitter import split_markdown_text, join_markdown_texts
|
||||
from docutranslate.utils.markdown_utils import clean_markdown_math_block
|
||||
|
||||
|
||||
@dataclass
class MDTranslateConfig(AiTranslateConfig):
    """Configuration for ``MDTranslator``; adds no fields to the base config."""
|
||||
|
||||
|
||||
|
||||
class MDTranslator(Translator):
    """Translate a ``MarkdownDocument`` in place through an ``MDTranslateAgent``.

    The document content is decoded, split with ``split_markdown_text`` using
    ``chunk_size``, sent to the agent chunk by chunk, re-joined, cleaned up
    and written back to ``document.content`` as bytes.
    """

    def __init__(self, config: MDTranslateConfig):
        """Create the translation agent from ``config``.

        ``global_logger`` is used when ``config.logger`` is falsy.
        """
        self.logger = config.logger or global_logger
        self.chunk_size = config.chunk_size
        self.translate_agent = MDTranslateAgent(
            custom_prompt=config.custom_prompt,
            to_lang=config.to_lang,
            baseurl=config.base_url,
            key=config.api_key,
            model_id=config.model_id,
            system_prompt=None,
            temperature=config.temperature,
            max_concurrent=config.concurrent,
            timeout=config.timeout,
            logger=self.logger,
        )

    @staticmethod
    def _postprocess(translated_chunks: list[str]) -> bytes:
        """Join translated chunks, repair math markup and return encoded bytes."""
        content = join_markdown_texts(translated_chunks)
        # Robustness fix-ups on the model output.
        # NOTE(review): the previous calls replaced r'\(' and r'\)' with
        # themselves, i.e. they did nothing. Presumed intent: a model may emit
        # full-width parentheses (U+FF08 / U+FF09) after the backslash, which
        # breaks the inline-math delimiters - confirm against upstream history.
        content = content.replace('\\\uff08', r'\(')
        content = content.replace('\\\uff09', r'\)')
        content = clean_markdown_math_block(content)
        return content.encode()

    def translate(self, document: MarkdownDocument) -> Self:
        """Translate ``document`` in place, blocking until finished.

        Returns this translator (not the document) so calls can be chained.
        """
        self.logger.info("正在翻译markdown")
        # The content is written back inside the context block; presumably the
        # context restores masked URIs on exit - verify in MDMaskUrisContext.
        with MDMaskUrisContext(document):
            chunks: list[str] = split_markdown_text(document.content.decode(), self.chunk_size)
            self.logger.info(f"markdown分为{len(chunks)}块")
            result: list[str] = self.translate_agent.send_prompts(chunks)
            document.content = self._postprocess(result)
        self.logger.info("翻译完成")
        return self

    async def translate_async(self, document: MarkdownDocument) -> Self:
        """Asynchronous variant of ``translate``.

        The agent call is awaited, and the join/clean-up step runs in a worker
        thread via ``asyncio.to_thread``. Returns this translator.
        """
        self.logger.info("正在翻译markdown")
        with MDMaskUrisContext(document):
            chunks: list[str] = split_markdown_text(document.content.decode(), self.chunk_size)
            self.logger.info(f"markdown分为{len(chunks)}块")
            result: list[str] = await self.translate_agent.send_prompts_async(chunks)
            document.content = await asyncio.to_thread(self._postprocess, result)
        self.logger.info("翻译完成")
        return self
|
||||
50
docutranslate/translater/txt_translator.py
Normal file
50
docutranslate/translater/txt_translator.py
Normal file
@@ -0,0 +1,50 @@
|
||||
from dataclasses import dataclass
|
||||
from typing import Self
|
||||
|
||||
from docutranslate.agents.txt_agent import TXTTranslateAgent
|
||||
from docutranslate.ir.document import Document
|
||||
from docutranslate.logger import global_logger
|
||||
from docutranslate.translater.base import AiTranslateConfig
|
||||
from docutranslate.translater.interfaces import Translator
|
||||
from docutranslate.utils.markdown_splitter import split_markdown_text
|
||||
|
||||
|
||||
@dataclass
class TXTTranslateConfig(AiTranslateConfig):
    """Configuration for ``TXTTranslator``; adds no fields to the base config."""
|
||||
|
||||
|
||||
class TXTTranslator(Translator):
    """Translate a plain-text ``Document`` in place through a ``TXTTranslateAgent``.

    The decoded content is split with ``split_markdown_text``, each piece is
    translated by the agent, and the pieces are joined with newlines and
    written back to ``document.content`` as bytes.
    """

    def __init__(self, config: TXTTranslateConfig):
        """Create the translation agent from ``config``.

        ``global_logger`` is used when ``config.logger`` is falsy.
        """
        self.logger = config.logger or global_logger
        self.chunk_size = config.chunk_size
        agent_options = dict(
            custom_prompt=config.custom_prompt,
            to_lang=config.to_lang,
            baseurl=config.base_url,
            key=config.api_key,
            model_id=config.model_id,
            system_prompt=None,
            temperature=config.temperature,
            max_concurrent=config.concurrent,
            timeout=config.timeout,
            logger=self.logger,
        )
        self.translate_agent = TXTTranslateAgent(**agent_options)

    def translate(self, document: Document) -> Self:
        """Translate ``document`` in place, blocking; returns this translator."""
        self.logger.info("正在翻译txt")
        source_text = document.content.decode()
        pieces: list[str] = split_markdown_text(source_text, max_block_size=self.chunk_size)
        self.logger.info(f"txt分为{len(pieces)}块")
        translated: list[str] = self.translate_agent.send_prompts(pieces)
        document.content = "\n".join(translated).encode()
        self.logger.info("翻译完成")
        return self

    async def translate_async(self, document: Document) -> Self:
        """Coroutine variant of ``translate``; returns this translator."""
        self.logger.info("正在翻译txt")
        source_text = document.content.decode()
        pieces: list[str] = split_markdown_text(source_text, max_block_size=self.chunk_size)
        self.logger.info(f"txt分为{len(pieces)}块")
        translated: list[str] = await self.translate_agent.send_prompts_async(pieces)
        document.content = "\n".join(translated).encode()
        self.logger.info("翻译完成")
        return self
|
||||
Reference in New Issue
Block a user