重构代码,新增了MarkdownBasedManager和TXTManager实现

This commit is contained in:
xunbu
2025-07-28 23:41:35 +08:00
parent 6ab3278481
commit 80634fe749
45 changed files with 885 additions and 139 deletions

View File

View File

@@ -0,0 +1,16 @@
from dataclasses import dataclass
from logging import Logger
@dataclass
class AiTranslateConfig:
base_url: str
api_key: str
model_id: str
to_lang: str
custom_prompt: str | None = None
temperature: float = 0.7
timeout: int = 2000
chunk_size: int = 3000
concurrent: int = 30
logger: Logger | None = None

View File

@@ -0,0 +1,21 @@
from typing import runtime_checkable, Protocol, TypeVar
from docutranslate.agents import Agent
from docutranslate.ir.document import Document
# Document type a translator accepts, and the agent type it is parameterised by.
T = TypeVar("T", bound=Document)
V = TypeVar("V", bound=Agent)


@runtime_checkable
class Translator(Protocol[T, V]):
    """
    Translates the intermediate text and replaces it in place.

    A Translator performs no format conversion.

    NOTE(review): ``V`` is not referenced by any member declared here.
    NOTE(review): the methods are annotated as returning ``Document``, while
    the concrete translators added in this change return ``self`` - confirm
    which contract is intended.
    """

    def translate(self, document: T) -> Document:
        """Translate ``document`` synchronously."""
        ...

    async def translate_async(self, document: T) -> Document:
        """Coroutine counterpart of :meth:`translate`."""
        ...

    def log(self, info: str):
        """Emit ``info`` through the translator's logging channel."""
        ...

View File

@@ -0,0 +1,70 @@
import asyncio
from dataclasses import dataclass
from logging import Logger
from typing import Self
from docutranslate.agents import MDTranslateAgent
from docutranslate.document_context.md_mask_context import MDMaskUrisContext
from docutranslate.ir.markdown_document import MarkdownDocument
from docutranslate.logger import global_logger
from docutranslate.translater.base import AiTranslateConfig
from docutranslate.translater.interfaces import Translator
from docutranslate.utils.markdown_splitter import split_markdown_text, join_markdown_texts
from docutranslate.utils.markdown_utils import clean_markdown_math_block
@dataclass
class MDTranslateConfig(AiTranslateConfig):
    """Configuration for MDTranslator; adds no fields to AiTranslateConfig."""
class MDTranslator(Translator):
    """Translate a MarkdownDocument in place using an MDTranslateAgent.

    The document content is split into chunks, each chunk is sent to the
    agent, and the joined, cleaned-up result overwrites ``document.content``.
    """

    def __init__(self, config: MDTranslateConfig):
        """Build the translation agent from ``config``.

        Args:
            config: Endpoint, model and tuning settings. When
                ``config.logger`` is None the package-wide logger is used.
        """
        self.logger = config.logger or global_logger
        self.chunk_size = config.chunk_size
        self.translate_agent = MDTranslateAgent(
            custom_prompt=config.custom_prompt,
            to_lang=config.to_lang,
            baseurl=config.base_url,
            key=config.api_key,
            model_id=config.model_id,
            system_prompt=None,
            temperature=config.temperature,
            max_concurrent=config.concurrent,
            timeout=config.timeout,
            logger=self.logger,
        )

    def _split(self, document: MarkdownDocument) -> list[str]:
        """Decode the document, split it into chunks and log the chunk count."""
        chunks: list[str] = split_markdown_text(document.content.decode(), self.chunk_size)
        self.logger.info(f"markdown分为{len(chunks)}")
        return chunks

    @staticmethod
    def _write_back(document: MarkdownDocument, result: list[str]) -> None:
        """Join translated chunks, clean them up and store them on ``document``."""
        content = join_markdown_texts(result)
        # Robustness fix-ups for the model output.
        # The search patterns in the reviewed text were not valid Python
        # literals (a raw string cannot end in a lone backslash), so they are
        # reconstructed here as "backslash + fullwidth parenthesis" - an inline
        # math delimiter whose ASCII parenthesis was swapped for U+FF08/U+FF09.
        # Unicode escapes keep the fullwidth characters from being lost again.
        # NOTE(review): reconstruction - confirm against the repository.
        content = content.replace("\\\uff08", r"\(")
        content = content.replace("\\\uff09", r"\)")
        content = clean_markdown_math_block(content)
        document.content = content.encode()

    def translate(self, document: MarkdownDocument) -> Self:
        """Translate ``document`` in place, blocking until finished.

        Args:
            document: Markdown document whose ``content`` bytes are replaced
                by the translated text.

        Returns:
            This translator, to allow call chaining.
        """
        self.logger.info("正在翻译markdown")
        # The write-back stays inside the mask context - presumably so the
        # context's exit sees the translated content; confirm against
        # MDMaskUrisContext.
        with MDMaskUrisContext(document):
            chunks = self._split(document)
            result: list[str] = self.translate_agent.send_prompts(chunks)
            self._write_back(document, result)
        self.logger.info("翻译完成")
        return self

    async def translate_async(self, document: MarkdownDocument) -> Self:
        """Coroutine counterpart of :meth:`translate`.

        Args:
            document: Markdown document whose ``content`` bytes are replaced
                by the translated text.

        Returns:
            This translator, to allow call chaining.
        """
        self.logger.info("正在翻译markdown")
        with MDMaskUrisContext(document):
            chunks = self._split(document)
            result: list[str] = await self.translate_agent.send_prompts_async(chunks)
            # Joining and cleaning is synchronous string work; run it in a
            # worker thread so the event loop is not held up.
            await asyncio.to_thread(self._write_back, document, result)
        self.logger.info("翻译完成")
        return self

View File

@@ -0,0 +1,50 @@
from dataclasses import dataclass
from typing import Self
from docutranslate.agents.txt_agent import TXTTranslateAgent
from docutranslate.ir.document import Document
from docutranslate.logger import global_logger
from docutranslate.translater.base import AiTranslateConfig
from docutranslate.translater.interfaces import Translator
from docutranslate.utils.markdown_splitter import split_markdown_text
@dataclass
class TXTTranslateConfig(AiTranslateConfig):
    """Configuration for TXTTranslator; adds no fields to AiTranslateConfig."""
class TXTTranslator(Translator):
    """Translate a plain-text Document in place using a TXTTranslateAgent.

    The text is cut into chunks with the markdown splitter, each chunk is sent
    to the agent, and the results are joined with newlines and written back to
    ``document.content``.
    """

    def __init__(self, config: TXTTranslateConfig):
        """Build the translation agent from ``config``.

        Args:
            config: Endpoint, model and tuning settings. When
                ``config.logger`` is None the package-wide logger is used.
        """
        self.logger = config.logger or global_logger
        self.chunk_size = config.chunk_size
        agent_options = dict(
            custom_prompt=config.custom_prompt,
            to_lang=config.to_lang,
            baseurl=config.base_url,
            key=config.api_key,
            model_id=config.model_id,
            system_prompt=None,
            temperature=config.temperature,
            max_concurrent=config.concurrent,
            timeout=config.timeout,
            logger=self.logger,
        )
        self.translate_agent = TXTTranslateAgent(**agent_options)

    def _begin(self, document: Document) -> list[str]:
        """Log the start, split the decoded text and log the chunk count."""
        self.logger.info("正在翻译txt")
        text = document.content.decode()
        pieces: list[str] = split_markdown_text(text, max_block_size=self.chunk_size)
        self.logger.info(f"txt分为{len(pieces)}")
        return pieces

    def _finish(self, document: Document, translated: list[str]) -> Self:
        """Write the newline-joined translation back and log completion."""
        document.content = "\n".join(translated).encode()
        self.logger.info("翻译完成")
        return self

    def translate(self, document: Document) -> Self:
        """Translate ``document`` in place, blocking until finished.

        Returns:
            This translator, to allow call chaining.
        """
        pieces = self._begin(document)
        translated: list[str] = self.translate_agent.send_prompts(pieces)
        return self._finish(document, translated)

    async def translate_async(self, document: Document) -> Self:
        """Coroutine counterpart of :meth:`translate`.

        Returns:
            This translator, to allow call chaining.
        """
        pieces = self._begin(document)
        translated: list[str] = await self.translate_agent.send_prompts_async(pieces)
        return self._finish(document, translated)