This commit is contained in:
xunbu
2025-08-18 10:19:40 +08:00
parent 87c8b9914a
commit b612c9e67e
4 changed files with 6 additions and 31 deletions

View File

@@ -1 +1 @@
__version__="1.1.4"
__version__="1.1.5"

View File

@@ -1,3 +1,4 @@
import asyncio
import json
from dataclasses import dataclass
from json import JSONDecodeError
@@ -76,7 +77,7 @@ class SegmentsTranslateAgent(Agent):
# todo:增加协程粒度
async def send_segments_async(self, segments: list[str], chunk_size: int):
indexed_originals, chunks, merged_indices_list = segments2json_chunks(segments, chunk_size)
indexed_originals, chunks, merged_indices_list = await asyncio.to_thread(segments2json_chunks,segments, chunk_size)
prompts = [json.dumps(chunk, ensure_ascii=False) for chunk in chunks]
translated_chunks = await super().send_prompts_async(prompts=prompts)
indexed_translated = indexed_originals.copy()

View File

@@ -8,7 +8,6 @@ from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, Se
from docutranslate.ir.document import Document
from docutranslate.translator.ai_translator.base import AiTranslatorConfig
from docutranslate.translator.base import Translator
from docutranslate.utils.json_utils import flat_json_split
@dataclass
@@ -46,31 +45,6 @@ class JsonTranslator(Translator):
all_matches.extend(matches)
return all_matches
def _translate_texts_in_batches(self, texts: list[str]) -> list[str]:
    """Translate a list of texts in batches, returning results in input order.

    The texts are keyed by their list index, split into chunks with
    ``flat_json_split`` using ``self.chunk_size``, serialized to JSON and
    sent through ``self.translate_agent.send_prompts``. Responses are merged
    back over a copy of the originals, so any entry that was not translated
    keeps its original text.

    Args:
        texts: The source strings to translate.

    Returns:
        A list with exactly ``len(texts)`` items, in the same order as
        ``texts``.
    """
    # Nothing to translate: avoid a pointless round trip to the agent.
    if not texts:
        return []

    # 1. Use the list index as a unique string ID for every text.
    indexed_originals = {str(i): text for i, text in enumerate(texts)}

    # 2. Split the big mapping into smaller chunks.
    chunks = flat_json_split(indexed_originals, self.chunk_size)

    # 3. Serialize each chunk. ensure_ascii=False keeps non-ASCII text
    #    readable in the prompt instead of turning it into \uXXXX escapes.
    prompts = [json.dumps(chunk, ensure_ascii=False) for chunk in chunks]
    translated_chunks = self.translate_agent.send_prompts(prompts)

    # 4. Merge the responses over a copy of the originals so the structure
    #    survives even when some chunk could not be translated.
    indexed_translated = indexed_originals.copy()
    for chunk_str in translated_chunks:
        try:
            translated_part = json.loads(chunk_str)
        except (json.JSONDecodeError, TypeError):
            # Malformed or missing response: keep the originals for this chunk.
            continue
        if not isinstance(translated_part, dict):
            continue
        for key, value in translated_part.items():
            # Ignore IDs that were never sent; accepting them would make the
            # result longer than the input.
            if key in indexed_translated:
                indexed_translated[key] = value

    # 5. Dicts preserve insertion order, so values come back in input order.
    return list(indexed_translated.values())
def _update_content_with_translations(self, content: dict, matches: list[Any], translated_texts: list[str]):
"""
使用翻译后的文本更新原始JSON内容。
@@ -103,7 +77,7 @@ class JsonTranslator(Translator):
original_texts = [match.value for match in all_matches]
# 步骤 2: 批量翻译提取出的文本
translated_texts = self.translate_agent.send_segments(original_texts,self.chunk_size)
translated_texts = self.translate_agent.send_segments(original_texts, self.chunk_size)
# 健壮性检查:确保翻译回来的项目数量与发送的一致
if len(original_texts) != len(translated_texts):
@@ -131,7 +105,7 @@ class JsonTranslator(Translator):
original_texts = [match.value for match in all_matches]
# 步骤 2: 批量翻译提取出的文本
translated_texts = await self.translate_agent.send_segments_async(original_texts,self.chunk_size)
translated_texts = await self.translate_agent.send_segments_async(original_texts, self.chunk_size)
# 健壮性检查:确保翻译回来的项目数量与发送的一致
if len(original_texts) != len(translated_texts):