diff --git a/docutranslate/__init__.py b/docutranslate/__init__.py index a2e880d..4a6224c 100644 --- a/docutranslate/__init__.py +++ b/docutranslate/__init__.py @@ -1 +1 @@ -__version__="1.1.4" \ No newline at end of file +__version__="1.1.5" \ No newline at end of file diff --git a/docutranslate/agents/segments_agent.py b/docutranslate/agents/segments_agent.py index bda3195..b16c800 100644 --- a/docutranslate/agents/segments_agent.py +++ b/docutranslate/agents/segments_agent.py @@ -1,3 +1,4 @@ +import asyncio import json from dataclasses import dataclass from json import JSONDecodeError @@ -76,7 +77,7 @@ class SegmentsTranslateAgent(Agent): # todo:增加协程粒度 async def send_segments_async(self, segments: list[str], chunk_size: int): - indexed_originals, chunks, merged_indices_list = segments2json_chunks(segments, chunk_size) + indexed_originals, chunks, merged_indices_list = await asyncio.to_thread(segments2json_chunks,segments, chunk_size) prompts = [json.dumps(chunk, ensure_ascii=False) for chunk in chunks] translated_chunks = await super().send_prompts_async(prompts=prompts) indexed_translated = indexed_originals.copy() diff --git a/docutranslate/translator/ai_translator/json_translator.py b/docutranslate/translator/ai_translator/json_translator.py index 61403f3..4c14b93 100644 --- a/docutranslate/translator/ai_translator/json_translator.py +++ b/docutranslate/translator/ai_translator/json_translator.py @@ -8,7 +8,6 @@ from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, Se from docutranslate.ir.document import Document from docutranslate.translator.ai_translator.base import AiTranslatorConfig from docutranslate.translator.base import Translator -from docutranslate.utils.json_utils import flat_json_split @dataclass @@ -46,31 +45,6 @@ class JsonTranslator(Translator): all_matches.extend(matches) return all_matches - def _translate_texts_in_batches(self, texts: list[str]) -> list[str]: - """ - 将文本列表打包、分块、发送翻译并返回翻译结果。 - 此函数封装了与翻译代理交互的所有细节。 - """ - # 1. 使用索引作为唯一ID,将文本列表转换为字典,便于API处理 - indexed_originals = {str(i): text for i, text in enumerate(texts)} - - # 2. 将大字典分割成小块,以满足API的限制 - chunks = flat_json_split(indexed_originals, self.chunk_size) - - # 3. 将每个块序列化为JSON字符串并发送翻译 - prompts = [json.dumps(chunk) for chunk in chunks] - translated_chunks = self.translate_agent.send_prompts(prompts) - - # 4. 将翻译结果合并回一个字典 - # 我们从原始字典的副本开始,以确保即使翻译失败,我们也能保持结构 - indexed_translated = indexed_originals.copy() - for chunk_str in translated_chunks: - translated_part = json.loads(chunk_str) - indexed_translated.update(translated_part) - - # 5. 按原始顺序返回翻译后的文本列表 - return list(indexed_translated.values()) - def _update_content_with_translations(self, content: dict, matches: list[Any], translated_texts: list[str]): """ 使用翻译后的文本更新原始JSON内容。 @@ -103,7 +77,7 @@ class JsonTranslator(Translator): original_texts = [match.value for match in all_matches] # 步骤 2: 批量翻译提取出的文本 - translated_texts = self.translate_agent.send_segments(original_texts,self.chunk_size) + translated_texts = self.translate_agent.send_segments(original_texts, self.chunk_size) # 健壮性检查:确保翻译回来的项目数量与发送的一致 if len(original_texts) != len(translated_texts): @@ -131,7 +105,7 @@ class JsonTranslator(Translator): original_texts = [match.value for match in all_matches] # 步骤 2: 批量翻译提取出的文本 - translated_texts = await self.translate_agent.send_segments_async(original_texts,self.chunk_size) + translated_texts = await self.translate_agent.send_segments_async(original_texts, self.chunk_size) # 健壮性检查:确保翻译回来的项目数量与发送的一致 if len(original_texts) != len(translated_texts): diff --git a/更新日志.txt b/更新日志.txt index 44e17b1..f57ed40 100644 --- a/更新日志.txt +++ b/更新日志.txt @@ -1,6 +1,6 @@ 更新日志 ---------------------------------------- -v1.1.4版 2025.8.18 +v1.1.5版 2025.8.18 修复 - 修复xlsx、docx单个块/段落过长时被截断的问题 - 交互式界面tooltip不显示的问题