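fix: run segments2json_chunks in a worker thread via asyncio.to_thread in SegmentsTranslateAgent.send_segments_async; remove JsonTranslator._translate_texts_in_batches and its flat_json_split import; bump version to 1.1.5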
This commit is contained in:
@@ -1 +1 @@
|
|||||||
__version__="1.1.4"
|
__version__="1.1.5"
|
||||||
@@ -1,3 +1,4 @@
|
|||||||
|
import asyncio
|
||||||
import json
|
import json
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from json import JSONDecodeError
|
from json import JSONDecodeError
|
||||||
@@ -76,7 +77,7 @@ class SegmentsTranslateAgent(Agent):
|
|||||||
|
|
||||||
# todo:增加协程粒度
|
# todo:增加协程粒度
|
||||||
async def send_segments_async(self, segments: list[str], chunk_size: int):
|
async def send_segments_async(self, segments: list[str], chunk_size: int):
|
||||||
indexed_originals, chunks, merged_indices_list = segments2json_chunks(segments, chunk_size)
|
indexed_originals, chunks, merged_indices_list = await asyncio.to_thread(segments2json_chunks,segments, chunk_size)
|
||||||
prompts = [json.dumps(chunk, ensure_ascii=False) for chunk in chunks]
|
prompts = [json.dumps(chunk, ensure_ascii=False) for chunk in chunks]
|
||||||
translated_chunks = await super().send_prompts_async(prompts=prompts)
|
translated_chunks = await super().send_prompts_async(prompts=prompts)
|
||||||
indexed_translated = indexed_originals.copy()
|
indexed_translated = indexed_originals.copy()
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, Se
|
|||||||
from docutranslate.ir.document import Document
|
from docutranslate.ir.document import Document
|
||||||
from docutranslate.translator.ai_translator.base import AiTranslatorConfig
|
from docutranslate.translator.ai_translator.base import AiTranslatorConfig
|
||||||
from docutranslate.translator.base import Translator
|
from docutranslate.translator.base import Translator
|
||||||
from docutranslate.utils.json_utils import flat_json_split
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -46,31 +45,6 @@ class JsonTranslator(Translator):
|
|||||||
all_matches.extend(matches)
|
all_matches.extend(matches)
|
||||||
return all_matches
|
return all_matches
|
||||||
|
|
||||||
def _translate_texts_in_batches(self, texts: list[str]) -> list[str]:
|
|
||||||
"""
|
|
||||||
将文本列表打包、分块、发送翻译并返回翻译结果。
|
|
||||||
此函数封装了与翻译代理交互的所有细节。
|
|
||||||
"""
|
|
||||||
# 1. 使用索引作为唯一ID,将文本列表转换为字典,便于API处理
|
|
||||||
indexed_originals = {str(i): text for i, text in enumerate(texts)}
|
|
||||||
|
|
||||||
# 2. 将大字典分割成小块,以满足API的限制
|
|
||||||
chunks = flat_json_split(indexed_originals, self.chunk_size)
|
|
||||||
|
|
||||||
# 3. 将每个块序列化为JSON字符串并发送翻译
|
|
||||||
prompts = [json.dumps(chunk) for chunk in chunks]
|
|
||||||
translated_chunks = self.translate_agent.send_prompts(prompts)
|
|
||||||
|
|
||||||
# 4. 将翻译结果合并回一个字典
|
|
||||||
# 我们从原始字典的副本开始,以确保即使翻译失败,我们也能保持结构
|
|
||||||
indexed_translated = indexed_originals.copy()
|
|
||||||
for chunk_str in translated_chunks:
|
|
||||||
translated_part = json.loads(chunk_str)
|
|
||||||
indexed_translated.update(translated_part)
|
|
||||||
|
|
||||||
# 5. 按原始顺序返回翻译后的文本列表
|
|
||||||
return list(indexed_translated.values())
|
|
||||||
|
|
||||||
def _update_content_with_translations(self, content: dict, matches: list[Any], translated_texts: list[str]):
|
def _update_content_with_translations(self, content: dict, matches: list[Any], translated_texts: list[str]):
|
||||||
"""
|
"""
|
||||||
使用翻译后的文本更新原始JSON内容。
|
使用翻译后的文本更新原始JSON内容。
|
||||||
|
|||||||
Reference in New Issue
Block a user