From 2c6d544ba4afc888051972f1849d89e2a6f5dda1 Mon Sep 17 00:00:00 2001 From: xunbu Date: Sat, 6 Sep 2025 20:11:44 +0800 Subject: [PATCH] =?UTF-8?q?=E9=99=8D=E4=BD=8E=E7=BC=BA=E9=94=AE=E6=A6=82?= =?UTF-8?q?=E7=8E=87?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docutranslate/agents/agent.py | 2 +- docutranslate/agents/segments_agent.py | 31 ++++++++++++++++++-------- docutranslate/utils/json_utils.py | 13 ++++------- 3 files changed, 27 insertions(+), 19 deletions(-) diff --git a/docutranslate/agents/agent.py b/docutranslate/agents/agent.py index 8e4b03a..19bf369 100644 --- a/docutranslate/agents/agent.py +++ b/docutranslate/agents/agent.py @@ -191,7 +191,7 @@ class Agent: should_retry = True # 专门捕获部分翻译错误(软错误) except PartialAgentResultError as e: - # print(f"【测试】\nprompt:\n{prompt}\nresp:\n{result}") + print(f"【测试】\nprompt:\n{prompt}\nresp:\n{result}") self.logger.error(f"收到部分返回结果,将尝试重试: {e}") current_partial_result = e.partial_result should_retry = True diff --git a/docutranslate/agents/segments_agent.py b/docutranslate/agents/segments_agent.py index 162a160..11a9308 100644 --- a/docutranslate/agents/segments_agent.py +++ b/docutranslate/agents/segments_agent.py @@ -41,16 +41,29 @@ class SegmentsTranslateAgent(Agent): - If a segment is already in the target language({config.to_lang}), keep it as is. # Output - The translated sequence of segments, represented as JSON text (note: not a code block). The keys are the segment IDs, and the values are the translated segments. -- The returned JSON text must be a dictionary of the form {{: }}. +- The response must be a JSON object(indent=0) with the following structure: +{{ +"": "" +}} - (very important) The segment IDs in the output must exactly match those in the input. And all segment IDs in input must appear in the output. +- All keys that appear in the input JSON must exist in the output JSON. # Example(Assuming the target language is Chinese in the example, {config.to_lang} is the actual target language) ## Input -{{"10":"Tom say:\"hello\"","11":“apple”,"12":true,"13":"false","14":null}} +{{ +"10": "Tom say:\"hello\"", +"11": "apple", +"12": true, +"13": "false", +"14": null +}} ## Correct Output -{{"10":"汤姆说:“你好”","11":"苹果","12":true,"13":"错误","14":null}} -## Incorrect Output -{{"10":"汤姆说:“你好”,"11":“苹果”,"12":true,"13":"错误"}} -> Warning: Never wrap the JSON text in ```, Never miss segment Translation. +{{ +"10": "汤姆说:“你好”", +"11": "苹果", +"12": true, +"13": "错误", +"14": null +}} """ self.custom_prompt = config.custom_prompt if config.custom_prompt: @@ -75,7 +88,7 @@ class SegmentsTranslateAgent(Agent): raise AgentResultError("result为空值但原文不为空") return {} try: - result=fix_json_string(result) + result = fix_json_string(result) original_chunk = json.loads(origin_prompt) repaired_result = json_repair.loads(result) @@ -138,7 +151,7 @@ class SegmentsTranslateAgent(Agent): def send_segments(self, segments: list[str], chunk_size: int) -> list[str]: indexed_originals, chunks, merged_indices_list = segments2json_chunks(segments, chunk_size) - prompts = [json.dumps(chunk, ensure_ascii=False) for chunk in chunks] + prompts = [json.dumps(chunk, ensure_ascii=False, indent=0) for chunk in chunks] translated_chunks = super().send_prompts(prompts=prompts, pre_send_handler=self._pre_send_handler, result_handler=self._result_handler, @@ -176,7 +189,7 @@ class SegmentsTranslateAgent(Agent): async def send_segments_async(self, segments: list[str], chunk_size: int) -> list[str]: indexed_originals, chunks, merged_indices_list = await asyncio.to_thread(segments2json_chunks, segments, chunk_size) - prompts = [json.dumps(chunk, ensure_ascii=False) for chunk in chunks] + prompts = [json.dumps(chunk, ensure_ascii=False, indent=0) for chunk in chunks] translated_chunks = await super().send_prompts_async(prompts=prompts, pre_send_handler=self._pre_send_handler, result_handler=self._result_handler, diff --git a/docutranslate/utils/json_utils.py b/docutranslate/utils/json_utils.py index 061811a..f1c9bb9 100644 --- a/docutranslate/utils/json_utils.py +++ b/docutranslate/utils/json_utils.py @@ -87,17 +87,12 @@ list[dict[str, str]], list[tuple[int, int]]]: def fix_json_string(json_string): def repl(m:re.Match): - result="" - if m.group(1): - result+='",' - result+=f'"{m.group(2)}":' - if m.group(3): - result+='"' - return result + return f'{'"' if m.group(1) else ""},\n"{m.group(2)}":{'"' if m.group(3) else ""}' fixed_json = re.sub( - r'([”"])?\s?[,|,]\s?[\"|“]\s?(\d+?)\s?[\"|”]\s?[:|:]\s?([\"|“])?', + r'([“”"])?\s*[,,]\s*["“”]\s*(\d+)\s*["“”]\s*[::]\s*(["“”])?', repl, - json_string + json_string, + re.MULTILINE ) return fixed_json