降低缺键概率

2025-09-06 20:11:44 +08:00
parent 9b478ebc4a
commit 2c6d544ba4
3 changed files with 27 additions and 19 deletions
--- a/docutranslate/agents/agent.py
+++ b/docutranslate/agents/agent.py
@@ -191,7 +191,7 @@ class Agent:
            should_retry = True
        # 专门捕获部分翻译错误（软错误）
        except PartialAgentResultError as e:
-            # print(f"【测试】\nprompt:\n{prompt}\nresp:\n{result}")
+            print(f"【测试】\nprompt:\n{prompt}\nresp:\n{result}")
            self.logger.error(f"收到部分返回结果，将尝试重试: {e}")
            current_partial_result = e.partial_result
            should_retry = True
--- a/docutranslate/agents/segments_agent.py
+++ b/docutranslate/agents/segments_agent.py
@@ -41,16 +41,29 @@ class SegmentsTranslateAgent(Agent):
 - If a segment is already in the target language({config.to_lang}), keep it as is.
 # Output
 - The translated sequence of segments, represented as JSON text (note: not a code block). The keys are the segment IDs, and the values are the translated segments.
- The returned JSON text must be a dictionary of the form {{<segment_id>: <translation>}}.
+- The response must be a JSON object(indent=0) with the following structure: 
+{{
+"<segment_id>": "<translation>"
+}}
 - (very important) The segment IDs in the output must exactly match those in the input. And all segment IDs in input must appear in the output.
+- All keys that appear in the input JSON must exist in the output JSON.
 # Example(Assuming the target language is Chinese in the example, {config.to_lang} is the actual target language)
 ## Input
-{{"10":"Tom say:\"hello\"","11":“apple”，"12":true,"13":"false","14":null}}
+{{
+"10": "Tom say:\"hello\"",
+"11": "apple",
+"12": true,
+"13": "false",
+"14": null
+}}
 ## Correct Output
-{{"10":"汤姆说：“你好”","11":"苹果","12":true,"13":"错误","14":null}}
-## Incorrect Output
-{{"10":"汤姆说:“你好”，"11":“苹果”，"12":true,"13":"错误"}}
-> Warning: Never wrap the JSON text in ```, Never miss segment Translation.
+{{
+"10": "汤姆说：“你好”",
+"11": "苹果",
+"12": true,
+"13": "错误",
+"14": null
+}}
 """
        self.custom_prompt = config.custom_prompt
        if config.custom_prompt:
@@ -138,7 +151,7 @@ class SegmentsTranslateAgent(Agent):

    def send_segments(self, segments: list[str], chunk_size: int) -> list[str]:
        indexed_originals, chunks, merged_indices_list = segments2json_chunks(segments, chunk_size)
-        prompts = [json.dumps(chunk, ensure_ascii=False) for chunk in chunks]
+        prompts = [json.dumps(chunk, ensure_ascii=False, indent=0) for chunk in chunks]

        translated_chunks = super().send_prompts(prompts=prompts, pre_send_handler=self._pre_send_handler,
                                                 result_handler=self._result_handler,
@@ -176,7 +189,7 @@ class SegmentsTranslateAgent(Agent):
    async def send_segments_async(self, segments: list[str], chunk_size: int) -> list[str]:
        indexed_originals, chunks, merged_indices_list = await asyncio.to_thread(segments2json_chunks, segments,
                                                                                 chunk_size)
-        prompts = [json.dumps(chunk, ensure_ascii=False) for chunk in chunks]
+        prompts = [json.dumps(chunk, ensure_ascii=False, indent=0) for chunk in chunks]

        translated_chunks = await super().send_prompts_async(prompts=prompts, pre_send_handler=self._pre_send_handler,
                                                             result_handler=self._result_handler,
--- a/docutranslate/utils/json_utils.py
+++ b/docutranslate/utils/json_utils.py
@@ -87,17 +87,12 @@ list[dict[str, str]], list[tuple[int, int]]]:

 def fix_json_string(json_string):
    def repl(m:re.Match):
-        result=""
-        if m.group(1):
-            result+='",'
-        result+=f'"{m.group(2)}":'
-        if m.group(3):
-            result+='"'
-        return result
+        return ('"' if m.group(1) else "") + f',\n"{m.group(2)}":' + ('"' if m.group(3) else "")  # no same-type quotes nested in the f-string, so this also parses before Python 3.12
    fixed_json = re.sub(
-        r'([”"])?\s?[，|,]\s?[\"|“]\s?(\d+?)\s?[\"|”]\s?[：|:]\s?([\"|“])?',
+        r'([“”"])?\s*[，,]\s*["“”]\s*(\d+)\s*["“”]\s*[：:]\s*(["“”])?',
        repl,
-        json_string
+        json_string,
+        flags=re.MULTILINE  # keyword required: the 4th positional parameter of re.sub is count (re.MULTILINE == 8 would cap replacements at 8)
    )
    return fixed_json