增加json文本修复处理,大幅减少缺键错误
This commit is contained in:
@@ -180,7 +180,6 @@ class Agent:
|
|||||||
)
|
)
|
||||||
response.raise_for_status()
|
response.raise_for_status()
|
||||||
result = response.json()["choices"][0]["message"]["content"]
|
result = response.json()["choices"][0]["message"]["content"]
|
||||||
|
|
||||||
if retry_count > 0:
|
if retry_count > 0:
|
||||||
self.logger.info(f"重试成功 (第 {retry_count}/{MAX_RETRY_COUNT} 次尝试)。")
|
self.logger.info(f"重试成功 (第 {retry_count}/{MAX_RETRY_COUNT} 次尝试)。")
|
||||||
|
|
||||||
@@ -192,6 +191,7 @@ class Agent:
|
|||||||
should_retry = True
|
should_retry = True
|
||||||
# 专门捕获部分翻译错误(软错误)
|
# 专门捕获部分翻译错误(软错误)
|
||||||
except PartialAgentResultError as e:
|
except PartialAgentResultError as e:
|
||||||
|
# print(f"【测试】\nprompt:\n{prompt}\nresp:\n{result}")
|
||||||
self.logger.error(f"收到部分返回结果,将尝试重试: {e}")
|
self.logger.error(f"收到部分返回结果,将尝试重试: {e}")
|
||||||
current_partial_result = e.partial_result
|
current_partial_result = e.partial_result
|
||||||
should_retry = True
|
should_retry = True
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ from json_repair import json_repair
|
|||||||
from docutranslate.agents import AgentConfig, Agent
|
from docutranslate.agents import AgentConfig, Agent
|
||||||
from docutranslate.agents.agent import PartialAgentResultError, AgentResultError
|
from docutranslate.agents.agent import PartialAgentResultError, AgentResultError
|
||||||
from docutranslate.glossary.glossary import Glossary
|
from docutranslate.glossary.glossary import Glossary
|
||||||
from docutranslate.utils.json_utils import segments2json_chunks
|
from docutranslate.utils.json_utils import segments2json_chunks, fix_json_string
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -42,13 +42,15 @@ class SegmentsTranslateAgent(Agent):
|
|||||||
# Output
|
# Output
|
||||||
- The translated sequence of segments, represented as JSON text (note: not a code block). The keys are the segment IDs, and the values are the translated segments.
|
- The translated sequence of segments, represented as JSON text (note: not a code block). The keys are the segment IDs, and the values are the translated segments.
|
||||||
- The returned JSON text must be a dictionary of the form {{<segment_id>: <translation>}}.
|
- The returned JSON text must be a dictionary of the form {{<segment_id>: <translation>}}.
|
||||||
- The segment IDs in the output must **exactly** match those in the input. And all segment IDs in input must appear in the output.
|
- (very important) The segment IDs in the output must exactly match those in the input. And all segment IDs in input must appear in the output.
|
||||||
# Example(Assuming the target language is Chinese in the example, {config.to_lang} is the actual target language)
|
# Example(Assuming the target language is Chinese in the example, {config.to_lang} is the actual target language)
|
||||||
## Input
|
## Input
|
||||||
{r'{"10":"hello","11":"apple","12":true,"13":"false","14":null}'}
|
{{"10":"Tom say:\"hello\"","11":“apple”,"12":true,"13":"false","14":null}}
|
||||||
## Output
|
## Correct Output
|
||||||
{r'{"10":"你好","11":"苹果","12":true,"13":"错误","14":null}'}
|
{{"10":"汤姆说:“你好”","11":"苹果","12":true,"13":"错误","14":null}}
|
||||||
> Warning: Never wrap the JSON text in ```.
|
## Incorrect Output
|
||||||
|
{{"10":"汤姆说:“你好”,"11":“苹果”,"12":true,"13":"错误"}}
|
||||||
|
> Warning: Never wrap the JSON text in ```, Never miss segment Translation.
|
||||||
"""
|
"""
|
||||||
self.custom_prompt = config.custom_prompt
|
self.custom_prompt = config.custom_prompt
|
||||||
if config.custom_prompt:
|
if config.custom_prompt:
|
||||||
@@ -73,6 +75,7 @@ class SegmentsTranslateAgent(Agent):
|
|||||||
raise AgentResultError("result为空值但原文不为空")
|
raise AgentResultError("result为空值但原文不为空")
|
||||||
return {}
|
return {}
|
||||||
try:
|
try:
|
||||||
|
result=fix_json_string(result)
|
||||||
original_chunk = json.loads(origin_prompt)
|
original_chunk = json.loads(origin_prompt)
|
||||||
repaired_result = json_repair.loads(result)
|
repaired_result = json_repair.loads(result)
|
||||||
|
|
||||||
|
|||||||
@@ -1,6 +1,7 @@
|
|||||||
# SPDX-FileCopyrightText: 2025 QinHan
|
# SPDX-FileCopyrightText: 2025 QinHan
|
||||||
# SPDX-License-Identifier: MPL-2.0
|
# SPDX-License-Identifier: MPL-2.0
|
||||||
import json
|
import json
|
||||||
|
import re
|
||||||
|
|
||||||
|
|
||||||
def get_json_size(js: dict) -> int:
|
def get_json_size(js: dict) -> int:
|
||||||
@@ -84,5 +85,22 @@ list[dict[str, str]], list[tuple[int, int]]]:
|
|||||||
return js, json_chunks_list, merged_indices_list
|
return js, json_chunks_list, merged_indices_list
|
||||||
|
|
||||||
|
|
||||||
|
# Boundary between the end of one value and the next numeric segment key:
#
#     [closing quote] <comma> <quote> digits <quote> <colon> [opening quote]
#
# Accepted variants (written as escapes so the pattern survives text
# normalization): ASCII or full-width comma (U+FF0C), ASCII or full-width
# colon (U+FF1A), and straight or curly double quotes (U+201C / U+201D).
# Group 1: optional quote that closes the previous string value.
# Group 2: the segment id (digits only).
# Group 3: optional quote that opens the next string value.
_KEY_BOUNDARY_RE = re.compile(
    r'([\u201d"])?\s*[,\uff0c]\s*["\u201c]\s*(\d+)\s*["\u201d]\s*[:\uff1a]\s*(["\u201c])?'
)


def fix_json_string(json_string: str) -> str:
    """Normalize the punctuation around numeric keys in JSON-like text.

    The function looks for every ``, "<digits>" :`` key boundary (tolerating
    curly quotes, full-width comma/colon and surrounding whitespace) and
    rewrites it with plain ASCII JSON punctuation. It is a best-effort
    pre-pass: text that does not contain such a boundary is returned
    unchanged, and the result is not guaranteed to be valid JSON.

    Args:
        json_string: JSON-like text whose keys are expected to be numeric ids.

    Returns:
        The text with every recognized key boundary rewritten as
        ``","<id>":`` (when the previous value ended with a quote) or
        ``,"<id>":`` (when it did not), followed by ``"`` if the next value
        started with a quote.
    """

    def repl(m: re.Match) -> str:
        closing_quote, segment_id, opening_quote = m.groups()
        # Always keep the separating comma. Without a closing quote the
        # previous value may be a literal (true / null / number), so no quote
        # is invented here; dropping the comma would corrupt valid JSON.
        pieces = ['",' if closing_quote else ',', f'"{segment_id}":']
        if opening_quote:
            # Curly opening quotes are normalized to a straight quote.
            pieces.append('"')
        return "".join(pieces)

    return _KEY_BOUNDARY_RE.sub(repl, json_string)
|
||||||
|
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
print(get_json_size({"0": ""}))
|
print(get_json_size({"0": ""}))
|
||||||
|
|||||||
Reference in New Issue
Block a user