降低缺键概率
This commit is contained in:
@@ -191,7 +191,7 @@ class Agent:
|
|||||||
should_retry = True
|
should_retry = True
|
||||||
# 专门捕获部分翻译错误(软错误)
|
# 专门捕获部分翻译错误(软错误)
|
||||||
except PartialAgentResultError as e:
|
except PartialAgentResultError as e:
|
||||||
# print(f"【测试】\nprompt:\n{prompt}\nresp:\n{result}")
|
print(f"【测试】\nprompt:\n{prompt}\nresp:\n{result}")
|
||||||
self.logger.error(f"收到部分返回结果,将尝试重试: {e}")
|
self.logger.error(f"收到部分返回结果,将尝试重试: {e}")
|
||||||
current_partial_result = e.partial_result
|
current_partial_result = e.partial_result
|
||||||
should_retry = True
|
should_retry = True
|
||||||
|
|||||||
@@ -41,16 +41,29 @@ class SegmentsTranslateAgent(Agent):
|
|||||||
- If a segment is already in the target language({config.to_lang}), keep it as is.
|
- If a segment is already in the target language({config.to_lang}), keep it as is.
|
||||||
# Output
|
# Output
|
||||||
- The translated sequence of segments, represented as JSON text (note: not a code block). The keys are the segment IDs, and the values are the translated segments.
|
- The translated sequence of segments, represented as JSON text (note: not a code block). The keys are the segment IDs, and the values are the translated segments.
|
||||||
- The returned JSON text must be a dictionary of the form {{<segment_id>: <translation>}}.
|
- The response must be a JSON object(indent=0) with the following structure:
|
||||||
|
{{
|
||||||
|
"<segment_id>": "<translation>"
|
||||||
|
}}
|
||||||
- (very important) The segment IDs in the output must exactly match those in the input. And all segment IDs in input must appear in the output.
|
- (very important) The segment IDs in the output must exactly match those in the input. And all segment IDs in input must appear in the output.
|
||||||
|
- All keys that appear in the input JSON must exist in the output JSON.
|
||||||
# Example(Assuming the target language is Chinese in the example, {config.to_lang} is the actual target language)
|
# Example(Assuming the target language is Chinese in the example, {config.to_lang} is the actual target language)
|
||||||
## Input
|
## Input
|
||||||
{{"10":"Tom say:\"hello\"","11":“apple”,"12":true,"13":"false","14":null}}
|
{{
|
||||||
|
"10": "Tom say:\"hello\"",
|
||||||
|
"11": "apple",
|
||||||
|
"12": true,
|
||||||
|
"13": "false",
|
||||||
|
"14": null
|
||||||
|
}}
|
||||||
## Correct Output
|
## Correct Output
|
||||||
{{"10":"汤姆说:“你好”","11":"苹果","12":true,"13":"错误","14":null}}
|
{{
|
||||||
## Incorrect Output
|
"10": "汤姆说:“你好”",
|
||||||
{{"10":"汤姆说:“你好”,"11":“苹果”,"12":true,"13":"错误"}}
|
"11": "苹果",
|
||||||
> Warning: Never wrap the JSON text in ```, Never miss segment Translation.
|
"12": true,
|
||||||
|
"13": "错误",
|
||||||
|
"14": null
|
||||||
|
}}
|
||||||
"""
|
"""
|
||||||
self.custom_prompt = config.custom_prompt
|
self.custom_prompt = config.custom_prompt
|
||||||
if config.custom_prompt:
|
if config.custom_prompt:
|
||||||
@@ -138,7 +151,7 @@ class SegmentsTranslateAgent(Agent):
|
|||||||
|
|
||||||
def send_segments(self, segments: list[str], chunk_size: int) -> list[str]:
|
def send_segments(self, segments: list[str], chunk_size: int) -> list[str]:
|
||||||
indexed_originals, chunks, merged_indices_list = segments2json_chunks(segments, chunk_size)
|
indexed_originals, chunks, merged_indices_list = segments2json_chunks(segments, chunk_size)
|
||||||
prompts = [json.dumps(chunk, ensure_ascii=False) for chunk in chunks]
|
prompts = [json.dumps(chunk, ensure_ascii=False, indent=0) for chunk in chunks]
|
||||||
|
|
||||||
translated_chunks = super().send_prompts(prompts=prompts, pre_send_handler=self._pre_send_handler,
|
translated_chunks = super().send_prompts(prompts=prompts, pre_send_handler=self._pre_send_handler,
|
||||||
result_handler=self._result_handler,
|
result_handler=self._result_handler,
|
||||||
@@ -176,7 +189,7 @@ class SegmentsTranslateAgent(Agent):
|
|||||||
async def send_segments_async(self, segments: list[str], chunk_size: int) -> list[str]:
|
async def send_segments_async(self, segments: list[str], chunk_size: int) -> list[str]:
|
||||||
indexed_originals, chunks, merged_indices_list = await asyncio.to_thread(segments2json_chunks, segments,
|
indexed_originals, chunks, merged_indices_list = await asyncio.to_thread(segments2json_chunks, segments,
|
||||||
chunk_size)
|
chunk_size)
|
||||||
prompts = [json.dumps(chunk, ensure_ascii=False) for chunk in chunks]
|
prompts = [json.dumps(chunk, ensure_ascii=False, indent=0) for chunk in chunks]
|
||||||
|
|
||||||
translated_chunks = await super().send_prompts_async(prompts=prompts, pre_send_handler=self._pre_send_handler,
|
translated_chunks = await super().send_prompts_async(prompts=prompts, pre_send_handler=self._pre_send_handler,
|
||||||
result_handler=self._result_handler,
|
result_handler=self._result_handler,
|
||||||
|
|||||||
@@ -87,17 +87,12 @@ list[dict[str, str]], list[tuple[int, int]]]:
|
|||||||
|
|
||||||
def fix_json_string(json_string):
|
def fix_json_string(json_string):
|
||||||
def repl(m:re.Match):
|
def repl(m:re.Match):
|
||||||
result=""
|
return f'{'"' if m.group(1) else ""},\n"{m.group(2)}":{'"' if m.group(3) else ""}'
|
||||||
if m.group(1):
|
|
||||||
result+='",'
|
|
||||||
result+=f'"{m.group(2)}":'
|
|
||||||
if m.group(3):
|
|
||||||
result+='"'
|
|
||||||
return result
|
|
||||||
fixed_json = re.sub(
|
fixed_json = re.sub(
|
||||||
r'([”"])?\s?[,|,]\s?[\"|“]\s?(\d+?)\s?[\"|”]\s?[:|:]\s?([\"|“])?',
|
r'([“”"])?\s*[,,]\s*["“”]\s*(\d+)\s*["“”]\s*[::]\s*(["“”])?',
|
||||||
repl,
|
repl,
|
||||||
json_string
|
json_string,
|
||||||
|
re.MULTILINE
|
||||||
)
|
)
|
||||||
return fixed_json
|
return fixed_json
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user