From f611eb85010fbce81d3a3cc849338da40b8180ac Mon Sep 17 00:00:00 2001 From: xunbu Date: Fri, 17 Oct 2025 12:06:09 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BC=98=E5=8C=96docx=E7=BF=BB=E8=AF=91?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docutranslate/agents/segments_agent.py | 3 ++- .../translator/ai_translator/docx_translator.py | 12 +++++------- 2 files changed, 7 insertions(+), 8 deletions(-) diff --git a/docutranslate/agents/segments_agent.py b/docutranslate/agents/segments_agent.py index a1f35a0..46d1faf 100644 --- a/docutranslate/agents/segments_agent.py +++ b/docutranslate/agents/segments_agent.py @@ -32,8 +32,9 @@ Here is the input: For each Key-Value Pair in the JSON, translate the contents of the value into {to_lang}, Write the translation back into the value for that JSON. > (Very important) The original text segments and translated segments must strictly correspond one-to-one. It is strictly forbidden for the IDs of the translated segments to differ from those of the original segments. > The segment IDs in the output must exactly match those in the input. And all segment IDs in input must appear in the output. -Here is an example of the expected format: +> If necessary, two segments can only be translated together, the translation should be proportionally allocated to the corresponding key's value based on the word count ratio of the segments. 
+Here is an example of the expected format: Input: diff --git a/docutranslate/translator/ai_translator/docx_translator.py b/docutranslate/translator/ai_translator/docx_translator.py index 8d53305..be10870 100644 --- a/docutranslate/translator/ai_translator/docx_translator.py +++ b/docutranslate/translator/ai_translator/docx_translator.py @@ -33,17 +33,15 @@ def is_image_run(run: Run) -> bool: # ==================== MODIFICATION START ==================== # 对 is_formatting_only_run 函数进行了修改 # 旧的实现无法识别仅包含颜色等 rPr 属性的空 Run,导致其与后续文本 Run 错误合并。 -# 新的实现通过一个更简单的标准来判断:只要一个 Run 的文本内容在去除空白后为空, -# 它就被认为是纯格式化的,从而解决了交叉引用文本消失的问题。 +# 新的实现通过一个更简单的标准来判断:只要一个 Run 的文本内容为空, +# 它就被认为是纯格式化的,从而解决了交叉引用文本消失的问题。 # ========================================================== def is_formatting_only_run(run: Run) -> bool: """ - 检查一个 Run 是否仅用于格式化,不包含应被翻译的实质性文本。 - 这包括: - - 完全没有文本的 Run (即使它带有颜色等格式)。 - - 只包含空格、制表符等空白字符的 Run。 + 检查一个 Run 是否仅用于格式化,不包含任何应被渲染的文本。 + 这仅适用于其 .text 属性为 "" 的情况。 """ - return not run.text.strip() + return run.text == "" # ===================== MODIFICATION END =====================