diff --git a/docutranslate/translator/ai_translator/docx_translator.py b/docutranslate/translator/ai_translator/docx_translator.py index f1177d2..9712e06 100644 --- a/docutranslate/translator/ai_translator/docx_translator.py +++ b/docutranslate/translator/ai_translator/docx_translator.py @@ -156,7 +156,8 @@ class DocxTranslator(AiTranslator): # ---------- 代码修改部分 1: 形状翻译逻辑的核心实现 ---------- def _process_element_children(self, element, parent_paragraph: Paragraph, elements: List[Dict[str, Any]], texts: List[str], - state: Dict[str, Any]): + state: Dict[str, Any], + top_level_para: Paragraph): def flush_segment(): current_runs = state['current_runs'] @@ -164,8 +165,13 @@ class DocxTranslator(AiTranslator): return full_text = "".join(r.text for r in current_runs) if full_text.strip(): - # 在 elements 中增加对父段落的引用 - elements.append({"type": "text_runs", "runs": list(current_runs), "paragraph": parent_paragraph}) + # 在 elements 中增加对父段落和顶级段落的引用 + elements.append({ + "type": "text_runs", + "runs": list(current_runs), + "paragraph": parent_paragraph, + "top_level_paragraph": top_level_para + }) texts.append(full_text) state['current_runs'].clear() @@ -175,7 +181,7 @@ class DocxTranslator(AiTranslator): if child.tag in self.RECURSIVE_CONTAINER_TAGS: flush_segment() - self._process_element_children(child, parent_paragraph, elements, texts, state) + self._process_element_children(child, parent_paragraph, elements, texts, state, top_level_para) flush_segment() # 在递归容器后也刷新,确保其内容成为独立片段 continue @@ -202,8 +208,8 @@ class DocxTranslator(AiTranslator): for p_element in txbx_content.findall(qn('w:p')): # 创建新的段落对象,并传入父级上下文 shape_para = Paragraph(p_element, parent_paragraph) - # 递归处理该段落 - self._process_paragraph(shape_para, elements, texts) + # 递归处理该段落,并传递顶级段落上下文 + self._process_paragraph(shape_para, elements, texts, top_level_para=top_level_para) # 如果处理了形状内的文本,则该 Run 的任务已完成 continue @@ -224,19 +230,29 @@ class DocxTranslator(AiTranslator): # 遇到任何非 Run 的块级元素(如在单元格中嵌套的表格),都应结束当前文本片段。 flush_segment() - def _process_paragraph(self, para: Paragraph, elements: List[Dict[str, Any]], texts: List[str]): + def _process_paragraph(self, para: Paragraph, elements: List[Dict[str, Any]], texts: List[str], + top_level_para: Paragraph = None): + # 如果是首次进入段落处理(非递归调用),则当前段落是顶级段落 + if top_level_para is None: + top_level_para = para + state = { 'current_runs': [], } - # 修改调用:传入 `para` 对象作为父级上下文 - self._process_element_children(para._p, para, elements, texts, state) + # 修改调用:传入 `para` 对象、其顶级上下文 + self._process_element_children(para._p, para, elements, texts, state, top_level_para) # 确保在段落处理结束时,刷新所有剩余的 Run current_runs = state['current_runs'] if current_runs: full_text = "".join(r.text for r in current_runs) if full_text.strip(): - elements.append({"type": "text_runs", "runs": list(current_runs), "paragraph": para}) + elements.append({ + "type": "text_runs", + "runs": list(current_runs), + "paragraph": para, + "top_level_paragraph": top_level_para + }) texts.append(full_text) current_runs.clear() @@ -412,35 +428,37 @@ class DocxTranslator(AiTranslator): self._apply_translation(info, trans) else: paragraph_segments = defaultdict(list) - # [结构优化] 预先按段落对所有片段进行分组 + # [FIX] 按顶级段落对所有片段进行分组,以确保形状等嵌套内容与主段落一起处理 for i, info in enumerate(elements): - paragraph = info["paragraph"] - paragraph_segments[id(paragraph._p)].append({ + top_level_paragraph = info["top_level_paragraph"] + paragraph_segments[id(top_level_paragraph._p)].append({ "index": i, "translation": translated[i], - "paragraph_obj": paragraph + "paragraph_obj": top_level_paragraph }) - # [结构优化] 直接遍历按段落分组后的字典,每个段落只处理一次 for para_id, segments_for_this_para in paragraph_segments.items(): - # 从该组的第一个片段中获取唯一的段落对象 - paragraph = segments_for_this_para[0]["paragraph_obj"] - p_element = paragraph._p + # 从该组的第一个片段中获取唯一的顶级段落对象 + top_level_paragraph_orig = segments_for_this_para[0]["paragraph_obj"] + p_element_orig = top_level_paragraph_orig._p - translated_p_element = deepcopy(p_element) - - # ---------- FIX: 在处理副本前调用新增的修剪逻辑 ---------- + # 创建顶级段落的深拷贝,所有翻译将应用于此副本 + translated_p_element = deepcopy(p_element_orig) self._prune_unwanted_elements_from_copy(translated_p_element) - # ------------------------------------------------------------- + top_level_paragraph_copy = Paragraph(translated_p_element, top_level_paragraph_orig._parent) - translated_paragraph_obj = Paragraph(translated_p_element, paragraph._parent) + # [FIX] 创建从原始元素到复制元素的映射,包括嵌套的段落和所有runs + # 1. 嵌套段落映射 + para_map = {id(p_element_orig): top_level_paragraph_copy} + orig_nested_ps = p_element_orig.iter(qn('w:p')) + copy_nested_ps = translated_p_element.iter(qn('w:p')) + for o, c in zip(orig_nested_ps, copy_nested_ps): + para_map[id(o)] = Paragraph(c, top_level_paragraph_copy) - # [超链接修复] 使用 iter() 进行深度搜索,而不是 findall() - original_r_elements = p_element.iter(qn('w:r')) - copied_r_elements = translated_p_element.iter(qn('w:r')) - element_map = { + # 2. Run元素映射 + run_element_map = { id(orig_r): copied_r - for orig_r, copied_r in zip(original_r_elements, copied_r_elements) + for orig_r, copied_r in zip(p_element_orig.iter(qn('w:r')), translated_p_element.iter(qn('w:r'))) } for seg_info in segments_for_this_para: @@ -448,10 +466,21 @@ class DocxTranslator(AiTranslator): translation = seg_info["translation"] original_element_info = elements[element_index] + # [FIX] 从映射中查找当前片段对应的、位于副本中的父段落对象 + original_para_id = id(original_element_info["paragraph"]._p) + translated_paragraph_obj = para_map.get(original_para_id) + + if not translated_paragraph_obj: + self.logger.warning( + f"无法在段落副本中找到对应的嵌套段落,跳过翻译应用: '{translation}'") + continue + + # [FIX] 查找位于副本中的Run对象 runs_from_copy = [] for r in original_element_info["runs"]: - copied_r_element = element_map.get(id(r.element)) + copied_r_element = run_element_map.get(id(r.element)) if copied_r_element is not None: + # 使用正确的、位于副本中的父段落对象来创建Run new_run = Run(copied_r_element, translated_paragraph_obj) runs_from_copy.append(new_run) @@ -482,13 +511,13 @@ class DocxTranslator(AiTranslator): separator_p_element.append(run_element) if self.insert_mode == "append": - current_element = p_element + current_element = p_element_orig if separator_p_element is not None: current_element.addnext(separator_p_element) current_element = separator_p_element current_element.addnext(translated_p_element) elif self.insert_mode == "prepend": - p_element.addprevious(translated_p_element) + p_element_orig.addprevious(translated_p_element) if separator_p_element is not None: translated_p_element.addnext(separator_p_element)