译文附加时可以翻译形状内文本

This commit is contained in:
xunbu
2025-10-30 20:55:08 +08:00
parent a2ade9940c
commit 668af629e9

View File

@@ -156,7 +156,8 @@ class DocxTranslator(AiTranslator):
# ---------- 代码修改部分 1: 形状翻译逻辑的核心实现 ---------- # ---------- 代码修改部分 1: 形状翻译逻辑的核心实现 ----------
def _process_element_children(self, element, parent_paragraph: Paragraph, elements: List[Dict[str, Any]], def _process_element_children(self, element, parent_paragraph: Paragraph, elements: List[Dict[str, Any]],
texts: List[str], texts: List[str],
state: Dict[str, Any]): state: Dict[str, Any],
top_level_para: Paragraph):
def flush_segment(): def flush_segment():
current_runs = state['current_runs'] current_runs = state['current_runs']
@@ -164,8 +165,13 @@ class DocxTranslator(AiTranslator):
return return
full_text = "".join(r.text for r in current_runs) full_text = "".join(r.text for r in current_runs)
if full_text.strip(): if full_text.strip():
# 在 elements 中增加对父段落的引用 # 在 elements 中增加对父段落和顶级段落的引用
elements.append({"type": "text_runs", "runs": list(current_runs), "paragraph": parent_paragraph}) elements.append({
"type": "text_runs",
"runs": list(current_runs),
"paragraph": parent_paragraph,
"top_level_paragraph": top_level_para
})
texts.append(full_text) texts.append(full_text)
state['current_runs'].clear() state['current_runs'].clear()
@@ -175,7 +181,7 @@ class DocxTranslator(AiTranslator):
if child.tag in self.RECURSIVE_CONTAINER_TAGS: if child.tag in self.RECURSIVE_CONTAINER_TAGS:
flush_segment() flush_segment()
self._process_element_children(child, parent_paragraph, elements, texts, state) self._process_element_children(child, parent_paragraph, elements, texts, state, top_level_para)
flush_segment() # 在递归容器后也刷新,确保其内容成为独立片段 flush_segment() # 在递归容器后也刷新,确保其内容成为独立片段
continue continue
@@ -202,8 +208,8 @@ class DocxTranslator(AiTranslator):
for p_element in txbx_content.findall(qn('w:p')): for p_element in txbx_content.findall(qn('w:p')):
# 创建新的段落对象,并传入父级上下文 # 创建新的段落对象,并传入父级上下文
shape_para = Paragraph(p_element, parent_paragraph) shape_para = Paragraph(p_element, parent_paragraph)
# 递归处理该段落 # 递归处理该段落,并传递顶级段落上下文
self._process_paragraph(shape_para, elements, texts) self._process_paragraph(shape_para, elements, texts, top_level_para=top_level_para)
# 如果处理了形状内的文本,则该 Run 的任务已完成 # 如果处理了形状内的文本,则该 Run 的任务已完成
continue continue
@@ -224,19 +230,29 @@ class DocxTranslator(AiTranslator):
# 遇到任何非 Run 的块级元素(如在单元格中嵌套的表格),都应结束当前文本片段。 # 遇到任何非 Run 的块级元素(如在单元格中嵌套的表格),都应结束当前文本片段。
flush_segment() flush_segment()
def _process_paragraph(self, para: Paragraph, elements: List[Dict[str, Any]], texts: List[str]): def _process_paragraph(self, para: Paragraph, elements: List[Dict[str, Any]], texts: List[str],
top_level_para: Paragraph = None):
# 如果是首次进入段落处理(非递归调用),则当前段落是顶级段落
if top_level_para is None:
top_level_para = para
state = { state = {
'current_runs': [], 'current_runs': [],
} }
# 修改调用:传入 `para` 对象作为父级上下文 # 修改调用:传入 `para` 对象、其顶级上下文
self._process_element_children(para._p, para, elements, texts, state) self._process_element_children(para._p, para, elements, texts, state, top_level_para)
# 确保在段落处理结束时,刷新所有剩余的 Run # 确保在段落处理结束时,刷新所有剩余的 Run
current_runs = state['current_runs'] current_runs = state['current_runs']
if current_runs: if current_runs:
full_text = "".join(r.text for r in current_runs) full_text = "".join(r.text for r in current_runs)
if full_text.strip(): if full_text.strip():
elements.append({"type": "text_runs", "runs": list(current_runs), "paragraph": para}) elements.append({
"type": "text_runs",
"runs": list(current_runs),
"paragraph": para,
"top_level_paragraph": top_level_para
})
texts.append(full_text) texts.append(full_text)
current_runs.clear() current_runs.clear()
@@ -412,35 +428,37 @@ class DocxTranslator(AiTranslator):
self._apply_translation(info, trans) self._apply_translation(info, trans)
else: else:
paragraph_segments = defaultdict(list) paragraph_segments = defaultdict(list)
# [结构优化] 预先按段落对所有片段进行分组 # [FIX] 按顶级段落对所有片段进行分组,以确保形状等嵌套内容与主段落一起处理
for i, info in enumerate(elements): for i, info in enumerate(elements):
paragraph = info["paragraph"] top_level_paragraph = info["top_level_paragraph"]
paragraph_segments[id(paragraph._p)].append({ paragraph_segments[id(top_level_paragraph._p)].append({
"index": i, "index": i,
"translation": translated[i], "translation": translated[i],
"paragraph_obj": paragraph "paragraph_obj": top_level_paragraph
}) })
# [结构优化] 直接遍历按段落分组后的字典,每个段落只处理一次
for para_id, segments_for_this_para in paragraph_segments.items(): for para_id, segments_for_this_para in paragraph_segments.items():
# 从该组的第一个片段中获取唯一的段落对象 # 从该组的第一个片段中获取唯一的顶级段落对象
paragraph = segments_for_this_para[0]["paragraph_obj"] top_level_paragraph_orig = segments_for_this_para[0]["paragraph_obj"]
p_element = paragraph._p p_element_orig = top_level_paragraph_orig._p
translated_p_element = deepcopy(p_element) # 创建顶级段落的深拷贝,所有翻译将应用于此副本
translated_p_element = deepcopy(p_element_orig)
# ---------- FIX: 在处理副本前调用新增的修剪逻辑 ----------
self._prune_unwanted_elements_from_copy(translated_p_element) self._prune_unwanted_elements_from_copy(translated_p_element)
# ------------------------------------------------------------- top_level_paragraph_copy = Paragraph(translated_p_element, top_level_paragraph_orig._parent)
translated_paragraph_obj = Paragraph(translated_p_element, paragraph._parent) # [FIX] 创建从原始元素到复制元素的映射包括嵌套的段落和所有runs
# 1. 嵌套段落映射
para_map = {id(p_element_orig): top_level_paragraph_copy}
orig_nested_ps = p_element_orig.iter(qn('w:p'))
copy_nested_ps = translated_p_element.iter(qn('w:p'))
for o, c in zip(orig_nested_ps, copy_nested_ps):
para_map[id(o)] = Paragraph(c, top_level_paragraph_copy)
# [超链接修复] 使用 iter() 进行深度搜索,而不是 findall() # 2. Run元素映射
original_r_elements = p_element.iter(qn('w:r')) run_element_map = {
copied_r_elements = translated_p_element.iter(qn('w:r'))
element_map = {
id(orig_r): copied_r id(orig_r): copied_r
for orig_r, copied_r in zip(original_r_elements, copied_r_elements) for orig_r, copied_r in zip(p_element_orig.iter(qn('w:r')), translated_p_element.iter(qn('w:r')))
} }
for seg_info in segments_for_this_para: for seg_info in segments_for_this_para:
@@ -448,10 +466,21 @@ class DocxTranslator(AiTranslator):
translation = seg_info["translation"] translation = seg_info["translation"]
original_element_info = elements[element_index] original_element_info = elements[element_index]
# [FIX] 从映射中查找当前片段对应的、位于副本中的父段落对象
original_para_id = id(original_element_info["paragraph"]._p)
translated_paragraph_obj = para_map.get(original_para_id)
if not translated_paragraph_obj:
self.logger.warning(
f"无法在段落副本中找到对应的嵌套段落,跳过翻译应用: '{translation}'")
continue
# [FIX] 查找位于副本中的Run对象
runs_from_copy = [] runs_from_copy = []
for r in original_element_info["runs"]: for r in original_element_info["runs"]:
copied_r_element = element_map.get(id(r.element)) copied_r_element = run_element_map.get(id(r.element))
if copied_r_element is not None: if copied_r_element is not None:
# 使用正确的、位于副本中的父段落对象来创建Run
new_run = Run(copied_r_element, translated_paragraph_obj) new_run = Run(copied_r_element, translated_paragraph_obj)
runs_from_copy.append(new_run) runs_from_copy.append(new_run)
@@ -482,13 +511,13 @@ class DocxTranslator(AiTranslator):
separator_p_element.append(run_element) separator_p_element.append(run_element)
if self.insert_mode == "append": if self.insert_mode == "append":
current_element = p_element current_element = p_element_orig
if separator_p_element is not None: if separator_p_element is not None:
current_element.addnext(separator_p_element) current_element.addnext(separator_p_element)
current_element = separator_p_element current_element = separator_p_element
current_element.addnext(translated_p_element) current_element.addnext(translated_p_element)
elif self.insert_mode == "prepend": elif self.insert_mode == "prepend":
p_element.addprevious(translated_p_element) p_element_orig.addprevious(translated_p_element)
if separator_p_element is not None: if separator_p_element is not None:
translated_p_element.addnext(separator_p_element) translated_p_element.addnext(separator_p_element)