译文附加时可以翻译形状内文本
This commit is contained in:
@@ -156,7 +156,8 @@ class DocxTranslator(AiTranslator):
|
|||||||
# ---------- 代码修改部分 1: 形状翻译逻辑的核心实现 ----------
|
# ---------- 代码修改部分 1: 形状翻译逻辑的核心实现 ----------
|
||||||
def _process_element_children(self, element, parent_paragraph: Paragraph, elements: List[Dict[str, Any]],
|
def _process_element_children(self, element, parent_paragraph: Paragraph, elements: List[Dict[str, Any]],
|
||||||
texts: List[str],
|
texts: List[str],
|
||||||
state: Dict[str, Any]):
|
state: Dict[str, Any],
|
||||||
|
top_level_para: Paragraph):
|
||||||
|
|
||||||
def flush_segment():
|
def flush_segment():
|
||||||
current_runs = state['current_runs']
|
current_runs = state['current_runs']
|
||||||
@@ -164,8 +165,13 @@ class DocxTranslator(AiTranslator):
|
|||||||
return
|
return
|
||||||
full_text = "".join(r.text for r in current_runs)
|
full_text = "".join(r.text for r in current_runs)
|
||||||
if full_text.strip():
|
if full_text.strip():
|
||||||
# 在 elements 中增加对父段落的引用
|
# 在 elements 中增加对父段落和顶级段落的引用
|
||||||
elements.append({"type": "text_runs", "runs": list(current_runs), "paragraph": parent_paragraph})
|
elements.append({
|
||||||
|
"type": "text_runs",
|
||||||
|
"runs": list(current_runs),
|
||||||
|
"paragraph": parent_paragraph,
|
||||||
|
"top_level_paragraph": top_level_para
|
||||||
|
})
|
||||||
texts.append(full_text)
|
texts.append(full_text)
|
||||||
state['current_runs'].clear()
|
state['current_runs'].clear()
|
||||||
|
|
||||||
@@ -175,7 +181,7 @@ class DocxTranslator(AiTranslator):
|
|||||||
|
|
||||||
if child.tag in self.RECURSIVE_CONTAINER_TAGS:
|
if child.tag in self.RECURSIVE_CONTAINER_TAGS:
|
||||||
flush_segment()
|
flush_segment()
|
||||||
self._process_element_children(child, parent_paragraph, elements, texts, state)
|
self._process_element_children(child, parent_paragraph, elements, texts, state, top_level_para)
|
||||||
flush_segment() # 在递归容器后也刷新,确保其内容成为独立片段
|
flush_segment() # 在递归容器后也刷新,确保其内容成为独立片段
|
||||||
continue
|
continue
|
||||||
|
|
||||||
@@ -202,8 +208,8 @@ class DocxTranslator(AiTranslator):
|
|||||||
for p_element in txbx_content.findall(qn('w:p')):
|
for p_element in txbx_content.findall(qn('w:p')):
|
||||||
# 创建新的段落对象,并传入父级上下文
|
# 创建新的段落对象,并传入父级上下文
|
||||||
shape_para = Paragraph(p_element, parent_paragraph)
|
shape_para = Paragraph(p_element, parent_paragraph)
|
||||||
# 递归处理该段落
|
# 递归处理该段落,并传递顶级段落上下文
|
||||||
self._process_paragraph(shape_para, elements, texts)
|
self._process_paragraph(shape_para, elements, texts, top_level_para=top_level_para)
|
||||||
|
|
||||||
# 如果处理了形状内的文本,则该 Run 的任务已完成
|
# 如果处理了形状内的文本,则该 Run 的任务已完成
|
||||||
continue
|
continue
|
||||||
@@ -224,19 +230,29 @@ class DocxTranslator(AiTranslator):
|
|||||||
# 遇到任何非 Run 的块级元素(如在单元格中嵌套的表格),都应结束当前文本片段。
|
# 遇到任何非 Run 的块级元素(如在单元格中嵌套的表格),都应结束当前文本片段。
|
||||||
flush_segment()
|
flush_segment()
|
||||||
|
|
||||||
def _process_paragraph(self, para: Paragraph, elements: List[Dict[str, Any]], texts: List[str]):
|
def _process_paragraph(self, para: Paragraph, elements: List[Dict[str, Any]], texts: List[str],
|
||||||
|
top_level_para: Paragraph = None):
|
||||||
|
# 如果是首次进入段落处理(非递归调用),则当前段落是顶级段落
|
||||||
|
if top_level_para is None:
|
||||||
|
top_level_para = para
|
||||||
|
|
||||||
state = {
|
state = {
|
||||||
'current_runs': [],
|
'current_runs': [],
|
||||||
}
|
}
|
||||||
# 修改调用:传入 `para` 对象作为父级上下文
|
# 修改调用:传入 `para` 对象、其顶级上下文
|
||||||
self._process_element_children(para._p, para, elements, texts, state)
|
self._process_element_children(para._p, para, elements, texts, state, top_level_para)
|
||||||
|
|
||||||
# 确保在段落处理结束时,刷新所有剩余的 Run
|
# 确保在段落处理结束时,刷新所有剩余的 Run
|
||||||
current_runs = state['current_runs']
|
current_runs = state['current_runs']
|
||||||
if current_runs:
|
if current_runs:
|
||||||
full_text = "".join(r.text for r in current_runs)
|
full_text = "".join(r.text for r in current_runs)
|
||||||
if full_text.strip():
|
if full_text.strip():
|
||||||
elements.append({"type": "text_runs", "runs": list(current_runs), "paragraph": para})
|
elements.append({
|
||||||
|
"type": "text_runs",
|
||||||
|
"runs": list(current_runs),
|
||||||
|
"paragraph": para,
|
||||||
|
"top_level_paragraph": top_level_para
|
||||||
|
})
|
||||||
texts.append(full_text)
|
texts.append(full_text)
|
||||||
current_runs.clear()
|
current_runs.clear()
|
||||||
|
|
||||||
@@ -412,35 +428,37 @@ class DocxTranslator(AiTranslator):
|
|||||||
self._apply_translation(info, trans)
|
self._apply_translation(info, trans)
|
||||||
else:
|
else:
|
||||||
paragraph_segments = defaultdict(list)
|
paragraph_segments = defaultdict(list)
|
||||||
# [结构优化] 预先按段落对所有片段进行分组
|
# [FIX] 按顶级段落对所有片段进行分组,以确保形状等嵌套内容与主段落一起处理
|
||||||
for i, info in enumerate(elements):
|
for i, info in enumerate(elements):
|
||||||
paragraph = info["paragraph"]
|
top_level_paragraph = info["top_level_paragraph"]
|
||||||
paragraph_segments[id(paragraph._p)].append({
|
paragraph_segments[id(top_level_paragraph._p)].append({
|
||||||
"index": i,
|
"index": i,
|
||||||
"translation": translated[i],
|
"translation": translated[i],
|
||||||
"paragraph_obj": paragraph
|
"paragraph_obj": top_level_paragraph
|
||||||
})
|
})
|
||||||
|
|
||||||
# [结构优化] 直接遍历按段落分组后的字典,每个段落只处理一次
|
|
||||||
for para_id, segments_for_this_para in paragraph_segments.items():
|
for para_id, segments_for_this_para in paragraph_segments.items():
|
||||||
# 从该组的第一个片段中获取唯一的段落对象
|
# 从该组的第一个片段中获取唯一的顶级段落对象
|
||||||
paragraph = segments_for_this_para[0]["paragraph_obj"]
|
top_level_paragraph_orig = segments_for_this_para[0]["paragraph_obj"]
|
||||||
p_element = paragraph._p
|
p_element_orig = top_level_paragraph_orig._p
|
||||||
|
|
||||||
translated_p_element = deepcopy(p_element)
|
# 创建顶级段落的深拷贝,所有翻译将应用于此副本
|
||||||
|
translated_p_element = deepcopy(p_element_orig)
|
||||||
# ---------- FIX: 在处理副本前调用新增的修剪逻辑 ----------
|
|
||||||
self._prune_unwanted_elements_from_copy(translated_p_element)
|
self._prune_unwanted_elements_from_copy(translated_p_element)
|
||||||
# -------------------------------------------------------------
|
top_level_paragraph_copy = Paragraph(translated_p_element, top_level_paragraph_orig._parent)
|
||||||
|
|
||||||
translated_paragraph_obj = Paragraph(translated_p_element, paragraph._parent)
|
# [FIX] 创建从原始元素到复制元素的映射,包括嵌套的段落和所有runs
|
||||||
|
# 1. 嵌套段落映射
|
||||||
|
para_map = {id(p_element_orig): top_level_paragraph_copy}
|
||||||
|
orig_nested_ps = p_element_orig.iter(qn('w:p'))
|
||||||
|
copy_nested_ps = translated_p_element.iter(qn('w:p'))
|
||||||
|
for o, c in zip(orig_nested_ps, copy_nested_ps):
|
||||||
|
para_map[id(o)] = Paragraph(c, top_level_paragraph_copy)
|
||||||
|
|
||||||
# [超链接修复] 使用 iter() 进行深度搜索,而不是 findall()
|
# 2. Run元素映射
|
||||||
original_r_elements = p_element.iter(qn('w:r'))
|
run_element_map = {
|
||||||
copied_r_elements = translated_p_element.iter(qn('w:r'))
|
|
||||||
element_map = {
|
|
||||||
id(orig_r): copied_r
|
id(orig_r): copied_r
|
||||||
for orig_r, copied_r in zip(original_r_elements, copied_r_elements)
|
for orig_r, copied_r in zip(p_element_orig.iter(qn('w:r')), translated_p_element.iter(qn('w:r')))
|
||||||
}
|
}
|
||||||
|
|
||||||
for seg_info in segments_for_this_para:
|
for seg_info in segments_for_this_para:
|
||||||
@@ -448,10 +466,21 @@ class DocxTranslator(AiTranslator):
|
|||||||
translation = seg_info["translation"]
|
translation = seg_info["translation"]
|
||||||
original_element_info = elements[element_index]
|
original_element_info = elements[element_index]
|
||||||
|
|
||||||
|
# [FIX] 从映射中查找当前片段对应的、位于副本中的父段落对象
|
||||||
|
original_para_id = id(original_element_info["paragraph"]._p)
|
||||||
|
translated_paragraph_obj = para_map.get(original_para_id)
|
||||||
|
|
||||||
|
if not translated_paragraph_obj:
|
||||||
|
self.logger.warning(
|
||||||
|
f"无法在段落副本中找到对应的嵌套段落,跳过翻译应用: '{translation}'")
|
||||||
|
continue
|
||||||
|
|
||||||
|
# [FIX] 查找位于副本中的Run对象
|
||||||
runs_from_copy = []
|
runs_from_copy = []
|
||||||
for r in original_element_info["runs"]:
|
for r in original_element_info["runs"]:
|
||||||
copied_r_element = element_map.get(id(r.element))
|
copied_r_element = run_element_map.get(id(r.element))
|
||||||
if copied_r_element is not None:
|
if copied_r_element is not None:
|
||||||
|
# 使用正确的、位于副本中的父段落对象来创建Run
|
||||||
new_run = Run(copied_r_element, translated_paragraph_obj)
|
new_run = Run(copied_r_element, translated_paragraph_obj)
|
||||||
runs_from_copy.append(new_run)
|
runs_from_copy.append(new_run)
|
||||||
|
|
||||||
@@ -482,13 +511,13 @@ class DocxTranslator(AiTranslator):
|
|||||||
separator_p_element.append(run_element)
|
separator_p_element.append(run_element)
|
||||||
|
|
||||||
if self.insert_mode == "append":
|
if self.insert_mode == "append":
|
||||||
current_element = p_element
|
current_element = p_element_orig
|
||||||
if separator_p_element is not None:
|
if separator_p_element is not None:
|
||||||
current_element.addnext(separator_p_element)
|
current_element.addnext(separator_p_element)
|
||||||
current_element = separator_p_element
|
current_element = separator_p_element
|
||||||
current_element.addnext(translated_p_element)
|
current_element.addnext(translated_p_element)
|
||||||
elif self.insert_mode == "prepend":
|
elif self.insert_mode == "prepend":
|
||||||
p_element.addprevious(translated_p_element)
|
p_element_orig.addprevious(translated_p_element)
|
||||||
if separator_p_element is not None:
|
if separator_p_element is not None:
|
||||||
translated_p_element.addnext(separator_p_element)
|
translated_p_element.addnext(separator_p_element)
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user