译文附加时可以翻译形状内文本
This commit is contained in:
@@ -156,7 +156,8 @@ class DocxTranslator(AiTranslator):
|
||||
# ---------- 代码修改部分 1: 形状翻译逻辑的核心实现 ----------
|
||||
def _process_element_children(self, element, parent_paragraph: Paragraph, elements: List[Dict[str, Any]],
|
||||
texts: List[str],
|
||||
state: Dict[str, Any]):
|
||||
state: Dict[str, Any],
|
||||
top_level_para: Paragraph):
|
||||
|
||||
def flush_segment():
|
||||
current_runs = state['current_runs']
|
||||
@@ -164,8 +165,13 @@ class DocxTranslator(AiTranslator):
|
||||
return
|
||||
full_text = "".join(r.text for r in current_runs)
|
||||
if full_text.strip():
|
||||
# 在 elements 中增加对父段落的引用
|
||||
elements.append({"type": "text_runs", "runs": list(current_runs), "paragraph": parent_paragraph})
|
||||
# 在 elements 中增加对父段落和顶级段落的引用
|
||||
elements.append({
|
||||
"type": "text_runs",
|
||||
"runs": list(current_runs),
|
||||
"paragraph": parent_paragraph,
|
||||
"top_level_paragraph": top_level_para
|
||||
})
|
||||
texts.append(full_text)
|
||||
state['current_runs'].clear()
|
||||
|
||||
@@ -175,7 +181,7 @@ class DocxTranslator(AiTranslator):
|
||||
|
||||
if child.tag in self.RECURSIVE_CONTAINER_TAGS:
|
||||
flush_segment()
|
||||
self._process_element_children(child, parent_paragraph, elements, texts, state)
|
||||
self._process_element_children(child, parent_paragraph, elements, texts, state, top_level_para)
|
||||
flush_segment() # 在递归容器后也刷新,确保其内容成为独立片段
|
||||
continue
|
||||
|
||||
@@ -202,8 +208,8 @@ class DocxTranslator(AiTranslator):
|
||||
for p_element in txbx_content.findall(qn('w:p')):
|
||||
# 创建新的段落对象,并传入父级上下文
|
||||
shape_para = Paragraph(p_element, parent_paragraph)
|
||||
# 递归处理该段落
|
||||
self._process_paragraph(shape_para, elements, texts)
|
||||
# 递归处理该段落,并传递顶级段落上下文
|
||||
self._process_paragraph(shape_para, elements, texts, top_level_para=top_level_para)
|
||||
|
||||
# 如果处理了形状内的文本,则该 Run 的任务已完成
|
||||
continue
|
||||
@@ -224,19 +230,29 @@ class DocxTranslator(AiTranslator):
|
||||
# 遇到任何非 Run 的块级元素(如在单元格中嵌套的表格),都应结束当前文本片段。
|
||||
flush_segment()
|
||||
|
||||
def _process_paragraph(self, para: Paragraph, elements: List[Dict[str, Any]], texts: List[str]):
|
||||
def _process_paragraph(self, para: Paragraph, elements: List[Dict[str, Any]], texts: List[str],
|
||||
top_level_para: Paragraph = None):
|
||||
# 如果是首次进入段落处理(非递归调用),则当前段落是顶级段落
|
||||
if top_level_para is None:
|
||||
top_level_para = para
|
||||
|
||||
state = {
|
||||
'current_runs': [],
|
||||
}
|
||||
# 修改调用:传入 `para` 对象作为父级上下文
|
||||
self._process_element_children(para._p, para, elements, texts, state)
|
||||
# 修改调用:传入 `para` 对象、其顶级上下文
|
||||
self._process_element_children(para._p, para, elements, texts, state, top_level_para)
|
||||
|
||||
# 确保在段落处理结束时,刷新所有剩余的 Run
|
||||
current_runs = state['current_runs']
|
||||
if current_runs:
|
||||
full_text = "".join(r.text for r in current_runs)
|
||||
if full_text.strip():
|
||||
elements.append({"type": "text_runs", "runs": list(current_runs), "paragraph": para})
|
||||
elements.append({
|
||||
"type": "text_runs",
|
||||
"runs": list(current_runs),
|
||||
"paragraph": para,
|
||||
"top_level_paragraph": top_level_para
|
||||
})
|
||||
texts.append(full_text)
|
||||
current_runs.clear()
|
||||
|
||||
@@ -412,35 +428,37 @@ class DocxTranslator(AiTranslator):
|
||||
self._apply_translation(info, trans)
|
||||
else:
|
||||
paragraph_segments = defaultdict(list)
|
||||
# [结构优化] 预先按段落对所有片段进行分组
|
||||
# [FIX] 按顶级段落对所有片段进行分组,以确保形状等嵌套内容与主段落一起处理
|
||||
for i, info in enumerate(elements):
|
||||
paragraph = info["paragraph"]
|
||||
paragraph_segments[id(paragraph._p)].append({
|
||||
top_level_paragraph = info["top_level_paragraph"]
|
||||
paragraph_segments[id(top_level_paragraph._p)].append({
|
||||
"index": i,
|
||||
"translation": translated[i],
|
||||
"paragraph_obj": paragraph
|
||||
"paragraph_obj": top_level_paragraph
|
||||
})
|
||||
|
||||
# [结构优化] 直接遍历按段落分组后的字典,每个段落只处理一次
|
||||
for para_id, segments_for_this_para in paragraph_segments.items():
|
||||
# 从该组的第一个片段中获取唯一的段落对象
|
||||
paragraph = segments_for_this_para[0]["paragraph_obj"]
|
||||
p_element = paragraph._p
|
||||
# 从该组的第一个片段中获取唯一的顶级段落对象
|
||||
top_level_paragraph_orig = segments_for_this_para[0]["paragraph_obj"]
|
||||
p_element_orig = top_level_paragraph_orig._p
|
||||
|
||||
translated_p_element = deepcopy(p_element)
|
||||
|
||||
# ---------- FIX: 在处理副本前调用新增的修剪逻辑 ----------
|
||||
# 创建顶级段落的深拷贝,所有翻译将应用于此副本
|
||||
translated_p_element = deepcopy(p_element_orig)
|
||||
self._prune_unwanted_elements_from_copy(translated_p_element)
|
||||
# -------------------------------------------------------------
|
||||
top_level_paragraph_copy = Paragraph(translated_p_element, top_level_paragraph_orig._parent)
|
||||
|
||||
translated_paragraph_obj = Paragraph(translated_p_element, paragraph._parent)
|
||||
# [FIX] 创建从原始元素到复制元素的映射,包括嵌套的段落和所有runs
|
||||
# 1. 嵌套段落映射
|
||||
para_map = {id(p_element_orig): top_level_paragraph_copy}
|
||||
orig_nested_ps = p_element_orig.iter(qn('w:p'))
|
||||
copy_nested_ps = translated_p_element.iter(qn('w:p'))
|
||||
for o, c in zip(orig_nested_ps, copy_nested_ps):
|
||||
para_map[id(o)] = Paragraph(c, top_level_paragraph_copy)
|
||||
|
||||
# [超链接修复] 使用 iter() 进行深度搜索,而不是 findall()
|
||||
original_r_elements = p_element.iter(qn('w:r'))
|
||||
copied_r_elements = translated_p_element.iter(qn('w:r'))
|
||||
element_map = {
|
||||
# 2. Run元素映射
|
||||
run_element_map = {
|
||||
id(orig_r): copied_r
|
||||
for orig_r, copied_r in zip(original_r_elements, copied_r_elements)
|
||||
for orig_r, copied_r in zip(p_element_orig.iter(qn('w:r')), translated_p_element.iter(qn('w:r')))
|
||||
}
|
||||
|
||||
for seg_info in segments_for_this_para:
|
||||
@@ -448,10 +466,21 @@ class DocxTranslator(AiTranslator):
|
||||
translation = seg_info["translation"]
|
||||
original_element_info = elements[element_index]
|
||||
|
||||
# [FIX] 从映射中查找当前片段对应的、位于副本中的父段落对象
|
||||
original_para_id = id(original_element_info["paragraph"]._p)
|
||||
translated_paragraph_obj = para_map.get(original_para_id)
|
||||
|
||||
if not translated_paragraph_obj:
|
||||
self.logger.warning(
|
||||
f"无法在段落副本中找到对应的嵌套段落,跳过翻译应用: '{translation}'")
|
||||
continue
|
||||
|
||||
# [FIX] 查找位于副本中的Run对象
|
||||
runs_from_copy = []
|
||||
for r in original_element_info["runs"]:
|
||||
copied_r_element = element_map.get(id(r.element))
|
||||
copied_r_element = run_element_map.get(id(r.element))
|
||||
if copied_r_element is not None:
|
||||
# 使用正确的、位于副本中的父段落对象来创建Run
|
||||
new_run = Run(copied_r_element, translated_paragraph_obj)
|
||||
runs_from_copy.append(new_run)
|
||||
|
||||
@@ -482,13 +511,13 @@ class DocxTranslator(AiTranslator):
|
||||
separator_p_element.append(run_element)
|
||||
|
||||
if self.insert_mode == "append":
|
||||
current_element = p_element
|
||||
current_element = p_element_orig
|
||||
if separator_p_element is not None:
|
||||
current_element.addnext(separator_p_element)
|
||||
current_element = separator_p_element
|
||||
current_element.addnext(translated_p_element)
|
||||
elif self.insert_mode == "prepend":
|
||||
p_element.addprevious(translated_p_element)
|
||||
p_element_orig.addprevious(translated_p_element)
|
||||
if separator_p_element is not None:
|
||||
translated_p_element.addnext(separator_p_element)
|
||||
|
||||
|
||||
Reference in New Issue
Block a user