修复html分隔符不能换行的问题
This commit is contained in:
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -150,6 +150,7 @@ class HtmlTranslator(AiTranslator):
|
|||||||
translated_texts: list[str], original_texts: list[str]) -> bytes:
|
translated_texts: list[str], original_texts: list[str]) -> bytes:
|
||||||
"""
|
"""
|
||||||
将翻译后的文本写回到BeautifulSoup对象中对应的节点或属性,并返回最终的HTML字节流。
|
将翻译后的文本写回到BeautifulSoup对象中对应的节点或属性,并返回最终的HTML字节流。
|
||||||
|
【版本 3.0: 修正了对HTML分隔符的支持】
|
||||||
"""
|
"""
|
||||||
if len(translatable_items) != len(translated_texts):
|
if len(translatable_items) != len(translated_texts):
|
||||||
self.logger.error("翻译前后的文本片段数量不匹配 (%d vs %d),跳过写入操作以防损坏文件。",
|
self.logger.error("翻译前后的文本片段数量不匹配 (%d vs %d),跳过写入操作以防损坏文件。",
|
||||||
@@ -160,36 +161,59 @@ class HtmlTranslator(AiTranslator):
|
|||||||
translated_text = translated_texts[i]
|
translated_text = translated_texts[i]
|
||||||
original_text = original_texts[i]
|
original_text = original_texts[i]
|
||||||
|
|
||||||
new_content = ""
|
|
||||||
if self.insert_mode == "replace":
|
|
||||||
if item['type'] == 'node':
|
|
||||||
# 对于文本节点,保留原文前后的空白字符,这对维持内联元素的间距至关重要。
|
|
||||||
leading_space = original_text[:len(original_text) - len(original_text.lstrip())]
|
|
||||||
trailing_space = original_text[len(original_text.rstrip()):]
|
|
||||||
new_content = leading_space + translated_text + trailing_space
|
|
||||||
else: # 属性
|
|
||||||
new_content = translated_text
|
|
||||||
|
|
||||||
elif self.insert_mode == "append":
|
|
||||||
new_content = original_text + self.separator + translated_text
|
|
||||||
elif self.insert_mode == "prepend":
|
|
||||||
new_content = translated_text + self.separator + original_text
|
|
||||||
else:
|
|
||||||
self.logger.error(f"不正确的HtmlTranslatorConfig参数: insert_mode='{self.insert_mode}'")
|
|
||||||
new_content = original_text # 出错时恢复原文
|
|
||||||
|
|
||||||
# 根据类型将内容写回
|
|
||||||
if item['type'] == 'node':
|
if item['type'] == 'node':
|
||||||
node = item['object']
|
node = item['object']
|
||||||
# 检查节点是否仍然在解析树中,以防在处理过程中被移动或删除
|
if not node.parent: # 确保节点仍然在树中
|
||||||
if node.parent:
|
continue
|
||||||
node.replace_with(NavigableString(new_content))
|
|
||||||
|
# --- 构造包含HTML的新内容字符串 ---
|
||||||
|
new_content_str = ""
|
||||||
|
if self.insert_mode == "replace":
|
||||||
|
leading_space = original_text[:len(original_text) - len(original_text.lstrip())]
|
||||||
|
trailing_space = original_text[len(original_text.rstrip()):]
|
||||||
|
new_content_str = leading_space + translated_text + trailing_space
|
||||||
|
elif self.insert_mode == "append":
|
||||||
|
new_content_str = original_text + self.separator + translated_text
|
||||||
|
elif self.insert_mode == "prepend":
|
||||||
|
new_content_str = translated_text + self.separator + original_text
|
||||||
|
else:
|
||||||
|
self.logger.error(f"不正确的HtmlTranslatorConfig参数: insert_mode='{self.insert_mode}'")
|
||||||
|
new_content_str = original_text
|
||||||
|
|
||||||
|
# --- 核心修改:正确地将HTML字符串片段插入DOM ---
|
||||||
|
# 1. 使用一个临时的父标签(如此处的'div')来解析HTML片段,
|
||||||
|
# 这是在BeautifulSoup中处理片段的标准做法,避免了自动添加<html><body>。
|
||||||
|
temp_soup = BeautifulSoup(f"<div>{new_content_str}</div>", 'html.parser')
|
||||||
|
new_elements = temp_soup.div.contents
|
||||||
|
|
||||||
|
# 2. 将解析出的新元素(可能是文本节点和<br>标签的混合)
|
||||||
|
# 以相反的顺序插入到原始节点之后。这样做可以保持它们的原始顺序。
|
||||||
|
for element in reversed(new_elements):
|
||||||
|
node.insert_after(element)
|
||||||
|
|
||||||
|
# 3. 移除原始的文本节点
|
||||||
|
node.decompose()
|
||||||
|
|
||||||
elif item['type'] == 'attribute':
|
elif item['type'] == 'attribute':
|
||||||
|
# --- 属性逻辑保持不变,因为属性值不支持HTML ---
|
||||||
tag = item['tag']
|
tag = item['tag']
|
||||||
attr = item['attribute']
|
attr = item['attribute']
|
||||||
tag[attr] = new_content
|
new_attr_value = ""
|
||||||
|
|
||||||
|
# 在属性值中,<br>将被视为普通文本,这是正确的行为
|
||||||
|
separator_for_attr = self.separator.replace('<br>', ' ').replace('<br/>', ' ')
|
||||||
|
|
||||||
|
if self.insert_mode == "replace":
|
||||||
|
new_attr_value = translated_text
|
||||||
|
elif self.insert_mode == "append":
|
||||||
|
new_attr_value = original_text + separator_for_attr + translated_text
|
||||||
|
elif self.insert_mode == "prepend":
|
||||||
|
new_attr_value = translated_text + separator_for_attr + original_text
|
||||||
|
else:
|
||||||
|
new_attr_value = original_text
|
||||||
|
|
||||||
|
tag[attr] = new_attr_value.strip()
|
||||||
|
|
||||||
# 将修改后的BeautifulSoup对象编码为utf-8字节流
|
|
||||||
return soup.encode('utf-8')
|
return soup.encode('utf-8')
|
||||||
|
|
||||||
def translate(self, document: Document) -> Self:
|
def translate(self, document: Document) -> Self:
|
||||||
|
|||||||
Reference in New Issue
Block a user