修复部分文本未翻译问题
This commit is contained in:
@@ -5,10 +5,9 @@ from dataclasses import dataclass
|
|||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from typing import Self, Literal, List, Optional, Dict, Tuple, Set
|
from typing import Self, Literal, List, Optional, Dict, Tuple, Set
|
||||||
import zipfile
|
import zipfile
|
||||||
import re # 引入正则用于解析简写
|
import re
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
|
|
||||||
# 仅导入 openpyxl 的工具函数用于坐标计算,不加载 workbook 对象
|
|
||||||
from openpyxl.utils.cell import coordinate_to_tuple, range_boundaries
|
from openpyxl.utils.cell import coordinate_to_tuple, range_boundaries
|
||||||
|
|
||||||
from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent
|
from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent
|
||||||
@@ -21,9 +20,6 @@ class XlsxTranslatorConfig(AiTranslatorConfig):
|
|||||||
insert_mode: Literal["replace", "append", "prepend"] = "replace"
|
insert_mode: Literal["replace", "append", "prepend"] = "replace"
|
||||||
separator: str = "\n"
|
separator: str = "\n"
|
||||||
# 指定翻译区域列表。
|
# 指定翻译区域列表。
|
||||||
# 示例: ["Sheet1!A1:B10", "C", "3"] (支持简写: C代表C列, 3代表第3行)
|
|
||||||
# 如果不指定表名 (如 "C"),则应用于所有表。
|
|
||||||
# 如果为 None 或空列表,则翻译整个文件中的所有文本。
|
|
||||||
translate_regions: Optional[List[str]] = None
|
translate_regions: Optional[List[str]] = None
|
||||||
|
|
||||||
|
|
||||||
@@ -54,44 +50,81 @@ class XlsxTranslator(AiTranslator):
|
|||||||
self.separator = config.separator
|
self.separator = config.separator
|
||||||
self.translate_regions = config.translate_regions
|
self.translate_regions = config.translate_regions
|
||||||
|
|
||||||
# 命名空间定义
|
# 我们虽然不依赖它查找,但写入新节点时最好还是带上标准命名空间
|
||||||
self.ns = {
|
self.NS_MAIN = 'http://schemas.openxmlformats.org/spreadsheetml/2006/main'
|
||||||
'main': 'http://schemas.openxmlformats.org/spreadsheetml/2006/main',
|
ET.register_namespace('', self.NS_MAIN)
|
||||||
'r': 'http://schemas.openxmlformats.org/officeDocument/2006/relationships'
|
|
||||||
}
|
|
||||||
ET.register_namespace('', self.ns['main'])
|
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# 辅助方法:无需加载 Workbook 即可解析结构
|
# 核心辅助方法:忽略命名空间查找
|
||||||
|
# =========================================================================
|
||||||
|
|
||||||
|
def _tag_is(self, elem: ET.Element, tag_name: str) -> bool:
    """Return True when ``elem``'s tag equals ``tag_name``, ignoring any namespace.

    ElementTree stores a namespaced tag as ``"{uri}local"``; this accepts both
    that form and a bare ``"local"`` tag.

    Args:
        elem: The element whose tag is inspected.
        tag_name: The local (un-prefixed) tag name to compare against.

    Returns:
        True if the element's local name is ``tag_name``, otherwise False.
    """
    tag = elem.tag
    if not isinstance(tag, str):
        # Comment and processing-instruction nodes carry a callable as their
        # tag, not a string, so they can never match a tag name.
        return False
    return tag == tag_name or tag.endswith(f"}}{tag_name}")
|
||||||
|
|
||||||
|
def _find_child(self, parent: ET.Element, tag_name: str) -> Optional[ET.Element]:
    """Return the first direct child of ``parent`` named ``tag_name``, ignoring namespaces.

    Only immediate children are examined; deeper descendants are not searched.

    Args:
        parent: The element whose direct children are scanned in document order.
        tag_name: The local tag name to look for.

    Returns:
        The first matching child element, or None when there is no match.
    """
    return next(
        (child for child in parent if self._tag_is(child, tag_name)),
        None,
    )
|
||||||
|
|
||||||
|
def _find_all_children(self, parent: ET.Element, tag_name: str) -> List[ET.Element]:
    """Return every direct child of ``parent`` named ``tag_name``, ignoring namespaces.

    Args:
        parent: The element whose direct children are scanned.
        tag_name: The local tag name to look for.

    Returns:
        The matching children in document order; an empty list when none match.
    """
    return [child for child in parent if self._tag_is(child, tag_name)]
|
||||||
|
|
||||||
|
def _get_child_text(self, parent: ET.Element, tag_name: str) -> Optional[str]:
    """Return the text of the first direct child named ``tag_name``, ignoring namespaces.

    Args:
        parent: The element whose direct children are searched.
        tag_name: The local tag name of the child to read.

    Returns:
        The child's ``text`` (which may itself be None for an empty element),
        or None when no such child exists.
    """
    child = self._find_child(parent, tag_name)
    if child is None:
        return None
    return child.text
|
||||||
|
|
||||||
|
# =========================================================================
|
||||||
|
# 辅助逻辑
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
|
|
||||||
def _get_shared_strings(self, zf: zipfile.ZipFile) -> List[str]:
    """Read the shared-string table of an open XLSX archive.

    Each ``<si>`` entry of ``xl/sharedStrings.xml`` yields one string, built by
    concatenating the non-empty text of every ``<t>`` element beneath it (so
    rich-text runs are joined). Tags are matched by local name, so the result
    does not depend on the namespace declared in the file.

    Args:
        zf: The opened XLSX archive.

    Returns:
        The shared strings in file order, so the list index equals the
        shared-string index; an empty list when the archive has no
        ``xl/sharedStrings.xml``.
    """
    if "xl/sharedStrings.xml" not in zf.namelist():
        return []

    shared_strings = []
    with zf.open("xl/sharedStrings.xml") as f:
        for _event, elem in ET.iterparse(f, events=("end",)):
            # The "end" event of <si> fires once its whole subtree is built,
            # so all of its <t> descendants are available here.
            if not self._tag_is(elem, "si"):
                continue
            # NOTE(review): this also picks up <t> inside phonetic runs
            # (<rPh>), if present - confirm whether those should be excluded.
            fragments = [
                node.text
                for node in elem.iter()
                if self._tag_is(node, "t") and node.text
            ]
            shared_strings.append("".join(fragments))
            # Release the finished entry to keep memory flat on large tables.
            elem.clear()
    return shared_strings
|
||||||
|
|
||||||
def _get_sheet_mapping(self, zf: zipfile.ZipFile) -> Dict[str, str]:
|
def _get_sheet_mapping(self, zf: zipfile.ZipFile) -> Dict[str, str]:
|
||||||
"""
|
"""建立 SheetName -> ZipFilename 的映射(稳健版)"""
|
||||||
获取 Sheet 名称到文件路径的映射。
|
|
||||||
例如: {'Sheet1': 'xl/worksheets/sheet1.xml'}
|
|
||||||
"""
|
|
||||||
sheet_name_to_rid = {}
|
sheet_name_to_rid = {}
|
||||||
try:
|
try:
|
||||||
with zf.open("xl/workbook.xml") as f:
|
with zf.open("xl/workbook.xml") as f:
|
||||||
root = ET.fromstring(f.read())
|
root = ET.fromstring(f.read())
|
||||||
for sheet in root.findall(".//main:sheet", self.ns):
|
# 查找所有 sheet 标签
|
||||||
|
# 需递归查找,因为 sheet 通常在 sheets 节点下
|
||||||
|
for sheet in root.iter():
|
||||||
|
if self._tag_is(sheet, "sheet"):
|
||||||
name = sheet.get("name")
|
name = sheet.get("name")
|
||||||
rid = sheet.get(f"{{{self.ns['r']}}}id")
|
# id 的属性名通常带有 r: 前缀,这很难忽略命名空间,
|
||||||
|
# 但 openpyxl 规范里 id 属性几乎总是依赖 relationships 命名空间
|
||||||
|
# 这里我们尝试遍历属性找到 key 包含 id 的
|
||||||
|
rid = None
|
||||||
|
for k, v in sheet.attrib.items():
|
||||||
|
if k.endswith("id"): # 匹配 r:id
|
||||||
|
rid = v
|
||||||
|
break
|
||||||
if name and rid:
|
if name and rid:
|
||||||
sheet_name_to_rid[name] = rid
|
sheet_name_to_rid[name] = rid
|
||||||
except Exception:
|
except Exception:
|
||||||
@@ -100,13 +133,16 @@ class XlsxTranslator(AiTranslator):
|
|||||||
rid_to_target = {}
|
rid_to_target = {}
|
||||||
try:
|
try:
|
||||||
with zf.open("xl/_rels/workbook.xml.rels") as f:
|
with zf.open("xl/_rels/workbook.xml.rels") as f:
|
||||||
tree = ET.parse(f)
|
root = ET.fromstring(f.read())
|
||||||
root = tree.getroot()
|
|
||||||
for child in root:
|
for child in root:
|
||||||
|
# Relationships 里的 tag 也是 Relationship
|
||||||
rid = child.get("Id")
|
rid = child.get("Id")
|
||||||
target = child.get("Target")
|
target = child.get("Target")
|
||||||
if rid and target:
|
if rid and target:
|
||||||
if not target.startswith("/"):
|
target = target.replace("\\", "/")
|
||||||
|
if target.startswith("/"):
|
||||||
|
target = target.lstrip("/")
|
||||||
|
else:
|
||||||
target = "xl/" + target
|
target = "xl/" + target
|
||||||
rid_to_target[rid] = target
|
rid_to_target[rid] = target
|
||||||
except Exception:
|
except Exception:
|
||||||
@@ -116,50 +152,32 @@ class XlsxTranslator(AiTranslator):
|
|||||||
for name, rid in sheet_name_to_rid.items():
|
for name, rid in sheet_name_to_rid.items():
|
||||||
if rid in rid_to_target:
|
if rid in rid_to_target:
|
||||||
mapping[name] = rid_to_target[rid]
|
mapping[name] = rid_to_target[rid]
|
||||||
|
|
||||||
return mapping
|
return mapping
|
||||||
|
|
||||||
def _normalize_range(self, range_str: str) -> str:
    """Expand a shorthand column or row reference into an explicit range.

    A string made only of ASCII letters is treated as a whole column
    (``"C"`` -> ``"C:C"``) and a string made only of digits as a whole row
    (``"3"`` -> ``"3:3"``). Anything else is returned unchanged apart from
    having surrounding whitespace removed.

    Args:
        range_str: The range text taken from a ``translate_regions`` entry.

    Returns:
        The normalised range string.
    """
    candidate = range_str.strip()
    # fullmatch is used because "$" in re.match also matches just before a
    # trailing newline, which would let "C\n" through as a column name.
    if re.fullmatch(r"[A-Za-z]+", candidate) or re.fullmatch(r"\d+", candidate):
        return f"{candidate}:{candidate}"
    return candidate
|
||||||
|
|
||||||
def _parse_region_boundaries(self, sheet_mapping: Dict[str, str]) -> Dict[str, List[Tuple]]:
|
def _parse_region_boundaries(self, sheet_mapping: Dict[str, str]) -> Dict[str, List[Tuple]]:
|
||||||
"""
|
|
||||||
解析配置的 translate_regions。
|
|
||||||
返回: { 'xl/worksheets/sheet1.xml': [(min_col, min_row, max_col, max_row), ...], ... }
|
|
||||||
"""
|
|
||||||
if not self.translate_regions:
|
if not self.translate_regions:
|
||||||
return {}
|
return {}
|
||||||
|
region_map = {}
|
||||||
region_map = {} # filename -> list of boundaries
|
global_regions = []
|
||||||
global_regions = [] # list of boundaries for all sheets
|
|
||||||
|
|
||||||
for region in self.translate_regions:
|
for region in self.translate_regions:
|
||||||
sheet_name = None
|
sheet_name = None
|
||||||
raw_range = region.strip()
|
raw_range = region.strip()
|
||||||
|
|
||||||
if "!" in raw_range:
|
if "!" in raw_range:
|
||||||
parts = raw_range.split("!", 1)
|
parts = raw_range.split("!", 1)
|
||||||
sheet_name = parts[0].strip("'")
|
sheet_name = parts[0].strip("'")
|
||||||
range_part = self._normalize_range(parts[1])
|
range_part = self._normalize_range(parts[1])
|
||||||
else:
|
else:
|
||||||
range_part = self._normalize_range(raw_range)
|
range_part = self._normalize_range(raw_range)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# boundaries: (min_col, min_row, max_col, max_row)
|
|
||||||
boundaries = range_boundaries(range_part)
|
boundaries = range_boundaries(range_part)
|
||||||
|
|
||||||
if sheet_name:
|
if sheet_name:
|
||||||
filename = sheet_mapping.get(sheet_name)
|
filename = sheet_mapping.get(sheet_name)
|
||||||
if filename:
|
if filename:
|
||||||
@@ -171,18 +189,15 @@ class XlsxTranslator(AiTranslator):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"无法解析区域 '{region}': {e}")
|
self.logger.warning(f"无法解析区域 '{region}': {e}")
|
||||||
|
|
||||||
# 将全局区域添加到所有已知 Sheet
|
|
||||||
if global_regions:
|
if global_regions:
|
||||||
all_files = set(sheet_mapping.values())
|
target_files = set(sheet_mapping.values())
|
||||||
for f in all_files:
|
for f in target_files:
|
||||||
if f not in region_map:
|
if f not in region_map:
|
||||||
region_map[f] = []
|
region_map[f] = []
|
||||||
region_map[f].extend(global_regions)
|
region_map[f].extend(global_regions)
|
||||||
|
|
||||||
return region_map
|
return region_map
|
||||||
|
|
||||||
def _is_in_boundaries(self, col: int, row: int, boundaries_list: List[Tuple]) -> bool:
|
def _is_in_boundaries(self, col: int, row: int, boundaries_list: List[Tuple]) -> bool:
|
||||||
"""检查坐标 (col, row) 是否在给定的边界列表中。"""
|
|
||||||
for (min_col, min_row, max_col, max_row) in boundaries_list:
|
for (min_col, min_row, max_col, max_row) in boundaries_list:
|
||||||
if min_col is not None and col < min_col: continue
|
if min_col is not None and col < min_col: continue
|
||||||
if min_row is not None and row < min_row: continue
|
if min_row is not None and row < min_row: continue
|
||||||
@@ -191,20 +206,30 @@ class XlsxTranslator(AiTranslator):
|
|||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
|
|
||||||
|
def _apply_insert_mode(self, original: str, translated: str) -> str:
    """Combine original and translated text according to ``self.insert_mode``.

    Args:
        original: The source text.
        translated: The translation of ``original``.

    Returns:
        ``original + separator + translated`` for ``"append"``,
        ``translated + separator + original`` for ``"prepend"``, and
        ``translated`` alone for any other mode (i.e. ``"replace"``).
    """
    if self.insert_mode == "append":
        return f"{original}{self.separator}{translated}"
    if self.insert_mode == "prepend":
        return f"{translated}{self.separator}{original}"
    return translated
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# 高效 XML 区域提取与重构
|
# 区域处理 (使用 Helper 方法,完全解耦命名空间)
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
|
|
||||||
def _get_texts_xml_regions(self, document: Document) -> List[str]:
|
def _get_texts_xml_regions(self, document: Document) -> List[str]:
|
||||||
"""使用纯 XML 解析(结合 SharedStrings)提取指定区域文本。"""
|
|
||||||
texts_to_translate = set()
|
texts_to_translate = set()
|
||||||
|
|
||||||
with zipfile.ZipFile(BytesIO(document.content), 'r') as zf:
|
with zipfile.ZipFile(BytesIO(document.content), 'r') as zf:
|
||||||
shared_strings = self._get_shared_strings(zf)
|
shared_strings = self._get_shared_strings(zf)
|
||||||
if not shared_strings:
|
|
||||||
return []
|
|
||||||
|
|
||||||
sheet_mapping = self._get_sheet_mapping(zf)
|
sheet_mapping = self._get_sheet_mapping(zf)
|
||||||
|
|
||||||
|
if not sheet_mapping:
|
||||||
|
all_sheets = [n for n in zf.namelist() if n.startswith("xl/worksheets/sheet") and n.endswith(".xml")]
|
||||||
|
for s in all_sheets:
|
||||||
|
sheet_mapping[f"Unknown_{s}"] = s
|
||||||
|
|
||||||
boundaries_map = self._parse_region_boundaries(sheet_mapping)
|
boundaries_map = self._parse_region_boundaries(sheet_mapping)
|
||||||
|
|
||||||
for filename, boundaries in boundaries_map.items():
|
for filename, boundaries in boundaries_map.items():
|
||||||
@@ -214,79 +239,107 @@ class XlsxTranslator(AiTranslator):
|
|||||||
with zf.open(filename) as f:
|
with zf.open(filename) as f:
|
||||||
context = ET.iterparse(f, events=("end",))
|
context = ET.iterparse(f, events=("end",))
|
||||||
for event, elem in context:
|
for event, elem in context:
|
||||||
if elem.tag.endswith('}c'): # Cell
|
# 匹配 <c>
|
||||||
r_attr = elem.get('r') # e.g. "C5"
|
if self._tag_is(elem, "c"):
|
||||||
t_attr = elem.get('t') # e.g. "s"
|
r_attr = elem.get('r')
|
||||||
|
t_attr = elem.get('t')
|
||||||
|
|
||||||
if r_attr and t_attr == 's':
|
if r_attr:
|
||||||
try:
|
try:
|
||||||
# 【修正】coordinate_to_tuple 返回 (row, col)
|
|
||||||
row, col = coordinate_to_tuple(r_attr)
|
row, col = coordinate_to_tuple(r_attr)
|
||||||
if self._is_in_boundaries(col, row, boundaries):
|
if self._is_in_boundaries(col, row, boundaries):
|
||||||
v_node = elem.find('main:v', self.ns)
|
text_found = None
|
||||||
if v_node is not None and v_node.text:
|
|
||||||
idx = int(v_node.text)
|
# Shared String
|
||||||
|
if t_attr == 's':
|
||||||
|
v_text = self._get_child_text(elem, "v")
|
||||||
|
if v_text:
|
||||||
|
idx = int(v_text)
|
||||||
if 0 <= idx < len(shared_strings):
|
if 0 <= idx < len(shared_strings):
|
||||||
texts_to_translate.add(shared_strings[idx])
|
text_found = shared_strings[idx]
|
||||||
|
|
||||||
|
# Inline String
|
||||||
|
elif t_attr == 'inlineStr':
|
||||||
|
is_node = self._find_child(elem, "is")
|
||||||
|
if is_node is not None:
|
||||||
|
t_text = self._get_child_text(is_node, "t")
|
||||||
|
if t_text:
|
||||||
|
text_found = t_text
|
||||||
|
|
||||||
|
if text_found:
|
||||||
|
texts_to_translate.add(text_found)
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
elem.clear()
|
elem.clear()
|
||||||
|
elif self._tag_is(elem, "row"):
|
||||||
|
elem.clear()
|
||||||
|
|
||||||
return list(texts_to_translate)
|
return list(texts_to_translate)
|
||||||
|
|
||||||
def _rebuild_xml_regions(self, original_content_bytes: bytes, translation_map: dict) -> bytes:
|
def _rebuild_xml_regions(self, original_content_bytes: bytes, translation_map: dict) -> bytes:
|
||||||
"""使用纯 XML 重构,修正了坐标解包顺序。"""
|
|
||||||
output_zip_io = BytesIO()
|
output_zip_io = BytesIO()
|
||||||
|
|
||||||
with zipfile.ZipFile(BytesIO(original_content_bytes), 'r') as zf_in:
|
with zipfile.ZipFile(BytesIO(original_content_bytes), 'r') as zf_in:
|
||||||
with zipfile.ZipFile(output_zip_io, 'w', zipfile.ZIP_DEFLATED) as zf_out:
|
with zipfile.ZipFile(output_zip_io, 'w', zipfile.ZIP_DEFLATED) as zf_out:
|
||||||
|
|
||||||
shared_strings = self._get_shared_strings(zf_in)
|
shared_strings = self._get_shared_strings(zf_in)
|
||||||
sheet_mapping = self._get_sheet_mapping(zf_in)
|
sheet_mapping = self._get_sheet_mapping(zf_in)
|
||||||
|
|
||||||
|
if not sheet_mapping:
|
||||||
|
all_sheets = [n for n in zf_in.namelist() if
|
||||||
|
n.startswith("xl/worksheets/sheet") and n.endswith(".xml")]
|
||||||
|
for s in all_sheets:
|
||||||
|
sheet_mapping[f"Unknown_{s}"] = s
|
||||||
|
|
||||||
boundaries_map = self._parse_region_boundaries(sheet_mapping)
|
boundaries_map = self._parse_region_boundaries(sheet_mapping)
|
||||||
|
|
||||||
for item in zf_in.infolist():
|
for item in zf_in.infolist():
|
||||||
if item.filename in boundaries_map:
|
if item.filename in boundaries_map:
|
||||||
boundaries = boundaries_map[item.filename]
|
boundaries = boundaries_map[item.filename]
|
||||||
|
|
||||||
with zf_in.open(item.filename) as f:
|
with zf_in.open(item.filename) as f:
|
||||||
tree = ET.parse(f)
|
tree = ET.parse(f)
|
||||||
root = tree.getroot()
|
root = tree.getroot()
|
||||||
|
|
||||||
cells_modified = False
|
cells_modified = False
|
||||||
for cell in root.findall(".//main:c", self.ns):
|
|
||||||
|
# 查找所有 <c>
|
||||||
|
# 因为 findall 不支持复杂的 tag.endswith,这里我们遍历所有节点
|
||||||
|
# 对于 parse 加载的树,iter() 是高效的
|
||||||
|
for cell in root.iter():
|
||||||
|
if not self._tag_is(cell, "c"):
|
||||||
|
continue
|
||||||
|
|
||||||
r_attr = cell.get('r')
|
r_attr = cell.get('r')
|
||||||
t_attr = cell.get('t')
|
t_attr = cell.get('t')
|
||||||
|
if r_attr:
|
||||||
if r_attr and t_attr == 's':
|
|
||||||
try:
|
try:
|
||||||
# 【修正】coordinate_to_tuple 返回 (row, col)
|
|
||||||
row, col = coordinate_to_tuple(r_attr)
|
row, col = coordinate_to_tuple(r_attr)
|
||||||
|
|
||||||
if self._is_in_boundaries(col, row, boundaries):
|
if self._is_in_boundaries(col, row, boundaries):
|
||||||
v_node = cell.find('main:v', self.ns)
|
original_text = None
|
||||||
if v_node is not None and v_node.text:
|
|
||||||
idx = int(v_node.text)
|
if t_attr == 's':
|
||||||
|
v_text = self._get_child_text(cell, "v")
|
||||||
|
if v_text:
|
||||||
|
idx = int(v_text)
|
||||||
if 0 <= idx < len(shared_strings):
|
if 0 <= idx < len(shared_strings):
|
||||||
original_text = shared_strings[idx]
|
original_text = shared_strings[idx]
|
||||||
|
elif t_attr == 'inlineStr':
|
||||||
|
is_node = self._find_child(cell, "is")
|
||||||
|
if is_node is not None:
|
||||||
|
original_text = self._get_child_text(is_node, "t")
|
||||||
|
|
||||||
if original_text in translation_map:
|
if original_text and original_text in translation_map:
|
||||||
translated_text = translation_map[original_text]
|
final_text = self._apply_insert_mode(original_text,
|
||||||
|
translation_map[original_text])
|
||||||
|
|
||||||
final_text = translated_text
|
# 清空旧内容
|
||||||
if self.insert_mode == "append":
|
for child in list(cell):
|
||||||
final_text = original_text + self.separator + translated_text
|
cell.remove(child)
|
||||||
elif self.insert_mode == "prepend":
|
|
||||||
final_text = translated_text + self.separator + original_text
|
|
||||||
|
|
||||||
# 转换为 inlineStr
|
# 写入新内容 (这里必须使用带有命名空间的标签名,否则Excel不认)
|
||||||
cell.set('t', 'inlineStr')
|
cell.set('t', 'inlineStr')
|
||||||
cell.remove(v_node)
|
# 注意:写入时使用 self.NS_MAIN 是必须的,因为这是标准
|
||||||
is_node = ET.Element(f"{{{self.ns['main']}}}is")
|
is_node = ET.Element(f"{{{self.NS_MAIN}}}is")
|
||||||
t_node = ET.SubElement(is_node, f"{{{self.ns['main']}}}t")
|
t_node = ET.SubElement(is_node, f"{{{self.NS_MAIN}}}t")
|
||||||
t_node.text = final_text
|
t_node.text = final_text
|
||||||
cell.append(is_node)
|
cell.append(is_node)
|
||||||
|
|
||||||
cells_modified = True
|
cells_modified = True
|
||||||
except Exception:
|
except Exception:
|
||||||
pass
|
pass
|
||||||
@@ -298,11 +351,10 @@ class XlsxTranslator(AiTranslator):
|
|||||||
zf_out.writestr(item, zf_in.read(item.filename))
|
zf_out.writestr(item, zf_in.read(item.filename))
|
||||||
else:
|
else:
|
||||||
zf_out.writestr(item, zf_in.read(item.filename))
|
zf_out.writestr(item, zf_in.read(item.filename))
|
||||||
|
|
||||||
return output_zip_io.getvalue()
|
return output_zip_io.getvalue()
|
||||||
|
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
# 原有全文档逻辑 (针对全文档翻译保持极致速度)
|
# 全文档处理 (同样使用 Helper)
|
||||||
# =========================================================================
|
# =========================================================================
|
||||||
|
|
||||||
def _get_texts_xml_all(self, document: Document) -> List[str]:
|
def _get_texts_xml_all(self, document: Document) -> List[str]:
|
||||||
@@ -311,20 +363,39 @@ class XlsxTranslator(AiTranslator):
|
|||||||
with zipfile.ZipFile(BytesIO(document.content), 'r') as zf:
|
with zipfile.ZipFile(BytesIO(document.content), 'r') as zf:
|
||||||
if "xl/sharedStrings.xml" in zf.namelist():
|
if "xl/sharedStrings.xml" in zf.namelist():
|
||||||
with zf.open("xl/sharedStrings.xml") as f:
|
with zf.open("xl/sharedStrings.xml") as f:
|
||||||
root = ET.fromstring(f.read())
|
context = ET.iterparse(f, events=("end",))
|
||||||
for node in root.findall('.//main:t', self.ns):
|
for event, elem in context:
|
||||||
if node.text and node.text.strip():
|
if self._tag_is(elem, "t"):
|
||||||
texts_to_translate.add(node.text)
|
if elem.text and elem.text.strip():
|
||||||
|
texts_to_translate.add(elem.text)
|
||||||
|
elem.clear()
|
||||||
|
|
||||||
|
sheet_files = [n for n in zf.namelist() if n.startswith("xl/worksheets/sheet") and n.endswith(".xml")]
|
||||||
|
for sheet_file in sheet_files:
|
||||||
|
with zf.open(sheet_file) as f:
|
||||||
|
context = ET.iterparse(f, events=("end",))
|
||||||
|
for event, elem in context:
|
||||||
|
if self._tag_is(elem, "c"):
|
||||||
|
if elem.get('t') == 'inlineStr':
|
||||||
|
is_node = self._find_child(elem, "is")
|
||||||
|
if is_node is not None:
|
||||||
|
t_text = self._get_child_text(is_node, "t")
|
||||||
|
if t_text and t_text.strip():
|
||||||
|
texts_to_translate.add(t_text)
|
||||||
|
elem.clear()
|
||||||
|
elif self._tag_is(elem, "row"):
|
||||||
|
elem.clear()
|
||||||
|
|
||||||
for item in zf.infolist():
|
for item in zf.infolist():
|
||||||
if item.filename.startswith("xl/tables/table"):
|
if item.filename.startswith("xl/tables/table"):
|
||||||
with zf.open(item.filename) as f:
|
with zf.open(item.filename) as f:
|
||||||
root = ET.fromstring(f.read())
|
root = ET.fromstring(f.read())
|
||||||
for col in root.findall('.//main:tableColumn', self.ns):
|
for col in root.iter():
|
||||||
|
if self._tag_is(col, "tableColumn"):
|
||||||
if col.get('name'):
|
if col.get('name'):
|
||||||
texts_to_translate.add(col.get('name'))
|
texts_to_translate.add(col.get('name'))
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"XML解析失败: {e}")
|
self.logger.error(f"XML解析失败: {e}", exc_info=True)
|
||||||
return list(texts_to_translate)
|
return list(texts_to_translate)
|
||||||
|
|
||||||
def _rebuild_xml_all(self, original_content_bytes: bytes, translation_map: dict) -> bytes:
|
def _rebuild_xml_all(self, original_content_bytes: bytes, translation_map: dict) -> bytes:
|
||||||
@@ -337,41 +408,54 @@ class XlsxTranslator(AiTranslator):
|
|||||||
|
|
||||||
if item.filename == "xl/sharedStrings.xml":
|
if item.filename == "xl/sharedStrings.xml":
|
||||||
root = ET.fromstring(content)
|
root = ET.fromstring(content)
|
||||||
for node in root.findall('.//main:t', self.ns):
|
modified = False
|
||||||
|
for node in root.iter():
|
||||||
|
if self._tag_is(node, "t"):
|
||||||
if node.text in translation_map:
|
if node.text in translation_map:
|
||||||
trans = translation_map[node.text]
|
node.text = self._apply_insert_mode(node.text, translation_map[node.text])
|
||||||
if self.insert_mode == "append":
|
modified = True
|
||||||
node.text = node.text + self.separator + trans
|
if modified:
|
||||||
elif self.insert_mode == "prepend":
|
zf_out.writestr(item, ET.tostring(root, encoding='utf-8', xml_declaration=True))
|
||||||
node.text = trans + self.separator + node.text
|
|
||||||
else:
|
else:
|
||||||
node.text = trans
|
zf_out.writestr(item, content)
|
||||||
content = ET.tostring(root, encoding='utf-8', xml_declaration=True)
|
|
||||||
|
elif item.filename.startswith("xl/worksheets/sheet") and item.filename.endswith(".xml"):
|
||||||
|
root = ET.fromstring(content)
|
||||||
|
modified = False
|
||||||
|
for cell in root.iter():
|
||||||
|
if self._tag_is(cell, "c") and cell.get('t') == 'inlineStr':
|
||||||
|
is_node = self._find_child(cell, "is")
|
||||||
|
if is_node is not None:
|
||||||
|
t_node = self._find_child(is_node, "t")
|
||||||
|
if t_node is not None and t_node.text in translation_map:
|
||||||
|
t_node.text = self._apply_insert_mode(t_node.text,
|
||||||
|
translation_map[t_node.text])
|
||||||
|
modified = True
|
||||||
|
if modified:
|
||||||
|
zf_out.writestr(item, ET.tostring(root, encoding='utf-8', xml_declaration=True))
|
||||||
|
else:
|
||||||
|
zf_out.writestr(item, content)
|
||||||
|
|
||||||
elif item.filename.startswith("xl/tables/table"):
|
elif item.filename.startswith("xl/tables/table"):
|
||||||
root = ET.fromstring(content)
|
root = ET.fromstring(content)
|
||||||
for col in root.findall('.//main:tableColumn', self.ns):
|
modified = False
|
||||||
|
for col in root.iter():
|
||||||
|
if self._tag_is(col, "tableColumn"):
|
||||||
orig = col.get('name')
|
orig = col.get('name')
|
||||||
if orig in translation_map:
|
if orig in translation_map:
|
||||||
trans = translation_map[orig]
|
col.set('name', self._apply_insert_mode(orig, translation_map[orig]))
|
||||||
if self.insert_mode == "append":
|
modified = True
|
||||||
col.set('name', orig + self.separator + trans)
|
if modified:
|
||||||
elif self.insert_mode == "prepend":
|
zf_out.writestr(item, ET.tostring(root, encoding='utf-8', xml_declaration=True))
|
||||||
col.set('name', trans + self.separator + orig)
|
else:
|
||||||
|
zf_out.writestr(item, content)
|
||||||
else:
|
else:
|
||||||
col.set('name', trans)
|
|
||||||
content = ET.tostring(root, encoding='utf-8', xml_declaration=True)
|
|
||||||
|
|
||||||
zf_out.writestr(item, content)
|
zf_out.writestr(item, content)
|
||||||
return output_zip_io.getvalue()
|
return output_zip_io.getvalue()
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"XML重构失败: {e}")
|
self.logger.error(f"XML重构失败: {e}", exc_info=True)
|
||||||
return original_content_bytes
|
return original_content_bytes
|
||||||
|
|
||||||
# =========================================================================
|
|
||||||
# 主入口
|
|
||||||
# =========================================================================
|
|
||||||
|
|
||||||
def translate(self, document: Document) -> Self:
|
def translate(self, document: Document) -> Self:
|
||||||
if self.translate_regions:
|
if self.translate_regions:
|
||||||
original_texts = self._get_texts_xml_regions(document)
|
original_texts = self._get_texts_xml_regions(document)
|
||||||
|
|||||||
Reference in New Issue
Block a user