优化docx翻译
This commit is contained in:
@@ -8,10 +8,10 @@ from typing import Self, Literal, List, Dict, Any, Tuple
|
|||||||
import docx
|
import docx
|
||||||
from docx.document import Document as DocumentObject
|
from docx.document import Document as DocumentObject
|
||||||
from docx.oxml.ns import qn
|
from docx.oxml.ns import qn
|
||||||
from docx.oxml.shared import OxmlElement
|
|
||||||
from docx.oxml.text.run import CT_R
|
from docx.oxml.text.run import CT_R
|
||||||
from docx.text.paragraph import Paragraph
|
from docx.text.paragraph import Paragraph
|
||||||
from docx.text.run import Run
|
from docx.text.run import Run
|
||||||
|
from docx.table import _Cell, Table
|
||||||
|
|
||||||
from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent
|
from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent
|
||||||
from docutranslate.ir.document import Document
|
from docutranslate.ir.document import Document
|
||||||
@@ -27,17 +27,12 @@ def is_image_run(run: Run) -> bool:
|
|||||||
|
|
||||||
def is_formatting_only_run(run: Run) -> bool:
|
def is_formatting_only_run(run: Run) -> bool:
|
||||||
"""
|
"""
|
||||||
检查一个 Run 是否主要用于格式化,例如:
|
检查一个 Run 是否主要用于格式化,例如一个空的粗体/斜体/下划线 Run。
|
||||||
- 一个空的粗体/斜体/下划线 Run。
|
|
||||||
- 一个只包含空格但有下划线的 Run (用于画线)。
|
|
||||||
"""
|
"""
|
||||||
text = run.text
|
text = run.text
|
||||||
# 如果文本为空或只包含空格
|
|
||||||
if not text.strip():
|
if not text.strip():
|
||||||
# 并且它带有任何一种常见的格式,就认为它是一个格式化标记
|
|
||||||
if run.underline or run.bold or run.italic or run.font.strike or run.font.subscript or run.font.superscript:
|
if run.underline or run.bold or run.italic or run.font.strike or run.font.subscript or run.font.superscript:
|
||||||
return True
|
return True
|
||||||
# 特别处理:如果文本是空格且有下划线,这几乎总是为了画线
|
|
||||||
if text and run.underline:
|
if text and run.underline:
|
||||||
return True
|
return True
|
||||||
return False
|
return False
|
||||||
@@ -53,29 +48,20 @@ class DocxTranslatorConfig(AiTranslatorConfig):
|
|||||||
# ---------------- 主类 ----------------
|
# ---------------- 主类 ----------------
|
||||||
class DocxTranslator(AiTranslator):
|
class DocxTranslator(AiTranslator):
|
||||||
"""
|
"""
|
||||||
用于翻译 .docx 文件的高级翻译器,能够高精度保留样式、处理超链接、
|
一个基于高级结构化解析的 .docx 文件翻译器。
|
||||||
域代码(如图注),并支持翻译脚注、尾注等。
|
它能高精度保留样式,并正确处理正文、表格、页眉/脚、脚注/尾注、超链接和目录(TOC)等复杂元素。
|
||||||
[v3.6 - 引入递归解析以处理嵌套内容标签,如 smartTag]
|
|
||||||
|
[v4.1 - 稳定版重构]
|
||||||
|
- 移除所有对 python-docx 内部类的脆弱导入和猴子补丁。
|
||||||
|
- 实现了一个健壮的、基于鸭子类型的 _traverse_container 函数来统一处理所有文本容器。
|
||||||
"""
|
"""
|
||||||
|
|
||||||
# 包含所有应被解析器完全忽略的、不影响文本内容的元数据标签
|
|
||||||
IGNORED_TAGS = {
|
IGNORED_TAGS = {
|
||||||
qn('w:proofErr'), # 拼写和语法错误标记
|
qn('w:proofErr'), qn('w:lastRenderedPageBreak'), qn('w:bookmarkStart'),
|
||||||
qn('w:lastRenderedPageBreak'), # 上次渲染的分页符位置
|
qn('w:bookmarkEnd'), qn('w:commentRangeStart'), qn('w:commentRangeEnd'),
|
||||||
qn('w:bookmarkStart'), # 书签开始
|
qn('w:del'), qn('w:ins'), qn('w:moveFrom'), qn('w:moveTo'),
|
||||||
qn('w:bookmarkEnd'), # 书签结束
|
|
||||||
qn('w:commentRangeStart'), # 批注范围开始
|
|
||||||
qn('w:commentRangeEnd'), # 批注范围结束
|
|
||||||
qn('w:del'), # 修订:删除
|
|
||||||
qn('w:ins'), # 修订:插入
|
|
||||||
qn('w:moveFrom'), # 修订:移动源
|
|
||||||
qn('w:moveTo'), # 修订:移动目标
|
|
||||||
}
|
}
|
||||||
|
|
||||||
# 包含应递归处理其内部内容的容器标签
|
|
||||||
RECURSIVE_CONTAINER_TAGS = {
|
RECURSIVE_CONTAINER_TAGS = {
|
||||||
qn('w:smartTag'), # 智能标记 (包含文本)
|
qn('w:smartTag'), qn('w:sdtContent'), qn('w:hyperlink'),
|
||||||
qn('w:sdtContent'), # 结构化文档标签内容 (包含文本)
|
|
||||||
}
|
}
|
||||||
|
|
||||||
def __init__(self, config: DocxTranslatorConfig):
|
def __init__(self, config: DocxTranslatorConfig):
|
||||||
@@ -84,58 +70,23 @@ class DocxTranslator(AiTranslator):
|
|||||||
self.translate_agent = None
|
self.translate_agent = None
|
||||||
if not self.skip_translate:
|
if not self.skip_translate:
|
||||||
agent_config = SegmentsTranslateAgentConfig(
|
agent_config = SegmentsTranslateAgentConfig(
|
||||||
custom_prompt=config.custom_prompt,
|
custom_prompt=config.custom_prompt, to_lang=config.to_lang, base_url=config.base_url,
|
||||||
to_lang=config.to_lang,
|
api_key=config.api_key, model_id=config.model_id, temperature=config.temperature,
|
||||||
base_url=config.base_url,
|
thinking=config.thinking, concurrent=config.concurrent, timeout=config.timeout,
|
||||||
api_key=config.api_key,
|
logger=self.logger, glossary_dict=config.glossary_dict, retry=config.retry,
|
||||||
model_id=config.model_id,
|
|
||||||
temperature=config.temperature,
|
|
||||||
thinking=config.thinking,
|
|
||||||
concurrent=config.concurrent,
|
|
||||||
timeout=config.timeout,
|
|
||||||
logger=self.logger,
|
|
||||||
glossary_dict=config.glossary_dict,
|
|
||||||
retry=config.retry,
|
|
||||||
system_proxy_enable=config.system_proxy_enable
|
system_proxy_enable=config.system_proxy_enable
|
||||||
)
|
)
|
||||||
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
||||||
|
|
||||||
self.insert_mode = config.insert_mode
|
self.insert_mode = config.insert_mode
|
||||||
self.separator = config.separator
|
self.separator = config.separator
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _extract_hyperlink_text(hyperlink_element) -> str:
|
|
||||||
"""从超链接 OXML 元素中提取所有显示文本。"""
|
|
||||||
return ''.join(
|
|
||||||
t.text for t in hyperlink_element.findall('.//w:t', namespaces=hyperlink_element.nsmap) if t.text
|
|
||||||
)
|
|
||||||
|
|
||||||
def _is_seq_field(self, child) -> bool:
|
|
||||||
"""判断一个 OXML 元素是否为 SEQ 域的一部分(如图、表编号)。"""
|
|
||||||
try:
|
|
||||||
if child.tag == qn('w:fldSimple'):
|
|
||||||
instr = child.get(qn('w:instr'), '')
|
|
||||||
if 'SEQ' in instr: return True
|
|
||||||
if child.tag == qn('w:r'):
|
|
||||||
for instr_text in child.findall('.//w:instrText', namespaces=child.nsmap):
|
|
||||||
if instr_text.text and 'SEQ' in instr_text.text:
|
|
||||||
return True
|
|
||||||
except Exception:
|
|
||||||
pass
|
|
||||||
return False
|
|
||||||
|
|
||||||
def _process_element_children(self, element, elements: List[Dict[str, Any]], texts: List[str],
|
def _process_element_children(self, element, elements: List[Dict[str, Any]], texts: List[str],
|
||||||
state: Dict[str, Any]):
|
state: Dict[str, Any]):
|
||||||
"""
|
|
||||||
[新函数] 递归处理任何给定XML元素的子节点。
|
|
||||||
'state' 字典用于跨递归调用传递状态,如 current_runs 和 is_inside_field。
|
|
||||||
"""
|
|
||||||
current_runs = state['current_runs']
|
current_runs = state['current_runs']
|
||||||
|
|
||||||
def flush_segment():
|
def flush_segment():
|
||||||
nonlocal current_runs
|
nonlocal current_runs
|
||||||
if not current_runs:
|
if not current_runs: return
|
||||||
return
|
|
||||||
full_text = "".join(r.text for r in current_runs)
|
full_text = "".join(r.text for r in current_runs)
|
||||||
if full_text.strip():
|
if full_text.strip():
|
||||||
elements.append({"type": "text_runs", "runs": current_runs})
|
elements.append({"type": "text_runs", "runs": current_runs})
|
||||||
@@ -146,62 +97,38 @@ class DocxTranslator(AiTranslator):
|
|||||||
for child in element:
|
for child in element:
|
||||||
if child.tag in self.IGNORED_TAGS:
|
if child.tag in self.IGNORED_TAGS:
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if child.tag in self.RECURSIVE_CONTAINER_TAGS:
|
if child.tag in self.RECURSIVE_CONTAINER_TAGS:
|
||||||
self._process_element_children(child, elements, texts, state)
|
self._process_element_children(child, elements, texts, state)
|
||||||
continue
|
continue
|
||||||
|
field_char_element = child.find(qn('w:fldChar')) if isinstance(child, CT_R) else (
|
||||||
if child.tag == qn('w:hyperlink'):
|
child if child.tag == qn('w:fldChar') else None)
|
||||||
flush_segment()
|
|
||||||
hyperlink_text = self._extract_hyperlink_text(child)
|
|
||||||
if hyperlink_text.strip():
|
|
||||||
elements.append({"type": "hyperlink", "element": child})
|
|
||||||
texts.append(hyperlink_text)
|
|
||||||
continue
|
|
||||||
|
|
||||||
field_char_element = None
|
|
||||||
if child.tag == qn('w:fldChar'):
|
|
||||||
field_char_element = child
|
|
||||||
elif isinstance(child, CT_R):
|
|
||||||
field_char_element = child.find(qn('w:fldChar'))
|
|
||||||
|
|
||||||
if field_char_element is not None:
|
if field_char_element is not None:
|
||||||
flush_segment()
|
flush_segment()
|
||||||
fld_type = field_char_element.get(qn('w:fldCharType'))
|
fld_type = field_char_element.get(qn('w:fldCharType'))
|
||||||
if fld_type == 'begin':
|
if fld_type == 'begin':
|
||||||
state['is_inside_field'] = True
|
state['field_depth'] += 1
|
||||||
elif fld_type == 'end':
|
elif fld_type == 'end':
|
||||||
state['is_inside_field'] = False
|
state['field_depth'] = max(0, state['field_depth'] - 1)
|
||||||
continue
|
continue
|
||||||
|
|
||||||
if state.get('is_inside_field', False):
|
|
||||||
continue
|
|
||||||
|
|
||||||
if self._is_seq_field(child):
|
|
||||||
flush_segment()
|
|
||||||
continue
|
|
||||||
|
|
||||||
if isinstance(child, CT_R):
|
if isinstance(child, CT_R):
|
||||||
|
if child.find(qn('w:instrText')) is not None:
|
||||||
|
continue
|
||||||
|
if state['field_depth'] > 0:
|
||||||
|
continue
|
||||||
run = Run(child, None)
|
run = Run(child, None)
|
||||||
if is_image_run(run) or run.element.find(qn('w:tab')) is not None or is_formatting_only_run(run):
|
if is_image_run(run) or is_formatting_only_run(run):
|
||||||
flush_segment()
|
flush_segment()
|
||||||
else:
|
else:
|
||||||
current_runs.append(run)
|
current_runs.append(run)
|
||||||
else:
|
else:
|
||||||
flush_segment()
|
flush_segment()
|
||||||
|
|
||||||
state['current_runs'] = current_runs
|
state['current_runs'] = current_runs
|
||||||
|
|
||||||
def _process_paragraph(self, para: Paragraph, elements: List[Dict[str, Any]], texts: List[str]):
|
def _process_paragraph(self, para: Paragraph, elements: List[Dict[str, Any]], texts: List[str]):
|
||||||
"""
|
|
||||||
[重构] 作为递归处理器的入口点,初始化状态并调用递归函数。
|
|
||||||
"""
|
|
||||||
if not para.text.strip():
|
if not para.text.strip():
|
||||||
return
|
return
|
||||||
|
state = {'current_runs': [], 'field_depth': 0}
|
||||||
state = {'current_runs': [], 'is_inside_field': False}
|
|
||||||
self._process_element_children(para._p, elements, texts, state)
|
self._process_element_children(para._p, elements, texts, state)
|
||||||
|
|
||||||
current_runs = state['current_runs']
|
current_runs = state['current_runs']
|
||||||
if current_runs:
|
if current_runs:
|
||||||
full_text = "".join(r.text for r in current_runs)
|
full_text = "".join(r.text for r in current_runs)
|
||||||
@@ -209,90 +136,78 @@ class DocxTranslator(AiTranslator):
|
|||||||
elements.append({"type": "text_runs", "runs": current_runs})
|
elements.append({"type": "text_runs", "runs": current_runs})
|
||||||
texts.append(full_text)
|
texts.append(full_text)
|
||||||
|
|
||||||
def _process_container(self, container, elements: List[Dict[str, Any]], texts: List[str]):
|
def _traverse_container(self, container, elements: List[Dict[str, Any]], texts: List[str]):
|
||||||
"""递归处理包含段落和表格的容器(如文档、单元格、页眉)。"""
|
"""
|
||||||
if not container: return
|
[核心导航员] 健壮地遍历任何文本容器 (Document, _Cell, _Header, etc.)。
|
||||||
for para in getattr(container, 'paragraphs', []):
|
"""
|
||||||
self._process_paragraph(para, elements, texts)
|
if container is None:
|
||||||
for table in getattr(container, 'tables', []):
|
return
|
||||||
for row in table.rows:
|
|
||||||
for cell in row.cells:
|
|
||||||
self._process_container(cell, elements, texts)
|
|
||||||
|
|
||||||
def _process_part(self, doc_part, elements: List[Dict[str, Any]], texts: List[str]):
|
# --- 关键修复 ---
|
||||||
"""处理文档的非主内容部分,如脚注、尾注。"""
|
# 通过检查属性来确定如何获取子元素,而不是依赖于对象的具体类型。
|
||||||
if not doc_part: return
|
# 这种方法对所有类型的容器都有效。
|
||||||
for para_element in doc_part.element.findall('.//w:p', namespaces=doc_part.element.nsmap):
|
parent_element = None
|
||||||
try:
|
if hasattr(container, 'element') and hasattr(container.element, 'body'):
|
||||||
self._process_paragraph(Paragraph(para_element, doc_part), elements, texts)
|
parent_element = container.element.body # For Document
|
||||||
except Exception as e:
|
elif hasattr(container, '_element'):
|
||||||
self.logger.warning(f"处理文档部件段落时出错: {e}")
|
parent_element = container._element # For _Cell, _Header, _Footer, NotesParts
|
||||||
|
|
||||||
|
if parent_element is None:
|
||||||
|
return
|
||||||
|
|
||||||
|
for child_element in parent_element:
|
||||||
|
if child_element.tag.endswith('p'):
|
||||||
|
self._process_paragraph(Paragraph(child_element, container), elements, texts)
|
||||||
|
elif child_element.tag.endswith('tbl'):
|
||||||
|
table = Table(child_element, container)
|
||||||
|
for row in table.rows:
|
||||||
|
for cell in row.cells:
|
||||||
|
self._traverse_container(cell, elements, texts)
|
||||||
|
|
||||||
def _pre_translate(self, document: Document) -> Tuple[DocumentObject, List[Dict[str, Any]], List[str]]:
|
def _pre_translate(self, document: Document) -> Tuple[DocumentObject, List[Dict[str, Any]], List[str]]:
|
||||||
doc = docx.Document(BytesIO(document.content))
|
doc = docx.Document(BytesIO(document.content))
|
||||||
elements, texts = [], []
|
elements, texts = [], []
|
||||||
|
|
||||||
# 1. 处理主文档内容
|
# 1. 处理主文档内容
|
||||||
self._process_container(doc, elements, texts)
|
self._traverse_container(doc, elements, texts)
|
||||||
|
|
||||||
# 2. 处理所有类型的页眉和页脚
|
# 2. 处理所有节的页眉和页脚
|
||||||
for section in doc.sections:
|
for section in doc.sections:
|
||||||
self._process_container(section.header, elements, texts)
|
self._traverse_container(section.header, elements, texts)
|
||||||
self._process_container(section.first_page_header, elements, texts)
|
self._traverse_container(section.first_page_header, elements, texts)
|
||||||
self._process_container(section.even_page_header, elements, texts)
|
self._traverse_container(section.even_page_header, elements, texts)
|
||||||
self._process_container(section.footer, elements, texts)
|
self._traverse_container(section.footer, elements, texts)
|
||||||
self._process_container(section.first_page_footer, elements, texts)
|
self._traverse_container(section.first_page_footer, elements, texts)
|
||||||
self._process_container(section.even_page_footer, elements, texts)
|
self._traverse_container(section.even_page_footer, elements, texts)
|
||||||
|
|
||||||
# 3. 处理脚注、尾注
|
# 3. 处理脚注和尾注
|
||||||
if part := getattr(doc.part, 'footnotes_part', None): self._process_part(part, elements, texts)
|
if hasattr(doc.part, 'footnotes_part') and doc.part.footnotes_part is not None:
|
||||||
if part := getattr(doc.part, 'endnotes_part', None): self._process_part(part, elements, texts)
|
self._traverse_container(doc.part.footnotes_part, elements, texts)
|
||||||
|
if hasattr(doc.part, 'endnotes_part') and doc.part.endnotes_part is not None:
|
||||||
|
self._traverse_container(doc.part.endnotes_part, elements, texts)
|
||||||
|
|
||||||
return doc, elements, texts
|
return doc, elements, texts
|
||||||
|
|
||||||
def _apply_translation(self, element_info: Dict[str, Any], final_text: str):
|
def _apply_translation(self, element_info: Dict[str, Any], final_text: str):
|
||||||
"""
|
if element_info["type"] == "text_runs":
|
||||||
将翻译后的文本写回对应的 OXML 元素。
|
|
||||||
对于多Run的文本段,写入第一个Run并【删除】其余,以避免产生方框占位符。
|
|
||||||
"""
|
|
||||||
el_type = element_info["type"]
|
|
||||||
if el_type == "text_runs":
|
|
||||||
runs = element_info["runs"]
|
runs = element_info["runs"]
|
||||||
if not runs:
|
if not runs: return
|
||||||
return
|
|
||||||
|
|
||||||
runs[0].text = final_text
|
runs[0].text = final_text
|
||||||
|
|
||||||
for run in runs[1:]:
|
for run in runs[1:]:
|
||||||
p_element = run.element.getparent()
|
parent_element = run.element.getparent()
|
||||||
if p_element is not None:
|
if parent_element is not None:
|
||||||
p_element.remove(run.element)
|
try:
|
||||||
|
parent_element.remove(run.element)
|
||||||
elif el_type == "hyperlink":
|
except ValueError:
|
||||||
hyperlink = element_info["element"]
|
pass
|
||||||
r_elements = hyperlink.findall(f'.//{qn("w:r")}')
|
|
||||||
if r_elements:
|
|
||||||
first_r = r_elements[0]
|
|
||||||
for t in first_r.findall(f'.//{qn("w:t")}'):
|
|
||||||
first_r.remove(t)
|
|
||||||
|
|
||||||
new_t = OxmlElement('w:t')
|
|
||||||
new_t.text = final_text
|
|
||||||
new_t.set(qn('xml:space'), 'preserve')
|
|
||||||
first_r.append(new_t)
|
|
||||||
|
|
||||||
for other_r in r_elements[1:]:
|
|
||||||
if (parent := other_r.getparent()) is not None:
|
|
||||||
parent.remove(other_r)
|
|
||||||
|
|
||||||
def _after_translate(self, doc: DocumentObject, elements: List[Dict[str, Any]], translated: List[str],
|
def _after_translate(self, doc: DocumentObject, elements: List[Dict[str, Any]], translated: List[str],
|
||||||
originals: List[str]) -> bytes:
|
originals: List[str]) -> bytes:
|
||||||
if len(elements) != len(translated):
|
if len(elements) != len(translated):
|
||||||
self.logger.error(
|
self.logger.error(
|
||||||
f"Translation count mismatch! Originals: {len(originals)}, Translated: {len(translated)}. Processing common part only.")
|
f"翻译数量不匹配!原文: {len(originals)}, 译文: {len(translated)}. 将只处理公共部分。")
|
||||||
min_len = min(len(elements), len(translated), len(originals))
|
min_len = min(len(elements), len(translated), len(originals))
|
||||||
elements, translated, originals = elements[:min_len], translated[:min_len], originals[:min_len]
|
elements, translated, originals = elements[:min_len], translated[:min_len], originals[:min_len]
|
||||||
|
|
||||||
for info, orig, trans in zip(elements, originals, translated):
|
for info, orig, trans in zip(elements, originals, translated):
|
||||||
if self.insert_mode == "replace":
|
if self.insert_mode == "replace":
|
||||||
final_text = trans
|
final_text = trans
|
||||||
@@ -303,7 +218,6 @@ class DocxTranslator(AiTranslator):
|
|||||||
else:
|
else:
|
||||||
final_text = trans
|
final_text = trans
|
||||||
self._apply_translation(info, final_text)
|
self._apply_translation(info, final_text)
|
||||||
|
|
||||||
doc_output_stream = BytesIO()
|
doc_output_stream = BytesIO()
|
||||||
doc.save(doc_output_stream)
|
doc.save(doc_output_stream)
|
||||||
return doc_output_stream.getvalue()
|
return doc_output_stream.getvalue()
|
||||||
@@ -311,35 +225,29 @@ class DocxTranslator(AiTranslator):
|
|||||||
def translate(self, document: Document) -> Self:
|
def translate(self, document: Document) -> Self:
|
||||||
doc, elements, originals = self._pre_translate(document)
|
doc, elements, originals = self._pre_translate(document)
|
||||||
if not originals:
|
if not originals:
|
||||||
self.logger.info("\nNo translatable text content found in the document.")
|
self.logger.info("\n文档中未找到可翻译的文本内容。")
|
||||||
document.content = self._after_translate(doc, elements, [], [])
|
document.content = self._after_translate(doc, elements, [], [])
|
||||||
return self
|
return self
|
||||||
|
|
||||||
if self.glossary_agent:
|
if self.glossary_agent:
|
||||||
self.glossary_dict_gen = self.glossary_agent.send_segments(originals, self.chunk_size)
|
self.glossary_dict_gen = self.glossary_agent.send_segments(originals, self.chunk_size)
|
||||||
if self.translate_agent:
|
if self.translate_agent:
|
||||||
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
||||||
|
|
||||||
translated = self.translate_agent.send_segments(originals,
|
translated = self.translate_agent.send_segments(originals,
|
||||||
self.chunk_size) if self.translate_agent else originals
|
self.chunk_size) if self.translate_agent else originals
|
||||||
|
|
||||||
document.content = self._after_translate(doc, elements, translated, originals)
|
document.content = self._after_translate(doc, elements, translated, originals)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
async def translate_async(self, document: Document) -> Self:
|
async def translate_async(self, document: Document) -> Self:
|
||||||
doc, elements, originals = await asyncio.to_thread(self._pre_translate, document)
|
doc, elements, originals = await asyncio.to_thread(self._pre_translate, document)
|
||||||
if not originals:
|
if not originals:
|
||||||
self.logger.info("\nNo translatable text content found in the document.")
|
self.logger.info("\n文档中未找到可翻译的文本内容。")
|
||||||
document.content = await asyncio.to_thread(self._after_translate, doc, elements, [], [])
|
document.content = await asyncio.to_thread(self._after_translate, doc, elements, [], [])
|
||||||
return self
|
return self
|
||||||
|
|
||||||
if self.glossary_agent:
|
if self.glossary_agent:
|
||||||
self.glossary_dict_gen = await self.glossary_agent.send_segments_async(originals, self.chunk_size)
|
self.glossary_dict_gen = await self.glossary_agent.send_segments_async(originals, self.chunk_size)
|
||||||
if self.translate_agent:
|
if self.translate_agent:
|
||||||
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
||||||
|
|
||||||
translated = await self.translate_agent.send_segments_async(originals,
|
translated = await self.translate_agent.send_segments_async(originals,
|
||||||
self.chunk_size) if self.translate_agent else originals
|
self.chunk_size) if self.translate_agent else originals
|
||||||
|
|
||||||
document.content = await asyncio.to_thread(self._after_translate, doc, elements, translated, originals)
|
document.content = await asyncio.to_thread(self._after_translate, doc, elements, translated, originals)
|
||||||
return self
|
return self
|
||||||
Reference in New Issue
Block a user