From 97b7b205654dedba430cf46f800e504055f6b665 Mon Sep 17 00:00:00 2001 From: Leon Date: Mon, 8 Jun 2026 14:43:54 +0800 Subject: [PATCH] =?UTF-8?q?fix:=20=E5=8D=B0=E5=B0=BC=E8=AF=ADfallback?= =?UTF-8?q?=E7=BC=BA=E5=A4=B1=20+=20docx=E6=A0=BC=E5=BC=8F100%=E4=BF=9D?= =?UTF-8?q?=E7=95=99?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - index.html: catch块增加id回退、setLang动态更新title、标题栏初始化 - docx_translator.py: 从分段翻译改为段落级翻译,Run结构100%保留 删除SIGNIFICANT_STYLES/is_tab_run/_process_element_children等~120行死代码 Co-Authored-By: Claude Opus 4.7 --- docutranslate/static/index.html | 27 ++- .../ai_translator/docx_translator.py | 171 ++---------------- 2 files changed, 42 insertions(+), 156 deletions(-) diff --git a/docutranslate/static/index.html b/docutranslate/static/index.html index d476799..49c1b91 100644 --- a/docutranslate/static/index.html +++ b/docutranslate/static/index.html @@ -3,7 +3,7 @@ - DocuTranslate - 交互式文档翻译 + DocuTranslate @@ -1860,6 +1860,8 @@ localStorage.setItem('ui_language', l); const langMap = {zh: 'zh-CN', en: 'en', id: 'id'}; document.documentElement.lang = langMap[l] || 'en'; + const dict = i18nData.value[l] || i18nData.value['en'] || {}; + document.title = dict['pageTitle'] || 'DocuTranslate'; }; const setTheme = (t) => { localStorage.setItem('theme', t); @@ -1886,6 +1888,7 @@ if(i18nData.value.en) Object.assign(i18nData.value.en, extraEn); } catch (e) { + console.error("i18n load failed", e); i18nData.value = { zh: { pageTitle: "DocuTranslate", @@ -1921,10 +1924,32 @@ mineruDeployServerUrlPlaceholder: "http://127.0.0.1:30000", mineruDeployParseMethodLabel: "Parse Method", mineruDeployTableEnableLabel: "Table Recognition" + }, + id: { + pageTitle: "DocuTranslate", + tutorialBtn: "Tutorial", + projectContributeBtn: "Kolaborasi", + workflowTitle: "Pilih Alur Kerja", + autoWorkflowLabel: "Pilih Otomatis", + modelPresetLabel: "Preset Model", + modelPresetPlaceholder: "Pilih preset model", + modelPresetEmpty: "Konfigurasi preset di server", + modelPresetRuntimeHint: "Provider, endpoint, dan API key akan dibaca dari environment server.", + workflowOptionPptx: "Presentasi PPTX", + pptxSettingsTitleText: "Pengaturan PPTX", + mineruDeployServerUrlLabel: "Server URL", + mineruDeployLangListLabel: "Daftar Bahasa", + mineruDeployServerUrlPlaceholder: "http://127.0.0.1:30000", + mineruDeployParseMethodLabel: "Parse Method", + mineruDeployTableEnableLabel: "Table Recognition" } }; } + // Set initial page title based on detected language + const initDict = i18nData.value[currentLang.value] || i18nData.value['en'] || {}; + document.title = initDict['pageTitle'] || 'DocuTranslate'; + // Backend Metadata try { const [metaRes, enginRes, paramsRes, configRes] = await Promise.all([ diff --git a/docutranslate/translator/ai_translator/docx_translator.py b/docutranslate/translator/ai_translator/docx_translator.py index 6d736a0..006c9a5 100644 --- a/docutranslate/translator/ai_translator/docx_translator.py +++ b/docutranslate/translator/ai_translator/docx_translator.py @@ -24,19 +24,6 @@ from docutranslate.translator.ai_translator.base import AiTranslatorConfig, AiTr # ---------------- 辅助函数 ---------------- -# [v6.2] 定义一组具有显著视觉效果的格式标签。 -# 我们只在 Run 包含这些格式时才将其视为空白格式边界。 -# 这避免了因字体、字号等微小变化导致的过度文本切分。 -SIGNIFICANT_STYLES = frozenset([ - qn('w:u'), # 下划线 - qn('w:strike'), # 删除线 - qn('w:dstrike'), # 双删除线 - qn('w:shd'), # 底纹/背景色 - qn('w:highlight'), # 荧光笔高亮 - qn('w:bdr'), # 边框 - qn('w:effectLst'), # 文本效果 (如发光、阴影) - qn('w:em'), # 强调标记 (着重号) -]) def is_image_run(run: Run) -> bool: @@ -45,33 +32,6 @@ def is_image_run(run: Run) -> bool: return ' bool: - """ - 检查一个 Run 是否仅用于格式化,不包含任何应被渲染的文本。 - 这仅适用于其 .text 属性为 "" 的情况。 - """ - return run.text == "" - - -# ---------- 新增修改部分 1: is_styled_whitespace_run 函数被移除 ---------- -# 此函数不再需要,因为新的逻辑会根据格式变化来切分,而不是根据带格式的空格。 -# ---------------------- 修改结束 ---------------------- - -def is_tab_run(run: Run) -> bool: - """ - 检查一个 Run 是否主要代表一个制表符,应被视作格式边界。 - 仅当 Run 的文本内容为空或仅包含空白,且 XML 中存在 时, - 才将其视为纯格式化用途的 Run。 - """ - # .text 属性会将 转换成 '\t' - # 如果 .text 在去除空白后仍有内容,说明这个 Run 不仅仅是个制表符。 - if run.text.strip(): - return False - - xml = getattr(run.element, 'xml', '') - return ' frozenset: - """从一个 Run 中提取“显著”格式标签的集合。""" - if run is None: - return frozenset() - rPr = run.element.rPr - if rPr is None: - return frozenset() - return frozenset(child.tag for child in rPr if child.tag in SIGNIFICANT_STYLES) - - def _have_same_significant_styles(self, run1: Run, run2: Run) -> bool: - """检查两个 Run 是否具有相同的“显著”格式集合。""" - styles1 = self._get_significant_styles(run1) - styles2 = self._get_significant_styles(run2) - return styles1 == styles2 - - # ---------------------- 修改结束 ---------------------- - - # ---------- 代码修改部分 1: 形状翻译逻辑的核心实现 ---------- - def _process_element_children(self, element, parent_paragraph: Paragraph, elements: List[Dict[str, Any]], - texts: List[str], - state: Dict[str, Any], - top_level_para: Paragraph): - - def flush_segment(): - current_runs = state['current_runs'] - if not current_runs: - return - full_text = "".join(r.text for r in current_runs) - if full_text.strip(): - # 在 elements 中增加对父段落和顶级段落的引用 - elements.append({ - "type": "text_runs", - "runs": list(current_runs), - "paragraph": parent_paragraph, - "top_level_paragraph": top_level_para - }) - texts.append(full_text) - state['current_runs'].clear() - - for child in element: - if child.tag in self.IGNORED_TAGS: - continue - - if child.tag in self.RECURSIVE_CONTAINER_TAGS: - flush_segment() - self._process_element_children(child, parent_paragraph, elements, texts, state, top_level_para) - flush_segment() # 在递归容器后也刷新,确保其内容成为独立片段 - continue - - field_char_element = child.find(qn('w:fldChar')) if isinstance(child, CT_R) else None - if field_char_element is not None: - fld_type = field_char_element.get(qn('w:fldCharType')) - if fld_type == 'begin' or fld_type == 'end': - flush_segment() - continue - - if isinstance(child, CT_R): - # 传入 parent_paragraph 以确保 Run 对象具有正确的上下文 - run = Run(child, parent_paragraph) - - # 新增逻辑:处理形状(drawing/pict)内的文本 - # 形状可以包含文本框,需要优先于图片处理逻辑进行解析 - if '