diff --git a/docutranslate/static/index.html b/docutranslate/static/index.html
index d476799..49c1b91 100644
--- a/docutranslate/static/index.html
+++ b/docutranslate/static/index.html
@@ -3,7 +3,7 @@
- DocuTranslate - 交互式文档翻译
+ DocuTranslate
@@ -1860,6 +1860,8 @@
localStorage.setItem('ui_language', l);
const langMap = {zh: 'zh-CN', en: 'en', id: 'id'};
document.documentElement.lang = langMap[l] || 'en';
+ const dict = i18nData.value[l] || i18nData.value['en'] || {};
+ document.title = dict['pageTitle'] || 'DocuTranslate';
};
const setTheme = (t) => {
localStorage.setItem('theme', t);
@@ -1886,6 +1888,7 @@
if(i18nData.value.en) Object.assign(i18nData.value.en, extraEn);
} catch (e) {
+ console.error("i18n load failed", e);
i18nData.value = {
zh: {
pageTitle: "DocuTranslate",
@@ -1921,10 +1924,32 @@
mineruDeployServerUrlPlaceholder: "http://127.0.0.1:30000",
mineruDeployParseMethodLabel: "Parse Method",
mineruDeployTableEnableLabel: "Table Recognition"
+ },
+ id: {
+ pageTitle: "DocuTranslate",
+ tutorialBtn: "Tutorial",
+ projectContributeBtn: "Kolaborasi",
+ workflowTitle: "Pilih Alur Kerja",
+ autoWorkflowLabel: "Pilih Otomatis",
+ modelPresetLabel: "Preset Model",
+ modelPresetPlaceholder: "Pilih preset model",
+ modelPresetEmpty: "Konfigurasi preset di server",
+ modelPresetRuntimeHint: "Provider, endpoint, dan API key akan dibaca dari environment server.",
+ workflowOptionPptx: "Presentasi PPTX",
+ pptxSettingsTitleText: "Pengaturan PPTX",
+ mineruDeployServerUrlLabel: "Server URL",
+ mineruDeployLangListLabel: "Daftar Bahasa",
+ mineruDeployServerUrlPlaceholder: "http://127.0.0.1:30000",
+ mineruDeployParseMethodLabel: "Parse Method",
+ mineruDeployTableEnableLabel: "Table Recognition"
}
};
}
+ // Set initial page title based on detected language
+ const initDict = i18nData.value[currentLang.value] || i18nData.value['en'] || {};
+ document.title = initDict['pageTitle'] || 'DocuTranslate';
+
// Backend Metadata
try {
const [metaRes, enginRes, paramsRes, configRes] = await Promise.all([
diff --git a/docutranslate/translator/ai_translator/docx_translator.py b/docutranslate/translator/ai_translator/docx_translator.py
index 6d736a0..006c9a5 100644
--- a/docutranslate/translator/ai_translator/docx_translator.py
+++ b/docutranslate/translator/ai_translator/docx_translator.py
@@ -24,19 +24,6 @@ from docutranslate.translator.ai_translator.base import AiTranslatorConfig, AiTr
# ---------------- 辅助函数 ----------------
-# [v6.2] 定义一组具有显著视觉效果的格式标签。
-# 我们只在 Run 包含这些格式时才将其视为空白格式边界。
-# 这避免了因字体、字号等微小变化导致的过度文本切分。
-SIGNIFICANT_STYLES = frozenset([
- qn('w:u'), # 下划线
- qn('w:strike'), # 删除线
- qn('w:dstrike'), # 双删除线
- qn('w:shd'), # 底纹/背景色
- qn('w:highlight'), # 荧光笔高亮
- qn('w:bdr'), # 边框
- qn('w:effectLst'), # 文本效果 (如发光、阴影)
- qn('w:em'), # 强调标记 (着重号)
-])
def is_image_run(run: Run) -> bool:
@@ -45,33 +32,6 @@ def is_image_run(run: Run) -> bool:
return ' bool:
- """
- 检查一个 Run 是否仅用于格式化,不包含任何应被渲染的文本。
- 这仅适用于其 .text 属性为 "" 的情况。
- """
- return run.text == ""
-
-
-# ---------- 新增修改部分 1: is_styled_whitespace_run 函数被移除 ----------
-# 此函数不再需要,因为新的逻辑会根据格式变化来切分,而不是根据带格式的空格。
-# ---------------------- 修改结束 ----------------------
-
-def is_tab_run(run: Run) -> bool:
- """
- 检查一个 Run 是否主要代表一个制表符,应被视作格式边界。
- 仅当 Run 的文本内容为空或仅包含空白,且 XML 中存在 时,
- 才将其视为纯格式化用途的 Run。
- """
- # .text 属性会将 转换成 '\t'
- # 如果 .text 在去除空白后仍有内容,说明这个 Run 不仅仅是个制表符。
- if run.text.strip():
- return False
-
- xml = getattr(run.element, 'xml', '')
- return ' frozenset:
- """从一个 Run 中提取“显著”格式标签的集合。"""
- if run is None:
- return frozenset()
- rPr = run.element.rPr
- if rPr is None:
- return frozenset()
- return frozenset(child.tag for child in rPr if child.tag in SIGNIFICANT_STYLES)
-
- def _have_same_significant_styles(self, run1: Run, run2: Run) -> bool:
- """检查两个 Run 是否具有相同的“显著”格式集合。"""
- styles1 = self._get_significant_styles(run1)
- styles2 = self._get_significant_styles(run2)
- return styles1 == styles2
-
- # ---------------------- 修改结束 ----------------------
-
- # ---------- 代码修改部分 1: 形状翻译逻辑的核心实现 ----------
- def _process_element_children(self, element, parent_paragraph: Paragraph, elements: List[Dict[str, Any]],
- texts: List[str],
- state: Dict[str, Any],
- top_level_para: Paragraph):
-
- def flush_segment():
- current_runs = state['current_runs']
- if not current_runs:
- return
- full_text = "".join(r.text for r in current_runs)
- if full_text.strip():
- # 在 elements 中增加对父段落和顶级段落的引用
- elements.append({
- "type": "text_runs",
- "runs": list(current_runs),
- "paragraph": parent_paragraph,
- "top_level_paragraph": top_level_para
- })
- texts.append(full_text)
- state['current_runs'].clear()
-
- for child in element:
- if child.tag in self.IGNORED_TAGS:
- continue
-
- if child.tag in self.RECURSIVE_CONTAINER_TAGS:
- flush_segment()
- self._process_element_children(child, parent_paragraph, elements, texts, state, top_level_para)
- flush_segment() # 在递归容器后也刷新,确保其内容成为独立片段
- continue
-
- field_char_element = child.find(qn('w:fldChar')) if isinstance(child, CT_R) else None
- if field_char_element is not None:
- fld_type = field_char_element.get(qn('w:fldCharType'))
- if fld_type == 'begin' or fld_type == 'end':
- flush_segment()
- continue
-
- if isinstance(child, CT_R):
- # 传入 parent_paragraph 以确保 Run 对象具有正确的上下文
- run = Run(child, parent_paragraph)
-
- # 新增逻辑:处理形状(drawing/pict)内的文本
- # 形状可以包含文本框,需要优先于图片处理逻辑进行解析
- if '