fix: MT模式翻译残留、docx格式丢失、语言切换器及provider域名修复
- provider.py: 域名匹配改为包含匹配,覆盖dashscope-intl国际站
- segments_agent.py: MT模式改用<<<SEG:n>>>纯文本标记替代JSON,避免qwen-mt模型原文残留
- docx_translator.py: _apply_translation改为按字符比例分配译文到各Run,保留原始格式
- i18nData.json: vi(越南语)替换为id(印尼语),含完整175键翻译
- index.html: 语言切换器移至顶部标题栏,新增浏览器语言自动检测

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -5,7 +5,7 @@ ProviderType: TypeAlias = Literal["ollama", "bigmodel", "aliyuncs", "volces", "g
|
||||
def get_provider_by_domain(domain:str)->ProviderType:
|
||||
if domain == "open.bigmodel.cn":
|
||||
return "bigmodel"
|
||||
elif domain == "dashscope.aliyuncs.com":
|
||||
elif "dashscope.aliyuncs.com" in domain:
|
||||
return "aliyuncs"
|
||||
elif domain == "ark.cn-beijing.volces.com":
|
||||
return "volces"
|
||||
|
||||
@@ -15,10 +15,13 @@ from docutranslate.agents.agent import PartialAgentResultError, AgentResultError
|
||||
from docutranslate.glossary.glossary import Glossary
|
||||
from docutranslate.utils.json_utils import segments2json_chunks, fix_json_string
|
||||
|
||||
# MT mode plain-text segment marker — designed to survive machine translation unchanged.
# Group 1 is the numeric segment id; group 2 is everything up to the next marker
# (or the end of the text). Callers strip group 2 themselves.
# Whitespace after the marker is optional on purpose: a reply may keep the marker
# but put the text on the same line, or end with a bare marker once the reply has
# been stripped. Requiring a newline there would drop that segment.
MT_SEG_MARKER_RE = re.compile(r'<<<SEG:(\d+)>>>\s*(.*?)(?=<<<SEG:\d+>>>|\Z)', re.DOTALL)
|
||||
|
||||
|
||||
def generate_prompt(json_segments: str, to_lang: str):
|
||||
return f"""
|
||||
You will receive a sequence of original text segments to be translated, represented in JSON format. The keys are segment IDs, and the values are the text content to be translated.
|
||||
You will receive a sequence of original text segments to be translated, represented in JSON format. The keys are segment IDs, and the values are the text content to be translated.
|
||||
Here is the input:
|
||||
|
||||
<input>
|
||||
@@ -58,8 +61,8 @@ Below is an example of how merging should be done when necessary:
|
||||
input:
|
||||
```json
|
||||
{{
|
||||
"EXAMPLE_KEY_1":"汤姆说:“杰克你",
|
||||
"EXAMPLE_KEY_2":"好”。"
|
||||
"EXAMPLE_KEY_1":"汤姆说:\"杰克你",
|
||||
"EXAMPLE_KEY_2":"好\"。"
|
||||
}}
|
||||
```
|
||||
output:
|
||||
@@ -92,6 +95,44 @@ def get_target_segments(result: str):
|
||||
return result
|
||||
|
||||
|
||||
def _chunk_to_mt_prompt(chunk: dict) -> str:
|
||||
"""Convert a JSON chunk like {'0': 'text1', '1': 'text2'} to MT-friendly plain text."""
|
||||
parts = []
|
||||
for key in sorted(chunk.keys(), key=int):
|
||||
parts.append(f"<<<SEG:{key}>>>\n{chunk[key]}")
|
||||
return "\n".join(parts)
|
||||
|
||||
|
||||
def _parse_mt_prompt_to_dict(mt_prompt: str) -> dict:
    """Recover the ``{segment_id: text}`` mapping from an MT prompt string.

    Every ``<<<SEG:n>>>`` marker matched by ``MT_SEG_MARKER_RE`` contributes
    one entry whose text is stripped of surrounding whitespace. When no
    marker matches at all, the whole prompt is returned as segment ``"0"``.
    """
    segments = {
        found.group(1): found.group(2).strip()
        for found in MT_SEG_MARKER_RE.finditer(mt_prompt)
    }
    # No marker matched: treat the entire prompt as a single segment.
    return segments or {"0": mt_prompt}
|
||||
|
||||
|
||||
def _parse_mt_response(text: str, original_chunk: dict) -> dict:
    """Turn a marker-delimited MT reply into a ``{segment_id: text}`` dict.

    Only ids that exist in ``original_chunk`` are kept; text is stripped of
    surrounding whitespace. Ids from ``original_chunk`` that do not appear in
    the reply are added with an empty string, so the returned dict always
    contains every key of ``original_chunk``.
    """
    translated = {
        found.group(1): found.group(2).strip()
        for found in MT_SEG_MARKER_RE.finditer(text)
        if found.group(1) in original_chunk
    }
    # Pad ids the reply did not contain with an empty string.
    for seg_id in original_chunk:
        translated.setdefault(seg_id, "")
    return translated
|
||||
|
||||
|
||||
@dataclass(kw_only=True)
|
||||
class SegmentsTranslateAgentConfig(AgentConfig):
|
||||
to_lang: str
|
||||
@@ -123,20 +164,16 @@ class SegmentsTranslateAgent(Agent):
|
||||
def _result_handler(self, result: str, origin_prompt: str, logger: Logger):
|
||||
"""
|
||||
处理成功的API响应。
|
||||
- 如果键完全匹配,返回翻译结果。
|
||||
- 如果键不匹配,构造一个部分成功的结果,并通过 PartialTranslationError 异常抛出,以触发重试。
|
||||
- 其他错误(如JSON解析失败、模型偷懒)则抛出普通 ValueError 触发重试。
|
||||
- MT模式下,如果返回的是纯文本而非JSON,将其按行分割并映射到原始键。
|
||||
MT模式下使用 <<<SEG:n>>> 标记解析纯文本响应,避免JSON格式不兼容问题。
|
||||
"""
|
||||
# MT模式下直接解析origin_prompt为JSON(纯净JSON,没有<input>包装)
|
||||
if self.is_mt_mode:
|
||||
original_segments = origin_prompt
|
||||
else:
|
||||
original_segments = get_original_segments(origin_prompt)
|
||||
return self._result_handler_mt(result, origin_prompt, logger)
|
||||
|
||||
# --- Non-MT mode (JSON-based) ---
|
||||
original_segments = get_original_segments(origin_prompt)
|
||||
result = get_target_segments(result)
|
||||
if result == "":
|
||||
if original_segments.strip() != "":
|
||||
# print(f"【测试】origin_prompt:\n{origin_prompt}\nresult:\n{result}")
|
||||
raise AgentResultError("result为空值但原文不为空")
|
||||
return {}
|
||||
try:
|
||||
@@ -144,37 +181,6 @@ class SegmentsTranslateAgent(Agent):
|
||||
original_chunk = json_repair.loads(original_segments)
|
||||
repaired_result = json_repair.loads(result)
|
||||
|
||||
# MT模式兼容:处理各种非标准返回格式
|
||||
if self.is_mt_mode:
|
||||
# 如果是列表,尝试合并所有字典
|
||||
if isinstance(repaired_result, list):
|
||||
logger.debug(f"[MT模式] 返回结果是列表,包含 {len(repaired_result)} 个元素")
|
||||
merged_result = {}
|
||||
for item in repaired_result:
|
||||
if isinstance(item, dict):
|
||||
merged_result.update(item)
|
||||
repaired_result = merged_result
|
||||
|
||||
# 如果返回的是纯文本(字符串),尝试将其映射到原始键
|
||||
if isinstance(repaired_result, str):
|
||||
original_keys = list(original_chunk.keys())
|
||||
# 按行分割结果,去除空行
|
||||
result_lines = [line.strip() for line in repaired_result.split('\n') if line.strip()]
|
||||
|
||||
# 如果只有一行结果但多个键,将整个结果分配给第一个键,其余为空
|
||||
if len(result_lines) == 1 and len(original_keys) > 1:
|
||||
repaired_result = {original_keys[0]: result_lines[0]}
|
||||
for key in original_keys[1:]:
|
||||
repaired_result[key] = ""
|
||||
# 如果结果行数与键数匹配,逐行对应
|
||||
elif len(result_lines) == len(original_keys):
|
||||
repaired_result = {original_keys[i]: result_lines[i] for i in range(len(original_keys))}
|
||||
# 如果结果行数不匹配,将所有结果合并给第一个键
|
||||
else:
|
||||
repaired_result = {original_keys[0]: repaired_result}
|
||||
for key in original_keys[1:]:
|
||||
repaired_result[key] = ""
|
||||
|
||||
if not isinstance(repaired_result, dict):
|
||||
raise AgentResultError(f"Agent返回结果不是dict的json形式, result: {result}")
|
||||
|
||||
@@ -184,9 +190,7 @@ class SegmentsTranslateAgent(Agent):
|
||||
original_keys = set(original_chunk.keys())
|
||||
result_keys = set(repaired_result.keys())
|
||||
|
||||
# 如果键不完全匹配
|
||||
if original_keys != result_keys:
|
||||
# 仍然先构造一个最完整的“部分结果”
|
||||
final_chunk = {}
|
||||
common_keys = original_keys.intersection(result_keys)
|
||||
missing_keys = original_keys - result_keys
|
||||
@@ -201,74 +205,104 @@ class SegmentsTranslateAgent(Agent):
|
||||
for key in missing_keys:
|
||||
final_chunk[key] = str(original_chunk[key])
|
||||
|
||||
raise PartialAgentResultError("键不匹配,触发重试", partial_result=final_chunk,
|
||||
append_prompt=f"\nBe careful not to omit any keys from the input; do not combine sentences when translating.\n")
|
||||
|
||||
# 抛出自定义异常,将部分结果和错误信息一起传递出去
|
||||
raise PartialAgentResultError("键不匹配,触发重试", partial_result=final_chunk,append_prompt=f"\nBe careful not to omit any keys from the input; do not combine sentences when translating.\n")
|
||||
|
||||
# 如果键完全匹配(理想情况),正常返回
|
||||
for key, value in repaired_result.items():
|
||||
repaired_result[key] = str(value)
|
||||
|
||||
return repaired_result
|
||||
|
||||
except (RuntimeError, JSONDecodeError) as e:
|
||||
# MT模式兼容:如果JSON解析失败,尝试将结果作为纯文本处理
|
||||
if self.is_mt_mode:
|
||||
try:
|
||||
original_chunk = json_repair.loads(original_segments)
|
||||
original_keys = list(original_chunk.keys())
|
||||
result_lines = [line.strip() for line in result.split('\n') if line.strip()]
|
||||
|
||||
if len(result_lines) == 1 and len(original_keys) > 1:
|
||||
repaired_result = {original_keys[0]: result_lines[0]}
|
||||
for key in original_keys[1:]:
|
||||
repaired_result[key] = ""
|
||||
elif len(result_lines) == len(original_keys):
|
||||
repaired_result = {original_keys[i]: result_lines[i] for i in range(len(original_keys))}
|
||||
else:
|
||||
repaired_result = {original_keys[0]: result}
|
||||
for key in original_keys[1:]:
|
||||
repaired_result[key] = ""
|
||||
|
||||
# 验证结果
|
||||
if set(repaired_result.keys()) != set(original_chunk.keys()):
|
||||
raise AgentResultError(f"MT模式解析后键不匹配")
|
||||
|
||||
return repaired_result
|
||||
except Exception as mt_e:
|
||||
raise AgentResultError(f"MT模式纯文本处理失败: {mt_e.__repr__()}")
|
||||
|
||||
# 对于JSON解析等硬性错误,继续抛出普通ValueError
|
||||
raise AgentResultError(f"结果处理失败: {e.__repr__()}")
|
||||
|
||||
def _result_handler_mt(self, result: str, origin_prompt: str, logger: Logger) -> dict:
    """Handle a successful API response in MT mode (``<<<SEG:n>>>`` plain text).

    Resolution order:
      1. Parse the reply by its ``<<<SEG:n>>>`` markers. If a segment with
         non-blank source text has no marker in the reply, raise
         ``PartialAgentResultError`` carrying what was translated (missing
         segments keep their source text) so the request is retried.
      2. If no usable marker content was found, map the non-empty reply lines
         one-to-one onto the segment ids when the counts agree.
      3. Otherwise put the whole reply into the first segment and leave the
         remaining segments empty.

    Args:
        result: Raw text returned by the model.
        origin_prompt: The marker-delimited prompt that was sent.
        logger: Unused here; kept for signature parity with ``_result_handler``.

    Returns:
        A dict keyed by the segment ids found in ``origin_prompt``; ``{}`` when
        both the reply and the prompt are blank.

    Raises:
        AgentResultError: Empty reply for a non-empty prompt, or a reply that
            is identical to the source text.
        PartialAgentResultError: Some segments are missing from the reply.
    """
    result_clean = result.strip()
    if result_clean == "":
        if origin_prompt.strip() != "":
            raise AgentResultError("result为空值但原文不为空")
        return {}

    original_chunk = _parse_mt_prompt_to_dict(origin_prompt)
    # Use the chunk's own ids (numeric order) everywhere; they are not
    # guaranteed to start at 0.
    ordered_keys = sorted(original_chunk, key=int)
    original_keys = set(ordered_keys)

    def _is_untranslated(candidate: dict) -> bool:
        # True when every segment equals its source text (ignoring outer whitespace).
        return all(
            candidate.get(key, "").strip() == str(original_chunk.get(key, "")).strip()
            for key in ordered_keys
        )

    # Try parsing with <<<SEG:n>>> markers
    parsed = _parse_mt_response(result_clean, original_chunk)
    # _parse_mt_response pads absent ids with "", so comparing its keys cannot
    # reveal dropped segments; look at the markers actually present instead.
    found_keys = {
        match.group(1) for match in MT_SEG_MARKER_RE.finditer(result_clean)
    } & original_keys
    # A blank source segment has nothing to translate, so its absence is harmless.
    missing_keys = {
        key for key in original_keys - found_keys
        if str(original_chunk[key]).strip()
    }

    if parsed and any(value.strip() for value in parsed.values()):
        if missing_keys:
            final_chunk = {
                key: parsed[key] if key in found_keys else str(original_chunk[key])
                for key in ordered_keys
            }
            raise PartialAgentResultError(
                "MT模式键不匹配,触发重试",
                partial_result=final_chunk,
                append_prompt="\nPreserve all <<<SEG:n>>> markers exactly as they appear.\n"
            )
        if _is_untranslated(parsed):
            raise AgentResultError("翻译结果与原文完全相同,疑似翻译失败,将进行重试。")
        return parsed

    # Fallback: line-by-line mapping (the MT model might have removed markers)
    result_lines = [line.strip() for line in result_clean.split('\n') if line.strip()]
    if len(result_lines) == len(ordered_keys):
        repaired = dict(zip(ordered_keys, result_lines))
        if _is_untranslated(repaired):
            raise AgentResultError("翻译结果与原文完全相同(逐行),疑似翻译失败,将进行重试。")
        return repaired

    # Last fallback: assign all result text to the first segment
    if result_lines:
        repaired = {key: "" for key in ordered_keys}
        repaired[ordered_keys[0]] = "\n".join(result_lines)
        return repaired

    raise AgentResultError("MT模式无法解析响应")
|
||||
|
||||
def _error_result_handler(self, origin_prompt: str, logger: Logger):
|
||||
"""
|
||||
处理在所有重试后仍然失败的请求。
|
||||
作为备用方案,返回原文内容,并将所有值转换为字符串。
|
||||
作为备用方案,返回原文内容。
|
||||
"""
|
||||
# MT模式下直接解析origin_prompt为JSON(纯净JSON,没有<input>包装)
|
||||
if self.is_mt_mode:
|
||||
original_segments = origin_prompt
|
||||
else:
|
||||
original_segments = get_original_segments(origin_prompt)
|
||||
original_chunk = _parse_mt_prompt_to_dict(origin_prompt)
|
||||
for key in list(original_chunk.keys()):
|
||||
original_chunk[key] = f"{original_chunk[key]}"
|
||||
return original_chunk
|
||||
|
||||
original_segments = get_original_segments(origin_prompt)
|
||||
if original_segments == "":
|
||||
return {}
|
||||
try:
|
||||
original_chunk = json_repair.loads(original_segments)
|
||||
# 此处逻辑保留,作为最终的兜底方案
|
||||
for key, value in original_chunk.items():
|
||||
original_chunk[key] = f"{value}"
|
||||
return original_chunk
|
||||
except (RuntimeError, JSONDecodeError):
|
||||
logger.error(f"原始prompt也不是有效的json格式: {original_segments}")
|
||||
# 如果原始prompt本身也无效,返回一个清晰的错误对象
|
||||
return {"error": f"{original_segments}"}
|
||||
|
||||
def send_segments(self, segments: list[str], chunk_size: int) -> list[str]:
|
||||
indexed_originals, chunks, merged_indices_list = segments2json_chunks(segments, chunk_size)
|
||||
# MT模式下直接发送纯净JSON,不添加额外提示词
|
||||
if self.is_mt_mode:
|
||||
prompts = [json.dumps(chunk, ensure_ascii=False, indent=0) for chunk in chunks]
|
||||
prompts = [_chunk_to_mt_prompt(chunk) for chunk in chunks]
|
||||
else:
|
||||
prompts = [generate_prompt(json.dumps(chunk, ensure_ascii=False, indent=0), self.to_lang) for chunk in chunks]
|
||||
translated_chunks = super().send_prompts(prompts=prompts, json_format=self.force_json,
|
||||
@@ -292,7 +326,6 @@ class SegmentsTranslateAgent(Agent):
|
||||
except Exception as e:
|
||||
self.logger.error(f"处理chunk时发生未知错误: {e.__repr__()}")
|
||||
|
||||
# 重建最终列表
|
||||
result = []
|
||||
last_end = 0
|
||||
ls = list(indexed_translated.values())
|
||||
@@ -308,9 +341,8 @@ class SegmentsTranslateAgent(Agent):
|
||||
async def send_segments_async(self, segments: list[str], chunk_size: int) -> list[str]:
|
||||
indexed_originals, chunks, merged_indices_list = await asyncio.to_thread(segments2json_chunks, segments,
|
||||
chunk_size)
|
||||
# MT模式下直接发送纯净JSON,不添加额外提示词
|
||||
if self.is_mt_mode:
|
||||
prompts = [json.dumps(chunk, ensure_ascii=False, indent=0) for chunk in chunks]
|
||||
prompts = [_chunk_to_mt_prompt(chunk) for chunk in chunks]
|
||||
else:
|
||||
prompts = [generate_prompt(json.dumps(chunk, ensure_ascii=False, indent=0), self.to_lang) for chunk in chunks]
|
||||
|
||||
@@ -326,7 +358,6 @@ class SegmentsTranslateAgent(Agent):
|
||||
continue
|
||||
for key, val in chunk.items():
|
||||
if key in indexed_translated:
|
||||
# 此处不再需要 str(val),因为 _result_handler 已经处理好了
|
||||
indexed_translated[key] = val
|
||||
else:
|
||||
self.logger.warning(f"在结果chunk中发现未知键 '{key}',已忽略。")
|
||||
@@ -335,7 +366,6 @@ class SegmentsTranslateAgent(Agent):
|
||||
except Exception as e:
|
||||
self.logger.error(f"处理chunk时发生未知错误: {e.__repr__()}")
|
||||
|
||||
# 重建最终列表
|
||||
result = []
|
||||
last_end = 0
|
||||
ls = list(indexed_translated.values())
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -1,5 +1,5 @@
|
||||
<!DOCTYPE html>
|
||||
<html lang="zh-CN" data-bs-theme="auto">
|
||||
<html lang="en" data-bs-theme="auto">
|
||||
<head>
|
||||
<meta charset="UTF-8">
|
||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||
@@ -159,15 +159,6 @@
|
||||
white-space: pre;
|
||||
}
|
||||
|
||||
.bottom-left-controls {
|
||||
position: fixed;
|
||||
bottom: 1rem;
|
||||
left: 1rem;
|
||||
z-index: 1050;
|
||||
display: flex;
|
||||
gap: 0.5rem;
|
||||
}
|
||||
|
||||
.step-number {
|
||||
margin-right: 0.25rem;
|
||||
}
|
||||
@@ -226,6 +217,31 @@
|
||||
<div class="d-flex align-items-center">
|
||||
<h4 class="mb-0 me-3 fw-bold" :title="t('pageTitle')">DocuTranslate</h4>
|
||||
</div>
|
||||
<!-- Language & Theme Controls -->
|
||||
<div class="d-flex gap-2">
|
||||
<div class="dropdown">
|
||||
<button class="btn btn-outline-secondary btn-sm dropdown-toggle" type="button" data-bs-toggle="dropdown">
|
||||
<i class="bi bi-translate me-1"></i><span>{{ {zh:'中文',en:'English',id:'Bahasa'}[currentLang] || 'Language' }}</span>
|
||||
</button>
|
||||
<ul class="dropdown-menu dropdown-menu-end">
|
||||
<li><a class="dropdown-item" :class="{active: currentLang==='zh'}" href="#"
|
||||
@click.prevent="setLang('zh')">中文</a></li>
|
||||
<li><a class="dropdown-item" :class="{active: currentLang==='en'}" href="#"
|
||||
@click.prevent="setLang('en')">English</a></li>
|
||||
<li><a class="dropdown-item" :class="{active: currentLang==='id'}" href="#"
|
||||
@click.prevent="setLang('id')">Bahasa Indonesia</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="dropdown">
|
||||
<button class="btn btn-outline-secondary btn-sm dropdown-toggle" type="button" data-bs-toggle="dropdown"><i
|
||||
class="bi bi-circle-half"></i></button>
|
||||
<ul class="dropdown-menu dropdown-menu-end">
|
||||
<li><button class="dropdown-item" @click="setTheme('light')"><i class="bi bi-sun-fill me-2"></i>Light</button></li>
|
||||
<li><button class="dropdown-item" @click="setTheme('dark')"><i class="bi bi-moon-stars-fill me-2"></i>Dark</button></li>
|
||||
<li><button class="dropdown-item" @click="setTheme('auto')"><i class="bi bi-circle-half me-2"></i>Auto</button></li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
|
||||
<form id="translateForm" @submit.prevent>
|
||||
@@ -923,40 +939,7 @@
|
||||
</div>
|
||||
<iframe id="printFrame" ref="printFrame" style="display: none;"></iframe>
|
||||
|
||||
<!-- Controls -->
|
||||
<div class="bottom-left-controls">
|
||||
<div class="dropdown">
|
||||
<button class="btn btn-secondary dropdown-toggle" type="button" data-bs-toggle="dropdown"><i
|
||||
class="bi bi-translate"></i></button>
|
||||
<ul class="dropdown-menu">
|
||||
<li><a class="dropdown-item" :class="{active: currentLang==='zh'}" href="#"
|
||||
@click.prevent="setLang('zh')">中文</a></li>
|
||||
<li><a class="dropdown-item" :class="{active: currentLang==='en'}" href="#"
|
||||
@click.prevent="setLang('en')">English</a></li>
|
||||
<li><a class="dropdown-item" :class="{active: currentLang==='vi'}" href="#"
|
||||
@click.prevent="setLang('vi')">Tiếng Việt</a></li>
|
||||
</ul>
|
||||
</div>
|
||||
<div class="dropdown">
|
||||
<button class="btn btn-secondary dropdown-toggle" type="button" data-bs-toggle="dropdown"><i
|
||||
class="bi bi-circle-half"></i></button>
|
||||
<ul class="dropdown-menu">
|
||||
<li>
|
||||
<button class="dropdown-item" @click="setTheme('light')"><i class="bi bi-sun-fill me-2"></i> Light
|
||||
</button>
|
||||
</li>
|
||||
<li>
|
||||
<button class="dropdown-item" @click="setTheme('dark')"><i class="bi bi-moon-stars-fill me-2"></i>
|
||||
Dark
|
||||
</button>
|
||||
</li>
|
||||
<li>
|
||||
<button class="dropdown-item" @click="setTheme('auto')"><i class="bi bi-circle-half me-2"></i> Auto
|
||||
</button>
|
||||
</li>
|
||||
</ul>
|
||||
</div>
|
||||
</div>
|
||||
<!-- Header controls now in left panel top-right -->
|
||||
</div>
|
||||
|
||||
<script src="/static/bootstrap.bundle.min.js"></script>
|
||||
@@ -1048,7 +1031,14 @@
|
||||
components: {SliderControl, ModelPresetSelector},
|
||||
setup() {
|
||||
const version = ref("");
|
||||
const currentLang = ref(localStorage.getItem('ui_language') || 'zh');
|
||||
// Pick the initial UI language from the browser locale.
// Only the primary subtag is used (e.g. "zh-TW" -> "zh"); anything other than
// the supported codes zh / en / id falls back to English.
function detectBrowserLang() {
    const nav = navigator.language || navigator.userLanguage || '';
    const lang = nav.split('-')[0].toLowerCase();
    return ['zh', 'en', 'id'].includes(lang) ? lang : 'en';
}
|
||||
const currentLang = ref(localStorage.getItem('ui_language') || detectBrowserLang());
|
||||
const i18nData = ref({});
|
||||
const glossaryData = ref({});
|
||||
const tasks = ref([]);
|
||||
@@ -1868,7 +1858,8 @@
|
||||
const setLang = (l) => {
|
||||
currentLang.value = l;
|
||||
localStorage.setItem('ui_language', l);
|
||||
document.documentElement.lang = l === 'zh' ? 'zh-CN' : 'en';
|
||||
const langMap = {zh: 'zh-CN', en: 'en', id: 'id'};
|
||||
document.documentElement.lang = langMap[l] || 'en';
|
||||
};
|
||||
const setTheme = (t) => {
|
||||
localStorage.setItem('theme', t);
|
||||
|
||||
@@ -326,33 +326,58 @@ class DocxTranslator(AiTranslator):
|
||||
runs = element_info["runs"]
|
||||
if not runs: return
|
||||
|
||||
first_real_run_index = -1
|
||||
# 找到第一个可以写入文本的run
|
||||
for i, run in enumerate(runs):
|
||||
# Filter to runs that are still attached to the document
|
||||
valid_runs = []
|
||||
for run in runs:
|
||||
if run.element.getparent() is not None:
|
||||
# 如果 run 是副本的一部分,其 _parent 可能仍然指向原始文档的段落
|
||||
# 但我们需要确保它与 element_info["paragraph"] 同步
|
||||
run._parent = element_info["paragraph"]
|
||||
run.text = final_text
|
||||
first_real_run_index = i
|
||||
break
|
||||
valid_runs.append(run)
|
||||
|
||||
# 如果没有找到有效的run(例如,它们都已被删除),则记录警告
|
||||
if first_real_run_index == -1:
|
||||
if not valid_runs:
|
||||
self.logger.warning(f"无法应用翻译 '{final_text}',因为找不到有效的run。")
|
||||
return
|
||||
|
||||
# 删除所有后续的run,因为它们的文本已经被合并到第一个run中了
|
||||
for i in range(first_real_run_index + 1, len(runs)):
|
||||
run = runs[i]
|
||||
parent_element = run.element.getparent()
|
||||
if parent_element is not None:
|
||||
try:
|
||||
parent_element.remove(run.element)
|
||||
except ValueError:
|
||||
# 在某些复杂情况下,一个run可能已经被其父元素隐式删除
|
||||
self.logger.debug(f"尝试删除一个不存在的run元素。这通常是安全的。")
|
||||
pass
|
||||
if len(valid_runs) == 1:
|
||||
# Single run: just write the translation
|
||||
valid_runs[0].text = final_text
|
||||
return
|
||||
|
||||
# Multiple runs: proportionally distribute translated text to preserve formatting
|
||||
orig_lengths = [len(r.text) for r in valid_runs]
|
||||
total_orig = sum(orig_lengths)
|
||||
final_len = len(final_text)
|
||||
|
||||
if total_orig == 0:
|
||||
valid_runs[0].text = final_text
|
||||
for run in valid_runs[1:]:
|
||||
self._remove_run_element(run)
|
||||
return
|
||||
|
||||
# Distribute characters proportionally
|
||||
char_pos = 0
|
||||
for i, run in enumerate(valid_runs):
|
||||
if i == len(valid_runs) - 1:
|
||||
# Last run gets all remaining text
|
||||
run.text = final_text[char_pos:]
|
||||
else:
|
||||
ratio = orig_lengths[i] / total_orig
|
||||
run_char_count = max(1, round(final_len * ratio))
|
||||
run_char_count = min(run_char_count, final_len - char_pos - (len(valid_runs) - i - 1))
|
||||
if run_char_count <= 0:
|
||||
# Remove runs that would get zero characters
|
||||
self._remove_run_element(run)
|
||||
continue
|
||||
run.text = final_text[char_pos:char_pos + run_char_count]
|
||||
char_pos += run_char_count
|
||||
|
||||
def _remove_run_element(self, run) -> None:
|
||||
"""Safely remove a run element from its parent."""
|
||||
parent_element = run.element.getparent()
|
||||
if parent_element is not None:
|
||||
try:
|
||||
parent_element.remove(run.element)
|
||||
except ValueError:
|
||||
self.logger.debug(f"尝试删除一个不存在的run元素。这通常是安全的。")
|
||||
|
||||
# ---------- FIX START: 新增用于清理副本段落的辅助方法 ----------
|
||||
def _prune_unwanted_elements_from_copy(self, p_element: OxmlElement):
|
||||
|
||||
Reference in New Issue
Block a user