fix: MT模式翻译残留、docx格式丢失、语言切换器及provider域名修复

- provider.py: 域名匹配改为包含匹配,覆盖dashscope-intl国际站
- segments_agent.py: MT模式改用<<<SEG:n>>>纯文本标记替代JSON,避免qwen-mt模型原文残留
- docx_translator.py: _apply_translation改为按字符比例分配译文到各Run,保留原始格式
- i18nData.json: vi(越南语)替换为id(印尼语),含完整175键翻译
- index.html: 语言切换器移至顶部标题栏,新增浏览器语言自动检测

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
2026-06-08 14:07:13 +08:00
parent 9d8eacf0b4
commit 8a5f62342a
5 changed files with 377 additions and 331 deletions

View File

@@ -5,7 +5,7 @@ ProviderType: TypeAlias = Literal["ollama", "bigmodel", "aliyuncs", "volces", "g
def get_provider_by_domain(domain:str)->ProviderType: def get_provider_by_domain(domain:str)->ProviderType:
if domain == "open.bigmodel.cn": if domain == "open.bigmodel.cn":
return "bigmodel" return "bigmodel"
elif domain == "dashscope.aliyuncs.com": elif "dashscope.aliyuncs.com" in domain:
return "aliyuncs" return "aliyuncs"
elif domain == "ark.cn-beijing.volces.com": elif domain == "ark.cn-beijing.volces.com":
return "volces" return "volces"

View File

@@ -15,10 +15,13 @@ from docutranslate.agents.agent import PartialAgentResultError, AgentResultError
from docutranslate.glossary.glossary import Glossary from docutranslate.glossary.glossary import Glossary
from docutranslate.utils.json_utils import segments2json_chunks, fix_json_string from docutranslate.utils.json_utils import segments2json_chunks, fix_json_string
# MT mode plain-text segment marker — designed to survive machine translation unchanged.
# Group 1 is the segment id; group 2 is the text up to the next marker or end of input.
# Whitespace after the marker is optional (previously a newline was mandatory), so a
# reply that keeps the text on the marker's own line ("<<<SEG:3>>> text") is still
# captured instead of being dropped. Callers strip group 2, so the canonical
# "marker, newline, text" layout parses to exactly the same values as before.
MT_SEG_MARKER_RE = re.compile(r'<<<SEG:(\d+)>>>\s*(.*?)(?=<<<SEG:\d+>>>|\Z)', re.DOTALL)
def generate_prompt(json_segments: str, to_lang: str): def generate_prompt(json_segments: str, to_lang: str):
return f""" return f"""
You will receive a sequence of original text segments to be translated, represented in JSON format. The keys are segment IDs, and the values are the text content to be translated. You will receive a sequence of original text segments to be translated, represented in JSON format. The keys are segment IDs, and the values are the text content to be translated.
Here is the input: Here is the input:
<input> <input>
@@ -58,8 +61,8 @@ Below is an example of how merging should be done when necessary:
input: input:
```json ```json
{{ {{
"EXAMPLE_KEY_1":"汤姆说:杰克你", "EXAMPLE_KEY_1":"汤姆说:\"杰克你",
"EXAMPLE_KEY_2":"" "EXAMPLE_KEY_2":"\""
}} }}
``` ```
output: output:
@@ -92,6 +95,44 @@ def get_target_segments(result: str):
return result return result
def _chunk_to_mt_prompt(chunk: dict) -> str:
"""Convert a JSON chunk like {'0': 'text1', '1': 'text2'} to MT-friendly plain text."""
parts = []
for key in sorted(chunk.keys(), key=int):
parts.append(f"<<<SEG:{key}>>>\n{chunk[key]}")
return "\n".join(parts)
def _parse_mt_prompt_to_dict(mt_prompt: str) -> dict:
    """Recover the ``{segment_id: text}`` mapping from a marker-delimited MT prompt.

    Each ``MT_SEG_MARKER_RE`` match contributes one entry: the captured id is the
    key and the captured text, stripped of surrounding whitespace, is the value.
    If the same id matches more than once, the last occurrence wins.

    Args:
        mt_prompt: Prompt text in the ``<<<SEG:n>>>`` format.

    Returns:
        The parsed mapping. When nothing matches, the whole prompt is returned
        unchanged as a single segment under key ``"0"``.
    """
    segments = {
        found.group(1): found.group(2).strip()
        for found in MT_SEG_MARKER_RE.finditer(mt_prompt)
    }
    # No marker recognised: treat the entire prompt as one segment.
    return segments if segments else {"0": mt_prompt}
def _parse_mt_response(text: str, original_chunk: dict) -> dict:
    """Extract translations from an MT reply that uses ``<<<SEG:n>>>`` markers.

    Only ids that also exist in ``original_chunk`` are kept; text under any other
    id is ignored. If an id occurs several times, the last occurrence wins.

    Args:
        text: Raw plain-text reply from the MT model.
        original_chunk: The segments that were sent, keyed by segment id.

    Returns:
        A dict containing every key of ``original_chunk``. Ids that were not found
        in ``text`` map to an empty string (NOT to the source text), so callers
        cannot distinguish "missing" from "translated to nothing" by key presence.
    """
    translations = {
        found.group(1): found.group(2).strip()
        for found in MT_SEG_MARKER_RE.finditer(text)
        if found.group(1) in original_chunk
    }
    # Pad ids the reply did not contain so the result always covers the chunk.
    for segment_id in original_chunk:
        translations.setdefault(segment_id, "")
    return translations
@dataclass(kw_only=True) @dataclass(kw_only=True)
class SegmentsTranslateAgentConfig(AgentConfig): class SegmentsTranslateAgentConfig(AgentConfig):
to_lang: str to_lang: str
@@ -123,20 +164,16 @@ class SegmentsTranslateAgent(Agent):
def _result_handler(self, result: str, origin_prompt: str, logger: Logger): def _result_handler(self, result: str, origin_prompt: str, logger: Logger):
""" """
处理成功的API响应。 处理成功的API响应。
- 如果键完全匹配,返回翻译结果 MT模式下使用 <<<SEG:n>>> 标记解析纯文本响应避免JSON格式不兼容问题
- 如果键不匹配,构造一个部分成功的结果,并通过 PartialTranslationError 异常抛出,以触发重试。
- 其他错误如JSON解析失败、模型偷懒则抛出普通 ValueError 触发重试。
- MT模式下如果返回的是纯文本而非JSON将其按行分割并映射到原始键。
""" """
# MT模式下直接解析origin_prompt为JSON纯净JSON没有<input>包装)
if self.is_mt_mode: if self.is_mt_mode:
original_segments = origin_prompt return self._result_handler_mt(result, origin_prompt, logger)
else:
original_segments = get_original_segments(origin_prompt) # --- Non-MT mode (JSON-based) ---
original_segments = get_original_segments(origin_prompt)
result = get_target_segments(result) result = get_target_segments(result)
if result == "": if result == "":
if original_segments.strip() != "": if original_segments.strip() != "":
# print(f"【测试】origin_prompt:\n{origin_prompt}\nresult:\n{result}")
raise AgentResultError("result为空值但原文不为空") raise AgentResultError("result为空值但原文不为空")
return {} return {}
try: try:
@@ -144,37 +181,6 @@ class SegmentsTranslateAgent(Agent):
original_chunk = json_repair.loads(original_segments) original_chunk = json_repair.loads(original_segments)
repaired_result = json_repair.loads(result) repaired_result = json_repair.loads(result)
# MT模式兼容处理各种非标准返回格式
if self.is_mt_mode:
# 如果是列表,尝试合并所有字典
if isinstance(repaired_result, list):
logger.debug(f"[MT模式] 返回结果是列表,包含 {len(repaired_result)} 个元素")
merged_result = {}
for item in repaired_result:
if isinstance(item, dict):
merged_result.update(item)
repaired_result = merged_result
# 如果返回的是纯文本(字符串),尝试将其映射到原始键
if isinstance(repaired_result, str):
original_keys = list(original_chunk.keys())
# 按行分割结果,去除空行
result_lines = [line.strip() for line in repaired_result.split('\n') if line.strip()]
# 如果只有一行结果但多个键,将整个结果分配给第一个键,其余为空
if len(result_lines) == 1 and len(original_keys) > 1:
repaired_result = {original_keys[0]: result_lines[0]}
for key in original_keys[1:]:
repaired_result[key] = ""
# 如果结果行数与键数匹配,逐行对应
elif len(result_lines) == len(original_keys):
repaired_result = {original_keys[i]: result_lines[i] for i in range(len(original_keys))}
# 如果结果行数不匹配,将所有结果合并给第一个键
else:
repaired_result = {original_keys[0]: repaired_result}
for key in original_keys[1:]:
repaired_result[key] = ""
if not isinstance(repaired_result, dict): if not isinstance(repaired_result, dict):
raise AgentResultError(f"Agent返回结果不是dict的json形式, result: {result}") raise AgentResultError(f"Agent返回结果不是dict的json形式, result: {result}")
@@ -184,9 +190,7 @@ class SegmentsTranslateAgent(Agent):
original_keys = set(original_chunk.keys()) original_keys = set(original_chunk.keys())
result_keys = set(repaired_result.keys()) result_keys = set(repaired_result.keys())
# 如果键不完全匹配
if original_keys != result_keys: if original_keys != result_keys:
# 仍然先构造一个最完整的“部分结果”
final_chunk = {} final_chunk = {}
common_keys = original_keys.intersection(result_keys) common_keys = original_keys.intersection(result_keys)
missing_keys = original_keys - result_keys missing_keys = original_keys - result_keys
@@ -201,74 +205,104 @@ class SegmentsTranslateAgent(Agent):
for key in missing_keys: for key in missing_keys:
final_chunk[key] = str(original_chunk[key]) final_chunk[key] = str(original_chunk[key])
raise PartialAgentResultError("键不匹配,触发重试", partial_result=final_chunk,
append_prompt=f"\nBe careful not to omit any keys from the input; do not combine sentences when translating.\n")
# 抛出自定义异常,将部分结果和错误信息一起传递出去
raise PartialAgentResultError("键不匹配,触发重试", partial_result=final_chunk,append_prompt=f"\nBe careful not to omit any keys from the input; do not combine sentences when translating.\n")
# 如果键完全匹配(理想情况),正常返回
for key, value in repaired_result.items(): for key, value in repaired_result.items():
repaired_result[key] = str(value) repaired_result[key] = str(value)
return repaired_result return repaired_result
except (RuntimeError, JSONDecodeError) as e: except (RuntimeError, JSONDecodeError) as e:
# MT模式兼容如果JSON解析失败尝试将结果作为纯文本处理
if self.is_mt_mode:
try:
original_chunk = json_repair.loads(original_segments)
original_keys = list(original_chunk.keys())
result_lines = [line.strip() for line in result.split('\n') if line.strip()]
if len(result_lines) == 1 and len(original_keys) > 1:
repaired_result = {original_keys[0]: result_lines[0]}
for key in original_keys[1:]:
repaired_result[key] = ""
elif len(result_lines) == len(original_keys):
repaired_result = {original_keys[i]: result_lines[i] for i in range(len(original_keys))}
else:
repaired_result = {original_keys[0]: result}
for key in original_keys[1:]:
repaired_result[key] = ""
# 验证结果
if set(repaired_result.keys()) != set(original_chunk.keys()):
raise AgentResultError(f"MT模式解析后键不匹配")
return repaired_result
except Exception as mt_e:
raise AgentResultError(f"MT模式纯文本处理失败: {mt_e.__repr__()}")
# 对于JSON解析等硬性错误继续抛出普通ValueError
raise AgentResultError(f"结果处理失败: {e.__repr__()}") raise AgentResultError(f"结果处理失败: {e.__repr__()}")
def _result_handler_mt(self, result: str, origin_prompt: str, logger: Logger) -> dict:
    """Parse an MT-mode plain-text reply into a ``{segment_id: translation}`` dict.

    Parsing is attempted in three stages:

    1. ``<<<SEG:n>>>`` markers, i.e. the format the prompt was sent in.
    2. Line-by-line mapping, used when the markers yielded no text and the number
       of non-empty reply lines equals the number of segments.
    3. Last resort: the whole reply is assigned to the first segment and every
       other segment is left blank.

    Args:
        result: Raw reply text from the MT model.
        origin_prompt: The prompt that produced ``result`` (marker format).
        logger: Unused here; kept for signature parity with ``_result_handler``.

    Returns:
        A dict with one entry per segment of the original prompt, or ``{}`` when
        both the reply and the prompt are blank.

    Raises:
        AgentResultError: The reply is blank although the prompt is not, the
            reply is identical to the source text, or no segment could be
            determined.
        PartialAgentResultError: Markers were found but some non-empty source
            segments have no translation. The partial result carries the source
            text for those segments so a retry can be triggered without losing
            content.
    """
    result_clean = result.strip()
    if not result_clean:
        if origin_prompt.strip():
            raise AgentResultError("result为空值但原文不为空")
        return {}

    original_chunk = _parse_mt_prompt_to_dict(origin_prompt)
    # Use the chunk's real segment ids, in numeric order. They must not be assumed
    # to be "0".."n-1": results of all chunks are written into one shared mapping,
    # so synthesising ids from range(len(chunk)) would hit the wrong segments.
    segment_keys = sorted(original_chunk, key=int)
    if not segment_keys:
        raise AgentResultError("MT模式无法解析响应")

    def _source_text(key: str) -> str:
        return str(original_chunk[key]).strip()

    def _is_untranslated(candidate: dict) -> bool:
        # True when every segment came back byte-for-byte equal to its source.
        return all(candidate.get(key, "").strip() == _source_text(key) for key in segment_keys)

    # --- Stage 1: marker-based parsing ---
    parsed = _parse_mt_response(result_clean, original_chunk)
    if any(value.strip() for value in parsed.values()):
        # _parse_mt_response pads absent ids with "", so key comparison can never
        # detect a dropped segment. Detect it by content instead: a segment whose
        # source has text but whose translation is blank counts as missing.
        missing = {
            key for key in segment_keys
            if _source_text(key) and not parsed.get(key, "").strip()
        }
        if missing:
            final_chunk = {
                key: str(original_chunk[key]) if key in missing else parsed.get(key, "")
                for key in segment_keys
            }
            raise PartialAgentResultError(
                "MT模式键不匹配触发重试",
                partial_result=final_chunk,
                append_prompt="\nPreserve all <<<SEG:n>>> markers exactly as they appear.\n"
            )
        translated = {key: parsed.get(key, "") for key in segment_keys}
        if _is_untranslated(translated):
            raise AgentResultError("翻译结果与原文完全相同,疑似翻译失败,将进行重试。")
        return translated

    # --- Stage 2: the model may have removed the markers; map line by line ---
    reply_lines = [line.strip() for line in result_clean.split('\n') if line.strip()]
    if len(reply_lines) == len(segment_keys):
        repaired = dict(zip(segment_keys, reply_lines))
        if _is_untranslated(repaired):
            raise AgentResultError("翻译结果与原文完全相同(逐行),疑似翻译失败,将进行重试。")
        return repaired

    # --- Stage 3: give the whole reply to the first segment, blank the rest ---
    # NOTE(review): this is lossy for multi-segment chunks (other segments become
    # empty without a retry); behaviour kept from the original implementation.
    if reply_lines:
        repaired = {key: "" for key in segment_keys}
        repaired[segment_keys[0]] = "\n".join(reply_lines)
        return repaired

    raise AgentResultError("MT模式无法解析响应")
def _error_result_handler(self, origin_prompt: str, logger: Logger): def _error_result_handler(self, origin_prompt: str, logger: Logger):
""" """
处理在所有重试后仍然失败的请求。 处理在所有重试后仍然失败的请求。
作为备用方案,返回原文内容,并将所有值转换为字符串 作为备用方案,返回原文内容。
""" """
# MT模式下直接解析origin_prompt为JSON纯净JSON没有<input>包装)
if self.is_mt_mode: if self.is_mt_mode:
original_segments = origin_prompt original_chunk = _parse_mt_prompt_to_dict(origin_prompt)
else: for key in list(original_chunk.keys()):
original_segments = get_original_segments(origin_prompt) original_chunk[key] = f"{original_chunk[key]}"
return original_chunk
original_segments = get_original_segments(origin_prompt)
if original_segments == "": if original_segments == "":
return {} return {}
try: try:
original_chunk = json_repair.loads(original_segments) original_chunk = json_repair.loads(original_segments)
# 此处逻辑保留,作为最终的兜底方案
for key, value in original_chunk.items(): for key, value in original_chunk.items():
original_chunk[key] = f"{value}" original_chunk[key] = f"{value}"
return original_chunk return original_chunk
except (RuntimeError, JSONDecodeError): except (RuntimeError, JSONDecodeError):
logger.error(f"原始prompt也不是有效的json格式: {original_segments}") logger.error(f"原始prompt也不是有效的json格式: {original_segments}")
# 如果原始prompt本身也无效返回一个清晰的错误对象
return {"error": f"{original_segments}"} return {"error": f"{original_segments}"}
def send_segments(self, segments: list[str], chunk_size: int) -> list[str]: def send_segments(self, segments: list[str], chunk_size: int) -> list[str]:
indexed_originals, chunks, merged_indices_list = segments2json_chunks(segments, chunk_size) indexed_originals, chunks, merged_indices_list = segments2json_chunks(segments, chunk_size)
# MT模式下直接发送纯净JSON不添加额外提示词
if self.is_mt_mode: if self.is_mt_mode:
prompts = [json.dumps(chunk, ensure_ascii=False, indent=0) for chunk in chunks] prompts = [_chunk_to_mt_prompt(chunk) for chunk in chunks]
else: else:
prompts = [generate_prompt(json.dumps(chunk, ensure_ascii=False, indent=0), self.to_lang) for chunk in chunks] prompts = [generate_prompt(json.dumps(chunk, ensure_ascii=False, indent=0), self.to_lang) for chunk in chunks]
translated_chunks = super().send_prompts(prompts=prompts, json_format=self.force_json, translated_chunks = super().send_prompts(prompts=prompts, json_format=self.force_json,
@@ -292,7 +326,6 @@ class SegmentsTranslateAgent(Agent):
except Exception as e: except Exception as e:
self.logger.error(f"处理chunk时发生未知错误: {e.__repr__()}") self.logger.error(f"处理chunk时发生未知错误: {e.__repr__()}")
# 重建最终列表
result = [] result = []
last_end = 0 last_end = 0
ls = list(indexed_translated.values()) ls = list(indexed_translated.values())
@@ -308,9 +341,8 @@ class SegmentsTranslateAgent(Agent):
async def send_segments_async(self, segments: list[str], chunk_size: int) -> list[str]: async def send_segments_async(self, segments: list[str], chunk_size: int) -> list[str]:
indexed_originals, chunks, merged_indices_list = await asyncio.to_thread(segments2json_chunks, segments, indexed_originals, chunks, merged_indices_list = await asyncio.to_thread(segments2json_chunks, segments,
chunk_size) chunk_size)
# MT模式下直接发送纯净JSON不添加额外提示词
if self.is_mt_mode: if self.is_mt_mode:
prompts = [json.dumps(chunk, ensure_ascii=False, indent=0) for chunk in chunks] prompts = [_chunk_to_mt_prompt(chunk) for chunk in chunks]
else: else:
prompts = [generate_prompt(json.dumps(chunk, ensure_ascii=False, indent=0), self.to_lang) for chunk in chunks] prompts = [generate_prompt(json.dumps(chunk, ensure_ascii=False, indent=0), self.to_lang) for chunk in chunks]
@@ -326,7 +358,6 @@ class SegmentsTranslateAgent(Agent):
continue continue
for key, val in chunk.items(): for key, val in chunk.items():
if key in indexed_translated: if key in indexed_translated:
# 此处不再需要 str(val),因为 _result_handler 已经处理好了
indexed_translated[key] = val indexed_translated[key] = val
else: else:
self.logger.warning(f"在结果chunk中发现未知键 '{key}',已忽略。") self.logger.warning(f"在结果chunk中发现未知键 '{key}',已忽略。")
@@ -335,7 +366,6 @@ class SegmentsTranslateAgent(Agent):
except Exception as e: except Exception as e:
self.logger.error(f"处理chunk时发生未知错误: {e.__repr__()}") self.logger.error(f"处理chunk时发生未知错误: {e.__repr__()}")
# 重建最终列表
result = [] result = []
last_end = 0 last_end = 0
ls = list(indexed_translated.values()) ls = list(indexed_translated.values())

File diff suppressed because one or more lines are too long

View File

@@ -1,5 +1,5 @@
<!DOCTYPE html> <!DOCTYPE html>
<html lang="zh-CN" data-bs-theme="auto"> <html lang="en" data-bs-theme="auto">
<head> <head>
<meta charset="UTF-8"> <meta charset="UTF-8">
<meta name="viewport" content="width=device-width, initial-scale=1.0"> <meta name="viewport" content="width=device-width, initial-scale=1.0">
@@ -159,15 +159,6 @@
white-space: pre; white-space: pre;
} }
.bottom-left-controls {
position: fixed;
bottom: 1rem;
left: 1rem;
z-index: 1050;
display: flex;
gap: 0.5rem;
}
.step-number { .step-number {
margin-right: 0.25rem; margin-right: 0.25rem;
} }
@@ -226,6 +217,31 @@
<div class="d-flex align-items-center"> <div class="d-flex align-items-center">
<h4 class="mb-0 me-3 fw-bold" :title="t('pageTitle')">DocuTranslate</h4> <h4 class="mb-0 me-3 fw-bold" :title="t('pageTitle')">DocuTranslate</h4>
</div> </div>
<!-- Language & Theme Controls -->
<div class="d-flex gap-2">
<div class="dropdown">
<button class="btn btn-outline-secondary btn-sm dropdown-toggle" type="button" data-bs-toggle="dropdown">
<i class="bi bi-translate me-1"></i><span>{{ {zh:'中文',en:'English',id:'Bahasa'}[currentLang] || 'Language' }}</span>
</button>
<ul class="dropdown-menu dropdown-menu-end">
<li><a class="dropdown-item" :class="{active: currentLang==='zh'}" href="#"
@click.prevent="setLang('zh')">中文</a></li>
<li><a class="dropdown-item" :class="{active: currentLang==='en'}" href="#"
@click.prevent="setLang('en')">English</a></li>
<li><a class="dropdown-item" :class="{active: currentLang==='id'}" href="#"
@click.prevent="setLang('id')">Bahasa Indonesia</a></li>
</ul>
</div>
<div class="dropdown">
<button class="btn btn-outline-secondary btn-sm dropdown-toggle" type="button" data-bs-toggle="dropdown"><i
class="bi bi-circle-half"></i></button>
<ul class="dropdown-menu dropdown-menu-end">
<li><button class="dropdown-item" @click="setTheme('light')"><i class="bi bi-sun-fill me-2"></i>Light</button></li>
<li><button class="dropdown-item" @click="setTheme('dark')"><i class="bi bi-moon-stars-fill me-2"></i>Dark</button></li>
<li><button class="dropdown-item" @click="setTheme('auto')"><i class="bi bi-circle-half me-2"></i>Auto</button></li>
</ul>
</div>
</div>
</div> </div>
<form id="translateForm" @submit.prevent> <form id="translateForm" @submit.prevent>
@@ -923,40 +939,7 @@
</div> </div>
<iframe id="printFrame" ref="printFrame" style="display: none;"></iframe> <iframe id="printFrame" ref="printFrame" style="display: none;"></iframe>
<!-- Controls --> <!-- Header controls now in left panel top-right -->
<div class="bottom-left-controls">
<div class="dropdown">
<button class="btn btn-secondary dropdown-toggle" type="button" data-bs-toggle="dropdown"><i
class="bi bi-translate"></i></button>
<ul class="dropdown-menu">
<li><a class="dropdown-item" :class="{active: currentLang==='zh'}" href="#"
@click.prevent="setLang('zh')">中文</a></li>
<li><a class="dropdown-item" :class="{active: currentLang==='en'}" href="#"
@click.prevent="setLang('en')">English</a></li>
<li><a class="dropdown-item" :class="{active: currentLang==='vi'}" href="#"
@click.prevent="setLang('vi')">Tiếng Việt</a></li>
</ul>
</div>
<div class="dropdown">
<button class="btn btn-secondary dropdown-toggle" type="button" data-bs-toggle="dropdown"><i
class="bi bi-circle-half"></i></button>
<ul class="dropdown-menu">
<li>
<button class="dropdown-item" @click="setTheme('light')"><i class="bi bi-sun-fill me-2"></i> Light
</button>
</li>
<li>
<button class="dropdown-item" @click="setTheme('dark')"><i class="bi bi-moon-stars-fill me-2"></i>
Dark
</button>
</li>
<li>
<button class="dropdown-item" @click="setTheme('auto')"><i class="bi bi-circle-half me-2"></i> Auto
</button>
</li>
</ul>
</div>
</div>
</div> </div>
<script src="/static/bootstrap.bundle.min.js"></script> <script src="/static/bootstrap.bundle.min.js"></script>
@@ -1048,7 +1031,14 @@
components: {SliderControl, ModelPresetSelector}, components: {SliderControl, ModelPresetSelector},
setup() { setup() {
const version = ref(""); const version = ref("");
const currentLang = ref(localStorage.getItem('ui_language') || 'zh'); function detectBrowserLang() {
const nav = navigator.language || navigator.userLanguage || '';
const lang = nav.split('-')[0].toLowerCase();
if (['zh', 'en', 'id'].includes(lang)) return lang;
if (lang === 'zh') return 'zh';
return 'en'; // default to English for unrecognized languages
}
const currentLang = ref(localStorage.getItem('ui_language') || detectBrowserLang());
const i18nData = ref({}); const i18nData = ref({});
const glossaryData = ref({}); const glossaryData = ref({});
const tasks = ref([]); const tasks = ref([]);
@@ -1868,7 +1858,8 @@
const setLang = (l) => { const setLang = (l) => {
currentLang.value = l; currentLang.value = l;
localStorage.setItem('ui_language', l); localStorage.setItem('ui_language', l);
document.documentElement.lang = l === 'zh' ? 'zh-CN' : 'en'; const langMap = {zh: 'zh-CN', en: 'en', id: 'id'};
document.documentElement.lang = langMap[l] || 'en';
}; };
const setTheme = (t) => { const setTheme = (t) => {
localStorage.setItem('theme', t); localStorage.setItem('theme', t);

View File

@@ -326,33 +326,58 @@ class DocxTranslator(AiTranslator):
runs = element_info["runs"] runs = element_info["runs"]
if not runs: return if not runs: return
first_real_run_index = -1 # Filter to runs that are still attached to the document
# 找到第一个可以写入文本的run valid_runs = []
for i, run in enumerate(runs): for run in runs:
if run.element.getparent() is not None: if run.element.getparent() is not None:
# 如果 run 是副本的一部分,其 _parent 可能仍然指向原始文档的段落
# 但我们需要确保它与 element_info["paragraph"] 同步
run._parent = element_info["paragraph"] run._parent = element_info["paragraph"]
run.text = final_text valid_runs.append(run)
first_real_run_index = i
break
# 如果没有找到有效的run例如它们都已被删除则记录警告 if not valid_runs:
if first_real_run_index == -1:
self.logger.warning(f"无法应用翻译 '{final_text}'因为找不到有效的run。") self.logger.warning(f"无法应用翻译 '{final_text}'因为找不到有效的run。")
return return
# 删除所有后续的run因为它们的文本已经被合并到第一个run中了 if len(valid_runs) == 1:
for i in range(first_real_run_index + 1, len(runs)): # Single run: just write the translation
run = runs[i] valid_runs[0].text = final_text
parent_element = run.element.getparent() return
if parent_element is not None:
try: # Multiple runs: proportionally distribute translated text to preserve formatting
parent_element.remove(run.element) orig_lengths = [len(r.text) for r in valid_runs]
except ValueError: total_orig = sum(orig_lengths)
# 在某些复杂情况下一个run可能已经被其父元素隐式删除 final_len = len(final_text)
self.logger.debug(f"尝试删除一个不存在的run元素。这通常是安全的。")
pass if total_orig == 0:
valid_runs[0].text = final_text
for run in valid_runs[1:]:
self._remove_run_element(run)
return
# Distribute characters proportionally
char_pos = 0
for i, run in enumerate(valid_runs):
if i == len(valid_runs) - 1:
# Last run gets all remaining text
run.text = final_text[char_pos:]
else:
ratio = orig_lengths[i] / total_orig
run_char_count = max(1, round(final_len * ratio))
run_char_count = min(run_char_count, final_len - char_pos - (len(valid_runs) - i - 1))
if run_char_count <= 0:
# Remove runs that would get zero characters
self._remove_run_element(run)
continue
run.text = final_text[char_pos:char_pos + run_char_count]
char_pos += run_char_count
def _remove_run_element(self, run) -> None:
"""Safely remove a run element from its parent."""
parent_element = run.element.getparent()
if parent_element is not None:
try:
parent_element.remove(run.element)
except ValueError:
self.logger.debug(f"尝试删除一个不存在的run元素。这通常是安全的。")
# ---------- FIX START: 新增用于清理副本段落的辅助方法 ---------- # ---------- FIX START: 新增用于清理副本段落的辅助方法 ----------
def _prune_unwanted_elements_from_copy(self, p_element: OxmlElement): def _prune_unwanted_elements_from_copy(self, p_element: OxmlElement):