fix: MT模式翻译残留、docx格式丢失、语言切换器及provider域名修复
- provider.py: 域名匹配改为包含匹配,覆盖dashscope-intl国际站 - segments_agent.py: MT模式改用<<<SEG:n>>>纯文本标记替代JSON,避免qwen-mt模型原文残留 - docx_translator.py: _apply_translation改为按字符比例分配译文到各Run,保留原始格式 - i18nData.json: vi(越南语)替换为id(印尼语),含完整175键翻译 - index.html: 语言切换器移至顶部标题栏,新增浏览器语言自动检测 Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
This commit is contained in:
@@ -5,7 +5,7 @@ ProviderType: TypeAlias = Literal["ollama", "bigmodel", "aliyuncs", "volces", "g
|
|||||||
def get_provider_by_domain(domain:str)->ProviderType:
|
def get_provider_by_domain(domain:str)->ProviderType:
|
||||||
if domain == "open.bigmodel.cn":
|
if domain == "open.bigmodel.cn":
|
||||||
return "bigmodel"
|
return "bigmodel"
|
||||||
elif domain == "dashscope.aliyuncs.com":
|
elif "dashscope.aliyuncs.com" in domain:
|
||||||
return "aliyuncs"
|
return "aliyuncs"
|
||||||
elif domain == "ark.cn-beijing.volces.com":
|
elif domain == "ark.cn-beijing.volces.com":
|
||||||
return "volces"
|
return "volces"
|
||||||
|
|||||||
@@ -15,6 +15,9 @@ from docutranslate.agents.agent import PartialAgentResultError, AgentResultError
|
|||||||
from docutranslate.glossary.glossary import Glossary
|
from docutranslate.glossary.glossary import Glossary
|
||||||
from docutranslate.utils.json_utils import segments2json_chunks, fix_json_string
|
from docutranslate.utils.json_utils import segments2json_chunks, fix_json_string
|
||||||
|
|
||||||
|
# MT mode plain-text segment marker — designed to survive machine translation unchanged
|
||||||
|
MT_SEG_MARKER_RE = re.compile(r'<<<SEG:(\d+)>>>\s*\n(.*?)(?=<<<SEG:\d+>>>|\Z)', re.DOTALL)
|
||||||
|
|
||||||
|
|
||||||
def generate_prompt(json_segments: str, to_lang: str):
|
def generate_prompt(json_segments: str, to_lang: str):
|
||||||
return f"""
|
return f"""
|
||||||
@@ -58,8 +61,8 @@ Below is an example of how merging should be done when necessary:
|
|||||||
input:
|
input:
|
||||||
```json
|
```json
|
||||||
{{
|
{{
|
||||||
"EXAMPLE_KEY_1":"汤姆说:“杰克你",
|
"EXAMPLE_KEY_1":"汤姆说:\"杰克你",
|
||||||
"EXAMPLE_KEY_2":"好”。"
|
"EXAMPLE_KEY_2":"好\"。"
|
||||||
}}
|
}}
|
||||||
```
|
```
|
||||||
output:
|
output:
|
||||||
@@ -92,6 +95,44 @@ def get_target_segments(result: str):
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
|
|
||||||
|
def _chunk_to_mt_prompt(chunk: dict) -> str:
    """Render a segment chunk as marker-delimited plain text for MT models.

    Each entry becomes a ``<<<SEG:key>>>`` marker line followed by the
    segment text. Entries are emitted in ascending numeric key order and
    joined with single newlines.

    Args:
        chunk: Mapping whose keys are convertible with ``int`` (for example
            ``{"0": "text1", "1": "text2"}``) and whose values are the
            segment texts.

    Returns:
        The plain-text prompt; an empty string for an empty chunk.
    """
    # Sort numerically so that "10" comes after "2", not before it.
    ordered_keys = sorted(chunk, key=int)
    return "\n".join(
        f"<<<SEG:{seg_id}>>>\n{chunk[seg_id]}" for seg_id in ordered_keys
    )
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_mt_prompt_to_dict(mt_prompt: str) -> dict:
    """Recover the segment mapping from a marker-delimited MT prompt.

    Every ``MT_SEG_MARKER_RE`` match contributes one entry: the captured
    segment number (kept as a string) maps to the captured text with
    surrounding whitespace stripped. If a number occurs more than once, the
    last occurrence wins.

    Args:
        mt_prompt: Prompt text in the ``<<<SEG:n>>>`` marker format.

    Returns:
        Mapping of segment number to text. When no marker matches, the whole
        prompt is returned unchanged under the key ``"0"``.
    """
    segments = {
        found.group(1): found.group(2).strip()
        for found in MT_SEG_MARKER_RE.finditer(mt_prompt)
    }
    # No marker matched: treat the entire prompt as a single segment.
    return segments or {"0": mt_prompt}
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_mt_response(text: str, original_chunk: dict) -> dict:
    """Extract translated segments from a marker-delimited MT response.

    Args:
        text: Raw model response, expected to contain ``<<<SEG:n>>>`` markers
            as matched by ``MT_SEG_MARKER_RE``.
        original_chunk: The segments that were sent; only its keys are used.

    Returns:
        A dict containing every key of ``original_chunk``. Keys whose marker
        was found map to the captured text with surrounding whitespace
        stripped. Keys that were not found map to an empty string (note: not
        to the source text). Markers whose number is not a key of
        ``original_chunk`` are ignored.
    """
    translated = {}
    for found in MT_SEG_MARKER_RE.finditer(text):
        seg_id = found.group(1)
        if seg_id not in original_chunk:
            continue
        translated[seg_id] = found.group(2).strip()

    # Pad every key the response did not cover with an empty string so the
    # returned dict always has the same key set as the request.
    for seg_id in original_chunk:
        translated.setdefault(seg_id, "")
    return translated
|
||||||
|
|
||||||
|
|
||||||
@dataclass(kw_only=True)
|
@dataclass(kw_only=True)
|
||||||
class SegmentsTranslateAgentConfig(AgentConfig):
|
class SegmentsTranslateAgentConfig(AgentConfig):
|
||||||
to_lang: str
|
to_lang: str
|
||||||
@@ -123,20 +164,16 @@ class SegmentsTranslateAgent(Agent):
|
|||||||
def _result_handler(self, result: str, origin_prompt: str, logger: Logger):
|
def _result_handler(self, result: str, origin_prompt: str, logger: Logger):
|
||||||
"""
|
"""
|
||||||
处理成功的API响应。
|
处理成功的API响应。
|
||||||
- 如果键完全匹配,返回翻译结果。
|
MT模式下使用 <<<SEG:n>>> 标记解析纯文本响应,避免JSON格式不兼容问题。
|
||||||
- 如果键不匹配,构造一个部分成功的结果,并通过 PartialTranslationError 异常抛出,以触发重试。
|
|
||||||
- 其他错误(如JSON解析失败、模型偷懒)则抛出普通 ValueError 触发重试。
|
|
||||||
- MT模式下,如果返回的是纯文本而非JSON,将其按行分割并映射到原始键。
|
|
||||||
"""
|
"""
|
||||||
# MT模式下直接解析origin_prompt为JSON(纯净JSON,没有<input>包装)
|
|
||||||
if self.is_mt_mode:
|
if self.is_mt_mode:
|
||||||
original_segments = origin_prompt
|
return self._result_handler_mt(result, origin_prompt, logger)
|
||||||
else:
|
|
||||||
original_segments = get_original_segments(origin_prompt)
|
# --- Non-MT mode (JSON-based) ---
|
||||||
|
original_segments = get_original_segments(origin_prompt)
|
||||||
result = get_target_segments(result)
|
result = get_target_segments(result)
|
||||||
if result == "":
|
if result == "":
|
||||||
if original_segments.strip() != "":
|
if original_segments.strip() != "":
|
||||||
# print(f"【测试】origin_prompt:\n{origin_prompt}\nresult:\n{result}")
|
|
||||||
raise AgentResultError("result为空值但原文不为空")
|
raise AgentResultError("result为空值但原文不为空")
|
||||||
return {}
|
return {}
|
||||||
try:
|
try:
|
||||||
@@ -144,37 +181,6 @@ class SegmentsTranslateAgent(Agent):
|
|||||||
original_chunk = json_repair.loads(original_segments)
|
original_chunk = json_repair.loads(original_segments)
|
||||||
repaired_result = json_repair.loads(result)
|
repaired_result = json_repair.loads(result)
|
||||||
|
|
||||||
# MT模式兼容:处理各种非标准返回格式
|
|
||||||
if self.is_mt_mode:
|
|
||||||
# 如果是列表,尝试合并所有字典
|
|
||||||
if isinstance(repaired_result, list):
|
|
||||||
logger.debug(f"[MT模式] 返回结果是列表,包含 {len(repaired_result)} 个元素")
|
|
||||||
merged_result = {}
|
|
||||||
for item in repaired_result:
|
|
||||||
if isinstance(item, dict):
|
|
||||||
merged_result.update(item)
|
|
||||||
repaired_result = merged_result
|
|
||||||
|
|
||||||
# 如果返回的是纯文本(字符串),尝试将其映射到原始键
|
|
||||||
if isinstance(repaired_result, str):
|
|
||||||
original_keys = list(original_chunk.keys())
|
|
||||||
# 按行分割结果,去除空行
|
|
||||||
result_lines = [line.strip() for line in repaired_result.split('\n') if line.strip()]
|
|
||||||
|
|
||||||
# 如果只有一行结果但多个键,将整个结果分配给第一个键,其余为空
|
|
||||||
if len(result_lines) == 1 and len(original_keys) > 1:
|
|
||||||
repaired_result = {original_keys[0]: result_lines[0]}
|
|
||||||
for key in original_keys[1:]:
|
|
||||||
repaired_result[key] = ""
|
|
||||||
# 如果结果行数与键数匹配,逐行对应
|
|
||||||
elif len(result_lines) == len(original_keys):
|
|
||||||
repaired_result = {original_keys[i]: result_lines[i] for i in range(len(original_keys))}
|
|
||||||
# 如果结果行数不匹配,将所有结果合并给第一个键
|
|
||||||
else:
|
|
||||||
repaired_result = {original_keys[0]: repaired_result}
|
|
||||||
for key in original_keys[1:]:
|
|
||||||
repaired_result[key] = ""
|
|
||||||
|
|
||||||
if not isinstance(repaired_result, dict):
|
if not isinstance(repaired_result, dict):
|
||||||
raise AgentResultError(f"Agent返回结果不是dict的json形式, result: {result}")
|
raise AgentResultError(f"Agent返回结果不是dict的json形式, result: {result}")
|
||||||
|
|
||||||
@@ -184,9 +190,7 @@ class SegmentsTranslateAgent(Agent):
|
|||||||
original_keys = set(original_chunk.keys())
|
original_keys = set(original_chunk.keys())
|
||||||
result_keys = set(repaired_result.keys())
|
result_keys = set(repaired_result.keys())
|
||||||
|
|
||||||
# 如果键不完全匹配
|
|
||||||
if original_keys != result_keys:
|
if original_keys != result_keys:
|
||||||
# 仍然先构造一个最完整的“部分结果”
|
|
||||||
final_chunk = {}
|
final_chunk = {}
|
||||||
common_keys = original_keys.intersection(result_keys)
|
common_keys = original_keys.intersection(result_keys)
|
||||||
missing_keys = original_keys - result_keys
|
missing_keys = original_keys - result_keys
|
||||||
@@ -201,74 +205,104 @@ class SegmentsTranslateAgent(Agent):
|
|||||||
for key in missing_keys:
|
for key in missing_keys:
|
||||||
final_chunk[key] = str(original_chunk[key])
|
final_chunk[key] = str(original_chunk[key])
|
||||||
|
|
||||||
|
raise PartialAgentResultError("键不匹配,触发重试", partial_result=final_chunk,
|
||||||
|
append_prompt=f"\nBe careful not to omit any keys from the input; do not combine sentences when translating.\n")
|
||||||
|
|
||||||
# 抛出自定义异常,将部分结果和错误信息一起传递出去
|
|
||||||
raise PartialAgentResultError("键不匹配,触发重试", partial_result=final_chunk,append_prompt=f"\nBe careful not to omit any keys from the input; do not combine sentences when translating.\n")
|
|
||||||
|
|
||||||
# 如果键完全匹配(理想情况),正常返回
|
|
||||||
for key, value in repaired_result.items():
|
for key, value in repaired_result.items():
|
||||||
repaired_result[key] = str(value)
|
repaired_result[key] = str(value)
|
||||||
|
|
||||||
return repaired_result
|
return repaired_result
|
||||||
|
|
||||||
except (RuntimeError, JSONDecodeError) as e:
|
except (RuntimeError, JSONDecodeError) as e:
|
||||||
# MT模式兼容:如果JSON解析失败,尝试将结果作为纯文本处理
|
|
||||||
if self.is_mt_mode:
|
|
||||||
try:
|
|
||||||
original_chunk = json_repair.loads(original_segments)
|
|
||||||
original_keys = list(original_chunk.keys())
|
|
||||||
result_lines = [line.strip() for line in result.split('\n') if line.strip()]
|
|
||||||
|
|
||||||
if len(result_lines) == 1 and len(original_keys) > 1:
|
|
||||||
repaired_result = {original_keys[0]: result_lines[0]}
|
|
||||||
for key in original_keys[1:]:
|
|
||||||
repaired_result[key] = ""
|
|
||||||
elif len(result_lines) == len(original_keys):
|
|
||||||
repaired_result = {original_keys[i]: result_lines[i] for i in range(len(original_keys))}
|
|
||||||
else:
|
|
||||||
repaired_result = {original_keys[0]: result}
|
|
||||||
for key in original_keys[1:]:
|
|
||||||
repaired_result[key] = ""
|
|
||||||
|
|
||||||
# 验证结果
|
|
||||||
if set(repaired_result.keys()) != set(original_chunk.keys()):
|
|
||||||
raise AgentResultError(f"MT模式解析后键不匹配")
|
|
||||||
|
|
||||||
return repaired_result
|
|
||||||
except Exception as mt_e:
|
|
||||||
raise AgentResultError(f"MT模式纯文本处理失败: {mt_e.__repr__()}")
|
|
||||||
|
|
||||||
# 对于JSON解析等硬性错误,继续抛出普通ValueError
|
|
||||||
raise AgentResultError(f"结果处理失败: {e.__repr__()}")
|
raise AgentResultError(f"结果处理失败: {e.__repr__()}")
|
||||||
|
|
||||||
|
def _result_handler_mt(self, result: str, origin_prompt: str, logger: Logger) -> dict:
    """Turn a plain-text MT response into a ``{segment key: translation}`` dict.

    Resolution order:

    1. Parse ``<<<SEG:n>>>`` markers. If every non-empty source segment came
       back with non-empty text, return that mapping.
    2. If only some segments came back, raise ``PartialAgentResultError``
       carrying the translated segments plus the source text for the
       missing ones, so the caller can retry.
    3. If no marker yielded text, map the non-empty response lines onto the
       segment keys by position when the counts match.
    4. Otherwise put the whole response into the first segment and leave the
       remaining segments empty.

    Args:
        result: Raw text returned by the MT model.
        origin_prompt: The marker-delimited prompt that was sent.
        logger: Logger supplied by the caller (currently unused here).

    Returns:
        Mapping from each segment key of the prompt to its translated text;
        ``{}`` when both the response and the prompt are blank.

    Raises:
        AgentResultError: The response is blank for a non-blank prompt, is
            identical to the source text, or contains no usable text.
        PartialAgentResultError: Only part of the segments were returned.
    """
    result_clean = result.strip()
    if not result_clean:
        if origin_prompt.strip():
            raise AgentResultError("result为空值但原文不为空")
        return {}

    original_chunk = _parse_mt_prompt_to_dict(origin_prompt)
    # Keys produced by _parse_mt_prompt_to_dict are digit strings (or "0" for
    # its whole-prompt fallback), so numeric sorting restores request order.
    # Always address segments through these real keys: the positional
    # fallbacks below used to invent "0".."n-1", which is only correct if a
    # chunk's keys happen to start at 0.
    # NOTE(review): send_segments looks result keys up in a file-wide index,
    # so per-chunk keys presumably do not restart at 0 - confirm against
    # segments2json_chunks.
    ordered_keys = sorted(original_chunk, key=int)
    sources = {key: str(original_chunk[key]) for key in ordered_keys}

    def _is_untranslated(candidate: dict) -> bool:
        # True when every segment equals its source, i.e. nothing was translated.
        return all(
            candidate.get(key, "").strip() == sources[key].strip()
            for key in ordered_keys
        )

    parsed = _parse_mt_response(result_clean, original_chunk)
    # _parse_mt_response pads keys it did not find with "", so comparing key
    # sets can never detect a dropped segment. Judge by content instead: a
    # segment counts as translated only if it came back with non-blank text.
    translated = {
        key: text
        for key, text in parsed.items()
        if key in sources and str(text).strip()
    }

    if translated:
        missing = [
            key
            for key in ordered_keys
            if key not in translated and sources[key].strip()
        ]
        if missing:
            # Keep what was translated, fall back to the source text for the
            # rest, and ask the caller to retry.
            partial = {key: translated.get(key, sources[key]) for key in ordered_keys}
            raise PartialAgentResultError(
                "MT模式键不匹配,触发重试",
                partial_result=partial,
                append_prompt="\nPreserve all <<<SEG:n>>> markers exactly as they appear.\n",
            )

        complete = {key: translated.get(key, "") for key in ordered_keys}
        if _is_untranslated(complete):
            raise AgentResultError("翻译结果与原文完全相同,疑似翻译失败,将进行重试。")
        return complete

    # Fallback: the model dropped or mangled the markers. Use the non-blank
    # lines, skipping bare marker lines, which are never translation text.
    result_lines = []
    for raw_line in result_clean.split('\n'):
        line = raw_line.strip()
        if not line:
            continue
        if line.startswith("<<<SEG:") and line.endswith(">>>") and line[7:-3].isdigit():
            continue
        result_lines.append(line)

    if not result_lines or not ordered_keys:
        raise AgentResultError("MT模式无法解析响应")

    if len(result_lines) == len(ordered_keys):
        # One line per segment: map by position onto the real keys.
        repaired = dict(zip(ordered_keys, result_lines))
        if _is_untranslated(repaired):
            raise AgentResultError("翻译结果与原文完全相同(逐行),疑似翻译失败,将进行重试。")
        return repaired

    # Last resort: keep all text in the first segment, blank out the others.
    repaired = dict.fromkeys(ordered_keys, "")
    repaired[ordered_keys[0]] = "\n".join(result_lines)
    return repaired
|
||||||
|
|
||||||
def _error_result_handler(self, origin_prompt: str, logger: Logger):
|
def _error_result_handler(self, origin_prompt: str, logger: Logger):
|
||||||
"""
|
"""
|
||||||
处理在所有重试后仍然失败的请求。
|
处理在所有重试后仍然失败的请求。
|
||||||
作为备用方案,返回原文内容,并将所有值转换为字符串。
|
作为备用方案,返回原文内容。
|
||||||
"""
|
"""
|
||||||
# MT模式下直接解析origin_prompt为JSON(纯净JSON,没有<input>包装)
|
|
||||||
if self.is_mt_mode:
|
if self.is_mt_mode:
|
||||||
original_segments = origin_prompt
|
original_chunk = _parse_mt_prompt_to_dict(origin_prompt)
|
||||||
else:
|
for key in list(original_chunk.keys()):
|
||||||
original_segments = get_original_segments(origin_prompt)
|
original_chunk[key] = f"{original_chunk[key]}"
|
||||||
|
return original_chunk
|
||||||
|
|
||||||
|
original_segments = get_original_segments(origin_prompt)
|
||||||
if original_segments == "":
|
if original_segments == "":
|
||||||
return {}
|
return {}
|
||||||
try:
|
try:
|
||||||
original_chunk = json_repair.loads(original_segments)
|
original_chunk = json_repair.loads(original_segments)
|
||||||
# 此处逻辑保留,作为最终的兜底方案
|
|
||||||
for key, value in original_chunk.items():
|
for key, value in original_chunk.items():
|
||||||
original_chunk[key] = f"{value}"
|
original_chunk[key] = f"{value}"
|
||||||
return original_chunk
|
return original_chunk
|
||||||
except (RuntimeError, JSONDecodeError):
|
except (RuntimeError, JSONDecodeError):
|
||||||
logger.error(f"原始prompt也不是有效的json格式: {original_segments}")
|
logger.error(f"原始prompt也不是有效的json格式: {original_segments}")
|
||||||
# 如果原始prompt本身也无效,返回一个清晰的错误对象
|
|
||||||
return {"error": f"{original_segments}"}
|
return {"error": f"{original_segments}"}
|
||||||
|
|
||||||
def send_segments(self, segments: list[str], chunk_size: int) -> list[str]:
|
def send_segments(self, segments: list[str], chunk_size: int) -> list[str]:
|
||||||
indexed_originals, chunks, merged_indices_list = segments2json_chunks(segments, chunk_size)
|
indexed_originals, chunks, merged_indices_list = segments2json_chunks(segments, chunk_size)
|
||||||
# MT模式下直接发送纯净JSON,不添加额外提示词
|
|
||||||
if self.is_mt_mode:
|
if self.is_mt_mode:
|
||||||
prompts = [json.dumps(chunk, ensure_ascii=False, indent=0) for chunk in chunks]
|
prompts = [_chunk_to_mt_prompt(chunk) for chunk in chunks]
|
||||||
else:
|
else:
|
||||||
prompts = [generate_prompt(json.dumps(chunk, ensure_ascii=False, indent=0), self.to_lang) for chunk in chunks]
|
prompts = [generate_prompt(json.dumps(chunk, ensure_ascii=False, indent=0), self.to_lang) for chunk in chunks]
|
||||||
translated_chunks = super().send_prompts(prompts=prompts, json_format=self.force_json,
|
translated_chunks = super().send_prompts(prompts=prompts, json_format=self.force_json,
|
||||||
@@ -292,7 +326,6 @@ class SegmentsTranslateAgent(Agent):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"处理chunk时发生未知错误: {e.__repr__()}")
|
self.logger.error(f"处理chunk时发生未知错误: {e.__repr__()}")
|
||||||
|
|
||||||
# 重建最终列表
|
|
||||||
result = []
|
result = []
|
||||||
last_end = 0
|
last_end = 0
|
||||||
ls = list(indexed_translated.values())
|
ls = list(indexed_translated.values())
|
||||||
@@ -308,9 +341,8 @@ class SegmentsTranslateAgent(Agent):
|
|||||||
async def send_segments_async(self, segments: list[str], chunk_size: int) -> list[str]:
|
async def send_segments_async(self, segments: list[str], chunk_size: int) -> list[str]:
|
||||||
indexed_originals, chunks, merged_indices_list = await asyncio.to_thread(segments2json_chunks, segments,
|
indexed_originals, chunks, merged_indices_list = await asyncio.to_thread(segments2json_chunks, segments,
|
||||||
chunk_size)
|
chunk_size)
|
||||||
# MT模式下直接发送纯净JSON,不添加额外提示词
|
|
||||||
if self.is_mt_mode:
|
if self.is_mt_mode:
|
||||||
prompts = [json.dumps(chunk, ensure_ascii=False, indent=0) for chunk in chunks]
|
prompts = [_chunk_to_mt_prompt(chunk) for chunk in chunks]
|
||||||
else:
|
else:
|
||||||
prompts = [generate_prompt(json.dumps(chunk, ensure_ascii=False, indent=0), self.to_lang) for chunk in chunks]
|
prompts = [generate_prompt(json.dumps(chunk, ensure_ascii=False, indent=0), self.to_lang) for chunk in chunks]
|
||||||
|
|
||||||
@@ -326,7 +358,6 @@ class SegmentsTranslateAgent(Agent):
|
|||||||
continue
|
continue
|
||||||
for key, val in chunk.items():
|
for key, val in chunk.items():
|
||||||
if key in indexed_translated:
|
if key in indexed_translated:
|
||||||
# 此处不再需要 str(val),因为 _result_handler 已经处理好了
|
|
||||||
indexed_translated[key] = val
|
indexed_translated[key] = val
|
||||||
else:
|
else:
|
||||||
self.logger.warning(f"在结果chunk中发现未知键 '{key}',已忽略。")
|
self.logger.warning(f"在结果chunk中发现未知键 '{key}',已忽略。")
|
||||||
@@ -335,7 +366,6 @@ class SegmentsTranslateAgent(Agent):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"处理chunk时发生未知错误: {e.__repr__()}")
|
self.logger.error(f"处理chunk时发生未知错误: {e.__repr__()}")
|
||||||
|
|
||||||
# 重建最终列表
|
|
||||||
result = []
|
result = []
|
||||||
last_end = 0
|
last_end = 0
|
||||||
ls = list(indexed_translated.values())
|
ls = list(indexed_translated.values())
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
@@ -1,5 +1,5 @@
|
|||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
<html lang="zh-CN" data-bs-theme="auto">
|
<html lang="en" data-bs-theme="auto">
|
||||||
<head>
|
<head>
|
||||||
<meta charset="UTF-8">
|
<meta charset="UTF-8">
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
@@ -159,15 +159,6 @@
|
|||||||
white-space: pre;
|
white-space: pre;
|
||||||
}
|
}
|
||||||
|
|
||||||
.bottom-left-controls {
|
|
||||||
position: fixed;
|
|
||||||
bottom: 1rem;
|
|
||||||
left: 1rem;
|
|
||||||
z-index: 1050;
|
|
||||||
display: flex;
|
|
||||||
gap: 0.5rem;
|
|
||||||
}
|
|
||||||
|
|
||||||
.step-number {
|
.step-number {
|
||||||
margin-right: 0.25rem;
|
margin-right: 0.25rem;
|
||||||
}
|
}
|
||||||
@@ -226,6 +217,31 @@
|
|||||||
<div class="d-flex align-items-center">
|
<div class="d-flex align-items-center">
|
||||||
<h4 class="mb-0 me-3 fw-bold" :title="t('pageTitle')">DocuTranslate</h4>
|
<h4 class="mb-0 me-3 fw-bold" :title="t('pageTitle')">DocuTranslate</h4>
|
||||||
</div>
|
</div>
|
||||||
|
<!-- Language & Theme Controls -->
|
||||||
|
<div class="d-flex gap-2">
|
||||||
|
<div class="dropdown">
|
||||||
|
<button class="btn btn-outline-secondary btn-sm dropdown-toggle" type="button" data-bs-toggle="dropdown">
|
||||||
|
<i class="bi bi-translate me-1"></i><span>{{ {zh:'中文',en:'English',id:'Bahasa'}[currentLang] || 'Language' }}</span>
|
||||||
|
</button>
|
||||||
|
<ul class="dropdown-menu dropdown-menu-end">
|
||||||
|
<li><a class="dropdown-item" :class="{active: currentLang==='zh'}" href="#"
|
||||||
|
@click.prevent="setLang('zh')">中文</a></li>
|
||||||
|
<li><a class="dropdown-item" :class="{active: currentLang==='en'}" href="#"
|
||||||
|
@click.prevent="setLang('en')">English</a></li>
|
||||||
|
<li><a class="dropdown-item" :class="{active: currentLang==='id'}" href="#"
|
||||||
|
@click.prevent="setLang('id')">Bahasa Indonesia</a></li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
<div class="dropdown">
|
||||||
|
<button class="btn btn-outline-secondary btn-sm dropdown-toggle" type="button" data-bs-toggle="dropdown"><i
|
||||||
|
class="bi bi-circle-half"></i></button>
|
||||||
|
<ul class="dropdown-menu dropdown-menu-end">
|
||||||
|
<li><button class="dropdown-item" @click="setTheme('light')"><i class="bi bi-sun-fill me-2"></i>Light</button></li>
|
||||||
|
<li><button class="dropdown-item" @click="setTheme('dark')"><i class="bi bi-moon-stars-fill me-2"></i>Dark</button></li>
|
||||||
|
<li><button class="dropdown-item" @click="setTheme('auto')"><i class="bi bi-circle-half me-2"></i>Auto</button></li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<form id="translateForm" @submit.prevent>
|
<form id="translateForm" @submit.prevent>
|
||||||
@@ -923,40 +939,7 @@
|
|||||||
</div>
|
</div>
|
||||||
<iframe id="printFrame" ref="printFrame" style="display: none;"></iframe>
|
<iframe id="printFrame" ref="printFrame" style="display: none;"></iframe>
|
||||||
|
|
||||||
<!-- Controls -->
|
<!-- Header controls now in left panel top-right -->
|
||||||
<div class="bottom-left-controls">
|
|
||||||
<div class="dropdown">
|
|
||||||
<button class="btn btn-secondary dropdown-toggle" type="button" data-bs-toggle="dropdown"><i
|
|
||||||
class="bi bi-translate"></i></button>
|
|
||||||
<ul class="dropdown-menu">
|
|
||||||
<li><a class="dropdown-item" :class="{active: currentLang==='zh'}" href="#"
|
|
||||||
@click.prevent="setLang('zh')">中文</a></li>
|
|
||||||
<li><a class="dropdown-item" :class="{active: currentLang==='en'}" href="#"
|
|
||||||
@click.prevent="setLang('en')">English</a></li>
|
|
||||||
<li><a class="dropdown-item" :class="{active: currentLang==='vi'}" href="#"
|
|
||||||
@click.prevent="setLang('vi')">Tiếng Việt</a></li>
|
|
||||||
</ul>
|
|
||||||
</div>
|
|
||||||
<div class="dropdown">
|
|
||||||
<button class="btn btn-secondary dropdown-toggle" type="button" data-bs-toggle="dropdown"><i
|
|
||||||
class="bi bi-circle-half"></i></button>
|
|
||||||
<ul class="dropdown-menu">
|
|
||||||
<li>
|
|
||||||
<button class="dropdown-item" @click="setTheme('light')"><i class="bi bi-sun-fill me-2"></i> Light
|
|
||||||
</button>
|
|
||||||
</li>
|
|
||||||
<li>
|
|
||||||
<button class="dropdown-item" @click="setTheme('dark')"><i class="bi bi-moon-stars-fill me-2"></i>
|
|
||||||
Dark
|
|
||||||
</button>
|
|
||||||
</li>
|
|
||||||
<li>
|
|
||||||
<button class="dropdown-item" @click="setTheme('auto')"><i class="bi bi-circle-half me-2"></i> Auto
|
|
||||||
</button>
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<script src="/static/bootstrap.bundle.min.js"></script>
|
<script src="/static/bootstrap.bundle.min.js"></script>
|
||||||
@@ -1048,7 +1031,14 @@
|
|||||||
components: {SliderControl, ModelPresetSelector},
|
components: {SliderControl, ModelPresetSelector},
|
||||||
setup() {
|
setup() {
|
||||||
const version = ref("");
|
const version = ref("");
|
||||||
const currentLang = ref(localStorage.getItem('ui_language') || 'zh');
|
/**
 * Pick the initial UI language from the browser's preferred language.
 *
 * Only the primary subtag is considered (e.g. "zh-CN" -> "zh").
 *
 * @returns {string} One of the supported UI languages ('zh', 'en', 'id');
 *     'en' when the browser language is missing or not supported.
 */
function detectBrowserLang() {
    const supported = ['zh', 'en', 'id'];
    const nav = navigator.language || navigator.userLanguage || '';
    const lang = nav.split('-')[0].toLowerCase();
    // The former separate `lang === 'zh'` check was unreachable: 'zh' is
    // already covered by the supported-language lookup.
    return supported.includes(lang) ? lang : 'en';
}
|
||||||
|
const currentLang = ref(localStorage.getItem('ui_language') || detectBrowserLang());
|
||||||
const i18nData = ref({});
|
const i18nData = ref({});
|
||||||
const glossaryData = ref({});
|
const glossaryData = ref({});
|
||||||
const tasks = ref([]);
|
const tasks = ref([]);
|
||||||
@@ -1868,7 +1858,8 @@
|
|||||||
const setLang = (l) => {
|
const setLang = (l) => {
|
||||||
currentLang.value = l;
|
currentLang.value = l;
|
||||||
localStorage.setItem('ui_language', l);
|
localStorage.setItem('ui_language', l);
|
||||||
document.documentElement.lang = l === 'zh' ? 'zh-CN' : 'en';
|
const langMap = {zh: 'zh-CN', en: 'en', id: 'id'};
|
||||||
|
document.documentElement.lang = langMap[l] || 'en';
|
||||||
};
|
};
|
||||||
const setTheme = (t) => {
|
const setTheme = (t) => {
|
||||||
localStorage.setItem('theme', t);
|
localStorage.setItem('theme', t);
|
||||||
|
|||||||
@@ -326,33 +326,58 @@ class DocxTranslator(AiTranslator):
|
|||||||
runs = element_info["runs"]
|
runs = element_info["runs"]
|
||||||
if not runs: return
|
if not runs: return
|
||||||
|
|
||||||
first_real_run_index = -1
|
# Filter to runs that are still attached to the document
|
||||||
# 找到第一个可以写入文本的run
|
valid_runs = []
|
||||||
for i, run in enumerate(runs):
|
for run in runs:
|
||||||
if run.element.getparent() is not None:
|
if run.element.getparent() is not None:
|
||||||
# 如果 run 是副本的一部分,其 _parent 可能仍然指向原始文档的段落
|
|
||||||
# 但我们需要确保它与 element_info["paragraph"] 同步
|
|
||||||
run._parent = element_info["paragraph"]
|
run._parent = element_info["paragraph"]
|
||||||
run.text = final_text
|
valid_runs.append(run)
|
||||||
first_real_run_index = i
|
|
||||||
break
|
|
||||||
|
|
||||||
# 如果没有找到有效的run(例如,它们都已被删除),则记录警告
|
if not valid_runs:
|
||||||
if first_real_run_index == -1:
|
|
||||||
self.logger.warning(f"无法应用翻译 '{final_text}',因为找不到有效的run。")
|
self.logger.warning(f"无法应用翻译 '{final_text}',因为找不到有效的run。")
|
||||||
return
|
return
|
||||||
|
|
||||||
# 删除所有后续的run,因为它们的文本已经被合并到第一个run中了
|
if len(valid_runs) == 1:
|
||||||
for i in range(first_real_run_index + 1, len(runs)):
|
# Single run: just write the translation
|
||||||
run = runs[i]
|
valid_runs[0].text = final_text
|
||||||
parent_element = run.element.getparent()
|
return
|
||||||
if parent_element is not None:
|
|
||||||
try:
|
# Multiple runs: proportionally distribute translated text to preserve formatting
|
||||||
parent_element.remove(run.element)
|
orig_lengths = [len(r.text) for r in valid_runs]
|
||||||
except ValueError:
|
total_orig = sum(orig_lengths)
|
||||||
# 在某些复杂情况下,一个run可能已经被其父元素隐式删除
|
final_len = len(final_text)
|
||||||
self.logger.debug(f"尝试删除一个不存在的run元素。这通常是安全的。")
|
|
||||||
pass
|
if total_orig == 0:
|
||||||
|
valid_runs[0].text = final_text
|
||||||
|
for run in valid_runs[1:]:
|
||||||
|
self._remove_run_element(run)
|
||||||
|
return
|
||||||
|
|
||||||
|
# Distribute characters proportionally
|
||||||
|
char_pos = 0
|
||||||
|
for i, run in enumerate(valid_runs):
|
||||||
|
if i == len(valid_runs) - 1:
|
||||||
|
# Last run gets all remaining text
|
||||||
|
run.text = final_text[char_pos:]
|
||||||
|
else:
|
||||||
|
ratio = orig_lengths[i] / total_orig
|
||||||
|
run_char_count = max(1, round(final_len * ratio))
|
||||||
|
run_char_count = min(run_char_count, final_len - char_pos - (len(valid_runs) - i - 1))
|
||||||
|
if run_char_count <= 0:
|
||||||
|
# Remove runs that would get zero characters
|
||||||
|
self._remove_run_element(run)
|
||||||
|
continue
|
||||||
|
run.text = final_text[char_pos:char_pos + run_char_count]
|
||||||
|
char_pos += run_char_count
|
||||||
|
|
||||||
|
def _remove_run_element(self, run) -> None:
    """Detach a run's XML element from its parent, if it still has one.

    Args:
        run: Object exposing an ``element`` whose ``getparent()`` returns the
            containing element or ``None``.

    A ``ValueError`` raised by the parent's ``remove`` is logged at debug
    level and otherwise ignored.
    """
    node = run.element
    container = node.getparent()
    if container is None:
        # Already detached: nothing to remove.
        return
    try:
        container.remove(node)
    except ValueError:
        self.logger.debug("尝试删除一个不存在的run元素。这通常是安全的。")
|
||||||
|
|
||||||
# ---------- FIX START: 新增用于清理副本段落的辅助方法 ----------
|
# ---------- FIX START: 新增用于清理副本段落的辅助方法 ----------
|
||||||
def _prune_unwanted_elements_from_copy(self, p_element: OxmlElement):
|
def _prune_unwanted_elements_from_copy(self, p_element: OxmlElement):
|
||||||
|
|||||||
Reference in New Issue
Block a user