Compare commits
19 Commits
8e91475e5c
...
main
| Author | SHA1 | Date | |
|---|---|---|---|
| 2d39c009cf | |||
| 4893163221 | |||
| 4f6bd1bc7b | |||
| a8b8c416dd | |||
| 4cf1a8c67d | |||
| 52bb8858c8 | |||
| 7f02abae0e | |||
| 97b7b20565 | |||
| 8a5f62342a | |||
|
|
9d8eacf0b4 | ||
|
|
47a3e9126a | ||
|
|
9e82daa2a1 | ||
|
|
86a9958f58 | ||
|
|
cd218a5ad0 | ||
|
|
6a2563bed6 | ||
|
|
2c7e879cd5 | ||
|
|
a6fe62420a | ||
|
|
93009d70a9 | ||
|
|
5871f5dd85 |
29
.env.example
Normal file
29
.env.example
Normal file
@@ -0,0 +1,29 @@
|
|||||||
|
# DocuTranslate 环境变量配置示例
|
||||||
|
# 复制此文件为 .env 并按需修改(需配合 python-dotenv 或容器环境变量使用)
|
||||||
|
|
||||||
|
# --- 代理配置 ---
|
||||||
|
# 是否启用系统代理,设置为 true 开启
|
||||||
|
# DOCUTRANSLATE_PROXY_ENABLED=true
|
||||||
|
|
||||||
|
# --- 缓存配置 ---
|
||||||
|
# 任务缓存数量(默认 10)
|
||||||
|
# DOCUTRANSLATE_CACHE_NUM=10
|
||||||
|
|
||||||
|
# --- 翻译 API 默认配置 ---
|
||||||
|
# 前端"自定义接口-default"平台的默认值,留空则不预填
|
||||||
|
|
||||||
|
# API 地址(Base URL),例如 https://api.openai.com/v1
|
||||||
|
DOCUTRANSLATE_BASE_URL=
|
||||||
|
|
||||||
|
# API 密钥
|
||||||
|
DOCUTRANSLATE_API_KEY=
|
||||||
|
|
||||||
|
# 模型 ID,例如 qwen-mt-turbo
|
||||||
|
DOCUTRANSLATE_MODEL_ID=
|
||||||
|
|
||||||
|
# --- 限流配置 ---
|
||||||
|
# RPM 限制(每分钟请求数),留空则不限制
|
||||||
|
DOCUTRANSLATE_RPM=
|
||||||
|
|
||||||
|
# TPM 限制(每分钟 Token 数),留空则不限制
|
||||||
|
DOCUTRANSLATE_TPM=
|
||||||
3
.gitignore
vendored
3
.gitignore
vendored
@@ -14,3 +14,6 @@ docutranslate/output/
|
|||||||
.idea/
|
.idea/
|
||||||
#claude
|
#claude
|
||||||
.claude/
|
.claude/
|
||||||
|
/.omc/
|
||||||
|
# Environment variables
|
||||||
|
.env
|
||||||
|
|||||||
39
AGENTS.md
Normal file
39
AGENTS.md
Normal file
@@ -0,0 +1,39 @@
|
|||||||
|
# Repository Guidelines
|
||||||
|
|
||||||
|
## Project Structure & Module Organization
|
||||||
|
Core Python code is in `docutranslate/`.
|
||||||
|
- `docutranslate/workflow/`: file-type workflows (`*_workflow.py`) that orchestrate conversion, translation, and export.
|
||||||
|
- `docutranslate/converter/`, `docutranslate/translator/`, `docutranslate/exporter/`: pipeline stages.
|
||||||
|
- `docutranslate/app.py`: FastAPI/Web UI backend entrypoint.
|
||||||
|
- `docutranslate/cli.py`: CLI entry (`docutranslate` command).
|
||||||
|
- `docutranslate/static/` and `docutranslate/template/`: bundled frontend/static assets.
|
||||||
|
- Packaging/build files live at root: `pyproject.toml`, `Dockerfile`, `*.spec`, `.github/workflows/`.
|
||||||
|
|
||||||
|
## Build, Test, and Development Commands
|
||||||
|
- `uv sync`: install project dependencies from `pyproject.toml`/`uv.lock`.
|
||||||
|
- `uv run docutranslate -i`: start local Web UI + API (default `127.0.0.1:8010`).
|
||||||
|
- `uv run docutranslate -i -p 8011 --cors`: run on a custom port with CORS enabled.
|
||||||
|
- `docker run -d -p 8010:8010 xunbu/docutranslate:latest`: run the published container locally.
|
||||||
|
- `uv pip install pyinstaller && uv run pyinstaller lite.spec --noconfirm --clean -y`: build a lightweight desktop package (see also `full.spec`, `lite_mac.spec`).
|
||||||
|
|
||||||
|
## Coding Style & Naming Conventions
|
||||||
|
- Follow Python 3.11+ conventions and PEP 8: 4-space indentation, clearly typed config classes, and small, focused functions.
|
||||||
|
- Use `snake_case` for modules/functions/variables and `PascalCase` for classes.
|
||||||
|
- Keep workflow naming consistent: `xxx_workflow.py`, matching config and workflow class names.
|
||||||
|
- Prefer explicit, composable configs over hard-coded provider values.
|
||||||
|
|
||||||
|
## Testing Guidelines
|
||||||
|
There is currently no first-party `tests/` suite or enforced coverage gate in this repository.
|
||||||
|
- For behavior changes, run a manual smoke test: start the server with `uv run docutranslate -i`, open `/docs`, and exercise at least one translation path you touched.
|
||||||
|
- If you add automated tests, place them under `tests/` with `test_*.py` names and keep fixtures small and file-type specific.
|
||||||
|
|
||||||
|
## Commit & Pull Request Guidelines
|
||||||
|
Recent history favors short, imperative commit subjects (Chinese or English), for example: `Fix Gemini provider tag`, `Add regex dependency`, `Add Vietnamese`.
|
||||||
|
- Keep subject lines concise and action-focused.
|
||||||
|
- In PRs, include: what changed, why, how you validated it, and UI screenshots when `docutranslate/static/` or interface behavior changes.
|
||||||
|
- Link related issues and note any new env vars/API provider requirements.
|
||||||
|
|
||||||
|
## Security & Configuration Tips
|
||||||
|
- Never commit real API keys or tokens.
|
||||||
|
- Keep provider credentials in environment variables or local untracked config.
|
||||||
|
- For LAN exposure, use `--host 0.0.0.0` intentionally and restrict network access as needed.
|
||||||
@@ -1,3 +1,3 @@
|
|||||||
# SPDX-FileCopyrightText: 2025 QinHan
|
# SPDX-FileCopyrightText: 2025 QinHan
|
||||||
# SPDX-License-Identifier: MPL-2.0
|
# SPDX-License-Identifier: MPL-2.0
|
||||||
__version__="1.6.2"
|
__version__="1.6.3"
|
||||||
@@ -59,6 +59,7 @@ class AgentConfig:
|
|||||||
rpm: int | None = None # 每分钟请求数限制
|
rpm: int | None = None # 每分钟请求数限制
|
||||||
tpm: int | None = None # 每分钟Token数限制
|
tpm: int | None = None # 每分钟Token数限制
|
||||||
provider: ProviderType | None = None
|
provider: ProviderType | None = None
|
||||||
|
source_lang: str | None = None # qwen-mt: 源语言
|
||||||
|
|
||||||
|
|
||||||
class TotalErrorCounter:
|
class TotalErrorCounter:
|
||||||
@@ -290,6 +291,223 @@ _COMPLEX_SCRIPT_PATTERN = re.compile(
|
|||||||
r'[\u2e80-\u9fff\u0400-\u04ff\u0600-\u06ff\u0e00-\u0e7f\u0900-\u097f]'
|
r'[\u2e80-\u9fff\u0400-\u04ff\u0600-\u06ff\u0e00-\u0e7f\u0900-\u097f]'
|
||||||
)
|
)
|
||||||
|
|
||||||
|
def _normalize_mt_lang_key(lang: str) -> str:
|
||||||
|
key = str(lang).strip().lower()
|
||||||
|
key = key.replace("_", "-")
|
||||||
|
key = key.replace("'", "'").replace("'", "'")
|
||||||
|
key = key.replace("–", "-").replace("—", "-")
|
||||||
|
key = re.sub(r"\s+", " ", key)
|
||||||
|
return key
|
||||||
|
|
||||||
|
|
||||||
|
_MT_LANG_BY_CODE = {
|
||||||
|
"en": "English",
|
||||||
|
"zh": "Chinese",
|
||||||
|
"zh-tw": "Traditional Chinese",
|
||||||
|
"ru": "Russian",
|
||||||
|
"ja": "Japanese",
|
||||||
|
"ko": "Korean",
|
||||||
|
"es": "Spanish",
|
||||||
|
"fr": "French",
|
||||||
|
"pt": "Portuguese",
|
||||||
|
"de": "German",
|
||||||
|
"it": "Italian",
|
||||||
|
"th": "Thai",
|
||||||
|
"vi": "Vietnamese",
|
||||||
|
"id": "Indonesian",
|
||||||
|
"ms": "Malay",
|
||||||
|
"ar": "Arabic",
|
||||||
|
"hi": "Hindi",
|
||||||
|
"he": "Hebrew",
|
||||||
|
"my": "Burmese",
|
||||||
|
"ta": "Tamil",
|
||||||
|
"ur": "Urdu",
|
||||||
|
"bn": "Bengali",
|
||||||
|
"pl": "Polish",
|
||||||
|
"nl": "Dutch",
|
||||||
|
"ro": "Romanian",
|
||||||
|
"tr": "Turkish",
|
||||||
|
"km": "Khmer",
|
||||||
|
"lo": "Lao",
|
||||||
|
"yue": "Cantonese",
|
||||||
|
"cs": "Czech",
|
||||||
|
"el": "Greek",
|
||||||
|
"sv": "Swedish",
|
||||||
|
"hu": "Hungarian",
|
||||||
|
"da": "Danish",
|
||||||
|
"fi": "Finnish",
|
||||||
|
"uk": "Ukrainian",
|
||||||
|
"bg": "Bulgarian",
|
||||||
|
"sr": "Serbian",
|
||||||
|
"te": "Telugu",
|
||||||
|
"af": "Afrikaans",
|
||||||
|
"hy": "Armenian",
|
||||||
|
"as": "Assamese",
|
||||||
|
"ast": "Asturian",
|
||||||
|
"eu": "Basque",
|
||||||
|
"be": "Belarusian",
|
||||||
|
"bs": "Bosnian",
|
||||||
|
"ca": "Catalan",
|
||||||
|
"ceb": "Cebuano",
|
||||||
|
"hr": "Croatian",
|
||||||
|
"arz": "Egyptian Arabic",
|
||||||
|
"et": "Estonian",
|
||||||
|
"gl": "Galician",
|
||||||
|
"ka": "Georgian",
|
||||||
|
"gu": "Gujarati",
|
||||||
|
"is": "Icelandic",
|
||||||
|
"jv": "Javanese",
|
||||||
|
"kn": "Kannada",
|
||||||
|
"kk": "Kazakh",
|
||||||
|
"lv": "Latvian",
|
||||||
|
"lt": "Lithuanian",
|
||||||
|
"lb": "Luxembourgish",
|
||||||
|
"mk": "Macedonian",
|
||||||
|
"mai": "Maithili",
|
||||||
|
"mt": "Maltese",
|
||||||
|
"mr": "Marathi",
|
||||||
|
"acm": "Mesopotamian Arabic",
|
||||||
|
"ary": "Moroccan Arabic",
|
||||||
|
"ars": "Najdi Arabic",
|
||||||
|
"ne": "Nepali",
|
||||||
|
"az": "North Azerbaijani",
|
||||||
|
"apc": "North Levantine Arabic",
|
||||||
|
"uz": "Northern Uzbek",
|
||||||
|
"nb": "Norwegian Bokmål",
|
||||||
|
"nn": "Norwegian Nynorsk",
|
||||||
|
"oc": "Occitan",
|
||||||
|
"or": "Odia",
|
||||||
|
"pag": "Pangasinan",
|
||||||
|
"scn": "Sicilian",
|
||||||
|
"sd": "Sindhi",
|
||||||
|
"si": "Sinhala",
|
||||||
|
"sk": "Slovak",
|
||||||
|
"sl": "Slovenian",
|
||||||
|
"ajp": "South Levantine Arabic",
|
||||||
|
"sw": "Swahili",
|
||||||
|
"tl": "Tagalog",
|
||||||
|
"acq": "Ta'izzi-Adeni Arabic",
|
||||||
|
"sq": "Tosk Albanian",
|
||||||
|
"aeb": "Tunisian Arabic",
|
||||||
|
"vec": "Venetian",
|
||||||
|
"war": "Waray",
|
||||||
|
"cy": "Welsh",
|
||||||
|
"fa": "Western Persian",
|
||||||
|
}
|
||||||
|
|
||||||
|
# Reverse index: normalized English language name -> canonical English name.
# Iterating the dict values directly (instead of a set) keeps the build order
# deterministic; duplicate names simply overwrite themselves.
_MT_LANG_BY_NAME = {
    _normalize_mt_lang_key(name): name for name in _MT_LANG_BY_CODE.values()
}
|
||||||
|
|
||||||
|
_MT_LANG_ALIASES = {
|
||||||
|
# Existing UI/common aliases
|
||||||
|
"english": "English",
|
||||||
|
"英语": "English",
|
||||||
|
"英文": "English",
|
||||||
|
"简体中文": "Chinese",
|
||||||
|
"中文": "Chinese",
|
||||||
|
"simplified chinese": "Chinese",
|
||||||
|
"chinese": "Chinese",
|
||||||
|
"traditional chinese": "Traditional Chinese",
|
||||||
|
"繁体中文": "Traditional Chinese",
|
||||||
|
"zh-hans": "Chinese",
|
||||||
|
"zh-cn": "Chinese",
|
||||||
|
"zh-hant": "Traditional Chinese",
|
||||||
|
# Full Chinese aliases from qwen-mt language list
|
||||||
|
"俄语": "Russian",
|
||||||
|
"日语": "Japanese",
|
||||||
|
"韩语": "Korean",
|
||||||
|
"西班牙语": "Spanish",
|
||||||
|
"法语": "French",
|
||||||
|
"葡萄牙语": "Portuguese",
|
||||||
|
"德语": "German",
|
||||||
|
"意大利语": "Italian",
|
||||||
|
"泰语": "Thai",
|
||||||
|
"越南语": "Vietnamese",
|
||||||
|
"印度尼西亚语": "Indonesian",
|
||||||
|
"马来语": "Malay",
|
||||||
|
"阿拉伯语": "Arabic",
|
||||||
|
"印地语": "Hindi",
|
||||||
|
"希伯来语": "Hebrew",
|
||||||
|
"缅甸语": "Burmese",
|
||||||
|
"泰米尔语": "Tamil",
|
||||||
|
"乌尔都语": "Urdu",
|
||||||
|
"孟加拉语": "Bengali",
|
||||||
|
"波兰语": "Polish",
|
||||||
|
"荷兰语": "Dutch",
|
||||||
|
"罗马尼亚语": "Romanian",
|
||||||
|
"土耳其语": "Turkish",
|
||||||
|
"高棉语": "Khmer",
|
||||||
|
"老挝语": "Lao",
|
||||||
|
"粤语": "Cantonese",
|
||||||
|
"捷克语": "Czech",
|
||||||
|
"希腊语": "Greek",
|
||||||
|
"瑞典语": "Swedish",
|
||||||
|
"匈牙利语": "Hungarian",
|
||||||
|
"丹麦语": "Danish",
|
||||||
|
"芬兰语": "Finnish",
|
||||||
|
"乌克兰语": "Ukrainian",
|
||||||
|
"保加利亚语": "Bulgarian",
|
||||||
|
"塞尔维亚语": "Serbian",
|
||||||
|
"泰卢固语": "Telugu",
|
||||||
|
"南非荷兰语": "Afrikaans",
|
||||||
|
"亚美尼亚语": "Armenian",
|
||||||
|
"阿萨姆语": "Assamese",
|
||||||
|
"阿斯图里亚斯语": "Asturian",
|
||||||
|
"巴斯克语": "Basque",
|
||||||
|
"白俄罗斯语": "Belarusian",
|
||||||
|
"波斯尼亚语": "Bosnian",
|
||||||
|
"加泰罗尼亚语": "Catalan",
|
||||||
|
"宿务语": "Cebuano",
|
||||||
|
"克罗地亚语": "Croatian",
|
||||||
|
"埃及阿拉伯语": "Egyptian Arabic",
|
||||||
|
"爱沙尼亚语": "Estonian",
|
||||||
|
"加利西亚语": "Galician",
|
||||||
|
"格鲁吉亚语": "Georgian",
|
||||||
|
"古吉拉特语": "Gujarati",
|
||||||
|
"冰岛语": "Icelandic",
|
||||||
|
"爪哇语": "Javanese",
|
||||||
|
"卡纳达语": "Kannada",
|
||||||
|
"哈萨克语": "Kazakh",
|
||||||
|
"拉脱维亚语": "Latvian",
|
||||||
|
"立陶宛语": "Lithuanian",
|
||||||
|
"卢森堡语": "Luxembourgish",
|
||||||
|
"马其顿语": "Macedonian",
|
||||||
|
"马加希语": "Maithili",
|
||||||
|
"马耳他语": "Maltese",
|
||||||
|
"马拉地语": "Marathi",
|
||||||
|
"美索不达米亚阿拉伯语": "Mesopotamian Arabic",
|
||||||
|
"摩洛哥阿拉伯语": "Moroccan Arabic",
|
||||||
|
"内志阿拉伯语": "Najdi Arabic",
|
||||||
|
"尼泊尔语": "Nepali",
|
||||||
|
"北阿塞拜疆语": "North Azerbaijani",
|
||||||
|
"北黎凡特阿拉伯语": "North Levantine Arabic",
|
||||||
|
"北乌兹别克语": "Northern Uzbek",
|
||||||
|
"书面语挪威语": "Norwegian Bokmål",
|
||||||
|
"新挪威语": "Norwegian Nynorsk",
|
||||||
|
"奥克语": "Occitan",
|
||||||
|
"奥里亚语": "Odia",
|
||||||
|
"邦阿西楠语": "Pangasinan",
|
||||||
|
"西西里语": "Sicilian",
|
||||||
|
"信德语": "Sindhi",
|
||||||
|
"僧伽罗语": "Sinhala",
|
||||||
|
"斯洛伐克语": "Slovak",
|
||||||
|
"斯洛文尼亚语": "Slovenian",
|
||||||
|
"南黎凡特阿拉伯语": "South Levantine Arabic",
|
||||||
|
"斯瓦希里语": "Swahili",
|
||||||
|
"他加禄语": "Tagalog",
|
||||||
|
"塔伊兹-亚丁阿拉伯语": "Ta'izzi-Adeni Arabic",
|
||||||
|
"托斯克阿尔巴尼亚语": "Tosk Albanian",
|
||||||
|
"突尼斯阿拉伯语": "Tunisian Arabic",
|
||||||
|
"威尼斯语": "Venetian",
|
||||||
|
"瓦莱语": "Waray",
|
||||||
|
"威尔士语": "Welsh",
|
||||||
|
"西波斯语": "Western Persian",
|
||||||
|
# English punctuation/variant aliases
|
||||||
|
"norwegian bokmal": "Norwegian Bokmål",
|
||||||
|
"ta'izzi-adeni arabic": "Ta'izzi-Adeni Arabic",
|
||||||
|
}
|
||||||
|
|
||||||
class Agent:
|
class Agent:
|
||||||
|
|
||||||
def __init__(self, config: AgentConfig):
|
def __init__(self, config: AgentConfig):
|
||||||
@@ -316,6 +534,11 @@ class Agent:
|
|||||||
self.rate_limiter = RateLimiter(rpm=config.rpm, tpm=config.tpm)
|
self.rate_limiter = RateLimiter(rpm=config.rpm, tpm=config.tpm)
|
||||||
|
|
||||||
self.provider = config.provider if config.provider is not None else get_provider_by_domain(self.domain)
|
self.provider = config.provider if config.provider is not None else get_provider_by_domain(self.domain)
|
||||||
|
self.is_mt_mode = "mt" in self.model_id.lower()
|
||||||
|
self.mt_source_lang = config.source_lang if config.source_lang else "auto"
|
||||||
|
self.mt_target_lang = getattr(config, "to_lang", None)
|
||||||
|
self.mt_domains = getattr(config, "custom_prompt", None)
|
||||||
|
self.mt_glossary_dict = getattr(config, "glossary_dict", None)
|
||||||
|
|
||||||
def _estimate_tokens(self, text: str) -> int:
|
def _estimate_tokens(self, text: str) -> int:
|
||||||
"""
|
"""
|
||||||
@@ -352,6 +575,52 @@ class Agent:
|
|||||||
elif self.thinking == "disable":
|
elif self.thinking == "disable":
|
||||||
data[field_thinking] = val_disable
|
data[field_thinking] = val_disable
|
||||||
|
|
||||||
|
def _normalize_mt_lang(self, lang: str | None) -> str | None:
    """Resolve a user-supplied language to its canonical English name.

    Args:
        lang: Language code, English name, or alias; may be ``None``.

    Returns:
        ``None`` when ``lang`` is ``None`` or blank.  Otherwise the canonical
        name found in the code, name, or alias table (checked in that order),
        or the stripped input unchanged when no table knows it.
    """
    if lang is None:
        return None
    lang_text = str(lang).strip()
    if not lang_text:
        return None

    key = _normalize_mt_lang_key(lang_text)
    # Precedence matters: a language code wins over a name, a name over an alias.
    for table in (_MT_LANG_BY_CODE, _MT_LANG_BY_NAME, _MT_LANG_ALIASES):
        canonical = table.get(key)
        if canonical is not None:
            return canonical
    # Unknown values are passed through untouched rather than rejected.
    return lang_text
|
||||||
|
|
||||||
|
def _build_mt_translation_options(self, prompt: str = "") -> dict:
|
||||||
|
translation_options = {}
|
||||||
|
|
||||||
|
source_lang = self._normalize_mt_lang(self.mt_source_lang)
|
||||||
|
if source_lang:
|
||||||
|
translation_options["source_lang"] = source_lang
|
||||||
|
|
||||||
|
target_lang = self._normalize_mt_lang(self.mt_target_lang)
|
||||||
|
if target_lang:
|
||||||
|
translation_options["target_lang"] = target_lang
|
||||||
|
|
||||||
|
domains = str(self.mt_domains).strip() if self.mt_domains is not None else ""
|
||||||
|
if domains:
|
||||||
|
translation_options["domains"] = domains
|
||||||
|
|
||||||
|
if self.mt_glossary_dict:
|
||||||
|
terminology_list = [
|
||||||
|
{"source": src, "target": tgt}
|
||||||
|
for src, tgt in self.mt_glossary_dict.items()
|
||||||
|
if src and tgt and src.lower() in prompt.lower()
|
||||||
|
]
|
||||||
|
if terminology_list:
|
||||||
|
translation_options["terms"] = terminology_list
|
||||||
|
|
||||||
|
return translation_options
|
||||||
|
|
||||||
|
def _build_mt_user_prompt(self, prompt: str, system_prompt: str) -> str:
|
||||||
|
# MT模式下,直接返回原始prompt,不添加任何system prompt
|
||||||
|
# MT模型会把整个user prompt当作待翻译内容
|
||||||
|
return prompt
|
||||||
|
|
||||||
def _prepare_request_data(
|
def _prepare_request_data(
|
||||||
self, prompt: str, system_prompt: str, temperature=None, top_p=0.9, json_format=False
|
self, prompt: str, system_prompt: str, temperature=None, top_p=0.9, json_format=False
|
||||||
):
|
):
|
||||||
@@ -361,6 +630,19 @@ class Agent:
|
|||||||
"Content-Type": "application/json",
|
"Content-Type": "application/json",
|
||||||
"Authorization": f"Bearer {self.key}",
|
"Authorization": f"Bearer {self.key}",
|
||||||
}
|
}
|
||||||
|
|
||||||
|
if self.is_mt_mode:
|
||||||
|
data = {
|
||||||
|
"model": self.model_id,
|
||||||
|
"messages": [
|
||||||
|
{"role": "user", "content": self._build_mt_user_prompt(prompt, system_prompt)},
|
||||||
|
],
|
||||||
|
}
|
||||||
|
translation_options = self._build_mt_translation_options(prompt=prompt)
|
||||||
|
if translation_options:
|
||||||
|
data["translation_options"] = translation_options
|
||||||
|
return headers, data
|
||||||
|
|
||||||
data = {
|
data = {
|
||||||
"model": self.model_id,
|
"model": self.model_id,
|
||||||
"messages": [
|
"messages": [
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ ProviderType: TypeAlias = Literal["ollama", "bigmodel", "aliyuncs", "volces", "g
|
|||||||
def get_provider_by_domain(domain:str)->ProviderType:
|
def get_provider_by_domain(domain:str)->ProviderType:
|
||||||
if domain == "open.bigmodel.cn":
|
if domain == "open.bigmodel.cn":
|
||||||
return "bigmodel"
|
return "bigmodel"
|
||||||
elif domain == "dashscope.aliyuncs.com":
|
elif "dashscope.aliyuncs.com" in domain:
|
||||||
return "aliyuncs"
|
return "aliyuncs"
|
||||||
elif domain == "ark.cn-beijing.volces.com":
|
elif domain == "ark.cn-beijing.volces.com":
|
||||||
return "volces"
|
return "volces"
|
||||||
|
|||||||
@@ -15,7 +15,6 @@ from docutranslate.agents.agent import PartialAgentResultError, AgentResultError
|
|||||||
from docutranslate.glossary.glossary import Glossary
|
from docutranslate.glossary.glossary import Glossary
|
||||||
from docutranslate.utils.json_utils import segments2json_chunks, fix_json_string
|
from docutranslate.utils.json_utils import segments2json_chunks, fix_json_string
|
||||||
|
|
||||||
|
|
||||||
def generate_prompt(json_segments: str, to_lang: str):
|
def generate_prompt(json_segments: str, to_lang: str):
|
||||||
return f"""
|
return f"""
|
||||||
You will receive a sequence of original text segments to be translated, represented in JSON format. The keys are segment IDs, and the values are the text content to be translated.
|
You will receive a sequence of original text segments to be translated, represented in JSON format. The keys are segment IDs, and the values are the text content to be translated.
|
||||||
@@ -32,15 +31,15 @@ For each Key-Value Pair in the JSON, translate the contents of the value into {t
|
|||||||
> The segment IDs in the output must exactly match those in the input. And all segment IDs in input must appear in the output.
|
> The segment IDs in the output must exactly match those in the input. And all segment IDs in input must appear in the output.
|
||||||
> If necessary, two segments can only be translated together, the translation should be proportionally allocated to the corresponding key's value based on the word count ratio of the segments.
|
> If necessary, two segments can only be translated together, the translation should be proportionally allocated to the corresponding key's value based on the word count ratio of the segments.
|
||||||
|
|
||||||
Here is an example of the expected format:
|
Here is an example of the expected format (Note: This is ONLY a format example, do NOT translate the example content):
|
||||||
|
|
||||||
<example>
|
<example>
|
||||||
Input:
|
Input:
|
||||||
|
|
||||||
```json
|
```json
|
||||||
{{
|
{{
|
||||||
"3":source,
|
"EXAMPLE_KEY_1": "source text",
|
||||||
"4":source,
|
"EXAMPLE_KEY_2": "source text"
|
||||||
}}
|
}}
|
||||||
```
|
```
|
||||||
|
|
||||||
@@ -48,8 +47,8 @@ Output(target language: {to_lang}):
|
|||||||
|
|
||||||
```json
|
```json
|
||||||
{{
|
{{
|
||||||
"3":translation,
|
"EXAMPLE_KEY_1": "translated text",
|
||||||
"4":translation,
|
"EXAMPLE_KEY_2": "translated text"
|
||||||
}}
|
}}
|
||||||
```
|
```
|
||||||
For statements that must be combined during translation, employ merging at the minimal structural level. The total number of keys must remain unchanged after merging, and any empty values should be retained.
|
For statements that must be combined during translation, employ merging at the minimal structural level. The total number of keys must remain unchanged after merging, and any empty values should be retained.
|
||||||
@@ -58,18 +57,20 @@ Below is an example of how merging should be done when necessary:
|
|||||||
input:
|
input:
|
||||||
```json
|
```json
|
||||||
{{
|
{{
|
||||||
"3":"汤姆说:“杰克你",
|
"EXAMPLE_KEY_1":"汤姆说:\"杰克你",
|
||||||
"4":"好”。"
|
"EXAMPLE_KEY_2":"好\"。"
|
||||||
}}
|
}}
|
||||||
```
|
```
|
||||||
output:
|
output:
|
||||||
```json
|
```json
|
||||||
{{
|
{{
|
||||||
"3":"Tom says:\"Hello Jack.\"",
|
"EXAMPLE_KEY_1":"Tom says:\"Hello Jack.\"",
|
||||||
"4":""
|
"EXAMPLE_KEY_2":""
|
||||||
}}
|
}}
|
||||||
```
|
```
|
||||||
</example>
|
</example>
|
||||||
|
|
||||||
|
IMPORTANT: Only translate the content in the <input> section above. Do NOT include or translate the example content from this <example> section in your output.
|
||||||
Please return the translated JSON directly without including any additional information and preserve special tags or untranslatable elements (such as code, brand names, technical terms) as they are.
|
Please return the translated JSON directly without including any additional information and preserve special tags or untranslatable elements (such as code, brand names, technical terms) as they are.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
@@ -119,17 +120,12 @@ class SegmentsTranslateAgent(Agent):
|
|||||||
return system_prompt, prompt
|
return system_prompt, prompt
|
||||||
|
|
||||||
def _result_handler(self, result: str, origin_prompt: str, logger: Logger):
|
def _result_handler(self, result: str, origin_prompt: str, logger: Logger):
|
||||||
"""
|
"""处理非MT模式的JSON翻译响应。"""
|
||||||
处理成功的API响应。
|
# --- JSON-based ---
|
||||||
- 如果键完全匹配,返回翻译结果。
|
|
||||||
- 如果键不匹配,构造一个部分成功的结果,并通过 PartialTranslationError 异常抛出,以触发重试。
|
|
||||||
- 其他错误(如JSON解析失败、模型偷懒)则抛出普通 ValueError 触发重试。
|
|
||||||
"""
|
|
||||||
original_segments = get_original_segments(origin_prompt)
|
original_segments = get_original_segments(origin_prompt)
|
||||||
result = get_target_segments(result)
|
result = get_target_segments(result)
|
||||||
if result == "":
|
if result == "":
|
||||||
if original_segments.strip() != "":
|
if original_segments.strip() != "":
|
||||||
# print(f"【测试】origin_prompt:\n{origin_prompt}\nresult:\n{result}")
|
|
||||||
raise AgentResultError("result为空值但原文不为空")
|
raise AgentResultError("result为空值但原文不为空")
|
||||||
return {}
|
return {}
|
||||||
try:
|
try:
|
||||||
@@ -146,9 +142,7 @@ class SegmentsTranslateAgent(Agent):
|
|||||||
original_keys = set(original_chunk.keys())
|
original_keys = set(original_chunk.keys())
|
||||||
result_keys = set(repaired_result.keys())
|
result_keys = set(repaired_result.keys())
|
||||||
|
|
||||||
# 如果键不完全匹配
|
|
||||||
if original_keys != result_keys:
|
if original_keys != result_keys:
|
||||||
# 仍然先构造一个最完整的“部分结果”
|
|
||||||
final_chunk = {}
|
final_chunk = {}
|
||||||
common_keys = original_keys.intersection(result_keys)
|
common_keys = original_keys.intersection(result_keys)
|
||||||
missing_keys = original_keys - result_keys
|
missing_keys = original_keys - result_keys
|
||||||
@@ -163,40 +157,159 @@ class SegmentsTranslateAgent(Agent):
|
|||||||
for key in missing_keys:
|
for key in missing_keys:
|
||||||
final_chunk[key] = str(original_chunk[key])
|
final_chunk[key] = str(original_chunk[key])
|
||||||
|
|
||||||
|
raise PartialAgentResultError("键不匹配,触发重试", partial_result=final_chunk,
|
||||||
|
append_prompt=f"\nBe careful not to omit any keys from the input; do not combine sentences when translating.\n")
|
||||||
|
|
||||||
# 抛出自定义异常,将部分结果和错误信息一起传递出去
|
|
||||||
raise PartialAgentResultError("键不匹配,触发重试", partial_result=final_chunk,append_prompt=f"\nBe careful not to omit any keys from the input; do not combine sentences when translating.\n")
|
|
||||||
|
|
||||||
# 如果键完全匹配(理想情况),正常返回
|
|
||||||
for key, value in repaired_result.items():
|
for key, value in repaired_result.items():
|
||||||
repaired_result[key] = str(value)
|
repaired_result[key] = str(value)
|
||||||
|
|
||||||
return repaired_result
|
return repaired_result
|
||||||
|
|
||||||
except (RuntimeError, JSONDecodeError) as e:
|
except (RuntimeError, JSONDecodeError) as e:
|
||||||
# 对于JSON解析等硬性错误,继续抛出普通ValueError
|
|
||||||
raise AgentResultError(f"结果处理失败: {e.__repr__()}")
|
raise AgentResultError(f"结果处理失败: {e.__repr__()}")
|
||||||
|
|
||||||
def _error_result_handler(self, origin_prompt: str, logger: Logger):
|
def _error_result_handler(self, origin_prompt: str, logger: Logger):
|
||||||
"""
|
"""非MT模式: 所有重试失败后返回原文。"""
|
||||||
处理在所有重试后仍然失败的请求。
|
|
||||||
作为备用方案,返回原文内容,并将所有值转换为字符串。
|
|
||||||
"""
|
|
||||||
original_segments = get_original_segments(origin_prompt)
|
original_segments = get_original_segments(origin_prompt)
|
||||||
if original_segments == "":
|
if original_segments == "":
|
||||||
return {}
|
return {}
|
||||||
try:
|
try:
|
||||||
original_chunk = json_repair.loads(original_segments)
|
original_chunk = json_repair.loads(original_segments)
|
||||||
# 此处逻辑保留,作为最终的兜底方案
|
|
||||||
for key, value in original_chunk.items():
|
for key, value in original_chunk.items():
|
||||||
original_chunk[key] = f"{value}"
|
original_chunk[key] = f"{value}"
|
||||||
return original_chunk
|
return original_chunk
|
||||||
except (RuntimeError, JSONDecodeError):
|
except (RuntimeError, JSONDecodeError):
|
||||||
logger.error(f"原始prompt也不是有效的json格式: {original_segments}")
|
logger.error(f"原始prompt也不是有效的json格式: {original_segments}")
|
||||||
# 如果原始prompt本身也无效,返回一个清晰的错误对象
|
|
||||||
return {"error": f"{original_segments}"}
|
return {"error": f"{original_segments}"}
|
||||||
|
|
||||||
|
def _mt_json_result_handler(self, result: str, origin_prompt: str, logger: Logger) -> dict:
    """Parse an MT JSON-batch response into ``{key: translation}``.

    Both the original prompt and the response are parsed with ``json_repair``.
    Every key of the original chunk appears in the output; a key missing from
    the response falls back to its original text.

    Args:
        result: Raw response text from the MT model.
        origin_prompt: The JSON chunk that was sent as the prompt.
        logger: Unused here; part of the result-handler call signature.

    Returns:
        A dict with the original keys and string values.

    Raises:
        AgentResultError: If parsing fails, or if either the prompt or the
            response does not parse to a dict.
    """
    try:
        original_chunk = json_repair.loads(origin_prompt)
        repaired = json_repair.loads(result)
    except (RuntimeError, JSONDecodeError) as exc:
        raise AgentResultError("MT JSON parse failed") from exc

    # A non-dict prompt cannot be mapped key-by-key; report it through the
    # agent's own error type instead of crashing on the indexing below.
    if not isinstance(original_chunk, dict):
        raise AgentResultError(f"MT JSON prompt not dict: {type(original_chunk)}")
    if not isinstance(repaired, dict):
        raise AgentResultError(f"MT JSON result not dict: {type(repaired)}")

    return {
        key: str(repaired.get(key, original_text))
        for key, original_text in original_chunk.items()
    }
|
||||||
|
|
||||||
|
def _mt_json_error_handler(self, origin_prompt: str, logger: Logger) -> dict:
    """Fallback for a failed MT JSON batch: return the original text.

    Args:
        origin_prompt: The JSON chunk that was sent as the prompt.
        logger: Unused here; part of the error-handler call signature.

    Returns:
        The parsed chunk with every value converted to ``str``, or
        ``{"0": origin_prompt}`` when the prompt does not parse to a dict.
    """
    try:
        original_chunk = json_repair.loads(origin_prompt)
    except Exception:
        # Deliberately best-effort: this is the last-resort fallback and must
        # not raise, whatever the parser does with malformed input.
        original_chunk = None

    if isinstance(original_chunk, dict):
        return {k: str(v) for k, v in original_chunk.items()}
    return {"0": origin_prompt}
|
||||||
|
|
||||||
|
def _mt_individual_result_handler(self, result: str, origin_prompt: str, logger: Logger) -> str:
|
||||||
|
"""MT individual: 直接返回翻译结果。"""
|
||||||
|
return result.strip()
|
||||||
|
|
||||||
|
def _mt_individual_error_handler(self, origin_prompt: str, logger: Logger) -> str:
|
||||||
|
"""MT individual error: 返回原文。"""
|
||||||
|
return origin_prompt
|
||||||
|
|
||||||
|
def _apply_mt_batch_results(self, segments: list[str], batch_results: list,
|
||||||
|
batch_indices: list[list[int]]) -> list[str]:
|
||||||
|
"""应用批处理结果。对计数不匹配的批次,逐条回退重译。"""
|
||||||
|
all_translated = [""] * len(segments)
|
||||||
|
mismatch_batches = []
|
||||||
|
|
||||||
|
for batch_parts, indices in zip(batch_results, batch_indices):
|
||||||
|
if len(batch_parts) == len(indices):
|
||||||
|
for j, idx in enumerate(indices):
|
||||||
|
all_translated[idx] = batch_parts[j]
|
||||||
|
else:
|
||||||
|
self.logger.warning(
|
||||||
|
f"MT batch mismatch: got {len(batch_parts)} parts, expected {len(indices)}. "
|
||||||
|
f"Falling back to individual translation."
|
||||||
|
)
|
||||||
|
mismatch_batches.append(indices)
|
||||||
|
|
||||||
|
return all_translated, mismatch_batches
|
||||||
|
|
||||||
|
def _retranslate_mismatched(self, segments: list[str],
|
||||||
|
mismatch_batches: list[list[int]]) -> list[str]:
|
||||||
|
"""对计数不匹配的批次,逐条重新翻译。"""
|
||||||
|
# Collect all mismatched indices
|
||||||
|
all_mismatched = []
|
||||||
|
for indices in mismatch_batches:
|
||||||
|
all_mismatched.extend(indices)
|
||||||
|
|
||||||
|
if not all_mismatched:
|
||||||
|
return []
|
||||||
|
|
||||||
|
self.logger.info(f"Retranslating {len(all_mismatched)} mismatched segments individually")
|
||||||
|
individual_segments = [segments[i] for i in all_mismatched]
|
||||||
|
individual_results = super().send_prompts(
|
||||||
|
prompts=individual_segments,
|
||||||
|
result_handler=self._mt_individual_result_handler,
|
||||||
|
error_result_handler=self._mt_individual_error_handler,
|
||||||
|
)
|
||||||
|
|
||||||
|
result_map = {}
|
||||||
|
for idx, trans in zip(all_mismatched, individual_results):
|
||||||
|
result_map[idx] = trans
|
||||||
|
return result_map
|
||||||
|
|
||||||
def send_segments(self, segments: list[str], chunk_size: int) -> list[str]:
|
def send_segments(self, segments: list[str], chunk_size: int) -> list[str]:
|
||||||
|
if self.is_mt_mode:
|
||||||
|
if not segments:
|
||||||
|
return []
|
||||||
|
# JSON batching: 616 segments → ~12 JSON chunks
|
||||||
|
indexed_originals, chunks, merged_indices_list = segments2json_chunks(segments, chunk_size)
|
||||||
|
prompts = [json.dumps(chunk, ensure_ascii=False) for chunk in chunks]
|
||||||
|
translated_chunks = super().send_prompts(
|
||||||
|
prompts=prompts,
|
||||||
|
result_handler=self._mt_json_result_handler,
|
||||||
|
error_result_handler=self._mt_json_error_handler,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Detect unchanged segments (MT model returned original text)
|
||||||
|
indexed_translated = indexed_originals.copy()
|
||||||
|
failed_indices = []
|
||||||
|
for chunk_result, chunk_original in zip(translated_chunks, chunks):
|
||||||
|
if not isinstance(chunk_result, dict):
|
||||||
|
for k in chunk_original:
|
||||||
|
failed_indices.append(int(k))
|
||||||
|
continue
|
||||||
|
for key in chunk_original:
|
||||||
|
val = chunk_result.get(key, "")
|
||||||
|
if isinstance(val, str) and val.strip() == str(chunk_original[key]).strip():
|
||||||
|
failed_indices.append(int(key))
|
||||||
|
indexed_translated[key] = str(val)
|
||||||
|
|
||||||
|
# Retry failed segments individually
|
||||||
|
if failed_indices:
|
||||||
|
self.logger.info(
|
||||||
|
f"MT JSON batch: {len(failed_indices)}/{len(segments)} segments unchanged, retrying individually"
|
||||||
|
)
|
||||||
|
retry_segments = [segments[i] for i in failed_indices]
|
||||||
|
retry_results = super().send_prompts(
|
||||||
|
prompts=retry_segments,
|
||||||
|
result_handler=self._mt_individual_result_handler,
|
||||||
|
error_result_handler=self._mt_individual_error_handler,
|
||||||
|
)
|
||||||
|
for idx, trans in zip(failed_indices, retry_results):
|
||||||
|
indexed_translated[str(idx)] = trans
|
||||||
|
|
||||||
|
# Reconstruct result list
|
||||||
|
result = []
|
||||||
|
last_end = 0
|
||||||
|
ls = list(indexed_translated.values())
|
||||||
|
for start, end in merged_indices_list:
|
||||||
|
result.extend(ls[last_end:start])
|
||||||
|
result.append("".join(map(str, ls[start:end])))
|
||||||
|
last_end = end
|
||||||
|
result.extend(ls[last_end:])
|
||||||
|
return result
|
||||||
|
|
||||||
|
# Non-MT mode: JSON batch translation
|
||||||
indexed_originals, chunks, merged_indices_list = segments2json_chunks(segments, chunk_size)
|
indexed_originals, chunks, merged_indices_list = segments2json_chunks(segments, chunk_size)
|
||||||
prompts = [generate_prompt(json.dumps(chunk, ensure_ascii=False, indent=0), self.to_lang) for chunk in chunks]
|
prompts = [generate_prompt(json.dumps(chunk, ensure_ascii=False, indent=0), self.to_lang) for chunk in chunks]
|
||||||
translated_chunks = super().send_prompts(prompts=prompts, json_format=self.force_json,
|
translated_chunks = super().send_prompts(prompts=prompts, json_format=self.force_json,
|
||||||
@@ -220,7 +333,6 @@ class SegmentsTranslateAgent(Agent):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"处理chunk时发生未知错误: {e.__repr__()}")
|
self.logger.error(f"处理chunk时发生未知错误: {e.__repr__()}")
|
||||||
|
|
||||||
# 重建最终列表
|
|
||||||
result = []
|
result = []
|
||||||
last_end = 0
|
last_end = 0
|
||||||
ls = list(indexed_translated.values())
|
ls = list(indexed_translated.values())
|
||||||
@@ -234,10 +346,63 @@ class SegmentsTranslateAgent(Agent):
|
|||||||
return result
|
return result
|
||||||
|
|
||||||
async def send_segments_async(self, segments: list[str], chunk_size: int) -> list[str]:
|
async def send_segments_async(self, segments: list[str], chunk_size: int) -> list[str]:
|
||||||
|
if self.is_mt_mode:
|
||||||
|
if not segments:
|
||||||
|
return []
|
||||||
|
# JSON batching: 616 segments → ~12 JSON chunks
|
||||||
|
indexed_originals, chunks, merged_indices_list = await asyncio.to_thread(
|
||||||
|
segments2json_chunks, segments, chunk_size
|
||||||
|
)
|
||||||
|
prompts = [json.dumps(chunk, ensure_ascii=False) for chunk in chunks]
|
||||||
|
translated_chunks = await super().send_prompts_async(
|
||||||
|
prompts=prompts,
|
||||||
|
result_handler=self._mt_json_result_handler,
|
||||||
|
error_result_handler=self._mt_json_error_handler,
|
||||||
|
)
|
||||||
|
|
||||||
|
# Detect unchanged segments
|
||||||
|
indexed_translated = indexed_originals.copy()
|
||||||
|
failed_indices = []
|
||||||
|
for chunk_result, chunk_original in zip(translated_chunks, chunks):
|
||||||
|
if not isinstance(chunk_result, dict):
|
||||||
|
for k in chunk_original:
|
||||||
|
failed_indices.append(int(k))
|
||||||
|
continue
|
||||||
|
for key in chunk_original:
|
||||||
|
val = chunk_result.get(key, "")
|
||||||
|
if isinstance(val, str) and val.strip() == str(chunk_original[key]).strip():
|
||||||
|
failed_indices.append(int(key))
|
||||||
|
indexed_translated[key] = str(val)
|
||||||
|
|
||||||
|
# Retry failed segments individually
|
||||||
|
if failed_indices:
|
||||||
|
self.logger.info(
|
||||||
|
f"MT JSON batch: {len(failed_indices)}/{len(segments)} segments unchanged, retrying individually"
|
||||||
|
)
|
||||||
|
retry_segments = [segments[i] for i in failed_indices]
|
||||||
|
retry_results = await super().send_prompts_async(
|
||||||
|
prompts=retry_segments,
|
||||||
|
result_handler=self._mt_individual_result_handler,
|
||||||
|
error_result_handler=self._mt_individual_error_handler,
|
||||||
|
)
|
||||||
|
for idx, trans in zip(failed_indices, retry_results):
|
||||||
|
indexed_translated[str(idx)] = trans
|
||||||
|
|
||||||
|
# Reconstruct result list
|
||||||
|
result = []
|
||||||
|
last_end = 0
|
||||||
|
ls = list(indexed_translated.values())
|
||||||
|
for start, end in merged_indices_list:
|
||||||
|
result.extend(ls[last_end:start])
|
||||||
|
result.append("".join(map(str, ls[start:end])))
|
||||||
|
last_end = end
|
||||||
|
result.extend(ls[last_end:])
|
||||||
|
return result
|
||||||
|
|
||||||
|
# Non-MT mode: JSON batch translation
|
||||||
indexed_originals, chunks, merged_indices_list = await asyncio.to_thread(segments2json_chunks, segments,
|
indexed_originals, chunks, merged_indices_list = await asyncio.to_thread(segments2json_chunks, segments,
|
||||||
chunk_size)
|
chunk_size)
|
||||||
prompts = [generate_prompt(json.dumps(chunk, ensure_ascii=False, indent=0), self.to_lang) for chunk in chunks]
|
prompts = [generate_prompt(json.dumps(chunk, ensure_ascii=False, indent=0), self.to_lang) for chunk in chunks]
|
||||||
|
|
||||||
translated_chunks = await super().send_prompts_async(prompts=prompts, force_json=self.force_json,
|
translated_chunks = await super().send_prompts_async(prompts=prompts, force_json=self.force_json,
|
||||||
pre_send_handler=self._pre_send_handler,
|
pre_send_handler=self._pre_send_handler,
|
||||||
result_handler=self._result_handler,
|
result_handler=self._result_handler,
|
||||||
@@ -250,7 +415,6 @@ class SegmentsTranslateAgent(Agent):
|
|||||||
continue
|
continue
|
||||||
for key, val in chunk.items():
|
for key, val in chunk.items():
|
||||||
if key in indexed_translated:
|
if key in indexed_translated:
|
||||||
# 此处不再需要 str(val),因为 _result_handler 已经处理好了
|
|
||||||
indexed_translated[key] = val
|
indexed_translated[key] = val
|
||||||
else:
|
else:
|
||||||
self.logger.warning(f"在结果chunk中发现未知键 '{key}',已忽略。")
|
self.logger.warning(f"在结果chunk中发现未知键 '{key}',已忽略。")
|
||||||
@@ -259,7 +423,6 @@ class SegmentsTranslateAgent(Agent):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.error(f"处理chunk时发生未知错误: {e.__repr__()}")
|
self.logger.error(f"处理chunk时发生未知错误: {e.__repr__()}")
|
||||||
|
|
||||||
# 重建最终列表
|
|
||||||
result = []
|
result = []
|
||||||
last_end = 0
|
last_end = 0
|
||||||
ls = list(indexed_translated.values())
|
ls = list(indexed_translated.values())
|
||||||
|
|||||||
@@ -56,9 +56,16 @@ from pydantic import (
|
|||||||
|
|
||||||
from docutranslate import __version__
|
from docutranslate import __version__
|
||||||
from docutranslate.agents.glossary_agent import GlossaryAgentConfig
|
from docutranslate.agents.glossary_agent import GlossaryAgentConfig
|
||||||
|
from docutranslate.core.model_presets import apply_model_preset_to_payload
|
||||||
from docutranslate.core.schemas import TranslatePayload, MarkdownWorkflowParams, TextWorkflowParams, JsonWorkflowParams, \
|
from docutranslate.core.schemas import TranslatePayload, MarkdownWorkflowParams, TextWorkflowParams, JsonWorkflowParams, \
|
||||||
XlsxWorkflowParams, DocxWorkflowParams, SrtWorkflowParams, EpubWorkflowParams, HtmlWorkflowParams, \
|
XlsxWorkflowParams, DocxWorkflowParams, SrtWorkflowParams, EpubWorkflowParams, HtmlWorkflowParams, \
|
||||||
AssWorkflowParams, PPTXWorkflowParams
|
AssWorkflowParams, PPTXWorkflowParams
|
||||||
|
from docutranslate.environment import (
|
||||||
|
DOCUTRANSLATE_RPM,
|
||||||
|
DOCUTRANSLATE_TPM,
|
||||||
|
get_default_model_preset,
|
||||||
|
get_public_model_presets,
|
||||||
|
)
|
||||||
from docutranslate.exporter.md.types import ConvertEngineType
|
from docutranslate.exporter.md.types import ConvertEngineType
|
||||||
# --- 核心代码 Imports ---
|
# --- 核心代码 Imports ---
|
||||||
from docutranslate.global_values.conditional_import import DOCLING_EXIST
|
from docutranslate.global_values.conditional_import import DOCLING_EXIST
|
||||||
@@ -1341,6 +1348,11 @@ async def _start_translation_task(
|
|||||||
file_contents: bytes,
|
file_contents: bytes,
|
||||||
original_filename: str,
|
original_filename: str,
|
||||||
):
|
):
|
||||||
|
try:
|
||||||
|
payload = apply_model_preset_to_payload(payload)
|
||||||
|
except ValueError as e:
|
||||||
|
raise HTTPException(status_code=400, detail=str(e))
|
||||||
|
|
||||||
# --- 新增: Auto 工作流路由逻辑 ---
|
# --- 新增: Auto 工作流路由逻辑 ---
|
||||||
if payload.workflow_type == "auto":
|
if payload.workflow_type == "auto":
|
||||||
detected_type = get_workflow_type_from_filename(original_filename)
|
detected_type = get_workflow_type_from_filename(original_filename)
|
||||||
@@ -2210,6 +2222,7 @@ async def service_get_app_version():
|
|||||||
async def service_flat_translate(
|
async def service_flat_translate(
|
||||||
request: Request,
|
request: Request,
|
||||||
file: UploadFile = File(..., description="要翻译的文件"),
|
file: UploadFile = File(..., description="要翻译的文件"),
|
||||||
|
model_preset: str = Form("", description="服务端模型预设ID"),
|
||||||
model_id: str = Form("", description="模型ID (例如: gpt-4o, glm-4-air),当 skip_translate=False 时必填"),
|
model_id: str = Form("", description="模型ID (例如: gpt-4o, glm-4-air),当 skip_translate=False 时必填"),
|
||||||
base_url: Optional[str] = Form("", description="LLM API 基础 URL (如不填则依赖环境变量或默认值,当 skip_translate=False 时必填)"),
|
base_url: Optional[str] = Form("", description="LLM API 基础 URL (如不填则依赖环境变量或默认值,当 skip_translate=False 时必填)"),
|
||||||
api_key: str = Form("xx", description="API Key (默认xx)"),
|
api_key: str = Form("xx", description="API Key (默认xx)"),
|
||||||
@@ -2307,6 +2320,7 @@ async def service_flat_translate(
|
|||||||
payload_dict = {
|
payload_dict = {
|
||||||
# --- 基础参数 ---
|
# --- 基础参数 ---
|
||||||
"workflow_type": workflow_type,
|
"workflow_type": workflow_type,
|
||||||
|
"model_preset": model_preset,
|
||||||
"base_url": base_url,
|
"base_url": base_url,
|
||||||
"api_key": api_key,
|
"api_key": api_key,
|
||||||
"model_id": model_id,
|
"model_id": model_id,
|
||||||
@@ -2389,6 +2403,7 @@ async def service_flat_translate(
|
|||||||
try:
|
try:
|
||||||
# 使用 TypeAdapter 进行多态校验,将扁平字典转为嵌套的 TranslatePayload 对象
|
# 使用 TypeAdapter 进行多态校验,将扁平字典转为嵌套的 TranslatePayload 对象
|
||||||
payload_obj = TypeAdapter(TranslatePayload).validate_python(payload_dict)
|
payload_obj = TypeAdapter(TranslatePayload).validate_python(payload_dict)
|
||||||
|
payload_obj = apply_model_preset_to_payload(payload_obj)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
raise HTTPException(status_code=400, detail=f"参数配置校验失败: {str(e)}")
|
raise HTTPException(status_code=400, detail=f"参数配置校验失败: {str(e)}")
|
||||||
|
|
||||||
@@ -2472,6 +2487,17 @@ async def service_flat_translate(
|
|||||||
})
|
})
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/api/config", tags=["Config"], summary="获取服务端环境变量默认配置")
async def get_config():
    """Return the model presets usable by the front end plus global defaults.

    Only the public view of each preset is exposed, together with the id of
    the default preset and the global RPM/TPM limits.
    """
    config = {
        "model_presets": get_public_model_presets(),
        "default_model_preset": get_default_model_preset(),
        "rpm": DOCUTRANSLATE_RPM,
        "tpm": DOCUTRANSLATE_TPM,
    }
    return JSONResponse(config)
|
||||||
|
|
||||||
|
|
||||||
@app.get("/", response_class=HTMLResponse, include_in_schema=False)
|
@app.get("/", response_class=HTMLResponse, include_in_schema=False)
|
||||||
async def main_page():
|
async def main_page():
|
||||||
index_path = Path(STATIC_DIR) / "index.html"
|
index_path = Path(STATIC_DIR) / "index.html"
|
||||||
|
|||||||
@@ -5,6 +5,7 @@
|
|||||||
import logging
|
import logging
|
||||||
|
|
||||||
from docutranslate.agents.glossary_agent import GlossaryAgentConfig
|
from docutranslate.agents.glossary_agent import GlossaryAgentConfig
|
||||||
|
from docutranslate.core.model_presets import apply_model_preset_to_payload
|
||||||
from docutranslate.core.schemas import TranslatePayload, MarkdownWorkflowParams, TextWorkflowParams, JsonWorkflowParams, \
|
from docutranslate.core.schemas import TranslatePayload, MarkdownWorkflowParams, TextWorkflowParams, JsonWorkflowParams, \
|
||||||
XlsxWorkflowParams, DocxWorkflowParams, SrtWorkflowParams, EpubWorkflowParams, HtmlWorkflowParams, \
|
XlsxWorkflowParams, DocxWorkflowParams, SrtWorkflowParams, EpubWorkflowParams, HtmlWorkflowParams, \
|
||||||
AssWorkflowParams, PPTXWorkflowParams
|
AssWorkflowParams, PPTXWorkflowParams
|
||||||
@@ -48,6 +49,8 @@ def create_workflow_from_payload(payload: TranslatePayload, logger: logging.Logg
|
|||||||
"""
|
"""
|
||||||
根据扁平化的 Payload 配置对象,构建并返回对应的 Workflow 实例。
|
根据扁平化的 Payload 配置对象,构建并返回对应的 Workflow 实例。
|
||||||
"""
|
"""
|
||||||
|
payload = apply_model_preset_to_payload(payload)
|
||||||
|
|
||||||
if logger is None:
|
if logger is None:
|
||||||
logger = logging.getLogger("docutranslate.factory")
|
logger = logging.getLogger("docutranslate.factory")
|
||||||
|
|
||||||
@@ -60,7 +63,7 @@ def create_workflow_from_payload(payload: TranslatePayload, logger: logging.Logg
|
|||||||
# 1. Markdown Based Workflow
|
# 1. Markdown Based Workflow
|
||||||
if isinstance(payload, MarkdownWorkflowParams):
|
if isinstance(payload, MarkdownWorkflowParams):
|
||||||
translator_args = payload.model_dump(
|
translator_args = payload.model_dump(
|
||||||
include={"skip_translate", "base_url", "api_key", "model_id", "to_lang", "custom_prompt",
|
include={"skip_translate", "base_url", "api_key", "model_id", "to_lang", "source_lang", "custom_prompt",
|
||||||
"temperature", "thinking", "chunk_size", "concurrent", "glossary_dict", "timeout",
|
"temperature", "thinking", "chunk_size", "concurrent", "glossary_dict", "timeout",
|
||||||
"retry", "system_proxy_enable", "force_json", "rpm", "tpm", "provider"},
|
"retry", "system_proxy_enable", "force_json", "rpm", "tpm", "provider"},
|
||||||
exclude_none=True,
|
exclude_none=True,
|
||||||
@@ -115,7 +118,7 @@ def create_workflow_from_payload(payload: TranslatePayload, logger: logging.Logg
|
|||||||
for param_type, (TransConf, WorkConf, WorkClass, ExpConf) in mapping.items():
|
for param_type, (TransConf, WorkConf, WorkClass, ExpConf) in mapping.items():
|
||||||
if isinstance(payload, param_type):
|
if isinstance(payload, param_type):
|
||||||
# 提取通用 Translator 参数
|
# 提取通用 Translator 参数
|
||||||
dump_exclude = {"workflow_type"}
|
dump_exclude = {"workflow_type", "model_preset"}
|
||||||
# 特定类型的特殊参数需要保留,例如 json_paths, insert_mode 等
|
# 特定类型的特殊参数需要保留,例如 json_paths, insert_mode 等
|
||||||
# model_dump 会自动包含定义在 param_type 中的所有字段
|
# model_dump 会自动包含定义在 param_type 中的所有字段
|
||||||
translator_args = payload.model_dump(exclude=dump_exclude, exclude_none=True)
|
translator_args = payload.model_dump(exclude=dump_exclude, exclude_none=True)
|
||||||
|
|||||||
42
docutranslate/core/model_presets.py
Normal file
42
docutranslate/core/model_presets.py
Normal file
@@ -0,0 +1,42 @@
|
|||||||
|
# SPDX-FileCopyrightText: 2025 QinHan
|
||||||
|
# SPDX-License-Identifier: MPL-2.0
|
||||||
|
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from pydantic import TypeAdapter
|
||||||
|
|
||||||
|
from docutranslate.core.schemas import TranslatePayload
|
||||||
|
from docutranslate.environment import resolve_model_preset
|
||||||
|
|
||||||
|
|
||||||
|
def apply_model_preset_to_payload_data(
    payload_data: dict[str, Any],
) -> dict[str, Any]:
    """Inject a server-side model preset into a raw payload mapping.

    Args:
        payload_data: Payload fields keyed by name.

    Returns:
        ``payload_data`` itself when ``skip_translate`` is truthy or no
        ``model_preset`` is named. Otherwise a shallow copy whose
        ``base_url``, ``api_key``, ``model_id`` and ``provider`` come from
        the preset. ``rpm`` / ``tpm`` are taken from the preset only when
        the payload leaves them as ``None`` or ``""`` and the preset
        defines a value.
    """
    if payload_data.get("skip_translate"):
        return payload_data

    preset_name = str(payload_data.get("model_preset") or "").strip()
    if not preset_name:
        return payload_data

    preset = resolve_model_preset(preset_name)
    # Connection settings always come from the preset, never from the caller.
    hydrated = {
        **payload_data,
        "base_url": preset["base_url"],
        "api_key": preset["api_key"],
        "model_id": preset["model_id"],
        "provider": preset.get("provider"),
    }

    # Rate limits are only defaults: an explicit payload value wins.
    for limit_key in ("rpm", "tpm"):
        if hydrated.get(limit_key) in (None, "") and preset.get(limit_key) is not None:
            hydrated[limit_key] = preset[limit_key]

    return hydrated
|
||||||
|
|
||||||
|
|
||||||
|
def apply_model_preset_to_payload(payload: TranslatePayload) -> TranslatePayload:
    """Return ``payload`` with its model preset (if any) applied.

    The payload is dumped to a dict, passed through
    ``apply_model_preset_to_payload_data`` and, only when that changed
    something, re-validated into a new ``TranslatePayload``. An unchanged
    payload is returned as the same object.
    """
    original_data = payload.model_dump()
    hydrated_data = apply_model_preset_to_payload_data(original_data)
    if hydrated_data != original_data:
        return TypeAdapter(TranslatePayload).validate_python(hydrated_data)
    return payload
|
||||||
@@ -99,6 +99,11 @@ class BaseWorkflowParams(BaseModel):
|
|||||||
default=False,
|
default=False,
|
||||||
description="是否跳过翻译步骤。如果为True,则仅执行文档解析和格式转换。",
|
description="是否跳过翻译步骤。如果为True,则仅执行文档解析和格式转换。",
|
||||||
)
|
)
|
||||||
|
model_preset: Optional[str] = Field(
|
||||||
|
default="",
|
||||||
|
description="服务端模型预设ID。设置后会由服务端从环境变量中注入模型配置。",
|
||||||
|
examples=["default"],
|
||||||
|
)
|
||||||
# 修改: 默认值改为 ""
|
# 修改: 默认值改为 ""
|
||||||
base_url: Optional[str] = Field(
|
base_url: Optional[str] = Field(
|
||||||
default="",
|
default="",
|
||||||
@@ -157,6 +162,9 @@ class BaseWorkflowParams(BaseModel):
|
|||||||
custom_prompt: Optional[str] = Field(
|
custom_prompt: Optional[str] = Field(
|
||||||
default="", description="用户自定义的翻译Prompt。", alias="custom_prompt"
|
default="", description="用户自定义的翻译Prompt。", alias="custom_prompt"
|
||||||
)
|
)
|
||||||
|
source_lang: Optional[str] = Field(
|
||||||
|
default=None, description="源语言(qwen-mt系列模型专用,如 'Chinese'、'English')。", examples=[None]
|
||||||
|
)
|
||||||
glossary_dict: Optional[Dict[str, str]] = Field(
|
glossary_dict: Optional[Dict[str, str]] = Field(
|
||||||
None, description="术语表字典,key为原文,value为译文。", examples=[None]
|
None, description="术语表字典,key为原文,value为译文。", examples=[None]
|
||||||
)
|
)
|
||||||
@@ -193,14 +201,17 @@ class BaseWorkflowParams(BaseModel):
|
|||||||
|
|
||||||
if isinstance(values, dict):
|
if isinstance(values, dict):
|
||||||
if not values.get("skip_translate"):
|
if not values.get("skip_translate"):
|
||||||
|
has_model_preset = bool(str(values.get("model_preset") or "").strip())
|
||||||
# 如果是空字符串 "" (即默认值),not "" 为 True,会触发错误,符合预期
|
# 如果是空字符串 "" (即默认值),not "" 为 True,会触发错误,符合预期
|
||||||
if not (values.get("base_url") or values.get("baseurl")):
|
if not has_model_preset and not (
|
||||||
|
values.get("base_url") or values.get("baseurl")
|
||||||
|
):
|
||||||
# Auto 模式在校验前不强制要求 base_url
|
# Auto 模式在校验前不强制要求 base_url
|
||||||
if values.get("workflow_type") != "auto":
|
if values.get("workflow_type") != "auto":
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"当 `skip_translate` 为 `False` 时, `base_url` 或 `baseurl` 字段是必须的。"
|
"当 `skip_translate` 为 `False` 时, `base_url` 或 `baseurl` 字段是必须的。"
|
||||||
)
|
)
|
||||||
if not values.get("model_id"):
|
if not has_model_preset and not values.get("model_id"):
|
||||||
if values.get("workflow_type") != "auto":
|
if values.get("workflow_type") != "auto":
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"当 `skip_translate` 为 `False` 时, `model_id` 字段是必须的。"
|
"当 `skip_translate` 为 `False` 时, `model_id` 字段是必须的。"
|
||||||
|
|||||||
185
docutranslate/environment.py
Normal file
185
docutranslate/environment.py
Normal file
@@ -0,0 +1,185 @@
|
|||||||
|
# SPDX-FileCopyrightText: 2025 QinHan
|
||||||
|
# SPDX-License-Identifier: MPL-2.0
|
||||||
|
"""
|
||||||
|
集中管理所有环境变量。
|
||||||
|
所有 os.getenv() 调用应在此处统一声明,其他模块从这里导入。
|
||||||
|
"""
|
||||||
|
import json
|
||||||
|
import os
|
||||||
|
from functools import lru_cache
|
||||||
|
from typing import Any
|
||||||
|
|
||||||
|
from dotenv import load_dotenv
|
||||||
|
|
||||||
|
load_dotenv() # 自动从项目根目录的 .env 文件加载环境变量(不覆盖已有的 shell 变量)
|
||||||
|
|
||||||
|
|
||||||
|
# --- 代理配置 ---
|
||||||
|
# 是否启用系统代理,设置为 "true" 开启
|
||||||
|
DOCUTRANSLATE_PROXY_ENABLED: bool = (
|
||||||
|
os.getenv("DOCUTRANSLATE_PROXY_ENABLED", "").lower() == "true"
|
||||||
|
)
|
||||||
|
|
||||||
|
# --- 缓存配置 ---
|
||||||
|
# 任务缓存数量
|
||||||
|
DOCUTRANSLATE_CACHE_NUM: int = int(os.getenv("DOCUTRANSLATE_CACHE_NUM", "10"))
|
||||||
|
|
||||||
|
# --- 翻译 API 默认配置 ---
|
||||||
|
# 默认 API 地址 (自定义接口的 Base URL)
|
||||||
|
DOCUTRANSLATE_BASE_URL: str = os.getenv("DOCUTRANSLATE_BASE_URL", "")
|
||||||
|
|
||||||
|
# 默认 API 密钥
|
||||||
|
DOCUTRANSLATE_API_KEY: str = os.getenv("DOCUTRANSLATE_API_KEY", "")
|
||||||
|
|
||||||
|
# 默认模型 ID
|
||||||
|
DOCUTRANSLATE_MODEL_ID: str = os.getenv("DOCUTRANSLATE_MODEL_ID", "")
|
||||||
|
|
||||||
|
# --- 限流默认配置 ---
|
||||||
|
# 默认 RPM 限制 (Requests Per Minute),不设置则不限制
|
||||||
|
_rpm_str = os.getenv("DOCUTRANSLATE_RPM", "")
|
||||||
|
DOCUTRANSLATE_RPM: int | None = int(_rpm_str) if _rpm_str.strip() else None
|
||||||
|
|
||||||
|
# 默认 TPM 限制 (Tokens Per Minute),不设置则不限制
|
||||||
|
_tpm_str = os.getenv("DOCUTRANSLATE_TPM", "")
|
||||||
|
DOCUTRANSLATE_TPM: int | None = int(_tpm_str) if _tpm_str.strip() else None
|
||||||
|
|
||||||
|
# 模型预设配置(JSON 字符串)
|
||||||
|
DOCUTRANSLATE_MODEL_PRESETS: str = os.getenv("DOCUTRANSLATE_MODEL_PRESETS", "").strip()
|
||||||
|
|
||||||
|
# 前端默认选中的模型预设 ID
|
||||||
|
DOCUTRANSLATE_DEFAULT_MODEL_PRESET: str = os.getenv(
|
||||||
|
"DOCUTRANSLATE_DEFAULT_MODEL_PRESET", ""
|
||||||
|
).strip()
|
||||||
|
|
||||||
|
# 兼容旧版单模型配置时的展示名称
|
||||||
|
DOCUTRANSLATE_DEFAULT_MODEL_PRESET_LABEL: str = os.getenv(
|
||||||
|
"DOCUTRANSLATE_DEFAULT_MODEL_PRESET_LABEL", "环境默认模型"
|
||||||
|
).strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _parse_optional_int(value: Any) -> int | None:
|
||||||
|
if value is None:
|
||||||
|
return None
|
||||||
|
if isinstance(value, int):
|
||||||
|
return value
|
||||||
|
text = str(value).strip()
|
||||||
|
return int(text) if text else None
|
||||||
|
|
||||||
|
|
||||||
|
def _clean_text(value: Any) -> str:
|
||||||
|
if value is None:
|
||||||
|
return ""
|
||||||
|
return str(value).strip()
|
||||||
|
|
||||||
|
|
||||||
|
def _normalize_model_preset(preset_id: str, raw: dict[str, Any]) -> dict[str, Any]:
    """Build the canonical preset record for one raw preset mapping.

    Args:
        preset_id: Identifier stored as the record's ``id`` and used in the
            error message.
        raw: Mapping decoded from the presets JSON.

    Returns:
        A dict with keys ``id``, ``label``, ``description``, ``base_url``,
        ``api_key``, ``model_id``, ``provider``, ``rpm`` and ``tpm``.

    Raises:
        ValueError: If ``base_url`` or ``model_id`` is missing or blank, or
            if ``rpm`` / ``tpm`` cannot be parsed as an integer.
    """
    base_url = _clean_text(raw.get("base_url", ""))
    model_id = _clean_text(raw.get("model_id", ""))
    if not (base_url and model_id):
        raise ValueError(
            f"模型预设 '{preset_id}' 缺少必要字段 `base_url` 或 `model_id`。"
        )

    # `api_key_env` names an environment variable holding the key; when it is
    # given, any inline `api_key` in the preset is not consulted.
    key_env_name = _clean_text(raw.get("api_key_env", ""))
    if key_env_name:
        api_key = os.getenv(key_env_name, "").strip()
    else:
        api_key = _clean_text(raw.get("api_key", ""))
    # Fall back to the global key, then to the "xx" placeholder.
    api_key = api_key or DOCUTRANSLATE_API_KEY.strip() or "xx"

    provider = _clean_text(raw.get("provider", "")) or None
    description = _clean_text(raw.get("description", "")) or None
    label = _clean_text(raw.get("label") or raw.get("name") or preset_id)

    # The global limits apply only when the preset omits the key entirely.
    rpm = _parse_optional_int(raw.get("rpm", DOCUTRANSLATE_RPM))
    tpm = _parse_optional_int(raw.get("tpm", DOCUTRANSLATE_TPM))

    return {
        "id": preset_id,
        "label": label,
        "description": description,
        "base_url": base_url,
        "api_key": api_key,
        "model_id": model_id,
        "provider": provider,
        "rpm": rpm,
        "tpm": tpm,
    }
|
||||||
|
|
||||||
|
|
||||||
|
def _iter_raw_model_presets(parsed: Any):
    """Lazily yield ``(preset_id, raw_mapping)`` pairs from the decoded presets JSON.

    Accepts either a JSON array of objects (id taken from ``id`` or ``name``)
    or a JSON object keyed by preset id. Each entry is validated just before
    it is yielded, and a ``ValueError`` is raised at the first invalid one.
    """
    if isinstance(parsed, list):
        for entry in parsed:
            if not isinstance(entry, dict):
                raise ValueError("DOCUTRANSLATE_MODEL_PRESETS 列表项必须是对象。")
            entry_id = str(entry.get("id") or entry.get("name") or "").strip()
            if not entry_id:
                raise ValueError(
                    "DOCUTRANSLATE_MODEL_PRESETS 的列表项必须包含 `id` 或 `name`。"
                )
            yield entry_id, entry
    elif isinstance(parsed, dict):
        for raw_key, entry in parsed.items():
            if not isinstance(entry, dict):
                raise ValueError("DOCUTRANSLATE_MODEL_PRESETS 对象成员必须是对象。")
            entry_id = str(raw_key).strip()
            if not entry_id:
                raise ValueError(
                    "DOCUTRANSLATE_MODEL_PRESETS 的对象键不能是空字符串。"
                )
            yield entry_id, entry
    else:
        raise ValueError(
            "DOCUTRANSLATE_MODEL_PRESETS 必须是 JSON 对象或 JSON 数组。"
        )


@lru_cache(maxsize=1)
def get_model_presets() -> dict[str, dict[str, Any]]:
    """Return all configured model presets keyed by preset id (cached).

    Presets are read from the ``DOCUTRANSLATE_MODEL_PRESETS`` JSON. When that
    produces none and both the legacy base URL and model id are set, a single
    ``"default"`` preset is synthesised from the legacy variables.

    Raises:
        ValueError: If the presets JSON is malformed or an entry is invalid.
    """
    presets: dict[str, dict[str, Any]] = {}

    if DOCUTRANSLATE_MODEL_PRESETS:
        decoded = json.loads(DOCUTRANSLATE_MODEL_PRESETS)
        for entry_id, entry in _iter_raw_model_presets(decoded):
            presets[entry_id] = _normalize_model_preset(entry_id, entry)

    legacy_base_url = DOCUTRANSLATE_BASE_URL.strip()
    legacy_model_id = DOCUTRANSLATE_MODEL_ID.strip()
    if not presets and legacy_base_url and legacy_model_id:
        presets["default"] = {
            "id": "default",
            "label": DOCUTRANSLATE_DEFAULT_MODEL_PRESET_LABEL,
            "description": None,
            "base_url": legacy_base_url,
            "api_key": DOCUTRANSLATE_API_KEY.strip() or "xx",
            "model_id": legacy_model_id,
            "provider": None,
            "rpm": DOCUTRANSLATE_RPM,
            "tpm": DOCUTRANSLATE_TPM,
        }

    return presets
|
||||||
|
|
||||||
|
|
||||||
|
def get_default_model_preset() -> str | None:
    """Return the id of the preset to select by default.

    Returns:
        ``None`` when no presets exist; the configured
        ``DOCUTRANSLATE_DEFAULT_MODEL_PRESET`` when it is set and known;
        otherwise the first preset id in iteration order.

    Raises:
        ValueError: If the configured default names a preset that does not exist.
    """
    presets = get_model_presets()
    if not presets:
        return None

    configured = DOCUTRANSLATE_DEFAULT_MODEL_PRESET
    if not configured:
        return next(iter(presets))
    if configured in presets:
        return configured
    raise ValueError(
        "DOCUTRANSLATE_DEFAULT_MODEL_PRESET 指向了不存在的模型预设。"
    )
|
||||||
|
|
||||||
|
|
||||||
|
def get_public_model_presets() -> list[dict[str, str]]:
    """Return the client-safe view of every preset.

    Each item carries only ``id`` and ``label``, plus ``description`` when
    the preset has a non-empty one.
    """
    return [
        {
            "id": preset_id,
            "label": preset["label"],
            **(
                {"description": preset["description"]}
                if preset.get("description")
                else {}
            ),
        }
        for preset_id, preset in get_model_presets().items()
    ]
|
||||||
|
|
||||||
|
|
||||||
|
def resolve_model_preset(preset_id: str) -> dict[str, Any]:
    """Look up one preset by id and return a shallow copy of its record.

    Args:
        preset_id: Preset identifier; surrounding whitespace is ignored.

    Raises:
        ValueError: If ``preset_id`` is empty/blank or no such preset exists.
    """
    lookup_key = str(preset_id or "").strip()
    if not lookup_key:
        raise ValueError("模型预设不能为空。")

    found = get_model_presets().get(lookup_key)
    if found is None:
        raise ValueError(f"未找到模型预设 '{lookup_key}'。")

    # Copy so callers cannot mutate the cached preset table.
    return dict(found)
|
||||||
@@ -1,16 +1,9 @@
|
|||||||
# SPDX-FileCopyrightText: 2025 QinHan
|
# SPDX-FileCopyrightText: 2025 QinHan
|
||||||
# SPDX-License-Identifier: MPL-2.0
|
# SPDX-License-Identifier: MPL-2.0
|
||||||
import os
|
from docutranslate.environment import DOCUTRANSLATE_PROXY_ENABLED
|
||||||
|
|
||||||
from .conditional_import import available_packages, conditional_import
|
from .conditional_import import available_packages, conditional_import
|
||||||
|
|
||||||
USE_PROXY = (
|
USE_PROXY = DOCUTRANSLATE_PROXY_ENABLED
|
||||||
True
|
|
||||||
if (
|
|
||||||
os.getenv("DOCUTRANSLATE_PROXY_ENABLED")
|
|
||||||
and os.getenv("DOCUTRANSLATE_PROXY_ENABLED").lower() == "true"
|
|
||||||
)
|
|
||||||
else False
|
|
||||||
)
|
|
||||||
if USE_PROXY:
|
if USE_PROXY:
|
||||||
print(f"USE_PROXY:{USE_PROXY}")
|
print(f"USE_PROXY:{USE_PROXY}")
|
||||||
|
|||||||
@@ -198,6 +198,7 @@ class Client:
|
|||||||
retry: Optional[int] = None,
|
retry: Optional[int] = None,
|
||||||
thinking: Optional[ThinkingMode] = None,
|
thinking: Optional[ThinkingMode] = None,
|
||||||
custom_prompt: Optional[str] = None,
|
custom_prompt: Optional[str] = None,
|
||||||
|
source_lang: Optional[str] = None,
|
||||||
system_proxy_enable: Optional[bool] = None,
|
system_proxy_enable: Optional[bool] = None,
|
||||||
force_json: Optional[bool] = None,
|
force_json: Optional[bool] = None,
|
||||||
rpm: Optional[int] = None,
|
rpm: Optional[int] = None,
|
||||||
@@ -264,6 +265,7 @@ class Client:
|
|||||||
retry: Optional[int] = None,
|
retry: Optional[int] = None,
|
||||||
thinking: Optional[ThinkingMode] = None,
|
thinking: Optional[ThinkingMode] = None,
|
||||||
custom_prompt: Optional[str] = None,
|
custom_prompt: Optional[str] = None,
|
||||||
|
source_lang: Optional[str] = None,
|
||||||
system_proxy_enable: Optional[bool] = None,
|
system_proxy_enable: Optional[bool] = None,
|
||||||
force_json: Optional[bool] = None,
|
force_json: Optional[bool] = None,
|
||||||
rpm: Optional[int] = None,
|
rpm: Optional[int] = None,
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
@@ -1,9 +1,9 @@
|
|||||||
<!DOCTYPE html>
|
<!DOCTYPE html>
|
||||||
<html lang="zh-CN" data-bs-theme="auto">
|
<html lang="en" data-bs-theme="auto">
|
||||||
<head>
|
<head>
|
||||||
<meta charset="UTF-8">
|
<meta charset="UTF-8">
|
||||||
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
<meta name="viewport" content="width=device-width, initial-scale=1.0">
|
||||||
<title>DocuTranslate - 交互式文档翻译</title>
|
<title>DocuTranslate</title>
|
||||||
<link rel="icon" href="/static/favicon.ico" type="image/x-icon">
|
<link rel="icon" href="/static/favicon.ico" type="image/x-icon">
|
||||||
<!-- Bootstrap CSS -->
|
<!-- Bootstrap CSS -->
|
||||||
<link href="/static/bootstrap.css" rel="stylesheet" crossorigin="anonymous">
|
<link href="/static/bootstrap.css" rel="stylesheet" crossorigin="anonymous">
|
||||||
@@ -159,15 +159,6 @@
|
|||||||
white-space: pre;
|
white-space: pre;
|
||||||
}
|
}
|
||||||
|
|
||||||
.bottom-left-controls {
|
|
||||||
position: fixed;
|
|
||||||
bottom: 1rem;
|
|
||||||
left: 1rem;
|
|
||||||
z-index: 1050;
|
|
||||||
display: flex;
|
|
||||||
gap: 0.5rem;
|
|
||||||
}
|
|
||||||
|
|
||||||
.step-number {
|
.step-number {
|
||||||
margin-right: 0.25rem;
|
margin-right: 0.25rem;
|
||||||
}
|
}
|
||||||
@@ -225,15 +216,30 @@
|
|||||||
<div class="d-flex justify-content-between align-items-center mb-3">
|
<div class="d-flex justify-content-between align-items-center mb-3">
|
||||||
<div class="d-flex align-items-center">
|
<div class="d-flex align-items-center">
|
||||||
<h4 class="mb-0 me-3 fw-bold" :title="t('pageTitle')">DocuTranslate</h4>
|
<h4 class="mb-0 me-3 fw-bold" :title="t('pageTitle')">DocuTranslate</h4>
|
||||||
<div class="btn-group">
|
</div>
|
||||||
<button type="button" class="btn btn-sm btn-outline-info" data-bs-toggle="modal"
|
<!-- Language & Theme Controls -->
|
||||||
data-bs-target="#tutorialModal">
|
<div class="d-flex gap-2">
|
||||||
<i class="bi bi-question-circle-fill me-1"></i><span>{{ t('tutorialBtn') }}</span>
|
<div class="dropdown">
|
||||||
</button>
|
<button class="btn btn-outline-secondary btn-sm dropdown-toggle" type="button" data-bs-toggle="dropdown">
|
||||||
<button type="button" class="btn btn-sm btn-outline-warning" data-bs-toggle="modal"
|
<i class="bi bi-translate me-1"></i><span>{{ {zh:'中文',en:'English',id:'Bahasa'}[currentLang] || 'Language' }}</span>
|
||||||
data-bs-target="#contributorsModal">
|
|
||||||
<i class="bi bi-people-fill me-1"></i><span>{{ t('projectContributeBtn') }}</span>
|
|
||||||
</button>
|
</button>
|
||||||
|
<ul class="dropdown-menu dropdown-menu-end">
|
||||||
|
<li><a class="dropdown-item" :class="{active: currentLang==='zh'}" href="#"
|
||||||
|
@click.prevent="setLang('zh')">中文</a></li>
|
||||||
|
<li><a class="dropdown-item" :class="{active: currentLang==='en'}" href="#"
|
||||||
|
@click.prevent="setLang('en')">English</a></li>
|
||||||
|
<li><a class="dropdown-item" :class="{active: currentLang==='id'}" href="#"
|
||||||
|
@click.prevent="setLang('id')">Bahasa Indonesia</a></li>
|
||||||
|
</ul>
|
||||||
|
</div>
|
||||||
|
<div class="dropdown">
|
||||||
|
<button class="btn btn-outline-secondary btn-sm dropdown-toggle" type="button" data-bs-toggle="dropdown"><i
|
||||||
|
class="bi bi-circle-half"></i></button>
|
||||||
|
<ul class="dropdown-menu dropdown-menu-end">
|
||||||
|
<li><button class="dropdown-item" @click="setTheme('light')"><i class="bi bi-sun-fill me-2"></i>Light</button></li>
|
||||||
|
<li><button class="dropdown-item" @click="setTheme('dark')"><i class="bi bi-moon-stars-fill me-2"></i>Dark</button></li>
|
||||||
|
<li><button class="dropdown-item" @click="setTheme('auto')"><i class="bi bi-circle-half me-2"></i>Auto</button></li>
|
||||||
|
</ul>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -536,17 +542,12 @@
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div v-show="!form.skip_translate">
|
<div v-show="!form.skip_translate">
|
||||||
<platform-selector
|
<model-preset-selector
|
||||||
v-model:platform="form.platform"
|
v-model:model-preset="form.model_preset"
|
||||||
v-model:base-url="form.base_url"
|
:presets="modelPresets"
|
||||||
v-model:api-key="form.api_key"
|
:invalid-model-preset="errors.model_preset"
|
||||||
v-model:model-id="form.model_id"
|
|
||||||
v-model:provider="form.provider"
|
|
||||||
:invalid-api-key="errors.api_key"
|
|
||||||
:invalid-base-url="errors.base_url"
|
|
||||||
:invalid-model-id="errors.model_id"
|
|
||||||
@clear-error="clearError"
|
@clear-error="clearError"
|
||||||
:t="t" prefix="translator_platform"></platform-selector>
|
:t="t"></model-preset-selector>
|
||||||
|
|
||||||
<div class="form-check form-switch mb-3">
|
<div class="form-check form-switch mb-3">
|
||||||
<input class="form-check-input" type="checkbox" role="switch"
|
<input class="form-check-input" type="checkbox" role="switch"
|
||||||
@@ -596,6 +597,7 @@
|
|||||||
<option value="Portuguese">葡萄牙文(Português)</option>
|
<option value="Portuguese">葡萄牙文(Português)</option>
|
||||||
<option value="Arabic">阿拉伯文(العَرَبِيَّة)</option>
|
<option value="Arabic">阿拉伯文(العَرَبِيَّة)</option>
|
||||||
<option value="Vietnamese">越南文(tiếng Việt)</option>
|
<option value="Vietnamese">越南文(tiếng Việt)</option>
|
||||||
|
<option value="Indonesian">印尼文(Bahasa Indonesia)</option>
|
||||||
<option value="custom">{{ t('targetLanguageCustom') }}</option>
|
<option value="custom">{{ t('targetLanguageCustom') }}</option>
|
||||||
</select>
|
</select>
|
||||||
<div class="mt-2" v-if="form.to_lang === 'custom'">
|
<div class="mt-2" v-if="form.to_lang === 'custom'">
|
||||||
@@ -673,7 +675,7 @@
|
|||||||
<h2 class="accordion-header">
|
<h2 class="accordion-header">
|
||||||
<button class="accordion-button collapsed" type="button" data-bs-toggle="collapse"
|
<button class="accordion-button collapsed" type="button" data-bs-toggle="collapse"
|
||||||
data-bs-target="#collapseGlossary">
|
data-bs-target="#collapseGlossary">
|
||||||
<strong><span class="step-number">{{ stepMap.glossary }} </span><i
|
<strong><i
|
||||||
class="bi bi-journal-bookmark me-2"></i><span>{{ t('glossaryGenTitle')
|
class="bi bi-journal-bookmark me-2"></i><span>{{ t('glossaryGenTitle')
|
||||||
}}</span></strong>
|
}}</span></strong>
|
||||||
</button>
|
</button>
|
||||||
@@ -698,148 +700,6 @@
|
|||||||
</button>
|
</button>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="form-check form-switch mb-3 border-top pt-3">
|
|
||||||
<input class="form-check-input" type="checkbox" role="switch"
|
|
||||||
v-model="form.glossary_generate_enable"
|
|
||||||
@change="saveSetting('glossary_generate_enable', form.glossary_generate_enable)">
|
|
||||||
<label class="form-check-label">{{ t('glossaryGenEnableLabel') }}</label>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div v-if="form.glossary_generate_enable">
|
|
||||||
<div class="mb-3">
|
|
||||||
<label class="form-label">{{ t('glossaryCustomPromptLabel') }}</label>
|
|
||||||
<textarea class="form-control"
|
|
||||||
v-model="form.glossary_agent_custom_prompt"
|
|
||||||
@input="saveSetting('glossary_agent_custom_prompt', form.glossary_agent_custom_prompt)"
|
|
||||||
rows="3"
|
|
||||||
:placeholder="t('glossaryCustomPromptPlaceholder')"></textarea>
|
|
||||||
</div>
|
|
||||||
<div class="mb-3">
|
|
||||||
<label class="form-label">{{ t('glossaryGenConfigLabel') }}</label>
|
|
||||||
<div class="btn-group w-100">
|
|
||||||
<input type="radio" class="btn-check" value="same" id="gSame"
|
|
||||||
v-model="form.glossary_agent_config_choice"
|
|
||||||
@change="saveSetting('glossary_agent_config_choice', 'same')">
|
|
||||||
<label class="btn btn-outline-primary"
|
|
||||||
for="gSame">{{ t('glossaryGenConfigSame') }}</label>
|
|
||||||
<input type="radio" class="btn-check" value="custom" id="gCustom"
|
|
||||||
v-model="form.glossary_agent_config_choice"
|
|
||||||
@change="saveSetting('glossary_agent_config_choice', 'custom')">
|
|
||||||
<label class="btn btn-outline-primary"
|
|
||||||
for="gCustom">{{ t('glossaryGenConfigCustom') }}</label>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div v-if="form.glossary_agent_config_choice === 'custom'"
|
|
||||||
class="border p-3 rounded">
|
|
||||||
<platform-selector
|
|
||||||
v-model:platform="form.glossary_agent_platform"
|
|
||||||
v-model:base-url="form.glossary_agent_baseurl"
|
|
||||||
v-model:api-key="form.glossary_agent_key"
|
|
||||||
v-model:model-id="form.glossary_agent_model_id"
|
|
||||||
v-model:provider="form.glossary_agent_provider"
|
|
||||||
:t="t" prefix="glossary_agent_platform"></platform-selector>
|
|
||||||
|
|
||||||
<div class="mb-3">
|
|
||||||
<label class="form-label">{{ t('targetLanguageLabel') }}</label>
|
|
||||||
<select class="form-select" v-model="form.glossary_agent_to_lang"
|
|
||||||
@change="saveSetting('glossary_agent_to_lang', form.glossary_agent_to_lang)">
|
|
||||||
<option value="Simplified Chinese">中文(简体中文)</option>
|
|
||||||
<option value="English">英文(English)</option>
|
|
||||||
<option value="Spanish">西班牙文(Español)</option>
|
|
||||||
<option value="French">法文(Français)</option>
|
|
||||||
<option value="German">德文(Deutsch)</option>
|
|
||||||
<option value="Japanese">日文(日本語)</option>
|
|
||||||
<option value="Korean">韩文(한국어)</option>
|
|
||||||
<option value="Russian">俄文(Русский)</option>
|
|
||||||
<option value="Portuguese">葡萄牙文(Português)</option>
|
|
||||||
<option value="Arabic">阿拉伯文(العَرَبِيَّة)</option>
|
|
||||||
<option value="Vietnamese">越南文(tiếng Việt)</option>
|
|
||||||
<option value="custom">{{ t('targetLanguageCustom') }}</option>
|
|
||||||
</select>
|
|
||||||
<div class="mt-2" v-if="form.glossary_agent_to_lang === 'custom'">
|
|
||||||
<input type="text" class="form-control"
|
|
||||||
v-model="form.glossary_agent_custom_to_lang"
|
|
||||||
@input="saveSetting('glossary_agent_custom_to_lang', form.glossary_agent_custom_to_lang)"
|
|
||||||
:placeholder="t('customLangPlaceholder')">
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<slider-control :label="t('chunkSizeLabel')"
|
|
||||||
v-model="form.glossary_agent_chunk_size"
|
|
||||||
save-key="glossary_agent_chunk_size"
|
|
||||||
:default-val="defaultParams.chunk_size" :min="1000"
|
|
||||||
:max="8000" :step="100" :t="t"></slider-control>
|
|
||||||
<slider-control :label="t('concurrentLabel')"
|
|
||||||
v-model="form.glossary_agent_concurrent"
|
|
||||||
save-key="glossary_agent_concurrent"
|
|
||||||
:default-val="defaultParams.concurrent" :min="1"
|
|
||||||
:max="120" :step="1" :t="t"></slider-control>
|
|
||||||
<slider-control label="Temperature"
|
|
||||||
v-model="form.glossary_agent_temperature"
|
|
||||||
save-key="glossary_agent_temperature" :default-val="0.7"
|
|
||||||
:min="0" :max="2" :step="0.1" :t="t"></slider-control>
|
|
||||||
<slider-control :label="t('retryLabel')"
|
|
||||||
v-model="form.glossary_agent_retry"
|
|
||||||
save-key="glossary_agent_retry"
|
|
||||||
:default-val="defaultParams.retry" :min="1" :max="6"
|
|
||||||
:step="1" :t="t"></slider-control>
|
|
||||||
|
|
||||||
<!-- Glossary Agent RPM/TPM [Vertical Layout] -->
|
|
||||||
<div class="mb-3">
|
|
||||||
<label class="form-label">RPM <small
|
|
||||||
class="text-muted">({{ t('rpmLabel')
|
|
||||||
}})</small></label>
|
|
||||||
<input type="number" class="form-control"
|
|
||||||
v-model="form.glossary_agent_rpm"
|
|
||||||
@input="saveSetting('glossary_agent_rpm', form.glossary_agent_rpm)"
|
|
||||||
min="1" :placeholder="t('unlimitedPlaceholder')">
|
|
||||||
</div>
|
|
||||||
<div class="mb-3">
|
|
||||||
<label class="form-label">TPM <small
|
|
||||||
class="text-muted">({{ t('tpmLabel')
|
|
||||||
}})</small></label>
|
|
||||||
<input type="number" class="form-control"
|
|
||||||
v-model="form.glossary_agent_tpm"
|
|
||||||
@input="saveSetting('glossary_agent_tpm', form.glossary_agent_tpm)"
|
|
||||||
min="1" :placeholder="t('unlimitedPlaceholder')">
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="mb-3">
|
|
||||||
<label class="form-label">{{ t('thinkingModeLabel') }}</label>
|
|
||||||
<div class="btn-group w-100">
|
|
||||||
<input type="radio" class="btn-check" value="enable"
|
|
||||||
id="gtEnable" v-model="form.glossary_agent_thinking"
|
|
||||||
@change="saveSetting('glossary_agent_thinking_mode', 'enable')">
|
|
||||||
<label class="btn btn-outline-primary"
|
|
||||||
for="gtEnable">{{ t('thinkingModeEnable') }}</label>
|
|
||||||
<input type="radio" class="btn-check" value="disable"
|
|
||||||
id="gtDisable" v-model="form.glossary_agent_thinking"
|
|
||||||
@change="saveSetting('glossary_agent_thinking_mode', 'disable')">
|
|
||||||
<label class="btn btn-outline-primary"
|
|
||||||
for="gtDisable">{{ t('thinkingModeDisable') }}</label>
|
|
||||||
<input type="radio" class="btn-check" value="default"
|
|
||||||
id="gtDefault" v-model="form.glossary_agent_thinking"
|
|
||||||
@change="saveSetting('glossary_agent_thinking_mode', 'default')">
|
|
||||||
<label class="btn btn-outline-primary"
|
|
||||||
for="gtDefault">{{ t('thinkingModeDefault') }}</label>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<div class="form-check form-switch mb-3">
|
|
||||||
<input class="form-check-input" type="checkbox" role="switch"
|
|
||||||
v-model="form.glossary_agent_system_proxy_enable"
|
|
||||||
@change="saveSetting('glossary_agent_system_proxy_enable', form.glossary_agent_system_proxy_enable)">
|
|
||||||
<label class="form-check-label">{{ t('systemProxyLabel') }}</label>
|
|
||||||
</div>
|
|
||||||
<div class="form-check form-switch mb-3">
|
|
||||||
<input class="form-check-input" type="checkbox" role="switch"
|
|
||||||
v-model="form.glossary_agent_force_json"
|
|
||||||
@change="saveSetting('glossary_agent_force_json', form.glossary_agent_force_json)">
|
|
||||||
<label class="form-check-label">{{ t('forceJson') }}</label>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -858,10 +718,6 @@
|
|||||||
|
|
||||||
<!-- Project Info -->
|
<!-- Project Info -->
|
||||||
<div class="mt-4 text-center text-muted small project-info">
|
<div class="mt-4 text-center text-muted small project-info">
|
||||||
<p class="bi bi-github mb-2"> GitHub主页(欢迎star❤): <br/><a
|
|
||||||
href="https://github.com/xunbu/docutranslate" target="_blank">https://github.com/xunbu/docutranslate</a>
|
|
||||||
</p>
|
|
||||||
<p class="bi bi-tencent-qq mb-2"> 交流QQ群: 1047781902 </p>
|
|
||||||
<p class="bi mb-0">version:<span>{{ version ? 'v' + version : '' }}</span></p>
|
<p class="bi mb-0">version:<span>{{ version ? 'v' + version : '' }}</span></p>
|
||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
@@ -1023,58 +879,6 @@
|
|||||||
</div>
|
</div>
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<div class="modal fade" id="tutorialModal" tabindex="-1">
|
|
||||||
<div class="modal-dialog modal-lg modal-dialog-centered modal-dialog-scrollable">
|
|
||||||
<div class="modal-content">
|
|
||||||
<div class="modal-header"><h5 class="modal-title"><i
|
|
||||||
class="bi bi-book-half me-2"></i>{{ t('tutorialModalTitle') }}</h5>
|
|
||||||
<button type="button" class="btn-close" data-bs-dismiss="modal"></button>
|
|
||||||
</div>
|
|
||||||
<div class="modal-body" v-html="t('tutorialModalBody')"></div>
|
|
||||||
<div class="modal-footer">
|
|
||||||
<button type="button" class="btn btn-primary" data-bs-dismiss="modal">{{ t('tutorialUnderstandBtn')
|
|
||||||
}}
|
|
||||||
</button>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="modal fade" id="contributorsModal" tabindex="-1">
|
|
||||||
<div class="modal-dialog modal-dialog-centered">
|
|
||||||
<div class="modal-content">
|
|
||||||
<div class="modal-header"><h5 class="modal-title"><i
|
|
||||||
class="bi bi-heart-fill me-2 text-danger"></i>{{ t('contributorsModalTitle') }}</h5>
|
|
||||||
<button type="button" class="btn-close" data-bs-dismiss="modal"></button>
|
|
||||||
</div>
|
|
||||||
<div class="modal-body">
|
|
||||||
<p>{{ t('contributorsPara1') }}</p>
|
|
||||||
<p>{{ t('contributorsPara2') }}</p>
|
|
||||||
<div class="alert alert-success mt-4" role="alert">
|
|
||||||
<p>{{ t('contributorsWelcome') }}</p>
|
|
||||||
<hr>
|
|
||||||
<p class="mb-0">
|
|
||||||
<a href="https://github.com/xunbu/docutranslate" target="_blank"
|
|
||||||
class="btn btn-info btn-sm ms-2"><i
|
|
||||||
class="bi bi-github me-1"></i><span>{{ t('contributorsGithub') }}</span></a>
|
|
||||||
<a href="https://github.com/xunbu/docutranslate/pulls" target="_blank"
|
|
||||||
class="btn btn-success btn-sm ms-2"><i
|
|
||||||
class="bi bi-git me-1"></i><span>{{ t('contributorsPR') }}</span></a>
|
|
||||||
<a href="https://github.com/xunbu/docutranslate/issues" target="_blank"
|
|
||||||
class="btn btn-warning btn-sm ms-2"><i
|
|
||||||
class="bi bi-bug-fill me-1"></i><span>{{ t('contributorsIssue') }}</span></a>
|
|
||||||
</p>
|
|
||||||
<hr>
|
|
||||||
<p>{{ t('contributorsQQ') }}</p>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<div class="modal-footer">
|
|
||||||
<button type="button" class="btn btn-secondary" data-bs-dismiss="modal">{{ t('closeBtn') }}</button>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<!-- Preview Offcanvas -->
|
<!-- Preview Offcanvas -->
|
||||||
<div class="offcanvas offcanvas-end" tabindex="-1" id="previewOffcanvas" ref="previewOffcanvas">
|
<div class="offcanvas offcanvas-end" tabindex="-1" id="previewOffcanvas" ref="previewOffcanvas">
|
||||||
<div class="offcanvas-header border-bottom">
|
<div class="offcanvas-header border-bottom">
|
||||||
@@ -1135,40 +939,7 @@
|
|||||||
</div>
|
</div>
|
||||||
<iframe id="printFrame" ref="printFrame" style="display: none;"></iframe>
|
<iframe id="printFrame" ref="printFrame" style="display: none;"></iframe>
|
||||||
|
|
||||||
<!-- Controls -->
|
<!-- Header controls now in left panel top-right -->
|
||||||
<div class="bottom-left-controls">
|
|
||||||
<div class="dropdown">
|
|
||||||
<button class="btn btn-secondary dropdown-toggle" type="button" data-bs-toggle="dropdown"><i
|
|
||||||
class="bi bi-translate"></i></button>
|
|
||||||
<ul class="dropdown-menu">
|
|
||||||
<li><a class="dropdown-item" :class="{active: currentLang==='zh'}" href="#"
|
|
||||||
@click.prevent="setLang('zh')">中文</a></li>
|
|
||||||
<li><a class="dropdown-item" :class="{active: currentLang==='en'}" href="#"
|
|
||||||
@click.prevent="setLang('en')">English</a></li>
|
|
||||||
<li><a class="dropdown-item" :class="{active: currentLang==='vi'}" href="#"
|
|
||||||
@click.prevent="setLang('vi')">Tiếng Việt</a></li>
|
|
||||||
</ul>
|
|
||||||
</div>
|
|
||||||
<div class="dropdown">
|
|
||||||
<button class="btn btn-secondary dropdown-toggle" type="button" data-bs-toggle="dropdown"><i
|
|
||||||
class="bi bi-circle-half"></i></button>
|
|
||||||
<ul class="dropdown-menu">
|
|
||||||
<li>
|
|
||||||
<button class="dropdown-item" @click="setTheme('light')"><i class="bi bi-sun-fill me-2"></i> Light
|
|
||||||
</button>
|
|
||||||
</li>
|
|
||||||
<li>
|
|
||||||
<button class="dropdown-item" @click="setTheme('dark')"><i class="bi bi-moon-stars-fill me-2"></i>
|
|
||||||
Dark
|
|
||||||
</button>
|
|
||||||
</li>
|
|
||||||
<li>
|
|
||||||
<button class="dropdown-item" @click="setTheme('auto')"><i class="bi bi-circle-half me-2"></i> Auto
|
|
||||||
</button>
|
|
||||||
</li>
|
|
||||||
</ul>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
</div>
|
</div>
|
||||||
|
|
||||||
<script src="/static/bootstrap.bundle.min.js"></script>
|
<script src="/static/bootstrap.bundle.min.js"></script>
|
||||||
@@ -1207,149 +978,31 @@
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const KNOWN_PLATFORMS = [
|
const ModelPresetSelector = {
|
||||||
{val: "custom", label: "platformCustom", provider: "default"},
|
props: ['modelPreset', 'presets', 't', 'invalidModelPreset'],
|
||||||
{val: "https://api.302.ai/v1", label: "302.AI", provider: ""},
|
|
||||||
{val: "https://api.openai.com/v1", label: "OpenAI", provider: "default"},
|
|
||||||
{
|
|
||||||
val: "https://generativelanguage.googleapis.com/v1beta/openai/",
|
|
||||||
label: "Gemini",
|
|
||||||
provider: "google"
|
|
||||||
},
|
|
||||||
{val: "https://api.deepseek.com/v1", label: "DeepSeek", provider: ""},
|
|
||||||
{
|
|
||||||
val: "https://dashscope.aliyuncs.com/compatible-mode/v1",
|
|
||||||
label: "阿里云百炼(DashScope)",
|
|
||||||
provider: "aliyuncs"
|
|
||||||
},
|
|
||||||
{
|
|
||||||
val: "https://ark.cn-beijing.volces.com/api/v3",
|
|
||||||
label: "火山引擎(volces)",
|
|
||||||
provider: "volces"
|
|
||||||
},
|
|
||||||
{val: "https://api.siliconflow.cn/v1", label: "硅基流动(siliconflow CN)", provider: "siliconflow"},
|
|
||||||
{val: "https://open.bigmodel.cn/api/paas/v4", label: "智谱AI(bigmodel CN)", provider: "bigmodel"},
|
|
||||||
{val: "https://www.dmxapi.cn/v1", label: "DMXAPI_CN", provider: ""},
|
|
||||||
{val: "https://www.dmxapi.com/v1", label: "DMXAPI_GLOBAL", provider: ""},
|
|
||||||
{val: "https://ai.juguang.chat/v1", label: "聚光AI(juguang CN)", provider: ""},
|
|
||||||
{val: "https://openrouter.ai/api/v1", label: "OpenRouter", provider: ""},
|
|
||||||
{val: "http://127.0.0.1:1234/v1", label: "LM Studio", provider: ""},
|
|
||||||
{val: "http://127.0.0.1:11434/v1", label: "Ollama", provider: "ollama"}
|
|
||||||
];
|
|
||||||
|
|
||||||
const PlatformSelector = {
|
|
||||||
props: ['platform', 'baseUrl', 'apiKey', 'modelId', 'provider', 't', 'prefix', 'invalidApiKey', 'invalidBaseUrl', 'invalidModelId'],
|
|
||||||
template: `
|
template: `
|
||||||
<div>
|
<div>
|
||||||
<div class="mb-2">
|
|
||||||
<label class="form-label">{{ t('platformLabel') }}</label>
|
|
||||||
<select class="form-select" :value="platform" @change="handlePlatformChange($event.target.value)">
|
|
||||||
<option v-for="p in platforms" :value="p.val">{{ p.val === 'custom' ? t(p.label) : p.label }}</option>
|
|
||||||
</select>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="mb-3" v-if="platform === 'custom'">
|
|
||||||
<label class="form-label">{{ t('providerLabel') }}</label>
|
|
||||||
<select class="form-select" :value="provider" @change="handleProviderChange($event.target.value)">
|
|
||||||
<option v-for="prov in providers" :value="prov">{{ prov }}</option>
|
|
||||||
</select>
|
|
||||||
</div>
|
|
||||||
|
|
||||||
<div class="form-text mb-3">Base URL: <code ref="baseUrlDisplay">{{ baseUrl }}</code></div>
|
|
||||||
<div class="mb-3" v-if="platform === 'custom'">
|
|
||||||
<label class="form-label">{{ t('baseUrlLabel') }}</label>
|
|
||||||
<input type="url" class="form-control" :class="{'is-invalid': invalidBaseUrl}"
|
|
||||||
:value="baseUrl" @input="handleBaseUrlChange($event.target.value)" required
|
|
||||||
placeholder="OpenAi Compatible URL">
|
|
||||||
</div>
|
|
||||||
<div class="mb-3">
|
<div class="mb-3">
|
||||||
<label class="form-label">API Key <a v-if="apiHref" :href="apiHref[0]" target="_blank" class="ms-1"><i
|
<label class="form-label">{{ t('modelPresetLabel') }}</label>
|
||||||
class="bi bi-box-arrow-up-right"></i></a> <span
|
<select class="form-select" :class="{'is-invalid': invalidModelPreset}"
|
||||||
class="ms-2 text-muted small">{{ apiHref && apiHref[1] ? t(apiHref[1]) : '' }}</span></label>
|
:value="modelPreset" :disabled="!presets.length"
|
||||||
<div class="input-group">
|
@change="handlePresetChange($event.target.value)">
|
||||||
<input :type="showPass?'text':'password'" class="form-control" :class="{'is-invalid': invalidApiKey}"
|
<option value="" disabled>{{ presets.length ? t('modelPresetPlaceholder') : t('modelPresetEmpty') }}</option>
|
||||||
:value="apiKey" @input="handleApiKeyChange($event.target.value)"
|
<option v-for="preset in presets" :key="preset.id" :value="preset.id">{{ preset.label }}</option>
|
||||||
:placeholder="t('apiKeyPlaceholder')">
|
</select>
|
||||||
<button class="btn btn-outline-secondary" type="button" @click="showPass=!showPass"><i class="bi"
|
<div class="form-text mt-2" v-if="presets.length">{{ t('modelPresetRuntimeHint') }}</div>
|
||||||
:class="showPass?'bi-eye':'bi-eye-slash'"></i>
|
<div class="form-text mt-2" v-else>{{ t('modelPresetEmpty') }}</div>
|
||||||
</button>
|
|
||||||
</div>
|
|
||||||
</div>
|
|
||||||
<div class="mb-3">
|
|
||||||
<label class="form-label">{{ t('modelIdLabel') }}</label>
|
|
||||||
<input type="text" class="form-control" :class="{'is-invalid': invalidModelId}"
|
|
||||||
:value="modelId" @input="handleModelChange($event.target.value)" required
|
|
||||||
:placeholder="t('modelIdPlaceholder')">
|
|
||||||
</div>
|
</div>
|
||||||
</div>`,
|
</div>`,
|
||||||
setup(props, {emit}) {
|
setup(props, {emit}) {
|
||||||
const showPass = ref(false);
|
const handlePresetChange = (val) => {
|
||||||
const platforms = KNOWN_PLATFORMS;
|
emit('update:modelPreset', val);
|
||||||
|
emit('clearError', 'model_preset');
|
||||||
// ProviderType Literal values
|
localStorage.setItem('translator_model_preset', val);
|
||||||
const providers = [
|
|
||||||
"default", "ollama", "bigmodel", "aliyuncs", "volces", "google", "siliconflow"
|
|
||||||
];
|
|
||||||
|
|
||||||
const apiHrefMap = {
|
|
||||||
"https://api.302.ai/v1": ["https://share.302.ai/BgRLAe", "apiHrefInfo302ai"],
|
|
||||||
"https://openrouter.ai/api/v1": ["https://openrouter.ai/settings/keys", null],
|
|
||||||
"https://api.openai.com/v1": ["https://platform.openai.com/api-keys", null],
|
|
||||||
"https://api.deepseek.com/v1": ["https://platform.deepseek.com/api_keys", null],
|
|
||||||
"https://open.bigmodel.cn/api/paas/v4": ["https://open.bigmodel.cn/usercenter/apikeys", null],
|
|
||||||
"https://dashscope.aliyuncs.com/compatible-mode/v1": ["https://bailian.console.aliyun.com/?tab=model#/api-key", null],
|
|
||||||
"https://ark.cn-beijing.volces.com/api/v3": ["https://console.volcengine.com/ark/region:ark+cn-beijing/apiKey?apikey=%7B%7D", null],
|
|
||||||
"https://api.siliconflow.cn/v1": ["https://cloud.siliconflow.cn/account/ak", null],
|
|
||||||
"https://ai.juguang.chat/v1": ["https://ai.juguang.chat/console/token", null],
|
|
||||||
"https://www.dmxapi.cn/v1": ["https://www.dmxapi.cn/token", null],
|
|
||||||
"https://www.dmxapi.com/v1": ["https://www.dmxapi.com/console/token", null],
|
|
||||||
"https://generativelanguage.googleapis.com/v1beta/openai/": ["https://aistudio.google.com/u/0/apikey", null]
|
|
||||||
};
|
|
||||||
const apiHref = computed(() => apiHrefMap[props.baseUrl]);
|
|
||||||
|
|
||||||
const save = (key, val) => localStorage.setItem(key, val);
|
|
||||||
const handlePlatformChange = (val) => {
|
|
||||||
emit('update:platform', val);
|
|
||||||
save(`${props.prefix}_last_platform`, val);
|
|
||||||
|
|
||||||
// Determine provider based on platform
|
|
||||||
const selected = platforms.find(p => p.val === val);
|
|
||||||
if (val === 'custom') {
|
|
||||||
emit('update:provider', 'default');
|
|
||||||
} else if (selected) {
|
|
||||||
emit('update:provider', selected.provider);
|
|
||||||
}
|
|
||||||
};
|
|
||||||
const handleBaseUrlChange = (val) => {
|
|
||||||
emit('update:baseUrl', val);
|
|
||||||
emit('clearError', 'base_url');
|
|
||||||
if (props.platform === 'custom') save(`${props.prefix}_custom_base_url`, val);
|
|
||||||
};
|
|
||||||
const handleApiKeyChange = (val) => {
|
|
||||||
emit('update:apiKey', val);
|
|
||||||
emit('clearError', 'api_key');
|
|
||||||
save(`${props.prefix}_${props.platform}_apikey`, val);
|
|
||||||
};
|
|
||||||
const handleModelChange = (val) => {
|
|
||||||
emit('update:modelId', val);
|
|
||||||
emit('clearError', 'model_id');
|
|
||||||
save(`${props.prefix}_${props.platform}_model_id`, val);
|
|
||||||
};
|
|
||||||
const handleProviderChange = (val) => {
|
|
||||||
emit('update:provider', val);
|
|
||||||
save(`${props.prefix}_${props.platform}_provider`, val);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
return {
|
return {
|
||||||
showPass,
|
handlePresetChange
|
||||||
platforms,
|
|
||||||
providers,
|
|
||||||
apiHref,
|
|
||||||
handlePlatformChange,
|
|
||||||
handleBaseUrlChange,
|
|
||||||
handleApiKeyChange,
|
|
||||||
handleModelChange,
|
|
||||||
handleProviderChange
|
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
@@ -1375,15 +1028,24 @@
|
|||||||
];
|
];
|
||||||
|
|
||||||
createApp({
|
createApp({
|
||||||
components: {SliderControl, PlatformSelector},
|
components: {SliderControl, ModelPresetSelector},
|
||||||
setup() {
|
setup() {
|
||||||
const version = ref("");
|
const version = ref("");
|
||||||
const currentLang = ref(localStorage.getItem('ui_language') || 'zh');
|
function detectBrowserLang() {
|
||||||
|
const nav = navigator.language || navigator.userLanguage || '';
|
||||||
|
const lang = nav.split('-')[0].toLowerCase();
|
||||||
|
if (['zh', 'en', 'id'].includes(lang)) return lang;
|
||||||
|
if (lang === 'zh') return 'zh';
|
||||||
|
return 'en'; // default to English for unrecognized languages
|
||||||
|
}
|
||||||
|
const currentLang = ref(localStorage.getItem('ui_language') || detectBrowserLang());
|
||||||
const i18nData = ref({});
|
const i18nData = ref({});
|
||||||
const glossaryData = ref({});
|
const glossaryData = ref({});
|
||||||
const tasks = ref([]);
|
const tasks = ref([]);
|
||||||
const enginList = ref([]);
|
const enginList = ref([]);
|
||||||
const defaultParams = reactive({});
|
const defaultParams = reactive({});
|
||||||
|
const modelPresets = ref([]);
|
||||||
|
const defaultModelPreset = ref('');
|
||||||
|
|
||||||
// Refs for DOM elements
|
// Refs for DOM elements
|
||||||
const glossaryInput = ref(null);
|
const glossaryInput = ref(null);
|
||||||
@@ -1398,9 +1060,7 @@
|
|||||||
|
|
||||||
// Validation State
|
// Validation State
|
||||||
const errors = reactive({
|
const errors = reactive({
|
||||||
model_id: false,
|
model_preset: false,
|
||||||
api_key: false,
|
|
||||||
base_url: false,
|
|
||||||
mineru_token: false,
|
mineru_token: false,
|
||||||
mineru_deploy_base_url: false,
|
mineru_deploy_base_url: false,
|
||||||
custom_to_lang: false,
|
custom_to_lang: false,
|
||||||
@@ -1430,11 +1090,7 @@
|
|||||||
formula_ocr: true,
|
formula_ocr: true,
|
||||||
code_ocr: true,
|
code_ocr: true,
|
||||||
skip_translate: false,
|
skip_translate: false,
|
||||||
platform: 'https://api.302.ai/v1',
|
model_preset: '',
|
||||||
base_url: '',
|
|
||||||
api_key: '',
|
|
||||||
model_id: '',
|
|
||||||
provider: 'api.openai.com', // Default provider
|
|
||||||
system_proxy_enable: false,
|
system_proxy_enable: false,
|
||||||
force_json: false,
|
force_json: false,
|
||||||
to_lang: 'Simplified Chinese',
|
to_lang: 'Simplified Chinese',
|
||||||
@@ -1447,25 +1103,6 @@
|
|||||||
retry: 3,
|
retry: 3,
|
||||||
rpm: null, // New RPM
|
rpm: null, // New RPM
|
||||||
tpm: null, // New TPM
|
tpm: null, // New TPM
|
||||||
glossary_generate_enable: false,
|
|
||||||
glossary_agent_custom_prompt: '',
|
|
||||||
glossary_agent_config_choice: 'same',
|
|
||||||
glossary_agent_platform: 'https://api.302.ai/v1',
|
|
||||||
glossary_agent_baseurl: '',
|
|
||||||
glossary_agent_key: '',
|
|
||||||
glossary_agent_model_id: '',
|
|
||||||
glossary_agent_provider: 'api.openai.com', // Default glossary provider
|
|
||||||
glossary_agent_to_lang: 'Simplified Chinese',
|
|
||||||
glossary_agent_custom_to_lang: '',
|
|
||||||
glossary_agent_chunk_size: 1000,
|
|
||||||
glossary_agent_concurrent: 5,
|
|
||||||
glossary_agent_temperature: 0.7,
|
|
||||||
glossary_agent_retry: 3,
|
|
||||||
glossary_agent_thinking: 'default',
|
|
||||||
glossary_agent_system_proxy_enable: false,
|
|
||||||
glossary_agent_force_json: false,
|
|
||||||
glossary_agent_rpm: null, // New Glossary Agent RPM
|
|
||||||
glossary_agent_tpm: null // New Glossary Agent TPM
|
|
||||||
});
|
});
|
||||||
|
|
||||||
// Nested Params for specific workflows
|
// Nested Params for specific workflows
|
||||||
@@ -1496,8 +1133,12 @@
|
|||||||
const v = localStorage.getItem(k);
|
const v = localStorage.getItem(k);
|
||||||
return (v === null || v === '' || v === 'null') ? null : Number(v);
|
return (v === null || v === '' || v === 'null') ? null : Number(v);
|
||||||
};
|
};
|
||||||
|
const validPresetIds = modelPresets.value.map(p => p.id);
|
||||||
|
const fallbackPreset = validPresetIds.includes(defaultModelPreset.value)
|
||||||
|
? defaultModelPreset.value
|
||||||
|
: (validPresetIds[0] || '');
|
||||||
|
|
||||||
form.workflow_type = get('translator_last_workflow', 'markdown_based');
|
form.workflow_type = get('translator_last_workflow', 'docx');
|
||||||
form.auto_workflow_enabled = getBool('translator_auto_workflow_enabled', true);
|
form.auto_workflow_enabled = getBool('translator_auto_workflow_enabled', true);
|
||||||
form.convert_engine = get('translator_convert_engin', 'mineru');
|
form.convert_engine = get('translator_convert_engin', 'mineru');
|
||||||
form.mineru_token = get('translator_mineru_token', '');
|
form.mineru_token = get('translator_mineru_token', '');
|
||||||
@@ -1516,12 +1157,12 @@
|
|||||||
form.formula_ocr = getBool('translator_formula_ocr', true);
|
form.formula_ocr = getBool('translator_formula_ocr', true);
|
||||||
form.code_ocr = getBool('translator_code_ocr', true);
|
form.code_ocr = getBool('translator_code_ocr', true);
|
||||||
form.skip_translate = getBool('translator_skip_translate', false);
|
form.skip_translate = getBool('translator_skip_translate', false);
|
||||||
form.platform = get('translator_platform_last_platform', 'https://api.302.ai/v1');
|
form.model_preset = get('translator_model_preset', fallbackPreset);
|
||||||
form.system_proxy_enable = getBool('translator_system_proxy_enable', false);
|
form.system_proxy_enable = getBool('translator_system_proxy_enable', false);
|
||||||
form.force_json = getBool('translator_force_json', false);
|
form.force_json = getBool('translator_force_json', false);
|
||||||
form.to_lang = get('translator_to_lang', 'Simplified Chinese');
|
form.to_lang = get('translator_to_lang', 'Simplified Chinese');
|
||||||
form.custom_to_lang = get('translator_custom_to_lang', '');
|
form.custom_to_lang = get('translator_custom_to_lang', '');
|
||||||
form.thinking = get('translator_thinking_mode', 'disable');
|
form.thinking = get('translator_thinking_mode', 'default');
|
||||||
form.custom_prompt = get('custom_prompt', '');
|
form.custom_prompt = get('custom_prompt', '');
|
||||||
form.chunk_size = getNum('chunk_size', 1000);
|
form.chunk_size = getNum('chunk_size', 1000);
|
||||||
form.concurrent = getNum('concurrent', 5);
|
form.concurrent = getNum('concurrent', 5);
|
||||||
@@ -1530,37 +1171,8 @@
|
|||||||
form.rpm = getNumOrNull('rpm'); // Load RPM
|
form.rpm = getNumOrNull('rpm'); // Load RPM
|
||||||
form.tpm = getNumOrNull('tpm'); // Load TPM
|
form.tpm = getNumOrNull('tpm'); // Load TPM
|
||||||
|
|
||||||
// Determine Provider
|
if (!validPresetIds.includes(form.model_preset)) {
|
||||||
const platObj = KNOWN_PLATFORMS.find(p => p.val === form.platform);
|
form.model_preset = fallbackPreset;
|
||||||
if (form.platform === 'custom') {
|
|
||||||
// 修正:读取组件实际保存的 Key (translator_platform_custom_provider)
|
|
||||||
form.provider = get('translator_platform_custom_provider', 'default');
|
|
||||||
} else {
|
|
||||||
form.provider = platObj ? platObj.provider : '';
|
|
||||||
}
|
|
||||||
|
|
||||||
form.glossary_generate_enable = getBool('glossary_generate_enable', false);
|
|
||||||
form.glossary_agent_custom_prompt = get('glossary_agent_custom_prompt', '');
|
|
||||||
form.glossary_agent_config_choice = get('glossary_agent_config_choice', 'same');
|
|
||||||
form.glossary_agent_platform = get('glossary_agent_platform_last_platform', 'https://api.302.ai/v1');
|
|
||||||
form.glossary_agent_to_lang = get('glossary_agent_to_lang', 'Simplified Chinese');
|
|
||||||
form.glossary_agent_custom_to_lang = get('glossary_agent_custom_to_lang', '');
|
|
||||||
form.glossary_agent_chunk_size = getNum('glossary_agent_chunk_size', 1000);
|
|
||||||
form.glossary_agent_concurrent = getNum('glossary_agent_concurrent', 5);
|
|
||||||
form.glossary_agent_temperature = getNum('glossary_agent_temperature', 0.7);
|
|
||||||
form.glossary_agent_retry = getNum('glossary_agent_retry', 3);
|
|
||||||
form.glossary_agent_thinking = get('glossary_agent_thinking_mode', 'default');
|
|
||||||
form.glossary_agent_system_proxy_enable = getBool('glossary_agent_system_proxy_enable', false);
|
|
||||||
form.glossary_agent_force_json = getBool('glossary_agent_force_json', false);
|
|
||||||
form.glossary_agent_rpm = getNumOrNull('glossary_agent_rpm'); // Load Glossary RPM
|
|
||||||
form.glossary_agent_tpm = getNumOrNull('glossary_agent_tpm'); // Load Glossary TPM
|
|
||||||
|
|
||||||
// Determine Glossary Provider
|
|
||||||
const gPlatObj = KNOWN_PLATFORMS.find(p => p.val === form.glossary_agent_platform);
|
|
||||||
if (form.glossary_agent_platform === 'custom') {
|
|
||||||
form.glossary_agent_provider = get('glossary_agent_platform_custom_provider', 'default');
|
|
||||||
} else {
|
|
||||||
form.glossary_agent_provider = gPlatObj ? gPlatObj.provider : '';
|
|
||||||
}
|
}
|
||||||
|
|
||||||
// Restore workflow specific params
|
// Restore workflow specific params
|
||||||
@@ -1571,10 +1183,6 @@
|
|||||||
workflowParams.txt.segment_mode = get('translator_txt_segment_mode', 'line');
|
workflowParams.txt.segment_mode = get('translator_txt_segment_mode', 'line');
|
||||||
workflowParams.xlsx.translate_regions = get('translator_xlsx_translate_regions', '');
|
workflowParams.xlsx.translate_regions = get('translator_xlsx_translate_regions', '');
|
||||||
workflowParams.json.json_paths = get('translator_json_paths', '');
|
workflowParams.json.json_paths = get('translator_json_paths', '');
|
||||||
|
|
||||||
// Trigger platform updates to load API keys/models
|
|
||||||
updatePlatformParams(form.platform, 'translator_platform', form);
|
|
||||||
updatePlatformParams(form.glossary_agent_platform, 'glossary_agent_platform', form, true);
|
|
||||||
};
|
};
|
||||||
|
|
||||||
// --- 新增:专门用于将当前 form 数据全部写入 localStorage 的函数 ---
|
// --- 新增:专门用于将当前 form 数据全部写入 localStorage 的函数 ---
|
||||||
@@ -1604,7 +1212,7 @@
|
|||||||
s('translator_formula_ocr', f.formula_ocr);
|
s('translator_formula_ocr', f.formula_ocr);
|
||||||
s('translator_code_ocr', f.code_ocr);
|
s('translator_code_ocr', f.code_ocr);
|
||||||
s('translator_skip_translate', f.skip_translate);
|
s('translator_skip_translate', f.skip_translate);
|
||||||
s('translator_platform_last_platform', f.platform);
|
s('translator_model_preset', f.model_preset);
|
||||||
s('translator_system_proxy_enable', f.system_proxy_enable);
|
s('translator_system_proxy_enable', f.system_proxy_enable);
|
||||||
s('translator_force_json', f.force_json);
|
s('translator_force_json', f.force_json);
|
||||||
s('translator_to_lang', f.to_lang);
|
s('translator_to_lang', f.to_lang);
|
||||||
@@ -1620,36 +1228,7 @@
|
|||||||
s('rpm', f.rpm || '');
|
s('rpm', f.rpm || '');
|
||||||
s('tpm', f.tpm || '');
|
s('tpm', f.tpm || '');
|
||||||
|
|
||||||
// 平台相关 (API Key 等)
|
// 2. 自动循环保存所有具体工作流参数 (txt, docx, xlsx...)
|
||||||
s(`translator_platform_${f.platform}_apikey`, f.api_key);
|
|
||||||
s(`translator_platform_${f.platform}_model_id`, f.model_id);
|
|
||||||
s('translator_provider', f.provider);
|
|
||||||
if (f.platform === 'custom') s('translator_platform_custom_base_url', f.base_url);
|
|
||||||
|
|
||||||
// 2. 术语表相关
|
|
||||||
s('glossary_generate_enable', f.glossary_generate_enable);
|
|
||||||
s('glossary_agent_custom_prompt', f.glossary_agent_custom_prompt);
|
|
||||||
s('glossary_agent_config_choice', f.glossary_agent_config_choice);
|
|
||||||
s('glossary_agent_platform_last_platform', f.glossary_agent_platform);
|
|
||||||
s('glossary_agent_to_lang', f.glossary_agent_to_lang);
|
|
||||||
s('glossary_agent_custom_to_lang', f.glossary_agent_custom_to_lang);
|
|
||||||
s('glossary_agent_chunk_size', f.glossary_agent_chunk_size);
|
|
||||||
s('glossary_agent_concurrent', f.glossary_agent_concurrent);
|
|
||||||
s('glossary_agent_temperature', f.glossary_agent_temperature);
|
|
||||||
s('glossary_agent_retry', f.glossary_agent_retry);
|
|
||||||
s('glossary_agent_thinking_mode', f.glossary_agent_thinking);
|
|
||||||
s('glossary_agent_system_proxy_enable', f.glossary_agent_system_proxy_enable);
|
|
||||||
s('glossary_agent_force_json', f.glossary_agent_force_json);
|
|
||||||
s('glossary_agent_rpm', f.glossary_agent_rpm || '');
|
|
||||||
s('glossary_agent_tpm', f.glossary_agent_tpm || '');
|
|
||||||
|
|
||||||
// 术语表平台 Key
|
|
||||||
s(`glossary_agent_platform_${f.glossary_agent_platform}_apikey`, f.glossary_agent_key);
|
|
||||||
s(`glossary_agent_platform_${f.glossary_agent_platform}_model_id`, f.glossary_agent_model_id);
|
|
||||||
s('glossary_agent_provider', f.glossary_agent_provider);
|
|
||||||
if (f.glossary_agent_platform === 'custom') s('glossary_agent_platform_custom_base_url', f.glossary_agent_baseurl);
|
|
||||||
|
|
||||||
// 3. 自动循环保存所有具体工作流参数 (txt, docx, xlsx...)
|
|
||||||
for (const [wfType, params] of Object.entries(workflowParams)) {
|
for (const [wfType, params] of Object.entries(workflowParams)) {
|
||||||
for (const [key, val] of Object.entries(params)) {
|
for (const [key, val] of Object.entries(params)) {
|
||||||
s(`translator_${wfType}_${key}`, val);
|
s(`translator_${wfType}_${key}`, val);
|
||||||
@@ -1657,34 +1236,22 @@
|
|||||||
}
|
}
|
||||||
};
|
};
|
||||||
|
|
||||||
const updatePlatformParams = (plat, prefix, target, isGlossary = false) => {
|
|
||||||
const get = (k) => localStorage.getItem(k) || '';
|
|
||||||
if (isGlossary) {
|
|
||||||
target.glossary_agent_key = get(`${prefix}_${plat}_apikey`);
|
|
||||||
target.glossary_agent_model_id = get(`${prefix}_${plat}_model_id`);
|
|
||||||
target.glossary_agent_baseurl = plat === 'custom' ? get(`${prefix}_custom_base_url`) : plat;
|
|
||||||
} else {
|
|
||||||
target.api_key = get(`${prefix}_${plat}_apikey`);
|
|
||||||
target.model_id = get(`${prefix}_${plat}_model_id`);
|
|
||||||
target.base_url = plat === 'custom' ? get(`${prefix}_custom_base_url`) : plat;
|
|
||||||
}
|
|
||||||
};
|
|
||||||
|
|
||||||
watch(() => form.platform, (n) => {
|
|
||||||
updatePlatformParams(n, 'translator_platform', form);
|
|
||||||
});
|
|
||||||
|
|
||||||
watch(() => form.glossary_agent_platform, (n) => {
|
|
||||||
updatePlatformParams(n, 'glossary_agent_platform', form, true);
|
|
||||||
});
|
|
||||||
|
|
||||||
const t = (k) => {
|
const t = (k) => {
|
||||||
const dict = i18nData.value[currentLang.value] || i18nData.value['zh'] || {};
|
const dict = i18nData.value[currentLang.value] || i18nData.value['en'] || {};
|
||||||
return dict[k] || k;
|
return dict[k] || k;
|
||||||
};
|
};
|
||||||
const capitalize = (s) => s.charAt(0).toUpperCase() + s.slice(1);
|
const capitalize = (s) => s.charAt(0).toUpperCase() + s.slice(1);
|
||||||
const saveSetting = (k, v) => localStorage.setItem(k, v);
|
const saveSetting = (k, v) => localStorage.setItem(k, v);
|
||||||
const saveSettingArray = (k, v) => localStorage.setItem(k, JSON.stringify(v));
|
const saveSettingArray = (k, v) => localStorage.setItem(k, JSON.stringify(v));
|
||||||
|
const syncModelPresetSelection = () => {
|
||||||
|
const validPresetIds = modelPresets.value.map(p => p.id);
|
||||||
|
const fallbackPreset = validPresetIds.includes(defaultModelPreset.value)
|
||||||
|
? defaultModelPreset.value
|
||||||
|
: (validPresetIds[0] || '');
|
||||||
|
if (!validPresetIds.includes(form.model_preset)) {
|
||||||
|
form.model_preset = fallbackPreset;
|
||||||
|
}
|
||||||
|
};
|
||||||
|
|
||||||
const saveWorkflowParam = (keySuffix) => {
|
const saveWorkflowParam = (keySuffix) => {
|
||||||
const wf = form.workflow_type;
|
const wf = form.workflow_type;
|
||||||
@@ -1751,12 +1318,11 @@
|
|||||||
// Dynamic Step Numbering
|
// Dynamic Step Numbering
|
||||||
const stepMap = computed(() => {
|
const stepMap = computed(() => {
|
||||||
let step = 2;
|
let step = 2;
|
||||||
const map = {specific: 0, parsing: 0, ai: 0, trans: 0, glossary: 0};
|
const map = {specific: 0, parsing: 0, ai: 0, trans: 0};
|
||||||
if (currentWorkflowConfig.value) map.specific = step++;
|
if (currentWorkflowConfig.value) map.specific = step++;
|
||||||
if (form.workflow_type === 'markdown_based') map.parsing = step++;
|
if (form.workflow_type === 'markdown_based') map.parsing = step++;
|
||||||
map.ai = step++;
|
map.ai = step++;
|
||||||
if (!form.skip_translate) map.trans = step++;
|
if (!form.skip_translate) map.trans = step++;
|
||||||
map.glossary = step++;
|
|
||||||
return map;
|
return map;
|
||||||
});
|
});
|
||||||
|
|
||||||
@@ -1861,10 +1427,7 @@
|
|||||||
// Clone basic form
|
// Clone basic form
|
||||||
const basePayload = {
|
const basePayload = {
|
||||||
skip_translate: form.skip_translate,
|
skip_translate: form.skip_translate,
|
||||||
base_url: emptyToNull(form.base_url),
|
model_preset: emptyToNull(form.model_preset),
|
||||||
api_key: form.api_key || "",
|
|
||||||
model_id: emptyToNull(form.model_id),
|
|
||||||
provider: emptyToNull(form.provider), // Add provider
|
|
||||||
to_lang: form.to_lang === 'custom' ? form.custom_to_lang : form.to_lang,
|
to_lang: form.to_lang === 'custom' ? form.custom_to_lang : form.to_lang,
|
||||||
thinking: form.thinking,
|
thinking: form.thinking,
|
||||||
chunk_size: Number(form.chunk_size),
|
chunk_size: Number(form.chunk_size),
|
||||||
@@ -1875,33 +1438,11 @@
|
|||||||
glossary_dict: Object.keys(glossaryData.value).length ? glossaryData.value : null,
|
glossary_dict: Object.keys(glossaryData.value).length ? glossaryData.value : null,
|
||||||
system_proxy_enable: form.system_proxy_enable,
|
system_proxy_enable: form.system_proxy_enable,
|
||||||
force_json: form.force_json,
|
force_json: form.force_json,
|
||||||
glossary_generate_enable: form.glossary_generate_enable,
|
|
||||||
workflow_type: form.workflow_type,
|
workflow_type: form.workflow_type,
|
||||||
rpm: emptyToNull(form.rpm),
|
rpm: emptyToNull(form.rpm),
|
||||||
tpm: emptyToNull(form.tpm)
|
tpm: emptyToNull(form.tpm)
|
||||||
};
|
};
|
||||||
|
|
||||||
// Agent Config
|
|
||||||
if (basePayload.glossary_generate_enable) {
|
|
||||||
const isCustom = form.glossary_agent_config_choice === 'custom';
|
|
||||||
basePayload.glossary_agent_config = {
|
|
||||||
base_url: isCustom ? emptyToNull(form.glossary_agent_baseurl) : basePayload.base_url,
|
|
||||||
api_key: isCustom ? (form.glossary_agent_key || "") : basePayload.api_key,
|
|
||||||
model_id: isCustom ? emptyToNull(form.glossary_agent_model_id) : basePayload.model_id,
|
|
||||||
provider: isCustom ? emptyToNull(form.glossary_agent_provider) : basePayload.provider, // Add provider
|
|
||||||
to_lang: isCustom ? (form.glossary_agent_to_lang === 'custom' ? form.glossary_agent_custom_to_lang : form.glossary_agent_to_lang) : basePayload.to_lang,
|
|
||||||
custom_prompt: emptyToNull(form.glossary_agent_custom_prompt),
|
|
||||||
temperature: isCustom ? Number(form.glossary_agent_temperature) : basePayload.temperature,
|
|
||||||
concurrent: isCustom ? Number(form.glossary_agent_concurrent) : basePayload.concurrent,
|
|
||||||
retry: isCustom ? Number(form.glossary_agent_retry) : basePayload.retry,
|
|
||||||
thinking: isCustom ? form.glossary_agent_thinking : basePayload.thinking,
|
|
||||||
system_proxy_enable: isCustom ? form.glossary_agent_system_proxy_enable : basePayload.system_proxy_enable,
|
|
||||||
chunk_size: isCustom ? Number(form.glossary_agent_chunk_size) : basePayload.chunk_size,
|
|
||||||
force_json: isCustom ? form.glossary_agent_force_json : basePayload.force_json,
|
|
||||||
rpm: isCustom ? emptyToNull(form.glossary_agent_rpm) : basePayload.rpm,
|
|
||||||
tpm: isCustom ? emptyToNull(form.glossary_agent_tpm) : basePayload.tpm
|
|
||||||
};
|
|
||||||
}
|
|
||||||
|
|
||||||
// Specific Workflow Params
|
// Specific Workflow Params
|
||||||
if (form.workflow_type === 'markdown_based') {
|
if (form.workflow_type === 'markdown_based') {
|
||||||
@@ -1959,12 +1500,8 @@
|
|||||||
Object.keys(errors).forEach(k => errors[k] = false);
|
Object.keys(errors).forEach(k => errors[k] = false);
|
||||||
|
|
||||||
if (!form.skip_translate) {
|
if (!form.skip_translate) {
|
||||||
if (!form.model_id) {
|
if (!form.model_preset) {
|
||||||
errors.model_id = true;
|
errors.model_preset = true;
|
||||||
isValid = false;
|
|
||||||
}
|
|
||||||
if (form.platform === 'custom' && !form.base_url) {
|
|
||||||
errors.base_url = true;
|
|
||||||
isValid = false;
|
isValid = false;
|
||||||
}
|
}
|
||||||
if (form.to_lang === 'custom' && !form.custom_to_lang) {
|
if (form.to_lang === 'custom' && !form.custom_to_lang) {
|
||||||
@@ -2303,6 +1840,10 @@
|
|||||||
const data = JSON.parse(ev.target.result);
|
const data = JSON.parse(ev.target.result);
|
||||||
if (data.form) Object.assign(form, data.form);
|
if (data.form) Object.assign(form, data.form);
|
||||||
if (data.workflowParams) Object.assign(workflowParams, data.workflowParams);
|
if (data.workflowParams) Object.assign(workflowParams, data.workflowParams);
|
||||||
|
['platform', 'base_url', 'api_key', 'model_id', 'provider'].forEach((key) => {
|
||||||
|
if (key in form) delete form[key];
|
||||||
|
});
|
||||||
|
syncModelPresetSelection();
|
||||||
saveAllSettings();
|
saveAllSettings();
|
||||||
alert(t('configImportSuccess'));
|
alert(t('configImportSuccess'));
|
||||||
} catch (err) {
|
} catch (err) {
|
||||||
@@ -2317,7 +1858,10 @@
|
|||||||
const setLang = (l) => {
|
const setLang = (l) => {
|
||||||
currentLang.value = l;
|
currentLang.value = l;
|
||||||
localStorage.setItem('ui_language', l);
|
localStorage.setItem('ui_language', l);
|
||||||
document.documentElement.lang = l === 'zh' ? 'zh-CN' : 'en';
|
const langMap = {zh: 'zh-CN', en: 'en', id: 'id'};
|
||||||
|
document.documentElement.lang = langMap[l] || 'en';
|
||||||
|
const dict = i18nData.value[l] || i18nData.value['en'] || {};
|
||||||
|
document.title = dict['pageTitle'] || 'DocuTranslate';
|
||||||
};
|
};
|
||||||
const setTheme = (t) => {
|
const setTheme = (t) => {
|
||||||
localStorage.setItem('theme', t);
|
localStorage.setItem('theme', t);
|
||||||
@@ -2331,6 +1875,12 @@
|
|||||||
const res = await fetch("/static/i18nData.json");
|
const res = await fetch("/static/i18nData.json");
|
||||||
i18nData.value = await res.json();
|
i18nData.value = await res.json();
|
||||||
|
|
||||||
|
// Backward compat: ensure id lang pack exists on servers not yet updated
|
||||||
|
if (!i18nData.value.id) {
|
||||||
|
console.warn('id language pack missing, using en as fallback');
|
||||||
|
i18nData.value.id = i18nData.value.en || {};
|
||||||
|
}
|
||||||
|
|
||||||
// Add new missing translations for Mineru Deploy
|
// Add new missing translations for Mineru Deploy
|
||||||
const extraZh = {
|
const extraZh = {
|
||||||
mineruDeployParseMethodLabel: "解析方法 (Parse Method)",
|
mineruDeployParseMethodLabel: "解析方法 (Parse Method)",
|
||||||
@@ -2344,6 +1894,7 @@
|
|||||||
if(i18nData.value.en) Object.assign(i18nData.value.en, extraEn);
|
if(i18nData.value.en) Object.assign(i18nData.value.en, extraEn);
|
||||||
|
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
|
console.error("i18n load failed", e);
|
||||||
i18nData.value = {
|
i18nData.value = {
|
||||||
zh: {
|
zh: {
|
||||||
pageTitle: "DocuTranslate",
|
pageTitle: "DocuTranslate",
|
||||||
@@ -2351,6 +1902,10 @@
|
|||||||
projectContributeBtn: "项目协作",
|
projectContributeBtn: "项目协作",
|
||||||
workflowTitle: "选择工作流",
|
workflowTitle: "选择工作流",
|
||||||
autoWorkflowLabel: "自动选择工作流",
|
autoWorkflowLabel: "自动选择工作流",
|
||||||
|
modelPresetLabel: "模型预设",
|
||||||
|
modelPresetPlaceholder: "请选择模型预设",
|
||||||
|
modelPresetEmpty: "请先在服务端环境变量中配置模型预设",
|
||||||
|
modelPresetRuntimeHint: "运行时将从服务端环境变量读取供应商、模型端点与 API Key。",
|
||||||
workflowOptionPptx: "PPTX 演示文稿",
|
workflowOptionPptx: "PPTX 演示文稿",
|
||||||
pptxSettingsTitleText: "PPTX 设置",
|
pptxSettingsTitleText: "PPTX 设置",
|
||||||
mineruDeployServerUrlLabel: "Server URL",
|
mineruDeployServerUrlLabel: "Server URL",
|
||||||
@@ -2364,6 +1919,10 @@
|
|||||||
tutorialBtn: "Tutorial",
|
tutorialBtn: "Tutorial",
|
||||||
projectContributeBtn: "Contribute",
|
projectContributeBtn: "Contribute",
|
||||||
workflowTitle: "Select Workflow",
|
workflowTitle: "Select Workflow",
|
||||||
|
modelPresetLabel: "Model Preset",
|
||||||
|
modelPresetPlaceholder: "Select a model preset",
|
||||||
|
modelPresetEmpty: "Configure model presets in server environment variables first",
|
||||||
|
modelPresetRuntimeHint: "Provider, endpoint, and API key will be loaded from server environment variables at runtime.",
|
||||||
workflowOptionPptx: "PPTX Presentation",
|
workflowOptionPptx: "PPTX Presentation",
|
||||||
pptxSettingsTitleText: "PPTX Settings",
|
pptxSettingsTitleText: "PPTX Settings",
|
||||||
mineruDeployServerUrlLabel: "Server URL",
|
mineruDeployServerUrlLabel: "Server URL",
|
||||||
@@ -2371,19 +1930,54 @@
|
|||||||
mineruDeployServerUrlPlaceholder: "http://127.0.0.1:30000",
|
mineruDeployServerUrlPlaceholder: "http://127.0.0.1:30000",
|
||||||
mineruDeployParseMethodLabel: "Parse Method",
|
mineruDeployParseMethodLabel: "Parse Method",
|
||||||
mineruDeployTableEnableLabel: "Table Recognition"
|
mineruDeployTableEnableLabel: "Table Recognition"
|
||||||
|
},
|
||||||
|
id: {
|
||||||
|
pageTitle: "DocuTranslate",
|
||||||
|
tutorialBtn: "Tutorial",
|
||||||
|
projectContributeBtn: "Kolaborasi",
|
||||||
|
workflowTitle: "Pilih Alur Kerja",
|
||||||
|
autoWorkflowLabel: "Pilih Otomatis",
|
||||||
|
modelPresetLabel: "Preset Model",
|
||||||
|
modelPresetPlaceholder: "Pilih preset model",
|
||||||
|
modelPresetEmpty: "Konfigurasi preset di server",
|
||||||
|
modelPresetRuntimeHint: "Provider, endpoint, dan API key akan dibaca dari environment server.",
|
||||||
|
workflowOptionPptx: "Presentasi PPTX",
|
||||||
|
pptxSettingsTitleText: "Pengaturan PPTX",
|
||||||
|
mineruDeployServerUrlLabel: "Server URL",
|
||||||
|
mineruDeployLangListLabel: "Daftar Bahasa",
|
||||||
|
mineruDeployServerUrlPlaceholder: "http://127.0.0.1:30000",
|
||||||
|
mineruDeployParseMethodLabel: "Parse Method",
|
||||||
|
mineruDeployTableEnableLabel: "Table Recognition"
|
||||||
}
|
}
|
||||||
};
|
};
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// Set initial page title based on detected language
|
||||||
|
const initDict = i18nData.value[currentLang.value] || i18nData.value['en'] || {};
|
||||||
|
document.title = initDict['pageTitle'] || 'DocuTranslate';
|
||||||
|
|
||||||
// Backend Metadata
|
// Backend Metadata
|
||||||
try {
|
try {
|
||||||
const [metaRes, enginRes, paramsRes] = await Promise.all([
|
const [metaRes, enginRes, paramsRes, configRes] = await Promise.all([
|
||||||
fetch("/service/meta"), fetch('/service/engin-list'), fetch("/service/default-params")
|
fetch("/service/meta"), fetch('/service/engin-list'), fetch("/service/default-params"),
|
||||||
|
fetch("/api/config")
|
||||||
]);
|
]);
|
||||||
const meta = await metaRes.json();
|
const meta = await metaRes.json();
|
||||||
version.value = meta.version;
|
version.value = meta.version;
|
||||||
enginList.value = await enginRes.json();
|
enginList.value = await enginRes.json();
|
||||||
Object.assign(defaultParams, await paramsRes.json());
|
Object.assign(defaultParams, await paramsRes.json());
|
||||||
|
const envConfig = await configRes.json().catch(() => ({}));
|
||||||
|
modelPresets.value = Array.isArray(envConfig.model_presets) ? envConfig.model_presets : [];
|
||||||
|
defaultModelPreset.value = envConfig.default_model_preset || (modelPresets.value[0]?.id || '');
|
||||||
|
if (defaultModelPreset.value && !localStorage.getItem('translator_model_preset')) {
|
||||||
|
localStorage.setItem('translator_model_preset', defaultModelPreset.value);
|
||||||
|
}
|
||||||
|
if (envConfig.rpm != null && !localStorage.getItem('rpm')) {
|
||||||
|
localStorage.setItem('rpm', String(envConfig.rpm));
|
||||||
|
}
|
||||||
|
if (envConfig.tpm != null && !localStorage.getItem('tpm')) {
|
||||||
|
localStorage.setItem('tpm', String(envConfig.tpm));
|
||||||
|
}
|
||||||
} catch (e) {
|
} catch (e) {
|
||||||
console.error("Backend init failed", e);
|
console.error("Backend init failed", e);
|
||||||
}
|
}
|
||||||
@@ -2418,6 +2012,7 @@
|
|||||||
|
|
||||||
return {
|
return {
|
||||||
version, currentLang, i18nData, glossaryData, glossaryCount, tasks, enginList, defaultParams,
|
version, currentLang, i18nData, glossaryData, glossaryCount, tasks, enginList, defaultParams,
|
||||||
|
modelPresets,
|
||||||
form, workflowParams, showMineruToken, previewMode, syncScrollEnabled, showIdentityOption,
|
form, workflowParams, showMineruToken, previewMode, syncScrollEnabled, showIdentityOption,
|
||||||
errors, clearError,
|
errors, clearError,
|
||||||
t, createNewTask, removeTask, handleTaskFileSelect, handleTaskFileDrop, toggleTaskState,
|
t, createNewTask, removeTask, handleTaskFileSelect, handleTaskFileDrop, toggleTaskState,
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ from collections import defaultdict
|
|||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from typing import Self, Literal, List, Dict, Any, Tuple
|
from typing import Self, Literal, List, Dict, Any, Tuple, Optional
|
||||||
|
|
||||||
import docx
|
import docx
|
||||||
from docx.document import Document as DocumentObject
|
from docx.document import Document as DocumentObject
|
||||||
@@ -24,19 +24,6 @@ from docutranslate.translator.ai_translator.base import AiTranslatorConfig, AiTr
|
|||||||
|
|
||||||
# ---------------- 辅助函数 ----------------
|
# ---------------- 辅助函数 ----------------
|
||||||
|
|
||||||
# [v6.2] 定义一组具有显著视觉效果的格式标签。
|
|
||||||
# 我们只在 Run 包含这些格式时才将其视为空白格式边界。
|
|
||||||
# 这避免了因字体、字号等微小变化导致的过度文本切分。
|
|
||||||
SIGNIFICANT_STYLES = frozenset([
|
|
||||||
qn('w:u'), # 下划线
|
|
||||||
qn('w:strike'), # 删除线
|
|
||||||
qn('w:dstrike'), # 双删除线
|
|
||||||
qn('w:shd'), # 底纹/背景色
|
|
||||||
qn('w:highlight'), # 荧光笔高亮
|
|
||||||
qn('w:bdr'), # 边框
|
|
||||||
qn('w:effectLst'), # 文本效果 (如发光、阴影)
|
|
||||||
qn('w:em'), # 强调标记 (着重号)
|
|
||||||
])
|
|
||||||
|
|
||||||
|
|
||||||
def is_image_run(run: Run) -> bool:
|
def is_image_run(run: Run) -> bool:
|
||||||
@@ -45,31 +32,13 @@ def is_image_run(run: Run) -> bool:
|
|||||||
return '<w:drawing' in xml or '<w:pict' in xml
|
return '<w:drawing' in xml or '<w:pict' in xml
|
||||||
|
|
||||||
|
|
||||||
def is_formatting_only_run(run: Run) -> bool:
|
def is_instr_text_run(run: Run) -> bool:
|
||||||
"""
|
"""
|
||||||
检查一个 Run 是否仅用于格式化,不包含任何应被渲染的文本。
|
检查 Run 是否包含域指令文本 (w:instrText)。
|
||||||
这仅适用于其 .text 属性为 "" 的情况。
|
目录(TOC)、页码等功能的指令代码存储在此标签中。
|
||||||
|
必须跳过这些 Run,否则写入 text 会破坏域结构。
|
||||||
"""
|
"""
|
||||||
return run.text == ""
|
return run.element.find(qn('w:instrText')) is not None
|
||||||
|
|
||||||
|
|
||||||
# ---------- 新增修改部分 1: is_styled_whitespace_run 函数被移除 ----------
|
|
||||||
# 此函数不再需要,因为新的逻辑会根据格式变化来切分,而不是根据带格式的空格。
|
|
||||||
# ---------------------- 修改结束 ----------------------
|
|
||||||
|
|
||||||
def is_tab_run(run: Run) -> bool:
|
|
||||||
"""
|
|
||||||
检查一个 Run 是否主要代表一个制表符,应被视作格式边界。
|
|
||||||
仅当 Run 的文本内容为空或仅包含空白,且 XML 中存在 <w:tab/> 时,
|
|
||||||
才将其视为纯格式化用途的 Run。
|
|
||||||
"""
|
|
||||||
# .text 属性会将 <w:tab/> 转换成 '\t'
|
|
||||||
# 如果 .text 在去除空白后仍有内容,说明这个 Run 不仅仅是个制表符。
|
|
||||||
if run.text.strip():
|
|
||||||
return False
|
|
||||||
|
|
||||||
xml = getattr(run.element, 'xml', '')
|
|
||||||
return '<w:tab' in xml or '<w:ptab' in xml
|
|
||||||
|
|
||||||
|
|
||||||
# ---------------- 配置类 ----------------
|
# ---------------- 配置类 ----------------
|
||||||
@@ -77,6 +46,7 @@ def is_tab_run(run: Run) -> bool:
|
|||||||
class DocxTranslatorConfig(AiTranslatorConfig):
|
class DocxTranslatorConfig(AiTranslatorConfig):
|
||||||
insert_mode: Literal["replace", "append", "prepend"] = "replace"
|
insert_mode: Literal["replace", "append", "prepend"] = "replace"
|
||||||
separator: str = "\n"
|
separator: str = "\n"
|
||||||
|
office_password: Optional[str] = None
|
||||||
|
|
||||||
|
|
||||||
# ---------------- 主类 ----------------
|
# ---------------- 主类 ----------------
|
||||||
@@ -110,14 +80,6 @@ class DocxTranslator(AiTranslator):
|
|||||||
[v6.0 - 语义切分重构版]
|
[v6.0 - 语义切分重构版]
|
||||||
- 重构核心逻辑,不再跳过域结果,而是将其作为语义边界来切分文本,增强了鲁棒性。
|
- 重构核心逻辑,不再跳过域结果,而是将其作为语义边界来切分文本,增强了鲁棒性。
|
||||||
"""
|
"""
|
||||||
IGNORED_TAGS = {
|
|
||||||
qn('w:proofErr'), qn('w:lastRenderedPageBreak'), qn('w:bookmarkStart'),
|
|
||||||
qn('w:bookmarkEnd'), qn('w:commentRangeStart'), qn('w:commentRangeEnd'),
|
|
||||||
qn('w:del'), qn('w:ins'), qn('w:moveFrom'), qn('w:moveTo'),
|
|
||||||
}
|
|
||||||
RECURSIVE_CONTAINER_TAGS = {
|
|
||||||
qn('w:smartTag'), qn('w:sdtContent'), qn('w:hyperlink'),
|
|
||||||
}
|
|
||||||
|
|
||||||
def __init__(self, config: DocxTranslatorConfig):
|
def __init__(self, config: DocxTranslatorConfig):
|
||||||
super().__init__(config=config)
|
super().__init__(config=config)
|
||||||
@@ -138,127 +100,99 @@ class DocxTranslator(AiTranslator):
|
|||||||
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
||||||
self.insert_mode = config.insert_mode
|
self.insert_mode = config.insert_mode
|
||||||
self.separator = config.separator
|
self.separator = config.separator
|
||||||
|
self.office_password = config.office_password
|
||||||
|
|
||||||
# ---------- 新增修改部分 2: 增加用于比较格式的辅助函数 ----------
|
def _decrypt_if_needed(self, content: bytes) -> bytes:
|
||||||
def _get_significant_styles(self, run: Run) -> frozenset:
|
"""如果文件加密则解密,否则返回原内容。"""
|
||||||
"""从一个 Run 中提取“显著”格式标签的集合。"""
|
try:
|
||||||
if run is None:
|
import msoffcrypto
|
||||||
return frozenset()
|
from io import BytesIO as BIO
|
||||||
rPr = run.element.rPr
|
file_stream = BIO(content)
|
||||||
if rPr is None:
|
try:
|
||||||
return frozenset()
|
office_file = msoffcrypto.OfficeFile(file_stream)
|
||||||
return frozenset(child.tag for child in rPr if child.tag in SIGNIFICANT_STYLES)
|
if office_file.is_encrypted():
|
||||||
|
if not self.office_password:
|
||||||
|
raise ValueError("此DOCX文件已加密,但未提供密码。")
|
||||||
|
decrypted = BIO()
|
||||||
|
office_file.load_key(password=self.office_password)
|
||||||
|
office_file.decrypt(decrypted)
|
||||||
|
return decrypted.getvalue()
|
||||||
|
return content
|
||||||
|
finally:
|
||||||
|
file_stream.close()
|
||||||
|
except ImportError:
|
||||||
|
return content
|
||||||
|
|
||||||
def _have_same_significant_styles(self, run1: Run, run2: Run) -> bool:
|
@staticmethod
|
||||||
"""检查两个 Run 是否具有相同的“显著”格式集合。"""
|
def _run_format_key(run: Run):
|
||||||
styles1 = self._get_significant_styles(run1)
|
"""生成 Run 的格式签名,用于合并相同格式的 Run。"""
|
||||||
styles2 = self._get_significant_styles(run2)
|
return (
|
||||||
return styles1 == styles2
|
run.bold,
|
||||||
|
run.italic,
|
||||||
|
run.underline,
|
||||||
|
run.font.size if run.font.size else None,
|
||||||
|
str(run.font.color.rgb) if run.font.color and run.font.color.rgb else None,
|
||||||
|
run.font.name or None,
|
||||||
|
)
|
||||||
|
|
||||||
# ---------------------- 修改结束 ----------------------
|
def _merge_adjacent_runs(self, runs: List[Run]) -> List[Run]:
|
||||||
|
"""
|
||||||
|
合并相邻的、格式完全相同的 Run,同时累积文本到第一个 Run。
|
||||||
|
解决 Word 因修订历史/变更追踪产生的微观 Run 碎片问题(单个字符一个 Run)。
|
||||||
|
"""
|
||||||
|
if len(runs) <= 1:
|
||||||
|
return runs
|
||||||
|
|
||||||
# ---------- 代码修改部分 1: 形状翻译逻辑的核心实现 ----------
|
merged = []
|
||||||
def _process_element_children(self, element, parent_paragraph: Paragraph, elements: List[Dict[str, Any]],
|
group_start = 0
|
||||||
texts: List[str],
|
for i in range(1, len(runs)):
|
||||||
state: Dict[str, Any],
|
if self._run_format_key(runs[i]) != self._run_format_key(runs[group_start]):
|
||||||
top_level_para: Paragraph):
|
# Format boundary: finalize the current group
|
||||||
|
if i - group_start > 1:
|
||||||
|
# Merge: accumulate all text into first run, delete the rest
|
||||||
|
runs[group_start].text = "".join(r.text for r in runs[group_start:i])
|
||||||
|
for r in runs[group_start + 1:i]:
|
||||||
|
self._remove_run_element(r)
|
||||||
|
merged.append(runs[group_start])
|
||||||
|
group_start = i
|
||||||
|
|
||||||
def flush_segment():
|
# Final group
|
||||||
current_runs = state['current_runs']
|
if len(runs) - group_start > 1:
|
||||||
if not current_runs:
|
runs[group_start].text = "".join(r.text for r in runs[group_start:])
|
||||||
return
|
for r in runs[group_start + 1:]:
|
||||||
full_text = "".join(r.text for r in current_runs)
|
self._remove_run_element(r)
|
||||||
if full_text.strip():
|
merged.append(runs[group_start])
|
||||||
# 在 elements 中增加对父段落和顶级段落的引用
|
|
||||||
elements.append({
|
|
||||||
"type": "text_runs",
|
|
||||||
"runs": list(current_runs),
|
|
||||||
"paragraph": parent_paragraph,
|
|
||||||
"top_level_paragraph": top_level_para
|
|
||||||
})
|
|
||||||
texts.append(full_text)
|
|
||||||
state['current_runs'].clear()
|
|
||||||
|
|
||||||
for child in element:
|
return merged
|
||||||
if child.tag in self.IGNORED_TAGS:
|
|
||||||
continue
|
|
||||||
|
|
||||||
if child.tag in self.RECURSIVE_CONTAINER_TAGS:
|
|
||||||
flush_segment()
|
|
||||||
self._process_element_children(child, parent_paragraph, elements, texts, state, top_level_para)
|
|
||||||
flush_segment() # 在递归容器后也刷新,确保其内容成为独立片段
|
|
||||||
continue
|
|
||||||
|
|
||||||
field_char_element = child.find(qn('w:fldChar')) if isinstance(child, CT_R) else None
|
|
||||||
if field_char_element is not None:
|
|
||||||
fld_type = field_char_element.get(qn('w:fldCharType'))
|
|
||||||
if fld_type == 'begin' or fld_type == 'end':
|
|
||||||
flush_segment()
|
|
||||||
continue
|
|
||||||
|
|
||||||
if isinstance(child, CT_R):
|
|
||||||
# 传入 parent_paragraph 以确保 Run 对象具有正确的上下文
|
|
||||||
run = Run(child, parent_paragraph)
|
|
||||||
|
|
||||||
# 新增逻辑:处理形状(drawing/pict)内的文本
|
|
||||||
# 形状可以包含文本框,需要优先于图片处理逻辑进行解析
|
|
||||||
if '<w:drawing' in run.element.xml or '<w:pict' in run.element.xml:
|
|
||||||
# 使用 list() 消耗迭代器,以便检查是否找到了文本框
|
|
||||||
text_boxes = list(run.element.iter(qn('w:txbxContent')))
|
|
||||||
if text_boxes:
|
|
||||||
flush_segment() # 包含文本的形状是一个边界,刷新前面的文本
|
|
||||||
for txbx_content in text_boxes:
|
|
||||||
# 遍历文本框内的所有段落
|
|
||||||
for p_element in txbx_content.findall(qn('w:p')):
|
|
||||||
# 创建新的段落对象,并传入父级上下文
|
|
||||||
shape_para = Paragraph(p_element, parent_paragraph)
|
|
||||||
# 递归处理该段落,并传递顶级段落上下文
|
|
||||||
self._process_paragraph(shape_para, elements, texts, top_level_para=top_level_para)
|
|
||||||
|
|
||||||
# 如果处理了形状内的文本,则该 Run 的任务已完成
|
|
||||||
continue
|
|
||||||
|
|
||||||
# 保留原有逻辑: 检查绝对边界(图片、制表符等)
|
|
||||||
if is_image_run(run) or is_formatting_only_run(run) or is_tab_run(run):
|
|
||||||
flush_segment()
|
|
||||||
continue # 这些 Run 本身不包含在任何文本片段中
|
|
||||||
|
|
||||||
# 保留原有逻辑: 基于格式变化进行切分
|
|
||||||
last_run_in_segment = state['current_runs'][-1] if state['current_runs'] else None
|
|
||||||
if last_run_in_segment and not self._have_same_significant_styles(last_run_in_segment, run):
|
|
||||||
flush_segment()
|
|
||||||
|
|
||||||
# 将当前 Run 添加到片段中
|
|
||||||
state['current_runs'].append(run)
|
|
||||||
else:
|
|
||||||
# 遇到任何非 Run 的块级元素(如在单元格中嵌套的表格),都应结束当前文本片段。
|
|
||||||
flush_segment()
|
|
||||||
|
|
||||||
def _process_paragraph(self, para: Paragraph, elements: List[Dict[str, Any]], texts: List[str],
|
def _process_paragraph(self, para: Paragraph, elements: List[Dict[str, Any]], texts: List[str],
|
||||||
top_level_para: Paragraph = None):
|
top_level_para: Paragraph = None):
|
||||||
# 如果是首次进入段落处理(非递归调用),则当前段落是顶级段落
|
"""
|
||||||
|
段落级翻译处理:收集所有文本 Run → 合并相邻同格式 Run → 整段翻译 → 按比例分配。
|
||||||
|
"""
|
||||||
if top_level_para is None:
|
if top_level_para is None:
|
||||||
top_level_para = para
|
top_level_para = para
|
||||||
|
|
||||||
state = {
|
text_runs = []
|
||||||
'current_runs': [],
|
for run in para.runs:
|
||||||
}
|
if is_image_run(run) or is_instr_text_run(run):
|
||||||
# 修改调用:传入 `para` 对象、其顶级上下文
|
continue
|
||||||
self._process_element_children(para._p, para, elements, texts, state, top_level_para)
|
if not run.text.strip():
|
||||||
|
continue
|
||||||
|
text_runs.append(run)
|
||||||
|
|
||||||
# 确保在段落处理结束时,刷新所有剩余的 Run
|
if text_runs:
|
||||||
current_runs = state['current_runs']
|
# Merge adjacent runs with identical formatting to reduce fragmentation
|
||||||
if current_runs:
|
text_runs = self._merge_adjacent_runs(text_runs)
|
||||||
full_text = "".join(r.text for r in current_runs)
|
full_text = "".join(r.text for r in text_runs)
|
||||||
if full_text.strip():
|
if full_text.strip():
|
||||||
elements.append({
|
elements.append({
|
||||||
"type": "text_runs",
|
"type": "text_runs",
|
||||||
"runs": list(current_runs),
|
"runs": list(text_runs),
|
||||||
"paragraph": para,
|
"paragraph": para,
|
||||||
"top_level_paragraph": top_level_para
|
"top_level_paragraph": top_level_para
|
||||||
})
|
})
|
||||||
texts.append(full_text)
|
texts.append(full_text)
|
||||||
current_runs.clear()
|
|
||||||
|
|
||||||
# ---------------------- 修改结束 ----------------------
|
# ---------------------- 修改结束 ----------------------
|
||||||
|
|
||||||
@@ -301,7 +235,8 @@ class DocxTranslator(AiTranslator):
|
|||||||
self._process_body_elements(parent_element, container, elements, texts)
|
self._process_body_elements(parent_element, container, elements, texts)
|
||||||
|
|
||||||
def _pre_translate(self, document: Document) -> Tuple[DocumentObject, List[Dict[str, Any]], List[str]]:
|
def _pre_translate(self, document: Document) -> Tuple[DocumentObject, List[Dict[str, Any]], List[str]]:
|
||||||
doc = docx.Document(BytesIO(document.content))
|
content = self._decrypt_if_needed(document.content)
|
||||||
|
doc = docx.Document(BytesIO(content))
|
||||||
elements, texts = [], []
|
elements, texts = [], []
|
||||||
|
|
||||||
self._traverse_container(doc, elements, texts)
|
self._traverse_container(doc, elements, texts)
|
||||||
@@ -326,33 +261,58 @@ class DocxTranslator(AiTranslator):
|
|||||||
runs = element_info["runs"]
|
runs = element_info["runs"]
|
||||||
if not runs: return
|
if not runs: return
|
||||||
|
|
||||||
first_real_run_index = -1
|
# Filter to runs that are still attached to the document
|
||||||
# 找到第一个可以写入文本的run
|
valid_runs = []
|
||||||
for i, run in enumerate(runs):
|
for run in runs:
|
||||||
if run.element.getparent() is not None:
|
if run.element.getparent() is not None:
|
||||||
# 如果 run 是副本的一部分,其 _parent 可能仍然指向原始文档的段落
|
|
||||||
# 但我们需要确保它与 element_info["paragraph"] 同步
|
|
||||||
run._parent = element_info["paragraph"]
|
run._parent = element_info["paragraph"]
|
||||||
run.text = final_text
|
valid_runs.append(run)
|
||||||
first_real_run_index = i
|
|
||||||
break
|
|
||||||
|
|
||||||
# 如果没有找到有效的run(例如,它们都已被删除),则记录警告
|
if not valid_runs:
|
||||||
if first_real_run_index == -1:
|
|
||||||
self.logger.warning(f"无法应用翻译 '{final_text}',因为找不到有效的run。")
|
self.logger.warning(f"无法应用翻译 '{final_text}',因为找不到有效的run。")
|
||||||
return
|
return
|
||||||
|
|
||||||
# 删除所有后续的run,因为它们的文本已经被合并到第一个run中了
|
if len(valid_runs) == 1:
|
||||||
for i in range(first_real_run_index + 1, len(runs)):
|
# Single run: just write the translation
|
||||||
run = runs[i]
|
valid_runs[0].text = final_text
|
||||||
|
return
|
||||||
|
|
||||||
|
# Multiple runs: proportionally distribute translated text to preserve formatting
|
||||||
|
orig_lengths = [len(r.text) for r in valid_runs]
|
||||||
|
total_orig = sum(orig_lengths)
|
||||||
|
final_len = len(final_text)
|
||||||
|
|
||||||
|
if total_orig == 0:
|
||||||
|
valid_runs[0].text = final_text
|
||||||
|
for run in valid_runs[1:]:
|
||||||
|
self._remove_run_element(run)
|
||||||
|
return
|
||||||
|
|
||||||
|
# Distribute characters proportionally
|
||||||
|
char_pos = 0
|
||||||
|
for i, run in enumerate(valid_runs):
|
||||||
|
if i == len(valid_runs) - 1:
|
||||||
|
# Last run gets all remaining text
|
||||||
|
run.text = final_text[char_pos:]
|
||||||
|
else:
|
||||||
|
ratio = orig_lengths[i] / total_orig
|
||||||
|
run_char_count = max(1, round(final_len * ratio))
|
||||||
|
run_char_count = min(run_char_count, final_len - char_pos - (len(valid_runs) - i - 1))
|
||||||
|
if run_char_count <= 0:
|
||||||
|
# Remove runs that would get zero characters
|
||||||
|
self._remove_run_element(run)
|
||||||
|
continue
|
||||||
|
run.text = final_text[char_pos:char_pos + run_char_count]
|
||||||
|
char_pos += run_char_count
|
||||||
|
|
||||||
|
def _remove_run_element(self, run) -> None:
|
||||||
|
"""Safely remove a run element from its parent."""
|
||||||
parent_element = run.element.getparent()
|
parent_element = run.element.getparent()
|
||||||
if parent_element is not None:
|
if parent_element is not None:
|
||||||
try:
|
try:
|
||||||
parent_element.remove(run.element)
|
parent_element.remove(run.element)
|
||||||
except ValueError:
|
except ValueError:
|
||||||
# 在某些复杂情况下,一个run可能已经被其父元素隐式删除
|
|
||||||
self.logger.debug(f"尝试删除一个不存在的run元素。这通常是安全的。")
|
self.logger.debug(f"尝试删除一个不存在的run元素。这通常是安全的。")
|
||||||
pass
|
|
||||||
|
|
||||||
# ---------- FIX START: 新增用于清理副本段落的辅助方法 ----------
|
# ---------- FIX START: 新增用于清理副本段落的辅助方法 ----------
|
||||||
def _prune_unwanted_elements_from_copy(self, p_element: OxmlElement):
|
def _prune_unwanted_elements_from_copy(self, p_element: OxmlElement):
|
||||||
|
|||||||
@@ -31,6 +31,7 @@ class MDTranslator(AiTranslator):
|
|||||||
if not self.skip_translate:
|
if not self.skip_translate:
|
||||||
agent_config = MDTranslateAgentConfig(custom_prompt=config.custom_prompt,
|
agent_config = MDTranslateAgentConfig(custom_prompt=config.custom_prompt,
|
||||||
to_lang=config.to_lang,
|
to_lang=config.to_lang,
|
||||||
|
source_lang=config.source_lang,
|
||||||
base_url=config.base_url,
|
base_url=config.base_url,
|
||||||
api_key=config.api_key,
|
api_key=config.api_key,
|
||||||
model_id=config.model_id,
|
model_id=config.model_id,
|
||||||
|
|||||||
@@ -1,12 +1,12 @@
|
|||||||
# SPDX-FileCopyrightText: 2025 QinHan
|
# SPDX-FileCopyrightText: 2025 QinHan
|
||||||
# SPDX-License-Identifier: MPL-2.0
|
# SPDX-License-Identifier: MPL-2.0
|
||||||
import asyncio
|
import asyncio
|
||||||
|
import regex # [使用您依赖列表中的 regex 库]
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from typing import Self, Literal, List, Dict, Any, Tuple
|
from typing import Self, Literal, List, Dict, Any, Tuple
|
||||||
|
|
||||||
from pptx import Presentation
|
from pptx import Presentation
|
||||||
from pptx.enum.dml import MSO_COLOR_TYPE
|
|
||||||
from pptx.enum.shapes import MSO_SHAPE_TYPE
|
from pptx.enum.shapes import MSO_SHAPE_TYPE
|
||||||
from pptx.enum.text import MSO_AUTO_SIZE
|
from pptx.enum.text import MSO_AUTO_SIZE
|
||||||
from pptx.oxml.ns import qn
|
from pptx.oxml.ns import qn
|
||||||
@@ -17,6 +17,59 @@ from docutranslate.ir.document import Document
|
|||||||
from docutranslate.translator.ai_translator.base import AiTranslatorConfig, AiTranslator
|
from docutranslate.translator.ai_translator.base import AiTranslatorConfig, AiTranslator
|
||||||
|
|
||||||
|
|
||||||
|
# ---------------- 辅助工具类:语言与字体智能适配 ----------------
|
||||||
|
class LanguageHelper:
|
||||||
|
"""
|
||||||
|
专门处理 PPTX 的语言标签与字体渲染适配。
|
||||||
|
利用 regex 库的 Unicode 属性检测脚本类型。
|
||||||
|
"""
|
||||||
|
|
||||||
|
# 常用语言映射 (覆盖常见写法)
|
||||||
|
_COMMON_MAP = {
|
||||||
|
"chinese": "zh-CN", "simplified chinese": "zh-CN", "zh": "zh-CN",
|
||||||
|
"english": "en-US", "en": "en-US",
|
||||||
|
"japanese": "ja-JP", "ja": "ja-JP",
|
||||||
|
"korean": "ko-KR", "ko": "ko-KR",
|
||||||
|
"french": "fr-FR", "fr": "fr-FR",
|
||||||
|
"german": "de-DE", "de": "de-DE",
|
||||||
|
"spanish": "es-ES", "es": "es-ES",
|
||||||
|
"russian": "ru-RU", "ru": "ru-RU",
|
||||||
|
# ... 其他语言
|
||||||
|
}
|
||||||
|
|
||||||
|
# [关键改进] 使用 regex 库的 Unicode 属性进行精确匹配
|
||||||
|
# \p{Han}: 汉字
|
||||||
|
# \p{Hiragana} / \p{Katakana}: 日文假名
|
||||||
|
# \p{Hangul}: 韩文
|
||||||
|
# 如果包含这些字符,说明需要启用东亚字体渲染
|
||||||
|
_CJK_PATTERN = regex.compile(r'[\p{Han}\p{Hiragana}\p{Katakana}\p{Hangul}]')
|
||||||
|
|
||||||
|
@classmethod
|
||||||
|
def guess_lang_tag(cls, config_lang: str, text_content: str) -> str:
|
||||||
|
"""
|
||||||
|
根据用户配置和实际文本内容,推断最合适的 PPT XML lang 属性。
|
||||||
|
"""
|
||||||
|
# 1. 优先尝试解析用户配置
|
||||||
|
if config_lang:
|
||||||
|
clean_lang = config_lang.lower().strip()
|
||||||
|
if clean_lang in cls._COMMON_MAP:
|
||||||
|
return cls._COMMON_MAP[clean_lang]
|
||||||
|
# 如果看起来像 ISO 代码 (如 'fr-FR'), 直接信赖
|
||||||
|
if regex.match(r'^[a-z]{2,3}(-[a-z0-9]+)?$', clean_lang):
|
||||||
|
return config_lang
|
||||||
|
|
||||||
|
# 2. [兜底策略] 基于内容的脚本检测
|
||||||
|
# 使用 regex 检查是否包含中日韩字符
|
||||||
|
if cls._CJK_PATTERN.search(text_content):
|
||||||
|
# 包含 CJK 字符 -> 声明为中文,激活东亚字体槽 (a:ea)
|
||||||
|
# 即使是日文/韩文,设为 zh-CN 在字体回退机制上通常也能正确激活 CJK 渲染逻辑
|
||||||
|
return "zh-CN"
|
||||||
|
else:
|
||||||
|
# 不含 CJK -> 默认为英文,激活西文字体槽 (a:latin)
|
||||||
|
# 这涵盖了英文、法文、德文、俄文、越南语等绝大多数非 CJK 语言
|
||||||
|
return "en-US"
|
||||||
|
|
||||||
|
|
||||||
# ---------------- 配置类 ----------------
|
# ---------------- 配置类 ----------------
|
||||||
@dataclass
|
@dataclass
|
||||||
class PPTXTranslatorConfig(AiTranslatorConfig):
|
class PPTXTranslatorConfig(AiTranslatorConfig):
|
||||||
@@ -27,13 +80,8 @@ class PPTXTranslatorConfig(AiTranslatorConfig):
|
|||||||
# ---------------- 主类 ----------------
|
# ---------------- 主类 ----------------
|
||||||
class PPTXTranslator(AiTranslator):
|
class PPTXTranslator(AiTranslator):
|
||||||
"""
|
"""
|
||||||
基于 python-pptx 的 .pptx 文件翻译器 (增强版)。
|
基于 python-pptx 的 .pptx 文件翻译器 (最终增强版)。
|
||||||
|
使用 regex 库进行高性能的脚本检测。
|
||||||
改进特性:
|
|
||||||
1. 深度遍历:支持母版、版式、备注页、以及隐藏在 AlternateContent (兼容性块) 中的文本。
|
|
||||||
2. 公式保护:智能检测文本间的公式,防止翻译后文字错位。
|
|
||||||
3. 样式保留:翻译后完全保留原有的中英文字体设置,不做强制覆盖。
|
|
||||||
4. 布局自适应:防止翻译后文本溢出。
|
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, config: PPTXTranslatorConfig):
|
def __init__(self, config: PPTXTranslatorConfig):
|
||||||
@@ -56,80 +104,92 @@ class PPTXTranslator(AiTranslator):
|
|||||||
self.insert_mode = config.insert_mode
|
self.insert_mode = config.insert_mode
|
||||||
self.separator = config.separator
|
self.separator = config.separator
|
||||||
|
|
||||||
# ---------------- 辅助函数:样式与字体 ----------------
|
# ---------------- 辅助函数:视觉样式 ----------------
|
||||||
|
|
||||||
def _get_font_signature(self, run) -> Tuple:
|
def _get_visual_style_signature(self, run) -> Tuple:
|
||||||
"""获取 Run 的字体样式签名,用于合并判断。"""
|
"""获取 Run 的视觉样式签名"""
|
||||||
font = run.font
|
r_element = run._r
|
||||||
color_key = None
|
rPr = r_element.rPr
|
||||||
|
|
||||||
# 稳健的颜色获取逻辑
|
if rPr is None:
|
||||||
if hasattr(font, 'color') and font.color:
|
return ("DEFAULT",)
|
||||||
try:
|
|
||||||
if font.color.type == MSO_COLOR_TYPE.RGB:
|
|
||||||
color_key = str(font.color.rgb)
|
|
||||||
elif font.color.type == MSO_COLOR_TYPE.THEME:
|
|
||||||
color_key = f"THEME_{font.color.theme_color}_{font.color.brightness}"
|
|
||||||
except AttributeError:
|
|
||||||
pass
|
|
||||||
|
|
||||||
return (
|
def get_bool_attr(tag_name):
|
||||||
font.name,
|
node = rPr.find(qn(f'a:{tag_name}'))
|
||||||
font.size,
|
if node is None: return None
|
||||||
font.bold,
|
val = node.get('val')
|
||||||
font.italic,
|
return val if val is not None else '1'
|
||||||
font.underline,
|
|
||||||
color_key
|
bold = get_bool_attr('b')
|
||||||
)
|
italic = get_bool_attr('i')
|
||||||
|
u_node = rPr.find(qn('a:u'))
|
||||||
|
underline = u_node.get('val') if u_node is not None else None
|
||||||
|
strike_node = rPr.find(qn('a:strike'))
|
||||||
|
strike = strike_node.get('val') if strike_node is not None else None
|
||||||
|
sz = rPr.get('sz')
|
||||||
|
latin = rPr.find(qn('a:latin'))
|
||||||
|
latin_face = latin.get('typeface') if latin is not None else None
|
||||||
|
ea = rPr.find(qn('a:ea'))
|
||||||
|
ea_face = ea.get('typeface') if ea is not None else None
|
||||||
|
|
||||||
|
color_sig = "INHERITED"
|
||||||
|
for tag in ['solidFill', 'gradFill', 'noFill', 'blipFill', 'pattFill']:
|
||||||
|
fill_node = rPr.find(qn(f'a:{tag}'))
|
||||||
|
if fill_node is not None:
|
||||||
|
parts = [tag]
|
||||||
|
for child in fill_node:
|
||||||
|
val = child.get('val') or ""
|
||||||
|
parts.append(f"{child.tag.split('}')[-1]}:{val}")
|
||||||
|
color_sig = "-".join(parts)
|
||||||
|
break
|
||||||
|
|
||||||
|
baseline = rPr.get('baseline')
|
||||||
|
effect_sig = []
|
||||||
|
for tag in ['highlight', 'effectLst', 'sp3d']:
|
||||||
|
if rPr.find(qn(f'a:{tag}')) is not None:
|
||||||
|
effect_sig.append(tag)
|
||||||
|
|
||||||
|
return (bold, italic, underline, strike, sz, latin_face, ea_face, baseline, color_sig,
|
||||||
|
tuple(sorted(effect_sig)))
|
||||||
|
|
||||||
def _have_same_significant_styles(self, run1, run2) -> bool:
|
def _have_same_significant_styles(self, run1, run2) -> bool:
|
||||||
"""检查两个 Run 是否样式相同且在 XML 结构上紧邻(中间无公式)。"""
|
"""检查两个 Run 是否样式一致且紧邻"""
|
||||||
if run1 is None or run2 is None:
|
if run1 is None or run2 is None: return False
|
||||||
return False
|
if self._get_visual_style_signature(run1) != self._get_visual_style_signature(run2): return False
|
||||||
|
|
||||||
# 1. 检查视觉样式是否一致
|
|
||||||
if self._get_font_signature(run1) != self._get_font_signature(run2):
|
|
||||||
return False
|
|
||||||
|
|
||||||
# 2. 检查 XML 邻接性
|
|
||||||
# 如果 run1 和 run2 之间夹杂了 <m:oMath> (公式) 或其他标签,
|
|
||||||
# 它们的 XML 索引将不连续。此时必须切分,否则回填时文字会跑到公式前面。
|
|
||||||
try:
|
try:
|
||||||
r1_element = run1._r
|
r1_element = run1._r
|
||||||
r2_element = run2._r
|
r2_element = run2._r
|
||||||
parent = r1_element.getparent()
|
parent = r1_element.getparent()
|
||||||
|
if parent != r2_element.getparent(): return False
|
||||||
# 只有当它们属于同一个父节点,且索引差为1时,才视为紧邻
|
if parent.index(r2_element) != parent.index(r1_element) + 1: return False
|
||||||
if parent == r2_element.getparent():
|
|
||||||
index1 = parent.index(r1_element)
|
|
||||||
index2 = parent.index(r2_element)
|
|
||||||
if index2 != index1 + 1:
|
|
||||||
return False # 中间有东西(如公式),禁止合并
|
|
||||||
except Exception:
|
except Exception:
|
||||||
# 如果底层操作失败,保守起见不合并
|
|
||||||
return False
|
return False
|
||||||
|
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
def _apply_lang_correction(self, run, text_content: str):
|
||||||
|
"""[智能修正] 根据配置和文本内容,设置正确的 lang 属性"""
|
||||||
|
if not text_content: return
|
||||||
|
best_lang = LanguageHelper.guess_lang_tag(self.config.to_lang, text_content)
|
||||||
|
if best_lang:
|
||||||
|
rPr = run._r.get_or_add_rPr()
|
||||||
|
rPr.set('lang', best_lang)
|
||||||
|
rPr.set('altLang', best_lang)
|
||||||
|
|
||||||
# ---------------- 核心遍历逻辑 ----------------
|
# ---------------- 核心遍历逻辑 ----------------
|
||||||
|
|
||||||
def _process_text_frame(self, text_frame: TextFrame, elements: List[Dict[str, Any]], texts: List[str]):
|
def _process_text_frame(self, text_frame: TextFrame, elements: List[Dict[str, Any]], texts: List[str]):
|
||||||
"""处理 TextFrame 中的所有段落"""
|
|
||||||
for paragraph in text_frame.paragraphs:
|
for paragraph in text_frame.paragraphs:
|
||||||
self._process_paragraph(paragraph, elements, texts)
|
self._process_paragraph(paragraph, elements, texts)
|
||||||
|
|
||||||
def _process_paragraph(self, paragraph: _Paragraph, elements: List[Dict[str, Any]], texts: List[str]):
|
def _process_paragraph(self, paragraph: _Paragraph, elements: List[Dict[str, Any]], texts: List[str]):
|
||||||
"""处理单个段落,智能切分文本"""
|
if not paragraph.runs: return
|
||||||
if not paragraph.runs:
|
|
||||||
return
|
|
||||||
|
|
||||||
current_runs = []
|
state = {'current_runs': []}
|
||||||
|
|
||||||
def flush_segment():
|
def flush_segment():
|
||||||
if not current_runs:
|
current_runs = state['current_runs']
|
||||||
return
|
if not current_runs: return
|
||||||
full_text = "".join(r.text for r in current_runs)
|
full_text = "".join(r.text for r in current_runs)
|
||||||
# 只有非空文本才翻译
|
|
||||||
if full_text.strip():
|
if full_text.strip():
|
||||||
elements.append({
|
elements.append({
|
||||||
"type": "text_runs",
|
"type": "text_runs",
|
||||||
@@ -141,29 +201,20 @@ class PPTXTranslator(AiTranslator):
|
|||||||
current_runs.clear()
|
current_runs.clear()
|
||||||
|
|
||||||
for run in paragraph.runs:
|
for run in paragraph.runs:
|
||||||
# 这里的 run.text 只有纯文本,不包含公式内容
|
if not run.text: continue
|
||||||
if not run.text:
|
last_run = state['current_runs'][-1] if state['current_runs'] else None
|
||||||
continue
|
|
||||||
|
|
||||||
last_run = current_runs[-1] if current_runs else None
|
|
||||||
|
|
||||||
# 样式不同 或 物理位置不连续(中间有公式)则切分
|
|
||||||
if last_run and not self._have_same_significant_styles(last_run, run):
|
if last_run and not self._have_same_significant_styles(last_run, run):
|
||||||
flush_segment()
|
flush_segment()
|
||||||
|
state['current_runs'].append(run)
|
||||||
current_runs.append(run)
|
|
||||||
|
|
||||||
flush_segment()
|
flush_segment()
|
||||||
|
|
||||||
def _process_shape(self, shape, elements: List[Dict[str, Any]], texts: List[str]):
|
def _process_shape(self, shape, elements: List[Dict[str, Any]], texts: List[str]):
|
||||||
"""递归处理常规形状"""
|
|
||||||
# 1. 组合图形
|
|
||||||
if shape.shape_type == MSO_SHAPE_TYPE.GROUP:
|
if shape.shape_type == MSO_SHAPE_TYPE.GROUP:
|
||||||
for child_shape in shape.shapes:
|
for child_shape in shape.shapes:
|
||||||
self._process_shape(child_shape, elements, texts)
|
self._process_shape(child_shape, elements, texts)
|
||||||
return
|
return
|
||||||
|
|
||||||
# 2. 表格
|
|
||||||
if shape.has_table:
|
if shape.has_table:
|
||||||
for row in shape.table.rows:
|
for row in shape.table.rows:
|
||||||
for cell in row.cells:
|
for cell in row.cells:
|
||||||
@@ -171,7 +222,6 @@ class PPTXTranslator(AiTranslator):
|
|||||||
self._process_text_frame(cell.text_frame, elements, texts)
|
self._process_text_frame(cell.text_frame, elements, texts)
|
||||||
return
|
return
|
||||||
|
|
||||||
# 3. 常规文本框
|
|
||||||
if shape.has_text_frame:
|
if shape.has_text_frame:
|
||||||
try:
|
try:
|
||||||
self._process_text_frame(shape.text_frame, elements, texts)
|
self._process_text_frame(shape.text_frame, elements, texts)
|
||||||
@@ -179,120 +229,79 @@ class PPTXTranslator(AiTranslator):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
def _scan_deep_xml_for_text(self, slide_element, elements: List[Dict[str, Any]], texts: List[str]):
|
def _scan_deep_xml_for_text(self, slide_element, elements: List[Dict[str, Any]], texts: List[str]):
|
||||||
"""
|
|
||||||
[深度扫描] 直接遍历 XML 树,寻找标准 API 无法触及的文本。
|
|
||||||
修复了 KeyError: 'mc' 问题。
|
|
||||||
"""
|
|
||||||
# 定义 XML 命名空间 URI
|
|
||||||
MC_NS = "http://schemas.openxmlformats.org/markup-compatibility/2006"
|
MC_NS = "http://schemas.openxmlformats.org/markup-compatibility/2006"
|
||||||
# 手动构建带命名空间的标签名,不依赖 qn()
|
|
||||||
MC_ALT = f"{{{MC_NS}}}AlternateContent"
|
MC_ALT = f"{{{MC_NS}}}AlternateContent"
|
||||||
MC_CHOICE = f"{{{MC_NS}}}Choice"
|
MC_CHOICE = f"{{{MC_NS}}}Choice"
|
||||||
|
|
||||||
# 对于 'p' (PresentationML) 命名空间,python-pptx 支持 qn,可以继续使用
|
|
||||||
P_SP = qn('p:sp')
|
P_SP = qn('p:sp')
|
||||||
P_TXBODY = qn('p:txBody')
|
P_TXBODY = qn('p:txBody')
|
||||||
|
|
||||||
# 查找所有 AlternateContent 块
|
|
||||||
for alt_content in slide_element.iter(MC_ALT):
|
for alt_content in slide_element.iter(MC_ALT):
|
||||||
# 找到 Choice 分支
|
|
||||||
choice = alt_content.find(MC_CHOICE)
|
choice = alt_content.find(MC_CHOICE)
|
||||||
if choice is None:
|
if choice is None: continue
|
||||||
continue
|
|
||||||
|
|
||||||
# 在 Choice 内部寻找形状 (p:sp)
|
|
||||||
for sp in choice.iter(P_SP):
|
for sp in choice.iter(P_SP):
|
||||||
# 寻找 p:txBody (文本主体)
|
|
||||||
txBody = sp.find(P_TXBODY)
|
txBody = sp.find(P_TXBODY)
|
||||||
if txBody is not None:
|
if txBody is not None:
|
||||||
try:
|
try:
|
||||||
# 手动构建 TextFrame 对象
|
|
||||||
# 这里的 parent 设为 None 在读取/写入 text 属性时通常是安全的
|
|
||||||
tf = TextFrame(txBody, None)
|
tf = TextFrame(txBody, None)
|
||||||
self._process_text_frame(tf, elements, texts)
|
self._process_text_frame(tf, elements, texts)
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"处理深度 XML 文本框时出错: {e}")
|
self.logger.warning(f"Deep XML Scan Error: {e}")
|
||||||
|
|
||||||
def _scan_presentation_content(self, prs: Presentation, elements: List[Dict[str, Any]], texts: List[str]):
|
def _scan_presentation_content(self, prs: Presentation, elements: List[Dict[str, Any]], texts: List[str]):
|
||||||
"""全量扫描 PPT 内容"""
|
|
||||||
|
|
||||||
# 辅助内部函数:扫描单个“幻灯片类”对象
|
|
||||||
def scan_slide_object(slide_obj):
|
def scan_slide_object(slide_obj):
|
||||||
# 1. 常规 API 遍历 (处理普通文本、表格、组合)
|
|
||||||
for shape in slide_obj.shapes:
|
for shape in slide_obj.shapes:
|
||||||
self._process_shape(shape, elements, texts)
|
self._process_shape(shape, elements, texts)
|
||||||
|
|
||||||
# 2. 深度 XML 遍历 (处理 AlternateContent/公式文本)
|
|
||||||
self._scan_deep_xml_for_text(slide_obj.element, elements, texts)
|
self._scan_deep_xml_for_text(slide_obj.element, elements, texts)
|
||||||
|
|
||||||
# 1. 遍历普通幻灯片 (Slides)
|
|
||||||
for slide in prs.slides:
|
for slide in prs.slides:
|
||||||
scan_slide_object(slide)
|
scan_slide_object(slide)
|
||||||
# 备注页
|
if slide.has_notes_slide and slide.notes_slide.notes_text_frame:
|
||||||
if slide.has_notes_slide:
|
self._process_text_frame(slide.notes_slide.notes_text_frame, elements, texts)
|
||||||
notes = slide.notes_slide
|
|
||||||
if notes.notes_text_frame:
|
|
||||||
self._process_text_frame(notes.notes_text_frame, elements, texts)
|
|
||||||
|
|
||||||
# 2. 遍历母版 (Slide Masters)
|
|
||||||
for master in prs.slide_masters:
|
for master in prs.slide_masters:
|
||||||
scan_slide_object(master)
|
scan_slide_object(master)
|
||||||
|
|
||||||
# 3. 遍历版式 (Layouts)
|
|
||||||
for layout in master.slide_layouts:
|
for layout in master.slide_layouts:
|
||||||
scan_slide_object(layout)
|
scan_slide_object(layout)
|
||||||
|
|
||||||
# ---------------- 翻译前后处理 ----------------
|
# ---------------- 翻译逻辑 ----------------
|
||||||
|
|
||||||
def _pre_translate(self, document: Document) -> Tuple[Presentation, List[Dict[str, Any]], List[str]]:
|
def _pre_translate(self, document: Document) -> Tuple[Presentation, List[Dict[str, Any]], List[str]]:
|
||||||
"""解析 PPT 文件"""
|
|
||||||
prs = Presentation(BytesIO(document.content))
|
prs = Presentation(BytesIO(document.content))
|
||||||
elements, texts = [], []
|
elements, texts = [], []
|
||||||
|
|
||||||
self._scan_presentation_content(prs, elements, texts)
|
self._scan_presentation_content(prs, elements, texts)
|
||||||
self.logger.info(f"共提取了 {len(texts)} 个文本片段 (包含隐藏的公式文本)。")
|
self.logger.info(f"Extracted {len(texts)} text segments.")
|
||||||
return prs, elements, texts
|
return prs, elements, texts
|
||||||
|
|
||||||
def _apply_translation(self, element_info: Dict[str, Any], final_text: str):
|
def _apply_translation(self, element_info: Dict[str, Any], final_text: str):
|
||||||
"""回填翻译,精细控制样式"""
|
|
||||||
runs = element_info["runs"]
|
runs = element_info["runs"]
|
||||||
if not runs:
|
if not runs: return
|
||||||
return
|
|
||||||
|
|
||||||
original_text = "".join(r.text for r in runs)
|
original_text = "".join(r.text for r in runs)
|
||||||
|
|
||||||
text_to_set = final_text
|
text_to_set = final_text
|
||||||
if self.insert_mode == "append":
|
if self.insert_mode == "append":
|
||||||
text_to_set = original_text + self.separator + final_text
|
text_to_set = original_text + self.separator + final_text
|
||||||
elif self.insert_mode == "prepend":
|
elif self.insert_mode == "prepend":
|
||||||
text_to_set = final_text + self.separator + original_text
|
text_to_set = final_text + self.separator + original_text
|
||||||
|
|
||||||
# --- 回填策略 ---
|
|
||||||
primary_run = runs[0]
|
primary_run = runs[0]
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# 1. 写入文本 (python-pptx 会自动保留原有的 rPr 属性,即保留默认字体)
|
|
||||||
primary_run.text = text_to_set
|
primary_run.text = text_to_set
|
||||||
|
# 调用利用 regex 的智能修正
|
||||||
|
self._apply_lang_correction(primary_run, text_to_set)
|
||||||
|
|
||||||
# 2. (已移除字体强制设置逻辑,以保留 PPT 原样)
|
|
||||||
|
|
||||||
# 3. 处理溢出
|
|
||||||
text_frame = element_info.get("text_frame")
|
text_frame = element_info.get("text_frame")
|
||||||
if text_frame and hasattr(text_frame, 'auto_size'):
|
if text_frame and hasattr(text_frame, 'auto_size'):
|
||||||
if text_frame.auto_size == MSO_AUTO_SIZE.NONE:
|
if text_frame.auto_size == MSO_AUTO_SIZE.NONE:
|
||||||
text_frame.auto_size = MSO_AUTO_SIZE.TEXT_TO_FIT_SHAPE
|
text_frame.auto_size = MSO_AUTO_SIZE.TEXT_TO_FIT_SHAPE
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
self.logger.warning(f"应用翻译到 Run 时出错: {e}")
|
self.logger.warning(f"Error applying translation: {e}")
|
||||||
return
|
return
|
||||||
|
|
||||||
# 清空后续 run (模拟合并效果)
|
|
||||||
for i in range(1, len(runs)):
|
for i in range(1, len(runs)):
|
||||||
runs[i].text = ""
|
runs[i].text = ""
|
||||||
|
|
||||||
def _after_translate(self, prs: Presentation, elements: List[Dict[str, Any]], translated: List[str],
|
def _after_translate(self, prs: Presentation, elements: List[Dict[str, Any]], translated: List[str],
|
||||||
originals: List[str]) -> bytes:
|
originals: List[str]) -> bytes:
|
||||||
"""保存结果"""
|
|
||||||
if len(elements) != len(translated):
|
if len(elements) != len(translated):
|
||||||
min_len = min(len(elements), len(translated))
|
min_len = min(len(elements), len(translated))
|
||||||
elements = elements[:min_len]
|
elements = elements[:min_len]
|
||||||
@@ -305,26 +314,20 @@ class PPTXTranslator(AiTranslator):
|
|||||||
prs.save(output_stream)
|
prs.save(output_stream)
|
||||||
return output_stream.getvalue()
|
return output_stream.getvalue()
|
||||||
|
|
||||||
# ---------------- 接口实现 ----------------
|
# ---------------- 接口 ----------------
|
||||||
|
|
||||||
def translate(self, document: Document) -> Self:
|
def translate(self, document: Document) -> Self:
|
||||||
prs, elements, originals = self._pre_translate(document)
|
prs, elements, originals = self._pre_translate(document)
|
||||||
if not originals:
|
if not originals:
|
||||||
self.logger.info("未找到可翻译文本。")
|
self.logger.info("No text found.")
|
||||||
document.content = self._after_translate(prs, elements, [], [])
|
document.content = self._after_translate(prs, elements, [], [])
|
||||||
return self
|
return self
|
||||||
|
|
||||||
if self.glossary_agent:
|
if self.glossary_agent:
|
||||||
# 1. 获取增量
|
|
||||||
glossary_dict_gen = self.glossary_agent.send_segments(originals, self.chunk_size)
|
glossary_dict_gen = self.glossary_agent.send_segments(originals, self.chunk_size)
|
||||||
|
if self.glossary: self.glossary.update(glossary_dict_gen)
|
||||||
# 2. 在 Translator 层统一合并 (SSOT)
|
if self.translate_agent and self.glossary: self.translate_agent.update_glossary_dict(
|
||||||
if self.glossary:
|
self.glossary.glossary_dict)
|
||||||
self.glossary.update(glossary_dict_gen)
|
|
||||||
|
|
||||||
# 3. 将合并后的【完整字典】传给 Agent
|
|
||||||
if self.translate_agent and self.glossary:
|
|
||||||
self.translate_agent.update_glossary_dict(self.glossary.glossary_dict)
|
|
||||||
|
|
||||||
translated = self.translate_agent.send_segments(originals,
|
translated = self.translate_agent.send_segments(originals,
|
||||||
self.chunk_size) if self.translate_agent else originals
|
self.chunk_size) if self.translate_agent else originals
|
||||||
@@ -334,21 +337,15 @@ class PPTXTranslator(AiTranslator):
|
|||||||
async def translate_async(self, document: Document) -> Self:
|
async def translate_async(self, document: Document) -> Self:
|
||||||
prs, elements, originals = await asyncio.to_thread(self._pre_translate, document)
|
prs, elements, originals = await asyncio.to_thread(self._pre_translate, document)
|
||||||
if not originals:
|
if not originals:
|
||||||
self.logger.info("未找到可翻译文本。")
|
self.logger.info("No text found.")
|
||||||
document.content = await asyncio.to_thread(self._after_translate, prs, elements, [], [])
|
document.content = await asyncio.to_thread(self._after_translate, prs, elements, [], [])
|
||||||
return self
|
return self
|
||||||
|
|
||||||
if self.glossary_agent:
|
if self.glossary_agent:
|
||||||
# 1. 获取增量
|
|
||||||
glossary_dict_gen = await self.glossary_agent.send_segments_async(originals, self.chunk_size)
|
glossary_dict_gen = await self.glossary_agent.send_segments_async(originals, self.chunk_size)
|
||||||
|
if self.glossary: self.glossary.update(glossary_dict_gen)
|
||||||
# 2. 在 Translator 层统一合并 (SSOT)
|
if self.translate_agent and self.glossary: self.translate_agent.update_glossary_dict(
|
||||||
if self.glossary:
|
self.glossary.glossary_dict)
|
||||||
self.glossary.update(glossary_dict_gen)
|
|
||||||
|
|
||||||
# 3. 将合并后的【完整字典】传给 Agent
|
|
||||||
if self.translate_agent and self.glossary:
|
|
||||||
self.translate_agent.update_glossary_dict(self.glossary.glossary_dict)
|
|
||||||
|
|
||||||
translated = await self.translate_agent.send_segments_async(originals,
|
translated = await self.translate_agent.send_segments_async(originals,
|
||||||
self.chunk_size) if self.translate_agent else originals
|
self.chunk_size) if self.translate_agent else originals
|
||||||
|
|||||||
@@ -20,6 +20,9 @@ dependencies = [
|
|||||||
"httpx>=0.28.1",
|
"httpx>=0.28.1",
|
||||||
"python-pptx>=1.0.2",
|
"python-pptx>=1.0.2",
|
||||||
"pypdf>=6.4.2",
|
"pypdf>=6.4.2",
|
||||||
|
"regex>=2025.11.3",
|
||||||
|
"charset-normalizer>=3.4.4",
|
||||||
|
"python-dotenv>=1.0.0",
|
||||||
]
|
]
|
||||||
dynamic = ["version"]
|
dynamic = ["version"]
|
||||||
|
|
||||||
|
|||||||
35
run.bat
Normal file
35
run.bat
Normal file
@@ -0,0 +1,35 @@
|
|||||||
|
@echo off
|
||||||
|
setlocal
|
||||||
|
|
||||||
|
REM 切换到脚本所在目录
|
||||||
|
cd /d "%~dp0"
|
||||||
|
|
||||||
|
REM 检查 .venv 是否存在
|
||||||
|
if not exist ".venv\Scripts\activate.bat" (
|
||||||
|
echo [ERROR] 未找到 .venv\Scripts\activate.bat
|
||||||
|
pause
|
||||||
|
exit /b 1
|
||||||
|
)
|
||||||
|
|
||||||
|
REM 激活虚拟环境
|
||||||
|
call ".venv\Scripts\activate.bat"
|
||||||
|
|
||||||
|
REM 如果传了参数,就用参数;否则默认用当前目录
|
||||||
|
if "%~1"=="" (
|
||||||
|
python docutranslate/cli.py -i --host 0.0.0.0
|
||||||
|
) else (
|
||||||
|
python docutranslate/cli.py -i "%~1"
|
||||||
|
)
|
||||||
|
|
||||||
|
set EXIT_CODE=%ERRORLEVEL%
|
||||||
|
|
||||||
|
REM 退出虚拟环境
|
||||||
|
call deactivate >nul 2>nul
|
||||||
|
|
||||||
|
if not "%EXIT_CODE%"=="0" (
|
||||||
|
echo.
|
||||||
|
echo [ERROR] 程序退出,返回码: %EXIT_CODE%
|
||||||
|
pause
|
||||||
|
)
|
||||||
|
|
||||||
|
exit /b %EXIT_CODE%
|
||||||
14
uv.lock
generated
14
uv.lock
generated
@@ -370,6 +370,7 @@ name = "docutranslate"
|
|||||||
source = { editable = "." }
|
source = { editable = "." }
|
||||||
dependencies = [
|
dependencies = [
|
||||||
{ name = "beautifulsoup4" },
|
{ name = "beautifulsoup4" },
|
||||||
|
{ name = "charset-normalizer" },
|
||||||
{ name = "fastapi", extra = ["standard"] },
|
{ name = "fastapi", extra = ["standard"] },
|
||||||
{ name = "httpx" },
|
{ name = "httpx" },
|
||||||
{ name = "json-repair" },
|
{ name = "json-repair" },
|
||||||
@@ -382,7 +383,9 @@ dependencies = [
|
|||||||
{ name = "pypdf" },
|
{ name = "pypdf" },
|
||||||
{ name = "pysubs2" },
|
{ name = "pysubs2" },
|
||||||
{ name = "python-docx" },
|
{ name = "python-docx" },
|
||||||
|
{ name = "python-dotenv" },
|
||||||
{ name = "python-pptx" },
|
{ name = "python-pptx" },
|
||||||
|
{ name = "regex" },
|
||||||
{ name = "srt" },
|
{ name = "srt" },
|
||||||
{ name = "xlsx2html" },
|
{ name = "xlsx2html" },
|
||||||
]
|
]
|
||||||
@@ -404,6 +407,7 @@ dev = [
|
|||||||
[package.metadata]
|
[package.metadata]
|
||||||
requires-dist = [
|
requires-dist = [
|
||||||
{ name = "beautifulsoup4", specifier = ">=4.13.4" },
|
{ name = "beautifulsoup4", specifier = ">=4.13.4" },
|
||||||
|
{ name = "charset-normalizer", specifier = ">=3.4.4" },
|
||||||
{ name = "docling", marker = "extra == 'docling'", specifier = ">=2.40.0" },
|
{ name = "docling", marker = "extra == 'docling'", specifier = ">=2.40.0" },
|
||||||
{ name = "fastapi", extras = ["standard"], specifier = ">=0.115.12" },
|
{ name = "fastapi", extras = ["standard"], specifier = ">=0.115.12" },
|
||||||
{ name = "hf-xet", marker = "extra == 'docling'", specifier = ">=1.1.10" },
|
{ name = "hf-xet", marker = "extra == 'docling'", specifier = ">=1.1.10" },
|
||||||
@@ -419,7 +423,9 @@ requires-dist = [
|
|||||||
{ name = "pypdf", specifier = ">=6.4.2" },
|
{ name = "pypdf", specifier = ">=6.4.2" },
|
||||||
{ name = "pysubs2", specifier = ">=1.8.0" },
|
{ name = "pysubs2", specifier = ">=1.8.0" },
|
||||||
{ name = "python-docx", specifier = ">=1.2.0" },
|
{ name = "python-docx", specifier = ">=1.2.0" },
|
||||||
|
{ name = "python-dotenv", specifier = ">=1.0.0" },
|
||||||
{ name = "python-pptx", specifier = ">=1.0.2" },
|
{ name = "python-pptx", specifier = ">=1.0.2" },
|
||||||
|
{ name = "regex", specifier = ">=2025.11.3" },
|
||||||
{ name = "srt", specifier = ">=3.5.3" },
|
{ name = "srt", specifier = ">=3.5.3" },
|
||||||
{ name = "xlsx2html", specifier = ">=0.6.2" },
|
{ name = "xlsx2html", specifier = ">=0.6.2" },
|
||||||
]
|
]
|
||||||
@@ -1734,14 +1740,6 @@ wheels = [
|
|||||||
{ url = "https://files.pythonhosted.org/packages/5c/96/5fb7d8c3c17bc8c62fdb031c47d77a1af698f1d7a406b0f79aaa1338f9ad/pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa", size = 1988906 },
|
{ url = "https://files.pythonhosted.org/packages/5c/96/5fb7d8c3c17bc8c62fdb031c47d77a1af698f1d7a406b0f79aaa1338f9ad/pydantic_core-2.41.5-cp314-cp314t-win32.whl", hash = "sha256:b4ececa40ac28afa90871c2cc2b9ffd2ff0bf749380fbdf57d165fd23da353aa", size = 1988906 },
|
||||||
{ url = "https://files.pythonhosted.org/packages/22/ed/182129d83032702912c2e2d8bbe33c036f342cc735737064668585dac28f/pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = "sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c", size = 1981607 },
|
{ url = "https://files.pythonhosted.org/packages/22/ed/182129d83032702912c2e2d8bbe33c036f342cc735737064668585dac28f/pydantic_core-2.41.5-cp314-cp314t-win_amd64.whl", hash = "sha256:80aa89cad80b32a912a65332f64a4450ed00966111b6615ca6816153d3585a8c", size = 1981607 },
|
||||||
{ url = "https://files.pythonhosted.org/packages/9f/ed/068e41660b832bb0b1aa5b58011dea2a3fe0ba7861ff38c4d4904c1c1a99/pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008", size = 1974769 },
|
{ url = "https://files.pythonhosted.org/packages/9f/ed/068e41660b832bb0b1aa5b58011dea2a3fe0ba7861ff38c4d4904c1c1a99/pydantic_core-2.41.5-cp314-cp314t-win_arm64.whl", hash = "sha256:35b44f37a3199f771c3eaa53051bc8a70cd7b54f333531c59e29fd4db5d15008", size = 1974769 },
|
||||||
{ url = "https://files.pythonhosted.org/packages/11/72/90fda5ee3b97e51c494938a4a44c3a35a9c96c19bba12372fb9c634d6f57/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_10_12_x86_64.whl", hash = "sha256:b96d5f26b05d03cc60f11a7761a5ded1741da411e7fe0909e27a5e6a0cb7b034", size = 2115441 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/1f/53/8942f884fa33f50794f119012dc6a1a02ac43a56407adaac20463df8e98f/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-macosx_11_0_arm64.whl", hash = "sha256:634e8609e89ceecea15e2d61bc9ac3718caaaa71963717bf3c8f38bfde64242c", size = 1930291 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/79/c8/ecb9ed9cd942bce09fc888ee960b52654fbdbede4ba6c2d6e0d3b1d8b49c/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:93e8740d7503eb008aa2df04d3b9735f845d43ae845e6dcd2be0b55a2da43cd2", size = 1948632 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/2e/1b/687711069de7efa6af934e74f601e2a4307365e8fdc404703afc453eab26/pydantic_core-2.41.5-graalpy311-graalpy242_311_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:f15489ba13d61f670dcc96772e733aad1a6f9c429cc27574c6cdaed82d0146ad", size = 2138905 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/09/32/59b0c7e63e277fa7911c2fc70ccfb45ce4b98991e7ef37110663437005af/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_10_12_x86_64.whl", hash = "sha256:7da7087d756b19037bc2c06edc6c170eeef3c3bafcb8f532ff17d64dc427adfd", size = 2110495 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/aa/81/05e400037eaf55ad400bcd318c05bb345b57e708887f07ddb2d20e3f0e98/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-macosx_11_0_arm64.whl", hash = "sha256:aabf5777b5c8ca26f7824cb4a120a740c9588ed58df9b2d196ce92fba42ff8dc", size = 1915388 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/6e/0d/e3549b2399f71d56476b77dbf3cf8937cec5cd70536bdc0e374a421d0599/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_aarch64.manylinux2014_aarch64.whl", hash = "sha256:c007fe8a43d43b3969e8469004e9845944f1a80e6acd47c150856bb87f230c56", size = 1942879 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/f7/07/34573da085946b6a313d7c42f82f16e8920bfd730665de2d11c0c37a74b5/pydantic_core-2.41.5-graalpy312-graalpy250_312_native-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:76d0819de158cd855d1cbb8fcafdf6f5cf1eb8e470abe056d5d161106e38062b", size = 2139017 },
|
|
||||||
{ url = "https://files.pythonhosted.org/packages/5f/9b/1b3f0e9f9305839d7e84912f9e8bfbd191ed1b1ef48083609f0dabde978c/pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b2379fa7ed44ddecb5bfe4e48577d752db9fc10be00a6b7446e9663ba143de26", size = 2101980 },
|
{ url = "https://files.pythonhosted.org/packages/5f/9b/1b3f0e9f9305839d7e84912f9e8bfbd191ed1b1ef48083609f0dabde978c/pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_10_12_x86_64.whl", hash = "sha256:b2379fa7ed44ddecb5bfe4e48577d752db9fc10be00a6b7446e9663ba143de26", size = 2101980 },
|
||||||
{ url = "https://files.pythonhosted.org/packages/a4/ed/d71fefcb4263df0da6a85b5d8a7508360f2f2e9b3bf5814be9c8bccdccc1/pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:266fb4cbf5e3cbd0b53669a6d1b039c45e3ce651fd5442eff4d07c2cc8d66808", size = 1923865 },
|
{ url = "https://files.pythonhosted.org/packages/a4/ed/d71fefcb4263df0da6a85b5d8a7508360f2f2e9b3bf5814be9c8bccdccc1/pydantic_core-2.41.5-pp311-pypy311_pp73-macosx_11_0_arm64.whl", hash = "sha256:266fb4cbf5e3cbd0b53669a6d1b039c45e3ce651fd5442eff4d07c2cc8d66808", size = 1923865 },
|
||||||
{ url = "https://files.pythonhosted.org/packages/ce/3a/626b38db460d675f873e4444b4bb030453bbe7b4ba55df821d026a0493c4/pydantic_core-2.41.5-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58133647260ea01e4d0500089a8c4f07bd7aa6ce109682b1426394988d8aaacc", size = 2134256 },
|
{ url = "https://files.pythonhosted.org/packages/ce/3a/626b38db460d675f873e4444b4bb030453bbe7b4ba55df821d026a0493c4/pydantic_core-2.41.5-pp311-pypy311_pp73-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:58133647260ea01e4d0500089a8c4f07bd7aa6ce109682b1426394988d8aaacc", size = 2134256 },
|
||||||
|
|||||||
Reference in New Issue
Block a user