diff --git a/docutranslate/app.py b/docutranslate/app.py
index ead3427..6e591f7 100644
--- a/docutranslate/app.py
+++ b/docutranslate/app.py
@@ -113,6 +113,7 @@ def _create_default_task_state() -> Dict[str, Any]:
"original_filename": None,
"temp_dir": None, # 用于存储临时文件的目录
"downloadable_files": {}, # 存储可下载文件的路径和名称
+ "attachment_files": {}, # 存储附件文件的路径和标识符
}
@@ -202,9 +203,10 @@ DocuTranslate 后端服务 API,提供文档翻译、状态查询、结果下
2. **`GET /service/status/{{task_id}}`**: 使用获取到的 `task_id` 轮询此端点,获取任务的实时状态。
3. **`GET /service/logs/{{task_id}}`**: (可选) 获取实时的翻译日志。
4. **`GET /service/download/{{task_id}}/{{file_type}}`**: 任务完成后 (当 `download_ready` 为 `true` 时),通过此端点下载结果文件。
-5. **`GET /service/content/{{task_id}}/{{file_type}}`**: 任务完成后(当 `download_ready` 为 `true` 时),以JSON格式获取文件内容。
-6. **`POST /service/cancel/{{task_id}}`**: (可选) 取消一个正在进行的任务。
-7. **`POST /service/release/{{task_id}}`**: (可选) 当任务不再需要时,释放其在服务器上占用的所有资源,包括临时文件。
+5. **`GET /service/attachment/{{task_id}}/{{identifier}}`**: (可选) 如果任务生成了附件(如术语表),通过此端点下载。
+6. **`GET /service/content/{{task_id}}/{{file_type}}`**: 任务完成后(当 `download_ready` 为 `true` 时),以JSON格式获取文件内容。
+7. **`POST /service/cancel/{{task_id}}`**: (可选) 取消一个正在进行的任务。
+8. **`POST /service/release/{{task_id}}`**: (可选) 当任务不再需要时,释放其在服务器上占用的所有资源,包括临时文件。
**版本**: {__version__}
""",
@@ -795,6 +797,23 @@ async def _perform_translation(
except Exception as export_error:
task_logger.error(f"生成 {file_type} 文件时出错: {export_error}", exc_info=True)
+ # 处理附件文件
+ attachment_files = {}
+ attachment_object = workflow.get_attachment()
+ if attachment_object and attachment_object.attachment_dict:
+ task_logger.info(f"发现 {len(attachment_object.attachment_dict)} 个附件,正在处理...")
+ for identifier, doc in attachment_object.attachment_dict.items():
+ try:
+ # 'doc' is a Document object
+ attachment_filename = f"{doc.stem or identifier}.{doc.suffix}"
+ attachment_path = os.path.join(temp_dir, attachment_filename)
+ with open(attachment_path, "wb") as f:
+ f.write(doc.content)
+ attachment_files[identifier] = {"path": attachment_path, "filename": attachment_filename}
+ task_logger.info(f"成功生成附件 '{identifier}' 文件: {attachment_filename}")
+ except Exception as attachment_error:
+ task_logger.error(f"生成附件 '{identifier}' 文件时出错: {attachment_error}", exc_info=True)
+
# 5. 任务成功,更新最终状态
end_time = time.time()
duration = end_time - task_state["task_start_time"]
@@ -804,6 +823,7 @@ async def _perform_translation(
"error_flag": False,
"task_end_time": end_time,
"downloadable_files": downloadable_files,
+ "attachment_files": attachment_files,
})
task_logger.info(f"翻译成功完成,用时 {duration:.2f} 秒。")
@@ -867,7 +887,7 @@ async def _start_translation_task(
"original_filename_stem": Path(original_filename).stem,
"original_filename": original_filename,
"task_start_time": time.time(), "task_end_time": 0, "current_task_ref": None,
- "temp_dir": None, "downloadable_files": {},
+ "temp_dir": None, "downloadable_files": {}, "attachment_files": {},
})
log_history = tasks_log_histories[task_id]
@@ -1014,7 +1034,7 @@ async def service_release_task(task_id: str):
@service_router.get(
"/status/{task_id}",
summary="获取任务状态",
- description="根据任务ID获取任务的当前状态。当 `download_ready` 为 `true` 时,`downloads` 对象中会包含可用的下载链接。",
+ description="根据任务ID获取任务的当前状态。当 `download_ready` 为 `true` 时,`downloads` 和 `attachment` 对象中会包含可用的下载链接。",
responses={
200: {
"description": "成功获取任务状态。",
@@ -1028,7 +1048,7 @@ async def service_release_task(task_id: str):
"status_message": "正在处理 'annual_report.pdf'...",
"error_flag": False, "download_ready": False, "original_filename_stem": "annual_report",
"original_filename": "annual_report.pdf", "task_start_time": 1678889400.0,
- "task_end_time": 0, "downloads": {}
+ "task_end_time": 0, "downloads": {}, "attachment": {}
}
},
"completed_markdown": {
@@ -1043,6 +1063,26 @@ async def service_release_task(task_id: str):
"html": "/service/download/b2865b93/html",
"markdown": "/service/download/b2865b93/markdown",
"markdown_zip": "/service/download/b2865b93/markdown_zip"
+ },
+ "attachment": {}
+ }
+ },
+ "completed_with_attachment": {
+ "summary": "已完成 (带附件)",
+ "value": {
+ "task_id": "g1h2i3j4", "is_processing": False,
+ "status_message": "翻译成功!用时 125.00 秒。",
+ "error_flag": False, "download_ready": True,
+ "original_filename_stem": "complex_document",
+ "original_filename": "complex_document.docx",
+ "task_start_time": 1678891000.0,
+ "task_end_time": 1678891125.0,
+ "downloads": {
+ "docx": "/service/download/g1h2i3j4/docx",
+ "html": "/service/download/g1h2i3j4/html"
+ },
+ "attachment": {
+ "glossary": "/service/attachment/g1h2i3j4/glossary"
}
}
},
@@ -1062,7 +1102,8 @@ async def service_release_task(task_id: str):
"xlsx": "/service/download/d7e8f9a0/xlsx",
"csv": "/service/download/d7e8f9a0/csv",
"html": "/service/download/d7e8f9a0/html"
- }
+ },
+ "attachment": {}
}
},
"completed_docx": {
@@ -1076,7 +1117,8 @@ async def service_release_task(task_id: str):
"downloads": {
"docx": "/service/download/f8a9c1b2/docx",
"html": "/service/download/f8a9c1b2/html"
- }
+ },
+ "attachment": {}
}
},
"completed_epub": {
@@ -1090,7 +1132,8 @@ async def service_release_task(task_id: str):
"downloads": {
"epub": "/service/download/e9b8d7c6/epub",
"html": "/service/download/e9b8d7c6/html"
- }
+ },
+ "attachment": {}
}
},
# --- HTML STATUS EXAMPLE START ---
@@ -1104,7 +1147,8 @@ async def service_release_task(task_id: str):
"task_end_time": 1678890115.78,
"downloads": {
"html": "/service/download/a1b2c3d4/html"
- }
+ },
+ "attachment": {}
}
},
# --- HTML STATUS EXAMPLE END ---
@@ -1115,7 +1159,7 @@ async def service_release_task(task_id: str):
"status_message": "翻译过程中发生错误: LLM API key is invalid",
"error_flag": True, "download_ready": False, "original_filename_stem": "bad_config",
"original_filename": "bad_config.json", "task_start_time": 1678889600.0,
- "task_end_time": 1678889610.0, "downloads": {}
+ "task_end_time": 1678889610.0, "downloads": {}, "attachment": {}
}
}
}
@@ -1136,6 +1180,11 @@ async def service_get_status(
for file_type in task_state["downloadable_files"].keys():
downloads[file_type] = f"/service/download/{task_id}/{file_type}"
+ attachments = {}
+ if task_state.get("download_ready") and task_state.get("attachment_files"):
+ for identifier in task_state["attachment_files"].keys():
+ attachments[identifier] = f"/service/attachment/{task_id}/{identifier}"
+
return JSONResponse(content={
"task_id": task_id,
"is_processing": task_state["is_processing"],
@@ -1146,7 +1195,8 @@ async def service_get_status(
"original_filename": task_state.get("original_filename"),
"task_start_time": task_state["task_start_time"],
"task_end_time": task_state["task_end_time"],
- "downloads": downloads
+ "downloads": downloads,
+ "attachment": attachments
})
@@ -1218,6 +1268,42 @@ async def service_download_file(
return FileResponse(path=file_path, media_type=media_type, filename=filename)
+@service_router.get(
+ "/attachment/{task_id}/{identifier}",
+ summary="下载附件文件",
+ description="根据任务ID和附件标识符下载在翻译过程中生成的附加文件,例如自动生成的术语表。",
+ responses={
+ 200: {
+ "description": "成功返回文件流。文件名通过 Content-Disposition 头指定。",
+ "content": {
+ "application/octet-stream": {"schema": {"type": "string", "format": "binary"}},
+ }
+ },
+ 404: {"description": "任务ID不存在,或该任务没有指定的附件,或临时文件已丢失。"},
+ }
+)
+async def service_download_attachment(
+        task_id: str = FastApiPath(..., description="已完成任务的ID", examples=["g1h2i3j4"]),
+        identifier: str = FastApiPath(..., description="要下载的附件的标识符。", examples=["glossary"])
+):
+    """Stream one attachment that a translation task produced.
+
+    Looks up ``identifier`` in the task's ``attachment_files`` mapping and
+    returns the referenced file as ``application/octet-stream``.
+
+    Args:
+        task_id: ID of the task that owns the attachment.
+        identifier: Key of the attachment inside ``attachment_files``.
+
+    Returns:
+        A ``FileResponse`` for the attachment file.
+
+    Raises:
+        HTTPException: 404 when the task is unknown, the identifier has no
+            registered attachment, or the recorded path is not a regular
+            file on disk any more.
+    """
+    task_state = tasks_state.get(task_id)
+    if not task_state:
+        raise HTTPException(status_code=404, detail=f"找不到任务ID '{task_id}'。")
+
+    # `or {}` also covers a state whose "attachment_files" is present but None.
+    attachment_info = (task_state.get("attachment_files") or {}).get(identifier) or {}
+    file_path = attachment_info.get("path")
+    # Validate the path before touching the filesystem: os.path.exists(None)
+    # raises TypeError, which would surface as a 500 instead of the 404.
+    # isfile() additionally rejects directories, which cannot be streamed.
+    if not file_path or not os.path.isfile(file_path):
+        raise HTTPException(status_code=404,
+                            detail=f"任务 '{task_id}' 不存在标识符为 '{identifier}' 的附件,或文件已丢失。")
+
+    # Fall back to the on-disk name when no download name was recorded.
+    filename = attachment_info.get("filename") or os.path.basename(file_path)
+
+    # Use a generic media type as attachments can be of various formats
+    media_type = "application/octet-stream"
+
+    return FileResponse(path=file_path, media_type=media_type, filename=filename)
+
+
@service_router.get(
"/content/{task_id}/{file_type}",
summary="下载翻译结果内容 (JSON)",
diff --git a/docutranslate/static/i18nData.json b/docutranslate/static/i18nData.json
index 3f361c3..e81ebf2 100644
--- a/docutranslate/static/i18nData.json
+++ b/docutranslate/static/i18nData.json
@@ -18,23 +18,18 @@
"insertModeAppend": "附加到原文后 (Append)",
"insertModePrepend": "附加到原文前 (Prepend)",
"insertModeHelpDocx": "选择如何将翻译后的文本插入。",
+ "separatorLabel": "分隔符",
+ "separatorHelp": "当插入模式为附加或前置时,用于分隔原文和译文的字符。\\n 代表换行。",
"insertModeHelpXlsx": "选择如何将翻译后的文本插入到单元格中。",
+ "xlsxTranslateRegionsLabel": "翻译区域 (可选)",
"insertModeHelpSrt": "选择如何将翻译后的文本插入。",
"insertModeHelpEpub": "选择如何将翻译后的文本插入。",
"insertModeHelpHtml": "选择如何将翻译后的文本插入。",
- "separatorLabel": "分隔符",
- "separatorPlaceholder": "例如: \\n---翻译---\\n",
- "separatorPlaceholderSimple": "例如: \\n---\\n",
- "separatorHelp": "当插入模式为附加或前置时,用于分隔原文和译文的字符。\\n 代表换行。",
- "xlsxTranslateRegionsLabel": "翻译区域 (可选)",
- "xlsxTranslateRegionsPlaceholder": "每行一个区域, 例如:Sheet1!A1:B10(不指定表名则对所有表生效)",
"jsonPathLabel": "需要翻译的JSON路径",
- "jsonPathPlaceholder": "每行一个路径, 例如:\n$.name\n$.*",
"jsonPathHelp": "采用jsonpath-ng的路径选择语法,每一行表示一个json路径",
"parsingEngineLabel": "解析引擎",
"parsingEngineHelp": "如果上传的文件本身是.md格式,此项可不选。",
"getMineruTokenTitle": "获取Mineru Token",
- "mineruTokenPlaceholder": "使用Mineru引擎时需要",
"modelVersionLabel": "Mineru 模型版本",
"modelVersionVlm": "VLM",
"modelVersionPipline": "Pipeline",
@@ -44,33 +39,28 @@
"platformLabel": "选择平台",
"platformCustom": "自定义接口",
"baseUrlLabel": "API 地址 (Base URL)",
- "baseUrlPlaceholder": "OpenAi兼容地址",
"getApiKeyTitle": "获取API Key",
- "apiKeyPlaceholder": "请输入您的API Key",
"modelIdLabel": "模型 ID",
- "modelIdPlaceholder": "例如: gpt-4o, glm-4",
"targetLanguageLabel": "目标语言",
"targetLanguageCustom": "其它 (自定义)",
- "customLangPlaceholder": "请输入目标语言, 例如: Italian",
"thinkingModeLabel": "思考模式",
"thinkingModeTooltip": "设置混合推理模型的思考模式,目前支持智谱平台的glm-4.5系列、阿里云的qwen3系列、火山引擎的Doubao-Seed-1.6系列等",
"thinkingModeEnable": "启用",
"thinkingModeDisable": "禁用",
"thinkingModeDefault": "默认",
"customPromptLabel": "自定义Prompt",
- "customPromptPlaceholder": "可选,如“人名保持原文不翻译”",
+ "chunkSizeLabel": "分块大小",
+ "resetBtn": "重置",
+ "concurrentLabel": "并发数",
+ "glossaryGenTitle": "5. 术语表",
"glossaryLabel": "术语表 (可选)",
"glossaryHelp": "选择一个或多个CSV文件。文件需包含'src'和'dst'两列标题,分别代表原文和译文。",
"viewGlossaryBtn": "查看术语表",
"clearGlossaryBtn": "清空",
- "glossaryGenTitle": "术语表",
"glossaryGenEnableLabel": "自动生成术语表",
"glossaryGenConfigLabel": "生成术语表配置",
"glossaryGenConfigSame": "与翻译配置相同",
"glossaryGenConfigCustom": "自定义",
- "resetBtn": "重置",
- "chunkSizeLabel": "分块大小",
- "concurrentLabel": "并发数",
"githubInfo": "GitHub主页(欢迎star❤):
https://github.com/xunbu/docutranslate",
"qqGroupInfo": "交流QQ群: 1047781902",
"taskListTitle": "任务列表",
@@ -80,11 +70,12 @@
"taskCardIdPlaceholder": "等待提交...",
"taskCardFileDrop": "点击或拖拽文件到此处",
"taskCardFileSelected": "文件已选择",
- "taskCardFilenameLabel": "文件名:",
+ "taskCardFilenameLabel": "文件名: ",
"taskCardLogLabel": "日志",
"taskCardStatusWaiting": "等待上传文件...",
"taskCardPreviewBtn": "预览",
"taskCardDownloadBtn": "下载",
+ "taskCardAttachmentBtn": "附件",
"taskCardStartBtn": "开始翻译",
"downloadMdEmbedded": "Markdown(嵌图)",
"downloadMdZip": "Markdown压缩包",
@@ -96,7 +87,7 @@
"closeBtn": "关闭",
"downloadBtn": "下载",
"tutorialModalTitle": "使用教程",
- "tutorialModalBody": "
视频教程可以在B站搜索 docutranslate 获取。
欢迎使用 DocuTranslate!请按照以下步骤完成文档翻译:
首先,在配置面板顶部选择您需要的翻译流程。不同的工作流适用于不同类型的文件:
.txt 等纯文本文件。.json 文件中的特定字段。.docx 文件。.xlsx 电子表格、 .csv 文件。.srt 字幕文件。.epub 电子书文件。.html 文件。根据您选择的工作流,完成相应的配置。所有配置项都会自动保存在您的浏览器中。
minerU 引擎,需要在此处填入您的Token。$..description翻译所有键为description的值。$.items[0].name翻译第一个item的name值。在右侧的任务列表中,点击或拖拽您的文档到文件上传区域。
文件选择成功后,点击任务卡片右下角的 开始翻译 按钮。系统将开始处理任务,您可以在日志区域查看实时进度。
翻译完成后,任务卡片下方会出现操作按钮:
视频教程可以在B站搜索 docutranslate 获取。
欢迎使用 DocuTranslate!请按照以下步骤完成文档翻译:
首先,在配置面板顶部选择您需要的翻译流程。不同的工作流适用于不同类型的文件:
.txt 等纯文本文件。.json 文件中的特定字段。.docx 文件。.xlsx 电子表格、 .csv 文件。.srt 字幕文件。.epub 电子书文件。.html 文件。根据您选择的工作流,完成相应的配置。所有配置项都会自动保存在您的浏览器中。
minerU 引擎,需要在此处填入您的Token。$..description翻译所有键为description的值。$.items[0].name翻译第一个item的name值。在右侧的任务列表中,点击或拖拽您的文档到文件上传区域。
文件选择成功后,点击任务卡片右下角的 开始翻译 按钮。系统将开始处理任务,您可以在日志区域查看实时进度。
翻译完成后,任务卡片下方会出现操作按钮:
\\n represents a newline.",
- "xlsxTranslateRegionsLabel": "Translate Regions (Optional)",
- "xlsxTranslateRegionsPlaceholder": "One region per line, e.g., Sheet1!A1:B10 (applies to all sheets if sheet name is omitted)",
+ "separatorHelp": "Separator character used between original and translated text when in Append or Prepend mode. \\n represents a newline.",
+ "insertModeHelpXlsx": "Choose how to insert translated text into cells.",
+ "xlsxTranslateRegionsLabel": "Translation Regions (Optional)",
+ "insertModeHelpSrt": "Choose how to insert the translated text.",
+ "insertModeHelpEpub": "Choose how to insert the translated text.",
+ "insertModeHelpHtml": "Choose how to insert the translated text.",
"jsonPathLabel": "JSON Paths to Translate",
- "jsonPathPlaceholder": "One path per line, e.g.:\n$.name\n$.*",
- "jsonPathHelp": "Uses jsonpath-ng syntax. Each line represents a JSON path.",
+ "jsonPathHelp": "Uses jsonpath-ng syntax. Enter one JSON path per line.",
"parsingEngineLabel": "Parsing Engine",
- "parsingEngineHelp": "If the uploaded file is already in .md format, this can be left unselected.",
+ "parsingEngineHelp": "Not required if the uploaded file is already in .md format.",
"getMineruTokenTitle": "Get Mineru Token",
- "mineruTokenPlaceholder": "Required when using the Mineru engine",
"modelVersionLabel": "Mineru Model Version",
"modelVersionVlm": "VLM",
"modelVersionPipline": "Pipeline",
- "modelVersionHelp": "Mineru VLM is a newer, internal beta model.",
+ "modelVersionHelp": "Mineru VLM is a newer model in beta.",
"formulaOcrLabel": "Formula Recognition",
"codeOcrLabel": "Code Recognition",
"platformLabel": "Select Platform",
"platformCustom": "Custom Endpoint",
- "baseUrlLabel": "API Base URL",
- "baseUrlPlaceholder": "OpenAI-compatible URL",
+ "baseUrlLabel": "API Address (Base URL)",
"getApiKeyTitle": "Get API Key",
- "apiKeyPlaceholder": "Please enter your API Key",
"modelIdLabel": "Model ID",
- "modelIdPlaceholder": "e.g., gpt-4o, llama-3-70b",
"targetLanguageLabel": "Target Language",
"targetLanguageCustom": "Other (Custom)",
- "customLangPlaceholder": "Enter target language, e.g., Italian",
"thinkingModeLabel": "Thinking Mode",
- "thinkingModeTooltip": "Sets the thinking mode for hybrid inference models, currently supporting Zhipu's glm-4.5 series, Alibaba Cloud's qwen3 series, Volcengine's Doubao-Seed-1.6 series, etc.",
+ "thinkingModeTooltip": "Set the thinking mode for mixed-inference models. Currently supports Zhipu's glm-4.5 series, Alibaba's qwen3 series, Volcengine's Doubao-Seed-1.6 series, etc.",
"thinkingModeEnable": "Enable",
"thinkingModeDisable": "Disable",
"thinkingModeDefault": "Default",
"customPromptLabel": "Custom Prompt",
- "customPromptPlaceholder": "Optional, e.g., 'Do not translate proper names'",
+ "chunkSizeLabel": "Chunk Size",
+ "resetBtn": "Reset",
+ "concurrentLabel": "Concurrency",
+ "glossaryGenTitle": "5. Glossary",
"glossaryLabel": "Glossary (Optional)",
- "glossaryHelp": "Select one or more CSV files. Files must contain 'src' and 'dst' headers for source and destination terms.",
+ "glossaryHelp": "Select one or more CSV files. Files must have 'src' and 'dst' headers, representing source and destination text respectively.",
"viewGlossaryBtn": "View Glossary",
"clearGlossaryBtn": "Clear",
- "glossaryGenTitle": "Glossary",
"glossaryGenEnableLabel": "Auto-generate Glossary",
"glossaryGenConfigLabel": "Glossary Generation Config",
- "glossaryGenConfigSame": "Same as Translator",
+ "glossaryGenConfigSame": "Same as Translation Config",
"glossaryGenConfigCustom": "Custom",
- "resetBtn": "Reset",
- "chunkSizeLabel": "Chunk Size",
- "concurrentLabel": "Concurrency",
- "githubInfo": "GitHub Repo (Star us! ❤): Video tutorials are available on Bilibili by searching for docutranslate.
Welcome to DocuTranslate! Follow these steps to translate your documents:
First, choose your desired translation process from the top of the settings panel. Different workflows are suited for different file types:
.txt and other plain text files..json files..docx files..xlsx spreadsheets and .csv files..srt subtitle files..epub e-book files..html files.Configure the settings based on your chosen workflow. All settings are automatically saved in your browser.
minerU engine, you must enter your token here.$..description translates all values with the key 'description'. $.items[0].name translates the 'name' of the first item.In the task list on the right, click or drag your document into the file upload area.
Once the file is selected, click the Start Translation button on the task card. The system will begin processing, and you can monitor real-time progress in the log area.
After the translation is complete, action buttons will appear on the task card:
Video tutorials are available on YouTube by searching for docutranslate.
Welcome to DocuTranslate! Follow these steps to translate your documents:
First, choose the appropriate translation process from the top of the settings panel. Different workflows are suited for different file types:
.txt files..json files..docx files..xlsx spreadsheets or .csv files..srt subtitle files..epub e-books..html files.Configure the settings based on your chosen workflow. All settings are automatically saved in your browser.
minerU engine.$..description translates all values with the key 'description'. $.items[0].name translates the 'name' of the first item.In the task list on the right, click or drag your document into the file upload area.
After selecting a file, click the Start Translation button on the task card. The system will begin processing, and you can monitor the progress in the log area.
Once the translation is complete, new buttons will appear on the task card:
GitHub主页(欢迎star❤):
https://github.com/xunbu/docutranslate
交流QQ群: 1047781902
当前没有任务,点击“新建任务”开始吧!
等待提交...
点击或拖拽文件到此处
GitHub主页(欢迎star❤):
https://github.com/xunbu/docutranslate
交流QQ群: 1047781902
当前没有任务,点击“新建任务”开始吧!
等待提交...
点击或拖拽文件到此处