From 63e6101064c45ee71698049dac7b483e056b5dc4 Mon Sep 17 00:00:00 2001 From: xunbu Date: Sat, 29 Nov 2025 21:51:09 +0800 Subject: [PATCH] =?UTF-8?q?pptx=E6=94=AF=E6=8C=81?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docutranslate/app.py | 190 +++++++++++++++--- docutranslate/static/i18nData.json | 18 +- docutranslate/static/index.html | 2 +- .../ai_translator/pptx_translator.py | 24 +-- 4 files changed, 171 insertions(+), 63 deletions(-) diff --git a/docutranslate/app.py b/docutranslate/app.py index 6326d98..adb2cf3 100644 --- a/docutranslate/app.py +++ b/docutranslate/app.py @@ -65,6 +65,9 @@ from docutranslate.workflow.base import Workflow from docutranslate.workflow.docx_workflow import DocxWorkflow, DocxWorkflowConfig from docutranslate.workflow.epub_workflow import EpubWorkflow, EpubWorkflowConfig from docutranslate.workflow.html_workflow import HtmlWorkflow, HtmlWorkflowConfig +# --- 新增的 Import --- +from docutranslate.workflow.pptx_workflow import PPTXWorkflow, PPTXWorkflowConfig +# ---------------------- from docutranslate.workflow.interfaces import DocxExportable, EpubExportable from docutranslate.workflow.interfaces import ( HTMLExportable, @@ -75,6 +78,7 @@ from docutranslate.workflow.interfaces import ( SrtExportable, CsvExportable, AssExportable, + PPTXExportable, # Added PPTXExportable ) from docutranslate.workflow.json_workflow import JsonWorkflow, JsonWorkflowConfig from docutranslate.workflow.md_based_workflow import ( @@ -88,9 +92,7 @@ from docutranslate.workflow.xlsx_workflow import XlsxWorkflow, XlsxWorkflowConfi if DOCLING_EXIST or TYPE_CHECKING: from docutranslate.converter.x2md.converter_docling import ConverterDoclingConfig from docutranslate.converter.x2md.converter_mineru import ConverterMineruConfig -# --- 新增的 Import --- from docutranslate.converter.x2md.converter_mineru_deploy import ConverterMineruDeployConfig -# ---------------------- from docutranslate.exporter.md.md2html_exporter 
import MD2HTMLExporterConfig from docutranslate.exporter.txt.txt2html_exporter import TXT2HTMLExporterConfig from docutranslate.translator.ai_translator.md_translator import MDTranslatorConfig @@ -108,8 +110,8 @@ from docutranslate.exporter.epub.epub2html_exporter import Epub2HTMLExporterConf from docutranslate.translator.ai_translator.html_translator import HtmlTranslatorConfig from docutranslate.translator.ai_translator.ass_translator import AssTranslatorConfig from docutranslate.exporter.ass.ass2html_exporter import Ass2HTMLExporterConfig - -# ------------------------------------ +from docutranslate.translator.ai_translator.pptx_translator import PPTXTranslatorConfig +from docutranslate.exporter.pptx.pptx2html_exporter import PPTX2HTMLExporterConfig from docutranslate.logger import global_logger from docutranslate.translator import default_params @@ -133,6 +135,7 @@ WORKFLOW_DICT: Dict[str, Type[Workflow]] = { "epub": EpubWorkflow, "html": HtmlWorkflow, "ass": AssWorkflow, + "pptx": PPTXWorkflow, # Added PPTXWorkflow } # --- 媒体类型映射 --- @@ -148,6 +151,7 @@ MEDIA_TYPES = { "srt": "text/plain; charset=utf-8", "epub": "application/epub+zip", "ass": "text/plain; charset=utf-8", + "pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation", # Added PPTX MIME } @@ -609,6 +613,25 @@ class AssWorkflowParams(BaseWorkflowParams): # --- ASS WORKFLOW PARAMS END --- +# --- PPTX WORKFLOW PARAMS START --- +class PPTXWorkflowParams(BaseWorkflowParams): + workflow_type: Literal["pptx"] = Field( + ..., description="指定使用PPTX的翻译工作流。" + ) + insert_mode: Literal["replace", "append", "prepend"] = Field( + "replace", + description="翻译文本的插入模式。'replace':替换原文,'append':附加到原文后,'prepend':附加到原文前。", + ) + separator: str = Field( + "\n", + description="当 insert_mode 为 'append' 或 'prepend' 时,用于分隔原文和译文的分隔符。", + ) + # target_cjk_font removed as per request + + +# --- PPTX WORKFLOW PARAMS END --- + + # 3. 
使用可辨识联合类型(Discriminated Union)将它们组合起来 TranslatePayload = Annotated[ Union[ @@ -621,6 +644,7 @@ TranslatePayload = Annotated[ EpubWorkflowParams, HtmlWorkflowParams, AssWorkflowParams, + PPTXWorkflowParams, ], Field(discriminator="workflow_type"), ] @@ -639,6 +663,7 @@ class TranslateServiceRequest(BaseModel): "my_book.epub", "index.html", "dialogue.ass", + "presentation.pptx", ], ) file_content: str = Field( @@ -864,6 +889,26 @@ class TranslateServiceRequest(BaseModel): "retry": default_params["retry"], }, }, + { + "file_name": "presentation.pptx", + "file_content": "UEsDBBQAAAAIA... (base64-encoded pptx)", + "payload": { + "workflow_type": "pptx", + "skip_translate": False, + "base_url": "https://api.openai.com/v1", + "api_key": "sk-your-api-key-here", + "model_id": "gpt-4o", + "to_lang": "中文", + "insert_mode": "replace", + "separator": "\n", + "chunk_size": default_params["chunk_size"], + "concurrent": default_params["concurrent"], + "temperature": default_params["temperature"], + "timeout": default_params["timeout"], + "thinking": "default", + "retry": default_params["retry"], + }, + }, ] } ) @@ -1283,6 +1328,46 @@ async def _perform_translation( workflow = AssWorkflow(config=workflow_config) # --- ASS WORKFLOW LOGIC END --- + # --- PPTX WORKFLOW LOGIC START --- + elif isinstance(payload, PPTXWorkflowParams): + task_logger.info("构建 PPTXWorkflow 配置。") + translator_args = payload.model_dump( + include={ + "skip_translate", + "base_url", + "api_key", + "model_id", + "to_lang", + "custom_prompt", + "temperature", + "thinking", + "chunk_size", + "concurrent", + "insert_mode", + "separator", + "glossary_dict", + "timeout", + "retry", + "system_proxy_enable", + "force_json", + }, + exclude_none=True, + ) + translator_args["glossary_generate_enable"] = ( + payload.glossary_generate_enable + ) + translator_args["glossary_agent_config"] = build_glossary_agent_config() + translator_config = PPTXTranslatorConfig(**translator_args) + + html_exporter_config = 
PPTX2HTMLExporterConfig(cdn=True) + workflow_config = PPTXWorkflowConfig( + translator_config=translator_config, + html_exporter_config=html_exporter_config, + logger=task_logger, + ) + workflow = PPTXWorkflow(config=workflow_config) + # --- PPTX WORKFLOW LOGIC END --- + else: raise TypeError(f"工作流类型 '{payload.workflow_type}' 的处理逻辑未实现。") @@ -1313,30 +1398,7 @@ async def _perform_translation( # 定义导出函数映射 export_map = {} - # 根据 workflow 的类型填充导出映射 - if isinstance(workflow, HTMLExportable): - html_config = None - if isinstance(workflow, MarkdownBasedWorkflow): - html_config = MD2HTMLExporterConfig(cdn=is_cdn_available) - elif isinstance(workflow, TXTWorkflow): - html_config = TXT2HTMLExporterConfig(cdn=is_cdn_available) - elif isinstance(workflow, JsonWorkflow): - html_config = Json2HTMLExporterConfig(cdn=is_cdn_available) - elif isinstance(workflow, XlsxWorkflow): - html_config = Xlsx2HTMLExporterConfig(cdn=is_cdn_available) - elif isinstance(workflow, DocxWorkflow): - html_config = Docx2HTMLExporterConfig(cdn=is_cdn_available) - elif isinstance(workflow, SrtWorkflow): - html_config = Srt2HTMLExporterConfig(cdn=is_cdn_available) - elif isinstance(workflow, EpubWorkflow): - html_config = Epub2HTMLExporterConfig(cdn=is_cdn_available) - elif isinstance(workflow, AssWorkflow): - html_config = Ass2HTMLExporterConfig(cdn=is_cdn_available) - export_map["html"] = ( - lambda: workflow.export_to_html(html_config), - f"{filename_stem}_translated.html", - True, - ) + if isinstance(workflow, MDFormatsExportable): export_map["markdown"] = ( workflow.export_to_markdown, @@ -1396,6 +1458,39 @@ async def _perform_translation( f"{filename_stem}_translated.ass", True, ) + if isinstance(workflow, PPTXExportable): + export_map["pptx"] = ( + workflow.export_to_pptx, + f"{filename_stem}_translated.pptx", + False, + ) + + # 根据 workflow 的类型填充导出映射 + if isinstance(workflow, HTMLExportable): + html_config = None + if isinstance(workflow, MarkdownBasedWorkflow): + html_config = 
MD2HTMLExporterConfig(cdn=is_cdn_available) + elif isinstance(workflow, TXTWorkflow): + html_config = TXT2HTMLExporterConfig(cdn=is_cdn_available) + elif isinstance(workflow, JsonWorkflow): + html_config = Json2HTMLExporterConfig(cdn=is_cdn_available) + elif isinstance(workflow, XlsxWorkflow): + html_config = Xlsx2HTMLExporterConfig(cdn=is_cdn_available) + elif isinstance(workflow, DocxWorkflow): + html_config = Docx2HTMLExporterConfig(cdn=is_cdn_available) + elif isinstance(workflow, SrtWorkflow): + html_config = Srt2HTMLExporterConfig(cdn=is_cdn_available) + elif isinstance(workflow, EpubWorkflow): + html_config = Epub2HTMLExporterConfig(cdn=is_cdn_available) + elif isinstance(workflow, AssWorkflow): + html_config = Ass2HTMLExporterConfig(cdn=is_cdn_available) + elif isinstance(workflow, PPTXWorkflow): + html_config = PPTX2HTMLExporterConfig(cdn=is_cdn_available) + export_map["html"] = ( + lambda: workflow.export_to_html(html_config), + f"{filename_stem}_translated.html", + True, + ) # 循环生成文件 for file_type, (export_func, filename, is_string_output) in export_map.items(): @@ -1616,7 +1711,7 @@ def _cancel_translation_logic(task_id: str): description=""" 接收一个包含文件内容(Base64编码)和工作流参数的JSON请求,启动一个后台翻译任务。 -- **工作流选择**: 请求体中的 `payload.workflow_type` 字段决定了本次任务的类型(如 `markdown_based`, `txt`, `json`, `xlsx`, `docx`, `srt`, `epub`, `html`, `ass`)。 +- **工作流选择**: 请求体中的 `payload.workflow_type` 字段决定了本次任务的类型(如 `markdown_based`, `txt`, `json`, `xlsx`, `docx`, `srt`, `epub`, `html`, `ass`, `pptx`)。 - **动态参数**: 根据所选工作流,API需要不同的参数集。请参考下面的Schema或示例。 - **异步处理**: 此端点会立即返回任务ID,客户端需轮询状态接口获取进度。 """, @@ -1960,6 +2055,27 @@ async def service_release_task(task_id: str): }, }, # --- ASS STATUS EXAMPLE END --- + # --- PPTX STATUS EXAMPLE START --- + "completed_pptx": { + "summary": "已完成 (PPTX)", + "value": { + "task_id": "a1b2c3d6", + "is_processing": False, + "status_message": "翻译成功!用时 30.50 秒。", + "error_flag": False, + "download_ready": True, + "original_filename_stem": "presentation", + 
"original_filename": "presentation.pptx", + "task_start_time": 1678890300.0, + "task_end_time": 1678890330.50, + "downloads": { + "pptx": "/service/download/a1b2c3d6/pptx", + "html": "/service/download/a1b2c3d6/html", + }, + "attachment": {}, + }, + }, + # --- PPTX STATUS EXAMPLE END --- "error": { "summary": "失败", "value": { @@ -2052,6 +2168,7 @@ FileType = Literal[ "srt", "epub", "ass", + "pptx", ] @@ -2077,6 +2194,9 @@ FileType = Literal[ "application/epub+zip": { "schema": {"type": "string", "format": "binary"} }, + "application/vnd.openxmlformats-officedocument.presentationml.presentation": { + "schema": {"type": "string", "format": "binary"} + }, }, }, 404: { @@ -2092,7 +2212,7 @@ async def service_download_file( file_type: FileType = FastApiPath( ..., description="要下载的文件类型。", - examples=["html", "json", "csv", "docx", "srt", "epub", "ass"], + examples=["html", "json", "csv", "docx", "srt", "epub", "ass", "pptx"], ), ): task_state = tasks_state.get(task_id) @@ -2198,6 +2318,14 @@ async def service_download_attachment( "content": "UEsDBBQAAAAIA... (base64-encoded string)", }, }, + "pptx_base64": { + "summary": "PPTX 内容 (Base64)", + "value": { + "file_type": "pptx", + "filename": "my_presentation_translated.pptx", + "content": "UEsDBBQAAAAIA... 
(base64-encoded string)", + }, + }, } } }, @@ -2215,7 +2343,7 @@ async def service_content( file_type: FileType = FastApiPath( ..., description="要获取内容的文件类型。", - examples=["html", "json", "csv", "docx", "srt", "epub", "ass"], + examples=["html", "json", "csv", "docx", "srt", "epub", "ass", "pptx"], ), ): task_state = tasks_state.get(task_id) diff --git a/docutranslate/static/i18nData.json b/docutranslate/static/i18nData.json index c045479..4e1ee00 100644 --- a/docutranslate/static/i18nData.json +++ b/docutranslate/static/i18nData.json @@ -13,6 +13,7 @@ "workflowOptionAss": "ASS字幕翻译 (.ass)", "workflowOptionJson": "JSON翻译 (.json)", "workflowOptionHtml": "HTML翻译 (.html)", + "workflowOptionPptx": "PPTX演示文稿 (.pptx)", "autoWorkflowLabel": "自动选择工作流", "txtSettingsTitleText": "TXT翻译选项", "insertModeLabel": "插入模式", @@ -48,6 +49,8 @@ "insertModeHelpAss": "选择如何将翻译后的文本插入。", "separatorPlaceholderAss": "例如: \\N (换行符)", "separatorHelpAss": "当插入模式为附加或前置时,用于分隔原文和译文的字符。\\N 是ASS格式的换行符。", + "pptxSettingsTitleText": "PPTX翻译选项", + "insertModeHelpPptx": "选择如何将翻译后的文本插入文本框。", "jsonSettingsTitleText": "JSON路径配置", "jsonPathLabel": "需要翻译的JSON路径", "jsonPathPlaceholder": "每行一个路径, 例如:\n$.name\n$.*", @@ -55,7 +58,6 @@ "parsingSettingsTitleText": "解析配置", "parsingEngineLabel": "解析引擎", "parsingEngineHelp": "如果上传的文件本身是.md格式,此项可不选。", - "getMineruTokenTitle": "获取Mineru Token", "mineruTokenPlaceholder": "使用Mineru引擎时需要", "modelVersionLabel": "Mineru 模型版本", "modelVersionVlm": "VLM", @@ -80,7 +82,6 @@ "platformCustom": "自定义接口", "baseUrlLabel": "API 地址 (Base URL)", "baseUrlPlaceholder": "OpenAi兼容地址", - "getApiKeyTitle": "获取API Key", "apiKeyPlaceholder": "请输入您的API Key", "modelIdLabel": "模型ID", "modelIdPlaceholder": "例如: gpt-4o, glm-4", @@ -115,8 +116,6 @@ "glossaryGenConfigCustom": "自定义", "importConfigBtn": "导入配置", "exportConfigBtn": "导出配置", - "githubInfo": "GitHub主页(欢迎star❤):
\n https://github.com/xunbu/docutranslate", - "qqGroupInfo": "交流QQ群: 1047781902", "taskListTitle": "任务列表", "newTaskBtn": "新建任务", "noTaskPlaceholder": "当前没有任务,点击“新建任务”开始吧!", @@ -144,7 +143,7 @@ "closeBtn": "关闭", "downloadBtn": "下载", "tutorialModalTitle": "使用教程", - "tutorialModalBody": "

视频教程可以在B站搜索 docutranslate 获取。

欢迎使用 DocuTranslate!请按照以下步骤完成文档翻译:

  1. 第一步:选择工作流

    在左侧配置面板的顶部,首先选择最适合您文件类型的处理流程。

    提示: 默认已开启“自动选择工作流”。您只需上传文件,系统会自动为您匹配合适的工作流,简化操作。
    • 转Markdown再翻译: 适用于翻译PDF、markdown、图片等文件。这是最通用和强大的模式。
    • 纯文本翻译: 用于翻译 .txt 纯文本文件。
    • EPUB翻译: 用于翻译 .epub 电子书文件。
    • DOCX翻译: 用于翻译 .docx Word文档。
    • XLSX翻译: 用于翻译 .xlsx.csv 电子表格文件。
    • SRT字幕翻译: 用于翻译 .srt 字幕文件。
    • ASS字幕翻译: 用于翻译 .ass 特效字幕文件。
    • JSON翻译: 用于翻译 .json 文件中的特定字段。
    • HTML翻译: 用于翻译 .html 网页文件。
  2. 第二步:配置参数

    选择工作流后,下方会显示相关的配置选项。请依次完成设置(所有配置都会自动保存在您的浏览器中):

    A. 工作流特定选项 (根据您第一步的选择出现):

    • 如果选择“转Markdown再翻译”,请配置 解析配置
      • 解析引擎: 选择一个引擎将您的文件(如PDF)转换为适合翻译的Markdown格式。如果您的文件已经是Markdown格式,则无需选择。
      • Mineru Token: 如果您选择 minerU 引擎,需要在此处填入您的Token。
    • 如果选择“纯文本/DOCX/XLSX/SRT/ASS/EPUB/HTML”,请配置其 翻译选项
      • 插入模式: 定义翻译结果如何放入文档。您可以选择直接“替换”原文,或是在原文之后“附加”,或是在原文之前“前置”。
      • 分隔符: 当选择“附加”或“前置”模式时,此项用于在原文和译文之间插入分隔符(例如ASS格式常用 \\N,EPUB格式常用 <br /> 作为换行分隔符)。
    • 如果选择“JSON翻译”,请配置 JSON路径
      • 需要翻译的JSON路径: 每行输入一个 JSONPath 表达式,将翻译所有与路径匹配的对象中的字符串。例如:$.*(翻译全部字符串),$..description(翻译所有键为description的值)。

    B. 通用选项 (适用于所有工作流):

    • 翻译模型:
      • 选择平台/API 地址/API Key/模型ID: 配置您希望使用的AI翻译服务。模型能力指令遵循越强,出错漏翻的概率越低。
      • 跳过翻译: 勾选此项后,将只执行文档解析和格式转换,不调用AI进行翻译。
    • 翻译配置:
      • 目标语言: 指定翻译的目标语言。
      • 自定义Prompt: 可选,添加额外指令,如“人名保持原文不翻译”。
      • 思考模式: 针对部分支持混合推理的模型进行设置,建议选择“禁用(推荐)”。
      • 分块大小/并发数等: 高级参数,用于调整性能和API请求行为,通常保持默认即可。
    • 术语表:
      • 上传术语表 (可选): 上传CSV文件(需包含'src'和'dst'列)来保证特定术语翻译的统一性和准确性。
      • 自动生成术语表: 启用后,程序会先从原文中提取术语并生成一个术语表,然后再进行翻译。
  3. 第三步:上传文件

    在右侧的任务列表中,点击或拖拽您的文档到文件上传区域。

  4. 第四步:开始翻译

    文件选择成功后,点击任务卡片右下角的 开始翻译 按钮。系统将开始处理任务,您可以在日志区域查看实时进度。

  5. 第五步:查看与下载

    翻译完成后,任务卡片下方会出现操作按钮:

    • 预览: 在右侧滑出的面板中进行原文和译文的对照预览。
    • 下载: 下载包括 PDF, DOCX, Markdown 等多种格式的译文。
    • 附件: 如果翻译过程中生成了附加文件(如自动生成的术语表),可在此处下载。
重要提示: 所有配置都会自动保存在您的浏览器本地,方便下次使用。您也可以使用新增的“导出配置”和“导入配置”按钮来备份和恢复您的设置。
", + "tutorialModalBody": "

视频教程可以在B站搜索 docutranslate 获取。

欢迎使用 DocuTranslate!请按照以下步骤完成文档翻译:

  1. 第一步:选择工作流

    在左侧配置面板的顶部,首先选择最适合您文件类型的处理流程。

    提示: 默认已开启“自动选择工作流”。您只需上传文件,系统会自动为您匹配合适的工作流,简化操作。
    • 转Markdown再翻译: 适用于翻译PDF、markdown、图片等文件。这是最通用和强大的模式。
    • 纯文本翻译: 用于翻译 .txt 纯文本文件。
    • EPUB翻译: 用于翻译 .epub 电子书文件。
    • DOCX翻译: 用于翻译 .docx Word文档。
    • XLSX翻译: 用于翻译 .xlsx.csv 电子表格文件。
    • PPTX翻译: 用于翻译 .pptx 幻灯片文件。
    • SRT字幕翻译: 用于翻译 .srt 字幕文件。
    • ASS字幕翻译: 用于翻译 .ass 特效字幕文件。
    • JSON翻译: 用于翻译 .json 文件中的特定字段。
    • HTML翻译: 用于翻译 .html 网页文件。
  2. 第二步:配置参数

    选择工作流后,下方会显示相关的配置选项。请依次完成设置(所有配置都会自动保存在您的浏览器中):

    A. 工作流特定选项 (根据您第一步的选择出现):

    • 如果选择“转Markdown再翻译”,请配置 解析配置
      • 解析引擎: 选择一个引擎将您的文件(如PDF)转换为适合翻译的Markdown格式。如果您的文件已经是Markdown格式,则无需选择。
      • Mineru Token: 如果您选择 minerU 引擎,需要在此处填入您的Token。
    • 如果选择“纯文本/DOCX/XLSX/PPTX/SRT/ASS/EPUB/HTML”,请配置其 翻译选项
      • 插入模式: 定义翻译结果如何放入文档。您可以选择直接“替换”原文,或是在原文之后“附加”,或是在原文之前“前置”。
      • 分隔符: 当选择“附加”或“前置”模式时,此项用于在原文和译文之间插入分隔符(例如ASS格式常用 \\N,EPUB格式常用 <br /> 作为换行分隔符)。
    • 如果选择“JSON翻译”,请配置 JSON路径
      • 需要翻译的JSON路径: 每行输入一个 JSONPath 表达式,将翻译所有与路径匹配的对象中的字符串。例如:$.*(翻译全部字符串),$..description(翻译所有键为description的值)。

    B. 通用选项 (适用于所有工作流):

    • 翻译模型:
      • 选择平台/API 地址/API Key/模型ID: 配置您希望使用的AI翻译服务。模型的指令遵循能力越强,出错和漏翻的概率越低。
      • 跳过翻译: 勾选此项后,将只执行文档解析和格式转换,不调用AI进行翻译。
    • 翻译配置:
      • 目标语言: 指定翻译的目标语言。
      • 自定义Prompt: 可选,添加额外指令,如“人名保持原文不翻译”。
      • 思考模式: 针对部分支持混合推理的模型进行设置,建议选择“禁用(推荐)”。
      • 分块大小/并发数等: 高级参数,用于调整性能和API请求行为,通常保持默认即可。
    • 术语表:
      • 上传术语表 (可选): 上传CSV文件(需包含'src'和'dst'列)来保证特定术语翻译的统一性和准确性。
      • 自动生成术语表: 启用后,程序会先从原文中提取术语并生成一个术语表,然后再进行翻译。
  3. 第三步:上传文件

    在右侧的任务列表中,点击或拖拽您的文档到文件上传区域。

  4. 第四步:开始翻译

    文件选择成功后,点击任务卡片右下角的 开始翻译 按钮。系统将开始处理任务,您可以在日志区域查看实时进度。

  5. 第五步:查看与下载

    翻译完成后,任务卡片下方会出现操作按钮:

    • 预览: 在右侧滑出的面板中进行原文和译文的对照预览。
    • 下载: 下载包括 PDF, DOCX, Markdown 等多种格式的译文。
    • 附件: 如果翻译过程中生成了附加文件(如自动生成的术语表),可在此处下载。
重要提示: 所有配置都会自动保存在您的浏览器本地,方便下次使用。您也可以使用新增的“导出配置”和“导入配置”按钮来备份和恢复您的设置。
", "tutorialUnderstandBtn": "我明白了", "contributorsModalTitle": "感谢贡献", "contributorsPara1": "DocuTranslate是一个开源项目!大家的需求与使用是项目进步的动力。", @@ -208,6 +207,7 @@ "workflowOptionAss": "ASS Subtitle Translation (.ass)", "workflowOptionJson": "JSON Translation (.json)", "workflowOptionHtml": "HTML Translation (.html)", + "workflowOptionPptx": "PPTX Presentation (.pptx)", "autoWorkflowLabel": "Auto-select Workflow", "txtSettingsTitleText": "TXT Translation Options", "insertModeLabel": "Insert Mode", @@ -243,6 +243,8 @@ "insertModeHelpAss": "Choose how to insert the translated text.", "separatorPlaceholderAss": "e.g., \\N (newline character)", "separatorHelpAss": "Character used to separate original and translated text in append or prepend mode. \\N is the newline character for the ASS format.", + "pptxSettingsTitleText": "PPTX Translation Options", + "insertModeHelpPptx": "Choose how to insert the translated text into text boxes.", "jsonSettingsTitleText": "JSON Path Configuration", "jsonPathLabel": "JSON Paths to Translate", "jsonPathPlaceholder": "One path per line, e.g.:\n$.name\n$.*", @@ -250,7 +252,6 @@ "parsingSettingsTitleText": "Parsing Configuration", "parsingEngineLabel": "Parsing Engine", "parsingEngineHelp": "If the uploaded file is already in .md format, this can be skipped.", - "getMineruTokenTitle": "Get Mineru Token", "mineruTokenPlaceholder": "Required when using the Mineru engine", "modelVersionLabel": "Mineru Model Version", "modelVersionVlm": "VLM", @@ -275,7 +276,6 @@ "platformCustom": "Custom API", "baseUrlLabel": "API Address (Base URL)", "baseUrlPlaceholder": "OpenAI-compatible address", - "getApiKeyTitle": "Get API Key", "apiKeyPlaceholder": "Please enter your API Key", "modelIdLabel": "Model ID", "modelIdPlaceholder": "e.g., gpt-4o, glm-4", @@ -310,8 +310,6 @@ "glossaryGenConfigCustom": "Custom", "importConfigBtn": "Import Config", "exportConfigBtn": "Export Config", - "githubInfo": "GitHub Page (stars❤ welcome):
\n https://github.com/xunbu/docutranslate", - "qqGroupInfo": "Discussion QQ Group: 1047781902", "taskListTitle": "Task List", "newTaskBtn": "New Task", "noTaskPlaceholder": "No tasks yet. Click \"New Task\" to get started!", @@ -339,7 +337,7 @@ "closeBtn": "Close", "downloadBtn": "Download", "tutorialModalTitle": "Tutorial", - "tutorialModalBody": "

Video tutorials are available on Bilibili by searching for docutranslate.

Welcome to DocuTranslate! Please follow these steps to translate your documents:

  1. Step 1: Select Workflow

    At the top of the left-side configuration panel, first choose the processing flow that best suits your file type.

    Tip: \"Auto-select Workflow\" is enabled by default. Simply upload your file, and the system will automatically match it with the appropriate workflow to simplify the process.
    • Convert to Markdown then Translate: Suitable for translating PDF, Markdown, images, etc. This is the most versatile and powerful mode.
    • Plain Text Translation: For translating .txt plain text files.
    • EPUB Translation: For translating .epub e-book files.
    • DOCX Translation: For translating .docx Word documents.
    • XLSX Translation: For translating .xlsx or .csv spreadsheet files.
    • SRT Subtitle Translation: For translating .srt subtitle files.
    • ASS Subtitle Translation: For translating .ass advanced subtitle files.
    • JSON Translation: For translating specific fields within .json files.
    • HTML Translation: For translating .html web page files.
  2. Step 2: Configure Parameters

    After selecting a workflow, the relevant configuration options will appear below. Please complete the settings in order (all configurations are automatically saved in your browser):

    A. Workflow-Specific Options (Appears based on your choice in Step 1):

    • If \"Convert to Markdown then Translate\" is selected, configure Parsing Configuration:
      • Parsing Engine: Choose an engine to convert your file (like a PDF) into a translation-friendly Markdown format. No selection is needed if your file is already in Markdown format.
      • Mineru Token: If you choose the minerU engine, you need to enter your token here.
    • If \"Plain Text/DOCX/XLSX/SRT/ASS/EPUB/HTML\" is selected, configure its Translation Options:
      • Insert Mode: Defines how the translation result is placed in the document. You can choose to \"Replace\" the original, \"Append\" it after the original, or \"Prepend\" it before the original.
      • Separator: When \"Append\" or \"Prepend\" mode is selected, this is used to insert a separator between the original and translated text (e.g., \\\\N is common for ASS format, <br /> for EPUB format as a line break).
    • If \"JSON Translation\" is selected, configure JSON Paths:
      • JSON Paths to Translate: Enter one JSONPath expression per line to translate all strings within the matched objects. For example: $.* (translate all strings), $..description (translate all values with the key description).

    B. General Options (Applicable to all workflows):

    • Translation Model:
      • Select Platform/API Address/API Key/Model ID: Configure the AI translation service you wish to use. The better the model follows instructions, the lower the probability of errors and missed translations.
      • Skip Translation: If checked, only document parsing and format conversion will be performed, without calling the AI for translation.
    • Translation Configuration:
      • Target Language: Specify the target language for the translation.
      • Custom Prompt: Optional, add extra instructions, like \"Do not translate personal names.\"
      • Thinking Mode: A setting for some models that support mixed inference; \"Disable (Recommended)\" is the suggested choice.
      • Chunk Size/Concurrency, etc.: Advanced parameters for adjusting performance and API request behavior; usually, the defaults are fine.
    • Glossary:
      • Upload Glossary (Optional): Upload a CSV file (must contain 'src' and 'dst' columns) to ensure consistency and accuracy for specific terms.
      • Auto-generate Glossary: When enabled, the program will first extract terms from the original text to create a glossary before proceeding with the translation.
  3. Step 3: Upload File

    In the task list on the right, click or drag your document into the file upload area.

  4. Step 4: Start Translation

    Once the file is successfully selected, click the Start Translation button on the bottom right of the task card. The system will begin processing the task, and you can view the real-time progress in the log area.

  5. Step 5: View and Download

    After the translation is complete, action buttons will appear on the task card:

    • Preview: Compare the original and translated text side-by-side in a slide-out panel.
    • Download: Download the translation in various formats, including PDF, DOCX, Markdown, etc.
    • Attachments: If any additional files were generated during the process (like an auto-generated glossary), they can be downloaded here.
Important Note: All configurations are automatically saved locally in your browser for future use. You can also use the new \"Export Config\" and \"Import Config\" buttons to back up and restore your settings.
", + "tutorialModalBody": "

Video tutorials are available on Bilibili by searching for docutranslate.

Welcome to DocuTranslate! Please follow these steps to translate your documents:

  1. Step 1: Select Workflow

    At the top of the left-side configuration panel, first choose the processing flow that best suits your file type.

    Tip: \"Auto-select Workflow\" is enabled by default. Simply upload your file, and the system will automatically match it with the appropriate workflow to simplify the process.
    • Convert to Markdown then Translate: Suitable for translating PDF, Markdown, images, etc. This is the most versatile and powerful mode.
    • Plain Text Translation: For translating .txt plain text files.
    • EPUB Translation: For translating .epub e-book files.
    • DOCX Translation: For translating .docx Word documents.
    • XLSX Translation: For translating .xlsx or .csv spreadsheet files.
    • PPTX Translation: For translating .pptx slide files.
    • SRT Subtitle Translation: For translating .srt subtitle files.
    • ASS Subtitle Translation: For translating .ass advanced subtitle files.
    • JSON Translation: For translating specific fields within .json files.
    • HTML Translation: For translating .html web page files.
  2. Step 2: Configure Parameters

    After selecting a workflow, the relevant configuration options will appear below. Please complete the settings in order (all configurations are automatically saved in your browser):

    A. Workflow-Specific Options (appear based on your choice in Step 1):

    • If \"Convert to Markdown then Translate\" is selected, configure Parsing Configuration:
      • Parsing Engine: Choose an engine to convert your file (like a PDF) into a translation-friendly Markdown format. No selection is needed if your file is already in Markdown format.
      • Mineru Token: If you choose the minerU engine, you need to enter your token here.
    • If \"Plain Text/DOCX/XLSX/PPTX/SRT/ASS/EPUB/HTML\" is selected, configure its Translation Options:
      • Insert Mode: Defines how the translation result is placed in the document. You can choose to \"Replace\" the original, \"Append\" it after the original, or \"Prepend\" it before the original.
      • Separator: When \"Append\" or \"Prepend\" mode is selected, this is used to insert a separator between the original and translated text (e.g., \\N is common for ASS format, <br /> for EPUB format as a line break).
    • If \"JSON Translation\" is selected, configure JSON Paths:
      • JSON Paths to Translate: Enter one JSONPath expression per line to translate all strings within the matched objects. For example: $.* (translate all strings), $..description (translate all values with the key description).

    B. General Options (Applicable to all workflows):

    • Translation Model:
      • Select Platform/API Address/API Key/Model ID: Configure the AI translation service you wish to use. The better the model follows instructions, the lower the probability of errors and missed translations.
      • Skip Translation: If checked, only document parsing and format conversion will be performed, without calling the AI for translation.
    • Translation Configuration:
      • Target Language: Specify the target language for the translation.
      • Custom Prompt: Optional, add extra instructions, like \"Do not translate personal names.\"
      • Thinking Mode: A setting for some models that support hybrid reasoning; \"Disable (Recommended)\" is the suggested choice.
      • Chunk Size/Concurrency, etc.: Advanced parameters for adjusting performance and API request behavior; usually, the defaults are fine.
    • Glossary:
      • Upload Glossary (Optional): Upload a CSV file (must contain 'src' and 'dst' columns) to ensure consistency and accuracy for specific terms.
      • Auto-generate Glossary: When enabled, the program will first extract terms from the original text to create a glossary before proceeding with the translation.
  3. Step 3: Upload File

    In the task list on the right, click or drag your document into the file upload area.

  4. Step 4: Start Translation

    Once the file is successfully selected, click the Start Translation button on the bottom right of the task card. The system will begin processing the task, and you can view the real-time progress in the log area.

  5. Step 5: View and Download

    After the translation is complete, action buttons will appear on the task card:

    • Preview: Compare the original and translated text side-by-side in a slide-out panel.
    • Download: Download the translation in various formats, including PDF, DOCX, Markdown, etc.
    • Attachments: If any additional files were generated during the process (like an auto-generated glossary), they can be downloaded here.
Important Note: All configurations are automatically saved locally in your browser for future use. You can also use the new \"Export Config\" and \"Import Config\" buttons to back up and restore your settings.
", "tutorialUnderstandBtn": "I Understand", "contributorsModalTitle": "Thanks for Contributing", "contributorsPara1": "DocuTranslate is an open-source project! The community's needs and usage are the driving force behind its progress.", diff --git a/docutranslate/static/index.html b/docutranslate/static/index.html index f255b7d..1daaa80 100644 --- a/docutranslate/static/index.html +++ b/docutranslate/static/index.html @@ -1 +1 @@ - DocuTranslate - 交互式文档翻译

DocuTranslate

{{ t(currentWorkflowConfig.insertHelpKey || 'insertModeHelpTxt') }}
{{ t('segmentModeHelp') }}

{{ t('parsingEngineHelp') }}
{{ t('modelVersionHelp') }}

{{ t('glossaryHelp') }}

GitHub主页(欢迎star❤):
https://github.com/xunbu/docutranslate

交流QQ群: 1047781902

version:{{ version ? 'v' + version : '' }}

{{ t('taskListTitle') }}

LOGO

{{ t('noTaskPlaceholder') }}

{{ t('taskCardIdLabel') }}: {{ task.backendId || t('taskCardIdPlaceholder') }}

{{ t('taskCardFileDrop') }}

{{ t('taskCardFileSelected') }}

{{ t('taskCardFilenameLabel') }} {{ task.fileName || task.file.name }}
{{ t('taskCardLogLabel') }}
{{ task.statusMessage || t('taskCardStatusWaiting') }}
{{ previewMode === 'bilingual' ? t('preview_bilingual') : t('preview_translatedOnly') }}
{{ t('previewOriginal') }}
{{ t('previewTranslated') }}
\ No newline at end of file + DocuTranslate - 交互式文档翻译

DocuTranslate

{{ t(currentWorkflowConfig.insertHelpKey || 'insertModeHelpTxt') }}
{{ t('segmentModeHelp') }}

{{ t('parsingEngineHelp') }}
{{ t('modelVersionHelp') }}

{{ t('glossaryHelp') }}

GitHub主页(欢迎star❤):
https://github.com/xunbu/docutranslate

交流QQ群: 1047781902

version:{{ version ? 'v' + version : '' }}

{{ t('taskListTitle') }}

LOGO

{{ t('noTaskPlaceholder') }}

{{ t('taskCardIdLabel') }}: {{ task.backendId || t('taskCardIdPlaceholder') }}

{{ t('taskCardFileDrop') }}

{{ t('taskCardFileSelected') }}

{{ t('taskCardFilenameLabel') }} {{ task.fileName || task.file.name }}
{{ t('taskCardLogLabel') }}
{{ task.statusMessage || t('taskCardStatusWaiting') }}
{{ previewMode === 'bilingual' ? t('preview_bilingual') : t('preview_translatedOnly') }}
{{ t('previewOriginal') }}
{{ t('previewTranslated') }}
\ No newline at end of file diff --git a/docutranslate/translator/ai_translator/pptx_translator.py b/docutranslate/translator/ai_translator/pptx_translator.py index b7a690a..6ffa5f1 100644 --- a/docutranslate/translator/ai_translator/pptx_translator.py +++ b/docutranslate/translator/ai_translator/pptx_translator.py @@ -22,9 +22,6 @@ from docutranslate.translator.ai_translator.base import AiTranslatorConfig, AiTr class PPTXTranslatorConfig(AiTranslatorConfig): insert_mode: Literal["replace", "append", "prepend"] = "replace" separator: str = "\n" - # 指定翻译后的中文字体(东亚字体),防止乱码或回退到宋体 - # 推荐使用 "Microsoft YaHei" (微软雅黑) 或 "DengXian" (等线) - target_cjk_font: str = "Microsoft YaHei" # ---------------- 主类 ---------------- @@ -35,7 +32,7 @@ class PPTXTranslator(AiTranslator): 改进特性: 1. 深度遍历:支持母版、版式、备注页、以及隐藏在 AlternateContent (兼容性块) 中的文本。 2. 公式保护:智能检测文本间的公式,防止翻译后文字错位。 - 3. 字体美化:中西文字体分离,中文使用微软雅黑,英文保持原样。 + 3. 样式保留:翻译后完全保留原有的中英文字体设置,不做强制覆盖。 4. 布局自适应:防止翻译后文本溢出。 """ @@ -54,7 +51,6 @@ class PPTXTranslator(AiTranslator): self.translate_agent = SegmentsTranslateAgent(agent_config) self.insert_mode = config.insert_mode self.separator = config.separator - self.target_cjk_font = config.target_cjk_font # ---------------- 辅助函数:样式与字体 ---------------- @@ -111,18 +107,6 @@ class PPTXTranslator(AiTranslator): return True - def _set_east_asian_font(self, run, font_name: str): - """设置 Run 的东亚字体 (解决中文乱码/宋体问题)。""" - if not font_name: - return - try: - rPr = run.font._element.get_or_add_rPr() - # 设置 ea (East Asian) 字体,不影响 latin (西文) 字体 - ea = rPr.get_or_add_ea() - ea.set(qn('a:typeface'), font_name) - except Exception: - pass - # ---------------- 核心遍历逻辑 ---------------- def _process_text_frame(self, text_frame: TextFrame, elements: List[Dict[str, Any]], texts: List[str]): @@ -283,12 +267,10 @@ class PPTXTranslator(AiTranslator): primary_run = runs[0] try: - # 1. 写入文本 + # 1. 写入文本 (python-pptx 会自动保留原有的 rPr 属性,即保留默认字体) primary_run.text = text_to_set - # 2. 
设置东亚字体 (保留西文字体设置) - if self.target_cjk_font: - self._set_east_asian_font(primary_run, self.target_cjk_font) + # 2. (已移除字体强制设置逻辑,以保留 PPT 原样) # 3. 处理溢出 text_frame = element_info.get("text_frame")