From d4e94e86e515ba01957d0490222564fd6de746b8 Mon Sep 17 00:00:00 2001 From: xunbu Date: Fri, 29 Aug 2025 21:47:30 +0800 Subject: [PATCH] =?UTF-8?q?json=E7=BF=BB=E8=AF=91path=E5=8C=B9=E9=85=8D?= =?UTF-8?q?=E5=88=B0=E7=9A=84=E5=AF=B9=E8=B1=A1=E7=9A=84=E4=B8=AD=E6=89=80?= =?UTF-8?q?=E6=9C=89=E5=AD=97=E7=AC=A6=E4=B8=B2?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docutranslate/static/i18nData.json | 196 +++++++++--------- docutranslate/static/index.html | 2 +- .../ai_translator/json_translator.py | 121 +++++++---- 更新日志.txt | 1 + 4 files changed, 180 insertions(+), 140 deletions(-) diff --git a/docutranslate/static/i18nData.json b/docutranslate/static/i18nData.json index 6e8e3e6..5ab00e6 100644 --- a/docutranslate/static/i18nData.json +++ b/docutranslate/static/i18nData.json @@ -13,31 +13,24 @@ "workflowOptionEpub": "EPUB翻译 (.epub)", "workflowOptionHtml": "HTML翻译 (.html)", "autoWorkflowLabel": "自动选择工作流", - "docxSettingsTitleText": "DOCX翻译选项", "insertModeLabel": "插入模式", "insertModeReplace": "替换原文 (Replace)", "insertModeAppend": "附加到原文后 (Append)", "insertModePrepend": "附加到原文前 (Prepend)", "insertModeHelpDocx": "选择如何将翻译后的文本插入。", + "insertModeHelpXlsx": "选择如何将翻译后的文本插入到单元格中。", + "insertModeHelpSrt": "选择如何将翻译后的文本插入。", + "insertModeHelpEpub": "选择如何将翻译后的文本插入。", + "insertModeHelpHtml": "选择如何将翻译后的文本插入。", "separatorLabel": "分隔符", "separatorPlaceholder": "例如: \\n---翻译---\\n", - "separatorHelp": "当插入模式为附加或前置时,用于分隔原文和译文的字符。\\n 代表换行。", - "xlsxSettingsTitleText": "XLSX翻译选项", - "insertModeHelpXlsx": "选择如何将翻译后的文本插入到单元格中。", "separatorPlaceholderSimple": "例如: \\n---\\n", + "separatorHelp": "当插入模式为附加或前置时,用于分隔原文和译文的字符。\\n 代表换行。", "xlsxTranslateRegionsLabel": "翻译区域 (可选)", "xlsxTranslateRegionsPlaceholder": "每行一个区域, 例如:Sheet1!A1:B10(不指定表名则对所有表生效)", - "srtSettingsTitleText": "SRT翻译选项", - "insertModeHelpSrt": "选择如何将翻译后的文本插入。", - "epubSettingsTitleText": "EPUB翻译选项", - "insertModeHelpEpub": "选择如何将翻译后的文本插入。", - "htmlSettingsTitleText": "HTML翻译选项", 
- "insertModeHelpHtml": "选择如何将翻译后的文本插入。", - "jsonSettingsTitleText": "JSON路径配置", "jsonPathLabel": "需要翻译的JSON路径", "jsonPathPlaceholder": "每行一个路径, 例如:\n$.name\n$.*", - "jsonPathHelp": "采用jsonpath-ng的路径选择语法,每一行表示一个json路径", - "parsingSettingsTitleText": "解析配置", + "jsonPathHelp": "采用jsonpath-ng的路径选择语法,每一行表示一个json路径,将翻译路径匹配对象内的所有字符串", "parsingEngineLabel": "解析引擎", "parsingEngineHelp": "如果上传的文件本身是.md格式,此项可不选。", "getMineruTokenTitle": "获取Mineru Token", @@ -48,7 +41,6 @@ "modelVersionHelp": "mineru VLM是更新的内测模型。", "formulaOcrLabel": "公式识别", "codeOcrLabel": "代码识别", - "aiSettingsTitleText": "翻译模型", "skipTranslationLabel": "跳过翻译", "platformLabel": "选择平台", "platformCustom": "自定义接口", @@ -58,12 +50,11 @@ "apiKeyPlaceholder": "请输入您的API Key", "modelIdLabel": "模型 ID", "modelIdPlaceholder": "例如: gpt-4o, glm-4", - "translationSettingsTitleText": "翻译配置", "targetLanguageLabel": "目标语言", "targetLanguageCustom": "其它 (自定义)", "customLangPlaceholder": "请输入目标语言, 例如: Italian", "thinkingModeLabel": "思考模式", - "thinkingModeTooltip": "设置混合推理模型的思考模式,目前支持智谱平台的glm-4.5系列、阿里云的qwen3系列、火山引擎的Doubao-Seed-1.6系列等,建议关闭", + "thinkingModeTooltip": "设置混合推理模型的思考模式,目前支持智谱平台的glm-4.5系列、阿里云的qwen3系列、火山引擎的Doubao-Seed-1.6系列等,建议关闭", "thinkingModeEnable": "启用", "thinkingModeDisable": "禁用", "thinkingModeDefault": "默认", @@ -81,7 +72,7 @@ "glossaryGenConfigLabel": "生成术语表配置", "glossaryGenConfigSame": "与翻译配置相同", "glossaryGenConfigCustom": "自定义", - "githubInfo": "GitHub主页(欢迎star❤):
\n https://github.com/xunbu/docutranslate", + "githubInfo": "GitHub主页(欢迎star❤):
https://github.com/xunbu/docutranslate", "qqGroupInfo": "交流QQ群: 1047781902", "taskListTitle": "任务列表", "newTaskBtn": "新建任务", @@ -107,7 +98,7 @@ "closeBtn": "关闭", "downloadBtn": "下载", "tutorialModalTitle": "使用教程", - "tutorialModalBody": "

视频教程可以在B站搜索 docutranslate 获取。

欢迎使用 DocuTranslate!请按照以下步骤完成文档翻译:

  1. 选择工作流

    首先,在配置面板顶部选择您需要的翻译流程。不同的工作流适用于不同类型的文件:

    • 转Markdown再翻译: 适用于翻译PDF、markdown、图片等文件。
    • 纯文本翻译: 用于翻译 .txt 等纯文本文件。
    • JSON翻译: 用于翻译 .json 文件中的特定字段。
    • DOCX翻译: 用于翻译 .docx 文件。
    • XLSX翻译: 用于翻译 .xlsx 电子表格、 .csv 文件。
    • SRT字幕翻译: 用于翻译 .srt 字幕文件。
    • EPUB翻译: 用于翻译 .epub 电子书文件。
    • HTML翻译: 用于翻译 .html 文件。
    新增功能: \"自动选择工作流\"开关已默认开启。您只需上传文件,系统会自动为您匹配合适的工作流,简化操作。

  2. 配置参数

    根据您选择的工作流,完成相应的配置。所有配置项都会自动保存在您的浏览器中。

    • 解析配置 (仅在“转Markdown再翻译”工作流下显示):
      • 解析引擎: 选择一个引擎将您的文件(如PDF)转换为适合翻译的Markdown格式。如果您的文件已经是Markdown格式,则无需选择。
      • Mineru Token: 如果您选择 minerU 引擎,需要在此处填入您的Token。
    • DOCX/XLSX/SRT/EPUB/HTML翻译选项 (在对应工作流下显示):
      • 插入模式: 定义翻译结果如何放入文档或字幕。您可以选择直接“替换”原文,或是在原文之后“附加”,或是在原文之前“前置”。
      • 分隔符: 当选择“附加”或“前置”模式时,此项用于在原文和译文之间插入分隔符。
    • JSON路径配置 (仅在“JSON翻译”工作流下显示):
      • 需要翻译的JSON路径: 每行输入一个 JSONPath 表达式,指定需要翻译的字段。
      • 例如:$..description翻译所有键为description的值。$.items[0].name翻译第一个item的name值。
    • 翻译模型:
      • 跳过翻译: 勾选此项后,将只执行文档解析和格式转换,不调用AI进行翻译。
      • 选择平台/API 地址/API Key/模型 ID: 配置您希望使用的AI翻译服务。
      • 模型ID参考平台文档,建议使用非推理模型或混合推理模型(关闭思考)。
    • 翻译配置:
      • 目标语言/自定义Prompt/术语表: 指定翻译的目标语言、附加指令以及用于保证特定名词翻译准确性的术语表。
      • 思考模式:设置混合推理模型是否进行思考,目前支持智谱的glm4.5系列、阿里云的qwen3系列、火山引擎的seed1.6系列,建议选择禁用思考。
      • 分块大小/并发数/Temperature: 发给AI的分块大小、并发请求数和温度,通常保持默认即可。
  3. 上传文件

    在右侧的任务列表中,点击或拖拽您的文档到文件上传区域。

  4. 开始翻译

    文件选择成功后,点击任务卡片右下角的 开始翻译 按钮。系统将开始处理任务,您可以在日志区域查看实时进度。

  5. 查看与下载

    翻译完成后,任务卡片下方会出现操作按钮:

    • 预览: 在右侧滑出的面板中进行原文和译文的对照预览(仅作参考)。
    • 下载: 下载包括 PDF, DOCX, XLSX, HTML, Markdown 等多种格式的译文。
    • 附件: 如果翻译过程中生成了附加文件(如术语表),可在此处下载。
提示: 所有配置都会自动保存在您的浏览器本地,方便下次使用。
", + "tutorialModalBody": "

视频教程可以在B站搜索 docutranslate 获取。

欢迎使用 DocuTranslate!请按照以下步骤完成文档翻译:

  1. 选择工作流

    首先,在配置面板顶部选择您需要的翻译流程。不同的工作流适用于不同类型的文件:

    • 转Markdown再翻译: 适用于翻译PDF、markdown、图片等文件。
    • 纯文本翻译: 用于翻译 .txt 等纯文本文件。
    • JSON翻译: 用于翻译 .json 文件中的特定字段。
    • DOCX翻译: 用于翻译 .docx 文件。
    • XLSX翻译: 用于翻译 .xlsx 电子表格、 .csv 文件。
    • SRT字幕翻译: 用于翻译 .srt 字幕文件。
    • EPUB翻译: 用于翻译 .epub 电子书文件。
    • HTML翻译: 用于翻译 .html 文件。
    新增功能: \"自动选择工作流\"开关已默认开启。您只需上传文件,系统会自动为您匹配合适的工作流,简化操作。

  2. 配置参数

    根据您选择的工作流,完成相应的配置。所有配置项都会自动保存在您的浏览器中。

    • 解析配置 (仅在“转Markdown再翻译”工作流下显示):
      • 解析引擎: 选择一个引擎将您的文件(如PDF)转换为适合翻译的Markdown格式。如果您的文件已经是Markdown格式,则无需选择。
      • Mineru Token: 如果您选择 minerU 引擎,需要在此处填入您的Token。
    • DOCX/XLSX/SRT/EPUB/HTML翻译选项 (在对应工作流下显示):
      • 插入模式: 定义翻译结果如何放入文档或字幕。您可以选择直接“替换”原文,或是在原文之后“附加”,或是在原文之前“前置”。
      • 分隔符: 当选择“附加”或“前置”模式时,此项用于在原文和译文之间插入分隔符。
    • JSON路径配置 (仅在“JSON翻译”工作流下显示):
      • 需要翻译的JSON路径: 每行输入一个 JSONPath 表达式,指定需要翻译的字段。路径匹配到的对象内的所有字符串都会被翻译。
      • 例如:$..description翻译所有键为description的值。$.items[0].name翻译第一个item的name值。$.*翻译所有字符串。
    • 翻译模型:
      • 跳过翻译: 勾选此项后,将只执行文档解析和格式转换,不调用AI进行翻译。
      • 选择平台/API 地址/API Key/模型 ID: 配置您希望使用的AI翻译服务。
      • 模型ID参考平台文档,建议使用非推理模型或混合推理模型(关闭思考)。
    • 翻译配置:
      • 目标语言/自定义Prompt/术语表: 指定翻译的目标语言、附加指令以及用于保证特定名词翻译准确性的术语表。
      • 思考模式:设置混合推理模型是否进行思考,目前支持智谱的glm4.5系列、阿里云的qwen3系列、火山引擎的seed1.6系列,建议选择禁用思考。
      • 分块大小/并发数/Temperature: 发给AI的分块大小、并发请求数和温度,通常保持默认即可。
  3. 上传文件

    在右侧的任务列表中,点击或拖拽您的文档到文件上传区域。

  4. 开始翻译

    文件选择成功后,点击任务卡片右下角的 开始翻译 按钮。系统将开始处理任务,您可以在日志区域查看实时进度。

  5. 查看与下载

    翻译完成后,任务卡片下方会出现操作按钮:

    • 预览: 在右侧滑出的面板中进行原文和译文的对照预览(仅作参考)。
    • 下载: 下载包括 PDF, DOCX, XLSX, HTML, Markdown 等多种格式的译文。
    • 附件: 如果翻译过程中生成了附加文件(如术语表),可在此处下载。
提示: 所有配置都会自动保存在您的浏览器本地,方便下次使用。
", "tutorialUnderstandBtn": "我明白了", "contributorsModalTitle": "感谢贡献", "contributorsPara1": "DocuTranslate是一个开源项目!大家的需求与使用是项目进步的动力。", @@ -122,44 +113,51 @@ "glossaryTableDestination": "译文 (dst)", "init_i18n_failed_alert": "加载界面翻译资源失败,请检查网络连接或联系管理员。", "init_failed_alert": "初始化失败,无法连接到后端服务。请检查服务是否运行或刷新页面。", + "parsingSettingsTitleText": "解析配置", + "jsonSettingsTitleText": "JSON路径配置", + "xlsxSettingsTitleText": "XLSX翻译选项", + "docxSettingsTitleText": "DOCX翻译选项", + "srtSettingsTitleText": "SRT翻译选项", + "epubSettingsTitleText": "EPUB翻译选项", + "htmlSettingsTitleText": "HTML翻译选项", + "aiSettingsTitleText": "翻译模型", + "translationSettingsTitleText": "翻译配置", + "engineOptionIdentity": "不做转换(文件本身是.md)", + "engineOptionMineru": "minerU(pdf/图片/md)", + "engineOptionDocling": "docLing(pdf/图片/md)", "glossaryEmpty": "术语表为空。", - "engineOptionIdentity": "不解析(.md)", - "engineOptionMineru": "Mineru", - "engineOptionDocling": "Docling", - "status_selectFileFirst": "请先选择文件!", - "status_invalidWorkflow": "无效的工作流类型。", - "status_fillRequired": "请填写所有必填项。", - "status_releasingOldTask": "正在释放旧任务...", + "status_selectFileFirst": "请先选择文件", + "status_fillRequired": "请填写所有必填项", "btn_initializing": "初始化中...", - "status_encodingAndSubmitting": "正在编码并提交任务...", - "status_requestOk": "请求成功,任务已开始。", + "status_encodingAndSubmitting": "文件编码和任务提交中...", + "status_requestOk": "请求成功,任务已提交", "btn_cancelTranslation": "取消翻译", "status_requestFail": "请求失败", - "status_initFail": "任务初始化失败", + "status_initFail": "初始化任务失败", "status_cancelling": "取消中...", - "status_cancelSent": "已发送取消请求。", + "status_cancelSent": "取消请求已发送", "status_cancelFail": "取消失败", - "admin_tasklist_failed": "管理员模式:加载任务列表失败。", - "status_gettingStatus": "正在获取状态...", - "status_updateError": "状态更新失败。", + "status_gettingStatus": "获取状态中...", "btn_reTranslate": "重新翻译", + "status_updateError": "状态更新出错", "preview_loading": "加载预览中...", "preview_cantReadOriginal": "无法读取原始文件内容。", "preview_cantPreviewType": "无法预览此文件类型", "preview_noOriginalCache": "无原始文件缓存可供预览。", 
"preview_loadFailed": "加载预览失败。", - "pdf_preparing": "正在准备PDF...", - "pdf_print_failed": "调用打印功能失败。请尝试手动保存为PDF。", - "pdf_fetch_failed": "获取PDF内容失败。", - "preview_bilingual": "双语预览", - "preview_translatedOnly": "仅译文预览" + "pdf_preparing": "正在准备PDF以便打印...", + "pdf_print_failed": "调用打印功能失败。请尝试手动打印(Ctrl+P)。", + "pdf_fetch_failed": "获取预览内容失败,无法生成PDF。", + "preview_bilingual": "双语对照预览", + "preview_translatedOnly": "仅译文预览", + "admin_tasklist_failed": "管理员模式:加载任务列表失败。" }, "en": { "pageTitle": "DocuTranslate - Interactive Document Translation", "tutorialBtn": "Tutorial", - "projectContributeBtn": "Contribute", + "projectContributeBtn": "Project Contribution", "workflowTitle": "Select Workflow", - "workflowOptionMarkdown": "Markdown-based Translation (.pdf/.md/.png etc.)", + "workflowOptionMarkdown": "Convert to Markdown then Translate (.pdf/.md/.png, etc.)", "workflowOptionTxt": "Plain Text Translation (.txt)", "workflowOptionJson": "JSON Translation (.json)", "workflowOptionDocx": "DOCX Translation (.docx)", @@ -167,84 +165,75 @@ "workflowOptionSrt": "SRT Subtitle Translation (.srt)", "workflowOptionEpub": "EPUB Translation (.epub)", "workflowOptionHtml": "HTML Translation (.html)", - "autoWorkflowLabel": "Auto-select Workflow", - "docxSettingsTitleText": "DOCX Translation Options", - "insertModeLabel": "Insertion Mode", - "insertModeReplace": "Replace Original (Replace)", - "insertModeAppend": "Append to Original (Append)", - "insertModePrepend": "Prepend to Original (Prepend)", + "autoWorkflowLabel": "Automatically select workflow", + "insertModeLabel": "Insert Mode", + "insertModeReplace": "Replace original text", + "insertModeAppend": "Append after original text", + "insertModePrepend": "Prepend before original text", "insertModeHelpDocx": "Choose how to insert the translated text.", - "separatorLabel": "Separator", - "separatorPlaceholder": "e.g., \\n---translation---\\n", - "separatorHelp": "Used to separate original and translated text in Append or Prepend mode. 
\\n represents a newline.", - "xlsxSettingsTitleText": "XLSX Translation Options", - "insertModeHelpXlsx": "Choose how to insert translated text into cells.", - "separatorPlaceholderSimple": "e.g., \\n---\\n", - "xlsxTranslateRegionsLabel": "Translation Regions (Optional)", - "xlsxTranslateRegionsPlaceholder": "One region per line, e.g., Sheet1!A1:B10 (applies to all sheets if sheet name is omitted)", - "srtSettingsTitleText": "SRT Translation Options", + "insertModeHelpXlsx": "Choose how to insert the translated text into cells.", "insertModeHelpSrt": "Choose how to insert the translated text.", - "epubSettingsTitleText": "EPUB Translation Options", "insertModeHelpEpub": "Choose how to insert the translated text.", - "htmlSettingsTitleText": "HTML Translation Options", "insertModeHelpHtml": "Choose how to insert the translated text.", - "jsonSettingsTitleText": "JSON Path Configuration", + "separatorLabel": "Separator", + "separatorPlaceholder": "e.g., \\n---Translation---\\n", + "separatorPlaceholderSimple": "e.g., \\n---\\n", + "separatorHelp": "Characters used to separate original and translated text in append/prepend modes. \\n represents a newline.", + "xlsxTranslateRegionsLabel": "Translate Regions (Optional)", + "xlsxTranslateRegionsPlaceholder": "One region per line, e.g., Sheet1!A1:B10 (applies to all sheets if sheet name is omitted)", "jsonPathLabel": "JSON Paths to Translate", - "jsonPathPlaceholder": "One path per line, e.g.,\n$.name\n$.*", - "jsonPathHelp": "Uses jsonpath-ng syntax. Each line represents a JSON path.", - "parsingSettingsTitleText": "Parsing Configuration", + "jsonPathPlaceholder": "One path per line, e.g.:\n$.name\n$.*", + "jsonPathHelp": "Uses jsonpath-ng syntax. Each line represents a JSON path. 
All strings within the matched objects will be translated.", "parsingEngineLabel": "Parsing Engine", - "parsingEngineHelp": "This can be skipped if the uploaded file is already in .md format.", + "parsingEngineHelp": "If the uploaded file is already in .md format, this option is not required.", "getMineruTokenTitle": "Get Mineru Token", - "mineruTokenPlaceholder": "Required when using Mineru engine", + "mineruTokenPlaceholder": "Required when using the Mineru engine", "modelVersionLabel": "Mineru Model Version", "modelVersionVlm": "VLM", "modelVersionPipline": "Pipeline", - "modelVersionHelp": "mineru VLM is a newer model in closed beta.", + "modelVersionHelp": "Mineru VLM is a newer internal test model.", "formulaOcrLabel": "Formula Recognition", "codeOcrLabel": "Code Recognition", - "aiSettingsTitleText": "Translation Model", "skipTranslationLabel": "Skip Translation", "platformLabel": "Select Platform", "platformCustom": "Custom Endpoint", "baseUrlLabel": "API Base URL", - "baseUrlPlaceholder": "OpenAI-compatible URL", + "baseUrlPlaceholder": "OpenAI-compatible address", "getApiKeyTitle": "Get API Key", "apiKeyPlaceholder": "Please enter your API Key", "modelIdLabel": "Model ID", "modelIdPlaceholder": "e.g., gpt-4o, glm-4", - "translationSettingsTitleText": "Translation Configuration", "targetLanguageLabel": "Target Language", "targetLanguageCustom": "Other (Custom)", "customLangPlaceholder": "Enter target language, e.g., Italian", "thinkingModeLabel": "Thinking Mode", - "thinkingModeTooltip": "Set the thinking mode for the hybrid reasoning model. Currently supported models include the glm-4.5 series from Zhipu Platform, the qwen3 series from Alibaba Cloud, the Doubao-Seed-1.6 series from Volcano Engine, and others. It is recommended to turn it off.", + "thinkingModeTooltip": "Set the thinking mode for mixed-inference models. Currently supports Zhipu's glm-4.5 series, Alibaba Cloud's qwen3 series, Volcengine's Doubao-Seed-1.6 series, etc. 
It is recommended to disable it.", "thinkingModeEnable": "Enable", "thinkingModeDisable": "Disable", "thinkingModeDefault": "Default", "customPromptLabel": "Custom Prompt", - "customPromptPlaceholder": "Optional, e.g., 'Keep proper names in their original language.'", + "customPromptPlaceholder": "Optional, e.g., 'Do not translate proper nouns'", "chunkSizeLabel": "Chunk Size", "resetBtn": "Reset", "concurrentLabel": "Concurrency", "glossaryGenTitle": "Glossary", "glossaryLabel": "Glossary (Optional)", - "glossaryHelp": "Select one or more CSV files. Files must contain 'src' and 'dst' headers for source and destination terms.", + "glossaryHelp": "Select one or more CSV files. Files must contain 'src' and 'dst' columns for source and destination terms.", "viewGlossaryBtn": "View Glossary", "clearGlossaryBtn": "Clear", - "glossaryGenEnableLabel": "Auto-generate Glossary", + "glossaryGenEnableLabel": "Automatically Generate Glossary", "glossaryGenConfigLabel": "Glossary Generation Config", "glossaryGenConfigSame": "Same as Translation Config", "glossaryGenConfigCustom": "Custom", - "githubInfo": "GitHub Repo (Stars welcome❤):
\n https://github.com/xunbu/docutranslate", - "qqGroupInfo": "Discussion QQ Group: 1047781902", + "githubInfo": "GitHub Page (stars❤ welcome):
https://github.com/xunbu/docutranslate", + "qqGroupInfo": "QQ Group for discussion: 1047781902", "taskListTitle": "Task List", "newTaskBtn": "New Task", "noTaskPlaceholder": "No tasks yet. Click 'New Task' to get started!", "taskCardIdLabel": "Task ID", "taskCardIdPlaceholder": "Waiting for submission...", "taskCardFileDrop": "Click or drag file here", - "taskCardFileSelected": "File Selected", + "taskCardFileSelected": "File selected", "taskCardFilenameLabel": "Filename: ", "taskCardLogLabel": "Log", "taskCardStatusWaiting": "Waiting for file upload...", @@ -252,23 +241,23 @@ "taskCardDownloadBtn": "Download", "taskCardAttachmentBtn": "Attachments", "taskCardStartBtn": "Start Translation", - "downloadMdEmbedded": "Markdown (Embedded Images)", + "downloadMdEmbedded": "Markdown (Embedded Img)", "downloadMdZip": "Markdown (Zip)", "previewTitle": "Preview", "previewBilingualBtn": "Bilingual", "previewTranslatedOnlyBtn": "Translated Only", "previewOriginal": "Original", - "previewTranslated": "Translated", + "previewTranslated": "Translation", "closeBtn": "Close", "downloadBtn": "Download", - "tutorialModalTitle": "Tutorial", - "tutorialModalBody": "

Video tutorials can be found by searching for docutranslate on Bilibili.

Welcome to DocuTranslate! Follow these steps to translate your documents:

  1. Select Workflow

    First, choose the appropriate translation process from the top of the settings panel. Different workflows are suited for different file types:

    • Markdown-based Translation: Ideal for translating PDFs, markdown files, images, etc.
    • Plain Text Translation: For translating .txt or other plain text files.
    • JSON Translation: For translating specific fields within .json files.
    • DOCX Translation: For translating .docx files.
    • XLSX Translation: For translating .xlsx spreadsheets and .csv files.
    • SRT Subtitle Translation: For translating .srt subtitle files.
    • EPUB Translation: For translating .epub e-book files.
    • HTML Translation: For translating .html files.
    New Feature: The \"Auto-select Workflow\" switch is now on by default. Simply upload your file, and the system will automatically choose the best workflow for you, simplifying the process.

  2. Configure Parameters

    Configure the relevant settings based on your chosen workflow. All your settings are automatically saved in your browser.

    • Parsing Configuration (only for 'Markdown-based Translation'):
      • Parsing Engine: Select an engine to convert your file (like a PDF) into a translation-friendly Markdown format. This is not needed if your file is already Markdown.
      • Mineru Token: If you choose the minerU engine, you must enter your token here.
    • DOCX/XLSX/SRT/EPUB/HTML Options (for their respective workflows):
      • Insertion Mode: Defines how the translation is placed in the document. You can 'Replace' the original text, 'Append' it after, or 'Prepend' it before.
      • Separator: When using 'Append' or 'Prepend', this defines the characters used to separate the original and translated text.
    • JSON Path Configuration (only for 'JSON Translation'):
      • JSON Paths to Translate: Enter one JSONPath expression per line to specify which fields to translate.
      • For example, $..description translates all values with the key 'description'. $.items[0].name translates the 'name' of the first item.
    • Translation Model:
      • Skip Translation: If checked, the process will only parse and convert the document format without calling an AI for translation.
      • Select Platform/API Base URL/API Key/Model ID: Configure the AI translation service you want to use.
      • Refer to the platform's documentation for model IDs. Models not specialized in reasoning or mixture-of-experts models (with thinking disabled) are recommended.
    • Translation Configuration:
      • Target Language/Custom Prompt/Glossary: Specify the target language, add custom instructions, and provide a glossary to ensure term consistency.
      • Thinking Mode: Sets whether mixture-of-experts models should 'think' before answering. Currently supported by Zhipu's glm4.5 series, Alibaba's qwen3 series, and Volcengine's seed1.6 series. Disabling 'thinking' is recommended.
      • Chunk Size/Concurrency/Temperature: These control the size of text chunks sent to the AI, the number of parallel requests, and the randomness of the output. Default values are usually fine.
  3. Upload File

    In the task list on the right, click or drag your document into the file drop area.

  4. Start Translation

    Once the file is selected, click the Start Translation button on the task card. The system will begin processing, and you can monitor the progress in the log area.

  5. Review and Download

    After the translation is complete, action buttons will appear on the task card:

    • Preview: Opens a side panel for a side-by-side comparison of the original and translated text (for reference only).
    • Download: Download the translated document in various formats, including PDF, DOCX, XLSX, HTML, and Markdown.
    • Attachments: If any additional files were generated (like a glossary), you can download them here.
Tip: All settings are saved locally in your browser for your convenience.
", + "tutorialModalTitle": "User Guide", + "tutorialModalBody": "

Video tutorials are available; search for docutranslate on Bilibili.

Welcome to DocuTranslate! Follow these steps to translate your documents:

  1. Select Workflow

    First, choose the appropriate translation process from the top of the settings panel. Different workflows are suited for different file types:

    • Convert to Markdown then Translate: For translating PDF, markdown, images, etc.
    • Plain Text Translation: For translating .txt and other plain text files.
    • JSON Translation: For translating specific fields in .json files.
    • DOCX Translation: For translating .docx files.
    • XLSX Translation: For translating .xlsx spreadsheets and .csv files.
    • SRT Subtitle Translation: For translating .srt subtitle files.
    • EPUB Translation: For translating .epub e-book files.
    • HTML Translation: For translating .html files.
    New Feature: The \"Automatically select workflow\" switch is now on by default. Simply upload your file, and the system will automatically match it with the correct workflow, simplifying the process.

  2. Configure Parameters

    Based on your chosen workflow, complete the necessary configurations. All settings are automatically saved in your browser.

    • Parsing Configuration (visible only for the 'Convert to Markdown then Translate' workflow):
      • Parsing Engine: Select an engine to convert your file (like a PDF) into a translation-friendly Markdown format. No selection is needed if your file is already in Markdown.
      • Mineru Token: If you choose the minerU engine, you must enter your token here.
    • DOCX/XLSX/SRT/EPUB/HTML Translation Options (visible for corresponding workflows):
      • Insert Mode: Define how the translation result is placed in the document or subtitles. You can choose to 'Replace' the original, 'Append' after it, or 'Prepend' before it.
      • Separator: When in 'Append' or 'Prepend' mode, this is used to insert a separator between the original and translated text.
    • JSON Path Configuration (visible only for 'JSON Translation' workflow):
      • JSON Paths to Translate: Enter one JSONPath expression per line to specify which fields to translate. All strings inside each matched object are translated.
      • For example: $..description translates all values with the key 'description'. $.items[0].name translates the name of the first item. $.* translates all strings.
    • Translation Model:
      • Skip Translation: Check this to only perform document parsing and format conversion without calling an AI for translation.
      • Select Platform/API Base URL/API Key/Model ID: Configure the AI translation service you wish to use.
      • Refer to the platform's documentation for Model IDs. It's recommended to use non-reasoning or hybrid-reasoning models (with thinking mode turned off).
    • Translation Configuration:
      • Target Language/Custom Prompt/Glossary: Specify the target language, add extra instructions, and provide a glossary to ensure the accuracy of specific terms.
      • Thinking Mode: Sets whether a hybrid-reasoning model thinks before answering. Currently supported for Zhipu's glm4.5 series, Alibaba Cloud's qwen3 series, and Volcengine's seed1.6 series. It is recommended to select 'Disable'.
      • Chunk Size/Concurrency/Temperature: The size of text chunks sent to the AI, number of concurrent requests, and temperature. Default values are usually fine.
  3. Upload File

    In the task list on the right, click or drag your document into the file upload area.

  4. Start Translation

    Once the file is selected, click the Start Translation button on the task card. The system will begin processing, and you can monitor the real-time progress in the log area.

  5. View & Download

    After the translation is complete, action buttons will appear on the task card:

    • Preview: Compare the original and translated text side-by-side in a slide-out panel (for reference only).
    • Download: Download the translated document in various formats, including PDF, DOCX, XLSX, HTML, and Markdown.
    • Attachments: If any additional files were generated during the process (like a glossary), you can download them here.
Tip: All your configurations are saved locally in your browser for your next visit.
", "tutorialUnderstandBtn": "I Understand", "contributorsModalTitle": "Thanks for Contributing", "contributorsPara1": "DocuTranslate is an open-source project! The community's needs and usage are the driving force behind its progress.", - "contributorsPara2": "A big thank you to everyone who has sponsored the project, submitted code, provided valuable suggestions, and starred the repository!", - "contributorsWelcome": "We welcome contributions in the following ways:", - "contributorsGithub": "GitHub Repo", + "contributorsPara2": "A heartfelt thank you to all the friends who have funded the project, submitted code, provided valuable suggestions, and starred the project!", + "contributorsWelcome": "You are welcome to contribute in the following ways:", + "contributorsGithub": "GitHub Page", "contributorsPR": "Submit a Pull Request", "contributorsIssue": "Report an Issue", "contributorsQQ": "Or contact the author via QQ Group: 1047781902", @@ -277,36 +266,43 @@ "glossaryTableDestination": "Destination (dst)", "init_i18n_failed_alert": "Failed to load interface translations. Please check your network connection or contact an administrator.", "init_failed_alert": "Initialization failed, could not connect to the backend service. 
Please ensure the service is running and refresh the page.", - "glossaryEmpty": "Glossary is empty.", - "engineOptionIdentity": "No Parsing(.md)", - "engineOptionMineru": "Mineru", - "engineOptionDocling": "Docling", - "status_selectFileFirst": "Please select a file first!", - "status_invalidWorkflow": "Invalid workflow type.", - "status_fillRequired": "Please fill in all required fields.", - "status_releasingOldTask": "Releasing old task...", + "parsingSettingsTitleText": "Parsing Configuration", + "jsonSettingsTitleText": "JSON Path Configuration", + "xlsxSettingsTitleText": "XLSX Translation Options", + "docxSettingsTitleText": "DOCX Translation Options", + "srtSettingsTitleText": "SRT Translation Options", + "epubSettingsTitleText": "EPUB Translation Options", + "htmlSettingsTitleText": "HTML Translation Options", + "aiSettingsTitleText": "Translation Model", + "translationSettingsTitleText": "Translation Configuration", + "engineOptionIdentity": "Identity (file is already .md)", + "engineOptionMineru": "minerU (pdf/image/md)", + "engineOptionDocling": "docLing (pdf/image/md)", + "glossaryEmpty": "The glossary is empty.", + "status_selectFileFirst": "Please select a file first", + "status_fillRequired": "Please fill in all required fields", "btn_initializing": "Initializing...", - "status_encodingAndSubmitting": "Encoding and submitting task...", - "status_requestOk": "Request successful, task has started.", + "status_encodingAndSubmitting": "Encoding file and submitting task...", + "status_requestOk": "Request successful, task submitted", "btn_cancelTranslation": "Cancel Translation", "status_requestFail": "Request failed", - "status_initFail": "Task initialization failed", + "status_initFail": "Failed to initialize task", "status_cancelling": "Cancelling...", - "status_cancelSent": "Cancellation request sent.", + "status_cancelSent": "Cancellation request sent", "status_cancelFail": "Cancellation failed", - "admin_tasklist_failed": "Admin mode: Failed to load 
task list.", "status_gettingStatus": "Getting status...", - "status_updateError": "Status update failed.", "btn_reTranslate": "Re-translate", + "status_updateError": "Error updating status", "preview_loading": "Loading preview...", - "preview_cantReadOriginal": "Could not read original file content.", + "preview_cantReadOriginal": "Could not read the original file content.", "preview_cantPreviewType": "Cannot preview this file type", - "preview_noOriginalCache": "No original file cached for preview.", + "preview_noOriginalCache": "No original file cache available for preview.", "preview_loadFailed": "Failed to load preview.", - "pdf_preparing": "Preparing PDF...", - "pdf_print_failed": "Failed to invoke print function. Please try saving to PDF manually.", - "pdf_fetch_failed": "Failed to fetch content for PDF.", + "pdf_preparing": "Preparing PDF for printing...", + "pdf_print_failed": "Failed to invoke print function. Please try printing manually (Ctrl+P).", + "pdf_fetch_failed": "Failed to fetch preview content, cannot generate PDF.", "preview_bilingual": "Bilingual Preview", - "preview_translatedOnly": "Translated Only Preview" + "preview_translatedOnly": "Translated Only Preview", + "admin_tasklist_failed": "Admin mode: Failed to load task list." } } \ No newline at end of file diff --git a/docutranslate/static/index.html b/docutranslate/static/index.html index 172c9df..9db1353 100644 --- a/docutranslate/static/index.html +++ b/docutranslate/static/index.html @@ -1 +1 @@ - DocuTranslate - 交互式文档翻译

DocuTranslate

如果上传的文件本身是.md格式,此项可不选。
mineru VLM是更新的内测模型。

选择一个或多个CSV文件。文件需包含'src'和'dst'两列标题,分别代表原文和译文。

GitHub主页(欢迎star❤):
https://github.com/xunbu/docutranslate

交流QQ群: 1047781902

version:

任务列表

当前没有任务,点击“新建任务”开始吧!

预览
原文
译文
\ No newline at end of file + DocuTranslate - 交互式文档翻译

DocuTranslate

如果上传的文件本身是.md格式,此项可不选。
mineru VLM是更新的内测模型。

选择一个或多个CSV文件。文件需包含'src'和'dst'两列标题,分别代表原文和译文。

GitHub主页(欢迎star❤):
https://github.com/xunbu/docutranslate

交流QQ群: 1047781902

version:

任务列表

当前没有任务,点击“新建任务”开始吧!

预览
原文
译文
\ No newline at end of file diff --git a/docutranslate/translator/ai_translator/json_translator.py b/docutranslate/translator/ai_translator/json_translator.py index 647ed34..3c2e2f6 100644 --- a/docutranslate/translator/ai_translator/json_translator.py +++ b/docutranslate/translator/ai_translator/json_translator.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: MPL-2.0 import json from dataclasses import dataclass -from typing import Self, Any +from typing import Self, Any, Tuple, List from jsonpath_ng.ext import parse @@ -36,28 +36,78 @@ class JsonTranslator(AiTranslator): glossary_dict=config.glossary_dict ) self.translate_agent = SegmentsTranslateAgent(agent_config) - self.jsonpaths = config.json_paths + self.json_paths = config.json_paths + if not self.json_paths: + self.json_paths = ["$.*"] # 翻译所有字符串 - def _extract_matches(self, content: dict) -> list[Any]: + def _get_key_or_index_from_path(self, path) -> Any: + """从jsonpath_ng的Path对象中提取键或索引。""" + if hasattr(path, 'fields') and path.fields: + return path.fields[0] + if hasattr(path, 'index'): + return path.index + return None + + def _collect_strings_for_translation(self, content: dict) -> Tuple[List[str], List[Tuple[Any, Any]]]: """ - 根据 self.jsonpaths 从 JSON 内容中提取所有匹配项。 - 与原始代码不同,这里直接返回 Match 对象列表,它同时包含了值和路径信息。 + 根据jsonpath查找匹配项,并递归地从中收集所有字符串以进行翻译。 + 为了防止重复,会跟踪每个字符串的精确位置。 + + 返回: + - original_texts: 一个包含所有待翻译字符串的列表。 + - update_targets: 一个包含更新信息的目标列表,每个元素为 (container, key_or_index)。 """ + original_texts = [] + update_targets = [] + # 使用 (id(container), key_or_index) 来唯一标识一个位置,防止重复添加 + seen_targets = set() + + # 辅助递归函数,用于遍历json对象 + def _traverse(node: Any, container: Any, key_or_index: Any): + # 如果当前节点是字符串,并且其位置尚未被记录 + target_id = (id(container), key_or_index) + if isinstance(node, str): + if target_id not in seen_targets: + original_texts.append(node) + update_targets.append((container, key_or_index)) + seen_targets.add(target_id) + # 如果是字典,则遍历其所有子节点 + elif isinstance(node, dict): + for k, v in node.items(): + 
_traverse(v, node, k) + # 如果是列表,则遍历其所有子节点 + elif isinstance(node, list): + for i, item in enumerate(node): + _traverse(item, node, i) + + # 1. 查找所有顶层匹配项 all_matches = [] - for path_str in self.jsonpaths: - path_expr = parse(path_str) - matches = path_expr.find(content) - all_matches.extend(matches) - return all_matches + for path_str in self.json_paths: + jsonpath_expr = parse(path_str) + all_matches.extend(jsonpath_expr.find(content)) - def _update_content_with_translations(self, content: dict, matches: list[Any], translated_texts: list[str]): + # 2. 遍历匹配项并启动递归收集 + for match in all_matches: + parent = match.context.value if match.context else None + key_or_index = self._get_key_or_index_from_path(match.path) + + # 直接在匹配到的值上启动遍历 + _traverse(match.value, parent, key_or_index) + + return original_texts, update_targets + + def _apply_translations(self, update_targets: List[Tuple[Any, Any]], translated_texts: List[str]): """ 使用翻译后的文本更新原始JSON内容。 """ - # 使用 zip 将每个匹配项与其对应的翻译文本配对 - for match, translated_text in zip(matches, translated_texts): - # match.full_path 包含了更新原始 content 所需的精确位置信息 - match.full_path.update(content, translated_text) + if len(update_targets) != len(translated_texts): + raise ValueError("The number of translation targets does not match the number of translated texts.") + + for target, text in zip(update_targets, translated_texts): + container, key_or_index = target + # 确保容器和键/索引是有效的,然后执行更新 + if container is not None and key_or_index is not None: + container[key_or_index] = text def translate(self, document: Document) -> Self: """ @@ -65,72 +115,65 @@ class JsonTranslator(AiTranslator): 流程: 1. 解析输入的JSON文档。 - 2. 提取所有符合jsonpath规则的匹配项 (Match对象)。 - 3. 从匹配项中获取原始文本,并批量发送进行翻译。 + 2. 根据jsonpath找到匹配对象,并递归遍历它们以提取所有字符串。 + 3. 批量发送提取的字符串进行翻译。 4. 将翻译回来的文本根据其原始位置,更新回JSON对象中。 - 5. 将更新后的 content 写回 document + 5. 
将更新后的 content 写回 document。 """ content = json.loads(document.content.decode()) - # 步骤 1: 提取所有需要翻译的匹配项 - all_matches = self._extract_matches(content) + # 步骤 1: 提取所有需要翻译的字符串及其位置 + original_texts, update_targets = self._collect_strings_for_translation(content) - if not all_matches: - # 如果没有找到任何内容,则无需执行任何操作 + if not original_texts: return self - original_texts = [match.value for match in all_matches] if self.glossary_agent: self.glossary_dict_gen = self.glossary_agent.send_segments(original_texts, self.chunk_size) if self.translate_agent: self.translate_agent.update_glossary_dict(self.glossary_dict_gen) + # 步骤 2: 批量翻译提取出的文本 if self.translate_agent: translated_texts = self.translate_agent.send_segments(original_texts, self.chunk_size) else: translated_texts = original_texts - # 健壮性检查:确保翻译回来的项目数量与发送的一致 + if len(original_texts) != len(translated_texts): raise ValueError("翻译服务返回的项目数量与发送的数量不匹配。") # 步骤 3: 将翻译结果写回原始JSON对象 - self._update_content_with_translations(content, all_matches, translated_texts) + self._apply_translations(update_targets, translated_texts) - # 更新原始 document 对象的内容(可选,但良好实践) - document.content = json.dumps(content, ensure_ascii=False).encode('utf-8') + document.content = json.dumps(content, ensure_ascii=False, indent=2).encode('utf-8') return self - # todo:增加协程粒度 async def translate_async(self, document: Document) -> Self: content = json.loads(document.content.decode()) - # 步骤 1: 提取所有需要翻译的匹配项 - all_matches = self._extract_matches(content) + # 步骤 1: 提取所有需要翻译的字符串及其位置 + original_texts, update_targets = self._collect_strings_for_translation(content) - if not all_matches: - # 如果没有找到任何内容,则无需执行任何操作 + if not original_texts: return self - original_texts = [match.value for match in all_matches] - if self.glossary_agent: self.glossary_dict_gen = await self.glossary_agent.send_segments_async(original_texts, self.chunk_size) if self.translate_agent: self.translate_agent.update_glossary_dict(self.glossary_dict_gen) - # 步骤 2: 批量翻译提取出的文本 + # 步骤 2: 批量翻译提取出的文本 if 
self.translate_agent: translated_texts = await self.translate_agent.send_segments_async(original_texts, self.chunk_size) else: translated_texts = original_texts - # 健壮性检查:确保翻译回来的项目数量与发送的一致 + if len(original_texts) != len(translated_texts): raise ValueError("翻译服务返回的项目数量与发送的数量不匹配。") # 步骤 3: 将翻译结果写回原始JSON对象 - self._update_content_with_translations(content, all_matches, translated_texts) + self._apply_translations(update_targets, translated_texts) - # 更新原始 document 对象的内容(可选,但良好实践) - document.content = json.dumps(content, ensure_ascii=False).encode('utf-8') + document.content = json.dumps(content, ensure_ascii=False, indent=2).encode('utf-8') return self diff --git a/更新日志.txt b/更新日志.txt index b24898c..7edc5fb 100644 --- a/更新日志.txt +++ b/更新日志.txt @@ -4,6 +4,7 @@ v1.3.1版 2025.8.28 优化 - 优化前端显示效果 - 优化网址在cmd中的显示时机 +- json工作流将翻译json_path匹配对象内的所有字符串 修复 - 修复自动生成的术语表转csv附件下载时出现的问题 ----------------------------------------