From 730eb060bcceb0a083d1b38da69c3937370e9688 Mon Sep 17 00:00:00 2001 From: xunbu Date: Sun, 9 Nov 2025 23:19:52 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BF=AE=E6=AD=A3txt=E7=BF=BB=E8=AF=91?= =?UTF-8?q?=E6=97=B6line=E6=A8=A1=E5=BC=8F=E7=9A=84=E9=94=99=E8=AF=AF?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docutranslate/static/i18nData.json | 164 +++++++------- docutranslate/static/index.html | 2 +- .../ai_translator/txt_translator.py | 208 ++++-------------- 3 files changed, 128 insertions(+), 246 deletions(-) diff --git a/docutranslate/static/i18nData.json b/docutranslate/static/i18nData.json index c5c97ee..6c92420 100644 --- a/docutranslate/static/i18nData.json +++ b/docutranslate/static/i18nData.json @@ -51,7 +51,7 @@ "jsonSettingsTitleText": "JSON路径配置", "jsonPathLabel": "需要翻译的JSON路径", "jsonPathPlaceholder": "每行一个路径, 例如:\n$.name\n$.*", - "jsonPathHelp": "采用jsonpath-ng的路径选择语法,每一行表示一个json路径。\n 将翻译路径匹配对象内的所有字符串", + "jsonPathHelp": "采用jsonpath-ng的路径选择语法,每一行表示一个json路径。 将翻译路径匹配对象内的所有字符串", "parsingSettingsTitleText": "解析配置", "parsingEngineLabel": "解析引擎", "parsingEngineHelp": "如果上传的文件本身是.md格式,此项可不选。", @@ -81,12 +81,11 @@ "baseUrlLabel": "API 地址 (Base URL)", "baseUrlPlaceholder": "OpenAi兼容地址", "getApiKeyTitle": "获取API Key", - "apiHrefInfo302ai": "👈 通过此链接注册可享1美元免费额度", "apiKeyPlaceholder": "请输入您的API Key", "modelIdLabel": "模型ID", "modelIdPlaceholder": "例如: gpt-4o, glm-4", "systemProxyLabel": "启用系统代理", - "forceJsonLabel": "强制json输出", + "forceJson": "强制json输出", "forceJsonTooltip": "在需要json格式输出时强制模型输出json格式。可能会降低翻译质量,建议指令遵循强的模型关闭", "translationSettingsTitleText": "翻译配置", "targetLanguageLabel": "目标语言", @@ -116,7 +115,7 @@ "glossaryGenConfigCustom": "自定义", "importConfigBtn": "导入配置", "exportConfigBtn": "导出配置", - "githubInfo": "GitHub主页(欢迎star❤):
\n https://github.com/xunbu/docutranslate", + "githubInfo": "GitHub主页(欢迎star❤):
\n https://github.com/xunbu/docutranslate", "qqGroupInfo": "交流QQ群: 1047781902", "taskListTitle": "任务列表", "newTaskBtn": "新建任务", @@ -145,7 +144,7 @@ "closeBtn": "关闭", "downloadBtn": "下载", "tutorialModalTitle": "使用教程", - "tutorialModalBody": "

视频教程可以在B站搜索 docutranslate 获取。

欢迎使用 DocuTranslate!请按照以下步骤完成文档翻译:

  1. 第一步:选择工作流

    在左侧配置面板的顶部,首先选择最适合您文件类型的处理流程。

    提示: 默认已开启“自动选择工作流”。您只需上传文件,系统会自动为您匹配合适的工作流,简化操作。

    • 转Markdown再翻译: 适用于翻译PDF、markdown、图片等文件。这是最通用和强大的模式。
    • 纯文本翻译: 用于翻译 .txt 纯文本文件。
    • EPUB翻译: 用于翻译 .epub 电子书文件。
    • DOCX翻译: 用于翻译 .docx Word文档。
    • XLSX翻译: 用于翻译 .xlsx.csv 电子表格文件。
    • SRT字幕翻译: 用于翻译 .srt 字幕文件。
    • ASS字幕翻译: 用于翻译 .ass 特效字幕文件。
    • JSON翻译: 用于翻译 .json 文件中的特定字段。
    • HTML翻译: 用于翻译 .html 网页文件。
  2. 第二步:配置参数

    选择工作流后,下方会显示相关的配置选项。请依次完成设置(所有配置都会自动保存在您的浏览器中):

    A. 工作流特定选项 (根据您第一步的选择出现):

    • 如果选择“转Markdown再翻译”,请配置 解析配置
      • 解析引擎: 选择一个引擎将您的文件(如PDF)转换为适合翻译的Markdown格式。如果您的文件已经是Markdown格式,则无需选择。
      • Mineru Token: 如果您选择 minerU 引擎,需要在此处填入您的Token。
    • 如果选择“纯文本/DOCX/XLSX/SRT/ASS/EPUB/HTML”,请配置其 翻译选项
      • 插入模式: 定义翻译结果如何放入文档。您可以选择直接“替换”原文,或是在原文之后“附加”,或是在原文之前“前置”。
      • 分隔符: 当选择“附加”或“前置”模式时,此项用于在原文和译文之间插入分隔符(例如ASS格式常用 \\N,EPUB格式常用 <br /> 作为换行分隔符)。
    • 如果选择“JSON翻译”,请配置 JSON路径
      • 需要翻译的JSON路径: 每行输入一个 JSONPath 表达式,将翻译所有与路径匹配的对象中的字符串。例如:$.*(翻译全部字符串),$..description(翻译所有键为description的值)。

    B. 通用选项 (适用于所有工作流):

    • 翻译模型:
      • 选择平台/API 地址/API Key/模型ID: 配置您希望使用的AI翻译服务。模型能力指令遵循越强,出错漏翻的概率越低。
      • 跳过翻译: 勾选此项后,将只执行文档解析和格式转换,不调用AI进行翻译。
    • 翻译配置:
      • 目标语言: 指定翻译的目标语言。
      • 自定义Prompt: 可选,添加额外指令,如“人名保持原文不翻译”。
      • 思考模式: 针对部分支持混合推理的模型进行设置,建议选择“禁用(推荐)”。
      • 分块大小/并发数等: 高级参数,用于调整性能和API请求行为,通常保持默认即可。
    • 术语表:
      • 上传术语表 (可选): 上传CSV文件(需包含'src'和'dst'列)来保证特定术语翻译的统一性和准确性。
      • 自动生成术语表: 启用后,程序会先从原文中提取术语并生成一个术语表,然后再进行翻译。
  3. 第三步:上传文件

    在右侧的任务列表中,点击或拖拽您的文档到文件上传区域。

  4. 第四步:开始翻译

    文件选择成功后,点击任务卡片右下角的 开始翻译 按钮。系统将开始处理任务,您可以在日志区域查看实时进度。

  5. 第五步:查看与下载

    翻译完成后,任务卡片下方会出现操作按钮:

    • 预览: 在右侧滑出的面板中进行原文和译文的对照预览。
    • 下载: 下载包括 PDF, DOCX, Markdown 等多种格式的译文。
    • 附件: 如果翻译过程中生成了附加文件(如自动生成的术语表),可在此处下载。
重要提示: 所有配置都会自动保存在您的浏览器本地,方便下次使用。您也可以使用新增的“导出配置”和“导入配置”按钮来备份和恢复您的设置。
", + "tutorialModalBody": "

视频教程可以在B站搜索 docutranslate 获取。

欢迎使用 DocuTranslate!请按照以下步骤完成文档翻译:

  1. 第一步:选择工作流

    在左侧配置面板的顶部,首先选择最适合您文件类型的处理流程。

    提示: 默认已开启“自动选择工作流”。您只需上传文件,系统会自动为您匹配合适的工作流,简化操作。
    • 转Markdown再翻译: 适用于翻译PDF、markdown、图片等文件。这是最通用和强大的模式。
    • 纯文本翻译: 用于翻译 .txt 纯文本文件。
    • EPUB翻译: 用于翻译 .epub 电子书文件。
    • DOCX翻译: 用于翻译 .docx Word文档。
    • XLSX翻译: 用于翻译 .xlsx 或 .csv 电子表格文件。
    • SRT字幕翻译: 用于翻译 .srt 字幕文件。
    • ASS字幕翻译: 用于翻译 .ass 特效字幕文件。
    • JSON翻译: 用于翻译 .json 文件中的特定字段。
    • HTML翻译: 用于翻译 .html 网页文件。
  2. 第二步:配置参数

    选择工作流后,下方会显示相关的配置选项。请依次完成设置(所有配置都会自动保存在您的浏览器中):

    A. 工作流特定选项 (根据您第一步的选择出现):

    • 如果选择“转Markdown再翻译”,请配置 解析配置
      • 解析引擎: 选择一个引擎将您的文件(如PDF)转换为适合翻译的Markdown格式。如果您的文件已经是Markdown格式,则无需选择。
      • Mineru Token: 如果您选择 minerU 引擎,需要在此处填入您的Token。
    • 如果选择“纯文本/DOCX/XLSX/SRT/ASS/EPUB/HTML”,请配置其 翻译选项
      • 插入模式: 定义翻译结果如何放入文档。您可以选择直接“替换”原文,或是在原文之后“附加”,或是在原文之前“前置”。
      • 分隔符: 当选择“附加”或“前置”模式时,此项用于在原文和译文之间插入分隔符(例如ASS格式常用 \\N,EPUB格式常用 <br /> 作为换行分隔符)。
    • 如果选择“JSON翻译”,请配置 JSON路径
      • 需要翻译的JSON路径: 每行输入一个 JSONPath 表达式,将翻译所有与路径匹配的对象中的字符串。例如:$.*(翻译全部字符串),$..description(翻译所有键为description的值)。

    B. 通用选项 (适用于所有工作流):

    • 翻译模型:
      • 选择平台/API 地址/API Key/模型ID: 配置您希望使用的AI翻译服务。模型的指令遵循能力越强,出错或漏翻的概率越低。
      • 跳过翻译: 勾选此项后,将只执行文档解析和格式转换,不调用AI进行翻译。
    • 翻译配置:
      • 目标语言: 指定翻译的目标语言。
      • 自定义Prompt: 可选,添加额外指令,如“人名保持原文不翻译”。
      • 思考模式: 针对部分支持混合推理的模型进行设置,建议选择“禁用(推荐)”。
      • 分块大小/并发数等: 高级参数,用于调整性能和API请求行为,通常保持默认即可。
    • 术语表:
      • 上传术语表 (可选): 上传CSV文件(需包含'src'和'dst'列)来保证特定术语翻译的统一性和准确性。
      • 自动生成术语表: 启用后,程序会先从原文中提取术语并生成一个术语表,然后再进行翻译。
  3. 第三步:上传文件

    在右侧的任务列表中,点击或拖拽您的文档到文件上传区域。

  4. 第四步:开始翻译

    文件选择成功后,点击任务卡片右下角的 开始翻译 按钮。系统将开始处理任务,您可以在日志区域查看实时进度。

  5. 第五步:查看与下载

    翻译完成后,任务卡片下方会出现操作按钮:

    • 预览: 在右侧滑出的面板中进行原文和译文的对照预览。
    • 下载: 下载包括 PDF, DOCX, Markdown 等多种格式的译文。
    • 附件: 如果翻译过程中生成了附加文件(如自动生成的术语表),可在此处下载。
重要提示: 所有配置都会自动保存在您的浏览器本地,方便下次使用。您也可以使用新增的“导出配置”和“导入配置”按钮来备份和恢复您的设置。
", "tutorialUnderstandBtn": "我明白了", "contributorsModalTitle": "感谢贡献", "contributorsPara1": "DocuTranslate是一个开源项目!大家的需求与使用是项目进步的动力。", @@ -161,44 +160,45 @@ "init_i18n_failed_alert": "加载界面翻译资源失败,请检查网络连接或联系管理员。", "init_failed_alert": "初始化失败,无法连接到后端服务。请检查服务是否运行或刷新页面。", "engineOptionIdentity": "已经是Markdown格式", - "engineOptionMineru": "Mineru (推荐)", + "engineOptionMineru": "Mineru(推荐)", "engineOptionDocling": "Docling", "engineOptionMineruDeploy": "Mineru部署服务", + "apiHrefInfo302ai": "👈由此链接注册可享受1刀免费额度", "glossaryEmpty": "术语表为空", - "status_selectFileFirst": "请先选择文件", - "status_fillRequired": "请填写所有必填项", - "btn_initializing": "初始化中...", - "status_encodingAndSubmitting": "文件编码和提交中...", - "status_requestOk": "请求成功,任务已开始", + "status_selectFileFirst": "请先选择文件!", + "status_fillRequired": "请填写所有必填项!", + "btn_initializing": "初始化...", + "status_encodingAndSubmitting": "编码并提交中...", + "status_requestOk": "请求成功,任务已开始。", "btn_cancelTranslation": "取消翻译", "status_requestFail": "请求失败", "status_initFail": "任务初始化失败", "status_cancelling": "取消中...", - "status_cancelSent": "取消请求已发送", + "status_cancelSent": "已发送取消请求。", "status_cancelFail": "取消失败", - "btn_reTranslate": "重新翻译", "status_gettingStatus": "获取状态中...", - "status_updateError": "状态更新出错", - "preview_loading": "预览加载中...", - "preview_cantReadOriginal": "无法读取原始文件内容。", + "btn_reTranslate": "重新翻译", + "status_updateError": "状态更新出错。", + "preview_loading": "加载预览中...", + "preview_cantReadOriginal": "无法读取原文预览。", "preview_cantPreviewType": "无法预览此文件类型", - "preview_noOriginalCache": "没有缓存的原始文件可供预览。", + "preview_noOriginalCache": "无原文缓存可供预览。", "preview_loadFailed": "预览加载失败。", - "pdf_preparing": "正在准备PDF以便打印...", - "pdf_print_failed": "调用打印功能失败。请尝试手动右键点击预览内容并选择打印。", - "pdf_fetch_failed": "获取预览内容失败,无法生成PDF。", + "pdf_preparing": "正在准备PDF...", + "pdf_print_failed": "调用打印功能失败,请检查浏览器设置或手动保存为PDF。", + "pdf_fetch_failed": "获取用于生成PDF的内容失败。", "preview_bilingual": "双语预览", "preview_translatedOnly": "仅译文预览", - "admin_tasklist_failed": "管理员模式: 加载任务列表失败。", - 
"configImportSuccess": "配置导入成功!界面已刷新。", - "configImportError": "配置导入失败,文件可能已损坏或格式不正确。" + "admin_tasklist_failed": "管理员模式:加载任务列表失败。", + "configImportSuccess": "配置导入成功!", + "configImportError": "配置文件解析失败,请检查文件格式。" }, "en": { "pageTitle": "DocuTranslate - Interactive Document Translation", "tutorialBtn": "Tutorial", - "projectContributeBtn": "Contribute", + "projectContributeBtn": "Project Collaboration", "workflowTitle": "Select Workflow", - "workflowOptionMarkdown": "To Markdown & Translate (.pdf/.md/.png, etc.)", + "workflowOptionMarkdown": "Convert to Markdown then Translate (.pdf/.md/.png, etc.)", "workflowOptionTxt": "Plain Text Translation (.txt)", "workflowOptionEpub": "EPUB Translation (.epub)", "workflowOptionDocx": "DOCX Translation (.docx)", @@ -207,54 +207,54 @@ "workflowOptionAss": "ASS Subtitle Translation (.ass)", "workflowOptionJson": "JSON Translation (.json)", "workflowOptionHtml": "HTML Translation (.html)", - "autoWorkflowLabel": "Auto-select workflow", + "autoWorkflowLabel": "Auto-select Workflow", "txtSettingsTitleText": "TXT Translation Options", "insertModeLabel": "Insert Mode", - "insertModeReplace": "Replace Original", - "insertModeAppend": "Append to Original", - "insertModePrepend": "Prepend to Original", + "insertModeReplace": "Replace Original (Replace)", + "insertModeAppend": "Append to Original (Append)", + "insertModePrepend": "Prepend to Original (Prepend)", "insertModeHelpTxt": "Choose how to insert the translated text.", "separatorLabel": "Separator", "separatorPlaceholderSimple": "e.g., \\n---\\n", - "separatorHelp": "Characters to separate original and translated text in append/prepend mode. \\n for newline.", + "separatorHelp": "Character used to separate original and translated text in append or prepend mode. 
\\n represents a newline.", "segmentModeLabel": "Segment Mode", "segmentModeLine": "By Line (Each line is a segment)", "segmentModeParagraph": "By Paragraph (Merge consecutive non-empty lines)", "segmentModeNone": "No Segmentation (Entire text is one segment)", - "segmentModeHelp": "Choose how to split the text into chunks for translation.", + "segmentModeHelp": "Choose how to segment the text for translation.", "docxSettingsTitleText": "DOCX Translation Options", "insertModeHelpDocx": "Choose how to insert the translated text.", "separatorPlaceholderStructured": "e.g., ---", - "separatorHelpDocx": "In append mode, translation starts in a new paragraph. This adds content between original and translated paragraphs. \\n for internal newlines.", + "separatorHelpDocx": "In append mode, the translation will start a new paragraph. This is for adding extra content between original and translated paragraphs. \\n can be used for newlines within the separator.", "xlsxSettingsTitleText": "XLSX Translation Options", - "insertModeHelpXlsx": "Choose how to insert translated text into cells.", - "xlsxTranslateRegionsLabel": "Translate Regions (Optional)", - "xlsxTranslateRegionsPlaceholder": "One region per line, e.g., Sheet1!A1:B10 (applies to all sheets if sheet name is omitted)", + "insertModeHelpXlsx": "Choose how to insert the translated text into cells.", + "xlsxTranslateRegionsLabel": "Translation Regions (Optional)", + "xlsxTranslateRegionsPlaceholder": "One region per line, e.g., Sheet1!A1:B10 (applies to all sheets if sheet name is omitted).", "srtSettingsTitleText": "SRT Translation Options", "insertModeHelpSrt": "Choose how to insert the translated text.", "epubSettingsTitleText": "EPUB Translation Options", "insertModeHelpEpub": "Choose how to insert the translated text.", - "separatorHelpEpub": "In append mode, translation starts in a new block. This adds content between original and translated blocks. 
\\n for internal newlines.", + "separatorHelpEpub": "In append mode, the translation will start a new block. This is for adding extra content between original and translated blocks. \\n can be used for newlines within the separator.", "htmlSettingsTitleText": "HTML Translation Options", "insertModeHelpHtml": "Choose how to insert the translated text.", - "separatorHelpHtml": "In append mode, translation starts in a new block. This adds content between original and translated blocks. \\n for internal newlines.", + "separatorHelpHtml": "In append mode, the translation will start a new block. This is for adding extra content between original and translated blocks. \\n can be used for newlines within the separator.", "assSettingsTitleText": "ASS Translation Options", "insertModeHelpAss": "Choose how to insert the translated text.", - "separatorPlaceholderAss": "e.g., \\N (newline)", - "separatorHelpAss": "Characters to separate original and translated text in append/prepend mode. \\N is the newline for ASS format.", + "separatorPlaceholderAss": "e.g., \\N (newline character)", + "separatorHelpAss": "Character used to separate original and translated text in append or prepend mode. \\N is the newline character for the ASS format.", "jsonSettingsTitleText": "JSON Path Configuration", "jsonPathLabel": "JSON Paths to Translate", - "jsonPathPlaceholder": "One path per line, e.g.,\n$.name\n$.*", - "jsonPathHelp": "Uses jsonpath-ng syntax. One JSON path per line. All strings within matching objects will be translated.", + "jsonPathPlaceholder": "One path per line, e.g.:\n$.name\n$.*", + "jsonPathHelp": "Uses jsonpath-ng syntax. Each line represents a JSON path. 
All strings within the matched objects will be translated.", "parsingSettingsTitleText": "Parsing Configuration", "parsingEngineLabel": "Parsing Engine", - "parsingEngineHelp": "Not required if the uploaded file is already in .md format.", + "parsingEngineHelp": "If the uploaded file is already in .md format, this can be skipped.", "getMineruTokenTitle": "Get Mineru Token", - "mineruTokenPlaceholder": "Required when using Mineru engine", + "mineruTokenPlaceholder": "Required when using the Mineru engine", "modelVersionLabel": "Mineru Model Version", "modelVersionVlm": "VLM", "modelVersionPipline": "Pipeline", - "modelVersionHelp": "Mineru VLM is a newer, internal test model.", + "modelVersionHelp": "Mineru VLM is a newer internal test model.", "mineruDeployBaseUrlLabel": "Service Address (Base URL)", "mineruDeployBaseUrlPlaceholder": "e.g., http://127.0.0.1:8000", "mineruDeployBackendLabel": "Backend Type", @@ -271,54 +271,53 @@ "aiSettingsTitleText": "Translation Model", "skipTranslationLabel": "Skip Translation", "platformLabel": "Select Platform", - "platformCustom": "Custom Endpoint", + "platformCustom": "Custom API", "baseUrlLabel": "API Address (Base URL)", "baseUrlPlaceholder": "OpenAI-compatible address", "getApiKeyTitle": "Get API Key", - "apiHrefInfo302ai": "👈 Register via this link for a $1 free credit", "apiKeyPlaceholder": "Please enter your API Key", "modelIdLabel": "Model ID", "modelIdPlaceholder": "e.g., gpt-4o, glm-4", "systemProxyLabel": "Enable System Proxy", - "forceJsonLabel": "Force JSON Output", - "forceJsonTooltip": "Force the model to output in JSON format. May reduce translation quality. Recommended to disable for models with strong instruction following.", + "forceJson": "Force JSON output", + "forceJsonTooltip": "Force the model to output JSON format when required. 
May reduce translation quality; it's recommended to disable for models that follow instructions well.", "translationSettingsTitleText": "Translation Configuration", "targetLanguageLabel": "Target Language", "targetLanguageCustom": "Other (Custom)", - "customLangPlaceholder": "Enter target language, e.g., Italian", + "customLangPlaceholder": "Please enter the target language, e.g., Italian", "thinkingModeLabel": "Thinking Mode", - "thinkingModeTooltip": "Sets whether mixed-reasoning models should 'think'. Supported by Zhipu glm4.5 series, Volcengine seed1.6 series, SiliconFlow, Google Gemini series, 302AI (partial). Disabling is recommended.", + "thinkingModeTooltip": "Set whether the mixed-inference model should think. Currently supported by Zhipu's glm4.5 series, Volcengine's seed1.6 series, SiliconFlow, Google's Gemini series, and 302AI (partial). Disabling is recommended.", "thinkingModeEnable": "Enable", "thinkingModeDisable": "Disable (Recommended)", "thinkingModeDefault": "Default", "customPromptLabel": "Custom Prompt", - "customPromptPlaceholder": "Optional, e.g., \"Do not translate proper names.\"", + "customPromptPlaceholder": "Optional, e.g., \"Do not translate personal names, keep them in the original language\"", "chunkSizeLabel": "Chunk Size", "resetBtn": "Reset", "concurrentLabel": "Concurrency", "retryLabel": "Retries", "glossaryGenTitle": "Glossary", "glossaryLabel": "Glossary (Optional)", - "glossaryHelp": "Select one or more CSV files. Files must contain 'src' and 'dst' headers for source and destination terms.", + "glossaryHelp": "Select one or more CSV files. 
Files must contain 'src' and 'dst' headers, representing source and destination terms.", "viewGlossaryBtn": "View Glossary", "clearGlossaryBtn": "Clear", "glossaryGenEnableLabel": "Auto-generate Glossary", "glossaryCustomPromptLabel": "Custom Prompt", - "glossaryCustomPromptPlaceholder": "Prompt for glossary generation", + "glossaryCustomPromptPlaceholder": "Glossary generation prompt", "glossaryGenConfigLabel": "Glossary Generation Config", - "glossaryGenConfigSame": "Same as Translation", + "glossaryGenConfigSame": "Same as Translation Config", "glossaryGenConfigCustom": "Custom", "importConfigBtn": "Import Config", "exportConfigBtn": "Export Config", - "githubInfo": "GitHub Home (stars❤ welcome):
\n https://github.com/xunbu/docutranslate", - "qqGroupInfo": "QQ Group: 1047781902", + "githubInfo": "GitHub Page (stars❤ welcome):
\n https://github.com/xunbu/docutranslate", + "qqGroupInfo": "Discussion QQ Group: 1047781902", "taskListTitle": "Task List", "newTaskBtn": "New Task", - "noTaskPlaceholder": "No tasks yet. Click 'New Task' to get started!", + "noTaskPlaceholder": "No tasks yet. Click \"New Task\" to get started!", "taskCardIdLabel": "Task ID", - "taskCardIdPlaceholder": "Waiting for submission...", + "taskCardIdPlaceholder": "Awaiting submission...", "taskCardFileDrop": "Click or drag file here", - "taskCardFileSelected": "File selected", + "taskCardFileSelected": "File Selected", "taskCardFilenameLabel": "Filename: ", "taskCardLogLabel": "Logs", "copyLogsTooltip": "Copy logs", @@ -332,59 +331,60 @@ "downloadAss": "ASS", "previewTitle": "Preview", "previewBilingualBtn": "Bilingual", - "previewTranslatedOnlyBtn": "Translated Only", + "previewTranslatedOnlyBtn": "Translation Only", "syncScrollTooltip": "Sync Scrolling", "previewOriginal": "Original", "previewTranslated": "Translation", "closeBtn": "Close", "downloadBtn": "Download", "tutorialModalTitle": "Tutorial", - "tutorialModalBody": "

Video tutorials are available on Bilibili by searching for docutranslate.

Welcome to DocuTranslate! Follow these steps to translate your documents:

  1. Step 1: Select Workflow

    At the top of the left settings panel, first choose the processing flow that best suits your file type.

    Tip: 'Auto-select workflow' is enabled by default. Just upload your file, and the system will automatically match the appropriate workflow for you.

    • To Markdown & Translate: Ideal for translating PDFs, markdown, images, etc. This is the most versatile and powerful mode.
    • Plain Text Translation: For translating .txt files.
    • EPUB Translation: For translating .epub e-books.
    • DOCX Translation: For translating .docx Word documents.
    • XLSX Translation: For translating .xlsx or .csv spreadsheets.
    • SRT Subtitle Translation: For translating .srt subtitle files.
    • ASS Subtitle Translation: For translating .ass advanced subtitle files.
    • JSON Translation: For translating specific fields within .json files.
    • HTML Translation: For translating .html web files.
  2. Step 2: Configure Parameters

    After selecting a workflow, relevant configuration options will appear below. Please complete the settings in order (all configurations are automatically saved in your browser):

    A. Workflow-Specific Options (Appear based on your Step 1 choice):

    • If 'To Markdown & Translate' is selected, configure Parsing Configuration:
      • Parsing Engine: Choose an engine to convert your file (like a PDF) into a translation-friendly Markdown format. No selection is needed if your file is already Markdown.
      • Mineru Token: If you choose the minerU engine, you must enter your token here.
    • If 'Plain Text/DOCX/XLSX/SRT/ASS/EPUB/HTML' is selected, configure its Translation Options:
      • Insert Mode: Defines how the translation result is placed. You can 'Replace' the original, 'Append' after, or 'Prepend' before it.
      • Separator: When using 'Append' or 'Prepend', this is used to insert a separator between the original and translated text (e.g., \\N for ASS, <br /> for EPUB).
    • If 'JSON Translation' is selected, configure JSON Paths:
      • JSON Paths to Translate: Enter one JSONPath expression per line to translate all string values matching the path. E.g., $.* (all strings), $..description (all values for the key description).

    B. General Options (Applicable to all workflows):

    • Translation Model:
      • Select Platform/API Address/API Key/Model ID: Configure the AI translation service you want to use. Models with better instruction-following abilities will have a lower chance of errors or missed translations.
      • Skip Translation: If checked, only document parsing and format conversion will be performed, without calling the AI for translation.
    • Translation Configuration:
      • Target Language: Specify the language to translate to.
      • Custom Prompt: Optional, add extra instructions like \"Do not translate proper names.\"
      • Thinking Mode: A setting for certain mixed-reasoning models; 'Disable (Recommended)' is suggested.
      • Chunk Size/Concurrency, etc.: Advanced parameters to adjust performance and API request behavior. Defaults are usually fine.
    • Glossary:
      • Upload Glossary (Optional): Upload a CSV file (with 'src' and 'dst' columns) to ensure consistency and accuracy for specific terms.
      • Auto-generate Glossary: If enabled, the program will first extract terms from the source text to create a glossary before proceeding with the translation.
  3. Step 3: Upload File

    In the task list on the right, click or drag your document into the file upload area.

  4. Step 4: Start Translation

    After the file is selected, click the Start Translation button on the task card. The system will begin processing, and you can watch the real-time progress in the log area.

  5. Step 5: View & Download

    Once translation is complete, action buttons will appear on the task card:

    • Preview: Compare the original and translated text side-by-side in a slide-out panel.
    • Download: Download the translated document in various formats, including PDF, DOCX, Markdown, etc.
    • Attachments: If any additional files were generated (like an auto-generated glossary), download them here.
Important Note: All settings are automatically saved locally in your browser for future use. You can also use the new 'Export Config' and 'Import Config' buttons to back up and restore your settings.
", - "tutorialUnderstandBtn": "I understand", + "tutorialModalBody": "

Video tutorials are available on Bilibili by searching for docutranslate.

Welcome to DocuTranslate! Please follow these steps to translate your documents:

  1. Step 1: Select Workflow

    At the top of the left-side configuration panel, first choose the processing flow that best suits your file type.

    Tip: \"Auto-select Workflow\" is enabled by default. Simply upload your file, and the system will automatically match it with the appropriate workflow to simplify the process.
    • Convert to Markdown then Translate: Suitable for translating PDF, Markdown, images, etc. This is the most versatile and powerful mode.
    • Plain Text Translation: For translating .txt plain text files.
    • EPUB Translation: For translating .epub e-book files.
    • DOCX Translation: For translating .docx Word documents.
    • XLSX Translation: For translating .xlsx or .csv spreadsheet files.
    • SRT Subtitle Translation: For translating .srt subtitle files.
    • ASS Subtitle Translation: For translating .ass advanced subtitle files.
    • JSON Translation: For translating specific fields within .json files.
    • HTML Translation: For translating .html web page files.
  2. Step 2: Configure Parameters

    After selecting a workflow, the relevant configuration options will appear below. Please complete the settings in order (all configurations are automatically saved in your browser):

    A. Workflow-Specific Options (Appear based on your choice in Step 1):

    • If \"Convert to Markdown then Translate\" is selected, configure Parsing Configuration:
      • Parsing Engine: Choose an engine to convert your file (like a PDF) into a translation-friendly Markdown format. No selection is needed if your file is already in Markdown format.
      • Mineru Token: If you choose the minerU engine, you need to enter your token here.
    • If \"Plain Text/DOCX/XLSX/SRT/ASS/EPUB/HTML\" is selected, configure its Translation Options:
      • Insert Mode: Defines how the translation result is placed in the document. You can choose to \"Replace\" the original, \"Append\" it after the original, or \"Prepend\" it before the original.
      • Separator: When \"Append\" or \"Prepend\" mode is selected, this is used to insert a separator between the original and translated text (e.g., \\N is common for ASS format, <br /> for EPUB format as a line break).
    • If \"JSON Translation\" is selected, configure JSON Paths:
      • JSON Paths to Translate: Enter one JSONPath expression per line to translate all strings within the matched objects. For example: $.* (translate all strings), $..description (translate all values with the key description).

    B. General Options (Applicable to all workflows):

    • Translation Model:
      • Select Platform/API Address/API Key/Model ID: Configure the AI translation service you wish to use. The better the model follows instructions, the lower the probability of errors and missed translations.
      • Skip Translation: If checked, only document parsing and format conversion will be performed, without calling the AI for translation.
    • Translation Configuration:
      • Target Language: Specify the target language for the translation.
      • Custom Prompt: Optional, add extra instructions, like \"Do not translate personal names.\"
      • Thinking Mode: A setting for some models that support mixed inference; \"Disable (Recommended)\" is the suggested choice.
      • Chunk Size/Concurrency, etc.: Advanced parameters for adjusting performance and API request behavior; usually, the defaults are fine.
    • Glossary:
      • Upload Glossary (Optional): Upload a CSV file (must contain 'src' and 'dst' columns) to ensure consistency and accuracy for specific terms.
      • Auto-generate Glossary: When enabled, the program will first extract terms from the original text to create a glossary before proceeding with the translation.
  3. Step 3: Upload File

    In the task list on the right, click or drag your document into the file upload area.

  4. Step 4: Start Translation

    Once the file is successfully selected, click the Start Translation button on the bottom right of the task card. The system will begin processing the task, and you can view the real-time progress in the log area.

  5. Step 5: View and Download

    After the translation is complete, action buttons will appear on the task card:

    • Preview: Compare the original and translated text side-by-side in a slide-out panel.
    • Download: Download the translation in various formats, including PDF, DOCX, Markdown, etc.
    • Attachments: If any additional files were generated during the process (like an auto-generated glossary), they can be downloaded here.
Important Note: All configurations are automatically saved locally in your browser for future use. You can also use the new \"Export Config\" and \"Import Config\" buttons to back up and restore your settings.
", + "tutorialUnderstandBtn": "I Understand", "contributorsModalTitle": "Thanks for Contributing", "contributorsPara1": "DocuTranslate is an open-source project! The community's needs and usage are the driving force behind its progress.", - "contributorsPara2": "A heartfelt thank you to everyone who has funded the project, submitted code, provided valuable suggestions, and starred the project!", - "contributorsWelcome": "You're welcome to contribute in the following ways:", - "contributorsGithub": "GitHub Home", + "contributorsPara2": "Thank you to everyone who has sponsored the project, submitted code, provided valuable suggestions, and starred the project!", + "contributorsWelcome": "You are welcome to contribute in the following ways:", + "contributorsGithub": "GitHub Page", "contributorsPR": "Submit a Pull Request", "contributorsIssue": "Report an Issue", - "contributorsQQ": "Or contact the author via QQ Group: 1047781902", + "contributorsQQ": "Or contact the author via the QQ group: 1047781902", "glossaryModalTitle": "Current Glossary", "glossaryTableSource": "Source (src)", "glossaryTableDestination": "Destination (dst)", "init_i18n_failed_alert": "Failed to load interface translations. Please check your network connection or contact an administrator.", "init_failed_alert": "Initialization failed, could not connect to the backend service. 
Please ensure the service is running and refresh the page.", - "engineOptionIdentity": "Already in Markdown format", + "engineOptionIdentity": "Already Markdown Format", "engineOptionMineru": "Mineru (Recommended)", "engineOptionDocling": "Docling", "engineOptionMineruDeploy": "Mineru Deploy Service", + "apiHrefInfo302ai": "👈 Register through this link to enjoy a $1 free credit", "glossaryEmpty": "Glossary is empty", - "status_selectFileFirst": "Please select a file first", - "status_fillRequired": "Please fill in all required fields", + "status_selectFileFirst": "Please select a file first!", + "status_fillRequired": "Please fill in all required fields!", "btn_initializing": "Initializing...", - "status_encodingAndSubmitting": "Encoding and submitting file...", - "status_requestOk": "Request successful, task started", + "status_encodingAndSubmitting": "Encoding and submitting...", + "status_requestOk": "Request successful, task has started.", "btn_cancelTranslation": "Cancel Translation", "status_requestFail": "Request failed", "status_initFail": "Task initialization failed", "status_cancelling": "Cancelling...", - "status_cancelSent": "Cancellation request sent", + "status_cancelSent": "Cancel request sent.", "status_cancelFail": "Cancellation failed", - "btn_reTranslate": "Re-translate", "status_gettingStatus": "Getting status...", - "status_updateError": "Error updating status", + "btn_reTranslate": "Re-translate", + "status_updateError": "Error updating status.", "preview_loading": "Loading preview...", - "preview_cantReadOriginal": "Could not read original file content.", + "preview_cantReadOriginal": "Could not read original file for preview.", "preview_cantPreviewType": "Cannot preview this file type", - "preview_noOriginalCache": "No cached original file to preview.", + "preview_noOriginalCache": "No original file cached for preview.", "preview_loadFailed": "Failed to load preview.", - "pdf_preparing": "Preparing PDF for printing...", - "pdf_print_failed": 
"Failed to invoke print function. Please try right-clicking the preview content and selecting Print manually.", - "pdf_fetch_failed": "Failed to fetch preview content, cannot generate PDF.", + "pdf_preparing": "Preparing PDF...", + "pdf_print_failed": "Failed to invoke print function. Please check browser settings or save as PDF manually.", + "pdf_fetch_failed": "Failed to fetch content for PDF generation.", "preview_bilingual": "Bilingual Preview", - "preview_translatedOnly": "Translated Only Preview", + "preview_translatedOnly": "Translation Only Preview", "admin_tasklist_failed": "Admin mode: Failed to load task list.", - "configImportSuccess": "Configuration imported successfully! The interface has been updated.", - "configImportError": "Failed to import configuration. The file may be corrupt or incorrectly formatted." + "configImportSuccess": "Configuration imported successfully!", + "configImportError": "Failed to parse config file, please check the file format." } } \ No newline at end of file diff --git a/docutranslate/static/index.html b/docutranslate/static/index.html index cd4e8c2..70e47f4 100644 --- a/docutranslate/static/index.html +++ b/docutranslate/static/index.html @@ -1 +1 @@ - DocuTranslate - 交互式文档翻译

DocuTranslate

如果上传的文件本身是.md格式,此项可不选。

Base URL:

选择一个或多个CSV文件。文件需包含'src'和'dst'两列标题,分别代表原文和译文。

GitHub主页(欢迎star❤):
https://github.com/xunbu/docutranslate

交流QQ群: 1047781902

version:

任务列表

LOGO

当前没有任务,点击“新建任务”开始吧!

预览
原文
译文
\ No newline at end of file + DocuTranslate - 交互式文档翻译

DocuTranslate

如果上传的文件本身是.md格式,此项可不选。

Base URL:

选择一个或多个CSV文件。文件需包含'src'和'dst'两列标题,分别代表原文和译文。

GitHub主页(欢迎star❤):
https://github.com/xunbu/docutranslate

交流QQ群: 1047781902

version:

任务列表

LOGO

当前没有任务,点击“新建任务”开始吧!

预览
原文
译文
\ No newline at end of file diff --git a/docutranslate/translator/ai_translator/txt_translator.py b/docutranslate/translator/ai_translator/txt_translator.py index 880d07b..7c6b833 100644 --- a/docutranslate/translator/ai_translator/txt_translator.py +++ b/docutranslate/translator/ai_translator/txt_translator.py @@ -1,6 +1,7 @@ # SPDX-FileCopyrightText: 2025 QinHan # SPDX-License-Identifier: MPL-2.0 import asyncio +import re from dataclasses import dataclass from typing import Self, Literal, List @@ -17,18 +18,24 @@ class TXTTranslatorConfig(AiTranslatorConfig): Attributes: insert_mode (Literal["replace", "append", "prepend"]): 指定如何插入翻译文本的模式。 - - "replace": 用译文替换原文。 - - "append": 将译文追加到原文后面。 - - "prepend": 将译文前置到原文前面。 + ▪ "replace": 用译文替换原文。 + + ▪ "append": 将译文追加到原文后面。 + + ▪ "prepend": 将译文前置到原文前面。 + 默认为 "replace"。 separator (str): 在 "append" 或 "prepend" 模式下,用于分隔原文和译文的字符串。 默认为换行符 "\n"。 segment_mode (Literal["line", "paragraph", "none"]): 分段模式。 - - "line": 按行分段(每行独立翻译) - - "paragraph": 按段落分段(连续非空行合并为段落) - - "none": 不分段(全文视为一个段落) + ▪ "line": 按行分段(每行独立翻译) + + ▪ "paragraph": 按段落分段(连续非空行合并为段落) + + ▪ "none": 不分段(全文视为一个段落) + 默认为 "line"。 """ insert_mode: Literal["replace", "append", "prepend"] = "replace" @@ -93,7 +100,7 @@ class TXTTranslator(AiTranslator): if self.segment_mode == "line": return self._segment_by_line(txt_content) - elif self.segment_mode == "paragraph": # paragraph mode + elif self.segment_mode == "paragraph": return self._segment_by_paragraph(txt_content) else: return [txt_content] @@ -101,86 +108,40 @@ class TXTTranslator(AiTranslator): def _segment_by_line(self, txt_content: str) -> List[str]: """ 按行分段模式:每行作为独立分段。 - - Args: - txt_content (str): 文本内容 - - Returns: - List[str]: 按行分段的文本列表 """ - # 简单按行分割,保留所有行(包括空行) return txt_content.splitlines() def _segment_by_paragraph(self, txt_content: str) -> List[str]: """ - 按段落分段模式:连续非空行合并为段落,空行单独处理。 - - Args: - txt_content (str): 文本内容 - - Returns: - List[str]: 按段落分段的文本列表 + 按段落分段模式:使用正则表达式按空行分割,并保留分隔符。 """ - lines = 
txt_content.splitlines() - segments = [] # 每个元素要么是文本段落,要么是空行标记 - - i = 0 - while i < len(lines): - if lines[i].strip(): # 非空行 → 文本段落 - # 收集连续的非空行 - paragraph_lines = [] - while i < len(lines) and lines[i].strip(): - paragraph_lines.append(lines[i]) - i += 1 - segments.append("\n".join(paragraph_lines)) - else: # 空行 → 空行标记 - # 收集连续的空行 - empty_lines = [] - while i < len(lines) and not lines[i].strip(): - empty_lines.append(lines[i]) - i += 1 - # 用特殊标记表示空行组(保持数量信息) - segments.append(f"@@EMPTY_LINES_{len(empty_lines)}@@") - - return segments + segments = re.split(r'(\n\s*\n)', txt_content) + return [s for s in segments if s] def _after_translate(self, translated_texts: List[str], original_texts: List[str]) -> bytes: """ 翻译后处理步骤:根据分段模式重建文档。 - - Args: - translated_texts (List[str]): 翻译后的文本列表。 - original_texts (List[str]): 原始文本列表。 - - Returns: - bytes: 新的TXT文件内容的字节流。 + 此函数现在接收两个长度完全相同的对齐列表。 """ if self.segment_mode == "line": return self._reconstruct_by_line(translated_texts, original_texts) - elif self.segment_mode == "paragraph": # paragraph mode + elif self.segment_mode == "paragraph": return self._reconstruct_by_paragraph(translated_texts, original_texts) else: return self._reconstruct_none(translated_texts, original_texts) - def _reconstruct_by_line(self, translated_texts: List[str], original_lines: List[str]) -> bytes: + def _reconstruct_by_line(self, translated_lines: List[str], original_lines: List[str]) -> bytes: """ 按行模式重建文档。 - - Args: - translated_texts (List[str]): 翻译后的行列表 - original_lines (List[str]): 原始行列表 - - Returns: - bytes: 重建的文档内容 """ processed_lines = [] for i, original_line in enumerate(original_lines): - # 如果是空行,直接保留 + # 如果原文是空行或仅包含空白字符,则直接保留 if not original_line.strip(): processed_lines.append(original_line) continue - translated_line = translated_texts[i] + translated_line = translated_lines[i] # 根据插入模式更新内容 if self.insert_mode == "replace": @@ -195,57 +156,34 @@ class TXTTranslator(AiTranslator): return "\n".join(processed_lines).encode('utf-8') - 
def _reconstruct_by_paragraph(self, translated_texts: List[str], original_segments: List[str]) -> bytes: + def _reconstruct_by_paragraph(self, translated_segments: List[str], original_segments: List[str]) -> bytes: """ 按段落模式重建文档。 - - Args: - translated_texts (List[str]): 翻译后的段落列表 - original_segments (List[str]): 原始分段列表 - - Returns: - bytes: 重建的文档内容 """ - result_lines = [] - translated_index = 0 - - for segment in original_segments: - # 处理空行组 - if segment.startswith("@@EMPTY_LINES_"): - empty_count = int(segment.split('_')[-2]) # 提取空行数量 - result_lines.extend([""] * empty_count) + result_parts = [] + for i, original_segment in enumerate(original_segments): + # 如果 segment 是纯空白(即空行分隔符),直接保留 + if not original_segment.strip(): + result_parts.append(original_segment) continue - # 处理文本段落 - if translated_index < len(translated_texts): - translated_text = translated_texts[translated_index] - translated_index += 1 + translated_segment = translated_segments[i] - # 根据插入模式处理 - if self.insert_mode == "replace": - result_lines.append(translated_text) - elif self.insert_mode == "append": - result_lines.append(segment + self.separator + translated_text) - elif self.insert_mode == "prepend": - result_lines.append(translated_text + self.separator + segment) - else: - result_lines.append(translated_text) + # 根据插入模式处理 + if self.insert_mode == "replace": + result_parts.append(translated_segment) + elif self.insert_mode == "append": + result_parts.append(original_segment + self.separator + translated_segment) + elif self.insert_mode == "prepend": + result_parts.append(translated_segment + self.separator + original_segment) else: - # 理论上不会发生,但安全处理 - result_lines.append(segment) + result_parts.append(translated_segment) - return "\n".join(result_lines).encode('utf-8') + return "".join(result_parts).encode('utf-8') def _reconstruct_none(self, translated_texts: List[str], original_texts: List[str]) -> bytes: """ 不分段模式重建文档。 - - Args: - translated_texts (List[str]): 翻译后的文本列表(应只包含一个元素) - 
original_texts (List[str]): 原始文本列表(应只包含一个元素) - - Returns: - bytes: 重建的文档内容 """ if not translated_texts or not original_texts: return b"" @@ -253,7 +191,6 @@ class TXTTranslator(AiTranslator): original_text = original_texts[0] translated_text = translated_texts[0] - # 根据插入模式处理 if self.insert_mode == "replace": result_text = translated_text elif self.insert_mode == "append": @@ -269,12 +206,6 @@ class TXTTranslator(AiTranslator): def translate(self, document: Document) -> Self: """ 同步翻译TXT文档。 - - Args: - document (Document): 待翻译的文档对象。 - - Returns: - Self: 返回翻译器实例,以支持链式调用。 """ original_segments = self._pre_translate(document) @@ -282,99 +213,50 @@ class TXTTranslator(AiTranslator): self.logger.info("\n文件中没有找到需要翻译的文本内容。") return self - # 过滤出需要翻译的文本段(非空行标记) - if self.segment_mode == "line": - texts_to_translate = [text for text in original_segments if text.strip()] - else: # paragraph mode - texts_to_translate = [text for text in original_segments if not text.startswith("@@EMPTY_LINES_")] + texts_to_translate = [text for text in original_segments if text.strip()] - # --- 步骤 1: (可选) 术语提取 --- if self.glossary_agent and texts_to_translate: self.glossary_dict_gen = self.glossary_agent.send_segments(texts_to_translate, self.chunk_size) if self.translate_agent: self.translate_agent.update_glossary_dict(self.glossary_dict_gen) - # --- 步骤 2: 调用翻译Agent --- translated_texts_map = {} if self.translate_agent and texts_to_translate: translated_segments = self.translate_agent.send_segments(texts_to_translate, self.chunk_size) translated_texts_map = dict(zip(texts_to_translate, translated_segments)) - # 将翻译结果映射回原始分段列表 - final_translated_texts = [] - for segment in original_segments: - if self.segment_mode == "line": - # 行模式:空行保留,非空行翻译 - if segment.strip() and segment in translated_texts_map: - final_translated_texts.append(translated_texts_map[segment]) - else: - final_translated_texts.append(segment) - else: - # 段落模式:空行标记保留,文本段落翻译 - if segment.startswith("@@EMPTY_LINES_"): - 
final_translated_texts.append(segment) # 空行标记原样保留 - elif segment in translated_texts_map: - final_translated_texts.append(translated_texts_map[segment]) - else: - final_translated_texts.append(segment) + # 【核心逻辑】创建与原始分段列表等长的、完全对齐的最终翻译列表 + final_translated_texts = [translated_texts_map.get(text, text) for text in original_segments] - # --- 步骤 3: 后处理并更新文档内容 --- document.content = self._after_translate(final_translated_texts, original_segments) return self async def translate_async(self, document: Document) -> Self: """ 异步翻译TXT文档。 - - Args: - document (Document): 待翻译的文档对象。 - - Returns: - Self: 返回翻译器实例,以支持链式调用。 """ - # I/O密集型操作在线程中运行 original_segments = await asyncio.to_thread(self._pre_translate, document) if not original_segments: self.logger.info("\n文件中没有找到需要翻译的文本内容。") return self - # 过滤出需要翻译的文本段 - if self.segment_mode == "line": - texts_to_translate = [text for text in original_segments if text.strip()] - else: # paragraph mode - texts_to_translate = [text for text in original_segments if not text.startswith("@@EMPTY_LINES_")] + texts_to_translate = [text for text in original_segments if text.strip()] - # --- 步骤 1: (可选) 术语提取 (异步) --- if self.glossary_agent and texts_to_translate: self.glossary_dict_gen = await self.glossary_agent.send_segments_async(texts_to_translate, self.chunk_size) if self.translate_agent: self.translate_agent.update_glossary_dict(self.glossary_dict_gen) - # --- 步骤 2: 调用翻译Agent (异步) --- translated_texts_map = {} if self.translate_agent and texts_to_translate: translated_segments = await self.translate_agent.send_segments_async(texts_to_translate, self.chunk_size) translated_texts_map = dict(zip(texts_to_translate, translated_segments)) - # 将翻译结果映射回原始分段列表 - final_translated_texts = [] - for segment in original_segments: - if self.segment_mode == "line": - if segment.strip() and segment in translated_texts_map: - final_translated_texts.append(translated_texts_map[segment]) - else: - final_translated_texts.append(segment) - else: - if 
segment.startswith("@@EMPTY_LINES_"): - final_translated_texts.append(segment) - elif segment in translated_texts_map: - final_translated_texts.append(translated_texts_map[segment]) - else: - final_translated_texts.append(segment) + # 【核心逻辑】创建与原始分段列表等长的、完全对齐的最终翻译列表 + final_translated_texts = [translated_texts_map.get(text, text) for text in original_segments] - # --- 步骤 3: 后处理并更新文档内容 (I/O密集型) --- document.content = await asyncio.to_thread( self._after_translate, final_translated_texts, original_segments ) - return self + return self \ No newline at end of file