From 11ff6aa58ce89066b417824f281ebfdec85bb409 Mon Sep 17 00:00:00 2001 From: xunbu Date: Sun, 9 Nov 2025 10:35:59 +0800 Subject: [PATCH] =?UTF-8?q?txt=E7=BF=BB=E8=AF=91=E6=94=AF=E6=8C=81?= =?UTF-8?q?=E6=8C=89=E8=BF=9E=E7=BB=AD=E6=96=87=E6=9C=AC=E5=88=86=E6=AE=B5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docutranslate/app.py | 7 +- docutranslate/static/i18nData.json | 226 ++++++++++++------ docutranslate/static/index.html | 2 +- .../ai_translator/txt_translator.py | 218 +++++++++++++---- 4 files changed, 336 insertions(+), 117 deletions(-) diff --git a/docutranslate/app.py b/docutranslate/app.py index e3ec5ca..3acb0bc 100644 --- a/docutranslate/app.py +++ b/docutranslate/app.py @@ -207,7 +207,7 @@ async def lifespan(app: FastAPI): global_logger.propagate = False global_logger.setLevel(logging.INFO) print("应用启动完成,多任务状态已初始化。") - print(f"服务接口文档: http://127.0.0.1:{app.state.port_to_use}/docs") + print(f"服务接口文档: http://1227.0.0.1:{app.state.port_to_use}/docs") print(f"请用浏览器访问 http://127.0.0.1:{app.state.port_to_use}\n") yield # 清理任何可能残留的临时目录 @@ -469,6 +469,10 @@ class TextWorkflowParams(BaseWorkflowParams): "\n", description="当 insert_mode 为 'append' 或 'prepend' 时,用于分隔原文和译文的分隔符。", ) + segment_mode: Literal["line", "paragraph"] = Field( + "line", + description="分段模式。'line':按行分段(每行独立翻译),'paragraph':按段落分段(连续非空行合并为段落)。", + ) class JsonWorkflowParams(BaseWorkflowParams): @@ -961,6 +965,7 @@ async def _perform_translation( "glossary_dict", "insert_mode", "separator", + "segment_mode", "timeout", "retry", "system_proxy_enable", diff --git a/docutranslate/static/i18nData.json b/docutranslate/static/i18nData.json index 4695b47..ba64903 100644 --- a/docutranslate/static/i18nData.json +++ b/docutranslate/static/i18nData.json @@ -23,6 +23,10 @@ "separatorLabel": "分隔符", "separatorPlaceholderSimple": "例如: \\n---\\n", "separatorHelp": "当插入模式为附加或前置时,用于分隔原文和译文的字符。\\n 代表换行。", + "segmentModeLabel": "分段模式", + "segmentModeLine": "按行分段 
(每行独立翻译)", + "segmentModeParagraph": "按段落分段 (合并连续非空行)", + "segmentModeHelp": "选择如何将文本分割成块进行翻译。", "docxSettingsTitleText": "DOCX翻译选项", "insertModeHelpDocx": "选择如何将翻译后的文本插入。", "separatorPlaceholderStructured": "例如: ---", @@ -46,10 +50,14 @@ "jsonSettingsTitleText": "JSON路径配置", "jsonPathLabel": "需要翻译的JSON路径", "jsonPathPlaceholder": "每行一个路径, 例如:\n$.name\n$.*", - "jsonPathHelp": "采用jsonpath-ng的路径选择语法,每一行表示一个json路径。\n 将翻译路径匹配对象内的所有字符串", + "jsonPathHelp": "采用jsonpath-ng的路径选择语法,每一行表示一个json路径。\n 将翻译路径匹配对象内的所有字符串", "parsingSettingsTitleText": "解析配置", "parsingEngineLabel": "解析引擎", "parsingEngineHelp": "如果上传的文件本身是.md格式,此项可不选。", + "engineOptionIdentity": "已经是Markdown格式", + "engineOptionMineru": "Mineru(推荐)", + "engineOptionDocling": "Docling", + "engineOptionMineruDeploy": "Mineru部署服务", "getMineruTokenTitle": "获取Mineru Token", "mineruTokenPlaceholder": "使用Mineru引擎时需要", "modelVersionLabel": "Mineru 模型版本", @@ -73,6 +81,20 @@ "skipTranslationLabel": "跳过翻译", "platformLabel": "选择平台", "platformCustom": "自定义接口", + "platform302AI": "302.AI", + "platformOpenAI": "OpenAI", + "platformGemini": "Gemini", + "platformDeepSeek": "DeepSeek", + "platformDashScope": "阿里云百炼(DashScope)", + "platformVolces": "火山引擎(volces)", + "platformSiliconflow": "硅基流动(siliconflow CN)", + "platformBigmodel": "智谱AI(bigmodel CN)", + "platformDmxapiCN": "DMXAPI_CN", + "platformDmxapiGlobal": "DMXAPI_GLOBAL", + "platformJuguang": "聚光AI(juguang CN)", + "platformOpenRouter": "OpenRouter", + "platformLMStudio": "LM Studio", + "platformOllama": "Ollama", "baseUrlLabel": "API 地址 (Base URL)", "baseUrlPlaceholder": "OpenAi兼容地址", "getApiKeyTitle": "获取API Key", @@ -82,6 +104,17 @@ "systemProxyLabel": "启用系统代理", "translationSettingsTitleText": "翻译配置", "targetLanguageLabel": "目标语言", + "langZh": "中文(简体中文)", + "langEn": "英文(English)", + "langEs": "西班牙文(Español)", + "langFr": "法文(Français)", + "langDe": "德文(Deutsch)", + "langJa": "日文(日本語)", + "langKo": "韩文(한국어)", + "langRu": "俄文(Русский)", + "langPt": "葡萄牙文(Português)", + "langAr": 
"阿拉伯文(العَرَبِيَّة)", + "langVi": "越南文(tiếng Việt)", "targetLanguageCustom": "其它 (自定义)", "customLangPlaceholder": "请输入目标语言, 例如: Italian", "thinkingModeLabel": "思考模式", @@ -108,7 +141,7 @@ "glossaryGenConfigCustom": "自定义", "importConfigBtn": "导入配置", "exportConfigBtn": "导出配置", - "githubInfo": "GitHub主页(欢迎star❤):
\n https://github.com/xunbu/docutranslate", + "githubInfo": "GitHub主页(欢迎star❤):
\n https://github.com/xunbu/docutranslate", "qqGroupInfo": "交流QQ群: 1047781902", "taskListTitle": "任务列表", "newTaskBtn": "新建任务", @@ -127,7 +160,16 @@ "taskCardStartBtn": "开始翻译", "downloadMdEmbedded": "Markdown(嵌图)", "downloadMdZip": "Markdown压缩包", + "downloadTxt": "TXT", + "downloadJson": "JSON", + "downloadDocx": "DOCX", + "downloadXlsx": "XLSX", + "downloadCsv": "CSV", + "downloadSrt": "SRT", + "downloadEpub": "EPUB", "downloadAss": "ASS", + "downloadHtml": "HTML", + "downloadPdf": "PDF", "previewTitle": "预览", "previewBilingualBtn": "双语", "previewTranslatedOnlyBtn": "仅译文", @@ -152,10 +194,12 @@ "glossaryTableDestination": "译文 (dst)", "init_i18n_failed_alert": "加载界面翻译资源失败,请检查网络连接或联系管理员。", "init_failed_alert": "初始化失败,无法连接到后端服务。请检查服务是否运行或刷新页面。", - "status_selectFileFirst": "请先选择文件", - "status_fillRequired": "请填写所有必填项", + "apiHrefInfo302ai": "👈 通过此链接注册可享1美金免费额度", + "glossaryEmpty": "术语表为空。", + "status_selectFileFirst": "请先选择文件!", + "status_fillRequired": "请填写所有必填项!", "btn_initializing": "初始化中...", - "status_encodingAndSubmitting": "文件编码和提交中...", + "status_encodingAndSubmitting": "文件编码与提交中...", "status_requestOk": "请求成功,任务已开始", "btn_cancelTranslation": "取消翻译", "status_requestFail": "请求失败", @@ -163,35 +207,29 @@ "status_cancelling": "取消中...", "status_cancelSent": "已发送取消请求", "status_cancelFail": "取消失败", - "status_gettingStatus": "获取状态中...", "btn_reTranslate": "重新翻译", - "status_updateError": "状态更新失败", + "status_gettingStatus": "获取状态...", + "status_updateError": "状态更新出错", "preview_loading": "加载预览中...", - "preview_cantReadOriginal": "无法读取原文文件内容", + "preview_cantReadOriginal": "无法读取原始文件内容。", "preview_cantPreviewType": "无法预览此文件类型", - "preview_noOriginalCache": "无原文文件缓存,无法预览", - "preview_loadFailed": "预览加载失败", - "pdf_preparing": "PDF生成中,请稍候...", - "pdf_print_failed": "调用打印功能失败,请尝试手动右键打印", - "pdf_fetch_failed": "获取预览内容失败,无法生成PDF", + "preview_noOriginalCache": "未找到原始文件缓存,无法预览。", + "preview_loadFailed": "加载预览失败", + "pdf_preparing": "正在准备PDF...", + "pdf_print_failed": 
"调用打印功能失败。请尝试手动保存为PDF。", + "pdf_fetch_failed": "获取译文内容失败,无法生成PDF。", "preview_bilingual": "双语预览", "preview_translatedOnly": "仅译文预览", - "admin_tasklist_failed": "管理员模式:加载任务列表失败", + "admin_tasklist_failed": "管理员模式:加载任务列表失败。", "configImportSuccess": "配置导入成功!", - "configImportError": "配置导入失败,请检查文件格式是否正确。", - "apiHrefInfo302ai": "👈 通过此链接注册可享1美元免费额度", - "glossaryEmpty": "术语表为空", - "engineOptionIdentity": "已经是Markdown格式", - "engineOptionMineru": "Mineru(推荐)", - "engineOptionDocling": "Docling", - "engineOptionMineruDeploy": "Mineru部署服务" + "configImportError": "配置导入失败,文件格式错误。" }, "en": { "pageTitle": "DocuTranslate - Interactive Document Translation", "tutorialBtn": "Tutorial", - "projectContributeBtn": "Project Contribution", + "projectContributeBtn": "Contribute", "workflowTitle": "Select Workflow", - "workflowOptionMarkdown": "Convert to Markdown then Translate (.pdf/.md/.png, etc.)", + "workflowOptionMarkdown": "To Markdown then Translate (.pdf/.md/.png, etc.)", "workflowOptionTxt": "Plain Text Translation (.txt)", "workflowOptionEpub": "EPUB Translation (.epub)", "workflowOptionDocx": "DOCX Translation (.docx)", @@ -200,49 +238,57 @@ "workflowOptionAss": "ASS Subtitle Translation (.ass)", "workflowOptionJson": "JSON Translation (.json)", "workflowOptionHtml": "HTML Translation (.html)", - "autoWorkflowLabel": "Auto-select Workflow", + "autoWorkflowLabel": "Auto-select workflow", "txtSettingsTitleText": "TXT Translation Options", "insertModeLabel": "Insert Mode", - "insertModeReplace": "Replace Original (Replace)", - "insertModeAppend": "Append to Original (Append)", - "insertModePrepend": "Prepend to Original (Prepend)", + "insertModeReplace": "Replace Original", + "insertModeAppend": "Append to Original", + "insertModePrepend": "Prepend to Original", "insertModeHelpTxt": "Choose how to insert the translated text.", "separatorLabel": "Separator", "separatorPlaceholderSimple": "e.g., \\n---\\n", - "separatorHelp": "Character used to separate original and translated text 
in append or prepend mode. \\n represents a newline.", + "separatorHelp": "Character to separate original and translated text in append/prepend mode. \\n represents a newline.", + "segmentModeLabel": "Segment Mode", + "segmentModeLine": "By Line (Translate each line)", + "segmentModeParagraph": "By Paragraph (Merge consecutive lines)", + "segmentModeHelp": "Choose how to segment the text before translation.", "docxSettingsTitleText": "DOCX Translation Options", "insertModeHelpDocx": "Choose how to insert the translated text.", "separatorPlaceholderStructured": "e.g., ---", - "separatorHelpDocx": "In append mode, the translation will start a new paragraph. This is used to add extra content between the original and translated paragraphs. \\n can be used for newlines within the separator.", + "separatorHelpDocx": "In append mode, the translation will be in a new paragraph. This adds extra content between them. \\n for internal newlines.", "xlsxSettingsTitleText": "XLSX Translation Options", - "insertModeHelpXlsx": "Choose how to insert the translated text into cells.", - "xlsxTranslateRegionsLabel": "Translation Regions (Optional)", + "insertModeHelpXlsx": "Choose how to insert translated text into cells.", + "xlsxTranslateRegionsLabel": "Translate Regions (Optional)", "xlsxTranslateRegionsPlaceholder": "One region per line, e.g., Sheet1!A1:B10 (applies to all sheets if sheet name is omitted)", "srtSettingsTitleText": "SRT Translation Options", "insertModeHelpSrt": "Choose how to insert the translated text.", "epubSettingsTitleText": "EPUB Translation Options", "insertModeHelpEpub": "Choose how to insert the translated text.", - "separatorHelpEpub": "In append mode, the translation will start a new block. This is used to add extra content between the original and translated blocks. \\n can be used for newlines within the separator.", + "separatorHelpEpub": "In append mode, the translation will be a new block. This adds extra content between them. 
\\n for internal newlines.", "htmlSettingsTitleText": "HTML Translation Options", "insertModeHelpHtml": "Choose how to insert the translated text.", - "separatorHelpHtml": "In append mode, the translation will start a new block. This is used to add extra content between the original and translated blocks. \\n can be used for newlines within the separator.", + "separatorHelpHtml": "In append mode, the translation will be a new block. This adds extra content between them. \\n for internal newlines.", "assSettingsTitleText": "ASS Translation Options", "insertModeHelpAss": "Choose how to insert the translated text.", - "separatorPlaceholderAss": "e.g., \\N (newline character)", - "separatorHelpAss": "Character used to separate original and translated text in append or prepend mode. \\N is the newline character for the ASS format.", + "separatorPlaceholderAss": "e.g., \\N (newline)", + "separatorHelpAss": "Character to separate original and translated text in append/prepend mode. \\N is the newline for ASS format.", "jsonSettingsTitleText": "JSON Path Configuration", "jsonPathLabel": "JSON Paths to Translate", "jsonPathPlaceholder": "One path per line, e.g.:\n$.name\n$.*", - "jsonPathHelp": "Uses jsonpath-ng syntax. Each line represents a JSON path.\n All strings within the matched objects will be translated.", + "jsonPathHelp": "Uses jsonpath-ng syntax. Each line is a JSON path. 
All strings within matching objects will be translated.", "parsingSettingsTitleText": "Parsing Configuration", "parsingEngineLabel": "Parsing Engine", - "parsingEngineHelp": "If the uploaded file is already in .md format, this option is not required.", + "parsingEngineHelp": "Not required if the uploaded file is already in .md format.", + "engineOptionIdentity": "Already Markdown format", + "engineOptionMineru": "Mineru (Recommended)", + "engineOptionDocling": "Docling", + "engineOptionMineruDeploy": "Mineru Deploy Service", "getMineruTokenTitle": "Get Mineru Token", "mineruTokenPlaceholder": "Required when using Mineru engine", "modelVersionLabel": "Mineru Model Version", "modelVersionVlm": "VLM", "modelVersionPipline": "Pipeline", - "modelVersionHelp": "mineru VLM is a newer model in closed beta.", + "modelVersionHelp": "Mineru VLM is a newer internal test model.", "mineruDeployBaseUrlLabel": "Service Address (Base URL)", "mineruDeployBaseUrlPlaceholder": "e.g., http://127.0.0.1:8000", "mineruDeployBackendLabel": "Backend Type", @@ -259,9 +305,23 @@ "aiSettingsTitleText": "Translation Model", "skipTranslationLabel": "Skip Translation", "platformLabel": "Select Platform", - "platformCustom": "Custom API", + "platformCustom": "Custom Endpoint", + "platform302AI": "302.AI", + "platformOpenAI": "OpenAI", + "platformGemini": "Gemini", + "platformDeepSeek": "DeepSeek", + "platformDashScope": "DashScope (Aliyun)", + "platformVolces": "VolcEngine (volces)", + "platformSiliconflow": "SiliconFlow (CN)", + "platformBigmodel": "Zhipu AI (bigmodel CN)", + "platformDmxapiCN": "DMXAPI_CN", + "platformDmxapiGlobal": "DMXAPI_GLOBAL", + "platformJuguang": "Juguang AI (juguang CN)", + "platformOpenRouter": "OpenRouter", + "platformLMStudio": "LM Studio", + "platformOllama": "Ollama", "baseUrlLabel": "API Address (Base URL)", - "baseUrlPlaceholder": "OpenAI-compatible Address", + "baseUrlPlaceholder": "OpenAI-compatible address", "getApiKeyTitle": "Get API Key", "apiKeyPlaceholder": 
"Please enter your API Key", "modelIdLabel": "Model ID", @@ -269,33 +329,44 @@ "systemProxyLabel": "Enable System Proxy", "translationSettingsTitleText": "Translation Configuration", "targetLanguageLabel": "Target Language", + "langZh": "Chinese (Simplified)", + "langEn": "English", + "langEs": "Spanish", + "langFr": "French", + "langDe": "German", + "langJa": "Japanese", + "langKo": "Korean", + "langRu": "Russian", + "langPt": "Portuguese", + "langAr": "Arabic", + "langVi": "Vietnamese", "targetLanguageCustom": "Other (Custom)", "customLangPlaceholder": "Enter target language, e.g., Italian", "thinkingModeLabel": "Thinking Mode", - "thinkingModeTooltip": "Sets whether mixed-inference models should 'think'. Currently supported by Zhipu's glm4.5 series, Volcengine's seed1.6 series, SiliconFlow, Google's Gemini series, and 302AI (partial). Disabling thinking is recommended.", + "thinkingModeTooltip": "Sets whether the hybrid inference model should 'think'. Supported by Zhipu's glm4.5 series, VolcEngine's seed1.6 series, SiliconFlow, Google's Gemini series, and 302AI (partial). Disabling is recommended.", "thinkingModeEnable": "Enable", "thinkingModeDisable": "Disable (Recommended)", "thinkingModeDefault": "Default", "customPromptLabel": "Custom Prompt", - "customPromptPlaceholder": "Optional, e.g., 'Do not translate proper names.'", + "customPromptPlaceholder": "Optional, e.g., 'Do not translate personal names.'", "chunkSizeLabel": "Chunk Size", "resetBtn": "Reset", "concurrentLabel": "Concurrency", "retryLabel": "Retries", "glossaryGenTitle": "Glossary", "glossaryLabel": "Glossary (Optional)", - "glossaryHelp": "Select one or more CSV files. Files must contain 'src' and 'dst' headers, representing source and destination text respectively.", + "glossaryHelp": "Select one or more CSV files. 
Files must have 'src' and 'dst' columns for source and destination terms.", "viewGlossaryBtn": "View Glossary", "clearGlossaryBtn": "Clear", "glossaryGenEnableLabel": "Auto-generate Glossary", "glossaryCustomPromptLabel": "Custom Prompt", - "glossaryCustomPromptPlaceholder": "Glossary generation prompt", + "glossaryCustomPromptPlaceholder": "Prompt for glossary generation", "glossaryGenConfigLabel": "Glossary Generation Config", "glossaryGenConfigSame": "Same as Translation Config", "glossaryGenConfigCustom": "Custom", "importConfigBtn": "Import Config", "exportConfigBtn": "Export Config", - "githubInfo": "GitHub (star welcome❤):
\n https://github.com/xunbu/docutranslate", + "githubInfo": "GitHub Home (stars❤ welcome):
\n https://github.com/xunbu/docutranslate", "qqGroupInfo": "QQ Group: 1047781902", "taskListTitle": "Task List", "newTaskBtn": "New Task", @@ -303,10 +374,10 @@ "taskCardIdLabel": "Task ID", "taskCardIdPlaceholder": "Waiting for submission...", "taskCardFileDrop": "Click or drag file here", - "taskCardFileSelected": "File Selected", + "taskCardFileSelected": "File selected", "taskCardFilenameLabel": "Filename: ", - "taskCardLogLabel": "Log", - "copyLogsTooltip": "Copy Logs", + "taskCardLogLabel": "Logs", + "copyLogsTooltip": "Copy logs", "taskCardStatusWaiting": "Waiting for file upload...", "taskCardPreviewBtn": "Preview", "taskCardDownloadBtn": "Download", @@ -314,7 +385,16 @@ "taskCardStartBtn": "Start Translation", "downloadMdEmbedded": "Markdown (Embedded Images)", "downloadMdZip": "Markdown (Zip)", + "downloadTxt": "TXT", + "downloadJson": "JSON", + "downloadDocx": "DOCX", + "downloadXlsx": "XLSX", + "downloadCsv": "CSV", + "downloadSrt": "SRT", + "downloadEpub": "EPUB", "downloadAss": "ASS", + "downloadHtml": "HTML", + "downloadPdf": "PDF", "previewTitle": "Preview", "previewBilingualBtn": "Bilingual", "previewTranslatedOnlyBtn": "Translated Only", @@ -323,54 +403,50 @@ "previewTranslated": "Translated", "closeBtn": "Close", "downloadBtn": "Download", - "tutorialModalTitle": "Tutorial", - "tutorialModalBody": "

Video tutorials can be found by searching for docutranslate on Bilibili.

Welcome to DocuTranslate! Please follow these steps to translate your documents:

  1. Step 1: Select Workflow

    At the top of the left settings panel, first select the processing flow that best suits your file type.

    Tip: 'Auto-select Workflow' is enabled by default. Simply upload your file, and the system will automatically match the appropriate workflow for you, simplifying the process.

    • Convert to Markdown then Translate: Suitable for translating PDF, Markdown, images, etc. This is the most versatile and powerful mode.
    • Plain Text Translation: For translating .txt plain text files.
    • EPUB Translation: For translating .epub e-book files.
    • DOCX Translation: For translating .docx Word documents.
    • XLSX Translation: For translating .xlsx or .csv spreadsheet files.
    • SRT Subtitle Translation: For translating .srt subtitle files.
    • ASS Subtitle Translation: For translating .ass advanced subtitle files.
    • JSON Translation: For translating specific fields in .json files.
    • HTML Translation: For translating .html web page files.
  2. Step 2: Configure Parameters

    After selecting a workflow, the relevant configuration options will appear below. Please complete the settings in order (all configurations are automatically saved in your browser):

    A. Workflow-Specific Options (Appear based on your choice in Step 1):

    • If you selected 'Convert to Markdown then Translate', configure Parsing Configuration:
      • Parsing Engine: Choose an engine to convert your file (like a PDF) into a translation-friendly Markdown format. If your file is already in Markdown format, no selection is needed.
      • Mineru Token: If you choose the minerU engine, you need to enter your Token here.
    • If you selected 'Plain Text/DOCX/XLSX/SRT/ASS/EPUB/HTML', configure its Translation Options:
      • Insert Mode: Defines how the translation result is placed in the document. You can choose to directly 'Replace' the original, 'Append' after the original, or 'Prepend' before the original.
      • Separator: When 'Append' or 'Prepend' mode is selected, this is used to insert a separator between the original and translated text (e.g., \\N is often used in ASS format, <br /> in EPUB format as a newline separator).
    • If you selected 'JSON Translation', configure JSON Paths:
      • JSON Paths to Translate: Enter one JSONPath expression per line. It will translate all strings in objects matching the path. For example: $.* (translate all strings), $..description (translate all values with the key description).

    B. General Options (Apply to all workflows):

    • Translation Model:
      • Select Platform/API Address/API Key/Model ID: Configure the AI translation service you wish to use. The stronger the model's capabilities, the lower the probability of errors and missed translations.
      • Skip Translation: If checked, only document parsing and format conversion will be performed, without calling the AI for translation.
    • Translation Configuration:
      • Target Language: Specify the target language for the translation.
      • Custom Prompt: Optional, add extra instructions, such as 'Do not translate proper names.'
      • Thinking Mode: A setting for some models that support mixed-inference. It is recommended to select 'Disable (Recommended)'.
      • Chunk Size/Concurrency, etc.: Advanced parameters for adjusting performance and API request behavior. Usually, the default values are fine.
    • Glossary:
      • Upload Glossary (Optional): Upload a CSV file (must contain 'src' and 'dst' columns) to ensure consistency and accuracy of specific term translations.
      • Auto-generate Glossary: When enabled, the program will first extract terms from the original text to generate a glossary before proceeding with the translation.
  3. Step 3: Upload File

    In the task list on the right, click or drag your document into the file upload area.

  4. Step 4: Start Translation

    After the file is successfully selected, click the Start Translation button at the bottom right of the task card. The system will start processing the task, and you can view the real-time progress in the log area.

  5. Step 5: View and Download

    Once the translation is complete, action buttons will appear at the bottom of the task card:

    • Preview: Compare the original and translated text in a side-panel that slides out.
    • Download: Download the translated document in various formats, including PDF, DOCX, Markdown, etc.
    • Attachments: If any additional files were generated during the translation process (like an auto-generated glossary), they can be downloaded here.
Important Note: All configurations are automatically saved locally in your browser for future use. You can also use the new 'Export Config' and 'Import Config' buttons to back up and restore your settings.
", - "tutorialUnderstandBtn": "I Understand", + "tutorialModalTitle": "User Guide", + "tutorialModalBody": "

Video tutorials are available on Bilibili, search for docutranslate.

Welcome to DocuTranslate! Follow these steps to translate your document:

  1. Step 1: Select Workflow

    At the top of the left settings panel, choose the process that best suits your file type.

    Tip: 'Auto-select workflow' is enabled by default. Just upload your file, and the system will automatically match the appropriate workflow.

    • To Markdown then Translate: For PDF, markdown, images, etc. This is the most versatile and powerful mode.
    • Plain Text Translation: For .txt files.
    • EPUB Translation: For .epub e-books.
    • DOCX Translation: For .docx Word documents.
    • XLSX Translation: For .xlsx or .csv spreadsheets.
    • SRT Subtitle Translation: For .srt subtitle files.
    • ASS Subtitle Translation: For .ass advanced subtitle files.
    • JSON Translation: For specific fields in .json files.
    • HTML Translation: For .html web pages.
  2. Step 2: Configure Parameters

    After selecting a workflow, relevant options will appear below. Configure them sequentially (all settings are saved in your browser):

    A. Workflow-Specific Options (Appear based on your choice in Step 1):

    • If 'To Markdown then Translate' is selected, configure Parsing Configuration:
      • Parsing Engine: Choose an engine to convert your file (like a PDF) to a translation-friendly Markdown format. Not needed if your file is already Markdown.
      • Mineru Token: Required if you select the minerU engine.
    • If 'Plain Text/DOCX/XLSX/SRT/ASS/EPUB/HTML' is selected, configure its Translation Options:
      • Insert Mode: Defines how the translation is placed. You can 'Replace' the original, 'Append' after, or 'Prepend' before.
      • Separator: When using 'Append' or 'Prepend', this inserts a separator (e.g., \\N for ASS, <br /> for EPUB).
    • If 'JSON Translation' is selected, configure JSON Paths:
      • JSON Paths to Translate: Enter one JSONPath expression per line. All strings in matching objects will be translated. E.g., $.* (translate all), $..description (translate all 'description' values).

    B. General Options (Apply to all workflows):

    • Translation Model:
      • Select Platform/API Address/API Key/Model ID: Configure your desired AI translation service. Stronger models lead to fewer errors and omissions.
      • Skip Translation: If checked, only document parsing and format conversion will be performed, without calling the AI for translation.
    • Translation Configuration:
      • Target Language: Specify the language to translate to.
      • Custom Prompt: Optional, add extra instructions like 'Do not translate personal names.'
      • Thinking Mode: For certain models, 'Disable (Recommended)' is suggested.
      • Chunk Size/Concurrency, etc.: Advanced parameters for performance tuning; defaults are usually fine.
    • Glossary:
      • Upload Glossary (Optional): Upload a CSV file (with 'src' and 'dst' columns) to ensure consistent and accurate translation of specific terms.
      • Auto-generate Glossary: If enabled, the program will first extract terms from the source text to create a glossary, then proceed with the translation.
  3. Step 3: Upload File

    In the task list on the right, click or drag your document into the file upload area.

  4. Step 4: Start Translation

    After selecting a file, click the Start Translation button on the task card. The process will begin, and you can monitor real-time progress in the log area.

  5. Step 5: View and Download

    Once translation is complete, action buttons will appear on the task card:

    • Preview: Compare the original and translated text side-by-side in a slide-out panel.
    • Download: Download the translated document in various formats, including PDF, DOCX, and Markdown.
    • Attachments: Download any additional files generated during the process (like an auto-generated glossary) here.
Important: All configurations are automatically saved locally in your browser for future use. You can also use the 'Export Config' and 'Import Config' buttons to back up and restore your settings.
", + "tutorialUnderstandBtn": "I understand", "contributorsModalTitle": "Thanks for Contributing", "contributorsPara1": "DocuTranslate is an open-source project! The community's needs and usage are the driving force behind its progress.", - "contributorsPara2": "Thank you to everyone who has funded the project, submitted code, provided valuable suggestions, and starred the project!", - "contributorsWelcome": "We welcome contributions in the following ways:", - "contributorsGithub": "GitHub Homepage", + "contributorsPara2": "A heartfelt thank you to all the friends who have sponsored the project, submitted code, provided valuable suggestions, and starred the project!", + "contributorsWelcome": "You are welcome to contribute in the following ways:", + "contributorsGithub": "GitHub Home", "contributorsPR": "Submit a Pull Request", "contributorsIssue": "Report an Issue", - "contributorsQQ": "Or contact the author via QQ Group: 1047781902", + "contributorsQQ": "Or contact the author via QQ group: 1047781902", "glossaryModalTitle": "Current Glossary", "glossaryTableSource": "Source (src)", "glossaryTableDestination": "Destination (dst)", - "init_i18n_failed_alert": "Failed to load interface translations. Please check your network connection or contact an administrator.", - "init_failed_alert": "Initialization failed, could not connect to the backend service. Please ensure the service is running and refresh the page.", - "status_selectFileFirst": "Please select a file first", - "status_fillRequired": "Please fill in all required fields", + "init_i18n_failed_alert": "Failed to load interface translation resources. Please check your network connection or contact the administrator.", + "init_failed_alert": "Initialization failed. Could not connect to the backend service. 
Please check if the service is running and refresh the page.", + "apiHrefInfo302ai": "👈 Register through this link to get a $1 free credit", + "glossaryEmpty": "The glossary is empty.", + "status_selectFileFirst": "Please select a file first!", + "status_fillRequired": "Please fill in all required fields!", "btn_initializing": "Initializing...", "status_encodingAndSubmitting": "Encoding and submitting file...", - "status_requestOk": "Request successful, task has started", + "status_requestOk": "Request successful, task started", "btn_cancelTranslation": "Cancel Translation", "status_requestFail": "Request failed", "status_initFail": "Initialization failed", "status_cancelling": "Cancelling...", "status_cancelSent": "Cancellation request sent", "status_cancelFail": "Cancellation failed", - "status_gettingStatus": "Getting status...", "btn_reTranslate": "Re-translate", - "status_updateError": "Status update failed", + "status_gettingStatus": "Getting status...", + "status_updateError": "Status update error", "preview_loading": "Loading preview...", - "preview_cantReadOriginal": "Could not read original file content", + "preview_cantReadOriginal": "Could not read original file content.", "preview_cantPreviewType": "Cannot preview this file type", - "preview_noOriginalCache": "No original file cached, cannot preview", - "preview_loadFailed": "Preview failed to load", - "pdf_preparing": "Generating PDF, please wait...", - "pdf_print_failed": "Failed to invoke print function. Please try right-clicking to print manually.", - "pdf_fetch_failed": "Failed to fetch preview content, cannot generate PDF.", + "preview_noOriginalCache": "Original file cache not found, cannot preview.", + "preview_loadFailed": "Failed to load preview", + "pdf_preparing": "Preparing PDF...", + "pdf_print_failed": "Failed to call print function. 
Please try saving to PDF manually.", + "pdf_fetch_failed": "Failed to fetch translated content, cannot generate PDF.", "preview_bilingual": "Bilingual Preview", "preview_translatedOnly": "Translated Only Preview", "admin_tasklist_failed": "Admin mode: Failed to load task list.", "configImportSuccess": "Configuration imported successfully!", - "configImportError": "Configuration import failed. Please check if the file format is correct.", - "apiHrefInfo302ai": "👈 Register through this link to enjoy a $1 free credit", - "glossaryEmpty": "Glossary is empty", - "engineOptionIdentity": "Already in Markdown format", - "engineOptionMineru": "Mineru (Recommended)", - "engineOptionDocling": "Docling", - "engineOptionMineruDeploy": "Mineru Deploy Service" + "configImportError": "Failed to import configuration, file format is incorrect." } } \ No newline at end of file diff --git a/docutranslate/static/index.html b/docutranslate/static/index.html index a0d3870..bcbf3e5 100644 --- a/docutranslate/static/index.html +++ b/docutranslate/static/index.html @@ -1 +1 @@ - DocuTranslate - 交互式文档翻译

DocuTranslate

如果上传的文件本身是.md格式,此项可不选。

Base URL:

选择一个或多个CSV文件。文件需包含'src'和'dst'两列标题,分别代表原文和译文。

GitHub主页(欢迎star❤):
https://github.com/xunbu/docutranslate

交流QQ群: 1047781902

version:

任务列表

LOGO

当前没有任务,点击“新建任务”开始吧!

预览
原文
译文
\ No newline at end of file + DocuTranslate - 交互式文档翻译

DocuTranslate

如果上传的文件本身是.md格式,此项可不选。

Base URL:

选择一个或多个CSV文件。文件需包含'src'和'dst'两列标题,分别代表原文和译文。

GitHub主页(欢迎star❤):
https://github.com/xunbu/docutranslate

交流QQ群: 1047781902

version:

任务列表

LOGO

当前没有任务,点击“新建任务”开始吧!

预览
原文
译文
\ No newline at end of file diff --git a/docutranslate/translator/ai_translator/txt_translator.py b/docutranslate/translator/ai_translator/txt_translator.py index da8b2ea..e09ee9e 100644 --- a/docutranslate/translator/ai_translator/txt_translator.py +++ b/docutranslate/translator/ai_translator/txt_translator.py @@ -24,15 +24,21 @@ class TXTTranslatorConfig(AiTranslatorConfig): separator (str): 在 "append" 或 "prepend" 模式下,用于分隔原文和译文的字符串。 默认为换行符 "\n"。 + segment_mode (Literal["line", "paragraph"]): + 分段模式。 + - "line": 按行分段(每行独立翻译) + - "paragraph": 按段落分段(连续非空行合并为段落) + 默认为 "paragraph"。 """ insert_mode: Literal["replace", "append", "prepend"] = "replace" separator: str = "\n" + segment_mode: Literal["line", "paragraph"] = "line" class TXTTranslator(AiTranslator): """ 一个用于翻译纯文本 (.txt) 文件的翻译器。 - 它会按行读取文件内容,对每一行进行翻译,然后根据配置将译文写回。 + 支持按行或按段落两种分段模式进行翻译。 """ def __init__(self, config: TXTTranslatorConfig): @@ -40,7 +46,7 @@ class TXTTranslator(AiTranslator): 初始化 TXTTranslator。 Args: - config (TxtTranslatorConfig): 翻译器的配置。 + config (TXTTranslatorConfig): 翻译器的配置。 """ super().__init__(config=config) self.chunk_size = config.chunk_size @@ -64,16 +70,17 @@ class TXTTranslator(AiTranslator): self.translate_agent = SegmentsTranslateAgent(agent_config) self.insert_mode = config.insert_mode self.separator = config.separator + self.segment_mode = config.segment_mode def _pre_translate(self, document: Document) -> List[str]: """ - 预处理步骤:解析TXT文件,按行分割文本。 + 预处理步骤:根据分段模式解析TXT文件。 Args: document (Document): 待处理的文档对象。 Returns: - List[str]: 待翻译的原文文本行列表。 + List[str]: 分段后的文本列表。 """ try: # 使用 utf-8-sig 解码以处理可能存在的BOM (Byte Order Mark) @@ -82,49 +89,146 @@ class TXTTranslator(AiTranslator): self.logger.error(f"无法解码TXT文件内容,请确保文件编码为UTF-8: {e}") return [] - # 按行分割文本,并保留空行,因为它们可能是格式的一部分 - original_texts = txt_content.splitlines() + if self.segment_mode == "line": + return self._segment_by_line(txt_content) + else: # paragraph mode + return self._segment_by_paragraph(txt_content) - return original_texts + 
def _segment_by_line(self, txt_content: str) -> List[str]: + """ + 按行分段模式:每行作为独立分段。 + + Args: + txt_content (str): 文本内容 + + Returns: + List[str]: 按行分段的文本列表 + """ + # 简单按行分割,保留所有行(包括空行) + return txt_content.splitlines() + + def _segment_by_paragraph(self, txt_content: str) -> List[str]: + """ + 按段落分段模式:连续非空行合并为段落,空行单独处理。 + + Args: + txt_content (str): 文本内容 + + Returns: + List[str]: 按段落分段的文本列表 + """ + lines = txt_content.splitlines() + segments = [] # 每个元素要么是文本段落,要么是空行标记 + + i = 0 + while i < len(lines): + if lines[i].strip(): # 非空行 → 文本段落 + # 收集连续的非空行 + paragraph_lines = [] + while i < len(lines) and lines[i].strip(): + paragraph_lines.append(lines[i]) + i += 1 + segments.append("\n".join(paragraph_lines)) + else: # 空行 → 空行标记 + # 收集连续的空行 + empty_lines = [] + while i < len(lines) and not lines[i].strip(): + empty_lines.append(lines[i]) + i += 1 + # 用特殊标记表示空行组(保持数量信息) + segments.append(f"@@EMPTY_LINES_{len(empty_lines)}@@") + + return segments def _after_translate(self, translated_texts: List[str], original_texts: List[str]) -> bytes: """ - 翻译后处理步骤:将译文根据配置模式与原文合并,并生成新的TXT文件内容。 + 翻译后处理步骤:根据分段模式重建文档。 Args: - translated_texts (List[str]): 翻译后的文本行列表。 - original_texts (List[str]): 原始文本行列表。 + translated_texts (List[str]): 翻译后的文本列表。 + original_texts (List[str]): 原始文本列表。 Returns: bytes: 新的TXT文件内容的字节流。 """ + if self.segment_mode == "line": + return self._reconstruct_by_line(translated_texts, original_texts) + else: # paragraph mode + return self._reconstruct_by_paragraph(translated_texts, original_texts) + + def _reconstruct_by_line(self, translated_texts: List[str], original_lines: List[str]) -> bytes: + """ + 按行模式重建文档。 + + Args: + translated_texts (List[str]): 翻译后的行列表 + original_lines (List[str]): 原始行列表 + + Returns: + bytes: 重建的文档内容 + """ processed_lines = [] - for i, original_text in enumerate(original_texts): - # 如果原文是空行或仅包含空白字符,则直接保留,不进行翻译处理 - if not original_text.strip(): - processed_lines.append(original_text) + for i, original_line in enumerate(original_lines): + # 
如果是空行,直接保留 + if not original_line.strip(): + processed_lines.append(original_line) continue - translated_text = translated_texts[i] + translated_line = translated_texts[i] # 根据插入模式更新内容 if self.insert_mode == "replace": - processed_lines.append(translated_text) + processed_lines.append(translated_line) elif self.insert_mode == "append": - # strip() 避免在原文和译文间产生多余的空白 - processed_lines.append(original_text.strip() + self.separator + translated_text.strip()) + processed_lines.append(original_line.strip() + self.separator + translated_line.strip()) elif self.insert_mode == "prepend": - processed_lines.append(translated_text.strip() + self.separator + original_text.strip()) + processed_lines.append(translated_line.strip() + self.separator + original_line.strip()) else: - self.logger.error(f"不正确的TxtTranslatorConfig参数: insert_mode='{self.insert_mode}'") - # 默认回退到替换模式,避免程序中断 - processed_lines.append(translated_text) + self.logger.error(f"不正确的insert_mode参数: '{self.insert_mode}'") + processed_lines.append(translated_line) - # 将所有处理后的行重新合成为一个字符串,以换行符分隔 - new_txt_content_str = "\n".join(processed_lines) + return "\n".join(processed_lines).encode('utf-8') - # 返回UTF-8编码的字节流 - return new_txt_content_str.encode('utf-8') + def _reconstruct_by_paragraph(self, translated_texts: List[str], original_segments: List[str]) -> bytes: + """ + Rebuild the document in paragraph mode. + + Args: + translated_texts (List[str]): Translated segments, aligned one-to-one with original_segments (empty-line markers included). + original_segments (List[str]): Original segments; "@@EMPTY_LINES_<n>@@" markers stand for runs of n blank lines. + + Returns: + bytes: The rebuilt document, encoded as UTF-8. + """ + result_lines = [] + # translated_texts is indexed in lockstep with original_segments (markers occupy a slot too) + + for translated_index, segment in enumerate(original_segments): + # Empty-line group: expand the marker back into blank lines + if segment.startswith("@@EMPTY_LINES_"): + empty_count = int(segment.split('_')[-1].rstrip('@')) # the count is the last "_" field, before the trailing "@@" + result_lines.extend([""] * empty_count) + continue + + # Text paragraph + if translated_index < len(translated_texts): + translated_text = translated_texts[translated_index] + # enumerate() advances the index, so no manual increment is needed here + + # Apply the insert mode + if self.insert_mode == "replace": + result_lines.append(translated_text) + elif self.insert_mode == "append": + 
result_lines.append(segment + self.separator + translated_text) + elif self.insert_mode == "prepend": + result_lines.append(translated_text + self.separator + segment) + else: + result_lines.append(translated_text) + else: + # 理论上不会发生,但安全处理 + result_lines.append(segment) + + return "\n".join(result_lines).encode('utf-8') def translate(self, document: Document) -> Self: """ @@ -136,14 +240,17 @@ class TXTTranslator(AiTranslator): Returns: Self: 返回翻译器实例,以支持链式调用。 """ - original_texts = self._pre_translate(document) + original_segments = self._pre_translate(document) - if not original_texts: + if not original_segments: self.logger.info("\n文件中没有找到需要翻译的文本内容。") return self - # 过滤掉仅包含空白字符的行,避免不必要的翻译API调用 - texts_to_translate = [text for text in original_texts if text.strip()] + # 过滤出需要翻译的文本段(非空行标记) + if self.segment_mode == "line": + texts_to_translate = [text for text in original_segments if text.strip()] + else: # paragraph mode + texts_to_translate = [text for text in original_segments if not text.startswith("@@EMPTY_LINES_")] # --- 步骤 1: (可选) 术语提取 --- if self.glossary_agent and texts_to_translate: @@ -157,11 +264,26 @@ class TXTTranslator(AiTranslator): translated_segments = self.translate_agent.send_segments(texts_to_translate, self.chunk_size) translated_texts_map = dict(zip(texts_to_translate, translated_segments)) - # 将翻译结果映射回原始行列表,非翻译行保持不变 - final_translated_texts = [translated_texts_map.get(text, text) for text in original_texts] + # 将翻译结果映射回原始分段列表 + final_translated_texts = [] + for segment in original_segments: + if self.segment_mode == "line": + # 行模式:空行保留,非空行翻译 + if segment.strip() and segment in translated_texts_map: + final_translated_texts.append(translated_texts_map[segment]) + else: + final_translated_texts.append(segment) + else: + # 段落模式:空行标记保留,文本段落翻译 + if segment.startswith("@@EMPTY_LINES_"): + final_translated_texts.append(segment) # 空行标记原样保留 + elif segment in translated_texts_map: + final_translated_texts.append(translated_texts_map[segment]) + 
else: + final_translated_texts.append(segment) # --- 步骤 3: 后处理并更新文档内容 --- - document.content = self._after_translate(final_translated_texts, original_texts) + document.content = self._after_translate(final_translated_texts, original_segments) return self async def translate_async(self, document: Document) -> Self: @@ -175,14 +297,17 @@ class TXTTranslator(AiTranslator): Self: 返回翻译器实例,以支持链式调用。 """ # I/O密集型操作在线程中运行 - original_texts = await asyncio.to_thread(self._pre_translate, document) + original_segments = await asyncio.to_thread(self._pre_translate, document) - if not original_texts: + if not original_segments: self.logger.info("\n文件中没有找到需要翻译的文本内容。") return self - # 过滤掉仅包含空白字符的行 - texts_to_translate = [text for text in original_texts if text.strip()] + # 过滤出需要翻译的文本段 + if self.segment_mode == "line": + texts_to_translate = [text for text in original_segments if text.strip()] + else: # paragraph mode + texts_to_translate = [text for text in original_segments if not text.startswith("@@EMPTY_LINES_")] # --- 步骤 1: (可选) 术语提取 (异步) --- if self.glossary_agent and texts_to_translate: @@ -196,11 +321,24 @@ class TXTTranslator(AiTranslator): translated_segments = await self.translate_agent.send_segments_async(texts_to_translate, self.chunk_size) translated_texts_map = dict(zip(texts_to_translate, translated_segments)) - # 将翻译结果映射回原始行列表 - final_translated_texts = [translated_texts_map.get(text, text) for text in original_texts] + # 将翻译结果映射回原始分段列表 + final_translated_texts = [] + for segment in original_segments: + if self.segment_mode == "line": + if segment.strip() and segment in translated_texts_map: + final_translated_texts.append(translated_texts_map[segment]) + else: + final_translated_texts.append(segment) + else: + if segment.startswith("@@EMPTY_LINES_"): + final_translated_texts.append(segment) + elif segment in translated_texts_map: + final_translated_texts.append(translated_texts_map[segment]) + else: + final_translated_texts.append(segment) # --- 步骤 3: 后处理并更新文档内容 
(I/O密集型) --- document.content = await asyncio.to_thread( - self._after_translate, final_translated_texts, original_texts + self._after_translate, final_translated_texts, original_segments ) return self \ No newline at end of file