diff --git a/docutranslate/static/i18nData.json b/docutranslate/static/i18nData.json index 8f9a21c..5bf41e3 100644 --- a/docutranslate/static/i18nData.json +++ b/docutranslate/static/i18nData.json @@ -38,6 +38,8 @@ "separatorHelpEpub": "当插入模式为附加或前置时,用于分隔原文和译文的字符。<br /> 是EPUB (HTML) 中的换行标签。", "htmlSettingsTitleText": "HTML翻译选项", "insertModeHelpHtml": "选择如何将翻译后的文本插入。", + "separatorPlaceholderHtml": "例如:
<br> (换行标签)", + "separatorHelpHtml": "当插入模式为附加或前置时,用于分隔原文和译文的字符。<br> 是HTML中的换行标签。", "assSettingsTitleText": "ASS翻译选项", "insertModeHelpAss": "选择如何将翻译后的文本插入。", "separatorPlaceholderAss": "例如: \\N (换行符)", @@ -53,7 +55,7 @@ "mineruTokenPlaceholder": "使用Mineru引擎时需要", "modelVersionLabel": "Mineru 模型版本", "modelVersionVlm": "VLM", - "modelVersionPipeline": "Pipeline", + "modelVersionPipline": "Pipeline", "modelVersionHelp": "mineru VLM是更新的内测模型。", "formulaOcrLabel": "公式识别", "codeOcrLabel": "代码识别", @@ -105,7 +107,7 @@ "taskCardIdPlaceholder": "等待提交...", "taskCardFileDrop": "点击或拖拽文件到此处", "taskCardFileSelected": "文件已选择", - "taskCardFilenameLabel": "文件名: ", + "taskCardFilenameLabel": "文件名:", "taskCardLogLabel": "日志", "copyLogsTooltip": "复制日志", "taskCardStatusWaiting": "等待上传文件...", @@ -124,7 +126,7 @@ "closeBtn": "关闭", "downloadBtn": "下载", "tutorialModalTitle": "使用教程", - "tutorialModalBody": "

视频教程可以在B站搜索 docutranslate 获取。

欢迎使用 DocuTranslate!请按照以下步骤完成文档翻译:

  1. 第一步:选择工作流

    在左侧配置面板的顶部,首先选择最适合您文件类型的处理流程。

    提示: 默认已开启“自动选择工作流”。您只需上传文件,系统会自动为您匹配合适的工作流,简化操作。

  2. 第二步:配置参数

    选择工作流后,下方会显示相关的配置选项。请依次完成设置(所有配置都会自动保存在您的浏览器中):

    A. 工作流特定选项 (根据您第一步的选择出现):

    B. 通用选项 (适用于所有工作流):

  3. 第三步:上传文件

    在右侧的任务列表中,点击或拖拽您的文档到文件上传区域。

  4. 第四步:开始翻译

    文件选择成功后,点击任务卡片右下角的 开始翻译 按钮。系统将开始处理任务,您可以在日志区域查看实时进度。

  5. 第五步:查看与下载

    翻译完成后,任务卡片下方会出现操作按钮:

重要提示: 所有配置都会自动保存在您的浏览器本地,方便下次使用。您也可以使用新增的“导出配置”和“导入配置”按钮来备份和恢复您的设置。
", + "tutorialModalBody": "

视频教程可以在B站搜索 docutranslate 获取。

欢迎使用 DocuTranslate!请按照以下步骤完成文档翻译:

  1. 第一步:选择工作流

    在左侧配置面板的顶部,首先选择最适合您文件类型的处理流程。

    提示: 默认已开启“自动选择工作流”。您只需上传文件,系统会自动为您匹配合适的工作流,简化操作。

  2. 第二步:配置参数

    选择工作流后,下方会显示相关的配置选项。请依次完成设置(所有配置都会自动保存在您的浏览器中):

    A. 工作流特定选项 (根据您第一步的选择出现):

    B. 通用选项 (适用于所有工作流):

  3. 第三步:上传文件

    在右侧的任务列表中,点击或拖拽您的文档到文件上传区域。

  4. 第四步:开始翻译

    文件选择成功后,点击任务卡片右下角的 开始翻译 按钮。系统将开始处理任务,您可以在日志区域查看实时进度。

  5. 第五步:查看与下载

    翻译完成后,任务卡片下方会出现操作按钮:

重要提示: 所有配置都会自动保存在您的浏览器本地,方便下次使用。您也可以使用新增的“导出配置”和“导入配置”按钮来备份和恢复您的设置。
", "tutorialUnderstandBtn": "我明白了", "contributorsModalTitle": "感谢贡献", "contributorsPara1": "DocuTranslate是一个开源项目!大家的需求与使用是项目进步的动力。", @@ -139,45 +141,45 @@ "glossaryTableDestination": "译文 (dst)", "init_i18n_failed_alert": "加载界面翻译资源失败,请检查网络连接或联系管理员。", "init_failed_alert": "初始化失败,无法连接到后端服务。请检查服务是否运行或刷新页面。", - "apiHrefInfo302ai": "👈通过此链接注册可享1美元免费额度", "glossaryEmpty": "术语表为空。", - "engineOptionIdentity": "已经是markdown格式", - "engineOptionMineru": "Mineru(推荐)", - "engineOptionDocling": "Docling(本地解析)", - "status_selectFileFirst": "请先选择文件。", - "status_fillRequired": "请填写所有必填项。", - "btn_initializing": "初始化...", + "engineOptionIdentity": "已经是Markdown格式", + "engineOptionMineru": "Mineru (推荐)", + "engineOptionDocling": "Docling (本地)", + "apiHrefInfo302ai": "👈 通过此链接注册可享1美元免费额度", + "status_selectFileFirst": "请先选择一个文件!", + "status_fillRequired": "请填写所有必填项!", + "btn_initializing": "初始化中...", "status_encodingAndSubmitting": "文件编码和提交中...", "status_requestOk": "请求成功,任务已开始。", "btn_cancelTranslation": "取消翻译", "status_requestFail": "请求失败", - "status_initFail": "任务初始化失败", + "status_initFail": "初始化任务失败", "status_cancelling": "取消中...", - "status_cancelSent": "取消请求已发送。", - "status_cancelFail": "取消失败。", + "status_cancelSent": "已发送取消请求。", + "status_cancelFail": "取消失败", "status_gettingStatus": "获取状态中...", "btn_reTranslate": "重新翻译", "status_updateError": "状态更新失败。", - "preview_loading": "正在加载预览...", - "preview_cantReadOriginal": "无法读取原始文件内容。", - "preview_cantPreviewType": "无法预览此文件类型", - "preview_noOriginalCache": "无原始文件缓存可供预览。", - "preview_loadFailed": "预览加载失败。", "pdf_preparing": "正在准备PDF...", - "pdf_print_failed": "打印PDF失败,请尝试手动从预览中打印或下载HTML。", - "pdf_fetch_failed": "获取翻译内容失败,无法生成PDF。", - "preview_bilingual": "双语预览", + "pdf_print_failed": "启动打印失败,请尝试手动右键打印预览内容。", + "pdf_fetch_failed": "获取预览内容失败,无法生成PDF。", + "preview_loading": "正在加载预览...", + "preview_cantReadOriginal": "无法读取原文文件内容进行预览。", + "preview_cantPreviewType": "无法预览此文件类型", + "preview_noOriginalCache": "无原文文件缓存,无法预览。", + "preview_loadFailed": 
"加载预览失败。", + "preview_bilingual": "双语对照预览", "preview_translatedOnly": "仅译文预览", "admin_tasklist_failed": "管理员模式:加载任务列表失败。", - "configImportSuccess": "配置导入成功!", - "configImportError": "配置导入失败,文件格式错误。" + "configImportSuccess": "配置已成功导入!", + "configImportError": "导入配置失败,文件格式可能不正确。" }, "en": { "pageTitle": "DocuTranslate - Interactive Document Translation", "tutorialBtn": "Tutorial", "projectContributeBtn": "Project Collaboration", "workflowTitle": "Select Workflow", - "workflowOptionMarkdown": "Convert to Markdown then Translate (.pdf/.md/.png, etc.)", + "workflowOptionMarkdown": "Convert to Markdown then Translate (.pdf/.md/.png etc.)", "workflowOptionTxt": "Plain Text Translation (.txt)", "workflowOptionEpub": "EPUB Translation (.epub)", "workflowOptionDocx": "DOCX Translation (.docx)", @@ -195,49 +197,51 @@ "insertModeHelpTxt": "Choose how to insert the translated text.", "separatorLabel": "Separator", "separatorPlaceholderSimple": "e.g., \\n---\\n", - "separatorHelp": "Characters used to separate the original and translated text in append or prepend mode. \\n represents a newline.", + "separatorHelp": "Characters used to separate the original and translated text in append/prepend mode. 
\\n represents a newline.", "docxSettingsTitleText": "DOCX Translation Options", "insertModeHelpDocx": "Choose how to insert the translated text.", "separatorPlaceholder": "e.g., \\n---Translation---\\n", "xlsxSettingsTitleText": "XLSX Translation Options", "insertModeHelpXlsx": "Choose how to insert the translated text into cells.", "xlsxTranslateRegionsLabel": "Translation Regions (Optional)", - "xlsxTranslateRegionsPlaceholder": "One region per line, e.g., Sheet1!A1:B10 (applies to all sheets if sheet name is omitted).", + "xlsxTranslateRegionsPlaceholder": "One region per line, e.g., Sheet1!A1:B10 (applies to all sheets if sheet name is omitted)", "srtSettingsTitleText": "SRT Translation Options", "insertModeHelpSrt": "Choose how to insert the translated text.", "epubSettingsTitleText": "EPUB Translation Options", "insertModeHelpEpub": "Choose how to insert the translated text.", "separatorPlaceholderEpub": "e.g.,
<br /> (line break tag)", - "separatorHelpEpub": "Characters used to separate original and translated text in append or prepend mode. <br /> is the line break tag in EPUB (HTML).", + "separatorHelpEpub": "Characters used to separate the original and translated text in append/prepend mode. <br /> is the line break tag in EPUB (HTML).", "htmlSettingsTitleText": "HTML Translation Options", "insertModeHelpHtml": "Choose how to insert the translated text.", + "separatorPlaceholderHtml": "e.g., <br>
(line break tag)", + "separatorHelpHtml": "Characters used to separate the original and translated text in append/prepend mode. <br> is the line break tag in HTML.", "assSettingsTitleText": "ASS Translation Options", "insertModeHelpAss": "Choose how to insert the translated text.", "separatorPlaceholderAss": "e.g., \\N (newline character)", - "separatorHelpAss": "Characters used to separate original and translated text in append or prepend mode. \\N is the newline character in ASS format.", + "separatorHelpAss": "Characters used to separate the original and translated text in append/prepend mode. \\N is the newline character for the ASS format.", "jsonSettingsTitleText": "JSON Path Configuration", "jsonPathLabel": "JSON Paths to Translate", - "jsonPathPlaceholder": "One path per line, e.g.:\n$.name\n$.*", - "jsonPathHelp": "Uses jsonpath-ng syntax. Each line represents a JSON path. All strings within matching objects will be translated.", + "jsonPathPlaceholder": "One path per line, e.g.,\n$.name\n$.*", + "jsonPathHelp": "Uses jsonpath-ng syntax. One JSON path per line. 
All strings within the matched objects will be translated.", "parsingSettingsTitleText": "Parsing Configuration", "parsingEngineLabel": "Parsing Engine", - "parsingEngineHelp": "This can be omitted if the uploaded file is already in .md format.", + "parsingEngineHelp": "This option can be skipped if the uploaded file is already in .md format.", "getMineruTokenTitle": "Get Mineru Token", "mineruTokenPlaceholder": "Required when using the Mineru engine", "modelVersionLabel": "Mineru Model Version", "modelVersionVlm": "VLM", - "modelVersionPipeline": "Pipeline", - "modelVersionHelp": "mineru VLM is a newer model in internal testing.", + "modelVersionPipline": "Pipeline", + "modelVersionHelp": "Mineru VLM is a newer model in beta testing.", "formulaOcrLabel": "Formula Recognition", "codeOcrLabel": "Code Recognition", "aiSettingsTitleText": "Translation Model", "skipTranslationLabel": "Skip Translation", "platformLabel": "Select Platform", - "platformCustom": "Custom API", + "platformCustom": "Custom Endpoint", "baseUrlLabel": "API Address (Base URL)", - "baseUrlPlaceholder": "OpenAI-compatible Address", + "baseUrlPlaceholder": "OpenAI-compatible address", "getApiKeyTitle": "Get API Key", - "apiKeyPlaceholder": "Please enter your API Key", + "apiKeyPlaceholder": "Enter your API Key", "modelIdLabel": "Model ID", "modelIdPlaceholder": "e.g., gpt-4o, glm-4", "systemProxyLabel": "Enable System Proxy", @@ -246,39 +250,38 @@ "targetLanguageCustom": "Other (Custom)", "customLangPlaceholder": "Enter target language, e.g., Italian", "thinkingModeLabel": "Thinking Mode", - "thinkingModeTooltip": "Set whether the hybrid inference model should 'think'. Currently supported by Zhipu's glm4.5 series, Volcengine's seed1.6 series, SiliconFlow, Google's Gemini series, and 302AI (partially). Disabling 'thinking' is recommended.", + "thinkingModeTooltip": "Sets whether the mixed-inference model should perform 'thinking'. 
Currently supported by Zhipu's glm4.5 series, Volcengine's seed1.6 series, SiliconFlow platform, Google's Gemini series, and 302AI (partial). Disabling is recommended.", "thinkingModeEnable": "Enable", "thinkingModeDisable": "Disable (Recommended)", "thinkingModeDefault": "Default", "customPromptLabel": "Custom Prompt", - "customPromptPlaceholder": "Optional, e.g., 'Keep proper names in their original language.'", + "customPromptPlaceholder": "Optional, e.g., 'Do not translate personal names'", "chunkSizeLabel": "Chunk Size", "resetBtn": "Reset", "concurrentLabel": "Concurrency", "retryLabel": "Retries", "glossaryGenTitle": "Glossary", "glossaryLabel": "Glossary (Optional)", - "glossaryHelp": "Select one or more CSV files. Files must contain 'src' and 'dst' headers, representing the source and destination terms, respectively.", - "viewGlossaryBtn": "View Glossary", + "glossaryHelp": "Select one or more CSV files. Files must contain 'src' and 'dst' headers for source and destination terms.", "clearGlossaryBtn": "Clear", "glossaryGenEnableLabel": "Auto-generate Glossary", "glossaryCustomPromptLabel": "Custom Prompt", - "glossaryCustomPromptPlaceholder": "Glossary generation prompt", + "glossaryCustomPromptPlaceholder": "Prompt for glossary generation", "glossaryGenConfigLabel": "Glossary Generation Config", "glossaryGenConfigSame": "Same as Translation Config", "glossaryGenConfigCustom": "Custom", "importConfigBtn": "Import Config", "exportConfigBtn": "Export Config", - "githubInfo": "GitHub (stars❤ welcome):
https://github.com/xunbu/docutranslate", + "githubInfo": "GitHub Home (stars welcome❤):
https://github.com/xunbu/docutranslate", "qqGroupInfo": "QQ Group: 1047781902", "taskListTitle": "Task List", "newTaskBtn": "New Task", "noTaskPlaceholder": "No tasks yet. Click 'New Task' to get started!", "taskCardIdLabel": "Task ID", - "taskCardIdPlaceholder": "Pending submission...", + "taskCardIdPlaceholder": "Waiting for submission...", "taskCardFileDrop": "Click or drag file here", "taskCardFileSelected": "File selected", - "taskCardFilenameLabel": "Filename: ", + "taskCardFilenameLabel": "Filename:", "taskCardLogLabel": "Logs", "copyLogsTooltip": "Copy logs", "taskCardStatusWaiting": "Waiting for file upload...", @@ -286,63 +289,64 @@ "taskCardDownloadBtn": "Download", "taskCardAttachmentBtn": "Attachments", "taskCardStartBtn": "Start Translation", - "downloadMdEmbedded": "Markdown (Embedded)", - "downloadMdZip": "Markdown (Zip)", + "downloadMdEmbedded": "Markdown(Embedded)", + "downloadMdZip": "Markdown Zip", "downloadAss": "ASS", "previewTitle": "Preview", "previewBilingualBtn": "Bilingual", "previewTranslatedOnlyBtn": "Translated Only", "previewOriginal": "Original", - "previewTranslated": "Translation", + "previewTranslated": "Translated", "closeBtn": "Close", "downloadBtn": "Download", "tutorialModalTitle": "User Guide", - "tutorialModalBody": "

Video tutorials can be found by searching for docutranslate on Bilibili.

Welcome to DocuTranslate! Please follow these steps to translate your document:

  1. Step 1: Select Workflow

    At the top of the left settings panel, first choose the processing flow that best suits your file type.

    Tip: \"Auto-select Workflow\" is enabled by default. Simply upload your file, and the system will automatically match the appropriate workflow for you, simplifying the process.

  2. Step 2: Configure Parameters

    After selecting a workflow, relevant configuration options will appear below. Please complete the settings in order (all configurations are automatically saved in your browser):

    A. Workflow-Specific Options (Appear based on your choice in Step 1):

    B. General Options (Apply to all workflows):

  3. Step 3: Upload File

    In the task list on the right, click or drag your document into the file upload area.

  4. Step 4: Start Translation

    Once the file is selected, click the Start Translation button on the bottom right of the task card. The system will begin processing, and you can view real-time progress in the log area.

  5. Step 5: View & Download

    After the translation is complete, action buttons will appear on the task card:

Important Note: All settings are automatically saved locally in your browser for future use. You can also use the new \"Export Config\" and \"Import Config\" buttons to back up and restore your settings.
", + "tutorialModalBody": "

Video tutorials are available on Bilibili by searching for docutranslate.

Welcome to DocuTranslate! Follow these steps to translate your documents:

  1. Step 1: Choose a Workflow

    At the top of the left-side panel, select the processing flow that best suits your file type.

    Tip: 'Auto-select Workflow' is enabled by default. Simply upload your file, and the system will automatically choose the appropriate workflow for you.

  2. Step 2: Configure Parameters

    After selecting a workflow, relevant configuration options will appear below. Please set them up as needed (all settings are automatically saved in your browser):

    A. Workflow-Specific Options (Appear based on your choice in Step 1):

    B. General Options (Apply to all workflows):

  3. Step 3: Upload File

    In the task list on the right, click or drag your document into the file upload area.

  4. Step 4: Start Translation

    Once the file is selected, click the Start Translation button on the task card. The system will begin processing, and you can monitor progress in the log area.

  5. Step 5: View and Download

    After the translation is complete, action buttons will appear on the task card:

Important Note: All settings are automatically saved in your browser's local storage for future use. You can also use the 'Export Config' and 'Import Config' buttons to back up and restore your settings.
", "tutorialUnderstandBtn": "I Understand", "contributorsModalTitle": "Thanks for Contributing", - "contributorsPara1": "DocuTranslate is an open-source project! The needs and usage from the community are what drive its progress.", - "contributorsPara2": "Thank you to everyone who has sponsored the project, submitted code, provided valuable suggestions, and starred the project!", - "contributorsWelcome": "We welcome contributions in the following ways:", + "contributorsPara1": "DocuTranslate is an open-source project! The needs and usage of the community are the driving force behind its progress.", + "contributorsPara2": "A heartfelt thank you to everyone who has sponsored the project, submitted code, provided valuable suggestions, and starred the project!", + "contributorsWelcome": "You are welcome to contribute in the following ways:", "contributorsGithub": "GitHub Home", "contributorsPR": "Submit a Pull Request", "contributorsIssue": "Report an Issue", - "contributorsQQ": "Or contact the author via QQ group: 1047781902", + "contributorsQQ": "Or contact the author via QQ Group: 1047781902", "glossaryModalTitle": "Current Glossary", "glossaryTableSource": "Source (src)", "glossaryTableDestination": "Destination (dst)", "init_i18n_failed_alert": "Failed to load interface translations. Please check your network connection or contact an administrator.", "init_failed_alert": "Initialization failed, could not connect to the backend service. 
Please ensure the service is running and refresh the page.", - "apiHrefInfo302ai": "👈 Register through this link to enjoy a $1 free credit", - "glossaryEmpty": "Glossary is empty.", + "viewGlossaryBtn": "View Glossary", + "glossaryEmpty": "The glossary is empty.", "engineOptionIdentity": "Already in Markdown format", "engineOptionMineru": "Mineru (Recommended)", - "engineOptionDocling": "Docling (Local Parsing)", - "status_selectFileFirst": "Please select a file first.", - "status_fillRequired": "Please fill in all required fields.", + "engineOptionDocling": "Docling (Local)", + "apiHrefInfo302ai": "👈 Register through this link to enjoy a $1 free credit", + "status_selectFileFirst": "Please select a file first!", + "status_fillRequired": "Please fill in all required fields!", "btn_initializing": "Initializing...", "status_encodingAndSubmitting": "Encoding and submitting file...", "status_requestOk": "Request successful, task has started.", "btn_cancelTranslation": "Cancel Translation", "status_requestFail": "Request failed", - "status_initFail": "Task initialization failed", + "status_initFail": "Failed to initialize task", "status_cancelling": "Cancelling...", "status_cancelSent": "Cancellation request sent.", - "status_cancelFail": "Cancellation failed.", + "status_cancelFail": "Cancellation failed", "status_gettingStatus": "Getting status...", "btn_reTranslate": "Re-translate", "status_updateError": "Status update failed.", - "preview_loading": "Loading preview...", - "preview_cantReadOriginal": "Could not read original file content.", - "preview_cantPreviewType": "Cannot preview this file type", - "preview_noOriginalCache": "No original file cache available for preview.", - "preview_loadFailed": "Preview failed to load.", "pdf_preparing": "Preparing PDF...", - "pdf_print_failed": "Failed to print PDF. 
Please try printing manually from the preview or download the HTML.", - "pdf_fetch_failed": "Failed to fetch translated content, cannot generate PDF.", + "pdf_print_failed": "Failed to start printing. Please try right-clicking the preview and printing manually.", + "pdf_fetch_failed": "Failed to fetch preview content, cannot generate PDF.", + "preview_loading": "Loading preview...", + "preview_cantReadOriginal": "Could not read original file content for preview.", + "preview_cantPreviewType": "Cannot preview this file type", + "preview_noOriginalCache": "No original file cached, cannot preview.", + "preview_loadFailed": "Failed to load preview.", "preview_bilingual": "Bilingual Preview", - "preview_translatedOnly": "Translated-Only Preview", + "preview_translatedOnly": "Translated Only Preview", "admin_tasklist_failed": "Admin mode: Failed to load task list.", "configImportSuccess": "Configuration imported successfully!", - "configImportError": "Configuration import failed. Invalid file format." + "configImportError": "Failed to import configuration. The file format may be incorrect." } } \ No newline at end of file diff --git a/docutranslate/static/index.html b/docutranslate/static/index.html index 8739d41..880cdf6 100644 --- a/docutranslate/static/index.html +++ b/docutranslate/static/index.html @@ -1 +1 @@ - DocuTranslate - 交互式文档翻译

DocuTranslate

如果上传的文件本身是.md格式,此项可不选。
mineru VLM是更新的内测模型。

Base URL:

选择一个或多个CSV文件。文件需包含'src'和'dst'两列标题,分别代表原文和译文。

GitHub主页(欢迎star❤):
https://github.com/xunbu/docutranslate

交流QQ群: 1047781902

version:

任务列表

LOGO

当前没有任务,点击“新建任务”开始吧!

预览
原文
译文
\ No newline at end of file + DocuTranslate - 交互式文档翻译

DocuTranslate

如果上传的文件本身是.md格式,此项可不选。
mineru VLM是更新的内测模型。

Base URL:

选择一个或多个CSV文件。文件需包含'src'和'dst'两列标题,分别代表原文和译文。

GitHub主页(欢迎star❤):
https://github.com/xunbu/docutranslate

交流QQ群: 1047781902

version:

任务列表

LOGO

当前没有任务,点击“新建任务”开始吧!

预览
原文
译文
\ No newline at end of file diff --git a/docutranslate/translator/ai_translator/html_translator.py b/docutranslate/translator/ai_translator/html_translator.py index 86da47e..3fceaa3 100644 --- a/docutranslate/translator/ai_translator/html_translator.py +++ b/docutranslate/translator/ai_translator/html_translator.py @@ -150,6 +150,7 @@ class HtmlTranslator(AiTranslator): translated_texts: list[str], original_texts: list[str]) -> bytes: """ 将翻译后的文本写回到BeautifulSoup对象中对应的节点或属性,并返回最终的HTML字节流。 + 【版本 3.0: 修正了对HTML分隔符的支持】 """ if len(translatable_items) != len(translated_texts): self.logger.error("翻译前后的文本片段数量不匹配 (%d vs %d),跳过写入操作以防损坏文件。", @@ -160,36 +161,59 @@ class HtmlTranslator(AiTranslator): translated_text = translated_texts[i] original_text = original_texts[i] - new_content = "" - if self.insert_mode == "replace": - if item['type'] == 'node': - # 对于文本节点,保留原文前后的空白字符,这对维持内联元素的间距至关重要。 - leading_space = original_text[:len(original_text) - len(original_text.lstrip())] - trailing_space = original_text[len(original_text.rstrip()):] - new_content = leading_space + translated_text + trailing_space - else: # 属性 - new_content = translated_text - - elif self.insert_mode == "append": - new_content = original_text + self.separator + translated_text - elif self.insert_mode == "prepend": - new_content = translated_text + self.separator + original_text - else: - self.logger.error(f"不正确的HtmlTranslatorConfig参数: insert_mode='{self.insert_mode}'") - new_content = original_text # 出错时恢复原文 - - # 根据类型将内容写回 if item['type'] == 'node': node = item['object'] - # 检查节点是否仍然在解析树中,以防在处理过程中被移动或删除 - if node.parent: - node.replace_with(NavigableString(new_content)) + if not node.parent: # 确保节点仍然在树中 + continue + + # --- 构造包含HTML的新内容字符串 --- + new_content_str = "" + if self.insert_mode == "replace": + leading_space = original_text[:len(original_text) - len(original_text.lstrip())] + trailing_space = original_text[len(original_text.rstrip()):] + new_content_str = leading_space + translated_text + 
trailing_space + elif self.insert_mode == "append": + new_content_str = original_text + self.separator + translated_text + elif self.insert_mode == "prepend": + new_content_str = translated_text + self.separator + original_text + else: + self.logger.error(f"不正确的HtmlTranslatorConfig参数: insert_mode='{self.insert_mode}'") + new_content_str = original_text + + # --- 核心修改:正确地将HTML字符串片段插入DOM --- + # 1. 使用一个临时的父标签(如此处的'div')来解析HTML片段, + # 这是在BeautifulSoup中处理片段的标准做法,避免了自动添加。 + temp_soup = BeautifulSoup(f"
<div>{new_content_str}</div>
", 'html.parser') + new_elements = temp_soup.div.contents + + # 2. 将解析出的新元素(可能是文本节点和
标签的混合) + # 以相反的顺序插入到原始节点之后。这样做可以保持它们的原始顺序。 + for element in reversed(new_elements): + node.insert_after(element) + + # 3. 移除原始的文本节点 + node.decompose() + elif item['type'] == 'attribute': + # --- 属性逻辑保持不变,因为属性值不支持HTML --- tag = item['tag'] attr = item['attribute'] - tag[attr] = new_content + new_attr_value = "" + + # 在属性值中,
将被视为普通文本,这是正确的行为 + separator_for_attr = self.separator.replace('
', ' ').replace('
', ' ') + + if self.insert_mode == "replace": + new_attr_value = translated_text + elif self.insert_mode == "append": + new_attr_value = original_text + separator_for_attr + translated_text + elif self.insert_mode == "prepend": + new_attr_value = translated_text + separator_for_attr + original_text + else: + new_attr_value = original_text + + tag[attr] = new_attr_value.strip() - # 将修改后的BeautifulSoup对象编码为utf-8字节流 return soup.encode('utf-8') def translate(self, document: Document) -> Self: