From 0cf0aa15da1b5621f4248e8ee5423d228db63c8d Mon Sep 17 00:00:00 2001 From: xunbu Date: Wed, 20 Aug 2025 13:36:01 +0800 Subject: [PATCH] =?UTF-8?q?=E6=94=AF=E6=8C=81mineruVLM=E6=A8=A1=E5=9E=8B?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docutranslate/app.py | 9 +++-- .../converter/x2md/converter_mineru.py | 7 ++-- docutranslate/static/i18nData.json | 34 ++++++++++++++++--- docutranslate/static/index.html | 2 +- 4 files changed, 42 insertions(+), 10 deletions(-) diff --git a/docutranslate/app.py b/docutranslate/app.py index e5c264e..de23b6c 100644 --- a/docutranslate/app.py +++ b/docutranslate/app.py @@ -205,7 +205,7 @@ class BaseWorkflowParams(BaseModel): concurrent: int = Field(default=default_params["concurrent"], description="并发请求数。") temperature: float = Field(default=default_params["temperature"], description="LLM温度参数。") thinking: ThinkingMode = Field(default=default_params["thinking"], description="是否启用深度思考", - examples=["default", "enable", "disable"]), + examples=["default", "enable", "disable"]) custom_prompt: Optional[str] = Field(None, description="用户自定义的翻译Prompt。", alias="custom_prompt") @@ -220,6 +220,7 @@ class MarkdownWorkflowParams(BaseWorkflowParams): mineru_token: Optional[str] = Field(None, description="当 `convert_engine` 为 'mineru' 时必填的API令牌。") formula_ocr: bool = Field(True, description="是否对公式进行OCR识别。对 `mineru` 和 `docling` 均有效。") code_ocr: bool = Field(True, description="是否对代码块进行OCR识别。仅 `docling` 引擎有效。") + model_version: Literal["pipline", "vlm"] = Field("vlm", description="Mineru模型的版本,'vlm'是更新的版本。仅 `mineru` 引擎有效。") @field_validator('mineru_token') def check_mineru_token(cls, v, values): @@ -479,7 +480,8 @@ async def _perform_translation( converter_config = None if payload.convert_engine == 'mineru': converter_config = ConverterMineruConfig(logger=task_logger, mineru_token=payload.mineru_token, - formula_ocr=payload.formula_ocr) + formula_ocr=payload.formula_ocr, + 
model_version=payload.model_version) elif payload.convert_engine == 'docling' and DOCLING_EXIST: converter_config = ConverterDoclingConfig(logger=task_logger, code_ocr=payload.code_ocr, formula_ocr=payload.formula_ocr) @@ -1214,6 +1216,7 @@ async def temp_translate( temperature: float = Body(default_params["temperature"]), thinking: ThinkingMode = Body(default_params["thinking"]), chunk_size: int = Body(default_params["chunk_size"]), custom_prompt: Optional[str] = Body(None), + model_version: Literal["pipline", "vlm"] = Body("vlm"), ): file_name = Path(file_name) try: @@ -1222,7 +1225,7 @@ async def temp_translate( decoded_content = file_content.encode('utf-8') try: workflow_config = MarkdownBasedWorkflowConfig( - convert_engine="mineru", converter_config=ConverterMineruConfig(mineru_token=mineru_token), + convert_engine="mineru", converter_config=ConverterMineruConfig(mineru_token=mineru_token, model_version=model_version), translator_config=MDTranslatorConfig(base_url=base_url, api_key=api_key, model_id=model_id, to_lang=to_lang, custom_prompt=custom_prompt, temperature=temperature, thinking=thinking, chunk_size=chunk_size, concurrent=concurrent), diff --git a/docutranslate/converter/x2md/converter_mineru.py b/docutranslate/converter/x2md/converter_mineru.py index 58e1ac2..9956bd7 100644 --- a/docutranslate/converter/x2md/converter_mineru.py +++ b/docutranslate/converter/x2md/converter_mineru.py @@ -2,7 +2,7 @@ import asyncio import time import zipfile from dataclasses import dataclass -from typing import Hashable +from typing import Hashable, Literal import httpx @@ -18,9 +18,10 @@ URL = 'https://mineru.net/api/v4/file-urls/batch' class ConverterMineruConfig(X2MarkdownConverterConfig): mineru_token: str formula_ocr: bool = True + model_version: Literal["pipline", "vlm"] = "vlm" def gethash(self) -> Hashable: - return self.formula_ocr + return (self.formula_ocr,self.model_version) timeout = httpx.Timeout( @@ -44,6 +45,7 @@ class 
ConverterMineru(X2MarkdownConverter): super().__init__(config=config) self.mineru_token = config.mineru_token.strip() self.formula = config.formula_ocr + self.model_version=config.model_version def _get_header(self): return { @@ -56,6 +58,7 @@ class ConverterMineru(X2MarkdownConverter): "enable_formula": self.formula, "language": "auto", "enable_table": True, + "model_version":self.model_version, "files": [ {"name": f"{document.name}", "is_ocr": True} ] diff --git a/docutranslate/static/i18nData.json b/docutranslate/static/i18nData.json index cb88949..412b2b6 100644 --- a/docutranslate/static/i18nData.json +++ b/docutranslate/static/i18nData.json @@ -44,6 +44,10 @@ "engineOptionDocling": "Docling (本地)", "getMineruTokenTitle": "获取Mineru Token", "mineruTokenPlaceholder": "使用Mineru引擎时需要", + "modelVersionLabel": "Mineru 模型版本", + "modelVersionVlm": "VLM", + "modelVersionPipline": "Pipeline", + "modelVersionHelp": "mineru VLM是更新的内测模型。", "formulaOcrLabel": "公式识别", "codeOcrLabel": "代码识别", "aiSettingsTitleText": "翻译模型", @@ -87,6 +91,14 @@ "taskCardStartBtn": "开始翻译", "downloadMdEmbedded": "Markdown(嵌图)", "downloadMdZip": "Markdown压缩包", + "downloadTxt": "TXT", + "downloadJson": "JSON", + "downloadDocx": "DOCX", + "downloadXlsx": "XLSX", + "downloadSrt": "SRT", + "downloadEpub": "EPUB", + "downloadHtml": "HTML", + "downloadPdf": "PDF", "previewTitle": "预览", "previewBilingualBtn": "双语", "previewTranslatedOnlyBtn": "仅译文", @@ -95,7 +107,7 @@ "closeBtn": "关闭", "downloadBtn": "下载", "tutorialModalTitle": "使用教程", - "tutorialModalBody": "

视频教程可以在B站搜索 docutranslate 获取。

欢迎使用 DocuTranslate!请按照以下步骤完成文档翻译:

  1. 选择工作流

    首先,在配置面板顶部选择您需要的翻译流程。不同的工作流适用于不同类型的文件:

    • 转Markdown再翻译: 适用于翻译PDF、markdown、图片等文件。
    • 纯文本翻译: 用于翻译 .txt 等纯文本文件。
    • JSON翻译: 用于翻译 .json 文件中的特定字段。
    • DOCX翻译: 用于翻译 .docx 文件。
    • XLSX翻译: 用于翻译 .xlsx 电子表格文件。
    • SRT字幕翻译: 用于翻译 .srt 字幕文件。
    • EPUB翻译: 用于翻译 .epub 电子书文件。
    新增功能: \"自动选择工作流\"开关已默认开启。您只需上传文件,系统会自动为您匹配合适的工作流,简化操作。

  2. 配置参数

    根据您选择的工作流,完成相应的配置。所有配置项都会自动保存在您的浏览器中。

    • 解析配置 (仅在“转Markdown再翻译”工作流下显示):
      • 解析引擎: 选择一个引擎将您的文件(如PDF)转换为适合翻译的Markdown格式。如果您的文件已经是Markdown格式,则无需选择。
      • Mineru Token: 如果您选择 minerU 引擎,需要在此处填入您的Token。
    • DOCX/XLSX/SRT/EPUB翻译选项 (在对应工作流下显示):
      • 插入模式: 定义翻译结果如何放入文档或字幕。您可以选择直接“替换”原文,或是在原文之后“附加”,或是在原文之前“前置”。
      • 分隔符: 当选择“附加”或“前置”模式时,此项用于在原文和译文之间插入分隔符。
    • JSON路径配置 (仅在“JSON翻译”工作流下显示):
      • 需要翻译的JSON路径: 每行输入一个 JSONPath 表达式,指定需要翻译的字段。
      • 例如:$..description翻译所有键为description的值。$.items[0].name翻译第一个item的name值。
    • 翻译模型:
      • 选择平台/API 地址/API Key/模型 ID: 配置您希望使用的AI翻译服务。
      • 模型ID参考平台文档,建议使用非推理模型或混合推理模型(关闭思考)。
    • 翻译配置:
      • 目标语言/自定义Prompt: 指定翻译的目标语言和附加指令。
      • 思考模式:设置混合推理模型是否进行思考,目前支持智谱的glm4.5系列、阿里云的qwen3系列、火山引擎的seed1.6系列,建议选择禁用思考。
    • 高级参数:
      • 分块大小/并发数/Temperature: 发给ai的分块大小、并发请求数和温度,通常保持默认即可。
  3. 上传文件

    在右侧的任务列表中,点击或拖拽您的文档到文件上传区域。

  4. 开始翻译

    文件选择成功后,点击任务卡片右下角的 开始翻译 按钮。系统将开始处理任务,您可以在日志区域查看实时进度。

  5. 查看与下载

    翻译完成后,任务卡片下方会出现操作按钮:

    • 预览: 在右侧滑出的面板中进行原文和译文的对照预览(仅作参考)。
    • 下载: 下载包括 PDF, DOCX, XLSX, HTML, Markdown 等多种格式的译文。
提示: 所有配置都会自动保存在您的浏览器本地,方便下次使用。
", + "tutorialModalBody": "

视频教程可以在B站搜索 docutranslate 获取。

欢迎使用 DocuTranslate!请按照以下步骤完成文档翻译:

  1. 选择工作流

    首先,在配置面板顶部选择您需要的翻译流程。不同的工作流适用于不同类型的文件:

    • 转Markdown再翻译: 适用于翻译PDF、markdown、图片等文件。
    • 纯文本翻译: 用于翻译 .txt 等纯文本文件。
    • JSON翻译: 用于翻译 .json 文件中的特定字段。
    • DOCX翻译: 用于翻译 .docx 文件。
    • XLSX翻译: 用于翻译 .xlsx 电子表格文件。
    • SRT字幕翻译: 用于翻译 .srt 字幕文件。
    • EPUB翻译: 用于翻译 .epub 电子书文件。
    • HTML翻译: 用于翻译 .html 文件。
    新增功能: \"自动选择工作流\"开关已默认开启。您只需上传文件,系统会自动为您匹配合适的工作流,简化操作。

  2. 配置参数

    根据您选择的工作流,完成相应的配置。所有配置项都会自动保存在您的浏览器中。

    • 解析配置 (仅在“转Markdown再翻译”工作流下显示):
      • 解析引擎: 选择一个引擎将您的文件(如PDF)转换为适合翻译的Markdown格式。如果您的文件已经是Markdown格式,则无需选择。
      • Mineru Token: 如果您选择 minerU 引擎,需要在此处填入您的Token。
    • DOCX/XLSX/SRT/EPUB/HTML翻译选项 (在对应工作流下显示):
      • 插入模式: 定义翻译结果如何放入文档或字幕。您可以选择直接“替换”原文,或是在原文之后“附加”,或是在原文之前“前置”。
      • 分隔符: 当选择“附加”或“前置”模式时,此项用于在原文和译文之间插入分隔符。
    • JSON路径配置 (仅在“JSON翻译”工作流下显示):
      • 需要翻译的JSON路径: 每行输入一个 JSONPath 表达式,指定需要翻译的字段。
      • 例如:$..description翻译所有键为description的值。$.items[0].name翻译第一个item的name值。
    • 翻译模型:
      • 选择平台/API 地址/API Key/模型 ID: 配置您希望使用的AI翻译服务。
      • 模型ID参考平台文档,建议使用非推理模型或混合推理模型(关闭思考)。
    • 翻译配置:
      • 目标语言/自定义Prompt: 指定翻译的目标语言和附加指令。
      • 思考模式:设置混合推理模型是否进行思考,目前支持智谱的glm4.5系列、阿里云的qwen3系列、火山引擎的seed1.6系列,建议选择禁用思考。
    • 高级参数:
      • 分块大小/并发数/Temperature: 发给ai的分块大小、并发请求数和温度,通常保持默认即可。
  3. 上传文件

    在右侧的任务列表中,点击或拖拽您的文档到文件上传区域。

  4. 开始翻译

    文件选择成功后,点击任务卡片右下角的 开始翻译 按钮。系统将开始处理任务,您可以在日志区域查看实时进度。

  5. 查看与下载

    翻译完成后,任务卡片下方会出现操作按钮:

    • 预览: 在右侧滑出的面板中进行原文和译文的对照预览(仅作参考)。
    • 下载: 下载包括 PDF, DOCX, XLSX, HTML, Markdown 等多种格式的译文。
提示: 所有配置都会自动保存在您的浏览器本地,方便下次使用。
", "tutorialUnderstandBtn": "我明白了", "contributorsModalTitle": "感谢贡献", "contributorsPara1": "DocuTranslate是一个开源项目!大家的需求与使用是项目进步的动力。", @@ -128,7 +140,8 @@ "pdf_preparing": "PDF准备中,请稍后...", "pdf_print_failed": "自动打印失败,请在预览中手动打印。", "pdf_fetch_failed": "获取HTML内容失败,无法生成PDF。", - "init_failed_alert": "页面初始化失败,请检查后端服务是否正常并刷新页面。", + "init_i18n_failed_alert": "加载界面翻译资源失败,请检查网络连接或联系管理员。", + "init_failed_alert": "初始化失败,无法连接到后端服务。请检查服务是否运行或刷新页面。", "admin_tasklist_failed": "无法从服务器加载任务列表,请检查后台连接。", "btn_startTranslation": "开始翻译", "btn_initializing": "初始化...", @@ -180,6 +193,10 @@ "engineOptionDocling": "Docling (Local)", "getMineruTokenTitle": "Get Mineru Token", "mineruTokenPlaceholder": "Required for Mineru engine", + "modelVersionLabel": "Mineru Model Version", + "modelVersionVlm": "VLM", + "modelVersionPipline": "Pipeline", + "modelVersionHelp": "Mineru VLM is a newer internal beta model.", "formulaOcrLabel": "Formula Recognition", "codeOcrLabel": "Code Recognition", "aiSettingsTitleText": "Translation Model", @@ -223,6 +240,14 @@ "taskCardStartBtn": "Start Translation", "downloadMdEmbedded": "Markdown (Embedded Img)", "downloadMdZip": "Markdown (.zip)", + "downloadTxt": "TXT", + "downloadJson": "JSON", + "downloadDocx": "DOCX", + "downloadXlsx": "XLSX", + "downloadSrt": "SRT", + "downloadEpub": "EPUB", + "downloadHtml": "HTML", + "downloadPdf": "PDF", "previewTitle": "Preview", "previewBilingualBtn": "Bilingual", "previewTranslatedOnlyBtn": "Translated Only", @@ -231,7 +256,7 @@ "closeBtn": "Close", "downloadBtn": "Download", "tutorialModalTitle": "Tutorial", - "tutorialModalBody": "

Video tutorials can be found by searching docutranslate on Bilibili.

Welcome to DocuTranslate! Follow these steps to translate your documents:

  1. Select Workflow

    First, choose the translation process you need from the top of the settings panel. Different workflows are for different file types:

    • Markdown-based: For translating PDF, markdown, images, etc.
    • Plain Text: For translating .txt and other plain text files.
    • JSON: For translating specific fields in .json files.
    • DOCX: For translating .docx files.
    • XLSX: For translating .xlsx spreadsheet files.
    • SRT Subtitle: For translating .srt subtitle files.
    • EPUB: For translating .epub ebook files.
    New Feature: \"Auto-select workflow\" is on by default. Just upload your file, and the system will automatically pick the right workflow for you.

  2. Configure Parameters

    Based on your chosen workflow, complete the necessary settings. All settings are automatically saved in your browser.

    • Parsing Configuration (Only for \"Markdown-based\" workflow):
      • Parsing Engine: Choose an engine to convert your file (like a PDF) into a translation-friendly Markdown format. Not needed if your file is already Markdown.
      • Mineru Token: If you select the minerU engine, you need to enter your token here.
    • DOCX/XLSX/SRT/EPUB Options (For their respective workflows):
      • Insert Mode: Define how the translation is placed in the document. You can \"Replace\" the original, \"Append\" after it, or \"Prepend\" before it.
      • Separator: When using \"Append\" or \"Prepend\", this is used to separate the original and translated text.
    • JSON Path Configuration (Only for \"JSON\" workflow):
      • JSON paths to translate: Enter one JSONPath expression per line to specify which fields to translate.
      • E.g., $..description translates all values with the key 'description'. $.items[0].name translates the name of the first item.
    • Translation Model:
      • Platform/API Base URL/API Key/Model ID: Configure the AI translation service you want to use.
      • Refer to the platform's documentation for Model IDs. It's recommended to use non-inference models or mixed-inference models (with thinking turned off).
    • Translation Configuration:
      • Target Language/Custom Prompt: Specify the target language and any additional instructions.
      • Thinking Mode: Sets whether a mixed-inference model should 'think'. Supported for Zhipu's glm4.5 series, Alibaba's qwen3 series, VolcEngine's seed1.6 series, etc. Disabling is recommended.
    • Advanced Parameters:
      • Chunk Size/Concurrency/Temperature: The size of text chunks sent to the AI, number of concurrent requests, and creativity level. Default values usually work fine.
  3. Upload File

    In the task list on the right, click or drag your document into the file drop area.

  4. Start Translation

    Once the file is selected, click the Start Translation button on the task card. The system will start processing, and you can see real-time progress in the log area.

  5. Review & Download

    After translation is complete, action buttons will appear on the task card:

    • Preview: Opens a side panel for a side-by-side preview of the original and translation (for reference only).
    • Download: Download the translation in various formats, including PDF, DOCX, XLSX, HTML, and Markdown.
Tip: All your settings are automatically saved in your browser's local storage for your next visit.
", + "tutorialModalBody": "

Video tutorials can be found by searching docutranslate on Bilibili.

Welcome to DocuTranslate! Follow these steps to translate your documents:

  1. Select Workflow

    First, choose the translation process you need from the top of the settings panel. Different workflows are for different file types:

    • Markdown-based: For translating PDF, markdown, images, etc.
    • Plain Text: For translating .txt and other plain text files.
    • JSON: For translating specific fields in .json files.
    • DOCX: For translating .docx files.
    • XLSX: For translating .xlsx spreadsheet files.
    • SRT Subtitle: For translating .srt subtitle files.
    • EPUB: For translating .epub ebook files.
    • HTML: For translating .html files.
    New Feature: \"Auto-select workflow\" is on by default. Just upload your file, and the system will automatically pick the right workflow for you, simplifying the process.

  2. Configure Parameters

    Based on your chosen workflow, complete the necessary settings. All settings are automatically saved in your browser.

    • Parsing Configuration (Only for \"Markdown-based\" workflow):
      • Parsing Engine: Choose an engine to convert your file (like a PDF) into a translation-friendly Markdown format. Not needed if your file is already Markdown.
      • Mineru Token: If you select the minerU engine, you need to enter your token here.
    • DOCX/XLSX/SRT/EPUB/HTML Options (For their respective workflows):
      • Insert Mode: Define how the translation is placed in the document. You can \"Replace\" the original, \"Append\" after it, or \"Prepend\" before it.
      • Separator: When using \"Append\" or \"Prepend\", this is used to separate the original and translated text.
    • JSON Path Configuration (Only for \"JSON\" workflow):
      • JSON paths to translate: Enter one JSONPath expression per line to specify which fields to translate.
      • E.g., $..description translates all values with the key 'description'. $.items[0].name translates the name of the first item.
    • Translation Model:
      • Platform/API Base URL/API Key/Model ID: Configure the AI translation service you want to use.
      • Refer to the platform's documentation for Model IDs. It's recommended to use non-reasoning models or hybrid-reasoning models (with thinking turned off).
    • Translation Configuration:
      • Target Language/Custom Prompt: Specify the target language and any additional instructions.
      • Thinking Mode: Sets whether a hybrid-reasoning model should 'think'. Supported for Zhipu's glm4.5 series, Alibaba's qwen3 series, VolcEngine's seed1.6 series, etc. Disabling is recommended.
    • Advanced Parameters:
      • Chunk Size/Concurrency/Temperature: The size of text chunks sent to the AI, number of concurrent requests, and creativity level. Default values usually work fine.
  3. Upload File

    In the task list on the right, click or drag your document into the file drop area.

  4. Start Translation

    Once the file is selected, click the Start Translation button on the task card. The system will start processing, and you can see real-time progress in the log area.

  5. Review & Download

    After translation is complete, action buttons will appear on the task card:

    • Preview: Opens a side panel for a side-by-side preview of the original and translation (for reference only).
    • Download: Download the translation in various formats, including PDF, DOCX, XLSX, HTML, and Markdown.
Tip: All your settings are automatically saved in your browser's local storage for your next visit.
", "tutorialUnderstandBtn": "Got it", "contributorsModalTitle": "Thanks for Contributing", "contributorsPara1": "DocuTranslate is an open-source project! The community's needs and usage are what drive its progress.", @@ -264,7 +289,8 @@ "pdf_preparing": "Preparing PDF, please wait...", "pdf_print_failed": "Automatic printing failed. Please print manually from the preview.", "pdf_fetch_failed": "Failed to fetch HTML content, cannot generate PDF.", - "init_failed_alert": "Page initialization failed. Please check if the backend service is running and refresh the page.", + "init_i18n_failed_alert": "Failed to load interface translations. Please check your network connection or contact an administrator.", + "init_failed_alert": "Initialization failed, could not connect to the backend service. Please ensure the service is running and refresh the page.", "admin_tasklist_failed": "Could not load task list from server. Please check backend connection.", "btn_startTranslation": "Start Translation", "btn_initializing": "Initializing...", diff --git a/docutranslate/static/index.html b/docutranslate/static/index.html index f046344..708d303 100644 --- a/docutranslate/static/index.html +++ b/docutranslate/static/index.html @@ -1 +1 @@ - DocuTranslate - 交互式文档翻译

DocuTranslate

如果上传的文件本身是.md格式,此项可不选。

GitHub主页(欢迎star❤):
https://github.com/xunbu/docutranslate

交流QQ群: 1047781902

任务列表

当前没有任务,点击“新建任务”开始吧!

预览
原文
译文
\ No newline at end of file + DocuTranslate - 交互式文档翻译

DocuTranslate

如果上传的文件本身是.md格式,此项可不选。
mineru VLM是更新的内测模型。

GitHub主页(欢迎star❤):
https://github.com/xunbu/docutranslate

交流QQ群: 1047781902

任务列表

当前没有任务,点击“新建任务”开始吧!

预览
原文
译文
\ No newline at end of file