diff --git a/docutranslate/app.py b/docutranslate/app.py index e5c264e..de23b6c 100644 --- a/docutranslate/app.py +++ b/docutranslate/app.py @@ -205,7 +205,7 @@ class BaseWorkflowParams(BaseModel): concurrent: int = Field(default=default_params["concurrent"], description="并发请求数。") temperature: float = Field(default=default_params["temperature"], description="LLM温度参数。") thinking: ThinkingMode = Field(default=default_params["thinking"], description="是否启用深度思考", - examples=["default", "enable", "disable"]), + examples=["default", "enable", "disable"]) custom_prompt: Optional[str] = Field(None, description="用户自定义的翻译Prompt。", alias="custom_prompt") @@ -220,6 +220,7 @@ class MarkdownWorkflowParams(BaseWorkflowParams): mineru_token: Optional[str] = Field(None, description="当 `convert_engine` 为 'mineru' 时必填的API令牌。") formula_ocr: bool = Field(True, description="是否对公式进行OCR识别。对 `mineru` 和 `docling` 均有效。") code_ocr: bool = Field(True, description="是否对代码块进行OCR识别。仅 `docling` 引擎有效。") + model_version: Literal["pipline", "vlm"] = Field("vlm", description="Mineru模型的版本,'vlm'是更新的版本。仅 `mineru` 引擎有效。") @field_validator('mineru_token') def check_mineru_token(cls, v, values): @@ -479,7 +480,8 @@ async def _perform_translation( converter_config = None if payload.convert_engine == 'mineru': converter_config = ConverterMineruConfig(logger=task_logger, mineru_token=payload.mineru_token, - formula_ocr=payload.formula_ocr) + formula_ocr=payload.formula_ocr, + model_version=payload.model_version) elif payload.convert_engine == 'docling' and DOCLING_EXIST: converter_config = ConverterDoclingConfig(logger=task_logger, code_ocr=payload.code_ocr, formula_ocr=payload.formula_ocr) @@ -1214,6 +1216,7 @@ async def temp_translate( temperature: float = Body(default_params["temperature"]), thinking: ThinkingMode = Body(default_params["thinking"]), chunk_size: int = Body(default_params["chunk_size"]), custom_prompt: Optional[str] = Body(None), + model_version: Literal["pipline", "vlm"] = Body("vlm"), ): file_name = Path(file_name) try: @@ -1222,7 +1225,7 @@ async def temp_translate( decoded_content = file_content.encode('utf-8') try: workflow_config = MarkdownBasedWorkflowConfig( - convert_engine="mineru", converter_config=ConverterMineruConfig(mineru_token=mineru_token), + convert_engine="mineru", converter_config=ConverterMineruConfig(mineru_token=mineru_token, model_version=model_version), translator_config=MDTranslatorConfig(base_url=base_url, api_key=api_key, model_id=model_id, to_lang=to_lang, custom_prompt=custom_prompt, temperature=temperature, thinking=thinking, chunk_size=chunk_size, concurrent=concurrent), diff --git a/docutranslate/converter/x2md/converter_mineru.py b/docutranslate/converter/x2md/converter_mineru.py index 58e1ac2..9956bd7 100644 --- a/docutranslate/converter/x2md/converter_mineru.py +++ b/docutranslate/converter/x2md/converter_mineru.py @@ -2,7 +2,7 @@ import asyncio import time import zipfile from dataclasses import dataclass -from typing import Hashable +from typing import Hashable, Literal import httpx @@ -18,9 +18,10 @@ URL = 'https://mineru.net/api/v4/file-urls/batch' class ConverterMineruConfig(X2MarkdownConverterConfig): mineru_token: str formula_ocr: bool = True + model_version: Literal["pipline", "vlm"] = "vlm" def gethash(self) -> Hashable: - return self.formula_ocr + return (self.formula_ocr,self.model_version) timeout = httpx.Timeout( @@ -44,6 +45,7 @@ class ConverterMineru(X2MarkdownConverter): super().__init__(config=config) self.mineru_token = config.mineru_token.strip() self.formula = config.formula_ocr + self.model_version=config.model_version def _get_header(self): return { @@ -56,6 +58,7 @@ class ConverterMineru(X2MarkdownConverter): "enable_formula": self.formula, "language": "auto", "enable_table": True, + "model_version":self.model_version, "files": [ {"name": f"{document.name}", "is_ocr": True} ] diff --git a/docutranslate/static/i18nData.json b/docutranslate/static/i18nData.json index cb88949..412b2b6 100644 --- a/docutranslate/static/i18nData.json +++ b/docutranslate/static/i18nData.json @@ -44,6 +44,10 @@ "engineOptionDocling": "Docling (本地)", "getMineruTokenTitle": "获取Mineru Token", "mineruTokenPlaceholder": "使用Mineru引擎时需要", + "modelVersionLabel": "Mineru 模型版本", + "modelVersionVlm": "VLM", + "modelVersionPipline": "Pipeline", + "modelVersionHelp": "mineru VLM是更新的内测模型。", "formulaOcrLabel": "公式识别", "codeOcrLabel": "代码识别", "aiSettingsTitleText": "翻译模型", @@ -87,6 +91,14 @@ "taskCardStartBtn": "开始翻译", "downloadMdEmbedded": "Markdown(嵌图)", "downloadMdZip": "Markdown压缩包", + "downloadTxt": "TXT", + "downloadJson": "JSON", + "downloadDocx": "DOCX", + "downloadXlsx": "XLSX", + "downloadSrt": "SRT", + "downloadEpub": "EPUB", + "downloadHtml": "HTML", + "downloadPdf": "PDF", "previewTitle": "预览", "previewBilingualBtn": "双语", "previewTranslatedOnlyBtn": "仅译文", @@ -95,7 +107,7 @@ "closeBtn": "关闭", "downloadBtn": "下载", "tutorialModalTitle": "使用教程", - "tutorialModalBody": "

视频教程可以在B站搜索 docutranslate 获取。

欢迎使用 DocuTranslate!请按照以下步骤完成文档翻译:

  1. 选择工作流

    首先,在配置面板顶部选择您需要的翻译流程。不同的工作流适用于不同类型的文件:

    新增功能: \"自动选择工作流\"开关已默认开启。您只需上传文件,系统会自动为您匹配合适的工作流,简化操作。

  2. 配置参数

    根据您选择的工作流,完成相应的配置。所有配置项都会自动保存在您的浏览器中。

  3. 上传文件

    在右侧的任务列表中,点击或拖拽您的文档到文件上传区域。

  4. 开始翻译

    文件选择成功后,点击任务卡片右下角的 开始翻译 按钮。系统将开始处理任务,您可以在日志区域查看实时进度。

  5. 查看与下载

    翻译完成后,任务卡片下方会出现操作按钮:

提示: 所有配置都会自动保存在您的浏览器本地,方便下次使用。
", + "tutorialModalBody": "

视频教程可以在B站搜索 docutranslate 获取。

欢迎使用 DocuTranslate!请按照以下步骤完成文档翻译:

  1. 选择工作流

    首先,在配置面板顶部选择您需要的翻译流程。不同的工作流适用于不同类型的文件:

    新增功能: \"自动选择工作流\"开关已默认开启。您只需上传文件,系统会自动为您匹配合适的工作流,简化操作。

  2. 配置参数

    根据您选择的工作流,完成相应的配置。所有配置项都会自动保存在您的浏览器中。

  3. 上传文件

    在右侧的任务列表中,点击或拖拽您的文档到文件上传区域。

  4. 开始翻译

    文件选择成功后,点击任务卡片右下角的 开始翻译 按钮。系统将开始处理任务,您可以在日志区域查看实时进度。

  5. 查看与下载

    翻译完成后,任务卡片下方会出现操作按钮:

提示: 所有配置都会自动保存在您的浏览器本地,方便下次使用。
", "tutorialUnderstandBtn": "我明白了", "contributorsModalTitle": "感谢贡献", "contributorsPara1": "DocuTranslate是一个开源项目!大家的需求与使用是项目进步的动力。", @@ -128,7 +140,8 @@ "pdf_preparing": "PDF准备中,请稍后...", "pdf_print_failed": "自动打印失败,请在预览中手动打印。", "pdf_fetch_failed": "获取HTML内容失败,无法生成PDF。", - "init_failed_alert": "页面初始化失败,请检查后端服务是否正常并刷新页面。", + "init_i18n_failed_alert": "加载界面翻译资源失败,请检查网络连接或联系管理员。", + "init_failed_alert": "初始化失败,无法连接到后端服务。请检查服务是否运行或刷新页面。", "admin_tasklist_failed": "无法从服务器加载任务列表,请检查后台连接。", "btn_startTranslation": "开始翻译", "btn_initializing": "初始化...", @@ -180,6 +193,10 @@ "engineOptionDocling": "Docling (Local)", "getMineruTokenTitle": "Get Mineru Token", "mineruTokenPlaceholder": "Required for Mineru engine", + "modelVersionLabel": "Mineru Model Version", + "modelVersionVlm": "VLM", + "modelVersionPipline": "Pipeline", + "modelVersionHelp": "Mineru VLM is a newer internal beta model.", "formulaOcrLabel": "Formula Recognition", "codeOcrLabel": "Code Recognition", "aiSettingsTitleText": "Translation Model", @@ -223,6 +240,14 @@ "taskCardStartBtn": "Start Translation", "downloadMdEmbedded": "Markdown (Embedded Img)", "downloadMdZip": "Markdown (.zip)", + "downloadTxt": "TXT", + "downloadJson": "JSON", + "downloadDocx": "DOCX", + "downloadXlsx": "XLSX", + "downloadSrt": "SRT", + "downloadEpub": "EPUB", + "downloadHtml": "HTML", + "downloadPdf": "PDF", "previewTitle": "Preview", "previewBilingualBtn": "Bilingual", "previewTranslatedOnlyBtn": "Translated Only", @@ -231,7 +256,7 @@ "closeBtn": "Close", "downloadBtn": "Download", "tutorialModalTitle": "Tutorial", - "tutorialModalBody": "

Video tutorials can be found by searching docutranslate on Bilibili.

Welcome to DocuTranslate! Follow these steps to translate your documents:

  1. Select Workflow

    First, choose the translation process you need from the top of the settings panel. Different workflows are for different file types:

    New Feature: \"Auto-select workflow\" is on by default. Just upload your file, and the system will automatically pick the right workflow for you.

  2. Configure Parameters

    Based on your chosen workflow, complete the necessary settings. All settings are automatically saved in your browser.

  3. Upload File

    In the task list on the right, click or drag your document into the file drop area.

  4. Start Translation

    Once the file is selected, click the Start Translation button on the task card. The system will start processing, and you can see real-time progress in the log area.

  5. Review & Download

    After translation is complete, action buttons will appear on the task card:

Tip: All your settings are automatically saved in your browser's local storage for your next visit.
", + "tutorialModalBody": "

Video tutorials can be found by searching docutranslate on Bilibili.

Welcome to DocuTranslate! Follow these steps to translate your documents:

  1. Select Workflow

    First, choose the translation process you need from the top of the settings panel. Different workflows are for different file types:

    New Feature: \"Auto-select workflow\" is on by default. Just upload your file, and the system will automatically pick the right workflow for you, simplifying the process.

  2. Configure Parameters

    Based on your chosen workflow, complete the necessary settings. All settings are automatically saved in your browser.

  3. Upload File

    In the task list on the right, click or drag your document into the file drop area.

  4. Start Translation

    Once the file is selected, click the Start Translation button on the task card. The system will start processing, and you can see real-time progress in the log area.

  5. Review & Download

    After translation is complete, action buttons will appear on the task card:

Tip: All your settings are automatically saved in your browser's local storage for your next visit.
", "tutorialUnderstandBtn": "Got it", "contributorsModalTitle": "Thanks for Contributing", "contributorsPara1": "DocuTranslate is an open-source project! The community's needs and usage are what drive its progress.", @@ -264,7 +289,8 @@ "pdf_preparing": "Preparing PDF, please wait...", "pdf_print_failed": "Automatic printing failed. Please print manually from the preview.", "pdf_fetch_failed": "Failed to fetch HTML content, cannot generate PDF.", - "init_failed_alert": "Page initialization failed. Please check if the backend service is running and refresh the page.", + "init_i18n_failed_alert": "Failed to load interface translations. Please check your network connection or contact an administrator.", + "init_failed_alert": "Initialization failed, could not connect to the backend service. Please ensure the service is running and refresh the page.", "admin_tasklist_failed": "Could not load task list from server. Please check backend connection.", "btn_startTranslation": "Start Translation", "btn_initializing": "Initializing...", diff --git a/docutranslate/static/index.html b/docutranslate/static/index.html index f046344..708d303 100644 --- a/docutranslate/static/index.html +++ b/docutranslate/static/index.html @@ -1 +1 @@ - DocuTranslate - 交互式文档翻译

DocuTranslate

如果上传的文件本身是.md格式,此项可不选。

GitHub主页(欢迎star❤):
https://github.com/xunbu/docutranslate

交流QQ群: 1047781902

任务列表

当前没有任务,点击“新建任务”开始吧!

预览
原文
译文
\ No newline at end of file + DocuTranslate - 交互式文档翻译

DocuTranslate

如果上传的文件本身是.md格式,此项可不选。
mineru VLM是更新的内测模型。

GitHub主页(欢迎star❤):
https://github.com/xunbu/docutranslate

交流QQ群: 1047781902

任务列表

当前没有任务,点击“新建任务”开始吧!

预览
原文
译文
\ No newline at end of file