diff --git a/docutranslate/app.py b/docutranslate/app.py
index c0a2f53..d89c0eb 100644
--- a/docutranslate/app.py
+++ b/docutranslate/app.py
@@ -37,7 +37,6 @@ from fastapi.staticfiles import StaticFiles
from pydantic import (
BaseModel,
Field,
- field_validator,
model_validator,
AliasChoices,
ConfigDict,
@@ -46,7 +45,6 @@ from pydantic import (
from docutranslate import __version__
from docutranslate.agents.agent import ThinkingMode
from docutranslate.agents.glossary_agent import GlossaryAgentConfig
-from docutranslate.exporter.md.types import ConvertEngineType
# --- 核心代码 Imports ---
from docutranslate.global_values.conditional_import import DOCLING_EXIST
@@ -78,6 +76,9 @@ from docutranslate.workflow.xlsx_workflow import XlsxWorkflow, XlsxWorkflowConfi
if DOCLING_EXIST or TYPE_CHECKING:
from docutranslate.converter.x2md.converter_docling import ConverterDoclingConfig
from docutranslate.converter.x2md.converter_mineru import ConverterMineruConfig
+# --- MinerU local-deployment converter config (used when convert_engine == "mineru_deploy") ---
+from docutranslate.converter.x2md.converter_mineru_deploy import ConverterMineruDeployConfig
+# ----------------------
from docutranslate.exporter.md.md2html_exporter import MD2HTMLExporterConfig
from docutranslate.exporter.txt.txt2html_exporter import TXT2HTMLExporterConfig
from docutranslate.translator.ai_translator.md_translator import MDTranslatorConfig
@@ -399,31 +400,61 @@ class MarkdownWorkflowParams(BaseWorkflowParams):
workflow_type: Literal["markdown_based"] = Field(
..., description="指定使用基于Markdown的翻译工作流。"
)
- convert_engine: ConvertEngineType = Field(
+ convert_engine: Literal["identity", "mineru", "docling", "mineru_deploy"] = Field(
"identity",
- description="选择将文件解析为markdown的引擎。如果输入文件是.md,此项可为`null`或不传。",
- examples=["identity", "mineru", "docling"],
- )
- mineru_token: Optional[str] = Field(
- None, description="当 `convert_engine` 为 'mineru' 时必填的API令牌。"
- )
- formula_ocr: bool = Field(
- True, description="是否对公式进行OCR识别。对 `mineru` 和 `docling` 均有效。"
- )
- code_ocr: bool = Field(
- True, description="是否对代码块进行OCR识别。仅 `docling` 引擎有效。"
- )
- model_version: Literal["pipeline", "vlm"] = Field(
- "vlm", description="Mineru模型的版本,'vlm'是更新的版本。仅 `mineru` 引擎有效。"
+ description="选择将文件解析为markdown的引擎。'mineru_deploy' 适用于本地部署的 MinerU 服务。如果输入文件是.md,此项可为`identity`或不传。",
+ examples=["identity", "mineru", "docling", "mineru_deploy"],
)
- @field_validator("mineru_token")
- def check_mineru_token(cls, v, values):
- if values.data.get("convert_engine") == "mineru" and not v:
+ # --- Engine-Specific Parameters ---
+
+ # -- For "mineru" (Cloud API) --
+ mineru_token: Optional[str] = Field(
+ None, description="[仅当 convert_engine='mineru'] 必填的API令牌。"
+ )
+ model_version: Literal["pipeline", "vlm"] = Field(
+ "vlm", description="[仅当 convert_engine='mineru'] Mineru Cloud模型的版本。"
+ )
+ formula_ocr: bool = Field(
+ True, description="[仅当 convert_engine='mineru' 或 'docling'] 是否对公式进行OCR识别。"
+ )
+
+ # -- For "docling" --
+ code_ocr: bool = Field(
+ True, description="[仅当 convert_engine='docling'] 是否对代码块进行OCR识别。"
+ )
+
+ # -- For "mineru_deploy" (Local Deployment) --
+ mineru_deploy_base_url: Optional[str] = Field(
+ "http://127.0.0.1:8000",
+ description="[仅当 convert_engine='mineru_deploy'] 本地部署的 MinerU 服务地址。",
+ )
+ mineru_deploy_backend: Literal["pipeline", "vlm"] = Field(
+ "pipeline",
+ description="[仅当 convert_engine='mineru_deploy'] 本地部署的 MinerU 服务使用的后端。",
+ )
+ mineru_deploy_formula_enable: bool = Field(
+ True,
+ description="[仅当 convert_engine='mineru_deploy'] 本地部署的服务是否启用公式解析。",
+ )
+ mineru_deploy_start_page_id: int = Field(
+ 0, description="[仅当 convert_engine='mineru_deploy'] 起始解析页面。"
+ )
+ mineru_deploy_end_page_id: int = Field(
+ 99999, description="[仅当 convert_engine='mineru_deploy'] 结束解析页面。"
+ )
+
+ @model_validator(mode="after")  # "after" mode: runs on the built model, so every field is readable via self
+ def check_engine_params(self):  # cross-field check: the selected engine's mandatory setting must be non-empty
+ if self.convert_engine == "mineru" and not self.mineru_token:  # `not` rejects both None and an empty string
raise ValueError(
"当 `convert_engine` 为 'mineru' 时,`mineru_token` 字段是必须的。"
)
- return v
+ if self.convert_engine == "mineru_deploy" and not self.mineru_deploy_base_url:  # field has a default URL, so this only trips on an explicit null or ""
+ raise ValueError(
+ "当 `convert_engine` 为 'mineru_deploy' 时,`mineru_deploy_base_url` 字段是必须的。"
+ )
+ return self  # hand the validated instance back unchanged
class TextWorkflowParams(BaseWorkflowParams):
@@ -612,6 +643,21 @@ class TranslateServiceRequest(BaseModel):
"model_version": "vlm",
},
},
+ {
+ "file_name": "local_test.pdf",
+ "file_content": "JVBERi0xLjcKJeLjz9MKMSAwIG9iago8PC9...",
+ "payload": {
+ "workflow_type": "markdown_based",
+ "skip_translate": True,
+ "to_lang": "中文",
+ "convert_engine": "mineru_deploy",
+ "mineru_deploy_base_url": "http://127.0.0.1:8000",
+ "mineru_deploy_backend": "pipeline",
+ "mineru_deploy_formula_enable": True,
+ "mineru_deploy_start_page_id": 0,
+ "mineru_deploy_end_page_id": 5,
+ },
+ },
{
"file_name": "product_info.json",
"file_content": "ewogICAgImlkIjogIjEyMzQ1IiwK...",
@@ -874,6 +920,14 @@ async def _perform_translation(
formula_ocr=payload.formula_ocr,
model_version=payload.model_version,
)
+ elif payload.convert_engine == "mineru_deploy":
+ converter_config = ConverterMineruDeployConfig(
+ base_url=payload.mineru_deploy_base_url,
+ backend=payload.mineru_deploy_backend,
+ formula_enable=payload.mineru_deploy_formula_enable,
+ start_page_id=payload.mineru_deploy_start_page_id,
+ end_page_id=payload.mineru_deploy_end_page_id,
+ )
elif payload.convert_engine == "docling" and DOCLING_EXIST:
converter_config = ConverterDoclingConfig(
logger=task_logger,
@@ -1458,7 +1512,6 @@ async def _start_translation_task(
initial_log_msg = f"收到新的翻译请求: {original_filename}"
print(f"[{task_id}] {initial_log_msg}")
- log_history.append(initial_log_msg)
await log_queue.put(initial_log_msg)
try:
@@ -2093,7 +2146,7 @@ async def service_content(
"/engin-list", tags=["Application"], description="返回正在进行的可用的转换引擎"
)
async def service_get_engin_list():
- engin_list = ["mineru"]
+ engin_list = ["mineru", "mineru_deploy"]  # always offered; "docling" is appended below only when DOCLING_EXIST is truthy
if DOCLING_EXIST:
engin_list.append("docling")
return JSONResponse(content=engin_list)
diff --git a/docutranslate/static/i18nData.json b/docutranslate/static/i18nData.json
index 7776e21..0a79abc 100644
--- a/docutranslate/static/i18nData.json
+++ b/docutranslate/static/i18nData.json
@@ -46,12 +46,9 @@
"jsonSettingsTitleText": "JSON路径配置",
"jsonPathLabel": "需要翻译的JSON路径",
"jsonPathPlaceholder": "每行一个路径, 例如:\n$.name\n$.*",
- "jsonPathHelp": "采用jsonpath-ng的路径选择语法,每一行表示一个json路径。 将翻译路径匹配对象内的所有字符串",
+ "jsonPathHelp": "采用jsonpath-ng的路径选择语法,每一行表示一个json路径。\n 将翻译路径匹配对象内的所有字符串",
"parsingSettingsTitleText": "解析配置",
"parsingEngineLabel": "解析引擎",
- "engineOptionIdentity": "已经是markdown格式",
- "engineOptionMineru": "Mineru (推荐)",
- "engineOptionDocling": "Docling (本地解析)",
"parsingEngineHelp": "如果上传的文件本身是.md格式,此项可不选。",
"getMineruTokenTitle": "获取Mineru Token",
"mineruTokenPlaceholder": "使用Mineru引擎时需要",
@@ -59,13 +56,20 @@
"modelVersionVlm": "VLM",
"modelVersionPipline": "Pipeline",
"modelVersionHelp": "mineru VLM是更新的内测模型。",
+ "mineruDeployBaseUrlLabel": "服务地址 (Base URL)",
+ "mineruDeployBaseUrlPlaceholder": "例如: http://127.0.0.1:8000",
+ "mineruDeployBackendLabel": "后端类型",
+ "mineruDeployBackendPipeline": "Pipeline",
+ "mineruDeployBackendVlm": "VLM",
+ "mineruDeployStartPageLabel": "起始页面",
+ "mineruDeployEndPageLabel": "结束页面",
+ "mineruDeployFormulaEnableLabel": "启用公式解析",
"formulaOcrLabel": "公式识别",
"codeOcrLabel": "代码识别",
"aiSettingsTitleText": "翻译模型",
"skipTranslationLabel": "跳过翻译",
"platformLabel": "选择平台",
"platformCustom": "自定义接口",
- "apiHrefInfo302ai": "👈 通过此链接注册可享1美元免费额度",
"baseUrlLabel": "API 地址 (Base URL)",
"baseUrlPlaceholder": "OpenAi兼容地址",
"getApiKeyTitle": "获取API Key",
@@ -101,7 +105,7 @@
"glossaryGenConfigCustom": "自定义",
"importConfigBtn": "导入配置",
"exportConfigBtn": "导出配置",
- "githubInfo": "GitHub主页(欢迎star❤):
https://github.com/xunbu/docutranslate",
+ "githubInfo": "GitHub主页(欢迎star❤):
\n https://github.com/xunbu/docutranslate",
"qqGroupInfo": "交流QQ群: 1047781902",
"taskListTitle": "任务列表",
"newTaskBtn": "新建任务",
@@ -110,7 +114,7 @@
"taskCardIdPlaceholder": "等待提交...",
"taskCardFileDrop": "点击或拖拽文件到此处",
"taskCardFileSelected": "文件已选择",
- "taskCardFilenameLabel": "文件名:",
+ "taskCardFilenameLabel": "文件名: ",
"taskCardLogLabel": "日志",
"copyLogsTooltip": "复制日志",
"taskCardStatusWaiting": "等待上传文件...",
@@ -130,7 +134,7 @@
"closeBtn": "关闭",
"downloadBtn": "下载",
"tutorialModalTitle": "使用教程",
- "tutorialModalBody": "
视频教程可以在B站搜索 docutranslate 获取。
欢迎使用 DocuTranslate!请按照以下步骤完成文档翻译:
在左侧配置面板的顶部,首先选择最适合您文件类型的处理流程。
.txt 纯文本文件。.epub 电子书文件。.docx Word文档。.xlsx 或 .csv 电子表格文件。.srt 字幕文件。.ass 特效字幕文件。.json 文件中的特定字段。.html 网页文件。选择工作流后,下方会显示相关的配置选项。请依次完成设置(所有配置都会自动保存在您的浏览器中):
A. 工作流特定选项 (根据您第一步的选择出现):
minerU 引擎,需要在此处填入您的Token。\\N,EPUB格式常用 <br /> 作为换行分隔符)。$.*(翻译全部字符串),$..description(翻译所有键为description的值)。B. 通用选项 (适用于所有工作流):
在右侧的任务列表中,点击或拖拽您的文档到文件上传区域。
文件选择成功后,点击任务卡片右下角的 开始翻译 按钮。系统将开始处理任务,您可以在日志区域查看实时进度。
翻译完成后,任务卡片下方会出现操作按钮:
视频教程可以在B站搜索 docutranslate 获取。
\n欢迎使用 DocuTranslate!请按照以下步骤完成文档翻译:
\n\n 在左侧配置面板的顶部,首先选择最适合您文件类型的处理流程。\n
.txt 纯文本文件。.epub 电子书文件。.docx Word文档。.xlsx 或 .csv 电子表格文件。.srt 字幕文件。.ass 特效字幕文件。.json 文件中的特定字段。.html 网页文件。\n 选择工作流后,下方会显示相关的配置选项。请依次完成设置(所有配置都会自动保存在您的浏览器中):
\nA. 工作流特定选项 (根据您第一步的选择出现):
\nminerU 引擎,需要在此处填入您的Token。\n \\N,EPUB格式常用 <br /> 作为换行分隔符)。\n $.*(翻译全部字符串),$..description(翻译所有键为description的值)。\n B. 通用选项 (适用于所有工作流):
\n在右侧的任务列表中,点击或拖拽您的文档到文件上传区域。
\n文件选择成功后,点击任务卡片右下角的 开始翻译 按钮。系统将开始处理任务,您可以在日志区域查看实时进度。\n
\n翻译完成后,任务卡片下方会出现操作按钮:
\n正在加载预览...
", - "preview_cantReadOriginal": "无法读取原始文件内容。", - "preview_cantPreviewType": "无法预览此文件类型", - "preview_noOriginalCache": "没有缓存的原始文件用于预览。", - "preview_loadFailed": "\\n for newline.",
+ "separatorHelp": "Characters to separate original and translated text in append/prepend mode. Use \\n for a newline.",
"docxSettingsTitleText": "DOCX Translation Options",
"insertModeHelpDocx": "Choose how to insert the translated text.",
"separatorPlaceholderStructured": "e.g., ---",
- "separatorHelpDocx": "In append mode, translation starts in a new paragraph. This adds extra content between paragraphs. \\n for internal newlines.",
+ "separatorHelpDocx": "In append mode, translation starts in a new paragraph. This adds extra content between paragraphs. Use \\n for a line break inside it.",
"xlsxSettingsTitleText": "XLSX Translation Options",
"insertModeHelpXlsx": "Choose how to insert the translated text into cells.",
- "xlsxTranslateRegionsLabel": "Translate Regions (Optional)",
- "xlsxTranslateRegionsPlaceholder": "One region per line, e.g., Sheet1!A1:B10 (applies to all sheets if name is omitted)",
+ "xlsxTranslateRegionsLabel": "Translation Regions (Optional)",
+ "xlsxTranslateRegionsPlaceholder": "One region per line, e.g., Sheet1!A1:B10 (applies to all sheets if sheet name is omitted)",
"srtSettingsTitleText": "SRT Translation Options",
"insertModeHelpSrt": "Choose how to insert the translated text.",
"epubSettingsTitleText": "EPUB Translation Options",
"insertModeHelpEpub": "Choose how to insert the translated text.",
- "separatorHelpEpub": "In append mode, translation starts in a new block. This adds extra content between blocks. \\n for internal newlines.",
+ "separatorHelpEpub": "In append mode, translation starts in a new block. This adds extra content between blocks. Use \\n for a line break inside it.",
"htmlSettingsTitleText": "HTML Translation Options",
"insertModeHelpHtml": "Choose how to insert the translated text.",
- "separatorHelpHtml": "In append mode, translation starts in a new block. This adds extra content between blocks. \\n for internal newlines.",
+ "separatorHelpHtml": "In append mode, translation starts in a new block. This adds extra content between blocks. Use \\n for a line break inside it.",
"assSettingsTitleText": "ASS Translation Options",
"insertModeHelpAss": "Choose how to insert the translated text.",
"separatorPlaceholderAss": "e.g., \\N (newline)",
"separatorHelpAss": "Characters to separate original and translated text in append/prepend mode. \\N is the newline for ASS format.",
"jsonSettingsTitleText": "JSON Path Configuration",
"jsonPathLabel": "JSON Paths to Translate",
- "jsonPathPlaceholder": "One path per line, e.g.,\n$.name\n$.*",
- "jsonPathHelp": "Uses jsonpath-ng syntax. One JSON path per line. Translates all strings within matching objects.",
+ "jsonPathPlaceholder": "One path per line, e.g.:\n$.name\n$.*",
+ "jsonPathHelp": "Uses jsonpath-ng syntax. Each line is a JSON path. All strings within matching objects will be translated.",
"parsingSettingsTitleText": "Parsing Configuration",
"parsingEngineLabel": "Parsing Engine",
- "engineOptionIdentity": "Already in Markdown format",
- "engineOptionMineru": "Mineru (Recommended)",
- "engineOptionDocling": "Docling (Local Parsing)",
- "parsingEngineHelp": "Not required if the uploaded file is already in .md format.",
+ "parsingEngineHelp": "Not required if your uploaded file is already in .md format.",
"getMineruTokenTitle": "Get Mineru Token",
"mineruTokenPlaceholder": "Required when using Mineru engine",
"modelVersionLabel": "Mineru Model Version",
"modelVersionVlm": "VLM",
"modelVersionPipline": "Pipeline",
"modelVersionHelp": "Mineru VLM is a newer, internal test model.",
+ "mineruDeployBaseUrlLabel": "Service Address (Base URL)",
+ "mineruDeployBaseUrlPlaceholder": "e.g., http://127.0.0.1:8000",
+ "mineruDeployBackendLabel": "Backend Type",
+ "mineruDeployBackendPipeline": "Pipeline",
+ "mineruDeployBackendVlm": "VLM",
+ "mineruDeployStartPageLabel": "Start Page",
+ "mineruDeployEndPageLabel": "End Page",
+ "mineruDeployFormulaEnableLabel": "Enable Formula Parsing",
"formulaOcrLabel": "Formula Recognition",
"codeOcrLabel": "Code Recognition",
"aiSettingsTitleText": "Translation Model",
"skipTranslationLabel": "Skip Translation",
"platformLabel": "Select Platform",
"platformCustom": "Custom Endpoint",
- "apiHrefInfo302ai": "👈 Register through this link to enjoy a $1 free credit",
- "baseUrlLabel": "API Base URL",
- "baseUrlPlaceholder": "OpenAI-compatible URL",
+ "baseUrlLabel": "API Address (Base URL)",
+ "baseUrlPlaceholder": "OpenAI-compatible address",
"getApiKeyTitle": "Get API Key",
- "apiKeyPlaceholder": "Enter your API Key",
+ "apiKeyPlaceholder": "Please enter your API Key",
"modelIdLabel": "Model ID",
"modelIdPlaceholder": "e.g., gpt-4o, glm-4",
"systemProxyLabel": "Enable System Proxy",
@@ -253,12 +266,12 @@
"targetLanguageCustom": "Other (Custom)",
"customLangPlaceholder": "Enter target language, e.g., Italian",
"thinkingModeLabel": "Thinking Mode",
- "thinkingModeTooltip": "Configure whether the hybrid reasoning model should 'think'. Supported by Zhipu's glm4.5 series, Volcengine's seed1.6 series, SiliconFlow, Google's Gemini series, and some 302AI models. Disabling is recommended.",
+ "thinkingModeTooltip": "Sets whether a hybrid reasoning model should 'think'. Currently supported for Zhipu's glm4.5 series, Volcengine's seed1.6 series, the SiliconFlow platform, Google's Gemini series, and some 302AI models. Disabling is recommended.",
"thinkingModeEnable": "Enable",
"thinkingModeDisable": "Disable (Recommended)",
"thinkingModeDefault": "Default",
"customPromptLabel": "Custom Prompt",
- "customPromptPlaceholder": "Optional, e.g., 'Keep proper names in the original language'",
+ "customPromptPlaceholder": "Optional, e.g., 'Keep names in their original language.'",
"chunkSizeLabel": "Chunk Size",
"resetBtn": "Reset",
"concurrentLabel": "Concurrency",
@@ -268,7 +281,7 @@
"glossaryHelp": "Select one or more CSV files. Files must have 'src' and 'dst' columns for source and destination terms.",
"viewGlossaryBtn": "View Glossary",
"clearGlossaryBtn": "Clear",
- "glossaryGenEnableLabel": "Auto-generate Glossary",
+ "glossaryGenEnableLabel": "Auto-generate glossary",
"glossaryCustomPromptLabel": "Custom Prompt",
"glossaryCustomPromptPlaceholder": "Prompt for glossary generation",
"glossaryGenConfigLabel": "Glossary Generation Config",
@@ -276,7 +289,7 @@
"glossaryGenConfigCustom": "Custom",
"importConfigBtn": "Import Config",
"exportConfigBtn": "Export Config",
- "githubInfo": "GitHub Homepage (Star us ❤): Video tutorials are available on Bilibili by searching for docutranslate.
Welcome to DocuTranslate! Follow these steps to translate your documents:
At the top of the left settings panel, choose the processing flow that best suits your file type.
.txt files..epub e-books..docx Word documents..xlsx or .csv spreadsheets..srt subtitle files..ass advanced subtitle files..json files..html web pages.After selecting a workflow, relevant options will appear below. Configure them sequentially (all settings are saved automatically in your browser):
A. Workflow-Specific Options (Appear based on your Step 1 choice):
minerU engine.\\N for ASS, <br /> for EPUB).$.* (all strings), $..description (all values for the key description).B. General Options (Apply to all workflows):
In the task list on the right, click or drag your document into the file upload area.
Once the file is selected, click the Start Translation button on the task card. The process will begin, and you can monitor real-time progress in the log area.
After translation is complete, action buttons will appear on the task card:
Video tutorials are available on Bilibili by searching for docutranslate.
Welcome to DocuTranslate! Follow these steps to translate your documents:
At the top of the left-side settings panel, choose the processing flow that best suits your file type.
.txt files..epub e-books..docx Word documents..xlsx or .csv spreadsheets..srt subtitle files..ass advanced subtitle files..json files..html web files.After selecting a workflow, relevant configuration options will appear below. Please complete the settings in order (all configurations are automatically saved in your browser):
A. Workflow-Specific Options (appear based on your choice in Step 1):
minerU engine, you must enter your token here.\\N is common for ASS, <br /> for EPUB).$.* (translates all strings), $..description (translates all values with the key description).B. General Options (apply to all workflows):
In the task list on the right, click or drag your document into the file upload area.
Once the file is selected, click the Start Translation button on the task card. The system will begin processing, and you can watch the real-time progress in the log area.
After the translation is complete, action buttons will appear on the task card:
Loading preview...
", - "preview_cantReadOriginal": "Could not read original file content.", - "preview_cantPreviewType": "Cannot preview this file type", - "preview_noOriginalCache": "No cached original file to preview.", - "preview_loadFailed": "当前没有任务,点击“新建任务”开始吧!
等待提交...
点击或拖拽文件到此处
当前没有任务,点击“新建任务”开始吧!
等待提交...
点击或拖拽文件到此处