diff --git a/docutranslate/app.py b/docutranslate/app.py index a90eea1..3d6ef63 100644 --- a/docutranslate/app.py +++ b/docutranslate/app.py @@ -337,7 +337,7 @@ class TranslateServiceRequest(BaseModel): ], ) file_content: str = Field( - ..., description="Base64编码的文件内容。", examples=["JVBERi0xLjQK..."] + ..., description="Base64编码的文件内容。", examples=["JVBERi0xLjcKJeLjz9MKMSAwIG9iago8PC9..."] ) payload: TranslatePayload = Field( ..., description="包含工作流类型和相应参数的载荷。" @@ -666,7 +666,7 @@ async def _perform_translation( "force_json", "rpm", "tpm", - "provider", # Added provider + "provider", }, exclude_none=True, ) @@ -688,7 +688,9 @@ async def _perform_translation( converter_config = ConverterMineruDeployConfig( base_url=payload.mineru_deploy_base_url, backend=payload.mineru_deploy_backend, + parse_method=payload.mineru_deploy_parse_method, formula_enable=payload.mineru_deploy_formula_enable, + table_enable=payload.mineru_deploy_table_enable, start_page_id=payload.mineru_deploy_start_page_id, end_page_id=payload.mineru_deploy_end_page_id, lang_list=payload.mineru_deploy_lang_list, @@ -2214,8 +2216,10 @@ async def service_flat_translate( formula_ocr: bool = Form(True, description="[PDF] 是否启用公式识别"), code_ocr: bool = Form(True, description="[Docling] 是否启用代码块识别"), mineru_deploy_base_url: str = Form("http://127.0.0.1:8000", description="[MinerU Local] 服务地址"), - mineru_deploy_backend: str = Form("VLM", description="[MinerU Local] 后端类型"), + mineru_deploy_backend: str = Form("hybrid-auto-engine", description="[MinerU Local] 后端类型: hybrid-auto-engine, pipeline 等"), + mineru_deploy_parse_method: str = Form("auto", description="[MinerU Local] 解析方法: auto, txt, ocr"), mineru_deploy_formula_enable: bool = Form(True, description="[MinerU Local] 是否启用公式"), + mineru_deploy_table_enable: bool = Form(True, description="[MinerU Local] 是否启用表格"), mineru_deploy_start_page_id: int = Form(0, description="[MinerU Local] 起始页码"), mineru_deploy_end_page_id: int = Form(99999, description="[MinerU Local] 结束页码"), mineru_deploy_lang_list: Optional[List[str]] = Form(None, description="[MinerU Local] 语言列表"), @@ -2317,7 +2321,9 @@ async def service_flat_translate( # --- MinerU 本地部署参数 --- "mineru_deploy_base_url": mineru_deploy_base_url, "mineru_deploy_backend": mineru_deploy_backend, + "mineru_deploy_parse_method": mineru_deploy_parse_method, "mineru_deploy_formula_enable": mineru_deploy_formula_enable, + "mineru_deploy_table_enable": mineru_deploy_table_enable, "mineru_deploy_start_page_id": mineru_deploy_start_page_id, "mineru_deploy_end_page_id": mineru_deploy_end_page_id, "mineru_deploy_lang_list": mineru_deploy_lang_list, @@ -2531,4 +2537,4 @@ def run_app(host=None, port: int | None = None, enable_CORS=False, if __name__ == "__main__": - run_app() + run_app() \ No newline at end of file diff --git a/docutranslate/converter/x2md/converter_mineru_deploy.py b/docutranslate/converter/x2md/converter_mineru_deploy.py index 407505a..189522b 100644 --- a/docutranslate/converter/x2md/converter_mineru_deploy.py +++ b/docutranslate/converter/x2md/converter_mineru_deploy.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: MPL-2.0 import asyncio from dataclasses import dataclass -from typing import Literal, Hashable +from typing import Literal, Hashable, List import httpx @@ -16,24 +16,42 @@ from docutranslate.utils.markdown_utils import embed_inline_image_from_zip @dataclass(kw_only=True) class ConverterMineruDeployConfig(X2MarkdownConverterConfig): base_url: str = "http://127.0.0.1:8000" - output_dir: str = "./output" # 覆盖默认值 ./output - lang_list: list[Literal["ch", "ch_server", "ch_lite", "en", "korean", "japan", "chinese_cht", "ta", "te", "ka", "th", "el", "latin", "arabic", "east_slavic", "cyrillic", "devanagari"]] | None = None - backend: Literal["pipeline", "vlm-transformers", "vlm-mlx-engine", "vlm-vllm-async-engine", "vlm-lmdeploy-engine","vlm-http-client"] = "pipeline" - # parse_method: str = "auto" + output_dir: str = "./output" + # 支持的语言列表 (来自 MinerU API) + lang_list: List[str] | None = None # 默认值在 API 侧处理,这里 None 即可 + + # 后端引擎选项 (更新适配最新的 MinerU API) + backend: Literal[ + "pipeline", + "vlm-auto-engine", + "vlm-http-client", + "hybrid-auto-engine", + "hybrid-http-client" + ] = "hybrid-auto-engine" + + parse_method: Literal["auto", "txt", "ocr"] = "auto" formula_enable: bool = True - # table_enable: bool = True - server_url: str | None = None #(Adapted only for vlm-http-client backend)openai compatible server url, e.g., http://127.0.0.1:30000 - # return_md: bool = True - # return_middle_json: bool = True - # return_model_output: bool = False - # return_content_list: bool = False - # return_images: bool = True - # response_format_zip: bool = True + table_enable: bool = True + + # 用于 vlm-http-client 或 hybrid-http-client 后端 + server_url: str | None = None + + # 返回选项 + return_md: bool = True + return_middle_json: bool = False + return_model_output: bool = False + return_content_list: bool = False + return_images: bool = True + response_format_zip: bool = True + + # 页面范围 start_page_id: int = 0 end_page_id: int = 99999 - def gethash(self) ->Hashable: - return (self.backend,self.formula_enable,self.start_page_id,self.end_page_id) + def gethash(self) -> Hashable: + return (self.backend, self.formula_enable, self.table_enable, + self.parse_method, self.start_page_id, self.end_page_id) + # 配置HTTP客户端 timeout = httpx.Timeout( @@ -57,27 +75,37 @@ class ConverterMineruDeploy(X2MarkdownConverter): self._api_url = f"{self.base_url}/file_parse" - def _build_form_data(self)->dict: + def _build_form_data(self) -> dict: + # httpx 在处理 data 参数时,如果值为 list,会自动展开为多个同名 key (例如 lang_list=ch&lang_list=en) + # 这符合 FastAPI/Starlette 对 List 字段的解析要求 data = { "output_dir": self.config.output_dir, "backend": self.config.backend, - "parse_method": "auto", - "formula_enable": self.config.formula_enable, - "table_enable": True, - "server_url": None, - "return_md": True, - "return_middle_json": True, - "return_model_output": False, - "return_content_list": False, - "return_images": True, - "response_format_zip": True, + "parse_method": self.config.parse_method, + # bool 类型在 multipart/form-data 中通常需要转为字符串 'true'/'false',但 httpx 会处理 python bool + "formula_enable": str(self.config.formula_enable).lower(), + "table_enable": str(self.config.table_enable).lower(), + "return_md": str(self.config.return_md).lower(), + "return_middle_json": str(self.config.return_middle_json).lower(), + "return_model_output": str(self.config.return_model_output).lower(), + "return_content_list": str(self.config.return_content_list).lower(), + "return_images": str(self.config.return_images).lower(), + "response_format_zip": str(self.config.response_format_zip).lower(), "start_page_id": self.config.start_page_id, "end_page_id": self.config.end_page_id } + + if self.config.lang_list: + data["lang_list"] = self.config.lang_list + else: + data["lang_list"] = ["ch"] # 默认值 + + if self.config.server_url: + data["server_url"] = self.config.server_url + return data - - def convert(self,d:Document)->MarkdownDocument: + def convert(self, d: Document) -> MarkdownDocument: self.logger.info("开始解析文件") files = [("files", (d.name, d.content, "application/octet-stream"))] response = client.post( @@ -88,15 +116,15 @@ class ConverterMineruDeploy(X2MarkdownConverter): ) response.raise_for_status() # 检查是否有错误 - md=embed_inline_image_from_zip(response.content,None) + # Mineru API 返回 zip 时包含图片和 md + md = embed_inline_image_from_zip(response.content, None) self.logger.info("已转化为markdown") - return MarkdownDocument.from_bytes(md.encode(),suffix=".md",stem=d.stem) - + return MarkdownDocument.from_bytes(md.encode(), suffix=".md", stem=d.stem) async def convert_async(self, d: Document) -> MarkdownDocument: self.logger.info("开始解析文件") files = [("files", (d.name, d.content, "application/octet-stream"))] - response =await client_async.post( + response = await client_async.post( self._api_url, files=files, data=self._build_form_data(), @@ -104,15 +132,9 @@ class ConverterMineruDeploy(X2MarkdownConverter): ) response.raise_for_status() - md = await asyncio.to_thread(embed_inline_image_from_zip,response.content, None) + md = await asyncio.to_thread(embed_inline_image_from_zip, response.content, None) self.logger.info("已转化为markdown") return MarkdownDocument.from_bytes(md.encode(), suffix=".md", stem=d.stem) def support_format(self) -> list[str]: - return [".pdf", ".doc", ".docx", ".ppt", ".pptx", ".png", ".jpg", ".jpeg"] - -if __name__ == '__main__': - d = Document.from_path(r"C:\Users\jxgm\Desktop\testfiles\table.pdf") - config=ConverterMineruDeployConfig() - converter = ConverterMineruDeploy(config=config) - converter.convert(d) + return [".pdf", ".doc", ".docx", ".ppt", ".pptx", ".png", ".jpg", ".jpeg"] \ No newline at end of file diff --git a/docutranslate/core/schemas.py b/docutranslate/core/schemas.py index 0a9c71e..e734513 100644 --- a/docutranslate/core/schemas.py +++ b/docutranslate/core/schemas.py @@ -252,17 +252,27 @@ class MarkdownWorkflowParams(BaseWorkflowParams): "http://127.0.0.1:8000", description="[仅当 convert_engine='mineru_deploy'] 本地部署的 MinerU 服务地址。", ) + # --- UPDATED BACKEND LIST --- mineru_deploy_backend: Literal[ "pipeline", - "vlm-transformers", - "vlm-mlx-engine", - "vlm-vllm-async-engine", - "vlm-lmdeploy-engine", + "vlm-auto-engine", "vlm-http-client", + "hybrid-auto-engine", + "hybrid-http-client" ] = Field( - "pipeline", + "hybrid-auto-engine", description="[仅当 convert_engine='mineru_deploy'] 本地部署的 MinerU 服务使用的后端。", ) + # --- NEW PARAMETERS START --- + mineru_deploy_parse_method: Literal["auto", "txt", "ocr"] = Field( + "auto", + description="[仅当 convert_engine='mineru_deploy'] 解析方法: auto, txt, ocr" + ) + mineru_deploy_table_enable: bool = Field( + True, + description="[仅当 convert_engine='mineru_deploy'] 本地部署的服务是否启用表格解析。", + ) + # --- NEW PARAMETERS END --- mineru_deploy_formula_enable: bool = Field( True, description="[仅当 convert_engine='mineru_deploy'] 本地部署的服务是否启用公式解析。", @@ -275,13 +285,13 @@ class MarkdownWorkflowParams(BaseWorkflowParams): ) mineru_deploy_lang_list: Optional[List[str]] = Field( None, - description="[仅当 convert_engine='mineru_deploy' 且 backend='pipeline'] 语言列表。", - examples=[None], + description="[仅当 convert_engine='mineru_deploy'] 语言列表, 默认 ['ch']。", + examples=[["ch", "en"]], ) # 修改: 默认值改为 "" mineru_deploy_server_url: Optional[str] = Field( default="", - description="[仅当 convert_engine='mineru_deploy' 且 backend='vlm-http-client'] Server URL.", + description="[仅当 convert_engine='mineru_deploy' 且 backend为http-client相关时] Server URL.", ) @model_validator(mode="after") @@ -312,10 +322,6 @@ class TextWorkflowParams(BaseWorkflowParams): "\n", description="当 insert_mode 为 'append' 或 'prepend' 时,用于分隔原文和译文的分隔符。", ) - segment_mode: Literal["line", "paragraph", "none"] = Field( - "line", - description="分段模式。'line':按行分段(每行独立翻译),'paragraph':按段落分段(连续非空行合并为段落),'none':不分段(全文视为一个段落)。", - ) class JsonWorkflowParams(BaseWorkflowParams): diff --git a/docutranslate/static/i18nData.json b/docutranslate/static/i18nData.json index 6868b46..d0f6714 100644 --- a/docutranslate/static/i18nData.json +++ b/docutranslate/static/i18nData.json @@ -65,6 +65,8 @@ "mineruDeployLangListLabel": "语言列表 (Pipeline模式)", "mineruDeployServerUrlLabel": "Server URL", "mineruDeployServerUrlPlaceholder": "例如: http://127.0.0.1:30000", + "mineruDeployParseMethodLabel": "解析方法 (Parse Method)", + "mineruDeployTableEnableLabel": "表格识别 (Table Recognition)", "mineruDeployStartPageLabel": "起始页码", "mineruDeployEndPageLabel": "结束页码", "mineruDeployFormulaEnableLabel": "启用公式解析", @@ -137,7 +139,7 @@ "previewTranslated": "译文", "closeBtn": "关闭", "tutorialModalTitle": "使用教程", - "tutorialModalBody": "

视频教程可以在B站搜索 docutranslate 获取。

欢迎使用 DocuTranslate!请按照以下步骤完成文档翻译:

  1. 第一步:选择工作流

    在左侧配置面板的顶部,首先选择最适合您文件类型的处理流程。

    提示: 默认已开启“自动选择工作流”。您只需上传文件,系统会自动为您匹配合适的工作流,简化操作。
  2. 第二步:配置参数

    选择工作流后,下方会显示相关的配置选项。请依次完成设置(所有配置都会自动保存在您的浏览器中):

    A. 工作流特定选项 (根据您第一步的选择出现):

    B. 通用选项 (适用于所有工作流):

  3. 第三步:上传文件

    在右侧的任务列表中,点击或拖拽您的文档到文件上传区域。

  4. 第四步:开始翻译

    文件选择成功后,点击任务卡片右下角的 开始翻译 按钮。系统将开始处理任务,您可以在日志区域查看实时进度。

  5. 第五步:查看与下载

    翻译完成后,任务卡片下方会出现操作按钮:

重要提示: 所有配置都会自动保存在您的浏览器本地,方便下次使用。您也可以使用新增的“导出配置”和“导入配置”按钮来备份和恢复您的设置。
", + "tutorialModalBody": "

视频教程可以在B站搜索 docutranslate 获取。

欢迎使用 DocuTranslate!请按照以下步骤完成文档翻译:

  1. 第一步:选择工作流

    在左侧配置面板的顶部,首先选择最适合您文件类型的处理流程。

    提示: 默认已开启“自动选择工作流”。您只需上传文件,系统会自动为您匹配合适的工作流,简化操作。
  2. 第二步:配置参数

    选择工作流后,下方会显示相关的配置选项。请依次完成设置(所有配置都会自动保存在您的浏览器中):

    A. 工作流特定选项 (根据您第一步的选择出现):

    B. 通用选项 (适用于所有工作流):

  3. 第三步:上传文件

    在右侧的任务列表中,点击或拖拽您的文档到文件上传区域。

  4. 第四步:开始翻译

    文件选择成功后,点击任务卡片右下角的 开始翻译 按钮。系统将开始处理任务,您可以在日志区域查看实时进度。

  5. 第五步:查看与下载

    翻译完成后,任务卡片下方会出现操作按钮:

重要提示: 所有配置都会自动保存在您的浏览器本地,方便下次使用。您也可以使用新增的“导出配置”和“导入配置”按钮来备份和恢复您的设置。
", "tutorialUnderstandBtn": "我明白了", "contributorsModalTitle": "感谢贡献", "contributorsPara1": "DocuTranslate是一个开源项目!大家的需求与使用是项目进步的动力。", @@ -236,6 +238,8 @@ "mineruDeployLangListLabel": "Language List (Pipeline Mode)", "mineruDeployServerUrlLabel": "Server URL", "mineruDeployServerUrlPlaceholder": "e.g., http://127.0.0.1:30000", + "mineruDeployParseMethodLabel": "Parse Method", + "mineruDeployTableEnableLabel": "Table Recognition", "mineruDeployStartPageLabel": "Start Page", "mineruDeployEndPageLabel": "End Page", "mineruDeployFormulaEnableLabel": "Enable Formula Parsing", diff --git a/docutranslate/static/index.html b/docutranslate/static/index.html index 2203427..65e45b4 100644 --- a/docutranslate/static/index.html +++ b/docutranslate/static/index.html @@ -1 +1 @@ - DocuTranslate - 交互式文档翻译

DocuTranslate

{{ t(currentWorkflowConfig.insertHelpKey || 'insertModeHelpTxt') }}
{{ t('segmentModeHelp') }}

{{ t('parsingEngineHelp') }}
{{ t('modelVersionHelp') }}

{{ t('glossaryHelp') }}

GitHub主页(欢迎star❤):
https://github.com/xunbu/docutranslate

交流QQ群: 1047781902

version:{{ version ? 'v' + version : '' }}

{{ t('taskListTitle') }}

LOGO

{{ t('noTaskPlaceholder') }}

{{ t('taskCardIdLabel') }}: {{ task.backendId || t('taskCardIdPlaceholder') }}

{{ t('taskCardFileDrop') }}

{{ t('taskCardFileSelected') }}

{{ t('taskCardFilenameLabel') }} {{ task.fileName || task.file.name }}
{{ t('taskCardLogLabel') }}
{{ task.statusMessage || t('taskCardStatusWaiting') }}
{{ previewMode === 'bilingual' ? t('preview_bilingual') : t('preview_translatedOnly') }}
{{ t('previewOriginal') }}
{{ t('previewTranslated') }}
+ DocuTranslate - 交互式文档翻译

DocuTranslate

{{ t(currentWorkflowConfig.insertHelpKey || 'insertModeHelpTxt') }}
{{ t('segmentModeHelp') }}

{{ t('parsingEngineHelp') }}
{{ t('modelVersionHelp') }}

{{ t('glossaryHelp') }}

GitHub主页(欢迎star❤):
https://github.com/xunbu/docutranslate

交流QQ群: 1047781902

version:{{ version ? 'v' + version : '' }}

{{ t('taskListTitle') }}

LOGO

{{ t('noTaskPlaceholder') }}

{{ t('taskCardIdLabel') }}: {{ task.backendId || t('taskCardIdPlaceholder') }}

{{ t('taskCardFileDrop') }}

{{ t('taskCardFileSelected') }}

{{ t('taskCardFilenameLabel') }} {{ task.fileName || task.file.name }}
{{ t('taskCardLogLabel') }}
{{ task.statusMessage || t('taskCardStatusWaiting') }}
{{ previewMode === 'bilingual' ? t('preview_bilingual') : t('preview_translatedOnly') }}
{{ t('previewOriginal') }}
{{ t('previewTranslated') }}
\ No newline at end of file