diff --git a/docutranslate/app.py b/docutranslate/app.py index a90eea1..3d6ef63 100644 --- a/docutranslate/app.py +++ b/docutranslate/app.py @@ -337,7 +337,7 @@ class TranslateServiceRequest(BaseModel): ], ) file_content: str = Field( - ..., description="Base64编码的文件内容。", examples=["JVBERi0xLjQK..."] + ..., description="Base64编码的文件内容。", examples=["JVBERi0xLjcKJeLjz9MKMSAwIG9iago8PC9..."] ) payload: TranslatePayload = Field( ..., description="包含工作流类型和相应参数的载荷。" @@ -666,7 +666,7 @@ async def _perform_translation( "force_json", "rpm", "tpm", - "provider", # Added provider + "provider", }, exclude_none=True, ) @@ -688,7 +688,9 @@ async def _perform_translation( converter_config = ConverterMineruDeployConfig( base_url=payload.mineru_deploy_base_url, backend=payload.mineru_deploy_backend, + parse_method=payload.mineru_deploy_parse_method, formula_enable=payload.mineru_deploy_formula_enable, + table_enable=payload.mineru_deploy_table_enable, start_page_id=payload.mineru_deploy_start_page_id, end_page_id=payload.mineru_deploy_end_page_id, lang_list=payload.mineru_deploy_lang_list, @@ -2214,8 +2216,10 @@ async def service_flat_translate( formula_ocr: bool = Form(True, description="[PDF] 是否启用公式识别"), code_ocr: bool = Form(True, description="[Docling] 是否启用代码块识别"), mineru_deploy_base_url: str = Form("http://127.0.0.1:8000", description="[MinerU Local] 服务地址"), - mineru_deploy_backend: str = Form("VLM", description="[MinerU Local] 后端类型"), + mineru_deploy_backend: str = Form("hybrid-auto-engine", description="[MinerU Local] 后端类型: hybrid-auto-engine, pipeline 等"), + mineru_deploy_parse_method: str = Form("auto", description="[MinerU Local] 解析方法: auto, txt, ocr"), mineru_deploy_formula_enable: bool = Form(True, description="[MinerU Local] 是否启用公式"), + mineru_deploy_table_enable: bool = Form(True, description="[MinerU Local] 是否启用表格"), mineru_deploy_start_page_id: int = Form(0, description="[MinerU Local] 起始页码"), mineru_deploy_end_page_id: int = Form(99999, description="[MinerU Local] 结束页码"), mineru_deploy_lang_list: Optional[List[str]] = Form(None, description="[MinerU Local] 语言列表"), @@ -2317,7 +2321,9 @@ async def service_flat_translate( # --- MinerU 本地部署参数 --- "mineru_deploy_base_url": mineru_deploy_base_url, "mineru_deploy_backend": mineru_deploy_backend, + "mineru_deploy_parse_method": mineru_deploy_parse_method, "mineru_deploy_formula_enable": mineru_deploy_formula_enable, + "mineru_deploy_table_enable": mineru_deploy_table_enable, "mineru_deploy_start_page_id": mineru_deploy_start_page_id, "mineru_deploy_end_page_id": mineru_deploy_end_page_id, "mineru_deploy_lang_list": mineru_deploy_lang_list, @@ -2531,4 +2537,4 @@ def run_app(host=None, port: int | None = None, enable_CORS=False, if __name__ == "__main__": - run_app() + run_app() \ No newline at end of file diff --git a/docutranslate/converter/x2md/converter_mineru_deploy.py b/docutranslate/converter/x2md/converter_mineru_deploy.py index 407505a..189522b 100644 --- a/docutranslate/converter/x2md/converter_mineru_deploy.py +++ b/docutranslate/converter/x2md/converter_mineru_deploy.py @@ -2,7 +2,7 @@ # SPDX-License-Identifier: MPL-2.0 import asyncio from dataclasses import dataclass -from typing import Literal, Hashable +from typing import Literal, Hashable, List import httpx @@ -16,24 +16,42 @@ from docutranslate.utils.markdown_utils import embed_inline_image_from_zip @dataclass(kw_only=True) class ConverterMineruDeployConfig(X2MarkdownConverterConfig): base_url: str = "http://127.0.0.1:8000" - output_dir: str = "./output" # 覆盖默认值 ./output - lang_list: list[Literal["ch", "ch_server", "ch_lite", "en", "korean", "japan", "chinese_cht", "ta", "te", "ka", "th", "el", "latin", "arabic", "east_slavic", "cyrillic", "devanagari"]] | None = None - backend: Literal["pipeline", "vlm-transformers", "vlm-mlx-engine", "vlm-vllm-async-engine", "vlm-lmdeploy-engine","vlm-http-client"] = "pipeline" - # parse_method: str = "auto" + output_dir: str = "./output" + # 支持的语言列表 (来自 MinerU API) + lang_list: List[str] | None = None # 默认值在 API 侧处理,这里 None 即可 + + # 后端引擎选项 (更新适配最新的 MinerU API) + backend: Literal[ + "pipeline", + "vlm-auto-engine", + "vlm-http-client", + "hybrid-auto-engine", + "hybrid-http-client" + ] = "hybrid-auto-engine" + + parse_method: Literal["auto", "txt", "ocr"] = "auto" formula_enable: bool = True - # table_enable: bool = True - server_url: str | None = None #(Adapted only for vlm-http-client backend)openai compatible server url, e.g., http://127.0.0.1:30000 - # return_md: bool = True - # return_middle_json: bool = True - # return_model_output: bool = False - # return_content_list: bool = False - # return_images: bool = True - # response_format_zip: bool = True + table_enable: bool = True + + # 用于 vlm-http-client 或 hybrid-http-client 后端 + server_url: str | None = None + + # 返回选项 + return_md: bool = True + return_middle_json: bool = False + return_model_output: bool = False + return_content_list: bool = False + return_images: bool = True + response_format_zip: bool = True + + # 页面范围 start_page_id: int = 0 end_page_id: int = 99999 - def gethash(self) ->Hashable: - return (self.backend,self.formula_enable,self.start_page_id,self.end_page_id) + def gethash(self) -> Hashable: + return (self.backend, self.formula_enable, self.table_enable, + self.parse_method, self.start_page_id, self.end_page_id) + # 配置HTTP客户端 timeout = httpx.Timeout( @@ -57,27 +75,37 @@ class ConverterMineruDeploy(X2MarkdownConverter): self._api_url = f"{self.base_url}/file_parse" - def _build_form_data(self)->dict: + def _build_form_data(self) -> dict: + # httpx 在处理 data 参数时,如果值为 list,会自动展开为多个同名 key (例如 lang_list=ch&lang_list=en) + # 这符合 FastAPI/Starlette 对 List 字段的解析要求 data = { "output_dir": self.config.output_dir, "backend": self.config.backend, - "parse_method": "auto", - "formula_enable": self.config.formula_enable, - "table_enable": True, - "server_url": None, - "return_md": True, - "return_middle_json": True, - "return_model_output": False, - "return_content_list": False, - "return_images": True, - "response_format_zip": True, + "parse_method": self.config.parse_method, + # bool 类型在 multipart/form-data 中通常需要转为字符串 'true'/'false',但 httpx 会处理 python bool + "formula_enable": str(self.config.formula_enable).lower(), + "table_enable": str(self.config.table_enable).lower(), + "return_md": str(self.config.return_md).lower(), + "return_middle_json": str(self.config.return_middle_json).lower(), + "return_model_output": str(self.config.return_model_output).lower(), + "return_content_list": str(self.config.return_content_list).lower(), + "return_images": str(self.config.return_images).lower(), + "response_format_zip": str(self.config.response_format_zip).lower(), "start_page_id": self.config.start_page_id, "end_page_id": self.config.end_page_id } + + if self.config.lang_list: + data["lang_list"] = self.config.lang_list + else: + data["lang_list"] = ["ch"] # 默认值 + + if self.config.server_url: + data["server_url"] = self.config.server_url + return data - - def convert(self,d:Document)->MarkdownDocument: + def convert(self, d: Document) -> MarkdownDocument: self.logger.info("开始解析文件") files = [("files", (d.name, d.content, "application/octet-stream"))] response = client.post( @@ -88,15 +116,15 @@ class ConverterMineruDeploy(X2MarkdownConverter): ) response.raise_for_status() # 检查是否有错误 - md=embed_inline_image_from_zip(response.content,None) + # Mineru API 返回 zip 时包含图片和 md + md = embed_inline_image_from_zip(response.content, None) self.logger.info("已转化为markdown") - return MarkdownDocument.from_bytes(md.encode(),suffix=".md",stem=d.stem) - + return MarkdownDocument.from_bytes(md.encode(), suffix=".md", stem=d.stem) async def convert_async(self, d: Document) -> MarkdownDocument: self.logger.info("开始解析文件") files = [("files", (d.name, d.content, "application/octet-stream"))] - response =await client_async.post( + response = await client_async.post( self._api_url, files=files, data=self._build_form_data(), @@ -104,15 +132,9 @@ class ConverterMineruDeploy(X2MarkdownConverter): ) response.raise_for_status() - md = await asyncio.to_thread(embed_inline_image_from_zip,response.content, None) + md = await asyncio.to_thread(embed_inline_image_from_zip, response.content, None) self.logger.info("已转化为markdown") return MarkdownDocument.from_bytes(md.encode(), suffix=".md", stem=d.stem) def support_format(self) -> list[str]: - return [".pdf", ".doc", ".docx", ".ppt", ".pptx", ".png", ".jpg", ".jpeg"] - -if __name__ == '__main__': - d = Document.from_path(r"C:\Users\jxgm\Desktop\testfiles\table.pdf") - config=ConverterMineruDeployConfig() - converter = ConverterMineruDeploy(config=config) - converter.convert(d) + return [".pdf", ".doc", ".docx", ".ppt", ".pptx", ".png", ".jpg", ".jpeg"] \ No newline at end of file diff --git a/docutranslate/core/schemas.py b/docutranslate/core/schemas.py index 0a9c71e..e734513 100644 --- a/docutranslate/core/schemas.py +++ b/docutranslate/core/schemas.py @@ -252,17 +252,27 @@ class MarkdownWorkflowParams(BaseWorkflowParams): "http://127.0.0.1:8000", description="[仅当 convert_engine='mineru_deploy'] 本地部署的 MinerU 服务地址。", ) + # --- UPDATED BACKEND LIST --- mineru_deploy_backend: Literal[ "pipeline", - "vlm-transformers", - "vlm-mlx-engine", - "vlm-vllm-async-engine", - "vlm-lmdeploy-engine", + "vlm-auto-engine", "vlm-http-client", + "hybrid-auto-engine", + "hybrid-http-client" ] = Field( - "pipeline", + "hybrid-auto-engine", description="[仅当 convert_engine='mineru_deploy'] 本地部署的 MinerU 服务使用的后端。", ) + # --- NEW PARAMETERS START --- + mineru_deploy_parse_method: Literal["auto", "txt", "ocr"] = Field( + "auto", + description="[仅当 convert_engine='mineru_deploy'] 解析方法: auto, txt, ocr" + ) + mineru_deploy_table_enable: bool = Field( + True, + description="[仅当 convert_engine='mineru_deploy'] 本地部署的服务是否启用表格解析。", + ) + # --- NEW PARAMETERS END --- mineru_deploy_formula_enable: bool = Field( True, description="[仅当 convert_engine='mineru_deploy'] 本地部署的服务是否启用公式解析。", @@ -275,13 +285,13 @@ class MarkdownWorkflowParams(BaseWorkflowParams): ) mineru_deploy_lang_list: Optional[List[str]] = Field( None, - description="[仅当 convert_engine='mineru_deploy' 且 backend='pipeline'] 语言列表。", - examples=[None], + description="[仅当 convert_engine='mineru_deploy'] 语言列表, 默认 ['ch']。", + examples=[["ch", "en"]], ) # 修改: 默认值改为 "" mineru_deploy_server_url: Optional[str] = Field( default="", - description="[仅当 convert_engine='mineru_deploy' 且 backend='vlm-http-client'] Server URL.", + description="[仅当 convert_engine='mineru_deploy' 且 backend为http-client相关时] Server URL.", ) @model_validator(mode="after") @@ -312,10 +322,6 @@ class TextWorkflowParams(BaseWorkflowParams): "\n", description="当 insert_mode 为 'append' 或 'prepend' 时,用于分隔原文和译文的分隔符。", ) - segment_mode: Literal["line", "paragraph", "none"] = Field( - "line", - description="分段模式。'line':按行分段(每行独立翻译),'paragraph':按段落分段(连续非空行合并为段落),'none':不分段(全文视为一个段落)。", - ) class JsonWorkflowParams(BaseWorkflowParams): diff --git a/docutranslate/static/i18nData.json b/docutranslate/static/i18nData.json index 6868b46..d0f6714 100644 --- a/docutranslate/static/i18nData.json +++ b/docutranslate/static/i18nData.json @@ -65,6 +65,8 @@ "mineruDeployLangListLabel": "语言列表 (Pipeline模式)", "mineruDeployServerUrlLabel": "Server URL", "mineruDeployServerUrlPlaceholder": "例如: http://127.0.0.1:30000", + "mineruDeployParseMethodLabel": "解析方法 (Parse Method)", + "mineruDeployTableEnableLabel": "表格识别 (Table Recognition)", "mineruDeployStartPageLabel": "起始页码", "mineruDeployEndPageLabel": "结束页码", "mineruDeployFormulaEnableLabel": "启用公式解析", @@ -137,7 +139,7 @@ "previewTranslated": "译文", "closeBtn": "关闭", "tutorialModalTitle": "使用教程", - "tutorialModalBody": "
视频教程可以在B站搜索 docutranslate 获取。
欢迎使用 DocuTranslate!请按照以下步骤完成文档翻译:
在左侧配置面板的顶部,首先选择最适合您文件类型的处理流程。
.txt 纯文本文件。.epub 电子书文件。.docx Word文档。.xlsx 或 .csv 电子表格文件。.pptx 幻灯片文件。.srt 字幕文件。.ass 特效字幕文件。.json 文件中的特定字段。.html 网页文件。选择工作流后,下方会显示相关的配置选项。请依次完成设置(所有配置都会自动保存在您的浏览器中):
A. 工作流特定选项 (根据您第一步的选择出现):
minerU 引擎,需要在此处填入您的Token。\\N,EPUB格式常用 <br /> 作为换行分隔符)。$.*(翻译全部字符串),$..description(翻译所有键为description的值)。B. 通用选项 (适用于所有工作流):
在右侧的任务列表中,点击或拖拽您的文档到文件上传区域。
文件选择成功后,点击任务卡片右下角的 开始翻译 按钮。系统将开始处理任务,您可以在日志区域查看实时进度。
翻译完成后,任务卡片下方会出现操作按钮:
视频教程可以在B站搜索 docutranslate 获取。
欢迎使用 DocuTranslate!请按照以下步骤完成文档翻译:
在左侧配置面板的顶部,首先选择最适合您文件类型的处理流程。
.txt 纯文本文件。.epub 电子书文件。.docx Word文档。.xlsx 或 .csv 电子表格文件。.pptx 幻灯片文件。.srt 字幕文件。.ass 特效字幕文件。.json 文件中的特定字段。.html 网页文件。选择工作流后,下方会显示相关的配置选项。请依次完成设置(所有配置都会自动保存在您的浏览器中):
A. 工作流特定选项 (根据您第一步的选择出现):
minerU 引擎,需要在此处填入您的Token。\\N,EPUB格式常用 <br /> 作为换行分隔符)。$.*(翻译全部字符串),$..description(翻译所有键为description的值)。B. 通用选项 (适用于所有工作流):
在右侧的任务列表中,点击或拖拽您的文档到文件上传区域。
文件选择成功后,点击任务卡片右下角的 开始翻译 按钮。系统将开始处理任务,您可以在日志区域查看实时进度。
翻译完成后,任务卡片下方会出现操作按钮:
GitHub主页(欢迎star❤):
https://github.com/xunbu/docutranslate
交流QQ群: 1047781902
version:{{ version ? 'v' + version : '' }}
{{ t('noTaskPlaceholder') }}
{{ task.backendId || t('taskCardIdPlaceholder') }}
{{ t('taskCardFileDrop') }}
{{ t('taskCardFileSelected') }}
GitHub主页(欢迎star❤):
https://github.com/xunbu/docutranslate
交流QQ群: 1047781902
version:{{ version ? 'v' + version : '' }}
{{ t('noTaskPlaceholder') }}
{{ task.backendId || t('taskCardIdPlaceholder') }}
{{ t('taskCardFileDrop') }}
{{ t('taskCardFileSelected') }}