支持mineru部署服务

This commit is contained in:
xunbu
2025-11-04 18:03:58 +08:00
parent 34ddb0b94c
commit e2bb898c54
3 changed files with 177 additions and 106 deletions

View File

@@ -37,7 +37,6 @@ from fastapi.staticfiles import StaticFiles
from pydantic import ( from pydantic import (
BaseModel, BaseModel,
Field, Field,
field_validator,
model_validator, model_validator,
AliasChoices, AliasChoices,
ConfigDict, ConfigDict,
@@ -46,7 +45,6 @@ from pydantic import (
from docutranslate import __version__ from docutranslate import __version__
from docutranslate.agents.agent import ThinkingMode from docutranslate.agents.agent import ThinkingMode
from docutranslate.agents.glossary_agent import GlossaryAgentConfig from docutranslate.agents.glossary_agent import GlossaryAgentConfig
from docutranslate.exporter.md.types import ConvertEngineType
# --- 核心代码 Imports --- # --- 核心代码 Imports ---
from docutranslate.global_values.conditional_import import DOCLING_EXIST from docutranslate.global_values.conditional_import import DOCLING_EXIST
@@ -78,6 +76,9 @@ from docutranslate.workflow.xlsx_workflow import XlsxWorkflow, XlsxWorkflowConfi
if DOCLING_EXIST or TYPE_CHECKING: if DOCLING_EXIST or TYPE_CHECKING:
from docutranslate.converter.x2md.converter_docling import ConverterDoclingConfig from docutranslate.converter.x2md.converter_docling import ConverterDoclingConfig
from docutranslate.converter.x2md.converter_mineru import ConverterMineruConfig from docutranslate.converter.x2md.converter_mineru import ConverterMineruConfig
# --- 新增的 Import ---
from docutranslate.converter.x2md.converter_mineru_deploy import ConverterMineruDeployConfig
# ----------------------
from docutranslate.exporter.md.md2html_exporter import MD2HTMLExporterConfig from docutranslate.exporter.md.md2html_exporter import MD2HTMLExporterConfig
from docutranslate.exporter.txt.txt2html_exporter import TXT2HTMLExporterConfig from docutranslate.exporter.txt.txt2html_exporter import TXT2HTMLExporterConfig
from docutranslate.translator.ai_translator.md_translator import MDTranslatorConfig from docutranslate.translator.ai_translator.md_translator import MDTranslatorConfig
@@ -399,31 +400,61 @@ class MarkdownWorkflowParams(BaseWorkflowParams):
workflow_type: Literal["markdown_based"] = Field( workflow_type: Literal["markdown_based"] = Field(
..., description="指定使用基于Markdown的翻译工作流。" ..., description="指定使用基于Markdown的翻译工作流。"
) )
convert_engine: ConvertEngineType = Field( convert_engine: Literal["identity", "mineru", "docling", "mineru_deploy"] = Field(
"identity", "identity",
description="选择将文件解析为markdown的引擎。如果输入文件是.md此项可为`null`或不传。", description="选择将文件解析为markdown的引擎。'mineru_deploy' 适用于本地部署的 MinerU 服务。如果输入文件是.md此项可为`identity`或不传。",
examples=["identity", "mineru", "docling"], examples=["identity", "mineru", "docling", "mineru_deploy"],
)
mineru_token: Optional[str] = Field(
None, description="当 `convert_engine` 为 'mineru' 时必填的API令牌。"
)
formula_ocr: bool = Field(
True, description="是否对公式进行OCR识别。对 `mineru` 和 `docling` 均有效。"
)
code_ocr: bool = Field(
True, description="是否对代码块进行OCR识别。仅 `docling` 引擎有效。"
)
model_version: Literal["pipeline", "vlm"] = Field(
"vlm", description="Mineru模型的版本'vlm'是更新的版本。仅 `mineru` 引擎有效。"
) )
@field_validator("mineru_token") # --- Engine-Specific Parameters ---
def check_mineru_token(cls, v, values):
if values.data.get("convert_engine") == "mineru" and not v: # -- For "mineru" (Cloud API) --
mineru_token: Optional[str] = Field(
None, description="[仅当 convert_engine='mineru'] 必填的API令牌。"
)
model_version: Literal["pipeline", "vlm"] = Field(
"vlm", description="[仅当 convert_engine='mineru'] Mineru Cloud模型的版本。"
)
formula_ocr: bool = Field(
True, description="[仅当 convert_engine='mineru''docling'] 是否对公式进行OCR识别。"
)
# -- For "docling" --
code_ocr: bool = Field(
True, description="[仅当 convert_engine='docling'] 是否对代码块进行OCR识别。"
)
# -- For "mineru_deploy" (Local Deployment) --
mineru_deploy_base_url: Optional[str] = Field(
"http://127.0.0.1:8000",
description="[仅当 convert_engine='mineru_deploy'] 本地部署的 MinerU 服务地址。",
)
mineru_deploy_backend: Literal["pipeline", "vlm"] = Field(
"pipeline",
description="[仅当 convert_engine='mineru_deploy'] 本地部署的 MinerU 服务使用的后端。",
)
mineru_deploy_formula_enable: bool = Field(
True,
description="[仅当 convert_engine='mineru_deploy'] 本地部署的服务是否启用公式解析。",
)
mineru_deploy_start_page_id: int = Field(
0, description="[仅当 convert_engine='mineru_deploy'] 起始解析页面。"
)
mineru_deploy_end_page_id: int = Field(
99999, description="[仅当 convert_engine='mineru_deploy'] 结束解析页面。"
)
@model_validator(mode="after")
def check_engine_params(self):
if self.convert_engine == "mineru" and not self.mineru_token:
raise ValueError( raise ValueError(
"当 `convert_engine` 为 'mineru' 时,`mineru_token` 字段是必须的。" "当 `convert_engine` 为 'mineru' 时,`mineru_token` 字段是必须的。"
) )
return v if self.convert_engine == "mineru_deploy" and not self.mineru_deploy_base_url:
raise ValueError(
"当 `convert_engine` 为 'mineru_deploy' 时,`mineru_deploy_base_url` 字段是必须的。"
)
return self
class TextWorkflowParams(BaseWorkflowParams): class TextWorkflowParams(BaseWorkflowParams):
@@ -612,6 +643,21 @@ class TranslateServiceRequest(BaseModel):
"model_version": "vlm", "model_version": "vlm",
}, },
}, },
{
"file_name": "local_test.pdf",
"file_content": "JVBERi0xLjcKJeLjz9MKMSAwIG9iago8PC9...",
"payload": {
"workflow_type": "markdown_based",
"skip_translate": True,
"to_lang": "中文",
"convert_engine": "mineru_deploy",
"mineru_deploy_base_url": "http://127.0.0.1:8000",
"mineru_deploy_backend": "pipeline",
"mineru_deploy_formula_enable": True,
"mineru_deploy_start_page_id": 0,
"mineru_deploy_end_page_id": 5,
},
},
{ {
"file_name": "product_info.json", "file_name": "product_info.json",
"file_content": "ewogICAgImlkIjogIjEyMzQ1IiwK...", "file_content": "ewogICAgImlkIjogIjEyMzQ1IiwK...",
@@ -874,6 +920,14 @@ async def _perform_translation(
formula_ocr=payload.formula_ocr, formula_ocr=payload.formula_ocr,
model_version=payload.model_version, model_version=payload.model_version,
) )
elif payload.convert_engine == "mineru_deploy":
converter_config = ConverterMineruDeployConfig(
base_url=payload.mineru_deploy_base_url,
backend=payload.mineru_deploy_backend,
formula_enable=payload.mineru_deploy_formula_enable,
start_page_id=payload.mineru_deploy_start_page_id,
end_page_id=payload.mineru_deploy_end_page_id,
)
elif payload.convert_engine == "docling" and DOCLING_EXIST: elif payload.convert_engine == "docling" and DOCLING_EXIST:
converter_config = ConverterDoclingConfig( converter_config = ConverterDoclingConfig(
logger=task_logger, logger=task_logger,
@@ -1458,7 +1512,6 @@ async def _start_translation_task(
initial_log_msg = f"收到新的翻译请求: {original_filename}" initial_log_msg = f"收到新的翻译请求: {original_filename}"
print(f"[{task_id}] {initial_log_msg}") print(f"[{task_id}] {initial_log_msg}")
log_history.append(initial_log_msg)
await log_queue.put(initial_log_msg) await log_queue.put(initial_log_msg)
try: try:
@@ -2093,7 +2146,7 @@ async def service_content(
"/engin-list", tags=["Application"], description="返回正在进行的可用的转换引擎" "/engin-list", tags=["Application"], description="返回正在进行的可用的转换引擎"
) )
async def service_get_engin_list(): async def service_get_engin_list():
engin_list = ["mineru"] engin_list = ["mineru", "mineru_deploy"]
if DOCLING_EXIST: if DOCLING_EXIST:
engin_list.append("docling") engin_list.append("docling")
return JSONResponse(content=engin_list) return JSONResponse(content=engin_list)

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long