支持mineru部署服务
This commit is contained in:
@@ -37,7 +37,6 @@ from fastapi.staticfiles import StaticFiles
|
|||||||
from pydantic import (
|
from pydantic import (
|
||||||
BaseModel,
|
BaseModel,
|
||||||
Field,
|
Field,
|
||||||
field_validator,
|
|
||||||
model_validator,
|
model_validator,
|
||||||
AliasChoices,
|
AliasChoices,
|
||||||
ConfigDict,
|
ConfigDict,
|
||||||
@@ -46,7 +45,6 @@ from pydantic import (
|
|||||||
from docutranslate import __version__
|
from docutranslate import __version__
|
||||||
from docutranslate.agents.agent import ThinkingMode
|
from docutranslate.agents.agent import ThinkingMode
|
||||||
from docutranslate.agents.glossary_agent import GlossaryAgentConfig
|
from docutranslate.agents.glossary_agent import GlossaryAgentConfig
|
||||||
from docutranslate.exporter.md.types import ConvertEngineType
|
|
||||||
|
|
||||||
# --- 核心代码 Imports ---
|
# --- 核心代码 Imports ---
|
||||||
from docutranslate.global_values.conditional_import import DOCLING_EXIST
|
from docutranslate.global_values.conditional_import import DOCLING_EXIST
|
||||||
@@ -78,6 +76,9 @@ from docutranslate.workflow.xlsx_workflow import XlsxWorkflow, XlsxWorkflowConfi
|
|||||||
if DOCLING_EXIST or TYPE_CHECKING:
|
if DOCLING_EXIST or TYPE_CHECKING:
|
||||||
from docutranslate.converter.x2md.converter_docling import ConverterDoclingConfig
|
from docutranslate.converter.x2md.converter_docling import ConverterDoclingConfig
|
||||||
from docutranslate.converter.x2md.converter_mineru import ConverterMineruConfig
|
from docutranslate.converter.x2md.converter_mineru import ConverterMineruConfig
|
||||||
|
# --- 新增的 Import ---
|
||||||
|
from docutranslate.converter.x2md.converter_mineru_deploy import ConverterMineruDeployConfig
|
||||||
|
# ----------------------
|
||||||
from docutranslate.exporter.md.md2html_exporter import MD2HTMLExporterConfig
|
from docutranslate.exporter.md.md2html_exporter import MD2HTMLExporterConfig
|
||||||
from docutranslate.exporter.txt.txt2html_exporter import TXT2HTMLExporterConfig
|
from docutranslate.exporter.txt.txt2html_exporter import TXT2HTMLExporterConfig
|
||||||
from docutranslate.translator.ai_translator.md_translator import MDTranslatorConfig
|
from docutranslate.translator.ai_translator.md_translator import MDTranslatorConfig
|
||||||
@@ -399,31 +400,61 @@ class MarkdownWorkflowParams(BaseWorkflowParams):
|
|||||||
workflow_type: Literal["markdown_based"] = Field(
|
workflow_type: Literal["markdown_based"] = Field(
|
||||||
..., description="指定使用基于Markdown的翻译工作流。"
|
..., description="指定使用基于Markdown的翻译工作流。"
|
||||||
)
|
)
|
||||||
convert_engine: ConvertEngineType = Field(
|
convert_engine: Literal["identity", "mineru", "docling", "mineru_deploy"] = Field(
|
||||||
"identity",
|
"identity",
|
||||||
description="选择将文件解析为markdown的引擎。如果输入文件是.md,此项可为`null`或不传。",
|
description="选择将文件解析为markdown的引擎。'mineru_deploy' 适用于本地部署的 MinerU 服务。如果输入文件是.md,此项可为`identity`或不传。",
|
||||||
examples=["identity", "mineru", "docling"],
|
examples=["identity", "mineru", "docling", "mineru_deploy"],
|
||||||
)
|
|
||||||
mineru_token: Optional[str] = Field(
|
|
||||||
None, description="当 `convert_engine` 为 'mineru' 时必填的API令牌。"
|
|
||||||
)
|
|
||||||
formula_ocr: bool = Field(
|
|
||||||
True, description="是否对公式进行OCR识别。对 `mineru` 和 `docling` 均有效。"
|
|
||||||
)
|
|
||||||
code_ocr: bool = Field(
|
|
||||||
True, description="是否对代码块进行OCR识别。仅 `docling` 引擎有效。"
|
|
||||||
)
|
|
||||||
model_version: Literal["pipeline", "vlm"] = Field(
|
|
||||||
"vlm", description="Mineru模型的版本,'vlm'是更新的版本。仅 `mineru` 引擎有效。"
|
|
||||||
)
|
)
|
||||||
|
|
||||||
@field_validator("mineru_token")
|
# --- Engine-Specific Parameters ---
|
||||||
def check_mineru_token(cls, v, values):
|
|
||||||
if values.data.get("convert_engine") == "mineru" and not v:
|
# -- For "mineru" (Cloud API) --
|
||||||
|
mineru_token: Optional[str] = Field(
|
||||||
|
None, description="[仅当 convert_engine='mineru'] 必填的API令牌。"
|
||||||
|
)
|
||||||
|
model_version: Literal["pipeline", "vlm"] = Field(
|
||||||
|
"vlm", description="[仅当 convert_engine='mineru'] Mineru Cloud模型的版本。"
|
||||||
|
)
|
||||||
|
formula_ocr: bool = Field(
|
||||||
|
True, description="[仅当 convert_engine='mineru' 或 'docling'] 是否对公式进行OCR识别。"
|
||||||
|
)
|
||||||
|
|
||||||
|
# -- For "docling" --
|
||||||
|
code_ocr: bool = Field(
|
||||||
|
True, description="[仅当 convert_engine='docling'] 是否对代码块进行OCR识别。"
|
||||||
|
)
|
||||||
|
|
||||||
|
# -- For "mineru_deploy" (Local Deployment) --
|
||||||
|
mineru_deploy_base_url: Optional[str] = Field(
|
||||||
|
"http://127.0.0.1:8000",
|
||||||
|
description="[仅当 convert_engine='mineru_deploy'] 本地部署的 MinerU 服务地址。",
|
||||||
|
)
|
||||||
|
mineru_deploy_backend: Literal["pipeline", "vlm"] = Field(
|
||||||
|
"pipeline",
|
||||||
|
description="[仅当 convert_engine='mineru_deploy'] 本地部署的 MinerU 服务使用的后端。",
|
||||||
|
)
|
||||||
|
mineru_deploy_formula_enable: bool = Field(
|
||||||
|
True,
|
||||||
|
description="[仅当 convert_engine='mineru_deploy'] 本地部署的服务是否启用公式解析。",
|
||||||
|
)
|
||||||
|
mineru_deploy_start_page_id: int = Field(
|
||||||
|
0, description="[仅当 convert_engine='mineru_deploy'] 起始解析页面。"
|
||||||
|
)
|
||||||
|
mineru_deploy_end_page_id: int = Field(
|
||||||
|
99999, description="[仅当 convert_engine='mineru_deploy'] 结束解析页面。"
|
||||||
|
)
|
||||||
|
|
||||||
|
@model_validator(mode="after")
|
||||||
|
def check_engine_params(self):
|
||||||
|
if self.convert_engine == "mineru" and not self.mineru_token:
|
||||||
raise ValueError(
|
raise ValueError(
|
||||||
"当 `convert_engine` 为 'mineru' 时,`mineru_token` 字段是必须的。"
|
"当 `convert_engine` 为 'mineru' 时,`mineru_token` 字段是必须的。"
|
||||||
)
|
)
|
||||||
return v
|
if self.convert_engine == "mineru_deploy" and not self.mineru_deploy_base_url:
|
||||||
|
raise ValueError(
|
||||||
|
"当 `convert_engine` 为 'mineru_deploy' 时,`mineru_deploy_base_url` 字段是必须的。"
|
||||||
|
)
|
||||||
|
return self
|
||||||
|
|
||||||
|
|
||||||
class TextWorkflowParams(BaseWorkflowParams):
|
class TextWorkflowParams(BaseWorkflowParams):
|
||||||
@@ -612,6 +643,21 @@ class TranslateServiceRequest(BaseModel):
|
|||||||
"model_version": "vlm",
|
"model_version": "vlm",
|
||||||
},
|
},
|
||||||
},
|
},
|
||||||
|
{
|
||||||
|
"file_name": "local_test.pdf",
|
||||||
|
"file_content": "JVBERi0xLjcKJeLjz9MKMSAwIG9iago8PC9...",
|
||||||
|
"payload": {
|
||||||
|
"workflow_type": "markdown_based",
|
||||||
|
"skip_translate": True,
|
||||||
|
"to_lang": "中文",
|
||||||
|
"convert_engine": "mineru_deploy",
|
||||||
|
"mineru_deploy_base_url": "http://127.0.0.1:8000",
|
||||||
|
"mineru_deploy_backend": "pipeline",
|
||||||
|
"mineru_deploy_formula_enable": True,
|
||||||
|
"mineru_deploy_start_page_id": 0,
|
||||||
|
"mineru_deploy_end_page_id": 5,
|
||||||
|
},
|
||||||
|
},
|
||||||
{
|
{
|
||||||
"file_name": "product_info.json",
|
"file_name": "product_info.json",
|
||||||
"file_content": "ewogICAgImlkIjogIjEyMzQ1IiwK...",
|
"file_content": "ewogICAgImlkIjogIjEyMzQ1IiwK...",
|
||||||
@@ -874,6 +920,14 @@ async def _perform_translation(
|
|||||||
formula_ocr=payload.formula_ocr,
|
formula_ocr=payload.formula_ocr,
|
||||||
model_version=payload.model_version,
|
model_version=payload.model_version,
|
||||||
)
|
)
|
||||||
|
elif payload.convert_engine == "mineru_deploy":
|
||||||
|
converter_config = ConverterMineruDeployConfig(
|
||||||
|
base_url=payload.mineru_deploy_base_url,
|
||||||
|
backend=payload.mineru_deploy_backend,
|
||||||
|
formula_enable=payload.mineru_deploy_formula_enable,
|
||||||
|
start_page_id=payload.mineru_deploy_start_page_id,
|
||||||
|
end_page_id=payload.mineru_deploy_end_page_id,
|
||||||
|
)
|
||||||
elif payload.convert_engine == "docling" and DOCLING_EXIST:
|
elif payload.convert_engine == "docling" and DOCLING_EXIST:
|
||||||
converter_config = ConverterDoclingConfig(
|
converter_config = ConverterDoclingConfig(
|
||||||
logger=task_logger,
|
logger=task_logger,
|
||||||
@@ -1458,7 +1512,6 @@ async def _start_translation_task(
|
|||||||
|
|
||||||
initial_log_msg = f"收到新的翻译请求: {original_filename}"
|
initial_log_msg = f"收到新的翻译请求: {original_filename}"
|
||||||
print(f"[{task_id}] {initial_log_msg}")
|
print(f"[{task_id}] {initial_log_msg}")
|
||||||
log_history.append(initial_log_msg)
|
|
||||||
await log_queue.put(initial_log_msg)
|
await log_queue.put(initial_log_msg)
|
||||||
|
|
||||||
try:
|
try:
|
||||||
@@ -2093,7 +2146,7 @@ async def service_content(
|
|||||||
"/engin-list", tags=["Application"], description="返回正在进行的可用的转换引擎"
|
"/engin-list", tags=["Application"], description="返回正在进行的可用的转换引擎"
|
||||||
)
|
)
|
||||||
async def service_get_engin_list():
|
async def service_get_engin_list():
|
||||||
engin_list = ["mineru"]
|
engin_list = ["mineru", "mineru_deploy"]
|
||||||
if DOCLING_EXIST:
|
if DOCLING_EXIST:
|
||||||
engin_list.append("docling")
|
engin_list.append("docling")
|
||||||
return JSONResponse(content=engin_list)
|
return JSONResponse(content=engin_list)
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user