支持 MinerU 的 VLM 模型

This commit is contained in:
xunbu
2025-08-20 13:36:01 +08:00
parent d3d74564f1
commit 0cf0aa15da
4 changed files with 42 additions and 10 deletions

View File

@@ -205,7 +205,7 @@ class BaseWorkflowParams(BaseModel):
concurrent: int = Field(default=default_params["concurrent"], description="并发请求数。") concurrent: int = Field(default=default_params["concurrent"], description="并发请求数。")
temperature: float = Field(default=default_params["temperature"], description="LLM温度参数。") temperature: float = Field(default=default_params["temperature"], description="LLM温度参数。")
thinking: ThinkingMode = Field(default=default_params["thinking"], description="是否启用深度思考", thinking: ThinkingMode = Field(default=default_params["thinking"], description="是否启用深度思考",
examples=["default", "enable", "disable"]), examples=["default", "enable", "disable"])
custom_prompt: Optional[str] = Field(None, description="用户自定义的翻译Prompt。", alias="custom_prompt") custom_prompt: Optional[str] = Field(None, description="用户自定义的翻译Prompt。", alias="custom_prompt")
@@ -220,6 +220,7 @@ class MarkdownWorkflowParams(BaseWorkflowParams):
mineru_token: Optional[str] = Field(None, description="当 `convert_engine` 为 'mineru' 时必填的API令牌。") mineru_token: Optional[str] = Field(None, description="当 `convert_engine` 为 'mineru' 时必填的API令牌。")
formula_ocr: bool = Field(True, description="是否对公式进行OCR识别。对 `mineru` 和 `docling` 均有效。") formula_ocr: bool = Field(True, description="是否对公式进行OCR识别。对 `mineru` 和 `docling` 均有效。")
code_ocr: bool = Field(True, description="是否对代码块进行OCR识别。仅 `docling` 引擎有效。") code_ocr: bool = Field(True, description="是否对代码块进行OCR识别。仅 `docling` 引擎有效。")
model_version: Literal["pipline", "vlm"] = Field("vlm", description="Mineru模型的版本'vlm'是更新的版本。仅 `mineru` 引擎有效。")
@field_validator('mineru_token') @field_validator('mineru_token')
def check_mineru_token(cls, v, values): def check_mineru_token(cls, v, values):
@@ -479,7 +480,8 @@ async def _perform_translation(
converter_config = None converter_config = None
if payload.convert_engine == 'mineru': if payload.convert_engine == 'mineru':
converter_config = ConverterMineruConfig(logger=task_logger, mineru_token=payload.mineru_token, converter_config = ConverterMineruConfig(logger=task_logger, mineru_token=payload.mineru_token,
formula_ocr=payload.formula_ocr) formula_ocr=payload.formula_ocr,
model_version=payload.model_version)
elif payload.convert_engine == 'docling' and DOCLING_EXIST: elif payload.convert_engine == 'docling' and DOCLING_EXIST:
converter_config = ConverterDoclingConfig(logger=task_logger, code_ocr=payload.code_ocr, converter_config = ConverterDoclingConfig(logger=task_logger, code_ocr=payload.code_ocr,
formula_ocr=payload.formula_ocr) formula_ocr=payload.formula_ocr)
@@ -1214,6 +1216,7 @@ async def temp_translate(
temperature: float = Body(default_params["temperature"]), temperature: float = Body(default_params["temperature"]),
thinking: ThinkingMode = Body(default_params["thinking"]), thinking: ThinkingMode = Body(default_params["thinking"]),
chunk_size: int = Body(default_params["chunk_size"]), custom_prompt: Optional[str] = Body(None), chunk_size: int = Body(default_params["chunk_size"]), custom_prompt: Optional[str] = Body(None),
model_version: Literal["pipline", "vlm"] = Body("vlm"),
): ):
file_name = Path(file_name) file_name = Path(file_name)
try: try:
@@ -1222,7 +1225,7 @@ async def temp_translate(
decoded_content = file_content.encode('utf-8') decoded_content = file_content.encode('utf-8')
try: try:
workflow_config = MarkdownBasedWorkflowConfig( workflow_config = MarkdownBasedWorkflowConfig(
convert_engine="mineru", converter_config=ConverterMineruConfig(mineru_token=mineru_token), convert_engine="mineru", converter_config=ConverterMineruConfig(mineru_token=mineru_token, model_version=model_version),
translator_config=MDTranslatorConfig(base_url=base_url, api_key=api_key, model_id=model_id, translator_config=MDTranslatorConfig(base_url=base_url, api_key=api_key, model_id=model_id,
to_lang=to_lang, custom_prompt=custom_prompt, temperature=temperature, to_lang=to_lang, custom_prompt=custom_prompt, temperature=temperature,
thinking=thinking, chunk_size=chunk_size, concurrent=concurrent), thinking=thinking, chunk_size=chunk_size, concurrent=concurrent),

View File

@@ -2,7 +2,7 @@ import asyncio
import time import time
import zipfile import zipfile
from dataclasses import dataclass from dataclasses import dataclass
from typing import Hashable from typing import Hashable, Literal
import httpx import httpx
@@ -18,9 +18,10 @@ URL = 'https://mineru.net/api/v4/file-urls/batch'
class ConverterMineruConfig(X2MarkdownConverterConfig): class ConverterMineruConfig(X2MarkdownConverterConfig):
mineru_token: str mineru_token: str
formula_ocr: bool = True formula_ocr: bool = True
model_version: Literal["pipline", "vlm"] = "vlm"
def gethash(self) -> Hashable: def gethash(self) -> Hashable:
return self.formula_ocr return (self.formula_ocr,self.model_version)
timeout = httpx.Timeout( timeout = httpx.Timeout(
@@ -44,6 +45,7 @@ class ConverterMineru(X2MarkdownConverter):
super().__init__(config=config) super().__init__(config=config)
self.mineru_token = config.mineru_token.strip() self.mineru_token = config.mineru_token.strip()
self.formula = config.formula_ocr self.formula = config.formula_ocr
self.model_version=config.model_version
def _get_header(self): def _get_header(self):
return { return {
@@ -56,6 +58,7 @@ class ConverterMineru(X2MarkdownConverter):
"enable_formula": self.formula, "enable_formula": self.formula,
"language": "auto", "language": "auto",
"enable_table": True, "enable_table": True,
"model_version":self.model_version,
"files": [ "files": [
{"name": f"{document.name}", "is_ocr": True} {"name": f"{document.name}", "is_ocr": True}
] ]

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long