This commit is contained in:
xunbu
2025-07-29 23:19:48 +08:00
parent 59005d7d8c
commit 46a7cf4663
11 changed files with 38 additions and 32 deletions

View File

@@ -9,7 +9,7 @@ import time
import uuid import uuid
from contextlib import asynccontextmanager, closing from contextlib import asynccontextmanager, closing
from pathlib import Path from pathlib import Path
from typing import List, Dict, Any, Optional, Literal, Union, Annotated from typing import List, Dict, Any, Optional, Literal, Union, Annotated, TYPE_CHECKING
from urllib.parse import quote from urllib.parse import quote
import httpx import httpx
@@ -20,12 +20,14 @@ from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse, Fil
from fastapi.staticfiles import StaticFiles from fastapi.staticfiles import StaticFiles
from pydantic import BaseModel, Field from pydantic import BaseModel, Field
from docutranslate.global_values.conditional_import import DOCLING_FLAG
# --- 核心代码重构后的新 Imports --- # --- 核心代码重构后的新 Imports ---
from docutranslate.manager.base_manager import BaseManager from docutranslate.manager.base_manager import BaseManager
from docutranslate.manager.md_based_manager import MarkdownBasedManager from docutranslate.manager.md_based_manager import MarkdownBasedManager
from docutranslate.manager.txt_manager import TXTManager from docutranslate.manager.txt_manager import TXTManager
from docutranslate.manager.interfaces import HTMLExportable, MDFormatsExportable, TXTExportable from docutranslate.manager.interfaces import HTMLExportable, MDFormatsExportable, TXTExportable
from docutranslate.converter.x2md.converter_docling import ConverterDoclingConfig if DOCLING_FLAG or TYPE_CHECKING:
from docutranslate.converter.x2md.converter_docling import ConverterDoclingConfig
from docutranslate.converter.x2md.converter_mineru import ConverterMineruConfig from docutranslate.converter.x2md.converter_mineru import ConverterMineruConfig
from docutranslate.exporter.md2x.md2html_exporter import MD2HTMLExportConfig from docutranslate.exporter.md2x.md2html_exporter import MD2HTMLExportConfig
from docutranslate.exporter.txt2x.txt2html_exporter import TXT2HTMLExportConfig from docutranslate.exporter.txt2x.txt2html_exporter import TXT2HTMLExportConfig
@@ -451,9 +453,10 @@ def _cancel_translation_logic(task_id: str):
description=""" description="""
接收一个包含文件内容Base64编码和工作流参数的JSON请求启动一个后台翻译任务。 接收一个包含文件内容Base64编码和工作流参数的JSON请求启动一个后台翻译任务。
- **工作流选择**: 请求体中的 `payload.workflow_type` 字段决定了本次任务的类型(如 `markdown` 或 `text`)。 - **工作流选择**: 请求体中的 `payload.workflow_type` 字段决定了本次任务的类型(如 `markdown_based` 或 `txt`)。
- **动态参数**: 根据所选工作流API需要不同的参数集。请参考下面的Schema或示例。 - **动态参数**: 根据所选工作流API需要不同的参数集。请参考下面的Schema或示例。
- **异步处理**: 此端点会立即返回任务ID客户端需轮询状态接口获取进度。 - **异步处理**: 此端点会立即返回任务ID客户端需轮询状态接口获取进度。
""", """,
responses={ responses={
200: { 200: {

View File

@@ -1,9 +1,8 @@
from typing import Protocol, runtime_checkable from typing import Protocol
from docutranslate.ir.document import Document from docutranslate.ir.document import Document
@runtime_checkable
class Converter(Protocol): class Converter(Protocol):
def convert(self, document: Document) -> Document: def convert(self, document: Document) -> Document:
... ...

View File

@@ -1,5 +1,3 @@
from typing import runtime_checkable
from typing import Protocol from typing import Protocol
from docutranslate.converter.interfaces import Converter from docutranslate.converter.interfaces import Converter
from docutranslate.ir.document import Document from docutranslate.ir.document import Document
@@ -7,7 +5,6 @@ from docutranslate.ir.markdown_document import MarkdownDocument
@runtime_checkable
class X2MarkdownConverter(Converter,Protocol): class X2MarkdownConverter(Converter,Protocol):
""" """
负责将其它格式的文件转换为markdown 负责将其它格式的文件转换为markdown

View File

@@ -1,4 +1,4 @@
from typing import Protocol, runtime_checkable, TypeVar, Any, Self from typing import Protocol, TypeVar, Any, Self
from docutranslate.exporter.export_config import ExportConfig from docutranslate.exporter.export_config import ExportConfig
from docutranslate.ir.document import Document from docutranslate.ir.document import Document
@@ -6,7 +6,6 @@ from docutranslate.ir.document import Document
D_in = TypeVar('D_in', bound=Document) D_in = TypeVar('D_in', bound=Document)
@runtime_checkable
class Exporter(Protocol[D_in]): class Exporter(Protocol[D_in]):
@classmethod @classmethod
def from_config(cls, export_config: ExportConfig | None = None) -> Self: def from_config(cls, export_config: ExportConfig | None = None) -> Self:

View File

@@ -1,10 +1,8 @@
from dataclasses import dataclass from dataclasses import dataclass
from typing import runtime_checkable
from docutranslate.exporter.export_config import ExportConfig from docutranslate.exporter.export_config import ExportConfig
from docutranslate.exporter.md2x.interfaces import MDExporter from docutranslate.exporter.md2x.interfaces import MDExporter
from docutranslate.ir.markdown_document import MarkdownDocument,Document from docutranslate.ir.markdown_document import MarkdownDocument,Document
from docutranslate.utils.markdown_utils import unembed_base64_images_to_zip
@dataclass @dataclass

View File

@@ -1,7 +1,14 @@
from typing import Literal from typing import Literal, TYPE_CHECKING
from docutranslate.converter.x2md.converter_docling import ConverterDoclingConfig
from docutranslate.converter.x2md.converter_mineru import ConverterMineruConfig from docutranslate.converter.x2md.converter_mineru import ConverterMineruConfig
from docutranslate.global_values.conditional_import import DOCLING_FLAG
if DOCLING_FLAG or TYPE_CHECKING:
from docutranslate.converter.x2md.converter_docling import ConverterDoclingConfig
convert_engin_type = Literal["mineru", "docling"] convert_engin_type = Literal["mineru", "docling"]
x2md_convert_config_type = ConverterDoclingConfig | ConverterMineruConfig
if DOCLING_FLAG or TYPE_CHECKING:
x2md_convert_config_type = ConverterDoclingConfig | ConverterMineruConfig
else:
x2md_convert_config_type = ConverterMineruConfig

View File

@@ -16,4 +16,4 @@ def conditional_import(packagename,alias=None):
available_packages[packagename]=False available_packages[packagename]=False
return False return False
conditional_import("docling") DOCLING_FLAG=conditional_import("docling")

View File

@@ -1,11 +1,10 @@
from pathlib import Path from pathlib import Path
from typing import Protocol, runtime_checkable, Self, TypeVar from typing import Protocol, Self, TypeVar, runtime_checkable
from docutranslate.exporter.export_config import ExportConfig from docutranslate.exporter.export_config import ExportConfig
T = TypeVar("T", bound=ExportConfig) T = TypeVar("T", bound=ExportConfig)
@runtime_checkable @runtime_checkable
class HTMLExportable(Protocol[T]): class HTMLExportable(Protocol[T]):
def export_to_html(self, export_config: T | None = None) -> str: def export_to_html(self, export_config: T | None = None) -> str:
@@ -14,7 +13,6 @@ class HTMLExportable(Protocol[T]):
def save_as_html(self, name: str, output_dir: Path | str, export_config: T | None = None) -> Self: def save_as_html(self, name: str, output_dir: Path | str, export_config: T | None = None) -> Self:
... ...
@runtime_checkable @runtime_checkable
class MDExportable(Protocol[T]): class MDExportable(Protocol[T]):
@@ -24,7 +22,6 @@ class MDExportable(Protocol[T]):
def save_as_markdown(self, name: str, output_dir: Path | str, export_config: T | None = None) -> Self: def save_as_markdown(self, name: str, output_dir: Path | str, export_config: T | None = None) -> Self:
... ...
@runtime_checkable @runtime_checkable
class MDZIPExportable(Protocol[T]): class MDZIPExportable(Protocol[T]):
@@ -34,12 +31,10 @@ class MDZIPExportable(Protocol[T]):
def save_as_markdown_zip(self, name: str, output_dir: Path | str, export_config: T | None = None) -> Self: def save_as_markdown_zip(self, name: str, output_dir: Path | str, export_config: T | None = None) -> Self:
... ...
@runtime_checkable @runtime_checkable
class MDFormatsExportable(MDZIPExportable[T], MDExportable[T], Protocol): class MDFormatsExportable(MDZIPExportable[T], MDExportable[T], Protocol):
... ...
@runtime_checkable @runtime_checkable
class TXTExportable(Protocol[T]): class TXTExportable(Protocol[T]):
def export_to_txt(self) -> str: def export_to_txt(self) -> str:

View File

@@ -1,9 +1,12 @@
import asyncio import asyncio
from pathlib import Path from pathlib import Path
from typing import Self, Literal, overload from typing import Self, Literal, overload, TYPE_CHECKING
from docutranslate.cacher import md_based_convert_cacher from docutranslate.cacher import md_based_convert_cacher
from docutranslate.converter.x2md.converter_docling import ConverterDoclingConfig, ConverterDocling from docutranslate.global_values.conditional_import import DOCLING_FLAG
if DOCLING_FLAG or TYPE_CHECKING:
from docutranslate.converter.x2md.converter_docling import ConverterDoclingConfig, ConverterDocling
from docutranslate.converter.x2md.converter_identity import ConverterIdentity from docutranslate.converter.x2md.converter_identity import ConverterIdentity
from docutranslate.converter.x2md.converter_mineru import ConverterMineruConfig, ConverterMineru from docutranslate.converter.x2md.converter_mineru import ConverterMineruConfig, ConverterMineru
from docutranslate.converter.x2md.interfaces import X2MarkdownConverter from docutranslate.converter.x2md.interfaces import X2MarkdownConverter
@@ -19,10 +22,16 @@ from docutranslate.translater.md_translator import MDTranslateConfig, MDTranslat
class MarkdownBasedManager(BaseManager, HTMLExportable, MDFormatsExportable): class MarkdownBasedManager(BaseManager, HTMLExportable, MDFormatsExportable):
def __init__(self, *args, **kwargs): def __init__(self, *args, **kwargs):
super().__init__(*args, **kwargs) super().__init__(*args, **kwargs)
self._converter_factory: dict[str:tuple[X2MarkdownConverter, x2md_convert_config_type]] = {
"mineru": (ConverterMineru, ConverterMineruConfig), if DOCLING_FLAG or TYPE_CHECKING:
"docling": (ConverterDocling, ConverterDoclingConfig), self._converter_factory: dict[str:tuple[X2MarkdownConverter, x2md_convert_config_type]] = {
} "mineru": (ConverterMineru, ConverterMineruConfig),
"docling": (ConverterDocling, ConverterDoclingConfig)
}
else:
self._converter_factory: dict[str:tuple[X2MarkdownConverter, x2md_convert_config_type]] = {
"mineru": (ConverterMineru, ConverterMineruConfig),
}
def _get_document_md(self, convert_engin: convert_engin_type | None, def _get_document_md(self, convert_engin: convert_engin_type | None,
convert_config: x2md_convert_config_type | None): convert_config: x2md_convert_config_type | None):
@@ -57,7 +66,7 @@ class MarkdownBasedManager(BaseManager, HTMLExportable, MDFormatsExportable):
@overload @overload
def translate(self, convert_engin: Literal["docling"], def translate(self, convert_engin: Literal["docling"],
convert_config: ConverterDoclingConfig, translate_config: MDTranslateConfig) -> Self: convert_config: "ConverterDoclingConfig", translate_config: MDTranslateConfig) -> Self:
... ...
@overload @overload

View File

@@ -1,4 +1,4 @@
from typing import runtime_checkable, Protocol, TypeVar from typing import Protocol, TypeVar
from docutranslate.agents import Agent from docutranslate.agents import Agent
from docutranslate.ir.document import Document from docutranslate.ir.document import Document
@@ -6,7 +6,6 @@ from docutranslate.ir.document import Document
T=TypeVar('T',bound=Document) T=TypeVar('T',bound=Document)
V=TypeVar('V',bound=Agent) V=TypeVar('V',bound=Agent)
@runtime_checkable
class Translator(Protocol[T,V]): class Translator(Protocol[T,V]):
""" """
翻译中间文本原地替换Translator不做格式转换 翻译中间文本原地替换Translator不做格式转换

View File

@@ -9,7 +9,7 @@ a = Analysis(
hookspath=[], hookspath=[],
hooksconfig={}, hooksconfig={},
runtime_hooks=[], runtime_hooks=[],
excludes=["docling","docutranslate.converter.converter_docling"], excludes=["docling","docutranslate.converter.x2md.converter_docling"],
noarchive=False, noarchive=False,
optimize=0, optimize=0,
) )