pptx支持

This commit is contained in:
xunbu
2025-11-29 21:51:09 +08:00
parent 0ff1c87870
commit 63e6101064
4 changed files with 171 additions and 63 deletions

View File

@@ -65,6 +65,9 @@ from docutranslate.workflow.base import Workflow
from docutranslate.workflow.docx_workflow import DocxWorkflow, DocxWorkflowConfig from docutranslate.workflow.docx_workflow import DocxWorkflow, DocxWorkflowConfig
from docutranslate.workflow.epub_workflow import EpubWorkflow, EpubWorkflowConfig from docutranslate.workflow.epub_workflow import EpubWorkflow, EpubWorkflowConfig
from docutranslate.workflow.html_workflow import HtmlWorkflow, HtmlWorkflowConfig from docutranslate.workflow.html_workflow import HtmlWorkflow, HtmlWorkflowConfig
# --- 新增的 Import ---
from docutranslate.workflow.pptx_workflow import PPTXWorkflow, PPTXWorkflowConfig
# ----------------------
from docutranslate.workflow.interfaces import DocxExportable, EpubExportable from docutranslate.workflow.interfaces import DocxExportable, EpubExportable
from docutranslate.workflow.interfaces import ( from docutranslate.workflow.interfaces import (
HTMLExportable, HTMLExportable,
@@ -75,6 +78,7 @@ from docutranslate.workflow.interfaces import (
SrtExportable, SrtExportable,
CsvExportable, CsvExportable,
AssExportable, AssExportable,
PPTXExportable, # Added PPTXExportable
) )
from docutranslate.workflow.json_workflow import JsonWorkflow, JsonWorkflowConfig from docutranslate.workflow.json_workflow import JsonWorkflow, JsonWorkflowConfig
from docutranslate.workflow.md_based_workflow import ( from docutranslate.workflow.md_based_workflow import (
@@ -88,9 +92,7 @@ from docutranslate.workflow.xlsx_workflow import XlsxWorkflow, XlsxWorkflowConfi
if DOCLING_EXIST or TYPE_CHECKING: if DOCLING_EXIST or TYPE_CHECKING:
from docutranslate.converter.x2md.converter_docling import ConverterDoclingConfig from docutranslate.converter.x2md.converter_docling import ConverterDoclingConfig
from docutranslate.converter.x2md.converter_mineru import ConverterMineruConfig from docutranslate.converter.x2md.converter_mineru import ConverterMineruConfig
# --- 新增的 Import ---
from docutranslate.converter.x2md.converter_mineru_deploy import ConverterMineruDeployConfig from docutranslate.converter.x2md.converter_mineru_deploy import ConverterMineruDeployConfig
# ----------------------
from docutranslate.exporter.md.md2html_exporter import MD2HTMLExporterConfig from docutranslate.exporter.md.md2html_exporter import MD2HTMLExporterConfig
from docutranslate.exporter.txt.txt2html_exporter import TXT2HTMLExporterConfig from docutranslate.exporter.txt.txt2html_exporter import TXT2HTMLExporterConfig
from docutranslate.translator.ai_translator.md_translator import MDTranslatorConfig from docutranslate.translator.ai_translator.md_translator import MDTranslatorConfig
@@ -108,8 +110,8 @@ from docutranslate.exporter.epub.epub2html_exporter import Epub2HTMLExporterConf
from docutranslate.translator.ai_translator.html_translator import HtmlTranslatorConfig from docutranslate.translator.ai_translator.html_translator import HtmlTranslatorConfig
from docutranslate.translator.ai_translator.ass_translator import AssTranslatorConfig from docutranslate.translator.ai_translator.ass_translator import AssTranslatorConfig
from docutranslate.exporter.ass.ass2html_exporter import Ass2HTMLExporterConfig from docutranslate.exporter.ass.ass2html_exporter import Ass2HTMLExporterConfig
from docutranslate.translator.ai_translator.pptx_translator import PPTXTranslatorConfig
# ------------------------------------ from docutranslate.exporter.pptx.pptx2html_exporter import PPTX2HTMLExporterConfig
from docutranslate.logger import global_logger from docutranslate.logger import global_logger
from docutranslate.translator import default_params from docutranslate.translator import default_params
@@ -133,6 +135,7 @@ WORKFLOW_DICT: Dict[str, Type[Workflow]] = {
"epub": EpubWorkflow, "epub": EpubWorkflow,
"html": HtmlWorkflow, "html": HtmlWorkflow,
"ass": AssWorkflow, "ass": AssWorkflow,
"pptx": PPTXWorkflow, # Added PPTXWorkflow
} }
# --- 媒体类型映射 --- # --- 媒体类型映射 ---
@@ -148,6 +151,7 @@ MEDIA_TYPES = {
"srt": "text/plain; charset=utf-8", "srt": "text/plain; charset=utf-8",
"epub": "application/epub+zip", "epub": "application/epub+zip",
"ass": "text/plain; charset=utf-8", "ass": "text/plain; charset=utf-8",
"pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation", # Added PPTX MIME
} }
@@ -609,6 +613,25 @@ class AssWorkflowParams(BaseWorkflowParams):
# --- ASS WORKFLOW PARAMS END --- # --- ASS WORKFLOW PARAMS END ---
# --- PPTX WORKFLOW PARAMS START ---
class PPTXWorkflowParams(BaseWorkflowParams):
    """Request parameters for the PPTX translation workflow.

    Extends ``BaseWorkflowParams`` with the PPTX-specific options that control
    how translated text is written back into the presentation. The
    ``workflow_type`` literal is the value that identifies this parameter set
    inside the ``TranslatePayload`` union.
    """

    # Required tag; must be exactly "pptx" for this model to be selected.
    workflow_type: Literal["pptx"] = Field(
        ..., description="指定使用PPTX的翻译工作流。"
    )
    # How the translation is combined with the source text (default: replace).
    insert_mode: Literal["replace", "append", "prepend"] = Field(
        "replace",
        description="翻译文本的插入模式。'replace':替换原文,'append':附加到原文后,'prepend':附加到原文前。",
    )
    # Text placed between source and translation; per the description it is
    # only relevant for the 'append' and 'prepend' modes.
    separator: str = Field(
        "\n",
        description="当 insert_mode 为 'append' 或 'prepend' 时,用于分隔原文和译文的分隔符。",
    )
# target_cjk_font removed as per request
# --- PPTX WORKFLOW PARAMS END ---
# 3. 使用可辨识联合类型Discriminated Union将它们组合起来 # 3. 使用可辨识联合类型Discriminated Union将它们组合起来
TranslatePayload = Annotated[ TranslatePayload = Annotated[
Union[ Union[
@@ -621,6 +644,7 @@ TranslatePayload = Annotated[
EpubWorkflowParams, EpubWorkflowParams,
HtmlWorkflowParams, HtmlWorkflowParams,
AssWorkflowParams, AssWorkflowParams,
PPTXWorkflowParams,
], ],
Field(discriminator="workflow_type"), Field(discriminator="workflow_type"),
] ]
@@ -639,6 +663,7 @@ class TranslateServiceRequest(BaseModel):
"my_book.epub", "my_book.epub",
"index.html", "index.html",
"dialogue.ass", "dialogue.ass",
"presentation.pptx",
], ],
) )
file_content: str = Field( file_content: str = Field(
@@ -864,6 +889,26 @@ class TranslateServiceRequest(BaseModel):
"retry": default_params["retry"], "retry": default_params["retry"],
}, },
}, },
{
"file_name": "presentation.pptx",
"file_content": "UEsDBBQAAAAIA... (base64-encoded pptx)",
"payload": {
"workflow_type": "pptx",
"skip_translate": False,
"base_url": "https://api.openai.com/v1",
"api_key": "sk-your-api-key-here",
"model_id": "gpt-4o",
"to_lang": "中文",
"insert_mode": "replace",
"separator": "\n",
"chunk_size": default_params["chunk_size"],
"concurrent": default_params["concurrent"],
"temperature": default_params["temperature"],
"timeout": default_params["timeout"],
"thinking": "default",
"retry": default_params["retry"],
},
},
] ]
} }
) )
@@ -1283,6 +1328,46 @@ async def _perform_translation(
workflow = AssWorkflow(config=workflow_config) workflow = AssWorkflow(config=workflow_config)
# --- ASS WORKFLOW LOGIC END --- # --- ASS WORKFLOW LOGIC END ---
# --- PPTX WORKFLOW LOGIC START ---
elif isinstance(payload, PPTXWorkflowParams):
task_logger.info("构建 PPTXWorkflow 配置。")
translator_args = payload.model_dump(
include={
"skip_translate",
"base_url",
"api_key",
"model_id",
"to_lang",
"custom_prompt",
"temperature",
"thinking",
"chunk_size",
"concurrent",
"insert_mode",
"separator",
"glossary_dict",
"timeout",
"retry",
"system_proxy_enable",
"force_json",
},
exclude_none=True,
)
translator_args["glossary_generate_enable"] = (
payload.glossary_generate_enable
)
translator_args["glossary_agent_config"] = build_glossary_agent_config()
translator_config = PPTXTranslatorConfig(**translator_args)
html_exporter_config = PPTX2HTMLExporterConfig(cdn=True)
workflow_config = PPTXWorkflowConfig(
translator_config=translator_config,
html_exporter_config=html_exporter_config,
logger=task_logger,
)
workflow = PPTXWorkflow(config=workflow_config)
# --- PPTX WORKFLOW LOGIC END ---
else: else:
raise TypeError(f"工作流类型 '{payload.workflow_type}' 的处理逻辑未实现。") raise TypeError(f"工作流类型 '{payload.workflow_type}' 的处理逻辑未实现。")
@@ -1313,30 +1398,7 @@ async def _perform_translation(
# 定义导出函数映射 # 定义导出函数映射
export_map = {} export_map = {}
# 根据 workflow 的类型填充导出映射
if isinstance(workflow, HTMLExportable):
html_config = None
if isinstance(workflow, MarkdownBasedWorkflow):
html_config = MD2HTMLExporterConfig(cdn=is_cdn_available)
elif isinstance(workflow, TXTWorkflow):
html_config = TXT2HTMLExporterConfig(cdn=is_cdn_available)
elif isinstance(workflow, JsonWorkflow):
html_config = Json2HTMLExporterConfig(cdn=is_cdn_available)
elif isinstance(workflow, XlsxWorkflow):
html_config = Xlsx2HTMLExporterConfig(cdn=is_cdn_available)
elif isinstance(workflow, DocxWorkflow):
html_config = Docx2HTMLExporterConfig(cdn=is_cdn_available)
elif isinstance(workflow, SrtWorkflow):
html_config = Srt2HTMLExporterConfig(cdn=is_cdn_available)
elif isinstance(workflow, EpubWorkflow):
html_config = Epub2HTMLExporterConfig(cdn=is_cdn_available)
elif isinstance(workflow, AssWorkflow):
html_config = Ass2HTMLExporterConfig(cdn=is_cdn_available)
export_map["html"] = (
lambda: workflow.export_to_html(html_config),
f"{filename_stem}_translated.html",
True,
)
if isinstance(workflow, MDFormatsExportable): if isinstance(workflow, MDFormatsExportable):
export_map["markdown"] = ( export_map["markdown"] = (
workflow.export_to_markdown, workflow.export_to_markdown,
@@ -1396,6 +1458,39 @@ async def _perform_translation(
f"{filename_stem}_translated.ass", f"{filename_stem}_translated.ass",
True, True,
) )
if isinstance(workflow, PPTXExportable):
export_map["pptx"] = (
workflow.export_to_pptx,
f"{filename_stem}_translated.pptx",
False,
)
# 根据 workflow 的类型填充导出映射
if isinstance(workflow, HTMLExportable):
html_config = None
if isinstance(workflow, MarkdownBasedWorkflow):
html_config = MD2HTMLExporterConfig(cdn=is_cdn_available)
elif isinstance(workflow, TXTWorkflow):
html_config = TXT2HTMLExporterConfig(cdn=is_cdn_available)
elif isinstance(workflow, JsonWorkflow):
html_config = Json2HTMLExporterConfig(cdn=is_cdn_available)
elif isinstance(workflow, XlsxWorkflow):
html_config = Xlsx2HTMLExporterConfig(cdn=is_cdn_available)
elif isinstance(workflow, DocxWorkflow):
html_config = Docx2HTMLExporterConfig(cdn=is_cdn_available)
elif isinstance(workflow, SrtWorkflow):
html_config = Srt2HTMLExporterConfig(cdn=is_cdn_available)
elif isinstance(workflow, EpubWorkflow):
html_config = Epub2HTMLExporterConfig(cdn=is_cdn_available)
elif isinstance(workflow, AssWorkflow):
html_config = Ass2HTMLExporterConfig(cdn=is_cdn_available)
elif isinstance(workflow, PPTXWorkflow):
html_config = PPTX2HTMLExporterConfig(cdn=is_cdn_available)
export_map["html"] = (
lambda: workflow.export_to_html(html_config),
f"{filename_stem}_translated.html",
True,
)
# 循环生成文件 # 循环生成文件
for file_type, (export_func, filename, is_string_output) in export_map.items(): for file_type, (export_func, filename, is_string_output) in export_map.items():
@@ -1616,7 +1711,7 @@ def _cancel_translation_logic(task_id: str):
description=""" description="""
接收一个包含文件内容Base64编码和工作流参数的JSON请求启动一个后台翻译任务。 接收一个包含文件内容Base64编码和工作流参数的JSON请求启动一个后台翻译任务。
- **工作流选择**: 请求体中的 `payload.workflow_type` 字段决定了本次任务的类型(如 `markdown_based`, `txt`, `json`, `xlsx`, `docx`, `srt`, `epub`, `html`, `ass`)。 - **工作流选择**: 请求体中的 `payload.workflow_type` 字段决定了本次任务的类型(如 `markdown_based`, `txt`, `json`, `xlsx`, `docx`, `srt`, `epub`, `html`, `ass`, `pptx`)。
- **动态参数**: 根据所选工作流API需要不同的参数集。请参考下面的Schema或示例。 - **动态参数**: 根据所选工作流API需要不同的参数集。请参考下面的Schema或示例。
- **异步处理**: 此端点会立即返回任务ID客户端需轮询状态接口获取进度。 - **异步处理**: 此端点会立即返回任务ID客户端需轮询状态接口获取进度。
""", """,
@@ -1960,6 +2055,27 @@ async def service_release_task(task_id: str):
}, },
}, },
# --- ASS STATUS EXAMPLE END --- # --- ASS STATUS EXAMPLE END ---
# --- PPTX STATUS EXAMPLE START ---
"completed_pptx": {
"summary": "已完成 (PPTX)",
"value": {
"task_id": "a1b2c3d6",
"is_processing": False,
"status_message": "翻译成功!用时 30.50 秒。",
"error_flag": False,
"download_ready": True,
"original_filename_stem": "presentation",
"original_filename": "presentation.pptx",
"task_start_time": 1678890300.0,
"task_end_time": 1678890330.50,
"downloads": {
"pptx": "/service/download/a1b2c3d6/pptx",
"html": "/service/download/a1b2c3d6/html",
},
"attachment": {},
},
},
# --- PPTX STATUS EXAMPLE END ---
"error": { "error": {
"summary": "失败", "summary": "失败",
"value": { "value": {
@@ -2052,6 +2168,7 @@ FileType = Literal[
"srt", "srt",
"epub", "epub",
"ass", "ass",
"pptx",
] ]
@@ -2077,6 +2194,9 @@ FileType = Literal[
"application/epub+zip": { "application/epub+zip": {
"schema": {"type": "string", "format": "binary"} "schema": {"type": "string", "format": "binary"}
}, },
"application/vnd.openxmlformats-officedocument.presentationml.presentation": {
"schema": {"type": "string", "format": "binary"}
},
}, },
}, },
404: { 404: {
@@ -2092,7 +2212,7 @@ async def service_download_file(
file_type: FileType = FastApiPath( file_type: FileType = FastApiPath(
..., ...,
description="要下载的文件类型。", description="要下载的文件类型。",
examples=["html", "json", "csv", "docx", "srt", "epub", "ass"], examples=["html", "json", "csv", "docx", "srt", "epub", "ass", "pptx"],
), ),
): ):
task_state = tasks_state.get(task_id) task_state = tasks_state.get(task_id)
@@ -2198,6 +2318,14 @@ async def service_download_attachment(
"content": "UEsDBBQAAAAIA... (base64-encoded string)", "content": "UEsDBBQAAAAIA... (base64-encoded string)",
}, },
}, },
"pptx_base64": {
"summary": "PPTX 内容 (Base64)",
"value": {
"file_type": "pptx",
"filename": "my_presentation_translated.pptx",
"content": "UEsDBBQAAAAIA... (base64-encoded string)",
},
},
} }
} }
}, },
@@ -2215,7 +2343,7 @@ async def service_content(
file_type: FileType = FastApiPath( file_type: FileType = FastApiPath(
..., ...,
description="要获取内容的文件类型。", description="要获取内容的文件类型。",
examples=["html", "json", "csv", "docx", "srt", "epub", "ass"], examples=["html", "json", "csv", "docx", "srt", "epub", "ass", "pptx"],
), ),
): ):
task_state = tasks_state.get(task_id) task_state = tasks_state.get(task_id)

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -22,9 +22,6 @@ from docutranslate.translator.ai_translator.base import AiTranslatorConfig, AiTr
class PPTXTranslatorConfig(AiTranslatorConfig): class PPTXTranslatorConfig(AiTranslatorConfig):
insert_mode: Literal["replace", "append", "prepend"] = "replace" insert_mode: Literal["replace", "append", "prepend"] = "replace"
separator: str = "\n" separator: str = "\n"
# 指定翻译后的中文字体(东亚字体),防止乱码或回退到宋体
# 推荐使用 "Microsoft YaHei" (微软雅黑) 或 "DengXian" (等线)
target_cjk_font: str = "Microsoft YaHei"
# ---------------- 主类 ---------------- # ---------------- 主类 ----------------
@@ -35,7 +32,7 @@ class PPTXTranslator(AiTranslator):
改进特性: 改进特性:
1. 深度遍历:支持母版、版式、备注页、以及隐藏在 AlternateContent (兼容性块) 中的文本。 1. 深度遍历:支持母版、版式、备注页、以及隐藏在 AlternateContent (兼容性块) 中的文本。
2. 公式保护:智能检测文本间的公式,防止翻译后文字错位。 2. 公式保护:智能检测文本间的公式,防止翻译后文字错位。
3. 字体美化:中西文字体分离,中文使用微软雅黑,英文保持原样 3. 样式保留:翻译后完全保留原有的中英文字体设置,不做强制覆盖
4. 布局自适应:防止翻译后文本溢出。 4. 布局自适应:防止翻译后文本溢出。
""" """
@@ -54,7 +51,6 @@ class PPTXTranslator(AiTranslator):
self.translate_agent = SegmentsTranslateAgent(agent_config) self.translate_agent = SegmentsTranslateAgent(agent_config)
self.insert_mode = config.insert_mode self.insert_mode = config.insert_mode
self.separator = config.separator self.separator = config.separator
self.target_cjk_font = config.target_cjk_font
# ---------------- 辅助函数:样式与字体 ---------------- # ---------------- 辅助函数:样式与字体 ----------------
@@ -111,18 +107,6 @@ class PPTXTranslator(AiTranslator):
return True return True
def _set_east_asian_font(self, run, font_name: str):
"""设置 Run 的东亚字体 (解决中文乱码/宋体问题)。"""
if not font_name:
return
try:
rPr = run.font._element.get_or_add_rPr()
# 设置 ea (East Asian) 字体,不影响 latin (西文) 字体
ea = rPr.get_or_add_ea()
ea.set(qn('a:typeface'), font_name)
except Exception:
pass
# ---------------- 核心遍历逻辑 ---------------- # ---------------- 核心遍历逻辑 ----------------
def _process_text_frame(self, text_frame: TextFrame, elements: List[Dict[str, Any]], texts: List[str]): def _process_text_frame(self, text_frame: TextFrame, elements: List[Dict[str, Any]], texts: List[str]):
@@ -283,12 +267,10 @@ class PPTXTranslator(AiTranslator):
primary_run = runs[0] primary_run = runs[0]
try: try:
# 1. 写入文本 # 1. 写入文本 (python-pptx 会自动保留原有的 rPr 属性,即保留默认字体)
primary_run.text = text_to_set primary_run.text = text_to_set
# 2. 设置东亚字体 (保留西文字体设置) # 2. (已移除字体强制设置逻辑,以保留 PPT 原样)
if self.target_cjk_font:
self._set_east_asian_font(primary_run, self.target_cjk_font)
# 3. 处理溢出 # 3. 处理溢出
text_frame = element_info.get("text_frame") text_frame = element_info.get("text_frame")