pptx支持
This commit is contained in:
@@ -65,6 +65,9 @@ from docutranslate.workflow.base import Workflow
|
||||
from docutranslate.workflow.docx_workflow import DocxWorkflow, DocxWorkflowConfig
|
||||
from docutranslate.workflow.epub_workflow import EpubWorkflow, EpubWorkflowConfig
|
||||
from docutranslate.workflow.html_workflow import HtmlWorkflow, HtmlWorkflowConfig
|
||||
# --- 新增的 Import ---
|
||||
from docutranslate.workflow.pptx_workflow import PPTXWorkflow, PPTXWorkflowConfig
|
||||
# ----------------------
|
||||
from docutranslate.workflow.interfaces import DocxExportable, EpubExportable
|
||||
from docutranslate.workflow.interfaces import (
|
||||
HTMLExportable,
|
||||
@@ -75,6 +78,7 @@ from docutranslate.workflow.interfaces import (
|
||||
SrtExportable,
|
||||
CsvExportable,
|
||||
AssExportable,
|
||||
PPTXExportable, # Added PPTXExportable
|
||||
)
|
||||
from docutranslate.workflow.json_workflow import JsonWorkflow, JsonWorkflowConfig
|
||||
from docutranslate.workflow.md_based_workflow import (
|
||||
@@ -88,9 +92,7 @@ from docutranslate.workflow.xlsx_workflow import XlsxWorkflow, XlsxWorkflowConfi
|
||||
if DOCLING_EXIST or TYPE_CHECKING:
|
||||
from docutranslate.converter.x2md.converter_docling import ConverterDoclingConfig
|
||||
from docutranslate.converter.x2md.converter_mineru import ConverterMineruConfig
|
||||
# --- 新增的 Import ---
|
||||
from docutranslate.converter.x2md.converter_mineru_deploy import ConverterMineruDeployConfig
|
||||
# ----------------------
|
||||
from docutranslate.exporter.md.md2html_exporter import MD2HTMLExporterConfig
|
||||
from docutranslate.exporter.txt.txt2html_exporter import TXT2HTMLExporterConfig
|
||||
from docutranslate.translator.ai_translator.md_translator import MDTranslatorConfig
|
||||
@@ -108,8 +110,8 @@ from docutranslate.exporter.epub.epub2html_exporter import Epub2HTMLExporterConf
|
||||
from docutranslate.translator.ai_translator.html_translator import HtmlTranslatorConfig
|
||||
from docutranslate.translator.ai_translator.ass_translator import AssTranslatorConfig
|
||||
from docutranslate.exporter.ass.ass2html_exporter import Ass2HTMLExporterConfig
|
||||
|
||||
# ------------------------------------
|
||||
from docutranslate.translator.ai_translator.pptx_translator import PPTXTranslatorConfig
|
||||
from docutranslate.exporter.pptx.pptx2html_exporter import PPTX2HTMLExporterConfig
|
||||
|
||||
from docutranslate.logger import global_logger
|
||||
from docutranslate.translator import default_params
|
||||
@@ -133,6 +135,7 @@ WORKFLOW_DICT: Dict[str, Type[Workflow]] = {
|
||||
"epub": EpubWorkflow,
|
||||
"html": HtmlWorkflow,
|
||||
"ass": AssWorkflow,
|
||||
"pptx": PPTXWorkflow, # Added PPTXWorkflow
|
||||
}
|
||||
|
||||
# --- 媒体类型映射 ---
|
||||
@@ -148,6 +151,7 @@ MEDIA_TYPES = {
|
||||
"srt": "text/plain; charset=utf-8",
|
||||
"epub": "application/epub+zip",
|
||||
"ass": "text/plain; charset=utf-8",
|
||||
"pptx": "application/vnd.openxmlformats-officedocument.presentationml.presentation", # Added PPTX MIME
|
||||
}
|
||||
|
||||
|
||||
@@ -609,6 +613,25 @@ class AssWorkflowParams(BaseWorkflowParams):
|
||||
# --- ASS WORKFLOW PARAMS END ---
|
||||
|
||||
|
||||
# --- PPTX WORKFLOW PARAMS START ---
|
||||
class PPTXWorkflowParams(BaseWorkflowParams):
|
||||
workflow_type: Literal["pptx"] = Field(
|
||||
..., description="指定使用PPTX的翻译工作流。"
|
||||
)
|
||||
insert_mode: Literal["replace", "append", "prepend"] = Field(
|
||||
"replace",
|
||||
description="翻译文本的插入模式。'replace':替换原文,'append':附加到原文后,'prepend':附加到原文前。",
|
||||
)
|
||||
separator: str = Field(
|
||||
"\n",
|
||||
description="当 insert_mode 为 'append' 或 'prepend' 时,用于分隔原文和译文的分隔符。",
|
||||
)
|
||||
# target_cjk_font removed as per request
|
||||
|
||||
|
||||
# --- PPTX WORKFLOW PARAMS END ---
|
||||
|
||||
|
||||
# 3. 使用可辨识联合类型(Discriminated Union)将它们组合起来
|
||||
TranslatePayload = Annotated[
|
||||
Union[
|
||||
@@ -621,6 +644,7 @@ TranslatePayload = Annotated[
|
||||
EpubWorkflowParams,
|
||||
HtmlWorkflowParams,
|
||||
AssWorkflowParams,
|
||||
PPTXWorkflowParams,
|
||||
],
|
||||
Field(discriminator="workflow_type"),
|
||||
]
|
||||
@@ -639,6 +663,7 @@ class TranslateServiceRequest(BaseModel):
|
||||
"my_book.epub",
|
||||
"index.html",
|
||||
"dialogue.ass",
|
||||
"presentation.pptx",
|
||||
],
|
||||
)
|
||||
file_content: str = Field(
|
||||
@@ -864,6 +889,26 @@ class TranslateServiceRequest(BaseModel):
|
||||
"retry": default_params["retry"],
|
||||
},
|
||||
},
|
||||
{
|
||||
"file_name": "presentation.pptx",
|
||||
"file_content": "UEsDBBQAAAAIA... (base64-encoded pptx)",
|
||||
"payload": {
|
||||
"workflow_type": "pptx",
|
||||
"skip_translate": False,
|
||||
"base_url": "https://api.openai.com/v1",
|
||||
"api_key": "sk-your-api-key-here",
|
||||
"model_id": "gpt-4o",
|
||||
"to_lang": "中文",
|
||||
"insert_mode": "replace",
|
||||
"separator": "\n",
|
||||
"chunk_size": default_params["chunk_size"],
|
||||
"concurrent": default_params["concurrent"],
|
||||
"temperature": default_params["temperature"],
|
||||
"timeout": default_params["timeout"],
|
||||
"thinking": "default",
|
||||
"retry": default_params["retry"],
|
||||
},
|
||||
},
|
||||
]
|
||||
}
|
||||
)
|
||||
@@ -1283,6 +1328,46 @@ async def _perform_translation(
|
||||
workflow = AssWorkflow(config=workflow_config)
|
||||
# --- ASS WORKFLOW LOGIC END ---
|
||||
|
||||
# --- PPTX WORKFLOW LOGIC START ---
|
||||
elif isinstance(payload, PPTXWorkflowParams):
|
||||
task_logger.info("构建 PPTXWorkflow 配置。")
|
||||
translator_args = payload.model_dump(
|
||||
include={
|
||||
"skip_translate",
|
||||
"base_url",
|
||||
"api_key",
|
||||
"model_id",
|
||||
"to_lang",
|
||||
"custom_prompt",
|
||||
"temperature",
|
||||
"thinking",
|
||||
"chunk_size",
|
||||
"concurrent",
|
||||
"insert_mode",
|
||||
"separator",
|
||||
"glossary_dict",
|
||||
"timeout",
|
||||
"retry",
|
||||
"system_proxy_enable",
|
||||
"force_json",
|
||||
},
|
||||
exclude_none=True,
|
||||
)
|
||||
translator_args["glossary_generate_enable"] = (
|
||||
payload.glossary_generate_enable
|
||||
)
|
||||
translator_args["glossary_agent_config"] = build_glossary_agent_config()
|
||||
translator_config = PPTXTranslatorConfig(**translator_args)
|
||||
|
||||
html_exporter_config = PPTX2HTMLExporterConfig(cdn=True)
|
||||
workflow_config = PPTXWorkflowConfig(
|
||||
translator_config=translator_config,
|
||||
html_exporter_config=html_exporter_config,
|
||||
logger=task_logger,
|
||||
)
|
||||
workflow = PPTXWorkflow(config=workflow_config)
|
||||
# --- PPTX WORKFLOW LOGIC END ---
|
||||
|
||||
else:
|
||||
raise TypeError(f"工作流类型 '{payload.workflow_type}' 的处理逻辑未实现。")
|
||||
|
||||
@@ -1313,30 +1398,7 @@ async def _perform_translation(
|
||||
# 定义导出函数映射
|
||||
export_map = {}
|
||||
|
||||
# 根据 workflow 的类型填充导出映射
|
||||
if isinstance(workflow, HTMLExportable):
|
||||
html_config = None
|
||||
if isinstance(workflow, MarkdownBasedWorkflow):
|
||||
html_config = MD2HTMLExporterConfig(cdn=is_cdn_available)
|
||||
elif isinstance(workflow, TXTWorkflow):
|
||||
html_config = TXT2HTMLExporterConfig(cdn=is_cdn_available)
|
||||
elif isinstance(workflow, JsonWorkflow):
|
||||
html_config = Json2HTMLExporterConfig(cdn=is_cdn_available)
|
||||
elif isinstance(workflow, XlsxWorkflow):
|
||||
html_config = Xlsx2HTMLExporterConfig(cdn=is_cdn_available)
|
||||
elif isinstance(workflow, DocxWorkflow):
|
||||
html_config = Docx2HTMLExporterConfig(cdn=is_cdn_available)
|
||||
elif isinstance(workflow, SrtWorkflow):
|
||||
html_config = Srt2HTMLExporterConfig(cdn=is_cdn_available)
|
||||
elif isinstance(workflow, EpubWorkflow):
|
||||
html_config = Epub2HTMLExporterConfig(cdn=is_cdn_available)
|
||||
elif isinstance(workflow, AssWorkflow):
|
||||
html_config = Ass2HTMLExporterConfig(cdn=is_cdn_available)
|
||||
export_map["html"] = (
|
||||
lambda: workflow.export_to_html(html_config),
|
||||
f"{filename_stem}_translated.html",
|
||||
True,
|
||||
)
|
||||
|
||||
if isinstance(workflow, MDFormatsExportable):
|
||||
export_map["markdown"] = (
|
||||
workflow.export_to_markdown,
|
||||
@@ -1396,6 +1458,39 @@ async def _perform_translation(
|
||||
f"{filename_stem}_translated.ass",
|
||||
True,
|
||||
)
|
||||
if isinstance(workflow, PPTXExportable):
|
||||
export_map["pptx"] = (
|
||||
workflow.export_to_pptx,
|
||||
f"{filename_stem}_translated.pptx",
|
||||
False,
|
||||
)
|
||||
|
||||
# 根据 workflow 的类型填充导出映射
|
||||
if isinstance(workflow, HTMLExportable):
|
||||
html_config = None
|
||||
if isinstance(workflow, MarkdownBasedWorkflow):
|
||||
html_config = MD2HTMLExporterConfig(cdn=is_cdn_available)
|
||||
elif isinstance(workflow, TXTWorkflow):
|
||||
html_config = TXT2HTMLExporterConfig(cdn=is_cdn_available)
|
||||
elif isinstance(workflow, JsonWorkflow):
|
||||
html_config = Json2HTMLExporterConfig(cdn=is_cdn_available)
|
||||
elif isinstance(workflow, XlsxWorkflow):
|
||||
html_config = Xlsx2HTMLExporterConfig(cdn=is_cdn_available)
|
||||
elif isinstance(workflow, DocxWorkflow):
|
||||
html_config = Docx2HTMLExporterConfig(cdn=is_cdn_available)
|
||||
elif isinstance(workflow, SrtWorkflow):
|
||||
html_config = Srt2HTMLExporterConfig(cdn=is_cdn_available)
|
||||
elif isinstance(workflow, EpubWorkflow):
|
||||
html_config = Epub2HTMLExporterConfig(cdn=is_cdn_available)
|
||||
elif isinstance(workflow, AssWorkflow):
|
||||
html_config = Ass2HTMLExporterConfig(cdn=is_cdn_available)
|
||||
elif isinstance(workflow, PPTXWorkflow):
|
||||
html_config = PPTX2HTMLExporterConfig(cdn=is_cdn_available)
|
||||
export_map["html"] = (
|
||||
lambda: workflow.export_to_html(html_config),
|
||||
f"{filename_stem}_translated.html",
|
||||
True,
|
||||
)
|
||||
|
||||
# 循环生成文件
|
||||
for file_type, (export_func, filename, is_string_output) in export_map.items():
|
||||
@@ -1616,7 +1711,7 @@ def _cancel_translation_logic(task_id: str):
|
||||
description="""
|
||||
接收一个包含文件内容(Base64编码)和工作流参数的JSON请求,启动一个后台翻译任务。
|
||||
|
||||
- **工作流选择**: 请求体中的 `payload.workflow_type` 字段决定了本次任务的类型(如 `markdown_based`, `txt`, `json`, `xlsx`, `docx`, `srt`, `epub`, `html`, `ass`)。
|
||||
- **工作流选择**: 请求体中的 `payload.workflow_type` 字段决定了本次任务的类型(如 `markdown_based`, `txt`, `json`, `xlsx`, `docx`, `srt`, `epub`, `html`, `ass`, `pptx`)。
|
||||
- **动态参数**: 根据所选工作流,API需要不同的参数集。请参考下面的Schema或示例。
|
||||
- **异步处理**: 此端点会立即返回任务ID,客户端需轮询状态接口获取进度。
|
||||
""",
|
||||
@@ -1960,6 +2055,27 @@ async def service_release_task(task_id: str):
|
||||
},
|
||||
},
|
||||
# --- ASS STATUS EXAMPLE END ---
|
||||
# --- PPTX STATUS EXAMPLE START ---
|
||||
"completed_pptx": {
|
||||
"summary": "已完成 (PPTX)",
|
||||
"value": {
|
||||
"task_id": "a1b2c3d6",
|
||||
"is_processing": False,
|
||||
"status_message": "翻译成功!用时 30.50 秒。",
|
||||
"error_flag": False,
|
||||
"download_ready": True,
|
||||
"original_filename_stem": "presentation",
|
||||
"original_filename": "presentation.pptx",
|
||||
"task_start_time": 1678890300.0,
|
||||
"task_end_time": 1678890330.50,
|
||||
"downloads": {
|
||||
"pptx": "/service/download/a1b2c3d6/pptx",
|
||||
"html": "/service/download/a1b2c3d6/html",
|
||||
},
|
||||
"attachment": {},
|
||||
},
|
||||
},
|
||||
# --- PPTX STATUS EXAMPLE END ---
|
||||
"error": {
|
||||
"summary": "失败",
|
||||
"value": {
|
||||
@@ -2052,6 +2168,7 @@ FileType = Literal[
|
||||
"srt",
|
||||
"epub",
|
||||
"ass",
|
||||
"pptx",
|
||||
]
|
||||
|
||||
|
||||
@@ -2077,6 +2194,9 @@ FileType = Literal[
|
||||
"application/epub+zip": {
|
||||
"schema": {"type": "string", "format": "binary"}
|
||||
},
|
||||
"application/vnd.openxmlformats-officedocument.presentationml.presentation": {
|
||||
"schema": {"type": "string", "format": "binary"}
|
||||
},
|
||||
},
|
||||
},
|
||||
404: {
|
||||
@@ -2092,7 +2212,7 @@ async def service_download_file(
|
||||
file_type: FileType = FastApiPath(
|
||||
...,
|
||||
description="要下载的文件类型。",
|
||||
examples=["html", "json", "csv", "docx", "srt", "epub", "ass"],
|
||||
examples=["html", "json", "csv", "docx", "srt", "epub", "ass", "pptx"],
|
||||
),
|
||||
):
|
||||
task_state = tasks_state.get(task_id)
|
||||
@@ -2198,6 +2318,14 @@ async def service_download_attachment(
|
||||
"content": "UEsDBBQAAAAIA... (base64-encoded string)",
|
||||
},
|
||||
},
|
||||
"pptx_base64": {
|
||||
"summary": "PPTX 内容 (Base64)",
|
||||
"value": {
|
||||
"file_type": "pptx",
|
||||
"filename": "my_presentation_translated.pptx",
|
||||
"content": "UEsDBBQAAAAIA... (base64-encoded string)",
|
||||
},
|
||||
},
|
||||
}
|
||||
}
|
||||
},
|
||||
@@ -2215,7 +2343,7 @@ async def service_content(
|
||||
file_type: FileType = FastApiPath(
|
||||
...,
|
||||
description="要获取内容的文件类型。",
|
||||
examples=["html", "json", "csv", "docx", "srt", "epub", "ass"],
|
||||
examples=["html", "json", "csv", "docx", "srt", "epub", "ass", "pptx"],
|
||||
),
|
||||
):
|
||||
task_state = tasks_state.get(task_id)
|
||||
|
||||
File diff suppressed because one or more lines are too long
File diff suppressed because one or more lines are too long
@@ -22,9 +22,6 @@ from docutranslate.translator.ai_translator.base import AiTranslatorConfig, AiTr
|
||||
class PPTXTranslatorConfig(AiTranslatorConfig):
|
||||
insert_mode: Literal["replace", "append", "prepend"] = "replace"
|
||||
separator: str = "\n"
|
||||
# 指定翻译后的中文字体(东亚字体),防止乱码或回退到宋体
|
||||
# 推荐使用 "Microsoft YaHei" (微软雅黑) 或 "DengXian" (等线)
|
||||
target_cjk_font: str = "Microsoft YaHei"
|
||||
|
||||
|
||||
# ---------------- 主类 ----------------
|
||||
@@ -35,7 +32,7 @@ class PPTXTranslator(AiTranslator):
|
||||
改进特性:
|
||||
1. 深度遍历:支持母版、版式、备注页、以及隐藏在 AlternateContent (兼容性块) 中的文本。
|
||||
2. 公式保护:智能检测文本间的公式,防止翻译后文字错位。
|
||||
3. 字体美化:中西文字体分离,中文使用微软雅黑,英文保持原样。
|
||||
3. 样式保留:翻译后完全保留原有的中英文字体设置,不做强制覆盖。
|
||||
4. 布局自适应:防止翻译后文本溢出。
|
||||
"""
|
||||
|
||||
@@ -54,7 +51,6 @@ class PPTXTranslator(AiTranslator):
|
||||
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
||||
self.insert_mode = config.insert_mode
|
||||
self.separator = config.separator
|
||||
self.target_cjk_font = config.target_cjk_font
|
||||
|
||||
# ---------------- 辅助函数:样式与字体 ----------------
|
||||
|
||||
@@ -111,18 +107,6 @@ class PPTXTranslator(AiTranslator):
|
||||
|
||||
return True
|
||||
|
||||
def _set_east_asian_font(self, run, font_name: str):
|
||||
"""设置 Run 的东亚字体 (解决中文乱码/宋体问题)。"""
|
||||
if not font_name:
|
||||
return
|
||||
try:
|
||||
rPr = run.font._element.get_or_add_rPr()
|
||||
# 设置 ea (East Asian) 字体,不影响 latin (西文) 字体
|
||||
ea = rPr.get_or_add_ea()
|
||||
ea.set(qn('a:typeface'), font_name)
|
||||
except Exception:
|
||||
pass
|
||||
|
||||
# ---------------- 核心遍历逻辑 ----------------
|
||||
|
||||
def _process_text_frame(self, text_frame: TextFrame, elements: List[Dict[str, Any]], texts: List[str]):
|
||||
@@ -283,12 +267,10 @@ class PPTXTranslator(AiTranslator):
|
||||
primary_run = runs[0]
|
||||
|
||||
try:
|
||||
# 1. 写入文本
|
||||
# 1. 写入文本 (python-pptx 会自动保留原有的 rPr 属性,即保留默认字体)
|
||||
primary_run.text = text_to_set
|
||||
|
||||
# 2. 设置东亚字体 (保留西文字体设置)
|
||||
if self.target_cjk_font:
|
||||
self._set_east_asian_font(primary_run, self.target_cjk_font)
|
||||
# 2. (已移除字体强制设置逻辑,以保留 PPT 原样)
|
||||
|
||||
# 3. 处理溢出
|
||||
text_frame = element_info.get("text_frame")
|
||||
|
||||
Reference in New Issue
Block a user