Initial commit: 包装审核 POC、Docker 与前后端

Made-with: Cursor
This commit is contained in:
2026-04-15 17:18:49 +08:00
commit bbb4dd43b3
74 changed files with 297415 additions and 0 deletions

255
backend/app/main.py Normal file
View File

@@ -0,0 +1,255 @@
"""FastAPI application entry point."""
from __future__ import annotations
import asyncio
import json
import logging
import queue as thread_queue
import shutil
import threading
import uuid
from datetime import datetime
from pathlib import Path
from typing import Optional
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, StreamingResponse
from fastapi.staticfiles import StaticFiles
from backend.app import pipeline
# --------------------------------------------------------------------------- #
# Logging + SSE broadcast #
# --------------------------------------------------------------------------- #
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s [%(name)s] %(message)s",
)
logger = logging.getLogger(__name__)
_log_buffer: list[dict] = []  # last 200 entries, replayed to newly connected SSE clients
_log_queues: list[thread_queue.Queue] = []  # one bounded queue per connected SSE client
_log_lock = threading.Lock()  # guards _log_buffer and _log_queues
class _BroadcastHandler(logging.Handler):
    """Fan every log record out to the in-memory buffer and all SSE clients."""

    def emit(self, record: logging.LogRecord) -> None:
        payload = {
            "time": datetime.fromtimestamp(record.created).strftime("%H:%M:%S"),
            "level": record.levelname,
            "name": record.name.replace("backend.app.", ""),
            "msg": record.getMessage(),
        }
        with _log_lock:
            _log_buffer.append(payload)
            # Cap the replay buffer at 200 entries.
            while len(_log_buffer) > 200:
                del _log_buffer[0]
            for client_q in _log_queues:
                try:
                    client_q.put_nowait(payload)
                except thread_queue.Full:
                    # Slow consumer: drop the record rather than block logging.
                    pass
# Attach to the root logger so records from every module are broadcast.
_broadcast_handler = _BroadcastHandler()
logging.getLogger().addHandler(_broadcast_handler)
# --------------------------------------------------------------------------- #
# Paths & constants                                                           #
# --------------------------------------------------------------------------- #
_ROOT = Path(__file__).resolve().parents[2]  # repo root (two levels above backend/app)
UPLOADS_DIR = _ROOT / "data" / "uploads"
OUTPUTS_DIR = _ROOT / "data" / "outputs"
# Bundled sample files used when the client uploads nothing.
# NOTE: these names are runtime data (looked up on disk) — do not "translate" them.
_DEFAULT_AI_NAME = "【2026-04-09】端午 - 背标 - 天问.ai"
_DEFAULT_WORD_NAME = "天问礼品粽【260331】.docx"
_DEFAULT_AI = _ROOT / _DEFAULT_AI_NAME
_DEFAULT_WORD = _ROOT / _DEFAULT_WORD_NAME
ALLOWED_AI_EXT = {".ai", ".pdf"}  # Illustrator source or exported PDF
ALLOWED_WORD_EXT = {".docx"}
# --------------------------------------------------------------------------- #
# App                                                                         #
# --------------------------------------------------------------------------- #
app = FastAPI(
    title="诸老大包装审核 API",
    description="Upload an Illustrator file and a Word document to validate packaging copy.",
    version="2.0.0",
)
# NOTE(review): wildcard CORS is acceptable for a POC; tighten allow_origins
# before exposing this service publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# --------------------------------------------------------------------------- #
# Helpers #
# --------------------------------------------------------------------------- #
def _save_upload(upload: UploadFile, dest: Path) -> None:
dest.parent.mkdir(parents=True, exist_ok=True)
with dest.open("wb") as fh:
fh.write(upload.file.read())
def _copy_default(src: Optional[Path], dest: Path, label: str) -> None:
if src is None or not src.exists():
raise HTTPException(
status_code=400,
detail=f"未上传{label}且找不到默认样例文件,请上传文件后重试。",
)
dest.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(src, dest)
# --------------------------------------------------------------------------- #
# Endpoints #
# --------------------------------------------------------------------------- #
@app.get("/api/logs/stream")
async def log_stream() -> StreamingResponse:
    """SSE endpoint: push backend log records to the frontend sidebar in real time.

    Each client gets its own bounded queue; ``_BroadcastHandler`` feeds new
    records into every registered queue.
    """
    q: thread_queue.Queue = thread_queue.Queue(maxsize=500)
    with _log_lock:
        # Register the queue and snapshot the buffer under one lock acquisition,
        # so no record can be missed between replay and live streaming.
        _log_queues.append(q)
        recent = list(_log_buffer)

    async def generate():
        try:
            # First replay the buffered history to the new client.
            for entry in recent:
                yield f"data: {json.dumps(entry, ensure_ascii=False)}\n\n"
            # Then keep pushing fresh records.
            while True:
                batch: list[dict] = []
                try:
                    # Drain everything currently queued without blocking.
                    while True:
                        batch.append(q.get_nowait())
                except thread_queue.Empty:
                    pass
                for entry in batch:
                    yield f"data: {json.dumps(entry, ensure_ascii=False)}\n\n"
                if not batch:
                    # SSE comment line keeps idle connections/proxies alive.
                    yield ": keepalive\n\n"
                await asyncio.sleep(0.25)
        finally:
            # Client disconnected: unregister its queue so we stop buffering for it.
            with _log_lock:
                if q in _log_queues:
                    _log_queues.remove(q)

    return StreamingResponse(
        generate(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "X-Accel-Buffering": "no",  # tell nginx not to buffer the event stream
            "Connection": "keep-alive",
        },
    )
@app.post("/api/process")
async def process_endpoint(
    ai_file: Optional[UploadFile] = File(None),
    word_file: Optional[UploadFile] = File(None),
) -> dict:
    """Run the full pipeline: AI → PDF → MinerU → Word validation.

    Both uploads are optional; when missing, the bundled sample files are
    used instead (400 if those are absent too). Returns the pipeline's
    result dict. Raises 400 on bad extensions, 404/422/500 on pipeline
    failures.
    """
    job_id = uuid.uuid4().hex
    upload_dir = UPLOADS_DIR / job_id
    output_dir = OUTPUTS_DIR / job_id
    logger.info("POST /api/process job_id=%s", job_id)
    # ── Resolve AI file ──────────────────────────────────────────────────── #
    if ai_file is not None:
        original_name = Path(ai_file.filename or "source.ai").name
        suffix = Path(original_name).suffix.lower()
        if suffix not in ALLOWED_AI_EXT:
            raise HTTPException(
                status_code=400,
                detail=f"不支持的 AI 文件格式 '{suffix}',请上传 .ai 或 PDF。",
            )
        ai_path = upload_dir / original_name
        _save_upload(ai_file, ai_path)
    else:
        ai_path = upload_dir / (_DEFAULT_AI.name if _DEFAULT_AI else "source.ai")
        _copy_default(_DEFAULT_AI, ai_path, "AI 设计文件")
    # ── Resolve Word file ────────────────────────────────────────────────── #
    if word_file is not None:
        suffix = Path(word_file.filename or "").suffix.lower()
        if suffix not in ALLOWED_WORD_EXT:
            raise HTTPException(
                status_code=400,
                detail=f"不支持的 Word 文件格式 '{suffix}',请上传 .docx。",
            )
        word_path = upload_dir / f"reference{suffix}"
        _save_upload(word_file, word_path)
    else:
        word_path = upload_dir / (_DEFAULT_WORD.name if _DEFAULT_WORD else "reference.docx")
        _copy_default(_DEFAULT_WORD, word_path, "Word 校对稿")
    # ── Run the pipeline in the default thread pool so the event loop stays ─ #
    # ── free and the SSE log stream keeps flowing. ─────────────────────────  #
    try:
        # get_running_loop() is the correct call inside a coroutine;
        # get_event_loop() is deprecated here since Python 3.10.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(
            None,
            pipeline.process_document,
            ai_path,
            word_path,
            output_dir,
            job_id,
        )
    except FileNotFoundError as exc:
        logger.exception("Pipeline error (not found): job_id=%s", job_id)
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    except RuntimeError as exc:
        logger.exception("Pipeline error (runtime): job_id=%s", job_id)
        raise HTTPException(status_code=422, detail=str(exc)) from exc
    except Exception as exc:
        logger.exception("Pipeline error (unexpected): job_id=%s", job_id)
        raise HTTPException(status_code=500, detail=f"处理失败:{exc}") from exc
    return result
@app.get("/api/files/{job_id}/{file_path:path}")
async def serve_file(job_id: str, file_path: str) -> FileResponse:
    """Serve a job artifact (preview PDF, JSON report, markdown, etc.).

    Security: ``{file_path:path}`` accepts ``../`` segments, so the resolved
    target is verified to stay inside the job's output directory before it is
    served; escapes get the same 404 as a missing file (no info leak).
    """
    base = (OUTPUTS_DIR / job_id).resolve()
    target = (base / file_path).resolve()
    # Path-traversal guard: relative_to raises if target escaped base.
    try:
        target.relative_to(base)
    except ValueError:
        raise HTTPException(status_code=404, detail="文件不存在")
    if not target.exists() or not target.is_file():
        raise HTTPException(status_code=404, detail="文件不存在")
    suffix = target.suffix.lower()
    media_type = {
        ".pdf": "application/pdf",
        ".json": "application/json",
        ".md": "text/markdown",
    }.get(suffix, "application/octet-stream")
    return FileResponse(target, media_type=media_type)
@app.get("/api/health")
async def health() -> dict:
    """Liveness probe: always reports the service as up."""
    return dict(status="ok")
# Production image: the Vite build output is served from the same origin as
# the API, so the frontend needs no VITE_API_BASE_URL configuration.
_dist = _ROOT / "frontend" / "dist"
if _dist.is_dir():
    app.mount("/", StaticFiles(directory=str(_dist), html=True), name="frontend")