Initial commit: 包装审核 POC、Docker 与前后端

Made-with: Cursor
This commit is contained in:
2026-04-15 17:18:49 +08:00
commit bbb4dd43b3
74 changed files with 297415 additions and 0 deletions

255
backend/app/main.py Normal file
View File

@@ -0,0 +1,255 @@
"""FastAPI application entry point."""
from __future__ import annotations
import asyncio
import json
import logging
import queue as thread_queue
import shutil
import threading
import uuid
from datetime import datetime
from pathlib import Path
from typing import Optional
from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, StreamingResponse
from fastapi.staticfiles import StaticFiles
from backend.app import pipeline
# --------------------------------------------------------------------------- #
# Logging + SSE broadcast #
# --------------------------------------------------------------------------- #
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s [%(name)s] %(message)s",
)
logger = logging.getLogger(__name__)
_log_buffer: list[dict] = []  # last 200 entries, replayed to newly connected SSE clients
_log_queues: list[thread_queue.Queue] = []  # one bounded queue per connected SSE client
_log_lock = threading.Lock()  # guards _log_buffer and _log_queues
class _BroadcastHandler(logging.Handler):
    """Fan every log record out to the in-memory buffer and all SSE clients."""

    def emit(self, record: logging.LogRecord) -> None:
        payload = {
            "time": datetime.fromtimestamp(record.created).strftime("%H:%M:%S"),
            "level": record.levelname,
            "name": record.name.replace("backend.app.", ""),
            "msg": record.getMessage(),
        }
        with _log_lock:
            _log_buffer.append(payload)
            # Cap the replay buffer at 200 entries.
            while len(_log_buffer) > 200:
                del _log_buffer[0]
            for client_q in _log_queues:
                try:
                    client_q.put_nowait(payload)
                except thread_queue.Full:
                    # Slow consumer: drop the record rather than block logging.
                    pass
# Attach to the root logger so records from every module are broadcast.
_broadcast_handler = _BroadcastHandler()
logging.getLogger().addHandler(_broadcast_handler)
# --------------------------------------------------------------------------- #
# Paths & constants                                                           #
# --------------------------------------------------------------------------- #
_ROOT = Path(__file__).resolve().parents[2]  # repo root (two levels above backend/app)
UPLOADS_DIR = _ROOT / "data" / "uploads"
OUTPUTS_DIR = _ROOT / "data" / "outputs"
# Bundled sample files used when the client uploads nothing.
# NOTE: these names are runtime data (looked up on disk) — do not "translate" them.
_DEFAULT_AI_NAME = "【2026-04-09】端午 - 背标 - 天问.ai"
_DEFAULT_WORD_NAME = "天问礼品粽【260331】.docx"
_DEFAULT_AI = _ROOT / _DEFAULT_AI_NAME
_DEFAULT_WORD = _ROOT / _DEFAULT_WORD_NAME
ALLOWED_AI_EXT = {".ai", ".pdf"}  # Illustrator source or exported PDF
ALLOWED_WORD_EXT = {".docx"}
# --------------------------------------------------------------------------- #
# App                                                                         #
# --------------------------------------------------------------------------- #
app = FastAPI(
    title="诸老大包装审核 API",
    description="Upload an Illustrator file and a Word document to validate packaging copy.",
    version="2.0.0",
)
# NOTE(review): wildcard CORS is acceptable for a POC; tighten allow_origins
# before exposing this service publicly.
app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)
# --------------------------------------------------------------------------- #
# Helpers #
# --------------------------------------------------------------------------- #
def _save_upload(upload: UploadFile, dest: Path) -> None:
dest.parent.mkdir(parents=True, exist_ok=True)
with dest.open("wb") as fh:
fh.write(upload.file.read())
def _copy_default(src: Optional[Path], dest: Path, label: str) -> None:
if src is None or not src.exists():
raise HTTPException(
status_code=400,
detail=f"未上传{label}且找不到默认样例文件,请上传文件后重试。",
)
dest.parent.mkdir(parents=True, exist_ok=True)
shutil.copy2(src, dest)
# --------------------------------------------------------------------------- #
# Endpoints #
# --------------------------------------------------------------------------- #
@app.get("/api/logs/stream")
async def log_stream() -> StreamingResponse:
    """SSE endpoint: push backend log records to the frontend sidebar in real time.

    Each client gets its own bounded queue; ``_BroadcastHandler`` feeds new
    records into every registered queue.
    """
    q: thread_queue.Queue = thread_queue.Queue(maxsize=500)
    with _log_lock:
        # Register the queue and snapshot the buffer under one lock acquisition,
        # so no record can be missed between replay and live streaming.
        _log_queues.append(q)
        recent = list(_log_buffer)

    async def generate():
        try:
            # First replay the buffered history to the new client.
            for entry in recent:
                yield f"data: {json.dumps(entry, ensure_ascii=False)}\n\n"
            # Then keep pushing fresh records.
            while True:
                batch: list[dict] = []
                try:
                    # Drain everything currently queued without blocking.
                    while True:
                        batch.append(q.get_nowait())
                except thread_queue.Empty:
                    pass
                for entry in batch:
                    yield f"data: {json.dumps(entry, ensure_ascii=False)}\n\n"
                if not batch:
                    # SSE comment line keeps idle connections/proxies alive.
                    yield ": keepalive\n\n"
                await asyncio.sleep(0.25)
        finally:
            # Client disconnected: unregister its queue so we stop buffering for it.
            with _log_lock:
                if q in _log_queues:
                    _log_queues.remove(q)

    return StreamingResponse(
        generate(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "X-Accel-Buffering": "no",  # tell nginx not to buffer the event stream
            "Connection": "keep-alive",
        },
    )
@app.post("/api/process")
async def process_endpoint(
    ai_file: Optional[UploadFile] = File(None),
    word_file: Optional[UploadFile] = File(None),
) -> dict:
    """Run the full pipeline: AI → PDF → MinerU → Word validation.

    Both uploads are optional; when missing, the bundled sample files are
    used instead (400 if those are absent too). Returns the pipeline's
    result dict. Raises 400 on bad extensions, 404/422/500 on pipeline
    failures.
    """
    job_id = uuid.uuid4().hex
    upload_dir = UPLOADS_DIR / job_id
    output_dir = OUTPUTS_DIR / job_id
    logger.info("POST /api/process job_id=%s", job_id)
    # ── Resolve AI file ──────────────────────────────────────────────────── #
    if ai_file is not None:
        original_name = Path(ai_file.filename or "source.ai").name
        suffix = Path(original_name).suffix.lower()
        if suffix not in ALLOWED_AI_EXT:
            raise HTTPException(
                status_code=400,
                detail=f"不支持的 AI 文件格式 '{suffix}',请上传 .ai 或 PDF。",
            )
        ai_path = upload_dir / original_name
        _save_upload(ai_file, ai_path)
    else:
        ai_path = upload_dir / (_DEFAULT_AI.name if _DEFAULT_AI else "source.ai")
        _copy_default(_DEFAULT_AI, ai_path, "AI 设计文件")
    # ── Resolve Word file ────────────────────────────────────────────────── #
    if word_file is not None:
        suffix = Path(word_file.filename or "").suffix.lower()
        if suffix not in ALLOWED_WORD_EXT:
            raise HTTPException(
                status_code=400,
                detail=f"不支持的 Word 文件格式 '{suffix}',请上传 .docx。",
            )
        word_path = upload_dir / f"reference{suffix}"
        _save_upload(word_file, word_path)
    else:
        word_path = upload_dir / (_DEFAULT_WORD.name if _DEFAULT_WORD else "reference.docx")
        _copy_default(_DEFAULT_WORD, word_path, "Word 校对稿")
    # ── Run the pipeline in the default thread pool so the event loop stays ─ #
    # ── free and the SSE log stream keeps flowing. ─────────────────────────  #
    try:
        # get_running_loop() is the correct call inside a coroutine;
        # get_event_loop() is deprecated here since Python 3.10.
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(
            None,
            pipeline.process_document,
            ai_path,
            word_path,
            output_dir,
            job_id,
        )
    except FileNotFoundError as exc:
        logger.exception("Pipeline error (not found): job_id=%s", job_id)
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    except RuntimeError as exc:
        logger.exception("Pipeline error (runtime): job_id=%s", job_id)
        raise HTTPException(status_code=422, detail=str(exc)) from exc
    except Exception as exc:
        logger.exception("Pipeline error (unexpected): job_id=%s", job_id)
        raise HTTPException(status_code=500, detail=f"处理失败:{exc}") from exc
    return result
@app.get("/api/files/{job_id}/{file_path:path}")
async def serve_file(job_id: str, file_path: str) -> FileResponse:
    """Serve a job artifact (preview PDF, JSON report, markdown, etc.).

    Security: ``{file_path:path}`` accepts ``../`` segments, so the resolved
    target is verified to stay inside the job's output directory before it is
    served; escapes get the same 404 as a missing file (no info leak).
    """
    base = (OUTPUTS_DIR / job_id).resolve()
    target = (base / file_path).resolve()
    # Path-traversal guard: relative_to raises if target escaped base.
    try:
        target.relative_to(base)
    except ValueError:
        raise HTTPException(status_code=404, detail="文件不存在")
    if not target.exists() or not target.is_file():
        raise HTTPException(status_code=404, detail="文件不存在")
    suffix = target.suffix.lower()
    media_type = {
        ".pdf": "application/pdf",
        ".json": "application/json",
        ".md": "text/markdown",
    }.get(suffix, "application/octet-stream")
    return FileResponse(target, media_type=media_type)
@app.get("/api/health")
async def health() -> dict:
    """Liveness probe: always reports the service as up."""
    return dict(status="ok")
# Production image: the Vite build output is served from the same origin as
# the API, so the frontend needs no VITE_API_BASE_URL configuration.
_dist = _ROOT / "frontend" / "dist"
if _dist.is_dir():
    app.mount("/", StaticFiles(directory=str(_dist), html=True), name="frontend")