Initial commit: packaging-review POC, Docker, and frontend/backend
Made-with: Cursor
16
.dockerignore
Normal file
@@ -0,0 +1,16 @@
.git
.gitignore
**/__pycache__
**/*.pyc
**/.pytest_cache
**/.mypy_cache
**/.ruff_cache
frontend/node_modules
frontend/dist
.runtime
.tmp_*
.env
*.md
.DS_Store
tests
.pytest_cache
19
.gitignore
Normal file
@@ -0,0 +1,19 @@
.env
.env.*
!.env.example
.DS_Store
.runtime/
.tmp_*/
__pycache__/
*.py[cod]
.pytest_cache/
.mypy_cache/
.ruff_cache/
.venv/
venv/

frontend/node_modules/
frontend/dist/

data/uploads/
data/outputs/
33
Dockerfile
Normal file
@@ -0,0 +1,33 @@
# ---- Frontend build ----
FROM node:22-bookworm-slim AS frontend-build
WORKDIR /src/frontend
COPY frontend/package.json frontend/package-lock.json* ./
RUN npm ci 2>/dev/null || npm install
COPY frontend/ ./
RUN npm run build

# ---- Runtime ----
FROM python:3.12-slim-bookworm AS runtime
WORKDIR /app

RUN apt-get update \
    && apt-get install -y --no-install-recommends \
        ghostscript \
        pandoc \
        libglib2.0-0 \
        libgomp1 \
    && rm -rf /var/lib/apt/lists/*

COPY requirements.txt .
RUN pip install --no-cache-dir -r requirements.txt

COPY backend/ ./backend/
RUN mkdir -p data/uploads data/outputs

COPY --from=frontend-build /src/frontend/dist ./frontend/dist

ENV PYTHONUNBUFFERED=1
ENV PYTHONDONTWRITEBYTECODE=1

EXPOSE 8010
CMD ["uvicorn", "backend.app.main:app", "--host", "0.0.0.0", "--port", "8010"]
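The runtime stage serves the FastAPI app and the pre-built frontend from a single container on port 8010 (see the CMD line). Ghostscript backs the PDF conversion and rasterisation steps in the pipeline; pandoc is presumably used by the Word parsing step. Since .env is excluded by .dockerignore, the API keys (MINERU_API_KEY and, optionally, DASHSCOPE_API_KEY) need to be supplied to the container environment at run time.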
BIN
Lark Helper 2026-04-14 11.49.44.png
Normal file
(binary image added: 47 KiB)
1
backend/__init__.py
Normal file
@@ -0,0 +1 @@
"""Backend package for the AI field validation preview tool."""
1
backend/app/__init__.py
Normal file
@@ -0,0 +1 @@
"""Backend application package."""
103
backend/app/barcode_detector.py
Normal file
@@ -0,0 +1,103 @@
"""Detect and decode barcodes / QR codes from an image file using zxing-cpp."""
from __future__ import annotations

import logging
from dataclasses import dataclass
from pathlib import Path

logger = logging.getLogger(__name__)

# Human-readable names for zxing BarcodeFormat values
_FORMAT_NAMES: dict[str, str] = {
    "QRCode": "二维码 (QR Code)",
    "DataMatrix": "DataMatrix",
    "Aztec": "Aztec",
    "PDF417": "PDF417",
    "MicroQRCode": "微型二维码",
    "RMQRCode": "R型QR码",
    "EAN8": "EAN-8",
    "EAN13": "EAN-13",
    "UPCE": "UPC-E",
    "UPCA": "UPC-A",
    "Code39": "Code 39",
    "Code93": "Code 93",
    "Code128": "Code 128",
    "ITF": "ITF(交叉二五码)",
    "Codabar": "Codabar",
    "DataBar": "DataBar",
    "DataBarExpanded": "DataBar Expanded",
    "MaxiCode": "MaxiCode",
    "DXFilmEdge": "DXFilmEdge",
    "LinearCodes": "一维码",
    "MatrixCodes": "矩阵码",
}


@dataclass
class BarcodeResult:
    format: str        # zxing format string, e.g. "EAN13"
    format_label: str  # Chinese-friendly label
    text: str          # decoded text / number
    # bounding box in image pixels (top-left origin)
    x0: int
    y0: int
    x1: int
    y1: int
    valid: bool        # zxing isValid


def detect_barcodes(image_path: Path) -> list[BarcodeResult]:
    """Scan *image_path* for all barcodes and QR codes.

    Returns a list of :class:`BarcodeResult`, one entry per detected code.
    Returns an empty list when nothing is found or on error.
    """
    try:
        import zxingcpp
    except ImportError:
        logger.warning("zxing-cpp not installed; barcode detection skipped")
        return []

    try:
        from PIL import Image
        img = Image.open(image_path).convert("RGB")
    except Exception as exc:
        logger.warning("barcode_detector: cannot open image %s: %s", image_path, exc)
        return []

    try:
        results = zxingcpp.read_barcodes(img)
    except Exception as exc:
        logger.warning("barcode_detector: zxing scan failed: %s", exc)
        return []

    output: list[BarcodeResult] = []
    for r in results:
        fmt_str = str(r.format).replace("BarcodeFormat.", "")
        label = _FORMAT_NAMES.get(fmt_str, fmt_str)

        # zxing-cpp position: r.position is a quadrilateral with four points
        try:
            pts = r.position
            xs = [pts.top_left.x, pts.top_right.x, pts.bottom_right.x, pts.bottom_left.x]
            ys = [pts.top_left.y, pts.top_right.y, pts.bottom_right.y, pts.bottom_left.y]
            x0, y0, x1, y1 = int(min(xs)), int(min(ys)), int(max(xs)), int(max(ys))
        except Exception:
            x0 = y0 = x1 = y1 = 0

        output.append(BarcodeResult(
            format=fmt_str,
            format_label=label,
            text=r.text,
            x0=x0, y0=y0, x1=x1, y1=y1,
            valid=r.valid,
        ))
        logger.info(
            "barcode_detector: found %s text=%r bbox=(%d,%d,%d,%d)",
            fmt_str, r.text, x0, y0, x1, y1,
        )

    if not output:
        logger.info("barcode_detector: no barcode/QR found in %s", image_path.name)

    return output
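A minimal usage sketch for detect_barcodes (the crop path below is hypothetical; requires the zxingcpp and Pillow packages):

    from pathlib import Path
    from backend.app.barcode_detector import detect_barcodes

    for code in detect_barcodes(Path("data/outputs/demo/crop/cropped_label.png")):
        print(code.format_label, code.text, (code.x0, code.y0, code.x1, code.y1))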
132
backend/app/image_classifier.py
Normal file
@@ -0,0 +1,132 @@
"""Semantic classification with Qwen VL: does an image contain a QR code / barcode?

Usage
-----
::

    from backend.app.image_classifier import is_qr_code

    result = is_qr_code(Path("crop.png"), api_key="sk-...")
    if result:
        # hand off to the barcode decoder
        ...

Design principles
-----------------
* Single yes/no judgement only; decoding is left to barcode_detector.
* Reuses the API key / base_url lookup logic already in region_detector.
* Returns False when the network or model call fails, so the pipeline degrades gracefully.
"""
from __future__ import annotations

import base64
import io
import logging
from pathlib import Path

logger = logging.getLogger(__name__)

# Use the lightweight 7B vision model: fast and cheap
_DEFAULT_MODEL = "qwen2.5-vl-7b-instruct"

_CLASSIFY_PROMPT = (
    "请仔细观察这张图片。\n"
    "问题:图片中是否包含二维码(QR Code)或任何类型的条形码?\n"
    '请只回答"是"或"否",不要输出其他任何内容。'
)


def _encode_image(image_path: Path, max_side: int = 512) -> str:
    """Encode the image as a base64 PNG string, downscaling when needed.

    Small images (e.g. blocks cropped by MinerU) keep their original size;
    large images are scaled down proportionally to cut token usage.
    """
    from PIL import Image

    with Image.open(image_path) as img:
        img = img.convert("RGB")
        w, h = img.size
        if max(w, h) > max_side:
            scale = max_side / max(w, h)
            img = img.resize((max(1, round(w * scale)), max(1, round(h * scale))), Image.LANCZOS)

        buf = io.BytesIO()
        img.save(buf, format="PNG")

    return base64.b64encode(buf.getvalue()).decode()


def is_qr_code(
    image_path: Path,
    api_key: str | None = None,
    model: str = _DEFAULT_MODEL,
) -> bool:
    """Ask Qwen VL whether the image contains a QR code or barcode.

    Parameters
    ----------
    image_path:
        Path of the image to classify.
    api_key:
        DashScope API key; if None it is read from the environment / .env file.
    model:
        Model name, defaults to qwen2.5-vl-7b-instruct.

    Returns
    -------
    bool
        True  → the model believes a QR code / barcode is present
        False → absent, or the call failed (graceful degradation)
    """
    # Lazy imports so the module still loads in unconfigured environments
    from backend.app.region_detector import _get_api_key, _get_base_url
    from openai import OpenAI

    key = api_key or _get_api_key()
    if not key:
        logger.warning("image_classifier: DASHSCOPE_API_KEY 未配置,跳过 QR 语义判断")
        return False

    try:
        b64 = _encode_image(image_path)
    except Exception as exc:
        logger.warning("image_classifier: 图片编码失败 (%s),跳过分类", exc)
        return False

    client = OpenAI(api_key=key, base_url=_get_base_url())
    try:
        response = client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/png;base64,{b64}"},
                        },
                        {"type": "text", "text": _CLASSIFY_PROMPT},
                    ],
                }
            ],
            max_tokens=10,
            temperature=0.0,
        )
    except Exception as exc:
        logger.warning("image_classifier: Qwen VL 调用失败 (%s),跳过分类", exc)
        return False

    raw = (response.choices[0].message.content or "").strip()
    logger.debug("image_classifier: 模型原始回复 = %r", raw)

    # Accept both "是"/"否" and "Yes"/"No" style answers
    answer = raw.lower()
    result = answer.startswith("是") or answer.startswith("yes")
    logger.info(
        "image_classifier: %s → %s(原始回复:%r)",
        image_path.name,
        "二维码/条码" if result else "非二维码",
        raw,
    )
    return result
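The module is meant as a cheap semantic gate in front of the zxing decoder. A sketch of the intended two-stage call, mirroring what pipeline._process_image_blocks does (the crop filename is hypothetical):

    from pathlib import Path
    from backend.app.barcode_detector import detect_barcodes
    from backend.app.image_classifier import is_qr_code

    crop = Path("block_001_p1.png")
    if is_qr_code(crop):                    # VL model says: QR/barcode present
        for code in detect_barcodes(crop):  # precise decode with zxing-cpp
            print(code.format_label, code.text)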
255
backend/app/main.py
Normal file
@@ -0,0 +1,255 @@
"""FastAPI application entry point."""
from __future__ import annotations

import asyncio
import json
import logging
import queue as thread_queue
import shutil
import threading
import uuid
from datetime import datetime
from pathlib import Path
from typing import Optional

from fastapi import FastAPI, File, HTTPException, UploadFile
from fastapi.middleware.cors import CORSMiddleware
from fastapi.responses import FileResponse, StreamingResponse
from fastapi.staticfiles import StaticFiles

from backend.app import pipeline

# --------------------------------------------------------------------------- #
# Logging + SSE broadcast                                                      #
# --------------------------------------------------------------------------- #

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s %(levelname)s [%(name)s] %(message)s",
)
logger = logging.getLogger(__name__)

_log_buffer: list[dict] = []  # last 200 entries, replayed to new connections
_log_queues: list[thread_queue.Queue] = []
_log_lock = threading.Lock()


class _BroadcastHandler(logging.Handler):
    """Broadcast log records to all connected SSE clients."""

    def emit(self, record: logging.LogRecord) -> None:
        entry = {
            "time": datetime.fromtimestamp(record.created).strftime("%H:%M:%S"),
            "level": record.levelname,
            "name": record.name.replace("backend.app.", ""),
            "msg": record.getMessage(),
        }
        with _log_lock:
            _log_buffer.append(entry)
            if len(_log_buffer) > 200:
                _log_buffer.pop(0)
            for q in _log_queues:
                try:
                    q.put_nowait(entry)
                except thread_queue.Full:
                    pass


# Attach to the root logger so every module's logs are covered
_broadcast_handler = _BroadcastHandler()
logging.getLogger().addHandler(_broadcast_handler)

# --------------------------------------------------------------------------- #
# Paths & constants                                                            #
# --------------------------------------------------------------------------- #

_ROOT = Path(__file__).resolve().parents[2]
UPLOADS_DIR = _ROOT / "data" / "uploads"
OUTPUTS_DIR = _ROOT / "data" / "outputs"

_DEFAULT_AI_NAME = "【2026-04-09】端午 - 背标 - 天问.ai"
_DEFAULT_WORD_NAME = "天问礼品粽【260331】.docx"
_DEFAULT_AI = _ROOT / _DEFAULT_AI_NAME
_DEFAULT_WORD = _ROOT / _DEFAULT_WORD_NAME

ALLOWED_AI_EXT = {".ai", ".pdf"}
ALLOWED_WORD_EXT = {".docx"}

# --------------------------------------------------------------------------- #
# App                                                                          #
# --------------------------------------------------------------------------- #

app = FastAPI(
    title="诸老大包装审核 API",
    description="Upload an Illustrator file and a Word document to validate packaging copy.",
    version="2.0.0",
)

app.add_middleware(
    CORSMiddleware,
    allow_origins=["*"],
    allow_methods=["*"],
    allow_headers=["*"],
)

# --------------------------------------------------------------------------- #
# Helpers                                                                      #
# --------------------------------------------------------------------------- #

def _save_upload(upload: UploadFile, dest: Path) -> None:
    dest.parent.mkdir(parents=True, exist_ok=True)
    with dest.open("wb") as fh:
        fh.write(upload.file.read())


def _copy_default(src: Optional[Path], dest: Path, label: str) -> None:
    if src is None or not src.exists():
        raise HTTPException(
            status_code=400,
            detail=f"未上传{label}且找不到默认样例文件,请上传文件后重试。",
        )
    dest.parent.mkdir(parents=True, exist_ok=True)
    shutil.copy2(src, dest)


# --------------------------------------------------------------------------- #
# Endpoints                                                                    #
# --------------------------------------------------------------------------- #

@app.get("/api/logs/stream")
async def log_stream() -> StreamingResponse:
    """SSE endpoint: streams backend logs to the frontend sidebar in real time."""
    q: thread_queue.Queue = thread_queue.Queue(maxsize=500)
    with _log_lock:
        _log_queues.append(q)
        recent = list(_log_buffer)

    async def generate():
        try:
            # First replay the buffered history
            for entry in recent:
                yield f"data: {json.dumps(entry, ensure_ascii=False)}\n\n"

            # Then keep pushing new entries
            while True:
                batch: list[dict] = []
                try:
                    while True:
                        batch.append(q.get_nowait())
                except thread_queue.Empty:
                    pass

                for entry in batch:
                    yield f"data: {json.dumps(entry, ensure_ascii=False)}\n\n"

                if not batch:
                    yield ": keepalive\n\n"

                await asyncio.sleep(0.25)
        finally:
            with _log_lock:
                if q in _log_queues:
                    _log_queues.remove(q)

    return StreamingResponse(
        generate(),
        media_type="text/event-stream",
        headers={
            "Cache-Control": "no-cache",
            "X-Accel-Buffering": "no",
            "Connection": "keep-alive",
        },
    )


@app.post("/api/process")
async def process_endpoint(
    ai_file: Optional[UploadFile] = File(None),
    word_file: Optional[UploadFile] = File(None),
) -> dict:
    """Run the full pipeline: AI → PDF → MinerU → Word validation."""
    job_id = uuid.uuid4().hex
    upload_dir = UPLOADS_DIR / job_id
    output_dir = OUTPUTS_DIR / job_id

    logger.info("POST /api/process job_id=%s", job_id)

    # ── Resolve AI file ──────────────────────────────────────────────────── #
    if ai_file is not None:
        original_name = Path(ai_file.filename or "source.ai").name
        suffix = Path(original_name).suffix.lower()
        if suffix not in ALLOWED_AI_EXT:
            raise HTTPException(
                status_code=400,
                detail=f"不支持的 AI 文件格式 '{suffix}',请上传 .ai 或 PDF。",
            )
        ai_path = upload_dir / original_name
        _save_upload(ai_file, ai_path)
    else:
        ai_path = upload_dir / (_DEFAULT_AI.name if _DEFAULT_AI else "source.ai")
        _copy_default(_DEFAULT_AI, ai_path, "AI 设计文件")

    # ── Resolve Word file ────────────────────────────────────────────────── #
    if word_file is not None:
        suffix = Path(word_file.filename or "").suffix.lower()
        if suffix not in ALLOWED_WORD_EXT:
            raise HTTPException(
                status_code=400,
                detail=f"不支持的 Word 文件格式 '{suffix}',请上传 .docx。",
            )
        word_path = upload_dir / f"reference{suffix}"
        _save_upload(word_file, word_path)
    else:
        word_path = upload_dir / (_DEFAULT_WORD.name if _DEFAULT_WORD else "reference.docx")
        _copy_default(_DEFAULT_WORD, word_path, "Word 校对稿")

    # ── Run pipeline in a thread pool (keeps the event loop free; SSE logs still flow) ── #
    try:
        loop = asyncio.get_running_loop()
        result = await loop.run_in_executor(
            None,
            pipeline.process_document,
            ai_path,
            word_path,
            output_dir,
            job_id,
        )
    except FileNotFoundError as exc:
        logger.exception("Pipeline error (not found): job_id=%s", job_id)
        raise HTTPException(status_code=404, detail=str(exc)) from exc
    except RuntimeError as exc:
        logger.exception("Pipeline error (runtime): job_id=%s", job_id)
        raise HTTPException(status_code=422, detail=str(exc)) from exc
    except Exception as exc:
        logger.exception("Pipeline error (unexpected): job_id=%s", job_id)
        raise HTTPException(status_code=500, detail=f"处理失败:{exc}") from exc

    return result


@app.get("/api/files/{job_id}/{file_path:path}")
async def serve_file(job_id: str, file_path: str) -> FileResponse:
    """Serve job artifacts (preview PDF, JSON, etc.)."""
    target = OUTPUTS_DIR / job_id / file_path
    if not target.exists() or not target.is_file():
        raise HTTPException(status_code=404, detail="文件不存在")

    suffix = target.suffix.lower()
    media_type = {
        ".pdf": "application/pdf",
        ".json": "application/json",
        ".md": "text/markdown",
    }.get(suffix, "application/octet-stream")

    return FileResponse(target, media_type=media_type)


@app.get("/api/health")
async def health() -> dict:
    return {"status": "ok"}


# Production image: the Vite build is served same-origin with the API,
# so no VITE_API_BASE_URL configuration is needed
_dist = _ROOT / "frontend" / "dist"
if _dist.is_dir():
    app.mount("/", StaticFiles(directory=str(_dist), html=True), name="frontend")
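For reference, a minimal Python consumer of the /api/logs/stream endpoint (the frontend sidebar would normally use EventSource; host and port assumed from the Dockerfile's EXPOSE 8010):

    import json
    import requests

    with requests.get("http://localhost:8010/api/logs/stream", stream=True) as resp:
        for raw in resp.iter_lines(decode_unicode=True):
            # SSE frames look like "data: {...}"; comment lines are keepalives
            if raw and raw.startswith("data: "):
                entry = json.loads(raw[len("data: "):])
                print(entry["time"], entry["level"], entry["msg"])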
248
backend/app/mineru_client.py
Normal file
@@ -0,0 +1,248 @@
from __future__ import annotations

import json
import logging
import time
import zipfile
from pathlib import Path

import requests

logger = logging.getLogger(__name__)

MINERU_BASE = "https://mineru.net/api/v4"

TERMINAL_STATES = {"done", "failed"}
IN_PROGRESS_STATE_LABELS = {
    "waiting-file": "等待文件上传",
    "pending": "排队中",
    "running": "解析中",
    "converting": "格式转换中",
}


class MineruClientError(RuntimeError):
    pass


class MineruClient:
    """Client for the MinerU precise-parsing API (token required).

    Full flow for a local image file:
    1. POST /file-urls/batch → obtain batch_id + signed OSS upload URL
    2. PUT the image to OSS → the service detects it and submits the parse job
    3. GET /extract-results/batch/{batch_id}, polling until state=done
    4. Download full_zip_url and extract the structured JSON

    File limits: ≤ 200 MB, ≤ 600 pages
    Supported formats: PDF, images (png/jpg/jpeg/jp2/webp/gif/bmp), Doc, Docx, Ppt, PPTx
    """

    def __init__(
        self,
        api_key: str,
        model_version: str = "vlm",
        language: str = "ch",
        enable_table: bool = True,
        is_ocr: bool = True,
        enable_formula: bool = True,
        poll_interval: float = 3.0,
        timeout: float = 300.0,
    ) -> None:
        self.api_key = api_key
        self.model_version = model_version
        self.language = language
        self.enable_table = enable_table
        self.is_ocr = is_ocr
        self.enable_formula = enable_formula
        self.poll_interval = poll_interval
        self.timeout = timeout

    def parse_image(self, image_path: Path, output_dir: Path) -> dict:
        """Parse a local image file and return the structured JSON data.

        Parameters
        ----------
        image_path:
            Local image path (png/jpg/jpeg/jp2/webp/gif/bmp)
        output_dir:
            Directory for intermediate artifacts (zip, extraction dir)

        Returns
        -------
        dict
            Structured JSON containing pdf_info (layout.json or content_list.json)
        """
        image_path = Path(image_path)
        if not image_path.exists():
            raise FileNotFoundError(f"图片文件不存在: {image_path}")

        output_dir = Path(output_dir)
        output_dir.mkdir(parents=True, exist_ok=True)

        logger.info("MinerU 精准解析开始: %s", image_path.name)
        batch_id, upload_url = self._request_upload_url(image_path.name)
        logger.info("MinerU 批次已创建: batch_id=%s", batch_id)

        self._upload_file(upload_url, image_path)
        logger.info("MinerU 文件上传完成: %s(系统自动提交解析)", image_path.name)

        zip_url = self._poll_batch_until_done(batch_id)
        logger.info("MinerU 解析完成: batch_id=%s", batch_id)

        zip_path = self._download_zip(zip_url, output_dir)
        extract_dir = output_dir / "result"
        self._extract_zip(zip_path, extract_dir)

        result = self._load_structured_json(extract_dir)
        logger.info("MinerU 结构化 JSON 加载完毕")
        return result

    # ------------------------------------------------------------------
    # Internal helpers
    # ------------------------------------------------------------------

    def _auth_headers(self) -> dict[str, str]:
        return {
            "Authorization": f"Bearer {self.api_key}",
            "Content-Type": "application/json",
        }

    def _request_upload_url(self, file_name: str) -> tuple[str, str]:
        """Request a batch upload link; returns (batch_id, oss_upload_url)."""
        payload = {
            "files": [{"name": file_name, "is_ocr": self.is_ocr}],
            "model_version": self.model_version,
            "language": self.language,
            "enable_table": self.enable_table,
            "enable_formula": self.enable_formula,
        }
        try:
            resp = requests.post(
                f"{MINERU_BASE}/file-urls/batch",
                headers=self._auth_headers(),
                json=payload,
                timeout=30,
            )
            resp.raise_for_status()
        except requests.RequestException as exc:
            raise MineruClientError(f"MinerU 申请上传 URL 失败: {exc}") from exc

        body = resp.json()
        if body.get("code") != 0:
            raise MineruClientError(f"MinerU 申请上传 URL 失败: {body.get('msg')}")

        data = body.get("data", {})
        batch_id = data.get("batch_id")
        file_urls = data.get("file_urls", [])
        if not batch_id or not file_urls:
            raise MineruClientError("MinerU 返回的 batch_id 或 file_urls 为空")

        return batch_id, file_urls[0]

    def _upload_file(self, upload_url: str, image_path: Path) -> None:
        """PUT the image to OSS. No Content-Type header is needed for the upload."""
        try:
            with image_path.open("rb") as f:
                resp = requests.put(upload_url, data=f, timeout=120)
        except requests.RequestException as exc:
            raise MineruClientError(f"MinerU 文件上传网络错误: {exc}") from exc

        if resp.status_code not in (200, 201):
            raise MineruClientError(
                f"MinerU 文件上传失败: HTTP {resp.status_code} {resp.text[:200]}"
            )

    def _poll_batch_until_done(self, batch_id: str) -> str:
        """Poll the batch result until done; returns full_zip_url."""
        url = f"{MINERU_BASE}/extract-results/batch/{batch_id}"
        deadline = time.monotonic() + self.timeout

        while time.monotonic() < deadline:
            try:
                resp = requests.get(url, headers=self._auth_headers(), timeout=30)
                resp.raise_for_status()
            except requests.RequestException as exc:
                raise MineruClientError(f"MinerU 查询批次状态失败: {exc}") from exc

            body = resp.json()
            if body.get("code") != 0:
                raise MineruClientError(f"MinerU 查询批次失败: {body.get('msg')}")

            results: list[dict] = body.get("data", {}).get("extract_result", [])
            if not results:
                time.sleep(self.poll_interval)
                continue

            item = results[0]
            state = item.get("state", "")
            label = IN_PROGRESS_STATE_LABELS.get(state, state)
            logger.info("MinerU 批次状态: batch_id=%s state=%s (%s)", batch_id, state, label)

            if state == "done":
                zip_url = item.get("full_zip_url")
                if not zip_url:
                    raise MineruClientError("MinerU 完成但未返回 full_zip_url")
                return zip_url

            if state == "failed":
                err_msg = item.get("err_msg") or "未知错误"
                raise MineruClientError(f"MinerU 解析失败: {err_msg}")

            time.sleep(self.poll_interval)

        raise MineruClientError(
            f"MinerU 轮询超时 ({self.timeout:.0f}s): batch_id={batch_id}"
        )

    def _download_zip(self, zip_url: str, output_dir: Path) -> Path:
        """Download the result zip to disk."""
        target = output_dir / "mineru_result.zip"
        try:
            resp = requests.get(zip_url, timeout=120, stream=True)
            resp.raise_for_status()
            with target.open("wb") as f:
                for chunk in resp.iter_content(chunk_size=8192):
                    f.write(chunk)
        except requests.RequestException as exc:
            raise MineruClientError(f"MinerU zip 下载失败: {exc}") from exc
        logger.info("MinerU zip 下载完毕: %s", target)
        return target

    def _extract_zip(self, zip_path: Path, extract_dir: Path) -> None:
        extract_dir.mkdir(parents=True, exist_ok=True)
        with zipfile.ZipFile(zip_path) as archive:
            archive.extractall(extract_dir)
        logger.info("MinerU zip 解压完毕: %s", extract_dir)

    def _load_structured_json(self, extract_dir: Path) -> dict:
        """Locate and load the structured JSON (the one containing pdf_info).

        MinerU zip layout:
            layout.json           → intermediate result (equivalent to middle.json)
            *_content_list.json   → content list
            *_model.json          → model inference output
            full.md               → Markdown rendering
        """
        candidates = [
            *sorted(extract_dir.rglob("layout.json")),
            *sorted(extract_dir.rglob("*layout*.json")),
            *sorted(extract_dir.rglob("*_content_list*.json")),
            *sorted(extract_dir.rglob("*.json")),
        ]
        seen: set[Path] = set()
        for candidate in candidates:
            if candidate in seen:
                continue
            seen.add(candidate)
            try:
                parsed = json.loads(candidate.read_text(encoding="utf-8"))
            except (json.JSONDecodeError, OSError):
                continue
            if isinstance(parsed, dict) and isinstance(parsed.get("pdf_info"), list):
                logger.info("MinerU 结构化 JSON 选用: %s", candidate.name)
                return parsed

        raise MineruClientError(
            "MinerU 结果 zip 中未找到包含 pdf_info 的结构化 JSON"
        )
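A minimal usage sketch for the client (the token and paths are placeholders):

    from pathlib import Path
    from backend.app.mineru_client import MineruClient

    client = MineruClient(api_key="<MINERU_API_KEY>")
    data = client.parse_image(Path("cropped_label.png"), Path("out/mineru"))
    print(len(data["pdf_info"]), "page(s) in structured JSON")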
299
backend/app/mineru_parser.py
Normal file
@@ -0,0 +1,299 @@
"""Parse MinerU structured JSON (layout.json / middle.json) into field records."""
from __future__ import annotations

import logging
import re
from dataclasses import dataclass

from bs4 import BeautifulSoup

logger = logging.getLogger(__name__)


def _extract_table_text(html: str) -> str:
    """Flatten table HTML into a multi-line string suitable for text matching.

    Each row becomes: cell1|cell2|cell3
    Cells within a row are joined with |, rows are separated by newlines.
    """
    try:
        soup = BeautifulSoup(html, "html.parser")
        rows = []
        for tr in soup.find_all("tr"):
            cells = [td.get_text(strip=True) for td in tr.find_all(["td", "th"])]
            if any(cells):
                rows.append("|".join(cells))
        return "\n".join(rows)
    except Exception:
        # On parse failure fall back to a crude regex strip
        return re.sub(r"<[^>]+>", " ", html).strip()


# 1 pt = 0.352778 mm
PT_TO_MM = 0.352778

# LaTeX inline-equation → Unicode map (only symbols common in label files)
_LATEX_TO_UNICODE: dict[str, str] = {
    r"\times": "×",
    r"\div": "÷",
    r"\pm": "±",
    r"\mp": "∓",
    r"\cdot": "·",
    r"\leq": "≤",
    r"\geq": "≥",
    r"\neq": "≠",
    r"\approx": "≈",
    r"\infty": "∞",
    r"\circ": "°",
    r"\degree": "°",
    r"\alpha": "α",
    r"\beta": "β",
    r"\gamma": "γ",
    r"\delta": "δ",
    r"\mu": "μ",
    r"\%": "%",
}

# MinerU sometimes emits ^{\circ} as ^{circ} (missing backslash);
# one regex matches both spellings
_SUPERSCRIPT_DEGREE_RE = re.compile(r"\^\{\\?circ\}", re.IGNORECASE)


@dataclass
class MineruDocument:
    page_width: float   # points
    page_height: float  # points
    fields: list[dict]  # list of field dicts ready for the API response


def _page_size(page: dict) -> tuple[float, float]:
    """Return (width, height) in points for a MinerU page entry."""
    # MinerU stores page size as [width, height] in `page_size`
    size = page.get("page_size") or page.get("page_size_pt") or []
    if isinstance(size, (list, tuple)) and len(size) >= 2:
        return float(size[0]), float(size[1])
    # Fallback: assume A4 portrait
    return 595.0, 842.0


def _latex_to_text(expr: str) -> str:
    """Convert a simple LaTeX expression to readable text (known symbols only)."""
    result = expr.strip()
    # Handle superscript degrees first: ^{circ} or ^{\circ} → °
    result = _SUPERSCRIPT_DEGREE_RE.sub("°", result)
    # Other superscripts ^{...} / subscripts _{...}: unwrap, keep the content
    result = re.sub(r"[\^_]\{([^}]*)\}", r"\1", result)
    for latex, uni in _LATEX_TO_UNICODE.items():
        result = result.replace(latex, uni)
    # Unrecognised commands (e.g. \foo): drop the backslash, keep the letters
    result = re.sub(r"\\([A-Za-z]+)", r"\1", result)
    return result


def _span_content(span: dict) -> str:
    """Extract matchable text content from a span.

    - type == "table":           parse the html field into row/column text
    - type == "inline_equation": LaTeX → Unicode text
    - other types:               take the content field and repair common
                                 LaTeX superscript leftovers (e.g. ^{circ})
    """
    span_type = span.get("type") or ""
    if span_type == "table":
        html = span.get("html") or ""
        return _extract_table_text(html) if html else ""
    if span_type == "inline_equation":
        return _latex_to_text((span.get("content") or "").strip())
    # Plain text span: MinerU sometimes embeds LaTeX superscripts (e.g. ^{circ})
    raw = (span.get("content") or "").strip()
    return _SUPERSCRIPT_DEGREE_RE.sub("°", raw)


def _iter_lines(block: dict):
    """Yield (line, block) tuples for all lines in a block.

    Handles two MinerU structures:
    - Flat:   block → lines → spans (text/title/etc.)
    - Nested: block → blocks → lines → spans (table blocks)
    """
    lines = block.get("lines")
    if lines:
        for line in lines:
            yield line, block
    else:
        # Table blocks (and some other types) have a nested `blocks` layer
        for inner in block.get("blocks", []):
            for line in inner.get("lines", []):
                yield line, block


def _iter_line_fields(page: dict):
    """Yield one record per non-empty *line* across the whole page.

    Each yielded tuple is ``(merged_text, line, first_text_span, block, table_html)`` where:
    - ``merged_text``     – all span contents concatenated (LaTeX already converted)
    - ``line``            – the MinerU line dict (carries the authoritative bbox)
    - ``first_text_span`` – first span that has font metadata, or ``None``
    - ``block``           – the containing block (carries ``type``)
    - ``table_html``      – raw table HTML when the line holds a table span, else ``None``

    Merging at the line level correctly handles footer / title blocks where a
    single printed sentence is split across many spans (e.g. text + inline_equation
    + text …). Table blocks still produce one record per table because they have
    exactly one span (type="table") per line.
    """
    def _process_block_set(blocks_iter):
        for block in blocks_iter:
            for line, src_block in _iter_lines(block):
                spans = line.get("spans", [])
                if not spans:
                    continue

                parts: list[str] = []
                first_text_span: dict | None = None
                table_html: str | None = None
                for span in spans:
                    content = _span_content(span)
                    if content:
                        parts.append(content)
                    if span.get("type") == "table":
                        # Keep the raw HTML so the frontend can render complex
                        # tables with colspan/rowspan
                        table_html = span.get("html") or None
                    elif first_text_span is None:
                        first_text_span = span

                merged = "".join(parts)
                if merged:
                    yield merged, line, first_text_span, src_block, table_html

    yield from _process_block_set(page.get("para_blocks", []))
    yield from _process_block_set(page.get("blocks", []))


def _bbox(obj: dict) -> tuple[float, float, float, float]:
    """Return (x0, y0, x1, y1) from an object's bbox field."""
    bbox = obj.get("bbox") or [0, 0, 0, 0]
    if isinstance(bbox, (list, tuple)) and len(bbox) >= 4:
        return float(bbox[0]), float(bbox[1]), float(bbox[2]), float(bbox[3])
    return 0.0, 0.0, 0.0, 0.0


def parse_mineru_fields(data: dict) -> MineruDocument:
    """Convert raw MinerU structured JSON into a :class:`MineruDocument`.

    Parameters
    ----------
    data:
        The parsed JSON dict returned by :class:`~backend.app.mineru_client.MineruClient`.
        Must contain a ``pdf_info`` list with one entry per page.

    Returns
    -------
    MineruDocument
        Holds page dimensions and a flat list of text field dicts.
    """
    pdf_info: list[dict] = data.get("pdf_info", [])
    if not pdf_info:
        logger.warning("MinerU JSON contains empty pdf_info")
        return MineruDocument(page_width=595.0, page_height=842.0, fields=[])

    # Use the first page's dimensions for the preview
    first_page = pdf_info[0]
    page_width, page_height = _page_size(first_page)

    fields: list[dict] = []
    for page in pdf_info:
        page_idx = int(page.get("page_idx", 0))
        page_num = page_idx + 1
        pw, ph = _page_size(page)

        for content, line, font_span, _block, table_html in _iter_line_fields(page):
            # bbox comes from the line (covers all spans in one visual row)
            x0, y0, x1, y1 = _bbox(line)

            font_size_pt: float | None = None
            font_name: str | None = None
            if font_span is not None:
                raw_size = font_span.get("size") or font_span.get("font_size")
                if raw_size is not None:
                    try:
                        font_size_pt = float(raw_size)
                    except (TypeError, ValueError):
                        pass
                font_name = font_span.get("font") or font_span.get("font_name") or None

            font_height_mm: float | None = (
                round(font_size_pt * PT_TO_MM, 2) if font_size_pt else None
            )

            block_type = (_block.get("type") or "text").strip() or "text"

            fields.append(
                {
                    "page": page_num,
                    "block_type": block_type,
                    "text": content,
                    "table_html": table_html,
                    "font_name": font_name,
                    "font_size_pt": round(font_size_pt, 2) if font_size_pt else None,
                    "font_height_mm": font_height_mm,
                    "x0_pt": round(x0, 2),
                    "top_pt": round(y0, 2),
                    "x1_pt": round(x1, 2),
                    "bottom_pt": round(y1, 2),
                }
            )

    logger.info(
        "MinerU parser extracted %d fields across %d page(s)",
        len(fields),
        len(pdf_info),
    )
    return MineruDocument(
        page_width=page_width,
        page_height=page_height,
        fields=fields,
    )


def parse_mineru_image_blocks(data: dict) -> list[dict]:
    """Extract every image-type block from the MinerU structured JSON.

    Returns
    -------
    list of dict, each containing:
        - page        : page number (1-based)
        - block_type  : "image"
        - img_path    : relative path recorded by MinerU inside the zip (may be None)
        - x0_pt, top_pt, x1_pt, bottom_pt : block bounding box
          (same coordinate system as the text fields)
    """
    pdf_info: list[dict] = data.get("pdf_info", [])
    images: list[dict] = []

    for page in pdf_info:
        page_idx = int(page.get("page_idx", 0))
        page_num = page_idx + 1

        for blocks_key in ("para_blocks", "blocks"):
            for block in page.get(blocks_key, []):
                if (block.get("type") or "").strip().lower() != "image":
                    continue
                x0, y0, x1, y1 = _bbox(block)
                # MinerU may record the image path under any of these keys
                img_path = (
                    block.get("img_path")
                    or block.get("image_path")
                    or block.get("path")
                    or None
                )
                images.append(
                    {
                        "page": page_num,
                        "block_type": "image",
                        "img_path": img_path,
                        "x0_pt": round(x0, 2),
                        "top_pt": round(y0, 2),
                        "x1_pt": round(x1, 2),
                        "bottom_pt": round(y1, 2),
                    }
                )

    logger.info("MinerU parser found %d image block(s)", len(images))
    return images
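A sketch of feeding the client output through the parser (data as returned by MineruClient.parse_image above):

    from backend.app.mineru_parser import parse_mineru_fields, parse_mineru_image_blocks

    doc = parse_mineru_fields(data)
    for f in doc.fields[:3]:
        # font_height_mm = font_size_pt * 0.352778, rounded to 2 decimals
        print(f["page"], f["text"], f["font_size_pt"], f["font_height_mm"])

    image_blocks = parse_mineru_image_blocks(data)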
507
backend/app/pipeline.py
Normal file
@@ -0,0 +1,507 @@
"""Core processing pipeline: AI → PDF → PNG → Qwen crop → MinerU → validate."""
from __future__ import annotations

import logging
import os
import shutil
import subprocess
from pathlib import Path

from backend.app.barcode_detector import detect_barcodes
from backend.app.image_classifier import is_qr_code
from backend.app.mineru_client import MineruClient, MineruClientError
from backend.app.mineru_parser import parse_mineru_fields, parse_mineru_image_blocks
from backend.app.text_validation import validate_field_against_word
from backend.app.word_parser import extract_word_html, extract_word_text

logger = logging.getLogger(__name__)

# --------------------------------------------------------------------------- #
# Environment helpers                                                          #
# --------------------------------------------------------------------------- #

def _get_mineru_api_key() -> str:
    """Read MINERU_API_KEY from the process environment or the project .env file."""
    value = os.environ.get("MINERU_API_KEY", "").strip()
    if value:
        return value

    for candidate in (
        Path(__file__).resolve().parents[2] / ".env",
        Path(__file__).resolve().parents[3] / ".env",
    ):
        if not candidate.exists():
            continue
        for raw in candidate.read_text(encoding="utf-8").splitlines():
            line = raw.strip()
            if not line or line.startswith("#") or "=" not in line:
                continue
            key, val = line.split("=", 1)
            if key.strip() == "MINERU_API_KEY":
                cleaned = val.strip().strip('"').strip("'")
                if cleaned:
                    logger.info("Loaded MINERU_API_KEY from %s", candidate)
                    return cleaned
    return ""


# --------------------------------------------------------------------------- #
# AI → PDF conversion                                                          #
# --------------------------------------------------------------------------- #

def _ai_to_pdf(ai_path: Path, output_dir: Path) -> Path:
    """Convert an Adobe Illustrator file to PDF, keeping the original filename stem.

    Modern .ai files (CS and later) are internally PDF-based; pypdf can copy
    them directly. Legacy EPS-based .ai files require Ghostscript.
    If the uploaded file is already a PDF it is copied as-is.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    pdf_path = output_dir / f"{ai_path.stem}.pdf"

    with ai_path.open("rb") as fh:
        header = fh.read(8)

    if header.startswith(b"%PDF-"):
        # PDF-based .ai or an actual PDF – re-write with pypdf for cleanliness
        try:
            from pypdf import PdfReader, PdfWriter

            reader = PdfReader(str(ai_path))
            writer = PdfWriter()
            for page in reader.pages:
                writer.add_page(page)
            with pdf_path.open("wb") as fh:
                writer.write(fh)
            logger.info("Converted PDF-based .ai via pypdf: %s", ai_path.name)
        except Exception as exc:
            logger.warning("pypdf failed (%s), falling back to direct copy", exc)
            shutil.copy2(ai_path, pdf_path)
    else:
        # Legacy EPS-based .ai → Ghostscript
        gs = shutil.which("/opt/homebrew/bin/gs") or shutil.which("gs") or shutil.which("ghostscript")
        if gs is None:
            raise RuntimeError(
                "Cannot convert legacy .ai file: Ghostscript is not installed. "
                "Run: brew install ghostscript"
            )
        result = subprocess.run(
            [gs, "-dNOPAUSE", "-dBATCH", "-dSAFER",
             "-sDEVICE=pdfwrite", f"-sOutputFile={pdf_path}", str(ai_path)],
            capture_output=True, text=True, timeout=120,
        )
        if result.returncode != 0:
            raise RuntimeError(
                f"Ghostscript failed (exit {result.returncode}):\n{result.stderr.strip()}"
            )
        logger.info("Converted legacy .ai via Ghostscript: %s", ai_path.name)

    return pdf_path


# --------------------------------------------------------------------------- #
# PDF → PNG rasterisation                                                      #
# --------------------------------------------------------------------------- #

def _pdf_to_png(pdf_path: Path, output_dir: Path, dpi: int = 150) -> Path:
    """Rasterise the first page of a PDF to a PNG.

    Tries, in order:
    1. Ghostscript (if installed)
    2. PyMuPDF (pip install pymupdf)

    Uses a safe output filename ``page1.png`` to avoid issues with special
    characters in the source PDF name.
    Returns the path of the generated PNG.
    """
    output_dir.mkdir(parents=True, exist_ok=True)
    # Use a safe filename – special chars / spaces in the PDF stem can cause
    # Ghostscript to silently produce no output.
    png_path = output_dir / "page1.png"

    # ── 1. Ghostscript ────────────────────────────────────────────────────── #
    gs = (
        shutil.which("/opt/homebrew/bin/gs")
        or shutil.which("/usr/local/bin/gs")
        or shutil.which("ghostscript")
    )
    if gs:
        result = subprocess.run(
            [
                gs, "-dNOPAUSE", "-dBATCH", "-dSAFER",
                "-sDEVICE=png16m", f"-r{dpi}",
                "-dFirstPage=1", "-dLastPage=1",
                f"-sOutputFile={png_path}", str(pdf_path),
            ],
            capture_output=True, text=True, timeout=60,
        )
        if result.returncode == 0 and png_path.exists():
            w, h = _png_size(png_path)
            logger.info(
                "Rasterised PDF → PNG via Ghostscript at %d DPI: %dx%d px (%d KB)",
                dpi, w, h, png_path.stat().st_size // 1024,
            )
            return png_path
        logger.warning("Ghostscript rasterisation failed (exit %d): %s",
                       result.returncode, result.stderr[:300])

    # ── 2. PyMuPDF fallback ───────────────────────────────────────────────── #
    try:
        import fitz  # PyMuPDF

        doc = fitz.open(str(pdf_path))
        page = doc[0]
        zoom = dpi / 72.0
        mat = fitz.Matrix(zoom, zoom)
        pix = page.get_pixmap(matrix=mat, alpha=False)
        pix.save(str(png_path))
        doc.close()
        w, h = _png_size(png_path)
        logger.info(
            "Rasterised PDF → PNG via PyMuPDF at %d DPI: %dx%d px (%d KB)",
            dpi, w, h, png_path.stat().st_size // 1024,
        )
        return png_path
    except ImportError:
        raise RuntimeError(
            "Cannot rasterise PDF to PNG: neither Ghostscript nor PyMuPDF is "
            "available. Run: pip install pymupdf OR brew install ghostscript"
        )
    except Exception as exc:
        raise RuntimeError(f"Cannot rasterise PDF to PNG: {exc}") from exc


def _png_size(png_path: Path) -> tuple[int, int]:
    """Return (width, height) in pixels of a PNG file."""
    from PIL import Image
    with Image.open(png_path) as img:
        return img.size  # (width, height)


# --------------------------------------------------------------------------- #
# Qwen VL region crop                                                          #
# --------------------------------------------------------------------------- #

def _crop_label_region(png_path: Path, output_dir: Path) -> Path:
    """Detect the main label area with Qwen VL and crop to it.

    If DASHSCOPE_API_KEY is missing or detection fails, returns the original
    PNG unchanged so the pipeline continues without interruption.
    """
    from backend.app.region_detector import (
        _get_api_key,
        crop_and_save,
        detect_regions,
        merge_regions,
    )

    api_key = _get_api_key()
    if not api_key:
        logger.info("DASHSCOPE_API_KEY not configured – skipping AI crop, using full image")
        return png_path

    try:
        regions, _ = detect_regions(png_path, api_key=api_key, api_max_side=1024)
    except Exception as exc:
        logger.warning("Qwen region detection failed (%s) – using full image", exc)
        return png_path

    if not regions:
        logger.warning("No regions detected by Qwen – using full image")
        return png_path

    merged = merge_regions(regions)
    output_dir.mkdir(parents=True, exist_ok=True)
    cropped_png = output_dir / "cropped_label.png"

    # crop_and_save writes to numbered files; rename for predictability
    results = crop_and_save(png_path, [merged], output_dir / "_tmp")
    if not results:
        return png_path

    shutil.move(results[0]["path"], str(cropped_png))

    w, h = _png_size(cropped_png)
    logger.info(
        "Qwen crop: bbox=(%d,%d)-(%d,%d) → %s (%dx%d px)",
        merged.x1, merged.y1, merged.x2, merged.y2,
        cropped_png.name, w, h,
    )
    return cropped_png


# --------------------------------------------------------------------------- #
# MinerU image-block QR processing                                             #
# --------------------------------------------------------------------------- #

def _process_image_blocks(
    mineru_data: dict,
    source_image: Path,
    output_dir: Path,
) -> list[dict]:
    """Run the QR-recognition flow on every image-type block MinerU found.

    Flow
    ----
    1. Extract all image blocks (with bbox coordinates) from mineru_data.
    2. Crop each bbox out of source_image (the high-res crop) into a temp PNG.
    3. Ask Qwen VL whether the cropped image is a QR code / barcode.
    4. If yes, run the zxing barcode module for precise decoding.
    5. Return one result entry per image block.

    Parameters
    ----------
    mineru_data:
        MinerU structured JSON (contains pdf_info).
    source_image:
        High-resolution source to crop from (the PNG that was sent to MinerU).
    output_dir:
        Temp directory for the cropped images.

    Returns
    -------
    list of dict
        One entry per image block, containing:
        - page, block_type, x0_pt, top_pt, x1_pt, bottom_pt
        - is_qr_code : bool — semantic judgement from the VL model
        - barcodes   : list — zxing decode results (empty when is_qr_code=False)
        - crop_path  : str  — path of the crop (for debugging)
    """
    from PIL import Image

    image_blocks = parse_mineru_image_blocks(mineru_data)
    if not image_blocks:
        return []

    output_dir.mkdir(parents=True, exist_ok=True)
    results: list[dict] = []

    with Image.open(source_image) as src_img:
        img_w, img_h = src_img.size

        for idx, block in enumerate(image_blocks, start=1):
            # ── Crop ──────────────────────────────────────────────────────── #
            x0 = max(0, int(block["x0_pt"]))
            y0 = max(0, int(block["top_pt"]))
            x1 = min(img_w, int(block["x1_pt"]))
            y1 = min(img_h, int(block["bottom_pt"]))

            if x1 <= x0 or y1 <= y0:
                logger.warning(
                    "_process_image_blocks: block %d 边界框无效 (%d,%d)-(%d,%d),跳过",
                    idx, x0, y0, x1, y1,
                )
                results.append({**block, "is_qr_code": False, "barcodes": [], "crop_path": None})
                continue

            crop = src_img.crop((x0, y0, x1, y1))
            crop_file = output_dir / f"block_{idx:03d}_p{block['page']}.png"
            crop.save(crop_file)
            logger.info(
                "_process_image_blocks: block %d saved crop %s (%dx%d px)",
                idx, crop_file.name, x1 - x0, y1 - y0,
            )

            # ── Qwen VL semantic check ────────────────────────────────────── #
            qr_detected = is_qr_code(crop_file)

            # ── Barcode decode (only when the semantic check says QR) ─────── #
            barcodes: list[dict] = []
            if qr_detected:
                logger.info(
                    "_process_image_blocks: block %d 被识别为二维码,启动条码解码",
                    idx,
                )
                raw_barcodes = detect_barcodes(crop_file)
                barcodes = [
                    {
                        "format": b.format,
                        "format_label": b.format_label,
                        "text": b.text,
                        "x0": b.x0,
                        "y0": b.y0,
                        "x1": b.x1,
                        "y1": b.y1,
                        "valid": b.valid,
                    }
                    for b in raw_barcodes
                ]
                if barcodes:
                    logger.info(
                        "_process_image_blocks: block %d 条码解码成功,共 %d 条",
                        idx, len(barcodes),
                    )
                else:
                    logger.warning(
                        "_process_image_blocks: block %d 语义判断为二维码,但 zxing 未能解码",
                        idx,
                    )

            results.append(
                {
                    **block,
                    "is_qr_code": qr_detected,
                    "barcodes": barcodes,
                    "crop_path": str(crop_file),
                }
            )

    return results


# --------------------------------------------------------------------------- #
# Public API                                                                   #
# --------------------------------------------------------------------------- #

def process_document(
    ai_path: Path,
    word_path: Path,
    output_dir: Path,
    job_id: str,
) -> dict:
    """Full pipeline: AI → PDF → PNG → Qwen crop → MinerU → validate.

    Steps
    -----
    1. AI / PDF file → clean PDF
    2. PDF → high-res PNG (Ghostscript, 150 DPI)
    3. PNG → Qwen VL detects main label area → cropped PNG
       (graceful fallback to full PNG when key is absent)
    4. Cropped PNG → MinerU structured-JSON extraction
    5. MinerU fields → validate against Word reference document

    Returns
    -------
    dict
        ``{ preview: {...}, fields: [...] }`` matching the frontend
        ``ProcessResponse`` type. ``preview.type`` is ``"png"`` and
        ``pageWidthPt`` / ``pageHeightPt`` hold the cropped image dimensions
        in pixels (coord system is pixel-aligned for the PNG overlay).
    """
    output_dir.mkdir(parents=True, exist_ok=True)

    # ── 1. AI → PDF ──────────────────────────────────────────────────────── #
    logger.info("Step 1/5 – Converting AI to PDF: %s", ai_path.name)
    pdf_path = _ai_to_pdf(ai_path, output_dir)

    # ── 2. PDF → PNG ─────────────────────────────────────────────────────── #
    logger.info("Step 2/5 – Rasterising PDF to PNG (150 DPI)")
    png_path = _pdf_to_png(pdf_path, output_dir / "raster", dpi=150)

    # ── 3. Qwen VL crop ──────────────────────────────────────────────────── #
    logger.info("Step 3/5 – AI region detection & crop")
    cropped_path = _crop_label_region(png_path, output_dir / "crop")

    # Relative URL fragment understood by /api/files/{job_id}/{file_path}
    cropped_rel = cropped_path.relative_to(output_dir).as_posix()
    img_w, img_h = _png_size(cropped_path)

    # ── 3b. Barcode detection ────────────────────────────────────────────── #
    logger.info("Step 3b – Scanning for barcodes / QR codes")
    barcodes = detect_barcodes(cropped_path)

    # Crop each barcode region for frontend display
    barcode_crops_dir = output_dir / "barcode_crops"
    barcode_crops_dir.mkdir(parents=True, exist_ok=True)
    from PIL import Image as _PILImage  # noqa: PLC0415
    with _PILImage.open(cropped_path) as _src_img:
        _src_w, _src_h = _src_img.size
        for _bi, _b in enumerate(barcodes):
            _pad = 12
            _cx0 = max(0, _b.x0 - _pad)
            _cy0 = max(0, _b.y0 - _pad)
            _cx1 = min(_src_w, _b.x1 + _pad)
            _cy1 = min(_src_h, _b.y1 + _pad)
            _crop = _src_img.crop((_cx0, _cy0, _cx1, _cy1))
            _crop.save(barcode_crops_dir / f"barcode_{_bi}.png")

    barcode_results = [
        {
            "format": b.format,
            "format_label": b.format_label,
            "text": b.text,
            "x0": b.x0,
            "y0": b.y0,
            "x1": b.x1,
            "y1": b.y1,
            "valid": b.valid,
            "crop_url": f"/api/files/{job_id}/barcode_crops/barcode_{i}.png",
        }
        for i, b in enumerate(barcodes)
    ]
    logger.info("Step 3b – Found %d barcode(s)", len(barcode_results))

    # ── 4. MinerU parsing ────────────────────────────────────────────────── #
    logger.info("Step 4/5 – Sending cropped PNG to MinerU: %s", cropped_path.name)
    mineru_api_key = _get_mineru_api_key()
    if not mineru_api_key:
        raise RuntimeError("MINERU_API_KEY is not configured")

    mineru_dir = output_dir / "mineru"
    client = MineruClient(api_key=mineru_api_key)
    mineru_data = client.parse_image(cropped_path, mineru_dir)

    # ── 5. Parse + validate ──────────────────────────────────────────────── #
    logger.info("Step 5/5 – Parsing MinerU result and validating against Word")
    doc = parse_mineru_fields(mineru_data)
    word_text = extract_word_text(word_path)
    word_html = extract_word_html(word_path)

    fields: list[dict] = []
    for idx, field in enumerate(doc.fields, start=1):
        validation = validate_field_against_word(field["text"], word_text)
        fields.append(
            {
                "id": f"field-{idx}",
                **field,
                "normalized_text": validation.normalized_text,
                "validation_status": validation.status,
                "validation_reason": validation.reason,
                "matched_excerpt": validation.matched_excerpt,
            }
        )

    _STATUS_RANK = {"matched": 0, "unmatched": 1, "empty_or_garbled": 2}
    fields.sort(key=lambda f: (
        _STATUS_RANK.get(f["validation_status"], 9),
        f["page"],
        f["top_pt"],
        f["x0_pt"],
    ))

    logger.info(
        "Pipeline done: job_id=%s fields=%d matched=%d unmatched=%d garbled=%d",
        job_id,
        len(fields),
        sum(1 for f in fields if f["validation_status"] == "matched"),
        sum(1 for f in fields if f["validation_status"] == "unmatched"),
        sum(1 for f in fields if f["validation_status"] == "empty_or_garbled"),
    )

    # ── 5b. Image blocks: QR semantic check → barcode decode ─────────────── #
    image_block_results = _process_image_blocks(
        mineru_data=mineru_data,
        source_image=cropped_path,
        output_dir=output_dir / "image_blocks",
    )
    logger.info("Step 5b – Processed %d image block(s) from MinerU", len(image_block_results))

    return {
        "preview": {
            # type='png': frontend renders <img> + overlay (not PDF canvas)
            "type": "png",
            "url": f"/api/files/{job_id}/{cropped_rel}",
            # For PNG the "pt" fields carry pixel dimensions so overlay
            # scale factors remain 1:1 at 100% zoom.
            "pageWidthPt": img_w,
            "pageHeightPt": img_h,
        },
        "fields": fields,
        "word_text": word_text,
        "word_html": word_html,
        "barcodes": barcode_results,
        "image_blocks": image_block_results,
    }
|
||||
372
backend/app/region_detector.py
Normal file
@@ -0,0 +1,372 @@
"""Detect main regions in a label image via Qwen2.5-VL (DashScope).

Workflow
--------
1. Read the original image; record its exact dimensions (orig_w × orig_h).
2. Downscale a copy to fit within ``api_max_side`` for the API call
   (faster upload, lower token cost). Record api_w × api_h.
3. Send the downscaled image to Qwen VL.
4. Parse the response coordinates (which are relative to the api image):
   a. Qwen2.5-VL grounding tokens <|box_start|>(x1,y1),(x2,y2)<|box_end|>
      – normalised to [0, 1000] of the *api* image.
   b. Fallback: JSON array ``[{"label": "...", "bbox": [x1,y1,x2,y2]}, ...]``
      – pixel values in the *api* image coordinate space.
5. Scale coordinates back to the original image space:
       x_orig = round(x_api * orig_w / api_w)
6. Crop from the **original** high-resolution file → full-quality output.
"""
from __future__ import annotations

import base64
import json
import logging
import os
import re
from pathlib import Path
from typing import NamedTuple

logger = logging.getLogger(__name__)

_DASHSCOPE_BASE_URL_DEFAULT = "https://dashscope.aliyuncs.com/compatible-mode/v1"
# Model alias: the 7B variant is faster ("flash"); swap for 72B for higher accuracy
DEFAULT_MODEL = "qwen2.5-vl-7b-instruct"

_GROUNDING_RE = re.compile(
    r"<\|object_ref_start\|>(.*?)<\|object_ref_end\|>"
    r"<\|box_start\|>\((\d+),(\d+)\),\((\d+),(\d+)\)<\|box_end\|>",
    re.DOTALL,
)

_DEFAULT_PROMPT = (
    "请检测图像中食品包装标签的所有主要内容区域(如:主产品信息表格、"
    "营养成分表、标题、配料表、厂商信息、条码区等)。"
    "以JSON列表输出,格式为:\n"
    '[{"label": "区域名称", "bbox": [x1, y1, x2, y2]}, ...]'
    "\n坐标为实际像素值(整数),原点在左上角。"
)


class Region(NamedTuple):
    label: str
    x1: int
    y1: int
    x2: int
    y2: int

    @property
    def width(self) -> int:
        return self.x2 - self.x1

    @property
    def height(self) -> int:
        return self.y2 - self.y1


def _read_dotenv(path: Path) -> dict[str, str]:
    """Parse a simple KEY=VALUE .env file into a dict."""
    result: dict[str, str] = {}
    if not path.exists():
        return result
    for raw in path.read_text(encoding="utf-8").splitlines():
        line = raw.strip()
        if not line or line.startswith("#") or "=" not in line:
            continue
        k, v = line.split("=", 1)
        result[k.strip()] = v.strip().strip('"').strip("'")
    return result


def _load_env() -> dict[str, str]:
    """Merge .env files (project root → parent → home) into a single dict."""
    merged: dict[str, str] = {}
    for p in [
        Path(__file__).resolve().parents[2] / ".env",
        Path(__file__).resolve().parents[3] / ".env",
        Path.home() / ".env",
    ]:
        merged.update(_read_dotenv(p))
    return merged


def _get_api_key() -> str:
    """Read DASHSCOPE_API_KEY from env vars then .env files."""
    val = os.environ.get("DASHSCOPE_API_KEY", "").strip()
    if val:
        return val
    return _load_env().get("DASHSCOPE_API_KEY", "")


def _get_base_url() -> str:
    """Read DASHSCOPE_BASE_URL from env vars then .env files."""
    val = os.environ.get("DASHSCOPE_BASE_URL", "").strip()
    if val:
        return val
    return _load_env().get("DASHSCOPE_BASE_URL", _DASHSCOPE_BASE_URL_DEFAULT)


def _encode_image_for_api(
    image_path: Path,
    max_side: int = 1024,
) -> tuple[str, int, int]:
    """Downscale image to fit within *max_side* × *max_side*, encode as PNG base64.

    Returns
    -------
    b64 : str
        Base64-encoded PNG of the (possibly resized) image.
    api_w : int
        Width of the image that was actually sent to the API.
    api_h : int
        Height of the image that was actually sent to the API.
    """
    import io
    from PIL import Image

    with Image.open(image_path) as img:
        # Convert to RGB so PNG encoding always works
        img = img.convert("RGB")
        orig_w, orig_h = img.size

        if max(orig_w, orig_h) > max_side:
            scale = max_side / max(orig_w, orig_h)
            api_w = max(1, round(orig_w * scale))
            api_h = max(1, round(orig_h * scale))
            api_img = img.resize((api_w, api_h), Image.LANCZOS)
        else:
            api_w, api_h = orig_w, orig_h
            api_img = img

        buf = io.BytesIO()
        api_img.save(buf, format="PNG")

    b64 = base64.b64encode(buf.getvalue()).decode()
    return b64, api_w, api_h


def _parse_grounding_tokens(text: str, api_w: int, api_h: int) -> list[Region]:
    """Parse <|box_start|>(x1,y1),(x2,y2)<|box_end|> tokens.

    Qwen2.5-VL normalises coordinates to [0, 1000] of the *api* image.
    Returns pixel coordinates in the api image space.
    """
    regions: list[Region] = []
    for m in _GROUNDING_RE.finditer(text):
        label = m.group(1).strip()
        x1 = round(int(m.group(2)) * api_w / 1000)
|
||||
y1 = round(int(m.group(3)) * api_h / 1000)
|
||||
x2 = round(int(m.group(4)) * api_w / 1000)
|
||||
y2 = round(int(m.group(5)) * api_h / 1000)
|
||||
regions.append(Region(label, x1, y1, x2, y2))
|
||||
return regions
|
||||
|
||||
|
||||
def _parse_json_regions(text: str) -> list[Region]:
|
||||
"""Fallback: extract bbox from a JSON object or array in the response."""
|
||||
clean = re.sub(r"<\|[^|]+\|>", "", text)
|
||||
clean = re.sub(r"```[a-z]*", "", clean).strip("`").strip()
|
||||
|
||||
def _extract_bbox(item: dict) -> list | None:
|
||||
"""Try multiple known bbox key names, including nested dicts."""
|
||||
for key in ("bbox", "bbox_2d", "box", "coordinates", "bounding_box"):
|
||||
v = item.get(key)
|
||||
if isinstance(v, (list, tuple)) and len(v) >= 4:
|
||||
return list(v)
|
||||
# e.g. {"label": {"bbox_2d": [...]}}
|
||||
if isinstance(v, dict):
|
||||
inner = _extract_bbox(v)
|
||||
if inner:
|
||||
return inner
|
||||
# Recurse into all dict values
|
||||
for v in item.values():
|
||||
if isinstance(v, dict):
|
||||
inner = _extract_bbox(v)
|
||||
if inner:
|
||||
return inner
|
||||
return None
|
||||
|
||||
def _region_from_dict(item: dict) -> Region | None:
|
||||
bbox = _extract_bbox(item)
|
||||
if not bbox or len(bbox) < 4:
|
||||
return None
|
||||
# Label: try common keys; skip if value is a nested dict
|
||||
raw_label = (item.get("label") or item.get("name") or item.get("type") or "主内容区")
|
||||
label = raw_label if isinstance(raw_label, str) else "主内容区"
|
||||
x1, y1, x2, y2 = (int(v) for v in bbox[:4])
|
||||
return Region(label, x1, y1, x2, y2)
|
||||
|
||||
# Try a single JSON object first (some responses contain one object instead of the requested list)
|
||||
obj_start, obj_end = clean.find("{"), clean.rfind("}")
|
||||
if obj_start != -1 and obj_end > obj_start:
|
||||
try:
|
||||
obj = json.loads(clean[obj_start : obj_end + 1])
|
||||
if isinstance(obj, dict):
|
||||
r = _region_from_dict(obj)
|
||||
if r:
|
||||
return [r]
|
||||
except json.JSONDecodeError:
|
||||
pass
|
||||
|
||||
# Fallback to JSON array
|
||||
arr_start, arr_end = clean.find("["), clean.rfind("]")
|
||||
if arr_start == -1 or arr_end <= arr_start:
|
||||
return []
|
||||
try:
|
||||
items = json.loads(clean[arr_start : arr_end + 1])
|
||||
except json.JSONDecodeError:
|
||||
return []
|
||||
regions: list[Region] = []
|
||||
for item in items:
|
||||
if isinstance(item, dict):
|
||||
r = _region_from_dict(item)
|
||||
if r:
|
||||
regions.append(r)
|
||||
return regions
|
||||
|
||||
|
||||
def detect_regions(
|
||||
image_path: Path,
|
||||
api_key: str | None = None,
|
||||
model: str = DEFAULT_MODEL,
|
||||
prompt: str = _DEFAULT_PROMPT,
|
||||
api_max_side: int = 1024,
|
||||
) -> tuple[list[Region], str]:
|
||||
"""Call Qwen VL to detect main regions.
|
||||
|
||||
The image is downscaled to *api_max_side* before the API call for speed
|
||||
and cost efficiency. Returned ``Region`` coordinates are always mapped
|
||||
back to the **original** image pixel space, so ``crop_and_save`` will
|
||||
produce full-resolution output.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
api_max_side:
|
||||
Maximum side length (px) of the image sent to the API.
|
||||
Increase for very large originals where detection needs more detail.
|
||||
|
||||
Returns
|
||||
-------
|
||||
regions : list[Region]
|
||||
Bounding boxes in **original** image coordinates.
|
||||
raw_response : str
|
||||
Full model text (for debugging).
|
||||
"""
|
||||
from openai import OpenAI
|
||||
from PIL import Image
|
||||
|
||||
key = api_key or _get_api_key()
|
||||
if not key:
|
||||
raise RuntimeError(
|
||||
"DASHSCOPE_API_KEY not set. "
|
||||
"Add it to the project .env or set the environment variable."
|
||||
)
|
||||
|
||||
# ── 1. Original dimensions ────────────────────────────────────────────
|
||||
with Image.open(image_path) as img:
|
||||
orig_w, orig_h = img.size
|
||||
|
||||
# ── 2. Downscale for API; remember api dims for coordinate mapping ─────
|
||||
b64, api_w, api_h = _encode_image_for_api(image_path, max_side=api_max_side)
|
||||
logger.info(
|
||||
"Calling %s on %s orig=%dx%d → api=%dx%d (scale=%.3f)",
|
||||
model, image_path.name, orig_w, orig_h, api_w, api_h, api_w / orig_w,
|
||||
)
|
||||
|
||||
# ── 3. API call ───────────────────────────────────────────────────────
|
||||
client = OpenAI(api_key=key, base_url=_get_base_url())
|
||||
response = client.chat.completions.create(
|
||||
model=model,
|
||||
messages=[
|
||||
{
|
||||
"role": "user",
|
||||
"content": [
|
||||
{
|
||||
"type": "image_url",
|
||||
"image_url": {"url": f"data:image/png;base64,{b64}"},
|
||||
},
|
||||
{"type": "text", "text": prompt},
|
||||
],
|
||||
}
|
||||
],
|
||||
)
|
||||
|
||||
raw = response.choices[0].message.content or ""
|
||||
logger.debug("Qwen VL raw response:\n%s", raw)
|
||||
|
||||
# ── 4. Parse — coordinates are in api-image space ─────────────────────
|
||||
regions = _parse_grounding_tokens(raw, api_w, api_h)
|
||||
if regions:
|
||||
logger.info("Parsed %d region(s) from grounding tokens", len(regions))
|
||||
else:
|
||||
regions = _parse_json_regions(raw)
|
||||
if regions:
|
||||
logger.info("Parsed %d region(s) from JSON fallback", len(regions))
|
||||
|
||||
if not regions:
|
||||
logger.warning("No regions parsed from response:\n%s", raw[:400])
|
||||
return [], raw
|
||||
|
||||
# ── 5. Scale coordinates back to original image space ─────────────────
|
||||
sx, sy = orig_w / api_w, orig_h / api_h
|
||||
original_regions = [
|
||||
Region(r.label,
|
||||
round(r.x1 * sx), round(r.y1 * sy),
|
||||
round(r.x2 * sx), round(r.y2 * sy))
|
||||
for r in regions
|
||||
]
|
||||
logger.info(
|
||||
"Coordinates remapped api(%dx%d) → orig(%dx%d)",
|
||||
api_w, api_h, orig_w, orig_h,
|
||||
)
|
||||
return original_regions, raw
|
||||
|
||||
|
||||
def merge_regions(regions: list[Region], label: str = "主内容区") -> Region:
|
||||
"""Return the union bounding box of all regions as a single Region."""
|
||||
if not regions:
|
||||
raise ValueError("Cannot merge empty region list")
|
||||
x1 = min(r.x1 for r in regions)
|
||||
y1 = min(r.y1 for r in regions)
|
||||
x2 = max(r.x2 for r in regions)
|
||||
y2 = max(r.y2 for r in regions)
|
||||
return Region(label, x1, y1, x2, y2)
|
||||
|
||||
|
||||
def crop_and_save(
|
||||
image_path: Path,
|
||||
regions: list[Region],
|
||||
output_dir: Path,
|
||||
) -> list[dict]:
|
||||
"""Crop each region and save as PNG.
|
||||
|
||||
Returns
|
||||
-------
|
||||
list of dicts with keys: label, bbox, path
|
||||
"""
|
||||
from PIL import Image
|
||||
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
results: list[dict] = []
|
||||
|
||||
with Image.open(image_path) as img:
|
||||
img_w, img_h = img.size
|
||||
for i, region in enumerate(regions, start=1):
|
||||
# Clamp to image bounds
|
||||
x1 = max(0, region.x1)
|
||||
y1 = max(0, region.y1)
|
||||
x2 = min(img_w, region.x2)
|
||||
y2 = min(img_h, region.y2)
|
||||
if x2 <= x1 or y2 <= y1:
|
||||
logger.warning("Skipping zero-area region: %s", region.label)
|
||||
continue
|
||||
cropped = img.crop((x1, y1, x2, y2))
|
||||
safe_name = re.sub(r"[^\w\u4e00-\u9fff-]", "_", region.label)[:40]
|
||||
out_path = output_dir / f"{i:02d}_{safe_name}.png"
|
||||
cropped.save(out_path)
|
||||
logger.info("Saved region [%s] → %s", region.label, out_path.name)
|
||||
results.append({
|
||||
"label": region.label,
|
||||
"bbox": [x1, y1, x2, y2],
|
||||
"path": str(out_path),
|
||||
})
|
||||
|
||||
return results
|
||||
266
backend/app/text_validation.py
Normal file
@@ -0,0 +1,266 @@
|
||||
"""Validate extracted text blocks against a Word document's content."""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import unicodedata
|
||||
from dataclasses import dataclass
|
||||
from difflib import SequenceMatcher
|
||||
|
||||
|
||||
# Minimum SequenceMatcher ratio to count as a match (strict: content must be nearly identical)
|
||||
MATCH_THRESHOLD = 0.95
|
||||
# For multi-row tables: individual row match threshold
|
||||
TABLE_ROW_SINGLE_THRESHOLD = 0.95
|
||||
# For multi-row tables: fraction of valid rows that must match
|
||||
TABLE_ROW_MATCH_THRESHOLD = 0.5
|
||||
# Strings shorter than this are treated as too short to validate
|
||||
MIN_TEXT_LENGTH = 2
|
||||
|
||||
|
||||
@dataclass
|
||||
class ValidationResult:
|
||||
status: str # "matched" | "unmatched" | "empty_or_garbled"
|
||||
reason: str
|
||||
normalized_text: str
|
||||
matched_excerpt: str | None
|
||||
|
||||
|
||||
# 圆圈序号 ①②③...⑳(NFKC 之前处理,避免转为数字后难以区分)
|
||||
_CIRCLED_NUM_RE = re.compile(r"^[①-⑳]")
|
||||
# 数字列表前缀:"1. " "2." "3. " 等(NFKC 之后处理)
|
||||
_LIST_NUM_RE = re.compile(r"^\d{1,2}[.\s]+")
|
||||
# 句末/列表标点(中英文等价符,忽略差异;保留小数点和冒号)
|
||||
_PUNCT_RE = re.compile(r"[,。;!?、…,;!?]")
|
||||
|
||||
|
||||
def _normalize(text: str) -> str:
|
||||
"""Collapse whitespace and normalise unicode for comparison.
|
||||
|
||||
额外处理:
|
||||
- 去掉首部圆圈序号(①②③)和数字列表前缀(1. 2.)
|
||||
- 忽略中英文标点差异(,。;vs ,.)
|
||||
- 统一 dash 并去掉 dash 两侧空格(50 – 60 → 50-60)
|
||||
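示例(示意,按上述规则推演)::

    >>> _normalize("③ 50 – 60克,")
    '50-60克'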
"""
|
||||
text = text.lstrip()
|
||||
# 先去圆圈序号(在 NFKC 前,避免 ③→3 后与普通数字混淆)
|
||||
text = _CIRCLED_NUM_RE.sub("", text).lstrip()
|
||||
# Unicode 归一化(全角→半角、① → 1、:→ :、(→ ( 等)
|
||||
text = unicodedata.normalize("NFKC", text)
|
||||
# Strip markdown bold/italic markers
|
||||
text = re.sub(r"\*+", "", text)
|
||||
# 破折号变体归一化:en-dash / em-dash / minus sign → hyphen
|
||||
text = re.sub(r"[–—−]", "-", text)
|
||||
# 去掉 dash 两侧空格:"50 - 60" → "50-60"
|
||||
text = re.sub(r"\s*-\s*", "-", text)
|
||||
# 去掉数字列表前缀(NFKC 后,如 "3. " "4. ")
|
||||
text = _LIST_NUM_RE.sub("", text.lstrip())
|
||||
# 忽略句末/列表标点差异
|
||||
text = _PUNCT_RE.sub("", text)
|
||||
text = re.sub(r"\s+", " ", text).strip()
|
||||
return text
|
||||
|
||||
|
||||
def _is_garbled(text: str) -> bool:
|
||||
"""Return True when text is empty, too short, or mostly non-printable."""
|
||||
if not text or len(text) < MIN_TEXT_LENGTH:
|
||||
return True
|
||||
printable = sum(1 for c in text if not unicodedata.category(c).startswith("C"))
|
||||
return printable / len(text) < 0.5
|
||||
|
||||
|
||||
def _word_lines(word_text: str) -> list[str]:
|
||||
"""Split Word Markdown into non-empty normalised lines for matching.
|
||||
|
||||
Grid-table separator rows (e.g. ``+-----+-----+``) are filtered out
|
||||
because they carry no semantic content and would skew similarity scores.
|
||||
"""
|
||||
_SEP_RE = re.compile(r"^[+\-=| ]+$")
|
||||
lines = []
|
||||
for raw in word_text.splitlines():
|
||||
norm = _normalize(raw)
|
||||
if not norm:
|
||||
continue
|
||||
# Skip pandoc grid-table separator rows
|
||||
if _SEP_RE.match(norm.replace(" ", "")):
|
||||
continue
|
||||
lines.append(norm)
|
||||
return lines
|
||||
|
||||
|
||||
def _match_against_line(needle: str, line: str) -> tuple[float, str]:
|
||||
"""Return (ratio, excerpt) for needle vs a single Word line.
|
||||
|
||||
When the needle (MinerU row) is significantly shorter than the Word line
|
||||
(because the Word table has more product columns), a plain
|
||||
SequenceMatcher ratio under-counts matching content. We also compute
|
||||
*needle coverage* — the fraction of the needle's characters that appear
|
||||
in the line — and take the higher of the two scores.
|
||||
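Example (illustrative): needle ``品种 规格`` against line
``品种 | 规格 | 净含量`` is not an exact substring, so the plain
ratio is only ≈0.56; but every needle character is covered, giving
a final score of 1.0 × 0.95 = 0.95.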
"""
|
||||
# Exact substring
|
||||
if needle in line:
|
||||
idx = line.index(needle)
|
||||
return 1.0, line[idx: idx + len(needle) + 20].strip()
|
||||
|
||||
matcher = SequenceMatcher(None, needle, line, autojunk=False)
|
||||
ratio = matcher.ratio()
|
||||
|
||||
# Coverage ratio: useful when MinerU row is a partial view of a wider table
|
||||
if len(needle) > 0 and len(needle) < len(line):
|
||||
match_chars = sum(t for _, _, t in matcher.get_matching_blocks())
|
||||
coverage = match_chars / len(needle)
|
||||
# Apply a small discount to avoid false positives on very short needles
|
||||
ratio = max(ratio, coverage * 0.95)
|
||||
|
||||
# 表格行(含 | 分隔符)可能很长,给更多上下文以便前端完整渲染
|
||||
max_len = 400 if line.lstrip().startswith("|") else 120
|
||||
return ratio, line[:max_len].strip()
|
||||
|
||||
|
||||
def _match_single_line(norm: str, word_lines: list[str]) -> tuple[float, str]:
|
||||
"""在 word_lines 中找与 norm 最相似的行,返回 (best_ratio, best_excerpt)。"""
|
||||
best_ratio = 0.0
|
||||
best_excerpt = ""
|
||||
for line in word_lines:
|
||||
ratio, excerpt = _match_against_line(norm, line)
|
||||
if ratio > best_ratio:
|
||||
best_ratio = ratio
|
||||
best_excerpt = excerpt
|
||||
if best_ratio == 1.0:
|
||||
break
|
||||
return best_ratio, best_excerpt
|
||||
|
||||
|
||||
def _validate_table_against_word(raw_rows: list[str], word_text: str) -> ValidationResult:
|
||||
"""多行表格逐行匹配,聚合命中率。
|
||||
|
||||
策略
|
||||
----
|
||||
- 对每一行分别调用单行匹配,达到阈值则计为命中。
|
||||
- 命中率 ≥ TABLE_ROW_MATCH_THRESHOLD(50%)即视为整体匹配。
|
||||
- matched_excerpt 收集命中行的 Word 摘录,前端可渲染为表格。
|
||||
"""
|
||||
word_lines = _word_lines(word_text)
|
||||
if not word_lines:
|
||||
norm_full = _normalize(" ".join(raw_rows))
|
||||
return ValidationResult(
|
||||
status="unmatched",
|
||||
reason="Word 文档为空",
|
||||
normalized_text=norm_full,
|
||||
matched_excerpt=None,
|
||||
)
|
||||
|
||||
matched = 0
|
||||
skipped = 0
|
||||
excerpts: list[str] = []
|
||||
seen_excerpts: set[str] = set()
|
||||
|
||||
for row in raw_rows:
|
||||
norm_row = _normalize(row)
|
||||
if _is_garbled(norm_row):
|
||||
skipped += 1
|
||||
continue
|
||||
ratio, exc = _match_single_line(norm_row, word_lines)
|
||||
if ratio >= TABLE_ROW_SINGLE_THRESHOLD:
|
||||
matched += 1
|
||||
if exc and exc not in seen_excerpts:
|
||||
excerpts.append(exc)
|
||||
seen_excerpts.add(exc)
|
||||
|
||||
valid_count = len(raw_rows) - skipped
|
||||
norm_full = _normalize(" ".join(raw_rows))
|
||||
|
||||
if valid_count == 0:
|
||||
return ValidationResult(
|
||||
status="empty_or_garbled",
|
||||
reason="表格文本为空或全部为乱码",
|
||||
normalized_text=norm_full,
|
||||
matched_excerpt=None,
|
||||
)
|
||||
|
||||
match_rate = matched / valid_count
|
||||
excerpt_text = "\n".join(excerpts) if excerpts else None
|
||||
|
||||
if match_rate >= TABLE_ROW_MATCH_THRESHOLD:
|
||||
return ValidationResult(
|
||||
status="matched",
|
||||
reason=f"表格 {matched}/{valid_count} 行与 Word 匹配(命中率 {match_rate:.0%})",
|
||||
normalized_text=norm_full,
|
||||
matched_excerpt=excerpt_text,
|
||||
)
|
||||
|
||||
return ValidationResult(
|
||||
status="unmatched",
|
||||
reason=f"表格仅 {matched}/{valid_count} 行与 Word 匹配(命中率 {match_rate:.0%},阈值 {TABLE_ROW_MATCH_THRESHOLD:.0%})",
|
||||
normalized_text=norm_full,
|
||||
matched_excerpt=excerpt_text,
|
||||
)
|
||||
|
||||
|
||||
def validate_field_against_word(text: str, word_text: str) -> ValidationResult:
|
||||
"""Check whether *text* matches any line of *word_text*.
|
||||
|
||||
- 单行文本:找 Word 中最相似的一行,相似度 ≥ MATCH_THRESHOLD(95%)视为匹配。
|
||||
- 多行文本(表格):逐行匹配,命中率 ≥ 50% 视为整体匹配。
|
||||
|
||||
Parameters
|
||||
----------
|
||||
text:
|
||||
The OCR-extracted text block to validate.
|
||||
word_text:
|
||||
Full Markdown text extracted from the reference Word document.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ValidationResult
|
||||
Contains status, a human-readable reason, the normalised text,
|
||||
and the best-matching line from the Word document (if any).
|
||||
"""
|
||||
# 多行文本(表格):逐行匹配
|
||||
raw_rows = [r.strip() for r in text.splitlines() if r.strip()]
|
||||
if len(raw_rows) > 1:
|
||||
return _validate_table_against_word(raw_rows, word_text)
|
||||
|
||||
# 单行匹配
|
||||
norm = _normalize(text)
|
||||
|
||||
if _is_garbled(norm):
|
||||
return ValidationResult(
|
||||
status="empty_or_garbled",
|
||||
reason="文本为空或包含乱码",
|
||||
normalized_text=norm,
|
||||
matched_excerpt=None,
|
||||
)
|
||||
|
||||
word_lines = _word_lines(word_text)
|
||||
if not word_lines:
|
||||
return ValidationResult(
|
||||
status="unmatched",
|
||||
reason="Word 文档为空",
|
||||
normalized_text=norm,
|
||||
matched_excerpt=None,
|
||||
)
|
||||
|
||||
best_ratio, best_excerpt = _match_single_line(norm, word_lines)
|
||||
|
||||
if best_ratio == 1.0:
|
||||
return ValidationResult(
|
||||
status="matched",
|
||||
reason="与 Word 某行内容完全匹配",
|
||||
normalized_text=norm,
|
||||
matched_excerpt=best_excerpt,
|
||||
)
|
||||
|
||||
if best_ratio >= MATCH_THRESHOLD:
|
||||
return ValidationResult(
|
||||
status="matched",
|
||||
reason=f"与 Word 某行相似度 {best_ratio:.0%},判定为匹配",
|
||||
normalized_text=norm,
|
||||
matched_excerpt=best_excerpt,
|
||||
)
|
||||
|
||||
return ValidationResult(
|
||||
status="unmatched",
|
||||
reason=f"在 Word 中未找到匹配行(最高相似度 {best_ratio:.0%})",
|
||||
normalized_text=norm,
|
||||
matched_excerpt=best_excerpt or None,
|
||||
)
|
||||
147
backend/app/word_parser.py
Normal file
@@ -0,0 +1,147 @@
|
||||
"""Extract text / HTML from a Word (.docx) document via pandoc."""
|
||||
from __future__ import annotations
|
||||
|
||||
import re
|
||||
import shutil
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def extract_word_text(path: Path) -> str:
|
||||
"""Convert *path* (.docx) to Markdown with pandoc and return the result.
|
||||
|
||||
Pandoc preserves tables, lists, bold/italic, and paragraph structure far
|
||||
better than python-docx plain-text extraction. The returned string is
|
||||
cleaned of pandoc-specific span attributes (e.g. ``{.mark}``) that are
|
||||
irrelevant for text matching.
|
||||
|
||||
Falls back to python-docx plain-text extraction if pandoc is not installed.
|
||||
"""
|
||||
pandoc = shutil.which("pandoc")
|
||||
if pandoc:
|
||||
return _extract_via_pandoc(path, pandoc)
|
||||
return _extract_via_docx(path)
|
||||
|
||||
|
||||
def extract_word_html(path: Path) -> str | None:
|
||||
"""Convert *path* (.docx) to an HTML fragment preserving merged table cells.
|
||||
|
||||
Uses pandoc (``-t html5``) which correctly maps Word's ``<w:gridSpan>`` /
|
||||
``<w:vMerge>`` to HTML ``colspan`` / ``rowspan`` attributes.
|
||||
|
||||
Returns ``None`` when pandoc is unavailable or conversion fails.
|
||||
The returned string is a ``<body>`` fragment (no ``<html>`` / ``<head>``),
|
||||
with inline ``style`` attributes and ``<colgroup>`` stripped so that the
|
||||
frontend can apply its own CSS.
|
||||
"""
|
||||
pandoc = shutil.which("pandoc")
|
||||
if not pandoc:
|
||||
return None
|
||||
try:
|
||||
result = subprocess.run(
|
||||
[pandoc, str(path), "-f", "docx", "-t", "html5", "--wrap=none"],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=60,
|
||||
)
|
||||
except Exception:
|
||||
return None
|
||||
|
||||
if result.returncode != 0:
|
||||
return None
|
||||
|
||||
return _clean_word_html(result.stdout)
|
||||
|
||||
|
||||
def _clean_word_html(html: str) -> str:
|
||||
"""Extract <body> content and strip noise added by pandoc."""
|
||||
# 取 <body> 内容
|
||||
m = re.search(r"<body[^>]*>(.*?)</body>", html, re.DOTALL | re.IGNORECASE)
|
||||
body = m.group(1).strip() if m else html
|
||||
|
||||
# 删除 <colgroup> 块(含列宽 inline style,由前端 CSS 接管)
|
||||
body = re.sub(r"<colgroup[^>]*>.*?</colgroup>", "", body, flags=re.DOTALL | re.IGNORECASE)
|
||||
# 删除所有 style="..." 属性
|
||||
body = re.sub(r'\s+style="[^"]*"', "", body)
|
||||
# 删除 pandoc 输出的空 <p></p>
|
||||
body = re.sub(r"<p>\s*</p>", "", body)
|
||||
|
||||
return body.strip()
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# Pandoc path #
|
||||
# --------------------------------------------------------------------------- #
|
||||
|
||||
def _extract_via_pandoc(path: Path, pandoc: str) -> str:
|
||||
result = subprocess.run(
|
||||
[
|
||||
pandoc,
|
||||
str(path),
|
||||
"-f", "docx",
|
||||
"-t", "markdown",
|
||||
"--wrap=none",
|
||||
],
|
||||
capture_output=True,
|
||||
text=True,
|
||||
timeout=60,
|
||||
)
|
||||
if result.returncode != 0:
|
||||
raise RuntimeError(
|
||||
f"pandoc failed (exit {result.returncode}):\n{result.stderr.strip()}"
|
||||
)
|
||||
pandoc_text = _clean_pandoc_markdown(result.stdout)
|
||||
|
||||
# pandoc 会丢弃包含浮动形状(AlternateContent / WPS 图形)的段落的文字内容。
|
||||
# 用 python-docx 补充:找出 pandoc 没有输出的段落文本,追加到末尾。
|
||||
# 对文本匹配无副作用(最坏情况是轻微重复,不影响 SequenceMatcher 结果)。
|
||||
try:
|
||||
from docx import Document # type: ignore
|
||||
doc = Document(str(path))
|
||||
missing: list[str] = []
|
||||
for para in doc.paragraphs:
|
||||
text = para.text.strip()
|
||||
if text and text not in pandoc_text:
|
||||
missing.append(text)
|
||||
if missing:
|
||||
pandoc_text = pandoc_text + "\n" + "\n".join(missing)
|
||||
except Exception:
|
||||
pass # python-docx 不可用时静默降级,pandoc 结果仍然有效
|
||||
|
||||
return pandoc_text
|
||||
|
||||
|
||||
def _clean_pandoc_markdown(text: str) -> str:
|
||||
"""Remove pandoc-specific inline attributes that noise up text matching."""
|
||||
# [text]{.mark} / [text]{#id .cls key=val} → text
|
||||
text = re.sub(r"\[([^\]]*)\]\{[^}]*\}", r"\1", text)
|
||||
# Leftover bare {…} attribute blocks on their own
|
||||
text = re.sub(r"\{[^}]*\}", "", text)
|
||||
return text
|
||||
|
||||
|
||||
# --------------------------------------------------------------------------- #
|
||||
# python-docx fallback #
|
||||
# --------------------------------------------------------------------------- #
|
||||
|
||||
def _extract_via_docx(path: Path) -> str:
|
||||
from docx import Document # type: ignore
|
||||
|
||||
doc = Document(str(path))
|
||||
lines = [para.text for para in doc.paragraphs if para.text.strip()]
|
||||
|
||||
seen_cells: set[int] = set()
|
||||
for table in doc.tables:
|
||||
for row in table.rows:
|
||||
cells: list[str] = []
|
||||
for cell in row.cells:
|
||||
if id(cell) in seen_cells:
|
||||
continue
|
||||
seen_cells.add(id(cell))
|
||||
text = cell.text.strip()
|
||||
if text:
|
||||
cells.append(text)
|
||||
if cells:
|
||||
lines.append("|".join(cells))
|
||||
|
||||
return "\n".join(lines)
|
||||
12
docker-compose.yml
Normal file
@@ -0,0 +1,12 @@
|
||||
services:
|
||||
web:
|
||||
build: .
|
||||
ports:
|
||||
- "80761:8010"
|
||||
environment:
|
||||
MINERU_API_KEY: ${MINERU_API_KEY:-}
|
||||
volumes:
|
||||
- zld-data:/app/data
|
||||
|
||||
volumes:
|
||||
zld-data:
|
||||
801
docs/superpowers/plans/2026-04-14-mineru-ai-word-parse.md
Normal file
@@ -0,0 +1,801 @@
|
||||
# MinerU AI Word Parse Implementation Plan
|
||||
|
||||
> **For agentic workers:** REQUIRED SUB-SKILL: Use superpowers:subagent-driven-development (recommended) or superpowers:executing-plans to implement this plan task-by-task. Steps use checkbox (`- [ ]`) syntax for tracking.
|
||||
|
||||
**Goal:** Replace the old Illustrator text extraction source with MinerU JSON blocks, then compare those blocks against the uploaded Word document and highlight MinerU bounding boxes in the existing preview.
|
||||
|
||||
**Architecture:** Keep `/api/process` and the current React response shape. Add a focused MinerU JSON mapper and a small MinerU HTTP client, then update `backend/app/pipeline.py` so it converts `.ai` to `preview.pdf`, sends that PDF to MinerU, maps returned blocks to fields, and validates each field against Word text. Frontend changes are limited to making font metadata optional and updating copy to match the new MinerU-backed flow.
|
||||
|
||||
**Tech Stack:** Python 3, FastAPI, stdlib `urllib`, stdlib `zipfile`, `python-docx`, `pypdf`, React, TypeScript, Vite, pytest.
|
||||
|
||||
---
|
||||
|
||||
## File Structure
|
||||
|
||||
- Create `backend/app/mineru_parser.py`: Convert MinerU `middle.json`-style data into normalized field dictionaries with text and bbox coordinates.
|
||||
- Create `backend/app/mineru_client.py`: Submit a local PDF to MinerU, poll for completion, download and extract the result zip, and load structured JSON.
|
||||
- Modify `backend/app/pipeline.py`: Use AI-to-PDF preview conversion, MinerU parsing, and Word validation instead of old AI text fields.
|
||||
- Modify `frontend/src/types.ts`: Make font fields optional because MinerU output does not provide Illustrator font metadata.
|
||||
- Modify `frontend/src/App.tsx`: Keep UI behavior, adjust product copy/status copy to MinerU-backed OCR/layout results, and avoid unsafe numeric formatting on optional font metadata.
|
||||
- Create `tests/backend/test_mineru_parser.py`: Unit tests for sample JSON extraction, HTML table conversion, bbox mapping, and empty block handling.
|
||||
- Create `tests/backend/test_mineru_client.py`: Unit tests for MinerU HTTP client success/failure control flow with mocked `urllib`.
|
||||
- Modify `tests/backend/test_pipeline.py`: Mock MinerU calls and assert Word validation plus preview/highlight payload.
|
||||
- Modify `tests/backend/test_api.py`: Mock MinerU calls for endpoint tests and add missing-token failure coverage.
|
||||
|
||||
## Task 1: MinerU JSON Mapper
|
||||
|
||||
**Files:**
|
||||
- Create: `backend/app/mineru_parser.py`
|
||||
- Test: `tests/backend/test_mineru_parser.py`
|
||||
|
||||
- [ ] **Step 1: Write failing parser tests**
|
||||
|
||||
```python
|
||||
# tests/backend/test_mineru_parser.py
|
||||
from __future__ import annotations
|
||||
|
||||
from backend.app.mineru_parser import parse_mineru_fields
|
||||
|
||||
|
||||
def test_parse_mineru_fields_extracts_text_and_bbox() -> None:
|
||||
payload = {
|
||||
"pdf_info": [
|
||||
{
|
||||
"page_idx": 0,
|
||||
"page_size": [2772, 1961],
|
||||
"para_blocks": [
|
||||
{
|
||||
"bbox": [704, 134, 2106, 229],
|
||||
"type": "title",
|
||||
"lines": [
|
||||
{
|
||||
"spans": [
|
||||
{
|
||||
"type": "text",
|
||||
"content": "食品名称:天问礼品粽",
|
||||
"bbox": [704, 134, 2106, 229],
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
}
|
||||
],
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
parsed = parse_mineru_fields(payload)
|
||||
|
||||
assert parsed.page_width == 2772
|
||||
assert parsed.page_height == 1961
|
||||
assert parsed.fields == [
|
||||
{
|
||||
"page": 1,
|
||||
"text": "食品名称:天问礼品粽",
|
||||
"font_name": "",
|
||||
"font_size_pt": None,
|
||||
"font_height_mm": None,
|
||||
"x0_pt": 704.0,
|
||||
"top_pt": 134.0,
|
||||
"x1_pt": 2106.0,
|
||||
"bottom_pt": 229.0,
|
||||
}
|
||||
]
|
||||
|
||||
|
||||
def test_parse_mineru_fields_turns_table_html_into_text() -> None:
|
||||
payload = {
|
||||
"pdf_info": [
|
||||
{
|
||||
"page_idx": 0,
|
||||
"page_size": [1000, 800],
|
||||
"para_blocks": [
|
||||
{
|
||||
"bbox": [10, 20, 300, 200],
|
||||
"type": "table",
|
||||
"lines": [
|
||||
{
|
||||
"spans": [
|
||||
{
|
||||
"type": "table",
|
||||
"html": "<table><tr><td>品种</td><td>规格</td></tr><tr><td>黑猪肉粽</td><td>130克×1</td></tr></table>",
|
||||
}
|
||||
]
|
||||
}
|
||||
],
|
||||
}
|
||||
],
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
parsed = parse_mineru_fields(payload)
|
||||
|
||||
assert parsed.fields[0]["text"] == "品种 规格 黑猪肉粽 130克×1"
|
||||
|
||||
|
||||
def test_parse_mineru_fields_skips_empty_decorative_blocks() -> None:
|
||||
payload = {
|
||||
"pdf_info": [
|
||||
{
|
||||
"page_idx": 0,
|
||||
"page_size": [1000, 800],
|
||||
"para_blocks": [
|
||||
{"bbox": [1, 2, 3, 4], "type": "image", "lines": [{"spans": [{"type": "image"}]}]},
|
||||
{"bbox": [5, 6, 7, 8], "type": "text", "lines": [{"spans": [{"content": " "}]}]},
|
||||
],
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
parsed = parse_mineru_fields(payload)
|
||||
|
||||
assert parsed.fields == []
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Run parser tests and verify they fail**
|
||||
|
||||
Run: `pytest tests/backend/test_mineru_parser.py -v`
|
||||
|
||||
Expected: FAIL with `ModuleNotFoundError: No module named 'backend.app.mineru_parser'`.
|
||||
|
||||
- [ ] **Step 3: Implement the MinerU parser**
|
||||
|
||||
```python
|
||||
# backend/app/mineru_parser.py
|
||||
from __future__ import annotations
|
||||
|
||||
import html
|
||||
import re
|
||||
from dataclasses import dataclass
|
||||
from typing import Any
|
||||
|
||||
|
||||
@dataclass(slots=True)
|
||||
class ParsedMineruDocument:
|
||||
page_width: float
|
||||
page_height: float
|
||||
fields: list[dict]
|
||||
|
||||
|
||||
TAG_RE = re.compile(r"<[^>]+>")
|
||||
WHITESPACE_RE = re.compile(r"\s+")
|
||||
|
||||
|
||||
def _clean_text(value: str) -> str:
|
||||
without_tags = TAG_RE.sub(" ", html.unescape(value))
|
||||
return WHITESPACE_RE.sub(" ", without_tags).strip()
|
||||
|
||||
|
||||
def _span_text(span: dict[str, Any]) -> str:
|
||||
if isinstance(span.get("content"), str):
|
||||
return _clean_text(span["content"])
|
||||
if isinstance(span.get("html"), str):
|
||||
return _clean_text(span["html"])
|
||||
return ""
|
||||
|
||||
|
||||
def _block_text(block: dict[str, Any]) -> str:
|
||||
pieces: list[str] = []
|
||||
for line in block.get("lines") or []:
|
||||
for span in line.get("spans") or []:
|
||||
text = _span_text(span)
|
||||
if text:
|
||||
pieces.append(text)
|
||||
if not pieces and isinstance(block.get("text"), str):
|
||||
pieces.append(_clean_text(block["text"]))
|
||||
return WHITESPACE_RE.sub(" ", " ".join(pieces)).strip()
|
||||
|
||||
|
||||
def _bbox(block: dict[str, Any]) -> tuple[float, float, float, float] | None:
|
||||
raw_bbox = block.get("bbox")
|
||||
if not isinstance(raw_bbox, list) or len(raw_bbox) != 4:
|
||||
return None
|
||||
try:
|
||||
x0, y0, x1, y1 = [float(value) for value in raw_bbox]
|
||||
except (TypeError, ValueError):
|
||||
return None
|
||||
if x1 <= x0 or y1 <= y0:
|
||||
return None
|
||||
return x0, y0, x1, y1
|
||||
|
||||
|
||||
def _page_size(page: dict[str, Any]) -> tuple[float, float]:
|
||||
raw_size = page.get("page_size")
|
||||
if isinstance(raw_size, list) and len(raw_size) >= 2:
|
||||
return float(raw_size[0]), float(raw_size[1])
|
||||
return 1.0, 1.0
|
||||
|
||||
|
||||
def parse_mineru_fields(payload: dict[str, Any]) -> ParsedMineruDocument:
|
||||
pages = payload.get("pdf_info")
|
||||
if not isinstance(pages, list) or not pages:
|
||||
raise ValueError("MinerU JSON does not contain pdf_info pages")
|
||||
|
||||
first_width, first_height = _page_size(pages[0])
|
||||
fields: list[dict] = []
|
||||
|
||||
for page in pages:
|
||||
page_number = int(page.get("page_idx", 0)) + 1
|
||||
for block in page.get("para_blocks") or []:
|
||||
text = _block_text(block)
|
||||
box = _bbox(block)
|
||||
if not text or box is None:
|
||||
continue
|
||||
x0, y0, x1, y1 = box
|
||||
fields.append(
|
||||
{
|
||||
"page": page_number,
|
||||
"text": text,
|
||||
"font_name": "",
|
||||
"font_size_pt": None,
|
||||
"font_height_mm": None,
|
||||
"x0_pt": x0,
|
||||
"top_pt": y0,
|
||||
"x1_pt": x1,
|
||||
"bottom_pt": y1,
|
||||
}
|
||||
)
|
||||
|
||||
return ParsedMineruDocument(page_width=first_width, page_height=first_height, fields=fields)
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Run parser tests and verify they pass**
|
||||
|
||||
Run: `pytest tests/backend/test_mineru_parser.py -v`
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
## Task 2: MinerU HTTP Client
|
||||
|
||||
**Files:**
|
||||
- Create: `backend/app/mineru_client.py`
|
||||
- Test: `tests/backend/test_mineru_client.py`
|
||||
|
||||
- [ ] **Step 1: Write failing MinerU client tests**
|
||||
|
||||
```python
|
||||
# tests/backend/test_mineru_client.py
|
||||
from __future__ import annotations
|
||||
|
||||
import io
|
||||
import json
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
from urllib.error import HTTPError
|
||||
|
||||
import pytest
|
||||
|
||||
from backend.app import mineru_client
|
||||
from backend.app.mineru_client import MineruClient, MineruClientError
|
||||
|
||||
|
||||
class FakeResponse:
|
||||
def __init__(self, status: int, body: bytes):
|
||||
self.status = status
|
||||
self._body = body
|
||||
|
||||
def read(self) -> bytes:
|
||||
return self._body
|
||||
|
||||
def __enter__(self) -> "FakeResponse":
|
||||
return self
|
||||
|
||||
def __exit__(self, *_args: object) -> None:
|
||||
return None
|
||||
|
||||
|
||||
def _zip_with_json() -> bytes:
|
||||
buffer = io.BytesIO()
|
||||
with zipfile.ZipFile(buffer, "w") as archive:
|
||||
archive.writestr("demo_middle.json", json.dumps({"pdf_info": [{"page_idx": 0, "page_size": [1, 1], "para_blocks": []}]}))
|
||||
return buffer.getvalue()
|
||||
|
||||
|
||||
def test_submit_pdf_downloads_and_loads_structured_json(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
|
||||
calls: list[str] = []
|
||||
|
||||
def fake_urlopen(request, timeout=0):
|
||||
url = request.full_url if hasattr(request, "full_url") else request
|
||||
calls.append(str(url))
|
||||
if str(url).endswith("/api/v4/file-urls/batch"):
|
||||
return FakeResponse(200, json.dumps({"code": 0, "data": {"batch_id": "batch-1", "file_urls": ["https://upload.example/file"]}}).encode())
|
||||
if str(url) == "https://upload.example/file":
|
||||
return FakeResponse(200, b"")
|
||||
if str(url).endswith("/api/v4/extract-results/batch/batch-1"):
|
||||
return FakeResponse(200, json.dumps({"code": 0, "data": {"extract_result": [{"state": "done", "full_zip_url": "https://download.example/result.zip"}]}}).encode())
|
||||
if str(url) == "https://download.example/result.zip":
|
||||
return FakeResponse(200, _zip_with_json())
|
||||
raise AssertionError(f"unexpected URL {url}")
|
||||
|
||||
monkeypatch.setattr(mineru_client.request, "urlopen", fake_urlopen)
|
||||
pdf_path = tmp_path / "preview.pdf"
|
||||
pdf_path.write_bytes(b"%PDF-1.7")
|
||||
|
||||
payload = MineruClient(api_key="secret", poll_interval_seconds=0, max_polls=1).parse_pdf(pdf_path, tmp_path)
|
||||
|
||||
assert payload["pdf_info"][0]["page_size"] == [1, 1]
|
||||
assert calls == [
|
||||
"https://mineru.net/api/v4/file-urls/batch",
|
||||
"https://upload.example/file",
|
||||
"https://mineru.net/api/v4/extract-results/batch/batch-1",
|
||||
"https://download.example/result.zip",
|
||||
]
|
||||
assert (tmp_path / "mineru_result.zip").exists()
|
||||
|
||||
|
||||
def test_submit_pdf_raises_on_failed_task(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
|
||||
def fake_urlopen(request, timeout=0):
|
||||
url = request.full_url if hasattr(request, "full_url") else request
|
||||
if str(url).endswith("/api/v4/file-urls/batch"):
|
||||
return FakeResponse(200, json.dumps({"code": 0, "data": {"batch_id": "batch-1", "file_urls": ["https://upload.example/file"]}}).encode())
|
||||
if str(url) == "https://upload.example/file":
|
||||
return FakeResponse(200, b"")
|
||||
if str(url).endswith("/api/v4/extract-results/batch/batch-1"):
|
||||
return FakeResponse(200, json.dumps({"code": 0, "data": {"extract_result": [{"state": "failed", "err_msg": "bad pdf"}]}}).encode())
|
||||
raise AssertionError(f"unexpected URL {url}")
|
||||
|
||||
monkeypatch.setattr(mineru_client.request, "urlopen", fake_urlopen)
|
||||
pdf_path = tmp_path / "preview.pdf"
|
||||
pdf_path.write_bytes(b"%PDF-1.7")
|
||||
|
||||
with pytest.raises(MineruClientError, match="bad pdf"):
|
||||
MineruClient(api_key="secret", poll_interval_seconds=0, max_polls=1).parse_pdf(pdf_path, tmp_path)
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Run client tests and verify they fail**
|
||||
|
||||
Run: `pytest tests/backend/test_mineru_client.py -v`
|
||||
|
||||
Expected: FAIL with `ModuleNotFoundError: No module named 'backend.app.mineru_client'`.
|
||||
|
||||
- [ ] **Step 3: Implement the MinerU client**
|
||||
|
||||
```python
|
||||
# backend/app/mineru_client.py
|
||||
from __future__ import annotations
|
||||
|
||||
import json
|
||||
import time
|
||||
import zipfile
|
||||
from pathlib import Path
|
||||
from urllib import request
|
||||
from urllib.error import HTTPError, URLError
|
||||
|
||||
|
||||
class MineruClientError(RuntimeError):
|
||||
pass
|
||||
|
||||
|
||||
class MineruClient:
|
||||
def __init__(self, api_key: str, poll_interval_seconds: float = 2.0, max_polls: int = 90) -> None:
|
||||
self.api_key = api_key
|
||||
self.poll_interval_seconds = poll_interval_seconds
|
||||
self.max_polls = max_polls
|
||||
|
||||
def parse_pdf(self, pdf_path: Path, output_dir: Path) -> dict:
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
batch_id, upload_url = self._request_upload_url(pdf_path.name)
|
||||
self._upload_file(upload_url, pdf_path)
|
||||
zip_url = self._poll_result(batch_id)
|
||||
zip_path = self._download_zip(zip_url, output_dir)
|
||||
extract_dir = output_dir / "mineru_result"
|
||||
self._extract_zip(zip_path, extract_dir)
|
||||
return self._load_structured_json(extract_dir)
|
||||
|
||||
def _headers(self) -> dict[str, str]:
|
||||
return {"Authorization": f"Bearer {self.api_key}", "Accept": "*/*"}
|
||||
|
||||
def _json_request(self, url: str, method: str = "GET", payload: dict | None = None) -> dict:
|
||||
body = None if payload is None else json.dumps(payload).encode("utf-8")
|
||||
headers = self._headers()
|
||||
if payload is not None:
|
||||
headers["Content-Type"] = "application/json"
|
||||
req = request.Request(url, data=body, headers=headers, method=method)
|
||||
try:
|
||||
with request.urlopen(req, timeout=30) as response:
|
||||
data = json.loads(response.read().decode("utf-8"))
|
||||
except (HTTPError, URLError, TimeoutError, json.JSONDecodeError) as exc:
|
||||
raise MineruClientError(f"MinerU request failed: {exc}") from exc
|
||||
if data.get("code") != 0:
|
||||
raise MineruClientError(str(data.get("msg") or "MinerU API returned an error"))
|
||||
return data
|
||||
|
||||
def _request_upload_url(self, filename: str) -> tuple[str, str]:
|
||||
data = self._json_request(
|
||||
"https://mineru.net/api/v4/file-urls/batch",
|
||||
method="POST",
|
||||
payload={"files": [{"name": filename, "data_id": filename}], "model_version": "vlm"},
|
||||
)
|
||||
batch_id = data["data"]["batch_id"]
|
||||
file_urls = data["data"]["file_urls"]
|
||||
if not file_urls:
|
||||
raise MineruClientError("MinerU did not return an upload URL")
|
||||
return batch_id, file_urls[0]
|
||||
|
||||
def _upload_file(self, upload_url: str, pdf_path: Path) -> None:
|
||||
req = request.Request(upload_url, data=pdf_path.read_bytes(), method="PUT")
|
||||
try:
|
||||
with request.urlopen(req, timeout=120) as response:
|
||||
if response.status >= 400:
|
||||
raise MineruClientError(f"MinerU upload failed with HTTP {response.status}")
|
||||
except (HTTPError, URLError, TimeoutError) as exc:
|
||||
raise MineruClientError(f"MinerU upload failed: {exc}") from exc
|
||||
|
||||
def _poll_result(self, batch_id: str) -> str:
|
||||
url = f"https://mineru.net/api/v4/extract-results/batch/{batch_id}"
|
||||
for _attempt in range(self.max_polls):
|
||||
data = self._json_request(url)
|
||||
extract_result = data.get("data", {}).get("extract_result")
|
||||
if isinstance(extract_result, list):
|
||||
result = extract_result[0] if extract_result else {}
|
||||
else:
|
||||
result = extract_result or {}
|
||||
state = result.get("state")
|
||||
if state == "done":
|
||||
zip_url = result.get("full_zip_url")
|
||||
if not zip_url:
|
||||
raise MineruClientError("MinerU finished without full_zip_url")
|
||||
return zip_url
|
||||
if state == "failed":
|
||||
raise MineruClientError(str(result.get("err_msg") or "MinerU parsing failed"))
|
||||
time.sleep(self.poll_interval_seconds)
|
||||
raise MineruClientError("MinerU polling timed out")
|
||||
|
||||
def _download_zip(self, zip_url: str, output_dir: Path) -> Path:
|
||||
target = output_dir / "mineru_result.zip"
|
||||
req = request.Request(zip_url, headers={"Accept": "*/*"}, method="GET")
|
||||
try:
|
||||
with request.urlopen(req, timeout=120) as response:
|
||||
target.write_bytes(response.read())
|
||||
except (HTTPError, URLError, TimeoutError) as exc:
|
||||
raise MineruClientError(f"MinerU zip download failed: {exc}") from exc
|
||||
return target
|
||||
|
||||
def _extract_zip(self, zip_path: Path, extract_dir: Path) -> None:
|
||||
extract_dir.mkdir(parents=True, exist_ok=True)
|
||||
with zipfile.ZipFile(zip_path) as archive:
|
||||
archive.extractall(extract_dir)
|
||||
|
||||
def _load_structured_json(self, extract_dir: Path) -> dict:
|
||||
candidates = sorted(extract_dir.rglob("*middle.json")) + sorted(extract_dir.rglob("*_model.json"))
|
||||
if not candidates:
|
||||
raise MineruClientError("MinerU result zip did not contain structured JSON")
|
||||
return json.loads(candidates[0].read_text(encoding="utf-8"))
|
||||
```
|
||||
|
||||
- [ ] **Step 4: Run client tests and verify they pass**
|
||||
|
||||
Run: `pytest tests/backend/test_mineru_client.py -v`
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
## Task 3: Pipeline Integration
|
||||
|
||||
**Files:**
|
||||
- Modify: `backend/app/pipeline.py`
|
||||
- Modify: `tests/backend/test_pipeline.py`
|
||||
- Modify: `tests/backend/test_api.py`
|
||||
|
||||
- [ ] **Step 1: Write failing pipeline tests with a mocked MinerU document**
|
||||
|
||||
```python
|
||||
# tests/backend/test_pipeline.py
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from backend.app import pipeline
|
||||
from backend.app.pipeline import process_files
|
||||
|
||||
|
||||
WORKDIR = Path("/Users/icemilk/Workspace/zld_POC")
|
||||
AI_FILE = WORKDIR / "【2026-04-09】端午 - 背标 - 天问.ai"
|
||||
DOCX_FILE = WORKDIR / "天问礼品粽【260331】.docx"
|
||||
OUTPUT_DIR = WORKDIR / ".tmp_test_output"
|
||||
|
||||
|
||||
def test_process_files_builds_preview_and_mineru_field_results(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
def fake_parse_with_mineru(_preview_path: Path, _output_dir: Path):
|
||||
return {
|
||||
"pdf_info": [
|
||||
{
|
||||
"page_idx": 0,
|
||||
"page_size": [2772, 1961],
|
||||
"para_blocks": [
|
||||
{
|
||||
"bbox": [704, 134, 2106, 229],
|
||||
"lines": [{"spans": [{"content": "食品名称:天问礼品粽"}]}],
|
||||
},
|
||||
{
|
||||
"bbox": [10, 20, 40, 60],
|
||||
"lines": [{"spans": [{"content": "Word中不存在的内容"}]}],
|
||||
},
|
||||
],
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
monkeypatch.setattr(pipeline, "_parse_preview_with_mineru", fake_parse_with_mineru)
|
||||
|
||||
result = process_files(AI_FILE, DOCX_FILE, OUTPUT_DIR, job_id="test-job")
|
||||
|
||||
assert result["preview"]["type"] == "pdf"
|
||||
assert result["preview"]["url"] == "/api/files/test-job/preview.pdf"
|
||||
assert result["preview"]["pageWidthPt"] == 2772
|
||||
assert result["preview"]["pageHeightPt"] == 1961
|
||||
assert result["fields"][0]["text"] == "食品名称:天问礼品粽"
|
||||
assert result["fields"][0]["validation_status"] == "matched"
|
||||
assert result["fields"][0]["x0_pt"] == 704.0
|
||||
assert any(field["validation_status"] == "unmatched" for field in result["fields"])
|
||||
assert (OUTPUT_DIR / "preview.pdf").exists()
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Replace API tests with mocked MinerU coverage**
|
||||
|
||||
```python
|
||||
# tests/backend/test_api.py
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
from fastapi.testclient import TestClient
|
||||
|
||||
from backend.app import pipeline
|
||||
from backend.app.main import app
|
||||
|
||||
|
||||
WORKDIR = Path("/Users/icemilk/Workspace/zld_POC")
|
||||
AI_FILE = WORKDIR / "【2026-04-09】端午 - 背标 - 天问.ai"
|
||||
DOCX_FILE = WORKDIR / "天问礼品粽【260331】.docx"
|
||||
|
||||
client = TestClient(app)
|
||||
|
||||
|
||||
def fake_mineru_payload() -> dict:
|
||||
return {
|
||||
"pdf_info": [
|
||||
{
|
||||
"page_idx": 0,
|
||||
"page_size": [2772, 1961],
|
||||
"para_blocks": [
|
||||
{
|
||||
"bbox": [704, 134, 2106, 229],
|
||||
"lines": [{"spans": [{"content": "食品名称:天问礼品粽"}]}],
|
||||
}
|
||||
],
|
||||
}
|
||||
]
|
||||
}
|
||||
|
||||
|
||||
def test_process_endpoint_returns_preview_and_fields(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
monkeypatch.setattr(pipeline, "_parse_preview_with_mineru", lambda _preview_path, _output_dir: fake_mineru_payload())
|
||||
|
||||
with AI_FILE.open("rb") as ai_fp, DOCX_FILE.open("rb") as docx_fp:
|
||||
response = client.post(
|
||||
"/api/process",
|
||||
files={
|
||||
"ai_file": (AI_FILE.name, ai_fp, "application/postscript"),
|
||||
"word_file": (
|
||||
DOCX_FILE.name,
|
||||
docx_fp,
|
||||
"application/vnd.openxmlformats-officedocument.wordprocessingml.document",
|
||||
),
|
||||
},
|
||||
)
|
||||
|
||||
assert response.status_code == 200
|
||||
|
||||
payload = response.json()
|
||||
assert payload["preview"]["type"] == "pdf"
|
||||
assert payload["preview"]["pageWidthPt"] == 2772
|
||||
assert payload["fields"]
|
||||
assert payload["fields"][0]["text"] == "食品名称:天问礼品粽"
|
||||
|
||||
|
||||
def test_process_endpoint_uses_default_sample_files_when_uploads_are_missing(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
monkeypatch.setattr(pipeline, "_parse_preview_with_mineru", lambda _preview_path, _output_dir: fake_mineru_payload())
|
||||
|
||||
response = client.post("/api/process")
|
||||
|
||||
assert response.status_code == 200
|
||||
|
||||
payload = response.json()
|
||||
assert payload["preview"]["type"] == "pdf"
|
||||
assert payload["fields"]
|
||||
assert any(field["text"] for field in payload["fields"])
|
||||
|
||||
|
||||
def test_process_endpoint_surfaces_missing_mineru_key(monkeypatch: pytest.MonkeyPatch) -> None:
|
||||
def fake_parse_with_mineru(_preview_path, _output_dir):
|
||||
raise RuntimeError("MINERU_API_KEY is required")
|
||||
|
||||
monkeypatch.setattr(pipeline, "_parse_preview_with_mineru", fake_parse_with_mineru)
|
||||
|
||||
response = client.post("/api/process")
|
||||
|
||||
assert response.status_code == 500
|
||||
assert response.json()["detail"] == "MINERU_API_KEY is required"
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Run integration tests and verify they fail**
|
||||
|
||||
Run: `pytest tests/backend/test_pipeline.py tests/backend/test_api.py -v`
|
||||
|
||||
Expected: FAIL because `pipeline._parse_preview_with_mineru` does not exist and `process_files` still uses `ai_document.fields`.
|
||||
|
||||
- [ ] **Step 4: Update `backend/app/pipeline.py`**
|
||||
|
||||
```python
|
||||
# backend/app/pipeline.py
|
||||
from __future__ import annotations
|
||||
|
||||
import os
|
||||
import shutil
|
||||
from pathlib import Path
|
||||
|
||||
from backend.app.ai_parser import parse_ai_document
|
||||
from backend.app.mineru_client import MineruClient
|
||||
from backend.app.mineru_parser import parse_mineru_fields
|
||||
from backend.app.text_validation import validate_field_against_word
|
||||
from backend.app.word_parser import extract_word_text
|
||||
|
||||
|
||||
def _sort_key(field: dict) -> tuple[int, int, float, float]:
|
||||
status_rank = {"matched": 0, "unmatched": 1, "empty_or_garbled": 2}
|
||||
return (
|
||||
status_rank.get(field["validation_status"], 9),
|
||||
field["page"],
|
||||
field["top_pt"],
|
||||
field["x0_pt"],
|
||||
)
|
||||
|
||||
|
||||
def _parse_preview_with_mineru(preview_path: Path, output_dir: Path) -> dict:
|
||||
api_key = os.environ.get("MINERU_API_KEY", "").strip()
|
||||
if not api_key:
|
||||
raise RuntimeError("MINERU_API_KEY is required")
|
||||
return MineruClient(api_key=api_key).parse_pdf(preview_path, output_dir / "mineru")
|
||||
|
||||
|
||||
def process_files(ai_path: Path, word_path: Path, output_dir: Path, job_id: str | None = None) -> dict:
|
||||
output_dir.mkdir(parents=True, exist_ok=True)
|
||||
ai_document = parse_ai_document(ai_path, output_dir / "parsed")
|
||||
word_text = extract_word_text(word_path)
|
||||
|
||||
preview_filename = "preview.pdf"
|
||||
preview_target = output_dir / preview_filename
|
||||
if ai_document.preview_path != preview_target:
|
||||
shutil.copy2(ai_document.preview_path, preview_target)
|
||||
|
||||
mineru_payload = _parse_preview_with_mineru(preview_target, output_dir)
|
||||
mineru_document = parse_mineru_fields(mineru_payload)
|
||||
|
||||
fields: list[dict] = []
|
||||
for index, field in enumerate(mineru_document.fields, start=1):
|
||||
validation = validate_field_against_word(field["text"], word_text)
|
||||
fields.append(
|
||||
{
|
||||
"id": f"field-{index}",
|
||||
**field,
|
||||
"normalized_text": validation.normalized_text,
|
||||
"validation_status": validation.status,
|
||||
"validation_reason": validation.reason,
|
||||
"matched_excerpt": validation.matched_excerpt,
|
||||
}
|
||||
)
|
||||
|
||||
fields.sort(key=_sort_key)
|
||||
|
||||
preview_url = f"/api/files/{job_id}/{preview_filename}" if job_id else preview_filename
|
||||
return {
|
||||
"preview": {
|
||||
"type": "pdf",
|
||||
"url": preview_url,
|
||||
"pageWidthPt": mineru_document.page_width,
|
||||
"pageHeightPt": mineru_document.page_height,
|
||||
},
|
||||
"fields": fields,
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 5: Run integration tests and verify they pass**
|
||||
|
||||
Run: `pytest tests/backend/test_pipeline.py tests/backend/test_api.py -v`
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
## Task 4: Frontend Type and Copy Compatibility
|
||||
|
||||
**Files:**
|
||||
- Modify: `frontend/src/types.ts`
|
||||
- Modify: `frontend/src/App.tsx`
|
||||
|
||||
- [ ] **Step 1: Update TypeScript types**
|
||||
|
||||
```ts
|
||||
// frontend/src/types.ts
|
||||
export type FieldResult = {
|
||||
id: string
|
||||
page: number
|
||||
text: string
|
||||
font_name?: string | null
|
||||
font_size_pt?: number | null
|
||||
font_height_mm?: number | null
|
||||
x0_pt: number
|
||||
top_pt: number
|
||||
x1_pt: number
|
||||
bottom_pt: number
|
||||
normalized_text: string
|
||||
validation_status: ValidationStatus
|
||||
validation_reason: string
|
||||
matched_excerpt: string | null
|
||||
}
|
||||
```
|
||||
|
||||
- [ ] **Step 2: Update `App.tsx` display guards and copy**
|
||||
|
||||
```tsx
|
||||
// Replace the hero copy with:
|
||||
<p className="hero-copy">
|
||||
上传 Illustrator 源文件与 Word 校对稿,系统会将设计文件转换为 PDF 后交给 MinerU 解析,
|
||||
再把识别出的版面文字与 Word 内容逐块比对。
|
||||
</p>
|
||||
|
||||
// Replace the font metadata rendering with:
|
||||
<div className="field-meta">
|
||||
<span>第 {field.page} 页</span>
|
||||
{field.font_name ? <span>{field.font_name}</span> : null}
|
||||
{typeof field.font_size_pt === 'number' ? <span>{field.font_size_pt} pt</span> : null}
|
||||
{typeof field.font_height_mm === 'number' ? <span>{field.font_height_mm.toFixed(1)} mm</span> : null}
|
||||
</div>
|
||||
```
|
||||
|
||||
- [ ] **Step 3: Run frontend type check**
|
||||
|
||||
Run: `cd frontend && npm run build`
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
## Task 5: Full Verification
|
||||
|
||||
**Files:**
|
||||
- No new files.
|
||||
|
||||
- [ ] **Step 1: Run backend tests**
|
||||
|
||||
Run: `pytest tests/backend/test_mineru_parser.py tests/backend/test_mineru_client.py tests/backend/test_pipeline.py tests/backend/test_api.py -v`
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 2: Run frontend build**
|
||||
|
||||
Run: `cd frontend && npm run build`
|
||||
|
||||
Expected: PASS.
|
||||
|
||||
- [ ] **Step 3: Run local manual verification with the real MinerU API**
|
||||
|
||||
Set `MINERU_API_KEY` in the shell environment, then run the backend:
|
||||
|
||||
```bash
|
||||
./scripts/start_backend.sh
|
||||
```
|
||||
|
||||
Run frontend in another terminal:
|
||||
|
||||
```bash
|
||||
./scripts/start_frontend.sh
|
||||
```
|
||||
|
||||
Open the frontend, upload the sample `.ai` and `.docx`, click `开始解析`, and verify:
|
||||
|
||||
- The request completes without leaking the token to browser requests.
|
||||
- The right preview shows the PDF.
|
||||
- The left result list contains MinerU-derived text blocks.
|
||||
- Clicking a result card highlights the corresponding MinerU bbox on the right preview.
|
||||
- Blocks found in the Word document show `校验成功`; missing blocks show `校验失败`.
|
||||
|
||||
- [ ] **Step 4: Skip commit in this workspace**
|
||||
|
||||
This project directory is not a git repository, so do not run `git commit`. Report the changed file list in the final response instead.
|
||||
108
docs/superpowers/specs/2026-04-14-mineru-ai-word-parse-design.md
Normal file
@@ -0,0 +1,108 @@
# MinerU AI Word Parse Design

## Goal

Replace the current AI text extraction pipeline with a MinerU-backed flow:

1. Accept an Illustrator `.ai` file and a Word `.docx` file.
2. Convert or normalize the `.ai` file into a PDF preview artifact.
3. Upload the PDF artifact to MinerU for document parsing.
4. Read MinerU JSON output blocks and their bounding boxes.
5. Compare MinerU text output against the Word document text.
6. Return field results that the existing React preview can highlight on the right side.

## Non-Goals

- Do not keep the old `parse_ai_document(...).fields` text extraction as the source of validation fields.
- Do not expose the MinerU API key to the frontend.
- Do not require a public callback URL; use polling because this is a local backend.
- Do not add a new manual annotation workflow.

## Backend Flow

The `/api/process` endpoint keeps its current two-file upload contract: `ai_file` and `word_file`.

For each job, the backend creates the existing runtime upload/output directories. The `.ai` file is converted into a PDF preview artifact using the existing `backend.app.ai_parser.parse_ai_document` conversion behavior. The resulting `preview.pdf` is copied into the job output directory and returned as the preview URL.

The backend then submits that PDF preview artifact to MinerU using the documented local-file upload flow (a polling-client sketch follows this section):

1. `POST https://mineru.net/api/v4/file-urls/batch` with one file entry and `model_version: "vlm"`.
2. `PUT` the generated upload URL with the PDF bytes.
3. Poll `GET https://mineru.net/api/v4/extract-results/batch/{batch_id}` until the single file reaches `done`, `failed`, or a timeout.
4. When `done`, download `full_zip_url` into the job output directory.
5. Extract the zip into the job output directory and locate the structured JSON output.

The API token is read from `MINERU_API_KEY`. If it is missing, the backend returns a clear configuration error instead of attempting the request.
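The steps above translate into a small polling client. Below is a minimal sketch, not the shipped module: the endpoint paths, `model_version: "vlm"`, `batch_id`, `full_zip_url`, and the `done`/`failed` states come from the flow listed here, while the exact response envelope (`data.file_urls`, `extract_result[].state`) and the helper names are assumptions.

```python
import os
import time

import requests

MINERU_BASE = "https://mineru.net/api/v4"


class MineruError(RuntimeError):
    """Raised for any MinerU-side failure (config, upload, poll, task)."""


def submit_pdf_to_mineru(pdf_path: str, timeout_s: float = 300.0) -> str:
    """Upload one PDF and return the full_zip_url of the finished task."""
    token = os.environ.get("MINERU_API_KEY")
    if not token:
        raise MineruError("MINERU_API_KEY is not configured")
    headers = {"Authorization": f"Bearer {token}"}

    # 1. Request a pre-signed upload URL for a single file.
    resp = requests.post(
        f"{MINERU_BASE}/file-urls/batch",
        headers=headers,
        json={"files": [{"name": os.path.basename(pdf_path)}], "model_version": "vlm"},
        timeout=30,
    )
    resp.raise_for_status()
    data = resp.json()["data"]
    batch_id, upload_url = data["batch_id"], data["file_urls"][0]

    # 2. PUT the PDF bytes to the pre-signed URL (no auth header needed here).
    with open(pdf_path, "rb") as fh:
        requests.put(upload_url, data=fh, timeout=120).raise_for_status()

    # 3. Poll until the single file is done, failed, or the deadline passes.
    deadline = time.monotonic() + timeout_s
    while time.monotonic() < deadline:
        poll = requests.get(
            f"{MINERU_BASE}/extract-results/batch/{batch_id}",
            headers=headers,
            timeout=30,
        )
        poll.raise_for_status()
        result = poll.json()["data"]["extract_result"][0]
        if result.get("state") == "done":
            return result["full_zip_url"]  # steps 4-5: download and unzip
        if result.get("state") == "failed":
            raise MineruError(result.get("err_msg") or "MinerU task failed")
        time.sleep(3)
    raise MineruError("timed out waiting for MinerU result")
```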
## MinerU JSON Mapping

The primary parser reads MinerU `middle.json`-style output because the sample JSON contains:

- `pdf_info[]`
- `page_idx`
- `page_size`
- `para_blocks[]`
- `discarded_blocks[]`
- block-level `bbox: [x0, y0, x1, y1]`
- nested `lines[].spans[]` with `content`, `html`, and span-level `bbox`

Each top-level `para_blocks` item becomes one validation result. For blocks with nested line/span content, the backend concatenates the text-like span content. Table spans with `html` are converted to readable text by stripping tags and HTML entities. If a block has no readable text, it can still be returned as `empty_or_garbled` when useful, but empty decorative blocks should be skipped.

Coordinate mapping (a mapping sketch follows):

- MinerU uses pixel-like page coordinates with the origin at the top-left.
- The frontend preview expects top-left coordinates named `x0_pt`, `top_pt`, `x1_pt`, and `bottom_pt`.
- The backend returns MinerU coordinates directly as field coordinates and sets preview `pageWidthPt`/`pageHeightPt` from `page_size`, because the frontend scales both the preview and the overlay from the same coordinate system.

For multi-page output, `page` is `page_idx + 1`.
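A sketch of this mapping, assuming the `middle.json` structure listed above; the output keys mirror the frontend contract, and the HTML-stripping regex is deliberately simplified (it ignores numeric entities):

```python
import re


def block_to_field(block: dict, page_idx: int, field_id: str) -> dict | None:
    """Convert one para_blocks item into a field dict, or None if decorative."""
    parts: list[str] = []
    for line in block.get("lines", []):
        for span in line.get("spans", []):
            if span.get("content"):
                parts.append(span["content"])
            elif span.get("html"):  # table spans: strip tags and entities
                text = re.sub(r"<[^>]+>", " ", span["html"])
                parts.append(re.sub(r"&[a-z]+;", " ", text).strip())
    text = " ".join(p for p in parts if p).strip()
    if not text:
        return None  # skip empty decorative blocks

    x0, y0, x1, y1 = block["bbox"]  # top-left origin, pixel-like units
    return {
        "id": field_id,
        "text": text,
        "page": page_idx + 1,
        # Returned as-is: the frontend scales preview and overlay together.
        "x0_pt": x0,
        "top_pt": y0,
        "x1_pt": x1,
        "bottom_pt": y1,
    }
```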
## Word Comparison

The Word document remains the validation baseline. The backend reuses the existing `extract_word_text` and `validate_field_against_word` behavior:

- MinerU block text is normalized and compared against the full Word text (sketched below).
- The result status remains `matched`, `unmatched`, or `empty_or_garbled`.
- The response keeps a `fields` array compatible with the current React UI.

This preserves the existing sidebar and highlighter behavior while changing the field source from the old AI PDF text extraction to MinerU OCR/layout extraction.
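A minimal sketch of the comparison step; the whitespace-stripping normalization shown here is illustrative, the real rule lives in `validate_field_against_word`:

```python
import re


def normalize(text: str) -> str:
    """Drop all whitespace so layout-driven line breaks cannot break a match."""
    return re.sub(r"\s+", "", text)


def classify(block_text: str, word_full_text: str) -> str:
    needle = normalize(block_text)
    if not needle:
        return "empty_or_garbled"
    return "matched" if needle in normalize(word_full_text) else "unmatched"
```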
## Frontend Contract

The current `ProcessResponse` shape should remain mostly compatible (a Pydantic sketch follows this list):

- `preview.type`: `pdf`
- `preview.url`: generated PDF preview URL
- `preview.pageWidthPt`: MinerU page width
- `preview.pageHeightPt`: MinerU page height
- `fields[]`: validation blocks with text, status, reason, matched excerpt, page, and coordinates

Small frontend changes may be needed to make the optional typography metadata safe, because MinerU blocks do not provide Illustrator font names or font sizes.

The right preview continues to render `preview.pdf` and draw overlay rectangles from `fields[]`.
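A Pydantic sketch of that contract; the model names and the exact set of optional fields are illustrative, but the nullable typography metadata is the point, since MinerU provides none of it:

```python
from pydantic import BaseModel


class PreviewMeta(BaseModel):
    type: str            # "pdf"
    url: str             # generated PDF preview URL
    pageWidthPt: float   # MinerU page width
    pageHeightPt: float  # MinerU page height


class FieldResult(BaseModel):
    id: str
    text: str
    validation_status: str          # matched | unmatched | empty_or_garbled
    validation_reason: str
    matched_excerpt: str | None = None
    page: int
    x0_pt: float
    top_pt: float
    x1_pt: float
    bottom_pt: float
    font_name: str | None = None    # always None for MinerU blocks
    font_size_pt: float | None = None


class ProcessResponse(BaseModel):
    preview: PreviewMeta
    fields: list[FieldResult]
```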
## Error Handling

Return actionable API errors (sketched below) for:

- Unsupported upload types.
- `.ai`-to-PDF conversion failure.
- Missing `MINERU_API_KEY`.
- MinerU upload URL request failure.
- MinerU upload PUT failure.
- MinerU polling timeout.
- MinerU task failure, including `err_msg` when present.
- Missing structured JSON in the downloaded zip.

The API key must not be logged or included in response payloads.
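One way to surface these as FastAPI errors, sketched with the illustrative `MineruError` from the client sketch above; the status codes are suggestions:

```python
from fastapi import HTTPException


def require_api_key(token: str | None) -> str:
    if not token:
        raise HTTPException(
            status_code=500,
            detail="MINERU_API_KEY is not configured on the backend",
        )
    return token


def mineru_failure(exc: Exception) -> HTTPException:
    # Includes err_msg via str(exc); never echoes the token itself.
    return HTTPException(status_code=502, detail=f"MinerU processing failed: {exc}")
```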
## Testing

Backend tests should cover (one mocked-HTTP sketch follows):

- MinerU JSON block extraction from a sample local JSON file.
- HTML table text conversion.
- Coordinate mapping from a MinerU bbox into field coordinates.
- Word comparison integration using mocked MinerU results.
- MinerU client control flow with mocked HTTP responses.

Manual verification should run the backend and frontend locally with `MINERU_API_KEY` set, upload the sample `.ai` and `.docx`, and confirm that result cards appear and the corresponding boxes highlight on the right preview.
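A control-flow test sketch with mocked HTTP, reusing the illustrative `submit_pdf_to_mineru` helper from the client sketch above (the real tests target the actual client module):

```python
from unittest import mock


def test_poll_until_done(monkeypatch, tmp_path):
    monkeypatch.setenv("MINERU_API_KEY", "test-token")
    pdf = tmp_path / "preview.pdf"
    pdf.write_bytes(b"%PDF-1.4")

    post = mock.Mock()
    post.json.return_value = {"data": {"batch_id": "b1", "file_urls": ["http://u"]}}
    running = mock.Mock()
    running.json.return_value = {"data": {"extract_result": [{"state": "running"}]}}
    done = mock.Mock()
    done.json.return_value = {
        "data": {"extract_result": [{"state": "done", "full_zip_url": "http://z"}]}
    }

    with mock.patch("requests.post", return_value=post), \
         mock.patch("requests.put", return_value=mock.Mock()), \
         mock.patch("requests.get", side_effect=[running, done]), \
         mock.patch("time.sleep"):
        assert submit_pdf_to_mineru(str(pdf)) == "http://z"
```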
24
frontend/.gitignore
vendored
Normal file
@@ -0,0 +1,24 @@
# Logs
logs
*.log
npm-debug.log*
yarn-debug.log*
yarn-error.log*
pnpm-debug.log*
lerna-debug.log*

node_modules
dist
dist-ssr
*.local

# Editor directories and files
.vscode/*
!.vscode/extensions.json
.idea
.DS_Store
*.suo
*.ntvs*
*.njsproj
*.sln
*.sw?
73
frontend/README.md
Normal file
@@ -0,0 +1,73 @@
# React + TypeScript + Vite

This template provides a minimal setup to get React working in Vite with HMR and some ESLint rules.

Currently, two official plugins are available:

- [@vitejs/plugin-react](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react) uses [Oxc](https://oxc.rs)
- [@vitejs/plugin-react-swc](https://github.com/vitejs/vite-plugin-react/blob/main/packages/plugin-react-swc) uses [SWC](https://swc.rs/)

## React Compiler

The React Compiler is not enabled on this template because of its impact on dev & build performance. To add it, see [this documentation](https://react.dev/learn/react-compiler/installation).

## Expanding the ESLint configuration

If you are developing a production application, we recommend updating the configuration to enable type-aware lint rules:

```js
export default defineConfig([
  globalIgnores(['dist']),
  {
    files: ['**/*.{ts,tsx}'],
    extends: [
      // Other configs...

      // Remove tseslint.configs.recommended and replace with this
      tseslint.configs.recommendedTypeChecked,
      // Alternatively, use this for stricter rules
      tseslint.configs.strictTypeChecked,
      // Optionally, add this for stylistic rules
      tseslint.configs.stylisticTypeChecked,

      // Other configs...
    ],
    languageOptions: {
      parserOptions: {
        project: ['./tsconfig.node.json', './tsconfig.app.json'],
        tsconfigRootDir: import.meta.dirname,
      },
      // other options...
    },
  },
])
```

You can also install [eslint-plugin-react-x](https://github.com/Rel1cx/eslint-react/tree/main/packages/plugins/eslint-plugin-react-x) and [eslint-plugin-react-dom](https://github.com/Rel1cx/eslint-react/tree/main/packages/plugins/eslint-plugin-react-dom) for React-specific lint rules:

```js
// eslint.config.js
import reactX from 'eslint-plugin-react-x'
import reactDom from 'eslint-plugin-react-dom'

export default defineConfig([
  globalIgnores(['dist']),
  {
    files: ['**/*.{ts,tsx}'],
    extends: [
      // Other configs...
      // Enable lint rules for React
      reactX.configs['recommended-typescript'],
      // Enable lint rules for React DOM
      reactDom.configs.recommended,
    ],
    languageOptions: {
      parserOptions: {
        project: ['./tsconfig.node.json', './tsconfig.app.json'],
        tsconfigRootDir: import.meta.dirname,
      },
      // other options...
    },
  },
])
```
23
frontend/eslint.config.js
Normal file
@@ -0,0 +1,23 @@
import js from '@eslint/js'
import globals from 'globals'
import reactHooks from 'eslint-plugin-react-hooks'
import reactRefresh from 'eslint-plugin-react-refresh'
import tseslint from 'typescript-eslint'
import { defineConfig, globalIgnores } from 'eslint/config'

export default defineConfig([
  globalIgnores(['dist']),
  {
    files: ['**/*.{ts,tsx}'],
    extends: [
      js.configs.recommended,
      tseslint.configs.recommended,
      reactHooks.configs.flat.recommended,
      reactRefresh.configs.vite,
    ],
    languageOptions: {
      ecmaVersion: 2020,
      globals: globals.browser,
    },
  },
])
13
frontend/index.html
Normal file
@@ -0,0 +1,13 @@
<!doctype html>
<html lang="zh-CN">
  <head>
    <meta charset="UTF-8" />
    <link rel="icon" type="image/svg+xml" href="/favicon.svg" />
    <meta name="viewport" content="width=device-width, initial-scale=1.0" />
    <title>诸老大包装审核</title>
  </head>
  <body>
    <div id="root"></div>
    <script type="module" src="/src/main.tsx"></script>
  </body>
</html>
3568
frontend/package-lock.json
generated
Normal file
33
frontend/package.json
Normal file
@@ -0,0 +1,33 @@
{
  "name": "frontend",
  "private": true,
  "version": "0.0.0",
  "type": "module",
  "scripts": {
    "dev": "vite",
    "build": "tsc -b && vite build",
    "lint": "eslint .",
    "preview": "vite preview"
  },
  "dependencies": {
    "pdfjs-dist": "^4.10.38",
    "react": "^19.2.4",
    "react-dom": "^19.2.4"
  },
  "devDependencies": {
    "@eslint/js": "^9.39.4",
    "@tailwindcss/vite": "^4.2.2",
    "@types/node": "^24.12.2",
    "@types/react": "^19.2.14",
    "@types/react-dom": "^19.2.3",
    "@vitejs/plugin-react": "^6.0.1",
    "eslint": "^9.39.4",
    "eslint-plugin-react-hooks": "^7.0.1",
    "eslint-plugin-react-refresh": "^0.5.2",
    "globals": "^17.4.0",
    "tailwindcss": "^4.2.2",
    "typescript": "~6.0.2",
    "typescript-eslint": "^8.58.0",
    "vite": "^8.0.4"
  }
}
1
frontend/public/favicon.svg
Normal file
|
After Width: | Height: | Size: 9.3 KiB |
24
frontend/public/icons.svg
Normal file
@@ -0,0 +1,24 @@
<svg xmlns="http://www.w3.org/2000/svg">
  <symbol id="bluesky-icon" viewBox="0 0 16 17">
    <g clip-path="url(#bluesky-clip)"><path fill="#08060d" d="M7.75 7.735c-.693-1.348-2.58-3.86-4.334-5.097-1.68-1.187-2.32-.981-2.74-.79C.188 2.065.1 2.812.1 3.251s.241 3.602.398 4.13c.52 1.744 2.367 2.333 4.07 2.145-2.495.37-4.71 1.278-1.805 4.512 3.196 3.309 4.38-.71 4.987-2.746.608 2.036 1.307 5.91 4.93 2.746 2.72-2.746.747-4.143-1.747-4.512 1.702.189 3.55-.4 4.07-2.145.156-.528.397-3.691.397-4.13s-.088-1.186-.575-1.406c-.42-.19-1.06-.395-2.741.79-1.755 1.24-3.64 3.752-4.334 5.099"/></g>
    <defs><clipPath id="bluesky-clip"><path fill="#fff" d="M.1.85h15.3v15.3H.1z"/></clipPath></defs>
  </symbol>
  <symbol id="discord-icon" viewBox="0 0 20 19">
    <path fill="#08060d" d="M16.224 3.768a14.5 14.5 0 0 0-3.67-1.153c-.158.286-.343.67-.47.976a13.5 13.5 0 0 0-4.067 0c-.128-.306-.317-.69-.476-.976A14.4 14.4 0 0 0 3.868 3.77C1.546 7.28.916 10.703 1.231 14.077a14.7 14.7 0 0 0 4.5 2.306q.545-.748.965-1.587a9.5 9.5 0 0 1-1.518-.74q.191-.14.372-.293c2.927 1.369 6.107 1.369 8.999 0q.183.152.372.294-.723.437-1.52.74.418.838.963 1.588a14.6 14.6 0 0 0 4.504-2.308c.37-3.911-.63-7.302-2.644-10.309m-9.13 8.234c-.878 0-1.599-.82-1.599-1.82 0-.998.705-1.82 1.6-1.82.894 0 1.614.82 1.599 1.82.001 1-.705 1.82-1.6 1.82m5.91 0c-.878 0-1.599-.82-1.599-1.82 0-.998.705-1.82 1.6-1.82.893 0 1.614.82 1.599 1.82 0 1-.706 1.82-1.6 1.82"/>
  </symbol>
  <symbol id="documentation-icon" viewBox="0 0 21 20">
    <path fill="none" stroke="#aa3bff" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.35" d="m15.5 13.333 1.533 1.322c.645.555.967.833.967 1.178s-.322.623-.967 1.179L15.5 18.333m-3.333-5-1.534 1.322c-.644.555-.966.833-.966 1.178s.322.623.966 1.179l1.534 1.321"/>
    <path fill="none" stroke="#aa3bff" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.35" d="M17.167 10.836v-4.32c0-1.41 0-2.117-.224-2.68-.359-.906-1.118-1.621-2.08-1.96-.599-.21-1.349-.21-2.848-.21-2.623 0-3.935 0-4.983.369-1.684.591-3.013 1.842-3.641 3.428C3 6.449 3 7.684 3 10.154v2.122c0 2.558 0 3.838.706 4.726q.306.383.713.671c.76.536 1.79.64 3.581.66"/>
    <path fill="none" stroke="#aa3bff" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.35" d="M3 10a2.78 2.78 0 0 1 2.778-2.778c.555 0 1.209.097 1.748-.047.48-.129.854-.503.982-.982.145-.54.048-1.194.048-1.749a2.78 2.78 0 0 1 2.777-2.777"/>
  </symbol>
  <symbol id="github-icon" viewBox="0 0 19 19">
    <path fill="#08060d" fill-rule="evenodd" d="M9.356 1.85C5.05 1.85 1.57 5.356 1.57 9.694a7.84 7.84 0 0 0 5.324 7.44c.387.079.528-.168.528-.376 0-.182-.013-.805-.013-1.454-2.165.467-2.616-.935-2.616-.935-.349-.91-.864-1.143-.864-1.143-.71-.48.051-.48.051-.48.787.051 1.2.805 1.2.805.695 1.194 1.817.857 2.268.649.064-.507.27-.857.49-1.052-1.728-.182-3.545-.857-3.545-3.87 0-.857.31-1.558.8-2.104-.078-.195-.349-1 .077-2.078 0 0 .657-.208 2.14.805a7.5 7.5 0 0 1 1.946-.26c.657 0 1.328.092 1.946.26 1.483-1.013 2.14-.805 2.14-.805.426 1.078.155 1.883.078 2.078.502.546.799 1.247.799 2.104 0 3.013-1.818 3.675-3.558 3.87.284.247.528.714.528 1.454 0 1.052-.012 1.896-.012 2.156 0 .208.142.455.528.377a7.84 7.84 0 0 0 5.324-7.441c.013-4.338-3.48-7.844-7.773-7.844" clip-rule="evenodd"/>
  </symbol>
  <symbol id="social-icon" viewBox="0 0 20 20">
    <path fill="none" stroke="#aa3bff" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.35" d="M12.5 6.667a4.167 4.167 0 1 0-8.334 0 4.167 4.167 0 0 0 8.334 0"/>
    <path fill="none" stroke="#aa3bff" stroke-linecap="round" stroke-linejoin="round" stroke-width="1.35" d="M2.5 16.667a5.833 5.833 0 0 1 8.75-5.053m3.837.474.513 1.035c.07.144.257.282.414.309l.93.155c.596.1.736.536.307.965l-.723.73a.64.64 0 0 0-.152.531l.207.903c.164.715-.213.991-.84.618l-.872-.52a.63.63 0 0 0-.577 0l-.872.52c-.624.373-1.003.094-.84-.618l.207-.903a.64.64 0 0 0-.152-.532l-.723-.729c-.426-.43-.289-.864.306-.964l.93-.156a.64.64 0 0 0 .412-.31l.513-1.034c.28-.562.735-.562 1.012 0"/>
  </symbol>
  <symbol id="x-icon" viewBox="0 0 19 19">
    <path fill="#08060d" fill-rule="evenodd" d="M1.893 1.98c.052.072 1.245 1.769 2.653 3.77l2.892 4.114c.183.261.333.48.333.486s-.068.089-.152.183l-.522.593-.765.867-3.597 4.087c-.375.426-.734.834-.798.905a1 1 0 0 0-.118.148c0 .01.236.017.664.017h.663l.729-.83c.4-.457.796-.906.879-.999a692 692 0 0 0 1.794-2.038c.034-.037.301-.34.594-.675l.551-.624.345-.392a7 7 0 0 1 .34-.374c.006 0 .93 1.306 2.052 2.903l2.084 2.965.045.063h2.275c1.87 0 2.273-.003 2.266-.021-.008-.02-1.098-1.572-3.894-5.547-2.013-2.862-2.28-3.246-2.273-3.266.008-.019.282-.332 2.085-2.38l2-2.274 1.567-1.782c.022-.028-.016-.03-.65-.03h-.674l-.3.342a871 871 0 0 1-1.782 2.025c-.067.075-.405.458-.75.852a100 100 0 0 1-.803.91c-.148.172-.299.344-.99 1.127-.304.343-.32.358-.345.327-.015-.019-.904-1.282-1.976-2.808L6.365 1.85H1.8zm1.782.91 8.078 11.294c.772 1.08 1.413 1.973 1.425 1.984.016.017.241.02 1.05.017l1.03-.004-2.694-3.766L7.796 5.75 5.722 2.852l-1.039-.004-1.039-.004z" clip-rule="evenodd"/>
  </symbol>
</svg>
|
After Width: | Height: | Size: 4.9 KiB |
1549
frontend/src/App.css
Normal file
710
frontend/src/App.tsx
Normal file
@@ -0,0 +1,710 @@
import { useCallback, useMemo, useRef, useState } from 'react'
import type { FormEvent, KeyboardEvent } from 'react'
import './App.css'
import { LogSidebar } from './components/LogSidebar'
import { PdfPreview } from './components/PdfPreview'
import type { BarcodeResult, FieldResult, ProcessResponse, ValidationStatus } from './types'
import { MOCK_RESULT } from './mockData'

const API_BASE_URL = import.meta.env.VITE_API_BASE_URL ?? ''
const DEFAULT_AI_FILENAME = '【2026-04-09】端午 - 背标 - 天问.ai'
const DEFAULT_WORD_FILENAME = '天问礼品粽【260331】.docx'

/**
 * Table renderer:
 * - Prefers the raw MinerU HTML (keeps colspan/rowspan; suits complex tables such as nutrition panels)
 * - Falls back to parsing |-separated text (simple tables)
 */
function FieldTable({ text, tableHtml }: { text: string; tableHtml?: string | null }) {
  if (tableHtml) {
    return (
      <div
        className="field-table-wrap field-table-html"
        // HTML comes from our own MinerU parse output, not user input; risk is contained
        // eslint-disable-next-line react/no-danger
        dangerouslySetInnerHTML={{ __html: tableHtml }}
      />
    )
  }

  // Fallback: |-separated text → React table
  const rows = text
    .split('\n')
    .map((r) => r.trim())
    .filter(Boolean)
    .map((r) => r.split('|'))
  if (rows.length === 0) return null
  const [header, ...body] = rows
  return (
    <div className="field-table-wrap">
      <table className="field-table">
        <thead>
          <tr>
            {header.map((cell, i) => (
              <th key={i}>{cell}</th>
            ))}
          </tr>
        </thead>
        <tbody>
          {body.map((row, ri) => (
            <tr key={ri}>
              {row.map((cell, ci) => (
                <td key={ci}>{cell}</td>
              ))}
            </tr>
          ))}
        </tbody>
      </table>
    </div>
  )
}

/**
 * Match-evidence renderer:
 * - GFM pipe table rows (`| 能量 | 1004千焦 | ...`) → HTML table
 * - Full-width | separated rows → HTML table
 * - block_type==='table' with no recognizable table structure → fall back to tableHtml rendering
 * - Plain text → <p>
 */
function ExcerptView({
  excerpt,
  reason,
  blockType,
  tableHtml,
}: {
  excerpt: string | null
  reason: string
  blockType?: string
  tableHtml?: string | null
}) {
  if (excerpt) {
    // ① GFM half-width pipe table (pandoc Markdown output)
    const halfPipeRows = excerpt
      .split('\n')
      .map((r) => r.trim())
      .filter((r) => r.startsWith('|'))
      .filter((r) => !/^\|[\s\-:|]+\|$/.test(r))
      .map((r) =>
        r
          .replace(/^\|/, '')
          .replace(/\|$/, '')
          .split('|')
          .map((c) => c.trim()),
      )
      .filter((row) => row.length > 0 && row.some(Boolean))

    if (halfPipeRows.length > 0) {
      return (
        <div className="field-table-wrap field-excerpt-table">
          <table className="field-table">
            <tbody>
              {halfPipeRows.map((row, ri) => (
                <tr key={ri}>
                  {row.map((cell, ci) => (
                    <td key={ci}>{cell || '—'}</td>
                  ))}
                </tr>
              ))}
            </tbody>
          </table>
        </div>
      )
    }

    // ② Full-width | separators (python-docx fallback / MinerU table text)
    const fullPipeRows = excerpt
      .split('\n')
      .map((r) => r.trim())
      .filter((r) => r.includes('|'))
      .map((r) => r.split('|').map((c) => c.trim()))
      .filter((row) => row.length > 1 && row.some(Boolean))

    if (fullPipeRows.length > 0) {
      return (
        <div className="field-table-wrap field-excerpt-table">
          <table className="field-table">
            <tbody>
              {fullPipeRows.map((row, ri) => (
                <tr key={ri}>
                  {row.map((cell, ci) => (
                    <td key={ci}>{cell || '—'}</td>
                  ))}
                </tr>
              ))}
            </tbody>
          </table>
        </div>
      )
    }

    // ③ Table field whose excerpt is plain text → fall back to the field's own tableHtml as reference
    if (blockType === 'table' && tableHtml) {
      return (
        <div
          className="field-table-wrap field-table-html field-excerpt-table"
          // eslint-disable-next-line react/no-danger
          dangerouslySetInnerHTML={{ __html: tableHtml }}
        />
      )
    }

    return <p className="field-reason">{excerpt}</p>
  }

  // No excerpt but tableHtml is available
  if (blockType === 'table' && tableHtml) {
    return (
      <div
        className="field-table-wrap field-table-html field-excerpt-table"
        // eslint-disable-next-line react/no-danger
        dangerouslySetInnerHTML={{ __html: tableHtml }}
      />
    )
  }

  return <p className="field-reason">{reason || '—'}</p>
}

function statusLabel(status: FieldResult['validation_status']): string {
  if (status === 'matched') return '校验成功'
  if (status === 'empty_or_garbled') return '文本异常'
  return '校验失败'
}

function statusIcon(status: FieldResult['validation_status']): string {
  if (status === 'matched') return '✓'
  if (status === 'empty_or_garbled') return '!'
  return '✗'
}

function statusTone(status: FieldResult['validation_status']): string {
  if (status === 'matched') return '通过'
  if (status === 'empty_or_garbled') return '异常'
  return '失败'
}

function App() {
  const [aiFile, setAiFile] = useState<File | null>(null)
  const [wordFile, setWordFile] = useState<File | null>(null)
  const [result, setResult] = useState<ProcessResponse | null>(null)
  const [loading, setLoading] = useState(false)
  const [error, setError] = useState<string | null>(null)
  const [hoveredFieldId, setHoveredFieldId] = useState<string | null>(null)
  const [selectedFieldId, setSelectedFieldId] = useState<string | null>(null)
  const [selectedBarcodeIndex, setSelectedBarcodeIndex] = useState<number | null>(null)
  const [sidebarTab, setSidebarTab] = useState<'fields' | 'word' | 'barcode'>('fields')
  const [sidebarWidth, setSidebarWidth] = useState(360)
  const isResizing = useRef(false)
  const fieldCardRefs = useRef<Map<string, HTMLDivElement>>(new Map())
  const barcodeCardRefs = useRef<Map<number, HTMLDivElement>>(new Map())

  /** Clicking a stat card jumps to the first field of that status */
  function jumpToFirstOfStatus(status: ValidationStatus | 'all') {
    if (!result) return
    const target =
      status === 'all'
        ? result.fields[0]
        : result.fields.find((f) => f.validation_status === status)
    if (!target) return

    setSidebarTab('fields')
    setSelectedFieldId(target.id)

    requestAnimationFrame(() => {
      fieldCardRefs.current.get(target.id)?.scrollIntoView({
        behavior: 'smooth',
        block: 'nearest',
      })
    })
  }

  /**
   * Clicking a field overlay in the right preview switches the left panel
   * to the fields tab and scrolls to the matching card.
   */
  const handlePreviewFieldSelect = useCallback((fieldId: string) => {
    setSelectedFieldId(fieldId)
    setSidebarTab('fields')
    requestAnimationFrame(() => {
      fieldCardRefs.current.get(fieldId)?.scrollIntoView({
        behavior: 'smooth',
        block: 'nearest',
      })
    })
  }, [])

  /**
   * Clicking a barcode overlay in the right preview switches to the barcode tab
   * and scrolls to the matching card.
   */
  const handlePreviewBarcodeSelect = useCallback((index: number) => {
    setSelectedBarcodeIndex(index)
    setSidebarTab('barcode')
    requestAnimationFrame(() => {
      barcodeCardRefs.current.get(index)?.scrollIntoView({
        behavior: 'smooth',
        block: 'nearest',
      })
    })
  }, [])

  const handleResizeStart = useCallback((e: React.MouseEvent) => {
    e.preventDefault()
    isResizing.current = true
    const startX = e.clientX
    const startWidth = sidebarWidth

    function onMove(ev: MouseEvent) {
      if (!isResizing.current) return
      const next = Math.max(220, Math.min(640, startWidth + ev.clientX - startX))
      setSidebarWidth(next)
    }

    function onUp() {
      isResizing.current = false
      document.body.style.cursor = ''
      document.body.style.userSelect = ''
      document.removeEventListener('mousemove', onMove)
      document.removeEventListener('mouseup', onUp)
    }

    document.body.style.cursor = 'col-resize'
    document.body.style.userSelect = 'none'
    document.addEventListener('mousemove', onMove)
    document.addEventListener('mouseup', onUp)
  }, [sidebarWidth])

  const summary = useMemo(() => {
    const fields = result?.fields ?? []
    return {
      total: fields.length,
      matched: fields.filter((f) => f.validation_status === 'matched').length,
      unmatched: fields.filter((f) => f.validation_status === 'unmatched').length,
      garbled: fields.filter((f) => f.validation_status === 'empty_or_garbled').length,
    }
  }, [result])

  function handleFieldCardKeyDown(event: KeyboardEvent<HTMLDivElement>, fieldId: string) {
    if (event.key !== 'Enter' && event.key !== ' ') {
      return
    }

    event.preventDefault()
    setSelectedFieldId(fieldId)
  }

  async function handleSubmit(event: FormEvent<HTMLFormElement>) {
    event.preventDefault()

    setLoading(true)
    setError(null)

    try {
      const requestInit: RequestInit = { method: 'POST' }

      if (aiFile || wordFile) {
        const formData = new FormData()
        if (aiFile) {
          formData.append('ai_file', aiFile)
        }
        if (wordFile) {
          formData.append('word_file', wordFile)
        }
        requestInit.body = formData
      }

      const response = await fetch(`${API_BASE_URL}/api/process`, requestInit)

      const payload = await response.json()
      if (!response.ok) {
        throw new Error(payload.detail ?? '处理失败,请稍后重试。')
      }

      setResult(payload)
      setSelectedFieldId(payload.fields[0]?.id ?? null)
      setHoveredFieldId(null)
    } catch (caughtError) {
      setResult(null)
      setSelectedFieldId(null)
      setHoveredFieldId(null)
      setError(caughtError instanceof Error ? caughtError.message : '发生未知错误')
    } finally {
      setLoading(false)
    }
  }

  return (
    <>
      <LogSidebar apiBaseUrl={API_BASE_URL} />
      <main className="app-shell">
        {/* ── Top panel (covered by the workspace once scrolled) ── */}
        <section className="top-panel">
          <div className="top-panel-intro">
            <div className="brand-icon" aria-hidden="true">🔍</div>
            <div>
              <p className="eyebrow">诸老大包装审核</p>
              <h1>智能合规审核系统</h1>
              <p className="hero-copy">
                上传 Illustrator 源文件与 Word 校对稿,系统会将设计文件转换为 PDF 后交给 MinerU 解析,
                再把识别出的版面文字与 Word 内容逐块比对,并在右侧预览中同步高亮定位。
              </p>
            </div>
          </div>

          <form className="upload-panel" onSubmit={handleSubmit}>
            <label className="upload-field">
              <span>AI 设计文件</span>
              <div className={`upload-dropzone ${(aiFile ?? DEFAULT_AI_FILENAME) ? 'has-file' : ''}`}>
                <span className="upload-dropzone-icon">{aiFile ? '📄' : '🧪'}</span>
                <span className="upload-dropzone-text">
                  {aiFile?.name ?? DEFAULT_AI_FILENAME}
                </span>
                <input
                  type="file"
                  accept=".ai,.pdf"
                  onChange={(e) => setAiFile(e.target.files?.[0] ?? null)}
                />
              </div>
            </label>

            <label className="upload-field">
              <span>Word 校对稿</span>
              <div className={`upload-dropzone ${(wordFile ?? DEFAULT_WORD_FILENAME) ? 'has-file' : ''}`}>
                <span className="upload-dropzone-icon">{wordFile ? '📝' : '🧪'}</span>
                <span className="upload-dropzone-text">
                  {wordFile?.name ?? DEFAULT_WORD_FILENAME}
                </span>
                <input
                  type="file"
                  accept=".docx"
                  onChange={(e) => setWordFile(e.target.files?.[0] ?? null)}
                />
              </div>
            </label>

            <button className="primary-button" type="submit" disabled={loading}>
              {loading ? '解析中…' : '开始解析'}
            </button>
          </form>

          {/* Dev-only: quickly load hard-coded test data */}
          {import.meta.env.DEV && (
            <button
              className="mock-button"
              type="button"
              onClick={() => {
                setResult(MOCK_RESULT)
                setSelectedFieldId(MOCK_RESULT.fields[0]?.id ?? null)
                setHoveredFieldId(null)
                setError(null)
              }}
            >
              🧪 载入测试数据
            </button>
          )}

          {error ? <p className="error-banner">{error}</p> : null}
        </section>

        {/* ── Workspace (sticky; fills the screen once scrolled) ── */}
        <section
          className="workspace"
          style={{ gridTemplateColumns: `${sidebarWidth}px 20px minmax(0, 1fr)` }}
        >
          <aside className="sidebar">
            <div className="sidebar-header">
              <div>
                <p className="section-kicker">字段列表</p>
                <h2>解析结果</h2>
              </div>
              {result ? (
                <div className="stats-grid">
                  <div
                    className="stat-card stat-card-clickable"
                    role="button"
                    tabIndex={0}
                    title="跳到第一个字段"
                    onClick={() => jumpToFirstOfStatus('all')}
                    onKeyDown={(e) => (e.key === 'Enter' || e.key === ' ') && jumpToFirstOfStatus('all')}
                  >
                    <span>总数</span>
                    <strong>{summary.total}</strong>
                  </div>
                  <div
                    className={`stat-card matched${summary.matched > 0 ? ' stat-card-clickable' : ''}`}
                    role={summary.matched > 0 ? 'button' : undefined}
                    tabIndex={summary.matched > 0 ? 0 : undefined}
                    title={summary.matched > 0 ? '跳到第一个通过项' : undefined}
                    onClick={() => summary.matched > 0 && jumpToFirstOfStatus('matched')}
                    onKeyDown={(e) => summary.matched > 0 && (e.key === 'Enter' || e.key === ' ') && jumpToFirstOfStatus('matched')}
                  >
                    <span>成功</span>
                    <strong>{summary.matched}</strong>
                  </div>
                  <div
                    className={`stat-card unmatched${summary.unmatched > 0 ? ' stat-card-clickable' : ''}`}
                    role={summary.unmatched > 0 ? 'button' : undefined}
                    tabIndex={summary.unmatched > 0 ? 0 : undefined}
                    title={summary.unmatched > 0 ? '跳到第一个失败项' : undefined}
                    onClick={() => summary.unmatched > 0 && jumpToFirstOfStatus('unmatched')}
                    onKeyDown={(e) => summary.unmatched > 0 && (e.key === 'Enter' || e.key === ' ') && jumpToFirstOfStatus('unmatched')}
                  >
                    <span>失败</span>
                    <strong>{summary.unmatched}</strong>
                  </div>
                  <div
                    className={`stat-card garbled${summary.garbled > 0 ? ' stat-card-clickable' : ''}`}
                    role={summary.garbled > 0 ? 'button' : undefined}
                    tabIndex={summary.garbled > 0 ? 0 : undefined}
                    title={summary.garbled > 0 ? '跳到第一个异常项' : undefined}
                    onClick={() => summary.garbled > 0 && jumpToFirstOfStatus('empty_or_garbled')}
                    onKeyDown={(e) => summary.garbled > 0 && (e.key === 'Enter' || e.key === ' ') && jumpToFirstOfStatus('empty_or_garbled')}
                  >
                    <span>异常</span>
                    <strong>{summary.garbled}</strong>
                  </div>
                </div>
              ) : null}
            </div>

            {/* ── Tab switcher ── */}
            <div className="sidebar-tabs">
              <button
                className={`sidebar-tab${sidebarTab === 'fields' ? ' active' : ''}`}
                onClick={() => setSidebarTab('fields')}
                type="button"
              >
                字段解析结果
              </button>
              <button
                className={`sidebar-tab${sidebarTab === 'word' ? ' active' : ''}`}
                onClick={() => setSidebarTab('word')}
                type="button"
              >
                Word 解析结果
              </button>
              <button
                className={`sidebar-tab${sidebarTab === 'barcode' ? ' active' : ''}`}
                onClick={() => setSidebarTab('barcode')}
                type="button"
              >
                条形码识别
              </button>
            </div>

            {/* ── Tab content ── */}
            {sidebarTab === 'fields' ? (
              <div className="field-list">
                {result?.fields.length ? (
                  result.fields.map((field) => {
                    const isSelected = field.id === selectedFieldId
                    const isHovered = field.id === hoveredFieldId

                    return (
                      <div
                        key={field.id}
                        ref={(node) => {
                          if (node) fieldCardRefs.current.set(field.id, node)
                          else fieldCardRefs.current.delete(field.id)
                        }}
                        className={[
                          'field-card',
                          `status-${field.validation_status}`,
                          isSelected ? 'selected' : '',
                          isHovered ? 'hovered' : '',
                        ]
                          .filter(Boolean)
                          .join(' ')}
                        role="button"
                        tabIndex={0}
                        aria-pressed={isSelected}
                        onMouseEnter={() => setHoveredFieldId(field.id)}
                        onMouseLeave={() =>
                          setHoveredFieldId((cur) => (cur === field.id ? null : cur))
                        }
                        onClick={() => setSelectedFieldId(field.id)}
                        onKeyDown={(event) => handleFieldCardKeyDown(event, field.id)}
                      >
                        <div className="field-card-header">
                          <div className="field-card-title-group">
                            <span className="field-label">审核项</span>
                            <span className="field-id">{field.id}</span>
                          </div>
                          <span className={`pill pill-${field.validation_status}`}>
                            {statusIcon(field.validation_status)} {statusTone(field.validation_status)}
                          </span>
                        </div>
                        <div className="field-section">
                          <span className="field-label">审核内容</span>
                          {field.block_type === 'table' && (field.table_html || field.text.includes('|')) ? (
                            <FieldTable text={field.text} tableHtml={field.table_html} />
                          ) : (
                            <pre className="field-text">
                              {field.text || field.normalized_text || '(版面文字为空)'}
                            </pre>
                          )}
                        </div>
                        <div className="field-section">
                          <span className="field-label">检测依据</span>
                          <ExcerptView
                            excerpt={field.matched_excerpt ?? null}
                            reason={field.validation_reason}
                            blockType={field.block_type}
                            tableHtml={field.table_html}
                          />
                          {field.validation_status !== 'matched' && field.validation_reason && (
                            <p className="field-reason-note">{field.validation_reason}</p>
                          )}
                        </div>
                        <div className="field-footer">
                          <div className="field-status-block">
                            <span className="field-label">状态</span>
                            <span className={`field-status-text status-text-${field.validation_status}`}>
                              {statusLabel(field.validation_status)}
                            </span>
                          </div>
                          <div className="field-meta">
                            <span>第 {field.page} 页</span>
                            {field.font_name ? <span>{field.font_name}</span> : null}
                            {typeof field.font_size_pt === 'number' ? <span>{field.font_size_pt} pt</span> : null}
                            {typeof field.font_height_mm === 'number' ? <span>{field.font_height_mm.toFixed(1)} mm</span> : null}
                          </div>
                        </div>
                      </div>
                    )
                  })
                ) : (
                  <div className="empty-panel">
                    <span className="empty-icon">📋</span>
                    <p>上传文件并解析后,这里将显示逐字段的校验结果。</p>
                    <p>点击任意字段,右侧预览会自动聚焦放大对应区域。</p>
                  </div>
                )}
              </div>
            ) : sidebarTab === 'word' ? (
              <div className="field-list word-text-panel">
                {result?.word_html ? (
                  /* pandoc HTML keeps colspan/rowspan; inject directly */
                  <div
                    className="word-html-wrap"
                    // HTML comes from our own pandoc conversion, not user input; risk is contained
                    // eslint-disable-next-line react/no-danger
                    dangerouslySetInnerHTML={{ __html: result.word_html }}
                  />
                ) : result?.word_text ? (
                  /* Fallback: render plain text line by line */
                  result.word_text.split('\n').map((line, i) => {
                    const cells = line.split('|')
                    return cells.length > 1 ? (
                      <div key={i} className="word-table-row">
                        {cells.map((cell, j) => (
                          <span key={j} className="word-table-cell">{cell}</span>
                        ))}
                      </div>
                    ) : (
                      <p key={i} className="word-text-line">{line}</p>
                    )
                  })
                ) : (
                  <div className="empty-panel">
                    <span className="empty-icon">📝</span>
                    <p>解析完成后,这里将显示从 Word 校对稿中提取的全部文本内容。</p>
                  </div>
                )}
              </div>
            ) : (
              <div className="field-list barcode-panel">
                {result?.barcodes?.length ? (
                  result.barcodes.map((bc: BarcodeResult, i: number) => {
                    const isSelected = i === selectedBarcodeIndex
                    return (
                      <div
                        key={i}
                        ref={(node) => {
                          if (node) barcodeCardRefs.current.set(i, node)
                          else barcodeCardRefs.current.delete(i)
                        }}
                        className={[
                          'barcode-card',
                          bc.valid ? '' : 'barcode-invalid',
                          isSelected ? 'selected' : '',
                        ].filter(Boolean).join(' ')}
                        role="button"
                        tabIndex={0}
                        onClick={() => setSelectedBarcodeIndex(i)}
                        onKeyDown={(e) => (e.key === 'Enter' || e.key === ' ') && setSelectedBarcodeIndex(i)}
                      >
                        <div className="barcode-card-header">
                          <span className="barcode-format-badge">{bc.format_label}</span>
                          {!bc.valid && <span className="barcode-invalid-badge">识别可疑</span>}
                        </div>
                        {bc.crop_url && (
                          <img
                            src={`${API_BASE_URL}${bc.crop_url}`}
                            alt={`${bc.format_label} 条码图片`}
                            className="barcode-crop-img"
                          />
                        )}
                        <pre className="barcode-text">{bc.text}</pre>
                        <div className="barcode-meta">
                          <span>原始格式:{bc.format}</span>
                          <span>位置:({bc.x0}, {bc.y0}) – ({bc.x1}, {bc.y1})</span>
                        </div>
                      </div>
                    )
                  })
                ) : (
                  <div className="empty-panel">
                    <span className="empty-icon">🔲</span>
                    <p>解析完成后,这里将显示在包装图中检测到的条形码和二维码内容。</p>
                    <p>支持 EAN-13、Code 128、QR Code 等主流格式。</p>
                  </div>
                )}
              </div>
            )}
          </aside>

          {/* ── Drag handle ── */}
          <div
            className="resize-handle"
            onMouseDown={handleResizeStart}
            role="separator"
            aria-orientation="vertical"
            aria-label="拖动调整面板宽度"
          />

          <section className="preview-panel">
            <div className="preview-header">
              <div>
                <p className="section-kicker">AI 预览</p>
                <h2>预览与高亮</h2>
              </div>
            </div>
            {result ? (
              <PdfPreview
                preview={result.preview}
                fields={result.fields}
                barcodes={result.barcodes}
                activeFieldId={selectedFieldId}
                hoveredFieldId={hoveredFieldId}
                activeBarcodeIndex={selectedBarcodeIndex}
                apiBaseUrl={API_BASE_URL}
                onFieldSelect={handlePreviewFieldSelect}
                onBarcodeSelect={handlePreviewBarcodeSelect}
              />
            ) : (
              <div className="empty-preview">
                <span className="empty-icon">🖼️</span>
                <p>AI 预览将在解析完成后显示于此处。</p>
                <p>悬停或点击左侧字段,对应区域会同步高亮并自动放大。</p>
              </div>
            )}
          </section>
        </section>
      </main>
    </>
  )
}

export default App
BIN
frontend/src/assets/hero.png
Normal file
|
After Width: | Height: | Size: 44 KiB |
1
frontend/src/assets/react.svg
Normal file
@@ -0,0 +1 @@
<svg xmlns="http://www.w3.org/2000/svg" xmlns:xlink="http://www.w3.org/1999/xlink" aria-hidden="true" role="img" class="iconify iconify--logos" width="35.93" height="32" preserveAspectRatio="xMidYMid meet" viewBox="0 0 256 228"><path fill="#00D8FF" d="M210.483 73.824a171.49 171.49 0 0 0-8.24-2.597c.465-1.9.893-3.777 1.273-5.621c6.238-30.281 2.16-54.676-11.769-62.708c-13.355-7.7-35.196.329-57.254 19.526a171.23 171.23 0 0 0-6.375 5.848a155.866 155.866 0 0 0-4.241-3.917C100.759 3.829 77.587-4.822 63.673 3.233C50.33 10.957 46.379 33.89 51.995 62.588a170.974 170.974 0 0 0 1.892 8.48c-3.28.932-6.445 1.924-9.474 2.98C17.309 83.498 0 98.307 0 113.668c0 15.865 18.582 31.778 46.812 41.427a145.52 145.52 0 0 0 6.921 2.165a167.467 167.467 0 0 0-2.01 9.138c-5.354 28.2-1.173 50.591 12.134 58.266c13.744 7.926 36.812-.22 59.273-19.855a145.567 145.567 0 0 0 5.342-4.923a168.064 168.064 0 0 0 6.92 6.314c21.758 18.722 43.246 26.282 56.54 18.586c13.731-7.949 18.194-32.003 12.4-61.268a145.016 145.016 0 0 0-1.535-6.842c1.62-.48 3.21-.974 4.76-1.488c29.348-9.723 48.443-25.443 48.443-41.52c0-15.417-17.868-30.326-45.517-39.844Zm-6.365 70.984c-1.4.463-2.836.91-4.3 1.345c-3.24-10.257-7.612-21.163-12.963-32.432c5.106-11 9.31-21.767 12.459-31.957c2.619.758 5.16 1.557 7.61 2.4c23.69 8.156 38.14 20.213 38.14 29.504c0 9.896-15.606 22.743-40.946 31.14Zm-10.514 20.834c2.562 12.94 2.927 24.64 1.23 33.787c-1.524 8.219-4.59 13.698-8.382 15.893c-8.067 4.67-25.32-1.4-43.927-17.412a156.726 156.726 0 0 1-6.437-5.87c7.214-7.889 14.423-17.06 21.459-27.246c12.376-1.098 24.068-2.894 34.671-5.345a134.17 134.17 0 0 1 1.386 6.193ZM87.276 214.515c-7.882 2.783-14.16 2.863-17.955.675c-8.075-4.657-11.432-22.636-6.853-46.752a156.923 156.923 0 0 1 1.869-8.499c10.486 2.32 22.093 3.988 34.498 4.994c7.084 9.967 14.501 19.128 21.976 27.15a134.668 134.668 0 0 1-4.877 4.492c-9.933 8.682-19.886 14.842-28.658 17.94ZM50.35 144.747c-12.483-4.267-22.792-9.812-29.858-15.863c-6.35-5.437-9.555-10.836-9.555-15.216c0-9.322 13.897-21.212 37.076-29.293c2.813-.98 5.757-1.905 8.812-2.773c3.204 10.42 7.406 21.315 12.477 32.332c-5.137 11.18-9.399 22.249-12.634 32.792a134.718 134.718 0 0 1-6.318-1.979Zm12.378-84.26c-4.811-24.587-1.616-43.134 6.425-47.789c8.564-4.958 27.502 2.111 47.463 19.835a144.318 144.318 0 0 1 3.841 3.545c-7.438 7.987-14.787 17.08-21.808 26.988c-12.04 1.116-23.565 2.908-34.161 5.309a160.342 160.342 0 0 1-1.76-7.887Zm110.427 27.268a347.8 347.8 0 0 0-7.785-12.803c8.168 1.033 15.994 2.404 23.343 4.08c-2.206 7.072-4.956 14.465-8.193 22.045a381.151 381.151 0 0 0-7.365-13.322Zm-45.032-43.861c5.044 5.465 10.096 11.566 15.065 18.186a322.04 322.04 0 0 0-30.257-.006c4.974-6.559 10.069-12.652 15.192-18.18ZM82.802 87.83a323.167 323.167 0 0 0-7.227 13.238c-3.184-7.553-5.909-14.98-8.134-22.152c7.304-1.634 15.093-2.97 23.209-3.984a321.524 321.524 0 0 0-7.848 12.897Zm8.081 65.352c-8.385-.936-16.291-2.203-23.593-3.793c2.26-7.3 5.045-14.885 8.298-22.6a321.187 321.187 0 0 0 7.257 13.246c2.594 4.48 5.28 8.868 8.038 13.147Zm37.542 31.03c-5.184-5.592-10.354-11.779-15.403-18.433c4.902.192 9.899.29 14.978.29c5.218 0 10.376-.117 15.453-.343c-4.985 6.774-10.018 12.97-15.028 18.486Zm52.198-57.817c3.422 7.8 6.306 15.345 8.596 22.52c-7.422 1.694-15.436 3.058-23.88 4.071a382.417 382.417 0 0 0 7.859-13.026a347.403 347.403 0 0 0 7.425-13.565Zm-16.898 8.101a358.557 358.557 0 0 1-12.281 19.815a329.4 329.4 0 0 1-23.444.823c-7.967 0-15.716-.248-23.178-.732a310.202 310.202 0 0 1-12.513-19.846h.001a307.41 307.41 0 0 1-10.923-20.627a310.278 310.278 0 0 1 10.89-20.637l-.001.001a307.318 307.318 0 0 1 12.413-19.761c7.613-.576 15.42-.876 23.31-.876H128c7.926 0 15.743.303 23.354.883a329.357 329.357 0 0 1 12.335 19.695a358.489 358.489 0 0 1 11.036 20.54a329.472 329.472 0 0 1-11 20.722Zm22.56-122.124c8.572 4.944 11.906 24.881 6.52 51.026c-.344 1.668-.73 3.367-1.15 5.09c-10.622-2.452-22.155-4.275-34.23-5.408c-7.034-10.017-14.323-19.124-21.64-27.008a160.789 160.789 0 0 1 5.888-5.4c18.9-16.447 36.564-22.941 44.612-18.3ZM128 90.808c12.625 0 22.86 10.235 22.86 22.86s-10.235 22.86-22.86 22.86s-22.86-10.235-22.86-22.86s10.235-22.86 22.86-22.86Z"></path></svg>
|
After Width: | Height: | Size: 4.0 KiB |
1
frontend/src/assets/vite.svg
Normal file
|
After Width: | Height: | Size: 8.5 KiB |
143
frontend/src/components/LogSidebar.tsx
Normal file
@@ -0,0 +1,143 @@
import { useEffect, useRef, useState } from 'react'

type LogLevel = 'DEBUG' | 'INFO' | 'WARNING' | 'ERROR' | 'CRITICAL'

interface LogEntry {
  time: string
  level: LogLevel
  name: string
  msg: string
}

interface Props {
  /** Base URL for the API, e.g. "" for proxied Vite dev server */
  apiBaseUrl?: string
}

export function LogSidebar({ apiBaseUrl = '' }: Props) {
  const [open, setOpen] = useState(false)
  const [logs, setLogs] = useState<LogEntry[]>([])
  const [autoScroll, setAutoScroll] = useState(true)
  const [connected, setConnected] = useState(false)
  const bodyRef = useRef<HTMLDivElement>(null)
  const errorCount = logs.filter(l => l.level === 'ERROR' || l.level === 'CRITICAL').length

  // Connect to SSE
  useEffect(() => {
    let source: EventSource | null = null
    let retryTimer: ReturnType<typeof setTimeout> | null = null

    const connect = () => {
      source = new EventSource(`${apiBaseUrl}/api/logs/stream`)
      source.onopen = () => setConnected(true)
      source.onmessage = (e) => {
        const entry = JSON.parse(e.data) as LogEntry
        setLogs(prev => {
          const next = prev.length >= 400 ? prev.slice(-399) : prev
          return [...next, entry]
        })
      }
      source.onerror = () => {
        setConnected(false)
        source?.close()
        // Auto-reconnect after 3s
        retryTimer = setTimeout(connect, 3000)
      }
    }

    connect()

    return () => {
      if (retryTimer) clearTimeout(retryTimer)
      source?.close()
    }
  }, [apiBaseUrl])

  // Auto-scroll to bottom
  useEffect(() => {
    if (autoScroll && bodyRef.current) {
      bodyRef.current.scrollTop = bodyRef.current.scrollHeight
    }
  }, [logs, autoScroll])

  const handleScroll = (e: React.UIEvent<HTMLDivElement>) => {
    const el = e.currentTarget
    const atBottom = el.scrollHeight - el.scrollTop <= el.clientHeight + 32
    setAutoScroll(atBottom)
  }

  return (
    <>
      {/* ── Toggle button, always pinned to left edge ── */}
      <button
        className={`log-toggle${open ? ' log-toggle--open' : ''}`}
        onClick={() => setOpen(v => !v)}
        title={open ? '收起日志面板' : '展开日志面板'}
        aria-label={open ? '收起日志面板' : '展开日志面板'}
      >
        <span className="log-toggle-arrow">{open ? '◀' : '▶'}</span>
        <span className={`log-toggle-dot${connected ? ' log-toggle-dot--on' : ''}`} />
        {!open && errorCount > 0 && (
          <span className="log-toggle-badge">{errorCount}</span>
        )}
      </button>

      {/* ── Sidebar panel ── */}
      <aside className={`log-sidebar${open ? ' log-sidebar--open' : ''}`}>
        <header className="log-sidebar-header">
          <span className="log-sidebar-title">后端日志</span>
          <div className="log-sidebar-actions">
            <button
              className={`log-btn${autoScroll ? ' log-btn--active' : ''}`}
              onClick={() => {
                const newVal = !autoScroll
                setAutoScroll(newVal)
                if (newVal && bodyRef.current) {
                  bodyRef.current.scrollTop = bodyRef.current.scrollHeight
                }
              }}
              title={autoScroll ? '暂停自动滚动' : '恢复自动滚动'}
            >
              {autoScroll ? '⬇ 跟随' : '⏸ 已暂停'}
            </button>
            <button
              className="log-btn"
              onClick={() => setLogs([])}
              title="清空日志"
            >
              清空
            </button>
          </div>
        </header>

        <div
          className="log-body"
          ref={bodyRef}
          onScroll={handleScroll}
        >
          {logs.length === 0 ? (
            <div className="log-empty">等待日志…</div>
          ) : (
            logs.map((entry, i) => (
              <div
                key={i}
                className={`log-entry log-entry--${entry.level.toLowerCase()}`}
              >
                <span className="log-time">{entry.time}</span>
                <span className="log-level">{entry.level}</span>
                <span className="log-name">{entry.name}</span>
                <span className="log-msg">{entry.msg}</span>
              </div>
            ))
          )}
        </div>

        {/* Connection status bar */}
        <div className={`log-status-bar${connected ? ' log-status-bar--on' : ''}`}>
          <span className="log-status-dot" />
          {connected ? '已连接' : '连接断开,重试中…'}
        </div>
      </aside>
    </>
  )
}
441
frontend/src/components/PdfPreview.tsx
Normal file
@@ -0,0 +1,441 @@
import { useEffect, useMemo, useRef, useState } from 'react'
import type { KeyboardEvent } from 'react'
import { GlobalWorkerOptions, getDocument } from 'pdfjs-dist'
import type { PDFDocumentProxy } from 'pdfjs-dist'
import pdfWorkerUrl from 'pdfjs-dist/build/pdf.worker.min.mjs?url'
import type { BarcodeResult, FieldResult, PreviewMeta } from '../types'
import {
  getOverlayRect,
  getScrollTarget,
  getTargetZoom,
  type ContainerSize,
  type PageMetric,
} from './pdfPreviewMath'

GlobalWorkerOptions.workerSrc = pdfWorkerUrl

type PdfPreviewProps = {
  preview: PreviewMeta
  fields: FieldResult[]
  barcodes?: BarcodeResult[]
  activeFieldId: string | null
  hoveredFieldId: string | null
  activeBarcodeIndex?: number | null
  apiBaseUrl: string
  onFieldSelect: (fieldId: string) => void
  onBarcodeSelect?: (index: number) => void
}

export function PdfPreview({
  preview,
  fields,
  barcodes,
  activeFieldId,
  hoveredFieldId,
  activeBarcodeIndex,
  apiBaseUrl,
  onFieldSelect,
  onBarcodeSelect,
}: PdfPreviewProps) {
  const [zoom, setZoom] = useState(1)
  const [pdfDocument, setPdfDocument] = useState<PDFDocumentProxy | null>(null)
  const [pageNumbers, setPageNumbers] = useState<number[]>([])
  const [pageMetrics, setPageMetrics] = useState<Record<number, PageMetric>>({})
  const [containerSize, setContainerSize] = useState<ContainerSize>({ width: 0, height: 0 })
  const [loading, setLoading] = useState(false)
  const [error, setError] = useState<string | null>(null)
  const [isFieldZoomed, setIsFieldZoomed] = useState(false)
  const canvasRefs = useRef<Array<HTMLCanvasElement | null>>([])
  const pageWrapRefs = useRef<Array<HTMLDivElement | null>>([])
  const previewScrollRef = useRef<HTMLDivElement | null>(null)

  function handleOverlayKeyDown(event: KeyboardEvent<HTMLDivElement>, fieldId: string) {
    if (event.key !== 'Enter' && event.key !== ' ') {
      return
    }

    event.preventDefault()
    onFieldSelect(fieldId)
  }

  // Keep the latest containerSize / fitScale in refs so the activeFieldId effect
  // does not pick up extra dependencies
  const containerSizeRef = useRef(containerSize)
  containerSizeRef.current = containerSize

  const previewUrl = useMemo(() => {
    if (/^https?:\/\//.test(preview.url)) {
      return preview.url
    }
    return `${apiBaseUrl}${preview.url}`
  }, [apiBaseUrl, preview.url])

  const fitScale = useMemo(() => {
    if (!containerSize.width || !containerSize.height) {
      return 1
    }
    const usableWidth = Math.max(containerSize.width - 32, 320)
    const usableHeight = Math.max(containerSize.height - 56, 240)
    const widthScale = usableWidth / preview.pageWidthPt
    const heightScale = usableHeight / preview.pageHeightPt
    return Math.min(widthScale, heightScale)
  }, [containerSize.height, containerSize.width, preview.pageHeightPt, preview.pageWidthPt])

  const fitScaleRef = useRef(fitScale)
  fitScaleRef.current = fitScale

  const effectiveScale = useMemo(() => fitScale * zoom, [fitScale, zoom])

  // Observe container size
  useEffect(() => {
    const container = previewScrollRef.current
    if (!container) {
      return
    }

    const updateSize = () => {
      setContainerSize({
        width: container.clientWidth,
        height: container.clientHeight,
      })
    }

    updateSize()
    const observer = new ResizeObserver(updateSize)
    observer.observe(container)

    return () => {
      observer.disconnect()
    }
  }, [])

  // ── PNG mode: skip pdfjs and derive metrics from the image size ──
  useEffect(() => {
    if (preview.type !== 'png') return
    setLoading(false)
    setError(null)
    setPageNumbers([1])
    // In PNG mode pageWidthPt / pageHeightPt hold pixel dimensions
    setPageMetrics({
      1: {
        width: preview.pageWidthPt * effectiveScale,
        height: preview.pageHeightPt * effectiveScale,
      },
    })
  }, [preview.type, preview.pageWidthPt, preview.pageHeightPt, effectiveScale])

  // Load the PDF document (skipped in PNG mode)
  useEffect(() => {
    if (preview.type === 'png') return

    let disposed = false
    const loadingTask = getDocument(previewUrl)

    async function loadPreview() {
      setLoading(true)
      setError(null)
      setPageMetrics({})
      try {
        const pdf = await loadingTask.promise
        if (disposed) {
          return
        }
        setPdfDocument(pdf)
        setPageNumbers(Array.from({ length: pdf.numPages }, (_, index) => index + 1))
      } catch (caughtError) {
        if (!disposed) {
          setError(caughtError instanceof Error ? caughtError.message : 'PDF 预览加载失败')
        }
      } finally {
        if (!disposed) {
          setLoading(false)
        }
      }
    }

    void loadPreview()

    return () => {
      disposed = true
      setPdfDocument(null)
      void loadingTask.destroy()
    }
  }, [preview.type, previewUrl])

  // Render PDF pages to canvas (skipped in PNG mode)
  useEffect(() => {
    if (preview.type === 'png') return

    let cancelled = false

    async function renderPages() {
      if (!pdfDocument || pageNumbers.length === 0) {
        return
      }

      const metrics: Record<number, PageMetric> = {}
      const outputScale = typeof window === 'undefined' ? 1 : Math.min(window.devicePixelRatio || 1, 2.5)

      for (const pageNumber of pageNumbers) {
        const page = await pdfDocument.getPage(pageNumber)
        const viewport = page.getViewport({ scale: effectiveScale })
        const canvas = canvasRefs.current[pageNumber - 1]
        if (!canvas) {
          continue
        }

        const context = canvas.getContext('2d')
        if (!context) {
          continue
        }

        canvas.width = Math.floor(viewport.width * outputScale)
        canvas.height = Math.floor(viewport.height * outputScale)
        canvas.style.width = `${viewport.width}px`
        canvas.style.height = `${viewport.height}px`

        await page.render({
          canvasContext: context,
          viewport,
          transform: outputScale === 1 ? undefined : [outputScale, 0, 0, outputScale, 0, 0],
        }).promise
        metrics[pageNumber] = { width: viewport.width, height: viewport.height }
      }

      if (!cancelled) {
        setPageMetrics(metrics)
        setLoading(false)
      }
    }

    void renderPages()

    return () => {
      cancelled = true
    }
  }, [preview.type, effectiveScale, pageNumbers, pdfDocument])

  // On field click: compute the best zoom, then scroll to the target page
  useEffect(() => {
    const activeField = fields.find((field) => field.id === activeFieldId)

    if (!activeField) {
      // Deselect → restore fit-to-page
      setZoom(1)
      setIsFieldZoomed(false)
      return
    }

    const { width: cw, height: ch } = containerSizeRef.current
    const fs = fitScaleRef.current

    if (cw > 0 && ch > 0 && fs > 0) {
      const targetZoom = getTargetZoom(activeField, { width: cw, height: ch }, fs)
      setZoom(targetZoom)
      setIsFieldZoomed(targetZoom > 1.05)
    }
  }, [activeFieldId, fields])

  useEffect(() => {
    const activeField = fields.find((field) => field.id === activeFieldId)
    if (!activeField) {
      return
    }

    const container = previewScrollRef.current
    const pageWrap = pageWrapRefs.current[activeField.page - 1]
    const metric = pageMetrics[activeField.page]
    if (!container || !pageWrap || !metric) {
      return
    }

    const rect = getOverlayRect(
      activeField,
      metric.width / preview.pageWidthPt,
      metric.height / preview.pageHeightPt,
    )

    const timer = window.setTimeout(() => {
      const target = getScrollTarget({
        containerWidth: container.clientWidth,
        containerHeight: container.clientHeight,
        scrollWidth: container.scrollWidth,
        scrollHeight: container.scrollHeight,
        pageOffsetLeft: pageWrap.offsetLeft,
        pageOffsetTop: pageWrap.offsetTop,
        rect,
      })

      container.scrollTo({
        left: target.left,
        top: target.top,
        behavior: 'smooth',
      })
    }, 120)

    return () => window.clearTimeout(timer)
  }, [activeFieldId, fields, pageMetrics, preview.pageHeightPt, preview.pageWidthPt])

  return (
    <>
      {/* Toolbar: absolutely positioned at the top right of .preview-panel via CSS */}
      <div className="preview-toolbar">
        <div className="zoom-rail" role="group" aria-label="预览缩放">
          {isFieldZoomed && (
            <span className="zoom-field-indicator">已聚焦字段</span>
          )}
          <button
            className="toolbar-button toolbar-button-quiet"
            type="button"
            aria-label="缩小"
            onClick={() => {
              setZoom((v) => Math.max(0.6, v - 0.1))
              setIsFieldZoomed(false)
            }}
          >
            −
          </button>
          <button
            className="toolbar-button toolbar-button-ghost"
            type="button"
            onClick={() => {
              setZoom(1)
              setIsFieldZoomed(false)
            }}
          >
            适应页面
          </button>
          <span className="zoom-readout">{Math.round(effectiveScale * 100)}%</span>
          <button
            className="toolbar-button toolbar-button-quiet"
            type="button"
            aria-label="放大"
            onClick={() => {
              setZoom((v) => Math.min(3, v + 0.1))
              setIsFieldZoomed(false)
            }}
          >
            +
          </button>
        </div>
      </div>

      <div className="pdf-preview-root">
        {error ? <p className="preview-error">{error}</p> : null}
        {loading ? <p className="preview-loading">PDF 正在渲染…</p> : null}

        <div className="preview-scroll" ref={previewScrollRef}>
          {pageNumbers.map((pageNumber, index) => {
            const pageFields = fields.filter((field) => field.page === pageNumber)
            const metric = pageMetrics[pageNumber]
            const xScale = metric ? metric.width / preview.pageWidthPt : effectiveScale
            const yScale = metric ? metric.height / preview.pageHeightPt : effectiveScale

            return (
              <div
                key={pageNumber}
                className="page-frame"
              >
                <div className="page-label">第 {pageNumber} 页</div>
                <div
                  className="page-canvas-wrap"
                  style={{ width: metric?.width ?? preview.pageWidthPt * effectiveScale }}
                  ref={(node) => {
                    pageWrapRefs.current[index] = node
                  }}
                >
                  {preview.type === 'png' ? (
                    /* PNG mode: show the cropped image directly */
                    <img
                      src={previewUrl}
                      alt="预览"
                      style={{
                        display: 'block',
                        width: metric?.width ?? preview.pageWidthPt * effectiveScale,
                        height: metric?.height ?? preview.pageHeightPt * effectiveScale,
                        borderRadius: 8,
                      }}
                      draggable={false}
                    />
                  ) : (
                    <canvas
                      ref={(node) => {
                        canvasRefs.current[index] = node
                      }}
                    />
                  )}
                  <div className="overlay-layer">
                    {pageFields.map((field) => {
                      const isActive = field.id === activeFieldId
                      const isHovered = field.id === hoveredFieldId
                      const rect = getOverlayRect(field, xScale, yScale)

                      return (
                        <div
                          key={field.id}
                          className={[
                            'overlay-box',
                            `overlay-${field.validation_status}`,
                            field.block_type ? `overlay-block-${field.block_type}` : '',
                            isActive ? 'active' : '',
                            isHovered ? 'hovered' : '',
                          ]
                            .filter(Boolean)
                            .join(' ')}
                          style={{
                            left: rect.left,
                            top: rect.top,
                            width: rect.width,
                            height: rect.height,
                            ['--overlay-stroke-width' as string]: `${rect.strokeWidth}px`,
                          }}
                          title={field.text}
                          role="button"
                          tabIndex={0}
                          aria-label={field.text}
                          aria-pressed={isActive}
                          onClick={() => onFieldSelect(field.id)}
                          onKeyDown={(event) => handleOverlayKeyDown(event, field.id)}
                        />
                      )
                    })}

                    {/* Barcode overlays (rendered on page 1 only; coordinates match the cropped image) */}
                    {pageNumber === 1 && barcodes?.map((bc, bi) => {
                      const isActive = bi === activeBarcodeIndex
                      return (
                        <div
                          key={`barcode-${bi}`}
                          className={['overlay-box', 'overlay-barcode', isActive ? 'active' : ''].filter(Boolean).join(' ')}
                          style={{
                            left: bc.x0 * xScale,
                            top: bc.y0 * yScale,
                            width: (bc.x1 - bc.x0) * xScale,
                            height: (bc.y1 - bc.y0) * yScale,
                            ['--overlay-stroke-width' as string]: '2px',
                          }}
                          title={`${bc.format_label}:${bc.text}`}
                          role="button"
                          tabIndex={0}
                          aria-label={`${bc.format_label} ${bc.text}`}
                          aria-pressed={isActive}
                          onClick={() => onBarcodeSelect?.(bi)}
                          onKeyDown={(e) => {
                            if (e.key === 'Enter' || e.key === ' ') {
                              e.preventDefault()
|
||||
onBarcodeSelect?.(bi)
|
||||
}
|
||||
}}
|
||||
/>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
</div>
|
||||
</div>
|
||||
)
|
||||
})}
|
||||
</div>
|
||||
</div>
|
||||
</>
|
||||
)
|
||||
}
|
||||
97
frontend/src/components/pdfPreviewMath.ts
Normal file
@@ -0,0 +1,97 @@
import type { FieldResult } from '../types'

export type ContainerSize = {
  width: number
  height: number
}

export type PageMetric = {
  width: number
  height: number
}

export type OverlayRect = {
  left: number
  top: number
  width: number
  height: number
  strokeWidth: number
}

export type ScrollTargetInput = {
  containerWidth: number
  containerHeight: number
  scrollWidth: number
  scrollHeight: number
  pageOffsetLeft: number
  pageOffsetTop: number
  rect: OverlayRect
}

export type ScrollTarget = {
  left: number
  top: number
}

export function clamp(value: number, min: number, max: number): number {
  if (max < min) {
    return min
  }
  return Math.min(Math.max(value, min), max)
}

export function getOverlayRect(field: FieldResult, xScale: number, yScale: number): OverlayRect {
  const rawWidth = Math.max((field.x1_pt - field.x0_pt) * xScale, 0)
  const rawHeight = Math.max((field.bottom_pt - field.top_pt) * yScale, 0)
  const scaleFloor = Math.min(xScale, yScale)
  const minSize = scaleFloor < 0.45 ? 4 : scaleFloor < 0.8 ? 6 : 8
  const width = Math.max(rawWidth, minSize)
  const height = Math.max(rawHeight, minSize)
  const strokeWidth = scaleFloor < 0.5 ? 1 : scaleFloor < 1 ? 1.5 : 2

  return {
    left: field.x0_pt * xScale - (width - rawWidth) / 2,
    top: field.top_pt * yScale - (height - rawHeight) / 2,
    width,
    height,
    strokeWidth,
  }
}
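// Worked example (illustrative numbers, not app output): a field spanning 1 pt
// at xScale = yScale = 0.4 has rawWidth 0.4 px; it is padded to the 4 px minimum
// and re-centred, so left = x0_pt * 0.4 - (4 - 0.4) / 2. This matches the
// expectations in frontend/tests/pdfPreviewMath.test.ts.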

export function getTargetZoom(
  field: FieldResult,
  containerSize: ContainerSize,
  fitScale: number,
): number {
  const { width: containerWidth, height: containerHeight } = containerSize
  if (containerWidth <= 0 || containerHeight <= 0 || fitScale <= 0) {
    return 1
  }

  const fieldWidth = Math.max(Math.abs(field.x1_pt - field.x0_pt), 24)
  const fieldHeight = Math.max(Math.abs(field.bottom_pt - field.top_pt), 24)
  const zoomWidth = (containerWidth * 0.45) / (fieldWidth * fitScale)
  const zoomHeight = (containerHeight * 0.45) / (fieldHeight * fitScale)

  return clamp(Math.min(zoomWidth, zoomHeight), 1, 2.8)
}
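// Note: the 0.45 factor sizes the focused field to fill at most ~45% of the
// viewport in either dimension, and clamp(…, 1, 2.8) keeps the result between
// the normal fit (1×) and a 2.8× ceiling.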

export function getScrollTarget(input: ScrollTargetInput): ScrollTarget {
  const {
    containerWidth,
    containerHeight,
    scrollWidth,
    scrollHeight,
    pageOffsetLeft,
    pageOffsetTop,
    rect,
  } = input

  const centerLeft = pageOffsetLeft + rect.left + rect.width / 2
  const centerTop = pageOffsetTop + rect.top + rect.height / 2

  return {
    left: clamp(centerLeft - containerWidth / 2, 0, scrollWidth - containerWidth),
    top: clamp(centerTop - containerHeight / 2, 0, scrollHeight - containerHeight),
  }
}
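// Note: if the scrollable content is smaller than the container, the upper
// bound (scrollWidth - containerWidth) goes negative and clamp's max < min
// guard pins the target to 0, so undersized pages never scroll.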
64
frontend/src/index.css
Normal file
@@ -0,0 +1,64 @@
@import "tailwindcss";

:root {
  font-family:
    'IBM Plex Sans', 'PingFang SC', 'Hiragino Sans GB', 'Microsoft YaHei',
    sans-serif;
  line-height: 1.5;
  font-weight: 400;
  color: #4d3b2d;
  background:
    radial-gradient(circle at top left, rgba(255, 236, 214, 0.95), transparent 32%),
    radial-gradient(circle at top right, rgba(224, 210, 187, 0.72), transparent 28%),
    linear-gradient(180deg, #f6efe6 0%, #efe2d1 100%);
  background-attachment: fixed;
  font-synthesis: none;
  text-rendering: optimizeLegibility;
  -webkit-font-smoothing: antialiased;
  -moz-osx-font-smoothing: grayscale;
}

* {
  box-sizing: border-box;
}

html,
body,
#root {
  min-height: 100%;
}

body {
  margin: 0;
}

button,
input,
textarea,
select {
  font: inherit;
}

h1,
h2,
p {
  margin: 0;
}

h1,
h2 {
  font-family:
    'Avenir Next', 'PingFang SC', 'Hiragino Sans GB', 'Microsoft YaHei',
    sans-serif;
  color: #23160d;
}

h1 {
  font-size: clamp(2rem, 3vw, 3.2rem);
  line-height: 1.05;
  max-width: 18ch;
}

h2 {
  font-size: 1.25rem;
}
10
frontend/src/main.tsx
Normal file
@@ -0,0 +1,10 @@
import { StrictMode } from 'react'
import { createRoot } from 'react-dom/client'
import './index.css'
import App from './App.tsx'

createRoot(document.getElementById('root')!).render(
  <StrictMode>
    <App />
  </StrictMode>,
)
656
frontend/src/mockData.ts
Normal file
@@ -0,0 +1,656 @@
import type { ProcessResponse } from './types'

export const MOCK_JOB_ID = '8db8b6393957487d974b8cdc043d2edc'

export const MOCK_RESULT: ProcessResponse = {
  "preview": {
    "type": "png",
    "url": "/api/files/8db8b6393957487d974b8cdc043d2edc/crop/cropped_label.png",
    "pageWidthPt": 1986,
    "pageHeightPt": 1026
  },
  "fields": [
    {
      "id": "field-000",
      "page": 1,
      "block_type": "text",
      "text": "过敏原信息:此产品含有肤质的谷物及其制品、大豆及其制品、蛋类及其制品、坚果及其果仁类制品。此生产线也加工含有甲壳纲类动物及其制品、鱼类及其制品、乳及乳制品、花生及其制品的食品。",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 173,
      "top_pt": 431,
      "x1_pt": 558,
      "bottom_pt": 451,
      "normalized_text": "过敏原信息:此产品含有肤质的谷物及其制品、大豆及其制品、蛋类及其制品、坚果及其果仁类制品。此生产线也加工含有甲壳纲类动物及其制品、鱼类及其制品、乳及乳制品、花生及其制品的食品。",
      "validation_status": "matched",
      "validation_reason": "文字内容与 Word 稿一致",
      "matched_excerpt": "过敏原信息:此产品含有肤质的谷物及其制品、大豆及其制品、蛋类及其制品、坚果及其果"
    },
    {
      "id": "field-001",
      "page": 1,
      "block_type": "text",
      "text": "生产日期/保质期到期日:见礼盒底面喷码处",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 174,
      "top_pt": 450,
      "x1_pt": 262,
      "bottom_pt": 467,
      "normalized_text": "生产日期/保质期到期日:见礼盒底面喷码处",
      "validation_status": "matched",
      "validation_reason": "文字内容与 Word 稿一致",
      "matched_excerpt": "生产日期/保质期到期日:见礼盒底面喷码处"
    },
    {
      "id": "field-002",
      "page": 1,
      "block_type": "text",
      "text": "保质期:6个月",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 174,
      "top_pt": 467,
      "x1_pt": 203,
      "bottom_pt": 483,
      "normalized_text": "保质期:6个月",
      "validation_status": "matched",
      "validation_reason": "文字内容与 Word 稿一致",
      "matched_excerpt": "保质期:6个月"
    },
    {
      "id": "field-003",
      "page": 1,
      "block_type": "text",
      "text": "注意:如发现真空包装袋破损或膨胀,请勿食用,并在保质期内及时向当地经销商调换。",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 174,
      "top_pt": 483,
      "x1_pt": 344,
      "bottom_pt": 501,
      "normalized_text": "注意:如发现真空包装袋破损或膨胀,请勿食用,并在保质期内及时向当地经销商调换。",
      "validation_status": "unmatched",
      "validation_reason": "版面文字与 Word 校对稿存在差异,请人工核查",
      "matched_excerpt": null
    },
    {
      "id": "field-004",
      "page": 1,
      "block_type": "text",
      "text": "贮存条件:常温干燥通风处保存 ",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 174,
      "top_pt": 501,
      "x1_pt": 237,
      "bottom_pt": 519,
      "normalized_text": "贮存条件:常温干燥通风处保存 ",
      "validation_status": "matched",
      "validation_reason": "文字内容与 Word 稿一致",
      "matched_excerpt": "贮存条件:常温干燥通风处保存 "
    },
    {
      "id": "field-005",
      "page": 1,
      "block_type": "text",
      "text": "委托单位:湖州诸老大实业股份有限公司地址:浙江省湖州市吴兴区高新区科创园A幢317室服务热线:400-603-1887",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 174,
      "top_pt": 518,
      "x1_pt": 412,
      "bottom_pt": 536,
      "normalized_text": "委托单位:湖州诸老大实业股份有限公司地址:浙江省湖州市吴兴区高新区科创园A幢317室服务热线:400-603-1887",
      "validation_status": "matched",
      "validation_reason": "文字内容与 Word 稿一致",
      "matched_excerpt": "委托单位:湖州诸老大实业股份有限公司地址:浙江省湖州市吴兴区高新区科创园A幢31"
    },
    {
      "id": "field-006",
      "page": 1,
      "block_type": "text",
      "text": "【老大黑猪五花肉粽子/老大双蛋黄黑猪肉粽子/酱香黑猪肉粽子/花胶鸡火腿老汤粽子/黑松露味五花肉粽子/高汤五花肉粽子/经典洗沙粽子/玫瑰白玉洗沙粽子/黑芝麻核桃粽子/新会陈皮洗沙粽子】",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 174,
      "top_pt": 536,
      "x1_pt": 562,
      "bottom_pt": 553,
      "normalized_text": "【老大黑猪五花肉粽子/老大双蛋黄黑猪肉粽子/酱香黑猪肉粽子/花胶鸡火腿老汤粽子/黑松露味五花肉粽子/高汤五花肉粽子/经典洗沙粽子/玫瑰白玉洗沙粽子/黑芝麻核桃粽子/新会陈皮洗沙粽子】",
      "validation_status": "empty_or_garbled",
      "validation_reason": "识别结果为空或包含乱码,无法有效校验",
      "matched_excerpt": null
    },
    {
      "id": "field-007",
      "page": 1,
      "block_type": "text",
      "text": "受委托单位:浙江诸老大供应链管理有限公司地址:浙江省嘉兴市海盐县望海街道顾家路5号产地:浙江省嘉兴市",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 174,
      "top_pt": 553,
      "x1_pt": 399,
      "bottom_pt": 571,
      "normalized_text": "受委托单位:浙江诸老大供应链管理有限公司地址:浙江省嘉兴市海盐县望海街道顾家路5号产地:浙江省嘉兴市",
      "validation_status": "matched",
      "validation_reason": "文字内容与 Word 稿一致",
      "matched_excerpt": "受委托单位:浙江诸老大供应链管理有限公司地址:浙江省嘉兴市海盐县望海街道顾家路5"
    },
    {
      "id": "field-008",
      "page": 1,
      "block_type": "text",
      "text": "电话:0573-86981666食品生产许可证编号:SC11133042404806产品标准代号:GB/T46259",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 174,
      "top_pt": 571,
      "x1_pt": 369,
      "bottom_pt": 589,
      "normalized_text": "电话:0573-86981666食品生产许可证编号:SC11133042404806产品标准代号:GB/T46259",
      "validation_status": "matched",
      "validation_reason": "文字内容与 Word 稿一致",
      "matched_excerpt": "电话:0573-86981666食品生产许可证编号:SC111330424048"
    },
    {
      "id": "field-009",
      "page": 1,
      "block_type": "text",
      "text": "【草木灰咸鸭蛋】",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 174,
      "top_pt": 603,
      "x1_pt": 208,
      "bottom_pt": 621,
      "normalized_text": "【草木灰咸鸭蛋】",
      "validation_status": "unmatched",
      "validation_reason": "版面文字与 Word 校对稿存在差异,请人工核查",
      "matched_excerpt": null
    },
    {
      "id": "field-010",
      "page": 1,
      "block_type": "text",
      "text": "(受委托单位详见喷码处)",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 174,
      "top_pt": 621,
      "x1_pt": 225,
      "bottom_pt": 639,
      "normalized_text": "(受委托单位详见喷码处)",
      "validation_status": "matched",
      "validation_reason": "文字内容与 Word 稿一致",
      "matched_excerpt": "(受委托单位详见喷码处)"
    },
    {
      "id": "field-011",
      "page": 1,
      "block_type": "text",
      "text": "(A)受委托单位:高邮市秦邮蛋品有限公司",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 174,
      "top_pt": 639,
      "x1_pt": 258,
      "bottom_pt": 657,
      "normalized_text": "(A)受委托单位:高邮市秦邮蛋品有限公司",
      "validation_status": "matched",
      "validation_reason": "文字内容与 Word 稿一致",
      "matched_excerpt": "(A)受委托单位:高邮市秦邮蛋品有限公司"
    },
    {
      "id": "field-012",
      "page": 1,
      "block_type": "text",
      "text": "受委托单位地址:高邮城南经济新区兴区路5号",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 174,
      "top_pt": 657,
      "x1_pt": 268,
      "bottom_pt": 674,
      "normalized_text": "受委托单位地址:高邮城南经济新区兴区路5号",
      "validation_status": "matched",
      "validation_reason": "文字内容与 Word 稿一致",
      "matched_excerpt": "受委托单位地址:高邮城南经济新区兴区路5号"
    },
    {
      "id": "field-013",
      "page": 1,
      "block_type": "text",
      "text": "产地:江苏省扬州市",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 174,
      "top_pt": 674,
      "x1_pt": 214,
      "bottom_pt": 691,
      "normalized_text": "产地:江苏省扬州市",
      "validation_status": "unmatched",
      "validation_reason": "版面文字与 Word 校对稿存在差异,请人工核查",
      "matched_excerpt": null
    },
    {
      "id": "field-014",
      "page": 1,
      "block_type": "text",
      "text": "产品类型:真空包装熟咸鸭蛋 ",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 174,
      "top_pt": 691,
      "x1_pt": 233,
      "bottom_pt": 709,
      "normalized_text": "产品类型:真空包装熟咸鸭蛋 ",
      "validation_status": "matched",
      "validation_reason": "文字内容与 Word 稿一致",
      "matched_excerpt": "产品类型:真空包装熟咸鸭蛋 "
    },
    {
      "id": "field-015",
      "page": 1,
      "block_type": "text",
      "text": "等级:奎级",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 174,
      "top_pt": 709,
      "x1_pt": 196,
      "bottom_pt": 726,
      "normalized_text": "等级:奎级",
      "validation_status": "matched",
      "validation_reason": "文字内容与 Word 稿一致",
      "matched_excerpt": "等级:奎级"
    },
    {
      "id": "field-016",
      "page": 1,
      "block_type": "text",
      "text": "电话:400-8118-252 400-8289-800",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 174,
      "top_pt": 726,
      "x1_pt": 247,
      "bottom_pt": 743,
      "normalized_text": "电话:400-8118-252 400-8289-800",
      "validation_status": "matched",
      "validation_reason": "文字内容与 Word 稿一致",
      "matched_excerpt": "电话:400-8118-252 400-8289-800"
    },
    {
      "id": "field-017",
      "page": 1,
      "block_type": "text",
      "text": "食品生产许可证编号:SC11932108400062",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 174,
      "top_pt": 743,
      "x1_pt": 261,
      "bottom_pt": 761,
      "normalized_text": "食品生产许可证编号:SC11932108400062",
      "validation_status": "matched",
      "validation_reason": "文字内容与 Word 稿一致",
      "matched_excerpt": "食品生产许可证编号:SC11932108400062"
    },
    {
      "id": "field-018",
      "page": 1,
      "block_type": "text",
      "text": "产品标准代号:Q/QYDP0001S ",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 174,
      "top_pt": 761,
      "x1_pt": 237,
      "bottom_pt": 778,
      "normalized_text": "产品标准代号:Q/QYDP0001S ",
      "validation_status": "matched",
      "validation_reason": "文字内容与 Word 稿一致",
      "matched_excerpt": "产品标准代号:Q/QYDP0001S "
    },
    {
      "id": "field-019",
      "page": 1,
      "block_type": "text",
      "text": "食用方法:本品为熟制品,开袋去壳即食。冬季加热出油后",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 174,
      "top_pt": 777,
      "x1_pt": 287,
      "bottom_pt": 795,
      "normalized_text": "食用方法:本品为熟制品,开袋去壳即食。冬季加热出油后",
      "validation_status": "unmatched",
      "validation_reason": "版面文字与 Word 校对稿存在差异,请人工核查",
      "matched_excerpt": null
    },
    {
      "id": "field-020",
      "page": 1,
      "block_type": "text",
      "text": "食用味道更佳。蛋品易破碎,若发现异味或真空袋漏气、胀气,",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 174,
      "top_pt": 795,
      "x1_pt": 293,
      "bottom_pt": 813,
      "normalized_text": "食用味道更佳。蛋品易破碎,若发现异味或真空袋漏气、胀气,",
      "validation_status": "matched",
      "validation_reason": "文字内容与 Word 稿一致",
      "matched_excerpt": "食用味道更佳。蛋品易破碎,若发现异味或真空袋漏气、胀气,"
    },
    {
      "id": "field-021",
      "page": 1,
      "block_type": "text",
      "text": "请勿食用。可与当地经销商调换",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 174,
      "top_pt": 812,
      "x1_pt": 237,
      "bottom_pt": 829,
      "normalized_text": "请勿食用。可与当地经销商调换",
      "validation_status": "matched",
      "validation_reason": "文字内容与 Word 稿一致",
      "matched_excerpt": "请勿食用。可与当地经销商调换"
    },
    {
      "id": "field-022",
      "page": 1,
      "block_type": "text",
      "text": "(B)受委托单位:高邮三宝食品有限公司",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 315,
      "top_pt": 608,
      "x1_pt": 393,
      "bottom_pt": 625,
      "normalized_text": "(B)受委托单位:高邮三宝食品有限公司",
      "validation_status": "matched",
      "validation_reason": "文字内容与 Word 稿一致",
      "matched_excerpt": "(B)受委托单位:高邮三宝食品有限公司"
    },
    {
      "id": "field-023",
      "page": 1,
      "block_type": "text",
      "text": "受委托单位地址:高邮市甘燥镇三郎庙路38号",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 315,
      "top_pt": 625,
      "x1_pt": 406,
      "bottom_pt": 643,
      "normalized_text": "受委托单位地址:高邮市甘燥镇三郎庙路38号",
      "validation_status": "matched",
      "validation_reason": "文字内容与 Word 稿一致",
      "matched_excerpt": "受委托单位地址:高邮市甘燥镇三郎庙路38号"
    },
    {
      "id": "field-024",
      "page": 1,
      "block_type": "text",
      "text": "产地:江苏省扬州市",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 315,
      "top_pt": 643,
      "x1_pt": 356,
      "bottom_pt": 660,
      "normalized_text": "产地:江苏省扬州市",
      "validation_status": "matched",
      "validation_reason": "文字内容与 Word 稿一致",
      "matched_excerpt": "产地:江苏省扬州市"
    },
    {
      "id": "field-025",
      "page": 1,
      "block_type": "text",
      "text": "产品类型:高邮咸鸭蛋软罐头(熟)",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 315,
      "top_pt": 660,
      "x1_pt": 382,
      "bottom_pt": 677,
      "normalized_text": "产品类型:高邮咸鸭蛋软罐头(熟)",
      "validation_status": "matched",
      "validation_reason": "文字内容与 Word 稿一致",
      "matched_excerpt": "产品类型:高邮咸鸭蛋软罐头(熟)"
    },
    {
      "id": "field-026",
      "page": 1,
      "block_type": "text",
      "text": "贮存条件:常温、阴凉干燥,通风处",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 315,
      "top_pt": 677,
      "x1_pt": 382,
      "bottom_pt": 695,
      "normalized_text": "贮存条件:常温、阴凉干燥,通风处",
      "validation_status": "unmatched",
      "validation_reason": "版面文字与 Word 校对稿存在差异,请人工核查",
      "matched_excerpt": null
    },
    {
      "id": "field-027",
      "page": 1,
      "block_type": "text",
      "text": "电话:400-690-2811",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 315,
      "top_pt": 695,
      "x1_pt": 357,
      "bottom_pt": 711,
      "normalized_text": "电话:400-690-2811",
      "validation_status": "matched",
      "validation_reason": "文字内容与 Word 稿一致",
      "matched_excerpt": "电话:400-690-2811"
    },
    {
      "id": "field-028",
      "page": 1,
      "block_type": "text",
      "text": "食品生产许可证编号:SC11932108401305",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 315,
      "top_pt": 711,
      "x1_pt": 402,
      "bottom_pt": 729,
      "normalized_text": "食品生产许可证编号:SC11932108401305",
      "validation_status": "matched",
      "validation_reason": "文字内容与 Word 稿一致",
      "matched_excerpt": "食品生产许可证编号:SC11932108401305"
    },
    {
      "id": "field-029",
      "page": 1,
      "block_type": "text",
      "text": "产品标准代号:GB/T19050",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 315,
      "top_pt": 729,
      "x1_pt": 371,
      "bottom_pt": 745,
      "normalized_text": "产品标准代号:GB/T19050",
      "validation_status": "matched",
      "validation_reason": "文字内容与 Word 稿一致",
      "matched_excerpt": "产品标准代号:GB/T19050"
    },
    {
      "id": "field-030",
      "page": 1,
      "block_type": "text",
      "text": "食用方法:去壳即食,冬季加温出油更佳,如出现真",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 315,
      "top_pt": 745,
      "x1_pt": 414,
      "bottom_pt": 763,
      "normalized_text": "食用方法:去壳即食,冬季加温出油更佳,如出现真",
      "validation_status": "empty_or_garbled",
      "validation_reason": "识别结果为空或包含乱码,无法有效校验",
      "matched_excerpt": null
    },
    {
      "id": "field-031",
      "page": 1,
      "block_type": "text",
      "text": "空咸鸭蛋漏气、胀袋、变质请勿食用",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 315,
      "top_pt": 763,
      "x1_pt": 385,
      "bottom_pt": 781,
      "normalized_text": "空咸鸭蛋漏气、胀袋、变质请勿食用",
      "validation_status": "matched",
      "validation_reason": "文字内容与 Word 稿一致",
      "matched_excerpt": "空咸鸭蛋漏气、胀袋、变质请勿食用"
    },
    {
      "id": "field-032",
      "page": 1,
      "block_type": "text",
      "text": "等级:叁级",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 315,
      "top_pt": 781,
      "x1_pt": 338,
      "bottom_pt": 799,
      "normalized_text": "等级:叁级",
      "validation_status": "matched",
      "validation_reason": "文字内容与 Word 稿一致",
      "matched_excerpt": "等级:叁级"
    },
    {
      "id": "field-033",
      "page": 1,
      "block_type": "text",
      "text": "粽子食用方法:水煮加热法",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 446,
      "top_pt": 589,
      "x1_pt": 508,
      "bottom_pt": 606,
      "normalized_text": "粽子食用方法:水煮加热法",
      "validation_status": "matched",
      "validation_reason": "文字内容与 Word 稿一致",
      "matched_excerpt": "粽子食用方法:水煮加热法"
    },
    {
      "id": "field-034",
      "page": 1,
      "block_type": "text",
      "text": "生产日期:",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 603,
      "top_pt": 768,
      "x1_pt": 642,
      "bottom_pt": 801,
      "normalized_text": "生产日期:",
      "validation_status": "matched",
      "validation_reason": "文字内容与 Word 稿一致",
      "matched_excerpt": "生产日期:"
    },
    {
      "id": "field-035",
      "page": 1,
      "block_type": "text",
      "text": "保质期到期日:",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 604,
      "top_pt": 802,
      "x1_pt": 659,
      "bottom_pt": 835,
      "normalized_text": "保质期到期日:",
      "validation_status": "matched",
      "validation_reason": "文字内容与 Word 稿一致",
      "matched_excerpt": "保质期到期日:"
    },
    {
      "id": "field-036",
      "page": 1,
      "block_type": "text",
      "text": "食品名称:卜居礼品粽(粽子/草木灰咸鸭蛋/低糖原味绿豆糕) 净含量:1.73千克(粽子:170克 \\times 3+130克 \\times 5, 草木灰咸鸭蛋:70克 \\times 6, 低糖原味绿豆糕:25克 \\times 6)",
      "font_name": "SourceHanSansCN-Regular",
      "font_size_pt": 8.0,
      "font_height_mm": 2.8,
      "x0_pt": 175,
      "top_pt": 862,
      "x1_pt": 878,
      "bottom_pt": 909,
      "normalized_text": "食品名称:卜居礼品粽(粽子/草木灰咸鸭蛋/低糖原味绿豆糕) 净含量:1.73千克(粽子:170克 \\times 3+130克 \\times 5, 草木灰咸鸭蛋:70克 \\times 6, 低糖原味绿豆糕:25克 \\times 6)",
      "validation_status": "unmatched",
      "validation_reason": "版面文字与 Word 校对稿存在差异,请人工核查",
      "matched_excerpt": null
    }
  ],
  "word_text": "诸老大粽子配料表:糯米、猪肉(黑猪)、酱油、食盐、白砂糖、味精。",
  "barcodes": [
    {
      "format": "EAN13",
      "format_label": "EAN-13",
      "text": "6901234567890",
      "x0": 820,
      "y0": 180,
      "x1": 900,
      "y1": 280,
      "valid": true
    }
  ]
}
69
frontend/src/types.ts
Normal file
@@ -0,0 +1,69 @@
export type ValidationStatus = 'matched' | 'unmatched' | 'empty_or_garbled'

/** MinerU block types (non-exhaustive; extra values from newer models are allowed). */
export type BlockType =
  | 'text'
  | 'para'
  | 'title'
  | 'table'
  | 'figure'
  | 'figure_caption'
  | 'table_caption'
  | 'equation'
  | 'interline_equation'
  | 'list'
  | (string & {}) // allow unknown future values

export type PreviewMeta = {
  /**
   * 'pdf' – rendered via pdfjs (canvas per page)
   * 'png' – rendered as <img> (pageWidthPt/pageHeightPt hold pixel dimensions)
   * 'svg' – reserved
   */
  type: 'pdf' | 'png' | 'svg'
  url: string
  pageWidthPt: number
  pageHeightPt: number
}
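// Example value (taken from mockData.ts, job id replaced with a placeholder):
// a PNG preview of the cropped label, where the *Pt fields carry pixel sizes:
// { type: 'png', url: '/api/files/<job>/crop/cropped_label.png', pageWidthPt: 1986, pageHeightPt: 1026 }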

export type FieldResult = {
  id: string
  page: number
  block_type?: BlockType
  text: string
  /** Raw MinerU table HTML (with colspan/rowspan); present only when block_type === 'table' */
  table_html?: string | null
  font_name?: string | null
  font_size_pt?: number | null
  font_height_mm?: number | null
  x0_pt: number
  top_pt: number
  x1_pt: number
  bottom_pt: number
  normalized_text: string
  validation_status: ValidationStatus
  validation_reason: string
  matched_excerpt: string | null
}

export type BarcodeResult = {
  format: string
  format_label: string
  text: string
  x0: number
  y0: number
  x1: number
  y1: number
  valid: boolean
  /** Relative URL of the barcode crop produced by the backend, used by the barcode panel preview */
  crop_url?: string
}
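// Example value (taken from the mock data): the decoded EAN-13 on page 1, with
// pixel coordinates in the cropped-image coordinate system:
// { format: 'EAN13', format_label: 'EAN-13', text: '6901234567890', x0: 820, y0: 180, x1: 900, y1: 280, valid: true }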

export type ProcessResponse = {
  preview: PreviewMeta
  fields: FieldResult[]
  word_text?: string
  /** HTML fragment converted by pandoc (colspan/rowspan preserved), shown in the Word-parsing result tab */
  word_html?: string | null
  barcodes?: BarcodeResult[]
}
84
frontend/tests/pdfPreviewMath.test.ts
Normal file
@@ -0,0 +1,84 @@
import assert from 'node:assert/strict'
import test from 'node:test'
import type { FieldResult } from '../src/types'
import { getOverlayRect, getScrollTarget, getTargetZoom } from '../src/components/pdfPreviewMath'

const field: FieldResult = {
  id: 'field-1',
  page: 1,
  text: '食品名称:天问礼品粽',
  font_name: 'Mock Font',
  font_size_pt: 12,
  font_height_mm: 4,
  x0_pt: 100,
  top_pt: 80,
  x1_pt: 180,
  bottom_pt: 116,
  normalized_text: '食品名称天问礼品粽',
  validation_status: 'matched',
  validation_reason: 'matched',
  matched_excerpt: '食品名称:天问礼品粽',
}

test('getOverlayRect keeps tiny fields visible without drifting away from center', () => {
  const rect = getOverlayRect(
    {
      ...field,
      x1_pt: 101,
      bottom_pt: 81,
    },
    0.4,
    0.4,
  )

  assert.equal(rect.width, 4)
  assert.equal(rect.height, 4)
  assert.equal(rect.strokeWidth, 1)
  assert.equal(rect.left, 38.2)
  assert.equal(rect.top, 30.2)
})

test('getTargetZoom caps focus zoom and keeps normal fit as the floor', () => {
  assert.equal(
    getTargetZoom(field, { width: 1200, height: 900 }, 0.5),
    2.8,
  )

  assert.equal(
    getTargetZoom(field, { width: 0, height: 900 }, 0.5),
    1,
  )
})

test('getScrollTarget centers the active field and clamps to scroll bounds', () => {
  const rect = getOverlayRect(field, 2, 2)
  const centered = getScrollTarget({
    containerWidth: 800,
    containerHeight: 600,
    scrollWidth: 2200,
    scrollHeight: 1800,
    pageOffsetLeft: 260,
    pageOffsetTop: 420,
    rect,
  })

  assert.deepEqual(centered, {
    left: 140,
    top: 316,
  })

  const clamped = getScrollTarget({
    containerWidth: 800,
    containerHeight: 600,
    scrollWidth: 1200,
    scrollHeight: 900,
    pageOffsetLeft: 20,
    pageOffsetTop: 30,
    rect,
  })

  assert.deepEqual(clamped, {
    left: 0,
    top: 0,
  })
})
25
frontend/tsconfig.app.json
Normal file
@@ -0,0 +1,25 @@
{
  "compilerOptions": {
    "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.app.tsbuildinfo",
    "target": "es2023",
    "lib": ["ES2023", "DOM", "DOM.Iterable"],
    "module": "esnext",
    "types": ["vite/client"],
    "skipLibCheck": true,

    /* Bundler mode */
    "moduleResolution": "bundler",
    "allowImportingTsExtensions": true,
    "verbatimModuleSyntax": true,
    "moduleDetection": "force",
    "noEmit": true,
    "jsx": "react-jsx",

    /* Linting */
    "noUnusedLocals": true,
    "noUnusedParameters": true,
    "erasableSyntaxOnly": true,
    "noFallthroughCasesInSwitch": true
  },
  "include": ["src"]
}
7
frontend/tsconfig.json
Normal file
@@ -0,0 +1,7 @@
{
  "files": [],
  "references": [
    { "path": "./tsconfig.app.json" },
    { "path": "./tsconfig.node.json" }
  ]
}
24
frontend/tsconfig.node.json
Normal file
@@ -0,0 +1,24 @@
{
  "compilerOptions": {
    "tsBuildInfoFile": "./node_modules/.tmp/tsconfig.node.tsbuildinfo",
    "target": "es2023",
    "lib": ["ES2023"],
    "module": "esnext",
    "types": ["node"],
    "skipLibCheck": true,

    /* Bundler mode */
    "moduleResolution": "bundler",
    "allowImportingTsExtensions": true,
    "verbatimModuleSyntax": true,
    "moduleDetection": "force",
    "noEmit": true,

    /* Linting */
    "noUnusedLocals": true,
    "noUnusedParameters": true,
    "erasableSyntaxOnly": true,
    "noFallthroughCasesInSwitch": true
  },
  "include": ["vite.config.ts"]
}
13
frontend/vite.config.ts
Normal file
@@ -0,0 +1,13 @@
import { defineConfig } from 'vite'
import react from '@vitejs/plugin-react'
import tailwindcss from '@tailwindcss/vite'

// https://vite.dev/config/
export default defineConfig({
  plugins: [tailwindcss(), react()],
  server: {
    // Forward /api calls to the FastAPI backend on port 8010 (see scripts/start_backend.sh)
    proxy: {
      '/api': 'http://127.0.0.1:8010',
    },
  },
})
BIN
full_view.png
Normal file
|
After Width: | Height: | Size: 187 KiB |
104
parse_ai.py
Normal file
@@ -0,0 +1,104 @@
#!/usr/bin/env python3
from __future__ import annotations

import argparse
import json
import re
from pathlib import Path


def detect_kind(path: Path) -> str:
    with path.open("rb") as f:
        head = f.read(4096)
    if b"%PDF-" in head or b"%%PDF" in head:
        return "pdf-compatible-ai"
    return "eps-like-ai"
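# Note: Illustrator files saved with "PDF compatibility" embed a regular %PDF-
# stream, so they can be read with pypdf; older EPS-style saves fall through to
# the text-scrape path below. Example (hypothetical file):
#   detect_kind(Path("label.ai"))  ->  "pdf-compatible-ai"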


def parse_pdf_ai(path: Path) -> dict:
    from pypdf import PdfReader

    reader = PdfReader(str(path))
    page_sizes = []
    image_count = 0
    texts = []

    for page in reader.pages:
        box = page.mediabox
        page_sizes.append(
            {
                "width_pt": round(float(box.width), 2),
                "height_pt": round(float(box.height), 2),
            }
        )

        try:
            text = (page.extract_text() or "").strip()
            if text:
                texts.append(text[:2000])
        except Exception as exc:
            texts.append(f"[extract_text failed: {type(exc).__name__}]")

        try:
            images = getattr(page, "images", [])
            image_count += len(list(images))
        except Exception:
            pass

    metadata = {}
    for key, value in (reader.metadata or {}).items():
        metadata[str(key)] = str(value)

    return {
        "kind": "pdf-compatible-ai",
        "pages": len(reader.pages),
        "page_sizes": page_sizes,
        "metadata": metadata,
        "image_count": image_count,
        "text_samples": texts,
    }


def parse_eps_like_ai(path: Path) -> dict:
    raw = path.read_bytes()
    text = raw.decode("latin1", errors="ignore")

    bbox = re.search(r"%%BoundingBox:\s+([-\d]+)\s+([-\d]+)\s+([-\d]+)\s+([-\d]+)", text)
    strings = re.findall(r"\(([^()\r\n]{2,200})\)", text)

    return {
        "kind": "eps-like-ai",
        "bounding_box": tuple(map(int, bbox.groups())) if bbox else None,
        "text_samples": strings[:50],
    }


def main() -> None:
    parser = argparse.ArgumentParser(description="Parse basic info from an Adobe Illustrator .ai file.")
    parser.add_argument("file", type=Path, help="Path to the .ai file")
    parser.add_argument("--pretty", action="store_true", help="Pretty-print JSON output")
    args = parser.parse_args()

    path = args.file.expanduser().resolve()
    if not path.exists():
        raise SystemExit(f"File not found: {path}")

    kind = detect_kind(path)
    result = {
        "file": str(path),
        "size_bytes": path.stat().st_size,
    }

    if kind == "pdf-compatible-ai":
        result.update(parse_pdf_ai(path))
    else:
        result.update(parse_eps_like_ai(path))

    if args.pretty:
        print(json.dumps(result, ensure_ascii=False, indent=2))
    else:
        print(json.dumps(result, ensure_ascii=False))


if __name__ == "__main__":
    main()
11
requirements.txt
Normal file
@@ -0,0 +1,11 @@
fastapi
uvicorn
python-docx
pypdf
Pillow
python-multipart
pytest
numpy
opencv-python
zxing-cpp
requests
108
scripts/detect_layout_boxes.py
Normal file
@@ -0,0 +1,108 @@
#!/usr/bin/env python3
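"""Detect layout boxes in a label file and save an annotated preview plus JSON.

Example invocation (illustrative path):
    python scripts/detect_layout_boxes.py data/sample.ai --output-dir outputs/layout_boxes
"""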
from __future__ import annotations

import argparse
import json
import shutil
import subprocess
from pathlib import Path

import cv2

from backend.app.layout_cv import process_image


def detect_kind(path: Path) -> str:
    suffix = path.suffix.lower()
    if suffix in {".jpg", ".jpeg", ".png", ".webp", ".bmp", ".tif", ".tiff"}:
        return "image"
    if suffix == ".pdf":
        return "pdf"
    if suffix == ".ai":
        with path.open("rb") as file:
            head = file.read(4096)
        if b"%PDF-" in head or b"%%PDF" in head:
            return "pdf-compatible-ai"
        return "eps-like-ai"
    raise ValueError(f"Unsupported input type: {path.suffix}")


def ensure_raster_image(source: Path, workdir: Path) -> Path:
    kind = detect_kind(source)
    if kind == "image":
        return source

    pdf_path = source
    if kind in {"pdf-compatible-ai", "eps-like-ai"}:
        converted_dir = workdir / "converted"
        converted_dir.mkdir(parents=True, exist_ok=True)
        command = [
            "soffice",
            "--headless",
            "--convert-to",
            "pdf",
            "--outdir",
            str(converted_dir),
            str(source),
        ]
        completed = subprocess.run(command, capture_output=True, text=True, check=False)
        if completed.returncode != 0:
            raise RuntimeError(completed.stderr.strip() or completed.stdout.strip() or "failed to convert AI to PDF")
        pdf_path = converted_dir / f"{source.stem}.pdf"

    image_dir = workdir / "rasterized"
    image_dir.mkdir(parents=True, exist_ok=True)
    image_path = image_dir / f"{source.stem}.png"

    if shutil.which("pdftoppm"):
        command = ["pdftoppm", "-png", "-singlefile", "-r", "220", str(pdf_path), str(image_path.with_suffix(""))]
        completed = subprocess.run(command, capture_output=True, text=True, check=False)
        if completed.returncode != 0:
            raise RuntimeError(completed.stderr.strip() or completed.stdout.strip() or "failed to rasterize PDF")
        return image_path

    if shutil.which("magick"):
        command = ["magick", "-density", "220", str(pdf_path), "-quality", "100", str(image_path)]
        completed = subprocess.run(command, capture_output=True, text=True, check=False)
        if completed.returncode != 0:
            raise RuntimeError(completed.stderr.strip() or completed.stdout.strip() or "failed to rasterize PDF")
        return image_path

    raise RuntimeError("Neither pdftoppm nor magick is available for PDF rasterization")


def main() -> None:
    parser = argparse.ArgumentParser(description="Detect text-line and outer rectangle boxes from a label image.")
    parser.add_argument("input", type=Path, help="Path to an image, PDF, or AI file")
    parser.add_argument("--output-dir", type=Path, default=Path("outputs/layout_boxes"), help="Directory for preview and JSON")
    args = parser.parse_args()

    source = args.input.expanduser().resolve()
    output_dir = args.output_dir.expanduser().resolve()
    output_dir.mkdir(parents=True, exist_ok=True)

    raster_path = ensure_raster_image(source, output_dir)
    annotated, boxes = process_image(raster_path)

    preview_path = output_dir / f"{source.stem}.boxed.png"
    json_path = output_dir / f"{source.stem}.boxes.json"
    cv2.imwrite(str(preview_path), annotated)
    json_path.write_text(
        json.dumps(
            {
                "source": str(source),
                "raster_path": str(raster_path),
                "preview_path": str(preview_path),
                "boxes": [box.to_dict() for box in boxes],
            },
            ensure_ascii=False,
            indent=2,
        ),
        encoding="utf-8",
    )

    print(json.dumps({"preview_path": str(preview_path), "json_path": str(json_path)}, ensure_ascii=False, indent=2))


if __name__ == "__main__":
    main()
109
scripts/detect_regions.py
Normal file
@@ -0,0 +1,109 @@
#!/usr/bin/env python3
"""Standalone CLI: detect main regions in a label image via Qwen VL, then crop.

Usage
-----
python scripts/detect_regions.py <image_path> [--model MODEL] [--out OUT_DIR] [--key KEY]

Example
-------
python scripts/detect_regions.py data/sample.png
python scripts/detect_regions.py data/sample.png --model qwen2.5-vl-72b-instruct
"""
from __future__ import annotations

import argparse
import json
import logging
import sys
from pathlib import Path

# ── make sure the project root is on sys.path ──────────────────────────────
ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(ROOT))

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s – %(message)s",
)
logger = logging.getLogger("detect_regions")


def main() -> None:
    parser = argparse.ArgumentParser(
        description="Detect & crop main regions in a label image using Qwen VL"
    )
    parser.add_argument("image", help="Path to input image (PNG/JPEG)")
    parser.add_argument(
        "--model",
        default="qwen2.5-vl-7b-instruct",
        help="DashScope model ID (default: qwen2.5-vl-7b-instruct)",
    )
    parser.add_argument(
        "--out",
        default=None,
        help="Output directory for cropped regions (default: <image_dir>/regions/)",
    )
    parser.add_argument("--key", default=None, help="DASHSCOPE_API_KEY (overrides env)")
    parser.add_argument(
        "--api-max-side", type=int, default=1024,
        help="Max side length (px) of image sent to API (default: 1024). "
             "Crop is always done on the original full-res file.",
    )
    parser.add_argument("--no-crop", action="store_true", help="Only print coords, don't crop")
    parser.add_argument("--split", action="store_true",
                        help="Save each detected region separately (default: merge into one)")
    args = parser.parse_args()

    image_path = Path(args.image).expanduser().resolve()
    if not image_path.exists():
        parser.error(f"Image not found: {image_path}")

    output_dir = Path(args.out).expanduser().resolve() if args.out else image_path.parent / "regions"

    from backend.app.region_detector import detect_regions, crop_and_save, merge_regions

    logger.info("Image: %s", image_path)
    logger.info("Model: %s", args.model)

    regions, raw_response = detect_regions(
        image_path,
        api_key=args.key or None,
        model=args.model,
        api_max_side=args.api_max_side,
    )

    if not regions:
        logger.error("No regions detected. Raw model response:\n%s", raw_response)
        sys.exit(1)

    print("\n── Detected regions ──────────────────────────────────────")
    for i, r in enumerate(regions, 1):
        print(f"  {i:02d}. [{r.label}] bbox=({r.x1},{r.y1})-({r.x2},{r.y2}) "
              f"size={r.width}×{r.height}px")
    print()

    coords_json = [
        {"label": r.label, "bbox": [r.x1, r.y1, r.x2, r.y2]}
        for r in regions
    ]
    print("JSON:")
    print(json.dumps(coords_json, ensure_ascii=False, indent=2))

    if not args.no_crop:
        if args.split:
            save_regions = regions
        else:
            merged = merge_regions(regions)
            save_regions = [merged]
            print(f"\n── Merged bbox: ({merged.x1},{merged.y1})-({merged.x2},{merged.y2})"
                  f" size={merged.width}×{merged.height}px")

        results = crop_and_save(image_path, save_regions, output_dir)
        print(f"\n── Cropped file(s) saved to: {output_dir} ──")
        for item in results:
            print(f"  • {Path(item['path']).name} ← {item['label']}")


if __name__ == "__main__":
    main()
7
scripts/start_backend.sh
Executable file
@@ -0,0 +1,7 @@
#!/usr/bin/env bash
set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

cd "$ROOT_DIR"
python3 -m uvicorn backend.app.main:app --host 127.0.0.1 --port 8010 --reload
80
scripts/start_dev.sh
Executable file
@@ -0,0 +1,80 @@
#!/usr/bin/env bash
set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
BACKEND_PORT="${BACKEND_PORT:-8010}"
FRONTEND_PORT="${FRONTEND_PORT:-5173}"

port_pid() {
  lsof -tiTCP:"$1" -sTCP:LISTEN 2>/dev/null | head -n 1 || true
}

port_cmd() {
  local pid
  pid="$(port_pid "$1")"
  if [[ -z "$pid" ]]; then
    return 0
  fi
  ps -p "$pid" -o command= 2>/dev/null || true
}

print_port_conflict() {
  local port="$1"
  local pid
  local cmd
  pid="$(port_pid "$port")"
  cmd="$(port_cmd "$port")"
  echo "Port $port is already in use." >&2
  if [[ -n "$pid" ]]; then
    echo "  PID: $pid" >&2
  fi
  if [[ -n "$cmd" ]]; then
    echo "  CMD: $cmd" >&2
  fi
}

cleanup() {
  if [[ -n "${BACKEND_PID:-}" ]]; then
    kill "$BACKEND_PID" 2>/dev/null || true
  fi
  if [[ -n "${FRONTEND_PID:-}" ]]; then
    kill "$FRONTEND_PID" 2>/dev/null || true
  fi
}

trap cleanup EXIT INT TERM

if [[ -n "$(port_pid "$BACKEND_PORT")" ]]; then
  print_port_conflict "$BACKEND_PORT"
  echo "Set BACKEND_PORT to another port or stop the existing process first." >&2
  exit 1
fi

if [[ -n "$(port_pid "$FRONTEND_PORT")" ]]; then
  print_port_conflict "$FRONTEND_PORT"
  echo "Set FRONTEND_PORT to another port or stop the existing process first." >&2
  exit 1
fi

cd "$ROOT_DIR"
# Prefer miniconda Python (has all project deps); fall back to conda/system python3
PYTHON="${CONDA_PREFIX:-}/bin/python"
if [[ ! -x "$PYTHON" ]]; then
  PYTHON="$(command -v /Users/icemilk/miniconda3/bin/python 2>/dev/null || command -v python3)"
fi
"$PYTHON" -m uvicorn backend.app.main:app --host 127.0.0.1 --port "$BACKEND_PORT" --reload \
  > >(sed 's/^/[backend] /') \
  2> >(sed 's/^/[backend] /' >&2) &
BACKEND_PID=$!

cd "$ROOT_DIR/frontend"
npm run dev -- --host 127.0.0.1 --port "$FRONTEND_PORT" \
  > >(sed 's/^/[frontend] /') \
  2> >(sed 's/^/[frontend] /' >&2) &
FRONTEND_PID=$!
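# Both servers log to the same terminal; the > >(sed ...) process substitutions
# above prefix each line with [backend]/[frontend] so interleaved output stays readable.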

echo "Backend: http://127.0.0.1:${BACKEND_PORT}"
echo "Frontend: http://127.0.0.1:${FRONTEND_PORT}"
echo "Press Ctrl+C to stop both services."

wait "$BACKEND_PID" "$FRONTEND_PID"
7
scripts/start_frontend.sh
Executable file
@@ -0,0 +1,7 @@
#!/usr/bin/env bash
set -euo pipefail

ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

cd "$ROOT_DIR/frontend"
npm run dev -- --host 127.0.0.1 --port 5173
192
segment_ai_regions.py
Normal file
@@ -0,0 +1,192 @@
#!/usr/bin/env python3
from __future__ import annotations

import json
import re
from pathlib import Path

from PIL import Image, ImageDraw, ImageFont


WORKDIR = Path("/Users/icemilk/Workspace/zld_POC")
TEXT_BLOCKS = WORKDIR / "【2026-04-09】端午-背标-天问.text_blocks.json"
IMAGE_PATH = WORKDIR / "1.jpg"
OUT_IMAGE = WORKDIR / "【2026-04-09】端午-背标-天问.region_overlay.png"
OUT_JSON = WORKDIR / "【2026-04-09】端午-背标-天问.regions.json"

PAGE_WIDTH_PT = 1363.4
PAGE_HEIGHT_PT = 942.06


def load_blocks() -> list[dict]:
    return json.loads(TEXT_BLOCKS.read_text(encoding="utf-8"))


def overlaps(a: tuple[float, float, float, float], b: tuple[float, float, float, float]) -> bool:
    ax0, ay0, ax1, ay1 = a
    bx0, by0, bx1, by1 = b
    return not (ax1 < bx0 or bx1 < ax0 or ay1 < by0 or by1 < ay0)


def expanded_box(block: dict, pad_x: float = 24.0, pad_y: float = 18.0) -> tuple[float, float, float, float]:
    return (
        block["x0_pt"] - pad_x,
        block["top_pt"] - pad_y,
        block["x1_pt"] + pad_x,
        block["bottom_pt"] + pad_y,
    )


def region_bbox(blocks: list[dict], margin_x: float = 20.0, margin_y: float = 14.0) -> dict:
    x0 = min(b["x0_pt"] for b in blocks) - margin_x
    y0 = min(b["top_pt"] for b in blocks) - margin_y
    x1 = max(b["x1_pt"] for b in blocks) + margin_x
    y1 = max(b["bottom_pt"] for b in blocks) + margin_y
    return {"x0_pt": max(0, x0), "top_pt": max(0, y0), "x1_pt": x1, "bottom_pt": y1}


def classify(region: dict) -> str:
    return region["label"]


def to_px(x_pt: float, y_pt: float, img_w: int, img_h: int) -> tuple[int, int]:
    return (
        round(x_pt / PAGE_WIDTH_PT * img_w),
        round(y_pt / PAGE_HEIGHT_PT * img_h),
    )


def match_any(text: str, patterns: list[str]) -> bool:
    return any(p in text for p in patterns)


def semantic_groups(blocks: list[dict]) -> list[tuple[str, list[dict]]]:
    groups: list[tuple[str, list[dict]]] = []

    defs = [
        (
            "header_basic",
            lambda b: b["top_pt"] < 140 and match_any(
                b["text"], ["品名", "成品尺寸", "材质", "工艺", "盒型"]
            ),
        ),
        (
            "header_rules",
            lambda b: b["top_pt"] < 140 and match_any(
                b["text"], ["日期", "设计比例", "字体大小规范", "常规内容最小高度", "净含量最小高度", "条形码"]
            ),
        ),
        (
            "workflow_notes",
            lambda b: b["x0_pt"] > 1180 or match_any(b["text"], ["签稿流程", "设计师", "品控", "安冬梅"]),
        ),
        (
            "version_info",
            lambda b: "版本号" in b["text"],
        ),
        (
            "upper_main",
            lambda b: 250 <= b["top_pt"] <= 540 and b["x0_pt"] < 820 and not match_any(b["text"], ["营养成分表"]),
        ),
        (
            "cooking_box",
            lambda b: 560 <= b["top_pt"] <= 650 and 500 <= b["x0_pt"] <= 680,
        ),
        (
            "seal_mark",
            lambda b: 560 <= b["top_pt"] <= 650 and 680 < b["x0_pt"] <= 760,
        ),
        (
            "nutrition_table",
            lambda b: 520 <= b["top_pt"] <= 670 and b["x0_pt"] < 960,
        ),
        (
            "lower_left_details",
            lambda b: 590 <= b["top_pt"] <= 705 and b["x0_pt"] < 520,
        ),
        (
            "date_box",
            lambda b: match_any(b["text"], ["生产日期", "保质期到期日"]) and b["x0_pt"] > 650,
        ),
        (
            "bottom_title",
            lambda b: b["top_pt"] > 705 and b["x0_pt"] < 980,
        ),
    ]

    remaining = blocks[:]
    for label, predicate in defs:
        matched = [b for b in remaining if predicate(b)]
        if matched:
            groups.append((label, matched))
            ids = {id(b) for b in matched}
            remaining = [b for b in remaining if id(b) not in ids]

    if remaining:
        # Keep any leftovers visible so we can inspect missed areas.
        leftovers = [b for b in remaining if re.search(r"\S", b["text"])]
        if leftovers:
            groups.append(("unassigned", leftovers))

    return groups
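# Note: assignment above is first-match-wins — each block is removed from
# `remaining` as soon as a predicate claims it, so the order of `defs` encodes
# priority (e.g. upper_main runs before nutrition_table despite overlapping y-ranges).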


def build_regions(blocks: list[dict]) -> list[dict]:
    regions = []
    for idx, (label, group) in enumerate(semantic_groups(blocks), start=1):
        bbox = region_bbox(group)
        sample = " ".join(b["text"] for b in sorted(group, key=lambda b: (b["top_pt"], b["x0_pt"]))[:4])
        region = {
            "region_id": idx,
            "label": label,
            "bbox": bbox,
            "block_count": len(group),
            "sample_text": sample[:120],
        }
        regions.append(region)
    return regions


def draw_regions(regions: list[dict]) -> None:
    image = Image.open(IMAGE_PATH).convert("RGBA")
    draw = ImageDraw.Draw(image, "RGBA")
    colors = [
        (255, 99, 71, 255),
        (65, 105, 225, 255),
        (50, 205, 50, 255),
        (255, 165, 0, 255),
        (148, 0, 211, 255),
        (0, 191, 255, 255),
        (220, 20, 60, 255),
        (46, 139, 87, 255),
    ]
    font = ImageFont.load_default()

    for i, region in enumerate(regions):
        color = colors[i % len(colors)]
        bbox = region["bbox"]
        x0, y0 = to_px(bbox["x0_pt"], bbox["top_pt"], image.width, image.height)
        x1, y1 = to_px(bbox["x1_pt"], bbox["bottom_pt"], image.width, image.height)
        draw.rectangle([x0, y0, x1, y1], outline=color[:3], width=5)
        tag = f"R{region['region_id']} {region['label']}"
        tx0 = max(8, x0 + 8)
        ty0 = max(8, y0 + 8)
        tw, th = draw.textbbox((tx0, ty0), tag, font=font)[2:]
        draw.rectangle([tx0 - 4, ty0 - 2, tx0 + tw + 4, ty0 + th + 2], fill=(255, 255, 255, 220))
        draw.text((tx0, ty0), tag, fill=(0, 0, 0, 255), font=font)

    image.save(OUT_IMAGE)


def main() -> None:
    blocks = load_blocks()
    regions = build_regions(blocks)
    OUT_JSON.write_text(json.dumps(regions, ensure_ascii=False, indent=2), encoding="utf-8")
    draw_regions(regions)
    print(OUT_IMAGE)
    print(OUT_JSON)
    print(f"regions={len(regions)}")


if __name__ == "__main__":
    main()
85
tests/backend/test_ai_parser.py
Normal file
@@ -0,0 +1,85 @@
from types import SimpleNamespace

from backend.app.ai_parser import (
    _estimate_text_width,
    _estimate_text_width_from_text_matrix,
    _page_horizontal_offset,
    _text_rect_from_matrix,
)


def test_text_rect_from_matrix_uses_rendered_height_and_baseline() -> None:
    font_size_pt, x0_pt, top_pt, x1_pt, bottom_pt = _text_rect_from_matrix(
        "食品名称: 天问礼品粽 (粽子/草木灰咸鸭蛋)",
        [19.3618, 0.0, 0.0, 21.0, 435.9155, 629.3184],
        942.06,
        None,
    )

    assert font_size_pt == 21.0
    assert x0_pt == 435.92
    assert top_pt == 291.74
    assert bottom_pt == 312.74
    assert x1_pt > x0_pt


def test_text_rect_from_matrix_handles_small_text_without_collapsing_height() -> None:
    font_size_pt, x0_pt, top_pt, x1_pt, bottom_pt = _text_rect_from_matrix(
        "儿童青少年应避免过量摄入盐油糖。",
        [4.3157, 0.0, 0.0, 8.0, 680.7383741, 516.1778],
        942.06,
        None,
    )

    assert font_size_pt == 8.0
    assert x0_pt == 680.74
    assert top_pt == 417.88
    assert bottom_pt == 425.88
    assert x1_pt > x0_pt


def test_text_rect_from_matrix_applies_page_horizontal_offset() -> None:
    font_size_pt, x0_pt, top_pt, x1_pt, bottom_pt = _text_rect_from_matrix(
        "材质:",
        [7.0652, 0.0, 0.0, 12.36, 190.6111, 873.561],
        942.06,
        None,
        24.21,
    )

    assert font_size_pt == 12.36
    assert x0_pt == 166.4
    assert top_pt == 56.14
    assert bottom_pt == 68.5
    assert x1_pt > x0_pt


def test_page_horizontal_offset_uses_artbox_left_inset() -> None:
    page = SimpleNamespace(
        artbox=SimpleNamespace(left=24.2137, width=1314.7563),
        cropbox=SimpleNamespace(width=1363.4),
    )

    assert _page_horizontal_offset(page) == 24.2137


def test_text_matrix_width_is_tighter_than_fallback_for_food_name() -> None:
    text = "食品名称: 天问礼品粽 (粽子/草木灰咸鸭蛋)"
    reference_width = 374.51

    fallback_width = round(_estimate_text_width(text, 21.0), 2)
    matrix_width = round(_estimate_text_width_from_text_matrix(text, 19.3618) or 0.0, 2)

    assert matrix_width > 0
    assert abs(matrix_width - reference_width) < abs(fallback_width - reference_width)


def test_text_matrix_width_is_tighter_than_fallback_for_small_heading() -> None:
    text = "营养成分表"
    reference_width = 21.75

    fallback_width = round(_estimate_text_width(text, 8.0), 2)
    matrix_width = round(_estimate_text_width_from_text_matrix(text, 4.3157) or 0.0, 2)

    assert matrix_width > 0
    assert abs(matrix_width - reference_width) <= abs(fallback_width - reference_width)
47
tests/backend/test_ai_render_crop.py
Normal file
@@ -0,0 +1,47 @@
from pathlib import Path

import cv2
import numpy as np

from backend.app.ai_render_crop import detect_main_content_box, process_ai_render_crop


WORKDIR = Path("/Users/icemilk/Workspace/zld_POC")
AI_FILE = WORKDIR / "【2026-04-09】端午 - 背标 - 天问.ai"
OUTPUT_DIR = WORKDIR / ".tmp_test_render_crop"


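# NOTE: AI_FILE lives in a developer-local workspace, so the render-crop tests
# only run on machines that have the sample artwork at that absolute path.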
def test_detect_main_content_box_finds_centered_content() -> None:
    image = np.full((400, 600, 3), 255, dtype=np.uint8)
    cv2.rectangle(image, (120, 90), (520, 310), (10, 10, 10), 3)
    cv2.putText(image, "MAIN CONTENT", (150, 210), cv2.FONT_HERSHEY_SIMPLEX, 1.2, (20, 20, 20), 3, cv2.LINE_AA)

    x0, y0, x1, y1 = detect_main_content_box(image)

    assert x0 < 120
    assert y0 < 90
    assert x1 > 520
    assert y1 > 310


def test_process_ai_render_crop_outputs_full_and_cropped_images() -> None:
    result = process_ai_render_crop(AI_FILE, OUTPUT_DIR)

    assert result["fullImage"]["url"].endswith(".png")
    assert result["croppedImage"]["url"].endswith(".png")
    assert result["cropBox"]["x0"] >= 0
    assert result["cropBox"]["y0"] >= 0
    assert result["cropBox"]["x1"] > result["cropBox"]["x0"]
    assert result["cropBox"]["y1"] > result["cropBox"]["y0"]

    full_path = OUTPUT_DIR / Path(result["fullImage"]["url"]).name
    cropped_path = OUTPUT_DIR / Path(result["croppedImage"]["url"]).name
    assert full_path.exists()
    assert cropped_path.exists()

    full_image = cv2.imread(str(full_path))
    cropped_image = cv2.imread(str(cropped_path))
    assert full_image is not None
    assert cropped_image is not None
    assert cropped_image.shape[1] < full_image.shape[1]
    assert cropped_image.shape[0] < full_image.shape[0]
31
tests/backend/test_ai_render_crop_api.py
Normal file
@@ -0,0 +1,31 @@
from pathlib import Path

from fastapi.testclient import TestClient

from backend.app.main import app


WORKDIR = Path("/Users/icemilk/Workspace/zld_POC")
AI_FILE = WORKDIR / "【2026-04-09】端午 - 背标 - 天问.ai"

client = TestClient(app)


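# Illustrator .ai files are PostScript/PDF-based, hence the
# application/postscript content type on the upload.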
def test_ai_render_crop_endpoint_returns_two_images_and_crop_box() -> None:
    with AI_FILE.open("rb") as ai_fp:
        response = client.post(
            "/api/ai-render-crop",
            files={
                "ai_file": (AI_FILE.name, ai_fp, "application/postscript"),
            },
        )

    assert response.status_code == 200

    payload = response.json()
    assert payload["fullImage"]["type"] == "image"
    assert payload["croppedImage"]["type"] == "image"
    assert payload["fullImage"]["url"].endswith(".png")
    assert payload["croppedImage"]["url"].endswith(".png")
    assert payload["cropBox"]["x1"] > payload["cropBox"]["x0"]
    assert payload["cropBox"]["y1"] > payload["cropBox"]["y0"]
169
tests/backend/test_api.py
Normal file
@@ -0,0 +1,169 @@
from pathlib import Path

import pytest
from fastapi.testclient import TestClient

from backend.app import pipeline
from backend.app import main
from backend.app.main import app


WORKDIR = Path("/Users/icemilk/Workspace/zld_POC")
AI_FILE = WORKDIR / "【2026-04-09】端午 - 背标 - 天问.ai"
DOCX_FILE = WORKDIR / "天问礼品粽【260331】.docx"


client = TestClient(app)


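# Minimal MinerU "middle JSON" shape the pipeline consumes: pdf_info holds one
# entry per page with page_size plus para_blocks, and each block carries a
# bbox and lines -> spans -> content.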
def fake_mineru_payload() -> dict:
    return {
        "pdf_info": [
            {
                "page_idx": 0,
                "page_size": [2772, 1961],
                "para_blocks": [
                    {
                        "bbox": [704, 134, 2106, 229],
                        "lines": [{"spans": [{"content": "食品名称:天问礼品粽"}]}],
                    }
                ],
            }
        ]
    }


def test_process_endpoint_returns_preview_and_fields(monkeypatch: pytest.MonkeyPatch) -> None:
    monkeypatch.setattr(pipeline, "_parse_preview_with_mineru", lambda _preview_path, _output_dir: fake_mineru_payload())

    with AI_FILE.open("rb") as ai_fp, DOCX_FILE.open("rb") as docx_fp:
        response = client.post(
            "/api/process",
            files={
                "ai_file": (AI_FILE.name, ai_fp, "application/postscript"),
                "word_file": (
                    DOCX_FILE.name,
                    docx_fp,
                    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                ),
            },
        )

    assert response.status_code == 200

    payload = response.json()
    assert payload["preview"]["type"] == "pdf"
    assert payload["fields"]
    assert payload["preview"]["pageWidthPt"] == 2772
    assert payload["fields"][0]["text"] == "食品名称:天问礼品粽"


def test_process_endpoint_uses_default_sample_files_when_uploads_are_missing(monkeypatch: pytest.MonkeyPatch) -> None:
    monkeypatch.setattr(pipeline, "_parse_preview_with_mineru", lambda _preview_path, _output_dir: fake_mineru_payload())

    response = client.post("/api/process")

    assert response.status_code == 200

    payload = response.json()
    assert payload["preview"]["type"] == "pdf"
    assert payload["fields"]
    assert any(field["text"] for field in payload["fields"])


def test_process_endpoint_surfaces_missing_mineru_key(monkeypatch: pytest.MonkeyPatch) -> None:
    def fake_parse_with_mineru(_preview_path, _output_dir):
        raise RuntimeError("MINERU_API_KEY is required")

    monkeypatch.setattr(pipeline, "_parse_preview_with_mineru", fake_parse_with_mineru)

    response = client.post("/api/process")

    assert response.status_code == 500
    assert response.json()["detail"] == "MINERU_API_KEY is required"


def test_mineru_extract_endpoint_returns_job_preview_and_blocks(monkeypatch: pytest.MonkeyPatch) -> None:
    monkeypatch.setattr(
        pipeline,
        "extract_mineru_result",
        lambda _ai_path, _output_dir, job_id=None: {
            "jobId": job_id,
            "preview": {
                "type": "pdf",
                "url": f"/api/files/{job_id}/preview.pdf",
                "pageWidthPt": 2772,
                "pageHeightPt": 1961,
            },
            "artifacts": {
                "json": {"path": "/tmp/structured.json", "url": f"/api/files/{job_id}/mineru/structured.json"},
                "markdown": {"path": "/tmp/full.md", "url": f"/api/files/{job_id}/mineru/full.md"},
            },
            "blocks": [{"id": "block-1", "text": "食品名称:天问礼品粽", "page": 1, "x0_pt": 1, "top_pt": 2, "x1_pt": 3, "bottom_pt": 4}],
        },
    )

    with AI_FILE.open("rb") as ai_fp:
        response = client.post(
            "/api/mineru-extract",
            files={"ai_file": (AI_FILE.name, ai_fp, "application/postscript")},
        )

    assert response.status_code == 200
    payload = response.json()
    assert payload["jobId"]
    assert payload["preview"]["type"] == "pdf"
    assert payload["artifacts"]["json"]["url"].endswith("/mineru/structured.json")
    assert payload["artifacts"]["markdown"]["url"].endswith("/mineru/full.md")
    assert payload["blocks"][0]["id"] == "block-1"


def test_compare_word_endpoint_returns_compared_fields(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
    monkeypatch.setattr(main, "OUTPUTS_DIR", tmp_path)
    (tmp_path / "test-job").mkdir(parents=True, exist_ok=True)
    monkeypatch.setattr(
        pipeline,
        "compare_word_with_mineru",
        lambda _word_path, _output_dir, job_id=None: {
            "jobId": job_id,
            "preview": {
                "type": "pdf",
                "url": f"/api/files/{job_id}/preview.pdf",
                "pageWidthPt": 2772,
                "pageHeightPt": 1961,
            },
            "fields": [
                {
                    "id": "field-1",
                    "text": "食品名称:天问礼品粽",
                    "page": 1,
                    "x0_pt": 1,
                    "top_pt": 2,
                    "x1_pt": 3,
                    "bottom_pt": 4,
                    "normalized_text": "食品名称:天问礼品粽",
                    "validation_status": "matched",
                    "validation_reason": "normalized text found in Word content",
                    "matched_excerpt": "食品名称:天问礼品粽",
                }
            ],
        },
    )

    with DOCX_FILE.open("rb") as docx_fp:
        response = client.post(
            "/api/compare-word",
            data={"job_id": "test-job"},
            files={
                "word_file": (
                    DOCX_FILE.name,
                    docx_fp,
                    "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
                )
            },
        )

    assert response.status_code == 200
    payload = response.json()
    assert payload["jobId"] == "test-job"
    assert payload["fields"][0]["validation_status"] == "matched"
30
tests/backend/test_barcode_cv.py
Normal file
@@ -0,0 +1,30 @@
from pathlib import Path

from backend.app.barcode_cv import decode_barcode_image


WORKDIR = Path("/Users/icemilk/Workspace/zld_POC")


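# The valid_checksum flag asserts that the EAN-13 check digit holds. Worked
# example for 6954930015983: weighting the first 12 digits 1,3,1,3,... gives
# 6+27+5+12+9+9+0+0+1+15+9+24 = 117, and (10 - 117 % 10) % 10 == 3, which is
# the barcode's final digit.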
def test_decode_barcode_image_reads_ean13_from_original_sample() -> None:
    result = decode_barcode_image(WORKDIR / "1.jpg")

    assert result["text"] == "6954930015983"
    assert result["format"] == "EAN_13"
    assert result["valid_checksum"] is True


def test_decode_barcode_image_reads_ean13_from_ma1_sample() -> None:
    result = decode_barcode_image(WORKDIR / "ma1.png")

    assert result["text"] == "6954930015983"
    assert result["format"] == "EAN_13"
    assert result["valid_checksum"] is True


def test_decode_barcode_image_reads_ean13_from_ma2_sample() -> None:
    result = decode_barcode_image(WORKDIR / "ma2.png")

    assert result["text"] == "6954930016737"
    assert result["format"] == "EAN_13"
    assert result["valid_checksum"] is True
33
tests/backend/test_layout_cv.py
Normal file
@@ -0,0 +1,33 @@
import cv2
import numpy as np

from backend.app.layout_cv import Box, detect_text_lines, merge_text_and_rectangles


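# As the first fixture shows, merge_text_and_rectangles keeps the outermost
# rectangle that encloses the detected text lines and drops rectangles nested
# inside it (e.g. individual table cells).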
def test_merge_text_and_rectangles_keeps_outer_table_box_and_drops_nested_cells() -> None:
    text_lines = [
        Box(20, 20, 120, 36, "line", "配料"),
        Box(20, 40, 120, 56, "line", "糯米"),
        Box(20, 60, 120, 76, "line", "红豆"),
    ]
    rectangles = [
        Box(10, 10, 150, 90, "rectangle"),
        Box(12, 12, 78, 44, "rectangle"),
        Box(82, 12, 148, 44, "rectangle"),
    ]

    merged = merge_text_and_rectangles(text_lines, rectangles)

    assert [box.kind for box in merged] == ["rectangle", "line", "line", "line"]
    assert merged[0].as_tuple() == (10, 10, 150, 90)


def test_detect_text_lines_finds_two_text_rows_without_ocr() -> None:
    image = np.full((220, 420, 3), 255, dtype=np.uint8)
    cv2.putText(image, "LINE ONE", (20, 70), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0), 2, cv2.LINE_AA)
    cv2.putText(image, "LINE TWO", (20, 140), cv2.FONT_HERSHEY_SIMPLEX, 1.0, (0, 0, 0), 2, cv2.LINE_AA)

    lines = detect_text_lines(image)

    assert len(lines) == 2
    assert lines[0].y1 < lines[1].y0
192
tests/backend/test_mineru_client.py
Normal file
@@ -0,0 +1,192 @@
from __future__ import annotations

import io
import json
import zipfile
from pathlib import Path

import pytest
import requests

from backend.app import mineru_client
from backend.app.mineru_client import MineruClient, MineruClientError


class FakeResponse:
    def __init__(self, status: int, body: bytes):
        self.status = status
        self._body = body

    def read(self) -> bytes:
        return self._body

    def __enter__(self) -> "FakeResponse":
        return self

    def __exit__(self, *_args: object) -> None:
        return None


class FakeRequestsResponse:
    def __init__(self, status_code: int, text: str = ""):
        self.status_code = status_code
        self.text = text


def _zip_with_json() -> bytes:
    buffer = io.BytesIO()
    with zipfile.ZipFile(buffer, "w") as archive:
        archive.writestr(
            "demo_middle.json",
            json.dumps({"pdf_info": [{"page_idx": 0, "page_size": [1, 1], "para_blocks": []}]}),
        )
    return buffer.getvalue()


def _zip_with_layout_and_model() -> bytes:
    buffer = io.BytesIO()
    with zipfile.ZipFile(buffer, "w") as archive:
        archive.writestr("demo_model.json", json.dumps([[{"type": "header"}]]))
        archive.writestr(
            "layout.json",
            json.dumps({"pdf_info": [{"page_idx": 0, "page_size": [2, 2], "para_blocks": []}]}),
        )
    return buffer.getvalue()


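# The client uses two transports, so both seams are faked below:
# urllib.request.urlopen for the MinerU v4 API round trips (file-urls/batch,
# extract/task, task polling, result-zip download) and requests.put for the
# presigned upload URL returned by the batch call.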
def test_submit_pdf_downloads_and_loads_structured_json(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
    calls: list[str] = []

    def fake_urlopen(request_obj, timeout=0):
        url = request_obj.full_url if hasattr(request_obj, "full_url") else request_obj
        calls.append(str(url))
        if str(url).endswith("/api/v4/file-urls/batch"):
            return FakeResponse(
                200,
                json.dumps(
                    {"code": 0, "data": {"batch_id": "batch-1", "file_urls": ["https://upload.example/file"]}}
                ).encode(),
            )
        if str(url) == "https://upload.example/file":
            raise AssertionError("upload URL should be handled by requests.put")
        if str(url).endswith("/api/v4/extract/task"):
            return FakeResponse(
                200,
                json.dumps({"code": 0, "data": {"task_id": "task-1"}}).encode(),
            )
        if str(url).endswith("/api/v4/extract/task/task-1"):
            return FakeResponse(
                200,
                json.dumps({"code": 0, "data": {"state": "done", "full_zip_url": "https://download.example/result.zip"}}).encode(),
            )
        if str(url) == "https://download.example/result.zip":
            return FakeResponse(200, _zip_with_json())
        raise AssertionError(f"unexpected URL {url}")

    monkeypatch.setattr(mineru_client.request, "urlopen", fake_urlopen)
    monkeypatch.setattr(mineru_client.requests, "put", lambda url, data, timeout=0: FakeRequestsResponse(200))
    pdf_path = tmp_path / "preview.pdf"
    pdf_path.write_bytes(b"%PDF-1.7")

    payload = MineruClient(api_key="secret", poll_interval_seconds=0, max_polls=1).parse_pdf(pdf_path, tmp_path)

    assert payload["pdf_info"][0]["page_size"] == [1, 1]
    assert calls == [
        "https://mineru.net/api/v4/file-urls/batch",
        "https://mineru.net/api/v4/extract/task",
        "https://mineru.net/api/v4/extract/task/task-1",
        "https://download.example/result.zip",
    ]
    assert (tmp_path / "mineru_result.zip").exists()


def test_submit_pdf_raises_on_failed_task(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
    def fake_urlopen(request_obj, timeout=0):
        url = request_obj.full_url if hasattr(request_obj, "full_url") else request_obj
        if str(url).endswith("/api/v4/file-urls/batch"):
            return FakeResponse(
                200,
                json.dumps(
                    {"code": 0, "data": {"batch_id": "batch-1", "file_urls": ["https://upload.example/file"]}}
                ).encode(),
            )
        if str(url) == "https://upload.example/file":
            raise AssertionError("upload URL should be handled by requests.put")
        if str(url).endswith("/api/v4/extract/task"):
            return FakeResponse(
                200,
                json.dumps({"code": 0, "data": {"task_id": "task-1"}}).encode(),
            )
        if str(url).endswith("/api/v4/extract/task/task-1"):
            return FakeResponse(
                200,
                json.dumps({"code": 0, "data": {"state": "failed", "err_msg": "bad pdf"}}).encode(),
            )
        raise AssertionError(f"unexpected URL {url}")

    monkeypatch.setattr(mineru_client.request, "urlopen", fake_urlopen)
    monkeypatch.setattr(mineru_client.requests, "put", lambda url, data, timeout=0: FakeRequestsResponse(200))
    pdf_path = tmp_path / "preview.pdf"
    pdf_path.write_bytes(b"%PDF-1.7")

    with pytest.raises(MineruClientError, match="bad pdf"):
        MineruClient(api_key="secret", poll_interval_seconds=0, max_polls=1).parse_pdf(pdf_path, tmp_path)


def test_submit_pdf_raises_on_upload_http_error(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
    def fake_urlopen(request_obj, timeout=0):
        url = request_obj.full_url if hasattr(request_obj, "full_url") else request_obj
        if str(url).endswith("/api/v4/file-urls/batch"):
            return FakeResponse(
                200,
                json.dumps(
                    {"code": 0, "data": {"batch_id": "batch-1", "file_urls": ["https://upload.example/file"]}}
                ).encode(),
            )
        raise AssertionError(f"unexpected URL {url}")

    monkeypatch.setattr(mineru_client.request, "urlopen", fake_urlopen)
    monkeypatch.setattr(
        mineru_client.requests,
        "put",
        lambda url, data, timeout=0: FakeRequestsResponse(403, "SignatureDoesNotMatch"),
    )
    pdf_path = tmp_path / "preview.pdf"
    pdf_path.write_bytes(b"%PDF-1.7")

    with pytest.raises(MineruClientError, match="HTTP 403"):
        MineruClient(api_key="secret", poll_interval_seconds=0, max_polls=1).parse_pdf(pdf_path, tmp_path)


def test_submit_pdf_prefers_layout_json_over_model_json(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
    def fake_urlopen(request_obj, timeout=0):
        url = request_obj.full_url if hasattr(request_obj, "full_url") else request_obj
        if str(url).endswith("/api/v4/file-urls/batch"):
            return FakeResponse(
                200,
                json.dumps(
                    {"code": 0, "data": {"batch_id": "batch-1", "file_urls": ["https://upload.example/file"]}}
                ).encode(),
            )
        if str(url).endswith("/api/v4/extract/task"):
            return FakeResponse(
                200,
                json.dumps({"code": 0, "data": {"task_id": "task-1"}}).encode(),
            )
        if str(url).endswith("/api/v4/extract/task/task-1"):
            return FakeResponse(
                200,
                json.dumps({"code": 0, "data": {"state": "done", "full_zip_url": "https://download.example/result.zip"}}).encode(),
            )
        if str(url) == "https://download.example/result.zip":
            return FakeResponse(200, _zip_with_layout_and_model())
        raise AssertionError(f"unexpected URL {url}")

    monkeypatch.setattr(mineru_client.request, "urlopen", fake_urlopen)
    monkeypatch.setattr(mineru_client.requests, "put", lambda url, data, timeout=0: FakeRequestsResponse(200))
    pdf_path = tmp_path / "preview.pdf"
    pdf_path.write_bytes(b"%PDF-1.7")

    payload = MineruClient(api_key="secret", poll_interval_seconds=0, max_polls=1).parse_pdf(pdf_path, tmp_path)

    assert payload["pdf_info"][0]["page_size"] == [2, 2]
99
tests/backend/test_mineru_parser.py
Normal file
@@ -0,0 +1,99 @@
from __future__ import annotations

from backend.app.mineru_parser import parse_mineru_fields


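# parse_mineru_fields flattens MinerU middle JSON (pdf_info -> para_blocks ->
# lines -> spans) into one field dict per block, carrying the block bbox
# through as x0/top/x1/bottom values in points.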
def test_parse_mineru_fields_extracts_text_and_bbox() -> None:
    payload = {
        "pdf_info": [
            {
                "page_idx": 0,
                "page_size": [2772, 1961],
                "para_blocks": [
                    {
                        "bbox": [704, 134, 2106, 229],
                        "type": "title",
                        "lines": [
                            {
                                "spans": [
                                    {
                                        "type": "text",
                                        "content": "食品名称:天问礼品粽",
                                        "bbox": [704, 134, 2106, 229],
                                    }
                                ]
                            }
                        ],
                    }
                ],
            }
        ]
    }

    parsed = parse_mineru_fields(payload)

    assert parsed.page_width == 2772
    assert parsed.page_height == 1961
    assert parsed.fields == [
        {
            "page": 1,
            "text": "食品名称:天问礼品粽",
            "font_name": "",
            "font_size_pt": None,
            "font_height_mm": None,
            "x0_pt": 704.0,
            "top_pt": 134.0,
            "x1_pt": 2106.0,
            "bottom_pt": 229.0,
        }
    ]


def test_parse_mineru_fields_turns_table_html_into_text() -> None:
    payload = {
        "pdf_info": [
            {
                "page_idx": 0,
                "page_size": [1000, 800],
                "para_blocks": [
                    {
                        "bbox": [10, 20, 300, 200],
                        "type": "table",
                        "lines": [
                            {
                                "spans": [
                                    {
                                        "type": "table",
                                        "html": "<table><tr><td>品种</td><td>规格</td></tr><tr><td>黑猪肉粽</td><td>130克×1</td></tr></table>",
                                    }
                                ]
                            }
                        ],
                    }
                ],
            }
        ]
    }

    parsed = parse_mineru_fields(payload)

    assert parsed.fields[0]["text"] == "品种 规格 黑猪肉粽 130克×1"


def test_parse_mineru_fields_skips_empty_decorative_blocks() -> None:
    payload = {
        "pdf_info": [
            {
                "page_idx": 0,
                "page_size": [1000, 800],
                "para_blocks": [
                    {"bbox": [1, 2, 3, 4], "type": "image", "lines": [{"spans": [{"type": "image"}]}]},
                    {"bbox": [5, 6, 7, 8], "type": "text", "lines": [{"spans": [{"content": " "}]}]},
                ],
            }
        ]
    }

    parsed = parse_mineru_fields(payload)

    assert parsed.fields == []
74
tests/backend/test_pipeline.py
Normal file
@@ -0,0 +1,74 @@
from pathlib import Path

import pytest

from backend.app import pipeline
from backend.app.pipeline import process_files


WORKDIR = Path("/Users/icemilk/Workspace/zld_POC")
AI_FILE = WORKDIR / "【2026-04-09】端午 - 背标 - 天问.ai"
DOCX_FILE = WORKDIR / "天问礼品粽【260331】.docx"
OUTPUT_DIR = WORKDIR / ".tmp_test_output"


def test_process_files_builds_preview_and_mineru_field_results(monkeypatch: pytest.MonkeyPatch) -> None:
    def fake_parse_with_mineru(_preview_path: Path, _output_dir: Path):
        return {
            "pdf_info": [
                {
                    "page_idx": 0,
                    "page_size": [2772, 1961],
                    "para_blocks": [
                        {
                            "bbox": [704, 134, 2106, 229],
                            "lines": [{"spans": [{"content": "食品名称:天问礼品粽"}]}],
                        },
                        {
                            "bbox": [10, 20, 40, 60],
                            "lines": [{"spans": [{"content": "Word中不存在的内容"}]}],
                        },
                    ],
                }
            ]
        }

    monkeypatch.setattr(pipeline, "_parse_preview_with_mineru", fake_parse_with_mineru)

    result = process_files(AI_FILE, DOCX_FILE, OUTPUT_DIR, job_id="test-job")

    assert result["preview"]["type"] == "pdf"
    assert result["preview"]["url"] == "/api/files/test-job/preview.pdf"
    assert result["preview"]["pageWidthPt"] == 2772
    assert result["preview"]["pageHeightPt"] == 1961
    assert result["fields"][0]["text"] == "食品名称:天问礼品粽"
    assert result["fields"][0]["validation_status"] == "matched"
    assert result["fields"][0]["x0_pt"] == 704.0
    assert any(field["validation_status"] == "unmatched" for field in result["fields"])
    assert (OUTPUT_DIR / "preview.pdf").exists()


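# With MINERU_API_KEY absent from the environment, the pipeline falls back to
# the first ENV_FILE_CANDIDATES entry that defines the key, as exercised here.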
def test_parse_preview_with_mineru_reads_key_from_env_file(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
    captured: dict[str, str] = {}

    class FakeMineruClient:
        def __init__(self, api_key: str) -> None:
            captured["api_key"] = api_key

        def parse_pdf(self, preview_path: Path, output_dir: Path) -> dict:
            return {"preview_path": str(preview_path), "output_dir": str(output_dir)}

    env_file = tmp_path / ".env"
    env_file.write_text("MINERU_API_KEY=from-env-file\n", encoding="utf-8")

    monkeypatch.delenv("MINERU_API_KEY", raising=False)
    monkeypatch.setattr(pipeline, "ENV_FILE_CANDIDATES", (env_file,))
    monkeypatch.setattr(pipeline, "MineruClient", FakeMineruClient)

    preview_path = tmp_path / "preview.pdf"
    preview_path.write_bytes(b"%PDF-1.7")

    result = pipeline._parse_preview_with_mineru(preview_path, tmp_path)

    assert captured["api_key"] == "from-env-file"
    assert result["preview_path"] == str(preview_path)
32
tests/backend/test_text_validation.py
Normal file
@@ -0,0 +1,32 @@
from backend.app.text_validation import classify_text_block, normalize_text, validate_field_against_word


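# normalize_text collapses runs of whitespace (including newlines) and trims
# the ends; full-width punctuation such as ":" survives, so normalized label
# text can be substring-matched against the Word source.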
def test_normalize_text_collapses_whitespace_and_full_width_punctuation() -> None:
    raw = " 食品生产许可证编号:\nSC11133042404806 "

    assert normalize_text(raw) == "食品生产许可证编号:SC11133042404806"


def test_classify_text_block_marks_garbled_text() -> None:
    assert classify_text_block("<EFBFBD><EFBFBD><EFBFBD><EFBFBD>-<2D><>-<2D><>") == "empty_or_garbled"
    assert classify_text_block(" ") == "empty_or_garbled"
    assert classify_text_block("食品名称:天问礼品粽") == "candidate"


def test_validate_field_against_word_returns_excerpt_for_match() -> None:
    word_text = "电话:0573-86981666 食品生产许可证编号:SC11133042404806 产品标准代号:GB/T 46259"

    result = validate_field_against_word("食品生产许可证编号:SC11133042404806", word_text)

    assert result.status == "matched"
    assert result.reason == "normalized text found in Word content"
    assert "SC11133042404806" in (result.matched_excerpt or "")


def test_validate_field_against_word_rejects_missing_text() -> None:
    word_text = "产品标准代号:GB/T 46259"

    result = validate_field_against_word("食品生产许可证编号:SC11133042404806", word_text)

    assert result.status == "unmatched"
    assert result.matched_excerpt is None
3782
【2026-04-09】端午 - 背标 - 天问.ai
Normal file
146
【2026-04-09】端午-背标-天问.regions.json
Normal file
@@ -0,0 +1,146 @@
[
  {
    "region_id": 1,
    "label": "header_basic",
    "bbox": {
      "x0_pt": 16.439999999999998,
      "top_pt": 28.29,
      "x1_pt": 677.14,
      "bottom_pt": 111.13
    },
    "block_count": 5,
    "sample_text": "品名:天问礼品粽背标 盒型:/ 成品尺寸: 材质:"
  },
  {
    "region_id": 2,
    "label": "header_rules",
    "bbox": {
      "x0_pt": 585.17,
      "top_pt": 28.08,
      "x1_pt": 1198.74,
      "bottom_pt": 107.5
    },
    "block_count": 7,
    "sample_text": "日期:<E69C9F><EFBC9A><EFBFBD><EFBFBD>-<2D><>-<2D><> 设计比例:<E4BE8B>:<3A> 字体大小规范 常规内容最小高度:<3A>mm"
  },
  {
    "region_id": 3,
    "label": "workflow_notes",
    "bbox": {
      "x0_pt": 1184.47,
      "top_pt": 95.47,
      "x1_pt": 1361.05,
      "bottom_pt": 501.32
    },
    "block_count": 10,
    "sample_text": "诸老大产品包装签稿流程 设计师 字体、索材、元素可商用,没有涉及 侵权,对最终的整体视觉呈现负责。"
  },
  {
    "region_id": 4,
    "label": "version_info",
    "bbox": {
      "x0_pt": 118.02000000000001,
      "top_pt": 144.61,
      "x1_pt": 226.92,
      "bottom_pt": 182.38
    },
    "block_count": 1,
    "sample_text": "版本号(Version"
  },
  {
    "region_id": 5,
    "label": "upper_main",
    "bbox": {
      "x0_pt": 254.8,
      "top_pt": 299.7,
      "x1_pt": 847.25,
      "bottom_pt": 556.2
    },
    "block_count": 140,
    "sample_text": "食品名称:天问礼品粽(粽子/草木灰咸鸭蛋) 品种 产品类别 规格"
  },
  {
    "region_id": 6,
    "label": "cooking_box",
    "bbox": {
      "x0_pt": 514.44,
      "top_pt": 572.33,
      "x1_pt": 858.42,
      "bottom_pt": 669.02
    },
    "block_count": 6,
    "sample_text": "粽子食用方法:水煮加热法 ➀拆除包装后,将粽子放入沸水中(水量浸没粽子); ➁常压下,待水沸腾<E6B2B8><E885BE>-<2D><>分钟,直至粽子煮透; ➂去除扎线,剥开粽叶即可食用;"
  },
  {
    "region_id": 7,
    "label": "seal_mark",
    "bbox": {
      "x0_pt": 660.74,
      "top_pt": 549.39,
      "x1_pt": 789.99,
      "bottom_pt": 659.0
    },
    "block_count": 23,
    "sample_text": "每<><E6AF8F><EFBFBD>克(g) NRV% <20><>% 能量"
  },
  {
    "region_id": 8,
    "label": "nutrition_table",
    "bbox": {
      "x0_pt": 252.74,
      "top_pt": 506.22,
      "x1_pt": 962.71,
      "bottom_pt": 685.32
    },
    "block_count": 87,
    "sample_text": "<22>% <20>% <20>毫克(mg) <20>毫克(mg)"
  },
  {
    "region_id": 9,
    "label": "lower_left_details",
    "bbox": {
      "x0_pt": 254.8,
      "top_pt": 656.14,
      "x1_pt": 494.34,
      "bottom_pt": 717.63
    },
    "block_count": 7,
    "sample_text": "产品标准代号:Q/QYDP <20><><EFBFBD><EFBFBD>S 空咸鸭蛋漏气、胀袋、变质请勿食用 食用方法:本品为熟制品,开袋去壳即食。冬季加热出油后"
  },
  {
    "region_id": 10,
    "label": "date_box",
    "bbox": {
      "x0_pt": 664.63,
      "top_pt": 666.79,
      "x1_pt": 760.53,
      "bottom_pt": 726.25
    },
    "block_count": 2,
    "sample_text": "生产日期: 保质期到期日:"
  },
  {
    "region_id": 11,
    "label": "bottom_title",
    "bbox": {
      "x0_pt": 257.2,
      "top_pt": 718.22,
      "x1_pt": 968.85,
      "bottom_pt": 765.91
    },
    "block_count": 1,
    "sample_text": "食品名称:卜居礼品粽(粽子/草木灰咸鸭蛋/低糖原味绿豆糕)净含量:<E9878F>.<2E><>千克(粽子:<E5AD90><EFBC9A><EFBFBD>克×<E5858B>+<EFBFBD><EFBC8B><EFBFBD>克×<E5858B>,草木灰咸鸭蛋:<E89B8B><EFBC9A>克×<E5858B>,低糖原味绿豆糕:<E7B395><EFBC9A>克×<E5858B>)"
  },
  {
    "region_id": 12,
    "label": "unassigned",
    "bbox": {
      "x0_pt": 17.490000000000002,
      "top_pt": 71.23,
      "x1_pt": 1068.34,
      "bottom_pt": 533.74
    },
    "block_count": 70,
    "sample_text": "字体高度:><>mm 字体高度:><>mm <20><><EFBFBD>mm(宽)×<><C397><EFBFBD>mm(高) /"
  }
]