#!/usr/bin/env python3 from __future__ import annotations import json import re from pathlib import Path from PIL import Image, ImageDraw, ImageFont WORKDIR = Path("/Users/icemilk/Workspace/zld_POC") TEXT_BLOCKS = WORKDIR / "【2026-04-09】端午-背标-天问.text_blocks.json" IMAGE_PATH = WORKDIR / "1.jpg" OUT_IMAGE = WORKDIR / "【2026-04-09】端午-背标-天问.region_overlay.png" OUT_JSON = WORKDIR / "【2026-04-09】端午-背标-天问.regions.json" PAGE_WIDTH_PT = 1363.4 PAGE_HEIGHT_PT = 942.06 def load_blocks() -> list[dict]: return json.loads(TEXT_BLOCKS.read_text(encoding="utf-8")) def overlaps(a: tuple[float, float, float, float], b: tuple[float, float, float, float]) -> bool: ax0, ay0, ax1, ay1 = a bx0, by0, bx1, by1 = b return not (ax1 < bx0 or bx1 < ax0 or ay1 < by0 or by1 < ay0) def expanded_box(block: dict, pad_x: float = 24.0, pad_y: float = 18.0) -> tuple[float, float, float, float]: return ( block["x0_pt"] - pad_x, block["top_pt"] - pad_y, block["x1_pt"] + pad_x, block["bottom_pt"] + pad_y, ) def region_bbox(blocks: list[dict], margin_x: float = 20.0, margin_y: float = 14.0) -> dict: x0 = min(b["x0_pt"] for b in blocks) - margin_x y0 = min(b["top_pt"] for b in blocks) - margin_y x1 = max(b["x1_pt"] for b in blocks) + margin_x y1 = max(b["bottom_pt"] for b in blocks) + margin_y return {"x0_pt": max(0, x0), "top_pt": max(0, y0), "x1_pt": x1, "bottom_pt": y1} def classify(region: dict) -> str: return region["label"] def to_px(x_pt: float, y_pt: float, img_w: int, img_h: int) -> tuple[int, int]: return ( round(x_pt / PAGE_WIDTH_PT * img_w), round(y_pt / PAGE_HEIGHT_PT * img_h), ) def match_any(text: str, patterns: list[str]) -> bool: return any(p in text for p in patterns) def semantic_groups(blocks: list[dict]) -> list[tuple[str, list[dict]]]: groups: list[tuple[str, list[dict]]] = [] defs = [ ( "header_basic", lambda b: b["top_pt"] < 140 and match_any( b["text"], ["品名", "成品尺寸", "材质", "工艺", "盒型"] ), ), ( "header_rules", lambda b: b["top_pt"] < 140 and match_any( b["text"], ["日期", "设计比例", "字体大小规范", "常规内容最小高度", "净含量最小高度", "条形码"] ), ), ( "workflow_notes", lambda b: b["x0_pt"] > 1180 or match_any(b["text"], ["签稿流程", "设计师", "品控", "安冬梅"]), ), ( "version_info", lambda b: "版本号" in b["text"], ), ( "upper_main", lambda b: 250 <= b["top_pt"] <= 540 and b["x0_pt"] < 820 and not match_any(b["text"], ["营养成分表"]), ), ( "cooking_box", lambda b: 560 <= b["top_pt"] <= 650 and 500 <= b["x0_pt"] <= 680, ), ( "seal_mark", lambda b: 560 <= b["top_pt"] <= 650 and 680 < b["x0_pt"] <= 760, ), ( "nutrition_table", lambda b: 520 <= b["top_pt"] <= 670 and b["x0_pt"] < 960, ), ( "lower_left_details", lambda b: 590 <= b["top_pt"] <= 705 and b["x0_pt"] < 520, ), ( "date_box", lambda b: match_any(b["text"], ["生产日期", "保质期到期日"]) and b["x0_pt"] > 650, ), ( "bottom_title", lambda b: b["top_pt"] > 705 and b["x0_pt"] < 980, ), ] remaining = blocks[:] for label, predicate in defs: matched = [b for b in remaining if predicate(b)] if matched: groups.append((label, matched)) ids = {id(b) for b in matched} remaining = [b for b in remaining if id(b) not in ids] if remaining: # Keep any leftovers visible so we can inspect missed areas. leftovers = [b for b in remaining if re.search(r"\S", b["text"])] if leftovers: groups.append(("unassigned", leftovers)) return groups def build_regions(blocks: list[dict]) -> list[dict]: regions = [] for idx, (label, group) in enumerate(semantic_groups(blocks), start=1): bbox = region_bbox(group) sample = " ".join(b["text"] for b in sorted(group, key=lambda b: (b["top_pt"], b["x0_pt"]))[:4]) region = { "region_id": idx, "label": label, "bbox": bbox, "block_count": len(group), "sample_text": sample[:120], } regions.append(region) return regions def draw_regions(regions: list[dict]) -> None: image = Image.open(IMAGE_PATH).convert("RGBA") draw = ImageDraw.Draw(image, "RGBA") colors = [ (255, 99, 71, 255), (65, 105, 225, 255), (50, 205, 50, 255), (255, 165, 0, 255), (148, 0, 211, 255), (0, 191, 255, 255), (220, 20, 60, 255), (46, 139, 87, 255), ] font = ImageFont.load_default() for i, region in enumerate(regions): color = colors[i % len(colors)] bbox = region["bbox"] x0, y0 = to_px(bbox["x0_pt"], bbox["top_pt"], image.width, image.height) x1, y1 = to_px(bbox["x1_pt"], bbox["bottom_pt"], image.width, image.height) draw.rectangle([x0, y0, x1, y1], outline=color[:3], width=5) tag = f"R{region['region_id']} {region['label']}" tx0 = max(8, x0 + 8) ty0 = max(8, y0 + 8) tw, th = draw.textbbox((tx0, ty0), tag, font=font)[2:] draw.rectangle([tx0 - 4, ty0 - 2, tx0 + tw + 4, ty0 + th + 2], fill=(255, 255, 255, 220)) draw.text((tx0, ty0), tag, fill=(0, 0, 0, 255), font=font) image.save(OUT_IMAGE) def main() -> None: blocks = load_blocks() regions = build_regions(blocks) OUT_JSON.write_text(json.dumps(regions, ensure_ascii=False, indent=2), encoding="utf-8") draw_regions(regions) print(OUT_IMAGE) print(OUT_JSON) print(f"regions={len(regions)}") if __name__ == "__main__": main()