# ZLD_POC/scripts/detect_layout_boxes.py

#!/usr/bin/env python3
from __future__ import annotations

import argparse
import json
import shutil
import subprocess
from pathlib import Path

import cv2

from backend.app.layout_cv import process_image


def detect_kind(path: Path) -> str:
    """Classify *path* as an image, a PDF, or one of two flavours of ``.ai``.

    The decision is made from the lower-cased file extension. Only ``.ai``
    files are opened: their first 4096 bytes are searched for a PDF marker
    to tell PDF-based files apart from the rest.

    Returns:
        One of ``"image"``, ``"pdf"``, ``"pdf-compatible-ai"`` or
        ``"eps-like-ai"``.

    Raises:
        ValueError: If the extension is not one of the supported types.
    """
    extension = path.suffix.lower()
    raster_extensions = (".jpg", ".jpeg", ".png", ".webp", ".bmp", ".tif", ".tiff")

    if extension in raster_extensions:
        return "image"
    if extension == ".pdf":
        return "pdf"
    if extension != ".ai":
        # Report the suffix exactly as given (original case) in the message.
        raise ValueError(f"Unsupported input type: {path.suffix}")

    # Only the head of the file is inspected for a PDF signature.
    with path.open("rb") as handle:
        header = handle.read(4096)

    has_pdf_marker = any(marker in header for marker in (b"%PDF-", b"%%PDF"))
    return "pdf-compatible-ai" if has_pdf_marker else "eps-like-ai"


def _run_checked(command: list[str], failure_message: str) -> None:
    """Run *command* and raise ``RuntimeError`` if it exits with a non-zero status.

    The error text is the tool's stderr, falling back to its stdout and then
    to *failure_message* when the tool printed nothing.
    """
    completed = subprocess.run(command, capture_output=True, text=True, check=False)
    if completed.returncode != 0:
        raise RuntimeError(completed.stderr.strip() or completed.stdout.strip() or failure_message)


def ensure_raster_image(source: Path, workdir: Path) -> Path:
    """Return the path of a raster image for *source*, converting if needed.

    Image inputs are returned unchanged. ``.ai`` inputs (both kinds reported
    by ``detect_kind``) are first converted to PDF with ``soffice`` into
    ``workdir / "converted"``. The resulting (or original) PDF is then
    rasterized at 220 DPI into ``workdir / "rasterized" / "<stem>.png"``,
    using ``pdftoppm`` when it is on ``PATH`` and ImageMagick's ``magick``
    otherwise. Only the first page is rendered by either tool.

    Args:
        source: Input image, PDF, or AI file.
        workdir: Directory under which intermediate files are created.

    Returns:
        ``source`` itself for images, otherwise the path of the PNG produced.

    Raises:
        ValueError: Propagated from ``detect_kind`` for unsupported inputs.
        RuntimeError: If a conversion tool fails, reports success without
            producing its output file, or no rasterizer is available.
    """
    kind = detect_kind(source)
    if kind == "image":
        return source

    pdf_path = source
    if kind in {"pdf-compatible-ai", "eps-like-ai"}:
        converted_dir = workdir / "converted"
        converted_dir.mkdir(parents=True, exist_ok=True)
        _run_checked(
            [
                "soffice",
                "--headless",
                "--convert-to",
                "pdf",
                "--outdir",
                str(converted_dir),
                str(source),
            ],
            "failed to convert AI to PDF",
        )
        pdf_path = converted_dir / f"{source.stem}.pdf"
        # A zero exit status alone does not prove the PDF was written; fail
        # here with a clear message instead of inside the rasterizer.
        if not pdf_path.is_file():
            raise RuntimeError(f"failed to convert AI to PDF: {pdf_path} was not created")

    image_dir = workdir / "rasterized"
    image_dir.mkdir(parents=True, exist_ok=True)
    image_path = image_dir / f"{source.stem}.png"

    if shutil.which("pdftoppm"):
        # pdftoppm appends ".png" itself, so it is given the path without suffix.
        command = ["pdftoppm", "-png", "-singlefile", "-r", "220", str(pdf_path), str(image_path.with_suffix(""))]
    elif shutil.which("magick"):
        # "[0]" selects the first page only. Without it a multi-page PDF is
        # written as "<stem>-0.png", "<stem>-1.png", ... and image_path would
        # never exist; this also matches pdftoppm's -singlefile behaviour.
        command = ["magick", "-density", "220", f"{pdf_path}[0]", "-quality", "100", str(image_path)]
    else:
        raise RuntimeError("Neither pdftoppm nor magick is available for PDF rasterization")

    _run_checked(command, "failed to rasterize PDF")
    if not image_path.is_file():
        raise RuntimeError(f"failed to rasterize PDF: {image_path} was not created")
    return image_path


def main() -> None:
    """Command-line entry point: detect layout boxes and write the results.

    Parses the input path and ``--output-dir``, obtains a raster image via
    ``ensure_raster_image`` (using the output directory as its work
    directory), runs ``process_image`` on it, and writes two files into the
    output directory: ``<stem>.boxed.png`` (the annotated preview) and
    ``<stem>.boxes.json`` (source/raster/preview paths plus the boxes).
    Finally prints a JSON object with the two output paths to stdout.

    Raises:
        SystemExit: Via argparse on bad arguments or a missing input file.
        RuntimeError: If the preview image cannot be written, or propagated
            from ``ensure_raster_image``.
    """
    parser = argparse.ArgumentParser(description="Detect text-line and outer rectangle boxes from a label image.")
    parser.add_argument("input", type=Path, help="Path to an image, PDF, or AI file")
    parser.add_argument("--output-dir", type=Path, default=Path("outputs/layout_boxes"), help="Directory for preview and JSON")
    args = parser.parse_args()

    source = args.input.expanduser().resolve()
    # Fail early with a usage-style error rather than a traceback from a
    # later step when the input path does not point at a file.
    if not source.is_file():
        parser.error(f"input file not found: {source}")

    output_dir = args.output_dir.expanduser().resolve()
    output_dir.mkdir(parents=True, exist_ok=True)

    raster_path = ensure_raster_image(source, output_dir)
    annotated, boxes = process_image(raster_path)

    preview_path = output_dir / f"{source.stem}.boxed.png"
    json_path = output_dir / f"{source.stem}.boxes.json"

    # cv2.imwrite signals failure through its boolean return value, so
    # check it; otherwise the JSON would reference a preview that was never
    # written.
    if not cv2.imwrite(str(preview_path), annotated):
        raise RuntimeError(f"failed to write preview image: {preview_path}")

    payload = {
        "source": str(source),
        "raster_path": str(raster_path),
        "preview_path": str(preview_path),
        "boxes": [box.to_dict() for box in boxes],
    }
    json_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")

    print(json.dumps({"preview_path": str(preview_path), "json_path": str(json_path)}, ensure_ascii=False, indent=2))


# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()