Files
ZLD_POC/scripts/detect_layout_boxes.py
2026-04-15 17:18:49 +08:00

109 lines
3.8 KiB
Python

#!/usr/bin/env python3
from __future__ import annotations
import argparse
import json
import shutil
import subprocess
from pathlib import Path
import cv2
from backend.app.layout_cv import process_image
def detect_kind(path: Path) -> str:
    """Classify *path* by its extension and, for ``.ai`` files, by content.

    The extension comparison is case-insensitive.

    Returns:
        ``"image"`` for the supported raster extensions, ``"pdf"`` for
        ``.pdf``, and for ``.ai`` either ``"pdf-compatible-ai"`` (a PDF
        marker occurs within the first 4096 bytes) or ``"eps-like-ai"``.

    Raises:
        ValueError: If the extension is none of the supported ones.
    """
    raster_extensions = (".jpg", ".jpeg", ".png", ".webp", ".bmp", ".tif", ".tiff")
    extension = path.suffix.lower()

    if extension in raster_extensions:
        return "image"
    if extension == ".pdf":
        return "pdf"
    if extension != ".ai":
        raise ValueError(f"Unsupported input type: {path.suffix}")

    # Only the leading 4 KiB of the file is inspected for a PDF marker.
    with path.open("rb") as stream:
        header = stream.read(4096)
    has_pdf_marker = any(marker in header for marker in (b"%PDF-", b"%%PDF"))
    return "pdf-compatible-ai" if has_pdf_marker else "eps-like-ai"
def _run_checked(command: list[str], failure_message: str) -> None:
    """Run *command* and raise RuntimeError if it exits with a non-zero status.

    The error text is the tool's stderr, else its stdout, else
    *failure_message*.
    """
    completed = subprocess.run(command, capture_output=True, text=True, check=False)
    if completed.returncode != 0:
        raise RuntimeError(completed.stderr.strip() or completed.stdout.strip() or failure_message)


def ensure_raster_image(source: Path, workdir: Path) -> Path:
    """Return the path of a raster image representing *source*.

    Image inputs are returned unchanged. ``.ai`` inputs are first converted
    to PDF with ``soffice`` into ``workdir/converted``. The PDF is then
    rendered to ``workdir/rasterized/<stem>.png`` with ``pdftoppm`` or, if
    that is not on PATH, with ``magick``. Only the first page is rendered
    by either tool.

    Args:
        source: Input image, PDF, or AI file.
        workdir: Directory under which intermediate files are created.

    Returns:
        Path to the raster image (``source`` itself for image inputs).

    Raises:
        ValueError: If ``detect_kind`` rejects the file type.
        RuntimeError: If a conversion tool fails, produces no output file,
            or no rasterizer is available.
    """
    kind = detect_kind(source)
    if kind == "image":
        return source

    pdf_path = source
    if kind in {"pdf-compatible-ai", "eps-like-ai"}:
        converted_dir = workdir / "converted"
        converted_dir.mkdir(parents=True, exist_ok=True)
        _run_checked(
            [
                "soffice",
                "--headless",
                "--convert-to",
                "pdf",
                "--outdir",
                str(converted_dir),
                str(source),
            ],
            "failed to convert AI to PDF",
        )
        pdf_path = converted_dir / f"{source.stem}.pdf"
        # A zero exit status alone does not prove the PDF was written, so
        # fail here instead of handing a missing file to the rasterizer.
        if not pdf_path.is_file():
            raise RuntimeError(f"failed to convert AI to PDF: {pdf_path} was not created")

    image_dir = workdir / "rasterized"
    image_dir.mkdir(parents=True, exist_ok=True)
    image_path = image_dir / f"{source.stem}.png"

    if shutil.which("pdftoppm"):
        # pdftoppm appends ".png" itself, so pass the output path without it.
        command = ["pdftoppm", "-png", "-singlefile", "-r", "220", str(pdf_path), str(image_path.with_suffix(""))]
    elif shutil.which("magick"):
        # "[0]" selects the first page only. Without it a multi-page PDF is
        # written as <stem>-0.png, <stem>-1.png, ... and image_path would
        # never exist; it also matches pdftoppm's -singlefile behaviour.
        command = ["magick", "-density", "220", f"{pdf_path}[0]", "-quality", "100", str(image_path)]
    else:
        raise RuntimeError("Neither pdftoppm nor magick is available for PDF rasterization")

    _run_checked(command, "failed to rasterize PDF")
    if not image_path.is_file():
        raise RuntimeError(f"failed to rasterize PDF: {image_path} was not created")
    return image_path
def main() -> None:
    """Command-line entry point: detect layout boxes and write the results.

    Parses the input path and ``--output-dir``, obtains a raster image via
    ``ensure_raster_image`` (intermediate files go under the output
    directory), runs ``process_image`` on it, and writes
    ``<stem>.boxed.png`` (the annotated preview) and ``<stem>.boxes.json``
    (source/raster/preview paths plus each box's ``to_dict()``) into the
    output directory. A JSON summary with both output paths is printed to
    stdout.

    Raises:
        SystemExit: Via ``parser.error`` when the input file does not exist.
        RuntimeError: If the preview image cannot be written.
    """
    parser = argparse.ArgumentParser(description="Detect text-line and outer rectangle boxes from a label image.")
    parser.add_argument("input", type=Path, help="Path to an image, PDF, or AI file")
    parser.add_argument("--output-dir", type=Path, default=Path("outputs/layout_boxes"), help="Directory for preview and JSON")
    args = parser.parse_args()

    source = args.input.expanduser().resolve()
    if not source.is_file():
        # Reject a bad path up front with a usage error rather than letting a
        # later stage fail in a less obvious way.
        parser.error(f"input file not found: {source}")
    output_dir = args.output_dir.expanduser().resolve()
    output_dir.mkdir(parents=True, exist_ok=True)

    raster_path = ensure_raster_image(source, output_dir)
    annotated, boxes = process_image(raster_path)

    preview_path = output_dir / f"{source.stem}.boxed.png"
    json_path = output_dir / f"{source.stem}.boxes.json"

    # cv2.imwrite reports failure through its boolean result; ignoring it
    # would leave the JSON report pointing at a preview that was never saved.
    if not cv2.imwrite(str(preview_path), annotated):
        raise RuntimeError(f"failed to write preview image: {preview_path}")

    report = {
        "source": str(source),
        "raster_path": str(raster_path),
        "preview_path": str(preview_path),
        "boxes": [box.to_dict() for box in boxes],
    }
    json_path.write_text(json.dumps(report, ensure_ascii=False, indent=2), encoding="utf-8")

    print(json.dumps({"preview_path": str(preview_path), "json_path": str(json_path)}, ensure_ascii=False, indent=2))
# Run the CLI only when this file is executed as a script, not when imported.
if __name__ == "__main__":
    main()