#!/usr/bin/env python3
from __future__ import annotations

import argparse
import json
import re
from pathlib import Path


def detect_kind(path: Path) -> str:
    """Classify a file by sniffing the start of its contents.

    Only the first 4096 bytes are read.  When either the ``%PDF-`` or the
    ``%%PDF`` byte sequence occurs anywhere in that prefix the result is
    ``"pdf-compatible-ai"``; in every other case (including an empty file)
    the result is ``"eps-like-ai"``.

    Args:
        path: File to inspect; it is opened in binary mode.

    Returns:
        ``"pdf-compatible-ai"`` or ``"eps-like-ai"``.
    """
    pdf_markers = (b"%PDF-", b"%%PDF")
    with path.open("rb") as stream:
        prefix = stream.read(4096)
    looks_like_pdf = any(marker in prefix for marker in pdf_markers)
    return "pdf-compatible-ai" if looks_like_pdf else "eps-like-ai"


def parse_pdf_ai(path: Path) -> dict:
    """Summarize a PDF-compatible file with ``pypdf``.

    ``pypdf`` is imported lazily inside the function, so it is only required
    when this function is actually called.

    Args:
        path: File to open with ``pypdf.PdfReader``.

    Returns:
        A dict with these keys:

        * ``kind`` -- always ``"pdf-compatible-ai"``.
        * ``pages`` -- number of pages reported by the reader.
        * ``page_sizes`` -- one ``{"width_pt", "height_pt"}`` dict per page,
          taken from the page's media box and rounded to two decimals.
        * ``metadata`` -- the reader's metadata with keys and values
          converted to ``str`` (empty when there is none).
        * ``image_count`` -- total number of entries in each page's
          ``images`` attribute; a page whose images cannot be enumerated
          contributes nothing.
        * ``text_samples`` -- for each page with non-blank text, the
          stripped text truncated to 2000 characters; a page whose
          extraction raises contributes a
          ``"[extract_text failed: <ExceptionName>]"`` placeholder instead.
    """
    from pypdf import PdfReader

    pdf = PdfReader(str(path))
    sizes = []
    samples = []
    total_images = 0

    for pg in pdf.pages:
        media = pg.mediabox
        width = round(float(media.width), 2)
        height = round(float(media.height), 2)
        sizes.append({"width_pt": width, "height_pt": height})

        # Text extraction is best-effort: a failure is recorded as a
        # placeholder sample rather than aborting the whole summary.
        try:
            stripped = (pg.extract_text() or "").strip()
            if stripped:
                samples.append(stripped[:2000])
        except Exception as exc:
            samples.append(f"[extract_text failed: {type(exc).__name__}]")

        # Image counting is best-effort too: a page that cannot be
        # enumerated simply adds nothing to the total.
        try:
            total_images += len(list(getattr(pg, "images", [])))
        except Exception:
            pass

    info = {str(name): str(val) for name, val in (pdf.metadata or {}).items()}

    summary = {"kind": "pdf-compatible-ai"}
    summary["pages"] = len(pdf.pages)
    summary["page_sizes"] = sizes
    summary["metadata"] = info
    summary["image_count"] = total_images
    summary["text_samples"] = samples
    return summary


def parse_eps_like_ai(path: Path) -> dict:
    """Extract a bounding box and string samples from a PostScript-style file.

    The whole file is read into memory and decoded as Latin-1, then scanned
    with regular expressions.

    Args:
        path: File to scan.

    Returns:
        A dict with these keys:

        * ``kind`` -- always ``"eps-like-ai"``.
        * ``bounding_box`` -- a 4-tuple of ints taken from the first
          ``%%BoundingBox:`` comment that is followed by four integers, or
          ``None`` when no such comment exists (for example when the only
          occurrence is ``(atend)`` or is malformed).
        * ``text_samples`` -- at most 50 parenthesised strings of 2-200
          characters that contain no parentheses or line breaks.
    """
    # Latin-1 maps every byte value to a code point, so decoding cannot fail
    # and no error handler is needed.
    text = path.read_bytes().decode("latin1")

    # Each coordinate must be an optionally negative run of digits.  A looser
    # character class such as ``[-\d]+`` would also accept tokens like "-" or
    # "1-2", which then make ``int()`` raise ValueError.
    bbox = re.search(
        r"%%BoundingBox:\s+(-?\d+)\s+(-?\d+)\s+(-?\d+)\s+(-?\d+)",
        text,
    )
    strings = re.findall(r"\(([^()\r\n]{2,200})\)", text)

    return {
        "kind": "eps-like-ai",
        "bounding_box": tuple(map(int, bbox.groups())) if bbox else None,
        "text_samples": strings[:50],
    }


def main() -> None:
    """Command-line entry point: print a JSON summary of one ``.ai`` file.

    Parses the command line (a positional ``file`` and an optional
    ``--pretty`` flag), detects which parser applies to the file, and prints
    the combined result as a single JSON document on standard output.  The
    output always starts with ``file`` (resolved absolute path) and
    ``size_bytes``, followed by the keys produced by the selected parser.

    Raises:
        SystemExit: If the path does not exist or is not a regular file
            (argparse also exits this way on invalid arguments).
    """
    parser = argparse.ArgumentParser(description="Parse basic info from an Adobe Illustrator .ai file.")
    parser.add_argument("file", type=Path, help="Path to the .ai file")
    parser.add_argument("--pretty", action="store_true", help="Pretty-print JSON output")
    args = parser.parse_args()

    path = args.file.expanduser().resolve()
    if not path.exists():
        raise SystemExit(f"File not found: {path}")
    # ``exists()`` is also true for directories; without this guard a
    # directory argument would fail later with a raw traceback when the
    # file is opened for sniffing.
    if not path.is_file():
        raise SystemExit(f"Not a regular file: {path}")

    kind = detect_kind(path)
    result = {
        "file": str(path),
        "size_bytes": path.stat().st_size,
    }

    if kind == "pdf-compatible-ai":
        result.update(parse_pdf_ai(path))
    else:
        result.update(parse_eps_like_ai(path))

    # ``indent=None`` yields the compact single-line form.
    indent = 2 if args.pretty else None
    print(json.dumps(result, ensure_ascii=False, indent=indent))


# Run the command-line interface only when this file is executed as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()