#!/usr/bin/env python3
"""Print basic information about an Adobe Illustrator ``.ai`` file as JSON.

A file whose first bytes contain a PDF marker is read with ``pypdf``; any
other file is scanned as EPS-like text for a ``%%BoundingBox`` comment and
short parenthesised strings.
"""
from __future__ import annotations

import argparse
import json
import re
from pathlib import Path

# Number of leading bytes inspected when sniffing the file type.
_HEAD_BYTES = 4096
# Per-page cap on extracted text and overall cap on EPS string samples.
_MAX_PAGE_TEXT_CHARS = 2000
_MAX_EPS_STRINGS = 50

# Each coordinate must be a well-formed integer. A looser class such as
# ``[-\d]+`` would also accept tokens like "1-2", which int() rejects.
_BBOX_RE = re.compile(r"%%BoundingBox:\s+(-?\d+)\s+(-?\d+)\s+(-?\d+)\s+(-?\d+)")
# Parenthesised runs of 2-200 characters with no nested parens or newlines.
_PAREN_STRING_RE = re.compile(r"\(([^()\r\n]{2,200})\)")


def detect_kind(path: Path) -> str:
    """Classify *path* by sniffing its first ``_HEAD_BYTES`` bytes.

    Returns ``"pdf-compatible-ai"`` when a ``%PDF-`` or ``%%PDF`` marker is
    present in that prefix, otherwise ``"eps-like-ai"``.
    """
    with path.open("rb") as f:
        head = f.read(_HEAD_BYTES)
    if b"%PDF-" in head or b"%%PDF" in head:
        return "pdf-compatible-ai"
    return "eps-like-ai"


def parse_pdf_ai(path: Path) -> dict:
    """Collect page sizes, metadata, image count and text samples via pypdf.

    Text extraction failures are recorded as a placeholder entry in
    ``text_samples``; image enumeration failures are ignored for that page.
    """
    # Imported lazily so the EPS path works without pypdf installed.
    from pypdf import PdfReader

    reader = PdfReader(str(path))
    page_sizes = []
    image_count = 0
    texts = []

    for page in reader.pages:
        box = page.mediabox
        page_sizes.append(
            {
                "width_pt": round(float(box.width), 2),
                "height_pt": round(float(box.height), 2),
            }
        )

        try:
            text = (page.extract_text() or "").strip()
        except Exception as exc:
            # Keep going, but leave a trace of which error class occurred.
            texts.append(f"[extract_text failed: {type(exc).__name__}]")
        else:
            if text:
                texts.append(text[:_MAX_PAGE_TEXT_CHARS])

        try:
            image_count += len(list(getattr(page, "images", [])))
        except Exception:
            # Best effort: a page whose images cannot be enumerated simply
            # contributes nothing to the count.
            pass

    metadata = {
        str(key): str(value) for key, value in (reader.metadata or {}).items()
    }

    return {
        "kind": "pdf-compatible-ai",
        "pages": len(reader.pages),
        "page_sizes": page_sizes,
        "metadata": metadata,
        "image_count": image_count,
        "text_samples": texts,
    }


def parse_eps_like_ai(path: Path) -> dict:
    """Scan *path* as EPS-like text for a bounding box and string samples.

    ``bounding_box`` is a 4-tuple of ints taken from the first
    ``%%BoundingBox`` comment with four well-formed integers, or ``None``
    when no such comment is found. ``text_samples`` holds at most
    ``_MAX_EPS_STRINGS`` parenthesised strings.
    """
    # latin-1 maps every byte to a code point, so decoding cannot fail.
    text = path.read_bytes().decode("latin-1")
    bbox = _BBOX_RE.search(text)
    strings = _PAREN_STRING_RE.findall(text)
    return {
        "kind": "eps-like-ai",
        "bounding_box": tuple(map(int, bbox.groups())) if bbox else None,
        "text_samples": strings[:_MAX_EPS_STRINGS],
    }


def main(argv: list[str] | None = None) -> None:
    """Parse command-line arguments, inspect the file and print JSON.

    Args:
        argv: Argument list to parse; ``None`` makes argparse read
            ``sys.argv[1:]``.

    Raises:
        SystemExit: If the path does not exist or is not a regular file
            (argparse also exits on invalid arguments).
    """
    parser = argparse.ArgumentParser(
        description="Parse basic info from an Adobe Illustrator .ai file."
    )
    parser.add_argument("file", type=Path, help="Path to the .ai file")
    parser.add_argument(
        "--pretty", action="store_true", help="Pretty-print JSON output"
    )
    args = parser.parse_args(argv)

    path = args.file.expanduser().resolve()
    if not path.exists():
        raise SystemExit(f"File not found: {path}")
    if not path.is_file():
        # A directory passes exists() but cannot be opened for reading.
        raise SystemExit(f"Not a regular file: {path}")

    result = {
        "file": str(path),
        "size_bytes": path.stat().st_size,
    }
    if detect_kind(path) == "pdf-compatible-ai":
        result.update(parse_pdf_ai(path))
    else:
        result.update(parse_eps_like_ai(path))

    indent = 2 if args.pretty else None
    print(json.dumps(result, ensure_ascii=False, indent=indent))


if __name__ == "__main__":
    main()