Initial commit: 包装审核 POC、Docker 与前后端
Made-with: Cursor
This commit is contained in:
104
parse_ai.py
Normal file
104
parse_ai.py
Normal file
@@ -0,0 +1,104 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import re
|
||||
from pathlib import Path
|
||||
|
||||
|
||||
def detect_kind(path: Path) -> str:
    """Classify an Illustrator file by sniffing the start of its contents.

    Only the first 4096 bytes of the file are inspected.

    Args:
        path: Location of the file to inspect.

    Returns:
        ``"pdf-compatible-ai"`` if either PDF marker (``%PDF-`` or ``%%PDF``)
        occurs anywhere in the inspected prefix, otherwise ``"eps-like-ai"``.
    """
    pdf_markers = (b"%PDF-", b"%%PDF")

    with path.open("rb") as stream:
        prefix = stream.read(4096)

    looks_like_pdf = any(marker in prefix for marker in pdf_markers)
    return "pdf-compatible-ai" if looks_like_pdf else "eps-like-ai"
|
||||
|
||||
|
||||
def parse_pdf_ai(path: Path) -> dict:
    """Summarise a PDF-compatible .ai file using ``pypdf``.

    ``pypdf`` is imported inside the function, so it is only required when
    this function is actually called.

    Args:
        path: Location of the file to read.

    Returns:
        A dict with the keys ``kind``, ``pages``, ``page_sizes`` (one
        ``width_pt``/``height_pt`` entry per page, taken from the media box
        and rounded to two decimals), ``metadata`` (keys and values coerced
        to ``str``), ``image_count`` and ``text_samples``.
    """
    from pypdf import PdfReader

    pdf = PdfReader(str(path))
    sizes = []
    samples = []
    total_images = 0

    for page in pdf.pages:
        media = page.mediabox
        sizes.append(
            {
                "width_pt": round(float(media.width), 2),
                "height_pt": round(float(media.height), 2),
            }
        )

        # Text extraction is best-effort: a failure is recorded as a
        # placeholder sample instead of aborting the whole parse.
        try:
            extracted = (page.extract_text() or "").strip()
        except Exception as exc:
            samples.append(f"[extract_text failed: {type(exc).__name__}]")
        else:
            if extracted:
                # Cap each page's sample at 2000 characters.
                samples.append(extracted[:2000])

        # Image counting is best-effort too; if enumerating a page's images
        # raises, that page contributes nothing to the total.
        try:
            total_images += sum(1 for _ in getattr(page, "images", []))
        except Exception:
            pass

    info = {str(key): str(value) for key, value in (pdf.metadata or {}).items()}

    return {
        "kind": "pdf-compatible-ai",
        "pages": len(pdf.pages),
        "page_sizes": sizes,
        "metadata": info,
        "image_count": total_images,
        "text_samples": samples,
    }
|
||||
|
||||
|
||||
def parse_eps_like_ai(path: Path) -> dict:
    """Extract basic info from a non-PDF (PostScript/EPS-style) .ai file.

    The whole file is read and decoded as Latin-1 so that every byte maps to
    a character, then scanned with regular expressions.

    Args:
        path: Location of the file to read.

    Returns:
        A dict with:
          * ``kind``: always ``"eps-like-ai"``.
          * ``bounding_box``: a 4-tuple of ints from the first well-formed
            ``%%BoundingBox:`` comment, or ``None`` if there is none.
          * ``text_samples``: up to 50 parenthesised strings of 2-200
            characters that contain no parentheses or line breaks.
    """
    text = path.read_bytes().decode("latin1", errors="ignore")

    # Each coordinate is an optionally signed integer. The stricter ``-?\d+``
    # (rather than ``[-\d]+``) guarantees every captured group is accepted by
    # int(), and lets the search skip a malformed comment and continue to a
    # later valid one instead of raising ValueError.
    bbox = re.search(
        r"%%BoundingBox:\s+(-?\d+)\s+(-?\d+)\s+(-?\d+)\s+(-?\d+)",
        text,
    )
    strings = re.findall(r"\(([^()\r\n]{2,200})\)", text)

    return {
        "kind": "eps-like-ai",
        "bounding_box": tuple(map(int, bbox.groups())) if bbox else None,
        "text_samples": strings[:50],
    }
|
||||
|
||||
|
||||
def main() -> None:
    """Command-line entry point: parse an .ai file and print a JSON summary.

    Reads the positional ``file`` argument and the optional ``--pretty`` flag
    from the command line, detects the file flavour, runs the matching
    parser, and prints one JSON document to stdout containing the resolved
    path, the file size in bytes, and the parser's fields.

    Raises:
        SystemExit: If the path does not exist or is not a regular file
            (for example a directory), with an explanatory message.
    """
    parser = argparse.ArgumentParser(description="Parse basic info from an Adobe Illustrator .ai file.")
    parser.add_argument("file", type=Path, help="Path to the .ai file")
    parser.add_argument("--pretty", action="store_true", help="Pretty-print JSON output")
    args = parser.parse_args()

    path = args.file.expanduser().resolve()
    if not path.exists():
        raise SystemExit(f"File not found: {path}")
    # A directory passes exists() but cannot be opened for reading; reject it
    # here with a clear message instead of an OS error traceback later on.
    if not path.is_file():
        raise SystemExit(f"Not a regular file: {path}")

    kind = detect_kind(path)
    result = {
        "file": str(path),
        "size_bytes": path.stat().st_size,
    }

    parse = parse_pdf_ai if kind == "pdf-compatible-ai" else parse_eps_like_ai
    result.update(parse(path))

    # indent=None is json.dumps' default, i.e. the compact single-line form.
    print(json.dumps(result, ensure_ascii=False, indent=2 if args.pretty else None))
|
||||
|
||||
|
||||
# Run the CLI only when this file is executed as a script, so importing the
# module has no side effects.
if __name__ == "__main__":
    main()
|
||||
Reference in New Issue
Block a user