110 lines
3.8 KiB
Python
110 lines
3.8 KiB
Python
#!/usr/bin/env python3
|
||
"""Standalone CLI: detect main regions in a label image via Qwen VL, then crop.
|
||
|
||
Usage
-----
    python scripts/detect_regions.py <image_path> [--model MODEL] [--out OUT_DIR]
        [--key KEY] [--api-max-side API_MAX_SIDE] [--no-crop] [--split]

Example
-------
    python scripts/detect_regions.py data/sample.png
    python scripts/detect_regions.py data/sample.png --model qwen2.5-vl-72b-instruct
|
||
"""
|
||
from __future__ import annotations
|
||
|
||
import argparse
|
||
import json
|
||
import logging
|
||
import sys
|
||
from pathlib import Path
|
||
|
||
# Put the directory two levels above this file first on sys.path so that
# project-local packages (main() imports `backend.app.region_detector`) can be
# imported when this file is run directly as a script.
ROOT = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(ROOT))

# Script-wide logger; the root logger is configured once at import time with
# a timestamped format at INFO level.
logger = logging.getLogger("detect_regions")
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(name)s – %(message)s",
    level=logging.INFO,
)
|
||
|
||
|
||
def main() -> None:
|
||
parser = argparse.ArgumentParser(
|
||
description="Detect & crop main regions in a label image using Qwen VL"
|
||
)
|
||
parser.add_argument("image", help="Path to input image (PNG/JPEG)")
|
||
parser.add_argument(
|
||
"--model",
|
||
default="qwen2.5-vl-7b-instruct",
|
||
help="DashScope model ID (default: qwen2.5-vl-7b-instruct)",
|
||
)
|
||
parser.add_argument(
|
||
"--out",
|
||
default=None,
|
||
help="Output directory for cropped regions (default: <image_dir>/regions/)",
|
||
)
|
||
parser.add_argument("--key", default=None, help="DASHSCOPE_API_KEY (overrides env)")
|
||
parser.add_argument(
|
||
"--api-max-side", type=int, default=1024,
|
||
help="Max side length (px) of image sent to API (default: 1024). "
|
||
"Crop is always done on the original full-res file.",
|
||
)
|
||
parser.add_argument("--no-crop", action="store_true", help="Only print coords, don't crop")
|
||
parser.add_argument("--split", action="store_true",
|
||
help="Save each detected region separately (default: merge into one)")
|
||
args = parser.parse_args()
|
||
|
||
image_path = Path(args.image).expanduser().resolve()
|
||
if not image_path.exists():
|
||
parser.error(f"Image not found: {image_path}")
|
||
|
||
output_dir = Path(args.out).expanduser().resolve() if args.out else image_path.parent / "regions"
|
||
|
||
from backend.app.region_detector import detect_regions, crop_and_save, merge_regions
|
||
|
||
logger.info("Image: %s", image_path)
|
||
logger.info("Model: %s", args.model)
|
||
|
||
regions, raw_response = detect_regions(
|
||
image_path,
|
||
api_key=args.key or None,
|
||
model=args.model,
|
||
api_max_side=args.api_max_side,
|
||
)
|
||
|
||
if not regions:
|
||
logger.error("No regions detected. Raw model response:\n%s", raw_response)
|
||
sys.exit(1)
|
||
|
||
print("\n── Detected regions ──────────────────────────────────────")
|
||
for i, r in enumerate(regions, 1):
|
||
print(f" {i:02d}. [{r.label}] bbox=({r.x1},{r.y1})-({r.x2},{r.y2}) "
|
||
f"size={r.width}×{r.height}px")
|
||
print()
|
||
|
||
coords_json = [
|
||
{"label": r.label, "bbox": [r.x1, r.y1, r.x2, r.y2]}
|
||
for r in regions
|
||
]
|
||
print("JSON:")
|
||
print(json.dumps(coords_json, ensure_ascii=False, indent=2))
|
||
|
||
if not args.no_crop:
|
||
if args.split:
|
||
save_regions = regions
|
||
else:
|
||
merged = merge_regions(regions)
|
||
save_regions = [merged]
|
||
print(f"\n── Merged bbox: ({merged.x1},{merged.y1})-({merged.x2},{merged.y2})"
|
||
f" size={merged.width}×{merged.height}px")
|
||
|
||
results = crop_and_save(image_path, save_regions, output_dir)
|
||
print(f"\n── Cropped file(s) saved to: {output_dir} ──")
|
||
for item in results:
|
||
print(f" • {Path(item['path']).name} ← {item['label']}")
|
||
|
||
|
||
if __name__ == "__main__":
    # Run the CLI only when this file is executed as a script, so importing
    # the module does not trigger argument parsing.
    main()
|