#!/usr/bin/env python3
"""Standalone CLI: detect main regions in a label image via Qwen VL, then crop.

Usage
-----
    python scripts/detect_regions.py <image> [--model MODEL] [--out OUT_DIR] [--key KEY]

Example
-------
    python scripts/detect_regions.py data/sample.png
    python scripts/detect_regions.py data/sample.png --model qwen2.5-vl-72b-instruct
"""
from __future__ import annotations

import argparse
import json
import logging
import sys
from pathlib import Path

# ── make sure the project root is on sys.path ──────────────────────────────
ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(ROOT))

logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s – %(message)s",
)
logger = logging.getLogger("detect_regions")


def _build_parser() -> argparse.ArgumentParser:
    """Build the command-line parser for this script."""
    parser = argparse.ArgumentParser(
        description="Detect & crop main regions in a label image using Qwen VL"
    )
    parser.add_argument("image", help="Path to input image (PNG/JPEG)")
    parser.add_argument(
        "--model",
        default="qwen2.5-vl-7b-instruct",
        help="DashScope model ID (default: qwen2.5-vl-7b-instruct)",
    )
    parser.add_argument(
        "--out",
        default=None,
        # The real default is computed in main() from the image location.
        help="Output directory for cropped regions (default: <image dir>/regions/)",
    )
    parser.add_argument("--key", default=None, help="DASHSCOPE_API_KEY (overrides env)")
    parser.add_argument(
        "--api-max-side",
        type=int,
        default=1024,
        help="Max side length (px) of image sent to API (default: 1024). "
             "Crop is always done on the original full-res file.",
    )
    parser.add_argument("--no-crop", action="store_true",
                        help="Only print coords, don't crop")
    parser.add_argument("--split", action="store_true",
                        help="Save each detected region separately (default: merge into one)")
    return parser


def main(argv: list[str] | None = None) -> None:
    """Run the detect-and-crop workflow from the command line.

    Parses the arguments, asks ``detect_regions`` for the regions of the
    input image, prints them (human-readable and as JSON) and, unless
    ``--no-crop`` is given, crops them via ``crop_and_save``.

    Args:
        argv: Argument list to parse instead of ``sys.argv[1:]``. ``None``
            (the default) keeps the normal command-line behaviour.

    Raises:
        SystemExit: With status 2 when the arguments are invalid or the image
            path is missing / not a regular file; with status 1 when no
            regions were detected.
    """
    parser = _build_parser()
    args = parser.parse_args(argv)

    image_path = Path(args.image).expanduser().resolve()
    if not image_path.exists():
        parser.error(f"Image not found: {image_path}")
    if not image_path.is_file():
        # A directory (or other non-file) would otherwise reach the detector.
        parser.error(f"Image path is not a file: {image_path}")

    if args.out:
        output_dir = Path(args.out).expanduser().resolve()
    else:
        output_dir = image_path.parent / "regions"

    # Imported here, after ROOT was put on sys.path and after the cheap
    # argument checks, so bad invocations fail before the backend is loaded.
    from backend.app.region_detector import detect_regions, crop_and_save, merge_regions

    logger.info("Image: %s", image_path)
    logger.info("Model: %s", args.model)

    regions, raw_response = detect_regions(
        image_path,
        api_key=args.key or None,  # an empty --key is passed on as None
        model=args.model,
        api_max_side=args.api_max_side,
    )

    if not regions:
        logger.error("No regions detected. Raw model response:\n%s", raw_response)
        sys.exit(1)

    print("\n── Detected regions ──────────────────────────────────────")
    for i, r in enumerate(regions, 1):
        print(f" {i:02d}. [{r.label}] bbox=({r.x1},{r.y1})-({r.x2},{r.y2}) "
              f"size={r.width}×{r.height}px")
    print()

    coords_json = [
        {"label": r.label, "bbox": [r.x1, r.y1, r.x2, r.y2]}
        for r in regions
    ]
    print("JSON:")
    print(json.dumps(coords_json, ensure_ascii=False, indent=2))

    if args.no_crop:
        return

    if args.split:
        save_regions = regions
    else:
        # Default mode: collapse all detections into a single region.
        merged = merge_regions(regions)
        save_regions = [merged]
        print(f"\n── Merged bbox: ({merged.x1},{merged.y1})-({merged.x2},{merged.y2})"
              f" size={merged.width}×{merged.height}px")

    results = crop_and_save(image_path, save_regions, output_dir)
    print(f"\n── Cropped file(s) saved to: {output_dir} ──")
    for item in results:
        print(f" • {Path(item['path']).name} ← {item['label']}")


if __name__ == "__main__":
    main()