Files
ZLD_POC/scripts/detect_regions.py
2026-04-15 17:18:49 +08:00

110 lines
3.8 KiB
Python
Raw Permalink Blame History

This file contains ambiguous Unicode characters
This file contains Unicode characters that might be confused with other characters. If you think that this is intentional, you can safely ignore this warning. Use the Escape button to reveal them.
#!/usr/bin/env python3
"""Standalone CLI: detect main regions in a label image via Qwen VL, then crop.
Usage
-----
python scripts/detect_regions.py <image_path> [--model MODEL] [--out OUT_DIR] [--key KEY] [--api-max-side PX] [--no-crop] [--split]
Example
-------
python scripts/detect_regions.py data/sample.png
python scripts/detect_regions.py data/sample.png --model qwen2.5-vl-72b-instruct
"""
from __future__ import annotations
import argparse
import json
import logging
import sys
from pathlib import Path
# ── Bootstrap: project root on sys.path, then logging ──────────────────────
# ROOT is the parent of the directory that holds this script; putting it at
# the front of sys.path lets project packages be imported when the file is
# executed directly.
ROOT = Path(__file__).resolve().parents[1]
sys.path.insert(0, str(ROOT))

# Root logging setup for the CLI: INFO and above, timestamped.
logging.basicConfig(
    format="%(asctime)s [%(levelname)s] %(name)s %(message)s",
    level=logging.INFO,
)
logger = logging.getLogger("detect_regions")
def main() -> None:
parser = argparse.ArgumentParser(
description="Detect & crop main regions in a label image using Qwen VL"
)
parser.add_argument("image", help="Path to input image (PNG/JPEG)")
parser.add_argument(
"--model",
default="qwen2.5-vl-7b-instruct",
help="DashScope model ID (default: qwen2.5-vl-7b-instruct)",
)
parser.add_argument(
"--out",
default=None,
help="Output directory for cropped regions (default: <image_dir>/regions/)",
)
parser.add_argument("--key", default=None, help="DASHSCOPE_API_KEY (overrides env)")
parser.add_argument(
"--api-max-side", type=int, default=1024,
help="Max side length (px) of image sent to API (default: 1024). "
"Crop is always done on the original full-res file.",
)
parser.add_argument("--no-crop", action="store_true", help="Only print coords, don't crop")
parser.add_argument("--split", action="store_true",
help="Save each detected region separately (default: merge into one)")
args = parser.parse_args()
image_path = Path(args.image).expanduser().resolve()
if not image_path.exists():
parser.error(f"Image not found: {image_path}")
output_dir = Path(args.out).expanduser().resolve() if args.out else image_path.parent / "regions"
from backend.app.region_detector import detect_regions, crop_and_save, merge_regions
logger.info("Image: %s", image_path)
logger.info("Model: %s", args.model)
regions, raw_response = detect_regions(
image_path,
api_key=args.key or None,
model=args.model,
api_max_side=args.api_max_side,
)
if not regions:
logger.error("No regions detected. Raw model response:\n%s", raw_response)
sys.exit(1)
print("\n── Detected regions ──────────────────────────────────────")
for i, r in enumerate(regions, 1):
print(f" {i:02d}. [{r.label}] bbox=({r.x1},{r.y1})-({r.x2},{r.y2}) "
f"size={r.width}×{r.height}px")
print()
coords_json = [
{"label": r.label, "bbox": [r.x1, r.y1, r.x2, r.y2]}
for r in regions
]
print("JSON:")
print(json.dumps(coords_json, ensure_ascii=False, indent=2))
if not args.no_crop:
if args.split:
save_regions = regions
else:
merged = merge_regions(regions)
save_regions = [merged]
print(f"\n── Merged bbox: ({merged.x1},{merged.y1})-({merged.x2},{merged.y2})"
f" size={merged.width}×{merged.height}px")
results = crop_and_save(image_path, save_regions, output_dir)
print(f"\n── Cropped file(s) saved to: {output_dir} ──")
for item in results:
print(f"{Path(item['path']).name}{item['label']}")
# Script entry point: run the CLI only when executed directly, not on import.
if __name__ == "__main__":
    sys.exit(main())