Initial commit: 包装审核 POC、Docker 与前后端
Made-with: Cursor
This commit is contained in:
108
scripts/detect_layout_boxes.py
Normal file
108
scripts/detect_layout_boxes.py
Normal file
@@ -0,0 +1,108 @@
|
||||
#!/usr/bin/env python3
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import shutil
|
||||
import subprocess
|
||||
from pathlib import Path
|
||||
|
||||
import cv2
|
||||
|
||||
from backend.app.layout_cv import process_image
|
||||
|
||||
|
||||
def detect_kind(path: Path) -> str:
    """Classify *path* as ``"image"``, ``"pdf"``, or one of two AI flavors.

    An ``.ai`` file is sniffed for a PDF signature in its first 4 KiB:
    Illustrator files saved with PDF compatibility embed a PDF stream,
    while older ones are EPS-like and need a different conversion path.

    Raises:
        ValueError: if the suffix is not a supported input type.
    """
    ext = path.suffix.lower()
    raster_suffixes = {".jpg", ".jpeg", ".png", ".webp", ".bmp", ".tif", ".tiff"}
    if ext in raster_suffixes:
        return "image"
    if ext == ".pdf":
        return "pdf"
    if ext == ".ai":
        with path.open("rb") as handle:
            head = handle.read(4096)
        # PDF-compatible AI files carry a PDF marker near the start.
        if b"%PDF-" in head or b"%%PDF" in head:
            return "pdf-compatible-ai"
        return "eps-like-ai"
    raise ValueError(f"Unsupported input type: {path.suffix}")
|
||||
|
||||
|
||||
def _run_or_raise(command: list[str], fallback_message: str) -> None:
    """Run *command*; raise RuntimeError with captured output on failure."""
    completed = subprocess.run(command, capture_output=True, text=True, check=False)
    if completed.returncode != 0:
        raise RuntimeError(completed.stderr.strip() or completed.stdout.strip() or fallback_message)


def ensure_raster_image(source: Path, workdir: Path) -> Path:
    """Return a path to a raster PNG for *source*, converting when needed.

    Images are returned as-is. AI files are first converted to PDF via
    LibreOffice (``soffice``); PDFs (original or converted) are rasterized
    at 220 DPI with ``pdftoppm`` when available, else ImageMagick ``magick``.

    Args:
        source: Input image / PDF / AI file.
        workdir: Directory where ``converted/`` and ``rasterized/`` are created.

    Returns:
        Path to the raster image (the original file for image inputs).

    Raises:
        RuntimeError: if a required converter is missing or a conversion fails.
        ValueError: propagated from detect_kind for unsupported suffixes.
    """
    kind = detect_kind(source)
    if kind == "image":
        return source

    pdf_path = source
    if kind in {"pdf-compatible-ai", "eps-like-ai"}:
        # Fail early with a clear message instead of an obscure FileNotFoundError.
        if shutil.which("soffice") is None:
            raise RuntimeError("soffice (LibreOffice) is required to convert AI files to PDF")
        converted_dir = workdir / "converted"
        converted_dir.mkdir(parents=True, exist_ok=True)
        _run_or_raise(
            [
                "soffice",
                "--headless",
                "--convert-to",
                "pdf",
                "--outdir",
                str(converted_dir),
                str(source),
            ],
            "failed to convert AI to PDF",
        )
        pdf_path = converted_dir / f"{source.stem}.pdf"
        # soffice can exit 0 without producing output (e.g. filter issues).
        if not pdf_path.exists():
            raise RuntimeError(f"soffice reported success but {pdf_path} was not created")

    image_dir = workdir / "rasterized"
    image_dir.mkdir(parents=True, exist_ok=True)
    image_path = image_dir / f"{source.stem}.png"

    if shutil.which("pdftoppm"):
        # With -singlefile, pdftoppm appends ".png" to the given prefix itself.
        _run_or_raise(
            ["pdftoppm", "-png", "-singlefile", "-r", "220", str(pdf_path), str(image_path.with_suffix(""))],
            "failed to rasterize PDF",
        )
        return image_path

    if shutil.which("magick"):
        _run_or_raise(
            ["magick", "-density", "220", str(pdf_path), "-quality", "100", str(image_path)],
            "failed to rasterize PDF",
        )
        return image_path

    raise RuntimeError("Neither pdftoppm nor magick is available for PDF rasterization")
|
||||
|
||||
|
||||
def main() -> None:
    """CLI entry point: rasterize the input if needed, detect boxes, save outputs."""
    parser = argparse.ArgumentParser(description="Detect text-line and outer rectangle boxes from a label image.")
    parser.add_argument("input", type=Path, help="Path to an image, PDF, or AI file")
    parser.add_argument(
        "--output-dir",
        type=Path,
        default=Path("outputs/layout_boxes"),
        help="Directory for preview and JSON",
    )
    args = parser.parse_args()

    source = args.input.expanduser().resolve()
    output_dir = args.output_dir.expanduser().resolve()
    output_dir.mkdir(parents=True, exist_ok=True)

    # Normalize PDF/AI inputs to a raster image before box detection.
    raster_path = ensure_raster_image(source, output_dir)
    annotated, boxes = process_image(raster_path)

    preview_path = output_dir / f"{source.stem}.boxed.png"
    json_path = output_dir / f"{source.stem}.boxes.json"
    cv2.imwrite(str(preview_path), annotated)

    payload = {
        "source": str(source),
        "raster_path": str(raster_path),
        "preview_path": str(preview_path),
        "boxes": [box.to_dict() for box in boxes],
    }
    json_path.write_text(json.dumps(payload, ensure_ascii=False, indent=2), encoding="utf-8")

    # Emit the output locations as JSON so callers can machine-parse them.
    print(json.dumps({"preview_path": str(preview_path), "json_path": str(json_path)}, ensure_ascii=False, indent=2))
|
||||
|
||||
|
||||
# Script entry point: only run when executed directly, not when imported.
if __name__ == "__main__":
    main()
|
||||
109
scripts/detect_regions.py
Normal file
109
scripts/detect_regions.py
Normal file
@@ -0,0 +1,109 @@
|
||||
#!/usr/bin/env python3
|
||||
"""Standalone CLI: detect main regions in a label image via Qwen VL, then crop.
|
||||
|
||||
Usage
|
||||
-----
|
||||
python scripts/detect_regions.py <image_path> [--model MODEL] [--out OUT_DIR] [--key KEY]
|
||||
|
||||
Example
|
||||
-------
|
||||
python scripts/detect_regions.py data/sample.png
|
||||
python scripts/detect_regions.py data/sample.png --model qwen2.5-vl-72b-instruct
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import argparse
|
||||
import json
|
||||
import logging
|
||||
import sys
|
||||
from pathlib import Path
|
||||
|
||||
# ── make sure the project root is on sys.path ──────────────────────────────
# This script lives in <root>/scripts/, so two .parent hops reach the repo
# root; inserting it first lets `backend.app.*` resolve without installation.
ROOT = Path(__file__).resolve().parent.parent
sys.path.insert(0, str(ROOT))

# Script-wide logging: timestamped INFO-level messages for progress reporting.
logging.basicConfig(
    level=logging.INFO,
    format="%(asctime)s [%(levelname)s] %(name)s – %(message)s",
)
logger = logging.getLogger("detect_regions")
|
||||
|
||||
|
||||
def main() -> None:
    """Parse CLI options, detect label regions via Qwen VL, then optionally crop."""
    parser = argparse.ArgumentParser(
        description="Detect & crop main regions in a label image using Qwen VL"
    )
    parser.add_argument("image", help="Path to input image (PNG/JPEG)")
    parser.add_argument(
        "--model",
        default="qwen2.5-vl-7b-instruct",
        help="DashScope model ID (default: qwen2.5-vl-7b-instruct)",
    )
    parser.add_argument(
        "--out",
        default=None,
        help="Output directory for cropped regions (default: <image_dir>/regions/)",
    )
    parser.add_argument("--key", default=None, help="DASHSCOPE_API_KEY (overrides env)")
    parser.add_argument(
        "--api-max-side", type=int, default=1024,
        help="Max side length (px) of image sent to API (default: 1024). "
             "Crop is always done on the original full-res file.",
    )
    parser.add_argument("--no-crop", action="store_true", help="Only print coords, don't crop")
    parser.add_argument("--split", action="store_true",
                        help="Save each detected region separately (default: merge into one)")
    args = parser.parse_args()

    image_path = Path(args.image).expanduser().resolve()
    if not image_path.exists():
        parser.error(f"Image not found: {image_path}")

    if args.out:
        output_dir = Path(args.out).expanduser().resolve()
    else:
        output_dir = image_path.parent / "regions"

    # Imported lazily so argument errors surface without the project deps.
    from backend.app.region_detector import detect_regions, crop_and_save, merge_regions

    logger.info("Image: %s", image_path)
    logger.info("Model: %s", args.model)

    regions, raw_response = detect_regions(
        image_path,
        api_key=args.key or None,
        model=args.model,
        api_max_side=args.api_max_side,
    )

    if not regions:
        logger.error("No regions detected. Raw model response:\n%s", raw_response)
        sys.exit(1)

    print("\n── Detected regions ──────────────────────────────────────")
    for index, region in enumerate(regions, 1):
        print(f" {index:02d}. [{region.label}] bbox=({region.x1},{region.y1})-({region.x2},{region.y2}) "
              f"size={region.width}×{region.height}px")
    print()

    coords_json = [{"label": region.label, "bbox": [region.x1, region.y1, region.x2, region.y2]} for region in regions]
    print("JSON:")
    print(json.dumps(coords_json, ensure_ascii=False, indent=2))

    if not args.no_crop:
        if args.split:
            save_regions = regions
        else:
            # Default behavior: collapse all detections into one bounding box.
            merged = merge_regions(regions)
            save_regions = [merged]
            print(f"\n── Merged bbox: ({merged.x1},{merged.y1})-({merged.x2},{merged.y2})"
                  f" size={merged.width}×{merged.height}px")

        results = crop_and_save(image_path, save_regions, output_dir)
        print(f"\n── Cropped file(s) saved to: {output_dir} ──")
        for item in results:
            print(f" • {Path(item['path']).name} ← {item['label']}")
|
||||
|
||||
|
||||
# Script entry point: only run when executed directly, not when imported.
if __name__ == "__main__":
    main()
|
||||
7
scripts/start_backend.sh
Executable file
7
scripts/start_backend.sh
Executable file
@@ -0,0 +1,7 @@
|
||||
#!/usr/bin/env bash
# Start the FastAPI backend with auto-reload for local development.
set -euo pipefail

# Repo root = parent of the directory containing this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

cd "$ROOT_DIR"
# BACKEND_PORT mirrors the variable honored by scripts/start_dev.sh;
# defaults to the previous hard-coded 8010.
python3 -m uvicorn backend.app.main:app --host 127.0.0.1 --port "${BACKEND_PORT:-8010}" --reload
|
||||
80
scripts/start_dev.sh
Executable file
80
scripts/start_dev.sh
Executable file
@@ -0,0 +1,80 @@
|
||||
#!/usr/bin/env bash
# Launch the backend (uvicorn) and frontend (Vite) together for development.
set -euo pipefail

# Repo root = parent of the directory containing this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"
# Both ports are overridable from the environment.
BACKEND_PORT="${BACKEND_PORT:-8010}"
FRONTEND_PORT="${FRONTEND_PORT:-5173}"
|
||||
|
||||
# Print the PID of the first process listening on TCP port $1 (empty if none).
port_pid() {
  local port="$1"
  # lsof exits non-zero when nothing listens; `|| true` keeps set -e happy.
  lsof -tiTCP:"$port" -sTCP:LISTEN 2>/dev/null | sed -n '1p' || true
}
|
||||
|
||||
# Print the command line of the process listening on TCP port $1, if any.
port_cmd() {
  local pid
  pid="$(port_pid "$1")"
  if [[ -n "$pid" ]]; then
    ps -p "$pid" -o command= 2>/dev/null || true
  fi
}
|
||||
|
||||
# Report to stderr which process currently occupies port $1.
print_port_conflict() {
  local port="$1"
  local pid cmd
  pid="$(port_pid "$port")"
  cmd="$(port_cmd "$port")"
  # Group the messages so a single redirect sends them all to stderr.
  {
    echo "Port $port is already in use."
    if [[ -n "$pid" ]]; then
      echo " PID: $pid"
    fi
    if [[ -n "$cmd" ]]; then
      echo " CMD: $cmd"
    fi
  } >&2
}
|
||||
|
||||
# Terminate any child processes we started; ignore ones already gone.
cleanup() {
  local pid
  for pid in "${BACKEND_PID:-}" "${FRONTEND_PID:-}"; do
    if [[ -n "$pid" ]]; then
      kill "$pid" 2>/dev/null || true
    fi
  done
}
|
||||
|
||||
trap cleanup EXIT INT TERM

# Refuse to start if either port is taken, and tell the user what holds it.
if [[ -n "$(port_pid "$BACKEND_PORT")" ]]; then
  print_port_conflict "$BACKEND_PORT"
  echo "Set BACKEND_PORT to another port or stop the existing process first." >&2
  exit 1
fi

if [[ -n "$(port_pid "$FRONTEND_PORT")" ]]; then
  print_port_conflict "$FRONTEND_PORT"
  echo "Set FRONTEND_PORT to another port or stop the existing process first." >&2
  exit 1
fi

cd "$ROOT_DIR"
# Prefer the active conda env's Python (has the project deps); otherwise use
# whatever python3 is on PATH. (Previously this hard-coded one developer's
# ~/miniconda3 path, which broke on every other machine.)
PYTHON="${CONDA_PREFIX:-}/bin/python"
if [[ ! -x "$PYTHON" ]]; then
  PYTHON="$(command -v python3)"
fi
# Prefix each service's output so interleaved logs stay readable.
"$PYTHON" -m uvicorn backend.app.main:app --host 127.0.0.1 --port "$BACKEND_PORT" --reload \
  > >(sed 's/^/[backend] /') \
  2> >(sed 's/^/[backend] /' >&2) &
BACKEND_PID=$!

cd "$ROOT_DIR/frontend"
npm run dev -- --host 127.0.0.1 --port "$FRONTEND_PORT" \
  > >(sed 's/^/[frontend] /') \
  2> >(sed 's/^/[frontend] /' >&2) &
FRONTEND_PID=$!

echo "Backend: http://127.0.0.1:${BACKEND_PORT}"
echo "Frontend: http://127.0.0.1:${FRONTEND_PORT}"
echo "Press Ctrl+C to stop both services."

# Block until either child exits; the EXIT trap then reaps the other.
wait "$BACKEND_PID" "$FRONTEND_PID"
|
||||
7
scripts/start_frontend.sh
Executable file
7
scripts/start_frontend.sh
Executable file
@@ -0,0 +1,7 @@
|
||||
#!/usr/bin/env bash
# Start the Vite dev server for local development.
set -euo pipefail

# Repo root = parent of the directory containing this script.
ROOT_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")/.." && pwd)"

cd "$ROOT_DIR/frontend"
# FRONTEND_PORT mirrors the variable honored by scripts/start_dev.sh;
# defaults to the previous hard-coded 5173.
npm run dev -- --host 127.0.0.1 --port "${FRONTEND_PORT:-5173}"
|
||||
Reference in New Issue
Block a user