Initial commit: 包装审核 POC、Docker 与前后端

Made-with: Cursor
2026-04-15 17:18:49 +08:00
commit bbb4dd43b3
74 changed files with 297415 additions and 0 deletions
--- a/backend/app/image_classifier.py
+++ b/backend/app/image_classifier.py
@@ -0,0 +1,132 @@
+"""用 Qwen VL 对图片内容做语义分类，判断是否为二维码/条码。
+
+调用方式
+--------
+::
+
+    from backend.app.image_classifier import is_qr_code
+
+    result = is_qr_code(Path("crop.png"), api_key="sk-...")
+    if result:
+        # 再交给条码识别模块处理
+        ...
+
+设计原则
+--------
+* 只做"是/否"的单一判断，不解码内容（解码交给 barcode_detector）。
+* 复用 region_detector 中已有的 API key / base_url 读取逻辑。
+* 网络或模型调用失败时返回 False，保证 pipeline 可降级运行。
+"""
+from __future__ import annotations
+
+import base64
+import io
+import logging
+from pathlib import Path
+
+logger = logging.getLogger(__name__)
+
+# Default to the lightweight 7B vision model: fast and inexpensive.
+_DEFAULT_MODEL = "qwen2.5-vl-7b-instruct"
+
+_CLASSIFY_PROMPT = (  # Chinese yes/no question: QR code or any barcode present? Answer only 是/否.
+    "请仔细观察这张图片。\n"
+    "问题：图片中是否包含二维码（QR Code）或任何类型的条形码？\n"
+    '请只回答"是"或"否"，不要输出其他任何内容。'
+)
+
+
+def _encode_image(image_path: Path, max_side: int = 512) -> str:
+    """Return the image at ``image_path`` as base64-encoded PNG text.
+
+    Transparency is flattened onto white (a bare RGB conversion would expose the
+    colour hidden under it); a longer side above ``max_side`` px is scaled down.
+    """
+    from PIL import Image
+
+    with Image.open(image_path) as src:
+        alpha = src.mode in ("RGBA", "LA", "PA") or "transparency" in src.info
+        img = src.convert("RGBA" if alpha else "RGB")
+    if alpha:
+        img = Image.alpha_composite(Image.new("RGBA", img.size, "white"), img).convert("RGB")
+    if max(img.size) > max_side:
+        scale = max_side / max(img.size)  # proportional, so the aspect ratio is kept
+        img = img.resize(tuple(max(1, round(s * scale)) for s in img.size), Image.LANCZOS)
+    buf = io.BytesIO()
+    img.save(buf, format="PNG")
+    return base64.b64encode(buf.getvalue()).decode()
+
+
+def is_qr_code(
+    image_path: Path,
+    api_key: str | None = None,
+    model: str = _DEFAULT_MODEL,
+) -> bool:
+    """Ask Qwen VL whether the image contains a QR code or any kind of barcode.
+
+    Only a yes/no verdict is produced; nothing is decoded. Every failure (no
+    API key, unreadable image, request error, malformed response) is logged
+    and reported as False so that the calling pipeline can keep running.
+
+    Parameters
+    ----------
+    image_path:
+        Path of the image to classify.
+    api_key:
+        DashScope API key; when None or empty, the value returned by
+        ``region_detector._get_api_key()`` is used instead.
+    model:
+        Name of the model to call; defaults to ``_DEFAULT_MODEL``.
+
+    Returns
+    -------
+    bool
+        True when the reply, ignoring leading quotes and case, starts with
+        "是" or "yes"; False for any other reply and on every failure path.
+    """
+    # Imported lazily so that an unconfigured environment does not break module import.
+    from backend.app.region_detector import _get_api_key, _get_base_url
+    from openai import OpenAI
+
+    key = api_key or _get_api_key()
+    if not key:
+        logger.warning("image_classifier: DASHSCOPE_API_KEY 未配置，跳过 QR 语义判断")
+        return False
+
+    try:
+        b64 = _encode_image(image_path)
+    except Exception as exc:
+        logger.warning("image_classifier: 图片编码失败 (%s)，跳过分类", exc)
+        return False
+
+    # One user message: the image as a data URL, followed by the question.
+    image_part = {"type": "image_url", "image_url": {"url": f"data:image/png;base64,{b64}"}}
+    text_part = {"type": "text", "text": _CLASSIFY_PROMPT}
+
+    # Client construction and reply extraction stay inside the ``try`` so that a
+    # setup error, an empty ``choices`` list or a missing message also yields False.
+    try:
+        client = OpenAI(api_key=key, base_url=_get_base_url())
+        response = client.chat.completions.create(
+            model=model,
+            messages=[{"role": "user", "content": [image_part, text_part]}],
+            max_tokens=10,  # a one-word answer is expected
+            temperature=0.0,
+        )
+        raw = (response.choices[0].message.content or "").strip()
+    except Exception as exc:
+        logger.warning("image_classifier: Qwen VL 调用失败 (%s)，跳过分类", exc)
+        return False
+    logger.debug("image_classifier: 模型原始回复 = %r", raw)
+
+    # The prompt shows the expected answers in quotes, so the model may echo the
+    # quotes back; drop leading quotes/whitespace before matching yes ("是"/"yes").
+    answer = raw.lstrip("\"'“”‘’「」 \t\r\n").lower()
+    result = answer.startswith(("是", "yes"))
+    logger.info(
+        "image_classifier: %s → %s（原始回复：%r）",
+        Path(image_path).name,  # also works when a plain str path was passed
+        "二维码/条码" if result else "非二维码",
+        raw,
+    )
+    return result