Initial commit: 包装审核 POC、Docker 与前后端
Made-with: Cursor
This commit is contained in:
132
backend/app/image_classifier.py
Normal file
132
backend/app/image_classifier.py
Normal file
@@ -0,0 +1,132 @@
|
||||
"""用 Qwen VL 对图片内容做语义分类,判断是否为二维码/条码。
|
||||
|
||||
调用方式
|
||||
--------
|
||||
::
|
||||
|
||||
from backend.app.image_classifier import is_qr_code
|
||||
|
||||
result = is_qr_code(Path("crop.png"), api_key="sk-...")
|
||||
if result:
|
||||
# 再交给条码识别模块处理
|
||||
...
|
||||
|
||||
设计原则
|
||||
--------
|
||||
* 只做"是/否"的单一判断,不解码内容(解码交给 barcode_detector)。
|
||||
* 复用 region_detector 中已有的 API key / base_url 读取逻辑。
|
||||
* 网络或模型调用失败时返回 False,保证 pipeline 可降级运行。
|
||||
"""
|
||||
from __future__ import annotations
|
||||
|
||||
import base64
|
||||
import io
|
||||
import logging
|
||||
from pathlib import Path
|
||||
|
||||
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)

# Default vision model: the lightweight 7B variant, chosen for speed and low cost.
_DEFAULT_MODEL: str = "qwen2.5-vl-7b-instruct"

# Prompt sent together with the image. It asks whether the picture contains a
# QR code or any kind of barcode and instructs the model to reply with only
# "是" (yes) or "否" (no).
_CLASSIFY_PROMPT: str = (
    "请仔细观察这张图片。\n"
    "问题:图片中是否包含二维码(QR Code)或任何类型的条形码?\n"
    '请只回答"是"或"否",不要输出其他任何内容。'
)
|
||||
|
||||
|
||||
def _encode_image(image_path: Path, max_side: int = 512) -> str:
    """Return the image at *image_path* as a base64-encoded PNG string.

    The image is converted to RGB. When its longer side exceeds *max_side*
    pixels it is scaled down proportionally with LANCZOS resampling so the
    longer side becomes roughly *max_side*; images that are already small
    enough keep their original size. Downscaling is intended to reduce the
    token cost of sending the picture to the model.

    Parameters
    ----------
    image_path:
        Path of the image file to read.
    max_side:
        Maximum allowed length, in pixels, of the longer image side.

    Returns
    -------
    str
        Base64 text of the PNG-encoded (and possibly downscaled) image.
    """
    # Pillow is imported lazily, only when an image actually has to be encoded.
    from PIL import Image

    # convert() yields an independent in-memory image, so the file handle can
    # be released as soon as the conversion is done.
    with Image.open(image_path) as source:
        rgb = source.convert("RGB")

    width, height = rgb.size
    longest = max(width, height)
    if longest > max_side:
        ratio = max_side / longest
        # Clamp each side to at least 1 px so extreme aspect ratios never
        # produce a zero-sized image.
        target = (max(1, round(width * ratio)), max(1, round(height * ratio)))
        rgb = rgb.resize(target, Image.LANCZOS)

    png_bytes = io.BytesIO()
    rgb.save(png_bytes, format="PNG")
    return base64.b64encode(png_bytes.getvalue()).decode()
|
||||
|
||||
|
||||
def is_qr_code(
    image_path: Path,
    api_key: str | None = None,
    model: str = _DEFAULT_MODEL,
) -> bool:
    """Ask a Qwen VL model whether the image contains a QR code or barcode.

    This is a yes/no classification only; the code's content is not decoded.
    Every failure on the way (missing key, image encoding, client setup, the
    request itself, or an unusable response) is logged as a warning and
    reported as ``False`` so the calling pipeline can keep running without
    the classifier.

    Parameters
    ----------
    image_path:
        Path of the image to classify.
    api_key:
        API key for the model endpoint. When falsy, the key is obtained from
        ``region_detector._get_api_key()``.
    model:
        Name of the model to call; defaults to ``_DEFAULT_MODEL``.

    Returns
    -------
    bool
        ``True`` when the model's reply starts with "是" or "yes"
        (case-insensitive, ignoring leading quote characters).
        ``False`` for any other reply, or when classification could not be
        performed.
    """
    # Imported lazily so that merely importing this module does not require
    # these dependencies to be importable.
    from backend.app.region_detector import _get_api_key, _get_base_url
    from openai import OpenAI

    key = api_key or _get_api_key()
    if not key:
        logger.warning("image_classifier: DASHSCOPE_API_KEY 未配置,跳过 QR 语义判断")
        return False

    try:
        b64 = _encode_image(image_path)
    except Exception as exc:
        logger.warning("image_classifier: 图片编码失败 (%s),跳过分类", exc)
        return False

    # Client construction, the request and the extraction of the reply all sit
    # in one guarded section: an empty ``choices`` list or a malformed
    # response must degrade to False exactly like a network error does.
    try:
        client = OpenAI(api_key=key, base_url=_get_base_url())
        response = client.chat.completions.create(
            model=model,
            messages=[
                {
                    "role": "user",
                    "content": [
                        {
                            "type": "image_url",
                            "image_url": {"url": f"data:image/png;base64,{b64}"},
                        },
                        {"type": "text", "text": _CLASSIFY_PROMPT},
                    ],
                }
            ],
            # A one-word answer is expected, so keep the completion tiny and
            # deterministic.
            max_tokens=10,
            temperature=0.0,
        )
        raw = (response.choices[0].message.content or "").strip()
    except Exception as exc:
        logger.warning("image_classifier: Qwen VL 调用失败 (%s),跳过分类", exc)
        return False

    logger.debug("image_classifier: 模型原始回复 = %r", raw)

    # Accept "是"/"yes" style answers. The prompt shows the options inside
    # quotes, so a reply that echoes them (e.g. '"是"') is tolerated by
    # dropping leading quote characters before the prefix check.
    answer = raw.lower().lstrip("\"'“”‘’「」『』")
    result = answer.startswith(("是", "yes"))
    logger.info(
        "image_classifier: %s → %s(原始回复:%r)",
        image_path.name,
        "二维码/条码" if result else "非二维码",
        raw,
    )
    return result
|
||||
Reference in New Issue
Block a user