Initial commit: 包装审核 POC、Docker 与前后端

Made-with: Cursor
This commit is contained in:
2026-04-15 17:18:49 +08:00
commit bbb4dd43b3
74 changed files with 297415 additions and 0 deletions

View File

@@ -0,0 +1,99 @@
from __future__ import annotations
from backend.app.mineru_parser import parse_mineru_fields
def test_parse_mineru_fields_extracts_text_and_bbox() -> None:
    """A single title span becomes one field carrying its text and bbox.

    The payload holds one page (``page_idx`` 0) with one title block. The
    parsed result must expose the page size as ``page_width`` /
    ``page_height`` and emit exactly one field on page 1 whose coordinates
    are the span bbox as floats, with empty/``None`` font information.
    """
    title_text = "食品名称:天问礼品粽"
    title_bbox = (704, 134, 2106, 229)

    # Block and span each get their own list so no object is shared.
    title_span = {
        "type": "text",
        "content": title_text,
        "bbox": list(title_bbox),
    }
    title_block = {
        "bbox": list(title_bbox),
        "type": "title",
        "lines": [{"spans": [title_span]}],
    }
    first_page = {
        "page_idx": 0,
        "page_size": [2772, 1961],
        "para_blocks": [title_block],
    }

    result = parse_mineru_fields({"pdf_info": [first_page]})

    assert result.page_width == 2772
    assert result.page_height == 1961

    expected_field = {
        "page": 1,
        "text": title_text,
        "font_name": "",
        "font_size_pt": None,
        "font_height_mm": None,
        "x0_pt": 704.0,
        "top_pt": 134.0,
        "x1_pt": 2106.0,
        "bottom_pt": 229.0,
    }
    assert result.fields == [expected_field]
def test_parse_mineru_fields_turns_table_html_into_text() -> None:
    """A table span's HTML is reduced to its cell texts joined by spaces."""
    table_html = "<table><tr><td>品种</td><td>规格</td></tr><tr><td>黑猪肉粽</td><td>130克×1</td></tr></table>"
    table_span = {"type": "table", "html": table_html}
    table_block = {
        "bbox": [10, 20, 300, 200],
        "type": "table",
        "lines": [{"spans": [table_span]}],
    }
    mineru_output = {
        "pdf_info": [
            {
                "page_idx": 0,
                "page_size": [1000, 800],
                "para_blocks": [table_block],
            },
        ],
    }

    result = parse_mineru_fields(mineru_output)

    first_field = result.fields[0]
    assert first_field["text"] == "品种 规格 黑猪肉粽 130克×1"
def test_parse_mineru_fields_skips_empty_decorative_blocks() -> None:
    """Blocks with no usable text produce no fields.

    Covers an image block whose span has no content and a text block whose
    only span content is whitespace.
    """
    image_block = {
        "bbox": [1, 2, 3, 4],
        "type": "image",
        "lines": [{"spans": [{"type": "image"}]}],
    }
    blank_text_block = {
        "bbox": [5, 6, 7, 8],
        "type": "text",
        "lines": [{"spans": [{"content": " "}]}],
    }
    only_page = {
        "page_idx": 0,
        "page_size": [1000, 800],
        "para_blocks": [image_block, blank_text_block],
    }

    result = parse_mineru_fields({"pdf_info": [only_page]})

    assert result.fields == []