Initial commit: 包装审核 POC、Docker 与前后端
Made-with: Cursor
This commit is contained in:
99
tests/backend/test_mineru_parser.py
Normal file
99
tests/backend/test_mineru_parser.py
Normal file
@@ -0,0 +1,99 @@
|
||||
from __future__ import annotations
|
||||
|
||||
from backend.app.mineru_parser import parse_mineru_fields
|
||||
|
||||
|
||||
def test_parse_mineru_fields_extracts_text_and_bbox() -> None:
    """A title block holding one text span should produce one field record.

    The assertions pin the page width/height to the ``page_size`` values and
    the exact field dict: ``page`` is expected to be 1 for ``page_idx`` 0, the
    font attributes are expected to be empty/None, and the coordinates are
    expected as floats matching the bbox used in the payload.
    """
    # The block and its only span carry the same bbox values in this fixture,
    # so the test does not distinguish which of the two the parser reads.
    text_span = {
        "type": "text",
        "content": "食品名称:天问礼品粽",
        "bbox": [704, 134, 2106, 229],
    }
    title_block = {
        "bbox": [704, 134, 2106, 229],
        "type": "title",
        "lines": [{"spans": [text_span]}],
    }
    first_page = {
        "page_idx": 0,
        "page_size": [2772, 1961],
        "para_blocks": [title_block],
    }

    result = parse_mineru_fields({"pdf_info": [first_page]})

    expected_field = {
        "page": 1,
        "text": "食品名称:天问礼品粽",
        "font_name": "",
        "font_size_pt": None,
        "font_height_mm": None,
        "x0_pt": 704.0,
        "top_pt": 134.0,
        "x1_pt": 2106.0,
        "bottom_pt": 229.0,
    }
    assert result.page_width == 2772
    assert result.page_height == 1961
    assert result.fields == [expected_field]
|
||||
|
||||
|
||||
def test_parse_mineru_fields_turns_table_html_into_text() -> None:
    """A table span that only carries ``html`` should yield plain cell text.

    The expected text of the first field is the four cell contents of the
    HTML table joined by single spaces, in document order.
    """
    table_span = {
        "type": "table",
        "html": "<table><tr><td>品种</td><td>规格</td></tr><tr><td>黑猪肉粽</td><td>130克×1</td></tr></table>",
    }
    table_block = {
        "bbox": [10, 20, 300, 200],
        "type": "table",
        "lines": [{"spans": [table_span]}],
    }
    first_page = {
        "page_idx": 0,
        "page_size": [1000, 800],
        "para_blocks": [table_block],
    }

    result = parse_mineru_fields({"pdf_info": [first_page]})

    first_field = result.fields[0]
    assert first_field["text"] == "品种 规格 黑猪肉粽 130克×1"
|
||||
|
||||
|
||||
def test_parse_mineru_fields_skips_empty_decorative_blocks() -> None:
    """Blocks without usable text should not appear in the parsed fields.

    The page holds an image block whose span has no content and a text block
    whose span content is whitespace only; the expected field list is empty.
    """
    image_block = {
        "bbox": [1, 2, 3, 4],
        "type": "image",
        "lines": [{"spans": [{"type": "image"}]}],
    }
    blank_text_block = {
        "bbox": [5, 6, 7, 8],
        "type": "text",
        "lines": [{"spans": [{"content": " "}]}],
    }
    first_page = {
        "page_idx": 0,
        "page_size": [1000, 800],
        "para_blocks": [image_block, blank_text_block],
    }

    result = parse_mineru_fields({"pdf_info": [first_page]})

    assert result.fields == []
|
||||
Reference in New Issue
Block a user