from __future__ import annotations from backend.app.mineru_parser import parse_mineru_fields def test_parse_mineru_fields_extracts_text_and_bbox() -> None: payload = { "pdf_info": [ { "page_idx": 0, "page_size": [2772, 1961], "para_blocks": [ { "bbox": [704, 134, 2106, 229], "type": "title", "lines": [ { "spans": [ { "type": "text", "content": "食品名称:天问礼品粽", "bbox": [704, 134, 2106, 229], } ] } ], } ], } ] } parsed = parse_mineru_fields(payload) assert parsed.page_width == 2772 assert parsed.page_height == 1961 assert parsed.fields == [ { "page": 1, "text": "食品名称:天问礼品粽", "font_name": "", "font_size_pt": None, "font_height_mm": None, "x0_pt": 704.0, "top_pt": 134.0, "x1_pt": 2106.0, "bottom_pt": 229.0, } ] def test_parse_mineru_fields_turns_table_html_into_text() -> None: payload = { "pdf_info": [ { "page_idx": 0, "page_size": [1000, 800], "para_blocks": [ { "bbox": [10, 20, 300, 200], "type": "table", "lines": [ { "spans": [ { "type": "table", "html": "
| 品种 | 规格 |
| 黑猪肉粽 | 130克×1 |