"""Endpoint tests for the backend FastAPI app.

Each test drives the app through ``TestClient`` and replaces the relevant
``pipeline`` function with a stub via ``monkeypatch``, so only the HTTP layer
and the response shape are checked here.

Some tests upload sample files that live in a developer-local directory
(``WORKDIR``). Those tests are skipped, rather than erroring with
``FileNotFoundError``, when the sample files are not present.
"""

from pathlib import Path

import pytest
from fastapi.testclient import TestClient

from backend.app import pipeline
from backend.app import main
from backend.app.main import app

# Developer-local directory holding the sample artwork and Word document.
WORKDIR = Path("/Users/icemilk/Workspace/zld_POC")
AI_FILE = WORKDIR / "【2026-04-09】端午 - 背标 - 天问.ai"
DOCX_FILE = WORKDIR / "天问礼品粽【260331】.docx"

# MIME types sent with the multipart uploads.
AI_MIME = "application/postscript"
DOCX_MIME = "application/vnd.openxmlformats-officedocument.wordprocessingml.document"

# Skip markers for tests that read the sample files directly from disk.
requires_ai_sample = pytest.mark.skipif(
    not AI_FILE.is_file(),
    reason=f"sample AI file not found: {AI_FILE}",
)
requires_docx_sample = pytest.mark.skipif(
    not DOCX_FILE.is_file(),
    reason=f"sample Word file not found: {DOCX_FILE}",
)

client = TestClient(app)


def fake_mineru_payload() -> dict:
    """Return a minimal one-page, one-block payload used to stub MinerU parsing."""
    return {
        "pdf_info": [
            {
                "page_idx": 0,
                "page_size": [2772, 1961],
                "para_blocks": [
                    {
                        "bbox": [704, 134, 2106, 229],
                        "lines": [{"spans": [{"content": "食品名称:天问礼品粽"}]}],
                    }
                ],
            }
        ]
    }


@requires_ai_sample
@requires_docx_sample
def test_process_endpoint_returns_preview_and_fields(monkeypatch: pytest.MonkeyPatch) -> None:
    """POST /api/process with both uploads returns a PDF preview and parsed fields."""
    monkeypatch.setattr(
        pipeline,
        "_parse_preview_with_mineru",
        lambda _preview_path, _output_dir: fake_mineru_payload(),
    )

    with AI_FILE.open("rb") as ai_fp, DOCX_FILE.open("rb") as docx_fp:
        response = client.post(
            "/api/process",
            files={
                "ai_file": (AI_FILE.name, ai_fp, AI_MIME),
                "word_file": (DOCX_FILE.name, docx_fp, DOCX_MIME),
            },
        )

    assert response.status_code == 200
    payload = response.json()
    assert payload["preview"]["type"] == "pdf"
    assert payload["fields"]
    # Page width and field text come straight from the stubbed payload.
    assert payload["preview"]["pageWidthPt"] == 2772
    assert payload["fields"][0]["text"] == "食品名称:天问礼品粽"


def test_process_endpoint_uses_default_sample_files_when_uploads_are_missing(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """POST /api/process without uploads still returns a preview and fields.

    NOTE(review): per the test name the endpoint is expected to fall back to
    default sample files; where those files live is decided by the backend and
    is not visible from this module.
    """
    monkeypatch.setattr(
        pipeline,
        "_parse_preview_with_mineru",
        lambda _preview_path, _output_dir: fake_mineru_payload(),
    )

    response = client.post("/api/process")

    assert response.status_code == 200
    payload = response.json()
    assert payload["preview"]["type"] == "pdf"
    assert payload["fields"]
    assert any(field["text"] for field in payload["fields"])


def test_process_endpoint_surfaces_missing_mineru_key(monkeypatch: pytest.MonkeyPatch) -> None:
    """A RuntimeError raised by the MinerU step is reported as HTTP 500 with its message."""

    def fake_parse_with_mineru(_preview_path, _output_dir):
        raise RuntimeError("MINERU_API_KEY is required")

    monkeypatch.setattr(pipeline, "_parse_preview_with_mineru", fake_parse_with_mineru)

    response = client.post("/api/process")

    assert response.status_code == 500
    assert response.json()["detail"] == "MINERU_API_KEY is required"


@requires_ai_sample
def test_mineru_extract_endpoint_returns_job_preview_and_blocks(
    monkeypatch: pytest.MonkeyPatch,
) -> None:
    """POST /api/mineru-extract returns the job id, preview, artifact URLs and blocks."""

    def fake_extract(_ai_path, _output_dir, job_id=None):
        # Echo the job id so the test can check it is threaded through to the response.
        return {
            "jobId": job_id,
            "preview": {
                "type": "pdf",
                "url": f"/api/files/{job_id}/preview.pdf",
                "pageWidthPt": 2772,
                "pageHeightPt": 1961,
            },
            "artifacts": {
                "json": {
                    "path": "/tmp/structured.json",
                    "url": f"/api/files/{job_id}/mineru/structured.json",
                },
                "markdown": {
                    "path": "/tmp/full.md",
                    "url": f"/api/files/{job_id}/mineru/full.md",
                },
            },
            "blocks": [
                {
                    "id": "block-1",
                    "text": "食品名称:天问礼品粽",
                    "page": 1,
                    "x0_pt": 1,
                    "top_pt": 2,
                    "x1_pt": 3,
                    "bottom_pt": 4,
                }
            ],
        }

    monkeypatch.setattr(pipeline, "extract_mineru_result", fake_extract)

    with AI_FILE.open("rb") as ai_fp:
        response = client.post(
            "/api/mineru-extract",
            files={"ai_file": (AI_FILE.name, ai_fp, AI_MIME)},
        )

    assert response.status_code == 200
    payload = response.json()
    # A truthy jobId means the endpoint passed a real job id into the stub.
    assert payload["jobId"]
    assert payload["preview"]["type"] == "pdf"
    assert payload["artifacts"]["json"]["url"].endswith("/mineru/structured.json")
    assert payload["artifacts"]["markdown"]["url"].endswith("/mineru/full.md")
    assert payload["blocks"][0]["id"] == "block-1"


@requires_docx_sample
def test_compare_word_endpoint_returns_compared_fields(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path
) -> None:
    """POST /api/compare-word for an existing job returns the compared fields."""
    # Point the app's output root at a temp dir and pre-create the job folder.
    # NOTE(review): presumably the endpoint requires the job directory to exist - confirm.
    monkeypatch.setattr(main, "OUTPUTS_DIR", tmp_path)
    (tmp_path / "test-job").mkdir(parents=True, exist_ok=True)

    def fake_compare(_word_path, _output_dir, job_id=None):
        return {
            "jobId": job_id,
            "preview": {
                "type": "pdf",
                "url": f"/api/files/{job_id}/preview.pdf",
                "pageWidthPt": 2772,
                "pageHeightPt": 1961,
            },
            "fields": [
                {
                    "id": "field-1",
                    "text": "食品名称:天问礼品粽",
                    "page": 1,
                    "x0_pt": 1,
                    "top_pt": 2,
                    "x1_pt": 3,
                    "bottom_pt": 4,
                    "normalized_text": "食品名称:天问礼品粽",
                    "validation_status": "matched",
                    "validation_reason": "normalized text found in Word content",
                    "matched_excerpt": "食品名称:天问礼品粽",
                }
            ],
        }

    monkeypatch.setattr(pipeline, "compare_word_with_mineru", fake_compare)

    with DOCX_FILE.open("rb") as docx_fp:
        response = client.post(
            "/api/compare-word",
            data={"job_id": "test-job"},
            files={"word_file": (DOCX_FILE.name, docx_fp, DOCX_MIME)},
        )

    assert response.status_code == 200
    payload = response.json()
    assert payload["jobId"] == "test-job"
    assert payload["fields"][0]["validation_status"] == "matched"