"""Tests for ``backend.app.pipeline``: preview generation and MinerU key loading."""

from pathlib import Path

import pytest

from backend.app import pipeline
from backend.app.pipeline import process_files

# NOTE: machine-specific fixture location. The first test below is skipped
# when the fixture files are not present, instead of failing obscurely
# inside the pipeline.
WORKDIR = Path("/Users/icemilk/Workspace/zld_POC")
AI_FILE = WORKDIR / "【2026-04-09】端午 - 背标 - 天问.ai"
DOCX_FILE = WORKDIR / "天问礼品粽【260331】.docx"
OUTPUT_DIR = WORKDIR / ".tmp_test_output"


@pytest.mark.skipif(
    not (AI_FILE.exists() and DOCX_FILE.exists()),
    reason=f"fixture files not found under {WORKDIR}",
)
def test_process_files_builds_preview_and_mineru_field_results(monkeypatch: pytest.MonkeyPatch) -> None:
    """Run ``process_files`` on the fixture files with a stubbed MinerU parser.

    ``pipeline._parse_preview_with_mineru`` is replaced by a fake that
    returns one page of size 2772 x 1961 with two paragraph blocks. The test
    then checks the preview metadata, the first field's text, status and
    x-coordinate, that at least one field is reported as unmatched, and that
    ``preview.pdf`` was written to ``OUTPUT_DIR``.
    """

    def fake_parse_with_mineru(_preview_path: Path, _output_dir: Path):
        # Canned MinerU-style payload; both arguments are intentionally ignored.
        return {
            "pdf_info": [
                {
                    "page_idx": 0,
                    "page_size": [2772, 1961],
                    "para_blocks": [
                        {
                            "bbox": [704, 134, 2106, 229],
                            "lines": [{"spans": [{"content": "食品名称:天问礼品粽"}]}],
                        },
                        {
                            "bbox": [10, 20, 40, 60],
                            "lines": [{"spans": [{"content": "Word中不存在的内容"}]}],
                        },
                    ],
                }
            ]
        }

    monkeypatch.setattr(pipeline, "_parse_preview_with_mineru", fake_parse_with_mineru)

    # OUTPUT_DIR persists between runs; drop any preview left by an earlier
    # run so the existence assertion at the end proves this run wrote it.
    preview_pdf = OUTPUT_DIR / "preview.pdf"
    preview_pdf.unlink(missing_ok=True)

    result = process_files(AI_FILE, DOCX_FILE, OUTPUT_DIR, job_id="test-job")

    preview = result["preview"]
    assert preview["type"] == "pdf"
    assert preview["url"] == "/api/files/test-job/preview.pdf"
    assert preview["pageWidthPt"] == 2772
    assert preview["pageHeightPt"] == 1961

    fields = result["fields"]
    first_field = fields[0]
    assert first_field["text"] == "食品名称:天问礼品粽"
    assert first_field["validation_status"] == "matched"
    assert first_field["x0_pt"] == 704.0
    assert any(field["validation_status"] == "unmatched" for field in fields)

    assert preview_pdf.exists()


def test_parse_preview_with_mineru_reads_key_from_env_file(
    monkeypatch: pytest.MonkeyPatch, tmp_path: Path
) -> None:
    """Check that the MinerU API key is taken from an env file.

    ``MINERU_API_KEY`` is removed from the process environment and
    ``pipeline.ENV_FILE_CANDIDATES`` is pointed at a temporary ``.env`` file,
    so the key handed to the (fake) client can only come from that file.
    """
    captured: dict[str, str] = {}

    class FakeMineruClient:
        """Stand-in for ``pipeline.MineruClient`` that records the API key."""

        def __init__(self, api_key: str) -> None:
            captured["api_key"] = api_key

        def parse_pdf(self, preview_path: Path, output_dir: Path) -> dict:
            # Echo the arguments back so the test can verify pass-through.
            return {"preview_path": str(preview_path), "output_dir": str(output_dir)}

    env_file = tmp_path / ".env"
    env_file.write_text("MINERU_API_KEY=from-env-file\n", encoding="utf-8")

    monkeypatch.delenv("MINERU_API_KEY", raising=False)
    monkeypatch.setattr(pipeline, "ENV_FILE_CANDIDATES", (env_file,))
    monkeypatch.setattr(pipeline, "MineruClient", FakeMineruClient)

    preview_path = tmp_path / "preview.pdf"
    preview_path.write_bytes(b"%PDF-1.7")

    result = pipeline._parse_preview_with_mineru(preview_path, tmp_path)

    assert captured["api_key"] == "from-env-file"
    assert result["preview_path"] == str(preview_path)