Initial commit: 包装审核 POC、Docker 与前后端
Made-with: Cursor
This commit is contained in:
74
tests/backend/test_pipeline.py
Normal file
74
tests/backend/test_pipeline.py
Normal file
@@ -0,0 +1,74 @@
|
||||
from pathlib import Path
|
||||
|
||||
import pytest
|
||||
|
||||
from backend.app import pipeline
|
||||
from backend.app.pipeline import process_files
|
||||
|
||||
|
||||
# Sample inputs and scratch output location used by the end-to-end pipeline test.
# NOTE(review): WORKDIR is an absolute path under one user's home directory, so
# these files only exist on a machine laid out the same way.
WORKDIR = Path("/Users/icemilk/Workspace/zld_POC")
AI_FILE = WORKDIR.joinpath("【2026-04-09】端午 - 背标 - 天问.ai")
DOCX_FILE = WORKDIR.joinpath("天问礼品粽【260331】.docx")
OUTPUT_DIR = WORKDIR.joinpath(".tmp_test_output")
|
||||
|
||||
|
||||
def test_process_files_builds_preview_and_mineru_field_results(monkeypatch: pytest.MonkeyPatch) -> None:
    """Run ``process_files`` on the sample inputs with MinerU parsing stubbed out.

    The stub stands in for ``pipeline._parse_preview_with_mineru`` and returns a
    single page of size 2772 x 1961 containing two paragraph blocks. The test
    expects the first block to come back as ``matched`` and at least one field
    to come back as ``unmatched``.

    The test is skipped when the sample ``.ai`` / ``.docx`` inputs are missing,
    because they are referenced through the fixed absolute ``WORKDIR`` path.
    """
    missing_inputs = [str(path) for path in (AI_FILE, DOCX_FILE) if not path.exists()]
    if missing_inputs:
        pytest.skip(f"sample input files not available: {', '.join(missing_inputs)}")

    # Remove any preview left behind by an earlier run so the final existence
    # check can only be satisfied by this invocation.
    (OUTPUT_DIR / "preview.pdf").unlink(missing_ok=True)

    def fake_parse_with_mineru(_preview_path: Path, _output_dir: Path):
        # Canned parse result; both arguments are intentionally ignored.
        return {
            "pdf_info": [
                {
                    "page_idx": 0,
                    "page_size": [2772, 1961],
                    "para_blocks": [
                        {
                            "bbox": [704, 134, 2106, 229],
                            "lines": [{"spans": [{"content": "食品名称:天问礼品粽"}]}],
                        },
                        {
                            "bbox": [10, 20, 40, 60],
                            "lines": [{"spans": [{"content": "Word中不存在的内容"}]}],
                        },
                    ],
                }
            ]
        }

    monkeypatch.setattr(pipeline, "_parse_preview_with_mineru", fake_parse_with_mineru)

    result = process_files(AI_FILE, DOCX_FILE, OUTPUT_DIR, job_id="test-job")

    # Preview metadata: type, URL derived from the job id, and page size taken
    # from the stubbed parse result.
    assert result["preview"]["type"] == "pdf"
    assert result["preview"]["url"] == "/api/files/test-job/preview.pdf"
    assert result["preview"]["pageWidthPt"] == 2772
    assert result["preview"]["pageHeightPt"] == 1961

    # First field corresponds to the first stubbed block.
    assert result["fields"][0]["text"] == "食品名称:天问礼品粽"
    assert result["fields"][0]["validation_status"] == "matched"
    assert result["fields"][0]["x0_pt"] == 704.0

    assert any(field["validation_status"] == "unmatched" for field in result["fields"])
    assert (OUTPUT_DIR / "preview.pdf").exists()
|
||||
|
||||
|
||||
def test_parse_preview_with_mineru_reads_key_from_env_file(monkeypatch: pytest.MonkeyPatch, tmp_path: Path) -> None:
    """Check that ``_parse_preview_with_mineru`` hands the client the key from an env file.

    ``MINERU_API_KEY`` is removed from the process environment and
    ``pipeline.ENV_FILE_CANDIDATES`` is pointed at a temporary ``.env`` file, so
    the only place the expected key value appears is that file. A stub client
    records the key it is constructed with and echoes its ``parse_pdf``
    arguments back.
    """
    recorded: dict[str, str] = {}

    class FakeMineruClient:
        """Stand-in for ``pipeline.MineruClient`` that performs no real work."""

        def __init__(self, api_key: str) -> None:
            recorded["api_key"] = api_key

        def parse_pdf(self, preview_path: Path, output_dir: Path) -> dict:
            echoed = {
                "preview_path": str(preview_path),
                "output_dir": str(output_dir),
            }
            return echoed

    # Minimal placeholder PDF; the stub client never reads it.
    preview_path = tmp_path / "preview.pdf"
    preview_path.write_bytes(b"%PDF-1.7")

    env_file = tmp_path / ".env"
    env_file.write_text("MINERU_API_KEY=from-env-file\n", encoding="utf-8")

    monkeypatch.delenv("MINERU_API_KEY", raising=False)
    monkeypatch.setattr(pipeline, "MineruClient", FakeMineruClient)
    monkeypatch.setattr(pipeline, "ENV_FILE_CANDIDATES", (env_file,))

    result = pipeline._parse_preview_with_mineru(preview_path, tmp_path)

    assert recorded["api_key"] == "from-env-file"
    assert result["preview_path"] == str(preview_path)
|
||||
Reference in New Issue
Block a user