Initial commit: 包装审核 POC、Docker 与前后端

Made-with: Cursor
This commit is contained in:
2026-04-15 17:18:49 +08:00
commit bbb4dd43b3
74 changed files with 297415 additions and 0 deletions

View File

@@ -0,0 +1,32 @@
from backend.app.text_validation import classify_text_block, normalize_text, validate_field_against_word
def test_normalize_text_collapses_whitespace_and_full_width_punctuation() -> None:
    """Check normalize_text on a field padded with spaces and split by a newline.

    Both fixtures are expected to normalize to the same compact string, with
    the leading/trailing spaces and the embedded line break removed.
    """
    expected = "食品生产许可证编号:SC11133042404806"

    # Original fixture: ASCII colon, surrounding spaces, newline inside the field.
    raw = " 食品生产许可证编号:\nSC11133042404806 "
    assert normalize_text(raw) == expected

    # NOTE(review): the test name promises full-width punctuation handling, but
    # the fixture above only contains an ASCII colon. This case uses a
    # full-width colon (U+FF1A, written as an escape so it survives re-encoding)
    # and assumes normalize_text maps it to ":" -- confirm against the
    # implementation.
    full_width_raw = " 食品生产许可证编号\uff1a\nSC11133042404806 "
    assert normalize_text(full_width_raw) == expected
def test_classify_text_block_marks_garbled_text() -> None:
    """Check classify_text_block labels for garbled, blank, and readable text.

    Garbled and whitespace-only inputs are expected to be labelled
    "empty_or_garbled"; a readable field is expected to be labelled
    "candidate".
    """
    # NOTE(review): the garbled fixture is spelled with explicit U+FFFD
    # (replacement character) escapes so the test no longer depends on how the
    # file is stored or rendered. The 4-2-2 grouping was reconstructed from a
    # mis-encoded literal -- confirm it matches the intended sample.
    garbled = "\ufffd\ufffd\ufffd\ufffd-\ufffd\ufffd-\ufffd\ufffd"
    assert classify_text_block(garbled) == "empty_or_garbled"

    # Whitespace-only input shares the same label as garbled input.
    assert classify_text_block(" ") == "empty_or_garbled"

    # A readable "label:value" field is kept as a candidate.
    assert classify_text_block("食品名称:天问礼品粽") == "candidate"
def test_validate_field_against_word_returns_excerpt_for_match() -> None:
    """A field that occurs in the Word text is reported as matched with an excerpt."""
    document_text = (
        "电话0573-86981666 食品生产许可证编号SC11133042404806 产品标准代号GB/T 46259"
    )
    field_text = "食品生产许可证编号SC11133042404806"

    outcome = validate_field_against_word(field_text, document_text)

    assert outcome.status == "matched"
    assert outcome.reason == "normalized text found in Word content"
    # Fall back to an empty string so a missing excerpt fails the membership
    # check instead of raising a TypeError.
    excerpt = outcome.matched_excerpt or ""
    assert "SC11133042404806" in excerpt
def test_validate_field_against_word_rejects_missing_text() -> None:
    """A field that is absent from the Word text is reported as unmatched, with no excerpt."""
    document_text = "产品标准代号GB/T 46259"
    field_text = "食品生产许可证编号SC11133042404806"

    outcome = validate_field_against_word(field_text, document_text)

    assert outcome.status == "unmatched"
    assert outcome.matched_excerpt is None