尝试增加对docx的支持

This commit is contained in:
xunbu
2025-08-05 11:24:18 +08:00
parent 8e2f9e65da
commit caf1fa17eb
10 changed files with 295 additions and 3 deletions

23
uv.lock generated
View File

@@ -142,6 +142,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/7e/d4/7ebdbd03970677812aac39c869717059dbb71a4cfc033ca6e5221787892c/click-8.1.8-py3-none-any.whl", hash = "sha256:63c132bbbed01578a06712a2d1f497bb62d9c1c0d329b7903a866228027263b2", size = 98188 },
]
[[package]]
name = "cobble"
version = "0.1.4"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/54/7a/a507c709be2c96e1bb6102eb7b7f4026c5e5e223ef7d745a17d239e9d844/cobble-0.1.4.tar.gz", hash = "sha256:de38be1539992c8a06e569630717c485a5f91be2192c461ea2b220607dfa78aa", size = 3805 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d5/e1/3714a2f371985215c219c2a70953d38e3eed81ef165aed061d21de0e998b/cobble-0.1.4-py3-none-any.whl", hash = "sha256:36c91b1655e599fd428e2b95fdd5f0da1ca2e9f1abb0bc871dec21a0e78a2b44", size = 3984 },
]
[[package]]
name = "colorama"
version = "0.4.6"
@@ -294,6 +303,7 @@ dependencies = [
{ name = "httpx" },
{ name = "json-repair" },
{ name = "jsonpath-ng" },
{ name = "mammoth" },
{ name = "markdown2" },
{ name = "openpyxl" },
{ name = "xlsx2html" },
@@ -319,6 +329,7 @@ requires-dist = [
{ name = "httpx", specifier = "==0.27.2" },
{ name = "json-repair", specifier = ">=0.48.0" },
{ name = "jsonpath-ng", specifier = ">=1.7.0" },
{ name = "mammoth", specifier = ">=1.10.0" },
{ name = "markdown2", specifier = ">=2.5.3" },
{ name = "opencv-python", marker = "extra == 'docling'", specifier = ">=4.11.0.86" },
{ name = "openpyxl", specifier = ">=3.1.5" },
@@ -727,6 +738,18 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/d1/5d/c059c180c84f7962db0aeae7c3b9303ed1d73d76f2bfbc32bc231c8be314/macholib-1.16.3-py2.py3-none-any.whl", hash = "sha256:0e315d7583d38b8c77e815b1ecbdbf504a8258d8b3e17b61165c6feb60d18f2c", size = 38094 },
]
[[package]]
name = "mammoth"
version = "1.10.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "cobble" },
]
sdist = { url = "https://files.pythonhosted.org/packages/89/0d/2ab86f37021b4c50fe72354acd226b1e31a10497e51f6cbd7e3d1eca1181/mammoth-1.10.0.tar.gz", hash = "sha256:cb6fbba41ccf8b5502859c457177d87a833fef0e0b1d4e6fd23ec372fe892c30", size = 52285 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/a6/67/36eeb3a8726df3b282ba99ec126323871cffdbcf3b7a1db64ca9bbe4abc1/mammoth-1.10.0-py2.py3-none-any.whl", hash = "sha256:a1c87d5b98ca30230394267f98614b58b14b50f8031dc33ac9a535c6ab04eb99", size = 53823 },
]
[[package]]
name = "markdown-it-py"
version = "3.0.0"