From 010ef31372ce9506e616e57a50d3cdea94ec544c Mon Sep 17 00:00:00 2001 From: xunbu Date: Sat, 17 Jan 2026 17:32:25 +0800 Subject: [PATCH] =?UTF-8?q?=E4=BD=BF=E7=94=A8charset=5Fnormalizer=E6=9B=BF?= =?UTF-8?q?=E4=BB=A3chardet?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docutranslate/converter/x2xlsx/converter_csv2xlsx.py | 5 ++--- pyproject.toml | 1 - uv.lock | 11 ----------- 3 files changed, 2 insertions(+), 15 deletions(-) diff --git a/docutranslate/converter/x2xlsx/converter_csv2xlsx.py b/docutranslate/converter/x2xlsx/converter_csv2xlsx.py index 51c1d6d..f72c82e 100644 --- a/docutranslate/converter/x2xlsx/converter_csv2xlsx.py +++ b/docutranslate/converter/x2xlsx/converter_csv2xlsx.py @@ -7,8 +7,7 @@ from dataclasses import dataclass from io import BytesIO, StringIO from typing import Hashable -# 引入 chardet 用于编码检测 -import chardet +import charset_normalizer import openpyxl from docutranslate.converter.x2xlsx.base import X2XlsxConverter, X2XlsxConverterConfig @@ -47,7 +46,7 @@ class ConverterCsv2Xlsx(X2XlsxConverter): try: # --- 1. 自动检测文件编码 --- # 为提高性能,只取文件头部一部分进行检测 - detection_result = chardet.detect(document.content[:4096]) + detection_result = charset_normalizer.detect(document.content[:4096]) encoding = detection_result['encoding'] or 'utf-8' # 提供一个默认值 confidence = detection_result['confidence'] self.logger.info(f"检测到文件编码为: {encoding} (置信度: {confidence:.2%})") diff --git a/pyproject.toml b/pyproject.toml index f767689..3e9bd78 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -16,7 +16,6 @@ dependencies = [ "beautifulsoup4>=4.13.4", "markdown>=3.8.2", "pymdown-extensions>=10.16.1", - "chardet>=5.2.0", "pysubs2>=1.8.0", "httpx>=0.28.1", "python-pptx>=1.0.2", diff --git a/uv.lock b/uv.lock index a54b6a3..c9f4847 100644 --- a/uv.lock +++ b/uv.lock @@ -101,15 +101,6 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/70/7d/9bc192684cea499815ff478dfcdc13835ddf401365057044fb721ec6bddb/certifi-2025.11.12-py3-none-any.whl", hash = "sha256:97de8790030bbd5c2d96b7ec782fc2f7820ef8dba6db909ccf95449f2d062d4b", size = 159438 }, ] -[[package]] -name = "chardet" -version = "5.2.0" -source = { registry = "https://pypi.org/simple" } -sdist = { url = "https://files.pythonhosted.org/packages/f3/0d/f7b6ab21ec75897ed80c17d79b15951a719226b9fababf1e40ea74d69079/chardet-5.2.0.tar.gz", hash = "sha256:1b3b6ff479a8c414bc3fa2c0852995695c4a026dcd6d0633b2dd092ca39c1cf7", size = 2069618 } -wheels = [ - { url = "https://files.pythonhosted.org/packages/38/6f/f5fbc992a329ee4e0f288c1fe0e2ad9485ed064cac731ed2fe47dcc38cbf/chardet-5.2.0-py3-none-any.whl", hash = "sha256:e1cf59446890a00105fe7b7912492ea04b6e6f06d4b742b2c788469e34c82970", size = 199385 }, -] - [[package]] name = "charset-normalizer" version = "3.4.4" @@ -379,7 +370,6 @@ name = "docutranslate" source = { editable = "." } dependencies = [ { name = "beautifulsoup4" }, - { name = "chardet" }, { name = "fastapi", extra = ["standard"] }, { name = "httpx" }, { name = "json-repair" }, @@ -414,7 +404,6 @@ dev = [ [package.metadata] requires-dist = [ { name = "beautifulsoup4", specifier = ">=4.13.4" }, - { name = "chardet", specifier = ">=5.2.0" }, { name = "docling", marker = "extra == 'docling'", specifier = ">=2.40.0" }, { name = "fastapi", extras = ["standard"], specifier = ">=0.115.12" }, { name = "hf-xet", marker = "extra == 'docling'", specifier = ">=1.1.10" },