This commit is contained in:
xunbu
2025-12-18 00:18:45 +08:00
parent 9c21a5240f
commit 583615ebde
4 changed files with 115 additions and 37 deletions

View File

@@ -6,15 +6,33 @@ import docutranslate
# 初始化列表 # 初始化列表
datas = [] datas = []
binaries = [] binaries = []
hiddenimports = ['markdown.extensions.tables', 'pymdownx.arithmatex',
'pymdownx.superfences', 'pymdownx.highlight', 'pygments',
'docling_ibm_models',
'docling_parse', 'cv2']
# --- 核心修改:在列表中添加 'tiktoken' --- # --- 核心修改 1: 添加 tiktoken_ext.openai_public 到隐式导入 ---
# tiktoken: 解决 Unknown encoding cl100k_base 问题 # 必须显式导入这个模块,否则 tiktoken 找不到 cl100k_base 编码
# easyocr, docling 等: 收集其他依赖 hiddenimports = [
packages_to_collect = ['easyocr', 'docling', 'pygments', 'docling_ibm_models', 'tiktoken'] 'markdown.extensions.tables',
'pymdownx.arithmatex',
'pymdownx.superfences',
'pymdownx.highlight',
'pygments',
'docling_ibm_models',
'docling_parse',
'cv2',
'tiktoken_ext.openai_public' # <--- 新增:确保编码插件被识别
]
# --- 核心修改 2: 在列表中添加 'tiktoken_ext' ---
# tiktoken: 核心库
# tiktoken_ext: 包含编码定义文件 (解决 Unknown encoding 问题)
# easyocr, docling 等: 其他依赖
packages_to_collect = [
'easyocr',
'docling',
'pygments',
'docling_ibm_models',
'tiktoken',
'tiktoken_ext' # <--- 新增:必须收集扩展包
]
for package in packages_to_collect: for package in packages_to_collect:
try: try:
@@ -29,6 +47,8 @@ for package in packages_to_collect:
try: try:
datas += copy_metadata('docling-ibm-models') # 这里必须用连字符(pip名) datas += copy_metadata('docling-ibm-models') # 这里必须用连字符(pip名)
datas += copy_metadata('docling-parse') # 预防性添加 datas += copy_metadata('docling-parse') # 预防性添加
# 也可以预防性添加 tiktoken 的元数据,虽然 collect_all 通常已经处理了
datas += copy_metadata('tiktoken')
except Exception as e: except Exception as e:
print(f"Warning: Failed to copy metadata: {e}") print(f"Warning: Failed to copy metadata: {e}")

View File

@@ -3,12 +3,25 @@ import os
from PyInstaller.utils.hooks import collect_data_files, collect_all from PyInstaller.utils.hooks import collect_data_files, collect_all
import docutranslate import docutranslate
# --- 核心修改开始:收集 tiktoken 的所有依赖 --- # --- 核心修改开始:同时收集 tiktoken 和 tiktoken_ext ---
# tiktoken 依赖动态加载的插件和二进制文件,必须全部收集
tmp_ret = collect_all('tiktoken') # 1. 收集 tiktoken 主包
tik_datas = tmp_ret[0] ret_tik = collect_all('tiktoken')
tik_binaries = tmp_ret[1] tik_datas = ret_tik[0]
tik_hiddenimports = tmp_ret[2] tik_binaries = ret_tik[1]
tik_hiddenimports = ret_tik[2]
# 2. 关键修复:收集 tiktoken_ext
# cl100k_base 等编码定义在这个扩展包里,必须显式收集
ret_ext = collect_all('tiktoken_ext')
tik_datas += ret_ext[0]
tik_binaries += ret_ext[1]
tik_hiddenimports += ret_ext[2]
# 3. 双重保险:强制加入具体的插件模块
# 有时候 collect_all 扫描不到动态加载的 openai_public,这里手动补上
tik_hiddenimports.append('tiktoken_ext.openai_public')
# --- 核心修改结束 --- # --- 核心修改结束 ---
datas = [ datas = [
@@ -24,18 +37,19 @@ hiddenimports = [
'pymdownx.superfences', 'pymdownx.superfences',
'pymdownx.highlight', 'pymdownx.highlight',
'pygments', 'pygments',
*tik_hiddenimports # 合并 tiktoken 的隐式导入 (包含 tiktoken_ext.openai_public 等) *tik_hiddenimports
] ]
a = Analysis( a = Analysis(
['docutranslate/app.py'], ['docutranslate/app.py'],
pathex=[], pathex=[],
binaries=tik_binaries, # 确保包含了 tiktoken 的二进制文件 binaries=tik_binaries,
datas=datas, datas=datas,
hiddenimports=hiddenimports, hiddenimports=hiddenimports,
hookspath=[], hookspath=[],
hooksconfig={}, hooksconfig={},
runtime_hooks=[], runtime_hooks=[],
# 注意: exclude docling 可能导致部分依赖丢失,确保你真的不需要它
excludes=["docling", "docutranslate.converter.x2md.converter_docling"], excludes=["docling", "docutranslate.converter.x2md.converter_docling"],
noarchive=False, noarchive=False,
optimize=0, optimize=0,

View File

@@ -1,35 +1,57 @@
# -*- mode: python ; coding: utf-8 -*- # -*- mode: python ; coding: utf-8 -*-
from PyInstaller.utils.hooks import collect_data_files from PyInstaller.utils.hooks import collect_data_files, collect_all
import docutranslate import docutranslate
# --- 核心修改开始:收集 tiktoken 及其扩展依赖 ---
# 1. 收集 tiktoken 主包
ret_tik = collect_all('tiktoken')
tik_datas = ret_tik[0]
tik_binaries = ret_tik[1]
tik_hiddenimports = ret_tik[2]
# 2. 核心修复:收集 tiktoken_ext
# cl100k_base 等编码定义文件位于此处,必须显式收集
ret_ext = collect_all('tiktoken_ext')
tik_datas += ret_ext[0]
tik_binaries += ret_ext[1]
tik_hiddenimports += ret_ext[2]
# 3. 强制加入具体的插件模块
# 解决 "ValueError: Unknown encoding cl100k_base"
tik_hiddenimports.append('tiktoken_ext.openai_public')
# --- 核心修改结束 ---
datas = [ datas = [
('./docutranslate/static', 'docutranslate/static'), ('./docutranslate/static', 'docutranslate/static'),
('./docutranslate/template', 'docutranslate/template') ('./docutranslate/template', 'docutranslate/template'),
*collect_data_files('pygments'),
*tik_datas
] ]
# 只收集 pygments 的数据文件
datas += collect_data_files('pygments')
hiddenimports = [ hiddenimports = [
'markdown.extensions.tables', 'markdown.extensions.tables',
'pymdownx.arithmatex', 'pymdownx.arithmatex',
'pymdownx.superfences', 'pymdownx.superfences',
'pymdownx.highlight', 'pymdownx.highlight',
'pygments' 'pygments',
*tik_hiddenimports # 合并 tiktoken 和 tiktoken_ext 的隐式导入
] ]
a = Analysis( a = Analysis(
['docutranslate/app.py'], ['docutranslate/app.py'],
pathex=[], pathex=[],
binaries=[], binaries=tik_binaries, # 确保包含了 tiktoken 的二进制文件
datas=datas, datas=datas,
hiddenimports=hiddenimports, hiddenimports=hiddenimports,
hookspath=[], hookspath=[],
hooksconfig={}, hooksconfig={},
runtime_hooks=[], runtime_hooks=[],
# 保持原有的排除项
excludes=["docling", "docutranslate.converter.x2md.converter_docling"], excludes=["docling", "docutranslate.converter.x2md.converter_docling"],
noarchive=False, noarchive=False,
target_arch='universal2', target_arch='universal2', # 保留 Mac 通用架构设置
optimize=0, optimize=0,
) )
@@ -52,5 +74,5 @@ exe = EXE(
argv_emulation=False, argv_emulation=False,
codesign_identity=None, codesign_identity=None,
entitlements_file=None, entitlements_file=None,
icon='DocuTranslate.icns', icon='DocuTranslate.icns', # 保留 Mac 图标
) )

View File

@@ -1,35 +1,57 @@
# -*- mode: python ; coding: utf-8 -*- # -*- mode: python ; coding: utf-8 -*-
from PyInstaller.utils.hooks import collect_data_files from PyInstaller.utils.hooks import collect_data_files, collect_all
import docutranslate import docutranslate
# --- 核心修改开始:收集 tiktoken 及其扩展依赖 ---
# 1. 收集 tiktoken 主包
ret_tik = collect_all('tiktoken')
tik_datas = ret_tik[0]
tik_binaries = ret_tik[1]
tik_hiddenimports = ret_tik[2]
# 2. 核心修复:收集 tiktoken_ext
# cl100k_base 等编码定义文件位于此处,必须显式收集
ret_ext = collect_all('tiktoken_ext')
tik_datas += ret_ext[0]
tik_binaries += ret_ext[1]
tik_hiddenimports += ret_ext[2]
# 3. 强制加入具体的插件模块
# 解决 "ValueError: Unknown encoding cl100k_base"
tik_hiddenimports.append('tiktoken_ext.openai_public')
# --- 核心修改结束 ---
datas = [ datas = [
('./docutranslate/static', 'docutranslate/static'), ('./docutranslate/static', 'docutranslate/static'),
('./docutranslate/template', 'docutranslate/template') ('./docutranslate/template', 'docutranslate/template'),
*collect_data_files('pygments'),
*tik_datas
] ]
# 只收集 pygments 的数据文件
datas += collect_data_files('pygments')
hiddenimports = [ hiddenimports = [
'markdown.extensions.tables', 'markdown.extensions.tables',
'pymdownx.arithmatex', 'pymdownx.arithmatex',
'pymdownx.superfences', 'pymdownx.superfences',
'pymdownx.highlight', 'pymdownx.highlight',
'pygments' 'pygments',
*tik_hiddenimports # 合并 tiktoken 和 tiktoken_ext 的隐式导入
] ]
a = Analysis( a = Analysis(
['docutranslate/app.py'], ['docutranslate/app.py'],
pathex=[], pathex=[],
binaries=[], binaries=tik_binaries, # 确保包含了 tiktoken 的二进制文件
datas=datas, datas=datas,
hiddenimports=hiddenimports, hiddenimports=hiddenimports,
hookspath=[], hookspath=[],
hooksconfig={}, hooksconfig={},
runtime_hooks=[], runtime_hooks=[],
# 保持原有的排除项
excludes=["docling", "docutranslate.converter.x2md.converter_docling"], excludes=["docling", "docutranslate.converter.x2md.converter_docling"],
noarchive=False, noarchive=False,
target_arch='x86_64', target_arch='universal2', # 保留 Mac 通用架构设置
optimize=0, optimize=0,
) )
@@ -41,7 +63,7 @@ exe = EXE(
a.binaries, a.binaries,
a.datas, a.datas,
[], [],
name=f'DocuTranslate-{docutranslate.__version__}-mac', name=f'DocuTranslate-{docutranslate.__version__}-mac-x86',
debug=False, debug=False,
bootloader_ignore_signals=False, bootloader_ignore_signals=False,
strip=False, strip=False,
@@ -52,5 +74,5 @@ exe = EXE(
argv_emulation=False, argv_emulation=False,
codesign_identity=None, codesign_identity=None,
entitlements_file=None, entitlements_file=None,
icon='DocuTranslate.icns', icon='DocuTranslate.icns', # 保留 Mac 图标
) )