diff --git a/full.spec b/full.spec index dcf394d..8f78488 100644 --- a/full.spec +++ b/full.spec @@ -6,15 +6,33 @@ import docutranslate # 初始化列表 datas = [] binaries = [] -hiddenimports = ['markdown.extensions.tables', 'pymdownx.arithmatex', - 'pymdownx.superfences', 'pymdownx.highlight', 'pygments', - 'docling_ibm_models', - 'docling_parse', 'cv2'] -# --- 核心修改:在列表中添加 'tiktoken' --- -# tiktoken: 解决 Unknown encoding cl100k_base 问题 -# easyocr, docling 等: 收集其他依赖 -packages_to_collect = ['easyocr', 'docling', 'pygments', 'docling_ibm_models', 'tiktoken'] +# --- 核心修改 1: 添加 tiktoken_ext.openai_public 到隐式导入 --- +# 必须显式导入这个模块,否则 tiktoken 找不到 cl100k_base 编码 +hiddenimports = [ + 'markdown.extensions.tables', + 'pymdownx.arithmatex', + 'pymdownx.superfences', + 'pymdownx.highlight', + 'pygments', + 'docling_ibm_models', + 'docling_parse', + 'cv2', + 'tiktoken_ext.openai_public' # <--- 新增:确保编码插件被识别 +] + +# --- 核心修改 2: 在列表中添加 'tiktoken_ext' --- +# tiktoken: 核心库 +# tiktoken_ext: 包含编码定义文件 (解决 Unknown encoding 问题) +# easyocr, docling 等: 其他依赖 +packages_to_collect = [ + 'easyocr', + 'docling', + 'pygments', + 'docling_ibm_models', + 'tiktoken', + 'tiktoken_ext' # <--- 新增:必须收集扩展包 +] for package in packages_to_collect: try: @@ -29,6 +47,8 @@ for package in packages_to_collect: try: datas += copy_metadata('docling-ibm-models') # 这里必须用连字符(pip名) datas += copy_metadata('docling-parse') # 预防性添加 + # 也可以预防性添加 tiktoken 的元数据,虽然 collect_all 通常已经处理了 + datas += copy_metadata('tiktoken') except Exception as e: print(f"Warning: Failed to copy metadata: {e}") diff --git a/lite.spec b/lite.spec index e91e876..2351f78 100644 --- a/lite.spec +++ b/lite.spec @@ -3,12 +3,25 @@ import os from PyInstaller.utils.hooks import collect_data_files, collect_all import docutranslate -# --- 核心修改开始:收集 tiktoken 的所有依赖 --- -# tiktoken 依赖动态加载的插件和二进制文件,必须全部收集 -tmp_ret = collect_all('tiktoken') -tik_datas = tmp_ret[0] -tik_binaries = tmp_ret[1] -tik_hiddenimports = tmp_ret[2] +# --- 核心修改开始:同时收集 tiktoken 和 tiktoken_ext --- + 
+# 1. 收集 tiktoken 主包 +ret_tik = collect_all('tiktoken') +tik_datas = ret_tik[0] +tik_binaries = ret_tik[1] +tik_hiddenimports = ret_tik[2] + +# 2. 关键修复:收集 tiktoken_ext +# cl100k_base 等编码定义在这个扩展包里,必须显式收集 +ret_ext = collect_all('tiktoken_ext') +tik_datas += ret_ext[0] +tik_binaries += ret_ext[1] +tik_hiddenimports += ret_ext[2] + +# 3. 双重保险:强制加入具体的插件模块 +# 有时候 collect_all 扫描不到动态加载的 openai_public,这里手动补上 +tik_hiddenimports.append('tiktoken_ext.openai_public') + # --- 核心修改结束 --- datas = [ @@ -24,18 +37,19 @@ hiddenimports = [ 'pymdownx.superfences', 'pymdownx.highlight', 'pygments', - *tik_hiddenimports # 合并 tiktoken 的隐式导入 (包含 tiktoken_ext.openai_public 等) + *tik_hiddenimports ] a = Analysis( ['docutranslate/app.py'], pathex=[], - binaries=tik_binaries, # 确保包含了 tiktoken 的二进制文件 + binaries=tik_binaries, datas=datas, hiddenimports=hiddenimports, hookspath=[], hooksconfig={}, runtime_hooks=[], + # 注意:exclude docling 可能导致部分依赖丢失,确保你真的不需要它 excludes=["docling", "docutranslate.converter.x2md.converter_docling"], noarchive=False, optimize=0, diff --git a/lite_mac.spec b/lite_mac.spec index b2e90ea..5293f70 100644 --- a/lite_mac.spec +++ b/lite_mac.spec @@ -1,35 +1,57 @@ # -*- mode: python ; coding: utf-8 -*- -from PyInstaller.utils.hooks import collect_data_files +from PyInstaller.utils.hooks import collect_data_files, collect_all import docutranslate +# --- 核心修改开始:收集 tiktoken 及其扩展依赖 --- + +# 1. 收集 tiktoken 主包 +ret_tik = collect_all('tiktoken') +tik_datas = ret_tik[0] +tik_binaries = ret_tik[1] +tik_hiddenimports = ret_tik[2] + +# 2. 核心修复:收集 tiktoken_ext +# cl100k_base 等编码定义文件位于此处,必须显式收集 +ret_ext = collect_all('tiktoken_ext') +tik_datas += ret_ext[0] +tik_binaries += ret_ext[1] +tik_hiddenimports += ret_ext[2] + +# 3. 
强制加入具体的插件模块 +# 解决 "ValueError: Unknown encoding cl100k_base" +tik_hiddenimports.append('tiktoken_ext.openai_public') + +# --- 核心修改结束 --- + datas = [ ('./docutranslate/static', 'docutranslate/static'), - ('./docutranslate/template', 'docutranslate/template') + ('./docutranslate/template', 'docutranslate/template'), + *collect_data_files('pygments'), + *tik_datas ] -# 只收集 pygments 的数据文件 -datas += collect_data_files('pygments') - hiddenimports = [ 'markdown.extensions.tables', 'pymdownx.arithmatex', 'pymdownx.superfences', 'pymdownx.highlight', - 'pygments' + 'pygments', + *tik_hiddenimports # 合并 tiktoken 和 tiktoken_ext 的隐式导入 ] a = Analysis( ['docutranslate/app.py'], pathex=[], - binaries=[], + binaries=tik_binaries, # 确保包含了 tiktoken 的二进制文件 datas=datas, hiddenimports=hiddenimports, hookspath=[], hooksconfig={}, runtime_hooks=[], - excludes=["docling","docutranslate.converter.x2md.converter_docling"], + # 保持原有的排除项 + excludes=["docling", "docutranslate.converter.x2md.converter_docling"], noarchive=False, - target_arch='universal2', + target_arch='universal2', # 保留 Mac 通用架构设置 optimize=0, ) @@ -52,5 +74,5 @@ exe = EXE( argv_emulation=False, codesign_identity=None, entitlements_file=None, - icon='DocuTranslate.icns', + icon='DocuTranslate.icns', # 保留 Mac 图标 ) \ No newline at end of file diff --git a/lite_mac_x86_64.spec b/lite_mac_x86_64.spec index a777d62..6dc5a59 100644 --- a/lite_mac_x86_64.spec +++ b/lite_mac_x86_64.spec @@ -1,35 +1,57 @@ # -*- mode: python ; coding: utf-8 -*- -from PyInstaller.utils.hooks import collect_data_files +from PyInstaller.utils.hooks import collect_data_files, collect_all import docutranslate +# --- 核心修改开始:收集 tiktoken 及其扩展依赖 --- + +# 1. 收集 tiktoken 主包 +ret_tik = collect_all('tiktoken') +tik_datas = ret_tik[0] +tik_binaries = ret_tik[1] +tik_hiddenimports = ret_tik[2] + +# 2. 
核心修复:收集 tiktoken_ext +# cl100k_base 等编码定义文件位于此处,必须显式收集 +ret_ext = collect_all('tiktoken_ext') +tik_datas += ret_ext[0] +tik_binaries += ret_ext[1] +tik_hiddenimports += ret_ext[2] + +# 3. 强制加入具体的插件模块 +# 解决 "ValueError: Unknown encoding cl100k_base" +tik_hiddenimports.append('tiktoken_ext.openai_public') + +# --- 核心修改结束 --- + datas = [ ('./docutranslate/static', 'docutranslate/static'), - ('./docutranslate/template', 'docutranslate/template') + ('./docutranslate/template', 'docutranslate/template'), + *collect_data_files('pygments'), + *tik_datas ] -# 只收集 pygments 的数据文件 -datas += collect_data_files('pygments') - hiddenimports = [ 'markdown.extensions.tables', 'pymdownx.arithmatex', 'pymdownx.superfences', 'pymdownx.highlight', - 'pygments' + 'pygments', + *tik_hiddenimports # 合并 tiktoken 和 tiktoken_ext 的隐式导入 ] a = Analysis( ['docutranslate/app.py'], pathex=[], - binaries=[], + binaries=tik_binaries, # 确保包含了 tiktoken 的二进制文件 datas=datas, hiddenimports=hiddenimports, hookspath=[], hooksconfig={}, runtime_hooks=[], - excludes=["docling","docutranslate.converter.x2md.converter_docling"], + # 保持原有的排除项 + excludes=["docling", "docutranslate.converter.x2md.converter_docling"], noarchive=False, - target_arch='x86_64', + target_arch='x86_64', # keep the Intel-only target: this spec builds the x86_64 artifact, not universal2 optimize=0, ) @@ -41,7 +63,7 @@ exe = EXE( a.binaries, a.datas, [], - name=f'DocuTranslate-{docutranslate.__version__}-mac', + name=f'DocuTranslate-{docutranslate.__version__}-mac-x86', debug=False, bootloader_ignore_signals=False, strip=False, @@ -52,5 +74,5 @@ exe = EXE( argv_emulation=False, codesign_identity=None, entitlements_file=None, - icon='DocuTranslate.icns', + icon='DocuTranslate.icns', # 保留 Mac 图标 ) \ No newline at end of file