From c29ea337e01418adc6353562ebba77bf9286a3e0 Mon Sep 17 00:00:00 2001 From: xunbu Date: Wed, 21 May 2025 10:24:32 +0800 Subject: [PATCH] =?UTF-8?q?=E5=B0=86docling=E8=AE=BE=E4=B8=BA=E5=8A=A8?= =?UTF-8?q?=E6=80=81=E5=AF=BC=E5=85=A5?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- .idea/workspace.xml | 126 +++--- README.md | 20 +- docutranslate/__init__.py | 4 + docutranslate/app.py | 11 +- docutranslate/converter/__init__.py | 5 +- docutranslate/global_values/__init__.py | 2 + .../global_values/conditional_import.py | 17 + docutranslate/static/index.html | 31 +- docutranslate/translater.py | 9 +- app.spec => full.spec | 0 lite.spec | 44 ++ pyproject.toml | 4 +- uv.lock | 419 +++++++----------- 13 files changed, 361 insertions(+), 331 deletions(-) create mode 100644 docutranslate/global_values/__init__.py create mode 100644 docutranslate/global_values/conditional_import.py rename app.spec => full.spec (100%) create mode 100644 lite.spec diff --git a/.idea/workspace.xml b/.idea/workspace.xml index 1e11674..1a0c10e 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -5,7 +5,19 @@ + + + + + + + + + + + + - { - "keyToString": { - "DefaultHtmlFileTemplate": "HTML File", - "JavaScript 调试.output.html (1).executor": "Run", - "JavaScript 调试.output.html.executor": "Run", - "JavaScript 调试.regex.md_中文.html.executor": "Run", - "JavaScript 调试.regex_中文.html.executor": "Run", - "JavaScript 调试.test.html.executor": "Run", - "JavaScript 调试.test2.html.executor": "Run", - "JavaScript 调试.test2_英文.html.executor": "Run", - "JavaScript 调试.test4-1_中文.html.executor": "Run", - "JavaScript 调试.互联网认证授权机制.html.executor": "Run", - "JavaScript 调试.互联网认证授权机制_英文.html.executor": "Run", - "JavaScript 调试.毕业论文_英文.html.executor": "Run", - "ModuleVcsDetector.initialDetectionPerformed": "true", - "Python 测试.Python 测试 (markdown_mask.py 内).executor": "Run", - "Python 测试.markdown_mask.Test.test_basic_link_masking 的 Python 测试.executor": "Run", - "Python 测试.pytest (test_html.py 内).executor": "Run", - "Python.1test.executor": "Run", - "Python.2test2 (1).executor": "Run", - "Python.PDFtranslater (1).executor": "Run", - "Python.PDFtranslater (2).executor": "Run", - "Python.agent.executor": "Debug", - "Python.agent_utils.executor": "Run", - "Python.app (1).executor": "Run", - "Python.app.executor": "Run", - "Python.app2.executor": "Run", - "Python.app_test (1).executor": "Run", - "Python.convert.executor": "Run", - "Python.converter_docling.executor": "Run", - "Python.converter_mineru.executor": "Run", - "Python.markdown_splitter.executor": "Debug", - "Python.markdown_utils.executor": "Run", - "Python.test.executor": "Run", - "Python.test1.executor": "Run", - "Python.test2.executor": "Run", - "Python.test3.executor": "Run", - "Python.test4.executor": "Run", - "Python.testhtml.executor": "Run", - "Python.translater.executor": "Run", - "Python.切分测试.executor": "Run", - "RunOnceActivity.ShowReadmeOnStart": "true", - "RunOnceActivity.TerminalTabsStorage.copyFrom.TerminalArrangementManager": "true", - "RunOnceActivity.git.unshallow": "true", - "git-widget-placeholder": "main", - "last_opened_file_path": "C:/Users/jxgm/Desktop/translate/docutranslate", - "list.type.of.created.stylesheet": "CSS", - "node.js.detected.package.eslint": "true", - "node.js.detected.package.tslint": "true", - "node.js.selected.package.eslint": "(autodetect)", - "node.js.selected.package.tslint": "(autodetect)", - "nodejs_package_manager_path": "npm", - "settings.editor.selected.configurable": "preferences.pluginManager", - "vue.rearranger.settings.migration": "true" + +}]]> @@ -599,6 +611,8 @@ + + @@ -617,7 +631,7 @@ - + diff --git a/README.md b/README.md index 5686a26..c58fc46 100644 --- a/README.md +++ b/README.md @@ -19,11 +19,17 @@ # 安装 使用pip -`pip install docutranslate` +`pip install docutranslate` +`pip install docling`#如果使用docling进行文档解析 使用uv `uv init` -`uv add docutranslate` +`uv add docutranslate` +`uv add docling`#如果使用docling进行文档解析 + +使用git +`git clone https://github.com/xunbu/docutranslate.git` +`uv sync` # 支持的文件格式 @@ -112,9 +118,9 @@ from docutranslate.translater import FileTranslater translater = FileTranslater(base_url="", key="", model_id="", - convert_engin="docling" # 默认使用docling - # convert_engin="mineru",# 使用mineru - # mineru_token="<申请的mineru_token>"#使用mineru时必填 + convert_engin="mineru",# 使用mineru解析文档 + mineru_token="<申请的mineru_token>"#使用mineru时必填 + # convert_engin="docling" # 使用docling解析文档 ) # 不开启公式、代码识别(默认输出为markdown文件) @@ -173,13 +179,13 @@ translater = FileTranslater(base_url="", # 默认的模型baseurl max_concurrent=20, # 并发数,受到ai平台并发量限制,如果文章很长建议适当加大到20以上 timeout=2000, # 调用api的超时时间 docling_artifact=None, # 使用提前下载好的docling模型 - convert_engin="mineru", # 可选docling或minerU + convert_engin="mineru", # 可选minerU或docling mineru_token="", # minerU的token,使用minerU时必填 tips=True # 开场提示 ) ``` - +> 使用docling需要先`pip install docling`或`uv add docling` ### 翻译文件 ```python diff --git a/docutranslate/__init__.py b/docutranslate/__init__.py index addea15..ed3c1d2 100644 --- a/docutranslate/__init__.py +++ b/docutranslate/__init__.py @@ -1,2 +1,6 @@ + + + + from .translater import FileTranslater from .app import app \ No newline at end of file diff --git a/docutranslate/app.py b/docutranslate/app.py index 9bc7c6e..d999db4 100644 --- a/docutranslate/app.py +++ b/docutranslate/app.py @@ -9,12 +9,13 @@ from urllib.parse import quote import uvicorn from fastapi import FastAPI, File, Form, UploadFile, Request, HTTPException from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse,FileResponse -from fastapi.templating import Jinja2Templates from fastapi.staticfiles import StaticFiles from docutranslate import FileTranslater # Assuming FileTranslater is in docutranslate module from docutranslate.logger import translater_logger from docutranslate.utils.resource_utils import resource_path +from docutranslate.global_values import available_packages +DOCLING_EXIST=True if available_packages.get("docling") else False app = FastAPI() STATIC_DIR=resource_path("static") @@ -35,7 +36,6 @@ current_state: Dict[str, Any] = { "task_end_time": 0, "current_task_ref": None, } -templates = Jinja2Templates(directory=".") # Not strictly used if index.html is served as FileResponse MAX_LOG_HISTORY = 200 log_history: List[str] = [] @@ -325,6 +325,13 @@ async def cancel_translate_task(): return JSONResponse(content={"cancelled": True, "message": "取消请求已发送。请等待状态更新。"}) +@app.get("/get-engin-list") +async def get_engin_list(): + engin_list=["mineru"] + if DOCLING_EXIST: + engin_list.append("docling") + return JSONResponse(content=engin_list) + @app.get("/get-status") async def get_status(): global current_state diff --git a/docutranslate/converter/__init__.py b/docutranslate/converter/__init__.py index 058266d..a631540 100644 --- a/docutranslate/converter/__init__.py +++ b/docutranslate/converter/__init__.py @@ -1,3 +1,6 @@ from .converter import Document,Converter from .converter_mineru import ConverterMineru -from .converter_docling import ConverterDocling \ No newline at end of file + +from docutranslate.global_values import conditional_import +if conditional_import("docling"): + from .converter_docling import ConverterDocling diff --git a/docutranslate/global_values/__init__.py b/docutranslate/global_values/__init__.py new file mode 100644 index 0000000..c0a7c3f --- /dev/null +++ b/docutranslate/global_values/__init__.py @@ -0,0 +1,2 @@ + +from .conditional_import import available_packages,conditional_import diff --git a/docutranslate/global_values/conditional_import.py b/docutranslate/global_values/conditional_import.py new file mode 100644 index 0000000..ede5259 --- /dev/null +++ b/docutranslate/global_values/conditional_import.py @@ -0,0 +1,17 @@ +import importlib + +available_packages={} + +def conditional_import(packagename,alias=None): + try: + imported= importlib.import_module(packagename) + if alias: + globals()[alias]=imported + else: + globals()[packagename]=imported + available_packages[packagename]=True + return True + except ImportError: + print(f"package:{packagename}不存在") + available_packages[packagename]=False + return False diff --git a/docutranslate/static/index.html b/docutranslate/static/index.html index cf52ca3..309ee7b 100644 --- a/docutranslate/static/index.html +++ b/docutranslate/static/index.html @@ -225,7 +225,7 @@