diff --git a/README.md b/README.md index 25074d1..c0ace2b 100644 --- a/README.md +++ b/README.md @@ -28,9 +28,9 @@ 1. `uv init` 2. `uv add docutranslate` -3. `uv add docling`#如果需要使用docling进行文档解析 +3. `uv add docutranslate[docling]`#如果需要使用docling进行文档解析 -使用git +使用git(需下载uv) 1. `git clone https://github.com/xunbu/docutranslate.git` 2. `uv sync` @@ -59,9 +59,11 @@ 使用minerU将文档转换为markdown时,需要在minerU平台申请token -1. 打开[minerU官网](https://mineru.net/apiManage/docs)申请token +1. 打开[minerU官网](https://mineru.net/apiManage/docs)申请API 2. 申请成功后,在[API Token管理界面](https://mineru.net/apiManage/token)创建API Token +> mineru token有14天有效期,若过期请创建新的token + ## 使用docling引擎注意事项 使用docling将文档转换为markdown时,需要下载模型到本地(也可以提前下载,见FAQ),因此可能会遇到一些网络问题 @@ -185,8 +187,8 @@ from docutranslate import FileTranslater translater = FileTranslater(base_url="", # 默认的模型baseurl key="", # 默认的大语言模型平台api-key model_id="", # 默认的模型id - chunksize=3000, # markdown分块长度(单位byte),分块越大效果越好(也越慢),不建议超过8000 - max_concurrent=30, # 并发数,受到ai平台并发量限制,如果文章很长建议适当加大到20以上 + chunk_size=3000, # markdown分块长度(单位byte),分块越大效果越好(也越慢),不建议超过8000 + concurrent=30, # 并发数,受到ai平台并发量限制,如果文章很长建议适当加大到20以上 timeout=2000, # 调用api的超时时间 docling_artifact=None, # 使用提前下载好的docling模型 convert_engin="mineru", # 可选minerU或docling diff --git a/docutranslate/app.py b/docutranslate/app.py index 08fff5d..35965be 100644 --- a/docutranslate/app.py +++ b/docutranslate/app.py @@ -15,6 +15,7 @@ from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse, Fil from fastapi.staticfiles import StaticFiles from docutranslate import FileTranslater, __version__ from docutranslate.logger import translater_logger +from docutranslate.translater import default_params from docutranslate.utils.resource_utils import resource_path from docutranslate.global_values import available_packages @@ -119,6 +120,9 @@ async def _perform_translation(params: Dict[str, Any], file_contents: bytes, ori base_url=params['base_url'], key=params['apikey'], model_id=params['model_id'], + chunk_size=params['chunk_size'], + concurrent=params['concurrent'], + temperature=params['temperature'], convert_engin=params['convert_engin'], mineru_token=params['mineru_token'], ) @@ -135,7 +139,8 @@ async def _perform_translation(params: Dict[str, Any], file_contents: bytes, ori md_content = ft.export_to_markdown() try: - await httpx_client.head("https://s4.zstatic.net/ajax/libs/KaTeX/0.16.9/contrib/auto-render.min.js", timeout=3) + await httpx_client.head("https://s4.zstatic.net/ajax/libs/KaTeX/0.16.9/contrib/auto-render.min.js", + timeout=3) html_content = ft.export_to_html(title=current_state["original_filename_stem"], cdn=True) except (httpx.TimeoutException, httpx.RequestError) as e: translater_logger.info(f"连接s4.zstatic.net失败,错误信息:{e}") @@ -213,6 +218,9 @@ async def handle_translate( refine_markdown: bool = Form(False), convert_engin: str = Form(...), mineru_token: Optional[str] = Form(None), + chunk_size: int = Form(...), + concurrent: int = Form(...), + temperature: float = Form(...), custom_prompt_translate: Optional[str] = Form(None), file: UploadFile = File(...) ): @@ -283,6 +291,9 @@ async def handle_translate( "code_ocr": code_ocr, "refine_markdown": refine_markdown, "convert_engin": convert_engin, "mineru_token": mineru_token, + "chunk_size":chunk_size, + "concurrent":concurrent, + "temperature":temperature, "custom_prompt_translate": custom_prompt_translate, } @@ -420,6 +431,11 @@ async def download_html(filename_with_ext: str): ) +@app.get("/translate/default_param") +def get_default_param(): + return JSONResponse(content=default_params) + + @app.get("/meta") async def get_app_version(): return JSONResponse(content={"version": __version__}) diff --git a/docutranslate/static/index.html b/docutranslate/static/index.html index c2aafd1..51b6f1a 100644 --- a/docutranslate/static/index.html +++ b/docutranslate/static/index.html @@ -267,6 +267,10 @@ font-weight: bold; } + .clickable { + cursor: pointer; + } + @media (max-width: 768px) { .form-grid { grid-template-columns: 1fr; @@ -409,6 +413,28 @@
高级选项
+
+ + +
+
+ + +
+
+ + +
+ @@ -470,7 +496,7 @@
-