From ba2b4abba5be9a723cedf79656cbed67dfa88196 Mon Sep 17 00:00:00 2001 From: xunbu Date: Wed, 21 May 2025 12:52:49 +0800 Subject: [PATCH] small fix --- .idea/workspace.xml | 6 ++-- docutranslate/app.py | 16 +++++----- docutranslate/static/index.html | 15 ++++++---- docutranslate/utils/convert.py | 53 --------------------------------- pyproject.toml | 1 - 5 files changed, 20 insertions(+), 71 deletions(-) delete mode 100644 docutranslate/utils/convert.py diff --git a/.idea/workspace.xml b/.idea/workspace.xml index b340bf1..cf69dd2 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -6,7 +6,9 @@ + + @@ -621,7 +623,7 @@ - + diff --git a/docutranslate/app.py b/docutranslate/app.py index d999db4..8c8d600 100644 --- a/docutranslate/app.py +++ b/docutranslate/app.py @@ -177,20 +177,18 @@ async def _perform_translation(params: Dict[str, Any], file_contents: bytes, ori # --- API Endpoints --- @app.get("/", response_class=HTMLResponse) async def main_page(request: Request): - # Serve index.html from the static directory or root project directory - # Assuming index.html is at the same level as app.py or in STATIC_DIR - # For simplicity, if index.html is at root: - # return FileResponse(Path(__file__).parent / "index.html") - # If using Jinja2Templates and index.html is in "templates" folder: - # return templates.TemplateResponse("index.html", {"request": request}) - # Using FileResponse for index.html directly: index_path = Path("index.html") # Adjust if index.html is elsewhere if not index_path.exists(): # Fallback to static dir if not in root index_path = STATIC_DIR / "index.html" if not index_path.exists(): raise HTTPException(status_code=404, detail="index.html not found") - return FileResponse(index_path) + no_cache_headers = { + "Cache-Control": "no-store, no-cache, must-revalidate, max-age=0", + "Pragma": "no-cache", # 兼容 HTTP/1.0 + "Expires": "0", # 兼容旧版代理/缓存 + } + return FileResponse(index_path,headers=no_cache_headers) @app.post("/translate") @@ -408,7 +406,7 @@ async def download_html(filename_with_ext: str): def run_app(): print("正在启动 DocuTranslate WebUI") - print("请访问 http://127.0.0.1:8010") + print("请访问 http://127.0.0.1:8010 (ctrl+点击链接即可打开)") uvicorn.run(app, host="127.0.0.1", port=8010, workers=1) diff --git a/docutranslate/static/index.html b/docutranslate/static/index.html index 5591f1a..fad8090 100644 --- a/docutranslate/static/index.html +++ b/docutranslate/static/index.html @@ -229,7 +229,7 @@
@@ -360,6 +360,7 @@ return defaultValue; } } + //api访问地址到获取地址的映射 const apiHrefMap = { "https://openrouter.ai/api/v1": "https://openrouter.ai/settings/keys", @@ -513,12 +514,14 @@ console.warn(`get engine list failed: ${response.status}`); return; } - const status = await response.json(); + const enginList = await response.json(); statusMsg.textContent = '正在初始化'; - status.forEach((engin) => { - let option = convertEnginSelect.querySelector(`option[value="${engin}"]`); - option.disabled = true; - option.textContent += "(不可用)" + let options = convertEnginSelect.querySelectors(`option[value="${engin}"]`); + options.forEach((option) => { + if (!enginList.includes(option.value)) { + option.disabled = true; + option.textContent += "(不可用)" + } }) if (status.includes(convertEnginSelect.value)) { convertEnginSelect.value = "mineru"; diff --git a/docutranslate/utils/convert.py b/docutranslate/utils/convert.py deleted file mode 100644 index 6b2b633..0000000 --- a/docutranslate/utils/convert.py +++ /dev/null @@ -1,53 +0,0 @@ -import os -from pathlib import Path - -from docling.datamodel.base_models import InputFormat -from docling.datamodel.document import DocumentStream -from docling.datamodel.pipeline_options import PdfPipelineOptions, TableFormerMode -from docling.datamodel.settings import settings -from docling.document_converter import DocumentConverter, PdfFormatOption -from docling_core.types.doc import ImageRefMode -from huggingface_hub.errors import LocalEntryNotFoundError - -from docutranslate.logger import translater_logger - -IMAGE_RESOLUTION_SCALE = 4 - - -def file2markdown_embed_images(file_path: Path | str | DocumentStream, formula=False, code=False, - artifacts_path: Path | str | None = None) -> str: - translater_logger.info(f"正在将文档转换为markdown") - pipeline_options = PdfPipelineOptions(artifacts_path=artifacts_path) - pipeline_options.do_ocr = False - pipeline_options.images_scale = IMAGE_RESOLUTION_SCALE - pipeline_options.generate_picture_images = True - # pipeline_options.table_structure_options.mode = TableFormerMode.FAST - pipeline_options.table_structure_options.do_cell_matching = False - if formula: - pipeline_options.do_formula_enrichment = True - if code: - pipeline_options.do_code_enrichment = True - # pipeline_options.accelerator_options= AcceleratorOptions( - # num_threads=4, device=AcceleratorDevice.AUTO - # ) - # 打印时间 - settings.debug.profile_pipeline_timings = True - converter = DocumentConverter(format_options={ - InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options) - - }) - try: - conversion_result = converter.convert(file_path) - result = conversion_result.document.export_to_markdown(image_mode=ImageRefMode.EMBEDDED) - except LocalEntryNotFoundError: - translater_logger.info(f"无法连接huggingface,正在尝试换源") - os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com' - conversion_result = converter.convert(file_path) - result = conversion_result.document.export_to_markdown(image_mode=ImageRefMode.EMBEDDED) - translater_logger.info(f"已转换为markdown") - translater_logger.info(f"pdf转换耗时: {conversion_result.timings["pipeline_total"].times}") - return result - - -if __name__ == '__main__': - pass diff --git a/pyproject.toml b/pyproject.toml index cfb0fb3..149ae05 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,7 +8,6 @@ dependencies = [ "httpx", "markdown2", "fastapi[standard]>=0.115.12", - "docling>=2.33.0", ] [project.scripts] docutranslate = "docutranslate.cli:main"