修复了文件名不能是中文的bug
This commit is contained in:
120
.idea/workspace.xml
generated
120
.idea/workspace.xml
generated
@@ -6,10 +6,8 @@
|
||||
<component name="ChangeListManager">
|
||||
<list default="true" id="6b18b44a-df57-4212-a857-9e291ebe5dd2" name="更改" comment="">
|
||||
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/README.md" beforeDir="false" afterPath="$PROJECT_DIR$/README.md" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/docutranslate/translater.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/translater.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/docutranslate/app.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/app.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/docutranslate/utils/convert.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/utils/convert.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/docutranslate/utils/markdown_splitter.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/utils/markdown_splitter.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/pyproject.toml" beforeDir="false" afterPath="$PROJECT_DIR$/pyproject.toml" afterDir="false" />
|
||||
</list>
|
||||
<option name="SHOW_DIALOG" value="false" />
|
||||
@@ -39,62 +37,62 @@
|
||||
<option name="hideEmptyMiddlePackages" value="true" />
|
||||
<option name="showLibraryContents" value="true" />
|
||||
</component>
|
||||
<component name="PropertiesComponent"><![CDATA[{
|
||||
"keyToString": {
|
||||
"DefaultHtmlFileTemplate": "HTML File",
|
||||
"JavaScript 调试.output.html (1).executor": "Run",
|
||||
"JavaScript 调试.output.html.executor": "Run",
|
||||
"JavaScript 调试.regex.md_中文.html.executor": "Run",
|
||||
"JavaScript 调试.regex_中文.html.executor": "Run",
|
||||
"JavaScript 调试.test.html.executor": "Run",
|
||||
"JavaScript 调试.test2.html.executor": "Run",
|
||||
"JavaScript 调试.test2_英文.html.executor": "Run",
|
||||
"JavaScript 调试.test4-1_中文.html.executor": "Run",
|
||||
"JavaScript 调试.互联网认证授权机制.html.executor": "Run",
|
||||
"JavaScript 调试.互联网认证授权机制_英文.html.executor": "Run",
|
||||
"JavaScript 调试.毕业论文_英文.html.executor": "Run",
|
||||
"ModuleVcsDetector.initialDetectionPerformed": "true",
|
||||
"Python 测试.Python 测试 (markdown_mask.py 内).executor": "Run",
|
||||
"Python 测试.markdown_mask.Test.test_basic_link_masking 的 Python 测试.executor": "Run",
|
||||
"Python 测试.pytest (test_html.py 内).executor": "Run",
|
||||
"Python.2test2 (1).executor": "Run",
|
||||
"Python.PDFtranslater (1).executor": "Run",
|
||||
"Python.PDFtranslater (2).executor": "Run",
|
||||
"Python.agent.executor": "Debug",
|
||||
"Python.agent_utils.executor": "Run",
|
||||
"Python.app (1).executor": "Run",
|
||||
"Python.app.executor": "Run",
|
||||
"Python.app2.executor": "Run",
|
||||
"Python.app_test (1).executor": "Run",
|
||||
"Python.convert.executor": "Run",
|
||||
"Python.markdown_splitter.executor": "Debug",
|
||||
"Python.markdown_utils.executor": "Run",
|
||||
"Python.test.executor": "Run",
|
||||
"Python.test1.executor": "Run",
|
||||
"Python.test2.executor": "Run",
|
||||
"Python.test3.executor": "Run",
|
||||
"Python.test4.executor": "Run",
|
||||
"Python.testhtml.executor": "Run",
|
||||
"Python.translater.executor": "Run",
|
||||
"Python.切分测试.executor": "Run",
|
||||
"RunOnceActivity.ShowReadmeOnStart": "true",
|
||||
"RunOnceActivity.TerminalTabsStorage.copyFrom.TerminalArrangementManager": "true",
|
||||
"RunOnceActivity.git.unshallow": "true",
|
||||
"git-widget-placeholder": "main",
|
||||
"last_opened_file_path": "C:/Users/jxgm/Desktop/FileTranslate/tests/files",
|
||||
"node.js.detected.package.eslint": "true",
|
||||
"node.js.detected.package.tslint": "true",
|
||||
"node.js.selected.package.eslint": "(autodetect)",
|
||||
"node.js.selected.package.tslint": "(autodetect)",
|
||||
"nodejs_package_manager_path": "npm",
|
||||
"settings.editor.selected.configurable": "preferences.pluginManager",
|
||||
"vue.rearranger.settings.migration": "true"
|
||||
<component name="PropertiesComponent">{
|
||||
"keyToString": {
|
||||
"DefaultHtmlFileTemplate": "HTML File",
|
||||
"JavaScript 调试.output.html (1).executor": "Run",
|
||||
"JavaScript 调试.output.html.executor": "Run",
|
||||
"JavaScript 调试.regex.md_中文.html.executor": "Run",
|
||||
"JavaScript 调试.regex_中文.html.executor": "Run",
|
||||
"JavaScript 调试.test.html.executor": "Run",
|
||||
"JavaScript 调试.test2.html.executor": "Run",
|
||||
"JavaScript 调试.test2_英文.html.executor": "Run",
|
||||
"JavaScript 调试.test4-1_中文.html.executor": "Run",
|
||||
"JavaScript 调试.互联网认证授权机制.html.executor": "Run",
|
||||
"JavaScript 调试.互联网认证授权机制_英文.html.executor": "Run",
|
||||
"JavaScript 调试.毕业论文_英文.html.executor": "Run",
|
||||
"ModuleVcsDetector.initialDetectionPerformed": "true",
|
||||
"Python 测试.Python 测试 (markdown_mask.py 内).executor": "Run",
|
||||
"Python 测试.markdown_mask.Test.test_basic_link_masking 的 Python 测试.executor": "Run",
|
||||
"Python 测试.pytest (test_html.py 内).executor": "Run",
|
||||
"Python.2test2 (1).executor": "Run",
|
||||
"Python.PDFtranslater (1).executor": "Run",
|
||||
"Python.PDFtranslater (2).executor": "Run",
|
||||
"Python.agent.executor": "Debug",
|
||||
"Python.agent_utils.executor": "Run",
|
||||
"Python.app (1).executor": "Run",
|
||||
"Python.app.executor": "Run",
|
||||
"Python.app2.executor": "Run",
|
||||
"Python.app_test (1).executor": "Run",
|
||||
"Python.convert.executor": "Run",
|
||||
"Python.markdown_splitter.executor": "Debug",
|
||||
"Python.markdown_utils.executor": "Run",
|
||||
"Python.test.executor": "Run",
|
||||
"Python.test1.executor": "Run",
|
||||
"Python.test2.executor": "Run",
|
||||
"Python.test3.executor": "Run",
|
||||
"Python.test4.executor": "Run",
|
||||
"Python.testhtml.executor": "Run",
|
||||
"Python.translater.executor": "Run",
|
||||
"Python.切分测试.executor": "Run",
|
||||
"RunOnceActivity.ShowReadmeOnStart": "true",
|
||||
"RunOnceActivity.TerminalTabsStorage.copyFrom.TerminalArrangementManager": "true",
|
||||
"RunOnceActivity.git.unshallow": "true",
|
||||
"git-widget-placeholder": "main",
|
||||
"last_opened_file_path": "C:/Users/jxgm/Desktop/FileTranslate/dist/DocuTranslate",
|
||||
"node.js.detected.package.eslint": "true",
|
||||
"node.js.detected.package.tslint": "true",
|
||||
"node.js.selected.package.eslint": "(autodetect)",
|
||||
"node.js.selected.package.tslint": "(autodetect)",
|
||||
"nodejs_package_manager_path": "npm",
|
||||
"settings.editor.selected.configurable": "preferences.pluginManager",
|
||||
"vue.rearranger.settings.migration": "true"
|
||||
}
|
||||
}]]></component>
|
||||
}</component>
|
||||
<component name="RecentsManager">
|
||||
<key name="CopyFile.RECENT_KEYS">
|
||||
<recent name="C:\Users\jxgm\Desktop\FileTranslate\tests\files" />
|
||||
<recent name="C:\Users\jxgm\Desktop\FileTranslate\dist\DocuTranslate" />
|
||||
<recent name="C:\Users\jxgm\Desktop\FileTranslate\tests\files" />
|
||||
<recent name="C:\Users\jxgm\Desktop\FileTranslate\docutranslate\agents" />
|
||||
<recent name="C:\Users\jxgm\Desktop\FileTranslate\dist" />
|
||||
<recent name="C:\Users\jxgm\Desktop\FileTranslate\dist\app" />
|
||||
@@ -562,7 +560,9 @@
|
||||
<workItem from="1747452556852" duration="1474000" />
|
||||
<workItem from="1747478362092" duration="1735000" />
|
||||
<workItem from="1747553452592" duration="4624000" />
|
||||
<workItem from="1747578049178" duration="2689000" />
|
||||
<workItem from="1747578049178" duration="3519000" />
|
||||
<workItem from="1747583338894" duration="404000" />
|
||||
<workItem from="1747612671258" duration="614000" />
|
||||
</task>
|
||||
<servers />
|
||||
</component>
|
||||
@@ -570,11 +570,11 @@
|
||||
<option name="version" value="3" />
|
||||
</component>
|
||||
<component name="com.intellij.coverage.CoverageDataManagerImpl">
|
||||
<SUITE FILE_PATH="coverage/filetranslate$app_test__1_.coverage" NAME="app_test (1) 覆盖结果" MODIFIED="1747579997941" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
|
||||
<SUITE FILE_PATH="coverage/filetranslate$app_test__1_.coverage" NAME="app_test (1) 覆盖结果" MODIFIED="1747583505329" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
|
||||
<SUITE FILE_PATH="coverage/filetranslate$test.coverage" NAME="test 覆盖结果" MODIFIED="1747472297913" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
|
||||
<SUITE FILE_PATH="coverage/filetranslate$convert.coverage" NAME="convert 覆盖结果" MODIFIED="1746963490689" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate/utils" />
|
||||
<SUITE FILE_PATH="coverage/filetranslate$pytest__test_html_py__.coverage" NAME="pytest (test_html.py 内) 覆盖结果" MODIFIED="1747554037236" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
|
||||
<SUITE FILE_PATH="coverage/PDFtranslate$PDFtranslater__1_.coverage" NAME="PDFtranslater (1) 覆盖结果" MODIFIED="1746633258205" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages" />
|
||||
<SUITE FILE_PATH="coverage/filetranslate$pytest__test_html_py__.coverage" NAME="pytest (test_html.py 内) 覆盖结果" MODIFIED="1747554037236" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
|
||||
<SUITE FILE_PATH="coverage/PDFtranslate$agent_utils.coverage" NAME="agent_utils 覆盖结果" MODIFIED="1746617703678" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages/utils" />
|
||||
<SUITE FILE_PATH="coverage/filetranslate$app2.coverage" NAME="app2 覆盖结果" MODIFIED="1747108180309" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate" />
|
||||
<SUITE FILE_PATH="coverage/filetranslate$app.coverage" NAME="app 覆盖结果" MODIFIED="1747448464521" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate" />
|
||||
@@ -582,9 +582,9 @@
|
||||
<SUITE FILE_PATH="coverage/filetranslate$test3.coverage" NAME="test3 覆盖结果" MODIFIED="1746884110572" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
|
||||
<SUITE FILE_PATH="coverage/filetranslate$app__1_.coverage" NAME="app (1) 覆盖结果" MODIFIED="1747136094477" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
|
||||
<SUITE FILE_PATH="coverage/filetranslate$markdown_splitter.coverage" NAME="markdown_splitter 覆盖结果" MODIFIED="1746805063874" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate/utils" />
|
||||
<SUITE FILE_PATH="coverage/PDFtranslate$test.coverage" NAME="test 覆盖结果" MODIFIED="1746629433597" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
|
||||
<SUITE FILE_PATH="coverage/filetranslate$PDFtranslater__2_.coverage" NAME="PDFtranslater (2) 覆盖结果" MODIFIED="1746679546680" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/filetranslate_packages" />
|
||||
<SUITE FILE_PATH="coverage/filetranslate$agent.coverage" NAME="agent 覆盖结果" MODIFIED="1746805293987" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate/Agents" />
|
||||
<SUITE FILE_PATH="coverage/filetranslate$PDFtranslater__2_.coverage" NAME="PDFtranslater (2) 覆盖结果" MODIFIED="1746679546680" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/filetranslate_packages" />
|
||||
<SUITE FILE_PATH="coverage/PDFtranslate$test.coverage" NAME="test 覆盖结果" MODIFIED="1746629433597" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
|
||||
<SUITE FILE_PATH="coverage/filetranslate$2test2__1_.coverage" NAME="2test2 (1) 覆盖结果" MODIFIED="1747579915531" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
|
||||
<SUITE FILE_PATH="coverage/filetranslate$agent_utils.coverage" NAME="agent_utils 覆盖结果" MODIFIED="1746708534311" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate/utils" />
|
||||
<SUITE FILE_PATH="coverage/filetranslate$.coverage" NAME="切分测试 覆盖结果" MODIFIED="1747187128847" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
|
||||
|
||||
@@ -2,8 +2,10 @@ import asyncio
|
||||
import io
|
||||
import logging
|
||||
import time
|
||||
import urllib
|
||||
from pathlib import Path
|
||||
from typing import List, Dict, Any, Optional
|
||||
from urllib.parse import quote
|
||||
|
||||
import uvicorn
|
||||
from fastapi import FastAPI, File, Form, UploadFile, Request, HTTPException
|
||||
@@ -1061,7 +1063,7 @@ async def download_markdown(filename_with_ext: str):
|
||||
return StreamingResponse(
|
||||
io.StringIO(current_state["markdown_content"]),
|
||||
media_type="text/markdown",
|
||||
headers={"Content-Disposition": f"attachment; filename=\"{actual_filename}\""}
|
||||
headers={"Content-Disposition": f"attachment; filename*=UTF-8''{quote(actual_filename, safe='', encoding='utf-8')}"}
|
||||
)
|
||||
|
||||
|
||||
@@ -1079,7 +1081,7 @@ async def download_html(filename_with_ext: str):
|
||||
return HTMLResponse(
|
||||
content=current_state["html_content"],
|
||||
media_type="text/html",
|
||||
headers={"Content-Disposition": f"attachment; filename=\"{actual_filename}\""}
|
||||
headers={"Content-Disposition": f"attachment; filename*=UTF-8''{quote(actual_filename, safe='', encoding='utf-8')}"}
|
||||
)
|
||||
|
||||
|
||||
|
||||
@@ -1,22 +1,27 @@
|
||||
import os
|
||||
from huggingface_hub.errors import LocalEntryNotFoundError
|
||||
from docling.datamodel.base_models import InputFormat
|
||||
from docling.datamodel.pipeline_options import PdfPipelineOptions, AcceleratorOptions, AcceleratorDevice
|
||||
from docling_core.types.doc import ImageRefMode
|
||||
from pathlib import Path
|
||||
from docling.document_converter import DocumentConverter, PdfFormatOption
|
||||
|
||||
from docling.datamodel.base_models import InputFormat
|
||||
from docling.datamodel.document import DocumentStream
|
||||
from docling.datamodel.pipeline_options import PdfPipelineOptions, TableFormerMode
|
||||
from docling.datamodel.settings import settings
|
||||
from docling.document_converter import DocumentConverter, PdfFormatOption
|
||||
from docling_core.types.doc import ImageRefMode
|
||||
from huggingface_hub.errors import LocalEntryNotFoundError
|
||||
|
||||
from docutranslate.logger import translater_logger
|
||||
|
||||
IMAGE_RESOLUTION_SCALE = 4
|
||||
|
||||
|
||||
def file2markdown_embed_images(file_path: Path | str|DocumentStream, formula=False, code=False,artifacts_path:Path|str|None=None) -> str:
|
||||
def file2markdown_embed_images(file_path: Path | str | DocumentStream, formula=False, code=False,
|
||||
artifacts_path: Path | str | None = None) -> str:
|
||||
translater_logger.info(f"正在将文档转换为markdown")
|
||||
pipeline_options = PdfPipelineOptions(artifacts_path=artifacts_path)
|
||||
pipeline_options.do_ocr = False
|
||||
pipeline_options.images_scale = IMAGE_RESOLUTION_SCALE
|
||||
pipeline_options.generate_picture_images = True
|
||||
# pipeline_options.table_structure_options.mode = TableFormerMode.FAST
|
||||
pipeline_options.table_structure_options.do_cell_matching = False
|
||||
if formula:
|
||||
pipeline_options.do_formula_enrichment = True
|
||||
@@ -43,5 +48,6 @@ def file2markdown_embed_images(file_path: Path | str|DocumentStream, formula=Fal
|
||||
translater_logger.info(f"pdf转换耗时: {conversion_result.timings["pipeline_total"].times}")
|
||||
return result
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pass
|
||||
@@ -1,6 +1,6 @@
|
||||
[project]
|
||||
name = "docutranslate"
|
||||
version = "0.2.16"
|
||||
version = "0.2.17"
|
||||
description = "文件翻译工具"
|
||||
readme = "README.md"
|
||||
requires-python = ">=3.10"
|
||||
|
||||
Reference in New Issue
Block a user