修复空白分块造成翻译错误的问题
This commit is contained in:
17
.idea/workspace.xml
generated
17
.idea/workspace.xml
generated
@@ -5,19 +5,10 @@
|
||||
</component>
|
||||
<component name="ChangeListManager">
|
||||
<list default="true" id="6b18b44a-df57-4212-a857-9e291ebe5dd2" name="更改" comment="">
|
||||
<change afterPath="$PROJECT_DIR$/docutranslate/global_values/__init__.py" afterDir="false" />
|
||||
<change afterPath="$PROJECT_DIR$/docutranslate/global_values/conditional_import.py" afterDir="false" />
|
||||
<change afterPath="$PROJECT_DIR$/lite.spec" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/.idea/workspace.xml" beforeDir="false" afterPath="$PROJECT_DIR$/.idea/workspace.xml" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/README.md" beforeDir="false" afterPath="$PROJECT_DIR$/README.md" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/app.spec" beforeDir="false" afterPath="$PROJECT_DIR$/full.spec" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/docutranslate/__init__.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/__init__.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/docutranslate/app.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/app.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/docutranslate/converter/__init__.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/converter/__init__.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/docutranslate/agents/agent.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/agents/agent.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/docutranslate/static/index.html" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/static/index.html" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/docutranslate/translater.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/translater.py" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/pyproject.toml" beforeDir="false" afterPath="$PROJECT_DIR$/pyproject.toml" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/uv.lock" beforeDir="false" afterPath="$PROJECT_DIR$/uv.lock" afterDir="false" />
|
||||
<change beforePath="$PROJECT_DIR$/docutranslate/utils/markdown_splitter.py" beforeDir="false" afterPath="$PROJECT_DIR$/docutranslate/utils/markdown_splitter.py" afterDir="false" />
|
||||
</list>
|
||||
<option name="SHOW_DIALOG" value="false" />
|
||||
<option name="HIGHLIGHT_CONFLICTS" value="true" />
|
||||
@@ -612,7 +603,7 @@
|
||||
<workItem from="1747739438735" duration="826000" />
|
||||
<workItem from="1747740341909" duration="145000" />
|
||||
<workItem from="1747752718385" duration="81000" />
|
||||
<workItem from="1747754618316" duration="9919000" />
|
||||
<workItem from="1747754618316" duration="12015000" />
|
||||
</task>
|
||||
<servers />
|
||||
</component>
|
||||
@@ -631,7 +622,7 @@
|
||||
</option>
|
||||
</component>
|
||||
<component name="com.intellij.coverage.CoverageDataManagerImpl">
|
||||
<SUITE FILE_PATH="coverage/filetranslate$app_test__1_.coverage" NAME="app_test (1) 覆盖结果" MODIFIED="1747794139415" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
|
||||
<SUITE FILE_PATH="coverage/filetranslate$app_test__1_.coverage" NAME="app_test (1) 覆盖结果" MODIFIED="1747796284047" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
|
||||
<SUITE FILE_PATH="coverage/filetranslate$test.coverage" NAME="test 覆盖结果" MODIFIED="1747472297913" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/tests" />
|
||||
<SUITE FILE_PATH="coverage/filetranslate$convert.coverage" NAME="convert 覆盖结果" MODIFIED="1746963490689" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/docutranslate/utils" />
|
||||
<SUITE FILE_PATH="coverage/PDFtranslate$PDFtranslater__1_.coverage" NAME="PDFtranslater (1) 覆盖结果" MODIFIED="1746633258205" SOURCE_PROVIDER="com.intellij.coverage.DefaultCoverageFileProvider" RUNNER="coverage.py" COVERAGE_BY_TEST_ENABLED="false" COVERAGE_TRACING_ENABLED="false" WORKING_DIRECTORY="$PROJECT_DIR$/pdftranslate_packages" />
|
||||
|
||||
@@ -84,6 +84,7 @@ class Agent:
|
||||
result = response.json()["choices"][0]["message"]["content"]
|
||||
return result
|
||||
except httpx.HTTPStatusError as e:
|
||||
print(f"prompt:{prompt}\nsystem_prompt:{system_prompt}")
|
||||
raise Exception(f"AI请求错误 (async): {e.response.status_code} - {e.response.text}") from e
|
||||
except httpx.RequestError as e:
|
||||
raise Exception(f"AI请求连接错误 (async): {e}") from e
|
||||
|
||||
@@ -600,22 +600,24 @@
|
||||
.then(htmlContent => {
|
||||
iframe.onload = () => {
|
||||
iframe.onload = null;
|
||||
try {
|
||||
const iframeWindow = iframe.contentWindow;
|
||||
if (!iframeWindow) throw new Error("无法访问打印框架。");
|
||||
iframeWindow.document.title = currentFileName + '_translated.pdf';
|
||||
iframeWindow.focus();
|
||||
iframeWindow.print();
|
||||
} catch (err) {
|
||||
console.error('打印PDF出错:', err);
|
||||
statusMsg.textContent = '无法直接生成PDF。请预览HTML后,使用浏览器的打印功能 (Ctrl+P) 保存。';
|
||||
statusMsg.className = 'error-message';
|
||||
} finally {
|
||||
setTimeout(() => {
|
||||
downloadPdfBtn.disabled = false;
|
||||
downloadPdfBtn.textContent = '下载 PDF';
|
||||
}, 2000);
|
||||
}
|
||||
setTimeout(() => {
|
||||
try {
|
||||
const iframeWindow = iframe.contentWindow;
|
||||
if (!iframeWindow) throw new Error("无法访问打印框架。");
|
||||
iframeWindow.document.title = currentFileName + '_translated.pdf';
|
||||
iframeWindow.focus();
|
||||
iframeWindow.print();
|
||||
} catch (err) {
|
||||
console.error('打印PDF出错:', err);
|
||||
statusMsg.textContent = '无法直接生成PDF。请预览HTML后,使用浏览器的打印功能 (Ctrl+P) 保存。';
|
||||
statusMsg.className = 'error-message';
|
||||
} finally {
|
||||
setTimeout(() => {
|
||||
downloadPdfBtn.disabled = false;
|
||||
downloadPdfBtn.textContent = '下载 PDF';
|
||||
}, 2000);
|
||||
}
|
||||
}, 500)
|
||||
};
|
||||
iframe.srcdoc = htmlContent;
|
||||
})
|
||||
|
||||
@@ -11,10 +11,12 @@ class MarkdownBlockSplitter:
|
||||
max_block_size: 每个块的最大字符数
|
||||
"""
|
||||
self.max_block_size = max_block_size
|
||||
|
||||
@staticmethod
|
||||
def _get_bytes(text:str)->int:
|
||||
def _get_bytes(text: str) -> int:
|
||||
return len(text.encode('utf-8'))
|
||||
|
||||
#TODO: 修复分块有时候会有空白块的问题
|
||||
def split_markdown(self, markdown_text: str) -> List[str]:
|
||||
"""
|
||||
将Markdown文本分割成指定大小的块
|
||||
@@ -230,18 +232,22 @@ def split_markdown_text(markdown_text, max_block_size=5000):
|
||||
Markdown块列表(纯空白块已被过滤,因此''.join(chunks)不一定能完全还原原文中的空白)
|
||||
"""
|
||||
splitter = MarkdownBlockSplitter(max_block_size=max_block_size)
|
||||
return splitter.split_markdown(markdown_text)
|
||||
chunks = splitter.split_markdown(markdown_text)
|
||||
#过滤空白块
|
||||
chunks=[chunk for chunk in chunks if chunk.strip()]
|
||||
return chunks
|
||||
|
||||
|
||||
def join_markdown_texts(markdown_texts:list[str])->str:
|
||||
result=""
|
||||
def join_markdown_texts(markdown_texts: list[str]) -> str:
|
||||
result = ""
|
||||
for text in markdown_texts:
|
||||
#只有表格会收到多余空行的影响
|
||||
# 只有表格会受到多余空行的影响
|
||||
if text.lstrip().startswith("|"):
|
||||
result=result+"\n"+text
|
||||
result = result + "\n" + text
|
||||
else:
|
||||
result+="\n\n"+text
|
||||
result += "\n\n" + text
|
||||
return result
|
||||
|
||||
|
||||
if __name__ == '__main__':
|
||||
pass
|
||||
Reference in New Issue
Block a user