修正版本号

This commit is contained in:
xunbu
2025-06-19 19:20:31 +08:00
parent a7de5bc5b9
commit 7f885542fa
4 changed files with 196 additions and 63 deletions

View File

@@ -28,9 +28,9 @@
1. `uv init`
2. `uv add docutranslate`
3. `uv add docling`#如果需要使用docling进行文档解析
3. `uv add docutranslate[docling]`#如果需要使用docling进行文档解析
使用git
使用git需下载uv
1. `git clone https://github.com/xunbu/docutranslate.git`
2. `uv sync`
@@ -59,9 +59,11 @@
使用minerU将文档转换为markdown时，需要在minerU平台申请token。
1. 打开[minerU官网](https://mineru.net/apiManage/docs)申请token
1. 打开[minerU官网](https://mineru.net/apiManage/docs)申请API
2. 申请成功后,在[API Token管理界面](https://mineru.net/apiManage/token)创建API Token
> mineru token有14天有效期若过期请创建新的token
## 使用docling引擎注意事项
使用docling将文档转换为markdown时，需要下载模型到本地（也可以提前下载，见FAQ），因此可能会遇到一些网络问题。
@@ -185,8 +187,8 @@ from docutranslate import FileTranslater
translater = FileTranslater(base_url="<baseurl>", # 默认的模型baseurl
key="<api-key>", # 默认的大语言模型平台api-key
model_id="<model-id>", # 默认的模型id
chunksize=3000, # markdown分块长度单位byte分块越大效果越好也越慢不建议超过8000
max_concurrent=30, # 并发数受到ai平台并发量限制如果文章很长建议适当加大到20以上
chunk_size=3000, # markdown分块长度单位byte分块越大效果越好也越慢不建议超过8000
concurrent=30, # 并发数受到ai平台并发量限制如果文章很长建议适当加大到20以上
timeout=2000, # 调用api的超时时间
docling_artifact=None, # 使用提前下载好的docling模型
convert_engin="mineru", # 可选minerU或docling

View File

@@ -15,6 +15,7 @@ from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse, Fil
from fastapi.staticfiles import StaticFiles
from docutranslate import FileTranslater, __version__
from docutranslate.logger import translater_logger
from docutranslate.translater import default_params
from docutranslate.utils.resource_utils import resource_path
from docutranslate.global_values import available_packages
@@ -119,6 +120,9 @@ async def _perform_translation(params: Dict[str, Any], file_contents: bytes, ori
base_url=params['base_url'],
key=params['apikey'],
model_id=params['model_id'],
chunk_size=params['chunk_size'],
concurrent=params['concurrent'],
temperature=params['temperature'],
convert_engin=params['convert_engin'],
mineru_token=params['mineru_token'],
)
@@ -135,7 +139,8 @@ async def _perform_translation(params: Dict[str, Any], file_contents: bytes, ori
md_content = ft.export_to_markdown()
try:
await httpx_client.head("https://s4.zstatic.net/ajax/libs/KaTeX/0.16.9/contrib/auto-render.min.js", timeout=3)
await httpx_client.head("https://s4.zstatic.net/ajax/libs/KaTeX/0.16.9/contrib/auto-render.min.js",
timeout=3)
html_content = ft.export_to_html(title=current_state["original_filename_stem"], cdn=True)
except (httpx.TimeoutException, httpx.RequestError) as e:
translater_logger.info(f"连接s4.zstatic.net失败错误信息{e}")
@@ -213,6 +218,9 @@ async def handle_translate(
refine_markdown: bool = Form(False),
convert_engin: str = Form(...),
mineru_token: Optional[str] = Form(None),
chunk_size: int = Form(...),
concurrent: int = Form(...),
temperature: float = Form(...),
custom_prompt_translate: Optional[str] = Form(None),
file: UploadFile = File(...)
):
@@ -283,6 +291,9 @@ async def handle_translate(
"code_ocr": code_ocr, "refine_markdown": refine_markdown,
"convert_engin": convert_engin,
"mineru_token": mineru_token,
"chunk_size":chunk_size,
"concurrent":concurrent,
"temperature":temperature,
"custom_prompt_translate": custom_prompt_translate,
}
@@ -420,6 +431,11 @@ async def download_html(filename_with_ext: str):
)
@app.get("/translate/default_param")
def get_default_param():
    """Serve the translater module's ``default_params`` mapping as JSON.

    The web page fetches this once at start-up to seed and reset its
    chunk-size / concurrency / temperature sliders.
    """
    payload = default_params
    return JSONResponse(content=payload)
@app.get("/meta")
async def get_app_version():
    """Return the package ``__version__`` wrapped as ``{"version": ...}`` JSON."""
    version_info = {"version": __version__}
    return JSONResponse(content=version_info)

View File

@@ -267,6 +267,10 @@
font-weight: bold;
}
.clickable {
cursor: pointer;
}
@media (max-width: 768px) {
.form-grid {
grid-template-columns: 1fr;
@@ -409,6 +413,28 @@
<details>
<summary>高级选项</summary>
<div class="form-group">
<div>
<label for="chunk-size-slider"><span
style="display:inline-block;min-width: 15vw">分块大小(bytes):</span><span
id="chunk-size-display"></span><span class="clickable" id="chunk-size-reset"
style="color: #2e7d32">🗘</span></label>
<input type="range" id="chunk-size-slider" name="chunk_size" min="1000" max="6000" step="100">
</div>
<div>
<label for="concurrent-slider"><span
style="display:inline-block;min-width: 15vw">并发请求数:</span><span
id="concurrent-display"></span><span class="clickable" id="concurrent-reset"
style="color: #2e7d32">🗘</span></label>
<input type="range" id="concurrent-slider" name="concurrent" min="1" max="60" step="1">
</div>
<div>
<label for="temperature-slider"><span
style="display:inline-block;min-width: 15vw">temperature:</span><span
id="temperature-display"></span><span class="clickable" id="temperature-reset"
style="color: #2e7d32">🗘</span></label>
<input type="range" id="temperature-slider" name="temperature" min="0" max="1" step="0.1">
</div>
<label for="custom_prompt_translate"></label>
<textarea class="prompt-area" type="text" id="custom_prompt_translate"
name="custom_prompt_translate" placeholder="翻译提示"></textarea>
@@ -470,7 +496,7 @@
</div>
<iframe id="printFrame" style="display:none;"></iframe>
<script>
<script type="module">
const platformSelect = document.getElementById('platform_select');
const apiHref = document.getElementById('api_href')
const baseUrlGroup = document.getElementById('baseUrlGroup');
@@ -486,6 +512,17 @@
const mineruTokenGroup = document.getElementById('mineruTokenGroup');
const mineruTokenInput = document.getElementById('mineru_token');
const chunkSizeSlider = document.getElementById('chunk-size-slider')
const chunkSizeDisplay = document.getElementById('chunk-size-display')
const chunkSizeReset = document.getElementById('chunk-size-reset')
const concurrentSlider = document.getElementById('concurrent-slider')
const concurrentDisplay = document.getElementById('concurrent-display')
const concurrentReset = document.getElementById("concurrent-reset")
const temperatureSlider = document.getElementById('temperature-slider')
const temperatureDisplay = document.getElementById('temperature-display')
const temperatureReset = document.getElementById("temperature-reset")
const form = document.getElementById('translateForm');
const submitButton = document.getElementById('submitButton');
const logArea = document.getElementById('logArea');
@@ -521,6 +558,64 @@
let statusPollIntervalId = null;
let isTranslating = false;
let default_param;
// 初始化调用
/**
 * One-time page initialisation (awaited at module top level).
 *
 * Runs three independent steps, each with its own error handling so that a
 * failure in one never prevents the next from running:
 *   1. GET /meta                    -> show the version string.
 *   2. GET /get-engin-list          -> disable <option>s for engines that are
 *                                      not listed and, if the selected engine
 *                                      was disabled, switch to an available one.
 *   3. GET /translate/default_param -> populate the module-level `default_param`.
 *
 * When this function resolves, `default_param` is always an object carrying
 * `chunk_size`, `concurrent` and `temperature`; the slider code that runs
 * afterwards dereferences those keys unconditionally.
 */
async function init() {
    // Used when the server defaults cannot be fetched or are incomplete.
    // Values copied from `default_params` in the Python translater module;
    // keep the two in sync.
    const fallbackParam = {chunk_size: 3000, concurrent: 30, temperature: 0.7};

    // Step 1: version banner. Cosmetic only, so a failure is just logged.
    try {
        const response = await fetch("/meta");
        const meta = await response.json();
        versionDisplay.textContent = `版本号:${meta.version}`;
    } catch (error) {
        console.warn("获取版本号失败", error);
    }

    // Step 2: engine availability.
    try {
        const response = await fetch('/get-engin-list');
        if (!response.ok) {
            // Deliberately no `return` here: step 3 must still run, otherwise
            // `default_param` stays undefined and the slider code throws.
            console.warn(`get engine list failed: ${response.status}`);
        } else {
            const enginList = await response.json();
            statusMsg.textContent = '正在初始化';
            const options = convertEnginSelect.querySelectorAll(`option`);
            let currentEngineDisabled = false;
            options.forEach((option) => {
                if (!enginList.includes(option.value)) {
                    option.disabled = true;
                    option.textContent += " (不可用)";
                    if (option.value === convertEnginSelect.value) {
                        currentEngineDisabled = true;
                    }
                }
            });
            if (currentEngineDisabled) {
                // Prefer mineru when it is usable, otherwise the first enabled option.
                const mineruOption = convertEnginSelect.querySelector('option[value="mineru"]');
                if (mineruOption && !mineruOption.disabled) {
                    convertEnginSelect.value = "mineru";
                } else {
                    const firstAvailable = convertEnginSelect.querySelector('option:not([disabled])');
                    if (firstAvailable) convertEnginSelect.value = firstAvailable.value;
                }
                updateConvertEnginUI();
            }
            statusMsg.textContent = '初始化完成';
        }
    } catch (error) {
        console.warn("Error get engin-list", error);
        statusMsg.textContent = '引擎列表初始化失败';
        statusMsg.className = 'error-message';
    }

    // Step 3: slider defaults.
    try {
        const response = await fetch("/translate/default_param");
        if (!response.ok) {
            // An error response may still carry a JSON body; never treat it as the defaults.
            throw new Error(`get default param failed: ${response.status}`);
        }
        // Merge over the fallback so a partial payload still yields every key.
        default_param = {...fallbackParam, ...(await response.json())};
    } catch (error) {
        default_param = fallbackParam;
        statusMsg.textContent = error.toString();
        statusMsg.className = 'error-message';
    }
}
await init()
function saveToStorage(key, value) {
try {
localStorage.setItem(key, value);
@@ -572,6 +667,7 @@
saveToStorage('translator_last_platform', selectedPlatformValue);
}
function updateConvertEnginUI() {
const selectedEngin = convertEnginSelect.value;
if (selectedEngin === 'mineru') {
@@ -585,11 +681,50 @@
saveToStorage('translator_convert_engin', selectedEngin);
}
/**
 * Refresh the chunk-size widgets from the slider's current value:
 * show the value, reveal the reset icon only when it differs from the
 * server default, and persist the value under the 'chunk_size' key.
 */
function updateChunkSizeUI() {
    const current = chunkSizeSlider.value;
    chunkSizeDisplay.textContent = current;
    // Slider values are strings, so compare against the stringified default.
    const isDefault = current === default_param["chunk_size"].toString();
    chunkSizeReset.style.visibility = isDefault ? 'hidden' : 'visible';
    saveToStorage('chunk_size', current);
}
/**
 * Refresh the temperature widgets from the slider's current value:
 * show the value, reveal the reset icon only when it differs from the
 * server default, and persist the value under the 'temperature' key.
 */
function updateTemperatureUI() {
    const current = temperatureSlider.value;
    temperatureDisplay.textContent = current;
    // Slider values are strings, so compare against the stringified default.
    const isDefault = current === default_param["temperature"].toString();
    temperatureReset.style.visibility = isDefault ? 'hidden' : 'visible';
    saveToStorage('temperature', current);
}
/**
 * Refresh the concurrency widgets from the slider's current value:
 * show the value, reveal the reset icon only when it differs from the
 * server default, and persist the value under the 'concurrent' key.
 */
function updateConcurrentUI() {
    const current = concurrentSlider.value;
    concurrentDisplay.textContent = current;
    // Slider values are strings, so compare against the stringified default.
    const isDefault = current === default_param["concurrent"].toString();
    concurrentReset.style.visibility = isDefault ? 'hidden' : 'visible';
    saveToStorage('concurrent', current);
}
function loadSettings() {
platformSelect.value = getFromStorage('translator_last_platform', 'custom');
updatePlatformUI();
convertEnginSelect.value = getFromStorage('translator_convert_engin', 'mineru');
updateConvertEnginUI();
chunkSizeSlider.value = getFromStorage("chunk_size", default_param["chunk_size"])
updateChunkSizeUI()
concurrentSlider.value = getFromStorage("concurrent", default_param["concurrent"])
updateConcurrentUI()
temperatureSlider.value = getFromStorage("temperature", default_param["temperature"])
updateTemperatureUI()
toLangSelect.value = getFromStorage('translator_to_lang', '中文');
formulaCheckbox.checked = getFromStorage('translator_formula_ocr') === 'true';
codeCheckbox.checked = getFromStorage('translator_code_ocr') === 'true';
@@ -710,52 +845,6 @@
}
}, false);
// 初始化调用
// Immediately-invoked async initialiser. Two independent steps, each with
// its own try/catch:
//   1. GET /meta and write the version into versionDisplay.
//   2. GET /get-engin-list, disable every engine <option> that is not in the
//      returned list, and move the selection if the current engine was disabled.
(async () => {
try {
const response = await fetch("/meta")
let meta = await response.json();
versionDisplay.textContent = `版本号:${meta.version}`;
} catch (error) {
// The version banner is cosmetic: log and carry on.
console.warn("获取版本号失败", error);
}
try {
const response = await fetch('/get-engin-list')
if (!response.ok) {
console.warn(`get engine list failed: ${response.status}`);
// Leaves the whole initialiser; nothing follows this step here.
return;
}
const enginList = await response.json();
statusMsg.textContent = '正在初始化';
let options = convertEnginSelect.querySelectorAll(`option`);
let currentEngineDisabled = false;
options.forEach((option) => {
if (!enginList.includes(option.value)) {
option.disabled = true;
option.textContent += " (不可用)";
// Remember when the currently selected engine has just been disabled.
if (option.value === convertEnginSelect.value) {
currentEngineDisabled = true;
}
}
});
if (currentEngineDisabled) {
// Prefer mineru when it is usable, otherwise the first enabled option.
const mineruOption = convertEnginSelect.querySelector('option[value="mineru"]');
if (mineruOption && !mineruOption.disabled) {
convertEnginSelect.value = "mineru";
} else {
const firstAvailable = convertEnginSelect.querySelector('option:not([disabled])');
if (firstAvailable) convertEnginSelect.value = firstAvailable.value;
}
updateConvertEnginUI();
}
statusMsg.textContent = '初始化完成';
} catch (error) {
console.warn("Error get engin-list", error);
statusMsg.textContent = '引擎列表初始化失败';
statusMsg.className = 'error-message';
}
})();
async function pollLogs() {
try {
@@ -990,6 +1079,23 @@
}
}
// Wire every advanced-option slider the same way: 'input' refreshes its
// display, and clicking the reset icon restores the server default and
// then refreshes. Registration order matches the sliders' order on the page.
const sliderBindings = [
    [chunkSizeSlider, chunkSizeReset, "chunk_size", updateChunkSizeUI],
    [concurrentSlider, concurrentReset, "concurrent", updateConcurrentUI],
    [temperatureSlider, temperatureReset, "temperature", updateTemperatureUI],
];
for (const [slider, resetIcon, paramKey, refresh] of sliderBindings) {
    slider.addEventListener('input', refresh);
    resetIcon.addEventListener('click', () => {
        slider.value = default_param[paramKey];
        refresh();
    });
}
submitButton.addEventListener('click', async function (event) {
event.preventDefault();
console.log(fileInput)

View File

@@ -18,11 +18,16 @@ DOCLING_FLAG = True if available_packages.get("docling") else False
if DOCLING_FLAG:
from docutranslate.converter import ConverterDocling
# Default tuning values: used as FileTranslater's constructor defaults and
# served to the web UI so its sliders can be seeded and reset.
default_params: dict[str, float] = {
    "chunk_size": 3000,  # handed to split_markdown_text as the chunk length
    "concurrent": 30,  # forwarded to the agent as "max_concurrent"
    "temperature": 0.7,  # forwarded to the agent as "temperature"
}
class FileTranslater:
def __init__(self, file_path: Path | str | None = None, chunksize: int = 3000,
base_url="", key=None, model_id="", temperature=0.7,
max_concurrent=30, timeout=2000,
def __init__(self, file_path: Path | str | None = None, chunk_size: int = default_params["chunk_size"],
base_url:str|None=None, key=None, model_id:str|None=None, temperature=default_params["temperature"],
concurrent:int=default_params["concurrent"], timeout=2000,
convert_engin: Literal["docling", "mineru"] = "mineru",
docling_artifact: Path | str | None = None,
mineru_token: str = None, cache=True):
@@ -30,11 +35,11 @@ class FileTranslater:
self.mineru_token = mineru_token.strip() if mineru_token is not None else None
self._mask_dict = MaskDict()
self.markdown: str = ""
self.chunksize = chunksize
self.max_concurrent = max_concurrent
self.base_url: str = base_url
self.key: str = key if key is not None else "xx"
self.model_id: str = model_id
self.chunk_size = chunk_size
self.concurrent = concurrent
self.base_url= base_url
self.key = key if key is not None else "xx"
self.model_id = model_id
self.temperature = temperature
self.docling_artifact = docling_artifact
if docling_artifact is None:
@@ -67,17 +72,21 @@ class FileTranslater:
return self
def _split_markdown_into_chunks(self) -> list[str]:
    """Split ``self.markdown`` into chunks and log how many were produced.

    Returns:
        The list of chunks returned by ``split_markdown_text`` for
        ``self.markdown`` with ``self.chunk_size`` as the size argument.
    """
    # Split exactly once, using the current `chunk_size` attribute.
    chunks: list[str] = split_markdown_text(self.markdown, self.chunk_size)
    translater_logger.info(f"markdown分为{len(chunks)}")
    return chunks
def _default_agent_params(self) -> AgentArgs:
    """Assemble the default ``AgentArgs`` from this instance's settings.

    Returns:
        A mapping with ``baseurl``, ``key``, ``model_id``, ``temperature``,
        ``max_concurrent`` and ``timeout`` read from the instance.

    Raises:
        Exception: if ``base_url`` or ``model_id`` is ``None``.
    """
    if self.base_url is None:
        raise Exception("base_url为空")
    if self.model_id is None:
        raise Exception("model_id为空")
    result: AgentArgs = {
        "baseurl": self.base_url,
        "key": self.key,
        "model_id": self.model_id,
        "temperature": self.temperature,
        # The agent-side key is still "max_concurrent"; the instance
        # attribute is `concurrent`. Listed once only: a duplicate key in a
        # dict display is silently overridden but still evaluated.
        "max_concurrent": self.concurrent,
        "timeout": self.timeout,
    }
    return result