修正版本号
This commit is contained in:
12
README.md
12
README.md
@@ -28,9 +28,9 @@
|
|||||||
|
|
||||||
1. `uv init`
|
1. `uv init`
|
||||||
2. `uv add docutranslate`
|
2. `uv add docutranslate`
|
||||||
3. `uv add docling`#如果需要使用docling进行文档解析
|
3. `uv add docutranslate[docling]`#如果需要使用docling进行文档解析
|
||||||
|
|
||||||
使用git
|
使用git(需下载uv)
|
||||||
|
|
||||||
1. `git clone https://github.com/xunbu/docutranslate.git`
|
1. `git clone https://github.com/xunbu/docutranslate.git`
|
||||||
2. `uv sync`
|
2. `uv sync`
|
||||||
@@ -59,9 +59,11 @@
|
|||||||
|
|
||||||
使用minerU将文档转换为markdown时,需要在minerU平台申请token
|
使用minerU将文档转换为markdown时,需要在minerU平台申请token
|
||||||
|
|
||||||
1. 打开[minerU官网](https://mineru.net/apiManage/docs)申请token
|
1. 打开[minerU官网](https://mineru.net/apiManage/docs)申请API
|
||||||
2. 申请成功后,在[API Token管理界面](https://mineru.net/apiManage/token)创建API Token
|
2. 申请成功后,在[API Token管理界面](https://mineru.net/apiManage/token)创建API Token
|
||||||
|
|
||||||
|
> mineru token有14天有效期,若过期请创建新的token
|
||||||
|
|
||||||
## 使用docling引擎注意事项
|
## 使用docling引擎注意事项
|
||||||
|
|
||||||
使用docling将文档转换为markdown时,需要下载模型到本地(也可以提前下载,见FAQ),因此可能会遇到一些网络问题
|
使用docling将文档转换为markdown时,需要下载模型到本地(也可以提前下载,见FAQ),因此可能会遇到一些网络问题
|
||||||
@@ -185,8 +187,8 @@ from docutranslate import FileTranslater
|
|||||||
translater = FileTranslater(base_url="<baseurl>", # 默认的模型baseurl
|
translater = FileTranslater(base_url="<baseurl>", # 默认的模型baseurl
|
||||||
key="<api-key>", # 默认的大语言模型平台api-key
|
key="<api-key>", # 默认的大语言模型平台api-key
|
||||||
model_id="<model-id>", # 默认的模型id
|
model_id="<model-id>", # 默认的模型id
|
||||||
chunksize=3000, # markdown分块长度(单位byte),分块越大效果越好(也越慢),不建议超过8000
|
chunk_size=3000, # markdown分块长度(单位byte),分块越大效果越好(也越慢),不建议超过8000
|
||||||
max_concurrent=30, # 并发数,受到ai平台并发量限制,如果文章很长建议适当加大到20以上
|
concurrent=30, # 并发数,受到ai平台并发量限制,如果文章很长建议适当加大到20以上
|
||||||
timeout=2000, # 调用api的超时时间
|
timeout=2000, # 调用api的超时时间
|
||||||
docling_artifact=None, # 使用提前下载好的docling模型
|
docling_artifact=None, # 使用提前下载好的docling模型
|
||||||
convert_engin="mineru", # 可选minerU或docling
|
convert_engin="mineru", # 可选minerU或docling
|
||||||
|
|||||||
@@ -15,6 +15,7 @@ from fastapi.responses import HTMLResponse, JSONResponse, StreamingResponse, Fil
|
|||||||
from fastapi.staticfiles import StaticFiles
|
from fastapi.staticfiles import StaticFiles
|
||||||
from docutranslate import FileTranslater, __version__
|
from docutranslate import FileTranslater, __version__
|
||||||
from docutranslate.logger import translater_logger
|
from docutranslate.logger import translater_logger
|
||||||
|
from docutranslate.translater import default_params
|
||||||
from docutranslate.utils.resource_utils import resource_path
|
from docutranslate.utils.resource_utils import resource_path
|
||||||
from docutranslate.global_values import available_packages
|
from docutranslate.global_values import available_packages
|
||||||
|
|
||||||
@@ -119,6 +120,9 @@ async def _perform_translation(params: Dict[str, Any], file_contents: bytes, ori
|
|||||||
base_url=params['base_url'],
|
base_url=params['base_url'],
|
||||||
key=params['apikey'],
|
key=params['apikey'],
|
||||||
model_id=params['model_id'],
|
model_id=params['model_id'],
|
||||||
|
chunk_size=params['chunk_size'],
|
||||||
|
concurrent=params['concurrent'],
|
||||||
|
temperature=params['temperature'],
|
||||||
convert_engin=params['convert_engin'],
|
convert_engin=params['convert_engin'],
|
||||||
mineru_token=params['mineru_token'],
|
mineru_token=params['mineru_token'],
|
||||||
)
|
)
|
||||||
@@ -135,7 +139,8 @@ async def _perform_translation(params: Dict[str, Any], file_contents: bytes, ori
|
|||||||
|
|
||||||
md_content = ft.export_to_markdown()
|
md_content = ft.export_to_markdown()
|
||||||
try:
|
try:
|
||||||
await httpx_client.head("https://s4.zstatic.net/ajax/libs/KaTeX/0.16.9/contrib/auto-render.min.js", timeout=3)
|
await httpx_client.head("https://s4.zstatic.net/ajax/libs/KaTeX/0.16.9/contrib/auto-render.min.js",
|
||||||
|
timeout=3)
|
||||||
html_content = ft.export_to_html(title=current_state["original_filename_stem"], cdn=True)
|
html_content = ft.export_to_html(title=current_state["original_filename_stem"], cdn=True)
|
||||||
except (httpx.TimeoutException, httpx.RequestError) as e:
|
except (httpx.TimeoutException, httpx.RequestError) as e:
|
||||||
translater_logger.info(f"连接s4.zstatic.net失败,错误信息:{e}")
|
translater_logger.info(f"连接s4.zstatic.net失败,错误信息:{e}")
|
||||||
@@ -213,6 +218,9 @@ async def handle_translate(
|
|||||||
refine_markdown: bool = Form(False),
|
refine_markdown: bool = Form(False),
|
||||||
convert_engin: str = Form(...),
|
convert_engin: str = Form(...),
|
||||||
mineru_token: Optional[str] = Form(None),
|
mineru_token: Optional[str] = Form(None),
|
||||||
|
chunk_size: int = Form(...),
|
||||||
|
concurrent: int = Form(...),
|
||||||
|
temperature: float = Form(...),
|
||||||
custom_prompt_translate: Optional[str] = Form(None),
|
custom_prompt_translate: Optional[str] = Form(None),
|
||||||
file: UploadFile = File(...)
|
file: UploadFile = File(...)
|
||||||
):
|
):
|
||||||
@@ -283,6 +291,9 @@ async def handle_translate(
|
|||||||
"code_ocr": code_ocr, "refine_markdown": refine_markdown,
|
"code_ocr": code_ocr, "refine_markdown": refine_markdown,
|
||||||
"convert_engin": convert_engin,
|
"convert_engin": convert_engin,
|
||||||
"mineru_token": mineru_token,
|
"mineru_token": mineru_token,
|
||||||
|
"chunk_size":chunk_size,
|
||||||
|
"concurrent":concurrent,
|
||||||
|
"temperature":temperature,
|
||||||
"custom_prompt_translate": custom_prompt_translate,
|
"custom_prompt_translate": custom_prompt_translate,
|
||||||
}
|
}
|
||||||
|
|
||||||
@@ -420,6 +431,11 @@ async def download_html(filename_with_ext: str):
|
|||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
@app.get("/translate/default_param")
|
||||||
|
def get_default_param():
|
||||||
|
return JSONResponse(content=default_params)
|
||||||
|
|
||||||
|
|
||||||
@app.get("/meta")
|
@app.get("/meta")
|
||||||
async def get_app_version():
|
async def get_app_version():
|
||||||
return JSONResponse(content={"version": __version__})
|
return JSONResponse(content={"version": __version__})
|
||||||
|
|||||||
@@ -267,6 +267,10 @@
|
|||||||
font-weight: bold;
|
font-weight: bold;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
.clickable {
|
||||||
|
cursor: pointer;
|
||||||
|
}
|
||||||
|
|
||||||
@media (max-width: 768px) {
|
@media (max-width: 768px) {
|
||||||
.form-grid {
|
.form-grid {
|
||||||
grid-template-columns: 1fr;
|
grid-template-columns: 1fr;
|
||||||
@@ -409,6 +413,28 @@
|
|||||||
<details>
|
<details>
|
||||||
<summary>高级选项</summary>
|
<summary>高级选项</summary>
|
||||||
<div class="form-group">
|
<div class="form-group">
|
||||||
|
<div>
|
||||||
|
<label for="chunk-size-slider"><span
|
||||||
|
style="display:inline-block;min-width: 15vw">分块大小(bytes):</span><span
|
||||||
|
id="chunk-size-display"></span><span class="clickable" id="chunk-size-reset"
|
||||||
|
style="color: #2e7d32">🗘</span></label>
|
||||||
|
<input type="range" id="chunk-size-slider" name="chunk_size" min="1000" max="6000" step="100">
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label for="concurrent-slider"><span
|
||||||
|
style="display:inline-block;min-width: 15vw">并发请求数:</span><span
|
||||||
|
id="concurrent-display"></span><span class="clickable" id="concurrent-reset"
|
||||||
|
style="color: #2e7d32">🗘</span></label>
|
||||||
|
<input type="range" id="concurrent-slider" name="concurrent" min="1" max="60" step="1">
|
||||||
|
</div>
|
||||||
|
<div>
|
||||||
|
<label for="temperature-slider"><span
|
||||||
|
style="display:inline-block;min-width: 15vw">temperature:</span><span
|
||||||
|
id="temperature-display"></span><span class="clickable" id="temperature-reset"
|
||||||
|
style="color: #2e7d32">🗘</span></label>
|
||||||
|
<input type="range" id="temperature-slider" name="temperature" min="0" max="1" step="0.1">
|
||||||
|
</div>
|
||||||
|
|
||||||
<label for="custom_prompt_translate"></label>
|
<label for="custom_prompt_translate"></label>
|
||||||
<textarea class="prompt-area" type="text" id="custom_prompt_translate"
|
<textarea class="prompt-area" type="text" id="custom_prompt_translate"
|
||||||
name="custom_prompt_translate" placeholder="翻译提示"></textarea>
|
name="custom_prompt_translate" placeholder="翻译提示"></textarea>
|
||||||
@@ -470,7 +496,7 @@
|
|||||||
</div>
|
</div>
|
||||||
|
|
||||||
<iframe id="printFrame" style="display:none;"></iframe>
|
<iframe id="printFrame" style="display:none;"></iframe>
|
||||||
<script>
|
<script type="module">
|
||||||
const platformSelect = document.getElementById('platform_select');
|
const platformSelect = document.getElementById('platform_select');
|
||||||
const apiHref = document.getElementById('api_href')
|
const apiHref = document.getElementById('api_href')
|
||||||
const baseUrlGroup = document.getElementById('baseUrlGroup');
|
const baseUrlGroup = document.getElementById('baseUrlGroup');
|
||||||
@@ -486,6 +512,17 @@
|
|||||||
const mineruTokenGroup = document.getElementById('mineruTokenGroup');
|
const mineruTokenGroup = document.getElementById('mineruTokenGroup');
|
||||||
const mineruTokenInput = document.getElementById('mineru_token');
|
const mineruTokenInput = document.getElementById('mineru_token');
|
||||||
|
|
||||||
|
const chunkSizeSlider = document.getElementById('chunk-size-slider')
|
||||||
|
const chunkSizeDisplay = document.getElementById('chunk-size-display')
|
||||||
|
const chunkSizeReset = document.getElementById('chunk-size-reset')
|
||||||
|
const concurrentSlider = document.getElementById('concurrent-slider')
|
||||||
|
const concurrentDisplay = document.getElementById('concurrent-display')
|
||||||
|
const concurrentReset = document.getElementById("concurrent-reset")
|
||||||
|
const temperatureSlider = document.getElementById('temperature-slider')
|
||||||
|
const temperatureDisplay = document.getElementById('temperature-display')
|
||||||
|
const temperatureReset = document.getElementById("temperature-reset")
|
||||||
|
|
||||||
|
|
||||||
const form = document.getElementById('translateForm');
|
const form = document.getElementById('translateForm');
|
||||||
const submitButton = document.getElementById('submitButton');
|
const submitButton = document.getElementById('submitButton');
|
||||||
const logArea = document.getElementById('logArea');
|
const logArea = document.getElementById('logArea');
|
||||||
@@ -521,6 +558,64 @@
|
|||||||
let statusPollIntervalId = null;
|
let statusPollIntervalId = null;
|
||||||
let isTranslating = false;
|
let isTranslating = false;
|
||||||
|
|
||||||
|
|
||||||
|
let default_param;
|
||||||
|
|
||||||
|
// 初始化调用
|
||||||
|
async function init() {
|
||||||
|
try {
|
||||||
|
const response = await fetch("/meta")
|
||||||
|
const meta = await response.json();
|
||||||
|
versionDisplay.textContent = `版本号:${meta.version}`;
|
||||||
|
} catch (error) {
|
||||||
|
console.warn("获取版本号失败", error);
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
const response = await fetch('/get-engin-list')
|
||||||
|
if (!response.ok) {
|
||||||
|
console.warn(`get engine list failed: ${response.status}`);
|
||||||
|
return;
|
||||||
|
}
|
||||||
|
const enginList = await response.json();
|
||||||
|
statusMsg.textContent = '正在初始化';
|
||||||
|
let options = convertEnginSelect.querySelectorAll(`option`);
|
||||||
|
let currentEngineDisabled = false;
|
||||||
|
options.forEach((option) => {
|
||||||
|
if (!enginList.includes(option.value)) {
|
||||||
|
option.disabled = true;
|
||||||
|
option.textContent += " (不可用)";
|
||||||
|
if (option.value === convertEnginSelect.value) {
|
||||||
|
currentEngineDisabled = true;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
});
|
||||||
|
if (currentEngineDisabled) {
|
||||||
|
const mineruOption = convertEnginSelect.querySelector('option[value="mineru"]');
|
||||||
|
if (mineruOption && !mineruOption.disabled) {
|
||||||
|
convertEnginSelect.value = "mineru";
|
||||||
|
} else {
|
||||||
|
const firstAvailable = convertEnginSelect.querySelector('option:not([disabled])');
|
||||||
|
if (firstAvailable) convertEnginSelect.value = firstAvailable.value;
|
||||||
|
}
|
||||||
|
updateConvertEnginUI();
|
||||||
|
}
|
||||||
|
statusMsg.textContent = '初始化完成';
|
||||||
|
} catch (error) {
|
||||||
|
console.warn("Error get engin-list", error);
|
||||||
|
statusMsg.textContent = '引擎列表初始化失败';
|
||||||
|
statusMsg.className = 'error-message';
|
||||||
|
}
|
||||||
|
try {
|
||||||
|
const response = await fetch("/translate/default_param")
|
||||||
|
default_param = await response.json();
|
||||||
|
} catch (error) {
|
||||||
|
statusMsg.textContent = error.toString();
|
||||||
|
statusMsg.className = 'error-message';
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
|
await init()
|
||||||
|
|
||||||
function saveToStorage(key, value) {
|
function saveToStorage(key, value) {
|
||||||
try {
|
try {
|
||||||
localStorage.setItem(key, value);
|
localStorage.setItem(key, value);
|
||||||
@@ -572,6 +667,7 @@
|
|||||||
saveToStorage('translator_last_platform', selectedPlatformValue);
|
saveToStorage('translator_last_platform', selectedPlatformValue);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
function updateConvertEnginUI() {
|
function updateConvertEnginUI() {
|
||||||
const selectedEngin = convertEnginSelect.value;
|
const selectedEngin = convertEnginSelect.value;
|
||||||
if (selectedEngin === 'mineru') {
|
if (selectedEngin === 'mineru') {
|
||||||
@@ -585,11 +681,50 @@
|
|||||||
saveToStorage('translator_convert_engin', selectedEngin);
|
saveToStorage('translator_convert_engin', selectedEngin);
|
||||||
}
|
}
|
||||||
|
|
||||||
|
function updateChunkSizeUI() {
|
||||||
|
let value = chunkSizeSlider.value
|
||||||
|
chunkSizeDisplay.textContent = value;
|
||||||
|
if (value !== default_param["chunk_size"].toString()) {
|
||||||
|
chunkSizeReset.style.visibility = 'visible';
|
||||||
|
} else {
|
||||||
|
chunkSizeReset.style.visibility = 'hidden';
|
||||||
|
}
|
||||||
|
saveToStorage('chunk_size', value)
|
||||||
|
}
|
||||||
|
|
||||||
|
function updateTemperatureUI() {
|
||||||
|
let value = temperatureSlider.value
|
||||||
|
temperatureDisplay.textContent = value;
|
||||||
|
if (value !== default_param["temperature"].toString()) {
|
||||||
|
temperatureReset.style.visibility = 'visible';
|
||||||
|
} else {
|
||||||
|
temperatureReset.style.visibility = 'hidden';
|
||||||
|
}
|
||||||
|
saveToStorage('temperature', value)
|
||||||
|
}
|
||||||
|
|
||||||
|
function updateConcurrentUI() {
|
||||||
|
let value = concurrentSlider.value
|
||||||
|
concurrentDisplay.textContent = value;
|
||||||
|
if (value !== default_param["concurrent"].toString()) {
|
||||||
|
concurrentReset.style.visibility = 'visible';
|
||||||
|
} else {
|
||||||
|
concurrentReset.style.visibility = 'hidden';
|
||||||
|
}
|
||||||
|
saveToStorage('concurrent', value)
|
||||||
|
}
|
||||||
|
|
||||||
function loadSettings() {
|
function loadSettings() {
|
||||||
platformSelect.value = getFromStorage('translator_last_platform', 'custom');
|
platformSelect.value = getFromStorage('translator_last_platform', 'custom');
|
||||||
updatePlatformUI();
|
updatePlatformUI();
|
||||||
convertEnginSelect.value = getFromStorage('translator_convert_engin', 'mineru');
|
convertEnginSelect.value = getFromStorage('translator_convert_engin', 'mineru');
|
||||||
updateConvertEnginUI();
|
updateConvertEnginUI();
|
||||||
|
chunkSizeSlider.value = getFromStorage("chunk_size", default_param["chunk_size"])
|
||||||
|
updateChunkSizeUI()
|
||||||
|
concurrentSlider.value = getFromStorage("concurrent", default_param["concurrent"])
|
||||||
|
updateConcurrentUI()
|
||||||
|
temperatureSlider.value = getFromStorage("temperature", default_param["temperature"])
|
||||||
|
updateTemperatureUI()
|
||||||
toLangSelect.value = getFromStorage('translator_to_lang', '中文');
|
toLangSelect.value = getFromStorage('translator_to_lang', '中文');
|
||||||
formulaCheckbox.checked = getFromStorage('translator_formula_ocr') === 'true';
|
formulaCheckbox.checked = getFromStorage('translator_formula_ocr') === 'true';
|
||||||
codeCheckbox.checked = getFromStorage('translator_code_ocr') === 'true';
|
codeCheckbox.checked = getFromStorage('translator_code_ocr') === 'true';
|
||||||
@@ -710,52 +845,6 @@
|
|||||||
}
|
}
|
||||||
}, false);
|
}, false);
|
||||||
|
|
||||||
// 初始化调用
|
|
||||||
(async () => {
|
|
||||||
try {
|
|
||||||
const response = await fetch("/meta")
|
|
||||||
let meta = await response.json();
|
|
||||||
versionDisplay.textContent = `版本号:${meta.version}`;
|
|
||||||
} catch (error) {
|
|
||||||
console.warn("获取版本号失败", error);
|
|
||||||
}
|
|
||||||
try {
|
|
||||||
const response = await fetch('/get-engin-list')
|
|
||||||
if (!response.ok) {
|
|
||||||
console.warn(`get engine list failed: ${response.status}`);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
const enginList = await response.json();
|
|
||||||
statusMsg.textContent = '正在初始化';
|
|
||||||
let options = convertEnginSelect.querySelectorAll(`option`);
|
|
||||||
let currentEngineDisabled = false;
|
|
||||||
options.forEach((option) => {
|
|
||||||
if (!enginList.includes(option.value)) {
|
|
||||||
option.disabled = true;
|
|
||||||
option.textContent += " (不可用)";
|
|
||||||
if (option.value === convertEnginSelect.value) {
|
|
||||||
currentEngineDisabled = true;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
});
|
|
||||||
if (currentEngineDisabled) {
|
|
||||||
const mineruOption = convertEnginSelect.querySelector('option[value="mineru"]');
|
|
||||||
if (mineruOption && !mineruOption.disabled) {
|
|
||||||
convertEnginSelect.value = "mineru";
|
|
||||||
} else {
|
|
||||||
const firstAvailable = convertEnginSelect.querySelector('option:not([disabled])');
|
|
||||||
if (firstAvailable) convertEnginSelect.value = firstAvailable.value;
|
|
||||||
}
|
|
||||||
updateConvertEnginUI();
|
|
||||||
}
|
|
||||||
statusMsg.textContent = '初始化完成';
|
|
||||||
} catch (error) {
|
|
||||||
console.warn("Error get engin-list", error);
|
|
||||||
statusMsg.textContent = '引擎列表初始化失败';
|
|
||||||
statusMsg.className = 'error-message';
|
|
||||||
}
|
|
||||||
})();
|
|
||||||
|
|
||||||
|
|
||||||
async function pollLogs() {
|
async function pollLogs() {
|
||||||
try {
|
try {
|
||||||
@@ -990,6 +1079,23 @@
|
|||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
chunkSizeSlider.addEventListener('input', updateChunkSizeUI)
|
||||||
|
chunkSizeReset.addEventListener('click', () => {
|
||||||
|
chunkSizeSlider.value = default_param["chunk_size"]
|
||||||
|
updateChunkSizeUI()
|
||||||
|
})
|
||||||
|
concurrentSlider.addEventListener('input', updateConcurrentUI)
|
||||||
|
concurrentReset.addEventListener('click', () => {
|
||||||
|
concurrentSlider.value = default_param["concurrent"]
|
||||||
|
updateConcurrentUI()
|
||||||
|
})
|
||||||
|
|
||||||
|
temperatureSlider.addEventListener('input', updateTemperatureUI)
|
||||||
|
temperatureReset.addEventListener('click', () => {
|
||||||
|
temperatureSlider.value = default_param["temperature"]
|
||||||
|
updateTemperatureUI()
|
||||||
|
})
|
||||||
|
|
||||||
submitButton.addEventListener('click', async function (event) {
|
submitButton.addEventListener('click', async function (event) {
|
||||||
event.preventDefault();
|
event.preventDefault();
|
||||||
console.log(fileInput)
|
console.log(fileInput)
|
||||||
|
|||||||
@@ -18,11 +18,16 @@ DOCLING_FLAG = True if available_packages.get("docling") else False
|
|||||||
if DOCLING_FLAG:
|
if DOCLING_FLAG:
|
||||||
from docutranslate.converter import ConverterDocling
|
from docutranslate.converter import ConverterDocling
|
||||||
|
|
||||||
|
default_params={
|
||||||
|
"chunk_size":3000,
|
||||||
|
"concurrent":30,
|
||||||
|
"temperature":0.7,
|
||||||
|
}
|
||||||
|
|
||||||
class FileTranslater:
|
class FileTranslater:
|
||||||
def __init__(self, file_path: Path | str | None = None, chunksize: int = 3000,
|
def __init__(self, file_path: Path | str | None = None, chunk_size: int = default_params["chunk_size"],
|
||||||
base_url="", key=None, model_id="", temperature=0.7,
|
base_url:str|None=None, key=None, model_id:str|None=None, temperature=default_params["temperature"],
|
||||||
max_concurrent=30, timeout=2000,
|
concurrent:int=default_params["concurrent"], timeout=2000,
|
||||||
convert_engin: Literal["docling", "mineru"] = "mineru",
|
convert_engin: Literal["docling", "mineru"] = "mineru",
|
||||||
docling_artifact: Path | str | None = None,
|
docling_artifact: Path | str | None = None,
|
||||||
mineru_token: str = None, cache=True):
|
mineru_token: str = None, cache=True):
|
||||||
@@ -30,11 +35,11 @@ class FileTranslater:
|
|||||||
self.mineru_token = mineru_token.strip() if mineru_token is not None else None
|
self.mineru_token = mineru_token.strip() if mineru_token is not None else None
|
||||||
self._mask_dict = MaskDict()
|
self._mask_dict = MaskDict()
|
||||||
self.markdown: str = ""
|
self.markdown: str = ""
|
||||||
self.chunksize = chunksize
|
self.chunk_size = chunk_size
|
||||||
self.max_concurrent = max_concurrent
|
self.concurrent = concurrent
|
||||||
self.base_url: str = base_url
|
self.base_url= base_url
|
||||||
self.key: str = key if key is not None else "xx"
|
self.key = key if key is not None else "xx"
|
||||||
self.model_id: str = model_id
|
self.model_id = model_id
|
||||||
self.temperature = temperature
|
self.temperature = temperature
|
||||||
self.docling_artifact = docling_artifact
|
self.docling_artifact = docling_artifact
|
||||||
if docling_artifact is None:
|
if docling_artifact is None:
|
||||||
@@ -67,17 +72,21 @@ class FileTranslater:
|
|||||||
return self
|
return self
|
||||||
|
|
||||||
def _split_markdown_into_chunks(self) -> list[str]:
|
def _split_markdown_into_chunks(self) -> list[str]:
|
||||||
chunks: list[str] = split_markdown_text(self.markdown, self.chunksize)
|
chunks: list[str] = split_markdown_text(self.markdown, self.chunk_size)
|
||||||
translater_logger.info(f"markdown分为{len(chunks)}块")
|
translater_logger.info(f"markdown分为{len(chunks)}块")
|
||||||
return chunks
|
return chunks
|
||||||
|
|
||||||
def _default_agent_params(self) -> AgentArgs:
|
def _default_agent_params(self) -> AgentArgs:
|
||||||
|
if self.base_url is None:
|
||||||
|
raise Exception("base_url为空")
|
||||||
|
if self.model_id is None:
|
||||||
|
raise Exception("model_id为空")
|
||||||
result: AgentArgs = {
|
result: AgentArgs = {
|
||||||
"baseurl": self.base_url,
|
"baseurl": self.base_url,
|
||||||
"key": self.key,
|
"key": self.key,
|
||||||
"model_id": self.model_id,
|
"model_id": self.model_id,
|
||||||
"temperature": self.temperature,
|
"temperature": self.temperature,
|
||||||
"max_concurrent": self.max_concurrent,
|
"max_concurrent": self.concurrent,
|
||||||
"timeout": self.timeout
|
"timeout": self.timeout
|
||||||
}
|
}
|
||||||
return result
|
return result
|
||||||
|
|||||||
Reference in New Issue
Block a user