@@ -1,4 +1,7 @@
# 文件名: main.py (或您使用的任何文件名)
import asyncio
import asyncio
import base64
import io
import io
import logging
import logging
import os
import os
@@ -7,11 +10,12 @@ import time
from contextlib import asynccontextmanager , closing
from contextlib import asynccontextmanager , closing
from pathlib import Path
from pathlib import Path
from typing import List , Dict , Any , Optional
from typing import List , Dict , Any , Optional
import re
from urllib . parse import quote
from urllib . parse import quote
import httpx
import httpx
import uvicorn
import uvicorn
from fastapi import FastAPI , File , Form , UploadFile , Request , HTTPException , Query
from fastapi import FastAPI , File , Form , UploadFile , Request , HTTPException , Query , APIRouter , Body
from fastapi . responses import HTMLResponse , JSONResponse , StreamingResponse , FileResponse
from fastapi . responses import HTMLResponse , JSONResponse , StreamingResponse , FileResponse
from fastapi . staticfiles import StaticFiles
from fastapi . staticfiles import StaticFiles
from docutranslate import FileTranslater , __version__
from docutranslate import FileTranslater , __version__
@@ -22,36 +26,24 @@ from docutranslate.global_values import available_packages
httpx_client = httpx . AsyncClient ( )
httpx_client = httpx . AsyncClient ( )
# --- 全局配置 (修改) ---
# --- 全局配置 (无 修改) ---
# 将单个状态变更为一个字典, 以task_id为键, 管理多个任务的状态
tasks_state : Dict [ str , Dict [ str , Any ] ] = { }
tasks_state : Dict [ str , Dict [ str , Any ] ] = { }
# 将单个日志队列变更为字典, 为每个task_id提供独立的日志队列
tasks_log_queues : Dict [ str , asyncio . Queue ] = { }
tasks_log_queues : Dict [ str , asyncio . Queue ] = { }
# 将单个日志历史变更为字典, 为每个task_id提供独立的日志历史
tasks_log_histories : Dict [ str , List [ str ] ] = { }
tasks_log_histories : Dict [ str , List [ str ] ] = { }
MAX_LOG_HISTORY = 200
MAX_LOG_HISTORY = 200
# --- 辅助函数:创建默认任务状态 (新增 ) ---
# --- 辅助函数 (无修改 ) ---
def _create_default_task_state ( ) - > Dict [ str , Any ] :
def _create_default_task_state ( ) - > Dict [ str , Any ] :
""" 创建一个新的、默认的任务状态字典。 """
return {
return {
" is_processing " : False ,
" is_processing " : False , " status_message " : " 空闲 " , " error_flag " : False ,
" status_message " : " 空闲 " ,
" download_ready " : False , " markdown_content " : None , " markdown_zip_content " : None ,
" error_flag " : False ,
" html_content " : None , " original_filename_stem " : None , " task_start_time " : 0 ,
" download_ready " : Fals e,
" task_end_time " : 0 , " current_task_ref " : Non e,
" markdown_content " : None ,
" markdown_zip_content " : None ,
" html_content " : None ,
" original_filename_stem " : None ,
" task_start_time " : 0 ,
" task_end_time " : 0 ,
" current_task_ref " : None ,
}
}
# --- 日志处理器 (基本 无修改,但其使用方式已改变 ) ---
# --- 日志处理器 (无修改) ---
class QueueAndHistoryHandler ( logging . Handler ) :
class QueueAndHistoryHandler ( logging . Handler ) :
def __init__ ( self , queue_ref : asyncio . Queue , history_list_ref : List [ str ] , max_history_items : int ) :
def __init__ ( self , queue_ref : asyncio . Queue , history_list_ref : List [ str ] , max_history_items : int ) :
super ( ) . __init__ ( )
super ( ) . __init__ ( )
@@ -61,11 +53,11 @@ class QueueAndHistoryHandler(logging.Handler):
def emit ( self , record : logging . LogRecord ) :
def emit ( self , record : logging . LogRecord ) :
log_entry = self . format ( record )
log_entry = self . format ( record )
print ( f " [ { record . task_id } ] { log_entry } " if hasattr ( record , ' task_id ' ) else log_entry ) # 控制台日志增加task_id
task_id_prefix = f " [ { record . task_id } ] " if hasattr ( record , ' task_id ' ) else " "
print ( f " { task_id_prefix } { log_entry } " )
self . history_list . append ( log_entry )
self . history_list . append ( log_entry )
if len ( self . history_list ) > self . max_history :
if len ( self . history_list ) > self . max_history :
del self . history_list [ : len ( self . history_list ) - self . max_history ]
del self . history_list [ : len ( self . history_list ) - self . max_history ]
if self . queue is not None :
if self . queue is not None :
try :
try :
main_loop = getattr ( app . state , " main_event_loop " , None )
main_loop = getattr ( app . state , " main_event_loop " , None )
@@ -79,58 +71,37 @@ class QueueAndHistoryHandler(logging.Handler):
print ( f " Error putting log to queue for task: { e } . Log: { log_entry } " )
print ( f " Error putting log to queue for task: { e } . Log: { log_entry } " )
# --- 应用生命周期事件 (修改) ---
# --- 应用生命周期事件 (无 修改) ---
@asynccontextmanager
async def lifespan(app: FastAPI):
    """Application lifespan hook that resets all per-task state on startup.

    Stores the running event loop on ``app.state.main_event_loop`` (the log
    handler looks it up there), empties the task registries, and strips any
    handlers left on ``translater_logger``, since handlers are attached per
    task while a translation runs.
    """
    app.state.main_event_loop = asyncio.get_running_loop()

    # Start from a clean slate after a restart.
    tasks_state.clear()
    tasks_log_queues.clear()
    tasks_log_histories.clear()

    # Iterate over a copy: removeHandler mutates the underlying list.
    for handler in translater_logger.handlers[:]:
        translater_logger.removeHandler(handler)
    translater_logger.propagate = False
    translater_logger.setLevel(logging.INFO)

    print("应用启动完成,多任务状态已初始化。")
    yield
app = FastAPI ( lifespan = lifespan )
# --- Background Task Logic (无修改) ---
STATIC_DIR = resource_path ( " static " )
app . mount ( " /static " , StaticFiles ( directory = STATIC_DIR ) , name = " static " )
# --- Background Task Logic (修改) ---
async def _perform_translation ( task_id : str , params : Dict [ str , Any ] , file_contents : bytes , original_filename : str ) :
async def _perform_translation ( task_id : str , params : Dict [ str , Any ] , file_contents : bytes , original_filename : str ) :
""" 后台翻译任务,现在接收 task_id 以便操作对应的状态和日志。 """
task_state = tasks_state [ task_id ]
task_state = tasks_state [ task_id ]
log_queue = tasks_log_queues [ task_id ]
log_queue = tasks_log_queues [ task_id ]
log_history = tasks_log_histories [ task_id ]
log_history = tasks_log_histories [ task_id ]
# 为当前任务动态创建并添加日志处理器
task_handler = QueueAndHistoryHandler ( log_queue , log_history , MAX_LOG_HISTORY )
task_handler = QueueAndHistoryHandler ( log_queue , log_history , MAX_LOG_HISTORY )
task_handler . setFormatter ( logging . Formatter ( ' %(asctime)s - %(levelname)s - %(message)s ' ) )
task_handler . setFormatter ( logging . Formatter ( ' %(asctime)s - %(levelname)s - %(message)s ' ) )
# 为日志记录添加task_id上下文, 方便区分
log_filter = logging . Filter ( )
log_filter = logging . Filter ( )
log_filter . task_id = task_id
log_filter . task_id = task_id
task_handler . addFilter ( log_filter )
task_handler . addFilter ( log_filter )
translater_logger . addHandler ( task_handler )
translater_logger . addHandler ( task_handler )
translater_logger . info ( f " 后台翻译任务开始: 文件 ' { original_filename } ' " )
translater_logger . info ( f " 后台翻译任务开始: 文件 ' { original_filename } ' " )
task_state [ " status_message " ] = f " 正在处理 ' { original_filename } ' ... "
task_state [ " status_message " ] = f " 正在处理 ' { original_filename } ' ... "
try :
try :
translater_logger . info ( f " 使用 Base URL: { params [ ' base_url ' ] } , Model: { params [ ' model_id ' ] } " )
translater_logger . info ( f " 使用 Base URL: { params [ ' base_url ' ] } , Model: { params [ ' model_id ' ] } " )
# ... (其余日志记录)
ft = FileTranslater (
ft = FileTranslater (
base_url = params [ ' base_url ' ] , key = params [ ' apikey ' ] , model_id = params [ ' model_id ' ] ,
base_url = params [ ' base_url ' ] , key = params [ ' apikey ' ] , model_id = params [ ' model_id ' ] ,
chunk_size = params [ ' chunk_size ' ] , concurrent = params [ ' concurrent ' ] ,
chunk_size = params [ ' chunk_size ' ] , concurrent = params [ ' concurrent ' ] ,
@@ -143,39 +114,31 @@ async def _perform_translation(task_id: str, params: Dict[str, Any], file_conten
custom_prompt_translate = params [ ' custom_prompt_translate ' ] ,
custom_prompt_translate = params [ ' custom_prompt_translate ' ] ,
refine = params [ ' refine_markdown ' ] , save = False
refine = params [ ' refine_markdown ' ] , save = False
)
)
md_content = ft . export_to_markdown ( )
md_content = ft . export_to_markdown ( )
md_zip_content = ft . export_to_unembed_markdown ( )
md_zip_content = ft . export_to_unembed_markdown ( )
try :
try :
await httpx_client . head ( " https://s4.zstatic.net/ajax/libs/KaTeX/0.16.9/contrib/auto-render.min.js " ,
await httpx_client . head ( " https://s4.zstatic.net/ajax/libs/KaTeX/0.16.9/contrib/auto-render.min.js " ,
timeout = 3 )
timeout = 3 )
html_content = ft . export_to_html ( title = task_state [ " original_filename_stem " ] , cdn = True )
html_content = ft . export_to_html ( title = task_state [ " original_filename_stem " ] , cdn = True )
except ( httpx . TimeoutException , httpx . RequestError ) as e :
except ( httpx . TimeoutException , httpx . RequestError ) :
translater_logger . info ( f " 连接s4.zstatic.net失败, 错误信息: { e } " )
translater_logger . info ( " CDN连接失败, 使用本地JS进行渲染。 " )
translater_logger . info ( " 使用本地js进行pdf渲染 " )
html_content = ft . export_to_html ( title = task_state [ " original_filename_stem " ] , cdn = False )
html_content = ft . export_to_html ( title = task_state [ " original_filename_stem " ] , cdn = False )
end_time = time . time ( )
end_time = time . time ( )
duration = end_time - task_state [ " task_start_time " ]
duration = end_time - task_state [ " task_start_time " ]
task_state . update ( {
task_state . update ( {
" markdown_content " : md_content ,
" markdown_content " : md_content , " markdown_zip_content " : md_zip_content ,
" markdown_zip _content" : md_zip _content,
" html _content" : html _content, " status_message " : f " 翻译成功!用时 { duration : .2f } 秒。 " ,
" html_content " : html_content ,
" status_message " : f " 翻译成功!用时 { duration : .2f } 秒。 " ,
" download_ready " : True , " error_flag " : False , " task_end_time " : end_time ,
" download_ready " : True , " error_flag " : False , " task_end_time " : end_time ,
} )
} )
translater_logger . info ( f " 翻译成功完成,用时 { duration : .2f } 秒。 " )
translater_logger . info ( f " 翻译成功完成,用时 { duration : .2f } 秒。 " )
except asyncio . CancelledError :
except asyncio . CancelledError :
end_time = time . time ( )
end_time = time . time ( )
duration = end_time - task_state [ " task_start_time " ]
duration = end_time - task_state [ " task_start_time " ]
translater_logger . info ( f " 翻译任务 ' { original_filename } ' 已被取消 (用时 { duration : .2f } 秒). " )
translater_logger . info ( f " 翻译任务 ' { original_filename } ' 已被取消 (用时 { duration : .2f } 秒). " )
task_state . update ( {
task_state . update ( {
" status_message " : f " 翻译任务已取消(若有转换任务仍会后台进行) (用时 { duration : .2f } 秒). " ,
" status_message " : f " 翻译任务已取消 (用时 { duration : .2f } 秒). " , " error_flag " : False ,
" error_flag " : False , " download_ready " : Fals e,
" download_ready " : False , " markdown_content " : None , " md_zip_content " : Non e,
" markdown _content" : None , " md_zip_content " : None , " html_content " : Non e,
" html _content" : None , " task_end_time " : end_tim e,
" task_end_time " : end_time ,
} )
} )
except Exception as e :
except Exception as e :
end_time = time . time ( )
end_time = time . time ( )
@@ -184,47 +147,23 @@ async def _perform_translation(task_id: str, params: Dict[str, Any], file_conten
translater_logger . error ( error_message , exc_info = True )
translater_logger . error ( error_message , exc_info = True )
task_state . update ( {
task_state . update ( {
" status_message " : f " 翻译过程中发生错误 (用时 { duration : .2f } 秒): { e } " ,
" status_message " : f " 翻译过程中发生错误 (用时 { duration : .2f } 秒): { e } " ,
" error_flag " : True , " download_ready " : False ,
" error_flag " : True , " download_ready " : False , " markdown_content " : None ,
" markdown_content " : None , " md_zip_content" : None , " html_content " : None ,
" md_zip_content " : None , " html_content " : None , " task_end_time " : end_time ,
" task_end_time " : end_time ,
} )
} )
finally :
finally :
# 任务结束,重置处理状态并移除任务引用
task_state [ " is_processing " ] = False
task_state [ " is_processing " ] = False
task_state [ " current_task_ref " ] = None
task_state [ " current_task_ref " ] = None
translater_logger . info ( f " 后台翻译任务 ' { original_filename } ' 处理结束。 " )
translater_logger . info ( f " 后台翻译任务 ' { original_filename } ' 处理结束。 " )
# 关键步骤:移除此任务的处理器,防止日志系统混乱
translater_logger . removeHandler ( task_handler )
translater_logger . removeHandler ( task_handler )
# --- API Endpoints ---
# --- 核心任务启动与取消逻辑 (新增辅助函数,避免代码重复) ---
@app.get ( " / " , response_class = HTMLResponse )
async def _start_translation_task (
async def main_page ( request : Reque st) :
task_id : str ,
index_path = Path ( " index.html " )
params : Dict [ str , Any ] ,
if not index_path . exists ( ) :
file : UploadFile
index_path = STATIC_DIR / " index.html "
if not index_path . exists ( ) :
raise HTTPException ( status_code = 404 , detail = " index.html not found " )
no_cache_headers = {
" Cache-Control " : " no-store, no-cache, must-revalidate, max-age=0 " ,
" Pragma " : " no-cache " , " Expires " : " 0 " ,
}
return FileResponse ( index_path , headers = no_cache_headers )
@app.post ( " /translate " )
async def handle_translate (
# 添加 task_id 参数,默认为 '0'
task_id : str = Form ( " 0 " ) ,
base_url : str = Form ( . . . ) , apikey : str = Form ( . . . ) , model_id : str = Form ( . . . ) ,
to_lang : str = Form ( " 中文 " ) , formula_ocr : bool = Form ( False ) , code_ocr : bool = Form ( False ) ,
refine_markdown : bool = Form ( False ) , convert_engin : str = Form ( . . . ) ,
mineru_token : Optional [ str ] = Form ( None ) , chunk_size : int = Form ( . . . ) ,
concurrent : int = Form ( . . . ) , temperature : float = Form ( . . . ) ,
custom_prompt_translate : Optional [ str ] = Form ( None ) ,
file : UploadFile = File ( . . . )
) :
) :
# 获取或创建当前 task_id 的状态
""" 通用任务启动逻辑 """
if task_id not in tasks_state :
if task_id not in tasks_state :
tasks_state [ task_id ] = _create_default_task_state ( )
tasks_state [ task_id ] = _create_default_task_state ( )
tasks_log_queues [ task_id ] = asyncio . Queue ( )
tasks_log_queues [ task_id ] = asyncio . Queue ( )
@@ -232,15 +171,13 @@ async def handle_translate(
task_state = tasks_state [ task_id ]
task_state = tasks_state [ task_id ]
if task_state [ " is_processing " ] and task_state [ " current_task_ref " ] and not task_state [ " current_task_ref " ] . done ( ) :
if task_state [ " is_processing " ] and task_state [ " current_task_ref " ] and not task_state [ " current_task_ref " ] . done ( ) :
return JSONResponse (
raise HTTPException (
status_code = 429 ,
status_code = 429 ,
content = { " task_started " : False , " message " : f " 任务ID ' { task_id } ' 正在进行中,请稍后再试。 " }
detail = f " 任务ID ' { task_id } ' 正在进行中,请稍后再试。 "
)
)
task_state [ " is_processing " ] = True
task_state [ " is_processing " ] = True
original_filename_for_init = file . filename or " uploaded_file "
original_filename_for_init = file . filename or " uploaded_file "
# 更新特定 task_id 的状态
task_state . update ( {
task_state . update ( {
" status_message " : " 任务初始化中... " , " error_flag " : False , " download_ready " : False ,
" status_message " : " 任务初始化中... " , " error_flag " : False , " download_ready " : False ,
" markdown_content " : None , " md_zip_content " : None , " html_content " : None ,
" markdown_content " : None , " md_zip_content " : None , " html_content " : None ,
@@ -248,7 +185,6 @@ async def handle_translate(
" task_start_time " : time . time ( ) , " task_end_time " : 0 , " current_task_ref " : None ,
" task_start_time " : time . time ( ) , " task_end_time " : 0 , " current_task_ref " : None ,
} )
} )
# 清空特定 task_id 的日志历史和队列
log_history = tasks_log_histories [ task_id ]
log_history = tasks_log_histories [ task_id ]
log_queue = tasks_log_queues [ task_id ]
log_queue = tasks_log_queues [ task_id ]
log_history . clear ( )
log_history . clear ( )
@@ -259,7 +195,7 @@ async def handle_translate(
break
break
initial_log_msg = f " 收到新的翻译请求: { original_filename_for_init } "
initial_log_msg = f " 收到新的翻译请求: { original_filename_for_init } "
print ( f " [ { task_id } ] { initial_log_msg } " ) # 控制台直接打印
print ( f " [ { task_id } ] { initial_log_msg } " )
log_history . append ( initial_log_msg )
log_history . append ( initial_log_msg )
await log_queue . put ( initial_log_msg )
await log_queue . put ( initial_log_msg )
@@ -268,80 +204,91 @@ async def handle_translate(
original_filename = file . filename
original_filename = file . filename
await file . close ( )
await file . close ( )
task_params = {
" base_url " : base_url , " apikey " : apikey , " model_id " : model_id ,
" to_lang " : to_lang , " formula_ocr " : formula_ocr , " code_ocr " : code_ocr ,
" refine_markdown " : refine_markdown , " convert_engin " : convert_engin ,
" mineru_token " : mineru_token , " chunk_size " : chunk_size , " concurrent " : concurrent ,
" temperature " : temperature , " custom_prompt_translate " : custom_prompt_translate ,
}
loop = asyncio . get_running_loop ( )
loop = asyncio . get_running_loop ( )
# 将 task_id 传递给后台任务
task = loop . create_task (
task = loop . create_task (
_perform_translation ( task_id , task_ params, file_contents , original_filename )
_perform_translation ( task_id , params , file_contents , original_filename )
)
)
task_state [ " current_task_ref " ] = task
task_state [ " current_task_ref " ] = task
return { " task_started " : True , " task_id " : task_id , " message " : " 翻译任务已成功启动,请稍候... " }
return JSONResponse (
content = { " task_started " : True , " task_id " : task_id , " message " : " 翻译任务已成功启动,请稍候... " } )
except Exception as e :
except Exception as e :
task_state [ " is_processing " ] = False
task_state . update ( {
task_state [ " status_message " ] = f " 启动任务失败: { e } "
" is_processing " : False , " status_message " : f " 启动任务失败: { e } " ,
task_state [ " error_flag " ] = True
" error_flag " : True , " current_task_ref " : None
task_state [ " current_task_ref " ] = None
} )
return JSONResponse ( status_code = 500 ,
raise HTTPException ( status_code = 500 , detail = f " 启动翻译任务时出错: { e } " )
content = { " task_started " : False , " task_id " : task_id , " message " : f " 启动翻译任务时出错: { e } " } )
def _cancel_translation_logic(task_id: str):
    """Request cancellation of the translation task running under ``task_id``.

    Returns:
        A plain dict ``{"cancelled": True, "message": ...}``. A dict is
        returned rather than a response object because the route handlers
        pass the result to ``JSONResponse(content=...)`` themselves, and a
        response object is not JSON-serializable.

    Raises:
        HTTPException: 400 when the task id is unknown, is not processing,
            has no task reference, or its asyncio task has already finished.
    """
    task_state = tasks_state.get(task_id)
    if not task_state or not task_state["is_processing"] or not task_state["current_task_ref"]:
        raise HTTPException(status_code=400, detail=f"任务ID '{task_id}' 没有正在进行的翻译任务可取消。")

    task_to_cancel: Optional[asyncio.Task] = task_state["current_task_ref"]
    if not task_to_cancel or task_to_cancel.done():
        # The bookkeeping is stale: the task finished without clearing its flags.
        task_state["is_processing"] = False
        task_state["current_task_ref"] = None
        raise HTTPException(status_code=400, detail="任务已完成或已被取消。")

    print(f"[{task_id}] 收到取消翻译任务的请求。")
    task_to_cancel.cancel()
    task_state["status_message"] = "正在取消任务..."
    return {"cancelled": True, "message": "取消请求已发送。请等待状态更新。"}
# --- FastAPI application and router setup ---
app = FastAPI(lifespan=lifespan)
# Two route groups: /backend for the bundled frontend, /service for API clients.
backend_router = APIRouter(prefix="/backend")
service_router = APIRouter(prefix="/service")

STATIC_DIR = resource_path("static")
app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static")
# ===================================================================
# --- API Endpoints for Frontend (/backend) ---
# ===================================================================

@backend_router.post("/translate")
async def handle_translate_for_frontend(
        task_id: str = Form("0"), base_url: str = Form(...), apikey: str = Form(...),
        model_id: str = Form(...), to_lang: str = Form("中文"), formula_ocr: bool = Form(False),
        code_ocr: bool = Form(False), refine_markdown: bool = Form(False),
        convert_engin: str = Form(...), mineru_token: Optional[str] = Form(None),
        chunk_size: int = Form(...), concurrent: int = Form(...),
        temperature: float = Form(...), custom_prompt_translate: Optional[str] = Form(None),
        file: UploadFile = File(...)
):
    """Start a translation task from the frontend's multipart form.

    Every form field except ``task_id`` and ``file`` is forwarded to
    ``_start_translation_task`` as the task parameter dict.

    Returns:
        The helper's result as a JSON response. If the helper raises
        ``HTTPException``, a JSON body ``{"task_started": False, "message": ...}``
        is returned with the exception's status code.
    """
    # Explicit dict instead of locals(): it cannot pick up unrelated locals
    # if code is ever added above this point.
    task_params = {
        "base_url": base_url, "apikey": apikey, "model_id": model_id,
        "to_lang": to_lang, "formula_ocr": formula_ocr, "code_ocr": code_ocr,
        "refine_markdown": refine_markdown, "convert_engin": convert_engin,
        "mineru_token": mineru_token, "chunk_size": chunk_size, "concurrent": concurrent,
        "temperature": temperature, "custom_prompt_translate": custom_prompt_translate,
    }
    try:
        result = await _start_translation_task(task_id, task_params, file)
    except HTTPException as e:
        return JSONResponse(status_code=e.status_code, content={"task_started": False, "message": e.detail})
    # The helper may already return a response object; wrapping it in another
    # JSONResponse would fail because a response is not JSON-serializable.
    if isinstance(result, JSONResponse):
        return result
    return JSONResponse(content=result)
@backend_router.post("/cancel-translate")
async def cancel_translate_for_frontend(task_id: str = Form("0")):
    """Cancel the running translation for ``task_id`` (frontend form endpoint).

    Returns:
        The result of ``_cancel_translation_logic`` as a JSON response. If it
        raises ``HTTPException``, a JSON body
        ``{"cancelled": False, "message": ...}`` is returned with the
        exception's status code.
    """
    try:
        result = _cancel_translation_logic(task_id)
    except HTTPException as e:
        return JSONResponse(status_code=e.status_code, content={"cancelled": False, "message": e.detail})
    # Pass an already-built response through untouched; re-wrapping it would
    # fail because a response object is not JSON-serializable.
    if isinstance(result, JSONResponse):
        return result
    return JSONResponse(content=result)
@backend_router.get ( " /get-status " )
async def get_status_for_frontend ( task_id : str = Query ( " 0 " ) ) :
task_state = tasks_state . get ( task_id , _create_default_task_state ( ) )
# 在URL中附带task_id, 以便下载和后续请求能找到正确的任务
def generate_url ( path_prefix , filename_stem , extension ) :
def generate_url ( path_prefix , filename_stem , extension ) :
if task_state [ " download_ready " ] and filename_stem :
if task_state [ " download_ready " ] and filename_stem :
return f " /download/ { path_prefix } / { filename_stem } _translated. { extension } ?task_id= { task_id } "
# 关键修改: 在URL前添加 /backend 前缀
return f " /backend/download/ { path_prefix } / { filename_stem } _translated. { extension } ?task_id= { task_id } "
return None
return None
status_data = {
return JSONResponse ( content = {
" is_processing " : task_state [ " is_processing " ] ,
" is_processing " : task_state [ " is_processing " ] ,
" status_message " : task_state [ " status_message " ] ,
" status_message " : task_state [ " status_message " ] ,
" error_flag " : task_state [ " error_flag " ] ,
" error_flag " : task_state [ " error_flag " ] ,
@@ -352,38 +299,18 @@ async def get_status(task_id: str = Query("0")):
" html_url " : generate_url ( " html " , task_state [ " original_filename_stem " ] , " html " ) ,
" html_url " : generate_url ( " html " , task_state [ " original_filename_stem " ] , " html " ) ,
" task_start_time " : task_state [ " task_start_time " ] ,
" task_start_time " : task_state [ " task_start_time " ] ,
" task_end_time " : task_state [ " task_end_time " ] ,
" task_end_time " : task_state [ " task_end_time " ] ,
}
} )
return JSONResponse ( content = status_data )
@app .get ( " /get-logs " )
@backend_router .get ( " /download/ {file_type} / {filename_with_ext} " )
async def get_logs_from_queue ( task_id : str = Query ( " 0 " ) ) :
async def download_file_for_frontend (
log_queue = tasks_log_queues . get ( task_id )
file_type : str , filename_with_ext : str , task_id : str = Query ( . . . )
new_logs = [ ]
if log_queue :
while not log_queue . empty ( ) :
try :
log_entry = log_queue . get_nowait ( )
new_logs . append ( log_entry )
log_queue . task_done ( )
except asyncio . QueueEmpty :
break
return JSONResponse ( content = { " logs " : new_logs } )
@app.get ( " /download/ {file_type} / {filename_with_ext} " )
async def download_file (
file_type : str ,
filename_with_ext : str ,
task_id : str = Query ( . . . ) # task_id 在下载时是必需的
) :
) :
task_state = tasks_state . get ( task_id )
task_state = tasks_state . get ( task_id )
if not task_state :
if not task_state :
raise HTTPException ( status_code = 404 , detail = f " 找不到任务ID ' { task_id } ' 。 " )
raise HTTPException ( status_code = 404 , detail = f " 找不到任务ID ' { task_id } ' 。 " )
if not task_state [ " download_ready " ] or not task_state [ " original_filename_stem " ] :
if not task_state [ " download_ready " ] or not task_state [ " original_filename_stem " ] :
raise HTTPException ( status_code = 404 , detail = " 内容尚未准备好或不可用。 " )
raise HTTPException ( status_code = 404 , detail = " 内容尚未准备好或不可用。 " )
if Path ( filename_with_ext ) . stem != f " { task_state [ ' original_filename_stem ' ] } _translated " :
if Path ( filename_with_ext ) . stem != f " { task_state [ ' original_filename_stem ' ] } _translated " :
raise HTTPException ( status_code = 404 , detail = " 请求的文件名与当前结果不符。 " )
raise HTTPException ( status_code = 404 , detail = " 请求的文件名与当前结果不符。 " )
@@ -394,36 +321,205 @@ async def download_file(
f " { task_state [ ' original_filename_stem ' ] } _translated.zip " ) ,
f " { task_state [ ' original_filename_stem ' ] } _translated.zip " ) ,
" html " : ( task_state [ " html_content " ] , " text/html " , f " { task_state [ ' original_filename_stem ' ] } _translated.html " ) ,
" html " : ( task_state [ " html_content " ] , " text/html " , f " { task_state [ ' original_filename_stem ' ] } _translated.html " ) ,
}
}
if file_type not in content_map :
if file_type not in content_map :
raise HTTPException ( status_code = 404 , detail = " 无效的文件类型。 " )
raise HTTPException ( status_code = 404 , detail = " 无效的文件类型。 " )
content , media_type , actual_filename = content_map [ file_type ]
content , media_type , actual_filename = content_map [ file_type ]
if content is None :
if content is None :
raise HTTPException ( status_code = 404 , detail = f " { file_type . capitalize ( ) } 内容不可用。 " )
raise HTTPException ( status_code = 404 , detail = f " { file_type . capitalize ( ) } 内容不可用。 " )
headers = {
headers = {
" Content-Disposition " : f " attachment; filename*=UTF-8 ' ' { quote ( actual_filename , safe = ' ' , encoding = ' utf-8 ' ) } " }
" Content-Disposition " : f " attachment; filename*=UTF-8 ' ' { quote ( actual_filename , safe = ' ' , encoding = ' utf-8 ' ) } " }
if file_type == " html " :
if file_type == " html " :
return HTMLResponse ( content = content , media_type = media_type , headers = headers )
return HTMLResponse ( content = content , media_type = media_type , headers = headers )
elif file_type == " markdown_zip " :
elif file_type == " markdown_zip " :
return StreamingResponse ( io . BytesIO ( content ) , media_type = media_type , headers = headers )
return StreamingResponse ( io . BytesIO ( content ) , media_type = media_type , headers = headers )
else : # markdown
else :
return StreamingResponse ( io . StringIO ( content ) , media_type = media_type , headers = headers )
return StreamingResponse ( io . StringIO ( content ) , media_type = media_type , headers = headers )
@backend_router.get("/get-logs")
async def get_logs_from_queue_for_frontend(task_id: str = Query("0")):
    """Drain and return the log lines queued for ``task_id`` since the last call.

    Returns:
        JSON ``{"logs": [...]}``; the list is empty when the task id has no
        queue or nothing new has been logged.
    """
    new_logs = []
    log_queue = tasks_log_queues.get(task_id)
    if log_queue:
        while not log_queue.empty():
            try:
                new_logs.append(log_queue.get_nowait())
                log_queue.task_done()
            except asyncio.QueueEmpty:
                break
    return JSONResponse(content={"logs": new_logs})
@backend_router.get("/get-engin-list")
async def get_engin_list_for_frontend():
    """Return the usable conversion engines as a JSON array.

    ``mineru`` is always listed; ``docling`` is added only when
    ``available_packages`` has a truthy entry for it.
    """
    engin_list = ["mineru"]
    if available_packages.get("docling"):
        engin_list.append("docling")
    return JSONResponse(content=engin_list)
@backend_router.get("/translate/default_param")
def get_default_param_for_frontend():
    """Return the module-level ``default_params`` object as a JSON response."""
    return JSONResponse(content=default_params)
@backend_router.get("/meta")
async def get_app_version_for_frontend():
    """Return application metadata as JSON; currently only the package version."""
    return JSONResponse(content={"version": __version__})
# ===================================================================
# --- API Endpoints for Service (/service) ---
# ===================================================================
@service_router.post("/translate", summary="提交翻译任务")
async def handle_translate_for_service(
        file: UploadFile = File(..., description="要翻译的文档文件"),
        task_id: str = Form("0", description="任务ID, 用于跟踪, 默认为 '0'"),
        base_url: str = Form(...), apikey: str = Form(...), model_id: str = Form(...),
        to_lang: str = Form("中文"), formula_ocr: bool = Form(False), code_ocr: bool = Form(False),
        refine_markdown: bool = Form(False), convert_engin: str = Form(...),
        mineru_token: Optional[str] = Form(None), chunk_size: int = Form(...),
        concurrent: int = Form(...), temperature: float = Form(...),
        custom_prompt_translate: Optional[str] = Form(None),
):
    """Submit a file for translation and start a background task.

    The response carries the task id, which can then be used to query the
    task status and download the results. Every form field except
    ``task_id`` and ``file`` is forwarded to ``_start_translation_task`` as
    the task parameter dict.

    Returns:
        The helper's result as a JSON response. If the helper raises
        ``HTTPException``, a JSON body ``{"task_started": False, "message": ...}``
        is returned with the exception's status code.
    """
    # Explicit dict instead of locals(): it cannot pick up unrelated locals
    # if code is ever added above this point.
    task_params = {
        "base_url": base_url, "apikey": apikey, "model_id": model_id,
        "to_lang": to_lang, "formula_ocr": formula_ocr, "code_ocr": code_ocr,
        "refine_markdown": refine_markdown, "convert_engin": convert_engin,
        "mineru_token": mineru_token, "chunk_size": chunk_size, "concurrent": concurrent,
        "temperature": temperature, "custom_prompt_translate": custom_prompt_translate,
    }
    try:
        result = await _start_translation_task(task_id, task_params, file)
    except HTTPException as e:
        return JSONResponse(status_code=e.status_code, content={"task_started": False, "message": e.detail})
    # The helper may already return a response object; wrapping it in another
    # JSONResponse would fail because a response is not JSON-serializable.
    if isinstance(result, JSONResponse):
        return result
    return JSONResponse(content=result)
@service_router.post("/cancel/{task_id}", summary="取消翻译任务")
async def cancel_translate_for_service(task_id: str):
    """Cancel the in-progress translation task identified by ``task_id``.

    Returns:
        The result of ``_cancel_translation_logic`` as a JSON response. If it
        raises ``HTTPException``, a JSON body
        ``{"cancelled": False, "message": ...}`` is returned with the
        exception's status code.
    """
    try:
        result = _cancel_translation_logic(task_id)
    except HTTPException as e:
        return JSONResponse(status_code=e.status_code, content={"cancelled": False, "message": e.detail})
    # Pass an already-built response through untouched; re-wrapping it would
    # fail because a response object is not JSON-serializable.
    if isinstance(result, JSONResponse):
        return result
    return JSONResponse(content=result)
@service_router.get("/status/{task_id}", summary="获取任务状态")
async def get_status_for_service(task_id: str):
    """Return the current state of a task together with its download links.

    Returns:
        JSON with the processing flags, the status message, the task duration
        in seconds (0 until an end time has been recorded) and a ``downloads``
        map whose values are ``None`` until results are ready.

    Raises:
        HTTPException: 404 when ``task_id`` is unknown.
    """
    task_state = tasks_state.get(task_id)
    if not task_state:
        raise HTTPException(status_code=404, detail=f"找不到任务ID '{task_id}'。")

    def generate_service_url(file_type):
        if task_state["download_ready"]:
            # Percent-encode the id so characters such as '/' or spaces
            # cannot break the path segment of the generated link.
            return f"/service/download/{quote(task_id, safe='')}/{file_type}"
        return None

    end_time = task_state["task_end_time"]
    duration = (end_time - task_state["task_start_time"]) if end_time > 0 else 0

    return JSONResponse(content={
        "task_id": task_id,
        "is_processing": task_state["is_processing"],
        "status_message": task_state["status_message"],
        "error_flag": task_state["error_flag"],
        "download_ready": task_state["download_ready"],
        "task_duration_seconds": duration,
        "downloads": {
            "markdown": generate_service_url("markdown"),
            "markdown_zip": generate_service_url("markdown_zip"),
            "html": generate_service_url("html"),
        }
    })
@service_router.get("/logs/{task_id}", summary="获取任务日志")
async def get_logs_for_service(task_id: str):
    """Return the log lines emitted for ``task_id`` since the previous query.

    The task's queue is drained, so each line is delivered once.

    Raises:
        HTTPException: 404 when no log queue exists for ``task_id``.
    """
    log_queue = tasks_log_queues.get(task_id)
    if log_queue is None:
        raise HTTPException(status_code=404, detail=f"找不到任务ID '{task_id}' 的日志队列。")

    new_logs = []
    while not log_queue.empty():
        try:
            new_logs.append(log_queue.get_nowait())
            log_queue.task_done()
        except asyncio.QueueEmpty:
            break
    return JSONResponse(content={"logs": new_logs})
@service_router.get("/download/{task_id}/{file_type}", summary="下载结果文件")
async def download_file_for_service(task_id: str, file_type: str):
    """Download the translated result of a task in the requested format.

    Builds the download file name from the task's original file stem and
    then delegates to ``download_file_for_frontend``, which holds the shared
    download logic.

    Raises:
        HTTPException: 404 when the task is unknown or its result is not
            ready yet.
    """
    state = tasks_state.get(task_id)
    if not (state and state["download_ready"]):
        raise HTTPException(status_code=404, detail="任务不存在或结果尚未就绪。")

    # Any file_type other than the two markdown variants falls back to html.
    extension = {"markdown": "md", "markdown_zip": "zip"}.get(file_type, "html")
    download_name = f"{state['original_filename_stem']}_translated.{extension}"
    return await download_file_for_frontend(file_type, download_name, task_id)
# ===================================================================
# --- 应用主路由和启动 ---
# ===================================================================
@app.get("/", response_class=HTMLResponse, include_in_schema=False)
async def main_page():
    """Serve ``index.html`` from the static directory with caching disabled.

    Raises:
        HTTPException: 404 when ``index.html`` does not exist.
    """
    page = Path(STATIC_DIR).joinpath("index.html")
    if page.exists():
        # Headers instruct clients not to cache the page.
        return FileResponse(
            page,
            headers={
                "Cache-Control": "no-store, no-cache, must-revalidate, max-age=0",
                "Pragma": "no-cache",
                "Expires": "0",
            },
        )
    raise HTTPException(status_code=404, detail="index.html not found")
@app.post("/temp/translate")
async def temp_translate(
    base_url: str = Body(...),
    api_key: str = Body(...),
    model_id: str = Body(...),
    mineru_token: str = Body(...),
    file_name: str = Body(...),
    file_content: str = Body(...),
    to_lang: str = Body("中文"),
):
    """Translate a single document supplied inline and return it as markdown.

    ``file_content`` may be either base64-encoded bytes or plain text. It is
    treated as base64 only when, after removing line breaks, it decodes
    under strict validation; otherwise the text itself is encoded and used
    as the file body.

    Returns:
        ``{"success": True, "content": <markdown>}`` on success, or
        ``{"success": False, "reason": <repr of the error>}`` when the
        translation raises.
    """

    def decode_payload(content: str) -> bytes:
        # Line breaks are dropped so line-wrapped base64 is still accepted.
        compact = content.replace("\r", "").replace("\n", "")
        try:
            # validate=True rejects characters outside the base64 alphabet.
            # The non-validating decoder silently discards them, which makes
            # ordinary text with a lucky length decode into garbage bytes.
            return base64.b64decode(compact, validate=True)
        except ValueError:
            # binascii.Error is a ValueError subclass; non-ASCII text also
            # raises ValueError. Either way the content is plain text.
            return content.encode()

    ft = FileTranslater(
        base_url=base_url,
        key=api_key,
        model_id=model_id,
        mineru_token=mineru_token,
    )
    try:
        await ft.translate_bytes_async(
            name=file_name,
            file=decode_payload(file_content),
            to_lang=to_lang,
            save=False,
        )
        return {"success": True, "content": ft.export_to_markdown()}
    except Exception as e:
        # Endpoint boundary: report the failure to the caller instead of
        # surfacing a 500.
        print(f"翻译出现错误: {e!r}")
        # The reason is a plain string. The previous code wrapped it in a
        # set literal by mistake.
        return {"success": False, "reason": repr(e)}
# Register both route groups on the application, backend first.
for _router in (backend_router, service_router):
    app.include_router(_router)
# --- 启动逻辑 (无修改) ---
def find_free_port ( start_port ) :
def find_free_port ( start_port ) :
port = start_port
port = start_port
while True :
while True :
@@ -434,19 +530,16 @@ def find_free_port(start_port):
def run_app(port: int | None = None):
    """Start the web server on the requested port or the next free one.

    Args:
        port: Preferred port. When omitted (or falsy) the value of the
            ``DOCUTRANSLATE_PORT`` environment variable is used, falling
            back to 8010.

    Any exception raised while resolving the port or running the server is
    caught and reported on stdout rather than propagated.
    """
    initial_port = port or int(os.environ.get("DOCUTRANSLATE_PORT", 8010))
    try:
        # find_free_port may return a different port than the one requested.
        port_to_use = find_free_port(initial_port)
        if port_to_use != initial_port:
            print(f"端口 {initial_port} 被占用,将使用端口 {port_to_use} 代替")
        print(f"正在启动 DocuTranslate WebUI 版本号: {__version__}")
        print(f"请用浏览器访问 http://127.0.0.1:{port_to_use}")
        print("可以设置环境变量`DOCUTRANSLATE_PORT=<port>`改变默认服务端口号")
        print("API文档 (Swagger UI):")
        print(f"接口文档: http://127.0.0.1:{port_to_use}/docs")
        uvicorn.run(app, host=None, port=port_to_use, workers=1)
    except Exception as e:
        print(f"启动失败: {e}")