translater现在可以拥有独立logger

This commit is contained in:
xunbu
2025-07-15 09:56:21 +08:00
parent 66106e7a15
commit 549cf78862
7 changed files with 138 additions and 126 deletions

View File

@@ -1,4 +1,5 @@
import asyncio
import logging
import time
from concurrent.futures import ThreadPoolExecutor
from threading import Lock
@@ -6,7 +7,7 @@ from typing import TypedDict
import httpx
from docutranslate.logger import translater_logger
from docutranslate.logger import global_logger
MAX_RETRY_COUNT = 2
MAX_TOTAL_ERROR_COUNT = 10
@@ -20,18 +21,20 @@ class AgentArgs(TypedDict, total=False):
temperature: float
max_concurrent: int
timeout: int
logger:logging.Logger
class TotalErrorCounter:
def __init__(self, ):
def __init__(self,logger:logging.Logger):
self.lock = Lock()
self.count = 0
self.logger=logger
def add(self):
self.lock.acquire()
self.count += 1
if self.count>MAX_TOTAL_ERROR_COUNT:
translater_logger.info(f"错误响应过多")
self.logger.info(f"错误响应过多")
self.lock.release()
return self.reach_limit()
@@ -39,20 +42,19 @@ class TotalErrorCounter:
return self.count > MAX_TOTAL_ERROR_COUNT
total_error_counter = TotalErrorCounter()
# 仅使用多线程时用以计数
class PromptsCounter:
def __init__(self, total: int):
def __init__(self, total: int,logger:logging.Logger):
self.lock = Lock()
self.count = 0
self.total = total
self.logger=logger
def add(self):
self.lock.acquire()
self.count += 1
translater_logger.info(f"多线程-已完成:{self.count}/{self.total}")
self.logger.info(f"多线程-已完成:{self.count}/{self.total}")
self.lock.release()
@@ -61,7 +63,7 @@ TIMEOUT = 600
class Agent:
def __init__(self, baseurl: str = "", key: str = "xx", model_id: str = "", system_prompt: str = "", temperature=0.7,
max_concurrent=15, timeout: int = TIMEOUT):
max_concurrent=15, timeout: int = TIMEOUT,logger:logging.Logger|None=None):
self.baseurl = baseurl.strip()
if self.baseurl.endswith("/"):
self.baseurl = self.baseurl[:-1]
@@ -74,6 +76,8 @@ class Agent:
self.max_concurrent = max_concurrent
self.timeout = timeout
self.logger=logger if logger else global_logger
self.total_error_counter = TotalErrorCounter(logger=self.logger)
def _prepare_request_data(self, prompt: str, system_prompt: str, temperature=None, top_p=0.9):
if temperature is None:
temperature = self.temperature
@@ -109,23 +113,23 @@ class Agent:
result = response.json()["choices"][0]["message"]["content"]
return result
except httpx.HTTPStatusError as e:
translater_logger.warning(f"AI请求错误 (async): {e.response.status_code} - {e.response.text}")
self.logger.warning(f"AI请求错误 (async): {e.response.status_code} - {e.response.text}")
print(f"prompt:\n{prompt}")
total_error_counter.add()
self.total_error_counter.add()
return prompt
except httpx.RequestError as e:
translater_logger.warning(f"AI请求连接错误 (async): {repr(e)}")
self.logger.warning(f"AI请求连接错误 (async): {repr(e)}")
except (KeyError, IndexError) as e:
raise Exception(f"AI响应格式错误 (async): {repr(e)}")
# 如果没有正常获取结果则重试
if retry and retry_count < MAX_RETRY_COUNT:
if total_error_counter.add():
if self.total_error_counter.add():
return prompt
translater_logger.info(f"正在重试,重试次数{retry_count}")
self.logger.info(f"正在重试,重试次数{retry_count}")
await asyncio.sleep(0.5)
return await self.send_async(prompt, system_prompt, retry=True, retry_count=retry_count + 1)
else:
translater_logger.error(f"达到重试次数上限")
self.logger.error(f"达到重试次数上限")
return prompt
async def send_prompts_async(
@@ -149,7 +153,7 @@ class Agent:
)
nonlocal count
count += 1
translater_logger.info(f"协程-已完成{count}/{total}")
self.logger.info(f"协程-已完成{count}/{total}")
return result
for p_text in prompts:
@@ -176,23 +180,23 @@ class Agent:
result = response.json()["choices"][0]["message"]["content"]
return result
except httpx.HTTPStatusError as e:
translater_logger.warning(f"AI请求错误 (async): {e.response.status_code} - {e.response.text}")
self.logger.warning(f"AI请求错误 (async): {e.response.status_code} - {e.response.text}")
print(f"prompt:\n{prompt}")
total_error_counter.add()
self.total_error_counter.add()
return prompt
except httpx.RequestError as e:
translater_logger.warning(f"AI请求连接错误 (sync): {repr(e)}\nprompt:{prompt}")
self.logger.warning(f"AI请求连接错误 (sync): {repr(e)}\nprompt:{prompt}")
except (KeyError, IndexError) as e:
raise Exception(f"AI响应格式错误 (sync): {repr(e)}")
# 如果没有正常获取结果则重试
if retry and retry_count < MAX_RETRY_COUNT:
if total_error_counter.add():
if self.total_error_counter.add():
return prompt
translater_logger.info(f"正在重试,重试次数{retry_count}")
self.logger.info(f"正在重试,重试次数{retry_count}")
time.sleep(0.5)
return self.send(prompt, system_prompt, retry=True, retry_count=retry_count + 1)
else:
translater_logger.error(f"达到重试次数上限")
self.logger.error(f"达到重试次数上限")
return prompt
def _send_prompt_count(self, prompt: str, system_prompt: None | str, count: PromptsCounter) -> str:
@@ -206,7 +210,7 @@ class Agent:
system_prompt: str | None = None,
) -> list[str]:
system_prompts = [system_prompt] * len(prompts)
counts = [PromptsCounter(len(prompts))] * len(prompts)
counts = [PromptsCounter(len(prompts),self.logger)] * len(prompts)
output_list = []
with ThreadPoolExecutor(max_workers=self.max_concurrent) as executor:
results_iterator = executor.map(self._send_prompt_count, prompts, system_prompts, counts)

View File

@@ -19,7 +19,7 @@ from pydantic import BaseModel, Field
from docutranslate import FileTranslater, __version__
from docutranslate.global_values import available_packages
from docutranslate.logger import translater_logger
from docutranslate.logger import global_logger
from docutranslate.translater import default_params
from docutranslate.utils.resource_utils import resource_path
@@ -79,10 +79,10 @@ async def lifespan(app: FastAPI):
tasks_state.clear()
tasks_log_queues.clear()
tasks_log_histories.clear()
for handler in translater_logger.handlers[:]:
translater_logger.removeHandler(handler)
translater_logger.propagate = False
translater_logger.setLevel(logging.INFO)
for handler in global_logger.handlers[:]:
global_logger.removeHandler(handler)
global_logger.propagate = False
global_logger.setLevel(logging.INFO)
print("应用启动完成,多任务状态已初始化。")
yield
await httpx_client.aclose()
@@ -100,12 +100,12 @@ async def _perform_translation(task_id: str, params: Dict[str, Any], file_conten
log_filter = logging.Filter()
log_filter.task_id = task_id
task_handler.addFilter(log_filter)
translater_logger.addHandler(task_handler)
global_logger.addHandler(task_handler)
translater_logger.info(f"后台翻译任务开始: 文件 '{original_filename}'")
global_logger.info(f"后台翻译任务开始: 文件 '{original_filename}'")
task_state["status_message"] = f"正在处理 '{original_filename}'..."
try:
translater_logger.info(f"使用 Base URL: {params['base_url']}, Model: {params['model_id']}")
global_logger.info(f"使用 Base URL: {params['base_url']}, Model: {params['model_id']}")
ft = FileTranslater(
base_url=params['base_url'], key=params['apikey'], model_id=params['model_id'],
chunk_size=params['chunk_size'], concurrent=params['concurrent'],
@@ -125,7 +125,7 @@ async def _perform_translation(task_id: str, params: Dict[str, Any], file_conten
timeout=3)
html_content = ft.export_to_html(title=task_state["original_filename_stem"], cdn=True)
except (httpx.TimeoutException, httpx.RequestError):
translater_logger.info("CDN连接失败使用本地JS进行渲染。")
global_logger.info("CDN连接失败使用本地JS进行渲染。")
html_content = ft.export_to_html(title=task_state["original_filename_stem"], cdn=False)
end_time = time.time()
duration = end_time - task_state["task_start_time"]
@@ -134,11 +134,11 @@ async def _perform_translation(task_id: str, params: Dict[str, Any], file_conten
"html_content": html_content, "status_message": f"翻译成功!用时 {duration:.2f} 秒。",
"download_ready": True, "error_flag": False, "task_end_time": end_time,
})
translater_logger.info(f"翻译成功完成,用时 {duration:.2f} 秒。")
global_logger.info(f"翻译成功完成,用时 {duration:.2f} 秒。")
except asyncio.CancelledError:
end_time = time.time()
duration = end_time - task_state["task_start_time"]
translater_logger.info(f"翻译任务 '{original_filename}' 已被取消 (用时 {duration:.2f} 秒).")
global_logger.info(f"翻译任务 '{original_filename}' 已被取消 (用时 {duration:.2f} 秒).")
task_state.update({
"status_message": f"翻译任务已取消 (用时 {duration:.2f} 秒).", "error_flag": False,
"download_ready": False, "markdown_content": None, "md_zip_content": None,
@@ -148,7 +148,7 @@ async def _perform_translation(task_id: str, params: Dict[str, Any], file_conten
end_time = time.time()
duration = end_time - task_state["task_start_time"]
error_message = f"翻译失败: {e}"
translater_logger.error(error_message, exc_info=True)
global_logger.error(error_message, exc_info=True)
task_state.update({
"status_message": f"翻译过程中发生错误 (用时 {duration:.2f} 秒): {e}",
"error_flag": True, "download_ready": False, "markdown_content": None,
@@ -157,8 +157,8 @@ async def _perform_translation(task_id: str, params: Dict[str, Any], file_conten
finally:
task_state["is_processing"] = False
task_state["current_task_ref"] = None
translater_logger.info(f"后台翻译任务 '{original_filename}' 处理结束。")
translater_logger.removeHandler(task_handler)
global_logger.info(f"后台翻译任务 '{original_filename}' 处理结束。")
global_logger.removeHandler(task_handler)
# --- 核心任务启动与取消逻辑 (仅由服务层调用) ---

View File

@@ -1,3 +1,5 @@
import asyncio
import logging
import os
import time
from io import BytesIO
@@ -11,26 +13,44 @@ from docling.document_converter import DocumentConverter, PdfFormatOption
from docling_core.types.doc import ImageRefMode
from huggingface_hub.errors import LocalEntryNotFoundError
from docutranslate.logger import translater_logger
from docutranslate.converter import Converter, Document
import asyncio
from docutranslate.logger import global_logger
IMAGE_RESOLUTION_SCALE = 4
def file2markdown_embed_images(file_path: Path | str | DocumentStream, formula=False, code=False,
artifacts_path: Path | str | None = None) -> str:
pipeline_options = PdfPipelineOptions(artifacts_path=artifacts_path)
class ConverterDocling(Converter):
def __init__(self, code=True, formula=True, artifact=None, logger: logging.Logger | None = None):
self.code = code
self.formula = formula
self.artifact = artifact
self.logger = logger if logger else global_logger
def convert(self, document):
assert isinstance(document.filename, str)
self.logger.info(f"正在将文档转换为markdown")
time1 = time.time()
document_stream = DocumentStream(name=document.filename, stream=BytesIO(document.filebytes))
result = self.file2markdown_embed_images(document_stream)
self.logger.info(f"已转换为markdown耗时{time.time() - time1}")
return result
async def convert_async(self, document: Document) -> str:
return await asyncio.to_thread(
self.convert,
document
)
def file2markdown_embed_images(self, file_path: Path | str | DocumentStream) -> str:
pipeline_options = PdfPipelineOptions(artifacts_path=self.artifact)
pipeline_options.do_ocr = False
pipeline_options.images_scale = IMAGE_RESOLUTION_SCALE
pipeline_options.generate_picture_images = True
# pipeline_options.table_structure_options.mode = TableFormerMode.FAST
pipeline_options.table_structure_options.do_cell_matching = False
if formula:
if self.formula:
pipeline_options.do_formula_enrichment = True
if code:
if self.code:
pipeline_options.do_code_enrichment = True
# pipeline_options.accelerator_options= AcceleratorOptions(
# num_threads=4, device=AcceleratorDevice.AUTO
@@ -45,41 +65,19 @@ def file2markdown_embed_images(file_path: Path | str | DocumentStream, formula=F
conversion_result = converter.convert(file_path)
result = conversion_result.document.export_to_markdown(image_mode=ImageRefMode.EMBEDDED)
except LocalEntryNotFoundError:
translater_logger.info(f"无法连接huggingface正在尝试换源")
self.logger.info(f"无法连接huggingface正在尝试换源")
os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com'
conversion_result = converter.convert(file_path)
result = conversion_result.document.export_to_markdown(image_mode=ImageRefMode.EMBEDDED)
# translater_logger.info(f"docling转换耗时: {conversion_result.timings["pipeline_total"].times}")
return result
class ConverterDocling(Converter):
def __init__(self, code=True, formula=True, artifact=None):
self.code = code
self.formula = formula
self.artifact = artifact
def convert(self, document):
assert isinstance(document.filename, str)
translater_logger.info(f"正在将文档转换为markdown")
time1 = time.time()
document_stream = DocumentStream(name=document.filename, stream=BytesIO(document.filebytes))
result = file2markdown_embed_images(document_stream, formula=self.formula, code=self.code,
artifacts_path=self.artifact)
translater_logger.info(f"已转换为markdown耗时{time.time() - time1}")
return result
async def convert_async(self, document: Document) -> str:
return await asyncio.to_thread(
self.convert,
document
)
def set_config(self,cofig:dict):
def set_config(self, cofig: dict):
pass
def get_config_list(self) ->list[str]|None:
def get_config_list(self) -> list[str] | None:
pass
if __name__ == '__main__':
pass

View File

@@ -1,9 +1,10 @@
import asyncio
import logging
import time
import zipfile
import httpx
from docutranslate.converter import Converter, Document
from docutranslate.logger import translater_logger
from docutranslate.logger import global_logger
from docutranslate.utils.markdown_utils import embed_inline_image_from_zip
URL = 'https://mineru.net/api/v4/file-urls/batch'
@@ -21,10 +22,11 @@ client = httpx.Client(trust_env=False,timeout=timeout,proxy=None,verify=False)
# TODO: 提供更详细的logger
class ConverterMineru(Converter):
def __init__(self, token: str, formula=True):
def __init__(self, token: str, formula=True,logger:logging.Logger|None=None):
self.mineru_token = token.strip()
self.client_async = httpx.AsyncClient(timeout=timeout)
self.formula = formula
self.logger=logger if logger else global_logger
def _get_header(self):
return {
@@ -74,12 +76,12 @@ class ConverterMineru(Converter):
time.sleep(3)
def convert(self, document: Document) -> str:
translater_logger.info(f"正在将文档转换为markdown")
self.logger.info(f"正在将文档转换为markdown")
time1 = time.time()
batch_id = self.upload(document)
file_url = self.get_file_url(batch_id)
result = get_md_from_zip_url_with_inline_images(zip_url=file_url)
translater_logger.info(f"已转换为markdown耗时{time.time() - time1}")
self.logger.info(f"已转换为markdown耗时{time.time() - time1}")
return result
# TODO: 实现细粒度更高的协程

View File

@@ -1 +1 @@
from .logger import translater_logger
from .logger import global_logger

View File

@@ -3,8 +3,8 @@ import logging
# 创建日志对象
translater_logger = logging.getLogger("TranslaterLogger")
translater_logger.setLevel(logging.DEBUG)
global_logger = logging.getLogger("TranslaterLogger")
global_logger.setLevel(logging.DEBUG)
#输出到控制台
console_handler = logging.StreamHandler()
translater_logger.addHandler(console_handler)
global_logger.addHandler(console_handler)

View File

@@ -1,20 +1,23 @@
import asyncio
import html
import io
import logging
import zipfile
from pathlib import Path
from typing import Literal
import markdown2
import jinja2
import markdown2
from docutranslate.agents import Agent, AgentArgs
from docutranslate.agents import MDRefineAgent, MDTranslateAgent
from docutranslate.cacher import document_cacher_global
from docutranslate.converter import Document, ConverterMineru
from docutranslate.global_values import available_packages
from docutranslate.logger import global_logger
from docutranslate.utils.markdown_splitter import split_markdown_text, join_markdown_texts
from docutranslate.utils.markdown_utils import uris2placeholder, placeholder2_uris, MaskDict, clean_markdown_math_block, \
unembed_base64_images_to_zip, embed_inline_image_from_zip, find_markdown_in_zip
from docutranslate.logger import translater_logger
from docutranslate.global_values import available_packages
from docutranslate.utils.resource_utils import resource_path
DOCLING_FLAG = True if available_packages.get("docling") else False
@@ -35,7 +38,10 @@ class FileTranslater:
concurrent: int = default_params["concurrent"], timeout=2000,
convert_engin: Literal["docling", "mineru"] = "mineru",
docling_artifact: Path | str | None = None,
mineru_token: str = None, cache=True):
mineru_token: str = None, cache=True,
logger: logging.Logger | None = None):
self.logger = logger if logger else global_logger
self.convert_engin = convert_engin
self.mineru_token = mineru_token.strip() if mineru_token is not None else None
self._mask_dict = MaskDict()
@@ -51,7 +57,7 @@ class FileTranslater:
artifact_path = Path("./docling_artifact")
print(f"artifact_path:{artifact_path.resolve()}existed{artifact_path.is_dir()}")
if artifact_path.is_dir():
translater_logger.info("检测到docling_artifact文件夹")
self.logger.info("检测到docling_artifact文件夹")
self.docling_artifact = artifact_path
self.timeout = timeout
self.document: Document | None = None
@@ -78,7 +84,7 @@ class FileTranslater:
def _split_markdown_into_chunks(self) -> list[str]:
chunks: list[str] = split_markdown_text(self.markdown, self.chunk_size)
translater_logger.info(f"markdown分为{len(chunks)}")
self.logger.info(f"markdown分为{len(chunks)}")
return chunks
def _default_agent_params(self) -> AgentArgs:
@@ -92,7 +98,8 @@ class FileTranslater:
"model_id": self.model_id,
"temperature": self.temperature,
"max_concurrent": self.concurrent,
"timeout": self.timeout
"timeout": self.timeout,
"logger":self.logger
}
return result
@@ -105,26 +112,26 @@ class FileTranslater:
def _convert2markdown(self, document: Document, formula: bool, code: bool, artifact: Path = None) -> str:
cached_result = self.cacher.get_cached_result(document, formula, code, convert_engin=self.convert_engin)
if cached_result:
translater_logger.info("正在获取缓存结果")
self.logger.info("正在获取缓存结果")
return cached_result
if document.suffix in [".md", ".txt"]:
return document.filebytes.decode("utf-8")
if document.suffix in ['.zip']:
#寻找zip内的filename
filename=find_markdown_in_zip(document.filebytes)
return embed_inline_image_from_zip(document.filebytes,filename)
translater_logger.info("正在转化为markdown")
# 寻找zip内的filename
filename = find_markdown_in_zip(document.filebytes)
return embed_inline_image_from_zip(document.filebytes, filename)
self.logger.info("正在转化为markdown")
if self.convert_engin == "docling":
if artifact is None:
artifact = self.docling_artifact
mdconverter = ConverterDocling(formula=formula, code=code, artifact=artifact)
mdconverter = ConverterDocling(formula=formula, code=code, artifact=artifact,logger=self.logger)
result = mdconverter.convert(document)
else:
if self.mineru_token is None:
raise Exception("mineru_token未配置")
if code:
translater_logger.info("mineru暂不支持code识别")
mdconverter = ConverterMineru(token=self.mineru_token, formula=formula)
self.logger.info("mineru暂不支持code识别")
mdconverter = ConverterMineru(token=self.mineru_token, formula=formula,logger=self.logger)
result = mdconverter.convert(document)
return self.cacher.cache_result(result, document, formula, code, convert_engin=self.convert_engin)
@@ -132,26 +139,26 @@ class FileTranslater:
artifact: Path = None) -> str:
cached_result = self.cacher.get_cached_result(document, formula, code, convert_engin=self.convert_engin)
if cached_result:
translater_logger.info("解析结果已缓存,获取缓存结果")
self.logger.info("解析结果已缓存,获取缓存结果")
return cached_result
if document.suffix in [".md", ".txt"]:
return document.filebytes.decode("utf-8")
if document.suffix in ['.zip']:
#寻找zip内的filename
filename=find_markdown_in_zip(document.filebytes)
return embed_inline_image_from_zip(document.filebytes,filename)
translater_logger.info("正在转化为markdown")
# 寻找zip内的filename
filename = find_markdown_in_zip(document.filebytes)
return embed_inline_image_from_zip(document.filebytes, filename)
self.logger.info("正在转化为markdown")
if self.convert_engin == "docling":
if artifact is None:
artifact = self.docling_artifact
mdconverter = ConverterDocling(formula=formula, code=code, artifact=artifact)
mdconverter = ConverterDocling(formula=formula, code=code, artifact=artifact,logger=self.logger)
result = await mdconverter.convert_async(document)
else:
if self.mineru_token is None:
raise Exception("mineru_token未配置")
if code:
translater_logger.info("mineru暂不支持code识别")
mdconverter = ConverterMineru(token=self.mineru_token, formula=formula)
self.logger.info("mineru暂不支持code识别")
mdconverter = ConverterMineru(token=self.mineru_token, formula=formula,logger=self.logger)
result = await mdconverter.convert_async(document)
return self.cacher.cache_result(result, document, formula, code, convert_engin=self.convert_engin)
@@ -209,7 +216,7 @@ class FileTranslater:
document = self.document
if document is None:
raise Exception("未读取文件")
translater_logger.info(f"读取文件:{document.filename}")
self.logger.info(f"读取文件:{document.filename}")
self.read_document(document, formula=formula, code=code, save=save, save_format=save_format, refine=refine,
refine_agent=refine_agent)
return self
@@ -223,14 +230,14 @@ class FileTranslater:
document = self.document
if document is None:
raise Exception("未读取文件")
translater_logger.info(f"读取文件:{document.filename}")
self.logger.info(f"读取文件:{document.filename}")
# 如果是markdown直接读取
await self.read_document_async(document, formula=formula, code=code, save=save, save_format=save_format,
refine=refine, refine_agent=refine_agent)
return self
def refine_markdown_by_agent(self, refine_agent: Agent | None = None, custom_prompt=None) -> str:
translater_logger.info("正在修正markdown")
self.logger.info("正在修正markdown")
self._mask_uris_in_markdown()
chuncks = self._split_markdown_into_chunks()
if refine_agent is None:
@@ -241,11 +248,11 @@ class FileTranslater:
else:
self.markdown = join_markdown_texts(result)
self._unmask_uris_in_markdown()
translater_logger.info("markdown已修正")
self.logger.info("markdown已修正")
return self.markdown
def translate_markdown_by_agent(self, translate_agent: Agent | None = None, to_lang="中文", custom_prompt=None):
translater_logger.info("正在翻译markdown")
self.logger.info("正在翻译markdown")
self._mask_uris_in_markdown()
chuncks = self._split_markdown_into_chunks()
if translate_agent is None:
@@ -256,11 +263,11 @@ class FileTranslater:
else:
self.markdown = join_markdown_texts(result)
self._unmask_uris_in_markdown()
translater_logger.info("翻译完成")
self.logger.info("翻译完成")
return self.markdown
async def refine_markdown_by_agent_async(self, refine_agent: Agent | None = None, custom_prompt=None) -> str:
translater_logger.info("正在修正markdown")
self.logger.info("正在修正markdown")
self._mask_uris_in_markdown()
chuncks = self._split_markdown_into_chunks()
if refine_agent is None:
@@ -271,12 +278,12 @@ class FileTranslater:
else:
self.markdown = join_markdown_texts(result)
self._unmask_uris_in_markdown()
translater_logger.info("markdown已修正")
self.logger.info("markdown已修正")
return self.markdown
async def translate_markdown_by_agent_async(self, translate_agent: Agent | None = None, to_lang="中文",
custom_prompt=None):
translater_logger.info("正在翻译markdown")
self.logger.info("正在翻译markdown")
self._mask_uris_in_markdown()
chuncks = self._split_markdown_into_chunks()
if translate_agent is None:
@@ -287,7 +294,7 @@ class FileTranslater:
else:
self.markdown = join_markdown_texts(result)
self._unmask_uris_in_markdown()
translater_logger.info("翻译完成")
self.logger.info("翻译完成")
return self.markdown
def save_as_markdown(self, filename: str | Path | None = None, output_dir: str | Path = "./output", embeded=True):
@@ -303,9 +310,9 @@ class FileTranslater:
full_name = output_dir / filename.name
with open(full_name, "w", encoding="utf-8") as file:
file.write(self.export_to_markdown())
translater_logger.info(f"文件已写入{full_name.resolve()}")
self.logger.info(f"文件已写入{full_name.resolve()}")
else:
output_dir=output_dir/filename.stem
output_dir = output_dir / filename.stem
output_dir.mkdir(parents=True, exist_ok=True)
with zipfile.ZipFile(io.BytesIO(self.export_to_unembed_markdown())) as zip_ref:
zip_ref.extractall(output_dir)
@@ -336,9 +343,9 @@ class FileTranslater:
output_dir.mkdir(parents=True, exist_ok=True)
full_name = output_dir / filename
html_content = self.export_to_html(title=str(full_name.resolve().stem))
with open(full_name, "w",encoding="utf-8") as file:
with open(full_name, "w", encoding="utf-8") as file:
file.write(html_content)
translater_logger.info(f"文件已写入{full_name.resolve()}")
self.logger.info(f"文件已写入{full_name.resolve()}")
return self
def export_to_html(self, title="title", cdn=True) -> str:
@@ -463,7 +470,8 @@ class FileTranslater:
formula=True,
code=True, output_format: Literal["markdown", "html"] = "markdown",
custom_prompt_translate=None, refine=False,
refine_agent: Agent | None = None, translate_agent: Agent | None = None, save=False):
refine_agent: Agent | None = None, translate_agent: Agent | None = None,
save=False):
await self.read_bytes_async(name=name, file=file, formula=formula, code=code)
if refine: