diff --git a/docutranslate/agents/agent.py b/docutranslate/agents/agent.py index 2a5cd52..d22464d 100644 --- a/docutranslate/agents/agent.py +++ b/docutranslate/agents/agent.py @@ -1,4 +1,5 @@ import asyncio +import logging import time from concurrent.futures import ThreadPoolExecutor from threading import Lock @@ -6,7 +7,7 @@ from typing import TypedDict import httpx -from docutranslate.logger import translater_logger +from docutranslate.logger import global_logger MAX_RETRY_COUNT = 2 MAX_TOTAL_ERROR_COUNT = 10 @@ -20,18 +21,20 @@ class AgentArgs(TypedDict, total=False): temperature: float max_concurrent: int timeout: int + logger:logging.Logger class TotalErrorCounter: - def __init__(self, ): + def __init__(self,logger:logging.Logger): self.lock = Lock() self.count = 0 + self.logger=logger def add(self): self.lock.acquire() self.count += 1 if self.count>MAX_TOTAL_ERROR_COUNT: - translater_logger.info(f"错误响应过多") + self.logger.info(f"错误响应过多") self.lock.release() return self.reach_limit() @@ -39,20 +42,19 @@ class TotalErrorCounter: return self.count > MAX_TOTAL_ERROR_COUNT -total_error_counter = TotalErrorCounter() - # 仅使用多线程时用以计数 class PromptsCounter: - def __init__(self, total: int): + def __init__(self, total: int,logger:logging.Logger): self.lock = Lock() self.count = 0 self.total = total + self.logger=logger def add(self): self.lock.acquire() self.count += 1 - translater_logger.info(f"多线程-已完成:{self.count}/{self.total}") + self.logger.info(f"多线程-已完成:{self.count}/{self.total}") self.lock.release() @@ -61,7 +63,7 @@ TIMEOUT = 600 class Agent: def __init__(self, baseurl: str = "", key: str = "xx", model_id: str = "", system_prompt: str = "", temperature=0.7, - max_concurrent=15, timeout: int = TIMEOUT): + max_concurrent=15, timeout: int = TIMEOUT,logger:logging.Logger|None=None): self.baseurl = baseurl.strip() if self.baseurl.endswith("/"): self.baseurl = self.baseurl[:-1] @@ -74,6 +76,8 @@ class Agent: self.max_concurrent = max_concurrent self.timeout = timeout + self.logger=logger if logger else global_logger + self.total_error_counter = TotalErrorCounter(logger=self.logger) def _prepare_request_data(self, prompt: str, system_prompt: str, temperature=None, top_p=0.9): if temperature is None: temperature = self.temperature @@ -109,23 +113,23 @@ class Agent: result = response.json()["choices"][0]["message"]["content"] return result except httpx.HTTPStatusError as e: - translater_logger.warning(f"AI请求错误 (async): {e.response.status_code} - {e.response.text}") + self.logger.warning(f"AI请求错误 (async): {e.response.status_code} - {e.response.text}") print(f"prompt:\n{prompt}") - total_error_counter.add() + self.total_error_counter.add() return prompt except httpx.RequestError as e: - translater_logger.warning(f"AI请求连接错误 (async): {repr(e)}") + self.logger.warning(f"AI请求连接错误 (async): {repr(e)}") except (KeyError, IndexError) as e: raise Exception(f"AI响应格式错误 (async): {repr(e)}") # 如果没有正常获取结果则重试 if retry and retry_count < MAX_RETRY_COUNT: - if total_error_counter.add(): + if self.total_error_counter.add(): return prompt - translater_logger.info(f"正在重试,重试次数{retry_count}") + self.logger.info(f"正在重试,重试次数{retry_count}") await asyncio.sleep(0.5) return await self.send_async(prompt, system_prompt, retry=True, retry_count=retry_count + 1) else: - translater_logger.error(f"达到重试次数上限") + self.logger.error(f"达到重试次数上限") return prompt async def send_prompts_async( @@ -149,7 +153,7 @@ class Agent: ) nonlocal count count += 1 - translater_logger.info(f"协程-已完成{count}/{total}") + self.logger.info(f"协程-已完成{count}/{total}") return result for p_text in prompts: @@ -176,23 +180,23 @@ class Agent: result = response.json()["choices"][0]["message"]["content"] return result except httpx.HTTPStatusError as e: - translater_logger.warning(f"AI请求错误 (async): {e.response.status_code} - {e.response.text}") + self.logger.warning(f"AI请求错误 (async): {e.response.status_code} - {e.response.text}") print(f"prompt:\n{prompt}") - total_error_counter.add() + self.total_error_counter.add() return prompt except httpx.RequestError as e: - translater_logger.warning(f"AI请求连接错误 (sync): {repr(e)}\nprompt:{prompt}") + self.logger.warning(f"AI请求连接错误 (sync): {repr(e)}\nprompt:{prompt}") except (KeyError, IndexError) as e: raise Exception(f"AI响应格式错误 (sync): {repr(e)}") # 如果没有正常获取结果则重试 if retry and retry_count < MAX_RETRY_COUNT: - if total_error_counter.add(): + if self.total_error_counter.add(): return prompt - translater_logger.info(f"正在重试,重试次数{retry_count}") + self.logger.info(f"正在重试,重试次数{retry_count}") time.sleep(0.5) return self.send(prompt, system_prompt, retry=True, retry_count=retry_count + 1) else: - translater_logger.error(f"达到重试次数上限") + self.logger.error(f"达到重试次数上限") return prompt def _send_prompt_count(self, prompt: str, system_prompt: None | str, count: PromptsCounter) -> str: @@ -206,7 +210,7 @@ class Agent: system_prompt: str | None = None, ) -> list[str]: system_prompts = [system_prompt] * len(prompts) - counts = [PromptsCounter(len(prompts))] * len(prompts) + counts = [PromptsCounter(len(prompts),self.logger)] * len(prompts) output_list = [] with ThreadPoolExecutor(max_workers=self.max_concurrent) as executor: results_iterator = executor.map(self._send_prompt_count, prompts, system_prompts, counts) diff --git a/docutranslate/app.py b/docutranslate/app.py index 825f29d..75fd251 100644 --- a/docutranslate/app.py +++ b/docutranslate/app.py @@ -19,7 +19,7 @@ from pydantic import BaseModel, Field from docutranslate import FileTranslater, __version__ from docutranslate.global_values import available_packages -from docutranslate.logger import translater_logger +from docutranslate.logger import global_logger from docutranslate.translater import default_params from docutranslate.utils.resource_utils import resource_path @@ -79,10 +79,10 @@ async def lifespan(app: FastAPI): tasks_state.clear() tasks_log_queues.clear() tasks_log_histories.clear() - for handler in translater_logger.handlers[:]: - translater_logger.removeHandler(handler) - translater_logger.propagate = False - translater_logger.setLevel(logging.INFO) + for handler in global_logger.handlers[:]: + global_logger.removeHandler(handler) + global_logger.propagate = False + global_logger.setLevel(logging.INFO) print("应用启动完成,多任务状态已初始化。") yield await httpx_client.aclose() @@ -100,12 +100,12 @@ async def _perform_translation(task_id: str, params: Dict[str, Any], file_conten log_filter = logging.Filter() log_filter.task_id = task_id task_handler.addFilter(log_filter) - translater_logger.addHandler(task_handler) + global_logger.addHandler(task_handler) - translater_logger.info(f"后台翻译任务开始: 文件 '{original_filename}'") + global_logger.info(f"后台翻译任务开始: 文件 '{original_filename}'") task_state["status_message"] = f"正在处理 '{original_filename}'..." try: - translater_logger.info(f"使用 Base URL: {params['base_url']}, Model: {params['model_id']}") + global_logger.info(f"使用 Base URL: {params['base_url']}, Model: {params['model_id']}") ft = FileTranslater( base_url=params['base_url'], key=params['apikey'], model_id=params['model_id'], chunk_size=params['chunk_size'], concurrent=params['concurrent'], @@ -125,7 +125,7 @@ async def _perform_translation(task_id: str, params: Dict[str, Any], file_conten timeout=3) html_content = ft.export_to_html(title=task_state["original_filename_stem"], cdn=True) except (httpx.TimeoutException, httpx.RequestError): - translater_logger.info("CDN连接失败,使用本地JS进行渲染。") + global_logger.info("CDN连接失败,使用本地JS进行渲染。") html_content = ft.export_to_html(title=task_state["original_filename_stem"], cdn=False) end_time = time.time() duration = end_time - task_state["task_start_time"] @@ -134,11 +134,11 @@ async def _perform_translation(task_id: str, params: Dict[str, Any], file_conten "html_content": html_content, "status_message": f"翻译成功!用时 {duration:.2f} 秒。", "download_ready": True, "error_flag": False, "task_end_time": end_time, }) - translater_logger.info(f"翻译成功完成,用时 {duration:.2f} 秒。") + global_logger.info(f"翻译成功完成,用时 {duration:.2f} 秒。") except asyncio.CancelledError: end_time = time.time() duration = end_time - task_state["task_start_time"] - translater_logger.info(f"翻译任务 '{original_filename}' 已被取消 (用时 {duration:.2f} 秒).") + global_logger.info(f"翻译任务 '{original_filename}' 已被取消 (用时 {duration:.2f} 秒).") task_state.update({ "status_message": f"翻译任务已取消 (用时 {duration:.2f} 秒).", "error_flag": False, "download_ready": False, "markdown_content": None, "md_zip_content": None, @@ -148,7 +148,7 @@ async def _perform_translation(task_id: str, params: Dict[str, Any], file_conten end_time = time.time() duration = end_time - task_state["task_start_time"] error_message = f"翻译失败: {e}" - translater_logger.error(error_message, exc_info=True) + global_logger.error(error_message, exc_info=True) task_state.update({ "status_message": f"翻译过程中发生错误 (用时 {duration:.2f} 秒): {e}", "error_flag": True, "download_ready": False, "markdown_content": None, @@ -157,8 +157,8 @@ async def _perform_translation(task_id: str, params: Dict[str, Any], file_conten finally: task_state["is_processing"] = False task_state["current_task_ref"] = None - translater_logger.info(f"后台翻译任务 '{original_filename}' 处理结束。") - translater_logger.removeHandler(task_handler) + global_logger.info(f"后台翻译任务 '{original_filename}' 处理结束。") + global_logger.removeHandler(task_handler) # --- 核心任务启动与取消逻辑 (仅由服务层调用) --- diff --git a/docutranslate/converter/converter_docling.py b/docutranslate/converter/converter_docling.py index 3782599..8a34f65 100644 --- a/docutranslate/converter/converter_docling.py +++ b/docutranslate/converter/converter_docling.py @@ -1,3 +1,5 @@ +import asyncio +import logging import os import time from io import BytesIO @@ -11,62 +13,26 @@ from docling.document_converter import DocumentConverter, PdfFormatOption from docling_core.types.doc import ImageRefMode from huggingface_hub.errors import LocalEntryNotFoundError -from docutranslate.logger import translater_logger - from docutranslate.converter import Converter, Document - -import asyncio +from docutranslate.logger import global_logger IMAGE_RESOLUTION_SCALE = 4 -def file2markdown_embed_images(file_path: Path | str | DocumentStream, formula=False, code=False, - artifacts_path: Path | str | None = None) -> str: - pipeline_options = PdfPipelineOptions(artifacts_path=artifacts_path) - pipeline_options.do_ocr = False - pipeline_options.images_scale = IMAGE_RESOLUTION_SCALE - pipeline_options.generate_picture_images = True - # pipeline_options.table_structure_options.mode = TableFormerMode.FAST - pipeline_options.table_structure_options.do_cell_matching = False - if formula: - pipeline_options.do_formula_enrichment = True - if code: - pipeline_options.do_code_enrichment = True - # pipeline_options.accelerator_options= AcceleratorOptions( - # num_threads=4, device=AcceleratorDevice.AUTO - # ) - # 打印时间 - settings.debug.profile_pipeline_timings = True - converter = DocumentConverter(format_options={ - InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options) - - }) - try: - conversion_result = converter.convert(file_path) - result = conversion_result.document.export_to_markdown(image_mode=ImageRefMode.EMBEDDED) - except LocalEntryNotFoundError: - translater_logger.info(f"无法连接huggingface,正在尝试换源") - os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com' - conversion_result = converter.convert(file_path) - result = conversion_result.document.export_to_markdown(image_mode=ImageRefMode.EMBEDDED) - # translater_logger.info(f"docling转换耗时: {conversion_result.timings["pipeline_total"].times}") - return result - - class ConverterDocling(Converter): - def __init__(self, code=True, formula=True, artifact=None): + def __init__(self, code=True, formula=True, artifact=None, logger: logging.Logger | None = None): self.code = code self.formula = formula self.artifact = artifact + self.logger = logger if logger else global_logger def convert(self, document): assert isinstance(document.filename, str) - translater_logger.info(f"正在将文档转换为markdown") + self.logger.info(f"正在将文档转换为markdown") time1 = time.time() document_stream = DocumentStream(name=document.filename, stream=BytesIO(document.filebytes)) - result = file2markdown_embed_images(document_stream, formula=self.formula, code=self.code, - artifacts_path=self.artifact) - translater_logger.info(f"已转换为markdown,耗时{time.time() - time1}秒") + result = self.file2markdown_embed_images(document_stream) + self.logger.info(f"已转换为markdown,耗时{time.time() - time1}秒") return result async def convert_async(self, document: Document) -> str: @@ -75,11 +41,43 @@ class ConverterDocling(Converter): document ) - def set_config(self,cofig:dict): + def file2markdown_embed_images(self, file_path: Path | str | DocumentStream) -> str: + pipeline_options = PdfPipelineOptions(artifacts_path=self.artifact) + pipeline_options.do_ocr = False + pipeline_options.images_scale = IMAGE_RESOLUTION_SCALE + pipeline_options.generate_picture_images = True + # pipeline_options.table_structure_options.mode = TableFormerMode.FAST + pipeline_options.table_structure_options.do_cell_matching = False + if self.formula: + pipeline_options.do_formula_enrichment = True + if self.code: + pipeline_options.do_code_enrichment = True + # pipeline_options.accelerator_options= AcceleratorOptions( + # num_threads=4, device=AcceleratorDevice.AUTO + # ) + # 打印时间 + settings.debug.profile_pipeline_timings = True + converter = DocumentConverter(format_options={ + InputFormat.PDF: PdfFormatOption(pipeline_options=pipeline_options) + + }) + try: + conversion_result = converter.convert(file_path) + result = conversion_result.document.export_to_markdown(image_mode=ImageRefMode.EMBEDDED) + except LocalEntryNotFoundError: + self.logger.info(f"无法连接huggingface,正在尝试换源") + os.environ['HF_ENDPOINT'] = 'https://hf-mirror.com' + conversion_result = converter.convert(file_path) + result = conversion_result.document.export_to_markdown(image_mode=ImageRefMode.EMBEDDED) + # translater_logger.info(f"docling转换耗时: {conversion_result.timings["pipeline_total"].times}") + return result + + def set_config(self, cofig: dict): pass - def get_config_list(self) ->list[str]|None: + def get_config_list(self) -> list[str] | None: pass + if __name__ == '__main__': pass diff --git a/docutranslate/converter/converter_mineru.py b/docutranslate/converter/converter_mineru.py index a7cdb95..08f1fce 100644 --- a/docutranslate/converter/converter_mineru.py +++ b/docutranslate/converter/converter_mineru.py @@ -1,9 +1,10 @@ import asyncio +import logging import time import zipfile import httpx from docutranslate.converter import Converter, Document -from docutranslate.logger import translater_logger +from docutranslate.logger import global_logger from docutranslate.utils.markdown_utils import embed_inline_image_from_zip URL = 'https://mineru.net/api/v4/file-urls/batch' @@ -21,10 +22,11 @@ client = httpx.Client(trust_env=False,timeout=timeout,proxy=None,verify=False) # TODO: 提供更详细的logger class ConverterMineru(Converter): - def __init__(self, token: str, formula=True): + def __init__(self, token: str, formula=True,logger:logging.Logger|None=None): self.mineru_token = token.strip() self.client_async = httpx.AsyncClient(timeout=timeout) self.formula = formula + self.logger=logger if logger else global_logger def _get_header(self): return { @@ -74,12 +76,12 @@ class ConverterMineru(Converter): time.sleep(3) def convert(self, document: Document) -> str: - translater_logger.info(f"正在将文档转换为markdown") + self.logger.info(f"正在将文档转换为markdown") time1 = time.time() batch_id = self.upload(document) file_url = self.get_file_url(batch_id) result = get_md_from_zip_url_with_inline_images(zip_url=file_url) - translater_logger.info(f"已转换为markdown,耗时{time.time() - time1}秒") + self.logger.info(f"已转换为markdown,耗时{time.time() - time1}秒") return result # TODO: 实现细粒度更高的协程 diff --git a/docutranslate/logger/__init__.py b/docutranslate/logger/__init__.py index c23054f..e2b81d6 100644 --- a/docutranslate/logger/__init__.py +++ b/docutranslate/logger/__init__.py @@ -1 +1 @@ -from .logger import translater_logger \ No newline at end of file +from .logger import global_logger \ No newline at end of file diff --git a/docutranslate/logger/logger.py b/docutranslate/logger/logger.py index 69243a9..d002d4f 100644 --- a/docutranslate/logger/logger.py +++ b/docutranslate/logger/logger.py @@ -3,8 +3,8 @@ import logging # 创建日志对象 -translater_logger = logging.getLogger("TranslaterLogger") -translater_logger.setLevel(logging.DEBUG) +global_logger = logging.getLogger("TranslaterLogger") +global_logger.setLevel(logging.DEBUG) #输出到控制台 console_handler = logging.StreamHandler() -translater_logger.addHandler(console_handler) \ No newline at end of file +global_logger.addHandler(console_handler) \ No newline at end of file diff --git a/docutranslate/translater.py b/docutranslate/translater.py index 6d2a523..59844af 100644 --- a/docutranslate/translater.py +++ b/docutranslate/translater.py @@ -1,20 +1,23 @@ import asyncio import html import io +import logging import zipfile from pathlib import Path from typing import Literal -import markdown2 + import jinja2 +import markdown2 + from docutranslate.agents import Agent, AgentArgs from docutranslate.agents import MDRefineAgent, MDTranslateAgent from docutranslate.cacher import document_cacher_global from docutranslate.converter import Document, ConverterMineru +from docutranslate.global_values import available_packages +from docutranslate.logger import global_logger from docutranslate.utils.markdown_splitter import split_markdown_text, join_markdown_texts from docutranslate.utils.markdown_utils import uris2placeholder, placeholder2_uris, MaskDict, clean_markdown_math_block, \ unembed_base64_images_to_zip, embed_inline_image_from_zip, find_markdown_in_zip -from docutranslate.logger import translater_logger -from docutranslate.global_values import available_packages from docutranslate.utils.resource_utils import resource_path DOCLING_FLAG = True if available_packages.get("docling") else False @@ -35,7 +38,10 @@ class FileTranslater: concurrent: int = default_params["concurrent"], timeout=2000, convert_engin: Literal["docling", "mineru"] = "mineru", docling_artifact: Path | str | None = None, - mineru_token: str = None, cache=True): + mineru_token: str = None, cache=True, + logger: logging.Logger | None = None): + self.logger = logger if logger else global_logger + self.convert_engin = convert_engin self.mineru_token = mineru_token.strip() if mineru_token is not None else None self._mask_dict = MaskDict() @@ -51,7 +57,7 @@ class FileTranslater: artifact_path = Path("./docling_artifact") print(f"artifact_path:{artifact_path.resolve()},existed:{artifact_path.is_dir()}") if artifact_path.is_dir(): - translater_logger.info("检测到docling_artifact文件夹") + self.logger.info("检测到docling_artifact文件夹") self.docling_artifact = artifact_path self.timeout = timeout self.document: Document | None = None @@ -78,7 +84,7 @@ class FileTranslater: def _split_markdown_into_chunks(self) -> list[str]: chunks: list[str] = split_markdown_text(self.markdown, self.chunk_size) - translater_logger.info(f"markdown分为{len(chunks)}块") + self.logger.info(f"markdown分为{len(chunks)}块") return chunks def _default_agent_params(self) -> AgentArgs: @@ -92,7 +98,8 @@ class FileTranslater: "model_id": self.model_id, "temperature": self.temperature, "max_concurrent": self.concurrent, - "timeout": self.timeout + "timeout": self.timeout, + "logger":self.logger } return result @@ -105,26 +112,26 @@ class FileTranslater: def _convert2markdown(self, document: Document, formula: bool, code: bool, artifact: Path = None) -> str: cached_result = self.cacher.get_cached_result(document, formula, code, convert_engin=self.convert_engin) if cached_result: - translater_logger.info("正在获取缓存结果") + self.logger.info("正在获取缓存结果") return cached_result if document.suffix in [".md", ".txt"]: return document.filebytes.decode("utf-8") if document.suffix in ['.zip']: - #寻找zip内的filename - filename=find_markdown_in_zip(document.filebytes) - return embed_inline_image_from_zip(document.filebytes,filename) - translater_logger.info("正在转化为markdown") + # 寻找zip内的filename + filename = find_markdown_in_zip(document.filebytes) + return embed_inline_image_from_zip(document.filebytes, filename) + self.logger.info("正在转化为markdown") if self.convert_engin == "docling": if artifact is None: artifact = self.docling_artifact - mdconverter = ConverterDocling(formula=formula, code=code, artifact=artifact) + mdconverter = ConverterDocling(formula=formula, code=code, artifact=artifact,logger=self.logger) result = mdconverter.convert(document) else: if self.mineru_token is None: raise Exception("mineru_token未配置") if code: - translater_logger.info("mineru暂不支持code识别") - mdconverter = ConverterMineru(token=self.mineru_token, formula=formula) + self.logger.info("mineru暂不支持code识别") + mdconverter = ConverterMineru(token=self.mineru_token, formula=formula,logger=self.logger) result = mdconverter.convert(document) return self.cacher.cache_result(result, document, formula, code, convert_engin=self.convert_engin) @@ -132,26 +139,26 @@ class FileTranslater: artifact: Path = None) -> str: cached_result = self.cacher.get_cached_result(document, formula, code, convert_engin=self.convert_engin) if cached_result: - translater_logger.info("解析结果已缓存,获取缓存结果") + self.logger.info("解析结果已缓存,获取缓存结果") return cached_result if document.suffix in [".md", ".txt"]: return document.filebytes.decode("utf-8") if document.suffix in ['.zip']: - #寻找zip内的filename - filename=find_markdown_in_zip(document.filebytes) - return embed_inline_image_from_zip(document.filebytes,filename) - translater_logger.info("正在转化为markdown") + # 寻找zip内的filename + filename = find_markdown_in_zip(document.filebytes) + return embed_inline_image_from_zip(document.filebytes, filename) + self.logger.info("正在转化为markdown") if self.convert_engin == "docling": if artifact is None: artifact = self.docling_artifact - mdconverter = ConverterDocling(formula=formula, code=code, artifact=artifact) + mdconverter = ConverterDocling(formula=formula, code=code, artifact=artifact,logger=self.logger) result = await mdconverter.convert_async(document) else: if self.mineru_token is None: raise Exception("mineru_token未配置") if code: - translater_logger.info("mineru暂不支持code识别") - mdconverter = ConverterMineru(token=self.mineru_token, formula=formula) + self.logger.info("mineru暂不支持code识别") + mdconverter = ConverterMineru(token=self.mineru_token, formula=formula,logger=self.logger) result = await mdconverter.convert_async(document) return self.cacher.cache_result(result, document, formula, code, convert_engin=self.convert_engin) @@ -209,7 +216,7 @@ class FileTranslater: document = self.document if document is None: raise Exception("未读取文件") - translater_logger.info(f"读取文件:{document.filename}") + self.logger.info(f"读取文件:{document.filename}") self.read_document(document, formula=formula, code=code, save=save, save_format=save_format, refine=refine, refine_agent=refine_agent) return self @@ -223,14 +230,14 @@ class FileTranslater: document = self.document if document is None: raise Exception("未读取文件") - translater_logger.info(f"读取文件:{document.filename}") + self.logger.info(f"读取文件:{document.filename}") # 如果是markdown,直接读取 await self.read_document_async(document, formula=formula, code=code, save=save, save_format=save_format, refine=refine, refine_agent=refine_agent) return self def refine_markdown_by_agent(self, refine_agent: Agent | None = None, custom_prompt=None) -> str: - translater_logger.info("正在修正markdown") + self.logger.info("正在修正markdown") self._mask_uris_in_markdown() chuncks = self._split_markdown_into_chunks() if refine_agent is None: @@ -241,11 +248,11 @@ class FileTranslater: else: self.markdown = join_markdown_texts(result) self._unmask_uris_in_markdown() - translater_logger.info("markdown已修正") + self.logger.info("markdown已修正") return self.markdown def translate_markdown_by_agent(self, translate_agent: Agent | None = None, to_lang="中文", custom_prompt=None): - translater_logger.info("正在翻译markdown") + self.logger.info("正在翻译markdown") self._mask_uris_in_markdown() chuncks = self._split_markdown_into_chunks() if translate_agent is None: @@ -256,11 +263,11 @@ class FileTranslater: else: self.markdown = join_markdown_texts(result) self._unmask_uris_in_markdown() - translater_logger.info("翻译完成") + self.logger.info("翻译完成") return self.markdown async def refine_markdown_by_agent_async(self, refine_agent: Agent | None = None, custom_prompt=None) -> str: - translater_logger.info("正在修正markdown") + self.logger.info("正在修正markdown") self._mask_uris_in_markdown() chuncks = self._split_markdown_into_chunks() if refine_agent is None: @@ -271,12 +278,12 @@ class FileTranslater: else: self.markdown = join_markdown_texts(result) self._unmask_uris_in_markdown() - translater_logger.info("markdown已修正") + self.logger.info("markdown已修正") return self.markdown async def translate_markdown_by_agent_async(self, translate_agent: Agent | None = None, to_lang="中文", custom_prompt=None): - translater_logger.info("正在翻译markdown") + self.logger.info("正在翻译markdown") self._mask_uris_in_markdown() chuncks = self._split_markdown_into_chunks() if translate_agent is None: @@ -287,7 +294,7 @@ class FileTranslater: else: self.markdown = join_markdown_texts(result) self._unmask_uris_in_markdown() - translater_logger.info("翻译完成") + self.logger.info("翻译完成") return self.markdown def save_as_markdown(self, filename: str | Path | None = None, output_dir: str | Path = "./output", embeded=True): @@ -303,9 +310,9 @@ class FileTranslater: full_name = output_dir / filename.name with open(full_name, "w", encoding="utf-8") as file: file.write(self.export_to_markdown()) - translater_logger.info(f"文件已写入{full_name.resolve()}") + self.logger.info(f"文件已写入{full_name.resolve()}") else: - output_dir=output_dir/filename.stem + output_dir = output_dir / filename.stem output_dir.mkdir(parents=True, exist_ok=True) with zipfile.ZipFile(io.BytesIO(self.export_to_unembed_markdown())) as zip_ref: zip_ref.extractall(output_dir) @@ -336,9 +343,9 @@ class FileTranslater: output_dir.mkdir(parents=True, exist_ok=True) full_name = output_dir / filename html_content = self.export_to_html(title=str(full_name.resolve().stem)) - with open(full_name, "w",encoding="utf-8") as file: + with open(full_name, "w", encoding="utf-8") as file: file.write(html_content) - translater_logger.info(f"文件已写入{full_name.resolve()}") + self.logger.info(f"文件已写入{full_name.resolve()}") return self def export_to_html(self, title="title", cdn=True) -> str: @@ -463,7 +470,8 @@ class FileTranslater: formula=True, code=True, output_format: Literal["markdown", "html"] = "markdown", custom_prompt_translate=None, refine=False, - refine_agent: Agent | None = None, translate_agent: Agent | None = None, save=False): + refine_agent: Agent | None = None, translate_agent: Agent | None = None, + save=False): await self.read_bytes_async(name=name, file=file, formula=formula, code=code) if refine: