diff --git a/.idea/workspace.xml b/.idea/workspace.xml index 6d58df6..85cb201 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -5,13 +5,8 @@ - - - + - - - diff --git a/docutranslate/Agents/__init__.py b/docutranslate/Agents/__init__.py index 3a9b111..d918a04 100644 --- a/docutranslate/Agents/__init__.py +++ b/docutranslate/Agents/__init__.py @@ -1,2 +1,2 @@ -from .agent import Agent,AgentArgs -from .markdown_agent import MDRefineAgent,MDTranslateAgent \ No newline at end of file +from .agent import Agent, AgentArgs +from .markdown_agent import MDRefineAgent, MDTranslateAgent diff --git a/docutranslate/translater.py b/docutranslate/translater.py index 7e1bec5..bee4b64 100644 --- a/docutranslate/translater.py +++ b/docutranslate/translater.py @@ -2,15 +2,17 @@ from pathlib import Path from typing import Literal import markdown2 -# import mdformat +from docutranslate.Agents import Agent, AgentArgs from docutranslate.Agents import MDRefineAgent, MDTranslateAgent -from docutranslate.Agents.agent import Agent, AgentArgs from docutranslate.utils.convert import file2markdown_embed_images from docutranslate.utils.markdown_splitter import split_markdown_text from docutranslate.utils.markdown_utils import uris2placeholder, placeholder2_uris, MaskDict +# import mdformat + + class FileTranslater: def __init__(self, file_path: Path | str | None = None, chunksize: int = 4096, base_url="", key=None, model_id="", temperature=0.7, max_concurrent=20, docling_artifact: Path | str | None = None, @@ -69,7 +71,8 @@ class FileTranslater: return result def read_file(self, file_path: Path | str | None = None, formula=False, code=False, save=False, - save_format: Literal["markdown", "html"] = "markdown", refine=False, refine_agent:Agent|None=None): + save_format: Literal["markdown", "html"] = "markdown", refine=False, + refine_agent: Agent | None = None): if file_path is None: if self.file_path is None: raise Exception("未设置文件路径") @@ -104,7 +107,7 @@ class FileTranslater: print("markdown已修正") return self.markdown - def translate_markdown_by_agent(self, translate_agent: Agent|None=None): + def translate_markdown_by_agent(self, translate_agent: Agent | None = None): print("正在翻译markdown") chuncks = self._split_markdown_into_chunks() diff --git a/docutranslate/utils/markdown_utils.py b/docutranslate/utils/markdown_utils.py index f764de6..23e6b8e 100644 --- a/docutranslate/utils/markdown_utils.py +++ b/docutranslate/utils/markdown_utils.py @@ -3,7 +3,6 @@ import threading import uuid - class MaskDict: def __init__(self): self._dict = {} @@ -33,8 +32,9 @@ class MaskDict: with self._lock: return item in self._dict + # def uris2placeholder(markdown:str, mask_dict:MaskDict): - ##替换整个uri +##替换整个uri # def uri2placeholder(match: re.Match): # id = mask_dict.create_id() # mask_dict.set(id, match.group()) @@ -44,7 +44,7 @@ class MaskDict: # markdown = re.sub(uri_pattern, uri2placeholder, markdown) # return markdown -def uris2placeholder(markdown:str, mask_dict:MaskDict): +def uris2placeholder(markdown: str, mask_dict: MaskDict): ##只替换uri里的链接部分,保留标题 def uri2placeholder(match: re.Match): id = mask_dict.create_id() @@ -54,10 +54,12 @@ def uris2placeholder(markdown:str, mask_dict:MaskDict): uri_pattern = r'(!?\[.*?\])\((.*?)\)' markdown = re.sub(uri_pattern, uri2placeholder, markdown) return markdown -def placeholder2_uris(markdown:str, mask_dict:MaskDict): - def placeholder2uri(match:re.Match): - id=match.group(1) - uri=mask_dict.get(id) + + +def placeholder2_uris(markdown: str, mask_dict: MaskDict): + def placeholder2uri(match: re.Match): + id = match.group(1) + uri = mask_dict.get(id) if uri is None: return match.group() return uri @@ -67,6 +69,5 @@ def placeholder2_uris(markdown:str, mask_dict:MaskDict): return markdown - if __name__ == '__main__': pass