diff --git a/.idea/workspace.xml b/.idea/workspace.xml index dd4ed45..93f7d8b 100644 --- a/.idea/workspace.xml +++ b/.idea/workspace.xml @@ -5,7 +5,11 @@ + + + + - { - "keyToString": { - "DefaultHtmlFileTemplate": "HTML File", - "JavaScript 调试.output.html (1).executor": "Run", - "JavaScript 调试.output.html.executor": "Run", - "JavaScript 调试.regex_中文.html.executor": "Run", - "JavaScript 调试.test2_英文.html.executor": "Run", - "ModuleVcsDetector.initialDetectionPerformed": "true", - "Python 测试.Python 测试 (markdown_mask.py 内).executor": "Run", - "Python 测试.markdown_mask.Test.test_basic_link_masking 的 Python 测试.executor": "Run", - "Python.PDFtranslater (1).executor": "Run", - "Python.PDFtranslater (2).executor": "Run", - "Python.agent_utils.executor": "Run", - "Python.convert.executor": "Run", - "Python.markdown_splitter.executor": "Run", - "Python.markdown_utils.executor": "Run", - "Python.test.executor": "Run", - "Python.test1.executor": "Run", - "Python.translater.executor": "Debug", - "RunOnceActivity.ShowReadmeOnStart": "true", - "RunOnceActivity.git.unshallow": "true", - "git-widget-placeholder": "master", - "last_opened_file_path": "C:/Users/jxgm/Desktop/FileTranslate/tests/resource", - "node.js.detected.package.eslint": "true", - "node.js.detected.package.tslint": "true", - "node.js.selected.package.eslint": "(autodetect)", - "node.js.selected.package.tslint": "(autodetect)", - "nodejs_package_manager_path": "npm", - "settings.editor.selected.configurable": "Errors", - "vue.rearranger.settings.migration": "true" + +}]]> @@ -75,7 +79,7 @@ - + - + @@ -301,7 +305,8 @@ - + + @@ -311,7 +316,7 @@ - + diff --git a/README.md b/README.md index 6b1dcbf..0659a2a 100644 --- a/README.md +++ b/README.md @@ -21,12 +21,15 @@ ```python from docutranslate.translater import FileTranslater + +translater=FileTranslater(base_url="", + key="", + model_id="") # 不开启公式、代码识别 -FileTranslater(base_url="", key="", model_id="").translate_pdf_file("", to_lang="中文") +translater.translate_pdf_file("", to_lang="中文") # 开启公式、代码识别(需要下载更多模型) -FileTranslater(base_url="", key="", model_id="").translate_pdf_file("", to_lang="中文", - formula=True, code=True) +translater.translate_pdf_file("", to_lang="中文",formula=True, code=True) ``` > 第一次使用时需要下载模型(约1G、使用公式、代码识别需要多约0.5G),请稍作等待 > 输出文件默认放在`./output`中 @@ -34,7 +37,7 @@ FileTranslater(base_url="", key="", model_id="").transla ## 使用不同的agent分别进行文本修正和翻译 ```python -from docutranslate.translater import FileTranslater +from docutranslate import FileTranslater translater = FileTranslater() @@ -49,7 +52,7 @@ translater.translate_pdf_file(pdf_path="", to_lang="中文", refine_a ### 创建FileTranslate ```python -from docutranslate.translater import FileTranslater +from docutranslate import FileTranslater translater = FileTranslater(base_url="", key="", @@ -83,10 +86,10 @@ translater.translate_markdown_file(r"<要翻译的markdown路径>", # 常用baseurl -| 平台名称 | baseurl | -|-----------|--------------------------------------| -| ollama | http://127.0.0.1:11434/v1 | +| 平台名称 | baseurl | +|----------|--------------------------------------| +| ollama | http://127.0.0.1:11434/v1 | | lm studio | http://127.0.0.1:1234/v1 | -| openai | https://api.openai.com/v1/ | -| deepseek | https://api.deepseek.com/v1 | -| 智谱ai | https://open.bigmodel.cn/api/paas/v4 | \ No newline at end of file +| openai | https://api.openai.com/v1/ | +| deepseek | https://api.deepseek.com/v1 | +| 智谱ai | https://open.bigmodel.cn/api/paas/v4 | \ No newline at end of file diff --git a/docutranslate/__init__.py b/docutranslate/__init__.py index e69de29..daaba6a 100644 --- a/docutranslate/__init__.py +++ b/docutranslate/__init__.py @@ -0,0 +1 @@ +from .translater import FileTranslater \ No newline at end of file diff --git a/docutranslate/utils/agent_utils.py b/docutranslate/utils/agent_utils.py index 7e5dd42..8cfcab1 100644 --- a/docutranslate/utils/agent_utils.py +++ b/docutranslate/utils/agent_utils.py @@ -2,6 +2,7 @@ import asyncio import httpx +TIMEOUT=250 class Agent: def __init__(self, baseurl="", key="", model_id="", system_prompt="", temperature=0.7, max_concurrent=5): @@ -30,7 +31,7 @@ class Agent: } return headers, data - # def send_prompt(self,prompt,system_prompt=None,timeout=50): + # def send_prompt(self,prompt,system_prompt=None,timeout=TIMEOUT): # if system_prompt is None: # system_prompt=self.system_prompt # headers,data=self._prepare_request_data(prompt,system_prompt) @@ -38,7 +39,7 @@ class Agent: # response.raise_for_status() # return response.json()["choices"][0]["message"]["content"].lstrip() - async def send_async(self, prompt: str, system_prompt: None | str = None, timeout: int = 200) -> str: + async def send_async(self, prompt: str, system_prompt: None | str = None, timeout: int = TIMEOUT) -> str: if system_prompt is None: system_prompt = self.system_prompt """Sends a single prompt asynchronously.""" @@ -64,7 +65,7 @@ class Agent: self, prompts: list[str], system_prompt: str | None = None, - timeout: int = 50, + timeout: int = TIMEOUT, max_concurrent: int = 5 # 新增参数,默认并发数为5 ) -> list[str]: total = len(prompts) @@ -81,7 +82,7 @@ class Agent: result = await self.send_async( prompt=p_text, system_prompt=system_prompt, - timeout=timeout + timeout=TIMEOUT ) nonlocal count count += 1 @@ -99,7 +100,7 @@ class Agent: self, prompts: list[str], system_prompt: str | None = None, - timeout: int = 50, + timeout: int = TIMEOUT, ) -> list[str]: result = asyncio.run(self.send_prompts_async(prompts, system_prompt, timeout, self.max_concurrent)) return result diff --git a/pyproject.toml b/pyproject.toml index 7b38cbd..d67261a 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -8,5 +8,6 @@ dependencies = [ "docling>=2.31.0", "httpx>=0.28.1", "markdown2>=2.5.3", + "transformers>=4.42.4", ]