:$c_0+1=2$,$c_0$等于几
-{$c_0$,$c_1$,$c^2$}是一个集合
-\no_think"""
+{$c_0$,$c_1$,$c^2$}是一个集合"""
+ if custom_prompt:
+ self.system_prompt += "\n#其余要求(可能为背景或指令)\n" + custom_prompt
+ self.system_prompt+=r'\no_think'
class MDTranslateAgent(Agent):
- def __init__(self,to_lang="中文",**kwargs:Unpack[AgentArgs]):
+ def __init__(self, custom_prompt=None, to_lang="中文", **kwargs: Unpack[AgentArgs]):
super().__init__(**kwargs)
- self.system_prompt=f"""
+ self.system_prompt = f"""
# 角色
你是一个专业的机器翻译引擎
# 工作
@@ -79,5 +82,7 @@ The equation is E=mc 2. This is famous.
volume 99, pages 173–186, 1999.
输出:【文献引用保持源语言】
[2] M. Castro, B. Liskov, et al. Practical byzantine fault tolerance. In OSDI,
-volume 99, pages 173–186, 1999.
-\\no_think"""
\ No newline at end of file
+volume 99, pages 173–186, 1999."""
+ if custom_prompt:
+ self.system_prompt += "\n#其余要求(可能为背景或指令)\n" + custom_prompt
+ self.system_prompt+=r'\no_think'
diff --git a/docutranslate/app.py b/docutranslate/app.py
index 3412716..0bbf985 100644
--- a/docutranslate/app.py
+++ b/docutranslate/app.py
@@ -127,13 +127,14 @@ async def _perform_translation(params: Dict[str, Any], file_contents: bytes, ori
to_lang=params['to_lang'],
formula=params['formula_ocr'],
code=params['code_ocr'],
+ custom_prompt_translate=params['custom_prompt_translate'],
refine=params['refine_markdown'],
save=False
)
md_content = ft.export_to_markdown()
try:
- httpx.head("https://cdn.jsdelivr.net/npm/katex@0.16.22/dist/contrib/auto-render.min.js",timeout=1)
+ httpx.head("https://cdn.jsdelivr.net/npm/katex@0.16.22/dist/contrib/auto-render.min.js", timeout=1)
html_content = ft.export_to_html(title=current_state["original_filename_stem"], cdn=True)
except TimeoutError:
translater_logger.info("无法连接cdn,使用本地js进行pdf渲染")
@@ -201,7 +202,6 @@ async def main_page(request: Request):
@app.post("/translate")
async def handle_translate(
- request: Request, # Added request for potential future use, not strictly needed now
base_url: str = Form(...),
apikey: str = Form(...),
model_id: str = Form(...),
@@ -209,8 +209,9 @@ async def handle_translate(
formula_ocr: bool = Form(False),
code_ocr: bool = Form(False),
refine_markdown: bool = Form(False),
- convert_engin: str = Form(...), # New parameter
- mineru_token: Optional[str] = Form(None), # New parameter
+ convert_engin: str = Form(...),
+ mineru_token: Optional[str] = Form(None),
+ custom_prompt_translate: Optional[str] = Form(None),
file: UploadFile = File(...)
):
global current_state, log_queue, log_history
@@ -276,8 +277,9 @@ async def handle_translate(
"base_url": base_url, "apikey": apikey, "model_id": model_id,
"to_lang": to_lang, "formula_ocr": formula_ocr,
"code_ocr": code_ocr, "refine_markdown": refine_markdown,
- "convert_engin": convert_engin, # Pass to task
- "mineru_token": mineru_token, # Pass to task
+ "convert_engin": convert_engin,
+ "mineru_token": mineru_token,
+ "custom_prompt_translate":custom_prompt_translate,
}
loop = asyncio.get_running_loop()
diff --git a/docutranslate/static/index.html b/docutranslate/static/index.html
index e7be327..3c6cdf8 100644
--- a/docutranslate/static/index.html
+++ b/docutranslate/static/index.html
@@ -22,6 +22,14 @@
padding-bottom: 0.5rem;
}
+ .prompt-area {
+ resize: vertical;
+ width: 100%;
+ min-height: 200px;
+ font-size: 0.9em;
+ box-sizing: border-box;
+ display: block;
+ }
.log-area {
background-color: #f5f5f5;
@@ -398,6 +406,14 @@
+
+ 系统提示(可选)
+
+
+
+
+
diff --git a/docutranslate/translater.py b/docutranslate/translater.py
index 1fdfdce..8fea3fa 100644
--- a/docutranslate/translater.py
+++ b/docutranslate/translater.py
@@ -76,6 +76,12 @@ class FileTranslater:
}
return result
+ def default_refine_agent(self, custom_prompt=None) -> MDRefineAgent:
+ return MDRefineAgent(custom_prompt=custom_prompt, **self._default_agent_params())
+
+ def default_translate_agent(self, custom_prompt=None, to_lang="中文") -> MDTranslateAgent:
+ return MDTranslateAgent(custom_prompt=custom_prompt, to_lang=to_lang, **self._default_agent_params())
+
def _convert2markdown(self, document: Document, formula: bool, code: bool, artifact: Path = None) -> str:
translater_logger.info(f"正在使用{self.convert_engin}转换文件为markdown")
if self.convert_engin == "docling":
@@ -202,48 +208,49 @@ class FileTranslater:
self.save_as_markdown(filename=f"{file_path.stem}.md")
return self
- def refine_markdown_by_agent(self, refine_agent: Agent | None = None) -> str:
+ def refine_markdown_by_agent(self, refine_agent: Agent | None = None, custom_prompt=None) -> str:
translater_logger.info("正在修正markdown")
self._mask_uris_in_markdown()
chuncks = self._split_markdown_into_chunks()
if refine_agent is None:
- refine_agent = MDRefineAgent(**self._default_agent_params())
+ refine_agent = self.default_refine_agent(custom_prompt)
result: list[str] = refine_agent.send_prompts(chuncks)
self.markdown = join_markdown_texts(result)
self._unmask_uris_in_markdown()
translater_logger.info("markdown已修正")
return self.markdown
- def translate_markdown_by_agent(self, translate_agent: Agent | None = None, to_lang="中文"):
+ def translate_markdown_by_agent(self, translate_agent: Agent | None = None, to_lang="中文", custom_prompt=None):
translater_logger.info("正在翻译markdown")
self._mask_uris_in_markdown()
chuncks = self._split_markdown_into_chunks()
if translate_agent is None:
- translate_agent = MDTranslateAgent(to_lang=to_lang, **self._default_agent_params())
+ translate_agent = self.default_translate_agent(custom_prompt=custom_prompt, to_lang=to_lang)
result: list[str] = translate_agent.send_prompts(chuncks)
self.markdown = join_markdown_texts(result)
self._unmask_uris_in_markdown()
translater_logger.info("翻译完成")
return self.markdown
- async def refine_markdown_by_agent_async(self, refine_agent: Agent | None = None) -> str:
+ async def refine_markdown_by_agent_async(self, refine_agent: Agent | None = None, custom_prompt=None) -> str:
translater_logger.info("正在修正markdown")
self._mask_uris_in_markdown()
chuncks = self._split_markdown_into_chunks()
if refine_agent is None:
- refine_agent = MDRefineAgent(**self._default_agent_params())
+ refine_agent = self.default_refine_agent(custom_prompt=custom_prompt)
result: list[str] = await refine_agent.send_prompts_async(chuncks)
self.markdown = join_markdown_texts(result)
self._unmask_uris_in_markdown()
translater_logger.info("markdown已修正")
return self.markdown
- async def translate_markdown_by_agent_async(self, translate_agent: Agent | None = None, to_lang="中文"):
+ async def translate_markdown_by_agent_async(self, translate_agent: Agent | None = None, to_lang="中文",
+ custom_prompt=None):
translater_logger.info("正在翻译markdown")
self._mask_uris_in_markdown()
chuncks = self._split_markdown_into_chunks()
if translate_agent is None:
- translate_agent = MDTranslateAgent(to_lang=to_lang, **self._default_agent_params())
+ translate_agent = self.default_translate_agent(to_lang=to_lang, custom_prompt=custom_prompt)
result: list[str] = await translate_agent.send_prompts_async(chuncks)
self.markdown = join_markdown_texts(result)
self._unmask_uris_in_markdown()
@@ -318,7 +325,9 @@ class FileTranslater:
def translate_file(self, file_path: Path | str | None = None, to_lang="中文", output_dir="./output",
formula=True,
code=True, output_format: Literal["markdown", "html"] = "markdown", refine=False,
- refine_agent: Agent | None = None, translate_agent: Agent | None = None, save=True):
+ custom_prompt_translate=None, refine_agent: Agent | None = None,
+ translate_agent: Agent | None = None,
+ save=True):
if file_path is None:
assert self.file_path is not None, "未输入文件路径"
file_path = self.file_path
@@ -327,7 +336,7 @@ class FileTranslater:
self.read_file(file_path, formula=formula, code=code)
if refine:
self.refine_markdown_by_agent(refine_agent)
- self.translate_markdown_by_agent(translate_agent, to_lang=to_lang)
+ self.translate_markdown_by_agent(translate_agent, to_lang=to_lang, custom_prompt=custom_prompt_translate)
if save:
if output_format == "markdown":
filename = f"{file_path.stem}_{to_lang}.md"
@@ -339,7 +348,8 @@ class FileTranslater:
async def translate_file_async(self, file_path: Path | str | None = None, to_lang="中文", output_dir="./output",
formula=True,
- code=True, output_format: Literal["markdown", "html"] = "markdown", refine=False,
+ code=True, output_format: Literal["markdown", "html"] = "markdown",
+ custom_prompt_translate=None, refine=False,
refine_agent: Agent | None = None, translate_agent: Agent | None = None, save=True):
if file_path is None:
assert self.file_path is not None, "未输入文件路径"
@@ -354,7 +364,8 @@ class FileTranslater:
)
if refine:
await self.refine_markdown_by_agent_async(refine_agent)
- await self.translate_markdown_by_agent_async(translate_agent, to_lang=to_lang)
+ await self.translate_markdown_by_agent_async(translate_agent, to_lang=to_lang,
+ custom_prompt=custom_prompt_translate)
if save:
if output_format == "markdown":
filename = f"{file_path.stem}_{to_lang}.md"
@@ -366,12 +377,14 @@ class FileTranslater:
def translate_bytes(self, name: str, file: bytes, to_lang="中文", output_dir="./output",
formula=True,
- code=True, output_format: Literal["markdown", "html"] = "markdown", refine=False,
+ code=True, output_format: Literal["markdown", "html"] = "markdown",
+ custom_prompt_translate=None,
+ refine=False,
refine_agent: Agent | None = None, translate_agent: Agent | None = None, save=True):
self.read_bytes(name=name, file=file, formula=formula, code=code)
if refine:
self.refine_markdown_by_agent(refine_agent)
- self.translate_markdown_by_agent(translate_agent, to_lang=to_lang)
+ self.translate_markdown_by_agent(translate_agent, to_lang=to_lang, custom_prompt=custom_prompt_translate)
if save:
if output_format == "markdown":
filename = f"{name}_{to_lang}.md"
@@ -383,13 +396,15 @@ class FileTranslater:
async def translate_bytes_async(self, name: str, file: bytes, to_lang="中文", output_dir="./output",
formula=True,
- code=True, output_format: Literal["markdown", "html"] = "markdown", refine=False,
+ code=True, output_format: Literal["markdown", "html"] = "markdown",
+ custom_prompt_translate=None, refine=False,
refine_agent: Agent | None = None, translate_agent: Agent | None = None, save=True):
await self.read_bytes_async(name=name, file=file, formula=formula, code=code)
if refine:
await self.refine_markdown_by_agent_async(refine_agent)
- await self.translate_markdown_by_agent_async(translate_agent, to_lang=to_lang)
+ await self.translate_markdown_by_agent_async(translate_agent, to_lang=to_lang,
+ custom_prompt=custom_prompt_translate)
if save:
if output_format == "markdown":
filename = f"{name}_{to_lang}.md"