提示词改为英文，网络请求现在经过代理

This commit is contained in:
xunbu
2025-08-18 15:20:56 +08:00
parent b612c9e67e
commit 0cd049e707
6 changed files with 69 additions and 59 deletions

View File

@@ -86,8 +86,10 @@ class Agent:
self.model_id = config.model_id.strip() self.model_id = config.model_id.strip()
self.system_prompt = config.system_prompt or "" self.system_prompt = config.system_prompt or ""
self.temperature = config.temperature self.temperature = config.temperature
self.client = httpx.Client(trust_env=False, proxy=None, verify=False) # self.client = httpx.Client(trust_env=False, proxy=None, verify=False)
self.client_async = httpx.AsyncClient(trust_env=False, proxy=None, verify=False) # self.client_async = httpx.AsyncClient(trust_env=False, proxy=None, verify=False)
self.client = httpx.Client(verify=False)
self.client_async = httpx.AsyncClient(verify=False)
self.max_concurrent = config.max_concurrent self.max_concurrent = config.max_concurrent
self.timeout = config.timeout self.timeout = config.timeout
self.thinking = config.thinking self.thinking = config.thinking

View File

@@ -11,32 +11,37 @@ class MDTranslateAgent(Agent):
def __init__(self,config:MDTranslateAgentConfig): def __init__(self,config:MDTranslateAgentConfig):
super().__init__(config) super().__init__(config)
self.system_prompt = f""" self.system_prompt = f"""
# 角色 # Role
你是一个专业的机器翻译引擎 You are a professional machine translation engine.
# 工作
翻译输入的markdown文本 # Task
目标语言{config.to_lang} Translate the input markdown text.
# 要求 Target language: {config.to_lang}
翻译要求专业准确
不输出任何解释和注释 # Requirements
不能改变形如<ph-xxxxxx>的占位符 - The translation must be professional and accurate.
code、latex和HTML只翻译说明文字其余保持原文 - Do not output any explanations or annotations.
所有公式无论长短必须表示为能被解析的合法latex公式公式需被$或\\(\\)或$$正确包裹,如不正确则进行修正 - Do not change placeholders in the format of `<ph-xxxxxx>`.
去除、修正明显异常的字符、但不能改变原意 - For `code`, `LaTeX`, and `HTML`, only translate the descriptive text (e.g., comments, captions); keep the rest of the content in its original form.
引用参考文献时请严格保持原文,不要翻译。参考文献格式示例如下: - All formulas, regardless of length, must be represented as valid, parsable LaTeX. They must be correctly enclosed by `$`, `\\(\\)`, or `$$`. If a formula is not formatted correctly, you must fix it.
[1] Author A, Author B. "Original Title". Journal, 2023. - Remove or correct any obviously abnormal characters, but without altering the original meaning.
[2] 作者C. 《中文标题》. 期刊, 2022. - When citing references, strictly preserve the original text; do not translate them. Examples of reference formats are as follows:
# 输出 [1] Author A, Author B. "Original Title". Journal, 2023.
翻译后的markdown译文纯文本不是markdown代码块无任何多余文字 [2] 作者C. 《中文标题》. 期刊, 2022.
# 示例
## 目标语言为中文 # Output
输入: The translated markdown text as plain text (not in a markdown code block, with no extraneous text).
# Example
## Target language is Chinese
Input:
hello, what's your nam*@e? hello, what's your nam*@e?
![photo title](<ph-abcdde>) ![photo title](<ph-abcdde>)
The equation is E=mc 2. This is famous. The equation is E=mc 2. This is famous.
1+1=2$$ 1+1=2$$
(c_0,c_1_1,c_2^2)is a coordinate. (c_0,c_1_1,c_2^2)is a coordinate.
输出:
Output:
你好,你叫什么名字? 你好,你叫什么名字?
![图像标题](<ph-abcdde>) ![图像标题](<ph-abcdde>)
这个方程是 $E=mc^2$。这很有名。 这个方程是 $E=mc^2$。这很有名。

View File

@@ -19,26 +19,26 @@ class SegmentsTranslateAgent(Agent):
def __init__(self, config: SegmentsTranslateAgentConfig): def __init__(self, config: SegmentsTranslateAgentConfig):
super().__init__(config) super().__init__(config)
self.system_prompt = f""" self.system_prompt = f"""
# 角色 Role
你是一个专业的机器翻译引擎 You are a professional machine translation engine.
# 工作 Task
你接收一个待翻译片段的序列以json格式表示。其中键是待片段的编号值是待翻译片段。 You will receive a sequence of segments to be translated, represented in JSON format. The keys are the segment IDs, and the values are the segments for translation.
你需要将待翻译片段翻译成目标语言。 You need to translate these segments into the target language.
目标语言:{config.to_lang} Target language: {config.to_lang}
# 要求 Requirements
翻译要求专业准确 The translation must be professional and accurate.
不输出任何解释和注释 Do not output any explanations or annotations.
翻译后的片段应该与源格式尽量相同 The format of the translated segments should be as close as possible to the source format.
如果待翻译片段已经是目标语言,则保持原样 If a segment is already in the target language, keep it as is.
# 输出 Output
翻译后的片段序列以json文本表示注意不是代码块。其中键是片段编号值是翻译后的片段。 The translated sequence of segments, represented as JSON text (note: not a code block). The keys are the segment IDs, and the values are the translated segments.
返回的json文本必须能被json.loads转换为形如{{"片段编号":"译文"}}的字典。 The returned JSON text must be parsable by json.loads into a dictionary of the form {r'{"segment_id": "translation"}'}.
# 示例 Example
## 输入 Input
{r'{"0":"hello","1":"apple","2":true,"3":"false"}'} {r'{"0":"hello","1":"apple","2":true,"3":"false"}'}
## 输出 Output
{r'{"0":"你好","1":"苹果","2":true,"3":"错误"}'} {r'{"0":"你好","1":"苹果","2":true,"3":"错误"}'}
警告绝不要将整个JSON对象用引号包裹成一个字符串。 Warning: Never wrap the entire JSON object in quotes to make it a single string. Never wrap the JSON text in ```.
""" """
if config.custom_prompt: if config.custom_prompt:
self.system_prompt += "\n# 重要规则或背景【非常重要】\n" + config.custom_prompt + '\n' self.system_prompt += "\n# 重要规则或背景【非常重要】\n" + config.custom_prompt + '\n'

View File

@@ -13,17 +13,20 @@ class TXTTranslateAgent(Agent):
def __init__(self, config: TXTTranslateAgentConfig): def __init__(self, config: TXTTranslateAgentConfig):
super().__init__(config) super().__init__(config)
self.system_prompt = f""" self.system_prompt = f"""
# 角色 # Role
你是一个专业的机器翻译引擎 You are a professional machine translation engine.
# 工作
翻译输入的txt文本 # Task
目标语言{config.to_lang} Translate the input txt text.
# 要求 Target language: {config.to_lang}
翻译要求专业准确
不输出任何解释和注释 # Requirements
不能改变形如<ph-xxxxxx>的占位符 - The translation must be professional and accurate.
# 输出 - Do not output any explanations or annotations.
翻译后的txt译文纯文本 - Do not change placeholders in the format of `<ph-xxxxxx>`.
# Output
The translated txt text as plain text.
""" """
if config.custom_prompt: if config.custom_prompt:
self.system_prompt += "\n# 重要规则或背景【非常重要】\n" + config.custom_prompt + '\n' self.system_prompt += "\n# 重要规则或背景【非常重要】\n" + config.custom_prompt + '\n'

View File

@@ -2,7 +2,6 @@ import asyncio
import time import time
import zipfile import zipfile
from dataclasses import dataclass from dataclasses import dataclass
from logging import Logger
from typing import Hashable from typing import Hashable
import httpx import httpx
@@ -10,7 +9,6 @@ import httpx
from docutranslate.converter.x2md.base import X2MarkdownConverter, X2MarkdownConverterConfig from docutranslate.converter.x2md.base import X2MarkdownConverter, X2MarkdownConverterConfig
from docutranslate.ir.document import Document from docutranslate.ir.document import Document
from docutranslate.ir.markdown_document import MarkdownDocument from docutranslate.ir.markdown_document import MarkdownDocument
from docutranslate.logger import global_logger
from docutranslate.utils.markdown_utils import embed_inline_image_from_zip from docutranslate.utils.markdown_utils import embed_inline_image_from_zip
URL = 'https://mineru.net/api/v4/file-urls/batch' URL = 'https://mineru.net/api/v4/file-urls/batch'
@@ -21,7 +19,7 @@ class ConverterMineruConfig(X2MarkdownConverterConfig):
mineru_token: str mineru_token: str
formula_ocr: bool = True formula_ocr: bool = True
def gethash(self) ->Hashable: def gethash(self) -> Hashable:
return self.formula_ocr return self.formula_ocr
@@ -32,8 +30,10 @@ timeout = httpx.Timeout(
pool=1.0 # 从连接池获取连接的超时时间 pool=1.0 # 从连接池获取连接的超时时间
) )
client = httpx.Client(trust_env=False, timeout=timeout, proxy=None, verify=False) # client = httpx.Client(trust_env=False, timeout=timeout, proxy=None, verify=False)
client_async = httpx.AsyncClient(trust_env=False, timeout=timeout, proxy=None, verify=False) # client_async = httpx.AsyncClient(trust_env=False, timeout=timeout, proxy=None, verify=False)
client = httpx.Client(timeout=timeout, verify=False)
client_async = httpx.AsyncClient(timeout=timeout, verify=False)
class ConverterMineru(X2MarkdownConverter): class ConverterMineru(X2MarkdownConverter):

View File

@@ -4,7 +4,7 @@ from io import BytesIO
import mammoth import mammoth
from docutranslate.exporter.base import ExporterConfig from docutranslate.exporter.base import ExporterConfig
from docutranslate.exporter.xlsx.base import XlsxExporter from docutranslate.exporter.docx.base import DocxExporter
from docutranslate.ir.document import Document from docutranslate.ir.document import Document
@@ -13,7 +13,7 @@ class Docx2HTMLExporterConfig(ExporterConfig):
cdn: bool = True cdn: bool = True
class Docx2HTMLExporter(XlsxExporter): class Docx2HTMLExporter(DocxExporter):
def __init__(self, config: Docx2HTMLExporterConfig = None): def __init__(self, config: Docx2HTMLExporterConfig = None):
config = config or Docx2HTMLExporterConfig() config = config or Docx2HTMLExporterConfig()
super().__init__(config=config) super().__init__(config=config)