提示词改为英文,网络请求现在经过代理

This commit is contained in:
xunbu
2025-08-18 15:20:56 +08:00
parent b612c9e67e
commit 0cd049e707
6 changed files with 69 additions and 59 deletions

View File

@@ -86,8 +86,10 @@ class Agent:
self.model_id = config.model_id.strip()
self.system_prompt = config.system_prompt or ""
self.temperature = config.temperature
self.client = httpx.Client(trust_env=False, proxy=None, verify=False)
self.client_async = httpx.AsyncClient(trust_env=False, proxy=None, verify=False)
# self.client = httpx.Client(trust_env=False, proxy=None, verify=False)
# self.client_async = httpx.AsyncClient(trust_env=False, proxy=None, verify=False)
self.client = httpx.Client(verify=False)
self.client_async = httpx.AsyncClient(verify=False)
self.max_concurrent = config.max_concurrent
self.timeout = config.timeout
self.thinking = config.thinking

View File

@@ -11,32 +11,37 @@ class MDTranslateAgent(Agent):
def __init__(self,config:MDTranslateAgentConfig):
super().__init__(config)
self.system_prompt = f"""
# 角色
你是一个专业的机器翻译引擎
# 工作
翻译输入的markdown文本
目标语言{config.to_lang}
# 要求
翻译要求专业准确
不输出任何解释和注释
不能改变形如<ph-xxxxxx>的占位符
code、latex和HTML只翻译说明文字其余保持原文
所有公式无论长短必须表示为能被解析的合法latex公式公式需被$或\\(\\)或$$正确包裹,如不正确则进行修正
去除、修正明显异常的字符、但不能改变原意
引用参考文献时请严格保持原文,不要翻译。参考文献格式示例如下:
[1] Author A, Author B. "Original Title". Journal, 2023.
[2] 作者C. 《中文标题》. 期刊, 2022.
# 输出
翻译后的markdown译文纯文本不是markdown代码块无任何多余文字
# 示例
## 目标语言为中文
输入:
# Role
You are a professional machine translation engine.
# Task
Translate the input markdown text.
Target language: {config.to_lang}
# Requirements
- The translation must be professional and accurate.
- Do not output any explanations or annotations.
- Do not change placeholders in the format of `<ph-xxxxxx>`.
- For `code`, `LaTeX`, and `HTML`, only translate the descriptive text (e.g., comments, captions); keep the rest of the content in its original form.
- All formulas, regardless of length, must be represented as valid, parsable LaTeX. They must be correctly enclosed by `$`, `\\(\\)`, or `$$`. If a formula is not formatted correctly, you must fix it.
- Remove or correct any obviously abnormal characters, but without altering the original meaning.
- When citing references, strictly preserve the original text; do not translate them. Examples of reference formats are as follows:
[1] Author A, Author B. "Original Title". Journal, 2023.
[2] 作者C. 《中文标题》. 期刊, 2022.
# Output
The translated markdown text as plain text (not in a markdown code block, with no extraneous text).
# Example
## Target language is Chinese
Input:
hello, what's your nam*@e?
![photo title](<ph-abcdde>)
The equation is E=mc 2. This is famous.
1+1=2$$
(c_0,c_1_1,c_2^2)is a coordinate.
输出:
Output:
你好,你叫什么名字?
![图像标题](<ph-abcdde>)
这个方程是 $E=mc^2$。这很有名。

View File

@@ -19,26 +19,26 @@ class SegmentsTranslateAgent(Agent):
def __init__(self, config: SegmentsTranslateAgentConfig):
super().__init__(config)
self.system_prompt = f"""
# 角色
你是一个专业的机器翻译引擎
# 工作
你接收一个待翻译片段的序列以json格式表示。其中键是待片段的编号值是待翻译片段。
你需要将待翻译片段翻译成目标语言。
目标语言:{config.to_lang}
# 要求
翻译要求专业准确
不输出任何解释和注释
翻译后的片段应该与源格式尽量相同
如果待翻译片段已经是目标语言,则保持原样
# 输出
翻译后的片段序列以json文本表示注意不是代码块。其中键是片段编号值是翻译后的片段。
返回的json文本必须能被json.loads转换为形如{{"片段编号":"译文"}}的字典。
# 示例
## 输入
# Role
You are a professional machine translation engine.
# Task
You will receive a sequence of segments to be translated, represented in JSON format. The keys are the segment IDs, and the values are the segments for translation.
You need to translate these segments into the target language.
Target language: {config.to_lang}
# Requirements
The translation must be professional and accurate.
Do not output any explanations or annotations.
The format of the translated segments should be as close as possible to the source format.
If a segment is already in the target language, keep it as is.
# Output
The translated sequence of segments, represented as JSON text (note: not a code block). The keys are the segment IDs, and the values are the translated segments.
The returned JSON text must be parsable by json.loads into a dictionary of the form {r'{"segment_id": "translation"}'}.
# Example
## Input
{r'{"0":"hello","1":"apple","2":true,"3":"false"}'}
## 输出
## Output
{r'{"0":"你好","1":"苹果","2":true,"3":"错误"}'}
警告绝不要将整个JSON对象用引号包裹成一个字符串。
Warning: Never wrap the entire JSON object in quotes to make it a single string. Never wrap the JSON text in ```.
"""
if config.custom_prompt:
self.system_prompt += "\n# 重要规则或背景【非常重要】\n" + config.custom_prompt + '\n'

View File

@@ -13,17 +13,20 @@ class TXTTranslateAgent(Agent):
def __init__(self, config: TXTTranslateAgentConfig):
super().__init__(config)
self.system_prompt = f"""
# 角色
你是一个专业的机器翻译引擎
# 工作
翻译输入的txt文本
目标语言{config.to_lang}
# 要求
翻译要求专业准确
不输出任何解释和注释
不能改变形如<ph-xxxxxx>的占位符
# 输出
翻译后的txt译文纯文本
# Role
You are a professional machine translation engine.
# Task
Translate the input txt text.
Target language: {config.to_lang}
# Requirements
- The translation must be professional and accurate.
- Do not output any explanations or annotations.
- Do not change placeholders in the format of `<ph-xxxxxx>`.
# Output
The translated txt text as plain text.
"""
if config.custom_prompt:
self.system_prompt += "\n# 重要规则或背景【非常重要】\n" + config.custom_prompt + '\n'

View File

@@ -2,7 +2,6 @@ import asyncio
import time
import zipfile
from dataclasses import dataclass
from logging import Logger
from typing import Hashable
import httpx
@@ -10,7 +9,6 @@ import httpx
from docutranslate.converter.x2md.base import X2MarkdownConverter, X2MarkdownConverterConfig
from docutranslate.ir.document import Document
from docutranslate.ir.markdown_document import MarkdownDocument
from docutranslate.logger import global_logger
from docutranslate.utils.markdown_utils import embed_inline_image_from_zip
URL = 'https://mineru.net/api/v4/file-urls/batch'
@@ -21,7 +19,7 @@ class ConverterMineruConfig(X2MarkdownConverterConfig):
mineru_token: str
formula_ocr: bool = True
def gethash(self) ->Hashable:
def gethash(self) -> Hashable:
return self.formula_ocr
@@ -32,8 +30,10 @@ timeout = httpx.Timeout(
pool=1.0 # 从连接池获取连接的超时时间
)
client = httpx.Client(trust_env=False, timeout=timeout, proxy=None, verify=False)
client_async = httpx.AsyncClient(trust_env=False, timeout=timeout, proxy=None, verify=False)
# client = httpx.Client(trust_env=False, timeout=timeout, proxy=None, verify=False)
# client_async = httpx.AsyncClient(trust_env=False, timeout=timeout, proxy=None, verify=False)
client = httpx.Client(timeout=timeout, verify=False)
client_async = httpx.AsyncClient(timeout=timeout, verify=False)
class ConverterMineru(X2MarkdownConverter):

View File

@@ -4,7 +4,7 @@ from io import BytesIO
import mammoth
from docutranslate.exporter.base import ExporterConfig
from docutranslate.exporter.xlsx.base import XlsxExporter
from docutranslate.exporter.docx.base import DocxExporter
from docutranslate.ir.document import Document
@@ -13,7 +13,7 @@ class Docx2HTMLExporterConfig(ExporterConfig):
cdn: bool = True
class Docx2HTMLExporter(XlsxExporter):
class Docx2HTMLExporter(DocxExporter):
def __init__(self, config: Docx2HTMLExporterConfig = None):
config = config or Docx2HTMLExporterConfig()
super().__init__(config=config)