提示词改为英文,网络请求现在经过代理
This commit is contained in:
@@ -86,8 +86,10 @@ class Agent:
|
||||
self.model_id = config.model_id.strip()
|
||||
self.system_prompt = config.system_prompt or ""
|
||||
self.temperature = config.temperature
|
||||
self.client = httpx.Client(trust_env=False, proxy=None, verify=False)
|
||||
self.client_async = httpx.AsyncClient(trust_env=False, proxy=None, verify=False)
|
||||
# self.client = httpx.Client(trust_env=False, proxy=None, verify=False)
|
||||
# self.client_async = httpx.AsyncClient(trust_env=False, proxy=None, verify=False)
|
||||
self.client = httpx.Client(verify=False)
|
||||
self.client_async = httpx.AsyncClient(verify=False)
|
||||
self.max_concurrent = config.max_concurrent
|
||||
self.timeout = config.timeout
|
||||
self.thinking = config.thinking
|
||||
|
||||
@@ -11,32 +11,37 @@ class MDTranslateAgent(Agent):
|
||||
def __init__(self,config:MDTranslateAgentConfig):
|
||||
super().__init__(config)
|
||||
self.system_prompt = f"""
|
||||
# 角色
|
||||
你是一个专业的机器翻译引擎
|
||||
# 工作
|
||||
翻译输入的markdown文本
|
||||
目标语言{config.to_lang}
|
||||
# 要求
|
||||
翻译要求专业准确
|
||||
不输出任何解释和注释
|
||||
不能改变形如<ph-xxxxxx>的占位符
|
||||
code、latex和HTML只翻译说明文字,其余保持原文
|
||||
所有公式无论长短必须表示为能被解析的合法latex公式,公式需被$或\\(\\)或$$正确包裹,如不正确则进行修正
|
||||
去除、修正明显异常的字符、但不能改变原意
|
||||
引用参考文献时请严格保持原文,不要翻译。参考文献格式示例如下:
|
||||
[1] Author A, Author B. "Original Title". Journal, 2023.
|
||||
[2] 作者C. 《中文标题》. 期刊, 2022.
|
||||
# 输出
|
||||
翻译后的markdown译文纯文本(不是markdown代码块,无任何多余文字)
|
||||
# 示例
|
||||
## 目标语言为中文
|
||||
输入:
|
||||
# Role
|
||||
You are a professional machine translation engine.
|
||||
|
||||
# Task
|
||||
Translate the input markdown text.
|
||||
Target language: {config.to_lang}
|
||||
|
||||
# Requirements
|
||||
- The translation must be professional and accurate.
|
||||
- Do not output any explanations or annotations.
|
||||
- Do not change placeholders in the format of `<ph-xxxxxx>`.
|
||||
- For `code`, `LaTeX`, and `HTML`, only translate the descriptive text (e.g., comments, captions); keep the rest of the content in its original form.
|
||||
- All formulas, regardless of length, must be represented as valid, parsable LaTeX. They must be correctly enclosed by `$`, `\\(\\)`, or `$$`. If a formula is not formatted correctly, you must fix it.
|
||||
- Remove or correct any obviously abnormal characters, but without altering the original meaning.
|
||||
- When citing references, strictly preserve the original text; do not translate them. Examples of reference formats are as follows:
|
||||
[1] Author A, Author B. "Original Title". Journal, 2023.
|
||||
[2] 作者C. 《中文标题》. 期刊, 2022.
|
||||
|
||||
# Output
|
||||
The translated markdown text as plain text (not in a markdown code block, with no extraneous text).
|
||||
|
||||
# Example
|
||||
## Target language is Chinese
|
||||
Input:
|
||||
hello, what's your nam*@e?
|
||||

|
||||
The equation is E=mc 2. This is famous.
|
||||
1+1=2$$
|
||||
(c_0,c_1_1,c_2^2)is a coordinate.
|
||||
输出:
|
||||
|
||||
Output:
|
||||
你好,你叫什么名字?
|
||||

|
||||
这个方程是 $E=mc^2$。这很有名。
|
||||
|
||||
@@ -19,26 +19,26 @@ class SegmentsTranslateAgent(Agent):
|
||||
def __init__(self, config: SegmentsTranslateAgentConfig):
|
||||
super().__init__(config)
|
||||
self.system_prompt = f"""
|
||||
# 角色
|
||||
你是一个专业的机器翻译引擎
|
||||
# 工作
|
||||
你接收一个待翻译片段的序列,以json格式表示。其中键是待片段的编号,值是待翻译片段。
|
||||
你需要将待翻译片段翻译成目标语言。
|
||||
目标语言:{config.to_lang}
|
||||
# 要求
|
||||
翻译要求专业准确
|
||||
不输出任何解释和注释
|
||||
翻译后的片段应该与源格式尽量相同
|
||||
如果待翻译片段已经是目标语言,则保持原样
|
||||
# 输出
|
||||
翻译后的片段序列,以json文本表示(注意不是代码块)。其中键是片段编号,值是翻译后的片段。
|
||||
返回的json文本必须能被json.loads转换为形如{{"片段编号":"译文"}}的字典。
|
||||
# 示例
|
||||
## 输入
|
||||
Role
|
||||
You are a professional machine translation engine.
|
||||
Task
|
||||
You will receive a sequence of segments to be translated, represented in JSON format. The keys are the segment IDs, and the values are the segments for translation.
|
||||
You need to translate these segments into the target language.
|
||||
Target language: {config.to_lang}
|
||||
Requirements
|
||||
The translation must be professional and accurate.
|
||||
Do not output any explanations or annotations.
|
||||
The format of the translated segments should be as close as possible to the source format.
|
||||
If a segment is already in the target language, keep it as is.
|
||||
Output
|
||||
The translated sequence of segments, represented as JSON text (note: not a code block). The keys are the segment IDs, and the values are the translated segments.
|
||||
The returned JSON text must be parsable by json.loads into a dictionary of the form {r'{"segment_id": "translation"}'}.
|
||||
Example
|
||||
Input
|
||||
{r'{"0":"hello","1":"apple","2":true,"3":"false"}'}
|
||||
## 输出
|
||||
Output
|
||||
{r'{"0":"你好","1":"苹果","2":true,"3":"错误"}'}
|
||||
警告:绝不要将整个JSON对象用引号包裹成一个字符串。
|
||||
Warning: Never wrap the entire JSON object in quotes to make it a single string. Never wrap the JSON text in ```.
|
||||
"""
|
||||
if config.custom_prompt:
|
||||
self.system_prompt += "\n# 重要规则或背景【非常重要】\n" + config.custom_prompt + '\n'
|
||||
|
||||
@@ -13,17 +13,20 @@ class TXTTranslateAgent(Agent):
|
||||
def __init__(self, config: TXTTranslateAgentConfig):
|
||||
super().__init__(config)
|
||||
self.system_prompt = f"""
|
||||
# 角色
|
||||
你是一个专业的机器翻译引擎
|
||||
# 工作
|
||||
翻译输入的txt文本
|
||||
目标语言{config.to_lang}
|
||||
# 要求
|
||||
翻译要求专业准确
|
||||
不输出任何解释和注释
|
||||
不能改变形如<ph-xxxxxx>的占位符
|
||||
# 输出
|
||||
翻译后的txt译文纯文本
|
||||
# Role
|
||||
You are a professional machine translation engine.
|
||||
|
||||
# Task
|
||||
Translate the input txt text.
|
||||
Target language: {config.to_lang}
|
||||
|
||||
# Requirements
|
||||
- The translation must be professional and accurate.
|
||||
- Do not output any explanations or annotations.
|
||||
- Do not change placeholders in the format of `<ph-xxxxxx>`.
|
||||
|
||||
# Output
|
||||
The translated txt text as plain text.
|
||||
"""
|
||||
if config.custom_prompt:
|
||||
self.system_prompt += "\n# 重要规则或背景【非常重要】\n" + config.custom_prompt + '\n'
|
||||
|
||||
@@ -2,7 +2,6 @@ import asyncio
|
||||
import time
|
||||
import zipfile
|
||||
from dataclasses import dataclass
|
||||
from logging import Logger
|
||||
from typing import Hashable
|
||||
|
||||
import httpx
|
||||
@@ -10,7 +9,6 @@ import httpx
|
||||
from docutranslate.converter.x2md.base import X2MarkdownConverter, X2MarkdownConverterConfig
|
||||
from docutranslate.ir.document import Document
|
||||
from docutranslate.ir.markdown_document import MarkdownDocument
|
||||
from docutranslate.logger import global_logger
|
||||
from docutranslate.utils.markdown_utils import embed_inline_image_from_zip
|
||||
|
||||
URL = 'https://mineru.net/api/v4/file-urls/batch'
|
||||
@@ -21,7 +19,7 @@ class ConverterMineruConfig(X2MarkdownConverterConfig):
|
||||
mineru_token: str
|
||||
formula_ocr: bool = True
|
||||
|
||||
def gethash(self) ->Hashable:
|
||||
def gethash(self) -> Hashable:
|
||||
return self.formula_ocr
|
||||
|
||||
|
||||
@@ -32,8 +30,10 @@ timeout = httpx.Timeout(
|
||||
pool=1.0 # 从连接池获取连接的超时时间
|
||||
)
|
||||
|
||||
client = httpx.Client(trust_env=False, timeout=timeout, proxy=None, verify=False)
|
||||
client_async = httpx.AsyncClient(trust_env=False, timeout=timeout, proxy=None, verify=False)
|
||||
# client = httpx.Client(trust_env=False, timeout=timeout, proxy=None, verify=False)
|
||||
# client_async = httpx.AsyncClient(trust_env=False, timeout=timeout, proxy=None, verify=False)
|
||||
client = httpx.Client(timeout=timeout, verify=False)
|
||||
client_async = httpx.AsyncClient(timeout=timeout, verify=False)
|
||||
|
||||
|
||||
class ConverterMineru(X2MarkdownConverter):
|
||||
|
||||
@@ -4,7 +4,7 @@ from io import BytesIO
|
||||
import mammoth
|
||||
|
||||
from docutranslate.exporter.base import ExporterConfig
|
||||
from docutranslate.exporter.xlsx.base import XlsxExporter
|
||||
from docutranslate.exporter.docx.base import DocxExporter
|
||||
from docutranslate.ir.document import Document
|
||||
|
||||
|
||||
@@ -13,7 +13,7 @@ class Docx2HTMLExporterConfig(ExporterConfig):
|
||||
cdn: bool = True
|
||||
|
||||
|
||||
class Docx2HTMLExporter(XlsxExporter):
|
||||
class Docx2HTMLExporter(DocxExporter):
|
||||
def __init__(self, config: Docx2HTMLExporterConfig = None):
|
||||
config = config or Docx2HTMLExporterConfig()
|
||||
super().__init__(config=config)
|
||||
|
||||
Reference in New Issue
Block a user