完善thinking模式、临时同步接口
This commit is contained in:
@@ -68,7 +68,7 @@ TIMEOUT = 600
|
|||||||
|
|
||||||
class Agent:
|
class Agent:
|
||||||
_think_factory = {
|
_think_factory = {
|
||||||
"open.bigmodel.cn": ("thinking", "enable", "disabled")
|
"open.bigmodel.cn": ("thinking", {"type":"enabled"}, {"type":"disabled"})
|
||||||
}
|
}
|
||||||
|
|
||||||
def __init__(self, config: AgentConfig):
|
def __init__(self, config: AgentConfig):
|
||||||
|
|||||||
@@ -44,4 +44,3 @@ $$1+1=2$$
|
|||||||
\\((c_0,c_1,c_2^2)\\)是一个坐标。"""
|
\\((c_0,c_1,c_2^2)\\)是一个坐标。"""
|
||||||
if config.custom_prompt:
|
if config.custom_prompt:
|
||||||
self.system_prompt += "\n# 重要规则或背景【非常重要】\n" + config.custom_prompt + '\n'
|
self.system_prompt += "\n# 重要规则或背景【非常重要】\n" + config.custom_prompt + '\n'
|
||||||
self.system_prompt += r'\no_think'
|
|
||||||
|
|||||||
@@ -27,4 +27,3 @@ class TXTTranslateAgent(Agent):
|
|||||||
"""
|
"""
|
||||||
if config.custom_prompt:
|
if config.custom_prompt:
|
||||||
self.system_prompt += "\n# 重要规则或背景【非常重要】\n" + config.custom_prompt + '\n'
|
self.system_prompt += "\n# 重要规则或背景【非常重要】\n" + config.custom_prompt + '\n'
|
||||||
self.system_prompt += r'\no_think'
|
|
||||||
|
|||||||
@@ -23,6 +23,7 @@ from pydantic import BaseModel, Field, field_validator
|
|||||||
from docutranslate import __version__
|
from docutranslate import __version__
|
||||||
from docutranslate.agents.agent import ThinkingMode
|
from docutranslate.agents.agent import ThinkingMode
|
||||||
from docutranslate.cacher import md_based_convert_cacher
|
from docutranslate.cacher import md_based_convert_cacher
|
||||||
|
from docutranslate.exporter.md.types import ConvertEngineType
|
||||||
# --- 核心代码 Imports ---
|
# --- 核心代码 Imports ---
|
||||||
from docutranslate.global_values.conditional_import import DOCLING_EXIST
|
from docutranslate.global_values.conditional_import import DOCLING_EXIST
|
||||||
from docutranslate.workflow.base import Workflow
|
from docutranslate.workflow.base import Workflow
|
||||||
@@ -70,23 +71,6 @@ def _create_default_task_state() -> Dict[str, Any]:
|
|||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
# --- [KEPT FOR TEMP ENDPOINT] 旧的 Workflow 工厂函数 (仅为临时接口保留) ---
|
|
||||||
def _get_workflow_for_file(filename: str, logger: logging.Logger) -> Workflow:
|
|
||||||
"""根据文件名后缀选择并返回合适的 Workflow 实例。这是扩展点。"""
|
|
||||||
suffix = Path(filename).suffix.lower()
|
|
||||||
if suffix == '.txt':
|
|
||||||
logger.info("检测到 .txt 文件,使用 TXTWorkflow。")
|
|
||||||
# 为临时接口创建虚拟config
|
|
||||||
return TXTWorkflow(config=TXTWorkflowConfig(translator_config=None, html_exporter_config=None, logger=logger))
|
|
||||||
else:
|
|
||||||
# 默认为基于 Markdown 的流程(处理 .pdf, .docx, .md 等)
|
|
||||||
logger.info(f"检测到 {suffix} 文件,使用 MarkdownBasedWorkflow。")
|
|
||||||
# 为临时接口创建虚拟config
|
|
||||||
return MarkdownBasedWorkflow(config=MarkdownBasedWorkflowConfig(
|
|
||||||
convert_engine=None, converter_config=None, translator_config=None, html_exporter_config=None, logger=logger
|
|
||||||
))
|
|
||||||
|
|
||||||
|
|
||||||
# --- 日志处理器 (保持不变) ---
|
# --- 日志处理器 (保持不变) ---
|
||||||
class QueueAndHistoryHandler(logging.Handler):
|
class QueueAndHistoryHandler(logging.Handler):
|
||||||
def __init__(self, queue_ref: asyncio.Queue, history_list_ref: List[str], max_history_items: int, task_id: str):
|
def __init__(self, queue_ref: asyncio.Queue, history_list_ref: List[str], max_history_items: int, task_id: str):
|
||||||
@@ -192,6 +176,8 @@ class BaseWorkflowParams(BaseModel):
|
|||||||
chunk_size: int = Field(default=default_params["chunk_size"], description="文本分割的块大小(字符)。")
|
chunk_size: int = Field(default=default_params["chunk_size"], description="文本分割的块大小(字符)。")
|
||||||
concurrent: int = Field(default=default_params["concurrent"], description="并发请求数。")
|
concurrent: int = Field(default=default_params["concurrent"], description="并发请求数。")
|
||||||
temperature: float = Field(default=default_params["temperature"], description="LLM温度参数。")
|
temperature: float = Field(default=default_params["temperature"], description="LLM温度参数。")
|
||||||
|
# Deep-thinking switch forwarded to the translator config.
# NOTE: there must be no trailing comma after Field(...); with one, the
# class-level default becomes a 1-tuple wrapping the FieldInfo instead of
# the FieldInfo itself, and the field's default/metadata are lost.
thinking: ThinkingMode = Field(default_params["thinking"], description="是否启用深度思考",
                               examples=["default", "enable", "disable"])
|
||||||
custom_prompt: Optional[str] = Field(None, description="用户自定义的翻译Prompt。", alias="custom_prompt")
|
custom_prompt: Optional[str] = Field(None, description="用户自定义的翻译Prompt。", alias="custom_prompt")
|
||||||
|
|
||||||
|
|
||||||
@@ -200,10 +186,10 @@ class MarkdownWorkflowParams(BaseWorkflowParams):
|
|||||||
workflow_type: Literal['markdown_based'] = Field(..., description="指定使用基于Markdown的翻译工作流。")
|
workflow_type: Literal['markdown_based'] = Field(..., description="指定使用基于Markdown的翻译工作流。")
|
||||||
|
|
||||||
# --- Markdown-specific Converter Params ---
|
# --- Markdown-specific Converter Params ---
|
||||||
convert_engine: Optional[Literal["mineru", "docling"]] = Field(
|
convert_engine: ConvertEngineType = Field(
|
||||||
None,
|
"identity",
|
||||||
description="文档解析引擎。`mineru`在线服务, `docling`本地引擎。如果输入文件是.md,此项可为`null`或不传。",
|
description="文档解析引擎。`identity`处理markdown文件,`mineru`在线服务, `docling`本地引擎。如果输入文件是.md,此项可为`null`或不传。",
|
||||||
examples=["mineru", "docling"]
|
examples=["identity", "mineru", "docling"]
|
||||||
)
|
)
|
||||||
mineru_token: Optional[str] = Field(None, description="当 `convert_engine` 为 'mineru' 时必填的API令牌。")
|
mineru_token: Optional[str] = Field(None, description="当 `convert_engine` 为 'mineru' 时必填的API令牌。")
|
||||||
formula_ocr: bool = Field(True, description="是否对公式进行OCR识别。对 `mineru` 和 `docling` 均有效。")
|
formula_ocr: bool = Field(True, description="是否对公式进行OCR识别。对 `mineru` 和 `docling` 均有效。")
|
||||||
@@ -251,6 +237,7 @@ class TranslateServiceRequest(BaseModel):
|
|||||||
"chunk_size": 3000,
|
"chunk_size": 3000,
|
||||||
"concurrent": 10,
|
"concurrent": 10,
|
||||||
"temperature": 0.1,
|
"temperature": 0.1,
|
||||||
|
"thinking": "enable",
|
||||||
"custom_prompt": "将所有技术术语翻译为业界公认的中文对应词汇。"
|
"custom_prompt": "将所有技术术语翻译为业界公认的中文对应词汇。"
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
@@ -294,7 +281,7 @@ async def _perform_translation(
|
|||||||
translator_config = MDTranslatorConfig(
|
translator_config = MDTranslatorConfig(
|
||||||
**payload.model_dump(include={
|
**payload.model_dump(include={
|
||||||
'base_url', 'api_key', 'model_id', 'to_lang', 'custom_prompt',
|
'base_url', 'api_key', 'model_id', 'to_lang', 'custom_prompt',
|
||||||
'temperature', 'timeout', 'chunk_size', 'concurrent'
|
'temperature', 'thinking', 'timeout', 'chunk_size', 'concurrent'
|
||||||
}, exclude_none=True)
|
}, exclude_none=True)
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -328,7 +315,7 @@ async def _perform_translation(
|
|||||||
translator_config = TXTTranslatorConfig(
|
translator_config = TXTTranslatorConfig(
|
||||||
**payload.model_dump(include={
|
**payload.model_dump(include={
|
||||||
'base_url', 'api_key', 'model_id', 'to_lang', 'custom_prompt',
|
'base_url', 'api_key', 'model_id', 'to_lang', 'custom_prompt',
|
||||||
'temperature', 'timeout', 'chunk_size', 'concurrent'
|
'temperature', 'thinking', 'timeout', 'chunk_size', 'concurrent'
|
||||||
}, exclude_none=True)
|
}, exclude_none=True)
|
||||||
)
|
)
|
||||||
|
|
||||||
@@ -1076,62 +1063,41 @@ async def temp_translate(
|
|||||||
model_id: str = Body(..., description="使用的模型ID。", examples=["gpt-4-turbo"]),
|
model_id: str = Body(..., description="使用的模型ID。", examples=["gpt-4-turbo"]),
|
||||||
mineru_token: Optional[str] = Body(None, description="Mineru引擎的Token。"),
|
mineru_token: Optional[str] = Body(None, description="Mineru引擎的Token。"),
|
||||||
file_name: str = Body(...,
|
file_name: str = Body(...,
|
||||||
description="文件名,用以判断文件类型。当后缀为txt时该接口返回普通文本,为其他后缀时返回翻译后的markdown文本",
|
description="文件名,用以判断文件类型。",
|
||||||
examples=["test.txt", "test.md", "test.pdf"]),
|
examples=["test.txt", "test.md", "test.pdf"]),
|
||||||
file_content: str = Body(..., description="文件内容,可以是纯文本或Base64编码的字符串。"),
|
file_content: str = Body(..., description="文件内容,可以是纯文本或Base64编码的字符串。"),
|
||||||
to_lang: str = Body("中文", description="目标语言。", examples=["中文", "英文", "English"]),
|
to_lang: str = Body("中文", description="目标语言。", examples=["中文", "英文", "English"]),
|
||||||
concurrent: int = Body(default_params["concurrent"], description="ai翻译请求并发数"),
|
concurrent: int = Body(default_params["concurrent"], description="ai翻译请求并发数"),
|
||||||
temperature: float = Body(default_params["temperature"], description="ai翻译请求温度"),
|
temperature: float = Body(default_params["temperature"], description="ai翻译请求温度"),
|
||||||
thinking: ThinkingMode = Body(default_params["thinking"], description="是否启用深度思考", examples=["default", "enable", "disable"]),
|
thinking: ThinkingMode = Body(default_params["thinking"], description="是否启用深度思考",
|
||||||
|
examples=["default", "enable", "disable"]),
|
||||||
chunk_size: int = Body(default_params["chunk_size"], description="文本分块大小(bytes)"),
|
chunk_size: int = Body(default_params["chunk_size"], description="文本分块大小(bytes)"),
|
||||||
custom_prompt: Optional[str] = Body(None, description="翻译自定义提示词",
|
custom_prompt: Optional[str] = Body(None, description="翻译自定义提示词",
|
||||||
examples=["人名保持原文不翻译"]),
|
examples=["人名保持原文不翻译"]),
|
||||||
):
|
):
|
||||||
"""一个用于快速测试的同步翻译接口。"""
|
"""一个用于快速测试的同步翻译接口。"""
|
||||||
|
file_name = Path(file_name)
|
||||||
try:
|
try:
|
||||||
decoded_content = base64.b64decode(file_content)
|
decoded_content = base64.b64decode(file_content)
|
||||||
except (ValueError, binascii.Error):
|
except (ValueError, binascii.Error):
|
||||||
decoded_content = file_content.encode('utf-8')
|
decoded_content = file_content.encode('utf-8')
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# [MODIFIED] 使用旧的辅助函数,仅为这个临时接口服务
|
workflow_config = MarkdownBasedWorkflowConfig(convert_engine="mineru",
|
||||||
workflow = _get_workflow_for_file(file_name, global_logger)
|
converter_config=ConverterMineruConfig(mineru_token=mineru_token),
|
||||||
workflow.read_bytes(decoded_content, Path(file_name).stem, Path(file_name).suffix)
|
translator_config=MDTranslatorConfig(base_url=base_url,
|
||||||
|
api_key=api_key,
|
||||||
# Manually set up configs for the workflow instance for this temp endpoint
|
model_id=model_id,
|
||||||
if isinstance(workflow, MarkdownBasedWorkflow):
|
to_lang=to_lang,
|
||||||
translator_config = MDTranslatorConfig(
|
custom_prompt=custom_prompt,
|
||||||
base_url=base_url, api_key=api_key, model_id=model_id, to_lang=to_lang,
|
temperature=temperature,
|
||||||
custom_prompt=custom_prompt, temperature=temperature,thinking=thinking,
|
thinking=thinking,
|
||||||
chunk_size=chunk_size, concurrent=concurrent, logger=global_logger, timeout=2000
|
chunk_size=chunk_size,
|
||||||
)
|
concurrent=concurrent),
|
||||||
convert_config = ConverterMineruConfig(mineru_token=mineru_token,
|
html_exporter_config=MD2HTMLExporterConfig())
|
||||||
formula_ocr=True) if mineru_token else None
|
workflow = MarkdownBasedWorkflow(workflow_config)
|
||||||
|
# file_name is a Path here: stem is the bare name, suffix is the extension
# (e.g. ".pdf"). The suffix argument must receive the extension, not the stem.
workflow.read_bytes(content=decoded_content, stem=file_name.stem, suffix=file_name.suffix)
|
||||||
# Update workflow's internal config for translate() to work
|
return {"success": True, "content": workflow.export_to_markdown()}
|
||||||
workflow.config.translator_config = translator_config
|
|
||||||
workflow.config.converter_config = convert_config
|
|
||||||
workflow.config.convert_engine = 'mineru' if mineru_token and convert_config else 'identity'
|
|
||||||
|
|
||||||
await workflow.translate_async()
|
|
||||||
return {"success": True, "content": workflow.export_to_markdown()}
|
|
||||||
|
|
||||||
elif isinstance(workflow, TXTWorkflow):
|
|
||||||
translator_config = TXTTranslatorConfig(
|
|
||||||
base_url=base_url, api_key=api_key, model_id=model_id, to_lang=to_lang,
|
|
||||||
custom_prompt=custom_prompt, temperature=temperature,thinking=thinking,
|
|
||||||
chunk_size=chunk_size, concurrent=concurrent, logger=global_logger, timeout=2000
|
|
||||||
)
|
|
||||||
|
|
||||||
# Update workflow's internal config
|
|
||||||
workflow.config.translator_config = translator_config
|
|
||||||
|
|
||||||
await workflow.translate_async()
|
|
||||||
return {"success": True, "content": workflow.export_to_txt()}
|
|
||||||
|
|
||||||
else:
|
|
||||||
raise NotImplementedError(f"Temp endpoint does not support workflow type {type(workflow).__name__}")
|
|
||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
global_logger.error(f"临时翻译接口出现错误:{e.__repr__()}", exc_info=True)
|
global_logger.error(f"临时翻译接口出现错误:{e.__repr__()}", exc_info=True)
|
||||||
return {"success": False, "reason": e.__repr__()}
|
return {"success": False, "reason": e.__repr__()}
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
@@ -18,15 +18,16 @@ class TXTTranslator(Translator):
|
|||||||
super().__init__(config=config)
|
super().__init__(config=config)
|
||||||
self.chunk_size = config.chunk_size
|
self.chunk_size = config.chunk_size
|
||||||
agent_config = TXTTranslateAgentConfig(custom_prompt=config.custom_prompt,
|
agent_config = TXTTranslateAgentConfig(custom_prompt=config.custom_prompt,
|
||||||
to_lang=config.to_lang,
|
to_lang=config.to_lang,
|
||||||
baseurl=config.base_url,
|
baseurl=config.base_url,
|
||||||
key=config.api_key,
|
key=config.api_key,
|
||||||
model_id=config.model_id,
|
model_id=config.model_id,
|
||||||
system_prompt=None,
|
system_prompt=None,
|
||||||
temperature=config.temperature,
|
temperature=config.temperature,
|
||||||
max_concurrent=config.concurrent,
|
thinking=config.thinking,
|
||||||
timeout=config.timeout,
|
max_concurrent=config.concurrent,
|
||||||
logger=self.logger)
|
timeout=config.timeout,
|
||||||
|
logger=self.logger)
|
||||||
self.translate_agent = TXTTranslateAgent(agent_config)
|
self.translate_agent = TXTTranslateAgent(agent_config)
|
||||||
|
|
||||||
def translate(self, document: Document) -> Self:
|
def translate(self, document: Document) -> Self:
|
||||||
|
|||||||
Reference in New Issue
Block a user