diff --git a/docutranslate/agents/agent.py b/docutranslate/agents/agent.py index 9921b6f..65f90f9 100644 --- a/docutranslate/agents/agent.py +++ b/docutranslate/agents/agent.py @@ -26,7 +26,6 @@ class AgentConfig: baseurl: str key: str model_id: str - system_prompt: str | None temperature: float = 0.7 max_concurrent: int = 30 timeout: int = 2000 @@ -68,8 +67,8 @@ class PromptsCounter: PreSendHandlerType = Callable[[str, str], tuple[str, str]] -ResultHandlerType = Callable[[str, str, logging.Logger], str] -ErrorResultHandlerType = Callable[[str, logging.Logger], str] +ResultHandlerType = Callable[[str, str, logging.Logger], Any] +ErrorResultHandlerType = Callable[[str, logging.Logger], Any] class Agent: @@ -90,7 +89,7 @@ class Agent: self.domain = urlparse(self.baseurl).netloc self.key = config.key.strip() or "xx" self.model_id = config.model_id.strip() - self.system_prompt = config.system_prompt or "" + self.system_prompt = "" self.temperature = config.temperature self.max_concurrent = config.max_concurrent self.timeout = config.timeout @@ -158,6 +157,8 @@ class Agent: self.logger.warning(f"AI请求连接错误 (async): {repr(e)}") except (KeyError, IndexError) as e: raise Exception(f"AI响应格式错误 (async): {repr(e)}") + except ValueError as e: + self.logger.warning(f"{e.__repr__()}") # 如果没有正常获取结果则重试 if retry and retry_count < MAX_RETRY_COUNT: if self.total_error_counter.add(): @@ -181,7 +182,7 @@ class Agent: ) -> list[Any]: max_concurrent = self.max_concurrent if max_concurrent is None else max_concurrent total = len(prompts) - self.logger.info(f"base-url:{self.baseurl},model-id:{self.model_id}") + self.logger.info(f"base-url:{self.baseurl},model-id:{self.model_id},concurrent:{self.max_concurrent},temperature:{self.temperature}") self.logger.info(f"预计发送{total}个请求,并发请求数:{max_concurrent}") self.total_error_counter.max_errors_count = len(prompts) // MAX_REQUESTS_PER_ERROR # 允许多少个异常 count = 0 @@ -258,7 +259,8 @@ class Agent: pre_send_handler, result_handler, error_result_handler) -> Any: - result = self.send(client, prompt, system_prompt, pre_send_handler=pre_send_handler,result_handler=result_handler, + result = self.send(client, prompt, system_prompt, pre_send_handler=pre_send_handler, + result_handler=result_handler, error_result_handler=error_result_handler) count.add() return result @@ -267,11 +269,11 @@ class Agent: self, prompts: list[str], system_prompt: str | None = None, - pre_send_handler:PreSendHandlerType=None, + pre_send_handler: PreSendHandlerType = None, result_handler: ResultHandlerType = None, error_result_handler: ErrorResultHandlerType = None ) -> list[Any]: - self.logger.info(f"base-url:{self.baseurl},model-id:{self.model_id}") + self.logger.info(f"base-url:{self.baseurl},model-id:{self.model_id},concurrent:{self.max_concurrent},temperature:{self.temperature}") self.logger.info(f"预计发送{len(prompts)}个请求,并发请求数:{self.max_concurrent}") self.total_error_counter.max_errors_count = len(prompts) // MAX_REQUESTS_PER_ERROR # 允许多少个异常 # 创建单个计数器实例 @@ -280,7 +282,7 @@ class Agent: # 使用 itertools.repeat 将同一个实例传递给每个 map 调用 system_prompts = itertools.repeat(system_prompt, len(prompts)) counters = itertools.repeat(counter, len(prompts)) - pre_send_handlers=itertools.repeat(pre_send_handler,len(prompts)) + pre_send_handlers = itertools.repeat(pre_send_handler, len(prompts)) result_handlers = itertools.repeat(result_handler, len(prompts)) error_result_handlers = itertools.repeat(error_result_handler, len(prompts)) output_list = [] diff --git a/docutranslate/agents/glossary_agent.py b/docutranslate/agents/glossary_agent.py index 321e2d0..d521e49 100644 --- a/docutranslate/agents/glossary_agent.py +++ b/docutranslate/agents/glossary_agent.py @@ -46,6 +46,8 @@ You are a professional machine translation engine. """ def _result_handler(self, result: str, origin_prompt: str, logger: Logger): + if result == "": + return [] try: result = json_repair.loads(result) if not isinstance(result, list): @@ -56,6 +58,8 @@ You are a professional machine translation engine. return result def _error_result_handler(self, origin_prompt: str, logger: Logger): + if origin_prompt == "": + return [] try: return json_repair.loads(origin_prompt) except: @@ -79,11 +83,11 @@ You are a professional machine translation engine. self.logger.info(f"json解析错误,解析文本:{chunk},错误:{e.__repr__()}") except Exception as e: self.logger.info(f"send_segments发生错误:{e.__repr__()}") - + self.logger.info("术语表提取完成") return result async def send_segments_async(self, segments: list[str], chunk_size: int): - self.logger.info("开始提取术语表") + self.logger.info("开始术语表提取") result = {} indexed_originals, chunks, merged_indices_list = await asyncio.to_thread(segments2json_chunks, segments, chunk_size) @@ -101,4 +105,5 @@ You are a professional machine translation engine. except Exception as e: self.logger.info(f"send_segments发生错误:{e.__repr__()}") print(f"术语表:\n{result}") + self.logger.info("术语表提取完成") return result diff --git a/docutranslate/agents/segments_agent.py b/docutranslate/agents/segments_agent.py index 3ad034a..e8f70de 100644 --- a/docutranslate/agents/segments_agent.py +++ b/docutranslate/agents/segments_agent.py @@ -57,16 +57,19 @@ Warning: Never wrap the entire JSON object in quotes to make it a single string. return system_prompt, prompt def _result_handler(self, result: str, origin_prompt: str, logger: Logger): + if result == "": + return {} try: result = json_repair.loads(result) if not isinstance(result, dict): - raise ValueError("agent返回结果不是dict的json形式") - except: - logger.error("结果不能正确解析") - return self._error_result_handler(origin_prompt, logger) + raise ValueError(f"agent返回结果不是dict的json形式,result:{result}") + except RuntimeError as e: + raise ValueError(f"结果不能正确解析:{e.__repr__()}") return result def _error_result_handler(self, origin_prompt: str, logger: Logger): + if origin_prompt == "": + return {} try: return json_repair.loads(origin_prompt) except: diff --git a/docutranslate/app.py b/docutranslate/app.py index e831669..ead3427 100644 --- a/docutranslate/app.py +++ b/docutranslate/app.py @@ -22,6 +22,7 @@ from pydantic import BaseModel, Field, field_validator from docutranslate import __version__ from docutranslate.agents.agent import ThinkingMode +from docutranslate.agents.glossary_agent import GlossaryAgentConfig from docutranslate.exporter.md.types import ConvertEngineType # --- 核心代码 Imports --- from docutranslate.global_values.conditional_import import DOCLING_EXIST @@ -220,6 +221,16 @@ app.mount("/static", StaticFiles(directory=STATIC_DIR), name="static") # --- Pydantic Models for Service API --- # =================================================================== +class GlossaryAgentConfigPayload(BaseModel): + baseurl: str = Field(..., description="用于术语表生成的Agent的LLM API基础URL。", examples=["https://api.openai.com/v1"]) + key: str = Field(..., description="用于术语表生成的Agent的LLM API密钥。", examples=["sk-agent-api-key"]) + model_id: str = Field(..., description="用于术语表生成的Agent的模型ID。", examples=["gpt-4-turbo"]) + temperature: float = Field(default=0.7, description="用于术语表生成的Agent的温度参数。") + max_concurrent: int = Field(default=30, description="Agent的最大并发请求数。") + timeout: int = Field(default=2000, description="Agent的API调用超时时间。") + thinking: ThinkingMode = Field(default="default", description="Agent的思考模式。") + + # 1. 定义所有工作流共享的基础参数 class BaseWorkflowParams(BaseModel): base_url: str = Field(..., description="LLM API的基础URL。", examples=["https://api.openai.com/v1"]) @@ -233,6 +244,15 @@ class BaseWorkflowParams(BaseModel): examples=["default", "enable", "disable"]) custom_prompt: Optional[str] = Field(None, description="用户自定义的翻译Prompt。", alias="custom_prompt") glossary_dict: Optional[Dict[str, str]] = Field(None, description="术语表字典,key为原文,value为译文。") + glossary_generate_enable: bool = Field(default=False, description="是否开启术语表自动生成。") + glossary_agent_config: Optional[GlossaryAgentConfigPayload] = Field(None, + description="用于术语表生成的Agent的配置。如果 `glossary_generate_enable` 为 `True`,此项必填。") + + @field_validator('glossary_agent_config') + def check_glossary_config(cls, v, values): + if values.data.get('glossary_generate_enable') and not v: + raise ValueError("当 `glossary_generate_enable` 为 `True` 时, `glossary_agent_config` 字段是必须的。") + return v # 2. 为每个工作流创建独立的参数模型 @@ -410,6 +430,27 @@ class TranslateServiceRequest(BaseModel): } } }, + { + "summary": "XLSX 带术语表生成", + "value": { + "file_name": "complex_terms.xlsx", + "file_content": "UEsDBBQAAAAIA... (base64-encoded xlsx)", + "payload": { + "workflow_type": "xlsx", + "base_url": "https://api.openai.com/v1", + "api_key": "sk-your-main-translator-key", + "model_id": "gpt-4o", + "to_lang": "简体中文", + "glossary_generate_enable": True, + "glossary_agent_config": { + "baseurl": "https://api.openai.com/v1", + "key": "sk-your-agent-key-for-glossary", + "model_id": "gpt-4-turbo", + "temperature": 0.5 + } + } + } + }, { "summary": "DOCX 工作流示例", "value": { @@ -508,15 +549,28 @@ async def _perform_translation( workflow: Workflow + # 辅助函数:构建术语表生成配置 + def build_glossary_agent_config(): + if payload.glossary_generate_enable and payload.glossary_agent_config: + agent_payload = payload.glossary_agent_config + return GlossaryAgentConfig( + logger=task_logger, + to_lang=payload.to_lang, + **agent_payload.model_dump() + ) + return None + # 2. 根据 payload 的具体类型构建配置并实例化 workflow if isinstance(payload, MarkdownWorkflowParams): task_logger.info("构建 MarkdownBasedWorkflow 配置。") - translator_config = MDTranslatorConfig( - **payload.model_dump(include={ - 'base_url', 'api_key', 'model_id', 'to_lang', 'custom_prompt', - 'temperature', 'thinking', 'chunk_size', 'concurrent', 'glossary_dict' - }, exclude_none=True) - ) + translator_args = payload.model_dump(include={ + 'base_url', 'api_key', 'model_id', 'to_lang', 'custom_prompt', + 'temperature', 'thinking', 'chunk_size', 'concurrent', 'glossary_dict' + }, exclude_none=True) + translator_args['glossary_generate_enable'] = payload.glossary_generate_enable + translator_args['glossary_agent_config'] = build_glossary_agent_config() + translator_config = MDTranslatorConfig(**translator_args) + converter_config = None if payload.convert_engine == 'mineru': converter_config = ConverterMineruConfig(logger=task_logger, mineru_token=payload.mineru_token, @@ -535,12 +589,14 @@ async def _perform_translation( elif isinstance(payload, TextWorkflowParams): task_logger.info("构建 TXTWorkflow 配置。") - translator_config = TXTTranslatorConfig( - **payload.model_dump(include={ - 'base_url', 'api_key', 'model_id', 'to_lang', 'custom_prompt', - 'temperature', 'thinking', 'chunk_size', 'concurrent', 'glossary_dict' - }, exclude_none=True) - ) + translator_args = payload.model_dump(include={ + 'base_url', 'api_key', 'model_id', 'to_lang', 'custom_prompt', + 'temperature', 'thinking', 'chunk_size', 'concurrent', 'glossary_dict' + }, exclude_none=True) + translator_args['glossary_generate_enable'] = payload.glossary_generate_enable + translator_args['glossary_agent_config'] = build_glossary_agent_config() + translator_config = TXTTranslatorConfig(**translator_args) + html_exporter_config = TXT2HTMLExporterConfig(cdn=True) workflow_config = TXTWorkflowConfig( translator_config=translator_config, html_exporter_config=html_exporter_config, @@ -550,13 +606,15 @@ async def _perform_translation( elif isinstance(payload, JsonWorkflowParams): task_logger.info("构建 JsonWorkflow 配置。") - translator_config = JsonTranslatorConfig( - json_paths=payload.json_paths, - **payload.model_dump(include={ - 'base_url', 'api_key', 'model_id', 'to_lang', 'custom_prompt', - 'temperature', 'thinking', 'chunk_size', 'concurrent', 'glossary_dict' - }, exclude_none=True) - ) + translator_args = payload.model_dump(include={ + 'base_url', 'api_key', 'model_id', 'to_lang', 'custom_prompt', + 'temperature', 'thinking', 'chunk_size', 'concurrent', 'glossary_dict', + 'json_paths' + }, exclude_none=True) + translator_args['glossary_generate_enable'] = payload.glossary_generate_enable + translator_args['glossary_agent_config'] = build_glossary_agent_config() + translator_config = JsonTranslatorConfig(**translator_args) + html_exporter_config = Json2HTMLExporterConfig(cdn=True) workflow_config = JsonWorkflowConfig( translator_config=translator_config, html_exporter_config=html_exporter_config, @@ -566,13 +624,15 @@ async def _perform_translation( elif isinstance(payload, XlsxWorkflowParams): task_logger.info("构建 XlsxWorkflow 配置。") - translator_config = XlsxTranslatorConfig( - **payload.model_dump(include={ - 'base_url', 'api_key', 'model_id', 'to_lang', 'custom_prompt', - 'temperature', 'thinking', 'chunk_size', 'concurrent', - 'insert_mode', 'separator', 'translate_regions', 'glossary_dict' - }, exclude_none=True) - ) + translator_args = payload.model_dump(include={ + 'base_url', 'api_key', 'model_id', 'to_lang', 'custom_prompt', + 'temperature', 'thinking', 'chunk_size', 'concurrent', + 'insert_mode', 'separator', 'translate_regions', 'glossary_dict' + }, exclude_none=True) + translator_args['glossary_generate_enable'] = payload.glossary_generate_enable + translator_args['glossary_agent_config'] = build_glossary_agent_config() + translator_config = XlsxTranslatorConfig(**translator_args) + html_exporter_config = Xlsx2HTMLExporterConfig(cdn=True) workflow_config = XlsxWorkflowConfig( translator_config=translator_config, @@ -583,13 +643,15 @@ async def _perform_translation( elif isinstance(payload, DocxWorkflowParams): task_logger.info("构建 DocxWorkflow 配置。") - translator_config = DocxTranslatorConfig( - **payload.model_dump(include={ - 'base_url', 'api_key', 'model_id', 'to_lang', 'custom_prompt', - 'temperature', 'thinking', 'chunk_size', 'concurrent', - 'insert_mode', 'separator', 'glossary_dict' - }, exclude_none=True) - ) + translator_args = payload.model_dump(include={ + 'base_url', 'api_key', 'model_id', 'to_lang', 'custom_prompt', + 'temperature', 'thinking', 'chunk_size', 'concurrent', + 'insert_mode', 'separator', 'glossary_dict' + }, exclude_none=True) + translator_args['glossary_generate_enable'] = payload.glossary_generate_enable + translator_args['glossary_agent_config'] = build_glossary_agent_config() + translator_config = DocxTranslatorConfig(**translator_args) + html_exporter_config = Docx2HTMLExporterConfig(cdn=True) workflow_config = DocxWorkflowConfig( translator_config=translator_config, @@ -600,13 +662,15 @@ async def _perform_translation( elif isinstance(payload, SrtWorkflowParams): task_logger.info("构建 SrtWorkflow 配置。") - translator_config = SrtTranslatorConfig( - **payload.model_dump(include={ - 'base_url', 'api_key', 'model_id', 'to_lang', 'custom_prompt', - 'temperature', 'thinking', 'chunk_size', 'concurrent', - 'insert_mode', 'separator', 'glossary_dict' - }, exclude_none=True) - ) + translator_args = payload.model_dump(include={ + 'base_url', 'api_key', 'model_id', 'to_lang', 'custom_prompt', + 'temperature', 'thinking', 'chunk_size', 'concurrent', + 'insert_mode', 'separator', 'glossary_dict' + }, exclude_none=True) + translator_args['glossary_generate_enable'] = payload.glossary_generate_enable + translator_args['glossary_agent_config'] = build_glossary_agent_config() + translator_config = SrtTranslatorConfig(**translator_args) + html_exporter_config = Srt2HTMLExporterConfig(cdn=True) workflow_config = SrtWorkflowConfig( translator_config=translator_config, @@ -617,13 +681,15 @@ async def _perform_translation( elif isinstance(payload, EpubWorkflowParams): task_logger.info("构建 EpubWorkflow 配置。") - translator_config = EpubTranslatorConfig( - **payload.model_dump(include={ - 'base_url', 'api_key', 'model_id', 'to_lang', 'custom_prompt', - 'temperature', 'thinking', 'chunk_size', 'concurrent', - 'insert_mode', 'separator', 'glossary_dict' - }, exclude_none=True) - ) + translator_args = payload.model_dump(include={ + 'base_url', 'api_key', 'model_id', 'to_lang', 'custom_prompt', + 'temperature', 'thinking', 'chunk_size', 'concurrent', + 'insert_mode', 'separator', 'glossary_dict' + }, exclude_none=True) + translator_args['glossary_generate_enable'] = payload.glossary_generate_enable + translator_args['glossary_agent_config'] = build_glossary_agent_config() + translator_config = EpubTranslatorConfig(**translator_args) + html_exporter_config = Epub2HTMLExporterConfig(cdn=True) workflow_config = EpubWorkflowConfig( translator_config=translator_config, @@ -635,13 +701,15 @@ async def _perform_translation( # --- HTML WORKFLOW LOGIC START --- elif isinstance(payload, HtmlWorkflowParams): task_logger.info("构建 HtmlWorkflow 配置。") - translator_config = HtmlTranslatorConfig( - **payload.model_dump(include={ - 'base_url', 'api_key', 'model_id', 'to_lang', 'custom_prompt', - 'temperature', 'thinking', 'chunk_size', 'concurrent', - 'insert_mode', 'separator', 'glossary_dict' - }, exclude_none=True) - ) + translator_args = payload.model_dump(include={ + 'base_url', 'api_key', 'model_id', 'to_lang', 'custom_prompt', + 'temperature', 'thinking', 'chunk_size', 'concurrent', + 'insert_mode', 'separator', 'glossary_dict' + }, exclude_none=True) + translator_args['glossary_generate_enable'] = payload.glossary_generate_enable + translator_args['glossary_agent_config'] = build_glossary_agent_config() + translator_config = HtmlTranslatorConfig(**translator_args) + workflow_config = HtmlWorkflowConfig( translator_config=translator_config, logger=task_logger diff --git a/docutranslate/glossary/glossary.py b/docutranslate/glossary/glossary.py index c8ce131..b1c39d2 100644 --- a/docutranslate/glossary/glossary.py +++ b/docutranslate/glossary/glossary.py @@ -8,9 +8,14 @@ class Glossary: self.glossary_dict[src]=dst def append_system_prompt(self,text:str): + flag=False prompt="\n以下为参考术语表:\n" for src,dst in self.glossary_dict.items(): if src in text: prompt+=f"{src}=>{dst}\n" + flag=True prompt+="术语表结束\n" - return prompt + if flag: + return prompt + else: + return "" diff --git a/docutranslate/static/i18nData.json b/docutranslate/static/i18nData.json index 73643a4..3f361c3 100644 --- a/docutranslate/static/i18nData.json +++ b/docutranslate/static/i18nData.json @@ -18,16 +18,16 @@ "insertModeAppend": "附加到原文后 (Append)", "insertModePrepend": "附加到原文前 (Prepend)", "insertModeHelpDocx": "选择如何将翻译后的文本插入。", - "separatorLabel": "分隔符", - "separatorPlaceholder": "例如: \\n---翻译---\\n", - "separatorHelp": "当插入模式为附加或前置时,用于分隔原文和译文的字符。\\n 代表换行。", "insertModeHelpXlsx": "选择如何将翻译后的文本插入到单元格中。", - "separatorPlaceholderSimple": "例如: \\n---\\n", - "xlsxTranslateRegionsLabel": "翻译区域 (可选)", - "xlsxTranslateRegionsPlaceholder": "每行一个区域, 例如:Sheet1!A1:B10(不指定表名则对所有表生效)", "insertModeHelpSrt": "选择如何将翻译后的文本插入。", "insertModeHelpEpub": "选择如何将翻译后的文本插入。", "insertModeHelpHtml": "选择如何将翻译后的文本插入。", + "separatorLabel": "分隔符", + "separatorPlaceholder": "例如: \\n---翻译---\\n", + "separatorPlaceholderSimple": "例如: \\n---\\n", + "separatorHelp": "当插入模式为附加或前置时,用于分隔原文和译文的字符。\\n 代表换行。", + "xlsxTranslateRegionsLabel": "翻译区域 (可选)", + "xlsxTranslateRegionsPlaceholder": "每行一个区域, 例如:Sheet1!A1:B10(不指定表名则对所有表生效)", "jsonPathLabel": "需要翻译的JSON路径", "jsonPathPlaceholder": "每行一个路径, 例如:\n$.name\n$.*", "jsonPathHelp": "采用jsonpath-ng的路径选择语法,每一行表示一个json路径", @@ -63,8 +63,13 @@ "glossaryHelp": "选择一个或多个CSV文件。文件需包含'src'和'dst'两列标题,分别代表原文和译文。", "viewGlossaryBtn": "查看术语表", "clearGlossaryBtn": "清空", - "chunkSizeLabel": "分块大小", + "glossaryGenTitle": "术语表", + "glossaryGenEnableLabel": "自动生成术语表", + "glossaryGenConfigLabel": "生成术语表配置", + "glossaryGenConfigSame": "与翻译配置相同", + "glossaryGenConfigCustom": "自定义", "resetBtn": "重置", + "chunkSizeLabel": "分块大小", "concurrentLabel": "并发数", "githubInfo": "GitHub主页(欢迎star❤):
https://github.com/xunbu/docutranslate", "qqGroupInfo": "交流QQ群: 1047781902", @@ -75,7 +80,7 @@ "taskCardIdPlaceholder": "等待提交...", "taskCardFileDrop": "点击或拖拽文件到此处", "taskCardFileSelected": "文件已选择", - "taskCardFilenameLabel": "文件名: ", + "taskCardFilenameLabel": "文件名:", "taskCardLogLabel": "日志", "taskCardStatusWaiting": "等待上传文件...", "taskCardPreviewBtn": "预览", @@ -117,43 +122,43 @@ "aiSettingsTitleText": "翻译模型", "translationSettingsTitleText": "翻译配置", "advancedSettingsTitleText": "高级参数", - "engineOptionIdentity": "不转换(仅复制)", + "engineOptionIdentity": "不转换(Identity)", "engineOptionMineru": "Mineru", "engineOptionDocling": "Docling", "status_selectFileFirst": "请先选择文件!", - "status_invalidWorkflow": "无效的工作流选择。", - "status_fillRequired": "请填写所有必填项。", - "status_releasingOldTask": "正在释放旧任务...", + "status_fillRequired": "请填写所有必填项!", + "status_invalidWorkflow": "选择了无效的工作流。", + "status_releasingOldTask": "正在释放旧任务资源...", "btn_initializing": "初始化...", - "status_encodingAndSubmitting": "正在编码并提交文件...", - "status_requestOk": "请求成功,任务已开始。", + "status_encodingAndSubmitting": "正在编码文件并提交任务...", + "status_requestOk": "任务请求成功,等待处理...", "btn_cancelTranslation": "取消翻译", "status_requestFail": "请求失败", - "status_initFail": "初始化失败", + "status_initFail": "任务启动失败", "status_cancelling": "取消中...", "status_cancelSent": "取消请求已发送。", "status_cancelFail": "取消失败", "status_gettingStatus": "正在获取状态...", "btn_reTranslate": "重新翻译", "status_updateError": "状态更新失败。", - "preview_loading": "加载预览中...", - "preview_cantReadOriginal": "无法读取原文文件内容。", + "preview_loading": "正在加载预览...", + "preview_cantReadOriginal": "无法读取原文内容。", "preview_cantPreviewType": "无法预览此文件类型", - "preview_noOriginalCache": "无原文文件缓存可供预览。", - "preview_loadFailed": "预览加载失败。", + "preview_noOriginalCache": "无原文缓存,请重新上传文件以预览。", + "preview_loadFailed": "加载预览失败。", "pdf_preparing": "正在准备PDF...", - "pdf_print_failed": "调用打印功能失败。请尝试手动保存为PDF。", - "pdf_fetch_failed": "获取译文内容失败,无法生成PDF。", + "pdf_print_failed": "打印/另存为PDF失败。可能是浏览器限制或预览内容问题。", + "pdf_fetch_failed": "获取预览内容失败,无法生成PDF。", "preview_bilingual": "双语预览", "preview_translatedOnly": "仅译文预览", - "admin_tasklist_failed": "管理员模式:获取服务器任务列表失败。" + "admin_tasklist_failed": "管理员模式:无法从服务器加载任务列表。" }, "en": { "pageTitle": "DocuTranslate - Interactive Document Translation", "tutorialBtn": "Tutorial", "projectContributeBtn": "Contribute", "workflowTitle": "1. Select Workflow", - "workflowOptionMarkdown": "Markdown-based Translation (.pdf/.md/.png, etc.)", + "workflowOptionMarkdown": "Translate via Markdown (.pdf/.md/.png, etc.)", "workflowOptionTxt": "Plain Text Translation (.txt)", "workflowOptionJson": "JSON Translation (.json)", "workflowOptionDocx": "DOCX Translation (.docx)", @@ -163,31 +168,31 @@ "workflowOptionHtml": "HTML Translation (.html)", "autoWorkflowLabel": "Auto-select workflow", "insertModeLabel": "Insert Mode", - "insertModeReplace": "Replace Original Text", - "insertModeAppend": "Append to Original Text", - "insertModePrepend": "Prepend to Original Text", - "insertModeHelpDocx": "Choose how to insert the translated text.", + "insertModeReplace": "Replace Original", + "insertModeAppend": "Append to Original", + "insertModePrepend": "Prepend to Original", + "insertModeHelpDocx": "Choose how the translated text is inserted.", + "insertModeHelpXlsx": "Choose how the translated text is inserted into cells.", + "insertModeHelpSrt": "Choose how the translated text is inserted.", + "insertModeHelpEpub": "Choose how the translated text is inserted.", + "insertModeHelpHtml": "Choose how the translated text is inserted.", "separatorLabel": "Separator", - "separatorPlaceholder": "e.g., \\n---translation---\\n", - "separatorHelp": "Characters used to separate original and translated text in append/prepend modes. \\n represents a new line.", - "insertModeHelpXlsx": "Choose how to insert translated text into cells.", + "separatorPlaceholder": "e.g., \\n---Translation---\\n", "separatorPlaceholderSimple": "e.g., \\n---\\n", + "separatorHelp": "Characters used to separate original and translated text in 'Append' or 'Prepend' mode. \\n represents a newline.", "xlsxTranslateRegionsLabel": "Translate Regions (Optional)", "xlsxTranslateRegionsPlaceholder": "One region per line, e.g., Sheet1!A1:B10 (applies to all sheets if sheet name is omitted)", - "insertModeHelpSrt": "Choose how to insert the translated text.", - "insertModeHelpEpub": "Choose how to insert the translated text.", - "insertModeHelpHtml": "Choose how to insert the translated text.", "jsonPathLabel": "JSON Paths to Translate", "jsonPathPlaceholder": "One path per line, e.g.:\n$.name\n$.*", - "jsonPathHelp": "Uses jsonpath-ng syntax. One JSON path per line.", + "jsonPathHelp": "Uses jsonpath-ng syntax. Each line represents a JSON path.", "parsingEngineLabel": "Parsing Engine", - "parsingEngineHelp": "Optional if the uploaded file is already in .md format.", + "parsingEngineHelp": "If the uploaded file is already in .md format, this can be left unselected.", "getMineruTokenTitle": "Get Mineru Token", "mineruTokenPlaceholder": "Required when using the Mineru engine", "modelVersionLabel": "Mineru Model Version", "modelVersionVlm": "VLM", "modelVersionPipline": "Pipeline", - "modelVersionHelp": "Mineru VLM is a newer, internal test model.", + "modelVersionHelp": "Mineru VLM is a newer, internal beta model.", "formulaOcrLabel": "Formula Recognition", "codeOcrLabel": "Code Recognition", "platformLabel": "Select Platform", @@ -197,40 +202,45 @@ "getApiKeyTitle": "Get API Key", "apiKeyPlaceholder": "Please enter your API Key", "modelIdLabel": "Model ID", - "modelIdPlaceholder": "e.g., gpt-4o, glm-4", + "modelIdPlaceholder": "e.g., gpt-4o, llama-3-70b", "targetLanguageLabel": "Target Language", "targetLanguageCustom": "Other (Custom)", "customLangPlaceholder": "Enter target language, e.g., Italian", "thinkingModeLabel": "Thinking Mode", - "thinkingModeTooltip": "Set the thinking mode for hybrid inference models, currently supporting Zhipu's glm-4.5 series, Alibaba Cloud's qwen3 series, Volcengine's Doubao-Seed-1.6 series, etc.", + "thinkingModeTooltip": "Sets the thinking mode for hybrid inference models, currently supporting Zhipu's glm-4.5 series, Alibaba Cloud's qwen3 series, Volcengine's Doubao-Seed-1.6 series, etc.", "thinkingModeEnable": "Enable", "thinkingModeDisable": "Disable", "thinkingModeDefault": "Default", "customPromptLabel": "Custom Prompt", - "customPromptPlaceholder": "Optional, e.g., 'Keep proper nouns in their original form'", + "customPromptPlaceholder": "Optional, e.g., 'Do not translate proper names'", "glossaryLabel": "Glossary (Optional)", "glossaryHelp": "Select one or more CSV files. Files must contain 'src' and 'dst' headers for source and destination terms.", "viewGlossaryBtn": "View Glossary", "clearGlossaryBtn": "Clear", - "chunkSizeLabel": "Chunk Size", + "glossaryGenTitle": "Glossary", + "glossaryGenEnableLabel": "Auto-generate Glossary", + "glossaryGenConfigLabel": "Glossary Generation Config", + "glossaryGenConfigSame": "Same as Translator", + "glossaryGenConfigCustom": "Custom", "resetBtn": "Reset", + "chunkSizeLabel": "Chunk Size", "concurrentLabel": "Concurrency", - "githubInfo": "GitHub (star us ❤):
https://github.com/xunbu/docutranslate", - "qqGroupInfo": "QQ Group: 1047781902", + "githubInfo": "GitHub Repo (Star us! ❤):
https://github.com/xunbu/docutranslate", + "qqGroupInfo": "QQ Group for discussions: 1047781902", "taskListTitle": "Task List", "newTaskBtn": "New Task", "noTaskPlaceholder": "No tasks yet. Click 'New Task' to get started!", "taskCardIdLabel": "Task ID", - "taskCardIdPlaceholder": "Waiting for submission...", + "taskCardIdPlaceholder": "Awaiting submission...", "taskCardFileDrop": "Click or drag file here", - "taskCardFileSelected": "File Selected", - "taskCardFilenameLabel": "Filename: ", + "taskCardFileSelected": "File selected", + "taskCardFilenameLabel": "Filename:", "taskCardLogLabel": "Log", "taskCardStatusWaiting": "Waiting for file upload...", "taskCardPreviewBtn": "Preview", "taskCardDownloadBtn": "Download", "taskCardStartBtn": "Start Translation", - "downloadMdEmbedded": "Markdown (Embedded Images)", + "downloadMdEmbedded": "Markdown (Embedded Imgs)", "downloadMdZip": "Markdown (.zip)", "previewTitle": "Preview", "previewBilingualBtn": "Bilingual", @@ -240,61 +250,61 @@ "closeBtn": "Close", "downloadBtn": "Download", "tutorialModalTitle": "User Guide", - "tutorialModalBody": "

Video tutorials can be found by searching for docutranslate on Bilibili.

Welcome to DocuTranslate! Please follow these steps to translate your documents:

  1. Select a Workflow

    First, choose the appropriate translation process from the top of the settings panel. Different workflows are suited for different file types:

    New Feature: The \"Auto-select workflow\" switch is now enabled by default. Simply upload your file, and the system will automatically match it with the correct workflow to simplify the process.

  2. Configure Parameters

    Based on your chosen workflow, complete the necessary configurations. All settings are automatically saved in your browser.

  3. Upload File

    In the task list on the right, click or drag your document into the file upload area.

  4. Start Translation

    Once the file is selected, click the Start Translation button on the task card. The system will begin processing the task, and you can monitor real-time progress in the log area.

  5. View and Download

    After the translation is complete, action buttons will appear on the task card:

Tip: All settings are automatically saved locally in your browser for your convenience.
", - "tutorialUnderstandBtn": "Got it", - "contributorsModalTitle": "Thanks for Contributing", - "contributorsPara1": "DocuTranslate is an open-source project! The needs and usage of the community drive its progress.", - "contributorsPara2": "A heartfelt thank you to all friends who have sponsored the project, submitted code, provided valuable suggestions, and starred the project!", - "contributorsWelcome": "You are welcome to contribute in the following ways:", - "contributorsGithub": "GitHub Page", + "tutorialModalBody": "

Video tutorials are available on Bilibili by searching for docutranslate.

Welcome to DocuTranslate! Follow these steps to translate your documents:

  1. Select Workflow

    First, choose your desired translation process from the top of the settings panel. Different workflows are suited for different file types:

    New Feature: The 'Auto-select workflow' switch is now on by default. Just upload your file, and the system will automatically match it to the appropriate workflow for you.

  2. Configure Parameters

    Configure the settings based on your chosen workflow. All settings are automatically saved in your browser.

  3. Upload File

    In the task list on the right, click or drag your document into the file upload area.

  4. Start Translation

    Once the file is selected, click the Start Translation button on the task card. The system will begin processing, and you can monitor real-time progress in the log area.

  5. View & Download

    After the translation is complete, action buttons will appear on the task card:

Tip: All your settings are automatically saved in your browser's local storage for your convenience.
", + "tutorialUnderstandBtn": "I understand", + "contributorsModalTitle": "Thanks for Contributing!", + "contributorsPara1": "DocuTranslate is an open-source project! The community's needs and usage are what drive the project forward.", + "contributorsPara2": "A heartfelt thank you to everyone who has funded the project, submitted code, provided valuable suggestions, and starred the repository!", + "contributorsWelcome": "You're welcome to contribute in the following ways:", + "contributorsGithub": "GitHub Repo", "contributorsPR": "Submit a Pull Request", "contributorsIssue": "Report an Issue", - "contributorsQQ": "Or contact the author via QQ group: 1047781902", + "contributorsQQ": "Or contact the author via QQ Group: 1047781902", "glossaryModalTitle": "Current Glossary", "glossaryTableSource": "Source (src)", "glossaryTableDestination": "Destination (dst)", "init_i18n_failed_alert": "Failed to load interface translations. Please check your network connection or contact an administrator.", "init_failed_alert": "Initialization failed, could not connect to the backend service. Please ensure the service is running and refresh the page.", - "glossaryEmpty": "Glossary is empty.", - "parsingSettingsTitleText": "Parsing Settings", - "jsonSettingsTitleText": "JSON Path Settings", + "glossaryEmpty": "The glossary is empty.", + "parsingSettingsTitleText": "Parsing Config", + "jsonSettingsTitleText": "JSON Path Config", "xlsxSettingsTitleText": "XLSX Translation Options", "docxSettingsTitleText": "DOCX Translation Options", "srtSettingsTitleText": "SRT Translation Options", "epubSettingsTitleText": "EPUB Translation Options", "htmlSettingsTitleText": "HTML Translation Options", "aiSettingsTitleText": "Translation Model", - "translationSettingsTitleText": "Translation Settings", + "translationSettingsTitleText": "Translation Config", "advancedSettingsTitleText": "Advanced Parameters", - "engineOptionIdentity": "Identity (Copy only)", + "engineOptionIdentity": "No Conversion (Identity)", "engineOptionMineru": "Mineru", "engineOptionDocling": "Docling", "status_selectFileFirst": "Please select a file first!", + "status_fillRequired": "Please fill in all required fields!", "status_invalidWorkflow": "Invalid workflow selected.", - "status_fillRequired": "Please fill in all required fields.", - "status_releasingOldTask": "Releasing old task...", + "status_releasingOldTask": "Releasing old task resources...", "btn_initializing": "Initializing...", - "status_encodingAndSubmitting": "Encoding and submitting file...", - "status_requestOk": "Request successful, task started.", + "status_encodingAndSubmitting": "Encoding file and submitting task...", + "status_requestOk": "Task submitted successfully, awaiting processing...", "btn_cancelTranslation": "Cancel Translation", "status_requestFail": "Request failed", - "status_initFail": "Initialization failed", + "status_initFail": "Failed to start task", "status_cancelling": "Cancelling...", "status_cancelSent": "Cancellation request sent.", "status_cancelFail": "Cancellation failed", "status_gettingStatus": "Getting status...", "btn_reTranslate": "Re-translate", - "status_updateError": "Status update failed.", + "status_updateError": "Failed to update status.", "preview_loading": "Loading preview...", - "preview_cantReadOriginal": "Cannot read original file content.", + "preview_cantReadOriginal": "Could not read original file content.", "preview_cantPreviewType": "Cannot preview this file type", - "preview_noOriginalCache": "No original file cached for preview.", + "preview_noOriginalCache": "No original file cached. Please re-upload to preview.", "preview_loadFailed": "Failed to load preview.", "pdf_preparing": "Preparing PDF...", - "pdf_print_failed": "Failed to invoke print function. Please try saving to PDF manually.", - "pdf_fetch_failed": "Failed to fetch translated content, cannot generate PDF.", + "pdf_print_failed": "Print/Save as PDF failed. This may be due to browser restrictions or preview content issues.", + "pdf_fetch_failed": "Failed to fetch preview content, cannot generate PDF.", "preview_bilingual": "Bilingual Preview", "preview_translatedOnly": "Translated Only Preview", - "admin_tasklist_failed": "Admin Mode: Failed to get task list from server." + "admin_tasklist_failed": "Admin mode: Failed to load task list from server." } } \ No newline at end of file diff --git a/docutranslate/static/index.html b/docutranslate/static/index.html index d5df520..4fb32e5 100644 --- a/docutranslate/static/index.html +++ b/docutranslate/static/index.html @@ -1 +1 @@ - DocuTranslate - 交互式文档翻译

DocuTranslate

如果上传的文件本身是.md格式,此项可不选。
mineru VLM是更新的内测模型。

选择一个或多个CSV文件。文件需包含'src'和'dst'两列标题,分别代表原文和译文。

GitHub主页(欢迎star❤):
https://github.com/xunbu/docutranslate

交流QQ群: 1047781902

任务列表

当前没有任务,点击“新建任务”开始吧!

预览
原文
译文
\ No newline at end of file + DocuTranslate - 交互式文档翻译

DocuTranslate

如果上传的文件本身是.md格式,此项可不选。
mineru VLM是更新的内测模型。

选择一个或多个CSV文件。文件需包含'src'和'dst'两列标题,分别代表原文和译文。

GitHub主页(欢迎star❤):
https://github.com/xunbu/docutranslate

交流QQ群: 1047781902

任务列表

当前没有任务,点击“新建任务”开始吧!

预览
原文
译文
\ No newline at end of file diff --git a/docutranslate/translator/ai_translator/base.py b/docutranslate/translator/ai_translator/base.py index 144de03..770463e 100644 --- a/docutranslate/translator/ai_translator/base.py +++ b/docutranslate/translator/ai_translator/base.py @@ -21,7 +21,7 @@ class AiTranslatorConfig(TranslatorConfig): chunk_size: int = 3000 concurrent: int = 30 glossary_dict: dict[str:str] | None = None - glossary_generate_enable: bool = True + glossary_generate_enable: bool = False glossary_agent_config: GlossaryAgentConfig | None = None diff --git a/docutranslate/translator/ai_translator/docx_translator.py b/docutranslate/translator/ai_translator/docx_translator.py index 4943c67..b3db35c 100644 --- a/docutranslate/translator/ai_translator/docx_translator.py +++ b/docutranslate/translator/ai_translator/docx_translator.py @@ -44,7 +44,6 @@ class DocxTranslator(AiTranslator): baseurl=config.base_url, key=config.api_key, model_id=config.model_id, - system_prompt=None, temperature=config.temperature, thinking=config.thinking, max_concurrent=config.concurrent, diff --git a/docutranslate/translator/ai_translator/epub_translator.py b/docutranslate/translator/ai_translator/epub_translator.py index d5e23b1..a2518c2 100644 --- a/docutranslate/translator/ai_translator/epub_translator.py +++ b/docutranslate/translator/ai_translator/epub_translator.py @@ -32,7 +32,7 @@ class EpubTranslator(AiTranslator): agent_config = SegmentsTranslateAgentConfig( custom_prompt=config.custom_prompt, to_lang=config.to_lang, baseurl=config.base_url, key=config.api_key, model_id=config.model_id, - system_prompt=None, temperature=config.temperature, thinking=config.thinking, + temperature=config.temperature, thinking=config.thinking, max_concurrent=config.concurrent, timeout=config.timeout, logger=self.logger, glossary_dict=config.glossary_dict ) diff --git a/docutranslate/translator/ai_translator/html_translator.py b/docutranslate/translator/ai_translator/html_translator.py index f8d6d2e..05b050d 100644 --- a/docutranslate/translator/ai_translator/html_translator.py +++ b/docutranslate/translator/ai_translator/html_translator.py @@ -91,7 +91,6 @@ class HtmlTranslator(AiTranslator): baseurl=config.base_url, key=config.api_key, model_id=config.model_id, - system_prompt=None, temperature=config.temperature, thinking=config.thinking, max_concurrent=config.concurrent, diff --git a/docutranslate/translator/ai_translator/json_translator.py b/docutranslate/translator/ai_translator/json_translator.py index 0011d29..606266d 100644 --- a/docutranslate/translator/ai_translator/json_translator.py +++ b/docutranslate/translator/ai_translator/json_translator.py @@ -23,7 +23,6 @@ class JsonTranslator(AiTranslator): baseurl=config.base_url, key=config.api_key, model_id=config.model_id, - system_prompt=None, temperature=config.temperature, thinking=config.thinking, max_concurrent=config.concurrent, diff --git a/docutranslate/translator/ai_translator/md_translator.py b/docutranslate/translator/ai_translator/md_translator.py index 7cacb32..7fc76a0 100644 --- a/docutranslate/translator/ai_translator/md_translator.py +++ b/docutranslate/translator/ai_translator/md_translator.py @@ -24,7 +24,6 @@ class MDTranslator(AiTranslator): baseurl=config.base_url, key=config.api_key, model_id=config.model_id, - system_prompt=None, temperature=config.temperature, thinking=config.thinking, max_concurrent=config.concurrent, diff --git a/docutranslate/translator/ai_translator/srt_translator.py b/docutranslate/translator/ai_translator/srt_translator.py index 415f56c..a0f9783 100644 --- a/docutranslate/translator/ai_translator/srt_translator.py +++ b/docutranslate/translator/ai_translator/srt_translator.py @@ -31,7 +31,6 @@ class SrtTranslator(AiTranslator): baseurl=config.base_url, key=config.api_key, model_id=config.model_id, - system_prompt=None, temperature=config.temperature, thinking=config.thinking, max_concurrent=config.concurrent, diff --git a/docutranslate/translator/ai_translator/txt_translator.py b/docutranslate/translator/ai_translator/txt_translator.py index 576074c..50571d7 100644 --- a/docutranslate/translator/ai_translator/txt_translator.py +++ b/docutranslate/translator/ai_translator/txt_translator.py @@ -21,7 +21,6 @@ class TXTTranslator(AiTranslator): baseurl=config.base_url, key=config.api_key, model_id=config.model_id, - system_prompt=None, temperature=config.temperature, thinking=config.thinking, max_concurrent=config.concurrent, diff --git a/docutranslate/translator/ai_translator/xlsx_translator.py b/docutranslate/translator/ai_translator/xlsx_translator.py index f90ae61..4c5b3cf 100644 --- a/docutranslate/translator/ai_translator/xlsx_translator.py +++ b/docutranslate/translator/ai_translator/xlsx_translator.py @@ -31,7 +31,6 @@ class XlsxTranslator(AiTranslator): baseurl=config.base_url, key=config.api_key, model_id=config.model_id, - system_prompt=None, temperature=config.temperature, thinking=config.thinking, max_concurrent=config.concurrent,