diff --git a/docutranslate/translator/ai_translator/docx_translator.py b/docutranslate/translator/ai_translator/docx_translator.py index 56b509c..5050efb 100644 --- a/docutranslate/translator/ai_translator/docx_translator.py +++ b/docutranslate/translator/ai_translator/docx_translator.py @@ -1,20 +1,16 @@ import asyncio -from dataclasses import dataclass, field +from dataclasses import dataclass from io import BytesIO from typing import Self, Literal, List, Dict, Any, Tuple import docx from docx.document import Document as DocumentObject -from docx.oxml.ns import nsdecls -from docx.oxml import OxmlElement -from docx.table import _Cell, Table from docx.text.paragraph import Paragraph from docx.text.run import Run from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent from docutranslate.ir.document import Document -from docutranslate.translator.ai_translator.base import AiTranslatorConfig -from docutranslate.translator.base import Translator +from docutranslate.translator.ai_translator.base import AiTranslatorConfig, AiTranslator def is_image_run(run: Run) -> bool: @@ -32,7 +28,7 @@ class DocxTranslatorConfig(AiTranslatorConfig): separator: str = "\n" -class DocxTranslator(Translator): +class DocxTranslator(AiTranslator): """ 用于翻译 .docx 文件的翻译器。 此版本经过优化,可以处理图文混排的段落而不会丢失图片。 @@ -188,4 +184,4 @@ class DocxTranslator(Translator): # 将翻译结果写回文档 document.content = await asyncio.to_thread(self._after_translate, doc, elements_to_translate, translated_texts, original_texts) - return self \ No newline at end of file + return self diff --git a/docutranslate/translator/ai_translator/epub_translator.py b/docutranslate/translator/ai_translator/epub_translator.py index da5d4ee..3381f96 100644 --- a/docutranslate/translator/ai_translator/epub_translator.py +++ b/docutranslate/translator/ai_translator/epub_translator.py @@ -1,7 +1,7 @@ import asyncio import os -import zipfile import xml.etree.ElementTree as ET +import zipfile from dataclasses import dataclass from io import BytesIO from typing import Self, Literal, List, Dict, Any @@ -10,8 +10,7 @@ from bs4 import BeautifulSoup from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent from docutranslate.ir.document import Document -from docutranslate.translator.ai_translator.base import AiTranslatorConfig -from docutranslate.translator.base import Translator +from docutranslate.translator.ai_translator.base import AiTranslatorConfig, AiTranslator @dataclass @@ -20,7 +19,7 @@ class EpubTranslatorConfig(AiTranslatorConfig): separator: str = "\n" -class EpubTranslator(Translator): +class EpubTranslator(AiTranslator): """ 一个用于翻译 EPUB 文件中内容的翻译器。 此版本使用内置的 `zipfile` 和 `xml` 库,不依赖 `ebooklib`。 @@ -197,4 +196,4 @@ class EpubTranslator(Translator): document.content = await asyncio.to_thread( self._after_translate, all_files, items_to_translate, translated_texts, original_texts ) - return self \ No newline at end of file + return self diff --git a/docutranslate/translator/ai_translator/html_translator.py b/docutranslate/translator/ai_translator/html_translator.py index a6a867f..804746b 100644 --- a/docutranslate/translator/ai_translator/html_translator.py +++ b/docutranslate/translator/ai_translator/html_translator.py @@ -2,12 +2,11 @@ import asyncio from dataclasses import dataclass from typing import Self, Literal, Set, Dict, List, Tuple -from bs4 import BeautifulSoup, NavigableString, Tag, Comment +from bs4 import BeautifulSoup, NavigableString, Comment from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent from docutranslate.ir.document import Document -from docutranslate.translator.ai_translator.base import AiTranslatorConfig -from docutranslate.translator.base import Translator +from docutranslate.translator.ai_translator.base import AiTranslatorConfig, AiTranslator # --- 规则定义 --- @@ -16,16 +15,16 @@ from docutranslate.translator.base import Translator # 在预处理阶段,这些标签及其所有子元素将被直接从文档中移除,以确保它们不会被意外修改。 NON_TRANSLATABLE_TAGS: Set[str] = { 'script', # JavaScript代码 - 'style', # CSS样式 - 'pre', # 预格式化文本,通常用于代码块 - 'code', # 行内代码 - 'kbd', # 键盘输入 - 'samp', # 示例输出 - 'var', # 变量 - 'noscript',# script未启用时的内容 - 'meta', # 元数据 - 'link', # 外部资源链接 - 'head', # 文档头部,通常不包含可见的可翻译内容 + 'style', # CSS样式 + 'pre', # 预格式化文本,通常用于代码块 + 'code', # 行内代码 + 'kbd', # 键盘输入 + 'samp', # 示例输出 + 'var', # 变量 + 'noscript', # script未启用时的内容 + 'meta', # 元数据 + 'link', # 外部资源链接 + 'head', # 文档头部,通常不包含可见的可翻译内容 } # 2. 可翻译标签(白名单) @@ -73,7 +72,7 @@ class HtmlTranslatorConfig(AiTranslatorConfig): separator: str = " " # HTML中用空格作为默认分隔符可能更合适 -class HtmlTranslator(Translator): +class HtmlTranslator(AiTranslator): """ 一个用于翻译 HTML 文件内容的翻译器。 它采用黑白名单结合的策略,以最大程度地保留页面样式和功能: @@ -133,7 +132,7 @@ class HtmlTranslator(Translator): # --- 2b. 翻译安全标签内的安全属性 --- attributes_to_check = SAFE_ATTRIBUTES.get(tag.name, []) + SAFE_ATTRIBUTES.get('*', []) - for attr in set(attributes_to_check): # 使用set去重 + for attr in set(attributes_to_check): # 使用set去重 if tag.has_attr(attr) and tag[attr].strip(): value = tag[attr] translatable_items.append({'type': 'attribute', 'tag': tag, 'attribute': attr}) @@ -171,7 +170,7 @@ class HtmlTranslator(Translator): new_content = translated_text + self.separator + original_text else: self.logger.error(f"不正确的HtmlTranslatorConfig参数: insert_mode='{self.insert_mode}'") - new_content = original_text # 出错时恢复原文 + new_content = original_text # 出错时恢复原文 # 根据类型将内容写回 if item['type'] == 'node': @@ -217,4 +216,4 @@ class HtmlTranslator(Translator): document.content = await asyncio.to_thread( self._after_translate, soup, translatable_items, translated_texts, original_texts ) - return self \ No newline at end of file + return self diff --git a/docutranslate/translator/ai_translator/json_translator.py b/docutranslate/translator/ai_translator/json_translator.py index 4c14b93..e1fa455 100644 --- a/docutranslate/translator/ai_translator/json_translator.py +++ b/docutranslate/translator/ai_translator/json_translator.py @@ -6,8 +6,7 @@ from jsonpath_ng.ext import parse from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent from docutranslate.ir.document import Document -from docutranslate.translator.ai_translator.base import AiTranslatorConfig -from docutranslate.translator.base import Translator +from docutranslate.translator.ai_translator.base import AiTranslatorConfig, AiTranslator @dataclass @@ -15,7 +14,7 @@ class JsonTranslatorConfig(AiTranslatorConfig): json_paths: list[str] -class JsonTranslator(Translator): +class JsonTranslator(AiTranslator): def __init__(self, config: JsonTranslatorConfig): super().__init__(config=config) self.chunk_size = config.chunk_size diff --git a/docutranslate/translator/ai_translator/md_translator.py b/docutranslate/translator/ai_translator/md_translator.py index d69efb7..fbbe4cc 100644 --- a/docutranslate/translator/ai_translator/md_translator.py +++ b/docutranslate/translator/ai_translator/md_translator.py @@ -6,8 +6,7 @@ from docutranslate.agents import MDTranslateAgent from docutranslate.agents.markdown_agent import MDTranslateAgentConfig from docutranslate.context.md_mask_context import MDMaskUrisContext from docutranslate.ir.markdown_document import MarkdownDocument -from docutranslate.translator.ai_translator.base import AiTranslatorConfig -from docutranslate.translator.base import Translator +from docutranslate.translator.ai_translator.base import AiTranslatorConfig, AiTranslator from docutranslate.utils.markdown_splitter import split_markdown_text, join_markdown_texts @@ -16,7 +15,7 @@ class MDTranslatorConfig(AiTranslatorConfig): ... -class MDTranslator(Translator): +class MDTranslator(AiTranslator): def __init__(self, config: MDTranslatorConfig): super().__init__(config=config) self.chunk_size = config.chunk_size @@ -64,4 +63,4 @@ class MDTranslator(Translator): await asyncio.to_thread(run) self.logger.info("翻译完成") - return self \ No newline at end of file + return self diff --git a/docutranslate/translator/ai_translator/srt_translator.py b/docutranslate/translator/ai_translator/srt_translator.py index 5606538..6d12a7f 100644 --- a/docutranslate/translator/ai_translator/srt_translator.py +++ b/docutranslate/translator/ai_translator/srt_translator.py @@ -6,8 +6,7 @@ import srt # 导入srt库来处理字幕文件 from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent from docutranslate.ir.document import Document -from docutranslate.translator.ai_translator.base import AiTranslatorConfig -from docutranslate.translator.base import Translator +from docutranslate.translator.ai_translator.base import AiTranslatorConfig, AiTranslator @dataclass @@ -16,7 +15,7 @@ class SrtTranslatorConfig(AiTranslatorConfig): separator: str = "\n" -class SrtTranslator(Translator): +class SrtTranslator(AiTranslator): """ 一个用于翻译 SRT (.srt) 字幕文件的翻译器。 它会提取每个字幕块的文本内容,进行翻译,然后根据配置将译文写回。 @@ -134,4 +133,4 @@ class SrtTranslator(Translator): document.content = await asyncio.to_thread( self._after_translate, subtitles, translated_texts, original_texts ) - return self \ No newline at end of file + return self diff --git a/docutranslate/translator/ai_translator/txt_translator.py b/docutranslate/translator/ai_translator/txt_translator.py index 243e13b..07a143e 100644 --- a/docutranslate/translator/ai_translator/txt_translator.py +++ b/docutranslate/translator/ai_translator/txt_translator.py @@ -3,8 +3,7 @@ from typing import Self from docutranslate.agents.txt_agent import TXTTranslateAgent, TXTTranslateAgentConfig from docutranslate.ir.document import Document -from docutranslate.translator.ai_translator.base import AiTranslatorConfig -from docutranslate.translator.base import Translator +from docutranslate.translator.ai_translator.base import AiTranslatorConfig, AiTranslator from docutranslate.utils.markdown_splitter import split_markdown_text @@ -13,7 +12,7 @@ class TXTTranslatorConfig(AiTranslatorConfig): ... -class TXTTranslator(Translator): +class TXTTranslator(AiTranslator): def __init__(self, config: TXTTranslatorConfig): super().__init__(config=config) self.chunk_size = config.chunk_size diff --git a/docutranslate/translator/ai_translator/xlsx_translator.py b/docutranslate/translator/ai_translator/xlsx_translator.py index e7dfb07..e768da4 100644 --- a/docutranslate/translator/ai_translator/xlsx_translator.py +++ b/docutranslate/translator/ai_translator/xlsx_translator.py @@ -8,8 +8,7 @@ from openpyxl.cell import Cell from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent from docutranslate.ir.document import Document -from docutranslate.translator.ai_translator.base import AiTranslatorConfig -from docutranslate.translator.base import Translator +from docutranslate.translator.ai_translator.base import AiTranslatorConfig, AiTranslator @dataclass @@ -23,7 +22,7 @@ class XlsxTranslatorConfig(AiTranslatorConfig): translate_regions: Optional[List[str]] = None -class XlsxTranslator(Translator): +class XlsxTranslator(AiTranslator): def __init__(self, config: XlsxTranslatorConfig): super().__init__(config=config) self.chunk_size = config.chunk_size