fix
This commit is contained in:
@@ -1,20 +1,16 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
from dataclasses import dataclass, field
|
from dataclasses import dataclass
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from typing import Self, Literal, List, Dict, Any, Tuple
|
from typing import Self, Literal, List, Dict, Any, Tuple
|
||||||
|
|
||||||
import docx
|
import docx
|
||||||
from docx.document import Document as DocumentObject
|
from docx.document import Document as DocumentObject
|
||||||
from docx.oxml.ns import nsdecls
|
|
||||||
from docx.oxml import OxmlElement
|
|
||||||
from docx.table import _Cell, Table
|
|
||||||
from docx.text.paragraph import Paragraph
|
from docx.text.paragraph import Paragraph
|
||||||
from docx.text.run import Run
|
from docx.text.run import Run
|
||||||
|
|
||||||
from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent
|
from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent
|
||||||
from docutranslate.ir.document import Document
|
from docutranslate.ir.document import Document
|
||||||
from docutranslate.translator.ai_translator.base import AiTranslatorConfig
|
from docutranslate.translator.ai_translator.base import AiTranslatorConfig, AiTranslator
|
||||||
from docutranslate.translator.base import Translator
|
|
||||||
|
|
||||||
|
|
||||||
def is_image_run(run: Run) -> bool:
|
def is_image_run(run: Run) -> bool:
|
||||||
@@ -32,7 +28,7 @@ class DocxTranslatorConfig(AiTranslatorConfig):
|
|||||||
separator: str = "\n"
|
separator: str = "\n"
|
||||||
|
|
||||||
|
|
||||||
class DocxTranslator(Translator):
|
class DocxTranslator(AiTranslator):
|
||||||
"""
|
"""
|
||||||
用于翻译 .docx 文件的翻译器。
|
用于翻译 .docx 文件的翻译器。
|
||||||
此版本经过优化,可以处理图文混排的段落而不会丢失图片。
|
此版本经过优化,可以处理图文混排的段落而不会丢失图片。
|
||||||
|
|||||||
@@ -1,7 +1,7 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
import os
|
import os
|
||||||
import zipfile
|
|
||||||
import xml.etree.ElementTree as ET
|
import xml.etree.ElementTree as ET
|
||||||
|
import zipfile
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from typing import Self, Literal, List, Dict, Any
|
from typing import Self, Literal, List, Dict, Any
|
||||||
@@ -10,8 +10,7 @@ from bs4 import BeautifulSoup
|
|||||||
|
|
||||||
from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent
|
from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent
|
||||||
from docutranslate.ir.document import Document
|
from docutranslate.ir.document import Document
|
||||||
from docutranslate.translator.ai_translator.base import AiTranslatorConfig
|
from docutranslate.translator.ai_translator.base import AiTranslatorConfig, AiTranslator
|
||||||
from docutranslate.translator.base import Translator
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -20,7 +19,7 @@ class EpubTranslatorConfig(AiTranslatorConfig):
|
|||||||
separator: str = "\n"
|
separator: str = "\n"
|
||||||
|
|
||||||
|
|
||||||
class EpubTranslator(Translator):
|
class EpubTranslator(AiTranslator):
|
||||||
"""
|
"""
|
||||||
一个用于翻译 EPUB 文件中内容的翻译器。
|
一个用于翻译 EPUB 文件中内容的翻译器。
|
||||||
此版本使用内置的 `zipfile` 和 `xml` 库,不依赖 `ebooklib`。
|
此版本使用内置的 `zipfile` 和 `xml` 库,不依赖 `ebooklib`。
|
||||||
|
|||||||
@@ -2,12 +2,11 @@ import asyncio
|
|||||||
from dataclasses import dataclass
|
from dataclasses import dataclass
|
||||||
from typing import Self, Literal, Set, Dict, List, Tuple
|
from typing import Self, Literal, Set, Dict, List, Tuple
|
||||||
|
|
||||||
from bs4 import BeautifulSoup, NavigableString, Tag, Comment
|
from bs4 import BeautifulSoup, NavigableString, Comment
|
||||||
|
|
||||||
from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent
|
from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent
|
||||||
from docutranslate.ir.document import Document
|
from docutranslate.ir.document import Document
|
||||||
from docutranslate.translator.ai_translator.base import AiTranslatorConfig
|
from docutranslate.translator.ai_translator.base import AiTranslatorConfig, AiTranslator
|
||||||
from docutranslate.translator.base import Translator
|
|
||||||
|
|
||||||
# --- 规则定义 ---
|
# --- 规则定义 ---
|
||||||
|
|
||||||
@@ -16,16 +15,16 @@ from docutranslate.translator.base import Translator
|
|||||||
# 在预处理阶段,这些标签及其所有子元素将被直接从文档中移除,以确保它们不会被意外修改。
|
# 在预处理阶段,这些标签及其所有子元素将被直接从文档中移除,以确保它们不会被意外修改。
|
||||||
NON_TRANSLATABLE_TAGS: Set[str] = {
|
NON_TRANSLATABLE_TAGS: Set[str] = {
|
||||||
'script', # JavaScript代码
|
'script', # JavaScript代码
|
||||||
'style', # CSS样式
|
'style', # CSS样式
|
||||||
'pre', # 预格式化文本,通常用于代码块
|
'pre', # 预格式化文本,通常用于代码块
|
||||||
'code', # 行内代码
|
'code', # 行内代码
|
||||||
'kbd', # 键盘输入
|
'kbd', # 键盘输入
|
||||||
'samp', # 示例输出
|
'samp', # 示例输出
|
||||||
'var', # 变量
|
'var', # 变量
|
||||||
'noscript',# script未启用时的内容
|
'noscript', # script未启用时的内容
|
||||||
'meta', # 元数据
|
'meta', # 元数据
|
||||||
'link', # 外部资源链接
|
'link', # 外部资源链接
|
||||||
'head', # 文档头部,通常不包含可见的可翻译内容
|
'head', # 文档头部,通常不包含可见的可翻译内容
|
||||||
}
|
}
|
||||||
|
|
||||||
# 2. 可翻译标签(白名单)
|
# 2. 可翻译标签(白名单)
|
||||||
@@ -73,7 +72,7 @@ class HtmlTranslatorConfig(AiTranslatorConfig):
|
|||||||
separator: str = " " # HTML中用空格作为默认分隔符可能更合适
|
separator: str = " " # HTML中用空格作为默认分隔符可能更合适
|
||||||
|
|
||||||
|
|
||||||
class HtmlTranslator(Translator):
|
class HtmlTranslator(AiTranslator):
|
||||||
"""
|
"""
|
||||||
一个用于翻译 HTML 文件内容的翻译器。
|
一个用于翻译 HTML 文件内容的翻译器。
|
||||||
它采用黑白名单结合的策略,以最大程度地保留页面样式和功能:
|
它采用黑白名单结合的策略,以最大程度地保留页面样式和功能:
|
||||||
@@ -133,7 +132,7 @@ class HtmlTranslator(Translator):
|
|||||||
|
|
||||||
# --- 2b. 翻译安全标签内的安全属性 ---
|
# --- 2b. 翻译安全标签内的安全属性 ---
|
||||||
attributes_to_check = SAFE_ATTRIBUTES.get(tag.name, []) + SAFE_ATTRIBUTES.get('*', [])
|
attributes_to_check = SAFE_ATTRIBUTES.get(tag.name, []) + SAFE_ATTRIBUTES.get('*', [])
|
||||||
for attr in set(attributes_to_check): # 使用set去重
|
for attr in set(attributes_to_check): # 使用set去重
|
||||||
if tag.has_attr(attr) and tag[attr].strip():
|
if tag.has_attr(attr) and tag[attr].strip():
|
||||||
value = tag[attr]
|
value = tag[attr]
|
||||||
translatable_items.append({'type': 'attribute', 'tag': tag, 'attribute': attr})
|
translatable_items.append({'type': 'attribute', 'tag': tag, 'attribute': attr})
|
||||||
@@ -171,7 +170,7 @@ class HtmlTranslator(Translator):
|
|||||||
new_content = translated_text + self.separator + original_text
|
new_content = translated_text + self.separator + original_text
|
||||||
else:
|
else:
|
||||||
self.logger.error(f"不正确的HtmlTranslatorConfig参数: insert_mode='{self.insert_mode}'")
|
self.logger.error(f"不正确的HtmlTranslatorConfig参数: insert_mode='{self.insert_mode}'")
|
||||||
new_content = original_text # 出错时恢复原文
|
new_content = original_text # 出错时恢复原文
|
||||||
|
|
||||||
# 根据类型将内容写回
|
# 根据类型将内容写回
|
||||||
if item['type'] == 'node':
|
if item['type'] == 'node':
|
||||||
|
|||||||
@@ -6,8 +6,7 @@ from jsonpath_ng.ext import parse
|
|||||||
|
|
||||||
from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent
|
from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent
|
||||||
from docutranslate.ir.document import Document
|
from docutranslate.ir.document import Document
|
||||||
from docutranslate.translator.ai_translator.base import AiTranslatorConfig
|
from docutranslate.translator.ai_translator.base import AiTranslatorConfig, AiTranslator
|
||||||
from docutranslate.translator.base import Translator
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -15,7 +14,7 @@ class JsonTranslatorConfig(AiTranslatorConfig):
|
|||||||
json_paths: list[str]
|
json_paths: list[str]
|
||||||
|
|
||||||
|
|
||||||
class JsonTranslator(Translator):
|
class JsonTranslator(AiTranslator):
|
||||||
def __init__(self, config: JsonTranslatorConfig):
|
def __init__(self, config: JsonTranslatorConfig):
|
||||||
super().__init__(config=config)
|
super().__init__(config=config)
|
||||||
self.chunk_size = config.chunk_size
|
self.chunk_size = config.chunk_size
|
||||||
|
|||||||
@@ -6,8 +6,7 @@ from docutranslate.agents import MDTranslateAgent
|
|||||||
from docutranslate.agents.markdown_agent import MDTranslateAgentConfig
|
from docutranslate.agents.markdown_agent import MDTranslateAgentConfig
|
||||||
from docutranslate.context.md_mask_context import MDMaskUrisContext
|
from docutranslate.context.md_mask_context import MDMaskUrisContext
|
||||||
from docutranslate.ir.markdown_document import MarkdownDocument
|
from docutranslate.ir.markdown_document import MarkdownDocument
|
||||||
from docutranslate.translator.ai_translator.base import AiTranslatorConfig
|
from docutranslate.translator.ai_translator.base import AiTranslatorConfig, AiTranslator
|
||||||
from docutranslate.translator.base import Translator
|
|
||||||
from docutranslate.utils.markdown_splitter import split_markdown_text, join_markdown_texts
|
from docutranslate.utils.markdown_splitter import split_markdown_text, join_markdown_texts
|
||||||
|
|
||||||
|
|
||||||
@@ -16,7 +15,7 @@ class MDTranslatorConfig(AiTranslatorConfig):
|
|||||||
...
|
...
|
||||||
|
|
||||||
|
|
||||||
class MDTranslator(Translator):
|
class MDTranslator(AiTranslator):
|
||||||
def __init__(self, config: MDTranslatorConfig):
|
def __init__(self, config: MDTranslatorConfig):
|
||||||
super().__init__(config=config)
|
super().__init__(config=config)
|
||||||
self.chunk_size = config.chunk_size
|
self.chunk_size = config.chunk_size
|
||||||
|
|||||||
@@ -6,8 +6,7 @@ import srt # 导入srt库来处理字幕文件
|
|||||||
|
|
||||||
from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent
|
from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent
|
||||||
from docutranslate.ir.document import Document
|
from docutranslate.ir.document import Document
|
||||||
from docutranslate.translator.ai_translator.base import AiTranslatorConfig
|
from docutranslate.translator.ai_translator.base import AiTranslatorConfig, AiTranslator
|
||||||
from docutranslate.translator.base import Translator
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -16,7 +15,7 @@ class SrtTranslatorConfig(AiTranslatorConfig):
|
|||||||
separator: str = "\n"
|
separator: str = "\n"
|
||||||
|
|
||||||
|
|
||||||
class SrtTranslator(Translator):
|
class SrtTranslator(AiTranslator):
|
||||||
"""
|
"""
|
||||||
一个用于翻译 SRT (.srt) 字幕文件的翻译器。
|
一个用于翻译 SRT (.srt) 字幕文件的翻译器。
|
||||||
它会提取每个字幕块的文本内容,进行翻译,然后根据配置将译文写回。
|
它会提取每个字幕块的文本内容,进行翻译,然后根据配置将译文写回。
|
||||||
|
|||||||
@@ -3,8 +3,7 @@ from typing import Self
|
|||||||
|
|
||||||
from docutranslate.agents.txt_agent import TXTTranslateAgent, TXTTranslateAgentConfig
|
from docutranslate.agents.txt_agent import TXTTranslateAgent, TXTTranslateAgentConfig
|
||||||
from docutranslate.ir.document import Document
|
from docutranslate.ir.document import Document
|
||||||
from docutranslate.translator.ai_translator.base import AiTranslatorConfig
|
from docutranslate.translator.ai_translator.base import AiTranslatorConfig, AiTranslator
|
||||||
from docutranslate.translator.base import Translator
|
|
||||||
from docutranslate.utils.markdown_splitter import split_markdown_text
|
from docutranslate.utils.markdown_splitter import split_markdown_text
|
||||||
|
|
||||||
|
|
||||||
@@ -13,7 +12,7 @@ class TXTTranslatorConfig(AiTranslatorConfig):
|
|||||||
...
|
...
|
||||||
|
|
||||||
|
|
||||||
class TXTTranslator(Translator):
|
class TXTTranslator(AiTranslator):
|
||||||
def __init__(self, config: TXTTranslatorConfig):
|
def __init__(self, config: TXTTranslatorConfig):
|
||||||
super().__init__(config=config)
|
super().__init__(config=config)
|
||||||
self.chunk_size = config.chunk_size
|
self.chunk_size = config.chunk_size
|
||||||
|
|||||||
@@ -8,8 +8,7 @@ from openpyxl.cell import Cell
|
|||||||
|
|
||||||
from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent
|
from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent
|
||||||
from docutranslate.ir.document import Document
|
from docutranslate.ir.document import Document
|
||||||
from docutranslate.translator.ai_translator.base import AiTranslatorConfig
|
from docutranslate.translator.ai_translator.base import AiTranslatorConfig, AiTranslator
|
||||||
from docutranslate.translator.base import Translator
|
|
||||||
|
|
||||||
|
|
||||||
@dataclass
|
@dataclass
|
||||||
@@ -23,7 +22,7 @@ class XlsxTranslatorConfig(AiTranslatorConfig):
|
|||||||
translate_regions: Optional[List[str]] = None
|
translate_regions: Optional[List[str]] = None
|
||||||
|
|
||||||
|
|
||||||
class XlsxTranslator(Translator):
|
class XlsxTranslator(AiTranslator):
|
||||||
def __init__(self, config: XlsxTranslatorConfig):
|
def __init__(self, config: XlsxTranslatorConfig):
|
||||||
super().__init__(config=config)
|
super().__init__(config=config)
|
||||||
self.chunk_size = config.chunk_size
|
self.chunk_size = config.chunk_size
|
||||||
|
|||||||
Reference in New Issue
Block a user