增加xlsx翻译区域选项
This commit is contained in:
@@ -252,6 +252,10 @@ class XlsxWorkflowParams(BaseWorkflowParams):
|
||||
"\n",
|
||||
description="当 insert_mode 为 'append' 或 'prepend' 时,用于分隔原文和译文的分隔符。"
|
||||
)
|
||||
translate_regions: Optional[List[str]] = Field(
|
||||
None,
|
||||
description="指定翻译区域列表。示例: ['Sheet1!A1:B10', 'C:D', 'E5']。如果不指定表名 (如 'C:D'),则应用于所有表。如果为 None,则翻译整个文件中的所有文本。"
|
||||
)
|
||||
|
||||
|
||||
class DocxWorkflowParams(BaseWorkflowParams):
|
||||
@@ -367,7 +371,8 @@ class TranslateServiceRequest(BaseModel):
|
||||
"insert_mode": "append",
|
||||
"separator": " \n---翻译---\n ",
|
||||
"chunk_size": 2000,
|
||||
"concurrent": 5
|
||||
"concurrent": 5,
|
||||
"translate_regions": ["Sheet1!A1:B10", "C:D"]
|
||||
}
|
||||
}
|
||||
},
|
||||
@@ -530,7 +535,7 @@ async def _perform_translation(
|
||||
**payload.model_dump(include={
|
||||
'base_url', 'api_key', 'model_id', 'to_lang', 'custom_prompt',
|
||||
'temperature', 'thinking', 'chunk_size', 'concurrent',
|
||||
'insert_mode', 'separator'
|
||||
'insert_mode', 'separator', 'translate_regions'
|
||||
}, exclude_none=True)
|
||||
)
|
||||
html_exporter_config = Xlsx2HTMLExporterConfig(cdn=True)
|
||||
|
||||
@@ -26,6 +26,8 @@
|
||||
"separatorHelp": "当插入模式为附加或前置时,用于分隔原文和译文的字符。<code>\\n</code> 代表换行。",
|
||||
"xlsxSettingsTitleText": "XLSX翻译选项",
|
||||
"insertModeHelpXlsx": "选择如何将翻译后的文本插入到单元格中。",
|
||||
"xlsxTranslateRegionsLabel": "翻译区域 (可选)",
|
||||
"xlsxTranslateRegionsPlaceholder": "每行一个区域, 例如:Sheet1!A1:B10(不指定表名则对所有表生效)",
|
||||
"srtSettingsTitleText": "SRT翻译选项",
|
||||
"insertModeHelpSrt": "选择如何将翻译后的文本插入。",
|
||||
"epubSettingsTitleText": "EPUB翻译选项",
|
||||
@@ -175,6 +177,8 @@
|
||||
"separatorHelp": "Characters to separate original and translated text in append/prepend modes. <code>\\n</code> for new line.",
|
||||
"xlsxSettingsTitleText": "XLSX Translation Options",
|
||||
"insertModeHelpXlsx": "Choose how to insert translated text into cells.",
|
||||
"xlsxTranslateRegionsLabel": "Translation area (optional)",
|
||||
"xlsxTranslateRegionsPlaceholder": "One area per line, for example: Sheet1!A1:B10 (if no sheet name is specified, it applies to all sheets)",
|
||||
"srtSettingsTitleText": "SRT Translation Options",
|
||||
"insertModeHelpSrt": "Choose how to insert the translated text.",
|
||||
"epubSettingsTitleText": "EPUB Translation Options",
|
||||
|
||||
File diff suppressed because one or more lines are too long
@@ -1,9 +1,10 @@
|
||||
import asyncio
|
||||
from dataclasses import dataclass
|
||||
from dataclasses import dataclass, field
|
||||
from io import BytesIO
|
||||
from typing import Self, Literal
|
||||
from typing import Self, Literal, List, Optional
|
||||
|
||||
import openpyxl
|
||||
from openpyxl.cell import Cell
|
||||
|
||||
from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent
|
||||
from docutranslate.ir.document import Document
|
||||
@@ -15,6 +16,11 @@ from docutranslate.translator.base import Translator
|
||||
class XlsxTranslatorConfig(AiTranslatorConfig):
|
||||
insert_mode: Literal["replace", "append", "prepend"] = "replace"
|
||||
separator: str = "\n"
|
||||
# 指定翻译区域列表。
|
||||
# 示例: ["Sheet1!A1:B10", "C:D", "E5"]
|
||||
# 如果不指定表名 (如 "C:D"),则应用于所有表。
|
||||
# 如果为 None,则翻译整个文件中的所有文本。注意:空列表不等同于 None,当前实现下传入空列表不会匹配任何区域,因此不会翻译任何单元格。
|
||||
translate_regions: Optional[List[str]] = None
|
||||
|
||||
|
||||
class XlsxTranslator(Translator):
|
||||
@@ -35,25 +41,80 @@ class XlsxTranslator(Translator):
|
||||
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
||||
self.insert_mode = config.insert_mode
|
||||
self.separator = config.separator
|
||||
# --- 新增功能 ---
|
||||
self.translate_regions = config.translate_regions
|
||||
|
||||
def _pre_translate(self, document: Document):
|
||||
workbook = openpyxl.load_workbook(BytesIO(document.content))
|
||||
|
||||
# --- 步骤 1: 收集所有需要翻译的文本单元格 ---
|
||||
cells_to_translate = []
|
||||
|
||||
for sheet_name in workbook.sheetnames:
|
||||
sheet = workbook[sheet_name]
|
||||
for row in sheet.iter_rows():
|
||||
for cell in row:
|
||||
# 关键判断:值是字符串(str) 且 数据类型是 's' (string),以排除公式('f')
|
||||
if isinstance(cell.value, str) and cell.data_type == "s":
|
||||
cell_info = {
|
||||
"sheet_name": sheet_name,
|
||||
"coordinate": cell.coordinate,
|
||||
"original_text": cell.value,
|
||||
}
|
||||
cells_to_translate.append(cell_info)
|
||||
# --- 步骤 1: 根据是否指定区域,收集需要翻译的文本单元格 ---
|
||||
|
||||
# 如果未指定翻译区域,则沿用旧逻辑,翻译所有单元格
|
||||
if self.translate_regions is None:
|
||||
for sheet in workbook.worksheets:
|
||||
for row in sheet.iter_rows():
|
||||
for cell in row:
|
||||
if isinstance(cell.value, str) and cell.data_type == "s":
|
||||
cells_to_translate.append({
|
||||
"sheet_name": sheet.title,
|
||||
"coordinate": cell.coordinate,
|
||||
"original_text": cell.value,
|
||||
})
|
||||
# 如果指定了翻译区域,则只在这些区域内查找
|
||||
else:
|
||||
# 用于防止重叠区域导致重复翻译
|
||||
processed_coordinates = set()
|
||||
|
||||
# 1. 解析区域,区分“全局区域”和“指定工作表区域”
|
||||
regions_by_sheet = {}
|
||||
all_sheet_regions = []
|
||||
for region in self.translate_regions:
|
||||
if '!' in region:
|
||||
sheet_name, cell_range = region.split('!', 1)
|
||||
if sheet_name not in regions_by_sheet:
|
||||
regions_by_sheet[sheet_name] = []
|
||||
regions_by_sheet[sheet_name].append(cell_range)
|
||||
else:
|
||||
all_sheet_regions.append(region)
|
||||
|
||||
# 2. 遍历工作表,应用区域规则
|
||||
for sheet in workbook.worksheets:
|
||||
# 获取当前工作表的“指定区域”和“全局区域”
|
||||
sheet_specific_ranges = regions_by_sheet.get(sheet.title, [])
|
||||
total_ranges_for_this_sheet = sheet_specific_ranges + all_sheet_regions
|
||||
|
||||
if not total_ranges_for_this_sheet:
|
||||
continue
|
||||
|
||||
# 3. 遍历区域内的单元格
|
||||
for cell_range in total_ranges_for_this_sheet:
|
||||
try:
|
||||
# sheet[cell_range] 可以获取单个单元格或一个元组的元组
|
||||
cells_in_range = sheet[cell_range]
|
||||
if isinstance(cells_in_range, Cell):
|
||||
# 将单个单元格包装成与多单元格范围一致的结构
|
||||
cells_in_range = ((cells_in_range,),)
|
||||
|
||||
for row_of_cells in cells_in_range:
|
||||
for cell in row_of_cells:
|
||||
full_coordinate = (sheet.title, cell.coordinate)
|
||||
# 如果该单元格已处理,则跳过
|
||||
if full_coordinate in processed_coordinates:
|
||||
continue
|
||||
|
||||
# 关键判断:值是字符串(str) 且 数据类型是 's' (string)
|
||||
if isinstance(cell.value, str) and cell.data_type == "s":
|
||||
cell_info = {
|
||||
"sheet_name": sheet.title,
|
||||
"coordinate": cell.coordinate,
|
||||
"original_text": cell.value,
|
||||
}
|
||||
cells_to_translate.append(cell_info)
|
||||
processed_coordinates.add(full_coordinate)
|
||||
except Exception as e:
|
||||
self.logger.warning(f"跳过无效的区域 '{cell_range}' 在工作表 '{sheet.title}'. 错误: {e}")
|
||||
|
||||
# 提取所有原文文本,准备进行批量翻译
|
||||
original_texts = [cell["original_text"] for cell in cells_to_translate]
|
||||
return workbook, cells_to_translate, original_texts
|
||||
@@ -88,7 +149,7 @@ class XlsxTranslator(Translator):
|
||||
|
||||
workbook, cells_to_translate, original_texts = self._pre_translate(document)
|
||||
if not cells_to_translate:
|
||||
print("\n文件中没有找到需要翻译的纯文本内容。")
|
||||
print("\n在指定区域中没有找到需要翻译的纯文本内容。")
|
||||
workbook.close()
|
||||
return self
|
||||
# --- 步骤 2: 调用翻译函数 ---
|
||||
@@ -101,7 +162,7 @@ class XlsxTranslator(Translator):
|
||||
|
||||
workbook, cells_to_translate, original_texts = await asyncio.to_thread(self._pre_translate, document)
|
||||
if not cells_to_translate:
|
||||
print("\n文件中没有找到需要翻译的纯文本内容。")
|
||||
print("\n在指定区域中没有找到需要翻译的纯文本内容。")
|
||||
workbook.close()
|
||||
return self
|
||||
# --- 步骤 2: 调用翻译函数 ---
|
||||
@@ -109,5 +170,4 @@ class XlsxTranslator(Translator):
|
||||
|
||||
document.content = await asyncio.to_thread(self._after_translate, workbook, cells_to_translate,
|
||||
translated_texts, original_texts)
|
||||
return self
|
||||
|
||||
return self
|
||||
Reference in New Issue
Block a user