增加xlsx翻译区域选项
This commit is contained in:
@@ -252,6 +252,10 @@ class XlsxWorkflowParams(BaseWorkflowParams):
|
|||||||
"\n",
|
"\n",
|
||||||
description="当 insert_mode 为 'append' 或 'prepend' 时,用于分隔原文和译文的分隔符。"
|
description="当 insert_mode 为 'append' 或 'prepend' 时,用于分隔原文和译文的分隔符。"
|
||||||
)
|
)
|
||||||
|
translate_regions: Optional[List[str]] = Field(
|
||||||
|
None,
|
||||||
|
description="指定翻译区域列表。示例: ['Sheet1!A1:B10', 'C:D', 'E5']。如果不指定表名 (如 'C:D'),则应用于所有表。如果为 None,则翻译整个文件中的所有文本。"
|
||||||
|
)
|
||||||
|
|
||||||
|
|
||||||
class DocxWorkflowParams(BaseWorkflowParams):
|
class DocxWorkflowParams(BaseWorkflowParams):
|
||||||
@@ -367,7 +371,8 @@ class TranslateServiceRequest(BaseModel):
|
|||||||
"insert_mode": "append",
|
"insert_mode": "append",
|
||||||
"separator": " \n---翻译---\n ",
|
"separator": " \n---翻译---\n ",
|
||||||
"chunk_size": 2000,
|
"chunk_size": 2000,
|
||||||
"concurrent": 5
|
"concurrent": 5,
|
||||||
|
"translate_regions": ["Sheet1!A1:B10", "C:D"]
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
},
|
},
|
||||||
@@ -530,7 +535,7 @@ async def _perform_translation(
|
|||||||
**payload.model_dump(include={
|
**payload.model_dump(include={
|
||||||
'base_url', 'api_key', 'model_id', 'to_lang', 'custom_prompt',
|
'base_url', 'api_key', 'model_id', 'to_lang', 'custom_prompt',
|
||||||
'temperature', 'thinking', 'chunk_size', 'concurrent',
|
'temperature', 'thinking', 'chunk_size', 'concurrent',
|
||||||
'insert_mode', 'separator'
|
'insert_mode', 'separator', 'translate_regions'
|
||||||
}, exclude_none=True)
|
}, exclude_none=True)
|
||||||
)
|
)
|
||||||
html_exporter_config = Xlsx2HTMLExporterConfig(cdn=True)
|
html_exporter_config = Xlsx2HTMLExporterConfig(cdn=True)
|
||||||
|
|||||||
@@ -26,6 +26,8 @@
|
|||||||
"separatorHelp": "当插入模式为附加或前置时,用于分隔原文和译文的字符。<code>\\n</code> 代表换行。",
|
"separatorHelp": "当插入模式为附加或前置时,用于分隔原文和译文的字符。<code>\\n</code> 代表换行。",
|
||||||
"xlsxSettingsTitleText": "XLSX翻译选项",
|
"xlsxSettingsTitleText": "XLSX翻译选项",
|
||||||
"insertModeHelpXlsx": "选择如何将翻译后的文本插入到单元格中。",
|
"insertModeHelpXlsx": "选择如何将翻译后的文本插入到单元格中。",
|
||||||
|
"xlsxTranslateRegionsLabel": "翻译区域 (可选)",
|
||||||
|
"xlsxTranslateRegionsPlaceholder": "每行一个区域, 例如:Sheet1!A1:B10(不指定表名则对所有表生效)",
|
||||||
"srtSettingsTitleText": "SRT翻译选项",
|
"srtSettingsTitleText": "SRT翻译选项",
|
||||||
"insertModeHelpSrt": "选择如何将翻译后的文本插入。",
|
"insertModeHelpSrt": "选择如何将翻译后的文本插入。",
|
||||||
"epubSettingsTitleText": "EPUB翻译选项",
|
"epubSettingsTitleText": "EPUB翻译选项",
|
||||||
@@ -175,6 +177,8 @@
|
|||||||
"separatorHelp": "Characters to separate original and translated text in append/prepend modes. <code>\\n</code> for new line.",
|
"separatorHelp": "Characters to separate original and translated text in append/prepend modes. <code>\\n</code> for new line.",
|
||||||
"xlsxSettingsTitleText": "XLSX Translation Options",
|
"xlsxSettingsTitleText": "XLSX Translation Options",
|
||||||
"insertModeHelpXlsx": "Choose how to insert translated text into cells.",
|
"insertModeHelpXlsx": "Choose how to insert translated text into cells.",
|
||||||
|
"xlsxTranslateRegionsLabel": "Translation area (optional)",
|
||||||
|
"xlsxTranslateRegionsPlaceholder": "One area per line, for example: Sheet1!A1:B10 (if no sheet name is specified, it applies to all sheets)",
|
||||||
"srtSettingsTitleText": "SRT Translation Options",
|
"srtSettingsTitleText": "SRT Translation Options",
|
||||||
"insertModeHelpSrt": "Choose how to insert the translated text.",
|
"insertModeHelpSrt": "Choose how to insert the translated text.",
|
||||||
"epubSettingsTitleText": "EPUB Translation Options",
|
"epubSettingsTitleText": "EPUB Translation Options",
|
||||||
|
|||||||
File diff suppressed because one or more lines are too long
@@ -1,9 +1,10 @@
|
|||||||
import asyncio
|
import asyncio
|
||||||
from dataclasses import dataclass
|
from dataclasses import dataclass, field
|
||||||
from io import BytesIO
|
from io import BytesIO
|
||||||
from typing import Self, Literal
|
from typing import Self, Literal, List, Optional
|
||||||
|
|
||||||
import openpyxl
|
import openpyxl
|
||||||
|
from openpyxl.cell import Cell
|
||||||
|
|
||||||
from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent
|
from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent
|
||||||
from docutranslate.ir.document import Document
|
from docutranslate.ir.document import Document
|
||||||
@@ -15,6 +16,11 @@ from docutranslate.translator.base import Translator
|
|||||||
class XlsxTranslatorConfig(AiTranslatorConfig):
|
class XlsxTranslatorConfig(AiTranslatorConfig):
|
||||||
insert_mode: Literal["replace", "append", "prepend"] = "replace"
|
insert_mode: Literal["replace", "append", "prepend"] = "replace"
|
||||||
separator: str = "\n"
|
separator: str = "\n"
|
||||||
|
# 指定翻译区域列表。
|
||||||
|
# 示例: ["Sheet1!A1:B10", "C:D", "E5"]
|
||||||
|
# 如果不指定表名 (如 "C:D"),则应用于所有表。
|
||||||
|
# 如果为 None,则翻译整个文件中的所有文本;空列表不匹配任何区域,因此不会翻译任何单元格。
|
||||||
|
translate_regions: Optional[List[str]] = None
|
||||||
|
|
||||||
|
|
||||||
class XlsxTranslator(Translator):
|
class XlsxTranslator(Translator):
|
||||||
@@ -35,25 +41,80 @@ class XlsxTranslator(Translator):
|
|||||||
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
self.translate_agent = SegmentsTranslateAgent(agent_config)
|
||||||
self.insert_mode = config.insert_mode
|
self.insert_mode = config.insert_mode
|
||||||
self.separator = config.separator
|
self.separator = config.separator
|
||||||
|
# --- 新增功能 ---
|
||||||
|
self.translate_regions = config.translate_regions
|
||||||
|
|
||||||
def _pre_translate(self, document: Document):
|
def _pre_translate(self, document: Document):
|
||||||
workbook = openpyxl.load_workbook(BytesIO(document.content))
|
workbook = openpyxl.load_workbook(BytesIO(document.content))
|
||||||
|
|
||||||
# --- 步骤 1: 收集所有需要翻译的文本单元格 ---
|
|
||||||
cells_to_translate = []
|
cells_to_translate = []
|
||||||
|
|
||||||
for sheet_name in workbook.sheetnames:
|
# --- 步骤 1: 根据是否指定区域,收集需要翻译的文本单元格 ---
|
||||||
sheet = workbook[sheet_name]
|
|
||||||
|
# 如果未指定翻译区域,则沿用旧逻辑,翻译所有单元格
|
||||||
|
if self.translate_regions is None:
|
||||||
|
for sheet in workbook.worksheets:
|
||||||
for row in sheet.iter_rows():
|
for row in sheet.iter_rows():
|
||||||
for cell in row:
|
for cell in row:
|
||||||
# 关键判断:值是字符串(str) 且 数据类型是 's' (string),以排除公式('f')
|
if isinstance(cell.value, str) and cell.data_type == "s":
|
||||||
|
cells_to_translate.append({
|
||||||
|
"sheet_name": sheet.title,
|
||||||
|
"coordinate": cell.coordinate,
|
||||||
|
"original_text": cell.value,
|
||||||
|
})
|
||||||
|
# 如果指定了翻译区域,则只在这些区域内查找
|
||||||
|
else:
|
||||||
|
# 用于防止重叠区域导致重复翻译
|
||||||
|
processed_coordinates = set()
|
||||||
|
|
||||||
|
# 1. 解析区域,区分“全局区域”和“指定工作表区域”
|
||||||
|
regions_by_sheet = {}
|
||||||
|
all_sheet_regions = []
|
||||||
|
for region in self.translate_regions:
|
||||||
|
if '!' in region:
|
||||||
|
sheet_name, cell_range = region.split('!', 1)
|
||||||
|
if sheet_name not in regions_by_sheet:
|
||||||
|
regions_by_sheet[sheet_name] = []
|
||||||
|
regions_by_sheet[sheet_name].append(cell_range)
|
||||||
|
else:
|
||||||
|
all_sheet_regions.append(region)
|
||||||
|
|
||||||
|
# 2. 遍历工作表,应用区域规则
|
||||||
|
for sheet in workbook.worksheets:
|
||||||
|
# 获取当前工作表的“指定区域”和“全局区域”
|
||||||
|
sheet_specific_ranges = regions_by_sheet.get(sheet.title, [])
|
||||||
|
total_ranges_for_this_sheet = sheet_specific_ranges + all_sheet_regions
|
||||||
|
|
||||||
|
if not total_ranges_for_this_sheet:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# 3. 遍历区域内的单元格
|
||||||
|
for cell_range in total_ranges_for_this_sheet:
|
||||||
|
try:
|
||||||
|
# sheet[cell_range] 可以获取单个单元格或一个元组的元组
|
||||||
|
cells_in_range = sheet[cell_range]
|
||||||
|
if isinstance(cells_in_range, Cell):
|
||||||
|
# 将单个单元格包装成与多单元格范围一致的结构
|
||||||
|
cells_in_range = ((cells_in_range,),)
|
||||||
|
|
||||||
|
for row_of_cells in cells_in_range:
|
||||||
|
for cell in row_of_cells:
|
||||||
|
full_coordinate = (sheet.title, cell.coordinate)
|
||||||
|
# 如果该单元格已处理,则跳过
|
||||||
|
if full_coordinate in processed_coordinates:
|
||||||
|
continue
|
||||||
|
|
||||||
|
# 关键判断:值是字符串(str) 且 数据类型是 's' (string)
|
||||||
if isinstance(cell.value, str) and cell.data_type == "s":
|
if isinstance(cell.value, str) and cell.data_type == "s":
|
||||||
cell_info = {
|
cell_info = {
|
||||||
"sheet_name": sheet_name,
|
"sheet_name": sheet.title,
|
||||||
"coordinate": cell.coordinate,
|
"coordinate": cell.coordinate,
|
||||||
"original_text": cell.value,
|
"original_text": cell.value,
|
||||||
}
|
}
|
||||||
cells_to_translate.append(cell_info)
|
cells_to_translate.append(cell_info)
|
||||||
|
processed_coordinates.add(full_coordinate)
|
||||||
|
except Exception as e:
|
||||||
|
self.logger.warning(f"跳过无效的区域 '{cell_range}' 在工作表 '{sheet.title}'. 错误: {e}")
|
||||||
|
|
||||||
# 提取所有原文文本,准备进行批量翻译
|
# 提取所有原文文本,准备进行批量翻译
|
||||||
original_texts = [cell["original_text"] for cell in cells_to_translate]
|
original_texts = [cell["original_text"] for cell in cells_to_translate]
|
||||||
return workbook, cells_to_translate, original_texts
|
return workbook, cells_to_translate, original_texts
|
||||||
@@ -88,7 +149,7 @@ class XlsxTranslator(Translator):
|
|||||||
|
|
||||||
workbook, cells_to_translate, original_texts = self._pre_translate(document)
|
workbook, cells_to_translate, original_texts = self._pre_translate(document)
|
||||||
if not cells_to_translate:
|
if not cells_to_translate:
|
||||||
print("\n文件中没有找到需要翻译的纯文本内容。")
|
print("\n在指定区域中没有找到需要翻译的纯文本内容。")
|
||||||
workbook.close()
|
workbook.close()
|
||||||
return self
|
return self
|
||||||
# --- 步骤 2: 调用翻译函数 ---
|
# --- 步骤 2: 调用翻译函数 ---
|
||||||
@@ -101,7 +162,7 @@ class XlsxTranslator(Translator):
|
|||||||
|
|
||||||
workbook, cells_to_translate, original_texts = await asyncio.to_thread(self._pre_translate, document)
|
workbook, cells_to_translate, original_texts = await asyncio.to_thread(self._pre_translate, document)
|
||||||
if not cells_to_translate:
|
if not cells_to_translate:
|
||||||
print("\n文件中没有找到需要翻译的纯文本内容。")
|
print("\n在指定区域中没有找到需要翻译的纯文本内容。")
|
||||||
workbook.close()
|
workbook.close()
|
||||||
return self
|
return self
|
||||||
# --- 步骤 2: 调用翻译函数 ---
|
# --- 步骤 2: 调用翻译函数 ---
|
||||||
@@ -110,4 +171,3 @@ class XlsxTranslator(Translator):
|
|||||||
document.content = await asyncio.to_thread(self._after_translate, workbook, cells_to_translate,
|
document.content = await asyncio.to_thread(self._after_translate, workbook, cells_to_translate,
|
||||||
translated_texts, original_texts)
|
translated_texts, original_texts)
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user