自动生成的术语表不再覆盖用户上传的术语表;最终下载的是合并后的术语表(用户上传 + 自动生成)

This commit is contained in:
xunbu
2026-01-05 23:58:57 +08:00
parent ea24f7db31
commit 8127ef8267
12 changed files with 84 additions and 42 deletions

View File

@@ -75,4 +75,5 @@ You are a professional machine translation engine.
if self.glossary_dict is None: if self.glossary_dict is None:
self.glossary_dict = {} self.glossary_dict = {}
if update_dict is not None: if update_dict is not None:
self.glossary_dict = update_dict | self.glossary_dict # 以用户上传的术语表为主,自动生成的术语表只添加用户没有的术语
self.glossary_dict = self.glossary_dict | update_dict

View File

@@ -276,4 +276,5 @@ class SegmentsTranslateAgent(Agent):
if self.glossary_dict is None: if self.glossary_dict is None:
self.glossary_dict = {} self.glossary_dict = {}
if update_dict is not None: if update_dict is not None:
self.glossary_dict = update_dict | self.glossary_dict # 以用户上传的术语表为主,自动生成的术语表只添加用户没有的术语
self.glossary_dict = self.glossary_dict | update_dict

View File

@@ -42,16 +42,20 @@ class AssWorkflow(Workflow[AssWorkflowConfig, Document, Document], HTMLExportabl
def translate(self) -> Self: def translate(self) -> Self:
document, translator=self._pre_translate(self.document_original) document, translator=self._pre_translate(self.document_original)
translator.translate(document) translator.translate(document)
if translator.glossary_dict_gen: # 使用合并后的术语表(用户上传 + 自动生成)
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) merged_glossary = getattr(translator.translate_agent, 'glossary_dict', None) or translator.glossary_dict_gen
if merged_glossary:
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(merged_glossary))
self.document_translated = document self.document_translated = document
return self return self
async def translate_async(self) -> Self: async def translate_async(self) -> Self:
document, translator = self._pre_translate(self.document_original) document, translator = self._pre_translate(self.document_original)
await translator.translate_async(document) await translator.translate_async(document)
if translator.glossary_dict_gen: # 使用合并后的术语表(用户上传 + 自动生成)
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) merged_glossary = getattr(translator.translate_agent, 'glossary_dict', None) or translator.glossary_dict_gen
if merged_glossary:
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(merged_glossary))
self.document_translated = document self.document_translated = document
return self return self

View File

@@ -41,16 +41,20 @@ class DocxWorkflow(Workflow[DocxWorkflowConfig, Document, Document], HTMLExporta
def translate(self) -> Self: def translate(self) -> Self:
document, translator = self._pre_translate(self.document_original) document, translator = self._pre_translate(self.document_original)
translator.translate(document) translator.translate(document)
if translator.glossary_dict_gen: # 使用合并后的术语表(用户上传 + 自动生成)
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) merged_glossary = getattr(translator.translate_agent, 'glossary_dict', None) or translator.glossary_dict_gen
if merged_glossary:
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(merged_glossary))
self.document_translated = document self.document_translated = document
return self return self
async def translate_async(self) -> Self: async def translate_async(self) -> Self:
document, translator = self._pre_translate(self.document_original) document, translator = self._pre_translate(self.document_original)
await translator.translate_async(document) await translator.translate_async(document)
if translator.glossary_dict_gen: # 使用合并后的术语表(用户上传 + 自动生成)
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) merged_glossary = getattr(translator.translate_agent, 'glossary_dict', None) or translator.glossary_dict_gen
if merged_glossary:
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(merged_glossary))
self.document_translated = document self.document_translated = document
return self return self

View File

@@ -39,16 +39,20 @@ class EpubWorkflow(Workflow[EpubWorkflowConfig, Document, Document], HTMLExporta
def translate(self) -> Self: def translate(self) -> Self:
document, translator = self._pre_translate(self.document_original) document, translator = self._pre_translate(self.document_original)
translator.translate(document) translator.translate(document)
if translator.glossary_dict_gen: # 使用合并后的术语表(用户上传 + 自动生成)
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) merged_glossary = getattr(translator.translate_agent, 'glossary_dict', None) or translator.glossary_dict_gen
if merged_glossary:
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(merged_glossary))
self.document_translated = document self.document_translated = document
return self return self
async def translate_async(self) -> Self: async def translate_async(self) -> Self:
document, translator = self._pre_translate(self.document_original) document, translator = self._pre_translate(self.document_original)
await translator.translate_async(document) await translator.translate_async(document)
if translator.glossary_dict_gen: # 使用合并后的术语表(用户上传 + 自动生成)
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) merged_glossary = getattr(translator.translate_agent, 'glossary_dict', None) or translator.glossary_dict_gen
if merged_glossary:
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(merged_glossary))
self.document_translated = document self.document_translated = document
return self return self

View File

@@ -37,16 +37,20 @@ class HtmlWorkflow(Workflow[HtmlWorkflowConfig, Document, Document], HTMLExporta
def translate(self) -> Self: def translate(self) -> Self:
document, translator = self._pre_translate(self.document_original) document, translator = self._pre_translate(self.document_original)
translator.translate(document) translator.translate(document)
if translator.glossary_dict_gen: # 使用合并后的术语表(用户上传 + 自动生成)
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) merged_glossary = getattr(translator.translate_agent, 'glossary_dict', None) or translator.glossary_dict_gen
if merged_glossary:
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(merged_glossary))
self.document_translated = document self.document_translated = document
return self return self
async def translate_async(self) -> Self: async def translate_async(self) -> Self:
document, translator = self._pre_translate(self.document_original) document, translator = self._pre_translate(self.document_original)
await translator.translate_async(document) await translator.translate_async(document)
if translator.glossary_dict_gen: # 使用合并后的术语表(用户上传 + 自动生成)
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) merged_glossary = getattr(translator.translate_agent, 'glossary_dict', None) or translator.glossary_dict_gen
if merged_glossary:
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(merged_glossary))
self.document_translated = document self.document_translated = document
return self return self

View File

@@ -38,16 +38,20 @@ class JsonWorkflow(Workflow[JsonWorkflowConfig, Document, Document], HTMLExporta
def translate(self) -> Self: def translate(self) -> Self:
document, translator = self._pre_translate(self.document_original) document, translator = self._pre_translate(self.document_original)
translator.translate(document) translator.translate(document)
if translator.glossary_dict_gen: # 使用合并后的术语表(用户上传 + 自动生成)
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) merged_glossary = getattr(translator.translate_agent, 'glossary_dict', None) or translator.glossary_dict_gen
if merged_glossary:
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(merged_glossary))
self.document_translated = document self.document_translated = document
return self return self
async def translate_async(self) -> Self: async def translate_async(self) -> Self:
document, translator = self._pre_translate(self.document_original) document, translator = self._pre_translate(self.document_original)
await translator.translate_async(document) await translator.translate_async(document)
if translator.glossary_dict_gen: # 使用合并后的术语表(用户上传 + 自动生成)
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) merged_glossary = getattr(translator.translate_agent, 'glossary_dict', None) or translator.glossary_dict_gen
if merged_glossary:
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(merged_glossary))
self.document_translated = document self.document_translated = document
return self return self

View File

@@ -108,8 +108,10 @@ class MarkdownBasedWorkflow(Workflow[MarkdownBasedWorkflowConfig, Document, Mark
convert_engine, convert_config, translator_config, translator = self._pre_translate(self.document_original) convert_engine, convert_config, translator_config, translator = self._pre_translate(self.document_original)
document_md = self._get_document_md(convert_engine, convert_config) document_md = self._get_document_md(convert_engine, convert_config)
translator.translate(document_md) translator.translate(document_md)
if translator.glossary_dict_gen: # 使用合并后的术语表(用户上传 + 自动生成)
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) merged_glossary = getattr(translator.translate_agent, 'glossary_dict', None) or translator.glossary_dict_gen
if merged_glossary:
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(merged_glossary))
self.document_translated = document_md self.document_translated = document_md
return self return self
@@ -117,8 +119,10 @@ class MarkdownBasedWorkflow(Workflow[MarkdownBasedWorkflowConfig, Document, Mark
convert_engine, convert_config, translator_config, translator = self._pre_translate(self.document_original) convert_engine, convert_config, translator_config, translator = self._pre_translate(self.document_original)
document_md = await asyncio.to_thread(self._get_document_md, convert_engine, convert_config) document_md = await asyncio.to_thread(self._get_document_md, convert_engine, convert_config)
await translator.translate_async(document_md) await translator.translate_async(document_md)
if translator.glossary_dict_gen: # 使用合并后的术语表(用户上传 + 自动生成)
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) merged_glossary = getattr(translator.translate_agent, 'glossary_dict', None) or translator.glossary_dict_gen
if merged_glossary:
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(merged_glossary))
self.document_translated = document_md self.document_translated = document_md
return self return self

View File

@@ -41,16 +41,20 @@ class PPTXWorkflow(Workflow[PPTXWorkflowConfig, Document, Document], HTMLExporta
def translate(self) -> Self: def translate(self) -> Self:
document, translator = self._pre_translate(self.document_original) document, translator = self._pre_translate(self.document_original)
translator.translate(document) translator.translate(document)
if translator.glossary_dict_gen: # 使用合并后的术语表(用户上传 + 自动生成)
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) merged_glossary = getattr(translator.translate_agent, 'glossary_dict', None) or translator.glossary_dict_gen
if merged_glossary:
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(merged_glossary))
self.document_translated = document self.document_translated = document
return self return self
async def translate_async(self) -> Self: async def translate_async(self) -> Self:
document, translator = self._pre_translate(self.document_original) document, translator = self._pre_translate(self.document_original)
await translator.translate_async(document) await translator.translate_async(document)
if translator.glossary_dict_gen: # 使用合并后的术语表(用户上传 + 自动生成)
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) merged_glossary = getattr(translator.translate_agent, 'glossary_dict', None) or translator.glossary_dict_gen
if merged_glossary:
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(merged_glossary))
self.document_translated = document self.document_translated = document
return self return self

View File

@@ -39,16 +39,20 @@ class SrtWorkflow(Workflow[SrtWorkflowConfig, Document, Document], HTMLExportabl
def translate(self) -> Self: def translate(self) -> Self:
document, translator=self._pre_translate(self.document_original) document, translator=self._pre_translate(self.document_original)
translator.translate(document) translator.translate(document)
if translator.glossary_dict_gen: # 使用合并后的术语表(用户上传 + 自动生成)
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) merged_glossary = getattr(translator.translate_agent, 'glossary_dict', None) or translator.glossary_dict_gen
if merged_glossary:
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(merged_glossary))
self.document_translated = document self.document_translated = document
return self return self
async def translate_async(self) -> Self: async def translate_async(self) -> Self:
document, translator = self._pre_translate(self.document_original) document, translator = self._pre_translate(self.document_original)
await translator.translate_async(document) await translator.translate_async(document)
if translator.glossary_dict_gen: # 使用合并后的术语表(用户上传 + 自动生成)
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) merged_glossary = getattr(translator.translate_agent, 'glossary_dict', None) or translator.glossary_dict_gen
if merged_glossary:
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(merged_glossary))
self.document_translated = document self.document_translated = document
return self return self

View File

@@ -39,16 +39,20 @@ class TXTWorkflow(Workflow[TXTWorkflowConfig, Document, Document], HTMLExportabl
def translate(self) -> Self: def translate(self) -> Self:
document, translator=self._pre_translate(self.document_original) document, translator=self._pre_translate(self.document_original)
translator.translate(document) translator.translate(document)
if translator.glossary_dict_gen: # 使用合并后的术语表(用户上传 + 自动生成)
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) merged_glossary = getattr(translator.translate_agent, 'glossary_dict', None) or translator.glossary_dict_gen
if merged_glossary:
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(merged_glossary))
self.document_translated = document self.document_translated = document
return self return self
async def translate_async(self) -> Self: async def translate_async(self) -> Self:
document, translator = self._pre_translate(self.document_original) document, translator = self._pre_translate(self.document_original)
await translator.translate_async(document) await translator.translate_async(document)
if translator.glossary_dict_gen: # 使用合并后的术语表(用户上传 + 自动生成)
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) merged_glossary = getattr(translator.translate_agent, 'glossary_dict', None) or translator.glossary_dict_gen
if merged_glossary:
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(merged_glossary))
self.document_translated = document self.document_translated = document
return self return self

View File

@@ -62,8 +62,10 @@ class XlsxWorkflow(Workflow[XlsxWorkflowConfig, Document, Document], HTMLExporta
document_xlsx = self._get_document_xlsx(self.document_original) document_xlsx = self._get_document_xlsx(self.document_original)
document, translator = self._pre_translate(document_xlsx) document, translator = self._pre_translate(document_xlsx)
translator.translate(document) translator.translate(document)
if translator.glossary_dict_gen: # 使用合并后的术语表(用户上传 + 自动生成)
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) merged_glossary = getattr(translator.translate_agent, 'glossary_dict', None) or translator.glossary_dict_gen
if merged_glossary:
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(merged_glossary))
self.document_translated = document self.document_translated = document
return self return self
@@ -71,8 +73,10 @@ class XlsxWorkflow(Workflow[XlsxWorkflowConfig, Document, Document], HTMLExporta
document_xlsx = await asyncio.to_thread(self._get_document_xlsx, self.document_original) document_xlsx = await asyncio.to_thread(self._get_document_xlsx, self.document_original)
document, translator = self._pre_translate(document_xlsx) document, translator = self._pre_translate(document_xlsx)
await translator.translate_async(document) await translator.translate_async(document)
if translator.glossary_dict_gen: # 使用合并后的术语表(用户上传 + 自动生成)
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) merged_glossary = getattr(translator.translate_agent, 'glossary_dict', None) or translator.glossary_dict_gen
if merged_glossary:
self.attachment.add_document("glossary", Glossary.glossary_dict2csv(merged_glossary))
self.document_translated = document self.document_translated = document
return self return self