@staticmethod
def glossary_dict2csv(glossary_dict: dict[str, str], seperator=",", stem="glossary_gen") -> Document:
    """Serialize a glossary mapping into a two-column CSV document.

    The output starts with a ``src``/``dst`` header row followed by one row
    per glossary entry; every row ends with a line feed. Fields that contain
    the separator, a double quote, or a line break are wrapped in double
    quotes with embedded quotes doubled, so such terms cannot shift columns
    or split a row. Fields without those characters are written unchanged.

    Args:
        glossary_dict: Mapping of source term to its translation.
        seperator: Column delimiter placed between the two fields. The
            misspelled name is kept so keyword callers keep working.
        stem: File stem forwarded to ``Document.from_bytes``.

    Returns:
        The object produced by ``Document.from_bytes`` for the UTF-8 encoded
        CSV text with suffix ``.csv``.
    """

    def _field(value) -> str:
        # Hand-rolled rather than csv.writer: csv.writer only accepts a
        # one-character delimiter, while `seperator` may be any string.
        text = str(value)
        needs_quoting = (
            '"' in text
            or "\n" in text
            or "\r" in text
            or (bool(seperator) and seperator in text)
        )
        if needs_quoting:
            return '"' + text.replace('"', '""') + '"'
        return text

    rows = [("src", "dst"), *glossary_dict.items()]
    # Build all rows in one join instead of repeated string concatenation.
    content = "".join(
        f"{_field(src)}{seperator}{_field(dst)}\n" for src, dst in rows
    )
    return Document.from_bytes(content=content.encode("utf-8"), suffix=".csv", stem=stem)
def get_glossary_dict(self):
    """Return the glossary stored in ``self.glossary_dict_gen``.

    The attribute is read as-is and handed back without copying, so the
    caller receives whatever object is currently stored there (``None`` when
    nothing has been stored).
    """
    generated_glossary = self.glossary_dict_gen
    return generated_glossary
a/docutranslate/translator/ai_translator/epub_translator.py b/docutranslate/translator/ai_translator/epub_translator.py index a2518c2..d54ce89 100644 --- a/docutranslate/translator/ai_translator/epub_translator.py +++ b/docutranslate/translator/ai_translator/epub_translator.py @@ -8,7 +8,6 @@ from typing import Self, Literal, List, Dict, Any from bs4 import BeautifulSoup -from docutranslate.agents.glossary_agent import GlossaryAgent, GlossaryAgentConfig from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent from docutranslate.ir.document import Document from docutranslate.translator.ai_translator.base import AiTranslatorConfig, AiTranslator @@ -40,7 +39,6 @@ class EpubTranslator(AiTranslator): self.insert_mode = config.insert_mode self.separator = config.separator - def _pre_translate(self, document: Document) -> tuple[ Dict[str, bytes], List[Dict[str, Any]], List[str] ]: @@ -176,8 +174,8 @@ class EpubTranslator(AiTranslator): self.logger.info("\n文件中没有找到需要翻译的纯文本内容。") return self if self.glossary_agent: - glossary_dict = self.glossary_agent.send_segments(original_texts, self.chunk_size) - self.translate_agent.update_glossary_dict(glossary_dict) + self.glossary_dict_gen = self.glossary_agent.send_segments(original_texts, self.chunk_size) + self.translate_agent.update_glossary_dict(self.glossary_dict_gen) translated_texts = self.translate_agent.send_segments(original_texts, self.chunk_size) document.content = self._after_translate( all_files, items_to_translate, translated_texts, original_texts @@ -196,8 +194,8 @@ class EpubTranslator(AiTranslator): return self if self.glossary_agent: - glossary_dict = await self.glossary_agent.send_segments_async(original_texts, self.chunk_size) - self.translate_agent.update_glossary_dict(glossary_dict) + self.glossary_dict_gen = await self.glossary_agent.send_segments_async(original_texts, self.chunk_size) + self.translate_agent.update_glossary_dict(self.glossary_dict_gen) 
translated_texts = await self.translate_agent.send_segments_async( original_texts, self.chunk_size diff --git a/docutranslate/translator/ai_translator/html_translator.py b/docutranslate/translator/ai_translator/html_translator.py index 05b050d..ecf0406 100644 --- a/docutranslate/translator/ai_translator/html_translator.py +++ b/docutranslate/translator/ai_translator/html_translator.py @@ -198,8 +198,8 @@ class HtmlTranslator(AiTranslator): return self if self.glossary_agent: - glossary_dict = self.glossary_agent.send_segments(original_texts, self.chunk_size) - self.translate_agent.update_glossary_dict(glossary_dict) + self.glossary_dict_gen = self.glossary_agent.send_segments(original_texts, self.chunk_size) + self.translate_agent.update_glossary_dict(self.glossary_dict_gen) translated_texts = self.translate_agent.send_segments(original_texts, self.chunk_size) document.content = self._after_translate(soup, translatable_items, translated_texts, original_texts) diff --git a/docutranslate/translator/ai_translator/json_translator.py b/docutranslate/translator/ai_translator/json_translator.py index 606266d..db0749b 100644 --- a/docutranslate/translator/ai_translator/json_translator.py +++ b/docutranslate/translator/ai_translator/json_translator.py @@ -75,8 +75,8 @@ class JsonTranslator(AiTranslator): original_texts = [match.value for match in all_matches] if self.glossary_agent: - glossary_dict = self.glossary_agent.send_segments(original_texts, self.chunk_size) - self.translate_agent.update_glossary_dict(glossary_dict) + self.glossary_dict_gen = self.glossary_agent.send_segments(original_texts, self.chunk_size) + self.translate_agent.update_glossary_dict(self.glossary_dict_gen) # 步骤 2: 批量翻译提取出的文本 translated_texts = self.translate_agent.send_segments(original_texts, self.chunk_size) @@ -106,8 +106,8 @@ class JsonTranslator(AiTranslator): original_texts = [match.value for match in all_matches] if self.glossary_agent: - glossary_dict = await 
self.glossary_agent.send_segments_async(original_texts, self.chunk_size) - self.translate_agent.update_glossary_dict(glossary_dict) + self.glossary_dict_gen = await self.glossary_agent.send_segments_async(original_texts, self.chunk_size) + self.translate_agent.update_glossary_dict(self.glossary_dict_gen) # 步骤 2: 批量翻译提取出的文本 translated_texts = await self.translate_agent.send_segments_async(original_texts, self.chunk_size) diff --git a/docutranslate/translator/ai_translator/md_translator.py b/docutranslate/translator/ai_translator/md_translator.py index 7fc76a0..90c4cf8 100644 --- a/docutranslate/translator/ai_translator/md_translator.py +++ b/docutranslate/translator/ai_translator/md_translator.py @@ -37,8 +37,8 @@ class MDTranslator(AiTranslator): with MDMaskUrisContext(document): chunks: list[str] = split_markdown_text(document.content.decode(), self.chunk_size) if self.glossary_agent: - glossary_dict = self.glossary_agent.send_segments(chunks, self.chunk_size) - self.translate_agent.update_glossary_dict(glossary_dict) + self.glossary_dict_gen = self.glossary_agent.send_segments(chunks, self.chunk_size) + self.translate_agent.update_glossary_dict(self.glossary_dict_gen) self.logger.info(f"markdown分为{len(chunks)}块") result: list[str] = self.translate_agent.send_chunks(chunks) content = join_markdown_texts(result) @@ -56,8 +56,8 @@ class MDTranslator(AiTranslator): chunks: list[str] = split_markdown_text(document.content.decode(), self.chunk_size) if self.glossary_agent: - glossary_dict = await self.glossary_agent.send_segments_async(chunks, self.chunk_size) - self.translate_agent.update_glossary_dict(glossary_dict) + self.glossary_dict_gen = await self.glossary_agent.send_segments_async(chunks, self.chunk_size) + self.translate_agent.update_glossary_dict(self.glossary_dict_gen) self.logger.info(f"markdown分为{len(chunks)}块") result: list[str] = await self.translate_agent.send_chunks_async(chunks) diff --git a/docutranslate/translator/ai_translator/srt_translator.py 
b/docutranslate/translator/ai_translator/srt_translator.py index a0f9783..7660b7e 100644 --- a/docutranslate/translator/ai_translator/srt_translator.py +++ b/docutranslate/translator/ai_translator/srt_translator.py @@ -4,7 +4,6 @@ from typing import Self, Literal import srt # 导入srt库来处理字幕文件 -from docutranslate.agents.glossary_agent import GlossaryAgentConfig, GlossaryAgent from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent from docutranslate.ir.document import Document from docutranslate.translator.ai_translator.base import AiTranslatorConfig, AiTranslator @@ -109,8 +108,8 @@ class SrtTranslator(AiTranslator): self.logger.info("\n文件中没有找到需要翻译的字幕内容。") return self if self.glossary_agent: - glossary_dict = self.glossary_agent.send_segments(original_texts, self.chunk_size) - self.translate_agent.update_glossary_dict(glossary_dict) + self.glossary_dict_gen = self.glossary_agent.send_segments(original_texts, self.chunk_size) + self.translate_agent.update_glossary_dict(self.glossary_dict_gen) # --- 步骤 2: 调用翻译Agent --- translated_texts = self.translate_agent.send_segments(original_texts, self.chunk_size) diff --git a/docutranslate/translator/ai_translator/txt_translator.py b/docutranslate/translator/ai_translator/txt_translator.py index 50571d7..51da2e0 100644 --- a/docutranslate/translator/ai_translator/txt_translator.py +++ b/docutranslate/translator/ai_translator/txt_translator.py @@ -33,8 +33,8 @@ class TXTTranslator(AiTranslator): self.logger.info("正在翻译txt") chunks: list[str] = split_markdown_text(document.content.decode(), max_block_size=self.chunk_size) if self.glossary_agent: - glossary_dict = self.glossary_agent.send_segments(chunks, self.chunk_size) - self.translate_agent.update_glossary_dict(glossary_dict) + self.glossary_dict_gen = self.glossary_agent.send_segments(chunks, self.chunk_size) + self.translate_agent.update_glossary_dict(self.glossary_dict_gen) self.logger.info(f"txt分为{len(chunks)}块") result: list[str] 
= self.translate_agent.send_chunks(chunks) content = "\n".join(result) diff --git a/docutranslate/translator/ai_translator/xlsx_translator.py b/docutranslate/translator/ai_translator/xlsx_translator.py index 4c5b3cf..e489bcf 100644 --- a/docutranslate/translator/ai_translator/xlsx_translator.py +++ b/docutranslate/translator/ai_translator/xlsx_translator.py @@ -155,8 +155,8 @@ class XlsxTranslator(AiTranslator): workbook.close() return self if self.glossary_agent: - glossary_dict = self.glossary_agent.send_segments(original_texts, self.chunk_size) - self.translate_agent.update_glossary_dict(glossary_dict) + self.glossary_dict_gen = self.glossary_agent.send_segments(original_texts, self.chunk_size) + self.translate_agent.update_glossary_dict(self.glossary_dict_gen) # --- 步骤 2: 调用翻译函数 --- translated_texts = self.translate_agent.send_segments(original_texts, self.chunk_size) @@ -172,8 +172,8 @@ class XlsxTranslator(AiTranslator): return self if self.glossary_agent: - glossary_dict = await self.glossary_agent.send_segments_async(original_texts, self.chunk_size) - self.translate_agent.update_glossary_dict(glossary_dict) + self.glossary_dict_gen = await self.glossary_agent.send_segments_async(original_texts, self.chunk_size) + self.translate_agent.update_glossary_dict(self.glossary_dict_gen) # --- 步骤 2: 调用翻译函数 --- translated_texts = await self.translate_agent.send_segments_async(original_texts, self.chunk_size) diff --git a/docutranslate/workflow/base.py b/docutranslate/workflow/base.py index 5009051..0f38bb2 100644 --- a/docutranslate/workflow/base.py +++ b/docutranslate/workflow/base.py @@ -5,6 +5,7 @@ from pathlib import Path from typing import Self, Generic, TypeVar from docutranslate.exporter.base import Exporter +from docutranslate.ir.attachment import AttachMent from docutranslate.ir.document import Document @@ -24,6 +25,7 @@ class Workflow(ABC, Generic[T_Config, T_original, T_Translated]): self.logger = self.config.logger self.document_original: T_original | None 
= None self.document_translated: T_Translated | None = None + self.attachment = AttachMent() def read_path(self, path: Path | str) -> Self: document = Document.from_path(path) @@ -57,3 +59,6 @@ class Workflow(ABC, Generic[T_Config, T_original, T_Translated]): output_path.write_bytes(docu.content) self.logger.info(f"文件已保存到{output_path.resolve()}") return self + + def get_attachment(self): + return self.attachment diff --git a/docutranslate/workflow/docx_workflow.py b/docutranslate/workflow/docx_workflow.py index 9b7e248..5848707 100644 --- a/docutranslate/workflow/docx_workflow.py +++ b/docutranslate/workflow/docx_workflow.py @@ -5,7 +5,7 @@ from typing import Self from docutranslate.exporter.base import ExporterConfig from docutranslate.exporter.docx.docx2docx_exporter import Docx2DocxExporter from docutranslate.exporter.docx.docx2html_exporter import Docx2HTMLExporterConfig, Docx2HTMLExporter - +from docutranslate.glossary.glossary import Glossary from docutranslate.ir.document import Document from docutranslate.translator.ai_translator.docx_translator import DocxTranslatorConfig, DocxTranslator from docutranslate.workflow.base import Workflow, WorkflowConfig @@ -36,12 +36,16 @@ class DocxWorkflow(Workflow[DocxWorkflowConfig, Document, Document], HTMLExporta def translate(self) -> Self: document, translator = self._pre_translate(self.document_original) translator.translate(document) + if translator.glossary_dict_gen: + self.attachment.add_attachment("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) self.document_translated = document return self async def translate_async(self) -> Self: document, translator = self._pre_translate(self.document_original) await translator.translate_async(document) + if translator.glossary_dict_gen: + self.attachment.add_attachment("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) self.document_translated = document return self diff --git a/docutranslate/workflow/epub_workflow.py 
b/docutranslate/workflow/epub_workflow.py index b2333f3..c489f5a 100644 --- a/docutranslate/workflow/epub_workflow.py +++ b/docutranslate/workflow/epub_workflow.py @@ -5,6 +5,7 @@ from typing import Self from docutranslate.exporter.base import ExporterConfig from docutranslate.exporter.epub.epub2epub_exporter import Epub2EpubExporter from docutranslate.exporter.epub.epub2html_exporter import Epub2HTMLExporterConfig, Epub2HTMLExporter +from docutranslate.glossary.glossary import Glossary from docutranslate.ir.document import Document from docutranslate.translator.ai_translator.epub_translator import EpubTranslatorConfig, EpubTranslator @@ -36,12 +37,16 @@ class EpubWorkflow(Workflow[EpubWorkflowConfig, Document, Document], HTMLExporta def translate(self) -> Self: document, translator = self._pre_translate(self.document_original) translator.translate(document) + if translator.glossary_dict_gen: + self.attachment.add_attachment("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) self.document_translated = document return self async def translate_async(self) -> Self: document, translator = self._pre_translate(self.document_original) await translator.translate_async(document) + if translator.glossary_dict_gen: + self.attachment.add_attachment("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) self.document_translated = document return self diff --git a/docutranslate/workflow/html_workflow.py b/docutranslate/workflow/html_workflow.py index 54644e1..58d7113 100644 --- a/docutranslate/workflow/html_workflow.py +++ b/docutranslate/workflow/html_workflow.py @@ -4,6 +4,7 @@ from typing import Self from docutranslate.exporter.base import ExporterConfig from docutranslate.exporter.html.html2html_exporter import Html2HtmlExporter +from docutranslate.glossary.glossary import Glossary from docutranslate.ir.document import Document from docutranslate.translator.ai_translator.html_translator import HtmlTranslatorConfig, HtmlTranslator @@ -34,12 
+35,16 @@ class HtmlWorkflow(Workflow[HtmlWorkflowConfig, Document, Document], HTMLExporta def translate(self) -> Self: document, translator = self._pre_translate(self.document_original) translator.translate(document) + if translator.glossary_dict_gen: + self.attachment.add_attachment("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) self.document_translated = document return self async def translate_async(self) -> Self: document, translator = self._pre_translate(self.document_original) await translator.translate_async(document) + if translator.glossary_dict_gen: + self.attachment.add_attachment("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) self.document_translated = document return self diff --git a/docutranslate/workflow/json_workflow.py b/docutranslate/workflow/json_workflow.py index 469e686..b2b2522 100644 --- a/docutranslate/workflow/json_workflow.py +++ b/docutranslate/workflow/json_workflow.py @@ -5,6 +5,7 @@ from typing import Self from docutranslate.exporter.base import ExporterConfig from docutranslate.exporter.js.json2html_exporter import Json2HTMLExporterConfig, Json2HTMLExporter from docutranslate.exporter.js.json2json_exporter import Json2JsonExporter +from docutranslate.glossary.glossary import Glossary from docutranslate.ir.document import Document from docutranslate.translator.ai_translator.json_translator import JsonTranslatorConfig, JsonTranslator from docutranslate.workflow.base import Workflow, WorkflowConfig @@ -35,12 +36,16 @@ class JsonWorkflow(Workflow[JsonWorkflowConfig, Document, Document], HTMLExporta def translate(self) -> Self: document, translator = self._pre_translate(self.document_original) translator.translate(document) + if translator.glossary_dict_gen: + self.attachment.add_attachment("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) self.document_translated = document return self async def translate_async(self) -> Self: document, translator = 
self._pre_translate(self.document_original) await translator.translate_async(document) + if translator.glossary_dict_gen: + self.attachment.add_attachment("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) self.document_translated = document return self diff --git a/docutranslate/workflow/md_based_workflow.py b/docutranslate/workflow/md_based_workflow.py index 14b9917..0c513f4 100644 --- a/docutranslate/workflow/md_based_workflow.py +++ b/docutranslate/workflow/md_based_workflow.py @@ -6,6 +6,7 @@ from typing import Self, Tuple, Type from docutranslate.cacher import md_based_convert_cacher from docutranslate.exporter.base import ExporterConfig from docutranslate.global_values.conditional_import import DOCLING_EXIST +from docutranslate.glossary.glossary import Glossary from docutranslate.ir.document import Document from docutranslate.ir.markdown_document import MarkdownDocument @@ -86,6 +87,8 @@ class MarkdownBasedWorkflow(Workflow[MarkdownBasedWorkflowConfig, Document, Mark convert_engine, convert_config, translator_config, translator = self._pre_translate(self.document_original) document_md = self._get_document_md(convert_engine, convert_config) translator.translate(document_md) + if translator.glossary_dict_gen: + self.attachment.add_attachment("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) self.document_translated = document_md return self @@ -93,6 +96,8 @@ class MarkdownBasedWorkflow(Workflow[MarkdownBasedWorkflowConfig, Document, Mark convert_engine, convert_config, translator_config, translator = self._pre_translate(self.document_original) document_md = await asyncio.to_thread(self._get_document_md, convert_engine, convert_config) await translator.translate_async(document_md) + if translator.glossary_dict_gen: + self.attachment.add_attachment("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) self.document_translated = document_md return self diff --git a/docutranslate/workflow/srt_workflow.py 
b/docutranslate/workflow/srt_workflow.py index 8afeca6..0d03130 100644 --- a/docutranslate/workflow/srt_workflow.py +++ b/docutranslate/workflow/srt_workflow.py @@ -5,6 +5,7 @@ from typing import Self from docutranslate.exporter.base import ExporterConfig from docutranslate.exporter.srt.srt2html_exporter import Srt2HTMLExporterConfig, Srt2HTMLExporter from docutranslate.exporter.srt.srt2srt_exporter import Srt2SrtExporter +from docutranslate.glossary.glossary import Glossary from docutranslate.ir.document import Document from docutranslate.translator.ai_translator.srt_translator import SrtTranslatorConfig, SrtTranslator from docutranslate.workflow.base import Workflow, WorkflowConfig @@ -36,12 +37,16 @@ class SrtWorkflow(Workflow[SrtWorkflowConfig, Document, Document], HTMLExportabl def translate(self) -> Self: document, translator=self._pre_translate(self.document_original) translator.translate(document) + if translator.glossary_dict_gen: + self.attachment.add_attachment("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) self.document_translated = document return self async def translate_async(self) -> Self: document, translator = self._pre_translate(self.document_original) await translator.translate_async(document) + if translator.glossary_dict_gen: + self.attachment.add_attachment("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) self.document_translated = document return self diff --git a/docutranslate/workflow/txt_workflow.py b/docutranslate/workflow/txt_workflow.py index da524d0..f1b5af8 100644 --- a/docutranslate/workflow/txt_workflow.py +++ b/docutranslate/workflow/txt_workflow.py @@ -5,6 +5,7 @@ from typing import Self from docutranslate.exporter.base import ExporterConfig from docutranslate.exporter.txt.txt2html_exporter import TXT2HTMLExporterConfig, TXT2HTMLExporter from docutranslate.exporter.txt.txt2txt_exporter import TXT2TXTExporter +from docutranslate.glossary.glossary import Glossary from 
docutranslate.ir.document import Document from docutranslate.translator.ai_translator.txt_translator import TXTTranslatorConfig, TXTTranslator from docutranslate.workflow.base import Workflow, WorkflowConfig @@ -36,12 +37,16 @@ class TXTWorkflow(Workflow[TXTWorkflowConfig, Document, Document], HTMLExportabl def translate(self) -> Self: document, translator=self._pre_translate(self.document_original) translator.translate(document) + if translator.glossary_dict_gen: + self.attachment.add_attachment("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) self.document_translated = document return self async def translate_async(self) -> Self: document, translator = self._pre_translate(self.document_original) await translator.translate_async(document) + if translator.glossary_dict_gen: + self.attachment.add_attachment("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) self.document_translated = document return self diff --git a/docutranslate/workflow/xlsx_workflow.py b/docutranslate/workflow/xlsx_workflow.py index e13b441..e4ca911 100644 --- a/docutranslate/workflow/xlsx_workflow.py +++ b/docutranslate/workflow/xlsx_workflow.py @@ -11,6 +11,7 @@ from docutranslate.exporter.base import ExporterConfig from docutranslate.exporter.xlsx.xlsx2csv_exporter import Xlsx2CsvExporter from docutranslate.exporter.xlsx.xlsx2html_exporter import Xlsx2HTMLExporterConfig, Xlsx2HTMLExporter from docutranslate.exporter.xlsx.xlsx2xlsx_exporter import Xlsx2XlsxExporter +from docutranslate.glossary.glossary import Glossary from docutranslate.ir.document import Document from docutranslate.translator.ai_translator.xlsx_translator import XlsxTranslatorConfig, XlsxTranslator from docutranslate.workflow.base import Workflow, WorkflowConfig @@ -59,6 +60,8 @@ class XlsxWorkflow(Workflow[XlsxWorkflowConfig, Document, Document], HTMLExporta document_xlsx = self._get_document_xlsx(self.document_original) document, translator = self._pre_translate(document_xlsx) 
translator.translate(document) + if translator.glossary_dict_gen: + self.attachment.add_attachment("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) self.document_translated = document return self @@ -66,6 +69,8 @@ class XlsxWorkflow(Workflow[XlsxWorkflowConfig, Document, Document], HTMLExporta document_xlsx = await asyncio.to_thread(self._get_document_xlsx, self.document_original) document, translator = self._pre_translate(document_xlsx) await translator.translate_async(document) + if translator.glossary_dict_gen: + self.attachment.add_attachment("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen)) self.document_translated = document return self