增加附件机制
This commit is contained in:
@@ -1,3 +1,6 @@
|
|||||||
|
from docutranslate.ir.document import Document
|
||||||
|
|
||||||
|
|
||||||
class Glossary:
|
class Glossary:
|
||||||
def __init__(self,glossary_dict:dict[str:str]=None):
|
def __init__(self,glossary_dict:dict[str:str]=None):
|
||||||
self.glossary_dict=glossary_dict
|
self.glossary_dict=glossary_dict
|
||||||
@@ -19,3 +22,9 @@ class Glossary:
|
|||||||
return prompt
|
return prompt
|
||||||
else:
|
else:
|
||||||
return ""
|
return ""
|
||||||
|
@staticmethod
def glossary_dict2csv(glossary_dict: dict[str, str], seperator=",", stem="glossary_gen") -> Document:
    """Serialize a glossary mapping into a CSV ``Document``.

    The output starts with a ``src``/``dst`` header row, followed by one row
    per glossary entry in the mapping's iteration order. Every row ends with
    a line feed and the whole text is encoded as UTF-8.

    Fields that contain the separator, a double quote, or a line break are
    wrapped in double quotes with embedded quotes doubled (RFC 4180 style),
    so such terms no longer spill into extra columns or rows. Fields without
    those characters are written exactly as before.

    Args:
        glossary_dict: Mapping of source term to target term.
        seperator: Column delimiter (the parameter name keeps its original
            spelling so existing keyword callers continue to work).
        stem: Stem passed through to ``Document.from_bytes``.

    Returns:
        The ``Document`` built by ``Document.from_bytes`` with suffix ``.csv``.
    """

    def _escape(field) -> str:
        # Quote only when needed so plain terms stay byte-identical to the
        # previous output.
        text = str(field)
        has_separator = bool(seperator) and seperator in text
        if has_separator or any(ch in text for ch in '"\r\n'):
            return '"' + text.replace('"', '""') + '"'
        return text

    rows = [f"src{seperator}dst\n"]
    rows.extend(
        f"{_escape(src)}{seperator}{_escape(dst)}\n"
        for src, dst in glossary_dict.items()
    )
    # Join once instead of growing a string with += inside the loop.
    content = "".join(rows)
    return Document.from_bytes(content=content.encode("utf-8"), suffix=".csv", stem=stem)
|
||||||
13
docutranslate/ir/attachment.py
Normal file
13
docutranslate/ir/attachment.py
Normal file
@@ -0,0 +1,13 @@
|
|||||||
|
from typing import Literal
|
||||||
|
|
||||||
|
from docutranslate.ir.document import Document
|
||||||
|
|
||||||
|
AttachMentIdentifier = Literal["glossary"]
|
||||||
|
|
||||||
|
|
||||||
|
class AttachMent:
    """Keyed collection of documents stored under attachment identifiers."""

    def __init__(self):
        # Maps each identifier to the document registered for it.
        self.attachment_dict: dict[AttachMentIdentifier, Document] = dict()

    def add_attachment(self, identifier: AttachMentIdentifier, document: Document):
        """Register ``document`` under ``identifier``, replacing any existing entry."""
        self.attachment_dict.update({identifier: document})
|
||||||
@@ -36,6 +36,7 @@ class AiTranslator(Translator[T]):
|
|||||||
def __init__(self, config: AiTranslatorConfig):
|
def __init__(self, config: AiTranslatorConfig):
|
||||||
super().__init__(config=config)
|
super().__init__(config=config)
|
||||||
self.glossary_agent = None
|
self.glossary_agent = None
|
||||||
|
self.glossary_dict_gen = None
|
||||||
if config.glossary_generate_enable:
|
if config.glossary_generate_enable:
|
||||||
if config.glossary_agent_config:
|
if config.glossary_agent_config:
|
||||||
self.glossary_agent = GlossaryAgent(config.glossary_agent_config)
|
self.glossary_agent = GlossaryAgent(config.glossary_agent_config)
|
||||||
@@ -52,7 +53,8 @@ class AiTranslator(Translator[T]):
|
|||||||
logger=self.logger,
|
logger=self.logger,
|
||||||
)
|
)
|
||||||
self.glossary_agent = GlossaryAgent(glossary_agent_config)
|
self.glossary_agent = GlossaryAgent(glossary_agent_config)
|
||||||
|
def get_glossary_dict(self):
    """Return the value currently stored in ``self.glossary_dict_gen``."""
    generated = self.glossary_dict_gen
    return generated
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def translate(self, document: T) -> Document:
|
def translate(self, document: T) -> Document:
|
||||||
...
|
...
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ from docx.document import Document as DocumentObject
|
|||||||
from docx.text.paragraph import Paragraph
|
from docx.text.paragraph import Paragraph
|
||||||
from docx.text.run import Run
|
from docx.text.run import Run
|
||||||
|
|
||||||
from docutranslate.agents.glossary_agent import GlossaryAgent, GlossaryAgentConfig
|
|
||||||
from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent
|
from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent
|
||||||
from docutranslate.ir.document import Document
|
from docutranslate.ir.document import Document
|
||||||
from docutranslate.translator.ai_translator.base import AiTranslatorConfig, AiTranslator
|
from docutranslate.translator.ai_translator.base import AiTranslatorConfig, AiTranslator
|
||||||
@@ -160,8 +159,8 @@ class DocxTranslator(AiTranslator):
|
|||||||
return self
|
return self
|
||||||
|
|
||||||
if self.glossary_agent:
|
if self.glossary_agent:
|
||||||
glossary_dict = self.glossary_agent.send_segments(original_texts, self.chunk_size)
|
self.glossary_dict_gen = self.glossary_agent.send_segments(original_texts, self.chunk_size)
|
||||||
self.translate_agent.update_glossary_dict(glossary_dict)
|
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
||||||
|
|
||||||
# 调用翻译 agent
|
# 调用翻译 agent
|
||||||
translated_texts = self.translate_agent.send_segments(original_texts, self.chunk_size)
|
translated_texts = self.translate_agent.send_segments(original_texts, self.chunk_size)
|
||||||
@@ -184,8 +183,8 @@ class DocxTranslator(AiTranslator):
|
|||||||
return self
|
return self
|
||||||
|
|
||||||
if self.glossary_agent:
|
if self.glossary_agent:
|
||||||
glossary_dict = await self.glossary_agent.send_segments_async(original_texts, self.chunk_size)
|
self.glossary_dict_gen = await self.glossary_agent.send_segments_async(original_texts, self.chunk_size)
|
||||||
self.translate_agent.update_glossary_dict(glossary_dict)
|
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
||||||
|
|
||||||
# 异步调用翻译 agent
|
# 异步调用翻译 agent
|
||||||
translated_texts = await self.translate_agent.send_segments_async(original_texts, self.chunk_size)
|
translated_texts = await self.translate_agent.send_segments_async(original_texts, self.chunk_size)
|
||||||
|
|||||||
@@ -8,7 +8,6 @@ from typing import Self, Literal, List, Dict, Any
|
|||||||
|
|
||||||
from bs4 import BeautifulSoup
|
from bs4 import BeautifulSoup
|
||||||
|
|
||||||
from docutranslate.agents.glossary_agent import GlossaryAgent, GlossaryAgentConfig
|
|
||||||
from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent
|
from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent
|
||||||
from docutranslate.ir.document import Document
|
from docutranslate.ir.document import Document
|
||||||
from docutranslate.translator.ai_translator.base import AiTranslatorConfig, AiTranslator
|
from docutranslate.translator.ai_translator.base import AiTranslatorConfig, AiTranslator
|
||||||
@@ -40,7 +39,6 @@ class EpubTranslator(AiTranslator):
|
|||||||
self.insert_mode = config.insert_mode
|
self.insert_mode = config.insert_mode
|
||||||
self.separator = config.separator
|
self.separator = config.separator
|
||||||
|
|
||||||
|
|
||||||
def _pre_translate(self, document: Document) -> tuple[
|
def _pre_translate(self, document: Document) -> tuple[
|
||||||
Dict[str, bytes], List[Dict[str, Any]], List[str]
|
Dict[str, bytes], List[Dict[str, Any]], List[str]
|
||||||
]:
|
]:
|
||||||
@@ -176,8 +174,8 @@ class EpubTranslator(AiTranslator):
|
|||||||
self.logger.info("\n文件中没有找到需要翻译的纯文本内容。")
|
self.logger.info("\n文件中没有找到需要翻译的纯文本内容。")
|
||||||
return self
|
return self
|
||||||
if self.glossary_agent:
|
if self.glossary_agent:
|
||||||
glossary_dict = self.glossary_agent.send_segments(original_texts, self.chunk_size)
|
self.glossary_dict_gen = self.glossary_agent.send_segments(original_texts, self.chunk_size)
|
||||||
self.translate_agent.update_glossary_dict(glossary_dict)
|
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
||||||
translated_texts = self.translate_agent.send_segments(original_texts, self.chunk_size)
|
translated_texts = self.translate_agent.send_segments(original_texts, self.chunk_size)
|
||||||
document.content = self._after_translate(
|
document.content = self._after_translate(
|
||||||
all_files, items_to_translate, translated_texts, original_texts
|
all_files, items_to_translate, translated_texts, original_texts
|
||||||
@@ -196,8 +194,8 @@ class EpubTranslator(AiTranslator):
|
|||||||
return self
|
return self
|
||||||
|
|
||||||
if self.glossary_agent:
|
if self.glossary_agent:
|
||||||
glossary_dict = await self.glossary_agent.send_segments_async(original_texts, self.chunk_size)
|
self.glossary_dict_gen = await self.glossary_agent.send_segments_async(original_texts, self.chunk_size)
|
||||||
self.translate_agent.update_glossary_dict(glossary_dict)
|
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
||||||
|
|
||||||
translated_texts = await self.translate_agent.send_segments_async(
|
translated_texts = await self.translate_agent.send_segments_async(
|
||||||
original_texts, self.chunk_size
|
original_texts, self.chunk_size
|
||||||
|
|||||||
@@ -198,8 +198,8 @@ class HtmlTranslator(AiTranslator):
|
|||||||
return self
|
return self
|
||||||
|
|
||||||
if self.glossary_agent:
|
if self.glossary_agent:
|
||||||
glossary_dict = self.glossary_agent.send_segments(original_texts, self.chunk_size)
|
self.glossary_dict_gen = self.glossary_agent.send_segments(original_texts, self.chunk_size)
|
||||||
self.translate_agent.update_glossary_dict(glossary_dict)
|
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
||||||
|
|
||||||
translated_texts = self.translate_agent.send_segments(original_texts, self.chunk_size)
|
translated_texts = self.translate_agent.send_segments(original_texts, self.chunk_size)
|
||||||
document.content = self._after_translate(soup, translatable_items, translated_texts, original_texts)
|
document.content = self._after_translate(soup, translatable_items, translated_texts, original_texts)
|
||||||
|
|||||||
@@ -75,8 +75,8 @@ class JsonTranslator(AiTranslator):
|
|||||||
|
|
||||||
original_texts = [match.value for match in all_matches]
|
original_texts = [match.value for match in all_matches]
|
||||||
if self.glossary_agent:
|
if self.glossary_agent:
|
||||||
glossary_dict = self.glossary_agent.send_segments(original_texts, self.chunk_size)
|
self.glossary_dict_gen = self.glossary_agent.send_segments(original_texts, self.chunk_size)
|
||||||
self.translate_agent.update_glossary_dict(glossary_dict)
|
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
||||||
# 步骤 2: 批量翻译提取出的文本
|
# 步骤 2: 批量翻译提取出的文本
|
||||||
translated_texts = self.translate_agent.send_segments(original_texts, self.chunk_size)
|
translated_texts = self.translate_agent.send_segments(original_texts, self.chunk_size)
|
||||||
|
|
||||||
@@ -106,8 +106,8 @@ class JsonTranslator(AiTranslator):
|
|||||||
original_texts = [match.value for match in all_matches]
|
original_texts = [match.value for match in all_matches]
|
||||||
|
|
||||||
if self.glossary_agent:
|
if self.glossary_agent:
|
||||||
glossary_dict = await self.glossary_agent.send_segments_async(original_texts, self.chunk_size)
|
self.glossary_dict_gen = await self.glossary_agent.send_segments_async(original_texts, self.chunk_size)
|
||||||
self.translate_agent.update_glossary_dict(glossary_dict)
|
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
||||||
|
|
||||||
# 步骤 2: 批量翻译提取出的文本
|
# 步骤 2: 批量翻译提取出的文本
|
||||||
translated_texts = await self.translate_agent.send_segments_async(original_texts, self.chunk_size)
|
translated_texts = await self.translate_agent.send_segments_async(original_texts, self.chunk_size)
|
||||||
|
|||||||
@@ -37,8 +37,8 @@ class MDTranslator(AiTranslator):
|
|||||||
with MDMaskUrisContext(document):
|
with MDMaskUrisContext(document):
|
||||||
chunks: list[str] = split_markdown_text(document.content.decode(), self.chunk_size)
|
chunks: list[str] = split_markdown_text(document.content.decode(), self.chunk_size)
|
||||||
if self.glossary_agent:
|
if self.glossary_agent:
|
||||||
glossary_dict = self.glossary_agent.send_segments(chunks, self.chunk_size)
|
self.glossary_dict_gen = self.glossary_agent.send_segments(chunks, self.chunk_size)
|
||||||
self.translate_agent.update_glossary_dict(glossary_dict)
|
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
||||||
self.logger.info(f"markdown分为{len(chunks)}块")
|
self.logger.info(f"markdown分为{len(chunks)}块")
|
||||||
result: list[str] = self.translate_agent.send_chunks(chunks)
|
result: list[str] = self.translate_agent.send_chunks(chunks)
|
||||||
content = join_markdown_texts(result)
|
content = join_markdown_texts(result)
|
||||||
@@ -56,8 +56,8 @@ class MDTranslator(AiTranslator):
|
|||||||
chunks: list[str] = split_markdown_text(document.content.decode(), self.chunk_size)
|
chunks: list[str] = split_markdown_text(document.content.decode(), self.chunk_size)
|
||||||
|
|
||||||
if self.glossary_agent:
|
if self.glossary_agent:
|
||||||
glossary_dict = await self.glossary_agent.send_segments_async(chunks, self.chunk_size)
|
self.glossary_dict_gen = await self.glossary_agent.send_segments_async(chunks, self.chunk_size)
|
||||||
self.translate_agent.update_glossary_dict(glossary_dict)
|
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
||||||
|
|
||||||
self.logger.info(f"markdown分为{len(chunks)}块")
|
self.logger.info(f"markdown分为{len(chunks)}块")
|
||||||
result: list[str] = await self.translate_agent.send_chunks_async(chunks)
|
result: list[str] = await self.translate_agent.send_chunks_async(chunks)
|
||||||
|
|||||||
@@ -4,7 +4,6 @@ from typing import Self, Literal
|
|||||||
|
|
||||||
import srt # 导入srt库来处理字幕文件
|
import srt # 导入srt库来处理字幕文件
|
||||||
|
|
||||||
from docutranslate.agents.glossary_agent import GlossaryAgentConfig, GlossaryAgent
|
|
||||||
from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent
|
from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent
|
||||||
from docutranslate.ir.document import Document
|
from docutranslate.ir.document import Document
|
||||||
from docutranslate.translator.ai_translator.base import AiTranslatorConfig, AiTranslator
|
from docutranslate.translator.ai_translator.base import AiTranslatorConfig, AiTranslator
|
||||||
@@ -109,8 +108,8 @@ class SrtTranslator(AiTranslator):
|
|||||||
self.logger.info("\n文件中没有找到需要翻译的字幕内容。")
|
self.logger.info("\n文件中没有找到需要翻译的字幕内容。")
|
||||||
return self
|
return self
|
||||||
if self.glossary_agent:
|
if self.glossary_agent:
|
||||||
glossary_dict = self.glossary_agent.send_segments(original_texts, self.chunk_size)
|
self.glossary_dict_gen = self.glossary_agent.send_segments(original_texts, self.chunk_size)
|
||||||
self.translate_agent.update_glossary_dict(glossary_dict)
|
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
||||||
# --- 步骤 2: 调用翻译Agent ---
|
# --- 步骤 2: 调用翻译Agent ---
|
||||||
translated_texts = self.translate_agent.send_segments(original_texts, self.chunk_size)
|
translated_texts = self.translate_agent.send_segments(original_texts, self.chunk_size)
|
||||||
|
|
||||||
|
|||||||
@@ -33,8 +33,8 @@ class TXTTranslator(AiTranslator):
|
|||||||
self.logger.info("正在翻译txt")
|
self.logger.info("正在翻译txt")
|
||||||
chunks: list[str] = split_markdown_text(document.content.decode(), max_block_size=self.chunk_size)
|
chunks: list[str] = split_markdown_text(document.content.decode(), max_block_size=self.chunk_size)
|
||||||
if self.glossary_agent:
|
if self.glossary_agent:
|
||||||
glossary_dict = self.glossary_agent.send_segments(chunks, self.chunk_size)
|
self.glossary_dict_gen = self.glossary_agent.send_segments(chunks, self.chunk_size)
|
||||||
self.translate_agent.update_glossary_dict(glossary_dict)
|
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
||||||
self.logger.info(f"txt分为{len(chunks)}块")
|
self.logger.info(f"txt分为{len(chunks)}块")
|
||||||
result: list[str] = self.translate_agent.send_chunks(chunks)
|
result: list[str] = self.translate_agent.send_chunks(chunks)
|
||||||
content = "\n".join(result)
|
content = "\n".join(result)
|
||||||
|
|||||||
@@ -155,8 +155,8 @@ class XlsxTranslator(AiTranslator):
|
|||||||
workbook.close()
|
workbook.close()
|
||||||
return self
|
return self
|
||||||
if self.glossary_agent:
|
if self.glossary_agent:
|
||||||
glossary_dict = self.glossary_agent.send_segments(original_texts, self.chunk_size)
|
self.glossary_dict_gen = self.glossary_agent.send_segments(original_texts, self.chunk_size)
|
||||||
self.translate_agent.update_glossary_dict(glossary_dict)
|
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
||||||
# --- 步骤 2: 调用翻译函数 ---
|
# --- 步骤 2: 调用翻译函数 ---
|
||||||
translated_texts = self.translate_agent.send_segments(original_texts, self.chunk_size)
|
translated_texts = self.translate_agent.send_segments(original_texts, self.chunk_size)
|
||||||
|
|
||||||
@@ -172,8 +172,8 @@ class XlsxTranslator(AiTranslator):
|
|||||||
return self
|
return self
|
||||||
|
|
||||||
if self.glossary_agent:
|
if self.glossary_agent:
|
||||||
glossary_dict = await self.glossary_agent.send_segments_async(original_texts, self.chunk_size)
|
self.glossary_dict_gen = await self.glossary_agent.send_segments_async(original_texts, self.chunk_size)
|
||||||
self.translate_agent.update_glossary_dict(glossary_dict)
|
self.translate_agent.update_glossary_dict(self.glossary_dict_gen)
|
||||||
|
|
||||||
# --- 步骤 2: 调用翻译函数 ---
|
# --- 步骤 2: 调用翻译函数 ---
|
||||||
translated_texts = await self.translate_agent.send_segments_async(original_texts, self.chunk_size)
|
translated_texts = await self.translate_agent.send_segments_async(original_texts, self.chunk_size)
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ from pathlib import Path
|
|||||||
from typing import Self, Generic, TypeVar
|
from typing import Self, Generic, TypeVar
|
||||||
|
|
||||||
from docutranslate.exporter.base import Exporter
|
from docutranslate.exporter.base import Exporter
|
||||||
|
from docutranslate.ir.attachment import AttachMent
|
||||||
from docutranslate.ir.document import Document
|
from docutranslate.ir.document import Document
|
||||||
|
|
||||||
|
|
||||||
@@ -24,6 +25,7 @@ class Workflow(ABC, Generic[T_Config, T_original, T_Translated]):
|
|||||||
self.logger = self.config.logger
|
self.logger = self.config.logger
|
||||||
self.document_original: T_original | None = None
|
self.document_original: T_original | None = None
|
||||||
self.document_translated: T_Translated | None = None
|
self.document_translated: T_Translated | None = None
|
||||||
|
self.attachment = AttachMent()
|
||||||
|
|
||||||
def read_path(self, path: Path | str) -> Self:
|
def read_path(self, path: Path | str) -> Self:
|
||||||
document = Document.from_path(path)
|
document = Document.from_path(path)
|
||||||
@@ -57,3 +59,6 @@ class Workflow(ABC, Generic[T_Config, T_original, T_Translated]):
|
|||||||
output_path.write_bytes(docu.content)
|
output_path.write_bytes(docu.content)
|
||||||
self.logger.info(f"文件已保存到{output_path.resolve()}")
|
self.logger.info(f"文件已保存到{output_path.resolve()}")
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
def get_attachment(self):
    """Return the object stored in ``self.attachment``."""
    attachment = self.attachment
    return attachment
|
||||||
|
|||||||
@@ -5,7 +5,7 @@ from typing import Self
|
|||||||
from docutranslate.exporter.base import ExporterConfig
|
from docutranslate.exporter.base import ExporterConfig
|
||||||
from docutranslate.exporter.docx.docx2docx_exporter import Docx2DocxExporter
|
from docutranslate.exporter.docx.docx2docx_exporter import Docx2DocxExporter
|
||||||
from docutranslate.exporter.docx.docx2html_exporter import Docx2HTMLExporterConfig, Docx2HTMLExporter
|
from docutranslate.exporter.docx.docx2html_exporter import Docx2HTMLExporterConfig, Docx2HTMLExporter
|
||||||
|
from docutranslate.glossary.glossary import Glossary
|
||||||
from docutranslate.ir.document import Document
|
from docutranslate.ir.document import Document
|
||||||
from docutranslate.translator.ai_translator.docx_translator import DocxTranslatorConfig, DocxTranslator
|
from docutranslate.translator.ai_translator.docx_translator import DocxTranslatorConfig, DocxTranslator
|
||||||
from docutranslate.workflow.base import Workflow, WorkflowConfig
|
from docutranslate.workflow.base import Workflow, WorkflowConfig
|
||||||
@@ -36,12 +36,16 @@ class DocxWorkflow(Workflow[DocxWorkflowConfig, Document, Document], HTMLExporta
|
|||||||
def translate(self) -> Self:
|
def translate(self) -> Self:
|
||||||
document, translator = self._pre_translate(self.document_original)
|
document, translator = self._pre_translate(self.document_original)
|
||||||
translator.translate(document)
|
translator.translate(document)
|
||||||
|
if translator.glossary_dict_gen:
|
||||||
|
self.attachment.add_attachment("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen))
|
||||||
self.document_translated = document
|
self.document_translated = document
|
||||||
return self
|
return self
|
||||||
|
|
||||||
async def translate_async(self) -> Self:
|
async def translate_async(self) -> Self:
|
||||||
document, translator = self._pre_translate(self.document_original)
|
document, translator = self._pre_translate(self.document_original)
|
||||||
await translator.translate_async(document)
|
await translator.translate_async(document)
|
||||||
|
if translator.glossary_dict_gen:
|
||||||
|
self.attachment.add_attachment("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen))
|
||||||
self.document_translated = document
|
self.document_translated = document
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ from typing import Self
|
|||||||
from docutranslate.exporter.base import ExporterConfig
|
from docutranslate.exporter.base import ExporterConfig
|
||||||
from docutranslate.exporter.epub.epub2epub_exporter import Epub2EpubExporter
|
from docutranslate.exporter.epub.epub2epub_exporter import Epub2EpubExporter
|
||||||
from docutranslate.exporter.epub.epub2html_exporter import Epub2HTMLExporterConfig, Epub2HTMLExporter
|
from docutranslate.exporter.epub.epub2html_exporter import Epub2HTMLExporterConfig, Epub2HTMLExporter
|
||||||
|
from docutranslate.glossary.glossary import Glossary
|
||||||
|
|
||||||
from docutranslate.ir.document import Document
|
from docutranslate.ir.document import Document
|
||||||
from docutranslate.translator.ai_translator.epub_translator import EpubTranslatorConfig, EpubTranslator
|
from docutranslate.translator.ai_translator.epub_translator import EpubTranslatorConfig, EpubTranslator
|
||||||
@@ -36,12 +37,16 @@ class EpubWorkflow(Workflow[EpubWorkflowConfig, Document, Document], HTMLExporta
|
|||||||
def translate(self) -> Self:
|
def translate(self) -> Self:
|
||||||
document, translator = self._pre_translate(self.document_original)
|
document, translator = self._pre_translate(self.document_original)
|
||||||
translator.translate(document)
|
translator.translate(document)
|
||||||
|
if translator.glossary_dict_gen:
|
||||||
|
self.attachment.add_attachment("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen))
|
||||||
self.document_translated = document
|
self.document_translated = document
|
||||||
return self
|
return self
|
||||||
|
|
||||||
async def translate_async(self) -> Self:
|
async def translate_async(self) -> Self:
|
||||||
document, translator = self._pre_translate(self.document_original)
|
document, translator = self._pre_translate(self.document_original)
|
||||||
await translator.translate_async(document)
|
await translator.translate_async(document)
|
||||||
|
if translator.glossary_dict_gen:
|
||||||
|
self.attachment.add_attachment("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen))
|
||||||
self.document_translated = document
|
self.document_translated = document
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
|||||||
@@ -4,6 +4,7 @@ from typing import Self
|
|||||||
|
|
||||||
from docutranslate.exporter.base import ExporterConfig
|
from docutranslate.exporter.base import ExporterConfig
|
||||||
from docutranslate.exporter.html.html2html_exporter import Html2HtmlExporter
|
from docutranslate.exporter.html.html2html_exporter import Html2HtmlExporter
|
||||||
|
from docutranslate.glossary.glossary import Glossary
|
||||||
|
|
||||||
from docutranslate.ir.document import Document
|
from docutranslate.ir.document import Document
|
||||||
from docutranslate.translator.ai_translator.html_translator import HtmlTranslatorConfig, HtmlTranslator
|
from docutranslate.translator.ai_translator.html_translator import HtmlTranslatorConfig, HtmlTranslator
|
||||||
@@ -34,12 +35,16 @@ class HtmlWorkflow(Workflow[HtmlWorkflowConfig, Document, Document], HTMLExporta
|
|||||||
def translate(self) -> Self:
|
def translate(self) -> Self:
|
||||||
document, translator = self._pre_translate(self.document_original)
|
document, translator = self._pre_translate(self.document_original)
|
||||||
translator.translate(document)
|
translator.translate(document)
|
||||||
|
if translator.glossary_dict_gen:
|
||||||
|
self.attachment.add_attachment("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen))
|
||||||
self.document_translated = document
|
self.document_translated = document
|
||||||
return self
|
return self
|
||||||
|
|
||||||
async def translate_async(self) -> Self:
|
async def translate_async(self) -> Self:
|
||||||
document, translator = self._pre_translate(self.document_original)
|
document, translator = self._pre_translate(self.document_original)
|
||||||
await translator.translate_async(document)
|
await translator.translate_async(document)
|
||||||
|
if translator.glossary_dict_gen:
|
||||||
|
self.attachment.add_attachment("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen))
|
||||||
self.document_translated = document
|
self.document_translated = document
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ from typing import Self
|
|||||||
from docutranslate.exporter.base import ExporterConfig
|
from docutranslate.exporter.base import ExporterConfig
|
||||||
from docutranslate.exporter.js.json2html_exporter import Json2HTMLExporterConfig, Json2HTMLExporter
|
from docutranslate.exporter.js.json2html_exporter import Json2HTMLExporterConfig, Json2HTMLExporter
|
||||||
from docutranslate.exporter.js.json2json_exporter import Json2JsonExporter
|
from docutranslate.exporter.js.json2json_exporter import Json2JsonExporter
|
||||||
|
from docutranslate.glossary.glossary import Glossary
|
||||||
from docutranslate.ir.document import Document
|
from docutranslate.ir.document import Document
|
||||||
from docutranslate.translator.ai_translator.json_translator import JsonTranslatorConfig, JsonTranslator
|
from docutranslate.translator.ai_translator.json_translator import JsonTranslatorConfig, JsonTranslator
|
||||||
from docutranslate.workflow.base import Workflow, WorkflowConfig
|
from docutranslate.workflow.base import Workflow, WorkflowConfig
|
||||||
@@ -35,12 +36,16 @@ class JsonWorkflow(Workflow[JsonWorkflowConfig, Document, Document], HTMLExporta
|
|||||||
def translate(self) -> Self:
|
def translate(self) -> Self:
|
||||||
document, translator = self._pre_translate(self.document_original)
|
document, translator = self._pre_translate(self.document_original)
|
||||||
translator.translate(document)
|
translator.translate(document)
|
||||||
|
if translator.glossary_dict_gen:
|
||||||
|
self.attachment.add_attachment("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen))
|
||||||
self.document_translated = document
|
self.document_translated = document
|
||||||
return self
|
return self
|
||||||
|
|
||||||
async def translate_async(self) -> Self:
|
async def translate_async(self) -> Self:
|
||||||
document, translator = self._pre_translate(self.document_original)
|
document, translator = self._pre_translate(self.document_original)
|
||||||
await translator.translate_async(document)
|
await translator.translate_async(document)
|
||||||
|
if translator.glossary_dict_gen:
|
||||||
|
self.attachment.add_attachment("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen))
|
||||||
self.document_translated = document
|
self.document_translated = document
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
|||||||
@@ -6,6 +6,7 @@ from typing import Self, Tuple, Type
|
|||||||
from docutranslate.cacher import md_based_convert_cacher
|
from docutranslate.cacher import md_based_convert_cacher
|
||||||
from docutranslate.exporter.base import ExporterConfig
|
from docutranslate.exporter.base import ExporterConfig
|
||||||
from docutranslate.global_values.conditional_import import DOCLING_EXIST
|
from docutranslate.global_values.conditional_import import DOCLING_EXIST
|
||||||
|
from docutranslate.glossary.glossary import Glossary
|
||||||
from docutranslate.ir.document import Document
|
from docutranslate.ir.document import Document
|
||||||
from docutranslate.ir.markdown_document import MarkdownDocument
|
from docutranslate.ir.markdown_document import MarkdownDocument
|
||||||
|
|
||||||
@@ -86,6 +87,8 @@ class MarkdownBasedWorkflow(Workflow[MarkdownBasedWorkflowConfig, Document, Mark
|
|||||||
convert_engine, convert_config, translator_config, translator = self._pre_translate(self.document_original)
|
convert_engine, convert_config, translator_config, translator = self._pre_translate(self.document_original)
|
||||||
document_md = self._get_document_md(convert_engine, convert_config)
|
document_md = self._get_document_md(convert_engine, convert_config)
|
||||||
translator.translate(document_md)
|
translator.translate(document_md)
|
||||||
|
if translator.glossary_dict_gen:
|
||||||
|
self.attachment.add_attachment("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen))
|
||||||
self.document_translated = document_md
|
self.document_translated = document_md
|
||||||
return self
|
return self
|
||||||
|
|
||||||
@@ -93,6 +96,8 @@ class MarkdownBasedWorkflow(Workflow[MarkdownBasedWorkflowConfig, Document, Mark
|
|||||||
convert_engine, convert_config, translator_config, translator = self._pre_translate(self.document_original)
|
convert_engine, convert_config, translator_config, translator = self._pre_translate(self.document_original)
|
||||||
document_md = await asyncio.to_thread(self._get_document_md, convert_engine, convert_config)
|
document_md = await asyncio.to_thread(self._get_document_md, convert_engine, convert_config)
|
||||||
await translator.translate_async(document_md)
|
await translator.translate_async(document_md)
|
||||||
|
if translator.glossary_dict_gen:
|
||||||
|
self.attachment.add_attachment("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen))
|
||||||
self.document_translated = document_md
|
self.document_translated = document_md
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ from typing import Self
|
|||||||
from docutranslate.exporter.base import ExporterConfig
|
from docutranslate.exporter.base import ExporterConfig
|
||||||
from docutranslate.exporter.srt.srt2html_exporter import Srt2HTMLExporterConfig, Srt2HTMLExporter
|
from docutranslate.exporter.srt.srt2html_exporter import Srt2HTMLExporterConfig, Srt2HTMLExporter
|
||||||
from docutranslate.exporter.srt.srt2srt_exporter import Srt2SrtExporter
|
from docutranslate.exporter.srt.srt2srt_exporter import Srt2SrtExporter
|
||||||
|
from docutranslate.glossary.glossary import Glossary
|
||||||
from docutranslate.ir.document import Document
|
from docutranslate.ir.document import Document
|
||||||
from docutranslate.translator.ai_translator.srt_translator import SrtTranslatorConfig, SrtTranslator
|
from docutranslate.translator.ai_translator.srt_translator import SrtTranslatorConfig, SrtTranslator
|
||||||
from docutranslate.workflow.base import Workflow, WorkflowConfig
|
from docutranslate.workflow.base import Workflow, WorkflowConfig
|
||||||
@@ -36,12 +37,16 @@ class SrtWorkflow(Workflow[SrtWorkflowConfig, Document, Document], HTMLExportabl
|
|||||||
def translate(self) -> Self:
|
def translate(self) -> Self:
|
||||||
document, translator=self._pre_translate(self.document_original)
|
document, translator=self._pre_translate(self.document_original)
|
||||||
translator.translate(document)
|
translator.translate(document)
|
||||||
|
if translator.glossary_dict_gen:
|
||||||
|
self.attachment.add_attachment("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen))
|
||||||
self.document_translated = document
|
self.document_translated = document
|
||||||
return self
|
return self
|
||||||
|
|
||||||
async def translate_async(self) -> Self:
|
async def translate_async(self) -> Self:
|
||||||
document, translator = self._pre_translate(self.document_original)
|
document, translator = self._pre_translate(self.document_original)
|
||||||
await translator.translate_async(document)
|
await translator.translate_async(document)
|
||||||
|
if translator.glossary_dict_gen:
|
||||||
|
self.attachment.add_attachment("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen))
|
||||||
self.document_translated = document
|
self.document_translated = document
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
|||||||
@@ -5,6 +5,7 @@ from typing import Self
|
|||||||
from docutranslate.exporter.base import ExporterConfig
|
from docutranslate.exporter.base import ExporterConfig
|
||||||
from docutranslate.exporter.txt.txt2html_exporter import TXT2HTMLExporterConfig, TXT2HTMLExporter
|
from docutranslate.exporter.txt.txt2html_exporter import TXT2HTMLExporterConfig, TXT2HTMLExporter
|
||||||
from docutranslate.exporter.txt.txt2txt_exporter import TXT2TXTExporter
|
from docutranslate.exporter.txt.txt2txt_exporter import TXT2TXTExporter
|
||||||
|
from docutranslate.glossary.glossary import Glossary
|
||||||
from docutranslate.ir.document import Document
|
from docutranslate.ir.document import Document
|
||||||
from docutranslate.translator.ai_translator.txt_translator import TXTTranslatorConfig, TXTTranslator
|
from docutranslate.translator.ai_translator.txt_translator import TXTTranslatorConfig, TXTTranslator
|
||||||
from docutranslate.workflow.base import Workflow, WorkflowConfig
|
from docutranslate.workflow.base import Workflow, WorkflowConfig
|
||||||
@@ -36,12 +37,16 @@ class TXTWorkflow(Workflow[TXTWorkflowConfig, Document, Document], HTMLExportabl
|
|||||||
def translate(self) -> Self:
|
def translate(self) -> Self:
|
||||||
document, translator=self._pre_translate(self.document_original)
|
document, translator=self._pre_translate(self.document_original)
|
||||||
translator.translate(document)
|
translator.translate(document)
|
||||||
|
if translator.glossary_dict_gen:
|
||||||
|
self.attachment.add_attachment("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen))
|
||||||
self.document_translated = document
|
self.document_translated = document
|
||||||
return self
|
return self
|
||||||
|
|
||||||
async def translate_async(self) -> Self:
|
async def translate_async(self) -> Self:
|
||||||
document, translator = self._pre_translate(self.document_original)
|
document, translator = self._pre_translate(self.document_original)
|
||||||
await translator.translate_async(document)
|
await translator.translate_async(document)
|
||||||
|
if translator.glossary_dict_gen:
|
||||||
|
self.attachment.add_attachment("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen))
|
||||||
self.document_translated = document
|
self.document_translated = document
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
|||||||
@@ -11,6 +11,7 @@ from docutranslate.exporter.base import ExporterConfig
|
|||||||
from docutranslate.exporter.xlsx.xlsx2csv_exporter import Xlsx2CsvExporter
|
from docutranslate.exporter.xlsx.xlsx2csv_exporter import Xlsx2CsvExporter
|
||||||
from docutranslate.exporter.xlsx.xlsx2html_exporter import Xlsx2HTMLExporterConfig, Xlsx2HTMLExporter
|
from docutranslate.exporter.xlsx.xlsx2html_exporter import Xlsx2HTMLExporterConfig, Xlsx2HTMLExporter
|
||||||
from docutranslate.exporter.xlsx.xlsx2xlsx_exporter import Xlsx2XlsxExporter
|
from docutranslate.exporter.xlsx.xlsx2xlsx_exporter import Xlsx2XlsxExporter
|
||||||
|
from docutranslate.glossary.glossary import Glossary
|
||||||
from docutranslate.ir.document import Document
|
from docutranslate.ir.document import Document
|
||||||
from docutranslate.translator.ai_translator.xlsx_translator import XlsxTranslatorConfig, XlsxTranslator
|
from docutranslate.translator.ai_translator.xlsx_translator import XlsxTranslatorConfig, XlsxTranslator
|
||||||
from docutranslate.workflow.base import Workflow, WorkflowConfig
|
from docutranslate.workflow.base import Workflow, WorkflowConfig
|
||||||
@@ -59,6 +60,8 @@ class XlsxWorkflow(Workflow[XlsxWorkflowConfig, Document, Document], HTMLExporta
|
|||||||
document_xlsx = self._get_document_xlsx(self.document_original)
|
document_xlsx = self._get_document_xlsx(self.document_original)
|
||||||
document, translator = self._pre_translate(document_xlsx)
|
document, translator = self._pre_translate(document_xlsx)
|
||||||
translator.translate(document)
|
translator.translate(document)
|
||||||
|
if translator.glossary_dict_gen:
|
||||||
|
self.attachment.add_attachment("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen))
|
||||||
self.document_translated = document
|
self.document_translated = document
|
||||||
return self
|
return self
|
||||||
|
|
||||||
@@ -66,6 +69,8 @@ class XlsxWorkflow(Workflow[XlsxWorkflowConfig, Document, Document], HTMLExporta
|
|||||||
document_xlsx = await asyncio.to_thread(self._get_document_xlsx, self.document_original)
|
document_xlsx = await asyncio.to_thread(self._get_document_xlsx, self.document_original)
|
||||||
document, translator = self._pre_translate(document_xlsx)
|
document, translator = self._pre_translate(document_xlsx)
|
||||||
await translator.translate_async(document)
|
await translator.translate_async(document)
|
||||||
|
if translator.glossary_dict_gen:
|
||||||
|
self.attachment.add_attachment("glossary", Glossary.glossary_dict2csv(translator.glossary_dict_gen))
|
||||||
self.document_translated = document
|
self.document_translated = document
|
||||||
return self
|
return self
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user