增加术语表的稳定性

This commit is contained in:
xunbu
2025-10-30 20:36:30 +08:00
parent d292f2bbd2
commit a2ade9940c

View File

@@ -1,6 +1,7 @@
# SPDX-FileCopyrightText: 2025 QinHan # SPDX-FileCopyrightText: 2025 QinHan
# SPDX-License-Identifier: MPL-2.0 # SPDX-License-Identifier: MPL-2.0
import csv import csv
import re
from io import StringIO from io import StringIO
from docutranslate.ir.document import Document from docutranslate.ir.document import Document
@@ -12,14 +13,19 @@ class Glossary:
def update(self, update_dict: dict[str:str]): def update(self, update_dict: dict[str:str]):
for src, dst in update_dict.items(): for src, dst in update_dict.items():
if src not in self.glossary_dict: if src.strip() not in self.glossary_dict:
self.glossary_dict[src] = dst self.glossary_dict[src.strip()] = dst
def append_system_prompt(self, text: str): def append_system_prompt(self, text: str):
flag = False flag = False
prompt = "\nHere is the reference glossary:\n" prompt = """
Please refer to the glossary for the translation of terms that appear in the glossary.
Here is the reference glossary:
"""
for src, dst in self.glossary_dict.items(): for src, dst in self.glossary_dict.items():
if src.lower().strip() in text.lower(): text=re.sub(r'\s+', '', text)#去除所有空白字符
src=re.sub(r'\s+', '', src)#去除所有空白字符
if src in text:
prompt += f"{src}=>{dst}\n" prompt += f"{src}=>{dst}\n"
flag = True flag = True
prompt += "Glossary ends\n" prompt += "Glossary ends\n"