From a2ade9940c94c93284808eba1417c9ae2e331923 Mon Sep 17 00:00:00 2001 From: xunbu Date: Thu, 30 Oct 2025 20:36:30 +0800 Subject: [PATCH] =?UTF-8?q?=E5=A2=9E=E5=8A=A0=E6=9C=AF=E8=AF=AD=E8=A1=A8?= =?UTF-8?q?=E7=9A=84=E7=A8=B3=E5=AE=9A=E6=80=A7?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- docutranslate/glossary/glossary.py | 14 ++++++++++---- 1 file changed, 10 insertions(+), 4 deletions(-) diff --git a/docutranslate/glossary/glossary.py b/docutranslate/glossary/glossary.py index d71cea4..33d57c7 100644 --- a/docutranslate/glossary/glossary.py +++ b/docutranslate/glossary/glossary.py @@ -1,6 +1,7 @@ # SPDX-FileCopyrightText: 2025 QinHan # SPDX-License-Identifier: MPL-2.0 import csv +import re from io import StringIO from docutranslate.ir.document import Document @@ -12,14 +13,19 @@ class Glossary: def update(self, update_dict: dict[str:str]): for src, dst in update_dict.items(): - if src not in self.glossary_dict: - self.glossary_dict[src] = dst + if src.strip() not in self.glossary_dict: + self.glossary_dict[src.strip()] = dst def append_system_prompt(self, text: str): flag = False - prompt = "\nHere is the reference glossary:\n" + prompt = """ + Please refer to the glossary for the translation of terms that appear in the glossary. + Here is the reference glossary: + """ for src, dst in self.glossary_dict.items(): - if src.lower().strip() in text.lower(): + text=re.sub(r'\s+', '', text)#去除所有空白字符 + src=re.sub(r'\s+', '', src)#去除所有空白字符 + if src in text: prompt += f"{src}=>{dst}\n" flag = True prompt += "Glossary ends\n"