提示词优化

This commit is contained in:
xunbu
2025-09-02 12:24:01 +08:00
parent 0b5f966c5f
commit bdf7321219
4 changed files with 9 additions and 6 deletions

View File

@@ -18,7 +18,7 @@ from docutranslate.logger import global_logger
from docutranslate.utils.utils import get_httpx_proxies from docutranslate.utils.utils import get_httpx_proxies
MAX_RETRY_COUNT = 2 MAX_RETRY_COUNT = 2
MAX_REQUESTS_PER_ERROR = 30 MAX_REQUESTS_PER_ERROR = 20
ThinkingMode = Literal["enable", "disable", "default"] ThinkingMode = Literal["enable", "disable", "default"]

View File

@@ -28,22 +28,23 @@ You are a professional glossary extractor
# Task # Task
You will receive a JSON-formatted list of paragraphs where keys are paragraph numbers and values are paragraph contents. You will receive a JSON-formatted list of paragraphs where keys are paragraph numbers and values are paragraph contents.
You need to extract **person names** and **location names** from these paragraphs and translate these terms into {self.to_lang}. You need to extract person names and location names from these paragraphs and translate these terms into {self.to_lang}.
Finally, output a glossary of original terms:translated terms Finally, output a glossary of original terms:translated terms
# Requirements # Requirements
- Do not include special tags or tags formatted as `<ph-xxxxxx>` in the glossary - Do not include special tags or tags formatted as `<ph-xxxxxx>` in the glossary
- The src in the output glossary must exactly match the original term, while dst is the {self.to_lang} translation of the term - The src in the output glossary must exactly match the original term, while dst is the {self.to_lang} translation of the term
- The same src should only appear once in the glossary without repetition - The same src should only appear once in the glossary without repetition
- Do not include common nouns in the glossary.
# Output # Output
The output format should be plain JSON text in a list format The output format should be plain JSON text in a list format
{[{"src": "<Original Term>", "dst": "<Translated Term>"}]} {[{"src": "<Original Term>", "dst": "<Translated Term>"}]}
# Example # Example
## Input (translating to Chinese): ## Input
{{"0":"Jobs likes apples","1":"Bill Gates is sunbathing in Shanghai."}} {{"0":"Jobs likes apples","1":"Bill Gates is sunbathing in Shanghai."}}
## Output ## Output (Assuming the target language is Chinese)
{r'[{"src": "Jobs", "dst": "乔布斯"}, {"src": "Bill Gates", "dst": "比尔盖茨"}, {"src": "Shanghai", "dst": "上海"}]'} {r'[{"src": "Jobs", "dst": "乔布斯"}, {"src": "Bill Gates", "dst": "比尔盖茨"}, {"src": "Shanghai", "dst": "上海"}]'}
""" """
@@ -106,6 +107,6 @@ The output format should be plain JSON text in a list format
self.logger.info(f"json解析错误解析文本:{chunk},错误:{e.__repr__()}") self.logger.info(f"json解析错误解析文本:{chunk},错误:{e.__repr__()}")
except Exception as e: except Exception as e:
self.logger.info(f"send_segments发生错误:{e.__repr__()}") self.logger.info(f"send_segments发生错误:{e.__repr__()}")
print(f"术语表:\n{result}") # print(f"术语表:\n{result}")
self.logger.info("术语表提取完成") self.logger.info("术语表提取完成")
return result return result

View File

@@ -12,6 +12,8 @@ class AttachMent:
self.identifier = identifier self.identifier = identifier
self.document = document self.document = document
def __repr__(self):
return self.document.name
class AttachMentManager: class AttachMentManager:
def __init__(self): def __init__(self):

File diff suppressed because one or more lines are too long