fix
This commit is contained in:
@@ -1,4 +1,5 @@
|
||||
import json
|
||||
from json_repair import json_repair
|
||||
from dataclasses import dataclass
|
||||
from json import JSONDecodeError
|
||||
|
||||
@@ -28,12 +29,18 @@ class SegmentsTranslateAgent(Agent):
|
||||
翻译后的片段应该与源格式尽量相同
|
||||
如果待翻译片段已经是目标语言,则保持原样
|
||||
# 输出
|
||||
翻译后的片段序列,以json文本表示(文本而非代码块)。其中键是片段编号,值是翻译后的片段
|
||||
翻译后的片段序列,以json文本表示(注意不是代码块)。其中键是片段编号,值是翻译后的片段。
|
||||
返回的json文本必须能被json.loads转换为形如{{"片段编号":"译文"}}的字典。
|
||||
# 示例
|
||||
## 输入
|
||||
{r'{"0":"hello","1":"apple","2":true,"3":"false"}'}
|
||||
## 输出
|
||||
{r'{"0":"你好","1":"苹果","2":true,"3":"错误"}'}
|
||||
# 错误输出示例
|
||||
"{r'"{\"0\":\"你好\",\"1\":\"苹果\"}"'}"
|
||||
# 正确输出示例
|
||||
{r'{"0":"你好","1":"苹果"}'}
|
||||
警告:绝不要将整个JSON对象用引号包裹成一个字符串。
|
||||
"""
|
||||
if config.custom_prompt:
|
||||
self.system_prompt += "\n# 重要规则或背景【非常重要】\n" + config.custom_prompt + '\n'
|
||||
@@ -41,26 +48,40 @@ class SegmentsTranslateAgent(Agent):
|
||||
def send_segments(self, segments: list[str], chunk_size: int):
    """Translate ``segments`` chunk by chunk and return them in input order.

    Each segment is keyed by its stringified index, the keyed mapping is
    split with ``flat_json_split`` and every chunk is sent as one JSON
    prompt. Replies are merged back by key, so a segment whose key is
    missing from the replies (or whose chunk could not be parsed) keeps its
    original text, and reply keys that were never sent are ignored.

    Args:
        segments: Source text segments to translate.
        chunk_size: Size limit forwarded to ``flat_json_split``.

    Returns:
        A list with one entry per input segment, in the original order.
    """
    indexed_originals = {str(i): text for i, text in enumerate(segments)}
    chunks = flat_json_split(indexed_originals, chunk_size)
    # ensure_ascii=False keeps non-ASCII source text as-is in the prompt
    # instead of turning it into \uXXXX escapes.
    prompts = [json.dumps(chunk, ensure_ascii=False) for chunk in chunks]
    translated_chunks = super().send_prompts(prompts=prompts)

    # Start from the originals so untranslated segments fall back to source.
    indexed_translated = indexed_originals.copy()
    for chunk_str in translated_chunks:
        try:
            translated_part = json_repair.loads(chunk_str)
            # Iterate key/value pairs. Iterating the dict itself yields only
            # keys, and unpacking a key string either raises ValueError or
            # (for two-character keys such as "10") silently corrupts
            # another segment.
            for key, val in translated_part.items():
                if key in indexed_translated:
                    indexed_translated[key] = val
        except JSONDecodeError as e:
            # JSONDecodeError subclasses ValueError, so it must come first.
            self.logger.error(f"json解析错误,解析文本:{chunk_str},错误:{e.__repr__()}")
        except ValueError as e:
            self.logger.error(f"value错误,更新对象:{indexed_translated},错误:{e.__repr__()}")
        except AttributeError as e:
            # The parsed reply was not a dict (no .items()); skip this chunk.
            self.logger.error(f"属性错误,chunk_str:{chunk_str},错误:{e.__repr__()}")

    return list(indexed_translated.values())
|
||||
|
||||
#todo:增加协程粒度
|
||||
async def send_segments_async(self, segments: list[str], chunk_size: int):
    """Asynchronously translate ``segments`` and return them in input order.

    Segments are keyed by their stringified index, split with
    ``flat_json_split`` and sent as JSON prompts through
    ``send_prompts_async``. Replies are merged back by key; segments with no
    usable reply keep their original text and unknown reply keys are ignored.

    Args:
        segments: Source text segments to translate.
        chunk_size: Size limit forwarded to ``flat_json_split``.

    Returns:
        A list with one entry per input segment, in the original order.
    """
    indexed_originals = {str(i): text for i, text in enumerate(segments)}
    prompts = [
        json.dumps(piece, ensure_ascii=False)
        for piece in flat_json_split(indexed_originals, chunk_size)
    ]
    translated_chunks = await super().send_prompts_async(prompts=prompts)

    # Seed the result with the source text so missing keys fall back to it.
    indexed_translated = indexed_originals.copy()
    for chunk_str in translated_chunks:
        try:
            reply = json_repair.loads(chunk_str)
            for seg_id, seg_text in reply.items():
                if seg_id not in indexed_translated:
                    # Ignore keys that were never sent.
                    continue
                indexed_translated[seg_id] = seg_text
        except JSONDecodeError as e:
            self.logger.error(f"json解析错误,解析文本:{chunk_str},错误:{e.__repr__()}")
        except ValueError as e:
            self.logger.error(f"value错误,更新对象:{indexed_translated},错误:{e.__repr__()}")
        except AttributeError as e:
            # Raised when the parsed reply is not a dict and has no .items().
            self.logger.error(f"属性错误,chunk_str:{chunk_str},错误:{e.__repr__()}")

    return list(indexed_translated.values())
|
||||
|
||||
@@ -3,6 +3,7 @@ from io import BytesIO
|
||||
|
||||
import jinja2
|
||||
import openpyxl
|
||||
from xlsx2html import xlsx2html
|
||||
|
||||
from docutranslate.exporter.base import ExporterConfig
|
||||
from docutranslate.exporter.xlsx.base import XlsxExporter
|
||||
@@ -22,40 +23,5 @@ class Xlsx2HTMLExporter(XlsxExporter):
|
||||
self.cdn = config.cdn
|
||||
|
||||
def export(self, document: Document) -> Document:
    """Render an XLSX document as HTML using ``xlsx2html``.

    Args:
        document: Document whose ``content`` holds the workbook bytes.

    Returns:
        A new ``.html`` document with the same stem, UTF-8 encoded.
    """
    workbook_stream = BytesIO(document.content)
    # output=None: the converter hands back a buffer we read via getvalue().
    rendered = xlsx2html(workbook_stream, output=None)
    html_bytes = rendered.getvalue().encode("utf-8")
    return Document.from_bytes(content=html_bytes, suffix=".html", stem=document.stem)
|
||||
|
||||
File diff suppressed because one or more lines are too long
Reference in New Issue
Block a user