增加ass支持

This commit is contained in:
xunbu
2025-09-22 16:00:13 +08:00
parent 9769985d68
commit f1f1036fda
13 changed files with 505 additions and 175 deletions

View File

@@ -31,12 +31,11 @@ from docutranslate.global_values.conditional_import import DOCLING_EXIST
from docutranslate.workflow.base import Workflow
from docutranslate.workflow.docx_workflow import DocxWorkflow, DocxWorkflowConfig
from docutranslate.workflow.epub_workflow import EpubWorkflow, EpubWorkflowConfig
# --- HTML WORKFLOW IMPORT START ---
from docutranslate.workflow.html_workflow import HtmlWorkflow, HtmlWorkflowConfig
# --- HTML WORKFLOW IMPORT END ---
from docutranslate.workflow.ass_workflow import AssWorkflow, AssWorkflowConfig
from docutranslate.workflow.interfaces import DocxExportable, EpubExportable
from docutranslate.workflow.interfaces import HTMLExportable, MDFormatsExportable, TXTExportable, JsonExportable, \
XlsxExportable, SrtExportable, CsvExportable
XlsxExportable, SrtExportable, CsvExportable, AssExportable
from docutranslate.workflow.json_workflow import JsonWorkflow, JsonWorkflowConfig
from docutranslate.workflow.md_based_workflow import MarkdownBasedWorkflow, MarkdownBasedWorkflowConfig
from docutranslate.workflow.srt_workflow import SrtWorkflow, SrtWorkflowConfig
@@ -60,9 +59,9 @@ from docutranslate.translator.ai_translator.srt_translator import SrtTranslatorC
from docutranslate.exporter.srt.srt2html_exporter import Srt2HTMLExporterConfig
from docutranslate.translator.ai_translator.epub_translator import EpubTranslatorConfig
from docutranslate.exporter.epub.epub2html_exporter import Epub2HTMLExporterConfig
# --- HTML TRANSLATOR IMPORT START ---
from docutranslate.translator.ai_translator.html_translator import HtmlTranslatorConfig
# --- HTML TRANSLATOR IMPORT END ---
from docutranslate.translator.ai_translator.ass_translator import AssTranslatorConfig
from docutranslate.exporter.ass.ass2html_exporter import Ass2HTMLExporterConfig
# ------------------------------------
from docutranslate.logger import global_logger
@@ -86,6 +85,7 @@ WORKFLOW_DICT: Dict[str, Type[Workflow]] = {
"srt": SrtWorkflow,
"epub": EpubWorkflow,
"html": HtmlWorkflow,
"ass": AssWorkflow,
}
# --- 媒体类型映射 ---
@@ -100,6 +100,7 @@ MEDIA_TYPES = {
"docx": "application/vnd.openxmlformats-officedocument.wordprocessingml.document",
"srt": "text/plain; charset=utf-8",
"epub": "application/epub+zip",
"ass": "text/plain; charset=utf-8",
}
@@ -159,7 +160,7 @@ async def lifespan(app: FastAPI):
global_logger.propagate = False
global_logger.setLevel(logging.INFO)
print("应用启动完成,多任务状态已初始化。")
print(f"服务接口文档: http://127.0.0.1:{app.state.port_to_use}/docs")
print(f"服务接口文档: http://127.0.0.1:{app.state.port_to_use}/docs")
print(f"请用浏览器访问 http://127.0.0.1:{app.state.port_to_use}\n")
yield
# 清理任何可能残留的临时目录
@@ -391,10 +392,26 @@ class HtmlWorkflowParams(BaseWorkflowParams):
# --- HTML WORKFLOW PARAMS END ---
# --- ASS WORKFLOW PARAMS START ---
class AssWorkflowParams(BaseWorkflowParams):
    # Request parameters for the ASS subtitle workflow, on top of the fields
    # inherited from BaseWorkflowParams.

    # Fixed literal 'ass'; required field (no default).
    workflow_type: Literal['ass'] = Field(..., description="指定使用ASS字幕的翻译工作流。")
    # Where the translated text goes relative to the original; defaults to "replace".
    insert_mode: Literal["replace", "append", "prepend"] = Field(
        "replace",
        description="翻译文本的插入模式。'replace':替换原文,'append':附加到原文后,'prepend':附加到原文前。"
    )
    # Text placed between original and translation; the default is the
    # two-character sequence backslash + N.
    separator: str = Field(
        "\\N",
        description="当 insert_mode 为 'append''prepend'用于分隔原文和译文的分隔符。ASS格式通常使用 \\N 作为换行符。"
    )
# --- ASS WORKFLOW PARAMS END ---
# 3. 使用可辨识联合类型Discriminated Union将它们组合起来
TranslatePayload = Annotated[
Union[
MarkdownWorkflowParams, TextWorkflowParams, JsonWorkflowParams, XlsxWorkflowParams, DocxWorkflowParams, SrtWorkflowParams, EpubWorkflowParams, HtmlWorkflowParams],
MarkdownWorkflowParams, TextWorkflowParams, JsonWorkflowParams, XlsxWorkflowParams, DocxWorkflowParams, SrtWorkflowParams, EpubWorkflowParams, HtmlWorkflowParams, AssWorkflowParams],
Field(discriminator='workflow_type')
]
@@ -403,7 +420,7 @@ TranslatePayload = Annotated[
class TranslateServiceRequest(BaseModel):
file_name: str = Field(..., description="上传的原始文件名,含扩展名。",
examples=["my_paper.pdf", "chapter1.txt", "data.xlsx", "video.srt", "my_book.epub",
"index.html"])
"index.html", "dialogue.ass"])
file_content: str = Field(..., description="Base64编码的文件内容。", examples=["JVBERi0xLjQK..."])
payload: TranslatePayload = Field(..., description="包含工作流类型和相应参数的载荷。")
@@ -582,6 +599,26 @@ class TranslateServiceRequest(BaseModel):
"thinking": "default",
"retry": default_params["retry"],
}
},
{
"file_name": "dialogue.ass",
"file_content": "U2NyaXB0IEluZm8NC...",
"payload": {
"workflow_type": "ass",
"skip_translate": False,
"base_url": "https://api.openai.com/v1",
"api_key": "sk-your-api-key-here",
"model_id": "gpt-4o",
"to_lang": "中文",
"insert_mode": "replace",
"separator": "\\N",
"chunk_size": default_params["chunk_size"],
"concurrent": default_params["concurrent"],
"temperature": default_params["temperature"],
"timeout": default_params["timeout"],
"thinking": "default",
"retry": default_params["retry"],
}
}
]
}
@@ -787,6 +824,27 @@ async def _perform_translation(
workflow = HtmlWorkflow(config=workflow_config)
# --- HTML WORKFLOW LOGIC END ---
# --- ASS WORKFLOW LOGIC START ---
elif isinstance(payload, AssWorkflowParams):
task_logger.info("构建 AssWorkflow 配置。")
translator_args = payload.model_dump(include={
'skip_translate', 'base_url', 'api_key', 'model_id', 'to_lang', 'custom_prompt',
'temperature', 'thinking', 'chunk_size', 'concurrent',
'insert_mode', 'separator', 'glossary_dict', 'timeout', 'retry'
}, exclude_none=True)
translator_args['glossary_generate_enable'] = payload.glossary_generate_enable
translator_args['glossary_agent_config'] = build_glossary_agent_config()
translator_config = AssTranslatorConfig(**translator_args)
html_exporter_config = Ass2HTMLExporterConfig(cdn=True)
workflow_config = AssWorkflowConfig(
translator_config=translator_config,
html_exporter_config=html_exporter_config,
logger=task_logger
)
workflow = AssWorkflow(config=workflow_config)
# --- ASS WORKFLOW LOGIC END ---
else:
raise TypeError(f"工作流类型 '{payload.workflow_type}' 的处理逻辑未实现。")
@@ -832,6 +890,8 @@ async def _perform_translation(
html_config = Srt2HTMLExporterConfig(cdn=is_cdn_available)
elif isinstance(workflow, EpubWorkflow):
html_config = Epub2HTMLExporterConfig(cdn=is_cdn_available)
elif isinstance(workflow, AssWorkflow):
html_config = Ass2HTMLExporterConfig(cdn=is_cdn_available)
export_map['html'] = (lambda: workflow.export_to_html(html_config), f"{filename_stem}_translated.html",
True)
if isinstance(workflow, MDFormatsExportable):
@@ -851,6 +911,8 @@ async def _perform_translation(
export_map['srt'] = (workflow.export_to_srt, f"{filename_stem}_translated.srt", True)
if isinstance(workflow, EpubExportable):
export_map['epub'] = (workflow.export_to_epub, f"{filename_stem}_translated.epub", False)
if isinstance(workflow, AssExportable):
export_map['ass'] = (workflow.export_to_ass, f"{filename_stem}_translated.ass", True)
# 循环生成文件
for file_type, (export_func, filename, is_string_output) in export_map.items():
@@ -1013,7 +1075,7 @@ def _cancel_translation_logic(task_id: str):
description="""
接收一个包含文件内容Base64编码和工作流参数的JSON请求启动一个后台翻译任务。
- **工作流选择**: 请求体中的 `payload.workflow_type` 字段决定了本次任务的类型(如 `markdown_based`, `txt`, `json`, `xlsx`, `docx`, `srt`, `epub`, `html`)。
- **工作流选择**: 请求体中的 `payload.workflow_type` 字段决定了本次任务的类型(如 `markdown_based`, `txt`, `json`, `xlsx`, `docx`, `srt`, `epub`, `html`, `ass`)。
- **动态参数**: 根据所选工作流API需要不同的参数集。请参考下面的Schema或示例。
- **异步处理**: 此端点会立即返回任务ID客户端需轮询状态接口获取进度。
""",
@@ -1220,6 +1282,23 @@ async def service_release_task(task_id: str):
}
},
# --- HTML STATUS EXAMPLE END ---
# --- ASS STATUS EXAMPLE START ---
"completed_ass": {
"summary": "已完成 (ASS)",
"value": {
"task_id": "a1b2c3d5", "is_processing": False,
"status_message": "翻译成功!用时 12.34 秒。",
"error_flag": False, "download_ready": True, "original_filename_stem": "dialogue",
"original_filename": "dialogue.ass", "task_start_time": 1678890200.0,
"task_end_time": 1678890212.34,
"downloads": {
"ass": "/service/download/a1b2c3d5/ass",
"html": "/service/download/a1b2c3d5/html"
},
"attachment": {}
}
},
# --- ASS STATUS EXAMPLE END ---
"error": {
"summary": "失败",
"value": {
@@ -1287,7 +1366,7 @@ async def service_get_logs(task_id: str):
return JSONResponse(content={"logs": new_logs})
FileType = Literal["markdown", "markdown_zip", "html", "txt", "json", "xlsx", "csv", "docx", "srt", "epub"]
FileType = Literal["markdown", "markdown_zip", "html", "txt", "json", "xlsx", "csv", "docx", "srt", "epub", "ass"]
@service_router.get(
@@ -1318,7 +1397,7 @@ FileType = Literal["markdown", "markdown_zip", "html", "txt", "json", "xlsx", "c
async def service_download_file(
task_id: str = FastApiPath(..., description="已完成任务的ID", examples=["b2865b93"]),
file_type: FileType = FastApiPath(..., description="要下载的文件类型。",
examples=["html", "json", "csv", "docx", "srt", "epub"])
examples=["html", "json", "csv", "docx", "srt", "epub", "ass"])
):
task_state = tasks_state.get(task_id)
if not task_state:
@@ -1418,7 +1497,7 @@ async def service_download_attachment(
async def service_content(
task_id: str = FastApiPath(..., description="已完成任务的ID", examples=["b2865b93"]),
file_type: FileType = FastApiPath(..., description="要获取内容的文件类型。",
examples=["html", "json", "csv", "docx", "srt", "epub"])
examples=["html", "json", "csv", "docx", "srt", "epub", "ass"])
):
task_state = tasks_state.get(task_id)
if not task_state:

View File

View File

@@ -0,0 +1,9 @@
# SPDX-FileCopyrightText: 2025 QinHan
# SPDX-License-Identifier: MPL-2.0
from docutranslate.exporter.ass.base import AssExporter
from docutranslate.ir.document import Document
class Ass2AssExporter(AssExporter):
    """Exporter whose output is a copy of the document it is given."""

    def export(self, document: Document) -> Document:
        """Return ``document.copy()``; the argument itself is handed back to nobody."""
        duplicate = document.copy()
        return duplicate

View File

@@ -0,0 +1,42 @@
# SPDX-FileCopyrightText: 2025 QinHan
# SPDX-License-Identifier: MPL-2.0
from dataclasses import dataclass
import jinja2
from docutranslate.exporter.ass.base import AssExporter
from docutranslate.exporter.base import ExporterConfig
from docutranslate.ir.document import Document
from docutranslate.utils.resource_utils import resource_path
@dataclass
class Ass2HTMLExporterConfig(ExporterConfig):
    """Configuration for :class:`Ass2HTMLExporter`."""

    # Copied onto the exporter as ``self.cdn``; defaults to True.
    # NOTE(review): presumably selects CDN-hosted assets like the other HTML
    # exporters' flag of the same name — confirm; the ASS template shown here
    # does not reference it.
    cdn: bool = True
class Ass2HTMLExporter(AssExporter):
    """Render an ASS subtitle document as a standalone HTML page."""

    def __init__(self, config: Ass2HTMLExporterConfig = None):
        """Store the exporter settings.

        Args:
            config: Exporter configuration. A default
                ``Ass2HTMLExporterConfig`` is used when it is omitted.
        """
        config = config or Ass2HTMLExporterConfig()
        super().__init__(config=config)
        self.cdn = config.cdn

    def export(self, document: Document) -> Document:
        """Embed the subtitle text of ``document`` into the ``ass.html`` template.

        Args:
            document: Document whose ``content`` holds UTF-8 encoded ASS text,
                with or without a leading BOM.

        Returns:
            A new ``.html`` document with the same stem as ``document``.

        Raises:
            UnicodeDecodeError: If ``document.content`` is not valid UTF-8.
        """
        html_template = resource_path("template/ass.html").read_text(encoding="utf-8")
        # "utf-8-sig" strips a leading BOM when present and otherwise behaves
        # like plain UTF-8, so the BOM never leaks into the rendered page.
        ass_text = document.content.decode("utf-8-sig")
        # Subtitle text is arbitrary user data: autoescape keeps characters
        # such as "<" and "&" from being interpreted as markup.
        render = jinja2.Template(html_template, autoescape=True).render(
            ass_data=ass_text
        )
        return Document.from_bytes(content=render.encode("utf-8"), suffix=".html", stem=document.stem)
if __name__ == '__main__':
    # Manual smoke test: convert one local ASS file and write the page to ./1.html.
    from pathlib import Path

    d = Document.from_path(r"C:\Users\jxgm\Desktop\testfiles\一个软件搞定文件翻译【DocuTranslate】.ass")
    exporter = Ass2HTMLExporter()
    d_html = exporter.export(d)
    path = Path("./1.html")
    # Write UTF-8 explicitly: the page declares charset UTF-8, and the
    # platform default codec may be unable to encode the subtitle text.
    path.write_text(d_html.content.decode("utf-8"), encoding="utf-8")

View File

@@ -0,0 +1,10 @@
# SPDX-FileCopyrightText: 2025 QinHan
# SPDX-License-Identifier: MPL-2.0
from docutranslate.exporter.base import Exporter
from docutranslate.ir.document import Document
# TODO: decide whether TXT needs its own dedicated document type.
# NOTE(review): this TODO mentions TXT although the class below is the ASS
# exporter base — confirm it still applies here.
class AssExporter(Exporter[Document]):
    """Base class for ASS exporters: takes a ``Document`` and yields a ``Document``."""

    def export(self,document:Document)->Document:
        """Export ``document``; this base implementation is an empty stub."""
        ...

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

View File

@@ -0,0 +1,16 @@
<!DOCTYPE html>
<html lang="en">
<head>
    <meta charset="UTF-8">
    <title>ass subtitle</title>
    <style>
        html {
            padding: 2vh 10vw;
            font-size: 15px;
        }
    </style>
</head>
<body>
{# Escape explicitly: subtitle text is user data and may contain "<" or "&". #}
<div><pre>{{ ass_data | e }}</pre></div>
</body>
</html>

View File

@@ -0,0 +1,136 @@
# SPDX-FileCopyrightText: 2025 QinHan
# SPDX-License-Identifier: MPL-2.0
import asyncio
from dataclasses import dataclass
from typing import Self, Literal, List, Optional
import pysubs2
from docutranslate.agents.segments_agent import SegmentsTranslateAgentConfig, SegmentsTranslateAgent
from docutranslate.ir.document import Document
from docutranslate.translator.ai_translator.base import AiTranslatorConfig, AiTranslator
@dataclass
class AssTranslatorConfig(AiTranslatorConfig):
    """Settings for :class:`AssTranslator` on top of ``AiTranslatorConfig``."""

    # How a translation is written back to a line: "replace" overwrites the
    # original text, "append" puts it after the original, "prepend" before it.
    insert_mode: Literal["replace", "append", "prepend"] = "replace"
    separator: str = "\\N"  # the line break in ASS is \N
    # Possible future extension: restrict translation by style name or time
    # range. Not implemented for now; every Dialogue line is translated.
    translate_regions: Optional[List[str]] = None  # interface placeholder, currently ignored
class AssTranslator(AiTranslator):
    """Translate the dialogue text of an ASS subtitle document in place.

    Only the text of non-empty ``Dialogue`` events is sent for translation.
    The parsed file is re-serialised as ASS after the translated text has been
    written back according to ``insert_mode``.
    """

    def __init__(self, config: AssTranslatorConfig):
        """Build the translator and, unless translation is skipped, its agent.

        Args:
            config: Translator settings, including the ASS-specific
                ``insert_mode``, ``separator`` and ``translate_regions``.
        """
        super().__init__(config=config)
        self.chunk_size = config.chunk_size
        self.translate_agent = None
        if not self.skip_translate:
            agent_config = SegmentsTranslateAgentConfig(
                custom_prompt=config.custom_prompt,
                to_lang=config.to_lang,
                base_url=config.base_url,
                api_key=config.api_key,
                model_id=config.model_id,
                temperature=config.temperature,
                thinking=config.thinking,
                concurrent=config.concurrent,
                timeout=config.timeout,
                logger=self.logger,
                glossary_dict=config.glossary_dict,
                retry=config.retry
            )
            self.translate_agent = SegmentsTranslateAgent(agent_config)
        self.insert_mode = config.insert_mode
        self.separator = config.separator
        self.translate_regions = config.translate_regions  # kept as an interface, not used yet

    def _pre_translate(self, document: Document):
        """Parse the ASS content and collect the dialogue lines to translate.

        Args:
            document: Document whose ``content`` is UTF-8 encoded ASS text,
                with or without a BOM.

        Returns:
            A tuple ``(subs, lines_to_translate, original_texts)``: the parsed
            subtitle file, one dict per translatable line (keys ``index``,
            ``original_text`` and ``line``), and the original texts in the
            same order.

        Raises:
            UnicodeDecodeError: If the content is not valid UTF-8.
        """
        # "utf-8-sig" accepts input with or without a BOM, so no second
        # attempt with plain "utf-8" is needed: it could only fail again.
        content_str = document.content.decode('utf-8-sig')

        subs = pysubs2.SSAFile.from_string(content_str)

        # Translate the text only; styles, timing etc. stay untouched.
        lines_to_translate = [
            {
                "index": i,  # position of the event inside subs
                "original_text": line.text,
                "line": line,  # keep the reference so it can be modified later
            }
            for i, line in enumerate(subs)
            if line.type == "Dialogue" and isinstance(line.text, str) and line.text.strip()
        ]

        original_texts = [item["original_text"] for item in lines_to_translate]
        return subs, lines_to_translate, original_texts

    def _merge_text(self, original_text: str, translated_text: str) -> str:
        """Combine original and translated text for a supported ``insert_mode``."""
        if self.insert_mode == "append":
            return original_text + self.separator + translated_text
        if self.insert_mode == "prepend":
            return translated_text + self.separator + original_text
        return translated_text

    def _after_translate(self, subs, lines_to_translate, translated_texts, original_texts):
        """Write the translations back and serialise the subtitle file.

        Args:
            subs: Parsed subtitle file returned by ``_pre_translate``.
            lines_to_translate: Line records returned by ``_pre_translate``.
            translated_texts: Translations, index-aligned with
                ``lines_to_translate``.
            original_texts: Original texts, index-aligned with
                ``lines_to_translate``.

        Returns:
            The ASS file as bytes encoded with ``utf-8-sig`` (BOM included).
        """
        if self.insert_mode not in ("replace", "append", "prepend"):
            # Report the misconfiguration once; the lines keep their original text.
            self.logger.error(f"不支持的插入模式: {self.insert_mode}")
        else:
            for i, item in enumerate(lines_to_translate):
                item["line"].text = self._merge_text(original_texts[i], translated_texts[i])

        # Serialise to a string, then encode to bytes with a BOM.
        output_str = subs.to_string(format_="ass")
        return output_str.encode('utf-8-sig')

    def translate(self, document: Document) -> Self:
        """Translate ``document`` in place and return this translator.

        The document is left unchanged when it has no translatable dialogue
        line. Without a translate agent the original texts are written back.
        """
        subs, lines_to_translate, original_texts = self._pre_translate(document)

        if not lines_to_translate:
            self.logger.info("未找到需要翻译的字幕行。")
            return self

        if self.glossary_agent:
            self.glossary_dict_gen = self.glossary_agent.send_segments(original_texts, self.chunk_size)
            if self.translate_agent:
                self.translate_agent.update_glossary_dict(self.glossary_dict_gen)

        if self.translate_agent:
            translated_texts = self.translate_agent.send_segments(original_texts, self.chunk_size)
        else:
            translated_texts = original_texts

        document.content = self._after_translate(subs, lines_to_translate, translated_texts, original_texts)
        return self

    async def translate_async(self, document: Document) -> Self:
        """Asynchronous counterpart of :meth:`translate`.

        Parsing and serialisation run in a worker thread via
        ``asyncio.to_thread``; the agent calls are awaited.
        """
        subs, lines_to_translate, original_texts = await asyncio.to_thread(self._pre_translate, document)

        if not lines_to_translate:
            self.logger.info("未找到需要翻译的字幕行。")
            return self

        if self.glossary_agent:
            self.glossary_dict_gen = await self.glossary_agent.send_segments_async(original_texts, self.chunk_size)
            if self.translate_agent:
                self.translate_agent.update_glossary_dict(self.glossary_dict_gen)

        if self.translate_agent:
            translated_texts = await self.translate_agent.send_segments_async(original_texts, self.chunk_size)
        else:
            translated_texts = original_texts

        document.content = await asyncio.to_thread(
            self._after_translate, subs, lines_to_translate, translated_texts, original_texts
        )
        return self

View File

@@ -0,0 +1,76 @@
# SPDX-FileCopyrightText: 2025 QinHan
# SPDX-License-Identifier: MPL-2.0
from dataclasses import dataclass
from pathlib import Path
from typing import Self
from docutranslate.exporter.ass.ass2ass_exporter import Ass2AssExporter
from docutranslate.exporter.ass.ass2html_exporter import Ass2HTMLExporterConfig, Ass2HTMLExporter
from docutranslate.exporter.base import ExporterConfig
from docutranslate.glossary.glossary import Glossary
from docutranslate.ir.document import Document
from docutranslate.translator.ai_translator.ass_translator import AssTranslatorConfig, AssTranslator
from docutranslate.workflow.base import WorkflowConfig, Workflow
from docutranslate.workflow.interfaces import HTMLExportable, AssExportable
@dataclass(kw_only=True)
class AssWorkflowConfig(WorkflowConfig):
    """Keyword-only configuration bundle for :class:`AssWorkflow`."""

    # Settings passed to AssTranslator when a translation is started.
    translator_config: AssTranslatorConfig
    # Default settings for HTML export when the caller supplies none.
    html_exporter_config: Ass2HTMLExporterConfig
class AssWorkflow(Workflow[AssWorkflowConfig, Document, Document], HTMLExportable[Ass2HTMLExporterConfig],
                  AssExportable[ExporterConfig]):
    """Workflow that translates an ASS document and exports it as ASS or HTML."""

    def __init__(self, config: AssWorkflowConfig):
        """Initialise the workflow and hand its logger to the translator config."""
        super().__init__(config=config)
        workflow_logger = config.logger
        if not workflow_logger:
            return
        translator_config = self.config.translator_config
        if translator_config:
            translator_config.logger = workflow_logger

    def _pre_translate(self, document_original: Document):
        """Return a copy of the original document and a fresh ``AssTranslator``."""
        working_copy = document_original.copy()
        ass_translator = AssTranslator(self.config.translator_config)
        return working_copy, ass_translator

    def translate(self) -> Self:
        """Translate a copy of the original document and store it as the result."""
        working_copy, ass_translator = self._pre_translate(self.document_original)
        ass_translator.translate(working_copy)
        # A generated glossary, if any, is attached as a CSV document.
        if ass_translator.glossary_dict_gen:
            glossary_csv = Glossary.glossary_dict2csv(ass_translator.glossary_dict_gen)
            self.attachment.add_document("glossary", glossary_csv)
        self.document_translated = working_copy
        return self

    async def translate_async(self) -> Self:
        """Awaitable variant of :meth:`translate`."""
        working_copy, ass_translator = self._pre_translate(self.document_original)
        await ass_translator.translate_async(working_copy)
        # A generated glossary, if any, is attached as a CSV document.
        if ass_translator.glossary_dict_gen:
            glossary_csv = Glossary.glossary_dict2csv(ass_translator.glossary_dict_gen)
            self.attachment.add_document("glossary", glossary_csv)
        self.document_translated = working_copy
        return self

    def export_to_html(self, config: Ass2HTMLExporterConfig = None) -> str:
        """Export as HTML text, using the workflow's HTML config when none is given."""
        effective_config = config if config else self.config.html_exporter_config
        exported = self._export(Ass2HTMLExporter(effective_config))
        return exported.content.decode()

    def export_to_ass(self, _: ExporterConfig | None = None) -> str:
        """Export as ASS text; the config argument is accepted but unused."""
        exported = self._export(Ass2AssExporter())
        return exported.content.decode()

    def save_as_html(self, name: str = None, output_dir: Path | str = "./output",
                     config: Ass2HTMLExporterConfig | None = None) -> Self:
        """Save the HTML export through ``_save`` and return the workflow."""
        effective_config = config if config else self.config.html_exporter_config
        html_exporter = Ass2HTMLExporter(effective_config)
        self._save(exporter=html_exporter, name=name, output_dir=output_dir)
        return self

    def save_as_ass(self, name: str = None, output_dir: Path | str = "./output",
                    _: ExporterConfig | None = None) -> Self:
        """Save the ASS export through ``_save``; the config argument is unused."""
        ass_exporter = Ass2AssExporter()
        self._save(exporter=ass_exporter, name=name, output_dir=output_dir)
        return self

View File

@@ -103,3 +103,11 @@ class EpubExportable(Protocol[T_ExporterConfig]):
def save_as_epub(self, name: str, output_dir: Path | str, config: T_ExporterConfig | None = None) -> Self:
...
@runtime_checkable
class AssExportable(Protocol[T_ExporterConfig]):
    """Protocol for objects that provide ``export_to_ass`` and ``save_as_ass``."""

    def export_to_ass(self, config: T_ExporterConfig | None = None) -> str:
        """Return the export as a string; ``config`` is optional."""
        ...

    def save_as_ass(self, name: str, output_dir: Path | str, config: T_ExporterConfig | None = None) -> Self:
        """Save the export under ``name`` in ``output_dir`` and return the object itself."""
        ...

View File

@@ -18,6 +18,8 @@ dependencies = [
"markdown>=3.8.2",
"pymdown-extensions>=10.16.1",
"chardet>=5.2.0",
"py>=1.11.0",
"pysubs2>=1.8.0",
]
dynamic = ["version"]

104
uv.lock generated
View File

@@ -24,15 +24,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/9f/1c/a17fb513aeb684fb83bef5f395910f53103ab30308bbdd77fd66d6698c46/accelerate-1.9.0-py3-none-any.whl", hash = "sha256:c24739a97ade1d54af4549a65f8b6b046adc87e2b3e4d6c66516e32c53d5a8f1", size = 367073 },
]
[[package]]
name = "altgraph"
version = "0.17.4"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/de/a8/7145824cf0b9e3c28046520480f207df47e927df83aa9555fb47f8505922/altgraph-0.17.4.tar.gz", hash = "sha256:1b5afbb98f6c4dcadb2e2ae6ab9fa994bbb8c1d75f4fa96d340f9437ae454406", size = 48418 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/4d/3f/3bc3f1d83f6e4a7fcb834d3720544ca597590425be5ba9db032b2bf322a2/altgraph-0.17.4-py2.py3-none-any.whl", hash = "sha256:642743b4750de17e655e6711601b077bc6598dbfa3ba5fa2b2a35ce12b508dff", size = 21212 },
]
[[package]]
name = "annotated-types"
version = "0.7.0"
@@ -334,7 +325,9 @@ dependencies = [
{ name = "mammoth" },
{ name = "markdown" },
{ name = "openpyxl" },
{ name = "py" },
{ name = "pymdown-extensions" },
{ name = "pysubs2" },
{ name = "python-docx" },
{ name = "srt" },
{ name = "xlsx2html" },
@@ -350,7 +343,6 @@ docling = [
dev = [
{ name = "docling" },
{ name = "opencv-python" },
{ name = "pyinstaller" },
]
[package.metadata]
@@ -367,7 +359,9 @@ requires-dist = [
{ name = "markdown", specifier = ">=3.8.2" },
{ name = "opencv-python", marker = "extra == 'docling'", specifier = ">=4.11.0.86" },
{ name = "openpyxl", specifier = ">=3.1.5" },
{ name = "py", specifier = ">=1.11.0" },
{ name = "pymdown-extensions", specifier = ">=10.16.1" },
{ name = "pysubs2", specifier = ">=1.8.0" },
{ name = "python-docx", specifier = ">=1.2.0" },
{ name = "srt", specifier = ">=3.5.3" },
{ name = "xlsx2html", specifier = ">=0.6.2" },
@@ -378,7 +372,6 @@ provides-extras = ["docling"]
dev = [
{ name = "docling", specifier = ">=2.40.0" },
{ name = "opencv-python", specifier = ">=4.11.0.86" },
{ name = "pyinstaller", specifier = ">=6.14.2" },
]
[[package]]
@@ -798,18 +791,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/fc/14/c115516c62a7d2499781d2d3d7215218c0731b2c940753bf9f9b7b73924d/lxml-5.4.0-cp313-cp313-win_amd64.whl", hash = "sha256:bcb7a1096b4b6b24ce1ac24d4942ad98f983cd3810f9711bcd0293f43a9d8b9f", size = 3814606 },
]
[[package]]
name = "macholib"
version = "1.16.3"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "altgraph" },
]
sdist = { url = "https://files.pythonhosted.org/packages/95/ee/af1a3842bdd5902ce133bd246eb7ffd4375c38642aeb5dc0ae3a0329dfa2/macholib-1.16.3.tar.gz", hash = "sha256:07ae9e15e8e4cd9a788013d81f5908b3609aa76f9b1421bae9c4d7606ec86a30", size = 59309 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/d1/5d/c059c180c84f7962db0aeae7c3b9303ed1d73d76f2bfbc32bc231c8be314/macholib-1.16.3-py2.py3-none-any.whl", hash = "sha256:0e315d7583d38b8c77e815b1ecbdbf504a8258d8b3e17b61165c6feb60d18f2c", size = 38094 },
]
[[package]]
name = "mammoth"
version = "1.10.0"
@@ -1267,15 +1248,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/d5/f9/07086f5b0f2a19872554abeea7658200824f5835c58a106fa8f2ae96a46c/pandas-2.3.1-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:5db9637dbc24b631ff3707269ae4559bce4b7fd75c1c4d7e13f40edc42df4444", size = 13189044 },
]
[[package]]
name = "pefile"
version = "2023.2.7"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/78/c5/3b3c62223f72e2360737fd2a57c30e5b2adecd85e70276879609a7403334/pefile-2023.2.7.tar.gz", hash = "sha256:82e6114004b3d6911c77c3953e3838654b04511b8b66e8583db70c65998017dc", size = 74854 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/55/26/d0ad8b448476d0a1e8d3ea5622dc77b916db84c6aa3cb1e1c0965af948fc/pefile-2023.2.7-py3-none-any.whl", hash = "sha256:da185cd2af68c08a6cd4481f7325ed600a88f6a813bad9dea07ab3ef73d8d8d6", size = 71791 },
]
[[package]]
name = "pillow"
version = "11.3.0"
@@ -1393,6 +1365,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/50/1b/6921afe68c74868b4c9fa424dad3be35b095e16687989ebbb50ce4fceb7c/psutil-7.0.0-cp37-abi3-win_amd64.whl", hash = "sha256:4cf3d4eb1aa9b348dec30105c55cd9b7d4629285735a102beb4441e38db90553", size = 244885 },
]
[[package]]
name = "py"
version = "1.11.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/98/ff/fec109ceb715d2a6b4c4a85a61af3b40c723a961e8828319fbcb15b868dc/py-1.11.0.tar.gz", hash = "sha256:51c75c4126074b472f746a24399ad32f6053d1b34b68d2fa41e558e6f4a98719", size = 207796 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/f6/f0/10642828a8dfb741e5f3fbaac830550a518a775c7fff6f04a007259b0548/py-1.11.0-py2.py3-none-any.whl", hash = "sha256:607c53218732647dff4acdfcd50cb62615cedf612e72d1724fb1a0cc6405b378", size = 98708 },
]
[[package]]
name = "pyclipper"
version = "1.3.0.post6"
@@ -1527,47 +1508,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/c7/21/705964c7812476f378728bdf590ca4b771ec72385c533964653c68e86bdc/pygments-2.19.2-py3-none-any.whl", hash = "sha256:86540386c03d588bb81d44bc3928634ff26449851e99741617ecb9037ee5ec0b", size = 1225217 },
]
[[package]]
name = "pyinstaller"
version = "6.15.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "altgraph" },
{ name = "macholib", marker = "sys_platform == 'darwin'" },
{ name = "packaging" },
{ name = "pefile", marker = "sys_platform == 'win32'" },
{ name = "pyinstaller-hooks-contrib" },
{ name = "pywin32-ctypes", marker = "sys_platform == 'win32'" },
{ name = "setuptools" },
]
sdist = { url = "https://files.pythonhosted.org/packages/64/17/b2bb4de22650adbeef401fa82a1b43028976547a8728602e4d29735b455e/pyinstaller-6.15.0.tar.gz", hash = "sha256:a48fc4644ee4aa2aa2a35e7b51f496f8fbd7eecf6a2150646bbf1613ad07bc2d", size = 4331521 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/24/dd/d5c8a127446adda954f68ea7fac22772f7ab8656ad4b06df396d82574ca9/pyinstaller-6.15.0-py3-none-macosx_10_13_universal2.whl", hash = "sha256:9f00c71c40148cd1e61695b2c6f1e086693d3bcf9bfa22ab513aa4254c3b966f", size = 1016981 },
{ url = "https://files.pythonhosted.org/packages/2d/2a/7b50593b419db43e48d9bdeebaac0ff92a5fe035f3c30f87ca3e1650d7e2/pyinstaller-6.15.0-py3-none-manylinux2014_aarch64.whl", hash = "sha256:cbcc8eb77320c60722030ac875883b564e00768fe3ff1721c7ba3ad0e0a277e9", size = 726337 },
{ url = "https://files.pythonhosted.org/packages/77/83/7f498fba0154c57eb5fc93eb9680a2dbadb9f780a3389fb85b8d79683378/pyinstaller-6.15.0-py3-none-manylinux2014_i686.whl", hash = "sha256:c33e6302bc53db2df1104ed5566bd980b3e0ee7f18416a6e3caa908c12a54542", size = 737539 },
{ url = "https://files.pythonhosted.org/packages/09/d6/e4477feab7c8379fb49e7ec95c82d0a69ad88f6ccc247f76bef3cb0e3432/pyinstaller-6.15.0-py3-none-manylinux2014_ppc64le.whl", hash = "sha256:eb902d0fed3bb1f8b7190dc4df5c11f3b59505767e0d56d1ed782b853938bbf3", size = 735426 },
{ url = "https://files.pythonhosted.org/packages/32/7e/ff25648276f15e2e77fc563d36d8cfcd917e077bf2a172420df3588601b4/pyinstaller-6.15.0-py3-none-manylinux2014_s390x.whl", hash = "sha256:b4df862adae7cf1f08eff53c43ace283822447f7f528f72e4f94749062712f15", size = 732210 },
{ url = "https://files.pythonhosted.org/packages/db/3d/267a7dddd0647de95d260780050ccd8228ab29d2b9edea54ed1f56800967/pyinstaller-6.15.0-py3-none-manylinux2014_x86_64.whl", hash = "sha256:b9ebf16ed0f99016ae8ae5746dee4cb244848a12941539e62ce2eea1df5a3f95", size = 732194 },
{ url = "https://files.pythonhosted.org/packages/4d/61/962b2eb79ef225233e2d6e04600e998935328011dfb2fa775b1dd16b943a/pyinstaller-6.15.0-py3-none-musllinux_1_1_aarch64.whl", hash = "sha256:22193489e6a22435417103f61e7950363bba600ef36ec3ab1487303668c81092", size = 731256 },
{ url = "https://files.pythonhosted.org/packages/67/5e/4e20e1c0e5791b09b69bef3ac921fd0cd25551b56879324ad999b92fa045/pyinstaller-6.15.0-py3-none-musllinux_1_1_x86_64.whl", hash = "sha256:18f743069849dbaee3e10900385f35795a5743eabab55e99dcc42f204e40a0db", size = 731148 },
{ url = "https://files.pythonhosted.org/packages/88/31/28956c534991f289e2f981c715730b6241e75dc6295737a8cbd050a0cc8c/pyinstaller-6.15.0-py3-none-win32.whl", hash = "sha256:60da8f1b5071766b45c0f607d8bc3d7e59ba2c3b262d08f2e4066ba65f3544a2", size = 1312297 },
{ url = "https://files.pythonhosted.org/packages/09/ab/6a45186c7f8e34c422faecd72580116a67d068158c57faa2d2f6d01faa7f/pyinstaller-6.15.0-py3-none-win_amd64.whl", hash = "sha256:cbea297e16eeda30b41c300d6ec2fd2abea4dbd8d8a32650eeec36431c94fcd9", size = 1373091 },
{ url = "https://files.pythonhosted.org/packages/5b/86/72159af032b9db36f2470a3b085f79277ec1c38e7e48f8c5dc1ed16dc4e1/pyinstaller-6.15.0-py3-none-win_arm64.whl", hash = "sha256:f43c035621742cf2d19b84308c60e4e44e72c94786d176b8f6adcde351b5bd98", size = 1314305 },
]
[[package]]
name = "pyinstaller-hooks-contrib"
version = "2025.8"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "packaging" },
{ name = "setuptools" },
]
sdist = { url = "https://files.pythonhosted.org/packages/71/d6/e5b378b7d4add8c879295c531309b0320e9c07a70458665d091760ffdc87/pyinstaller_hooks_contrib-2025.8.tar.gz", hash = "sha256:3402ad41dfe9b5110af134422e37fc5d421ba342c6cb980bd67cb30b7415641c", size = 164214 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/48/34/1d973d0dae849683e53fbcda84443ce016f315e6f4dc7605ede4f56a28c3/pyinstaller_hooks_contrib-2025.8-py3-none-any.whl", hash = "sha256:8d0b8cfa0cb689a619294ae200497374234bd4e3994b3ace2a4442274c899064", size = 442346 },
]
[[package]]
name = "pylatexenc"
version = "2.10"
@@ -1607,6 +1547,15 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/be/7a/097801205b991bc3115e8af1edb850d30aeaf0118520b016354cf5ccd3f6/pypdfium2-4.30.0-py3-none-win_arm64.whl", hash = "sha256:119b2969a6d6b1e8d55e99caaf05290294f2d0fe49c12a3f17102d01c441bd29", size = 2752118 },
]
[[package]]
name = "pysubs2"
version = "1.8.0"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/31/4a/becf78d9d3df56e6c4a9c50b83794e5436b6c5ab6dd8a3f934e94c89338c/pysubs2-1.8.0.tar.gz", hash = "sha256:3397bb58a4a15b1325ba2ae3fd4d7c214e2c0ddb9f33190d6280d783bb433b20", size = 1130048 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/99/09/0fc0719162e5ad723f71d41cf336f18b6b5054d70dc0fe42ace6b4d2bdc9/pysubs2-1.8.0-py3-none-any.whl", hash = "sha256:05716f5039a9ebe32cd4d7673f923cf36204f3a3e99987f823ab83610b7035a0", size = 43516 },
]
[[package]]
name = "python-bidi"
version = "0.6.6"
@@ -1743,15 +1692,6 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/c0/d2/21af5c535501a7233e734b8af901574572da66fcc254cb35d0609c9080dd/pywin32-311-cp314-cp314-win_arm64.whl", hash = "sha256:a508e2d9025764a8270f93111a970e1d0fbfc33f4153b388bb649b7eec4f9b42", size = 8932540 },
]
[[package]]
name = "pywin32-ctypes"
version = "0.2.3"
source = { registry = "https://pypi.org/simple" }
sdist = { url = "https://files.pythonhosted.org/packages/85/9f/01a1a99704853cb63f253eea009390c88e7131c67e66a0a02099a8c917cb/pywin32-ctypes-0.2.3.tar.gz", hash = "sha256:d162dc04946d704503b2edc4d55f3dba5c1d539ead017afa00142c38b9885755", size = 29471 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/de/3d/8161f7711c017e01ac9f008dfddd9410dff3674334c233bde66e7ba65bbf/pywin32_ctypes-0.2.3-py3-none-any.whl", hash = "sha256:8a1513379d709975552d202d942d9837758905c8d01eb82b8bcc30918929e7b8", size = 30756 },
]
[[package]]
name = "pyyaml"
version = "6.0.2"