增加rpm和tpm限制

This commit is contained in:
xunbu
2025-12-17 21:46:40 +08:00
parent c9ffa8f195
commit 579f0b8a9c
5 changed files with 344 additions and 172 deletions

View File

@@ -5,6 +5,7 @@ import asyncio
import itertools
import logging
import time
from collections import deque
from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass
from threading import Lock
@@ -12,6 +13,7 @@ from typing import Literal, Callable, Any
from urllib.parse import urlparse
import httpx
import tiktoken
from docutranslate.agents.thinking.thinking_factory import get_thinking_mode
from docutranslate.logger import global_logger
@@ -46,11 +48,13 @@ class AgentConfig:
model_id: str
temperature: float = 0.7
concurrent: int = 30
timeout: int = 1200 # 单位(秒)这个值是httpx.TimeOut中read的值,并非总的超时时间
timeout: int = 1200
thinking: ThinkingMode = "disable"
retry: int = 2
system_proxy_enable: bool = False
force_json: bool = False # 应输出json格式时强制ai输出json
force_json: bool = False
rpm: int | None = None # 每分钟请求数限制
tpm: int | None = None # 每分钟Token数限制
class TotalErrorCounter:
@@ -71,7 +75,6 @@ class TotalErrorCounter:
return self.count > self.max_errors_count
# 仅使用多线程时用以计数
class PromptsCounter:
def __init__(self, total: int, logger: logging.Logger):
self.lock = Lock()
@@ -85,21 +88,101 @@ class PromptsCounter:
self.logger.info(f"多线程-已完成:{self.count}/{self.total}")
# --- 新增 RateLimiter 类 ---
class RateLimiter:
"""
基于滑动窗口的速率限制器,支持 RPM 和 TPM 控制。
同时支持 Async 和 Sync 调用。
"""
def __init__(self, rpm: int | None, tpm: int | None):
self.rpm = rpm
self.tpm = tpm
# 双端队列存储 (timestamp, value)value对于RPM是1对于TPM是token数量
self.request_timestamps = deque()
self.token_timestamps = deque()
self.lock = Lock() # 用于同步模式和保护共享数据
def _cleanup_window(self, now: float):
"""清理60秒窗口之前的数据"""
window_start = now - 60.0
while self.request_timestamps and self.request_timestamps[0] <= window_start:
self.request_timestamps.popleft()
while self.token_timestamps and self.token_timestamps[0][0] <= window_start:
self.token_timestamps.popleft()
def _check_and_get_wait_time(self, tokens: int) -> float:
"""检查是否满足限制,返回需要等待的秒数。如果不需等待返回 0"""
now = time.time()
self._cleanup_window(now)
wait_time = 0.0
# Check RPM
if self.rpm and len(self.request_timestamps) >= self.rpm:
# 取最早的一条记录,计算还需要等待多久才能腾出位置
earliest = self.request_timestamps[0]
wait_time = max(wait_time, 60 - (now - earliest))
# Check TPM
if self.tpm:
current_tokens = sum(t[1] for t in self.token_timestamps)
if current_tokens + tokens > self.tpm:
# 稍微复杂点需要移除足够多的旧token才能放入新token
# 这里做一个简化估算:如果超限,等到最早的记录过期
if self.token_timestamps:
earliest = self.token_timestamps[0][0]
wait_time = max(wait_time, 60 - (now - earliest))
else:
# 这种情况理论上不应该发生除非单次请求超过了TPM上限
# 如果单次超过上限强制等待1秒防止死循环并允许通过(或者抛异常,这里选择允许)
pass
return wait_time
def _record_usage(self, tokens: int):
"""记录使用量"""
now = time.time()
if self.rpm is not None:
self.request_timestamps.append(now)
if self.tpm is not None:
self.token_timestamps.append((now, tokens))
async def acquire_async(self, tokens: int = 0):
"""异步等待配额"""
if self.rpm is None and self.tpm is None:
return
while True:
with self.lock:
wait_time = self._check_and_get_wait_time(tokens)
if wait_time <= 0:
self._record_usage(tokens)
return
# 释放锁后等待,避免阻塞其他协程/线程的检查
# 添加一点点缓冲时间,避免刚唤醒时毫秒级误差导致再次等待
await asyncio.sleep(wait_time + 0.1)
def acquire_sync(self, tokens: int = 0):
"""同步等待配额(线程阻塞)"""
if self.rpm is None and self.tpm is None:
return
while True:
with self.lock:
wait_time = self._check_and_get_wait_time(tokens)
if wait_time <= 0:
self._record_usage(tokens)
return
time.sleep(wait_time + 0.1)
def extract_token_info(response_data: dict) -> tuple[int, int, int, int]:
"""
从API响应中提取token信息
支持多种response格式:
1. 格式1: usage.input_tokens_details.cached_tokens 和 usage.output_tokens_details.reasoning_tokens
2. 格式2: usage.prompt_tokens_details.cached_tokens
3. 格式3: usage.prompt_cache_hit_tokens 和 usage.completion_tokens_details.reasoning_tokens
Args:
response_data: API响应数据
Returns:
tuple: (input_tokens, cached_tokens, output_tokens, reasoning_tokens)
"""
"""(保持原样) 从API响应中提取token信息"""
if "usage" not in response_data:
return 0, 0, 0, 0
@@ -107,43 +190,34 @@ def extract_token_info(response_data: dict) -> tuple[int, int, int, int]:
input_tokens = usage.get("prompt_tokens", 0)
output_tokens = usage.get("completion_tokens", 0)
# 初始化token详细统计
cached_tokens = 0
reasoning_tokens = 0
try:
# 尝试从不同格式获取cached_tokens
# 格式1: input_tokens_details.cached_tokens
if (
"input_tokens_details" in usage
and "cached_tokens" in usage["input_tokens_details"]
):
cached_tokens = usage["input_tokens_details"]["cached_tokens"]
# 格式2: prompt_tokens_details.cached_tokens
elif (
"prompt_tokens_details" in usage
and "cached_tokens" in usage["prompt_tokens_details"]
):
cached_tokens = usage["prompt_tokens_details"]["cached_tokens"]
# 格式3: prompt_cache_hit_tokens (直接在usage下)
elif "prompt_cache_hit_tokens" in usage:
cached_tokens = usage["prompt_cache_hit_tokens"]
# 尝试从不同格式获取reasoning_tokens
# 格式1: output_tokens_details.reasoning_tokens
if (
"output_tokens_details" in usage
and "reasoning_tokens" in usage["output_tokens_details"]
):
reasoning_tokens = usage["output_tokens_details"]["reasoning_tokens"]
# 格式2: completion_tokens_details.reasoning_tokens
elif (
"completion_tokens_details" in usage
and "reasoning_tokens" in usage["completion_tokens_details"]
):
reasoning_tokens = usage["completion_tokens_details"]["reasoning_tokens"]
return input_tokens, cached_tokens, output_tokens, reasoning_tokens
except TypeError as e:
# print(f"获取token失败跳过token计数:{e.__repr__()}")
except TypeError:
return -1, -1, -1, -1
@@ -170,10 +244,6 @@ class TokenCounter:
self.output_tokens += output_tokens
self.reasoning_tokens += reasoning_tokens
self.total_tokens += input_tokens + output_tokens
# self.logger.debug(
# f"Token使用统计 - 输入: {self.input_tokens}(含cached: {self.cached_tokens}), "
# f"输出: {self.output_tokens}(含reasoning: {self.reasoning_tokens}), 总计: {self.total_tokens}"
# )
def get_stats(self):
with self.lock:
@@ -202,7 +272,6 @@ ErrorResultHandlerType = Callable[[str, logging.Logger], Any]
class Agent:
def __init__(self, config: AgentConfig):
self.baseurl = config.base_url.strip()
if self.baseurl.endswith("/"):
self.baseurl = self.baseurl[:-1]
@@ -216,16 +285,36 @@ class Agent:
self.thinking = config.thinking
self.logger = config.logger
self.total_error_counter = TotalErrorCounter(logger=self.logger)
# 新增:用于统计最终未解决的错误
self.unresolved_error_lock = Lock()
self.unresolved_error_count = 0
# 新增用于统计token使用情况
self.token_counter = TokenCounter(logger=self.logger)
self.retry = config.retry
self.system_proxy_enable = config.system_proxy_enable
# 新增:初始化速率限制器
self.rate_limiter = RateLimiter(rpm=config.rpm, tpm=config.tpm)
# 新增:初始化 encoding 用于估算
self.encoding = self._get_encoding_for_model(self.model_id)
def _get_encoding_for_model(self, model_name: str):
    """Return a tiktoken encoding for ``model_name``, or ``None`` if unavailable.

    Resolution order:
      1. the encoding tiktoken associates with ``model_name``;
      2. ``cl100k_base`` for unknown or custom model ids;
      3. ``None`` when no encoding can be loaded at all.

    The encoding is only used to estimate prompt size for rate limiting, so
    a failure here must not prevent the agent from being constructed.
    """
    try:
        return tiktoken.encoding_for_model(model_name)
    except KeyError:
        # Unknown or custom model id: fall through to the generic encoding.
        pass
    except Exception:
        # Deliberately broad: loading an encoding may involve file or network
        # access, and this is a best-effort helper.
        return None
    try:
        return tiktoken.get_encoding("cl100k_base")
    except Exception:
        # Same best-effort boundary as above; callers fall back to a
        # character-based estimate when no encoding is available.
        return None
def _estimate_tokens(self, text: str) -> int:
"""估算文本的 Token 数量"""
if not text:
return 0
try:
# 这是一个近似值,不包含特殊 token 格式的开销,但用于限流足够了
return len(self.encoding.encode(text))
except Exception:
# 极端兜底每4个字符算1个token
return len(text) // 4
def _add_thinking_mode(self, data: dict):
thinking_mode_result = get_thinking_mode(self.domain, data.get("model"))
if thinking_mode_result is None:
@@ -236,7 +325,6 @@ class Agent:
elif self.thinking == "disable":
data[field_thinking] = val_disable
def _prepare_request_data(
self, prompt: str, system_prompt: str, temperature=None, top_p=0.9, json_format=False
):
@@ -278,11 +366,16 @@ class Agent:
system_prompt = self.system_prompt
if pre_send_handler:
system_prompt, prompt = pre_send_handler(system_prompt, prompt)
# print(f"system_prompt:\n{system_prompt}")
# print(f"【测试】prompt:\n{prompt}")
# 新增:速率限制检查
# 计算估算的 tokens (system + user)
estimated_tokens = self._estimate_tokens(system_prompt) + self._estimate_tokens(prompt)
# 等待配额
await self.rate_limiter.acquire_async(tokens=estimated_tokens)
headers, data = self._prepare_request_data(prompt, system_prompt, json_format=force_json)
should_retry = False
is_hard_error = False # 新增标志,用于区分是否为硬错误
is_hard_error = False
current_partial_result = None
input_tokens = 0
output_tokens = 0
@@ -294,18 +387,14 @@ class Agent:
headers=headers,
timeout=self.timeout,
)
# print(f"【测试】json:\n{data}")
response.raise_for_status()
# print(f"【测试】resp:\n{response.json()}")
result = response.json()["choices"][0]["message"]["content"]
# print(f"【测试】\nprompt:\n{prompt}\nresp:\n{result}")
# 获取token使用情况
response_data = response.json()
input_tokens, cached_tokens, output_tokens, reasoning_tokens = (
extract_token_info(response_data)
)
# 更新token计数器
self.token_counter.add(
input_tokens, cached_tokens, output_tokens, reasoning_tokens
)
@@ -313,7 +402,6 @@ class Agent:
if retry_count > 0:
self.logger.info(f"重试成功 (第 {retry_count}/{self.retry} 次尝试)。")
# print(f"result:=============================================================\n{result}\n================\n")
return (
result
if result_handler is None
@@ -323,23 +411,23 @@ class Agent:
except AgentResultError as e:
self.logger.error(f"AI返回结果有误: {e}")
should_retry = True
# 专门捕获部分翻译错误(软错误)
except PartialAgentResultError as e:
# print(f"【测试】\nprompt:\n{prompt}\nresp:\n{result}")
self.logger.error(f"收到部分返回结果,将尝试重试: {e}")
current_partial_result = e.partial_result
should_retry = True
if e.append_prompt:
prompt += e.append_prompt
# is_hard_error 保持 False
# 捕获硬错误
except httpx.HTTPStatusError as e:
self.logger.error(
f"AI请求HTTP状态错误 (async): {e.response.status_code} - {e.response.text}"
)
should_retry = True
is_hard_error = True
# 如果是因为 Rate Limit (429) 错误,最好在这里多睡一会儿,虽然我们有了本地 Limiter
if e.response.status_code == 429:
await asyncio.sleep(5)
except httpx.RequestError as e:
self.logger.error(f"AI请求连接错误 (async): {repr(e)}")
should_retry = True
@@ -353,12 +441,10 @@ class Agent:
best_partial_result = current_partial_result
if should_retry and retry and retry_count < self.retry:
# 仅在硬错误时才增加总错误计数
if is_hard_error:
if retry_count == 0:
if self.total_error_counter.add():
self.logger.error("错误次数过多,已达到上限,不再重试。")
# 新增:当因为达到错误上限而不再重试时,增加未解决错误计数
with self.unresolved_error_lock:
self.unresolved_error_count += 1
return (
@@ -372,7 +458,6 @@ class Agent:
)
elif self.total_error_counter.reach_limit():
self.logger.error("错误次数过多,已达到上限,不再为该请求重试。")
# 新增:当因为达到错误上限而不再重试时,增加未解决错误计数
with self.unresolved_error_lock:
self.unresolved_error_count += 1
return (
@@ -386,7 +471,8 @@ class Agent:
)
self.logger.info(f"正在重试第 {retry_count + 1}/{self.retry} 次...")
await asyncio.sleep(0.5)
# 指数退避
await asyncio.sleep(0.5 * (2 ** retry_count))
return await self.send_async(
client,
prompt,
@@ -402,7 +488,6 @@ class Agent:
else:
if should_retry:
self.logger.error(f"所有重试均失败,已达到重试次数上限。")
# 新增:当所有重试失败后,增加未解决错误计数
with self.unresolved_error_lock:
self.unresolved_error_count += 1
@@ -430,17 +515,19 @@ class Agent:
self.max_concurrent if max_concurrent is None else max_concurrent
)
total = len(prompts)
rpm_info = f", RPM:{self.rate_limiter.rpm}" if self.rate_limiter.rpm else ""
tpm_info = f", TPM:{self.rate_limiter.tpm}" if self.rate_limiter.tpm else ""
self.logger.info(
f"base-url:{self.baseurl},model-id:{self.model_id},concurrent:{max_concurrent},temperature:{self.temperature},system_proxy:{self.system_proxy_enable},json_output:{force_json}"
f"base-url:{self.baseurl},model-id:{self.model_id},concurrent:{max_concurrent}{rpm_info}{tpm_info},temperature:{self.temperature},system_proxy:{self.system_proxy_enable},json_output:{force_json}"
)
self.logger.info(f"预计发送{total}个请求,并发请求数:{max_concurrent}")
self.logger.info(f"预计发送{total}个请求")
self.total_error_counter.max_errors_count = (
len(prompts) // MAX_REQUESTS_PER_ERROR
)
# 新增:在每次批量发送前重置计数器
self.unresolved_error_count = 0
# 重置token计数器
self.token_counter.reset()
count = 0
@@ -450,8 +537,8 @@ class Agent:
proxies = get_httpx_proxies(asyn=True) if self.system_proxy_enable else None
limits = httpx.Limits(
max_connections=self.max_concurrent * 2, # 为重试和并发预留空间
max_keepalive_connections=self.max_concurrent, # 保持活动的连接数
max_connections=self.max_concurrent * 2,
max_keepalive_connections=self.max_concurrent,
)
async with httpx.AsyncClient(
@@ -460,6 +547,9 @@ class Agent:
async def send_with_semaphore(p_text: str):
async with semaphore:
# 注意:我们在 semaphore 内部调用 send_async
# send_async 内部会调用 rate_limiter.acquire_async
# 这样可以防止并发过高,同时 rate_limiter 防止频率过快
result = await self.send_async(
client=client,
prompt=p_text,
@@ -480,16 +570,11 @@ class Agent:
results = await asyncio.gather(*tasks, return_exceptions=False)
# 新增:在所有任务完成后打印未解决的错误总数
self.logger.info(
f"所有请求处理完毕。未解决的错误总数: {self.unresolved_error_count}"
)
# 新增打印token使用统计
token_stats = self.token_counter.get_stats()
if token_stats["input_tokens"] < 0:
self.logger.info("Token统计失败")
else:
self.logger.info(
f"Token使用统计 - 输入: {token_stats['input_tokens'] / 1000:.2f}K(含cached: {token_stats['cached_tokens'] / 1000:.2f}K), "
f"输出: {token_stats['output_tokens'] / 1000:.2f}K(含reasoning: {token_stats['reasoning_tokens'] / 1000:.2f}K), "
@@ -516,9 +601,13 @@ class Agent:
if pre_send_handler:
system_prompt, prompt = pre_send_handler(system_prompt, prompt)
# 新增:同步环境下的速率限制
estimated_tokens = self._estimate_tokens(system_prompt) + self._estimate_tokens(prompt)
self.rate_limiter.acquire_sync(tokens=estimated_tokens)
headers, data = self._prepare_request_data(prompt, system_prompt, json_format=force_json)
should_retry = False
is_hard_error = False # 新增标志,用于区分是否为硬错误
is_hard_error = False
current_partial_result = None
input_tokens = 0
output_tokens = 0
@@ -534,13 +623,11 @@ class Agent:
result = response.json()["choices"][0]["message"]["content"]
# 获取token使用情况
response_data = response.json()
input_tokens, cached_tokens, output_tokens, reasoning_tokens = (
extract_token_info(response_data)
)
# 更新token计数器
self.token_counter.add(
input_tokens, cached_tokens, output_tokens, reasoning_tokens
)
@@ -556,20 +643,20 @@ class Agent:
except AgentResultError as e:
self.logger.error(f"AI返回结果有误: {e}")
should_retry = True
# 专门捕获部分翻译错误(软错误)
except PartialAgentResultError as e:
self.logger.error(f"收到部分翻译结果,将尝试重试: {e}")
current_partial_result = e.partial_result
should_retry = True
# is_hard_error 保持 False
# 捕获硬错误
except httpx.HTTPStatusError as e:
self.logger.error(
f"AI请求HTTP状态错误 (sync): {e.response.status_code} - {e.response.text}"
)
should_retry = True
is_hard_error = True
if e.response.status_code == 429:
time.sleep(5)
except httpx.RequestError as e:
self.logger.error(f"AI请求连接错误 (sync): {repr(e)}\nprompt:{prompt}")
should_retry = True
@@ -583,12 +670,10 @@ class Agent:
best_partial_result = current_partial_result
if should_retry and retry and retry_count < self.retry:
# 仅在硬错误时才增加总错误计数
if is_hard_error:
if retry_count == 0:
if self.total_error_counter.add():
self.logger.error("错误次数过多,已达到上限,不再重试。")
# 新增:当因为达到错误上限而不再重试时,增加未解决错误计数
with self.unresolved_error_lock:
self.unresolved_error_count += 1
return (
@@ -602,7 +687,6 @@ class Agent:
)
elif self.total_error_counter.reach_limit():
self.logger.error("错误次数过多,已达到上限,不再为该请求重试。")
# 新增:当因为达到错误上限而不再重试时,增加未解决错误计数
with self.unresolved_error_lock:
self.unresolved_error_count += 1
return (
@@ -616,7 +700,7 @@ class Agent:
)
self.logger.info(f"正在重试第 {retry_count + 1}/{self.retry} 次...")
time.sleep(0.5)
time.sleep(0.5 * (2 ** retry_count))
return self.send(
client,
prompt,
@@ -632,7 +716,6 @@ class Agent:
else:
if should_retry:
self.logger.error(f"所有重试均失败,已达到重试次数上限。")
# 新增:当所有重试失败后,增加未解决错误计数
with self.unresolved_error_lock:
self.unresolved_error_count += 1
@@ -657,6 +740,7 @@ class Agent:
result_handler,
error_result_handler
) -> Any:
# 该方法在 ThreadPoolExecutor 中运行
result = self.send(
client,
prompt,
@@ -678,19 +762,20 @@ class Agent:
result_handler: ResultHandlerType = None,
error_result_handler: ErrorResultHandlerType = None,
) -> list[Any]:
rpm_info = f", RPM:{self.rate_limiter.rpm}" if self.rate_limiter.rpm else ""
tpm_info = f", TPM:{self.rate_limiter.tpm}" if self.rate_limiter.tpm else ""
self.logger.info(
f"base-url:{self.baseurl},model-id:{self.model_id},concurrent:{self.max_concurrent},temperature:{self.temperature},system_proxy:{self.system_proxy_enable},json_output:{json_format}"
f"base-url:{self.baseurl},model-id:{self.model_id},concurrent:{self.max_concurrent}{rpm_info}{tpm_info},temperature:{self.temperature},system_proxy:{self.system_proxy_enable},json_output:{json_format}"
)
self.logger.info(
f"预计发送{len(prompts)}个请求,并发请求数:{self.max_concurrent}"
f"预计发送{len(prompts)}个请求"
)
self.total_error_counter.max_errors_count = (
len(prompts) // MAX_REQUESTS_PER_ERROR
)
# 新增:在每次批量发送前重置计数器
self.unresolved_error_count = 0
# 重置token计数器
self.token_counter.reset()
counter = PromptsCounter(len(prompts), self.logger)
@@ -702,10 +787,11 @@ class Agent:
result_handlers = itertools.repeat(result_handler, len(prompts))
error_result_handlers = itertools.repeat(error_result_handler, len(prompts))
limits = httpx.Limits(
max_connections=self.max_concurrent * 2, # 允许连接复用
max_keepalive_connections=self.max_concurrent, # 保持活跃连接
max_connections=self.max_concurrent * 2,
max_keepalive_connections=self.max_concurrent,
)
proxies = get_httpx_proxies(asyn=False) if self.system_proxy_enable else None
with httpx.Client(
trust_env=False, mounts=proxies, verify=False, limits=limits
) as client:
@@ -724,16 +810,11 @@ class Agent:
)
output_list = list(results_iterator)
# 新增:在所有任务完成后打印未解决的错误总数
self.logger.info(
f"所有请求处理完毕。未解决的错误总数: {self.unresolved_error_count}"
)
# 新增打印token使用统计
token_stats = self.token_counter.get_stats()
if token_stats["input_tokens"] < 0:
self.logger.info("Token统计失败")
else:
self.logger.info(
f"Token使用统计 - 输入: {token_stats['input_tokens'] / 1000:.2f}K(含cached: {token_stats['cached_tokens'] / 1000:.2f}K), "
f"输出: {token_stats['output_tokens'] / 1000:.2f}K(含reasoning: {token_stats['reasoning_tokens'] / 1000:.2f}K), "
@@ -741,7 +822,3 @@ class Agent:
)
return output_list
if __name__ == "__main__":
pass

View File

@@ -337,6 +337,12 @@ class GlossaryAgentConfigPayload(BaseModel):
force_json: bool = Field(
default=False, description="强制Agent输出JSON格式的术语表。"
)
rpm: Optional[int] = Field(
default=None, description="RPM限制 (Requests Per Minute)"
)
tpm: Optional[int] = Field(
default=None, description="TPM限制 (Tokens Per Minute)"
)
# 1. 定义所有工作流共享的基础参数
@@ -411,6 +417,12 @@ class BaseWorkflowParams(BaseModel):
force_json: bool = Field(
default=False, description="应输出json格式时强制ai输出json"
)
rpm: Optional[int] = Field(
default=None, description="RPM限制 (Requests Per Minute)"
)
tpm: Optional[int] = Field(
default=None, description="TPM限制 (Tokens Per Minute)"
)
@model_validator(mode="before")
@classmethod
@@ -723,6 +735,8 @@ class TranslateServiceRequest(BaseModel):
"mineru_token": "your-mineru-token-if-any",
"formula_ocr": True,
"model_version": "vlm",
"rpm": 100,
"tpm": 100000,
},
},
{
@@ -1007,6 +1021,8 @@ async def _perform_translation(
"retry",
"system_proxy_enable",
"force_json",
"rpm",
"tpm",
},
exclude_none=True,
)
@@ -1072,6 +1088,8 @@ async def _perform_translation(
"retry",
"system_proxy_enable",
"force_json",
"rpm",
"tpm",
},
exclude_none=True,
)
@@ -1109,6 +1127,8 @@ async def _perform_translation(
"retry",
"system_proxy_enable",
"force_json",
"rpm",
"tpm",
},
exclude_none=True,
)
@@ -1148,6 +1168,8 @@ async def _perform_translation(
"retry",
"system_proxy_enable",
"force_json",
"rpm",
"tpm",
},
exclude_none=True,
)
@@ -1186,6 +1208,8 @@ async def _perform_translation(
"retry",
"system_proxy_enable",
"force_json",
"rpm",
"tpm",
},
exclude_none=True,
)
@@ -1224,6 +1248,8 @@ async def _perform_translation(
"retry",
"system_proxy_enable",
"force_json",
"rpm",
"tpm",
},
exclude_none=True,
)
@@ -1262,6 +1288,8 @@ async def _perform_translation(
"retry",
"system_proxy_enable",
"force_json",
"rpm",
"tpm",
},
exclude_none=True,
)
@@ -1301,6 +1329,8 @@ async def _perform_translation(
"retry",
"system_proxy_enable",
"force_json",
"rpm",
"tpm",
},
exclude_none=True,
)
@@ -1338,6 +1368,8 @@ async def _perform_translation(
"retry",
"system_proxy_enable",
"force_json",
"rpm",
"tpm",
},
exclude_none=True,
)
@@ -1378,6 +1410,8 @@ async def _perform_translation(
"retry",
"system_proxy_enable",
"force_json",
"rpm",
"tpm",
},
exclude_none=True,
)
@@ -2507,6 +2541,8 @@ async def temp_translate(
custom_prompt: Optional[str] = Body(None),
model_version: Literal["pipeline", "vlm"] = Body("vlm"),
glossary_dict: Optional[Dict[str, str]] = Body(None),
rpm: Optional[int] = Body(None),
tpm: Optional[int] = Body(None),
):
file_name = Path(file_name)
try:
@@ -2530,6 +2566,8 @@ async def temp_translate(
chunk_size=chunk_size,
concurrent=concurrent,
glossary_dict=glossary_dict,
rpm=rpm,
tpm=tpm,
),
html_exporter_config=MD2HTMLExporterConfig(),
)

File diff suppressed because one or more lines are too long

View File

@@ -21,6 +21,7 @@ dependencies = [
"httpx>=0.28.1",
"python-pptx>=1.0.2",
"pypdf>=6.4.2",
"tiktoken>=0.12.0",
]
dynamic = ["version"]

56
uv.lock generated
View File

@@ -394,6 +394,7 @@ dependencies = [
{ name = "python-docx" },
{ name = "python-pptx" },
{ name = "srt" },
{ name = "tiktoken" },
{ name = "xlsx2html" },
]
@@ -432,6 +433,7 @@ requires-dist = [
{ name = "python-docx", specifier = ">=1.2.0" },
{ name = "python-pptx", specifier = ">=1.0.2" },
{ name = "srt", specifier = ">=3.5.3" },
{ name = "tiktoken", specifier = ">=0.12.0" },
{ name = "xlsx2html", specifier = ">=0.6.2" },
]
provides-extras = ["docling"]
@@ -2720,6 +2722,60 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252 },
]
[[package]]
name = "tiktoken"
version = "0.12.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "regex" },
{ name = "requests" },
]
sdist = { url = "https://files.pythonhosted.org/packages/7d/ab/4d017d0f76ec3171d469d80fc03dfbb4e48a4bcaddaa831b31d526f05edc/tiktoken-0.12.0.tar.gz", hash = "sha256:b18ba7ee2b093863978fcb14f74b3707cdc8d4d4d3836853ce7ec60772139931", size = 37806 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/de/46/21ea696b21f1d6d1efec8639c204bdf20fde8bafb351e1355c72c5d7de52/tiktoken-0.12.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:6e227c7f96925003487c33b1b32265fad2fbcec2b7cf4817afb76d416f40f6bb", size = 1051565 },
{ url = "https://files.pythonhosted.org/packages/c9/d9/35c5d2d9e22bb2a5f74ba48266fb56c63d76ae6f66e02feb628671c0283e/tiktoken-0.12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c06cf0fcc24c2cb2adb5e185c7082a82cba29c17575e828518c2f11a01f445aa", size = 995284 },
{ url = "https://files.pythonhosted.org/packages/01/84/961106c37b8e49b9fdcf33fe007bb3a8fdcc380c528b20cc7fbba80578b8/tiktoken-0.12.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:f18f249b041851954217e9fd8e5c00b024ab2315ffda5ed77665a05fa91f42dc", size = 1129201 },
{ url = "https://files.pythonhosted.org/packages/6a/d0/3d9275198e067f8b65076a68894bb52fd253875f3644f0a321a720277b8a/tiktoken-0.12.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:47a5bc270b8c3db00bb46ece01ef34ad050e364b51d406b6f9730b64ac28eded", size = 1152444 },
{ url = "https://files.pythonhosted.org/packages/78/db/a58e09687c1698a7c592e1038e01c206569b86a0377828d51635561f8ebf/tiktoken-0.12.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:508fa71810c0efdcd1b898fda574889ee62852989f7c1667414736bcb2b9a4bd", size = 1195080 },
{ url = "https://files.pythonhosted.org/packages/9e/1b/a9e4d2bf91d515c0f74afc526fd773a812232dd6cda33ebea7f531202325/tiktoken-0.12.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a1af81a6c44f008cba48494089dd98cccb8b313f55e961a52f5b222d1e507967", size = 1255240 },
{ url = "https://files.pythonhosted.org/packages/9d/15/963819345f1b1fb0809070a79e9dd96938d4ca41297367d471733e79c76c/tiktoken-0.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:3e68e3e593637b53e56f7237be560f7a394451cb8c11079755e80ae64b9e6def", size = 879422 },
{ url = "https://files.pythonhosted.org/packages/a4/85/be65d39d6b647c79800fd9d29241d081d4eeb06271f383bb87200d74cf76/tiktoken-0.12.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b97f74aca0d78a1ff21b8cd9e9925714c15a9236d6ceacf5c7327c117e6e21e8", size = 1050728 },
{ url = "https://files.pythonhosted.org/packages/4a/42/6573e9129bc55c9bf7300b3a35bef2c6b9117018acca0dc760ac2d93dffe/tiktoken-0.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2b90f5ad190a4bb7c3eb30c5fa32e1e182ca1ca79f05e49b448438c3e225a49b", size = 994049 },
{ url = "https://files.pythonhosted.org/packages/66/c5/ed88504d2f4a5fd6856990b230b56d85a777feab84e6129af0822f5d0f70/tiktoken-0.12.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:65b26c7a780e2139e73acc193e5c63ac754021f160df919add909c1492c0fb37", size = 1129008 },
{ url = "https://files.pythonhosted.org/packages/f4/90/3dae6cc5436137ebd38944d396b5849e167896fc2073da643a49f372dc4f/tiktoken-0.12.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:edde1ec917dfd21c1f2f8046b86348b0f54a2c0547f68149d8600859598769ad", size = 1152665 },
{ url = "https://files.pythonhosted.org/packages/a3/fe/26df24ce53ffde419a42f5f53d755b995c9318908288c17ec3f3448313a3/tiktoken-0.12.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:35a2f8ddd3824608b3d650a000c1ef71f730d0c56486845705a8248da00f9fe5", size = 1194230 },
{ url = "https://files.pythonhosted.org/packages/20/cc/b064cae1a0e9fac84b0d2c46b89f4e57051a5f41324e385d10225a984c24/tiktoken-0.12.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83d16643edb7fa2c99eff2ab7733508aae1eebb03d5dfc46f5565862810f24e3", size = 1254688 },
{ url = "https://files.pythonhosted.org/packages/81/10/b8523105c590c5b8349f2587e2fdfe51a69544bd5a76295fc20f2374f470/tiktoken-0.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:ffc5288f34a8bc02e1ea7047b8d041104791d2ddbf42d1e5fa07822cbffe16bd", size = 878694 },
{ url = "https://files.pythonhosted.org/packages/00/61/441588ee21e6b5cdf59d6870f86beb9789e532ee9718c251b391b70c68d6/tiktoken-0.12.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:775c2c55de2310cc1bc9a3ad8826761cbdc87770e586fd7b6da7d4589e13dab3", size = 1050802 },
{ url = "https://files.pythonhosted.org/packages/1f/05/dcf94486d5c5c8d34496abe271ac76c5b785507c8eae71b3708f1ad9b45a/tiktoken-0.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a01b12f69052fbe4b080a2cfb867c4de12c704b56178edf1d1d7b273561db160", size = 993995 },
{ url = "https://files.pythonhosted.org/packages/a0/70/5163fe5359b943f8db9946b62f19be2305de8c3d78a16f629d4165e2f40e/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:01d99484dc93b129cd0964f9d34eee953f2737301f18b3c7257bf368d7615baa", size = 1128948 },
{ url = "https://files.pythonhosted.org/packages/0c/da/c028aa0babf77315e1cef357d4d768800c5f8a6de04d0eac0f377cb619fa/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:4a1a4fcd021f022bfc81904a911d3df0f6543b9e7627b51411da75ff2fe7a1be", size = 1151986 },
{ url = "https://files.pythonhosted.org/packages/a0/5a/886b108b766aa53e295f7216b509be95eb7d60b166049ce2c58416b25f2a/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:981a81e39812d57031efdc9ec59fa32b2a5a5524d20d4776574c4b4bd2e9014a", size = 1194222 },
{ url = "https://files.pythonhosted.org/packages/f4/f8/4db272048397636ac7a078d22773dd2795b1becee7bc4922fe6207288d57/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9baf52f84a3f42eef3ff4e754a0db79a13a27921b457ca9832cf944c6be4f8f3", size = 1255097 },
{ url = "https://files.pythonhosted.org/packages/8e/32/45d02e2e0ea2be3a9ed22afc47d93741247e75018aac967b713b2941f8ea/tiktoken-0.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:b8a0cd0c789a61f31bf44851defbd609e8dd1e2c8589c614cc1060940ef1f697", size = 879117 },
{ url = "https://files.pythonhosted.org/packages/ce/76/994fc868f88e016e6d05b0da5ac24582a14c47893f4474c3e9744283f1d5/tiktoken-0.12.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:d5f89ea5680066b68bcb797ae85219c72916c922ef0fcdd3480c7d2315ffff16", size = 1050309 },
{ url = "https://files.pythonhosted.org/packages/f6/b8/57ef1456504c43a849821920d582a738a461b76a047f352f18c0b26c6516/tiktoken-0.12.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b4e7ed1c6a7a8a60a3230965bdedba8cc58f68926b835e519341413370e0399a", size = 993712 },
{ url = "https://files.pythonhosted.org/packages/72/90/13da56f664286ffbae9dbcfadcc625439142675845baa62715e49b87b68b/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:fc530a28591a2d74bce821d10b418b26a094bf33839e69042a6e86ddb7a7fb27", size = 1128725 },
{ url = "https://files.pythonhosted.org/packages/05/df/4f80030d44682235bdaecd7346c90f67ae87ec8f3df4a3442cb53834f7e4/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:06a9f4f49884139013b138920a4c393aa6556b2f8f536345f11819389c703ebb", size = 1151875 },
{ url = "https://files.pythonhosted.org/packages/22/1f/ae535223a8c4ef4c0c1192e3f9b82da660be9eb66b9279e95c99288e9dab/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:04f0e6a985d95913cabc96a741c5ffec525a2c72e9df086ff17ebe35985c800e", size = 1194451 },
{ url = "https://files.pythonhosted.org/packages/78/a7/f8ead382fce0243cb625c4f266e66c27f65ae65ee9e77f59ea1653b6d730/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:0ee8f9ae00c41770b5f9b0bb1235474768884ae157de3beb5439ca0fd70f3e25", size = 1253794 },
{ url = "https://files.pythonhosted.org/packages/93/e0/6cc82a562bc6365785a3ff0af27a2a092d57c47d7a81d9e2295d8c36f011/tiktoken-0.12.0-cp313-cp313t-win_amd64.whl", hash = "sha256:dc2dd125a62cb2b3d858484d6c614d136b5b848976794edfb63688d539b8b93f", size = 878777 },
{ url = "https://files.pythonhosted.org/packages/72/05/3abc1db5d2c9aadc4d2c76fa5640134e475e58d9fbb82b5c535dc0de9b01/tiktoken-0.12.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:a90388128df3b3abeb2bfd1895b0681412a8d7dc644142519e6f0a97c2111646", size = 1050188 },
{ url = "https://files.pythonhosted.org/packages/e3/7b/50c2f060412202d6c95f32b20755c7a6273543b125c0985d6fa9465105af/tiktoken-0.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:da900aa0ad52247d8794e307d6446bd3cdea8e192769b56276695d34d2c9aa88", size = 993978 },
{ url = "https://files.pythonhosted.org/packages/14/27/bf795595a2b897e271771cd31cb847d479073497344c637966bdf2853da1/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:285ba9d73ea0d6171e7f9407039a290ca77efcdb026be7769dccc01d2c8d7fff", size = 1129271 },
{ url = "https://files.pythonhosted.org/packages/f5/de/9341a6d7a8f1b448573bbf3425fa57669ac58258a667eb48a25dfe916d70/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:d186a5c60c6a0213f04a7a802264083dea1bbde92a2d4c7069e1a56630aef830", size = 1151216 },
{ url = "https://files.pythonhosted.org/packages/75/0d/881866647b8d1be4d67cb24e50d0c26f9f807f994aa1510cb9ba2fe5f612/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:604831189bd05480f2b885ecd2d1986dc7686f609de48208ebbbddeea071fc0b", size = 1194860 },
{ url = "https://files.pythonhosted.org/packages/b3/1e/b651ec3059474dab649b8d5b69f5c65cd8fcd8918568c1935bd4136c9392/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8f317e8530bb3a222547b85a58583238c8f74fd7a7408305f9f63246d1a0958b", size = 1254567 },
{ url = "https://files.pythonhosted.org/packages/80/57/ce64fd16ac390fafde001268c364d559447ba09b509181b2808622420eec/tiktoken-0.12.0-cp314-cp314-win_amd64.whl", hash = "sha256:399c3dd672a6406719d84442299a490420b458c44d3ae65516302a99675888f3", size = 921067 },
{ url = "https://files.pythonhosted.org/packages/ac/a4/72eed53e8976a099539cdd5eb36f241987212c29629d0a52c305173e0a68/tiktoken-0.12.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c2c714c72bc00a38ca969dae79e8266ddec999c7ceccd603cc4f0d04ccd76365", size = 1050473 },
{ url = "https://files.pythonhosted.org/packages/e6/d7/0110b8f54c008466b19672c615f2168896b83706a6611ba6e47313dbc6e9/tiktoken-0.12.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cbb9a3ba275165a2cb0f9a83f5d7025afe6b9d0ab01a22b50f0e74fee2ad253e", size = 993855 },
{ url = "https://files.pythonhosted.org/packages/5f/77/4f268c41a3957c418b084dd576ea2fad2e95da0d8e1ab705372892c2ca22/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:dfdfaa5ffff8993a3af94d1125870b1d27aed7cb97aa7eb8c1cefdbc87dbee63", size = 1129022 },
{ url = "https://files.pythonhosted.org/packages/4e/2b/fc46c90fe5028bd094cd6ee25a7db321cb91d45dc87531e2bdbb26b4867a/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:584c3ad3d0c74f5269906eb8a659c8bfc6144a52895d9261cdaf90a0ae5f4de0", size = 1150736 },
{ url = "https://files.pythonhosted.org/packages/28/c0/3c7a39ff68022ddfd7d93f3337ad90389a342f761c4d71de99a3ccc57857/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:54c891b416a0e36b8e2045b12b33dd66fb34a4fe7965565f1b482da50da3e86a", size = 1194908 },
{ url = "https://files.pythonhosted.org/packages/ab/0d/c1ad6f4016a3968c048545f5d9b8ffebf577774b2ede3e2e352553b685fe/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5edb8743b88d5be814b1a8a8854494719080c28faaa1ccbef02e87354fe71ef0", size = 1253706 },
{ url = "https://files.pythonhosted.org/packages/af/df/c7891ef9d2712ad774777271d39fdef63941ffba0a9d59b7ad1fd2765e57/tiktoken-0.12.0-cp314-cp314t-win_amd64.whl", hash = "sha256:f61c0aea5565ac82e2ec50a05e02a6c44734e91b51c10510b084ea1b8e633a71", size = 920667 },
]
[[package]]
name = "tokenizers"
version = "0.22.1"