增加rpm和tpm限制

This commit is contained in:
xunbu
2025-12-17 21:46:40 +08:00
parent c9ffa8f195
commit 579f0b8a9c
5 changed files with 344 additions and 172 deletions

View File

@@ -5,6 +5,7 @@ import asyncio
import itertools import itertools
import logging import logging
import time import time
from collections import deque
from concurrent.futures import ThreadPoolExecutor from concurrent.futures import ThreadPoolExecutor
from dataclasses import dataclass from dataclasses import dataclass
from threading import Lock from threading import Lock
@@ -12,6 +13,7 @@ from typing import Literal, Callable, Any
from urllib.parse import urlparse from urllib.parse import urlparse
import httpx import httpx
import tiktoken
from docutranslate.agents.thinking.thinking_factory import get_thinking_mode from docutranslate.agents.thinking.thinking_factory import get_thinking_mode
from docutranslate.logger import global_logger from docutranslate.logger import global_logger
@@ -32,10 +34,10 @@ class AgentResultError(ValueError):
class PartialAgentResultError(ValueError): class PartialAgentResultError(ValueError):
"""一个特殊的异常,用于表示结果不完整但包含了部分成功的数据,以便触发重试。该错误不计入总错误数""" """一个特殊的异常,用于表示结果不完整但包含了部分成功的数据,以便触发重试。该错误不计入总错误数"""
def __init__(self, message, partial_result: dict,append_prompt:str=None): def __init__(self, message, partial_result: dict, append_prompt: str = None):
super().__init__(message) super().__init__(message)
self.partial_result = partial_result self.partial_result = partial_result
self.append_prompt=append_prompt self.append_prompt = append_prompt
@dataclass(kw_only=True) @dataclass(kw_only=True)
@@ -46,11 +48,13 @@ class AgentConfig:
model_id: str model_id: str
temperature: float = 0.7 temperature: float = 0.7
concurrent: int = 30 concurrent: int = 30
timeout: int = 1200 # 单位(秒)这个值是httpx.TimeOut中read的值,并非总的超时时间 timeout: int = 1200
thinking: ThinkingMode = "disable" thinking: ThinkingMode = "disable"
retry: int = 2 retry: int = 2
system_proxy_enable: bool = False system_proxy_enable: bool = False
force_json: bool = False # 应输出json格式时强制ai输出json force_json: bool = False
rpm: int | None = None # 每分钟请求数限制
tpm: int | None = None # 每分钟Token数限制
class TotalErrorCounter: class TotalErrorCounter:
@@ -71,7 +75,6 @@ class TotalErrorCounter:
return self.count > self.max_errors_count return self.count > self.max_errors_count
# 仅使用多线程时用以计数
class PromptsCounter: class PromptsCounter:
def __init__(self, total: int, logger: logging.Logger): def __init__(self, total: int, logger: logging.Logger):
self.lock = Lock() self.lock = Lock()
@@ -85,21 +88,101 @@ class PromptsCounter:
self.logger.info(f"多线程-已完成:{self.count}/{self.total}") self.logger.info(f"多线程-已完成:{self.count}/{self.total}")
# --- 新增 RateLimiter 类 ---
class RateLimiter:
"""
基于滑动窗口的速率限制器,支持 RPM 和 TPM 控制。
同时支持 Async 和 Sync 调用。
"""
def __init__(self, rpm: int | None, tpm: int | None):
self.rpm = rpm
self.tpm = tpm
# 双端队列存储 (timestamp, value)value对于RPM是1对于TPM是token数量
self.request_timestamps = deque()
self.token_timestamps = deque()
self.lock = Lock() # 用于同步模式和保护共享数据
def _cleanup_window(self, now: float):
"""清理60秒窗口之前的数据"""
window_start = now - 60.0
while self.request_timestamps and self.request_timestamps[0] <= window_start:
self.request_timestamps.popleft()
while self.token_timestamps and self.token_timestamps[0][0] <= window_start:
self.token_timestamps.popleft()
def _check_and_get_wait_time(self, tokens: int) -> float:
"""检查是否满足限制,返回需要等待的秒数。如果不需等待返回 0"""
now = time.time()
self._cleanup_window(now)
wait_time = 0.0
# Check RPM
if self.rpm and len(self.request_timestamps) >= self.rpm:
# 取最早的一条记录,计算还需要等待多久才能腾出位置
earliest = self.request_timestamps[0]
wait_time = max(wait_time, 60 - (now - earliest))
# Check TPM
if self.tpm:
current_tokens = sum(t[1] for t in self.token_timestamps)
if current_tokens + tokens > self.tpm:
# 稍微复杂点需要移除足够多的旧token才能放入新token
# 这里做一个简化估算:如果超限,等到最早的记录过期
if self.token_timestamps:
earliest = self.token_timestamps[0][0]
wait_time = max(wait_time, 60 - (now - earliest))
else:
# 这种情况理论上不应该发生除非单次请求超过了TPM上限
# 如果单次超过上限强制等待1秒防止死循环并允许通过(或者抛异常,这里选择允许)
pass
return wait_time
def _record_usage(self, tokens: int):
"""记录使用量"""
now = time.time()
if self.rpm is not None:
self.request_timestamps.append(now)
if self.tpm is not None:
self.token_timestamps.append((now, tokens))
async def acquire_async(self, tokens: int = 0):
"""异步等待配额"""
if self.rpm is None and self.tpm is None:
return
while True:
with self.lock:
wait_time = self._check_and_get_wait_time(tokens)
if wait_time <= 0:
self._record_usage(tokens)
return
# 释放锁后等待,避免阻塞其他协程/线程的检查
# 添加一点点缓冲时间,避免刚唤醒时毫秒级误差导致再次等待
await asyncio.sleep(wait_time + 0.1)
def acquire_sync(self, tokens: int = 0):
"""同步等待配额(线程阻塞)"""
if self.rpm is None and self.tpm is None:
return
while True:
with self.lock:
wait_time = self._check_and_get_wait_time(tokens)
if wait_time <= 0:
self._record_usage(tokens)
return
time.sleep(wait_time + 0.1)
def extract_token_info(response_data: dict) -> tuple[int, int, int, int]: def extract_token_info(response_data: dict) -> tuple[int, int, int, int]:
""" """(保持原样) 从API响应中提取token信息"""
从API响应中提取token信息
支持多种response格式:
1. 格式1: usage.input_tokens_details.cached_tokens 和 usage.output_tokens_details.reasoning_tokens
2. 格式2: usage.prompt_tokens_details.cached_tokens
3. 格式3: usage.prompt_cache_hit_tokens 和 usage.completion_tokens_details.reasoning_tokens
Args:
response_data: API响应数据
Returns:
tuple: (input_tokens, cached_tokens, output_tokens, reasoning_tokens)
"""
if "usage" not in response_data: if "usage" not in response_data:
return 0, 0, 0, 0 return 0, 0, 0, 0
@@ -107,43 +190,34 @@ def extract_token_info(response_data: dict) -> tuple[int, int, int, int]:
input_tokens = usage.get("prompt_tokens", 0) input_tokens = usage.get("prompt_tokens", 0)
output_tokens = usage.get("completion_tokens", 0) output_tokens = usage.get("completion_tokens", 0)
# 初始化token详细统计
cached_tokens = 0 cached_tokens = 0
reasoning_tokens = 0 reasoning_tokens = 0
try: try:
# 尝试从不同格式获取cached_tokens
# 格式1: input_tokens_details.cached_tokens
if ( if (
"input_tokens_details" in usage "input_tokens_details" in usage
and "cached_tokens" in usage["input_tokens_details"] and "cached_tokens" in usage["input_tokens_details"]
): ):
cached_tokens = usage["input_tokens_details"]["cached_tokens"] cached_tokens = usage["input_tokens_details"]["cached_tokens"]
# 格式2: prompt_tokens_details.cached_tokens
elif ( elif (
"prompt_tokens_details" in usage "prompt_tokens_details" in usage
and "cached_tokens" in usage["prompt_tokens_details"] and "cached_tokens" in usage["prompt_tokens_details"]
): ):
cached_tokens = usage["prompt_tokens_details"]["cached_tokens"] cached_tokens = usage["prompt_tokens_details"]["cached_tokens"]
# 格式3: prompt_cache_hit_tokens (直接在usage下)
elif "prompt_cache_hit_tokens" in usage: elif "prompt_cache_hit_tokens" in usage:
cached_tokens = usage["prompt_cache_hit_tokens"] cached_tokens = usage["prompt_cache_hit_tokens"]
# 尝试从不同格式获取reasoning_tokens
# 格式1: output_tokens_details.reasoning_tokens
if ( if (
"output_tokens_details" in usage "output_tokens_details" in usage
and "reasoning_tokens" in usage["output_tokens_details"] and "reasoning_tokens" in usage["output_tokens_details"]
): ):
reasoning_tokens = usage["output_tokens_details"]["reasoning_tokens"] reasoning_tokens = usage["output_tokens_details"]["reasoning_tokens"]
# 格式2: completion_tokens_details.reasoning_tokens
elif ( elif (
"completion_tokens_details" in usage "completion_tokens_details" in usage
and "reasoning_tokens" in usage["completion_tokens_details"] and "reasoning_tokens" in usage["completion_tokens_details"]
): ):
reasoning_tokens = usage["completion_tokens_details"]["reasoning_tokens"] reasoning_tokens = usage["completion_tokens_details"]["reasoning_tokens"]
return input_tokens, cached_tokens, output_tokens, reasoning_tokens return input_tokens, cached_tokens, output_tokens, reasoning_tokens
except TypeError as e: except TypeError:
# print(f"获取token失败跳过token计数:{e.__repr__()}")
return -1, -1, -1, -1 return -1, -1, -1, -1
@@ -158,11 +232,11 @@ class TokenCounter:
self.logger = logger self.logger = logger
def add( def add(
self, self,
input_tokens: int, input_tokens: int,
cached_tokens: int, cached_tokens: int,
output_tokens: int, output_tokens: int,
reasoning_tokens: int, reasoning_tokens: int,
): ):
with self.lock: with self.lock:
self.input_tokens += input_tokens self.input_tokens += input_tokens
@@ -170,10 +244,6 @@ class TokenCounter:
self.output_tokens += output_tokens self.output_tokens += output_tokens
self.reasoning_tokens += reasoning_tokens self.reasoning_tokens += reasoning_tokens
self.total_tokens += input_tokens + output_tokens self.total_tokens += input_tokens + output_tokens
# self.logger.debug(
# f"Token使用统计 - 输入: {self.input_tokens}(含cached: {self.cached_tokens}), "
# f"输出: {self.output_tokens}(含reasoning: {self.reasoning_tokens}), 总计: {self.total_tokens}"
# )
def get_stats(self): def get_stats(self):
with self.lock: with self.lock:
@@ -202,7 +272,6 @@ ErrorResultHandlerType = Callable[[str, logging.Logger], Any]
class Agent: class Agent:
def __init__(self, config: AgentConfig): def __init__(self, config: AgentConfig):
self.baseurl = config.base_url.strip() self.baseurl = config.base_url.strip()
if self.baseurl.endswith("/"): if self.baseurl.endswith("/"):
self.baseurl = self.baseurl[:-1] self.baseurl = self.baseurl[:-1]
@@ -216,18 +285,38 @@ class Agent:
self.thinking = config.thinking self.thinking = config.thinking
self.logger = config.logger self.logger = config.logger
self.total_error_counter = TotalErrorCounter(logger=self.logger) self.total_error_counter = TotalErrorCounter(logger=self.logger)
# 新增:用于统计最终未解决的错误
self.unresolved_error_lock = Lock() self.unresolved_error_lock = Lock()
self.unresolved_error_count = 0 self.unresolved_error_count = 0
# 新增用于统计token使用情况
self.token_counter = TokenCounter(logger=self.logger) self.token_counter = TokenCounter(logger=self.logger)
self.retry = config.retry self.retry = config.retry
self.system_proxy_enable = config.system_proxy_enable self.system_proxy_enable = config.system_proxy_enable
# 新增:初始化速率限制器
self.rate_limiter = RateLimiter(rpm=config.rpm, tpm=config.tpm)
# 新增:初始化 encoding 用于估算
self.encoding = self._get_encoding_for_model(self.model_id)
def _get_encoding_for_model(self, model_name: str):
    """Return a tiktoken encoding used to estimate token counts.

    The model-specific encoding is tried first. Unknown or custom model ids
    fall back to ``cl100k_base``. If no encoding can be loaded at all, the
    failure is logged and ``None`` is returned, so that building the agent
    does not fail just because token estimation is unavailable.

    Args:
        model_name: Model id as configured for this agent.

    Returns:
        A tiktoken encoding object, or ``None`` when none could be loaded.
    """
    try:
        return tiktoken.encoding_for_model(model_name)
    except KeyError:
        # Unknown / custom model id: expected, use the generic encoding.
        pass
    except Exception as exc:
        # Best-effort: token estimation only feeds the rate limiter.
        self.logger.warning(f"加载模型对应的tiktoken编码失败,将尝试cl100k_base: {exc!r}")
    try:
        return tiktoken.get_encoding("cl100k_base")
    except Exception as exc:
        self.logger.warning(f"加载cl100k_base编码失败,将使用字符数估算token: {exc!r}")
        return None
def _estimate_tokens(self, text: str) -> int:
"""估算文本的 Token 数量"""
if not text:
return 0
try:
# 这是一个近似值,不包含特殊 token 格式的开销,但用于限流足够了
return len(self.encoding.encode(text))
except Exception:
# 极端兜底每4个字符算1个token
return len(text) // 4
def _add_thinking_mode(self, data: dict): def _add_thinking_mode(self, data: dict):
thinking_mode_result=get_thinking_mode(self.domain,data.get("model")) thinking_mode_result = get_thinking_mode(self.domain, data.get("model"))
if thinking_mode_result is None: if thinking_mode_result is None:
return return
field_thinking, val_enable, val_disable = thinking_mode_result field_thinking, val_enable, val_disable = thinking_mode_result
@@ -236,9 +325,8 @@ class Agent:
elif self.thinking == "disable": elif self.thinking == "disable":
data[field_thinking] = val_disable data[field_thinking] = val_disable
def _prepare_request_data( def _prepare_request_data(
self, prompt: str, system_prompt: str, temperature=None, top_p=0.9,json_format=False self, prompt: str, system_prompt: str, temperature=None, top_p=0.9, json_format=False
): ):
if temperature is None: if temperature is None:
temperature = self.temperature temperature = self.temperature
@@ -262,27 +350,32 @@ class Agent:
return headers, data return headers, data
async def send_async( async def send_async(
self, self,
client: httpx.AsyncClient, client: httpx.AsyncClient,
prompt: str, prompt: str,
system_prompt: None | str = None, system_prompt: None | str = None,
retry=True, retry=True,
retry_count=0, retry_count=0,
force_json=False, force_json=False,
pre_send_handler: PreSendHandlerType = None, pre_send_handler: PreSendHandlerType = None,
result_handler: ResultHandlerType = None, result_handler: ResultHandlerType = None,
error_result_handler: ErrorResultHandlerType = None, error_result_handler: ErrorResultHandlerType = None,
best_partial_result: dict | None = None, best_partial_result: dict | None = None,
) -> Any: ) -> Any:
if system_prompt is None: if system_prompt is None:
system_prompt = self.system_prompt system_prompt = self.system_prompt
if pre_send_handler: if pre_send_handler:
system_prompt, prompt = pre_send_handler(system_prompt, prompt) system_prompt, prompt = pre_send_handler(system_prompt, prompt)
# print(f"system_prompt:\n{system_prompt}")
# print(f"【测试】prompt:\n{prompt}") # 新增:速率限制检查
# 计算估算的 tokens (system + user)
estimated_tokens = self._estimate_tokens(system_prompt) + self._estimate_tokens(prompt)
# 等待配额
await self.rate_limiter.acquire_async(tokens=estimated_tokens)
headers, data = self._prepare_request_data(prompt, system_prompt, json_format=force_json) headers, data = self._prepare_request_data(prompt, system_prompt, json_format=force_json)
should_retry = False should_retry = False
is_hard_error = False # 新增标志,用于区分是否为硬错误 is_hard_error = False
current_partial_result = None current_partial_result = None
input_tokens = 0 input_tokens = 0
output_tokens = 0 output_tokens = 0
@@ -294,18 +387,14 @@ class Agent:
headers=headers, headers=headers,
timeout=self.timeout, timeout=self.timeout,
) )
# print(f"【测试】json:\n{data}")
response.raise_for_status() response.raise_for_status()
# print(f"【测试】resp:\n{response.json()}")
result = response.json()["choices"][0]["message"]["content"] result = response.json()["choices"][0]["message"]["content"]
# print(f"【测试】\nprompt:\n{prompt}\nresp:\n{result}")
# 获取token使用情况
response_data = response.json() response_data = response.json()
input_tokens, cached_tokens, output_tokens, reasoning_tokens = ( input_tokens, cached_tokens, output_tokens, reasoning_tokens = (
extract_token_info(response_data) extract_token_info(response_data)
) )
# 更新token计数器
self.token_counter.add( self.token_counter.add(
input_tokens, cached_tokens, output_tokens, reasoning_tokens input_tokens, cached_tokens, output_tokens, reasoning_tokens
) )
@@ -313,7 +402,6 @@ class Agent:
if retry_count > 0: if retry_count > 0:
self.logger.info(f"重试成功 (第 {retry_count}/{self.retry} 次尝试)。") self.logger.info(f"重试成功 (第 {retry_count}/{self.retry} 次尝试)。")
# print(f"result:=============================================================\n{result}\n================\n")
return ( return (
result result
if result_handler is None if result_handler is None
@@ -323,23 +411,23 @@ class Agent:
except AgentResultError as e: except AgentResultError as e:
self.logger.error(f"AI返回结果有误: {e}") self.logger.error(f"AI返回结果有误: {e}")
should_retry = True should_retry = True
# 专门捕获部分翻译错误(软错误)
except PartialAgentResultError as e: except PartialAgentResultError as e:
# print(f"【测试】\nprompt:\n{prompt}\nresp:\n{result}")
self.logger.error(f"收到部分返回结果,将尝试重试: {e}") self.logger.error(f"收到部分返回结果,将尝试重试: {e}")
current_partial_result = e.partial_result current_partial_result = e.partial_result
should_retry = True should_retry = True
if e.append_prompt: if e.append_prompt:
prompt+=e.append_prompt prompt += e.append_prompt
# is_hard_error 保持 False
# 捕获硬错误
except httpx.HTTPStatusError as e: except httpx.HTTPStatusError as e:
self.logger.error( self.logger.error(
f"AI请求HTTP状态错误 (async): {e.response.status_code} - {e.response.text}" f"AI请求HTTP状态错误 (async): {e.response.status_code} - {e.response.text}"
) )
should_retry = True should_retry = True
is_hard_error = True is_hard_error = True
# 如果是因为 Rate Limit (429) 错误,最好在这里多睡一会儿,虽然我们有了本地 Limiter
if e.response.status_code == 429:
await asyncio.sleep(5)
except httpx.RequestError as e: except httpx.RequestError as e:
self.logger.error(f"AI请求连接错误 (async): {repr(e)}") self.logger.error(f"AI请求连接错误 (async): {repr(e)}")
should_retry = True should_retry = True
@@ -353,12 +441,10 @@ class Agent:
best_partial_result = current_partial_result best_partial_result = current_partial_result
if should_retry and retry and retry_count < self.retry: if should_retry and retry and retry_count < self.retry:
# 仅在硬错误时才增加总错误计数
if is_hard_error: if is_hard_error:
if retry_count == 0: if retry_count == 0:
if self.total_error_counter.add(): if self.total_error_counter.add():
self.logger.error("错误次数过多,已达到上限,不再重试。") self.logger.error("错误次数过多,已达到上限,不再重试。")
# 新增:当因为达到错误上限而不再重试时,增加未解决错误计数
with self.unresolved_error_lock: with self.unresolved_error_lock:
self.unresolved_error_count += 1 self.unresolved_error_count += 1
return ( return (
@@ -372,7 +458,6 @@ class Agent:
) )
elif self.total_error_counter.reach_limit(): elif self.total_error_counter.reach_limit():
self.logger.error("错误次数过多,已达到上限,不再为该请求重试。") self.logger.error("错误次数过多,已达到上限,不再为该请求重试。")
# 新增:当因为达到错误上限而不再重试时,增加未解决错误计数
with self.unresolved_error_lock: with self.unresolved_error_lock:
self.unresolved_error_count += 1 self.unresolved_error_count += 1
return ( return (
@@ -386,7 +471,8 @@ class Agent:
) )
self.logger.info(f"正在重试第 {retry_count + 1}/{self.retry} 次...") self.logger.info(f"正在重试第 {retry_count + 1}/{self.retry} 次...")
await asyncio.sleep(0.5) # 指数退避
await asyncio.sleep(0.5 * (2 ** retry_count))
return await self.send_async( return await self.send_async(
client, client,
prompt, prompt,
@@ -402,7 +488,6 @@ class Agent:
else: else:
if should_retry: if should_retry:
self.logger.error(f"所有重试均失败,已达到重试次数上限。") self.logger.error(f"所有重试均失败,已达到重试次数上限。")
# 新增:当所有重试失败后,增加未解决错误计数
with self.unresolved_error_lock: with self.unresolved_error_lock:
self.unresolved_error_count += 1 self.unresolved_error_count += 1
@@ -417,30 +502,32 @@ class Agent:
) )
async def send_prompts_async( async def send_prompts_async(
self, self,
prompts: list[str], prompts: list[str],
system_prompt: str | None = None, system_prompt: str | None = None,
max_concurrent: int | None = None, max_concurrent: int | None = None,
force_json=False, force_json=False,
pre_send_handler: PreSendHandlerType = None, pre_send_handler: PreSendHandlerType = None,
result_handler: ResultHandlerType = None, result_handler: ResultHandlerType = None,
error_result_handler: ErrorResultHandlerType = None, error_result_handler: ErrorResultHandlerType = None,
) -> list[Any]: ) -> list[Any]:
max_concurrent = ( max_concurrent = (
self.max_concurrent if max_concurrent is None else max_concurrent self.max_concurrent if max_concurrent is None else max_concurrent
) )
total = len(prompts) total = len(prompts)
rpm_info = f", RPM:{self.rate_limiter.rpm}" if self.rate_limiter.rpm else ""
tpm_info = f", TPM:{self.rate_limiter.tpm}" if self.rate_limiter.tpm else ""
self.logger.info( self.logger.info(
f"base-url:{self.baseurl},model-id:{self.model_id},concurrent:{max_concurrent},temperature:{self.temperature},system_proxy:{self.system_proxy_enable},json_output:{force_json}" f"base-url:{self.baseurl},model-id:{self.model_id},concurrent:{max_concurrent}{rpm_info}{tpm_info},temperature:{self.temperature},system_proxy:{self.system_proxy_enable},json_output:{force_json}"
) )
self.logger.info(f"预计发送{total}个请求,并发请求数:{max_concurrent}") self.logger.info(f"预计发送{total}个请求")
self.total_error_counter.max_errors_count = ( self.total_error_counter.max_errors_count = (
len(prompts) // MAX_REQUESTS_PER_ERROR len(prompts) // MAX_REQUESTS_PER_ERROR
) )
# 新增:在每次批量发送前重置计数器
self.unresolved_error_count = 0 self.unresolved_error_count = 0
# 重置token计数器
self.token_counter.reset() self.token_counter.reset()
count = 0 count = 0
@@ -450,16 +537,19 @@ class Agent:
proxies = get_httpx_proxies(asyn=True) if self.system_proxy_enable else None proxies = get_httpx_proxies(asyn=True) if self.system_proxy_enable else None
limits = httpx.Limits( limits = httpx.Limits(
max_connections=self.max_concurrent * 2, # 为重试和并发预留空间 max_connections=self.max_concurrent * 2,
max_keepalive_connections=self.max_concurrent, # 保持活动的连接数 max_keepalive_connections=self.max_concurrent,
) )
async with httpx.AsyncClient( async with httpx.AsyncClient(
trust_env=False, mounts=proxies, verify=False, limits=limits trust_env=False, mounts=proxies, verify=False, limits=limits
) as client: ) as client:
async def send_with_semaphore(p_text: str): async def send_with_semaphore(p_text: str):
async with semaphore: async with semaphore:
# 注意:我们在 semaphore 内部调用 send_async
# send_async 内部会调用 rate_limiter.acquire_async
# 这样可以防止并发过高,同时 rate_limiter 防止频率过快
result = await self.send_async( result = await self.send_async(
client=client, client=client,
prompt=p_text, prompt=p_text,
@@ -480,45 +570,44 @@ class Agent:
results = await asyncio.gather(*tasks, return_exceptions=False) results = await asyncio.gather(*tasks, return_exceptions=False)
# 新增:在所有任务完成后打印未解决的错误总数
self.logger.info( self.logger.info(
f"所有请求处理完毕。未解决的错误总数: {self.unresolved_error_count}" f"所有请求处理完毕。未解决的错误总数: {self.unresolved_error_count}"
) )
# 新增打印token使用统计
token_stats = self.token_counter.get_stats() token_stats = self.token_counter.get_stats()
if token_stats["input_tokens"] < 0: self.logger.info(
self.logger.info("Token统计失败") f"Token使用统计 - 输入: {token_stats['input_tokens'] / 1000:.2f}K(含cached: {token_stats['cached_tokens'] / 1000:.2f}K), "
else: f"输出: {token_stats['output_tokens'] / 1000:.2f}K(含reasoning: {token_stats['reasoning_tokens'] / 1000:.2f}K), "
self.logger.info( f"总计: {token_stats['total_tokens'] / 1000:.2f}K"
f"Token使用统计 - 输入: {token_stats['input_tokens'] / 1000:.2f}K(含cached: {token_stats['cached_tokens'] / 1000:.2f}K), " )
f"输出: {token_stats['output_tokens'] / 1000:.2f}K(含reasoning: {token_stats['reasoning_tokens'] / 1000:.2f}K), "
f"总计: {token_stats['total_tokens'] / 1000:.2f}K"
)
return results return results
def send( def send(
self, self,
client: httpx.Client, client: httpx.Client,
prompt: str, prompt: str,
system_prompt: None | str = None, system_prompt: None | str = None,
retry=True, retry=True,
retry_count=0, retry_count=0,
force_json=False, force_json=False,
pre_send_handler=None, pre_send_handler=None,
result_handler=None, result_handler=None,
error_result_handler=None, error_result_handler=None,
best_partial_result: dict | None = None, best_partial_result: dict | None = None,
) -> Any: ) -> Any:
if system_prompt is None: if system_prompt is None:
system_prompt = self.system_prompt system_prompt = self.system_prompt
if pre_send_handler: if pre_send_handler:
system_prompt, prompt = pre_send_handler(system_prompt, prompt) system_prompt, prompt = pre_send_handler(system_prompt, prompt)
# 新增:同步环境下的速率限制
estimated_tokens = self._estimate_tokens(system_prompt) + self._estimate_tokens(prompt)
self.rate_limiter.acquire_sync(tokens=estimated_tokens)
headers, data = self._prepare_request_data(prompt, system_prompt, json_format=force_json) headers, data = self._prepare_request_data(prompt, system_prompt, json_format=force_json)
should_retry = False should_retry = False
is_hard_error = False # 新增标志,用于区分是否为硬错误 is_hard_error = False
current_partial_result = None current_partial_result = None
input_tokens = 0 input_tokens = 0
output_tokens = 0 output_tokens = 0
@@ -534,13 +623,11 @@ class Agent:
result = response.json()["choices"][0]["message"]["content"] result = response.json()["choices"][0]["message"]["content"]
# 获取token使用情况
response_data = response.json() response_data = response.json()
input_tokens, cached_tokens, output_tokens, reasoning_tokens = ( input_tokens, cached_tokens, output_tokens, reasoning_tokens = (
extract_token_info(response_data) extract_token_info(response_data)
) )
# 更新token计数器
self.token_counter.add( self.token_counter.add(
input_tokens, cached_tokens, output_tokens, reasoning_tokens input_tokens, cached_tokens, output_tokens, reasoning_tokens
) )
@@ -556,20 +643,20 @@ class Agent:
except AgentResultError as e: except AgentResultError as e:
self.logger.error(f"AI返回结果有误: {e}") self.logger.error(f"AI返回结果有误: {e}")
should_retry = True should_retry = True
# 专门捕获部分翻译错误(软错误)
except PartialAgentResultError as e: except PartialAgentResultError as e:
self.logger.error(f"收到部分翻译结果,将尝试重试: {e}") self.logger.error(f"收到部分翻译结果,将尝试重试: {e}")
current_partial_result = e.partial_result current_partial_result = e.partial_result
should_retry = True should_retry = True
# is_hard_error 保持 False
# 捕获硬错误
except httpx.HTTPStatusError as e: except httpx.HTTPStatusError as e:
self.logger.error( self.logger.error(
f"AI请求HTTP状态错误 (sync): {e.response.status_code} - {e.response.text}" f"AI请求HTTP状态错误 (sync): {e.response.status_code} - {e.response.text}"
) )
should_retry = True should_retry = True
is_hard_error = True is_hard_error = True
if e.response.status_code == 429:
time.sleep(5)
except httpx.RequestError as e: except httpx.RequestError as e:
self.logger.error(f"AI请求连接错误 (sync): {repr(e)}\nprompt:{prompt}") self.logger.error(f"AI请求连接错误 (sync): {repr(e)}\nprompt:{prompt}")
should_retry = True should_retry = True
@@ -583,12 +670,10 @@ class Agent:
best_partial_result = current_partial_result best_partial_result = current_partial_result
if should_retry and retry and retry_count < self.retry: if should_retry and retry and retry_count < self.retry:
# 仅在硬错误时才增加总错误计数
if is_hard_error: if is_hard_error:
if retry_count == 0: if retry_count == 0:
if self.total_error_counter.add(): if self.total_error_counter.add():
self.logger.error("错误次数过多,已达到上限,不再重试。") self.logger.error("错误次数过多,已达到上限,不再重试。")
# 新增:当因为达到错误上限而不再重试时,增加未解决错误计数
with self.unresolved_error_lock: with self.unresolved_error_lock:
self.unresolved_error_count += 1 self.unresolved_error_count += 1
return ( return (
@@ -602,7 +687,6 @@ class Agent:
) )
elif self.total_error_counter.reach_limit(): elif self.total_error_counter.reach_limit():
self.logger.error("错误次数过多,已达到上限,不再为该请求重试。") self.logger.error("错误次数过多,已达到上限,不再为该请求重试。")
# 新增:当因为达到错误上限而不再重试时,增加未解决错误计数
with self.unresolved_error_lock: with self.unresolved_error_lock:
self.unresolved_error_count += 1 self.unresolved_error_count += 1
return ( return (
@@ -616,7 +700,7 @@ class Agent:
) )
self.logger.info(f"正在重试第 {retry_count + 1}/{self.retry} 次...") self.logger.info(f"正在重试第 {retry_count + 1}/{self.retry} 次...")
time.sleep(0.5) time.sleep(0.5 * (2 ** retry_count))
return self.send( return self.send(
client, client,
prompt, prompt,
@@ -632,7 +716,6 @@ class Agent:
else: else:
if should_retry: if should_retry:
self.logger.error(f"所有重试均失败,已达到重试次数上限。") self.logger.error(f"所有重试均失败,已达到重试次数上限。")
# 新增:当所有重试失败后,增加未解决错误计数
with self.unresolved_error_lock: with self.unresolved_error_lock:
self.unresolved_error_count += 1 self.unresolved_error_count += 1
@@ -647,16 +730,17 @@ class Agent:
) )
def _send_prompt_count( def _send_prompt_count(
self, self,
client: httpx.Client, client: httpx.Client,
prompt: str, prompt: str,
system_prompt: None | str, system_prompt: None | str,
force_json, force_json,
count: PromptsCounter, count: PromptsCounter,
pre_send_handler, pre_send_handler,
result_handler, result_handler,
error_result_handler error_result_handler
) -> Any: ) -> Any:
# 该方法在 ThreadPoolExecutor 中运行
result = self.send( result = self.send(
client, client,
prompt, prompt,
@@ -670,27 +754,28 @@ class Agent:
return result return result
def send_prompts( def send_prompts(
self, self,
prompts: list[str], prompts: list[str],
system_prompt: str | None = None, system_prompt: str | None = None,
json_format=False, json_format=False,
pre_send_handler: PreSendHandlerType = None, pre_send_handler: PreSendHandlerType = None,
result_handler: ResultHandlerType = None, result_handler: ResultHandlerType = None,
error_result_handler: ErrorResultHandlerType = None, error_result_handler: ErrorResultHandlerType = None,
) -> list[Any]: ) -> list[Any]:
rpm_info = f", RPM:{self.rate_limiter.rpm}" if self.rate_limiter.rpm else ""
tpm_info = f", TPM:{self.rate_limiter.tpm}" if self.rate_limiter.tpm else ""
self.logger.info( self.logger.info(
f"base-url:{self.baseurl},model-id:{self.model_id},concurrent:{self.max_concurrent},temperature:{self.temperature},system_proxy:{self.system_proxy_enable},json_output:{json_format}" f"base-url:{self.baseurl},model-id:{self.model_id},concurrent:{self.max_concurrent}{rpm_info}{tpm_info},temperature:{self.temperature},system_proxy:{self.system_proxy_enable},json_output:{json_format}"
) )
self.logger.info( self.logger.info(
f"预计发送{len(prompts)}个请求,并发请求数:{self.max_concurrent}" f"预计发送{len(prompts)}个请求"
) )
self.total_error_counter.max_errors_count = ( self.total_error_counter.max_errors_count = (
len(prompts) // MAX_REQUESTS_PER_ERROR len(prompts) // MAX_REQUESTS_PER_ERROR
) )
# 新增:在每次批量发送前重置计数器
self.unresolved_error_count = 0 self.unresolved_error_count = 0
# 重置token计数器
self.token_counter.reset() self.token_counter.reset()
counter = PromptsCounter(len(prompts), self.logger) counter = PromptsCounter(len(prompts), self.logger)
@@ -702,12 +787,13 @@ class Agent:
result_handlers = itertools.repeat(result_handler, len(prompts)) result_handlers = itertools.repeat(result_handler, len(prompts))
error_result_handlers = itertools.repeat(error_result_handler, len(prompts)) error_result_handlers = itertools.repeat(error_result_handler, len(prompts))
limits = httpx.Limits( limits = httpx.Limits(
max_connections=self.max_concurrent * 2, # 允许连接复用 max_connections=self.max_concurrent * 2,
max_keepalive_connections=self.max_concurrent, # 保持活跃连接 max_keepalive_connections=self.max_concurrent,
) )
proxies = get_httpx_proxies(asyn=False) if self.system_proxy_enable else None proxies = get_httpx_proxies(asyn=False) if self.system_proxy_enable else None
with httpx.Client( with httpx.Client(
trust_env=False, mounts=proxies, verify=False, limits=limits trust_env=False, mounts=proxies, verify=False, limits=limits
) as client: ) as client:
clients = itertools.repeat(client, len(prompts)) clients = itertools.repeat(client, len(prompts))
with ThreadPoolExecutor(max_workers=self.max_concurrent) as executor: with ThreadPoolExecutor(max_workers=self.max_concurrent) as executor:
@@ -724,24 +810,15 @@ class Agent:
) )
output_list = list(results_iterator) output_list = list(results_iterator)
# 新增:在所有任务完成后打印未解决的错误总数
self.logger.info( self.logger.info(
f"所有请求处理完毕。未解决的错误总数: {self.unresolved_error_count}" f"所有请求处理完毕。未解决的错误总数: {self.unresolved_error_count}"
) )
# 新增打印token使用统计
token_stats = self.token_counter.get_stats() token_stats = self.token_counter.get_stats()
if token_stats["input_tokens"] < 0: self.logger.info(
self.logger.info("Token统计失败") f"Token使用统计 - 输入: {token_stats['input_tokens'] / 1000:.2f}K(含cached: {token_stats['cached_tokens'] / 1000:.2f}K), "
else: f"输出: {token_stats['output_tokens'] / 1000:.2f}K(含reasoning: {token_stats['reasoning_tokens'] / 1000:.2f}K), "
self.logger.info( f"总计: {token_stats['total_tokens'] / 1000:.2f}K"
f"Token使用统计 - 输入: {token_stats['input_tokens'] / 1000:.2f}K(含cached: {token_stats['cached_tokens'] / 1000:.2f}K), " )
f"输出: {token_stats['output_tokens'] / 1000:.2f}K(含reasoning: {token_stats['reasoning_tokens'] / 1000:.2f}K), "
f"总计: {token_stats['total_tokens'] / 1000:.2f}K"
)
return output_list return output_list
if __name__ == "__main__":
pass

View File

@@ -337,6 +337,12 @@ class GlossaryAgentConfigPayload(BaseModel):
force_json: bool = Field( force_json: bool = Field(
default=False, description="强制Agent输出JSON格式的术语表。" default=False, description="强制Agent输出JSON格式的术语表。"
) )
rpm: Optional[int] = Field(
default=None, description="RPM限制 (Requests Per Minute)"
)
tpm: Optional[int] = Field(
default=None, description="TPM限制 (Tokens Per Minute)"
)
# 1. 定义所有工作流共享的基础参数 # 1. 定义所有工作流共享的基础参数
@@ -411,6 +417,12 @@ class BaseWorkflowParams(BaseModel):
force_json: bool = Field( force_json: bool = Field(
default=False, description="应输出json格式时强制ai输出json" default=False, description="应输出json格式时强制ai输出json"
) )
rpm: Optional[int] = Field(
default=None, description="RPM限制 (Requests Per Minute)"
)
tpm: Optional[int] = Field(
default=None, description="TPM限制 (Tokens Per Minute)"
)
@model_validator(mode="before") @model_validator(mode="before")
@classmethod @classmethod
@@ -723,6 +735,8 @@ class TranslateServiceRequest(BaseModel):
"mineru_token": "your-mineru-token-if-any", "mineru_token": "your-mineru-token-if-any",
"formula_ocr": True, "formula_ocr": True,
"model_version": "vlm", "model_version": "vlm",
"rpm": 100,
"tpm": 100000,
}, },
}, },
{ {
@@ -1007,6 +1021,8 @@ async def _perform_translation(
"retry", "retry",
"system_proxy_enable", "system_proxy_enable",
"force_json", "force_json",
"rpm",
"tpm",
}, },
exclude_none=True, exclude_none=True,
) )
@@ -1072,6 +1088,8 @@ async def _perform_translation(
"retry", "retry",
"system_proxy_enable", "system_proxy_enable",
"force_json", "force_json",
"rpm",
"tpm",
}, },
exclude_none=True, exclude_none=True,
) )
@@ -1109,6 +1127,8 @@ async def _perform_translation(
"retry", "retry",
"system_proxy_enable", "system_proxy_enable",
"force_json", "force_json",
"rpm",
"tpm",
}, },
exclude_none=True, exclude_none=True,
) )
@@ -1148,6 +1168,8 @@ async def _perform_translation(
"retry", "retry",
"system_proxy_enable", "system_proxy_enable",
"force_json", "force_json",
"rpm",
"tpm",
}, },
exclude_none=True, exclude_none=True,
) )
@@ -1186,6 +1208,8 @@ async def _perform_translation(
"retry", "retry",
"system_proxy_enable", "system_proxy_enable",
"force_json", "force_json",
"rpm",
"tpm",
}, },
exclude_none=True, exclude_none=True,
) )
@@ -1224,6 +1248,8 @@ async def _perform_translation(
"retry", "retry",
"system_proxy_enable", "system_proxy_enable",
"force_json", "force_json",
"rpm",
"tpm",
}, },
exclude_none=True, exclude_none=True,
) )
@@ -1262,6 +1288,8 @@ async def _perform_translation(
"retry", "retry",
"system_proxy_enable", "system_proxy_enable",
"force_json", "force_json",
"rpm",
"tpm",
}, },
exclude_none=True, exclude_none=True,
) )
@@ -1301,6 +1329,8 @@ async def _perform_translation(
"retry", "retry",
"system_proxy_enable", "system_proxy_enable",
"force_json", "force_json",
"rpm",
"tpm",
}, },
exclude_none=True, exclude_none=True,
) )
@@ -1338,6 +1368,8 @@ async def _perform_translation(
"retry", "retry",
"system_proxy_enable", "system_proxy_enable",
"force_json", "force_json",
"rpm",
"tpm",
}, },
exclude_none=True, exclude_none=True,
) )
@@ -1378,6 +1410,8 @@ async def _perform_translation(
"retry", "retry",
"system_proxy_enable", "system_proxy_enable",
"force_json", "force_json",
"rpm",
"tpm",
}, },
exclude_none=True, exclude_none=True,
) )
@@ -2507,6 +2541,8 @@ async def temp_translate(
custom_prompt: Optional[str] = Body(None), custom_prompt: Optional[str] = Body(None),
model_version: Literal["pipeline", "vlm"] = Body("vlm"), model_version: Literal["pipeline", "vlm"] = Body("vlm"),
glossary_dict: Optional[Dict[str, str]] = Body(None), glossary_dict: Optional[Dict[str, str]] = Body(None),
rpm: Optional[int] = Body(None),
tpm: Optional[int] = Body(None),
): ):
file_name = Path(file_name) file_name = Path(file_name)
try: try:
@@ -2530,6 +2566,8 @@ async def temp_translate(
chunk_size=chunk_size, chunk_size=chunk_size,
concurrent=concurrent, concurrent=concurrent,
glossary_dict=glossary_dict, glossary_dict=glossary_dict,
rpm=rpm,
tpm=tpm,
), ),
html_exporter_config=MD2HTMLExporterConfig(), html_exporter_config=MD2HTMLExporterConfig(),
) )

File diff suppressed because one or more lines are too long

View File

@@ -21,6 +21,7 @@ dependencies = [
"httpx>=0.28.1", "httpx>=0.28.1",
"python-pptx>=1.0.2", "python-pptx>=1.0.2",
"pypdf>=6.4.2", "pypdf>=6.4.2",
"tiktoken>=0.12.0",
] ]
dynamic = ["version"] dynamic = ["version"]

56
uv.lock generated
View File

@@ -394,6 +394,7 @@ dependencies = [
{ name = "python-docx" }, { name = "python-docx" },
{ name = "python-pptx" }, { name = "python-pptx" },
{ name = "srt" }, { name = "srt" },
{ name = "tiktoken" },
{ name = "xlsx2html" }, { name = "xlsx2html" },
] ]
@@ -432,6 +433,7 @@ requires-dist = [
{ name = "python-docx", specifier = ">=1.2.0" }, { name = "python-docx", specifier = ">=1.2.0" },
{ name = "python-pptx", specifier = ">=1.0.2" }, { name = "python-pptx", specifier = ">=1.0.2" },
{ name = "srt", specifier = ">=3.5.3" }, { name = "srt", specifier = ">=3.5.3" },
{ name = "tiktoken", specifier = ">=0.12.0" },
{ name = "xlsx2html", specifier = ">=0.6.2" }, { name = "xlsx2html", specifier = ">=0.6.2" },
] ]
provides-extras = ["docling"] provides-extras = ["docling"]
@@ -2720,6 +2722,60 @@ wheels = [
{ url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252 }, { url = "https://files.pythonhosted.org/packages/40/44/4a5f08c96eb108af5cb50b41f76142f0afa346dfa99d5296fe7202a11854/tabulate-0.9.0-py3-none-any.whl", hash = "sha256:024ca478df22e9340661486f85298cff5f6dcdba14f3813e8830015b9ed1948f", size = 35252 },
] ]
[[package]]
name = "tiktoken"
version = "0.12.0"
source = { registry = "https://pypi.org/simple" }
dependencies = [
{ name = "regex" },
{ name = "requests" },
]
sdist = { url = "https://files.pythonhosted.org/packages/7d/ab/4d017d0f76ec3171d469d80fc03dfbb4e48a4bcaddaa831b31d526f05edc/tiktoken-0.12.0.tar.gz", hash = "sha256:b18ba7ee2b093863978fcb14f74b3707cdc8d4d4d3836853ce7ec60772139931", size = 37806 }
wheels = [
{ url = "https://files.pythonhosted.org/packages/de/46/21ea696b21f1d6d1efec8639c204bdf20fde8bafb351e1355c72c5d7de52/tiktoken-0.12.0-cp311-cp311-macosx_10_12_x86_64.whl", hash = "sha256:6e227c7f96925003487c33b1b32265fad2fbcec2b7cf4817afb76d416f40f6bb", size = 1051565 },
{ url = "https://files.pythonhosted.org/packages/c9/d9/35c5d2d9e22bb2a5f74ba48266fb56c63d76ae6f66e02feb628671c0283e/tiktoken-0.12.0-cp311-cp311-macosx_11_0_arm64.whl", hash = "sha256:c06cf0fcc24c2cb2adb5e185c7082a82cba29c17575e828518c2f11a01f445aa", size = 995284 },
{ url = "https://files.pythonhosted.org/packages/01/84/961106c37b8e49b9fdcf33fe007bb3a8fdcc380c528b20cc7fbba80578b8/tiktoken-0.12.0-cp311-cp311-manylinux_2_28_aarch64.whl", hash = "sha256:f18f249b041851954217e9fd8e5c00b024ab2315ffda5ed77665a05fa91f42dc", size = 1129201 },
{ url = "https://files.pythonhosted.org/packages/6a/d0/3d9275198e067f8b65076a68894bb52fd253875f3644f0a321a720277b8a/tiktoken-0.12.0-cp311-cp311-manylinux_2_28_x86_64.whl", hash = "sha256:47a5bc270b8c3db00bb46ece01ef34ad050e364b51d406b6f9730b64ac28eded", size = 1152444 },
{ url = "https://files.pythonhosted.org/packages/78/db/a58e09687c1698a7c592e1038e01c206569b86a0377828d51635561f8ebf/tiktoken-0.12.0-cp311-cp311-musllinux_1_2_aarch64.whl", hash = "sha256:508fa71810c0efdcd1b898fda574889ee62852989f7c1667414736bcb2b9a4bd", size = 1195080 },
{ url = "https://files.pythonhosted.org/packages/9e/1b/a9e4d2bf91d515c0f74afc526fd773a812232dd6cda33ebea7f531202325/tiktoken-0.12.0-cp311-cp311-musllinux_1_2_x86_64.whl", hash = "sha256:a1af81a6c44f008cba48494089dd98cccb8b313f55e961a52f5b222d1e507967", size = 1255240 },
{ url = "https://files.pythonhosted.org/packages/9d/15/963819345f1b1fb0809070a79e9dd96938d4ca41297367d471733e79c76c/tiktoken-0.12.0-cp311-cp311-win_amd64.whl", hash = "sha256:3e68e3e593637b53e56f7237be560f7a394451cb8c11079755e80ae64b9e6def", size = 879422 },
{ url = "https://files.pythonhosted.org/packages/a4/85/be65d39d6b647c79800fd9d29241d081d4eeb06271f383bb87200d74cf76/tiktoken-0.12.0-cp312-cp312-macosx_10_13_x86_64.whl", hash = "sha256:b97f74aca0d78a1ff21b8cd9e9925714c15a9236d6ceacf5c7327c117e6e21e8", size = 1050728 },
{ url = "https://files.pythonhosted.org/packages/4a/42/6573e9129bc55c9bf7300b3a35bef2c6b9117018acca0dc760ac2d93dffe/tiktoken-0.12.0-cp312-cp312-macosx_11_0_arm64.whl", hash = "sha256:2b90f5ad190a4bb7c3eb30c5fa32e1e182ca1ca79f05e49b448438c3e225a49b", size = 994049 },
{ url = "https://files.pythonhosted.org/packages/66/c5/ed88504d2f4a5fd6856990b230b56d85a777feab84e6129af0822f5d0f70/tiktoken-0.12.0-cp312-cp312-manylinux_2_28_aarch64.whl", hash = "sha256:65b26c7a780e2139e73acc193e5c63ac754021f160df919add909c1492c0fb37", size = 1129008 },
{ url = "https://files.pythonhosted.org/packages/f4/90/3dae6cc5436137ebd38944d396b5849e167896fc2073da643a49f372dc4f/tiktoken-0.12.0-cp312-cp312-manylinux_2_28_x86_64.whl", hash = "sha256:edde1ec917dfd21c1f2f8046b86348b0f54a2c0547f68149d8600859598769ad", size = 1152665 },
{ url = "https://files.pythonhosted.org/packages/a3/fe/26df24ce53ffde419a42f5f53d755b995c9318908288c17ec3f3448313a3/tiktoken-0.12.0-cp312-cp312-musllinux_1_2_aarch64.whl", hash = "sha256:35a2f8ddd3824608b3d650a000c1ef71f730d0c56486845705a8248da00f9fe5", size = 1194230 },
{ url = "https://files.pythonhosted.org/packages/20/cc/b064cae1a0e9fac84b0d2c46b89f4e57051a5f41324e385d10225a984c24/tiktoken-0.12.0-cp312-cp312-musllinux_1_2_x86_64.whl", hash = "sha256:83d16643edb7fa2c99eff2ab7733508aae1eebb03d5dfc46f5565862810f24e3", size = 1254688 },
{ url = "https://files.pythonhosted.org/packages/81/10/b8523105c590c5b8349f2587e2fdfe51a69544bd5a76295fc20f2374f470/tiktoken-0.12.0-cp312-cp312-win_amd64.whl", hash = "sha256:ffc5288f34a8bc02e1ea7047b8d041104791d2ddbf42d1e5fa07822cbffe16bd", size = 878694 },
{ url = "https://files.pythonhosted.org/packages/00/61/441588ee21e6b5cdf59d6870f86beb9789e532ee9718c251b391b70c68d6/tiktoken-0.12.0-cp313-cp313-macosx_10_13_x86_64.whl", hash = "sha256:775c2c55de2310cc1bc9a3ad8826761cbdc87770e586fd7b6da7d4589e13dab3", size = 1050802 },
{ url = "https://files.pythonhosted.org/packages/1f/05/dcf94486d5c5c8d34496abe271ac76c5b785507c8eae71b3708f1ad9b45a/tiktoken-0.12.0-cp313-cp313-macosx_11_0_arm64.whl", hash = "sha256:a01b12f69052fbe4b080a2cfb867c4de12c704b56178edf1d1d7b273561db160", size = 993995 },
{ url = "https://files.pythonhosted.org/packages/a0/70/5163fe5359b943f8db9946b62f19be2305de8c3d78a16f629d4165e2f40e/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_aarch64.whl", hash = "sha256:01d99484dc93b129cd0964f9d34eee953f2737301f18b3c7257bf368d7615baa", size = 1128948 },
{ url = "https://files.pythonhosted.org/packages/0c/da/c028aa0babf77315e1cef357d4d768800c5f8a6de04d0eac0f377cb619fa/tiktoken-0.12.0-cp313-cp313-manylinux_2_28_x86_64.whl", hash = "sha256:4a1a4fcd021f022bfc81904a911d3df0f6543b9e7627b51411da75ff2fe7a1be", size = 1151986 },
{ url = "https://files.pythonhosted.org/packages/a0/5a/886b108b766aa53e295f7216b509be95eb7d60b166049ce2c58416b25f2a/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_aarch64.whl", hash = "sha256:981a81e39812d57031efdc9ec59fa32b2a5a5524d20d4776574c4b4bd2e9014a", size = 1194222 },
{ url = "https://files.pythonhosted.org/packages/f4/f8/4db272048397636ac7a078d22773dd2795b1becee7bc4922fe6207288d57/tiktoken-0.12.0-cp313-cp313-musllinux_1_2_x86_64.whl", hash = "sha256:9baf52f84a3f42eef3ff4e754a0db79a13a27921b457ca9832cf944c6be4f8f3", size = 1255097 },
{ url = "https://files.pythonhosted.org/packages/8e/32/45d02e2e0ea2be3a9ed22afc47d93741247e75018aac967b713b2941f8ea/tiktoken-0.12.0-cp313-cp313-win_amd64.whl", hash = "sha256:b8a0cd0c789a61f31bf44851defbd609e8dd1e2c8589c614cc1060940ef1f697", size = 879117 },
{ url = "https://files.pythonhosted.org/packages/ce/76/994fc868f88e016e6d05b0da5ac24582a14c47893f4474c3e9744283f1d5/tiktoken-0.12.0-cp313-cp313t-macosx_10_13_x86_64.whl", hash = "sha256:d5f89ea5680066b68bcb797ae85219c72916c922ef0fcdd3480c7d2315ffff16", size = 1050309 },
{ url = "https://files.pythonhosted.org/packages/f6/b8/57ef1456504c43a849821920d582a738a461b76a047f352f18c0b26c6516/tiktoken-0.12.0-cp313-cp313t-macosx_11_0_arm64.whl", hash = "sha256:b4e7ed1c6a7a8a60a3230965bdedba8cc58f68926b835e519341413370e0399a", size = 993712 },
{ url = "https://files.pythonhosted.org/packages/72/90/13da56f664286ffbae9dbcfadcc625439142675845baa62715e49b87b68b/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_aarch64.whl", hash = "sha256:fc530a28591a2d74bce821d10b418b26a094bf33839e69042a6e86ddb7a7fb27", size = 1128725 },
{ url = "https://files.pythonhosted.org/packages/05/df/4f80030d44682235bdaecd7346c90f67ae87ec8f3df4a3442cb53834f7e4/tiktoken-0.12.0-cp313-cp313t-manylinux_2_28_x86_64.whl", hash = "sha256:06a9f4f49884139013b138920a4c393aa6556b2f8f536345f11819389c703ebb", size = 1151875 },
{ url = "https://files.pythonhosted.org/packages/22/1f/ae535223a8c4ef4c0c1192e3f9b82da660be9eb66b9279e95c99288e9dab/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_aarch64.whl", hash = "sha256:04f0e6a985d95913cabc96a741c5ffec525a2c72e9df086ff17ebe35985c800e", size = 1194451 },
{ url = "https://files.pythonhosted.org/packages/78/a7/f8ead382fce0243cb625c4f266e66c27f65ae65ee9e77f59ea1653b6d730/tiktoken-0.12.0-cp313-cp313t-musllinux_1_2_x86_64.whl", hash = "sha256:0ee8f9ae00c41770b5f9b0bb1235474768884ae157de3beb5439ca0fd70f3e25", size = 1253794 },
{ url = "https://files.pythonhosted.org/packages/93/e0/6cc82a562bc6365785a3ff0af27a2a092d57c47d7a81d9e2295d8c36f011/tiktoken-0.12.0-cp313-cp313t-win_amd64.whl", hash = "sha256:dc2dd125a62cb2b3d858484d6c614d136b5b848976794edfb63688d539b8b93f", size = 878777 },
{ url = "https://files.pythonhosted.org/packages/72/05/3abc1db5d2c9aadc4d2c76fa5640134e475e58d9fbb82b5c535dc0de9b01/tiktoken-0.12.0-cp314-cp314-macosx_10_13_x86_64.whl", hash = "sha256:a90388128df3b3abeb2bfd1895b0681412a8d7dc644142519e6f0a97c2111646", size = 1050188 },
{ url = "https://files.pythonhosted.org/packages/e3/7b/50c2f060412202d6c95f32b20755c7a6273543b125c0985d6fa9465105af/tiktoken-0.12.0-cp314-cp314-macosx_11_0_arm64.whl", hash = "sha256:da900aa0ad52247d8794e307d6446bd3cdea8e192769b56276695d34d2c9aa88", size = 993978 },
{ url = "https://files.pythonhosted.org/packages/14/27/bf795595a2b897e271771cd31cb847d479073497344c637966bdf2853da1/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_aarch64.whl", hash = "sha256:285ba9d73ea0d6171e7f9407039a290ca77efcdb026be7769dccc01d2c8d7fff", size = 1129271 },
{ url = "https://files.pythonhosted.org/packages/f5/de/9341a6d7a8f1b448573bbf3425fa57669ac58258a667eb48a25dfe916d70/tiktoken-0.12.0-cp314-cp314-manylinux_2_28_x86_64.whl", hash = "sha256:d186a5c60c6a0213f04a7a802264083dea1bbde92a2d4c7069e1a56630aef830", size = 1151216 },
{ url = "https://files.pythonhosted.org/packages/75/0d/881866647b8d1be4d67cb24e50d0c26f9f807f994aa1510cb9ba2fe5f612/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_aarch64.whl", hash = "sha256:604831189bd05480f2b885ecd2d1986dc7686f609de48208ebbbddeea071fc0b", size = 1194860 },
{ url = "https://files.pythonhosted.org/packages/b3/1e/b651ec3059474dab649b8d5b69f5c65cd8fcd8918568c1935bd4136c9392/tiktoken-0.12.0-cp314-cp314-musllinux_1_2_x86_64.whl", hash = "sha256:8f317e8530bb3a222547b85a58583238c8f74fd7a7408305f9f63246d1a0958b", size = 1254567 },
{ url = "https://files.pythonhosted.org/packages/80/57/ce64fd16ac390fafde001268c364d559447ba09b509181b2808622420eec/tiktoken-0.12.0-cp314-cp314-win_amd64.whl", hash = "sha256:399c3dd672a6406719d84442299a490420b458c44d3ae65516302a99675888f3", size = 921067 },
{ url = "https://files.pythonhosted.org/packages/ac/a4/72eed53e8976a099539cdd5eb36f241987212c29629d0a52c305173e0a68/tiktoken-0.12.0-cp314-cp314t-macosx_10_13_x86_64.whl", hash = "sha256:c2c714c72bc00a38ca969dae79e8266ddec999c7ceccd603cc4f0d04ccd76365", size = 1050473 },
{ url = "https://files.pythonhosted.org/packages/e6/d7/0110b8f54c008466b19672c615f2168896b83706a6611ba6e47313dbc6e9/tiktoken-0.12.0-cp314-cp314t-macosx_11_0_arm64.whl", hash = "sha256:cbb9a3ba275165a2cb0f9a83f5d7025afe6b9d0ab01a22b50f0e74fee2ad253e", size = 993855 },
{ url = "https://files.pythonhosted.org/packages/5f/77/4f268c41a3957c418b084dd576ea2fad2e95da0d8e1ab705372892c2ca22/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_aarch64.whl", hash = "sha256:dfdfaa5ffff8993a3af94d1125870b1d27aed7cb97aa7eb8c1cefdbc87dbee63", size = 1129022 },
{ url = "https://files.pythonhosted.org/packages/4e/2b/fc46c90fe5028bd094cd6ee25a7db321cb91d45dc87531e2bdbb26b4867a/tiktoken-0.12.0-cp314-cp314t-manylinux_2_28_x86_64.whl", hash = "sha256:584c3ad3d0c74f5269906eb8a659c8bfc6144a52895d9261cdaf90a0ae5f4de0", size = 1150736 },
{ url = "https://files.pythonhosted.org/packages/28/c0/3c7a39ff68022ddfd7d93f3337ad90389a342f761c4d71de99a3ccc57857/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_aarch64.whl", hash = "sha256:54c891b416a0e36b8e2045b12b33dd66fb34a4fe7965565f1b482da50da3e86a", size = 1194908 },
{ url = "https://files.pythonhosted.org/packages/ab/0d/c1ad6f4016a3968c048545f5d9b8ffebf577774b2ede3e2e352553b685fe/tiktoken-0.12.0-cp314-cp314t-musllinux_1_2_x86_64.whl", hash = "sha256:5edb8743b88d5be814b1a8a8854494719080c28faaa1ccbef02e87354fe71ef0", size = 1253706 },
{ url = "https://files.pythonhosted.org/packages/af/df/c7891ef9d2712ad774777271d39fdef63941ffba0a9d59b7ad1fd2765e57/tiktoken-0.12.0-cp314-cp314t-win_amd64.whl", hash = "sha256:f61c0aea5565ac82e2ec50a05e02a6c44734e91b51c10510b084ea1b8e633a71", size = 920667 },
]
[[package]] [[package]]
name = "tokenizers" name = "tokenizers"
version = "0.22.1" version = "0.22.1"