57 lines
1.8 KiB
Python
57 lines
1.8 KiB
Python
import threading
|
|
import time
|
|
from dataclasses import dataclass
|
|
from queue import Queue
|
|
|
|
|
|
@dataclass
class RateLimitConfig:
    """Tunable limits applied to outgoing LLM requests.

    Attributes:
        requests_per_minute: Upper bound on requests inside a 60-second
            window.
        requests_per_second: Upper bound on requests per second; the
            limiter in this file turns it into a minimum gap of
            ``1 / requests_per_second`` seconds between requests.
        burst_limit: Size of a permitted burst.
            NOTE(review): the limiter in this file never reads this
            value -- confirm the intended semantics with its consumers.
    """

    # Per-minute ceiling.
    requests_per_minute: int = 60
    # Per-second ceiling.
    requests_per_second: int = 2
    # Burst allowance (see the note in the class docstring).
    burst_limit: int = 5
|
|
|
|
|
|
class RateLimiter:
    """Thread-safe rate limiter for LLM requests.

    Two limits from ``config`` are enforced on every call to
    :meth:`wait_if_needed`:

    * consecutive requests are spaced at least
      ``1 / config.requests_per_second`` seconds apart, and
    * at most ``config.requests_per_minute`` requests are admitted in any
      rolling 60-second window.

    ``config.burst_limit`` is not consulted by this class.

    Timestamps are taken from ``time.monotonic()`` so that wall-clock
    adjustments (NTP steps, manual changes) can never produce negative
    intervals or multi-hour sleeps. As a consequence ``request_times`` and
    ``last_request_time`` hold monotonic-clock readings, which are only
    meaningful relative to each other, not as epoch timestamps.
    """

    # Length, in seconds, of the rolling window for the per-minute limit.
    _WINDOW_SECONDS = 60.0

    def __init__(self, config: "RateLimitConfig"):
        """Create a limiter governed by ``config``.

        Args:
            config: Object exposing ``requests_per_second`` and
                ``requests_per_minute``.

        Raises:
            ValueError: If either limit is not strictly positive. Such
                values would otherwise surface later as a
                ``ZeroDivisionError`` or ``IndexError`` inside
                :meth:`wait_if_needed`.
        """
        if config.requests_per_second <= 0:
            raise ValueError(
                f"requests_per_second must be positive, got {config.requests_per_second!r}"
            )
        if config.requests_per_minute <= 0:
            raise ValueError(
                f"requests_per_minute must be positive, got {config.requests_per_minute!r}"
            )

        self.config = config
        # Monotonic timestamps of admitted requests, oldest first.
        self.request_times = Queue()
        # Guards request_times and last_request_time.
        self.lock = threading.Lock()
        # -inf means "no request yet", so the first call never sleeps
        # regardless of where the monotonic clock's origin happens to be.
        self.last_request_time = float("-inf")

    def _prune(self, now):
        """Discard recorded timestamps that are a full window or more old.

        The caller must hold ``self.lock``.
        """
        # Queue has no peek; ``.queue`` is its underlying deque, which is
        # safe to inspect here because every access happens under self.lock.
        timestamps = self.request_times.queue
        while timestamps and now - timestamps[0] >= self._WINDOW_SECONDS:
            self.request_times.get_nowait()

    def wait_if_needed(self):
        """Block until a request may be sent, then record it.

        The sleep happens while ``self.lock`` is held, so concurrent
        callers are admitted one at a time, each after its own wait.
        """
        with self.lock:
            now = time.monotonic()

            # Per-second limit: keep a minimum gap after the last request.
            min_interval = 1.0 / self.config.requests_per_second
            remaining = min_interval - (now - self.last_request_time)
            if remaining > 0:
                time.sleep(remaining)
                now = time.monotonic()

            # Per-minute limit: wait until the oldest recorded request has
            # left the window. Re-prune after every sleep so the queue never
            # holds more than requests_per_minute live entries, and loop in
            # case the sleep returned early.
            self._prune(now)
            while self.request_times.qsize() >= self.config.requests_per_minute:
                oldest = self.request_times.queue[0]
                remaining = self._WINDOW_SECONDS - (now - oldest)
                if remaining > 0:
                    time.sleep(remaining)
                now = time.monotonic()
                self._prune(now)

            # Record this request.
            self.request_times.put(now)
            self.last_request_time = now
|