# File: drc-ners-nlp/core/utils/rate_limiter.py (Python, 57 lines, 1.8 KiB)

import threading
import time
from collections import deque
from dataclasses import dataclass
from queue import Queue
@dataclass
class RateLimitConfig:
    """Configuration for rate limiting LLM requests.

    Attributes:
        requests_per_minute: Maximum number of requests allowed within a
            60-second window. Must be positive.
        requests_per_second: Sustained request rate; ``RateLimiter`` turns
            it into a minimum spacing of ``1 / requests_per_second`` seconds
            between consecutive requests. Must be positive.
        burst_limit: Not read by ``RateLimiter`` in this module.
            NOTE(review): presumably the maximum burst size -- confirm the
            intended semantics before relying on it.

    Raises:
        ValueError: If ``requests_per_minute`` or ``requests_per_second`` is
            zero or negative.
    """

    requests_per_minute: int = 60
    requests_per_second: int = 2
    burst_limit: int = 5

    def __post_init__(self) -> None:
        # Fail at construction time: a zero rate would otherwise surface
        # later as a ZeroDivisionError (1 / requests_per_second) or an
        # IndexError inside the limiter, far from the misconfiguration.
        for name in ("requests_per_minute", "requests_per_second"):
            value = getattr(self, name)
            if value <= 0:
                raise ValueError(f"{name} must be positive, got {value!r}")
class RateLimiter:
    """Thread-safe rate limiter for LLM requests.

    Two limits from ``config`` are enforced on every call to
    ``wait_if_needed``:

    * consecutive requests are spaced at least
      ``1 / config.requests_per_second`` seconds apart, and
    * at most ``config.requests_per_minute`` requests are admitted within
      any 60-second sliding window.

    ``config.burst_limit`` is not consulted by this class.

    Attributes:
        config: The configuration object supplied to the constructor.
        request_times: ``deque`` of ``time.monotonic()`` timestamps of the
            requests admitted within the current window, oldest first.
        lock: Lock guarding all limiter state.
        last_request_time: ``time.monotonic()`` timestamp of the most
            recently admitted request (``-inf`` before the first one).
    """

    # Length of the sliding window used for the per-minute limit.
    _WINDOW_SECONDS = 60.0

    def __init__(self, config: "RateLimitConfig"):
        """Create a limiter bound to ``config``.

        Args:
            config: Object exposing ``requests_per_minute`` and
                ``requests_per_second``.

        Raises:
            ValueError: If either rate is zero or negative.
        """
        # A zero rate would otherwise fail on the first wait_if_needed()
        # call with a ZeroDivisionError or IndexError.
        for name in ("requests_per_minute", "requests_per_second"):
            value = getattr(config, name)
            if value <= 0:
                raise ValueError(f"{name} must be positive, got {value!r}")

        self.config = config
        self.request_times = deque()
        self.lock = threading.Lock()
        # -inf rather than 0: the monotonic clock has an arbitrary origin,
        # so 0 is not guaranteed to be "long ago".
        self.last_request_time = float("-inf")

    def _drop_expired(self, now):
        """Remove timestamps that have left the sliding window."""
        window = self._WINDOW_SECONDS
        timestamps = self.request_times
        while timestamps and now - timestamps[0] >= window:
            timestamps.popleft()

    def wait_if_needed(self):
        """Block until the next request is allowed, then record it.

        The lock is held for the whole call, including while sleeping, so
        concurrent callers are admitted one at a time.
        """
        with self.lock:
            # Monotonic time is immune to wall-clock adjustments, which
            # would otherwise distort the computed sleep durations.
            now = time.monotonic()

            # Per-second limit: enforce a minimum gap since the last request.
            min_interval = 1.0 / self.config.requests_per_second
            remaining = min_interval - (now - self.last_request_time)
            if remaining > 0:
                time.sleep(remaining)
                now = time.monotonic()

            # Per-minute limit: wait for the oldest request to leave the
            # window. Re-check after every sleep so the window never holds
            # more than the configured number of entries.
            self._drop_expired(now)
            per_minute = self.config.requests_per_minute
            while len(self.request_times) >= per_minute:
                remaining = self._WINDOW_SECONDS - (now - self.request_times[0])
                if remaining > 0:
                    time.sleep(remaining)
                    now = time.monotonic()
                self._drop_expired(now)

            # Record this request.
            self.request_times.append(now)
            self.last_request_time = now