refactoring: uv
This commit is contained in:
@@ -0,0 +1,56 @@
|
||||
import threading
|
||||
import time
|
||||
from dataclasses import dataclass
|
||||
from queue import Queue
|
||||
|
||||
|
||||
@dataclass
class RateLimitConfig:
    """Configuration for rate limiting LLM requests.

    Attributes:
        requests_per_minute: Maximum number of requests allowed within a
            60-second window. Must be greater than zero.
        requests_per_second: Maximum request rate per second; a limiter
            spaces consecutive requests ``1 / requests_per_second`` seconds
            apart. Must be greater than zero.
        burst_limit: Burst allowance. NOTE(review): no code visible next to
            this class reads this field, so its exact semantics should be
            confirmed against its consumer; it is therefore not validated.

    Raises:
        ValueError: If ``requests_per_minute`` or ``requests_per_second`` is
            zero or negative.
    """

    requests_per_minute: int = 60
    requests_per_second: int = 2
    burst_limit: int = 5

    def __post_init__(self) -> None:
        """Reject rates that no limiter could honour."""
        # Fail at construction with a clear message instead of letting a
        # non-positive rate surface later as an arithmetic or indexing error
        # (the per-second interval is computed as 1 / requests_per_second).
        for field_name in ("requests_per_minute", "requests_per_second"):
            value = getattr(self, field_name)
            if value <= 0:
                raise ValueError(
                    f"{field_name} must be greater than zero, got {value!r}"
                )
|
||||
|
||||
|
||||
class RateLimiter:
    """Thread-safe rate limiter for LLM requests.

    Enforces two limits taken from the supplied configuration: a minimum
    spacing of ``1 / requests_per_second`` seconds between consecutive
    requests, and at most ``requests_per_minute`` requests inside a 60-second
    window. ``burst_limit`` is not consulted by this class.

    All timestamps are readings of ``time.monotonic()``. Unlike wall-clock
    time, that clock never goes backwards, so an adjustment of the system
    clock cannot yield a negative elapsed time and an oversized sleep. The
    stored values are only meaningful relative to one another; they are not
    calendar times.
    """

    def __init__(self, config: "RateLimitConfig"):
        """Create a limiter governed by *config*.

        Args:
            config: Object providing ``requests_per_second`` and
                ``requests_per_minute``; it is re-read on every call to
                ``wait_if_needed``.
        """
        self.config = config
        # Monotonic timestamps of the requests in the current window,
        # oldest first.
        self.request_times = Queue()
        # Held for the whole of wait_if_needed, so callers are admitted one
        # at a time.
        self.lock = threading.Lock()
        # Monotonic timestamp of the most recently admitted request. The
        # reference point of the monotonic clock is undefined, so "no request
        # yet" is -inf rather than 0: the first call must never wait.
        self.last_request_time = float("-inf")

    def wait_if_needed(self):
        """Block until another request is allowed, then record it.

        The calling thread sleeps while holding the lock, so concurrent
        callers queue up behind it instead of being admitted together.

        Raises:
            ZeroDivisionError: If ``config.requests_per_second`` is zero.
        """
        with self.lock:
            now = time.monotonic()

            # Per-second limit: keep consecutive requests min_interval apart.
            min_interval = 1.0 / self.config.requests_per_second
            elapsed = now - self.last_request_time
            if elapsed < min_interval:
                time.sleep(min_interval - elapsed)
                now = time.monotonic()

            # Queue.queue is the deque backing the Queue; it is used to peek
            # at the oldest timestamp without removing it. The same deque
            # object is kept for the Queue's lifetime, and self.lock guards
            # every access made here.
            history = self.request_times.queue

            # Drop timestamps that have left the 60-second window.
            while history and now - history[0] > 60:
                self.request_times.get_nowait()

            # Per-minute limit: when the window is full, wait until its
            # oldest entry turns 60 seconds old.
            if self.request_times.qsize() >= self.config.requests_per_minute:
                wait_time = 60 - (now - history[0])
                if wait_time > 0:
                    time.sleep(wait_time)
                    now = time.monotonic()

            # Record this request.
            self.request_times.put(now)
            self.last_request_time = now
|
||||
Reference in New Issue
Block a user