refactoring: add initial pipeline configuration and model classes

This commit is contained in:
2025-08-04 16:12:25 +02:00
parent 19c66fd0ee
commit f4689faf80
82 changed files with 7176 additions and 1218 deletions
+11
View File
@@ -0,0 +1,11 @@
from dataclasses import dataclass
@dataclass
class BatchConfig:
    """Configuration for batch processing.

    Plain settings container: the ``dataclass`` decorator generates the
    constructor, ``repr`` and equality from the fields below. No validation
    is performed on the values.

    Attributes:
        batch_size: Batch size setting (presumably the number of items per
            batch -- confirm against the consumer).
        max_workers: Worker count setting (presumably the executor pool
            size -- confirm against the consumer).
        checkpoint_interval: Save a checkpoint every N batches.
        use_multiprocessing: Use ProcessPoolExecutor instead of
            ThreadPoolExecutor when True.
    """

    batch_size: int = 1000
    max_workers: int = 4
    checkpoint_interval: int = 5  # Save checkpoint every N batches
    use_multiprocessing: bool = False  # Use ProcessPoolExecutor instead of ThreadPoolExecutor