Files
drc-ners-nlp/src/ners/core/config/processing_config.py
T
2025-10-05 18:14:15 +02:00

18 lines
426 B
Python

from dataclasses import field
from pydantic import BaseModel
class ProcessingConfig(BaseModel):
"""Data processing pipeline configuration"""
batch_size: int = 1000
max_workers: int = 4
checkpoint_interval: int = 5
use_multiprocessing: bool = False
encoding_options: list = field(
default_factory=lambda: ["utf-8", "utf-16", "latin1"]
)
chunk_size: int = 100_000
epochs: int = 2