---
# Name of the environment this file configures.
environment: "development"
# Global debug switch (a real boolean, not a string).
debug: true

# Processing settings
processing:
  # Written with plain digits: the `10_000` spelling is an integer only
  # under YAML 1.1; YAML 1.2 core-schema parsers load it as a string.
  batch_size: 10000
  max_workers: 8
  # NOTE(review): the unit of this interval is not stated here; confirm
  # against the consumer.
  checkpoint_interval: 10
  use_multiprocessing: true

# Pipeline stages
# NOTE(review): presumably executed in the order listed; confirm against
# the pipeline runner.
stages:
  - "data_cleaning"
  - "feature_extraction"
  # Disabled stage; uncomment the next line to re-enable it.
  # - "llm_annotation"
  - "data_splitting"

# LLM settings
# NOTE(review): this heading originally read "Production LLM settings", but
# the file declares environment: "development"; confirm these values are
# the ones intended for development.
llm:
  model_name: "mistral:7b"
  # NOTE(review): 120 requests per minute is 2 per second, while
  # requests_per_second allows 3; confirm how the consumer combines the
  # two limits.
  requests_per_minute: 120
  requests_per_second: 3
  retry_attempts: 3
  timeout_seconds: 45
  max_concurrent_requests: 4
  enable_rate_limiting: true

# Data handling configuration
data:
  # Written with plain digits: the `100_000` spelling is an integer only
  # under YAML 1.1; YAML 1.2 core-schema parsers load it as a string.
  max_dataset_size: 100000
  balance_by_sex: true

# Enhanced logging for development
logging:
  # NOTE(review): the top-level `debug` flag is true while the level here
  # is "INFO"; confirm that DEBUG-level output is not expected.
  level: "INFO"
  console_logging: true
  file_logging: true
  log_file: "pipeline.development.log"