---
# Development Environment Configuration
|
|
# Settings for development runs (debug mode enabled)
|
|
|
|
# Pipeline identity and run mode.
name: 'drc_names_pipeline'
# Kept quoted so the version is never type-inferred by the loader.
version: '1.0.0'
environment: 'development'
debug: true
|
|
|
|
# Processing settings
|
|
processing:
  # Written without digit separators so that every YAML loader (1.1 and 1.2)
  # resolves the value as an integer rather than a string.
  batch_size: 100000
  max_workers: 8
  checkpoint_interval: 10
  use_multiprocessing: true  # Enable multiprocessing for CPU-bound tasks
|
|
|
|
# Pipeline stages
|
|
# NOTE(review): presumably executed in the order listed — confirm against
# the pipeline runner.
stages:
  - 'data_cleaning'
  - 'feature_extraction'
  # Commented out, so not part of the list; uncomment the next line to
  # include it again.
  # - 'llm_annotation'
  - 'data_splitting'
|
|
|
|
|
|
# LLM settings (development environment)
|
|
llm:
  # Quoted because the value contains a colon.
  model_name: 'mistral:7b'
  # NOTE(review): 3 requests/second permits up to 180 requests/minute, which
  # is above the 120/minute cap; presumably both limits are enforced
  # together — confirm with the rate limiter implementation.
  requests_per_minute: 120
  requests_per_second: 3
  retry_attempts: 3
  timeout_seconds: 45
  max_concurrent_requests: 4
  enable_rate_limiting: true
|
|
|
|
# Data split settings (development environment)
|
|
data:
  split_evaluation: true
  split_by_gender: true
  # NOTE(review): presumably the share of records held out for evaluation
  # (0.2 = 20%) — confirm against the splitting stage.
  evaluation_fraction: 0.2
  # Fixed seed value; presumably used to make the split reproducible.
  random_seed: 42
|
|
|
|
# Enhanced logging for development
|
|
logging:
  # Quoted string level name.
  level: 'INFO'
  # Both console and file logging flags are switched on.
  console_logging: true
  file_logging: true
  log_file: 'pipeline.development.log'
|