# Pipeline configuration for the "development" environment.
# Layout: block style, 2-space indentation, one top-level key per stanza.

environment: "development"
debug: true

# Processing settings
processing:
  # Written without digit separators: `100_000` is only an integer under
  # YAML 1.1 resolvers; YAML 1.2 core-schema parsers read it as a string.
  batch_size: 100000
  max_workers: 1
  checkpoint_interval: 10
  use_multiprocessing: false

# Pipeline stages (commented-out entries are disabled)
stages:
  - "data_cleaning"
  - "data_selection"
  - "feature_extraction"
  # - "ner_annotation"
  # - "llm_annotation"
  - "data_splitting"

# LLM settings
# NOTE(review): this stanza was labelled "Production" although
# `environment` is "development" - confirm the values suit development.
llm:
  model_name: "mistral:7b"
  # NOTE(review): 3 requests/second permits up to 180 requests/minute,
  # which exceeds requests_per_minute (120) - presumably both caps are
  # enforced and the stricter one wins; confirm against the consumer.
  requests_per_minute: 120
  requests_per_second: 3
  retry_attempts: 3
  timeout_seconds: 45
  max_concurrent_requests: 4
  enable_rate_limiting: true

# Data handling configuration
data:
  split_evaluation: false
  # Plain integer for YAML 1.1 / 1.2 portability (was `10_000`).
  max_dataset_size: 10000
  balance_by_sex: true

# Enhanced logging for development
logging:
  level: "INFO"
  console_logging: true
  file_logging: true
  log_file: "pipeline.development.log"