Files
drc-ners-nlp/config/pipeline.development.yaml
T

41 lines
775 B
YAML

# Name of the environment this configuration targets.
environment: "development"
# Debug switch, enabled in this file.
# NOTE(review): what it toggles is decided by the consuming code — confirm.
debug: true
# Processing settings.
# The four keys below are nested under `processing`; written at column 0
# they load as unrelated top-level keys and `processing` itself is null.
processing:
  # Plain digits on purpose: `100_000` is only an integer for YAML 1.1
  # resolvers; YAML 1.2 core-schema parsers read it as a string.
  batch_size: 100000
  max_workers: 1
  # NOTE(review): unit (batches, rows, seconds) is not visible here — confirm.
  checkpoint_interval: 10
  use_multiprocessing: false
# Pipeline stages.
# Entries left as comments are not part of the loaded list.
stages:
  - "data_cleaning"
  - "feature_extraction"
  # - "ner_annotation"
  # - "llm_annotation"
  - "data_splitting"
# LLM settings for the development environment.
# NOTE(review): this heading used to read "Production"; confirm the values
# below are the ones intended for development.
# The keys are nested under `llm`; written at column 0 they load as
# top-level keys and `llm` itself is null.
llm:
  # Quoted because the value contains a colon.
  model_name: "mistral:7b"
  # NOTE(review): both a per-minute and a per-second limit are set; how they
  # combine is decided by the consumer — confirm.
  requests_per_minute: 120
  requests_per_second: 3
  retry_attempts: 3
  timeout_seconds: 45
  max_concurrent_requests: 4
  enable_rate_limiting: true
# Data handling configuration.
# The keys are nested under `data`; written at column 0 they load as
# top-level keys and `data` itself is null.
data:
  split_evaluation: false
  # Plain digits on purpose: `100_000` is only an integer for YAML 1.1
  # resolvers; YAML 1.2 core-schema parsers read it as a string.
  max_dataset_size: 100000
  balance_by_sex: true
# Logging for the development environment.
# The keys are nested under `logging`; written at column 0 they load as
# top-level keys and `logging` itself is null.
logging:
  # NOTE(review): top-level `debug` is true while the level here is INFO;
  # confirm that DEBUG-level records are not expected in development.
  level: "INFO"
  console_logging: true
  file_logging: true
  # Relative path; the directory it resolves against is decided by the
  # consumer — TODO confirm.
  log_file: "pipeline.development.log"