feat: add NER annotation step and integrate into pipeline

This commit is contained in:
2025-08-11 07:13:09 +02:00
parent 6d39c3afc1
commit d5a4aaaf4a
23 changed files with 1108 additions and 160 deletions
+3 -1
View File
@@ -12,6 +12,7 @@ processing:
stages:
- "data_cleaning"
- "feature_extraction"
#- "ner_annotation"
#- "llm_annotation"
- "data_splitting"
@@ -27,7 +28,8 @@ llm:
# Data handling configuration
data:
max_dataset_size: 100_000
split_evaluation: false
max_dataset_size: null
balance_by_sex: true
# Enhanced logging for development