feat: add NER annotation step and integrate into pipeline

This commit is contained in:
2025-08-11 07:13:09 +02:00
parent 6d39c3afc1
commit d5a4aaaf4a
23 changed files with 1108 additions and 160 deletions
+3
View File
@@ -25,4 +25,7 @@ class DataCleaningStep(PipelineStep):
# Apply text cleaning
batch = self.text_cleaner.clean_dataframe_text_columns(batch)
# Remove duplicates
batch = batch.drop_duplicates(subset=self.required_columns)
return batch