feat: add NER annotation step and integrate into pipeline
This commit is contained in:
@@ -25,4 +25,7 @@ class DataCleaningStep(PipelineStep):
|
||||
# Apply text cleaning
|
||||
batch = self.text_cleaner.clean_dataframe_text_columns(batch)
|
||||
|
||||
# Remove duplicates
|
||||
batch = batch.drop_duplicates(subset=self.required_columns)
|
||||
|
||||
return batch
|
||||
|
||||
Reference in New Issue
Block a user