feat: add osm data
This commit is contained in:
@@ -21,6 +21,7 @@ paths:
|
||||
# List of stages in the processing pipeline
|
||||
stages:
|
||||
- "data_cleaning" # Data cleaning stage
|
||||
- "data_selection" # Data selection stage - keep only required columns
|
||||
- "feature_extraction" # Feature extraction stage
|
||||
- "ner_annotation" # NER-based annotation stage
|
||||
- "llm_annotation" # LLM annotation stage (computationally intensive)
|
||||
@@ -64,6 +65,11 @@ data:
|
||||
females: "names_females.csv" # Output file for female names
|
||||
ner_data: "names_ner.json" # Output file for NER annotated data
|
||||
ner_spacy: "names_ner.spacy" # Output file for NER annotated data using spaCy format
|
||||
selected_columns: # Required columns for processing
|
||||
- name
|
||||
- sex
|
||||
- region
|
||||
- year
|
||||
split_evaluation: false # Whether to split the dataset into training and evaluation sets
|
||||
split_by_gender: true # Whether to split the dataset by gender
|
||||
split_by_province: true # Whether to split the dataset by province
|
||||
|
||||
Reference in New Issue
Block a user