fix: add missing regions in region_mapper

This commit is contained in:
2025-09-23 00:05:35 +02:00
parent c1b502c878
commit dda83510ac
5 changed files with 939 additions and 101 deletions
+9
View File
@@ -17,6 +17,15 @@ class DataSelectionStep(PipelineStep):
"""Process a single batch for data selection"""
logging.info(f"Selecting columns for batch {batch_id} with {len(batch)} rows")
# Drop rows whose region is "global" (compared case-insensitively), but only for the years listed in target_years below
if "region" in batch.columns and "year" in batch.columns:
target_years = {2015, 2021, 2022}
mask_remove = batch["region"].str.lower().eq("global") & batch["year"].isin(target_years)
removed = int(mask_remove.sum())
if removed:
batch = batch[~mask_remove]
logging.info(f"Removed {removed} rows with region == 'global' for years {sorted(target_years)} in batch {batch_id}")
# Check which columns exist in the batch
available_columns = [col for col in self.selected_columns if col in batch.columns]
missing_columns = [col for col in self.selected_columns if col not in batch.columns]