feat: support gpu

This commit is contained in:
2025-09-29 21:07:23 +02:00
parent 9e35f95107
commit a1d500830b
15 changed files with 661 additions and 85 deletions
+6 -2
View File
@@ -20,11 +20,15 @@ class DataSelectionStep(PipelineStep):
# Remove rows whose region is "global" (case-insensitive match), but only for the years in target_years
if "region" in batch.columns and "year" in batch.columns:
target_years = {2015, 2021, 2022}
mask_remove = batch["region"].str.lower().eq("global") & batch["year"].isin(target_years)
mask_remove = batch["region"].str.lower().eq("global") & batch["year"].isin(
target_years
)
removed = int(mask_remove.sum())
if removed:
batch = batch[~mask_remove]
logging.info(f"Removed {removed} rows with region == 'global' for years {sorted(target_years)} in batch {batch_id}")
logging.info(
f"Removed {removed} rows with region == 'global' for years {sorted(target_years)} in batch {batch_id}"
)
# Check which columns exist in the batch
available_columns = [col for col in self.selected_columns if col in batch.columns]