feat: support GPU (prefer GPU for spaCy NER training when available)
This commit is contained in:
@@ -29,6 +29,15 @@ class NameModel:
|
||||
"""Create a blank spaCy model with NER pipeline"""
|
||||
logging.info(f"Creating blank {language} model for NER training")
|
||||
|
||||
# Prefer GPU for spaCy if available (falls back to CPU automatically)
|
||||
try:
|
||||
if spacy.prefer_gpu():
|
||||
logging.info("spaCy GPU enabled (cupy) for NER training")
|
||||
else:
|
||||
logging.info("spaCy running on CPU")
|
||||
except Exception as e:
|
||||
logging.debug(f"spaCy GPU selection skipped: {e}")
|
||||
|
||||
# Create blank model - French tokenizer works well for DRC names
|
||||
self.nlp = spacy.blank(language)
|
||||
|
||||
|
||||
@@ -20,11 +20,15 @@ class DataSelectionStep(PipelineStep):
|
||||
# Remove rows where region == "global", but only for the target years listed below
|
||||
if "region" in batch.columns and "year" in batch.columns:
|
||||
target_years = {2015, 2021, 2022}
|
||||
mask_remove = batch["region"].str.lower().eq("global") & batch["year"].isin(target_years)
|
||||
mask_remove = batch["region"].str.lower().eq("global") & batch["year"].isin(
|
||||
target_years
|
||||
)
|
||||
removed = int(mask_remove.sum())
|
||||
if removed:
|
||||
batch = batch[~mask_remove]
|
||||
logging.info(f"Removed {removed} rows with region == 'global' for years {sorted(target_years)} in batch {batch_id}")
|
||||
logging.info(
|
||||
f"Removed {removed} rows with region == 'global' for years {sorted(target_years)} in batch {batch_id}"
|
||||
)
|
||||
|
||||
# Check which columns exist in the batch
|
||||
available_columns = [col for col in self.selected_columns if col in batch.columns]
|
||||
|
||||
Reference in New Issue
Block a user