From a1d500830bf81cdfff47ccf097d2ecd0eeed09a9 Mon Sep 17 00:00:00 2001
From: bernard-ng
Date: Mon, 29 Sep 2025 21:07:23 +0200
Subject: [PATCH] feat: support gpu

---
 README.md                               |  51 +++
 config/gpu_research_templates.yaml      | 412 ++++++++++++++++++++++++
 core/utils/region_mapper.py             |  12 +-
 processing/ner/name_model.py            |   9 +
 processing/steps/data_selection_step.py |   8 +-
 research/models/bigru_model.py          |   2 +-
 research/models/cnn_model.py            |   2 +-
 research/models/lightgbm_model.py       |   9 +
 research/models/lstm_model.py           |   2 +-
 research/models/transformer_model.py    |   2 +-
 research/models/xgboost_model.py        |  11 +-
 research/neural_network_model.py        |  85 +++++
 research/statistics/__init__.py         |   2 +-
 research/statistics/plots.py            |  14 +-
 research/statistics/utils.py            | 125 ++++---
 15 files changed, 661 insertions(+), 85 deletions(-)
 create mode 100644 config/gpu_research_templates.yaml

diff --git a/README.md b/README.md
index 7065314..dc429bb 100644
--- a/README.md
+++ b/README.md
@@ -166,6 +166,57 @@ experiments and make predictions without needing to understand the underlying co
 streamlit run web/app.py
 ```
 
+## GPU Acceleration
+
+This project can leverage GPUs for faster training when supported libraries and hardware are available.
+
+- TensorFlow/Keras models (BiGRU, LSTM, CNN, Transformer)
+  - Uses GPU automatically if a TensorFlow GPU build is installed.
+  - The code enables GPU memory growth by default so TensorFlow does not pre-allocate all VRAM; optionally enable mixed precision for additional speed:
+    - Add `mixed_precision: true` in the experiment `model_params` (e.g., in `config/research_templates.yaml`).
+    - The final layer outputs are set to float32 for numerical stability under mixed precision.
+
+- spaCy NER
+  - Automatically prefers GPU if available; otherwise falls back to CPU.
+  - Ensure a compatible CUDA-enabled spaCy/thinc stack is installed to use GPU.
+
+- XGBoost
+  - Enable GPU by adding to the experiment `model_params`:
+    - `use_gpu: true` (sets `tree_method: gpu_hist` and `predictor: gpu_predictor`).
+
+- LightGBM
+  - Enable GPU by adding to the experiment `model_params`:
+    - `use_gpu: true` (sets `device: gpu`). Optional: `gpu_platform_id`, `gpu_device_id`.
+
+Example template snippet (GPU on):
+
+```yaml
+- name: "lstm_gpu"
+  description: "LSTM with GPU + mixed precision"
+  model_type: "lstm"
+  features: ["full_name"]
+  model_params:
+    embedding_dim: 128
+    lstm_units: 64
+    epochs: 5
+    batch_size: 128
+    use_gpu: true
+    mixed_precision: true
+  tags: ["gpu", "mixed_precision"]
+
+- name: "xgboost_gpu"
+  description: "XGBoost with GPU"
+  model_type: "xgboost"
+  features: ["full_name"]
+  model_params:
+    n_estimators: 200
+    use_gpu: true
+```
+
+Notes:
+- Install CUDA-enabled binaries for TensorFlow/spaCy/LightGBM/XGBoost to actually use GPU.
+- If GPU is requested but not available, training will proceed on CPU with a warning.
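+
+To quickly verify that the GPU stack is actually visible before launching long experiments, a minimal sanity check (assuming CUDA-enabled TensorFlow and spaCy builds are installed) is:
+
+```python
+import tensorflow as tf
+import spacy
+
+# An empty list means this TensorFlow build cannot see a GPU (training runs on CPU).
+print("TensorFlow GPUs:", tf.config.list_physical_devices("GPU"))
+
+# True if spaCy/thinc can use the GPU (requires cupy); False means CPU fallback.
+print("spaCy GPU:", spacy.prefer_gpu())
+```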
+ ## Contributors diff --git a/config/gpu_research_templates.yaml b/config/gpu_research_templates.yaml new file mode 100644 index 0000000..c309415 --- /dev/null +++ b/config/gpu_research_templates.yaml @@ -0,0 +1,412 @@ +baseline_experiments: + # BiGRU Models (GPU-enabled) + - name: "bigru" + description: "Baseline BiGRU with full name features (GPU)" + model_type: "bigru" + features: [ "full_name" ] + model_params: + embedding_dim: 64 + gru_units: 32 + epochs: 2 + batch_size: 32 + use_gpu: true + mixed_precision: true + tags: [ "baseline", "neural", "bigru", "gpu" ] + + - name: "bigru_native" + description: "Baseline BiGRU with native name features (GPU)" + model_type: "bigru" + features: [ "native_name" ] + model_params: + embedding_dim: 64 + gru_units: 32 + epochs: 2 + batch_size: 32 + use_gpu: true + mixed_precision: true + tags: [ "baseline", "neural", "bigru", "native", "gpu" ] + + - name: "bigru_surname" + description: "Baseline BiGRU with surname features (GPU)" + model_type: "bigru" + features: [ "surname" ] + model_params: + embedding_dim: 64 + gru_units: 32 + epochs: 2 + batch_size: 32 + use_gpu: true + mixed_precision: true + tags: [ "baseline", "neural", "bigru", "surname", "gpu" ] + + ## CNN Models (GPU-enabled) + - name: "cnn" + description: "Baseline CNN with character patterns (GPU)" + model_type: "cnn" + features: [ "full_name" ] + model_params: + embedding_dim: 64 + filters: 64 + kernel_size: 3 + dropout: 0.5 + epochs: 2 + batch_size: 32 + use_gpu: true + mixed_precision: true + tags: [ "baseline", "neural", "cnn", "gpu" ] + + - name: "cnn_native" + description: "Baseline CNN with native name character patterns (GPU)" + model_type: "cnn" + features: [ "native_name" ] + model_params: + embedding_dim: 64 + filters: 64 + kernel_size: 3 + dropout: 0.5 + epochs: 2 + batch_size: 32 + use_gpu: true + mixed_precision: true + tags: [ "baseline", "neural", "cnn", "native", "gpu" ] + + - name: "cnn_surname" + description: "Baseline CNN with surname character patterns (GPU)" + model_type: "cnn" + features: [ "surname" ] + model_params: + embedding_dim: 64 + filters: 64 + kernel_size: 3 + dropout: 0.5 + epochs: 2 + batch_size: 32 + use_gpu: true + mixed_precision: true + tags: [ "baseline", "neural", "cnn", "surname", "gpu" ] + + ## Ensemble Models (CPU) + - name: "ensemble" + description: "Baseline Ensemble with multiple models" + model_type: "ensemble" + features: [ "full_name" ] + model_params: + base_models: [ "logistic_regression", "random_forest", "xgboost" ] + voting: "soft" + cv_folds: 5 + tags: [ "baseline", "ensemble" ] + + - name: "ensemble_native" + description: "Baseline Ensemble with native name" + model_type: "ensemble" + features: [ "native_name" ] + model_params: + base_models: [ "logistic_regression", "random_forest", "xgboost" ] + voting: "soft" + cv_folds: 5 + tags: [ "baseline", "ensemble", "native" ] + + - name: "ensemble_surname" + description: "Baseline Ensemble with surname" + model_type: "ensemble" + features: [ "surname" ] + model_params: + base_models: [ "logistic_regression", "random_forest", "xgboost" ] + voting: "soft" + cv_folds: 5 + tags: [ "baseline", "ensemble", "surname" ] + + # LightGBM Models (GPU-enabled) + - name: "lightgbm" + description: "Baseline LightGBM with engineered features (GPU)" + model_type: "lightgbm" + features: [ "full_name" ] + model_params: + n_estimators: 100 + max_depth: -1 + learning_rate: 0.1 + num_leaves: 31 + subsample: 0.8 + colsample_bytree: 0.8 + use_gpu: true + tags: [ "baseline", "lightgbm", "gpu" ] + + - name: 
"lightgbm_native" + description: "Baseline LightGBM with native name features (GPU)" + model_type: "lightgbm" + features: [ "native_name" ] + model_params: + n_estimators: 100 + max_depth: -1 + learning_rate: 0.1 + num_leaves: 31 + subsample: 0.8 + colsample_bytree: 0.8 + use_gpu: true + tags: [ "baseline", "lightgbm", "native", "gpu" ] + + - name: "lightgbm_surname" + description: "Baseline LightGBM with surname features (GPU)" + model_type: "lightgbm" + features: [ "surname" ] + model_params: + n_estimators: 100 + max_depth: -1 + learning_rate: 0.1 + num_leaves: 31 + subsample: 0.8 + colsample_bytree: 0.8 + use_gpu: true + tags: [ "baseline", "lightgbm", "surname", "gpu" ] + + # Logistic Regression Models (CPU) + - name: "logistic_regression" + description: "Baseline logistic regression with full name" + model_type: "logistic_regression" + features: [ "full_name" ] + model_params: + max_features: 10000 + tags: [ "baseline", "logistic_regression", "fullname" ] + + - name: "logistic_regression_native" + description: "Logistic regression with native name only" + model_type: "logistic_regression" + features: [ "native_name" ] + model_params: + max_features: 5000 + tags: [ "baseline", "logistic_regression", "native" ] + + - name: "logistic_regression_surname" + description: "Logistic regression with surname name only" + model_type: "logistic_regression" + features: [ "surname" ] + model_params: + max_features: 5000 + tags: [ "baseline", "logistic_regression", "surname" ] + + # LSTM Models (GPU-enabled) + - name: "lstm" + description: "Baseline LSTM with full name features (GPU)" + model_type: "lstm" + features: [ "full_name" ] + model_params: + embedding_dim: 128 + lstm_units: 64 + epochs: 2 + batch_size: 64 + use_gpu: true + mixed_precision: true + tags: [ "baseline", "neural", "lstm", "gpu" ] + + - name: "lstm_native" + description: "Baseline LSTM with native name features (GPU)" + model_type: "lstm" + features: [ "native_name" ] + model_params: + embedding_dim: 128 + lstm_units: 64 + epochs: 2 + batch_size: 64 + use_gpu: true + mixed_precision: true + tags: [ "baseline", "neural", "lstm", "native", "gpu" ] + + - name: "lstm_surname" + description: "Baseline LSTM with surname features (GPU)" + model_type: "lstm" + features: [ "surname" ] + model_params: + embedding_dim: 128 + lstm_units: 64 + epochs: 2 + batch_size: 64 + use_gpu: true + mixed_precision: true + tags: [ "baseline", "neural", "lstm", "surname", "gpu" ] + + # Naive Bayes Models (CPU) + - name: "naive_bayes" + description: "Baseline Naive Bayes with full name features" + model_type: "naive_bayes" + features: [ "full_name" ] + model_params: + max_features: 5000 + tags: [ "baseline", "naive_bayes" ] + + - name: "naive_bayes_native" + description: "Baseline Naive Bayes with native name features" + model_type: "naive_bayes" + features: [ "native_name" ] + model_params: + max_features: 5000 + tags: [ "baseline", "naive_bayes", "native" ] + + - name: "naive_bayes_surname" + description: "Baseline Naive Bayes with surname features" + model_type: "naive_bayes" + features: [ "surname" ] + model_params: + max_features: 5000 + tags: [ "baseline", "naive_bayes", "surname" ] + + # Random Forest Models (CPU) + - name: "random_forest" + description: "Baseline Random Forest with engineered features" + model_type: "random_forest" + features: [ "full_name" ] + model_params: + n_estimators: 100 + max_depth: 10 + min_samples_split: 2 + min_samples_leaf: 1 + tags: [ "baseline", "random_forest", "engineered" ] + + - name: "random_forest_native" + 
description: "Baseline Random Forest with native name engineered features" + model_type: "random_forest" + features: [ "native_name" ] + model_params: + n_estimators: 100 + max_depth: 10 + min_samples_split: 2 + min_samples_leaf: 1 + tags: [ "baseline", "random_forest", "engineered", "native" ] + + - name: "random_forest_surname" + description: "Baseline Random Forest with surname engineered features" + model_type: "random_forest" + features: [ "surname" ] + model_params: + n_estimators: 100 + max_depth: 10 + min_samples_split: 2 + min_samples_leaf: 1 + tags: [ "baseline", "random_forest", "engineered", "surname" ] + + # SVM Models (CPU) + - name: "svm" + description: "Baseline SVM with full name features" + model_type: "svm" + features: [ "full_name" ] + model_params: + C: 1.0 + kernel: "rbf" + ngram_range: [ 2, 4 ] + max_features: 5000 + tags: [ "baseline", "svm" ] + + - name: "svm_native" + description: "Baseline SVM with native name features" + model_type: "svm" + features: [ "native_name" ] + model_params: + C: 1.0 + kernel: "rbf" + ngram_range: [ 2, 4 ] + max_features: 5000 + tags: [ "baseline", "svm", "native" ] + + - name: "svm_surname" + description: "Baseline SVM with surname features" + model_type: "svm" + features: [ "surname" ] + model_params: + C: 1.0 + kernel: "rbf" + ngram_range: [ 2, 4 ] + max_features: 5000 + tags: [ "baseline", "svm", "surname" ] + + # Transformer Models (GPU-enabled) + - name: "transformer" + description: "Baseline Transformer with attention mechanism (GPU)" + model_type: "transformer" + features: [ "full_name" ] + model_params: + embedding_dim: 128 + num_heads: 4 + num_layers: 2 + epochs: 2 + batch_size: 64 + use_gpu: true + mixed_precision: true + tags: [ "baseline", "neural", "transformer", "gpu" ] + + - name: "transformer_native" + description: "Baseline Transformer with native name attention mechanism (GPU)" + model_type: "transformer" + features: [ "native_name" ] + model_params: + embedding_dim: 128 + num_heads: 4 + num_layers: 2 + epochs: 2 + batch_size: 64 + use_gpu: true + mixed_precision: true + tags: [ "baseline", "neural", "transformer", "native", "gpu" ] + + - name: "transformer_surname" + description: "Baseline Transformer with surname attention mechanism (GPU)" + model_type: "transformer" + features: [ "surname" ] + model_params: + embedding_dim: 128 + num_heads: 4 + num_layers: 2 + epochs: 2 + batch_size: 64 + use_gpu: true + mixed_precision: true + tags: [ "baseline", "neural", "transformer", "surname", "gpu" ] + + # XGBoost Models (GPU-enabled) + - name: "xgboost" + description: "Baseline XGBoost with engineered features (GPU)" + model_type: "xgboost" + features: [ "full_name" ] + model_params: + n_estimators: 100 + max_depth: 6 + learning_rate: 0.1 + subsample: 0.8 + colsample_bytree: 0.8 + use_gpu: true + tags: [ "baseline", "xgboost", "gpu" ] + + - name: "xgboost_native" + description: "Baseline XGBoost with native name engineered features (GPU)" + model_type: "xgboost" + features: [ "native_name" ] + model_params: + n_estimators: 100 + max_depth: 6 + learning_rate: 0.1 + subsample: 0.8 + colsample_bytree: 0.8 + use_gpu: true + tags: [ "baseline", "xgboost", "native", "gpu" ] + + - name: "xgboost_surname" + description: "Baseline XGBoost with surname engineered features (GPU)" + model_type: "xgboost" + features: [ "surname" ] + model_params: + n_estimators: 100 + max_depth: 6 + learning_rate: 0.1 + subsample: 0.8 + colsample_bytree: 0.8 + use_gpu: true + tags: [ "baseline", "xgboost", "surname", "gpu" ] + + +# Advanced Experiments 
Configuration +advanced_experiments: + +# Feature Study Configurations +feature_studies: + +# Hyperparameter Tuning Configurations +hyperparameter_tuning: + diff --git a/core/utils/region_mapper.py b/core/utils/region_mapper.py index ec380da..6e54c6e 100644 --- a/core/utils/region_mapper.py +++ b/core/utils/region_mapper.py @@ -19,9 +19,15 @@ class RegionMapper: return ( series.str.upper() .str.strip() - .apply(lambda x: unicodedata.normalize("NFKD", x) - .encode("ascii", errors="ignore") - .decode("utf-8") if isinstance(x, str) else x) + .apply( + lambda x: ( + unicodedata.normalize("NFKD", x) + .encode("ascii", errors="ignore") + .decode("utf-8") + if isinstance(x, str) + else x + ) + ) ) @staticmethod diff --git a/processing/ner/name_model.py b/processing/ner/name_model.py index 782770c..bc4901e 100644 --- a/processing/ner/name_model.py +++ b/processing/ner/name_model.py @@ -29,6 +29,15 @@ class NameModel: """Create a blank spaCy model with NER pipeline""" logging.info(f"Creating blank {language} model for NER training") + # Prefer GPU for spaCy if available (falls back to CPU automatically) + try: + if spacy.prefer_gpu(): + logging.info("spaCy GPU enabled (cupy) for NER training") + else: + logging.info("spaCy running on CPU") + except Exception as e: + logging.debug(f"spaCy GPU selection skipped: {e}") + # Create blank model - French tokenizer works well for DRC names self.nlp = spacy.blank(language) diff --git a/processing/steps/data_selection_step.py b/processing/steps/data_selection_step.py index ab9409e..5b07104 100644 --- a/processing/steps/data_selection_step.py +++ b/processing/steps/data_selection_step.py @@ -20,11 +20,15 @@ class DataSelectionStep(PipelineStep): # Remove rows where region == "global" only for specific years if "region" in batch.columns and "year" in batch.columns: target_years = {2015, 2021, 2022} - mask_remove = batch["region"].str.lower().eq("global") & batch["year"].isin(target_years) + mask_remove = batch["region"].str.lower().eq("global") & batch["year"].isin( + target_years + ) removed = int(mask_remove.sum()) if removed: batch = batch[~mask_remove] - logging.info(f"Removed {removed} rows with region == 'global' for years {sorted(target_years)} in batch {batch_id}") + logging.info( + f"Removed {removed} rows with region == 'global' for years {sorted(target_years)} in batch {batch_id}" + ) # Check which columns exist in the batch available_columns = [col for col in self.selected_columns if col in batch.columns] diff --git a/research/models/bigru_model.py b/research/models/bigru_model.py index 9954f1c..5c9c347 100644 --- a/research/models/bigru_model.py +++ b/research/models/bigru_model.py @@ -48,7 +48,7 @@ class BiGRUModel(NeuralNetworkModel): Dense(64, activation="relu"), Dropout(params.get("dropout", 0.5)), # Two-way softmax for binary gender classification. - Dense(2, activation="softmax"), + Dense(2, activation="softmax", dtype="float32"), ] ) diff --git a/research/models/cnn_model.py b/research/models/cnn_model.py index 8763d6f..d1b1257 100644 --- a/research/models/cnn_model.py +++ b/research/models/cnn_model.py @@ -54,7 +54,7 @@ class CNNModel(NeuralNetworkModel): Dense(64, activation="relu"), Dropout(params.get("dropout", 0.5)), # Two-way softmax for binary classification. 
- Dense(2, activation="softmax"), + Dense(2, activation="softmax", dtype="float32"), ] ) diff --git a/research/models/lightgbm_model.py b/research/models/lightgbm_model.py index 8b242ad..deb895f 100644 --- a/research/models/lightgbm_model.py +++ b/research/models/lightgbm_model.py @@ -20,6 +20,12 @@ class LightGBMModel(TraditionalModel): def build_model(self) -> BaseEstimator: params = self.config.model_params + # Optional GPU acceleration + use_gpu = bool(params.get("use_gpu", False)) + device = params.get("device", "gpu" if use_gpu else "cpu") + gpu_platform_id = params.get("gpu_platform_id", None) + gpu_device_id = params.get("gpu_device_id", None) + # Leaf-wise boosted trees excel on sparse/categorical mixes; binary objective # and parallelism improve training speed for this task. return lgb.LGBMClassifier( @@ -33,6 +39,9 @@ class LightGBMModel(TraditionalModel): objective=params.get("objective", "binary"), n_jobs=params.get("n_jobs", -1), verbose=2, + device=device, + gpu_platform_id=gpu_platform_id, + gpu_device_id=gpu_device_id, ) def prepare_features(self, X: pd.DataFrame) -> np.ndarray: diff --git a/research/models/lstm_model.py b/research/models/lstm_model.py index 7d2cb3c..1e65aa1 100644 --- a/research/models/lstm_model.py +++ b/research/models/lstm_model.py @@ -45,7 +45,7 @@ class LSTMModel(NeuralNetworkModel): Dense(64, activation="relu"), Dropout(params.get("dropout", 0.5)), # Two-way softmax for binary classification. - Dense(2, activation="softmax"), + Dense(2, activation="softmax", dtype="float32"), ] ) diff --git a/research/models/transformer_model.py b/research/models/transformer_model.py index d587d31..f5bdcbe 100644 --- a/research/models/transformer_model.py +++ b/research/models/transformer_model.py @@ -45,7 +45,7 @@ class TransformerModel(NeuralNetworkModel): x = GlobalAveragePooling1D()(x) x = Dense(32, activation="relu")(x) x = Dropout(params.get("dropout", 0.1))(x) - outputs = Dense(2, activation="softmax")(x) + outputs = Dense(2, activation="softmax", dtype="float32")(x) model = Model(inputs, outputs) model.compile( diff --git a/research/models/xgboost_model.py b/research/models/xgboost_model.py index 28093ee..c642203 100644 --- a/research/models/xgboost_model.py +++ b/research/models/xgboost_model.py @@ -20,6 +20,14 @@ class XGBoostModel(TraditionalModel): def build_model(self) -> BaseEstimator: params = self.config.model_params + # Optional GPU acceleration + use_gpu = bool(params.get("use_gpu", False)) + default_tree_method = "gpu_hist" if use_gpu else "hist" + tree_method = params.get("tree_method", default_tree_method) + predictor = params.get( + "predictor", "gpu_predictor" if tree_method.startswith("gpu") else "auto" + ) + # Histogram-based trees and parallelism provide fast training; default # logloss metric suits binary classification of gender. 
return xgb.XGBClassifier( @@ -31,7 +39,8 @@ class XGBoostModel(TraditionalModel): random_state=self.config.random_seed, eval_metric="logloss", n_jobs=params.get("n_jobs", -1), - tree_method=params.get("tree_method", "hist"), + tree_method=tree_method, + predictor=predictor, verbosity=2, ) diff --git a/research/neural_network_model.py b/research/neural_network_model.py index ac90d57..2f28afd 100644 --- a/research/neural_network_model.py +++ b/research/neural_network_model.py @@ -30,6 +30,38 @@ class NeuralNetworkModel(BaseModel): """Fit the neural network model with deferred building""" logging.info(f"Training {self.__class__.__name__}") + # Best-effort GPU configuration for TensorFlow when available + # - Enables memory growth to avoid pre-allocating all VRAM + # - Optionally enables mixed precision if requested via model params + try: + import tensorflow as tf # Imported lazily to avoid dependency for non-NN runs + + requested_gpu = bool(self.config.model_params.get("use_gpu", False)) + enable_mixed = bool(self.config.model_params.get("mixed_precision", False)) + + gpus = tf.config.list_physical_devices("GPU") + if gpus: + for gpu in gpus: + try: + tf.config.experimental.set_memory_growth(gpu, True) + except Exception: + pass + + if enable_mixed: + try: + from tensorflow.keras import mixed_precision + + mixed_precision.set_global_policy("mixed_float16") + logging.info("Enabled TensorFlow mixed precision (float16)") + except Exception as e: + logging.warning(f"Could not enable mixed precision: {e}") + else: + if requested_gpu: + logging.warning("Requested GPU but no TensorFlow GPU device is available.") + except Exception as e: + # Keep silent in non-TF environments / non-NN workflows + logging.debug(f"TensorFlow GPU setup skipped: {e}") + # Setup feature extraction if self.feature_extractor is None: self.feature_extractor = FeatureExtractor( @@ -105,6 +137,32 @@ class NeuralNetworkModel(BaseModel): def cross_validate( self, X: pd.DataFrame, y: pd.Series, cv_folds: int = 5 ) -> dict[str, np.floating[Any]]: + # Ensure TF GPU/mixed-precision config also applies to CV runs + try: + import tensorflow as tf + + requested_gpu = bool(self.config.model_params.get("use_gpu", False)) + enable_mixed = bool(self.config.model_params.get("mixed_precision", False)) + + gpus = tf.config.list_physical_devices("GPU") + if gpus: + for gpu in gpus: + try: + tf.config.experimental.set_memory_growth(gpu, True) + except Exception: + pass + if enable_mixed: + try: + from tensorflow.keras import mixed_precision + + mixed_precision.set_global_policy("mixed_float16") + except Exception: + pass + else: + if requested_gpu: + logging.warning("Requested GPU for CV but none is available.") + except Exception: + pass features_df = self.feature_extractor.extract_features(X) X_prepared = self.prepare_features(features_df) y_encoded = self.label_encoder.transform(y) @@ -165,6 +223,33 @@ class NeuralNetworkModel(BaseModel): """Generate learning curve data for the model""" logging.info(f"Generating learning curve for {self.__class__.__name__}") + # Ensure TF GPU/mixed-precision config also applies here + try: + import tensorflow as tf + + requested_gpu = bool(self.config.model_params.get("use_gpu", False)) + enable_mixed = bool(self.config.model_params.get("mixed_precision", False)) + + gpus = tf.config.list_physical_devices("GPU") + if gpus: + for gpu in gpus: + try: + tf.config.experimental.set_memory_growth(gpu, True) + except Exception: + pass + if enable_mixed: + try: + from tensorflow.keras import mixed_precision + + 
mixed_precision.set_global_policy("mixed_float16") + except Exception: + pass + else: + if requested_gpu: + logging.warning("Requested GPU for learning curve but none is available.") + except Exception: + pass + if train_sizes is None: train_sizes = [0.1, 0.3, 0.5, 0.7, 1.0] diff --git a/research/statistics/__init__.py b/research/statistics/__init__.py index b5a77d9..c24a7ba 100644 --- a/research/statistics/__init__.py +++ b/research/statistics/__init__.py @@ -1 +1 @@ -LETTERS = 'abcdefghijklmnopqrstuvwxyz' +LETTERS = "abcdefghijklmnopqrstuvwxyz" diff --git a/research/statistics/plots.py b/research/statistics/plots.py index ff35298..051feb1 100644 --- a/research/statistics/plots.py +++ b/research/statistics/plots.py @@ -8,11 +8,7 @@ from research.statistics.utils import LETTERS, build_letter_frequencies def plot_transition_matrix(ax, df_probs, title=""): hm = sns.heatmap( - df_probs.loc[list(LETTERS), list(LETTERS)], - cmap="Reds", - annot=False, - cbar=False, - ax=ax + df_probs.loc[list(LETTERS), list(LETTERS)], cmap="Reds", annot=False, cbar=False, ax=ax ) ax.set_title(title, fontsize=12) return hm @@ -20,8 +16,8 @@ def plot_transition_matrix(ax, df_probs, title=""): def plot_letter_frequencies(males, females, sort_values=False, title=None): # Compute frequencies - L_m = build_letter_frequencies(males['name']).set_index("letter")["freq"] - L_f = build_letter_frequencies(females['name']).set_index("letter")["freq"] + L_m = build_letter_frequencies(males["name"]).set_index("letter")["freq"] + L_f = build_letter_frequencies(females["name"]).set_index("letter")["freq"] # Combine into one DataFrame df_plot = pd.DataFrame({"Male": L_m, "Female": L_f}).fillna(0).reset_index() @@ -35,8 +31,8 @@ def plot_letter_frequencies(males, females, sort_values=False, title=None): x = np.arange(len(df_plot)) w = 0.4 fig, ax = plt.subplots(figsize=(16, 6)) - ax.bar(x - w/2, df_plot["Male"], width=w, label="Male", color="steelblue", alpha=0.8) - ax.bar(x + w/2, df_plot["Female"], width=w, label="Female", color="salmon", alpha=0.8) + ax.bar(x - w / 2, df_plot["Male"], width=w, label="Male", color="steelblue", alpha=0.8) + ax.bar(x + w / 2, df_plot["Female"], width=w, label="Female", color="salmon", alpha=0.8) ax.set_xticks(x) ax.set_xticklabels(df_plot["letter"]) diff --git a/research/statistics/utils.py b/research/statistics/utils.py index 901fffd..25f2248 100644 --- a/research/statistics/utils.py +++ b/research/statistics/utils.py @@ -9,9 +9,10 @@ from scipy.spatial.distance import euclidean from scipy.stats import entropy from typing import Dict, Any -LETTERS = 'abcdefghijklmnopqrstuvwxyz' -START_TOKEN = '^' -END_TOKEN = '$' +LETTERS = "abcdefghijklmnopqrstuvwxyz" +START_TOKEN = "^" +END_TOKEN = "$" + def normalize_letters(s): """Normalize accents -> ascii, lowercase, keep only a-z.""" @@ -27,41 +28,28 @@ def build_category_distribution(df: pd.DataFrame) -> pd.DataFrame: return ( df.groupby("province")["identified_category"] .value_counts(normalize=True) # get proportions - .unstack(fill_value=0) # reshape into columns per word count + .unstack(fill_value=0) # reshape into columns per word count ) def build_words_token(df: pd.DataFrame, source: str, target: str) -> pd.DataFrame: # Normalize + split once (vectorized) - s = df[source].fillna('').astype(str) - s = ( - s.str.lower() - .str.replace(r"[^\w'\-]+", " ", regex=True) - .str.strip() - .str.split() - ) + s = df[source].fillna("").astype(str) + s = s.str.lower().str.replace(r"[^\w'\-]+", " ", regex=True).str.strip().str.split() # Explode the token list 
into rows under `target` - out = ( - df.assign(**{target: s}) - .explode(target, ignore_index=True) - ) + out = df.assign(**{target: s}).explode(target, ignore_index=True) # Drop NA/empty tokens and strip whitespace out[target] = out[target].astype(str).str.strip() - out = out[out[target].ne('')].dropna(subset=[target]).reset_index(drop=True) + out = out[out[target].ne("")].dropna(subset=[target]).reset_index(drop=True) return out def build_letter_frequencies(series: pd.Series) -> pd.DataFrame: # Normalize: lowercase, remove non-letters, concatenate all into one string - s = ( - series.astype(str) - .str.lower() - .str.replace(r'[^a-z]', '', regex=True) - .str.cat(sep='') - ) + s = series.astype(str).str.lower().str.replace(r"[^a-z]", "", regex=True).str.cat(sep="") # Convert string into Series of characters chars = pd.Series(list(s)) @@ -82,11 +70,7 @@ def build_letter_frequencies(series: pd.Series) -> pd.DataFrame: def build_transition_probabilities(names: pd.Series, alpha: float = 0.0) -> dict: # 1) Normalize - names = ( - names.astype(str) - .str.lower() - .str.replace(fr"[^{LETTERS}]", "", regex=True) - ) + names = names.astype(str).str.lower().str.replace(rf"[^{LETTERS}]", "", regex=True) names = names[names.str.len() > 0] # 2) Prepare sequences @@ -130,7 +114,7 @@ def build_transition_probabilities(names: pd.Series, alpha: float = 0.0) -> dict # 11) DataFrames df_counts = pd.DataFrame(counts, index=tokens, columns=tokens) - df_probs = pd.DataFrame(probs, index=tokens, columns=tokens) + df_probs = pd.DataFrame(probs, index=tokens, columns=tokens) return { "tokens": tokens, @@ -142,7 +126,11 @@ def build_transition_probabilities(names: pd.Series, alpha: float = 0.0) -> dict } -def build_transition_comparisons(names_transitions: Dict[str, Any], surnames_transitions: Dict[str, Any], n_permutations: int = 1000) -> pd.DataFrame: +def build_transition_comparisons( + names_transitions: Dict[str, Any], + surnames_transitions: Dict[str, Any], + n_permutations: int = 1000, +) -> pd.DataFrame: """ Compares letter transition probability matrices for names and surnames using various distance metrics and a permutation test for statistical significance. 
@@ -150,23 +138,20 @@ def build_transition_comparisons(names_transitions: Dict[str, Any], surnames_tra # Helper function to flatten and smooth matrices def prepare_data(data): - return { - 'm': data['m']['probs'].flatten(), - 'f': data['f']['probs'].flatten() - } + return {"m": data["m"]["probs"].flatten(), "f": data["f"]["probs"].flatten()} prepared_names = prepare_data(names_transitions) prepared_surnames = prepare_data(surnames_transitions) # Distance Metrics - names_l2 = euclidean(prepared_names['m'], prepared_names['f']) - surnames_l2 = euclidean(prepared_surnames['m'], prepared_surnames['f']) + names_l2 = euclidean(prepared_names["m"], prepared_names["f"]) + surnames_l2 = euclidean(prepared_surnames["m"], prepared_surnames["f"]) - kl_names_mf = entropy(prepared_names['m'] + 1e-12, prepared_names['f'] + 1e-12) - kl_names_fm = entropy(prepared_names['f'] + 1e-12, prepared_names['m'] + 1e-12) + kl_names_mf = entropy(prepared_names["m"] + 1e-12, prepared_names["f"] + 1e-12) + kl_names_fm = entropy(prepared_names["f"] + 1e-12, prepared_names["m"] + 1e-12) - kl_surnames_mf = entropy(prepared_surnames['m'] + 1e-12, prepared_surnames['f'] + 1e-12) - kl_surnames_fm = entropy(prepared_surnames['f'] + 1e-12, prepared_surnames['m'] + 1e-12) + kl_surnames_mf = entropy(prepared_surnames["m"] + 1e-12, prepared_surnames["f"] + 1e-12) + kl_surnames_fm = entropy(prepared_surnames["f"] + 1e-12, prepared_surnames["m"] + 1e-12) jsd_names = 0.5 * (kl_names_mf + kl_names_fm) jsd_surnames = 0.5 * (kl_surnames_mf + kl_surnames_fm) @@ -174,15 +159,15 @@ def build_transition_comparisons(names_transitions: Dict[str, Any], surnames_tra # Permutation Test def run_permutation_test(transitions): # Flattened probabilities for male and female - P_m = transitions['m']['probs'].flatten() - P_f = transitions['f']['probs'].flatten() + P_m = transitions["m"]["probs"].flatten() + P_f = transitions["f"]["probs"].flatten() # Calculate the observed JSD (our test statistic) observed_jsd = 0.5 * (entropy(P_m + 1e-12, P_f + 1e-12) + entropy(P_f + 1e-12, P_m + 1e-12)) # Concatenate male and female counts - counts_m = transitions['m']['counts'] - counts_f = transitions['f']['counts'] + counts_m = transitions["m"]["counts"] + counts_f = transitions["f"]["counts"] all_counts = np.concatenate((counts_m, counts_f), axis=1) total_counts = counts_m.shape[1] + counts_f.shape[1] @@ -194,17 +179,27 @@ def build_transition_comparisons(names_transitions: Dict[str, Any], surnames_tra # Note: This is a simplified approach, assuming counts are # structured per name. A more robust implementation would # shuffle the actual names themselves. 
- permuted_counts_m = all_counts[:, shuffled_indices[:counts_m.shape[1]]] - permuted_counts_f = all_counts[:, shuffled_indices[counts_m.shape[1]:]] + permuted_counts_m = all_counts[:, shuffled_indices[: counts_m.shape[1]]] + permuted_counts_f = all_counts[:, shuffled_indices[counts_m.shape[1] :]] # Re-calculate probabilities and JSD for the permuted groups # Add a small epsilon to the denominator to prevent division by zero epsilon = 1e-12 - permuted_probs_m = permuted_counts_m / (permuted_counts_m.sum(axis=0, keepdims=True) + epsilon) - permuted_probs_f = permuted_counts_f / (permuted_counts_f.sum(axis=0, keepdims=True) + epsilon) + permuted_probs_m = permuted_counts_m / ( + permuted_counts_m.sum(axis=0, keepdims=True) + epsilon + ) + permuted_probs_f = permuted_counts_f / ( + permuted_counts_f.sum(axis=0, keepdims=True) + epsilon + ) - permuted_jsd = 0.5 * (entropy(permuted_probs_m.mean(axis=1) + 1e-12, permuted_probs_f.mean(axis=1) + 1e-12) + - entropy(permuted_probs_f.mean(axis=1) + 1e-12, permuted_probs_m.mean(axis=1) + 1e-12)) + permuted_jsd = 0.5 * ( + entropy( + permuted_probs_m.mean(axis=1) + 1e-12, permuted_probs_f.mean(axis=1) + 1e-12 + ) + + entropy( + permuted_probs_f.mean(axis=1) + 1e-12, permuted_probs_m.mean(axis=1) + 1e-12 + ) + ) permuted_jsds.append(permuted_jsd) # Calculate the p-value @@ -214,39 +209,39 @@ def build_transition_comparisons(names_transitions: Dict[str, Any], surnames_tra names_p_value = run_permutation_test(names_transitions) surnames_p_value = run_permutation_test(surnames_transitions) - out = pd.DataFrame({ - "l2": [names_l2, surnames_l2], - "kl_mf": [kl_names_mf, kl_surnames_mf], - "kl_fm": [kl_names_fm, kl_surnames_fm], - "jsd": [jsd_names, jsd_surnames], - "permutation_p_value": [names_p_value, surnames_p_value] - }, index=["names", "surnames"]) + out = pd.DataFrame( + { + "l2": [names_l2, surnames_l2], + "kl_mf": [kl_names_mf, kl_surnames_mf], + "kl_fm": [kl_names_fm, kl_surnames_fm], + "jsd": [jsd_names, jsd_surnames], + "permutation_p_value": [names_p_value, surnames_p_value], + }, + index=["names", "surnames"], + ) return out + import pandas as pd from collections import Counter from typing import Literal def build_ngrams_count( - df: pd.DataFrame, - n: int, - where: Literal["any", "prefix", "suffix"] = "any", + df: pd.DataFrame, + n: int, + where: Literal["any", "prefix", "suffix"] = "any", ) -> pd.DataFrame: # Normalize and clean to a–z - names = ( - df["name"].astype(str) - .str.lower() - .str.replace(r"[^a-z]", "", regex=True) - ) + names = df["name"].astype(str).str.lower().str.replace(r"[^a-z]", "", regex=True) ngrams = [] if where == "any": for s in names: L = len(s) if L >= n: - ngrams.extend(s[i:i+n] for i in range(L - n + 1)) + ngrams.extend(s[i : i + n] for i in range(L - n + 1)) elif where == "prefix": for s in names: if len(s) >= n: