refactoring: uv

This commit is contained in:
2025-10-05 18:14:15 +02:00
parent f3b06fbd07
commit 9dd4f759b3
120 changed files with 5525 additions and 3366 deletions
+16
View File
@@ -0,0 +1,16 @@
.git
.gitignore
.idea
.vscode
__pycache__
.ruff_cache
.venv
*.pyc
*.pyo
*.pyd
*.swp
*.swo
*.DS_Store
dist
build
*.egg-info
+1
View File
@@ -0,0 +1 @@
3.11
+49
View File
@@ -0,0 +1,49 @@
# syntax=docker/dockerfile:1
# Minimal Linux base (glibc); Python itself is installed by uv.
FROM debian:bookworm-slim

ENV DEBIAN_FRONTEND=noninteractive \
    PYTHONDONTWRITEBYTECODE=1 \
    PYTHONUNBUFFERED=1 \
    UV_INSTALL_DIR=/usr/local/bin \
    UV_LINK_MODE=copy \
    UV_PYTHON_DOWNLOADS=1 \
    UV_PROJECT_ENVIRONMENT=/app/.venv \
    PATH=/app/.venv/bin:/usr/local/bin:/usr/sbin:/usr/bin:/sbin:/bin

WORKDIR /app

# System deps for building/using common scientific stack
# Keep minimal; rely on wheels where possible
RUN apt-get update && apt-get install -y --no-install-recommends \
    ca-certificates curl git \
    build-essential pkg-config \
    libssl-dev libffi-dev \
    libopenblas0 libstdc++6 \
    libfreetype6 libpng16-16 libjpeg62-turbo \
    && rm -rf /var/lib/apt/lists/*

# Install uv (static binary)
RUN curl -LsSf https://astral.sh/uv/install.sh | sh

# Copy project metadata first for layer caching.
# FIX: also copy the lockfile when it exists. Previously only
# pyproject.toml/README.md were copied, so the `[ -f uv.lock ]` branch below
# could never trigger and builds were silently non-reproducible. The glob
# (`uv.lock*`) keeps the build working when no lockfile is committed.
COPY pyproject.toml uv.lock* README.md ./

# Install a managed Python via uv and create the project venv
RUN uv python install 3.11 \
    && uv venv /app/.venv --python 3.11

# Resolve and install runtime deps into project venv
# Use lockfile if present for reproducibility
RUN if [ -f uv.lock ]; then uv sync --no-dev --no-install-project --frozen; else uv sync --no-dev --no-install-project; fi

# Copy source code and optional templates
COPY src ./src

# Re-sync to ensure the local package is installed
RUN uv sync --no-dev \
    && rm -rf /root/.cache

# Default command shows help; override in compose or docker run
CMD ["ners", "--help"]
+83 -137
View File
@@ -10,37 +10,23 @@ million names from the Democratic Republic of Congo (DRC) annotated with gender
### Installation & Setup
Instructions and command-line snippets below are provided to help you set up the project environment quickly and
efficiently, assuming you have Python 3.11 and Git installed and working on a Unix-like system (Linux, macOS, etc.).
**Using Makefile (Recommended)**
**Unix based**
```bash
curl -LsSf https://astral.sh/uv/install.sh | sh
git clone https://github.com/bernard-ng/drc-ners-nlp.git
cd drc-ners-nlp
# Setup environment
make setup
make activate
uv sync
```
**Manual Setup**
**macOS & Windows**
```bash
git clone https://github.com/bernard-ng/drc-ners-nlp.git
cd drc-ners-nlp
# Setup environment
python -m venv .venv
.venv/bin/pip install --upgrade pip
.venv/bin/pip install -r requirements.txt
pip install --upgrade pip
pip install -r requirements.txt
pip install jupyter notebook ipykernel pytest black flake8 mypy
source .venv/bin/activate
docker compose build
docker compose run --rm app
docker compose run --rm app ners pipeline run --env=production
docker compose run --rm app ners research train --name=lightgbm --type=baseline --env=production
docker compose run --rm --service-ports app ners web run --env=production
```
## Data Processing
@@ -55,6 +41,7 @@ the `drc-ners-nlp/config/pipeline.yaml` file.
```yaml
stages:
- "data_cleaning"
- "data_selection"
- "feature_extraction"
- "data_splitting"
```
@@ -62,37 +49,7 @@ stages:
**Running the Pipeline**
```bash
python main.py --env production
```
## NER Processing (Optional)
This project implements a custom named entity recognition (NER) pipeline tailored for Congolese names.
Its main objective is to accurately identify and tag the different components of a Congolese name,
specifically distinguishing between the native part and the surname.
```bash
python ner.py --env production
```
Once you've built and trained the NER model, you can use it to annotate **COMPOSE** names in the original dataset
**Running the Pipeline with NER Annotation**
```yaml
stages:
- "data_cleaning"
- "feature_extraction"
- "ner_annotation"
- "data_splitting"
```
**Running the Pipeline with LLM Annotation**
```yaml
stages:
- "data_cleaning"
- "feature_extraction"
- "llm_annotation"
- "data_splitting"
uv run ners pipeline run --env="production"
```
## Experiments
@@ -105,54 +62,94 @@ you can define model features, training parameters, and evaluation metrics in th
```bash
# bigru
python train.py --name="bigru" --type="baseline" --env="production"
python train.py --name="bigru_native" --type="baseline" --env="production"
python train.py --name="bigru_surname" --type="baseline" --env="production"
uv run ners research train --name="bigru" --type="baseline" --env="production"
uv run ners research train --name="bigru_native" --type="baseline" --env="production"
uv run ners research train --name="bigru_surname" --type="baseline" --env="production"
# cnn
python train.py --name="cnn" --type="baseline" --env="production"
python train.py --name="cnn_native" --type="baseline" --env="production"
python train.py --name="cnn_surname" --type="baseline" --env="production"
uv run ners research train --name="cnn" --type="baseline" --env="production"
uv run ners research train --name="cnn_native" --type="baseline" --env="production"
uv run ners research train --name="cnn_surname" --type="baseline" --env="production"
# lightgbm
python train.py --name="lightgbm" --type="baseline" --env="production"
python train.py --name="lightgbm_native" --type="baseline" --env="production"
python train.py --name="lightgbm_surname" --type="baseline" --env="production"
uv run ners research train --name="lightgbm" --type="baseline" --env="production"
uv run ners research train --name="lightgbm_native" --type="baseline" --env="production"
uv run ners research train --name="lightgbm_surname" --type="baseline" --env="production"
# logistic regression
python train.py --name="logistic_regression" --type="baseline" --env="production"
python train.py --name="logistic_regression_native" --type="baseline" --env="production"
python train.py --name="logistic_regression_surname" --type="baseline" --env="production"
uv run ners research train --name="logistic_regression" --type="baseline" --env="production"
uv run ners research train --name="logistic_regression_native" --type="baseline" --env="production"
uv run ners research train --name="logistic_regression_surname" --type="baseline" --env="production"
# lstm
python train.py --name="lstm" --type="baseline" --env="production"
python train.py --name="lstm_native" --type="baseline" --env="production"
python train.py --name="lstm_surname" --type="baseline" --env="production"
uv run ners research train --name="lstm" --type="baseline" --env="production"
uv run ners research train --name="lstm_native" --type="baseline" --env="production"
uv run ners research train --name="lstm_surname" --type="baseline" --env="production"
# random forest
python train.py --name="random_forest" --type="baseline" --env="production"
python train.py --name="random_forest_native" --type="baseline" --env="production"
python train.py --name="random_forest_surname" --type="baseline" --env="production"
uv run ners research train --name="random_forest" --type="baseline" --env="production"
uv run ners research train --name="random_forest_native" --type="baseline" --env="production"
uv run ners research train --name="random_forest_surname" --type="baseline" --env="production"
# svm
python train.py --name="svm" --type="baseline" --env="production"
python train.py --name="svm_native" --type="baseline" --env="production"
python train.py --name="svm_surname" --type="baseline" --env="production"
uv run ners research train --name="svm" --type="baseline" --env="production"
uv run ners research train --name="svm_native" --type="baseline" --env="production"
uv run ners research train --name="svm_surname" --type="baseline" --env="production"
# naive bayes
python train.py --name="naive_bayes" --type="baseline" --env="production"
python train.py --name="naive_bayes_native" --type="baseline" --env="production"
python train.py --name="naive_bayes_surname" --type="baseline" --env="production"
uv run ners research train --name="naive_bayes" --type="baseline" --env="production"
uv run ners research train --name="naive_bayes_native" --type="baseline" --env="production"
uv run ners research train --name="naive_bayes_surname" --type="baseline" --env="production"
# transformer
python train.py --name="transformer" --type="baseline" --env="production"
python train.py --name="transformer_native" --type="baseline" --env="production"
python train.py --name="transformer_surname" --type="baseline" --env="production"
uv run ners research train --name="transformer" --type="baseline" --env="production"
uv run ners research train --name="transformer_native" --type="baseline" --env="production"
uv run ners research train --name="transformer_surname" --type="baseline" --env="production"
# xgboost
python train.py --name="xgboost" --type="baseline" --env="production"
python train.py --name="xgboost_native" --type="baseline" --env="production"
python train.py --name="xgboost_surname" --type="baseline" --env="production"
uv run ners research train --name="xgboost" --type="baseline" --env="production"
uv run ners research train --name="xgboost_native" --type="baseline" --env="production"
uv run ners research train --name="xgboost_surname" --type="baseline" --env="production"
```
## TensorFlow on macOS (Intel) with uv
TensorFlow no longer publishes wheels for macOS Intel. To keep using uv and run TF reliably, use a Linux container with TF preinstalled and install project code with minimal extras inside the container.
### One-time build
```bash
docker compose -f docker/compose.tf.yml build
```

If you see a message like `tensorflow/tensorflow:<tag>: not found`, update `docker/Dockerfile.tf-cpu` to a tag that exists (e.g., `2.17.0`) and rebuild:

```bash
sed -n '1,20p' docker/Dockerfile.tf-cpu  # verify the FROM line
docker pull tensorflow/tensorflow:2.17.0 # quick availability check
docker compose -f docker/compose.tf.yml build
```
### Start a shell with uv and TF available
```bash
docker compose -f docker/compose.tf.yml run --rm tf bash
```
Inside the container:
```bash
# Install project in editable mode without pulling full deps
uv pip install -e . --no-deps
# Install only what research needs alongside TensorFlow
uv pip install typer pandas scikit-learn seaborn plotly
# Sanity check
uv run python -c "import tensorflow as tf; print(tf.__version__)"
# Run an experiment
uv run ners research train --name="lstm" --type="baseline" --env="production"
```
## Web Interface
@@ -163,60 +160,9 @@ experiments and make predictions without needing to understand the underlying co
### Running the Web Interface
```bash
streamlit run web/app.py
uv run ners web run --env="production"
```
## GPU Acceleration
This project can leverage GPUs for faster training when supported libraries and hardware are available.
- TensorFlow/Keras models (BiGRU, LSTM, CNN, Transformer)
- Uses GPU automatically if a TensorFlow GPU build is installed.
- The code enables safe GPU memory growth by default; optionally enable mixed precision for additional speed:
- Add `mixed_precision: true` in the experiment `model_params` (e.g., in `config/research_templates.yaml`).
- The final layer outputs are set to float32 for numerical stability under mixed precision.
- spaCy NER
- Automatically prefers GPU if available; otherwise falls back to CPU.
- Ensure a compatible CUDA-enabled spaCy/thinc stack is installed to use GPU.
- XGBoost
- Enable GPU by adding to the experiment `model_params`:
- `use_gpu: true` (sets `tree_method: gpu_hist` and `predictor: gpu_predictor`).
- LightGBM
- Enable GPU by adding to the experiment `model_params`:
- `use_gpu: true` (sets `device: gpu`). Optional: `gpu_platform_id`, `gpu_device_id`.
Example template snippet (GPU on):
```yaml
- name: "lstm_gpu"
description: "LSTM with GPU + mixed precision"
model_type: "lstm"
features: ["full_name"]
model_params:
embedding_dim: 128
lstm_units: 64
epochs: 5
batch_size: 128
use_gpu: true
mixed_precision: true
tags: ["gpu", "mixed_precision"]
- name: "xgboost_gpu"
description: "XGBoost with GPU"
model_type: "xgboost"
features: ["full_name"]
model_params:
n_estimators: 200
use_gpu: true
```
Notes:
- Install CUDA-enabled binaries for TensorFlow/spaCy/LightGBM/XGBoost to actually use GPU.
- If GPU is requested but not available, training will proceed on CPU with a warning.
## Contributors
<a href="https://github.com/bernard-ng/drc-ners-nlp/graphs/contributors" title="show all contributors">
+21
View File
@@ -0,0 +1,21 @@
services:
app:
build:
context: .
dockerfile: Dockerfile
image: drc-ners:uv
working_dir: /app
tty: true
stdin_open: true
environment:
NERS_ENV: production
STREAMLIT_SERVER_ADDRESS: 0.0.0.0
# expose Streamlit for `ners web run`
ports:
- "8501:8501"
volumes:
- ./assets:/app/assets
- ./config:/app/config
- ./data:/app/data
# default command shows CLI help; override per run
command: ["ners", "--help"]
-90
View File
@@ -1,90 +0,0 @@
#!.venv/bin/python3
import argparse
import sys
import traceback
from pathlib import Path
from core.config import setup_config
from processing.monitoring.pipeline_monitor import PipelineMonitor
def main():
    """Entry point for the pipeline monitoring CLI.

    Parses the ``clean``/``reset`` subcommands and dispatches to
    :class:`PipelineMonitor`. Returns a process exit code for ``sys.exit``:
    0 on success or cancellation, 1 on failure or when no command is given.
    """
    choices = [
        "data_cleaning",
        "data_selection",
        "feature_extraction",
        "ner_annotation",
        "llm_annotation",
        "data_splitting",
    ]
    parser = argparse.ArgumentParser(description="DRC NERS Processing Monitoring")
    parser.add_argument("--config", type=Path, help="Path to configuration file")
    parser.add_argument("--env", type=str, default="development", help="Environment")

    subparsers = parser.add_subparsers(dest="command", help="Available commands")

    # Clean command
    clean_parser = subparsers.add_parser("clean", help="Clean checkpoint files")
    clean_parser.add_argument("--step", type=str, choices=choices, help="default: all")
    clean_parser.add_argument("--keep-last", type=int, default=1, help="(default: 1)")
    clean_parser.add_argument("--force", action="store_true", help="Clean without confirmation")

    # Reset command
    reset_parser = subparsers.add_parser("reset", help="Reset pipeline step")
    reset_parser.add_argument("--step", type=str, choices=choices, help="(default: all)")
    reset_parser.add_argument("--all", action="store_true", help="Reset all steps")
    reset_parser.add_argument("--force", action="store_true", help="Reset without confirmation")

    args = parser.parse_args()

    try:
        setup_config(config_path=args.config, env=args.env)
        monitor = PipelineMonitor()

        if not args.command:
            parser.print_help()
            monitor.print_status(detailed=True)
            return 1
        elif args.command == "clean":
            checkpoint_info = monitor.count_checkpoint_files()
            print(f"Current checkpoint storage: {checkpoint_info['total_size_mb']:.1f} MB")
            if not args.force:
                response = input("Are you sure you want to clean checkpoints? (y/N): ")
                if response.lower() != "y":
                    print("Cancelled")
                    return 0
            if args.step:
                monitor.clean_step_checkpoints(args.step, args.keep_last)
            else:
                for step in monitor.steps:
                    monitor.clean_step_checkpoints(step, args.keep_last)
            print("Checkpoint cleaning completed")
        elif args.command == "reset":
            if not args.force:
                # FIX: --step may be omitted (reset all); the old prompt then
                # read "reset None". Name the actual target instead.
                response = input(
                    f"Are you sure you want to reset {args.step or 'all steps'}? "
                    "This will delete all checkpoints. (y/N): "
                )
                if response.lower() != "y":
                    print("Cancelled")
                    return 0
            if args.step:
                monitor.reset_step(args.step)
            else:
                for step in monitor.steps:
                    monitor.reset_step(step)
            # FIX: plain string — the old f-string had no placeholder.
            print("Reset completed")
        # FIX: explicit success code; previously fell through returning None.
        return 0
    except Exception as e:
        print(f"Monitoring failed: {e}")
        traceback.print_exc()
        return 1
if __name__ == "__main__":
sys.exit(main())
-499
View File
File diff suppressed because one or more lines are too long
+41
View File
@@ -0,0 +1,41 @@
[project]
name = "ners"
version = "0.1.0"
description = "NLP tooling for Congolese (DRC) names: processing pipeline, NER, research experiments and web UI"
readme = "README.md"
requires-python = ">=3.11"
dependencies = [
"geopandas>=1.1.1",
"joblib>=1.5.2",
"lightgbm>=4.6.0",
"matplotlib>=3.10.6",
"numpy>=2.3.3",
"ollama>=0.6.0",
"pandas>=2.3.3",
"plotly>=6.3.1",
"psutil>=7.1.0",
"pydantic>=2.11.10",
"pyyaml>=6.0.3",
"scikit-learn>=1.7.2",
"seaborn>=0.13.2",
"spacy>=3.8.7",
"streamlit>=1.50.0",
"tqdm>=4.67.1",
"typer>=0.19.2",
"xgboost>=3.0.5",
]
[project.scripts]
ners = "ners.cli:app"
[build-system]
requires = ["uv_build>=0.8.12,<0.9.0"]
build-backend = "uv_build"
[dependency-groups]
dev = [
"ruff>=0.13.3",
]
[tool.uv]
required-environments = ["sys_platform == 'linux' and platform_machine == 'x86_64'"]
-170
View File
@@ -1,170 +0,0 @@
absl-py==2.3.0
altair==5.1.2
annotated-types==0.7.0
anyio==4.9.0
appnope==0.1.4
argon2-cffi==25.1.0
argon2-cffi-bindings==21.2.0
arrow==1.3.0
asttokens==3.0.0
astunparse==1.6.3
async-lru==2.0.5
attrs==25.3.0
babel==2.17.0
beautifulsoup4==4.13.4
black==25.1.0
bleach==6.2.0
blinker==1.9.0
cachetools==6.1.0
certifi==2025.6.15
cffi==1.17.1
charset-normalizer==3.4.2
click==8.2.1
comm==0.2.2
contourpy==1.3.2
cycler==0.12.1
debugpy==1.8.14
decorator==5.2.1
defusedxml==0.7.1
executing==2.2.0
fastjsonschema==2.21.1
flake8==7.3.0
flatbuffers==25.2.10
fonttools==4.58.4
fqdn==1.5.1
gast==0.6.0
gitdb==4.0.12
GitPython==3.1.45
google-pasta==0.2.0
grpcio==1.73.0
h11==0.16.0
h5py==3.14.0
httpcore==1.0.9
httpx==0.28.1
idna==3.10
imbalanced-learn==0.13.0
ipykernel==6.29.5
ipython>=8.0,<9.0
ipython_pygments_lexers==1.1.1
isoduration==20.11.0
jedi==0.19.2
Jinja2==3.1.6
joblib==1.5.1
json5==0.12.0
jsonpointer==3.0.0
jsonschema==4.24.0
jsonschema-specifications==2025.4.1
jupyter-events==0.12.0
jupyter-lsp==2.2.5
jupyter_client==8.6.3
jupyter_core==5.8.1
jupyter_server==2.16.0
jupyter_server_terminals==0.5.3
jupyterlab==4.4.4
jupyterlab_pygments==0.3.0
jupyterlab_server==2.27.3
keras==3.10.0
kiwisolver==1.4.8
libclang==18.1.1
lightgbm~=4.6.0
Markdown==3.8.2
markdown-it-py==3.0.0
MarkupSafe==3.0.2
matplotlib==3.10.3
matplotlib-inline==0.1.7
mccabe==0.7.0
mdurl==0.1.2
mistune==3.1.3
ml-dtypes==0.3.2
mypy==1.17.0
mypy_extensions==1.1.0
namex==0.1.0
narwhals==2.0.1
nbclient==0.10.2
nbconvert==7.16.6
nbformat==5.10.4
nest-asyncio==1.6.0
nltk==3.9.1
notebook==7.4.4
notebook_shim==0.2.4
numpy==1.26.4
ollama~=0.5.1
opt_einsum==3.4.0
optree==0.16.0
overrides==7.7.0
packaging==25.0
pandas==2.3.0
pandocfilters==1.5.1
parso==0.8.4
pathspec==0.12.1
pexpect==4.9.0
pillow==11.2.1
platformdirs==4.3.8
plotly~=6.2.0
prometheus_client==0.22.1
prompt_toolkit==3.0.51
protobuf==4.25.8
psutil==7.0.0
ptyprocess==0.7.0
pure_eval==0.2.3
pyarrow==21.0.0
pycodestyle==2.14.0
pycparser==2.22
pydantic~=2.11.7
pydantic_core==2.33.2
pydeck==0.9.1
pyflakes==3.4.0
Pygments==2.19.1
pyparsing==3.2.3
python-dateutil==2.9.0.post0
python-json-logger==3.3.0
pytz==2025.2
PyYAML~=6.0.2
pyzmq==27.0.0
referencing==0.36.2
regex==2024.11.6
requests==2.32.4
rfc3339-validator==0.1.4
rfc3986-validator==0.1.1
rich==14.0.0
rpds-py==0.26.0
scikit-learn~=1.6.1
scipy==1.15.3
seaborn==0.13.2
Send2Trash==1.8.3
six==1.17.0
sklearn-compat==0.1.3
smmap==5.0.2
sniffio==1.3.1
soupsieve==2.7
spacy~=3.8.7
stack-data==0.6.3
streamlit~=1.47.1
tenacity==9.1.2
tensorboard==2.16.2
tensorboard-data-server==0.7.2
tensorflow==2.16.2
tensorflow-io-gcs-filesystem==0.37.1
termcolor==3.1.0
terminado==0.18.1
threadpoolctl==3.6.0
tinycss2==1.4.0
toml==0.10.2
toolz==1.0.0
tornado==6.5.1
tqdm==4.67.1
traitlets==5.14.3
types-python-dateutil==2.9.0.20250516
types-PyYAML==6.0.12.20250516
typing-inspection==0.4.1
typing_extensions==4.14.0
tzdata==2025.2
uri-template==1.3.0
urllib3==2.5.0
wcwidth==0.2.13
webcolors==24.11.1
webencodings==0.5.1
websocket-client==1.8.0
Werkzeug==3.1.3
wrapt==1.17.2
xgboost~=3.0.3
+3
View File
@@ -0,0 +1,3 @@
"""DRC NERS NLP package."""

# The package deliberately exports nothing at the top level; consumers import
# submodules (e.g. ners.cli, ners.core.config) explicitly.
__all__: list[str] = []
+226
View File
@@ -0,0 +1,226 @@
from __future__ import annotations
import os
import subprocess
import sys
from pathlib import Path
from typing import Optional
import typer
from ners.core.config import setup_config, PipelineConfig
app = typer.Typer(help="DRC NERS command-line interface", no_args_is_help=True)
# -------------------------
# Pipeline commands
# -------------------------
pipeline_app = typer.Typer(help="Data processing pipeline")
app.add_typer(pipeline_app, name="pipeline")
@pipeline_app.command("run")
def pipeline_run(
    config: Optional[Path] = typer.Option(None, help="Path to configuration file"),
    env: str = typer.Option("development", help="Environment name"),
) -> None:
    """Run the full processing pipeline."""
    # Deferred import keeps CLI startup fast for unrelated commands.
    from ners.main import run_pipeline

    exit_code = run_pipeline(setup_config(config_path=config, env=env))
    raise typer.Exit(exit_code)
# -------------------------
# NER commands
# -------------------------
ner_app = typer.Typer(help="NER dataset and model")
app.add_typer(ner_app, name="ner")
def _load_config(config: Optional[Path], env: str) -> PipelineConfig:
    """Resolve the pipeline configuration for *env*, optionally from *config*."""
    return setup_config(config_path=config, env=env)
@ner_app.command("feature")
def ner_feature(
    config: Optional[Path] = typer.Option(None, help="Path to configuration file"),
    env: str = typer.Option("development", help="Environment name"),
) -> None:
    """Invoke ``ners.ner.feature`` with the resolved configuration."""
    from ners.ner import feature

    feature(_load_config(config, env))
@ner_app.command("build")
def ner_build(
    config: Optional[Path] = typer.Option(None, help="Path to configuration file"),
    env: str = typer.Option("development", help="Environment name"),
) -> None:
    """Invoke ``ners.ner.build`` with the resolved configuration."""
    from ners.ner import build

    build(_load_config(config, env))
@ner_app.command("train")
def ner_train(
    config: Optional[Path] = typer.Option(None, help="Path to configuration file"),
    env: str = typer.Option("development", help="Environment name"),
) -> None:
    """Invoke ``ners.ner.train`` with the resolved configuration."""
    from ners.ner import train

    train(_load_config(config, env))
@ner_app.command("run")
def ner_run(
    config: Optional[Path] = typer.Option(None, help="Path to configuration file"),
    env: str = typer.Option("development", help="Environment name"),
    reset: bool = typer.Option(
        False, help="Reset intermediate outputs and rerun all steps"
    ),
) -> None:
    """Run the NER pipeline and exit with its return code."""
    from ners.ner import run_pipeline

    raise typer.Exit(run_pipeline(_load_config(config, env), reset))
# -------------------------
# Research commands
# -------------------------
research_app = typer.Typer(help="Research experiments and training")
app.add_typer(research_app, name="research")
@research_app.command("train")
def research_train(
    name: str = typer.Option(..., "--name", help="Model name to train"),
    # Renamed from `type` to avoid shadowing the builtin; the CLI flag is
    # declared explicitly as "--type", so the command-line interface is
    # unchanged.
    type_: str = typer.Option(..., "--type", help="Experiment type"),
    templates: str = typer.Option(
        "research_templates.yaml", help="Templates file path"
    ),
    config: Optional[Path] = typer.Option(None, help="Path to configuration file"),
    env: str = typer.Option("development", help="Environment name"),
) -> None:
    """Train a single model described by a research template.

    Looks up the (name, type) template in *templates* and forwards its
    settings to ``ModelTrainer.train_single_model``.
    """
    from ners.research.experiment.experiment_builder import ExperimentBuilder
    from ners.research.model_trainer import ModelTrainer

    cfg = _load_config(config, env)

    builder = ExperimentBuilder(cfg)
    template_data = builder.load_templates(templates)
    exp_cfg = builder.find_template(template_data, name, type_)

    trainer = ModelTrainer(cfg)
    trainer.train_single_model(
        model_name=exp_cfg.get("name"),
        model_type=exp_cfg.get("model_type"),
        features=exp_cfg.get("features"),
        model_params=exp_cfg.get("model_params", {}),
        tags=exp_cfg.get("tags", []),
    )
# -------------------------
# Monitor commands
# -------------------------
monitor_app = typer.Typer(help="Monitor pipeline checkpoints")
app.add_typer(monitor_app, name="monitor")
@monitor_app.command("status")
def monitor_status(
    config: Optional[Path] = typer.Option(None, help="Path to configuration file"),
    env: str = typer.Option("development", help="Environment name"),
    detailed: bool = typer.Option(
        False, help="Show detailed status (failed batch IDs)"
    ),
) -> None:
    """Print the pipeline checkpoint status."""
    # Config is loaded for its side effects (paths/logging setup).
    _load_config(config, env)

    from ners.processing.monitoring.pipeline_monitor import PipelineMonitor

    monitor = PipelineMonitor()
    monitor.print_status(detailed=detailed)
@monitor_app.command("clean")
def monitor_clean(
    step: Optional[str] = typer.Option(None, help="Step to clean; default all"),
    keep_last: int = typer.Option(1, help="Number of latest checkpoint files to keep"),
    force: bool = typer.Option(False, help="Do not ask for confirmation"),
    config: Optional[Path] = typer.Option(None, help="Path to configuration file"),
    env: str = typer.Option("development", help="Environment name"),
) -> None:
    """Remove old checkpoint files for one step, or for every step."""
    _load_config(config, env)

    from ners.processing.monitoring.pipeline_monitor import PipelineMonitor

    monitor = PipelineMonitor()
    if not force:
        typer.confirm("Clean checkpoints?", abort=True)

    targets = [step] if step else list(monitor.steps)
    for target in targets:
        monitor.clean_step_checkpoints(target, keep_last)
@monitor_app.command("reset")
def monitor_reset(
    step: Optional[str] = typer.Option(None, help="Step to reset; default all"),
    force: bool = typer.Option(False, help="Do not ask for confirmation"),
    config: Optional[Path] = typer.Option(None, help="Path to configuration file"),
    env: str = typer.Option("development", help="Environment name"),
) -> None:
    """Reset one pipeline step, or every step, deleting its checkpoints."""
    _load_config(config, env)

    from ners.processing.monitoring.pipeline_monitor import PipelineMonitor

    monitor = PipelineMonitor()
    if not force:
        typer.confirm(
            f"Reset {step or 'all steps'}? This deletes checkpoints.", abort=True
        )

    targets = [step] if step else list(monitor.steps)
    for target in targets:
        monitor.reset_step(target)
# -------------------------
# Web commands
# -------------------------
web_app = typer.Typer(help="Web UI wrapper")
app.add_typer(web_app, name="web")
@web_app.command("run")
def web_run(
    config: Optional[Path] = typer.Option(None, help="Path to configuration file"),
    env: str = typer.Option("development", help="Environment name"),
) -> None:
    """Launch the Streamlit web app via subprocess."""
    script = Path(__file__).parent / "web" / "app.py"

    # Configuration travels through environment variables so the Streamlit
    # process does not have to parse CLI arguments itself.
    child_env = os.environ.copy()
    if config is not None:
        child_env["NERS_CONFIG"] = str(config)
    child_env["NERS_ENV"] = env

    status = subprocess.call(
        [sys.executable, "-m", "streamlit", "run", str(script)],
        env=child_env,
    )
    raise typer.Exit(status)
if __name__ == "__main__": # pragma: no cover
app()
@@ -2,10 +2,10 @@ import logging
from pathlib import Path
from typing import Optional, Union
from core.utils import ensure_directories
from .config_manager import ConfigManager
from .logging_config import LoggingConfig
from .pipeline_config import PipelineConfig
from ners.core.utils import ensure_directories
from ners.core.config.config_manager import ConfigManager
from ners.core.config.logging_config import LoggingConfig
from ners.core.config.pipeline_config import PipelineConfig
config_manager = ConfigManager()
@@ -22,7 +22,9 @@ def load_config(config_path: Optional[Union[str, Path]] = None) -> PipelineConfi
return config_manager.get_config()
def setup_config(config_path: Optional[Path] = None, env: str = "development") -> PipelineConfig:
def setup_config(
config_path: Optional[Path] = None, env: str = "development"
) -> PipelineConfig:
"""
Unified configuration loading and logging setup for all entrypoint scripts.
@@ -5,8 +5,8 @@ from typing import Optional, Union, Dict, Any
import yaml
from core.config.pipeline_config import PipelineConfig
from core.config.project_paths import ProjectPaths
from ners.core.config.pipeline_config import PipelineConfig
from ners.core.config.project_paths import ProjectPaths
class ConfigManager:
@@ -36,7 +36,7 @@ class ConfigManager:
def _setup_default_paths(self):
"""Setup default project paths"""
root_dir = Path(__file__).parent.parent.parent
root_dir = Path(__file__).parent.parent.parent.parent.parent
self.default_paths = ProjectPaths(
root_dir=root_dir,
configs_dir=root_dir / "config",
@@ -53,7 +53,9 @@ class ConfigManager:
self.config_path = config_path
if not self.config_path.exists():
logging.warning(f"Config file not found: {self.config_path}. Using defaults.")
logging.warning(
f"Config file not found: {self.config_path}. Using defaults."
)
return self._create_default_config()
try:
@@ -122,7 +124,11 @@ class ConfigManager:
def _deep_update(self, base_dict: Dict, update_dict: Dict):
"""Recursively update nested dictionaries"""
for key, value in update_dict.items():
if key in base_dict and isinstance(base_dict[key], dict) and isinstance(value, dict):
if (
key in base_dict
and isinstance(base_dict[key], dict)
and isinstance(value, dict)
):
self._deep_update(base_dict[key], value)
else:
base_dict[key] = value
@@ -1,10 +1,10 @@
from pydantic import BaseModel
from core.config.annotation_config import AnnotationConfig
from core.config.data_config import DataConfig
from core.config.logging_config import LoggingConfig
from core.config.processing_config import ProcessingConfig
from core.config.project_paths import ProjectPaths
from ners.core.config.annotation_config import AnnotationConfig
from ners.core.config.data_config import DataConfig
from ners.core.config.logging_config import LoggingConfig
from ners.core.config.processing_config import ProcessingConfig
from ners.core.config.project_paths import ProjectPaths
class PipelineConfig(BaseModel):
@@ -10,6 +10,8 @@ class ProcessingConfig(BaseModel):
max_workers: int = 4
checkpoint_interval: int = 5
use_multiprocessing: bool = False
encoding_options: list = field(default_factory=lambda: ["utf-8", "utf-16", "latin1"])
encoding_options: list = field(
default_factory=lambda: ["utf-8", "utf-16", "latin1"]
)
chunk_size: int = 100_000
epochs: int = 2
@@ -4,13 +4,13 @@ from pathlib import Path
from typing import TYPE_CHECKING
if TYPE_CHECKING:
from core.config import PipelineConfig
from ners.core.config import PipelineConfig
@contextmanager
def temporary_config_override(**overrides):
"""Context manager for temporarily overriding configuration"""
from core.config import get_config
from ners.core.config import get_config
config = get_config()
original_values = {}
@@ -5,7 +5,7 @@ from typing import Optional, Union, Iterator, Dict
import pandas as pd
from core.config.pipeline_config import PipelineConfig
from ners.core.config.pipeline_config import PipelineConfig
OPTIMIZED_DTYPES = {
# Numeric columns with appropriate bit-width
@@ -113,7 +113,9 @@ class DataLoader:
sex_values = df["sex"].dropna().unique()
if len(sex_values) == 0:
logging.warning(f"No valid values found in sex column 'sex', using random sampling")
logging.warning(
"No valid values found in sex column 'sex', using random sampling"
)
return df.sample(n=max_size, random_state=self.config.data.random_seed)
# Calculate samples per sex category
@@ -140,18 +142,22 @@ class DataLoader:
logging.info(f"Sampled {current_samples} records for sex '{sex}'")
if not balanced_samples:
logging.warning("No balanced samples could be created, using random sampling")
logging.warning(
"No balanced samples could be created, using random sampling"
)
return df.sample(n=max_size, random_state=self.config.data.random_seed)
# Create result using iloc with indices (no copying until final step)
result = df.iloc[balanced_samples].copy()
# Shuffle the final result
result = result.sample(frac=1, random_state=self.config.data.random_seed).reset_index(
drop=True
)
result = result.sample(
frac=1, random_state=self.config.data.random_seed
).reset_index(drop=True)
logging.info(f"Created balanced dataset with {len(result)} records from {len(df)} total")
logging.info(
f"Created balanced dataset with {len(result)} records from {len(df)} total"
)
return result
@classmethod
@@ -1,4 +1,4 @@
from core.config.pipeline_config import PipelineConfig
from ners.core.config.pipeline_config import PipelineConfig
class PromptManager:
@@ -2,7 +2,7 @@ import json
import logging
from typing import Dict, Any
from core.config.pipeline_config import PipelineConfig
from ners.core.config.pipeline_config import PipelineConfig
class StateManager:
+11 -41
View File
@@ -1,21 +1,17 @@
#!.venv/bin/python3
import argparse
import logging
import sys
import traceback
from core.config import setup_config
from core.utils.data_loader import DataLoader
from processing.batch.batch_config import BatchConfig
from processing.pipeline import Pipeline
from processing.steps.data_cleaning_step import DataCleaningStep
from processing.steps.data_selection_step import DataSelectionStep
from processing.steps.data_splitting_step import DataSplittingStep
from processing.steps.feature_extraction_step import FeatureExtractionStep
from ners.core.utils.data_loader import DataLoader
from ners.processing.batch.batch_config import BatchConfig
from ners.processing.pipeline import Pipeline
from ners.processing.steps.data_cleaning_step import DataCleaningStep
from ners.processing.steps.data_selection_step import DataSelectionStep
from ners.processing.steps.data_splitting_step import DataSplittingStep
from ners.processing.steps.llm_annotation_step import LLMAnnotationStep
from ners.processing.steps.ner_annotation_step import NERAnnotationStep
from ners.processing.steps.feature_extraction_step import FeatureExtractionStep
def create_pipeline(config) -> Pipeline:
"""Create pipeline from configuration"""
batch_config = BatchConfig(
batch_size=config.processing.batch_size,
max_workers=config.processing.max_workers,
@@ -23,14 +19,13 @@ def create_pipeline(config) -> Pipeline:
use_multiprocessing=config.processing.use_multiprocessing,
)
# Add steps based on configuration
pipeline = Pipeline(batch_config)
steps = [
DataCleaningStep(config),
FeatureExtractionStep(config),
DataSelectionStep(config),
# NERAnnotationStep(config),
# LLMAnnotationStep(config),
NERAnnotationStep(config),
LLMAnnotationStep(config),
]
for stage in config.stages:
@@ -42,7 +37,6 @@ def create_pipeline(config) -> Pipeline:
def run_pipeline(config) -> int:
"""Run the complete pipeline"""
try:
logging.info(f"Starting pipeline: {config.name} v{config.version}")
@@ -79,27 +73,3 @@ def run_pipeline(config) -> int:
except Exception as e:
logging.error(f"Pipeline failed: {e}", exc_info=True)
return 1
def main():
"""Main entry point with unified configuration loading"""
parser = argparse.ArgumentParser(
description="DRC NERS Processing Pipeline",
formatter_class=argparse.RawDescriptionHelpFormatter,
)
parser.add_argument("--config", type=str, help="Path to configuration file")
parser.add_argument("--env", type=str, default="development", help="Environment name")
args = parser.parse_args()
try:
config = setup_config(config_path=args.config, env=args.env)
return run_pipeline(config)
except Exception as e:
print(f"Pipeline failed: {e}")
traceback.print_exc()
return 1
if __name__ == "__main__":
sys.exit(main())
+14
View File
@@ -0,0 +1,14 @@
#!.venv/bin/python3
from ners.processing.monitoring.pipeline_monitor import PipelineMonitor
def status(*, detailed: bool = False) -> None:
PipelineMonitor().print_status(detailed=detailed)
def clean_step(step: str, *, keep_last: int = 1) -> None:
PipelineMonitor().clean_step_checkpoints(step, keep_last)
def reset_step(step: str) -> None:
PipelineMonitor().reset_step(step)
+10 -25
View File
@@ -1,29 +1,24 @@
#!/usr/bin/env python3
import argparse
import logging
import os
import sys
import traceback
from pathlib import Path
from core.config import setup_config, PipelineConfig
from processing.ner.name_builder import NameBuilder
from processing.ner.name_engineering import NameEngineering
from processing.ner.name_model import NameModel
from ners.core.config import PipelineConfig
from ners.processing.ner.name_builder import NameBuilder
from ners.processing.ner.name_engineering import NameEngineering
from ners.processing.ner.name_model import NameModel
def feature(config: PipelineConfig):
"""Apply feature engineering to create position-independent NER dataset."""
NameEngineering(config).compute()
def build(config: PipelineConfig):
"""Build NER dataset using NERDataBuilder."""
NameBuilder(config).build()
def train(config: PipelineConfig):
"""Train the NER model."""
name_model = NameModel(config)
data_path = Path(config.paths.data_dir) / config.data.output_files["ner_data"]
@@ -37,7 +32,9 @@ def train(config: PipelineConfig):
split_idx = int(len(data) * 0.9)
train_data, eval_data = data[:split_idx], data[split_idx:]
logging.info(f"Training with {len(train_data)} examples, evaluating on {len(eval_data)}")
logging.info(
f"Training with {len(train_data)} examples, evaluating on {len(eval_data)}"
)
name_model.train(
data=train_data,
epochs=config.processing.epochs,
@@ -75,21 +72,9 @@ def run_pipeline(config: PipelineConfig, reset: bool = False):
def main():
parser = argparse.ArgumentParser(description="NER model management for DRC names")
parser.add_argument("--config", type=str, help="Path to configuration file")
parser.add_argument("--env", type=str, default="development", help="Environment name")
parser.add_argument("--reset", action="store_true", help="Reset all steps")
args = parser.parse_args()
try:
config = setup_config(config_path=args.config, env=args.env)
return run_pipeline(config, args.reset)
except Exception as e:
print(f"Pipeline failed: {e}")
logging.error("This module is no longer a CLI. Use 'ners ner ...' instead.")
return 1
except Exception:
traceback.print_exc()
return 1
if __name__ == "__main__":
sys.exit(main())
@@ -8,4 +8,6 @@ class BatchConfig:
batch_size: int = 1000
max_workers: int = 4
checkpoint_interval: int = 5 # Save checkpoint every N batches
use_multiprocessing: bool = False # Use ProcessPoolExecutor instead of ThreadPoolExecutor
use_multiprocessing: bool = (
False # Use ProcessPoolExecutor instead of ThreadPoolExecutor
)
@@ -4,9 +4,9 @@ from typing import Iterator
import pandas as pd
from processing.batch.batch_config import BatchConfig
from processing.batch.memory_monitor import MemoryMonitor
from processing.steps import PipelineStep
from ners.processing.batch.batch_config import BatchConfig
from ners.processing.batch.memory_monitor import MemoryMonitor
from ners.processing.steps import PipelineStep
class BatchProcessor:
@@ -33,7 +33,9 @@ class BatchProcessor:
for batch_num, (batch, batch_id) in enumerate(self.create_batches(df)):
if step.batch_exists(batch_id):
logging.info(f"Batch {batch_id} already processed, loading from checkpoint")
logging.info(
f"Batch {batch_id} already processed, loading from checkpoint"
)
processed_batch = step.load_batch(batch_id)
else:
try:
@@ -80,7 +82,9 @@ class BatchProcessor:
def process_concurrent(self, step: PipelineStep, df: pd.DataFrame) -> pd.DataFrame:
"""Memory-optimized concurrent processing"""
executor_class = (
ProcessPoolExecutor if self.config.use_multiprocessing else ThreadPoolExecutor
ProcessPoolExecutor
if self.config.use_multiprocessing
else ThreadPoolExecutor
)
results = {}
@@ -89,7 +93,9 @@ class BatchProcessor:
future_to_batch = {}
for batch, batch_id in self.create_batches(df):
if step.batch_exists(batch_id):
logging.info(f"Batch {batch_id} already processed, loading from checkpoint")
logging.info(
f"Batch {batch_id} already processed, loading from checkpoint"
)
results[batch_id] = step.load_batch(batch_id)
else:
# Only copy if necessary for concurrent processing
@@ -121,7 +127,9 @@ class BatchProcessor:
del results
self.memory_monitor.cleanup_memory()
result = self._safe_concat(ordered_results) if ordered_results else pd.DataFrame()
result = (
self._safe_concat(ordered_results) if ordered_results else pd.DataFrame()
)
# Final cleanup
del ordered_results
@@ -131,7 +139,9 @@ class BatchProcessor:
def process(self, step: PipelineStep, df: pd.DataFrame) -> pd.DataFrame:
"""Process data using the configured strategy"""
step.state.total_batches = (len(df) + self.config.batch_size - 1) // self.config.batch_size
step.state.total_batches = (
len(df) + self.config.batch_size - 1
) // self.config.batch_size
step.load_state()
logging.info(f"Starting {step.name} with {step.state.total_batches} batches")
@@ -4,8 +4,8 @@ import shutil
from datetime import datetime
from typing import Optional, Dict
from core.config.config_manager import ConfigManager
from core.config.project_paths import ProjectPaths
from ners.core.config.config_manager import ConfigManager
from ners.core.config.project_paths import ProjectPaths
class PipelineMonitor:
@@ -97,7 +97,10 @@ class PipelineMonitor:
avg_completion = total_completion / len(self.steps)
if avg_completion >= 100 and overall_status not in ["error", "completed_with_errors"]:
if avg_completion >= 100 and overall_status not in [
"error",
"completed_with_errors",
]:
overall_status = "completed"
return {
@@ -121,7 +124,9 @@ class PipelineMonitor:
print(f"{step_name.replace('_', ' ').title()}:")
print(f" Status: {step_status['status']}")
print(f" Progress: {step_status['completion_percentage']:.1f}%")
print(f" Batches: {step_status['processed_batches']}/{step_status['total_batches']}")
print(
f" Batches: {step_status['processed_batches']}/{step_status['total_batches']}"
)
if step_status["failed_batches"] > 0:
print(f" Failed Batches: {step_status['failed_batches']}")
@@ -141,7 +146,10 @@ class PipelineMonitor:
if step_dir.exists():
csv_files = list(step_dir.glob("*.csv"))
step_size = sum(f.stat().st_size for f in csv_files)
counts[step] = {"files": len(csv_files), "size_mb": step_size / (1024 * 1024)}
counts[step] = {
"files": len(csv_files),
"size_mb": step_size / (1024 * 1024),
}
total_size += step_size
else:
counts[step] = {"files": 0, "size_mb": 0}
@@ -160,7 +168,9 @@ class PipelineMonitor:
csv_files = sorted(step_dir.glob("batch_*.csv"))
if len(csv_files) <= keep_last:
logging.info(f"Only {len(csv_files)} checkpoint files for {step_name}, keeping all")
logging.info(
f"Only {len(csv_files)} checkpoint files for {step_name}, keeping all"
)
return
files_to_delete = csv_files[:-keep_last] if keep_last > 0 else csv_files
@@ -3,7 +3,7 @@ from typing import List, Tuple, Dict
import pandas as pd
from processing.steps.feature_extraction_step import NameCategory
from ners.processing.steps.feature_extraction_step import NameCategory
class BaseNameFormatter(ABC):
@@ -12,7 +12,9 @@ class BaseNameFormatter(ABC):
Contains common logic for NER tagging and attribute computation.
"""
def __init__(self, connectors: List[str] = None, additional_surnames: List[str] = None):
def __init__(
self, connectors: List[str] = None, additional_surnames: List[str] = None
):
self.connectors = connectors or ["wa", "ya", "ka", "ba"]
self.additional_surnames = additional_surnames or [
"jean",
@@ -46,7 +48,9 @@ class BaseNameFormatter(ABC):
end_pos = current_pos + len(word)
# Determine tag based on word content
if word in native_parts or any(connector in word for connector in self.connectors):
if word in native_parts or any(
connector in word for connector in self.connectors
):
tag = "NATIVE"
elif word == surname or word in self.additional_surnames:
tag = "SURNAME"
@@ -72,7 +76,9 @@ class BaseNameFormatter(ABC):
"words": words_count,
"length": length,
"identified_category": (
NameCategory.SIMPLE.value if words_count == 3 else NameCategory.COMPOSE.value
NameCategory.SIMPLE.value
if words_count == 3
else NameCategory.COMPOSE.value
),
}
@@ -3,7 +3,7 @@ from typing import Dict
import pandas as pd
from processing.ner.formats import BaseNameFormatter
from ners.processing.ner.formats import BaseNameFormatter
class ConnectorFormatter(BaseNameFormatter):
@@ -3,13 +3,15 @@ from typing import Dict
import pandas as pd
from processing.ner.formats import BaseNameFormatter
from ners.processing.ner.formats import BaseNameFormatter
class ExtendedSurnameFormatter(BaseNameFormatter):
def transform(self, row: pd.Series) -> Dict:
native_parts = self.parse_native_components(row["probable_native"])
original_surname = row["probable_surname"] if pd.notna(row["probable_surname"]) else ""
original_surname = (
row["probable_surname"] if pd.notna(row["probable_surname"]) else ""
)
# Add random additional surname
additional_surname = random.choice(self.additional_surnames)
@@ -22,7 +24,9 @@ class ExtendedSurnameFormatter(BaseNameFormatter):
"identified_name": row["probable_native"],
"probable_surname": combined_surname,
"identified_surname": combined_surname,
"ner_entities": str(self.create_ner_tags(full_name, native_parts, combined_surname)),
"ner_entities": str(
self.create_ner_tags(full_name, native_parts, combined_surname)
),
"transformation_type": self.transformation_type,
**self.compute_numeric_features(full_name),
}
@@ -2,7 +2,7 @@ from typing import Dict
import pandas as pd
from processing.ner.formats import BaseNameFormatter
from ners.processing.ner.formats import BaseNameFormatter
class NativeOnlyFormatter(BaseNameFormatter):
@@ -2,7 +2,7 @@ from typing import Dict
import pandas as pd
from processing.ner.formats import BaseNameFormatter
from ners.processing.ner.formats import BaseNameFormatter
class OriginalFormatter(BaseNameFormatter):
@@ -2,7 +2,7 @@ from typing import Dict
import pandas as pd
from processing.ner.formats import BaseNameFormatter
from ners.processing.ner.formats import BaseNameFormatter
class PositionFlippedFormatter(BaseNameFormatter):
@@ -2,7 +2,7 @@ from typing import Dict
import pandas as pd
from processing.ner.formats import BaseNameFormatter
from ners.processing.ner.formats import BaseNameFormatter
class ReducedNativeFormatter(BaseNameFormatter):
@@ -11,7 +11,9 @@ class ReducedNativeFormatter(BaseNameFormatter):
surname = row["probable_surname"] if pd.notna(row["probable_surname"]) else ""
# Keep only first native component + surname
reduced_native = native_parts[0] if len(native_parts) > 1 else row["probable_native"]
reduced_native = (
native_parts[0] if len(native_parts) > 1 else row["probable_native"]
)
full_name = f"{reduced_native} {surname}".strip()
return {
@@ -20,7 +22,9 @@ class ReducedNativeFormatter(BaseNameFormatter):
"identified_name": reduced_native,
"probable_surname": surname,
"identified_surname": surname,
"ner_entities": str(self.create_ner_tags(full_name, [reduced_native], surname)),
"ner_entities": str(
self.create_ner_tags(full_name, [reduced_native], surname)
),
"transformation_type": self.transformation_type,
**self.compute_numeric_features(full_name),
}
@@ -4,8 +4,8 @@ import logging
import spacy
from spacy.tokens import DocBin
from core.config import PipelineConfig
from core.utils.data_loader import DataLoader
from ners.core.config import PipelineConfig
from ners.core.utils.data_loader import DataLoader
from .name_tagger import NameTagger
@@ -20,7 +20,9 @@ class NameBuilder:
self.tagger = NameTagger()
def build(self) -> int:
filepath = self.config.paths.get_data_path(self.config.data.output_files["engineered"])
filepath = self.config.paths.get_data_path(
self.config.data.output_files["engineered"]
)
df = self.data_loader.load_csv_complete(filepath)
df = df[["name", "ner_tagged", "ner_entities"]]
@@ -38,7 +40,9 @@ class NameBuilder:
# Use NERNameTagger for parsing and validation
parsed_entities = self.tagger.parse_entities(ner_df["ner_entities"])
validated_entities = self.tagger.validate_entities(ner_df["name"], parsed_entities)
validated_entities = self.tagger.validate_entities(
ner_df["name"], parsed_entities
)
# Drop rows with no valid entities
mask = validated_entities.map(bool)
@@ -51,22 +55,33 @@ class NameBuilder:
# Prepare training data
training_data = list(
zip(ner_df["name"].tolist(), [{"entities": ents} for ents in validated_entities])
zip(
ner_df["name"].tolist(),
[{"entities": ents} for ents in validated_entities],
)
)
# Use NERNameTagger to create spaCy DocBin
docs = self.tagger.create_docs(nlp, ner_df["name"].tolist(), validated_entities.tolist())
docs = self.tagger.create_docs(
nlp, ner_df["name"].tolist(), validated_entities.tolist()
)
doc_bin = DocBin(docs=docs)
# Save
json_path = self.config.paths.get_data_path(self.config.data.output_files["ner_data"])
spacy_path = self.config.paths.get_data_path(self.config.data.output_files["ner_spacy"])
json_path = self.config.paths.get_data_path(
self.config.data.output_files["ner_data"]
)
spacy_path = self.config.paths.get_data_path(
self.config.data.output_files["ner_spacy"]
)
with open(json_path, "w", encoding="utf-8") as f:
json.dump(training_data, f, ensure_ascii=False, separators=(",", ":"))
doc_bin.to_disk(spacy_path)
logging.info(f"Processed: {len(training_data)}, Skipped: {total_rows - len(training_data)}")
logging.info(
f"Processed: {len(training_data)}, Skipped: {total_rows - len(training_data)}"
)
logging.info(f"Saved NER JSON to {json_path}")
logging.info(f"Saved NER spacy to {spacy_path}")
return 0
@@ -6,14 +6,14 @@ import numpy as np
import pandas as pd
from tqdm import tqdm
from core.config import PipelineConfig
from core.utils.data_loader import DataLoader
from processing.ner.formats.connectors_format import ConnectorFormatter
from processing.ner.formats.extended_surname_format import ExtendedSurnameFormatter
from processing.ner.formats.native_only_format import NativeOnlyFormatter
from processing.ner.formats.original_format import OriginalFormatter
from processing.ner.formats.position_flipped_format import PositionFlippedFormatter
from processing.ner.formats.reduced_native_format import ReducedNativeFormatter
from ners.core.config import PipelineConfig
from ners.core.utils.data_loader import DataLoader
from ners.processing.ner.formats.connectors_format import ConnectorFormatter
from ners.processing.ner.formats.extended_surname_format import ExtendedSurnameFormatter
from ners.processing.ner.formats.native_only_format import NativeOnlyFormatter
from ners.processing.ner.formats.original_format import OriginalFormatter
from ners.processing.ner.formats.position_flipped_format import PositionFlippedFormatter
from ners.processing.ner.formats.reduced_native_format import ReducedNativeFormatter
class NameEngineering:
@@ -44,42 +44,60 @@ class NameEngineering:
# Initialize format classes
self.formatters = {
"original": OriginalFormatter(self.connectors, self.additional_surnames),
"native_only": NativeOnlyFormatter(self.connectors, self.additional_surnames),
"position_flipped": PositionFlippedFormatter(self.connectors, self.additional_surnames),
"reduced_native": ReducedNativeFormatter(self.connectors, self.additional_surnames),
"connector_added": ConnectorFormatter(self.connectors, self.additional_surnames),
"extended_surname": ExtendedSurnameFormatter(self.connectors, self.additional_surnames),
"native_only": NativeOnlyFormatter(
self.connectors, self.additional_surnames
),
"position_flipped": PositionFlippedFormatter(
self.connectors, self.additional_surnames
),
"reduced_native": ReducedNativeFormatter(
self.connectors, self.additional_surnames
),
"connector_added": ConnectorFormatter(
self.connectors, self.additional_surnames
),
"extended_surname": ExtendedSurnameFormatter(
self.connectors, self.additional_surnames
),
}
def load_data(self) -> pd.DataFrame:
"""Load and filter NER-tagged data from CSV file"""
filepath = self.config.paths.get_data_path(self.config.data.output_files["featured"])
filepath = self.config.paths.get_data_path(
self.config.data.output_files["featured"]
)
df = self.data_loader.load_csv_complete(filepath)
# Filter only NER-tagged rows
ner_data = df[df["ner_tagged"] == 1].copy()
logging.info(f"Loaded {len(ner_data)} NER-tagged records from {len(df)} total records")
logging.info(
f"Loaded {len(ner_data)} NER-tagged records from {len(df)} total records"
)
return ner_data
def compute(self) -> None:
logging.info("Applying feature engineering transformations...")
input_filepath = self.config.paths.get_data_path(self.config.data.output_files["featured"])
input_filepath = self.config.paths.get_data_path(
self.config.data.output_files["featured"]
)
output_filepath = self.config.paths.get_data_path(
self.config.data.output_files["engineered"]
)
df = self.data_loader.load_csv_complete(input_filepath)
ner_df = df[df["ner_tagged"] == 1].copy()
logging.info(f"Loaded {len(ner_df)} NER-tagged records from {len(df)} total records")
logging.info(
f"Loaded {len(ner_df)} NER-tagged records from {len(df)} total records"
)
del df # No need to keep in memory
gc.collect()
ner_df = ner_df.sample(frac=1, random_state=self.config.data.random_seed).reset_index(
drop=True
)
ner_df = ner_df.sample(
frac=1, random_state=self.config.data.random_seed
).reset_index(drop=True)
total_rows = len(ner_df)
# Calculate split points
@@ -94,7 +112,11 @@ class NameEngineering:
(0, split_25_1, "original"), # First 25%: original format
(split_25_1, split_25_2, "native_only"), # Second 25%: remove surname
(split_25_2, split_25_3, "position_flipped"), # Third 25%: flip positions
(split_25_3, split_10_1, "reduced_native"), # Fourth 10%: reduce native components
(
split_25_3,
split_10_1,
"reduced_native",
), # Fourth 10%: reduce native components
(split_10_1, split_10_2, "connector_added"), # Fifth 10%: add connectors
(split_10_2, total_rows, "extended_surname"), # Last 5%: extend surnames
]
@@ -11,7 +11,7 @@ from spacy.training import Example
from spacy.util import minibatch
from tqdm import tqdm
from core.config.pipeline_config import PipelineConfig
from ners.core.config.pipeline_config import PipelineConfig
class NameModel:
@@ -87,7 +87,9 @@ class NameModel:
# Handle different annotation formats from NERNameTagger
if not isinstance(annotations, dict) or "entities" not in annotations:
logging.warning(f"Skipping invalid annotations at index {i}: {annotations}")
logging.warning(
f"Skipping invalid annotations at index {i}: {annotations}"
)
skipped_count += 1
continue
@@ -124,7 +126,9 @@ class NameModel:
valid_entities = []
for entity in entities:
if not isinstance(entity, (list, tuple)) or len(entity) != 3:
logging.warning(f"Skipping invalid entity format in '{text}': {entity}")
logging.warning(
f"Skipping invalid entity format in '{text}': {entity}"
)
continue
start, end, label = entity
@@ -138,21 +142,30 @@ class NameModel:
or start < 0
or end > len(text)
):
logging.warning(f"Skipping invalid entity bounds in '{text}': {entity}")
logging.warning(
f"Skipping invalid entity bounds in '{text}': {entity}"
)
continue
# Check for overlaps with already validated entities
has_overlap = any(
start < v_end and end > v_start for v_start, v_end, _ in valid_entities
start < v_end and end > v_start
for v_start, v_end, _ in valid_entities
)
if has_overlap:
logging.warning(f"Skipping overlapping entity in '{text}': {entity}")
logging.warning(
f"Skipping overlapping entity in '{text}': {entity}"
)
continue
# Validate that the span doesn't contain spaces (matching tagger validation)
span_text = text[start:end]
if not span_text or span_text != span_text.strip() or " " in span_text:
if (
not span_text
or span_text != span_text.strip()
or " " in span_text
):
logging.warning(
f"Skipping entity with spaces in '{text}': {entity} -> '{span_text}'"
)
@@ -161,7 +174,9 @@ class NameModel:
valid_entities.append((start, end, label))
if not valid_entities:
logging.warning(f"Skipping training example with no valid entities: '{text}'")
logging.warning(
f"Skipping training example with no valid entities: '{text}'"
)
skipped_count += 1
continue
@@ -219,7 +234,9 @@ class NameModel:
batches = minibatch(examples, size=batch_size)
for batch in batches:
batch_losses = {}
self.nlp.update(batch, losses=batch_losses, drop=dropout_rate, sgd=optimizer)
self.nlp.update(
batch, losses=batch_losses, drop=dropout_rate, sgd=optimizer
)
logging.info(
f"Training batch with {len(batch)} examples, current losses: {batch_losses}"
)
@@ -242,7 +259,9 @@ class NameModel:
"dropout_rate": dropout_rate,
}
logging.info(f"Training completed. Final loss: {self.training_stats['final_loss']:.4f}")
logging.info(
f"Training completed. Final loss: {self.training_stats['final_loss']:.4f}"
)
def evaluate(self, test_data: List[Tuple[str, Dict]]) -> Dict[str, Any]:
"""Evaluate the trained model on test data"""
@@ -291,10 +310,14 @@ class NameModel:
entity_stats[label]["fp"] += 1
# Calculate overall metrics
precision = correct_entities / predicted_entities if predicted_entities > 0 else 0
precision = (
correct_entities / predicted_entities if predicted_entities > 0 else 0
)
recall = correct_entities / actual_entities if actual_entities > 0 else 0
f1_score = (
2 * (precision * recall) / (precision + recall) if (precision + recall) > 0 else 0
2 * (precision * recall) / (precision + recall)
if (precision + recall) > 0
else 0
)
# Calculate per-label metrics
@@ -304,7 +327,11 @@ class NameModel:
label_precision = tp / (tp + fp) if (tp + fp) > 0 else 0
label_recall = tp / (tp + fn) if (tp + fn) > 0 else 0
label_f1 = (
(2 * (label_precision * label_recall) / (label_precision + label_recall))
(
2
* (label_precision * label_recall)
/ (label_precision + label_recall)
)
if (label_precision + label_recall) > 0
else 0
)
@@ -394,7 +421,9 @@ class NameModel:
"label": ent.label_,
"start": ent.start_char,
"end": ent.end_char,
"confidence": getattr(ent, "score", None), # If confidence scores are available
"confidence": getattr(
ent, "score", None
), # If confidence scores are available
}
)
@@ -48,7 +48,9 @@ class NameTagger:
# Find the first occurrence of this native word that doesn't overlap
start_pos = 0
while True:
pos = name_lower.find(native_word_lower, start_pos) # Case-insensitive search
pos = name_lower.find(
native_word_lower, start_pos
) # Case-insensitive search
if pos == -1:
break
@@ -78,7 +80,9 @@ class NameTagger:
# Find the first occurrence that doesn't overlap
start_pos = 0
while True:
pos = name_lower.find(surname_lower, start_pos) # Case-insensitive search
pos = name_lower.find(
surname_lower, start_pos
) # Case-insensitive search
if pos == -1:
break
@@ -120,8 +124,13 @@ class NameTagger:
continue
# Check for overlaps with already validated entities
if any(start < v_end and end > v_start for v_start, v_end, _ in validated_entities):
logging.warning(f"Overlapping span ({start}, {end}, '{label}') in '{name}'")
if any(
start < v_end and end > v_start
for v_start, v_end, _ in validated_entities
):
logging.warning(
f"Overlapping span ({start}, {end}, '{label}') in '{name}'"
)
continue
# CRITICAL VALIDATION: Check that the span contains only the expected word (no spaces)
@@ -200,10 +209,16 @@ class NameTagger:
elif entities_str.startswith("[[") and entities_str.endswith("]]"):
return [tuple(e) for e in ast.literal_eval(entities_str)]
elif entities_str.startswith("[{") and entities_str.endswith("}]"):
return [(e["start"], e["end"], e["label"]) for e in json.loads(entities_str)]
return [
(e["start"], e["end"], e["label"]) for e in json.loads(entities_str)
]
else:
parsed = ast.literal_eval(entities_str)
return [tuple(e) for e in parsed if isinstance(e, (list, tuple)) and len(e) == 3]
return [
tuple(e)
for e in parsed
if isinstance(e, (list, tuple)) and len(e) == 3
]
except (ValueError, SyntaxError, json.JSONDecodeError):
return []
@@ -251,7 +266,9 @@ class NameTagger:
last_end = e
return filtered
def validate_entities(self, texts: pd.Series, entities_series: pd.Series) -> pd.Series:
def validate_entities(
self, texts: pd.Series, entities_series: pd.Series
) -> pd.Series:
"""Vectorized entity validation."""
return pd.Series(map(self.validate, texts, entities_series), index=texts.index)
@@ -4,9 +4,9 @@ from typing import Dict, Any
import pandas as pd
from processing.batch.batch_config import BatchConfig
from processing.batch.batch_processor import BatchProcessor
from processing.steps import PipelineStep
from ners.processing.batch.batch_config import BatchConfig
from ners.processing.batch.batch_processor import BatchProcessor
from ners.processing.steps import PipelineStep
class Pipeline:
@@ -8,9 +8,9 @@ from typing import List, Optional
import pandas as pd
from pydantic import BaseModel
from core.config.pipeline_config import PipelineConfig
from core.utils.data_loader import DataLoader
from processing.batch.batch_config import BatchConfig
from ners.core.config.pipeline_config import PipelineConfig
from ners.core.utils.data_loader import DataLoader
from ners.processing.batch.batch_config import BatchConfig
@dataclass
@@ -38,7 +38,10 @@ class PipelineStep(ABC):
"""Abstract base class for pipeline steps"""
def __init__(
self, name: str, pipeline_config: PipelineConfig, batch_config: Optional[BatchConfig] = None
self,
name: str,
pipeline_config: PipelineConfig,
batch_config: Optional[BatchConfig] = None,
):
self.name = name
self.pipeline_config = pipeline_config
@@ -2,9 +2,9 @@ import logging
import pandas as pd
from core.config.pipeline_config import PipelineConfig
from core.utils.text_cleaner import TextCleaner
from processing.steps import PipelineStep
from ners.core.config.pipeline_config import PipelineConfig
from ners.core.utils.text_cleaner import TextCleaner
from ners.processing.steps import PipelineStep
class DataCleaningStep(PipelineStep):
@@ -2,8 +2,8 @@ import logging
import pandas as pd
from core.config.pipeline_config import PipelineConfig
from processing.steps import PipelineStep
from ners.core.config.pipeline_config import PipelineConfig
from ners.processing.steps import PipelineStep
class DataSelectionStep(PipelineStep):
@@ -31,8 +31,12 @@ class DataSelectionStep(PipelineStep):
)
# Check which columns exist in the batch
available_columns = [col for col in self.selected_columns if col in batch.columns]
missing_columns = [col for col in self.selected_columns if col not in batch.columns]
available_columns = [
col for col in self.selected_columns if col in batch.columns
]
missing_columns = [
col for col in self.selected_columns if col not in batch.columns
]
if missing_columns:
logging.warning(f"Missing columns in batch {batch_id}: {missing_columns}")
@@ -1,11 +1,11 @@
import numpy as np
import pandas as pd
from core.config.pipeline_config import PipelineConfig
from core.utils.region_mapper import RegionMapper
from processing.batch.batch_config import BatchConfig
from processing.steps import PipelineStep
from processing.steps.feature_extraction_step import Gender
from ners.core.config.pipeline_config import PipelineConfig
from ners.core.utils.region_mapper import RegionMapper
from ners.processing.batch.batch_config import BatchConfig
from ners.processing.steps import PipelineStep
from ners.processing.steps.feature_extraction_step import Gender
class DataSplittingStep(PipelineStep):
@@ -26,7 +26,9 @@ class DataSplittingStep(PipelineStep):
if self.eval_indices is None:
np.random.seed(self.pipeline_config.data.random_seed)
eval_size = int(total_size * self.pipeline_config.data.evaluation_fraction)
self.eval_indices = set(np.random.choice(total_size, size=eval_size, replace=False))
self.eval_indices = set(
np.random.choice(total_size, size=eval_size, replace=False)
)
return self.eval_indices
def process_batch(self, batch: pd.DataFrame, batch_id: int) -> pd.DataFrame:
@@ -45,7 +47,9 @@ class DataSplittingStep(PipelineStep):
df_evaluation = df[eval_mask]
df_featured = df[~eval_mask]
self.data_loader.save_csv(df_evaluation, data_dir / output_files["evaluation"])
self.data_loader.save_csv(
df_evaluation, data_dir / output_files["evaluation"]
)
self.data_loader.save_csv(df_featured, data_dir / output_files["featured"])
else:
self.data_loader.save_csv(df, data_dir / output_files["featured"])
@@ -53,7 +57,9 @@ class DataSplittingStep(PipelineStep):
if self.pipeline_config.data.split_by_province:
for province in RegionMapper.get_provinces():
df_region = df[df.province == province]
self.data_loader.save_csv(df_region, data_dir / "provinces" / f"{province}.csv")
self.data_loader.save_csv(
df_region, data_dir / "provinces" / f"{province}.csv"
)
if self.pipeline_config.data.split_by_gender:
df_males = df[df.sex == Gender.MALE.value]
@@ -5,10 +5,10 @@ from typing import Dict, Any
import pandas as pd
from core.config.pipeline_config import PipelineConfig
from core.utils.region_mapper import RegionMapper
from processing.ner.name_tagger import NameTagger
from processing.steps import PipelineStep
from ners.core.config.pipeline_config import PipelineConfig
from ners.core.utils.region_mapper import RegionMapper
from ners.processing.ner.name_tagger import NameTagger
from ners.processing.steps import PipelineStep
class Gender(Enum):
@@ -64,10 +64,14 @@ class FeatureExtractionStep(PipelineStep):
self._assign_probable_names(result)
self._process_simple_names(result)
result["identified_category"] = self._assign_identified_category(result["words"])
result["identified_category"] = self._assign_identified_category(
result["words"]
)
if "year" in result.columns:
result["year"] = pd.to_numeric(result["year"], errors="coerce").astype("Int16")
result["year"] = pd.to_numeric(result["year"], errors="coerce").astype(
"Int16"
)
if "region" in result.columns:
result["province"] = self.region_mapper.map(result["region"]).str.lower()
@@ -7,12 +7,12 @@ import ollama
import pandas as pd
from pydantic import ValidationError
from core.config.pipeline_config import PipelineConfig
from core.utils.prompt_manager import PromptManager
from core.utils.rate_limiter import RateLimitConfig
from core.utils.rate_limiter import RateLimiter
from processing.batch.batch_config import BatchConfig
from processing.steps import PipelineStep, NameAnnotation
from ners.core.config.pipeline_config import PipelineConfig
from ners.core.utils.prompt_manager import PromptManager
from ners.core.utils.rate_limiter import RateLimitConfig
from ners.core.utils.rate_limiter import RateLimiter
from ners.processing.batch.batch_config import BatchConfig
from ners.processing.steps import PipelineStep, NameAnnotation
class LLMAnnotationStep(PipelineStep):
@@ -24,7 +24,8 @@ class LLMAnnotationStep(PipelineStep):
batch_config = BatchConfig(
batch_size=pipeline_config.processing.batch_size,
max_workers=min(
self.llm_config.max_concurrent_requests, pipeline_config.processing.max_workers
self.llm_config.max_concurrent_requests,
pipeline_config.processing.max_workers,
),
checkpoint_interval=pipeline_config.processing.checkpoint_interval,
use_multiprocessing=pipeline_config.processing.use_multiprocessing,
@@ -33,7 +34,9 @@ class LLMAnnotationStep(PipelineStep):
self.prompt = PromptManager(pipeline_config).load_prompt()
self.rate_limiter = (
self._create_rate_limiter() if self.llm_config.enable_rate_limiting else None
self._create_rate_limiter()
if self.llm_config.enable_rate_limiting
else None
)
# Statistics
@@ -76,7 +79,9 @@ class LLMAnnotationStep(PipelineStep):
f"Request took {elapsed_time:.2f}s, exceeding {self.llm_config.timeout_seconds}s timeout"
)
annotation = NameAnnotation.model_validate_json(response.message.content)
annotation = NameAnnotation.model_validate_json(
response.message.content
)
result = {
**annotation.model_dump(),
"annotated": 1,
@@ -119,7 +124,9 @@ class LLMAnnotationStep(PipelineStep):
logging.info(f"Batch {batch_id}: No entries to annotate")
return batch
logging.info(f"Batch {batch_id}: Annotating {len(unannotated_entries)} entries with LLM")
logging.info(
f"Batch {batch_id}: Annotating {len(unannotated_entries)} entries with LLM"
)
batch = batch.copy()
client = ollama.Client()
@@ -5,9 +5,9 @@ from typing import Dict
import pandas as pd
from core.config.pipeline_config import PipelineConfig
from processing.ner.name_model import NameModel
from processing.steps import PipelineStep, NameAnnotation
from ners.core.config.pipeline_config import PipelineConfig
from ners.processing.ner.name_model import NameModel
from ners.processing.steps import PipelineStep, NameAnnotation
class NERAnnotationStep(PipelineStep):
@@ -39,7 +39,9 @@ class NERAnnotationStep(PipelineStep):
logging.info("NER model loaded successfully")
else:
logging.warning(f"NER model not found at {self.model_path}")
logging.warning("NER annotation will be skipped. Train the model first.")
logging.warning(
"NER annotation will be skipped. Train the model first."
)
self.name_model.nlp = None
except Exception as e:
logging.error(f"Failed to load NER model: {e}")
@@ -80,7 +82,9 @@ class NERAnnotationStep(PipelineStep):
# Create annotation result in same format as LLM step
annotation = NameAnnotation(
identified_name=" ".join(native_parts) if native_parts else None,
identified_surname=" ".join(surname_parts) if surname_parts else None,
identified_surname=" ".join(surname_parts)
if surname_parts
else None,
)
result = {
@@ -124,7 +128,9 @@ class NERAnnotationStep(PipelineStep):
logging.info(f"Batch {batch_id}: No entries to annotate")
return batch
logging.info(f"Batch {batch_id}: Annotating {len(unannotated_entries)} entries with NER")
logging.info(
f"Batch {batch_id}: Annotating {len(unannotated_entries)} entries with NER"
)
batch = batch.copy()
@@ -7,7 +7,7 @@ import matplotlib.pyplot as plt
import numpy as np
import pandas as pd
from research.experiment import ExperimentConfig
from ners.research.experiment import ExperimentConfig
class BaseModel(ABC):
@@ -103,16 +103,25 @@ class BaseModel(ABC):
feature_names = self._get_feature_names()
return dict(zip(feature_names, coefficients))
elif hasattr(self.model, "named_steps") and "classifier" in self.model.named_steps:
elif (
hasattr(self.model, "named_steps")
and "classifier" in self.model.named_steps
):
# For sklearn pipelines (like LogisticRegression with vectorizer)
classifier = self.model.named_steps["classifier"]
if hasattr(classifier, "coef_"):
coefficients = np.abs(classifier.coef_[0])
if hasattr(self.model.named_steps["vectorizer"], "get_feature_names_out"):
feature_names = self.model.named_steps["vectorizer"].get_feature_names_out()
if hasattr(
self.model.named_steps["vectorizer"], "get_feature_names_out"
):
feature_names = self.model.named_steps[
"vectorizer"
].get_feature_names_out()
# Take top features to avoid too many n-grams
top_indices = np.argsort(coefficients)[-20:]
return dict(zip(feature_names[top_indices], coefficients[top_indices]))
return dict(
zip(feature_names[top_indices], coefficients[top_indices])
)
return None
@@ -143,7 +152,7 @@ class BaseModel(ABC):
model_data = joblib.load(path)
# Recreate the model instance
from research.experiment import ExperimentConfig
from ners.research.experiment import ExperimentConfig
config = ExperimentConfig.from_dict(model_data["config"])
instance = cls(config)
@@ -221,7 +230,9 @@ class BaseModel(ABC):
if "accuracy" in self.training_history:
axes[0].plot(self.training_history["accuracy"], label="Training Accuracy")
if "val_accuracy" in self.training_history:
axes[0].plot(self.training_history["val_accuracy"], label="Validation Accuracy")
axes[0].plot(
self.training_history["val_accuracy"], label="Validation Accuracy"
)
axes[0].set_title("Model Accuracy")
axes[0].set_xlabel("Epoch")
axes[0].set_ylabel("Accuracy")
@@ -18,7 +18,9 @@ class ExperimentConfig:
tags: List[str] = field(default_factory=list)
# Model configuration
model_type: str = "logistic_regression" # logistic_regression, lstm, transformer, etc.
model_type: str = (
"logistic_regression" # logistic_regression, lstm, transformer, etc.
)
model_params: Dict[str, Any] = field(default_factory=dict)
# Feature configuration
@@ -26,7 +28,9 @@ class ExperimentConfig:
feature_params: Dict[str, Any] = field(default_factory=dict)
# Data configuration
train_data_filter: Optional[Dict[str, Any]] = None # Filter criteria for training data
train_data_filter: Optional[Dict[str, Any]] = (
None # Filter criteria for training data
)
test_data_filter: Optional[Dict[str, Any]] = None
target_column: str = "sex"
@@ -36,7 +40,9 @@ class ExperimentConfig:
cross_validation_folds: int = 5
# Evaluation configuration
metrics: List[str] = field(default_factory=lambda: ["accuracy", "precision", "recall", "f1"])
metrics: List[str] = field(
default_factory=lambda: ["accuracy", "precision", "recall", "f1"]
)
def to_dict(self) -> Dict[str, Any]:
"""Convert to dictionary for serialization"""
@@ -2,7 +2,7 @@ from dataclasses import dataclass, field, asdict
from datetime import datetime
from typing import Optional, Dict, List, Any
from research.experiment import ExperimentConfig, ExperimentStatus
from ners.research.experiment import ExperimentConfig, ExperimentStatus
@dataclass
@@ -51,6 +51,8 @@ class ExperimentResult:
"""Create from dictionary"""
data["config"] = ExperimentConfig.from_dict(data["config"])
data["start_time"] = datetime.fromisoformat(data["start_time"])
data["end_time"] = datetime.fromisoformat(data["end_time"]) if data["end_time"] else None
data["end_time"] = (
datetime.fromisoformat(data["end_time"]) if data["end_time"] else None
)
data["status"] = ExperimentStatus(data["status"])
return cls(**data)
@@ -3,9 +3,9 @@ from typing import List, Dict
import yaml
from core.config.pipeline_config import PipelineConfig
from research.experiment import ExperimentConfig
from research.experiment.feature_extractor import FeatureType
from ners.core.config.pipeline_config import PipelineConfig
from ners.research.experiment import ExperimentConfig
from ners.research.experiment.feature_extractor import FeatureType
class ExperimentBuilder:
@@ -27,7 +27,9 @@ class ExperimentBuilder:
raise
@classmethod
def find_template(cls, templates: dict, name: str, experiment_type: str = "baseline") -> dict:
def find_template(
cls, templates: dict, name: str, experiment_type: str = "baseline"
) -> dict:
"""Find experiment configuration by name and type"""
# Map type to section in templates
@@ -9,12 +9,16 @@ import pandas as pd
from sklearn.metrics import confusion_matrix
from sklearn.model_selection import train_test_split
from core.config import PipelineConfig
from core.utils.data_loader import DataLoader
from research.base_model import BaseModel
from research.experiment import ExperimentConfig, ExperimentStatus, calculate_metrics
from research.experiment.experiment_tracker import ExperimentTracker
from research.model_registry import create_model
from ners.core.config import PipelineConfig
from ners.core.utils.data_loader import DataLoader
from ners.research.base_model import BaseModel
from ners.research.experiment import (
ExperimentConfig,
ExperimentStatus,
calculate_metrics,
)
from ners.research.experiment.experiment_tracker import ExperimentTracker
from ners.research.model_registry import create_model
class ExperimentRunner:
@@ -32,10 +36,14 @@ class ExperimentRunner:
try:
logging.info(f"Starting experiment: {experiment_id}")
self.tracker.update_experiment(experiment_id, status=ExperimentStatus.RUNNING)
self.tracker.update_experiment(
experiment_id, status=ExperimentStatus.RUNNING
)
# Load data
filepath = self.config.paths.get_data_path(self.config.data.output_files["featured"])
filepath = self.config.paths.get_data_path(
self.config.data.output_files["featured"]
)
df = self.data_loader.load_csv_complete(filepath)
# Apply data filters if specified
@@ -63,8 +71,12 @@ class ExperimentRunner:
test_pred = model.predict(X_test)
# Calculate metrics
train_metrics = calculate_metrics(y_train, train_pred, experiment_config.metrics)
test_metrics = calculate_metrics(y_test, test_pred, experiment_config.metrics)
train_metrics = calculate_metrics(
y_train, train_pred, experiment_config.metrics
)
test_metrics = calculate_metrics(
y_test, test_pred, experiment_config.metrics
)
# Cross-validation if requested
cv_metrics = {}
@@ -125,7 +137,9 @@ class ExperimentRunner:
experiment_ids = []
for i, config in enumerate(experiments):
logging.info(f"Running experiment {i + 1}/{len(experiments)}: {config.name}")
logging.info(
f"Running experiment {i + 1}/{len(experiments)}: {config.name}"
)
try:
exp_id = self.run_experiment(config)
experiment_ids.append(exp_id)
@@ -136,7 +150,9 @@ class ExperimentRunner:
return experiment_ids
@classmethod
def _apply_data_filters(cls, df: pd.DataFrame, config: ExperimentConfig) -> pd.DataFrame:
def _apply_data_filters(
cls, df: pd.DataFrame, config: ExperimentConfig
) -> pd.DataFrame:
"""Apply data filters specified in experiment config"""
filtered_df = df.copy()
@@ -148,9 +164,13 @@ class ExperimentRunner:
filtered_df = filtered_df[filtered_df[column].isin(criteria)]
elif isinstance(criteria, dict):
if "min" in criteria:
filtered_df = filtered_df[filtered_df[column] >= criteria["min"]]
filtered_df = filtered_df[
filtered_df[column] >= criteria["min"]
]
if "max" in criteria:
filtered_df = filtered_df[filtered_df[column] <= criteria["max"]]
filtered_df = filtered_df[
filtered_df[column] <= criteria["max"]
]
else:
filtered_df = filtered_df[filtered_df[column] == criteria]
@@ -231,7 +251,9 @@ class ExperimentRunner:
return model
except Exception as e:
logging.error(f"Failed to load model for experiment {experiment_id}: {e}")
logging.error(
f"Failed to load model for experiment {experiment_id}: {e}"
)
return None
return None
@@ -6,9 +6,9 @@ from typing import Optional, Dict, List
import pandas as pd
from core.config import PipelineConfig, get_config
from research.experiment import ExperimentConfig, ExperimentStatus
from research.experiment.experiement_result import ExperimentResult
from ners.core.config import PipelineConfig, get_config
from ners.research.experiment import ExperimentConfig, ExperimentStatus
from ners.research.experiment.experiement_result import ExperimentResult
class ExperimentTracker:
@@ -97,7 +97,10 @@ class ExperimentTracker:
return sorted(results, key=lambda x: x.start_time, reverse=True)
def get_best_experiment(
self, metric: str = "accuracy", dataset: str = "test", filters: Optional[Dict] = None
self,
metric: str = "accuracy",
dataset: str = "test",
filters: Optional[Dict] = None,
) -> Optional[ExperimentResult]:
"""Get the best experiment based on a metric"""
experiments = self.list_experiments()
@@ -106,7 +109,9 @@ class ExperimentTracker:
# Apply additional filters
if "model_type" in filters:
experiments = [
e for e in experiments if e.config.model_type == filters["model_type"]
e
for e in experiments
if e.config.model_type == filters["model_type"]
]
if "features" in filters:
experiments = [
@@ -118,7 +123,9 @@ class ExperimentTracker:
valid_experiments = []
for exp in experiments:
if exp.status == ExperimentStatus.COMPLETED:
metrics_dict = exp.test_metrics if dataset == "test" else exp.train_metrics
metrics_dict = (
exp.test_metrics if dataset == "test" else exp.train_metrics
)
if metric in metrics_dict:
valid_experiments.append((exp, metrics_dict[metric]))
@@ -24,7 +24,9 @@ class FeatureType(Enum):
class FeatureExtractor:
"""Extract different types of features from name data"""
def __init__(self, feature_types: List[FeatureType], feature_params: Dict[str, Any] = None):
def __init__(
self, feature_types: List[FeatureType], feature_params: Dict[str, Any] = None
):
self.feature_types = feature_types
self.feature_params = feature_params or {}
@@ -1,18 +1,18 @@
from typing import List
from research.base_model import BaseModel
from research.experiment import ExperimentConfig
from research.models.bigru_model import BiGRUModel
from research.models.cnn_model import CNNModel
from research.models.ensemble_model import EnsembleModel
from research.models.lightgbm_model import LightGBMModel
from research.models.logistic_regression_model import LogisticRegressionModel
from research.models.lstm_model import LSTMModel
from research.models.naive_bayes_model import NaiveBayesModel
from research.models.random_forest_model import RandomForestModel
from research.models.svm_model import SVMModel
from research.models.transformer_model import TransformerModel
from research.models.xgboost_model import XGBoostModel
from ners.research.base_model import BaseModel
from ners.research.experiment import ExperimentConfig
from ners.research.models.bigru_model import BiGRUModel
from ners.research.models.cnn_model import CNNModel
from ners.research.models.ensemble_model import EnsembleModel
from ners.research.models.lightgbm_model import LightGBMModel
from ners.research.models.logistic_regression_model import LogisticRegressionModel
from ners.research.models.lstm_model import LSTMModel
from ners.research.models.naive_bayes_model import NaiveBayesModel
from ners.research.models.random_forest_model import RandomForestModel
from ners.research.models.svm_model import SVMModel
from ners.research.models.transformer_model import TransformerModel
from ners.research.models.xgboost_model import XGBoostModel
MODEL_REGISTRY = {
"bigru": BiGRUModel,
@@ -5,12 +5,12 @@ from typing import List, Dict, Any
import pandas as pd
from core.config import get_config
from core.utils.data_loader import DataLoader
from research.experiment import FeatureType, ExperimentConfig
from research.experiment.experiment_runner import ExperimentRunner
from research.experiment.experiment_tracker import ExperimentTracker
from research.model_registry import MODEL_REGISTRY
from ners.core.config import get_config
from ners.core.utils.data_loader import DataLoader
from ners.research.experiment import FeatureType, ExperimentConfig
from ners.research.experiment.experiment_runner import ExperimentRunner
from ners.research.experiment.experiment_tracker import ExperimentTracker
from ners.research.model_registry import MODEL_REGISTRY
class ModelTrainer:
@@ -66,7 +66,9 @@ class ModelTrainer:
if experiment and experiment.test_metrics:
logging.info("Training completed successfully!")
logging.info(f"Experiment ID: {experiment_id}")
logging.info(f"Test Accuracy: {experiment.test_metrics.get('accuracy', 0):.4f}")
logging.info(
f"Test Accuracy: {experiment.test_metrics.get('accuracy', 0):.4f}"
)
logging.info(f"Test F1-Score: {experiment.test_metrics.get('f1', 0):.4f}")
if save_artifacts:
@@ -144,13 +146,17 @@ class ModelTrainer:
try:
# Load data for learning curve generation
data_path = self.config.paths.get_data_path(self.config.data.output_files["featured"])
data_path = self.config.paths.get_data_path(
self.config.data.output_files["featured"]
)
if data_path.exists():
df = self.data_loader.load_csv_complete(data_path)
# Generate learning curve
logging.info("Generating learning curve...")
trained_model.generate_learning_curve(df, df[experiment.config.target_column])
trained_model.generate_learning_curve(
df, df[experiment.config.target_column]
)
# Plot and save learning curve
learning_curve_path = model_dir / "learning_curve.png"
@@ -187,8 +193,12 @@ class ModelTrainer:
"model_path": str(model_path),
"config_path": str(config_path),
"results_path": str(results_path),
"learning_curve_plot": str(learning_curve_path) if learning_curve_path else None,
"training_history_plot": str(training_history_path) if training_history_path else None,
"learning_curve_plot": str(learning_curve_path)
if learning_curve_path
else None,
"training_history_plot": str(training_history_path)
if training_history_path
else None,
"has_learning_curve": bool(trained_model.learning_curve_data),
"has_training_history": bool(trained_model.training_history),
}
@@ -215,8 +225,12 @@ class ModelTrainer:
"config_path": str(config_path),
"results_path": str(results_path),
"metadata_path": str(metadata_path),
"learning_curve_plot": str(learning_curve_path) if learning_curve_path else None,
"training_history_plot": str(training_history_path) if training_history_path else None,
"learning_curve_plot": str(learning_curve_path)
if learning_curve_path
else None,
"training_history_plot": str(training_history_path)
if training_history_path
else None,
}
def load_trained_model(self, experiment_id: str):
@@ -227,7 +241,9 @@ class ModelTrainer:
model_path = model_dir / "complete_model.joblib"
if not model_path.exists():
raise FileNotFoundError(f"Model artifacts not found for experiment {experiment_id}")
raise FileNotFoundError(
f"Model artifacts not found for experiment {experiment_id}"
)
# Load the model class dynamically
metadata_path = model_dir / "metadata.json"
@@ -261,7 +277,9 @@ class ModelTrainer:
metadata = json.load(f)
models_data.append(metadata)
except Exception as e:
logging.warning(f"Could not read metadata for {model_dir.name}: {e}")
logging.warning(
f"Could not read metadata for {model_dir.name}: {e}"
)
if not models_data:
logging.info("No saved models found.")
@@ -7,7 +7,7 @@ from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from research.neural_network_model import NeuralNetworkModel
from ners.research.neural_network_model import NeuralNetworkModel
class BiGRUModel(NeuralNetworkModel):
@@ -53,7 +53,9 @@ class BiGRUModel(NeuralNetworkModel):
)
model.compile(
loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"]
loss="sparse_categorical_crossentropy",
optimizer="adam",
metrics=["accuracy"],
)
return model
@@ -15,7 +15,7 @@ from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from research.neural_network_model import NeuralNetworkModel
from ners.research.neural_network_model import NeuralNetworkModel
class CNNModel(NeuralNetworkModel):
@@ -29,7 +29,9 @@ class CNNModel(NeuralNetworkModel):
[
# Learn char/subword embeddings; spatial dropout regularizes across channels
# to make the model robust to noisy characters and transliteration.
Embedding(input_dim=vocab_size, output_dim=params.get("embedding_dim", 64)),
Embedding(
input_dim=vocab_size, output_dim=params.get("embedding_dim", 64)
),
SpatialDropout1D(rate=params.get("embedding_dropout", 0.1)),
# Small kernels capture short n-gram like patterns; padding='same' keeps
# sequence length stable for simpler pooling behavior.
@@ -59,7 +61,9 @@ class CNNModel(NeuralNetworkModel):
)
model.compile(
loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"]
loss="sparse_categorical_crossentropy",
optimizer="adam",
metrics=["accuracy"],
)
return model
@@ -75,6 +79,8 @@ class CNNModel(NeuralNetworkModel):
self.tokenizer.fit_on_texts(text_data)
sequences = self.tokenizer.texts_to_sequences(text_data)
max_len = self.config.model_params.get("max_len", 20) # Longer for character level
max_len = self.config.model_params.get(
"max_len", 20
) # Longer for character level
return pad_sequences(sequences, maxlen=max_len, padding="post")
@@ -8,8 +8,8 @@ from sklearn.linear_model import LogisticRegression
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from research.experiment import ExperimentConfig
from research.traditional_model import TraditionalModel
from ners.research.experiment import ExperimentConfig
from ners.research.traditional_model import TraditionalModel
class EnsembleModel(TraditionalModel):
@@ -40,22 +40,28 @@ class EnsembleModel(TraditionalModel):
[
(
"vectorizer",
CountVectorizer(analyzer="char", ngram_range=(2, 4), max_features=5000),
CountVectorizer(
analyzer="char", ngram_range=(2, 4), max_features=5000
),
),
(
"classifier",
LogisticRegression(max_iter=1000, random_state=self.config.random_seed),
LogisticRegression(
max_iter=1000, random_state=self.config.random_seed
),
),
]
)
estimators.append((f"logistic_regression", model))
estimators.append(("logistic_regression", model))
elif model_type == "random_forest":
model = Pipeline(
[
(
"vectorizer",
TfidfVectorizer(analyzer="char", ngram_range=(2, 3), max_features=3000),
TfidfVectorizer(
analyzer="char", ngram_range=(2, 3), max_features=3000
),
),
(
"classifier",
@@ -65,19 +71,21 @@ class EnsembleModel(TraditionalModel):
),
]
)
estimators.append((f"rf", model))
estimators.append(("rf", model))
elif model_type == "naive_bayes":
model = Pipeline(
[
(
"vectorizer",
CountVectorizer(analyzer="char", ngram_range=(1, 3), max_features=4000),
CountVectorizer(
analyzer="char", ngram_range=(1, 3), max_features=4000
),
),
("classifier", MultinomialNB()),
]
)
estimators.append((f"nb", model))
estimators.append(("nb", model))
# Soft voting averages probabilities (preferred when members are calibrated);
# hard voting uses majority class. Parallelize member predictions.
@@ -5,7 +5,7 @@ from sklearn.base import BaseEstimator
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelEncoder
from research.traditional_model import TraditionalModel
from ners.research.traditional_model import TraditionalModel
class LightGBMModel(TraditionalModel):
@@ -106,7 +106,9 @@ class LightGBMModel(TraditionalModel):
lambda x: x if x in known_classes else default_class
)
encoded = self.label_encoders[feature_key].transform(column_mapped)
encoded = self.label_encoders[feature_key].transform(
column_mapped
)
features.append(encoded.reshape(-1, 1))
@@ -5,7 +5,7 @@ from sklearn.feature_extraction.text import CountVectorizer
from sklearn.linear_model import LogisticRegression
from sklearn.pipeline import Pipeline
from research.traditional_model import TraditionalModel
from ners.research.traditional_model import TraditionalModel
class LogisticRegressionModel(TraditionalModel):
@@ -7,7 +7,7 @@ from tensorflow.keras.models import Sequential
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from research.neural_network_model import NeuralNetworkModel
from ners.research.neural_network_model import NeuralNetworkModel
class LSTMModel(NeuralNetworkModel):
@@ -50,7 +50,9 @@ class LSTMModel(NeuralNetworkModel):
)
model.compile(
loss="sparse_categorical_crossentropy", optimizer="adam", metrics=["accuracy"]
loss="sparse_categorical_crossentropy",
optimizer="adam",
metrics=["accuracy"],
)
return model
@@ -5,7 +5,7 @@ from sklearn.feature_extraction.text import CountVectorizer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from research.traditional_model import TraditionalModel
from ners.research.traditional_model import TraditionalModel
class NaiveBayesModel(TraditionalModel):
@@ -6,7 +6,7 @@ from sklearn.base import BaseEstimator
from sklearn.ensemble import RandomForestClassifier
from sklearn.preprocessing import LabelEncoder
from research.traditional_model import TraditionalModel
from ners.research.traditional_model import TraditionalModel
class RandomForestModel(TraditionalModel):
@@ -18,7 +18,6 @@ class RandomForestModel(TraditionalModel):
self.label_encoders: Dict[str, LabelEncoder] = {}
def build_model(self) -> BaseEstimator:
params = self.config.model_params
# Tree ensemble is robust to mixed numeric/categorical encodings; parallelize
@@ -56,10 +55,14 @@ class RandomForestModel(TraditionalModel):
column_clean = column.fillna("unknown").astype(str)
known_classes = set(encoder.classes_)
default_class = (
"unknown" if "unknown" in known_classes else encoder.classes_[0]
"unknown"
if "unknown" in known_classes
else encoder.classes_[0]
)
column_mapped = column_clean.apply(
lambda value: value if value in known_classes else default_class
lambda value: value
if value in known_classes
else default_class
)
encoded = encoder.transform(column_mapped)
@@ -5,7 +5,7 @@ from sklearn.feature_extraction.text import TfidfVectorizer
from sklearn.pipeline import Pipeline
from sklearn.svm import SVC
from research.traditional_model import TraditionalModel
from ners.research.traditional_model import TraditionalModel
class SVMModel(TraditionalModel):
@@ -16,7 +16,7 @@ from tensorflow.keras.models import Model
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.preprocessing.text import Tokenizer
from research.neural_network_model import NeuralNetworkModel
from ners.research.neural_network_model import NeuralNetworkModel
class TransformerModel(NeuralNetworkModel):
@@ -37,7 +37,8 @@ class TransformerModel(NeuralNetworkModel):
# Add positional encoding
positions = tf.range(start=0, limit=params.get("max_len", 8), delta=1)
pos_embedding = Embedding(
input_dim=params.get("max_len", 8), output_dim=params.get("embedding_dim", 64)
input_dim=params.get("max_len", 8),
output_dim=params.get("embedding_dim", 64),
)(positions)
x = x + pos_embedding
@@ -49,7 +50,9 @@ class TransformerModel(NeuralNetworkModel):
model = Model(inputs, outputs)
model.compile(
optimizer="adam", loss="sparse_categorical_crossentropy", metrics=["accuracy"]
optimizer="adam",
loss="sparse_categorical_crossentropy",
metrics=["accuracy"],
)
return model
@@ -62,11 +65,15 @@ class TransformerModel(NeuralNetworkModel):
key_dim=cfg_params.get("transformer_head_size", 64),
dropout=cfg_params.get("attn_dropout", 0.1),
)(x, x)
x = LayerNormalization(epsilon=1e-6)(x + Dropout(cfg_params.get("dropout", 0.1))(attn))
x = LayerNormalization(epsilon=1e-6)(
x + Dropout(cfg_params.get("dropout", 0.1))(attn)
)
ff = Dense(cfg_params.get("transformer_ff_dim", 128), activation="relu")(x)
ff = Dense(x.shape[-1])(ff)
return LayerNormalization(epsilon=1e-6)(x + Dropout(cfg_params.get("dropout", 0.1))(ff))
return LayerNormalization(epsilon=1e-6)(
x + Dropout(cfg_params.get("dropout", 0.1))(ff)
)
def prepare_features(self, X: pd.DataFrame) -> np.ndarray:
text_data = self._collect_text_corpus(X)
@@ -5,7 +5,7 @@ from sklearn.base import BaseEstimator
from sklearn.feature_extraction.text import CountVectorizer
from sklearn.preprocessing import LabelEncoder
from research.traditional_model import TraditionalModel
from ners.research.traditional_model import TraditionalModel
class XGBoostModel(TraditionalModel):
@@ -106,7 +106,9 @@ class XGBoostModel(TraditionalModel):
lambda x: x if x in known_classes else default_class
)
encoded = self.label_encoders[feature_key].transform(column_mapped)
encoded = self.label_encoders[feature_key].transform(
column_mapped
)
features.append(encoded.reshape(-1, 1))
@@ -10,8 +10,10 @@ from sklearn.model_selection import StratifiedKFold
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import LabelEncoder
from research.base_model import BaseModel
from research.experiment.feature_extractor import FeatureExtractor
import tensorflow as tf
from ners.research.base_model import BaseModel
from ners.research.experiment.feature_extractor import FeatureExtractor
class NeuralNetworkModel(BaseModel):
@@ -34,8 +36,6 @@ class NeuralNetworkModel(BaseModel):
# - Enables memory growth to avoid pre-allocating all VRAM
# - Optionally enables mixed precision if requested via model params
try:
import tensorflow as tf # Imported lazily to avoid dependency for non-NN runs
requested_gpu = bool(self.config.model_params.get("use_gpu", False))
enable_mixed = bool(self.config.model_params.get("mixed_precision", False))
@@ -49,15 +49,15 @@ class NeuralNetworkModel(BaseModel):
if enable_mixed:
try:
from tensorflow.keras import mixed_precision
mixed_precision.set_global_policy("mixed_float16")
tf.keras.mixed_precision.set_global_policy("mixed_float16")
logging.info("Enabled TensorFlow mixed precision (float16)")
except Exception as e:
logging.warning(f"Could not enable mixed precision: {e}")
else:
if requested_gpu:
logging.warning("Requested GPU but no TensorFlow GPU device is available.")
logging.warning(
"Requested GPU but no TensorFlow GPU device is available."
)
except Exception as e:
# Keep silent in non-TF environments / non-NN workflows
logging.debug(f"TensorFlow GPU setup skipped: {e}")
@@ -86,7 +86,9 @@ class NeuralNetworkModel(BaseModel):
logging.info(f"Vocabulary size: {vocab_size}")
# Get additional model parameters
self.model = self.build_model_with_vocab(vocab_size=vocab_size, **self.config.model_params)
self.model = self.build_model_with_vocab(
vocab_size=vocab_size, **self.config.model_params
)
# Train the neural network
logging.info(
@@ -143,7 +145,7 @@ class NeuralNetworkModel(BaseModel):
# Keep zeros (padding) untouched; clamp negatives and > max_idx to OOV
invalid_mask = (arr < 0) | (arr > max_idx)
# Avoid turning zeros into OOV
invalid_mask &= (arr != 0)
invalid_mask &= arr != 0
if invalid_mask.any():
arr[invalid_mask] = oov_index
@@ -157,10 +159,14 @@ class NeuralNetworkModel(BaseModel):
"""Combine configured textual features into one string per record."""
column_names = [
feature.value for feature in self.config.features if feature.value in X.columns
feature.value
for feature in self.config.features
if feature.value in X.columns
]
if not column_names:
raise ValueError("No configured text features found in the provided DataFrame.")
raise ValueError(
"No configured text features found in the provided DataFrame."
)
text_frame = X[column_names].fillna("").astype(str)
@@ -193,9 +199,7 @@ class NeuralNetworkModel(BaseModel):
pass
if enable_mixed:
try:
from tensorflow.keras import mixed_precision
mixed_precision.set_global_policy("mixed_float16")
tf.keras.mixed_precision.set_global_policy("mixed_float16")
except Exception:
pass
else:
@@ -208,7 +212,9 @@ class NeuralNetworkModel(BaseModel):
X_prepared = self._sanitize_sequences(X_prepared)
y_encoded = self.label_encoder.transform(y)
cv = StratifiedKFold(n_splits=cv_folds, shuffle=True, random_state=self.config.random_seed)
cv = StratifiedKFold(
n_splits=cv_folds, shuffle=True, random_state=self.config.random_seed
)
accuracies = []
precisions = []
@@ -280,14 +286,14 @@ class NeuralNetworkModel(BaseModel):
pass
if enable_mixed:
try:
from tensorflow.keras import mixed_precision
mixed_precision.set_global_policy("mixed_float16")
tf.keras.mixed_precision.set_global_policy("mixed_float16")
except Exception:
pass
else:
if requested_gpu:
logging.warning("Requested GPU for learning curve but none is available.")
logging.warning(
"Requested GPU for learning curve but none is available."
)
except Exception:
pass
@@ -342,7 +348,7 @@ class NeuralNetworkModel(BaseModel):
# Train model
if hasattr(model, "fit"):
history = model.fit(
model.fit(
X_train_subset,
y_train_subset,
epochs=self.config.model_params.get("epochs", 10),
@@ -3,12 +3,16 @@ import pandas as pd
import matplotlib.pyplot as plt
import seaborn as sns
from research.statistics.utils import LETTERS, build_letter_frequencies
from ners.research.statistics.utils import LETTERS, build_letter_frequencies
def plot_transition_matrix(ax, df_probs, title=""):
hm = sns.heatmap(
df_probs.loc[list(LETTERS), list(LETTERS)], cmap="Reds", annot=False, cbar=False, ax=ax
df_probs.loc[list(LETTERS), list(LETTERS)],
cmap="Reds",
annot=False,
cbar=False,
ax=ax,
)
ax.set_title(title, fontsize=12)
return hm
@@ -31,8 +35,12 @@ def plot_letter_frequencies(males, females, sort_values=False, title=None):
x = np.arange(len(df_plot))
w = 0.4
fig, ax = plt.subplots(figsize=(16, 6))
ax.bar(x - w / 2, df_plot["Male"], width=w, label="Male", color="steelblue", alpha=0.8)
ax.bar(x + w / 2, df_plot["Female"], width=w, label="Female", color="salmon", alpha=0.8)
ax.bar(
x - w / 2, df_plot["Male"], width=w, label="Male", color="steelblue", alpha=0.8
)
ax.bar(
x + w / 2, df_plot["Female"], width=w, label="Female", color="salmon", alpha=0.8
)
ax.set_xticks(x)
ax.set_xticklabels(df_plot["letter"])
@@ -5,8 +5,6 @@ import numpy as np
import pandas as pd
from scipy.spatial.distance import euclidean
from scipy.stats import entropy
from scipy.spatial.distance import euclidean
from scipy.stats import entropy
from typing import Dict, Any
LETTERS = "abcdefghijklmnopqrstuvwxyz"
@@ -49,7 +47,12 @@ def build_words_token(df: pd.DataFrame, source: str, target: str) -> pd.DataFram
def build_letter_frequencies(series: pd.Series) -> pd.DataFrame:
# Normalize: lowercase, remove non-letters, concatenate all into one string
s = series.astype(str).str.lower().str.replace(r"[^a-z]", "", regex=True).str.cat(sep="")
s = (
series.astype(str)
.str.lower()
.str.replace(r"[^a-z]", "", regex=True)
.str.cat(sep="")
)
# Convert string into Series of characters
chars = pd.Series(list(s))
@@ -150,8 +153,12 @@ def build_transition_comparisons(
kl_names_mf = entropy(prepared_names["m"] + 1e-12, prepared_names["f"] + 1e-12)
kl_names_fm = entropy(prepared_names["f"] + 1e-12, prepared_names["m"] + 1e-12)
kl_surnames_mf = entropy(prepared_surnames["m"] + 1e-12, prepared_surnames["f"] + 1e-12)
kl_surnames_fm = entropy(prepared_surnames["f"] + 1e-12, prepared_surnames["m"] + 1e-12)
kl_surnames_mf = entropy(
prepared_surnames["m"] + 1e-12, prepared_surnames["f"] + 1e-12
)
kl_surnames_fm = entropy(
prepared_surnames["f"] + 1e-12, prepared_surnames["m"] + 1e-12
)
jsd_names = 0.5 * (kl_names_mf + kl_names_fm)
jsd_surnames = 0.5 * (kl_surnames_mf + kl_surnames_fm)
@@ -163,7 +170,9 @@ def build_transition_comparisons(
P_f = transitions["f"]["probs"].flatten()
# Calculate the observed JSD (our test statistic)
observed_jsd = 0.5 * (entropy(P_m + 1e-12, P_f + 1e-12) + entropy(P_f + 1e-12, P_m + 1e-12))
observed_jsd = 0.5 * (
entropy(P_m + 1e-12, P_f + 1e-12) + entropy(P_f + 1e-12, P_m + 1e-12)
)
# Concatenate male and female counts
counts_m = transitions["m"]["counts"]
@@ -194,10 +203,12 @@ def build_transition_comparisons(
permuted_jsd = 0.5 * (
entropy(
permuted_probs_m.mean(axis=1) + 1e-12, permuted_probs_f.mean(axis=1) + 1e-12
permuted_probs_m.mean(axis=1) + 1e-12,
permuted_probs_f.mean(axis=1) + 1e-12,
)
+ entropy(
permuted_probs_f.mean(axis=1) + 1e-12, permuted_probs_m.mean(axis=1) + 1e-12
permuted_probs_f.mean(axis=1) + 1e-12,
permuted_probs_m.mean(axis=1) + 1e-12,
)
)
permuted_jsds.append(permuted_jsd)
@@ -8,8 +8,8 @@ from sklearn.model_selection import StratifiedKFold, cross_val_score
from sklearn.model_selection import learning_curve
from sklearn.preprocessing import LabelEncoder
from research.base_model import BaseModel
from research.experiment.feature_extractor import FeatureExtractor
from ners.research.base_model import BaseModel
from ners.research.experiment.feature_extractor import FeatureExtractor
class TraditionalModel(BaseModel):
@@ -52,7 +52,9 @@ class TraditionalModel(BaseModel):
# Train model
if len(X_prepared.shape) == 1:
# For text-based features (like LogisticRegression with vectorization)
logging.info(f"Fitting model with {X_prepared.shape[0]} samples (text features)")
logging.info(
f"Fitting model with {X_prepared.shape[0]} samples (text features)"
)
else:
# For numerical features
logging.info(
@@ -74,12 +76,16 @@ class TraditionalModel(BaseModel):
return self
def cross_validate(self, X: pd.DataFrame, y: pd.Series, cv_folds: int = 5) -> Dict[str, float]:
def cross_validate(
self, X: pd.DataFrame, y: pd.Series, cv_folds: int = 5
) -> Dict[str, float]:
features_df = self.feature_extractor.extract_features(X)
X_prepared = self.prepare_features(features_df)
y_encoded = self.label_encoder.transform(y)
cv = StratifiedKFold(n_splits=cv_folds, shuffle=True, random_state=self.config.random_seed)
cv = StratifiedKFold(
n_splits=cv_folds, shuffle=True, random_state=self.config.random_seed
)
# Calculate different metrics
results = {}
@@ -95,7 +101,11 @@ class TraditionalModel(BaseModel):
for metric in ["precision", "recall", "f1"]:
if metric in self.config.metrics:
scores = cross_val_score(
self.model, X_prepared, y_encoded, cv=cv, scoring=f"{metric}_weighted"
self.model,
X_prepared,
y_encoded,
cv=cv,
scoring=f"{metric}_weighted",
)
results[metric] = scores.mean()
results[f"{metric}_std"] = scores.std()
+46
View File
@@ -0,0 +1,46 @@
#!.venv/bin/python3
import logging
import traceback
from ners.core.config import setup_config
from ners.research.experiment.experiment_builder import ExperimentBuilder
from ners.research.model_trainer import ModelTrainer
def train_from_template(
    name: str,
    type: str,
    *,
    templates: str = "research_templates.yaml",
    config: str | None = None,
    env: str = "development",
) -> int:
    """Train a single model from a named experiment template.

    Args:
        name: Experiment name to look up in the templates file.
        type: Template category (e.g. "baseline", "advanced").
        templates: YAML file containing the research templates.
        config: Optional path to a pipeline configuration file.
        env: Environment name passed to the configuration loader.

    Returns:
        Process-style exit code: 0 on success, 1 on any failure.
    """
    try:
        cfg = setup_config(config_path=config, env=env)
        experiment_builder = ExperimentBuilder(cfg)

        # Lazy %-style args: interpolation happens only if the record is emitted.
        logging.info("Loading research templates from: %s", templates)
        tmpl = experiment_builder.load_templates(templates)

        logging.info("Looking for experiment: name='%s', type='%s'", name, type)
        experiment_config = experiment_builder.find_template(tmpl, name, type)

        logging.info("Found experiment: %s", experiment_config.get("name"))
        logging.info("Description: %s", experiment_config.get("description"))
        logging.info("Features: %s", experiment_config.get("features"))

        trainer = ModelTrainer(cfg)
        trainer.train_single_model(
            model_name=experiment_config.get("name"),
            model_type=experiment_config.get("model_type"),
            features=experiment_config.get("features"),
            model_params=experiment_config.get("model_params", {}),
            tags=experiment_config.get("tags", []),
        )
        logging.info("Training completed successfully!")
        return 0
    except Exception as e:
        # CLI boundary: report the failure and convert it to an exit code.
        logging.error("Training failed: %s", e)
        traceback.print_exc()
        return 1
+12 -28
View File
@@ -1,19 +1,13 @@
#!.venv/bin/python3
import argparse
import sys
from pathlib import Path
import os
import streamlit as st
# Add parent directory to Python path to access core modules
parent_dir = Path(__file__).parent.parent
sys.path.insert(0, str(parent_dir))
from core.config import setup_config, PipelineConfig
from core.utils.data_loader import DataLoader
from processing.monitoring.pipeline_monitor import PipelineMonitor
from research.experiment.experiment_runner import ExperimentRunner
from research.experiment.experiment_tracker import ExperimentTracker
from ners.core.config import setup_config, PipelineConfig
from ners.core.utils.data_loader import DataLoader
from ners.processing.monitoring.pipeline_monitor import PipelineMonitor
from ners.research.experiment.experiment_runner import ExperimentRunner
from ners.research.experiment.experiment_tracker import ExperimentTracker
# Page configuration
st.set_page_config(
@@ -65,19 +59,9 @@ class StreamlitApp:
)
def main():
parser = argparse.ArgumentParser(
description="DRC NERS Platform",
formatter_class=argparse.RawDescriptionHelpFormatter,
)
parser.add_argument("--config", type=str, help="Path to configuration file")
parser.add_argument("--env", type=str, default="development", help="Environment name")
args = parser.parse_args()
config = setup_config(args.config, env=args.env)
app = StreamlitApp(config)
app.run()
if __name__ == "__main__":
main()
# Initialize app using environment variables when launched via Typer
_config_path = os.environ.get("NERS_CONFIG")
_env = os.environ.get("NERS_ENV", "development")
_cfg = setup_config(_config_path, env=_env)
_app = StreamlitApp(_cfg)
_app.run()
@@ -1,7 +1,7 @@
import pandas as pd
import streamlit as st
from core.utils.data_loader import OPTIMIZED_DTYPES
from ners.core.utils.data_loader import OPTIMIZED_DTYPES
@st.cache_data
@@ -25,7 +25,9 @@ class Dashboard:
# Load basic statistics
try:
data_path = self.config.paths.get_data_path(self.config.data.output_files["featured"])
data_path = self.config.paths.get_data_path(
self.config.data.output_files["featured"]
)
if data_path.exists():
df = load_dataset(str(data_path))
@@ -37,13 +39,17 @@ class Dashboard:
st.metric("Annotated Names", f"{annotated:,}")
with col3:
provinces = df["province"].nunique() if "province" in df.columns else 0
provinces = (
df["province"].nunique() if "province" in df.columns else 0
)
st.metric("Provinces", provinces)
with col4:
if "sex" in df.columns:
gender_dist = df["sex"].value_counts()
ratio = gender_dist.get("f", 0) / max(gender_dist.get("m", 1), 1)
ratio = gender_dist.get("f", 0) / max(
gender_dist.get("m", 1), 1
)
st.metric("F/M Rate", f"{ratio:.2%}")
with col5:
if "annotated" in df.columns:
@@ -79,4 +85,6 @@ class Dashboard:
st.dataframe(pd.DataFrame(exp_data), use_container_width=True)
else:
st.info("No experiments found. Create your first experiment in the Experiments tab!")
st.info(
"No experiments found. Create your first experiment in the Experiments tab!"
)
+52
View File
@@ -0,0 +1,52 @@
from datetime import datetime
import pandas as pd
import streamlit as st
from ners.core.utils.data_loader import OPTIMIZED_DTYPES
@st.cache_data
def load_dataset(file_path: str) -> pd.DataFrame:
    """Read the CSV at *file_path* with optimized dtypes.

    On any read failure the error is surfaced in the Streamlit UI and an
    empty DataFrame is returned so the page keeps rendering.
    """
    try:
        frame = pd.read_csv(file_path, dtype=OPTIMIZED_DTYPES)
    except Exception as exc:
        st.error(f"Error loading dataset: {exc}")
        return pd.DataFrame()
    return frame
class DataOverview:
    """Streamlit page summarizing the configured data files."""

    def __init__(self, config):
        # Pipeline configuration providing data paths and output file names.
        self.config = config

    def index(self):
        """Render the page: an inventory of data files plus a preview.

        Lists each configured file with size and modification time, then
        previews the featured dataset when it exists on disk.
        """
        st.title("Data Overview")

        # Display name -> path relative to the configured data directory.
        data_files = {
            "Names": self.config.data.input_file,
            "Featured Dataset": self.config.data.output_files["featured"],
            "Evaluation Dataset": self.config.data.output_files["evaluation"],
            "Male Names": self.config.data.output_files["males"],
            "Female Names": self.config.data.output_files["females"],
        }

        st.write("Available Data Files:")
        for name, rel_path in data_files.items():
            file_path = self.config.paths.get_data_path(rel_path)
            # EAFP with a single stat() call: avoids the exists()/stat()
            # race and the duplicate stat() of the original LBYL version.
            try:
                info = file_path.stat()
                stats = (
                    f"Size: {info.st_size / (1024 * 1024):.1f} MB, "
                    f"Last Modified: {datetime.fromtimestamp(info.st_mtime)}"
                )
            except OSError:
                stats = "Not found"
            st.write(f"- {name}: {file_path} ({stats})")

        # Preview featured dataset if available
        data_path = self.config.paths.get_data_path(
            self.config.data.output_files["featured"]
        )
        if data_path.exists():
            df = load_dataset(str(data_path))
            st.subheader("Featured Dataset Preview")
            st.dataframe(df.head(), use_container_width=True)
            st.write(f"Rows: {len(df):,}")
@@ -2,8 +2,8 @@ import pandas as pd
import plotly.express as px
import streamlit as st
from core.utils.data_loader import OPTIMIZED_DTYPES
from web.interfaces.log_reader import LogReader
from ners.core.utils.data_loader import OPTIMIZED_DTYPES
from ners.web.interfaces.log_reader import LogReader
@st.cache_data
@@ -31,7 +31,9 @@ class DataProcessing:
# Step details
for step_name, step_status in status["steps"].items():
with st.expander(f"{step_name.replace('_', ' ').title()} - {step_status['status']}"):
with st.expander(
f"{step_name.replace('_', ' ').title()} - {step_status['status']}"
):
col1, col2, col3 = st.columns(3)
with col1:
@@ -63,14 +65,20 @@ class DataProcessing:
with col2:
num_entries = st.number_input(
"Number of entries", min_value=5, max_value=50, value=10, key="num_log_entries"
"Number of entries",
min_value=5,
max_value=50,
value=10,
key="num_log_entries",
)
# Get log entries based on filter
if log_level_filter == "All":
log_entries = log_reader.read_last_entries(num_entries)
else:
log_entries = log_reader.read_entries_by_level(log_level_filter, num_entries)
log_entries = log_reader.read_entries_by_level(
log_level_filter, num_entries
)
if log_entries:
for entry in log_entries:
@@ -2,13 +2,13 @@ from typing import List, Dict
import streamlit as st
from core.config.pipeline_config import PipelineConfig
from research.experiment import ExperimentConfig, ExperimentStatus
from research.experiment.experiment_builder import ExperimentBuilder
from research.experiment.experiment_runner import ExperimentRunner
from research.experiment.experiment_tracker import ExperimentTracker
from research.experiment.feature_extractor import FeatureType
from research.model_registry import list_available_models
from ners.core.config.pipeline_config import PipelineConfig
from ners.research.experiment import ExperimentConfig, ExperimentStatus
from ners.research.experiment.experiment_builder import ExperimentBuilder
from ners.research.experiment.experiment_runner import ExperimentRunner
from ners.research.experiment.experiment_tracker import ExperimentTracker
from ners.research.experiment.feature_extractor import FeatureType
from ners.research.model_registry import list_available_models
class Experiments:
@@ -46,13 +46,19 @@ class Experiments:
available_experiments = self.experiment_builder.get_templates()
# Create tabs for different experiment types
exp_tabs = st.tabs(["Baseline", "Advanced", "Feature Studies", "Hyperparameter Tuning"])
exp_tabs = st.tabs(
["Baseline", "Advanced", "Feature Studies", "Hyperparameter Tuning"]
)
with exp_tabs[0]:
self._show_experiments_by_type(available_experiments["baseline"], "baseline")
self._show_experiments_by_type(
available_experiments["baseline"], "baseline"
)
with exp_tabs[1]:
self._show_experiments_by_type(available_experiments["advanced"], "advanced")
self._show_experiments_by_type(
available_experiments["advanced"], "advanced"
)
with exp_tabs[2]:
self._show_experiments_by_type(
@@ -60,7 +66,9 @@ class Experiments:
)
with exp_tabs[3]:
self._show_experiments_by_type(available_experiments["tuning"], "tuning")
self._show_experiments_by_type(
available_experiments["tuning"], "tuning"
)
except Exception as e:
st.error(f"Error loading experiment templates: {e}")
@@ -79,7 +87,9 @@ class Experiments:
# Show available experiments
for i, exp_template in enumerate(experiments):
exp_name = exp_template.get("name", f"Experiment {i + 1}")
exp_description = exp_template.get("description", "No description available")
exp_description = exp_template.get(
"description", "No description available"
)
with st.expander(f"📊 {exp_name} - {exp_description}"):
col1, col2 = st.columns([2, 1])
@@ -88,7 +98,7 @@ class Experiments:
st.json(exp_template)
with col2:
if st.button(f"🚀 Run Experiment", key=f"run_{experiment_type}_{i}"):
if st.button("🚀 Run Experiment", key=f"run_{experiment_type}_{i}"):
self._run_template_experiment(exp_template)
def _run_template_experiment(self, exp_template: Dict):
@@ -100,7 +110,9 @@ class Experiments:
# Run the experiment
experiment_id = self.experiment_runner.run_experiment(experiment_config)
st.success(f"Experiment '{experiment_config.name}' completed successfully!")
st.success(
f"Experiment '{experiment_config.name}' completed successfully!"
)
st.info(f"Experiment ID: `{experiment_id}`")
# Show results
@@ -130,13 +142,17 @@ class Experiments:
)
with col2:
model_filter = st.selectbox("Filter by Model", ["All"] + list_available_models())
model_filter = st.selectbox(
"Filter by Model", ["All"] + list_available_models()
)
with col3:
tag_filter = st.text_input("Filter by Tags (comma-separated)")
# Get and filter experiments
experiments = self._get_filtered_experiments(status_filter, model_filter, tag_filter)
experiments = self._get_filtered_experiments(
status_filter, model_filter, tag_filter
)
if not experiments:
st.info("No experiments found matching the filters.")
@@ -149,20 +165,28 @@ class Experiments:
):
self._display_experiment_details(exp, i)
def _get_filtered_experiments(self, status_filter: str, model_filter: str, tag_filter: str):
def _get_filtered_experiments(
self, status_filter: str, model_filter: str, tag_filter: str
):
"""Get experiments with applied filters"""
experiments = self.experiment_tracker.list_experiments()
# Apply filters
if status_filter != "All":
experiments = [e for e in experiments if e.status == ExperimentStatus(status_filter)]
experiments = [
e for e in experiments if e.status == ExperimentStatus(status_filter)
]
if model_filter != "All":
experiments = [e for e in experiments if e.config.model_type == model_filter]
experiments = [
e for e in experiments if e.config.model_type == model_filter
]
if tag_filter:
tags = [tag.strip() for tag in tag_filter.split(",")]
experiments = [e for e in experiments if any(tag in e.config.tags for tag in tags)]
experiments = [
e for e in experiments if any(tag in e.config.tags for tag in tags)
]
return experiments
@@ -173,7 +197,9 @@ class Experiments:
with col1:
st.write(f"**Model:** {exp.config.model_type}")
st.write(f"**Features:** {', '.join([f.value for f in exp.config.features])}")
st.write(
f"**Features:** {', '.join([f.value for f in exp.config.features])}"
)
st.write(f"**Tags:** {', '.join(exp.config.tags)}")
with col2:
@@ -185,7 +211,7 @@ class Experiments:
st.write(f"**Train Size:** {exp.train_size:,}")
st.write(f"**Test Size:** {exp.test_size:,}")
if st.button(f"View Details", key=f"details_{index}"):
if st.button("View Details", key=f"details_{index}"):
st.session_state.selected_experiment = exp.experiment_id
st.rerun()
@@ -198,7 +224,9 @@ class Experiments:
st.write("Run multiple experiments with different parameter combinations.")
# Add option to run template batch experiments
batch_type = st.radio("Batch Type", ["Template Batch", "Custom Parameter Sweep"])
batch_type = st.radio(
"Batch Type", ["Template Batch", "Custom Parameter Sweep"]
)
if batch_type == "Template Batch":
self._show_template_batch_experiments()
@@ -227,10 +255,13 @@ class Experiments:
if experiments:
st.write(f"**{exp_type.title()} Experiments:**")
exp_names = [
exp.get("name", f"Exp {i}") for i, exp in enumerate(experiments)
exp.get("name", f"Exp {i}")
for i, exp in enumerate(experiments)
]
selected_names = st.multiselect(
f"Select {exp_type} experiments", exp_names, key=f"select_{exp_type}"
f"Select {exp_type} experiments",
exp_names,
key=f"select_{exp_type}",
)
for name in selected_names:
@@ -258,13 +289,17 @@ class Experiments:
experiment_configs.append(config)
# Run batch experiments
experiment_ids = self.experiment_runner.run_experiment_batch(experiment_configs)
experiment_ids = self.experiment_runner.run_experiment_batch(
experiment_configs
)
st.success(f"Completed {len(experiment_ids)} template experiments!")
# Show summary
if experiment_ids:
comparison = self.experiment_runner.compare_experiments(experiment_ids)
comparison = self.experiment_runner.compare_experiments(
experiment_ids
)
st.write("**Template Batch Results:**")
st.dataframe(
comparison[["name", "model_type", "test_accuracy"]],
@@ -285,7 +320,9 @@ class Experiments:
with col1:
base_name = st.text_input("Base Experiment Name", "parameter_sweep")
model_types = st.multiselect(
"Model Types", list_available_models(), default=["logistic_regression"]
"Model Types",
list_available_models(),
default=["logistic_regression"],
)
# N-gram ranges for logistic regression
@@ -301,13 +338,20 @@ class Experiments:
default=["full_name", "native_name", "surname"],
)
test_sizes = st.text_input("Test Sizes (comma-separated)", "0.15,0.2,0.25")
test_sizes = st.text_input(
"Test Sizes (comma-separated)", "0.15,0.2,0.25"
)
tags = st.text_input("Common Tags", "parameter_sweep,batch")
if st.form_submit_button("🚀 Run Parameter Sweep"):
self.run_batch_experiments(
base_name, model_types, ngram_ranges, feature_combinations, test_sizes, tags
base_name,
model_types,
ngram_ranges,
feature_combinations,
test_sizes,
tags,
)
def run_batch_experiments(
@@ -369,13 +413,17 @@ class Experiments:
exp_count += 1
# Run experiments
experiment_ids = self.experiment_runner.run_experiment_batch(experiments)
experiment_ids = self.experiment_runner.run_experiment_batch(
experiments
)
st.success(f"Completed {len(experiment_ids)} batch experiments")
# Show summary
if experiment_ids:
comparison = self.experiment_runner.compare_experiments(experiment_ids)
comparison = self.experiment_runner.compare_experiments(
experiment_ids
)
st.write("**Batch Results Summary:**")
st.dataframe(
comparison[["name", "model_type", "test_accuracy"]],
+80
View File
@@ -0,0 +1,80 @@
from __future__ import annotations
from dataclasses import dataclass
from datetime import datetime
from pathlib import Path
from typing import List
@dataclass
class LogEntry:
    """One parsed line of the application log."""

    timestamp: datetime  # when the record was emitted
    level: str  # logging level name, e.g. "INFO"
    message: str  # remainder of the line after the level


class LogReader:
    """Lightweight reader for the application's text log file.

    Expects lines of the form ``[<ISO timestamp>] - LEVEL - message``;
    lines that do not match are silently skipped. Files are read as UTF-8
    with replacement so a corrupt byte cannot crash the log viewer.
    """

    def __init__(self, log_file_path: Path):
        # Accept str or Path; normalize to Path once.
        self.log_file_path = Path(log_file_path)

    def read_last_entries(self, num_entries: int = 20) -> List[LogEntry]:
        """Return the parseable entries among the last *num_entries* lines."""
        if not self.log_file_path.exists():
            return []
        # Encoding pinned so results do not depend on the host locale (PEP 597).
        with open(self.log_file_path, "r", encoding="utf-8", errors="replace") as f:
            lines = f.readlines()[-num_entries:]
        entries = []
        for line in lines:
            entry = self._parse_log_line(line)
            if entry:
                entries.append(entry)
        return entries

    def read_entries_by_level(
        self, level: str, num_entries: int = 20
    ) -> List[LogEntry]:
        """Return up to *num_entries* newest entries with exactly *level*, oldest first."""
        entries: List[LogEntry] = []
        if not self.log_file_path.exists():
            return entries
        with open(self.log_file_path, "r", encoding="utf-8", errors="replace") as f:
            # Scan newest-first so we can stop as soon as we have enough.
            for line in reversed(f.readlines()):
                entry = self._parse_log_line(line)
                if entry and entry.level == level:
                    entries.append(entry)
                    if len(entries) >= num_entries:
                        break
        # Restore chronological order for display.
        return list(reversed(entries))

    def get_log_stats(self) -> dict:
        """Return ``{"total_lines": n, <LEVEL>: count, ...}``; ``{}`` if the file is missing."""
        if not self.log_file_path.exists():
            return {}
        stats = {"total_lines": 0}
        with open(self.log_file_path, "r", encoding="utf-8", errors="replace") as f:
            for line in f:
                stats["total_lines"] += 1
                entry = self._parse_log_line(line)
                if entry:
                    stats[entry.level] = stats.get(entry.level, 0) + 1
        return stats

    @staticmethod
    def _parse_log_line(line: str) -> LogEntry | None:
        """Parse one log line; return None for lines that do not match the format."""
        try:
            # Expected format from logging config: [timestamp] - LEVEL - message
            parts = line.strip().split(" - ")
            if len(parts) >= 3:
                timestamp = datetime.fromisoformat(parts[0].strip("[]"))
                level = parts[1].strip()
                # The message itself may contain " - ", so re-join the tail.
                message = " - ".join(parts[2:])
                return LogEntry(timestamp, level, message)
        except Exception:
            # Malformed timestamp or similar: treat as a non-log line.
            return None
        return None
@@ -1,10 +1,8 @@
from pathlib import Path
import streamlit as st
from spacy import displacy
from core.config import PipelineConfig
from processing.ner.name_model import NameModel
from ners.core.config import PipelineConfig
from ners.processing.ner.name_model import NameModel
class NERTesting:
@@ -56,12 +54,15 @@ class NERTesting:
with col1:
st.metric(
"Training Examples", f"{self.training_stats.get('training_examples', 0):,}"
"Training Examples",
f"{self.training_stats.get('training_examples', 0):,}",
)
with col2:
st.metric("Epochs", self.training_stats.get("epochs", 0))
with col3:
st.metric("Final Loss", f"{self.training_stats.get('final_loss', 0):.2f}")
st.metric(
"Final Loss", f"{self.training_stats.get('final_loss', 0):.2f}"
)
with col4:
st.metric("Batch Size", f"{self.training_stats.get('batch_size', 0):,}")
@@ -99,7 +100,9 @@ class NERTesting:
if names_input.strip():
if st.button("Analyze All Names", type="primary"):
names = [name.strip() for name in names_input.split("\n") if name.strip()]
names = [
name.strip() for name in names_input.split("\n") if name.strip()
]
for i, name in enumerate(names):
st.markdown(f"**Name {i + 1}: {name}**")
self.analyze_and_display(name)
@@ -127,7 +130,9 @@ class NERTesting:
else:
st.warning("No entities detected in the input text.")
st.info("Try using traditional Congolese names or ensure the spelling is correct.")
st.info(
"Try using traditional Congolese names or ensure the spelling is correct."
)
except Exception as e:
st.error(f"Error analyzing text: {e}")
@@ -139,14 +144,21 @@ class NERTesting:
ents = []
for entity in entities:
ents.append(
{"start": entity["start"], "end": entity["end"], "label": entity["label"]}
{
"start": entity["start"],
"end": entity["end"],
"label": entity["label"],
}
)
# Create doc-like structure for displacy
doc_data = {"text": text, "ents": ents, "title": None}
# Custom colors for our labels
colors = {"NATIVE": "#74C0FC", "SURNAME": "#69DB7C"} # Light blue # Light green
colors = {
"NATIVE": "#74C0FC",
"SURNAME": "#69DB7C",
} # Light blue # Light green
options = {"colors": colors, "distance": 90}

Some files were not shown because too many files have changed in this diff Show More