From 84f7d41a8407b62e564a818f178d4056409bcae5 Mon Sep 17 00:00:00 2001 From: bernard-ng Date: Sat, 16 Aug 2025 19:05:24 +0200 Subject: [PATCH] feat: web application multipage support --- app.py | 101 ------- config/pipeline.yaml | 7 +- config/research_templates.yaml | 8 +- config/spacy_ner.cfg | 145 ++++++++++ core/config/processing_config.py | 1 + ner.py | 25 +- .../__init__.py => pages/1_๐Ÿ“Š_Dashboard.py | 0 pages/2_๐Ÿ“‹_Data_Overview.py | 0 pages/3_โš™๏ธ_Data_Processing.py | 0 pages/4_๐Ÿงช_Experiments.py | 0 pages/5_๐Ÿ“ˆ_Results_Analysis.py | 0 pages/6_๐Ÿ”ฎ_Predictions.py | 0 pages/7_โš™๏ธ_Configuration.py | 0 pages/README.md | 0 processing/ner/name_builder.py | 68 +++++ ...ner_engineering.py => name_engineering.py} | 11 +- .../ner/{ner_name_model.py => name_model.py} | 14 +- processing/ner/name_tagger.py | 273 ++++++++++++++++++ processing/ner/ner_data_builder.py | 149 ---------- processing/ner/ner_name_tagger.py | 212 -------------- web/__init__.py | 0 web/app.py | 89 ++++++ web/interfaces/__init__.py | 0 .../interfaces}/configuration.py | 4 +- {interface => web/interfaces}/dashboard.py | 2 +- .../interfaces}/data_overview.py | 2 +- .../interfaces}/data_processing.py | 4 +- {interface => web/interfaces}/experiments.py | 5 +- {interface => web/interfaces}/log_reader.py | 0 {interface => web/interfaces}/predictions.py | 5 +- .../interfaces}/results_analysis.py | 5 +- web/pages/1_๐Ÿ“Š_Dashboard.py | 22 ++ web/pages/2_๐Ÿ“‹_Data_Overview.py | 18 ++ web/pages/3_โš™๏ธ_Data_Processing.py | 18 ++ web/pages/4_๐Ÿงช_Experiments.py | 22 ++ web/pages/5_๐Ÿ“ˆ_Results_Analysis.py | 22 ++ web/pages/6_๐Ÿ”ฎ_Predictions.py | 22 ++ web/pages/7_โš™๏ธ_Configuration.py | 18 ++ 38 files changed, 765 insertions(+), 507 deletions(-) delete mode 100644 app.py create mode 100644 config/spacy_ner.cfg rename interface/__init__.py => pages/1_๐Ÿ“Š_Dashboard.py (100%) create mode 100644 pages/2_๐Ÿ“‹_Data_Overview.py create mode 100644 pages/3_โš™๏ธ_Data_Processing.py create mode 100644 pages/4_๐Ÿงช_Experiments.py create mode 100644 pages/5_๐Ÿ“ˆ_Results_Analysis.py create mode 100644 pages/6_๐Ÿ”ฎ_Predictions.py create mode 100644 pages/7_โš™๏ธ_Configuration.py create mode 100644 pages/README.md create mode 100644 processing/ner/name_builder.py rename processing/ner/{ner_engineering.py => name_engineering.py} (95%) rename processing/ner/{ner_name_model.py => name_model.py} (98%) create mode 100644 processing/ner/name_tagger.py create mode 100644 web/__init__.py create mode 100644 web/app.py create mode 100644 web/interfaces/__init__.py rename {interface => web/interfaces}/configuration.py (63%) rename {interface => web/interfaces}/dashboard.py (98%) rename {interface => web/interfaces}/data_overview.py (99%) rename {interface => web/interfaces}/data_processing.py (98%) rename {interface => web/interfaces}/experiments.py (99%) rename {interface => web/interfaces}/log_reader.py (100%) rename {interface => web/interfaces}/predictions.py (99%) rename {interface => web/interfaces}/results_analysis.py (98%) create mode 100644 web/pages/1_๐Ÿ“Š_Dashboard.py create mode 100644 web/pages/2_๐Ÿ“‹_Data_Overview.py create mode 100644 web/pages/3_โš™๏ธ_Data_Processing.py create mode 100644 web/pages/4_๐Ÿงช_Experiments.py create mode 100644 web/pages/5_๐Ÿ“ˆ_Results_Analysis.py create mode 100644 web/pages/6_๐Ÿ”ฎ_Predictions.py create mode 100644 web/pages/7_โš™๏ธ_Configuration.py diff --git a/app.py b/app.py deleted file mode 100644 index 81a5b08..0000000 --- a/app.py +++ /dev/null @@ -1,101 +0,0 @@ -#!.venv/bin/python3 -import 
argparse - -import streamlit as st - -from core.config import setup_config, PipelineConfig -from core.utils.data_loader import DataLoader -from interface.configuration import Configuration -from interface.dashboard import Dashboard -from interface.data_overview import DataOverview -from interface.data_processing import DataProcessing -from interface.experiments import Experiments -from interface.predictions import Predictions -from interface.results_analysis import ResultsAnalysis -from processing.monitoring.pipeline_monitor import PipelineMonitor -from research.experiment.experiment_runner import ExperimentRunner -from research.experiment.experiment_tracker import ExperimentTracker - -# Page configuration -st.set_page_config( - page_title="DRC Names NLP Pipeline", - page_icon="๐Ÿ‡จ๐Ÿ‡ฉ", - layout="wide", - initial_sidebar_state="expanded", -) - - -class StreamlitApp: - """Main Streamlit application class""" - - def __init__(self, config: PipelineConfig): - self.config = config - self.data_loader = DataLoader(self.config) - self.experiment_tracker = ExperimentTracker(self.config) - self.experiment_runner = ExperimentRunner(self.config) - self.pipeline_monitor = PipelineMonitor() - - # Initialize interface components - self.dashboard = Dashboard(self.config, self.experiment_tracker, self.experiment_runner) - self.data_overview = DataOverview(self.config) - self.data_processing = DataProcessing(self.config, self.pipeline_monitor) - self.experiments = Experiments(self.config, self.experiment_tracker, self.experiment_runner) - self.results_analysis = ResultsAnalysis( - self.config, self.experiment_tracker, self.experiment_runner - ) - self.predictions = Predictions(self.config, self.experiment_tracker, self.experiment_runner) - self.configuration = Configuration(self.config) - - # Initialize session state - if "current_experiment" not in st.session_state: - st.session_state.current_experiment = None - if "experiment_results" not in st.session_state: - st.session_state.experiment_results = {} - - def run(self): - st.title("๐Ÿ‡จ๐Ÿ‡ฉ DRC NERS Pipeline") - st.markdown("A comprehensive tool for Congolese name analysis and gender prediction") - - # Sidebar navigation - page = st.sidebar.selectbox( - "Navigation", - [ - "Dashboard", - "Dataset Overview", - "Data Processing", - "Experiments", - "Results & Analysis", - "Predictions", - "Configuration", - ], - ) - - # Route to appropriate page - page_map = { - "Dashboard": self.dashboard.index, - "Dataset Overview": self.data_overview.index, - "Data Processing": self.data_processing.index, - "Experiments": self.experiments.index, - "Results & Analysis": self.results_analysis.index, - "Predictions": self.predictions.index, - "Configuration": self.configuration.index, - } - page_map.get(page, lambda: None)() - - -def main(): - parser = argparse.ArgumentParser( - description="DRC NERS Platform", - formatter_class=argparse.RawDescriptionHelpFormatter, - ) - parser.add_argument("--config", type=str, help="Path to configuration file") - parser.add_argument("--env", type=str, default="development", help="Environment name") - args = parser.parse_args() - - config = setup_config(args.config, env=args.env) - app = StreamlitApp(config) - app.run() - - -if __name__ == "__main__": - main() diff --git a/config/pipeline.yaml b/config/pipeline.yaml index babc9be..36bb561 100644 --- a/config/pipeline.yaml +++ b/config/pipeline.yaml @@ -18,7 +18,8 @@ paths: checkpoints_dir: "./data/checkpoints" # Directory for model checkpoints # Pipeline stages -stages: # List of stages 
in the processing pipeline +# List of stages in the processing pipeline +stages: - "data_cleaning" # Data cleaning stage - "feature_extraction" # Feature extraction stage - "ner_annotation" # NER-based annotation stage @@ -36,6 +37,7 @@ processing: - "utf-16" - "latin1" chunk_size: 100_000 # Size of data chunks to process in parallel + epochs: 2 # Number of Epochs for training # Annotation settings annotation: @@ -72,8 +74,9 @@ data: balance_by_sex: false # Should the dataset be balanced by sex when limiting the dataset size? # Logging configuration +# Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL) logging: - level: "INFO" # Logging level (DEBUG, INFO, WARNING, ERROR, CRITICAL) + level: "INFO" format: "%(asctime)s - %(name)s - %(levelname)s - %(message)s" file_logging: true # Enable logging to file console_logging: true # Enable logging to console diff --git a/config/research_templates.yaml b/config/research_templates.yaml index a968aec..f8a122c 100644 --- a/config/research_templates.yaml +++ b/config/research_templates.yaml @@ -7,7 +7,7 @@ baseline_experiments: max_len: 20 embedding_dim: 64 gru_units: 32 - epochs: 10 + epochs: 2 batch_size: 32 tags: [ "baseline", "neural", "bigru" ] @@ -21,7 +21,7 @@ baseline_experiments: filters: 64 kernel_size: 3 dropout: 0.5 - epochs: 10 + epochs: 2 batch_size: 32 tags: [ "baseline", "neural", "cnn" ] @@ -79,7 +79,7 @@ baseline_experiments: model_params: embedding_dim: 128 lstm_units: 64 - epochs: 10 + epochs: 2 batch_size: 64 tags: [ "baseline", "neural", "lstm" ] @@ -121,7 +121,7 @@ baseline_experiments: embedding_dim: 128 num_heads: 4 num_layers: 2 - epochs: 10 + epochs: 2 batch_size: 64 tags: [ "baseline", "neural", "transformer" ] diff --git a/config/spacy_ner.cfg b/config/spacy_ner.cfg new file mode 100644 index 0000000..911a774 --- /dev/null +++ b/config/spacy_ner.cfg @@ -0,0 +1,145 @@ +[paths] +train = null +dev = null +vectors = null +init_tok2vec = null + +[system] +gpu_allocator = null +seed = 42 + +[nlp] +lang = "fr" +pipeline = ["tok2vec","ner"] +batch_size = 100000 +disabled = [] +before_creation = null +after_creation = null +after_pipeline_creation = null +tokenizer = {"@tokenizers":"spacy.Tokenizer.v1"} +vectors = {"@vectors":"spacy.Vectors.v1"} + +[components] + +[components.ner] +factory = "ner" +incorrect_spans_key = null +moves = null +scorer = {"@scorers":"spacy.ner_scorer.v1"} +update_with_oracle_cut_size = 100 + +[components.ner.model] +@architectures = "spacy.TransitionBasedParser.v2" +state_type = "ner" +extra_state_tokens = false +hidden_width = 64 +maxout_pieces = 2 +use_upper = true +nO = null + +[components.ner.model.tok2vec] +@architectures = "spacy.Tok2VecListener.v1" +width = ${components.tok2vec.model.encode.width} +upstream = "*" + +[components.tok2vec] +factory = "tok2vec" + +[components.tok2vec.model] +@architectures = "spacy.Tok2Vec.v2" + +[components.tok2vec.model.embed] +@architectures = "spacy.MultiHashEmbed.v2" +width = ${components.tok2vec.model.encode.width} +attrs = ["NORM","PREFIX","SUFFIX","SHAPE"] +rows = [5000,1000,2500,2500] +include_static_vectors = false + +[components.tok2vec.model.encode] +@architectures = "spacy.MaxoutWindowEncoder.v2" +width = 96 +depth = 4 +window_size = 1 +maxout_pieces = 3 + +[corpora] + +[corpora.dev] +@readers = "spacy.Corpus.v1" +path = ${paths.dev} +max_length = 0 +gold_preproc = false +limit = 0 +augmenter = null + +[corpora.train] +@readers = "spacy.Corpus.v1" +path = ${paths.train} +max_length = 0 +gold_preproc = false +limit = 0 +augmenter = null + +[training] 
+dev_corpus = "corpora.dev" +train_corpus = "corpora.train" +seed = ${system.seed} +gpu_allocator = ${system.gpu_allocator} +dropout = 0.1 +accumulate_gradient = 1 +patience = 1600 +max_epochs = 0 +max_steps = 20000 +eval_frequency = 200 +frozen_components = [] +annotating_components = [] +before_to_disk = null +before_update = null + +[training.batcher] +@batchers = "spacy.batch_by_words.v1" +discard_oversize = false +tolerance = 0.2 +get_length = null + +[training.batcher.size] +@schedules = "compounding.v1" +start = 100 +stop = 1000 +compound = 1.001 +t = 0.0 + +[training.logger] +@loggers = "spacy.ConsoleLogger.v1" +progress_bar = false + +[training.optimizer] +@optimizers = "Adam.v1" +beta1 = 0.9 +beta2 = 0.999 +L2_is_weight_decay = true +L2 = 0.01 +grad_clip = 1.0 +use_averages = false +eps = 0.00000001 +learn_rate = 0.001 + +[training.score_weights] +ents_f = 1.0 +ents_p = 0.0 +ents_r = 0.0 +ents_per_type = null + +[pretraining] + +[initialize] +vectors = ${paths.vectors} +init_tok2vec = ${paths.init_tok2vec} +vocab_data = null +lookups = null +before_init = null +after_init = null + +[initialize.components] + +[initialize.tokenizer] diff --git a/core/config/processing_config.py b/core/config/processing_config.py index 0037a4e..5d1d705 100644 --- a/core/config/processing_config.py +++ b/core/config/processing_config.py @@ -12,3 +12,4 @@ class ProcessingConfig(BaseModel): use_multiprocessing: bool = False encoding_options: list = field(default_factory=lambda: ["utf-8", "utf-16", "latin1"]) chunk_size: int = 100_000 + epochs: int = 2 diff --git a/ner.py b/ner.py index 1e4ed8f..3b0ac49 100755 --- a/ner.py +++ b/ner.py @@ -7,24 +7,24 @@ import traceback from pathlib import Path from core.config import setup_config, PipelineConfig -from processing.ner.ner_data_builder import NERDataBuilder -from processing.ner.ner_engineering import NEREngineering -from processing.ner.ner_name_model import NERNameModel +from processing.ner.name_builder import NameBuilder +from processing.ner.name_engineering import NameEngineering +from processing.ner.name_model import NameModel def feature(config: PipelineConfig): """Apply feature engineering to create position-independent NER dataset.""" - NEREngineering(config).compute() + NameEngineering(config).compute() def build(config: PipelineConfig): """Build NER dataset using NERDataBuilder.""" - NERDataBuilder(config).build() + NameBuilder(config).build() def train(config: PipelineConfig): """Train the NER model.""" - trainer = NERNameModel(config) + trainer = NameModel(config) data_path = Path(config.paths.data_dir) / config.data.output_files["ner_data"] if not data_path.exists(): @@ -39,7 +39,10 @@ def train(config: PipelineConfig): logging.info(f"Training with {len(train_data)} examples, evaluating on {len(eval_data)}") trainer.train( - data=train_data, epochs=1, batch_size=config.processing.batch_size, dropout_rate=0.3 + data=train_data, + epochs=config.processing.epochs, + batch_size=config.processing.batch_size, + dropout_rate=0.3, ) trainer.evaluate(eval_data) @@ -48,13 +51,17 @@ def train(config: PipelineConfig): def run_pipeline(config: PipelineConfig, reset: bool = False): - if not reset and os.path.exists(config.paths.get_data_path(config.data.output_files["engineered"])): + if not reset and os.path.exists( + config.paths.get_data_path(config.data.output_files["engineered"]) + ): logging.info("Step 1: Feature engineering already done.") else: logging.info("Step 1: Running feature engineering") feature(config) - if not reset and 
os.path.exists(config.paths.get_data_path(config.data.output_files["ner_data"])): + if not reset and os.path.exists( + config.paths.get_data_path(config.data.output_files["ner_data"]) + ): logging.info("Step 2: NER dataset already built.") else: logging.info("Step 2: Building NER dataset") diff --git a/interface/__init__.py b/pages/1_๐Ÿ“Š_Dashboard.py similarity index 100% rename from interface/__init__.py rename to pages/1_๐Ÿ“Š_Dashboard.py diff --git a/pages/2_๐Ÿ“‹_Data_Overview.py b/pages/2_๐Ÿ“‹_Data_Overview.py new file mode 100644 index 0000000..e69de29 diff --git a/pages/3_โš™๏ธ_Data_Processing.py b/pages/3_โš™๏ธ_Data_Processing.py new file mode 100644 index 0000000..e69de29 diff --git a/pages/4_๐Ÿงช_Experiments.py b/pages/4_๐Ÿงช_Experiments.py new file mode 100644 index 0000000..e69de29 diff --git a/pages/5_๐Ÿ“ˆ_Results_Analysis.py b/pages/5_๐Ÿ“ˆ_Results_Analysis.py new file mode 100644 index 0000000..e69de29 diff --git a/pages/6_๐Ÿ”ฎ_Predictions.py b/pages/6_๐Ÿ”ฎ_Predictions.py new file mode 100644 index 0000000..e69de29 diff --git a/pages/7_โš™๏ธ_Configuration.py b/pages/7_โš™๏ธ_Configuration.py new file mode 100644 index 0000000..e69de29 diff --git a/pages/README.md b/pages/README.md new file mode 100644 index 0000000..e69de29 diff --git a/processing/ner/name_builder.py b/processing/ner/name_builder.py new file mode 100644 index 0000000..b569f7f --- /dev/null +++ b/processing/ner/name_builder.py @@ -0,0 +1,68 @@ +import json +import logging + +import spacy +from spacy.tokens import DocBin + +from core.config import PipelineConfig +from core.utils.data_loader import DataLoader +from .name_tagger import NameTagger + + +class NameBuilder: + def __init__(self, config: PipelineConfig): + self.config = config + self.data_loader = DataLoader(config) + self.tagger = NameTagger() + + def build(self) -> int: + filepath = self.config.paths.get_data_path(self.config.data.output_files["engineered"]) + df = self.data_loader.load_csv_complete(filepath) + df = df[["name", "ner_tagged", "ner_entities"]] + + # Filter early + ner_df = df.loc[df["ner_tagged"] == 1, ["name", "ner_entities"]] + if ner_df.empty: + logging.error("No NER tagged data found") + return 1 + + total_rows = len(df) + del df # No need to keep in memory + + logging.info(f"Found {len(ner_df)} NER tagged entries") + nlp = spacy.blank("fr") + + # Use NameTagger for parsing and validation (instance methods, so call them on self.tagger) + parsed_entities = self.tagger.parse_entities(ner_df["ner_entities"]) + validated_entities = self.tagger.validate_entities(ner_df["name"], parsed_entities) + + # Drop rows with no valid entities + mask = validated_entities.map(bool) + ner_df = ner_df.loc[mask] + validated_entities = validated_entities.loc[mask] + + if ner_df.empty: + logging.error("No valid training examples after validation") + return 1 + + # Prepare training data + training_data = list( + zip(ner_df["name"].tolist(), [{"entities": ents} for ents in validated_entities]) + ) + + # Use NameTagger to create the spaCy DocBin + docs = NameTagger.create_docs(nlp, ner_df["name"].tolist(), validated_entities.tolist()) + doc_bin = DocBin(docs=docs) + + # Save + json_path = self.config.paths.get_data_path(self.config.data.output_files["ner_data"]) + spacy_path = self.config.paths.get_data_path(self.config.data.output_files["ner_spacy"]) + + with open(json_path, "w", encoding="utf-8") as f: + json.dump(training_data, f, ensure_ascii=False, separators=(",", ":")) + doc_bin.to_disk(spacy_path) + + logging.info(f"Processed: {len(training_data)}, Skipped: {total_rows - len(training_data)}") 
+ logging.info(f"Saved NER JSON to {json_path}") + logging.info(f"Saved NER spacy to {spacy_path}") + return 0 diff --git a/processing/ner/ner_engineering.py b/processing/ner/name_engineering.py similarity index 95% rename from processing/ner/ner_engineering.py rename to processing/ner/name_engineering.py index 713654b..0b300f6 100644 --- a/processing/ner/ner_engineering.py +++ b/processing/ner/name_engineering.py @@ -1,5 +1,5 @@ +import gc import random -from typing import List import logging import numpy as np @@ -7,7 +7,7 @@ import pandas as pd from tqdm import tqdm from core.config import PipelineConfig -from core.utils.data_loader import OPTIMIZED_DTYPES, DataLoader +from core.utils.data_loader import DataLoader from processing.ner.formats.connectors_format import ConnectorFormatter from processing.ner.formats.extended_surname_format import ExtendedSurnameFormatter from processing.ner.formats.native_only_format import NativeOnlyFormatter @@ -16,7 +16,7 @@ from processing.ner.formats.position_flipped_format import PositionFlippedFormat from processing.ner.formats.reduced_native_format import ReducedNativeFormatter -class NEREngineering: +class NameEngineering: """ Feature engineering for NER dataset to prevent position-based learning and encourage sequence characteristic learning. @@ -66,13 +66,16 @@ class NEREngineering: def compute(self) -> None: logging.info("Applying feature engineering transformations...") input_filepath = self.config.paths.get_data_path(self.config.data.output_files["featured"]) - output_filepath = self.config.paths.get_data_path(self.config.data.output_files["engineered"]) + output_filepath = self.config.paths.get_data_path( + self.config.data.output_files["engineered"] + ) df = self.data_loader.load_csv_complete(input_filepath) ner_df = df[df["ner_tagged"] == 1].copy() logging.info(f"Loaded {len(ner_df)} NER-tagged records from {len(df)} total records") del df # No need to keep in memory + gc.collect() ner_df = ner_df.sample(frac=1, random_state=self.config.data.random_seed).reset_index( drop=True diff --git a/processing/ner/ner_name_model.py b/processing/ner/name_model.py similarity index 98% rename from processing/ner/ner_name_model.py rename to processing/ner/name_model.py index ab2aa01..8488424 100644 --- a/processing/ner/ner_name_model.py +++ b/processing/ner/name_model.py @@ -1,3 +1,4 @@ +import ast import json import logging import os @@ -11,7 +12,7 @@ from spacy.util import minibatch from core.config.pipeline_config import PipelineConfig -class NERNameModel: +class NameModel: """NER model trainer using spaCy for DRC names entity recognition""" def __init__(self, config: PipelineConfig): @@ -84,8 +85,6 @@ class NERNameModel: if isinstance(entities_raw, str): # String format from tagger: "[(0, 6, 'NATIVE'), ...]" try: - import ast - entities = ast.literal_eval(entities_raw) if not isinstance(entities, list): logging.warning( @@ -175,9 +174,9 @@ class NERNameModel: def train( self, data: List[Tuple[str, Dict]], - epochs: int = 5, - batch_size: int = 16, - dropout_rate: float = 0.2, + epochs: int = 1, + batch_size: int = 10_000, + dropout_rate: float = 0.3, ) -> None: """Train the NER model""" logging.info(f"Starting NER training with {len(data)} examples") @@ -204,7 +203,7 @@ class NERNameModel: example = Example.from_dict(doc, annotations) examples.append(example) logging.info( - f"Training example: {text[:30]}... 
with entities {annotations.get('entities', [])}" + f"Training example: {text[:30]} with entities {annotations.get('entities', [])}" ) # Train in batches @@ -215,6 +214,7 @@ class NERNameModel: ) logging.info(f"Training batch with {len(batch)} examples, current losses: {losses}") + del batches # free memory epoch_loss = losses.get("ner", 0) losses_history.append(epoch_loss) logging.info(f"Epoch {epoch + 1}/{epochs}, Loss: {epoch_loss:.4f}") diff --git a/processing/ner/name_tagger.py b/processing/ner/name_tagger.py new file mode 100644 index 0000000..6251ac0 --- /dev/null +++ b/processing/ner/name_tagger.py @@ -0,0 +1,273 @@ +from typing import Union, Dict, Any, List +import ast +import json +import logging +import pandas as pd +from spacy.util import filter_spans + + +class NameTagger: + def tag_name( + self, name: str, probable_native: str, probable_surname: str + ) -> Union[Dict[str, Any], None]: + """Create a single NER training example using probable_native and probable_surname""" + if not name or not probable_native or not probable_surname: + return None + + name = name.strip() + probable_native = probable_native.strip() + probable_surname = probable_surname.strip() + + entities = [] + used_spans = [] # Track used character spans to prevent overlaps + + # Helper function to check if a span overlaps with any existing span + def has_overlap(start, end): + for used_start, used_end in used_spans: + if not (end <= used_start or start >= used_end): + return True + return False + + # Find positions of native names in the full name + native_words = probable_native.split() + name_lower = name.lower() # Use lowercase for consistent searching + processed_native_words = set() + + for native_word in native_words: + native_word = native_word.strip() + if len(native_word) < 2: # Skip very short words + continue + + native_word_lower = native_word.lower() + + # Skip if we've already processed this exact word + if native_word_lower in processed_native_words: + continue + processed_native_words.add(native_word_lower) + + # Find the first occurrence of this native word that doesn't overlap + start_pos = 0 + while True: + pos = name_lower.find(native_word_lower, start_pos) # Case-insensitive search + if pos == -1: + break + + # Calculate end position - make sure we only include the word itself + end_pos = pos + len(native_word_lower) + + # Double-check that the extracted span matches exactly what we expect + extracted_text = name[pos:end_pos] # Get original case text + if extracted_text.lower() != native_word_lower: + start_pos = pos + 1 + continue + + # Check if this is a word boundary match and doesn't overlap + if self._is_word_boundary_match(name, pos, end_pos) and not has_overlap( + pos, end_pos + ): + entities.append((pos, end_pos, "NATIVE")) + used_spans.append((pos, end_pos)) + break # Only take the first non-overlapping occurrence + + start_pos = pos + 1 + + # Find position of surname in the full name + if probable_surname and len(probable_surname.strip()) >= 2: + surname_lower = probable_surname.lower() + + # Find the first occurrence that doesn't overlap + start_pos = 0 + while True: + pos = name_lower.find(surname_lower, start_pos) # Case-insensitive search + if pos == -1: + break + + # Calculate end position correctly - exact match only + end_pos = pos + len(surname_lower) + + # Double-check that the extracted span matches exactly what we expect + extracted_text = name[pos:end_pos] # Get original case text + if extracted_text.lower() != surname_lower: + start_pos = pos + 1 + continue + + if 
self._is_word_boundary_match(name, pos, end_pos) and not has_overlap( + pos, end_pos + ): + entities.append((pos, end_pos, "SURNAME")) + used_spans.append((pos, end_pos)) + break + + start_pos = pos + 1 + + if not entities: + logging.warning( + f"No valid entities found for name: '{name}' with native: '{probable_native}' and surname: '{probable_surname}'" + ) + return None + + # Sort entities by position and validate + entities.sort(key=lambda x: x[0]) + + # Final validation - ensure no overlaps and valid spans + validated_entities = [] + for start, end, label in entities: + # Check bounds + if not (0 <= start < end <= len(name)): + logging.warning( + f"Invalid span bounds ({start}, {end}) for text length {len(name)}: '{name}'" + ) + continue + + # Check for overlaps with already validated entities + if any(start < v_end and end > v_start for v_start, v_end, _ in validated_entities): + logging.warning(f"Overlapping span ({start}, {end}, '{label}') in '{name}'") + continue + + # CRITICAL VALIDATION: Check that the span contains only the expected word (no spaces) + span_text = name[start:end] + if not span_text or span_text != span_text.strip() or " " in span_text: + logging.warning( + f"Span contains spaces or is empty ({start}, {end}) in '{name}': '{span_text}'" + ) + continue + + validated_entities.append((start, end, label)) + + if not validated_entities: + logging.warning(f"No valid entities after validation for: '{name}'") + return None + + # Convert to string format that matches the dataset + entities_str = str(validated_entities) + + return { + "entities": entities_str, + "spans": validated_entities, # Keep the original tuples for internal use + } + + @classmethod + def _is_word_boundary_match(cls, text: str, start: int, end: int) -> bool: + """Check if the match is at word boundaries""" + # Check character before start position + if start > 0: + prev_char = text[start - 1] + if prev_char.isalnum(): + return False + + # Check character after end position + if end < len(text): + next_char = text[end] + if next_char.isalnum(): + return False + + return True + + @classmethod + def extract_entity_text(cls, name: str, entities_str: str) -> Dict[str, List[str]]: + """Extract the actual text for each entity type""" + result = {"NATIVE": [], "SURNAME": []} + + try: + entities = ast.literal_eval(entities_str) + + for start, end, label in entities: + if 0 <= start < end <= len(name): + span_text = name[start:end] + if label in result: + result[label].append(span_text) + + except (ValueError, SyntaxError, TypeError): + pass + + return result + + @classmethod + def parse(cls, entities_str: str) -> List[tuple]: + """Parse entity strings from various formats. + + Supports formats: + - [(start, end, label), ...] + - [[start, end, label], ...] + - [{"start": start, "end": end, "label": label}, ...] 
+ """ + if not entities_str or entities_str in ["[]", "", "nan"]: + return [] + entities_str = str(entities_str).strip() + try: + if entities_str.startswith("[(") and entities_str.endswith(")]"): + return ast.literal_eval(entities_str) + elif entities_str.startswith("[[") and entities_str.endswith("]]"): + return [tuple(e) for e in ast.literal_eval(entities_str)] + elif entities_str.startswith("[{") and entities_str.endswith("}]"): + return [(e["start"], e["end"], e["label"]) for e in json.loads(entities_str)] + else: + parsed = ast.literal_eval(entities_str) + return [tuple(e) for e in parsed if isinstance(e, (list, tuple)) and len(e) == 3] + except (ValueError, SyntaxError, json.JSONDecodeError): + return [] + + def parse_entities(self, series: pd.Series) -> pd.Series: + """Vectorized parse of entity strings.""" + return series.map(self.parse) + + @classmethod + def validate(cls, text: str, entities: List[tuple]) -> List[tuple]: + """Advanced entity validation with overlap removal. + + This is more comprehensive than the basic validate_entities method. + """ + if not entities or not text: + return [] + text = str(text).strip() + valid = [] + + for ent in entities: + if not isinstance(ent, (list, tuple)) or len(ent) != 3: + continue + start, end, label = ent + try: + start, end = int(start), int(end) + except (ValueError, TypeError): + continue + if not isinstance(label, str): + continue + if not (0 <= start < end <= len(text)): + continue + if not text[start:end].strip(): + continue + valid.append((start, end, label)) + + if not valid: + return [] + + valid.sort(key=lambda x: (x[0], x[1])) + + # Remove overlaps + filtered, last_end = [], -1 + for s, e, l in valid: + if s >= last_end: + filtered.append((s, e, l)) + last_end = e + return filtered + + def validate_entities(self, texts: pd.Series, entities_series: pd.Series) -> pd.Series: + """Vectorized entity validation.""" + return pd.Series(map(self.validate, texts, entities_series), index=texts.index) + + @classmethod + def create_docs(cls, nlp, texts: List[str], entities: List[List[tuple]]) -> List: + """Batch create spaCy Docs from texts and entities.""" + docs = [] + for text, ents in zip(texts, entities): + doc = nlp(text) + spans = [] + for start, end, label in ents: + span = doc.char_span( + start, end, label=label, alignment_mode="contract" + ) or doc.char_span(start, end, label=label, alignment_mode="strict") + if span: + spans.append(span) + doc.ents = filter_spans(spans) + docs.append(doc) + return docs diff --git a/processing/ner/ner_data_builder.py b/processing/ner/ner_data_builder.py index 4d1d5cc..e69de29 100644 --- a/processing/ner/ner_data_builder.py +++ b/processing/ner/ner_data_builder.py @@ -1,149 +0,0 @@ -import ast -import json -import logging -from pathlib import Path - -import pandas as pd -import spacy -from spacy.tokens import DocBin -from spacy.util import filter_spans - -from core.config import PipelineConfig -from core.utils.data_loader import DataLoader - - -class NERDataBuilder: - def __init__(self, config: PipelineConfig): - self.config = config - self.data_loader = DataLoader(config) - - @staticmethod - def _parse_entities(series: pd.Series) -> pd.Series: - """Vectorized parse of entity strings.""" - - def _parse(entities_str): - if not entities_str or entities_str in ["[]", "", "nan"]: - return [] - entities_str = str(entities_str).strip() - try: - if entities_str.startswith("[(") and entities_str.endswith(")]"): - return ast.literal_eval(entities_str) - elif entities_str.startswith("[[") and 
entities_str.endswith("]]"): - return [tuple(e) for e in ast.literal_eval(entities_str)] - elif entities_str.startswith("[{") and entities_str.endswith("}]"): - return [(e["start"], e["end"], e["label"]) for e in json.loads(entities_str)] - else: - parsed = ast.literal_eval(entities_str) - return [ - tuple(e) for e in parsed if isinstance(e, (list, tuple)) and len(e) == 3 - ] - except (ValueError, SyntaxError, json.JSONDecodeError): - return [] - - return series.map(_parse) - - @staticmethod - def _validate_entities(texts: pd.Series, entities_series: pd.Series) -> pd.Series: - """Vectorized entity validation.""" - - def _validate(text, entities): - if not entities or not text: - return [] - text = str(text).strip() - valid = [] - for ent in entities: - if not isinstance(ent, (list, tuple)) or len(ent) != 3: - continue - start, end, label = ent - try: - start, end = int(start), int(end) - except (ValueError, TypeError): - continue - if not isinstance(label, str): - continue - if not (0 <= start < end <= len(text)): - continue - if not text[start:end].strip(): - continue - valid.append((start, end, label)) - if not valid: - return [] - valid.sort(key=lambda x: (x[0], x[1])) - # remove overlaps - filtered, last_end = [], -1 - for s, e, l in valid: - if s >= last_end: - filtered.append((s, e, l)) - last_end = e - return filtered - - return pd.Series(map(_validate, texts, entities_series), index=texts.index) - - @staticmethod - def _create_docs(nlp, texts, entities): - """Batch create spaCy Docs.""" - docs = [] - for text, ents in zip(texts, entities): - doc = nlp(text) - spans = [] - for start, end, label in ents: - span = doc.char_span( - start, end, label=label, alignment_mode="contract" - ) or doc.char_span(start, end, label=label, alignment_mode="strict") - if span: - spans.append(span) - doc.ents = filter_spans(spans) - docs.append(doc) - return docs - - def build(self) -> int: - filepath = self.config.paths.get_data_path(self.config.data.output_files["engineered"]) - df = self.data_loader.load_csv_complete(filepath) - df = df[["name", "ner_tagged", "ner_entities"]] - - # Filter early - ner_df = df.loc[df["ner_tagged"] == 1, ["name", "ner_entities"]] - if ner_df.empty: - logging.error("No NER tagged data found") - return 1 - - total_rows = len(df) - del df # No need to keep in memory - - logging.info(f"Found {len(ner_df)} NER tagged entries") - nlp = spacy.blank("fr") - - # Vectorized parsing + validation - parsed_entities = self._parse_entities(ner_df["ner_entities"]) - validated_entities = self._validate_entities(ner_df["name"], parsed_entities) - - # Drop rows with no valid entities - mask = validated_entities.map(bool) - ner_df = ner_df.loc[mask] - validated_entities = validated_entities.loc[mask] - - if ner_df.empty: - logging.error("No valid training examples after validation") - return 1 - - # Prepare training data - training_data = list( - zip(ner_df["name"].tolist(), [{"entities": ents} for ents in validated_entities]) - ) - - # Create spaCy DocBin in batch - docs = self._create_docs(nlp, ner_df["name"].tolist(), validated_entities.tolist()) - doc_bin = DocBin(docs=docs) - - # Save - json_path = self.config.paths.get_data_path(self.config.data.output_files["ner_data"]) - spacy_path = self.config.paths.get_data_path(self.config.data.output_files["ner_spacy"]) - - with open(json_path, "w", encoding="utf-8") as f: - json.dump(training_data, f, ensure_ascii=False, separators=(",", ":")) - doc_bin.to_disk(spacy_path) - - logging.info(f"Processed: {len(training_data)}, Skipped: 
{total_rows - len(training_data)}") - logging.info(f"Saved NER JSON to {json_path}") - logging.info(f"Saved NER spacy to {spacy_path}") - return 0 diff --git a/processing/ner/ner_name_tagger.py b/processing/ner/ner_name_tagger.py index 3eeee9c..e69de29 100644 --- a/processing/ner/ner_name_tagger.py +++ b/processing/ner/ner_name_tagger.py @@ -1,212 +0,0 @@ -from typing import Union, Dict, Any, List -import logging - - -class NERNameTagger: - def tag_name( - self, name: str, probable_native: str, probable_surname: str - ) -> Union[Dict[str, Any], None]: - """Create a single NER training example using probable_native and probable_surname""" - if not name or not probable_native or not probable_surname: - return None - - name = name.strip() - probable_native = probable_native.strip() - probable_surname = probable_surname.strip() - - entities = [] - used_spans = [] # Track used character spans to prevent overlaps - - # Helper function to check if a span overlaps with any existing span - def has_overlap(start, end): - for used_start, used_end in used_spans: - if not (end <= used_start or start >= used_end): - return True - return False - - # Find positions of native names in the full name - native_words = probable_native.split() - name_lower = name.lower() # Use lowercase for consistent searching - processed_native_words = set() - - for native_word in native_words: - native_word = native_word.strip() - if len(native_word) < 2: # Skip very short words - continue - - native_word_lower = native_word.lower() - - # Skip if we've already processed this exact word - if native_word_lower in processed_native_words: - continue - processed_native_words.add(native_word_lower) - - # Find the first occurrence of this native word that doesn't overlap - start_pos = 0 - while True: - pos = name_lower.find(native_word_lower, start_pos) # Case-insensitive search - if pos == -1: - break - - # Calculate end position - make sure we only include the word itself - end_pos = pos + len(native_word_lower) - - # Double-check that the extracted span matches exactly what we expect - extracted_text = name[pos:end_pos] # Get original case text - if extracted_text.lower() != native_word_lower: - start_pos = pos + 1 - continue - - # Check if this is a word boundary match and doesn't overlap - if self._is_word_boundary_match(name, pos, end_pos) and not has_overlap( - pos, end_pos - ): - entities.append((pos, end_pos, "NATIVE")) - used_spans.append((pos, end_pos)) - break # Only take the first non-overlapping occurrence - - start_pos = pos + 1 - - # Find position of surname in the full name - if probable_surname and len(probable_surname.strip()) >= 2: - surname_lower = probable_surname.lower() - - # Find the first occurrence that doesn't overlap - start_pos = 0 - while True: - pos = name_lower.find(surname_lower, start_pos) # Case-insensitive search - if pos == -1: - break - - # Calculate end position correctly - exact match only - end_pos = pos + len(surname_lower) - - # Double-check that the extracted span matches exactly what we expect - extracted_text = name[pos:end_pos] # Get original case text - if extracted_text.lower() != surname_lower: - start_pos = pos + 1 - continue - - if self._is_word_boundary_match(name, pos, end_pos) and not has_overlap( - pos, end_pos - ): - entities.append((pos, end_pos, "SURNAME")) - used_spans.append((pos, end_pos)) - break - - start_pos = pos + 1 - - if not entities: - logging.warning( - f"No valid entities found for name: '{name}' with native: '{probable_native}' and surname: 
'{probable_surname}'" - ) - return None - - # Sort entities by position and validate - entities.sort(key=lambda x: x[0]) - - # Final validation - ensure no overlaps and valid spans - validated_entities = [] - for start, end, label in entities: - # Check bounds - if not (0 <= start < end <= len(name)): - logging.warning( - f"Invalid span bounds ({start}, {end}) for text length {len(name)}: '{name}'" - ) - continue - - # Check for overlaps with already validated entities - if any(start < v_end and end > v_start for v_start, v_end, _ in validated_entities): - logging.warning(f"Overlapping span ({start}, {end}, '{label}') in '{name}'") - continue - - # CRITICAL VALIDATION: Check that the span contains only the expected word (no spaces) - span_text = name[start:end] - if not span_text or span_text != span_text.strip() or " " in span_text: - logging.warning( - f"Span contains spaces or is empty ({start}, {end}) in '{name}': '{span_text}'" - ) - continue - - validated_entities.append((start, end, label)) - - if not validated_entities: - logging.warning(f"No valid entities after validation for: '{name}'") - return None - - # Convert to string format that matches the dataset - entities_str = str(validated_entities) - - return { - "entities": entities_str, - "spans": validated_entities, # Keep the original tuples for internal use - } - - @classmethod - def _is_word_boundary_match(cls, text: str, start: int, end: int) -> bool: - """Check if the match is at word boundaries""" - # Check character before start position - if start > 0: - prev_char = text[start - 1] - if prev_char.isalnum(): - return False - - # Check character after end position - if end < len(text): - next_char = text[end] - if next_char.isalnum(): - return False - - return True - - @classmethod - def validate_entities(cls, name: str, entities_str: str) -> bool: - """Validate that entity annotations are correct for a given name""" - try: - import ast - - entities = ast.literal_eval(entities_str) - - # Check for overlaps and valid bounds - sorted_entities = sorted(entities, key=lambda x: x[0]) - - for i, (start, end, label) in enumerate(sorted_entities): - # Check bounds - if not (0 <= start < end <= len(name)): - return False - - # Check for overlaps with next entity - if i < len(sorted_entities) - 1: - next_start = sorted_entities[i + 1][0] - if end > next_start: - return False - - # Extract the text span and validate it's not empty - span_text = name[start:end] - if not span_text.strip(): - return False - - return True - except (ValueError, SyntaxError, TypeError): - return False - - @classmethod - def extract_entity_text(cls, name: str, entities_str: str) -> Dict[str, List[str]]: - """Extract the actual text for each entity type""" - result = {"NATIVE": [], "SURNAME": []} - - try: - import ast - - entities = ast.literal_eval(entities_str) - - for start, end, label in entities: - if 0 <= start < end <= len(name): - span_text = name[start:end] - if label in result: - result[label].append(span_text) - - except (ValueError, SyntaxError, TypeError): - pass - - return result diff --git a/web/__init__.py b/web/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/web/app.py b/web/app.py new file mode 100644 index 0000000..9b5b487 --- /dev/null +++ b/web/app.py @@ -0,0 +1,89 @@ +#!.venv/bin/python3 +import argparse +import sys +from pathlib import Path +import streamlit as st + +# Add parent directory to Python path to access core modules +parent_dir = Path(__file__).parent.parent +sys.path.insert(0, str(parent_dir)) + +from 
core.config import setup_config, PipelineConfig +from core.utils.data_loader import DataLoader +from processing.monitoring.pipeline_monitor import PipelineMonitor +from research.experiment.experiment_runner import ExperimentRunner +from research.experiment.experiment_tracker import ExperimentTracker +from web.interfaces.configuration import Configuration +from web.interfaces.dashboard import Dashboard +from web.interfaces.data_overview import DataOverview +from web.interfaces.data_processing import DataProcessing +from web.interfaces.experiments import Experiments +from web.interfaces.predictions import Predictions +from web.interfaces.results_analysis import ResultsAnalysis + +# Page configuration +st.set_page_config( + page_title="DRC NERS Platform", + page_icon="๐Ÿ‡จ๐Ÿ‡ฉ", + layout="wide", + initial_sidebar_state="expanded", +) + + +def initialize_session_state(config: PipelineConfig): + """Initialize session state variables""" + if "config" not in st.session_state: + st.session_state.config = config + if "data_loader" not in st.session_state: + st.session_state.data_loader = DataLoader(config) + if "experiment_tracker" not in st.session_state: + st.session_state.experiment_tracker = ExperimentTracker(config) + if "experiment_runner" not in st.session_state: + st.session_state.experiment_runner = ExperimentRunner(config) + if "pipeline_monitor" not in st.session_state: + st.session_state.pipeline_monitor = PipelineMonitor() + if "current_experiment" not in st.session_state: + st.session_state.current_experiment = None + if "experiment_results" not in st.session_state: + st.session_state.experiment_results = {} + + +class StreamlitApp: + def __init__(self, config: PipelineConfig): + self.config = config + initialize_session_state(config) + + def run(self): + st.title("๐Ÿ‡จ๐Ÿ‡ฉ DRC NERS Pipeline") + st.markdown( + "A Culturally-Aware NLP System for Congolese Name Analysis and Gender Inference" + ) + + st.markdown( + """ + ## Overview + Despite the growing success of gender inference models in Natural Language Processing (NLP), these tools often + underperform when applied to culturally diverse African contexts due to the lack of culturally-representative training + data. + This project introduces a comprehensive pipeline for Congolese name analysis with a large-scale dataset of over 5 + million names from the Democratic Republic of Congo (DRC) annotated with gender and demographic metadata. 
+ """ + ) + + +def main(): + parser = argparse.ArgumentParser( + description="DRC NERS Platform", + formatter_class=argparse.RawDescriptionHelpFormatter, + ) + parser.add_argument("--config", type=str, help="Path to configuration file") + parser.add_argument("--env", type=str, default="development", help="Environment name") + args = parser.parse_args() + + config = setup_config(args.config, env=args.env) + app = StreamlitApp(config) + app.run() + + +if __name__ == "__main__": + main() diff --git a/web/interfaces/__init__.py b/web/interfaces/__init__.py new file mode 100644 index 0000000..e69de29 diff --git a/interface/configuration.py b/web/interfaces/configuration.py similarity index 63% rename from interface/configuration.py rename to web/interfaces/configuration.py index bc843e9..712f613 100644 --- a/interface/configuration.py +++ b/web/interfaces/configuration.py @@ -2,11 +2,9 @@ import streamlit as st class Configuration: - """Handles configuration display and management""" - def __init__(self, config): self.config = config def index(self): - st.header("Current Configuration") + st.title("Configuration") st.json(self.config.model_dump()) diff --git a/interface/dashboard.py b/web/interfaces/dashboard.py similarity index 98% rename from interface/dashboard.py rename to web/interfaces/dashboard.py index 5287322..804c66b 100644 --- a/interface/dashboard.py +++ b/web/interfaces/dashboard.py @@ -20,7 +20,7 @@ class Dashboard: self.experiment_runner = experiment_runner def index(self): - st.header("Dashboard") + st.title("Dashboard") col1, col2, col3, col4 = st.columns(4) # Load basic statistics diff --git a/interface/data_overview.py b/web/interfaces/data_overview.py similarity index 99% rename from interface/data_overview.py rename to web/interfaces/data_overview.py index fc34190..74a3acc 100644 --- a/interface/data_overview.py +++ b/web/interfaces/data_overview.py @@ -21,7 +21,7 @@ class DataOverview: self.config = config def index(self): - st.header("Data Overview") + st.title("Data Overview") data_files = { "Names": self.config.data.input_file, "Featured Dataset": self.config.data.output_files["featured"], diff --git a/interface/data_processing.py b/web/interfaces/data_processing.py similarity index 98% rename from interface/data_processing.py rename to web/interfaces/data_processing.py index 4f6093e..1f82d53 100644 --- a/interface/data_processing.py +++ b/web/interfaces/data_processing.py @@ -3,7 +3,7 @@ import plotly.express as px import streamlit as st from core.utils.data_loader import OPTIMIZED_DTYPES -from interface.log_reader import LogReader +from web.interfaces.log_reader import LogReader @st.cache_data @@ -21,7 +21,7 @@ class DataProcessing: self.pipeline_monitor = pipeline_monitor def index(self): - st.header("Data Processing Pipeline") + st.title("Data Processing") status = self.pipeline_monitor.get_pipeline_status() # Overall progress diff --git a/interface/experiments.py b/web/interfaces/experiments.py similarity index 99% rename from interface/experiments.py rename to web/interfaces/experiments.py index aa519ec..dd258c2 100644 --- a/interface/experiments.py +++ b/web/interfaces/experiments.py @@ -12,8 +12,6 @@ from research.model_registry import list_available_models class Experiments: - """Handles experiment management interface""" - def __init__( self, config, experiment_tracker: ExperimentTracker, experiment_runner: ExperimentRunner ): @@ -22,8 +20,7 @@ class Experiments: self.experiment_runner = experiment_runner def index(self): - """Main experiments page""" - 
st.header("Experiment Management") + st.title("Experiments") tab1, tab2, tab3 = st.tabs(["New Experiment", "Experiment List", "Batch Experiments"]) with tab1: diff --git a/interface/log_reader.py b/web/interfaces/log_reader.py similarity index 100% rename from interface/log_reader.py rename to web/interfaces/log_reader.py diff --git a/interface/predictions.py b/web/interfaces/predictions.py similarity index 99% rename from interface/predictions.py rename to web/interfaces/predictions.py index 56bbf99..b3804d1 100644 --- a/interface/predictions.py +++ b/web/interfaces/predictions.py @@ -12,8 +12,6 @@ from research.experiment.experiment_tracker import ExperimentTracker class Predictions: - """Handles prediction interface""" - def __init__( self, config, experiment_tracker: ExperimentTracker, experiment_runner: ExperimentRunner ): @@ -22,8 +20,7 @@ class Predictions: self.experiment_runner = experiment_runner def index(self): - """Main predictions page""" - st.header("Make Predictions") + st.title("Predictions") # Load available models experiments = self.experiment_tracker.list_experiments() diff --git a/interface/results_analysis.py b/web/interfaces/results_analysis.py similarity index 98% rename from interface/results_analysis.py rename to web/interfaces/results_analysis.py index 22123f9..aa3d52c 100644 --- a/interface/results_analysis.py +++ b/web/interfaces/results_analysis.py @@ -11,8 +11,6 @@ from research.experiment.experiment_tracker import ExperimentTracker class ResultsAnalysis: - """Handles experiment results and analysis interface""" - def __init__( self, config, experiment_tracker: ExperimentTracker, experiment_runner: ExperimentRunner ): @@ -21,8 +19,7 @@ class ResultsAnalysis: self.experiment_runner = experiment_runner def index(self): - """Main results analysis page""" - st.header("Results & Analysis") + st.title("Results & Analysis") tab1, tab2, tab3 = st.tabs( ["Experiment Comparison", "Performance Analysis", "Model Analysis"] ) diff --git a/web/pages/1_๐Ÿ“Š_Dashboard.py b/web/pages/1_๐Ÿ“Š_Dashboard.py new file mode 100644 index 0000000..3cb186c --- /dev/null +++ b/web/pages/1_๐Ÿ“Š_Dashboard.py @@ -0,0 +1,22 @@ +import sys +from pathlib import Path +import streamlit as st + +# Add parent directory to Python path to access core modules +parent_dir = Path(__file__).parent.parent.parent +sys.path.insert(0, str(parent_dir)) + +from web.interfaces.dashboard import Dashboard + +st.set_page_config(page_title="Dashboard", page_icon="๐Ÿ“Š", layout="wide") + +if "config" in st.session_state: + dashboard = Dashboard( + st.session_state.config, + st.session_state.experiment_tracker, + st.session_state.experiment_runner, + ) + dashboard.index() +else: + st.error("Please run the main app first to initialize the configuration.") + st.markdown("Go back to the [main page](/) to start the application.") diff --git a/web/pages/2_๐Ÿ“‹_Data_Overview.py b/web/pages/2_๐Ÿ“‹_Data_Overview.py new file mode 100644 index 0000000..8a520e1 --- /dev/null +++ b/web/pages/2_๐Ÿ“‹_Data_Overview.py @@ -0,0 +1,18 @@ +import sys +from pathlib import Path +import streamlit as st + +# Add parent directory to Python path to access core modules +parent_dir = Path(__file__).parent.parent.parent +sys.path.insert(0, str(parent_dir)) + +from web.interfaces.data_overview import DataOverview + +st.set_page_config(page_title="Data Overview", page_icon="๐Ÿ“‹", layout="wide") + +if "config" in st.session_state: + data_overview = DataOverview(st.session_state.config) + data_overview.index() +else: + st.error("Please run the 
main app first to initialize the configuration.") + st.markdown("Go back to the [main page](/) to start the application.") diff --git a/web/pages/3_โš™๏ธ_Data_Processing.py b/web/pages/3_โš™๏ธ_Data_Processing.py new file mode 100644 index 0000000..d028daf --- /dev/null +++ b/web/pages/3_โš™๏ธ_Data_Processing.py @@ -0,0 +1,18 @@ +import sys +from pathlib import Path +import streamlit as st + +# Add parent directory to Python path to access core modules +parent_dir = Path(__file__).parent.parent.parent +sys.path.insert(0, str(parent_dir)) + +from web.interfaces.data_processing import DataProcessing + +st.set_page_config(page_title="Data Processing", page_icon="โš™๏ธ", layout="wide") + +if "config" in st.session_state: + data_processing = DataProcessing(st.session_state.config, st.session_state.pipeline_monitor) + data_processing.index() +else: + st.error("Please run the main app first to initialize the configuration.") + st.markdown("Go back to the [main page](/) to start the application.") diff --git a/web/pages/4_๐Ÿงช_Experiments.py b/web/pages/4_๐Ÿงช_Experiments.py new file mode 100644 index 0000000..880b5d9 --- /dev/null +++ b/web/pages/4_๐Ÿงช_Experiments.py @@ -0,0 +1,22 @@ +import sys +from pathlib import Path +import streamlit as st + +# Add parent directory to Python path to access core modules +parent_dir = Path(__file__).parent.parent.parent +sys.path.insert(0, str(parent_dir)) + +from web.interfaces.experiments import Experiments + +st.set_page_config(page_title="Experiments", page_icon="๐Ÿงช", layout="wide") + +if "config" in st.session_state: + experiments = Experiments( + st.session_state.config, + st.session_state.experiment_tracker, + st.session_state.experiment_runner, + ) + experiments.index() +else: + st.error("Please run the main app first to initialize the configuration.") + st.markdown("Go back to the [main page](/) to start the application.") diff --git a/web/pages/5_๐Ÿ“ˆ_Results_Analysis.py b/web/pages/5_๐Ÿ“ˆ_Results_Analysis.py new file mode 100644 index 0000000..593dc8a --- /dev/null +++ b/web/pages/5_๐Ÿ“ˆ_Results_Analysis.py @@ -0,0 +1,22 @@ +import sys +from pathlib import Path +import streamlit as st + +# Add parent directory to Python path to access core modules +parent_dir = Path(__file__).parent.parent.parent +sys.path.insert(0, str(parent_dir)) + +from web.interfaces.results_analysis import ResultsAnalysis + +st.set_page_config(page_title="Results & Analysis", page_icon="๐Ÿ“ˆ", layout="wide") + +if "config" in st.session_state: + results_analysis = ResultsAnalysis( + st.session_state.config, + st.session_state.experiment_tracker, + st.session_state.experiment_runner, + ) + results_analysis.index() +else: + st.error("Please run the main app first to initialize the configuration.") + st.markdown("Go back to the [main page](/) to start the application.") diff --git a/web/pages/6_๐Ÿ”ฎ_Predictions.py b/web/pages/6_๐Ÿ”ฎ_Predictions.py new file mode 100644 index 0000000..1fa3a2a --- /dev/null +++ b/web/pages/6_๐Ÿ”ฎ_Predictions.py @@ -0,0 +1,22 @@ +import sys +from pathlib import Path +import streamlit as st + +# Add parent directory to Python path to access core modules +parent_dir = Path(__file__).parent.parent.parent +sys.path.insert(0, str(parent_dir)) + +from web.interfaces.predictions import Predictions + +st.set_page_config(page_title="Predictions", page_icon="๐Ÿ”ฎ", layout="wide") + +if "config" in st.session_state: + predictions = Predictions( + st.session_state.config, + st.session_state.experiment_tracker, + st.session_state.experiment_runner, + ) + 
predictions.index() +else: + st.error("Please run the main app first to initialize the configuration.") + st.markdown("Go back to the [main page](/) to start the application.") diff --git a/web/pages/7_โš™๏ธ_Configuration.py b/web/pages/7_โš™๏ธ_Configuration.py new file mode 100644 index 0000000..abd2f8e --- /dev/null +++ b/web/pages/7_โš™๏ธ_Configuration.py @@ -0,0 +1,18 @@ +import sys +from pathlib import Path +import streamlit as st + +# Add parent directory to Python path to access core modules +parent_dir = Path(__file__).parent.parent.parent +sys.path.insert(0, str(parent_dir)) + +from web.interfaces.configuration import Configuration + +st.set_page_config(page_title="Configuration", page_icon="โš™๏ธ", layout="wide") + +if "config" in st.session_state: + configuration = Configuration(st.session_state.config) + configuration.index() +else: + st.error("Please run the main app first to initialize the configuration.") + st.markdown("Go back to the [main page](/) to start the application.")