Files
drc-ners-nlp/web/app.py
T

90 lines
3.3 KiB
Python

#!.venv/bin/python3
import argparse
import sys
from pathlib import Path
import streamlit as st
# Add parent directory to Python path to access core modules
parent_dir = Path(__file__).parent.parent
sys.path.insert(0, str(parent_dir))
from core.config import setup_config, PipelineConfig
from core.utils.data_loader import DataLoader
from processing.monitoring.pipeline_monitor import PipelineMonitor
from research.experiment.experiment_runner import ExperimentRunner
from research.experiment.experiment_tracker import ExperimentTracker
from web.interfaces.configuration import Configuration
from web.interfaces.dashboard import Dashboard
from web.interfaces.data_overview import DataOverview
from web.interfaces.data_processing import DataProcessing
from web.interfaces.experiments import Experiments
from web.interfaces.predictions import Predictions
from web.interfaces.results_analysis import ResultsAnalysis
# Page-level Streamlit settings: title, icon, wide layout, and a sidebar that
# starts expanded.
st.set_page_config(
    layout="wide",
    initial_sidebar_state="expanded",
    page_title="DRC NERS Platform",
    page_icon="🇨🇩",
)
def initialize_session_state(config: PipelineConfig):
    """Seed ``st.session_state`` with the objects the app shares.

    A key is only populated when it is not already present, so values that are
    already stored in the session state are left untouched.

    Args:
        config: Pipeline configuration stored under ``"config"`` and passed to
            the data loader, experiment tracker, and experiment runner.
    """
    state = st.session_state

    # Each entry is a zero-argument factory so that an object is constructed
    # only when its key is actually missing. Dict order fixes creation order.
    factories = {
        "config": lambda: config,
        "data_loader": lambda: DataLoader(config),
        "experiment_tracker": lambda: ExperimentTracker(config),
        "experiment_runner": lambda: ExperimentRunner(config),
        "pipeline_monitor": lambda: PipelineMonitor(),
        "current_experiment": lambda: None,
        "experiment_results": lambda: {},
    }

    for key, build in factories.items():
        if key not in state:
            setattr(state, key, build())
class StreamlitApp:
    """Streamlit front page for the DRC NERS pipeline."""

    def __init__(self, config: PipelineConfig):
        """Keep the configuration and seed the session state from it.

        Args:
            config: Pipeline configuration handed to ``initialize_session_state``.
        """
        self.config = config
        initialize_session_state(config)

    def run(self):
        """Render the page title, the tagline, and the project overview."""
        tagline = "A Culturally-Aware NLP System for Congolese Name Analysis and Gender Inference"
        overview = """
## Overview
Despite the growing success of gender inference models in Natural Language Processing (NLP), these tools often
underperform when applied to culturally diverse African contexts due to the lack of culturally-representative training
data.
This project introduces a comprehensive pipeline for Congolese name analysis with a large-scale dataset of over 5
million names from the Democratic Republic of Congo (DRC) annotated with gender and demographic metadata.
"""

        st.title("🇨🇩 DRC NERS Pipeline")
        # Tagline first, then the longer overview section.
        for text in (tagline, overview):
            st.markdown(text)
def main():
    """Parse the command-line options, build the configuration, and run the app.

    Recognised options are ``--config`` (path to a configuration file) and
    ``--env`` (environment name, ``"development"`` when omitted).
    """
    cli = argparse.ArgumentParser(
        formatter_class=argparse.RawDescriptionHelpFormatter,
        description="DRC NERS Platform",
    )
    cli.add_argument("--config", type=str, help="Path to configuration file")
    cli.add_argument("--env", type=str, default="development", help="Environment name")
    options = cli.parse_args()

    pipeline_config = setup_config(options.config, env=options.env)
    StreamlitApp(pipeline_config).run()
# Entry point: only start the app when this file is executed as the main
# script, not when it is imported as a module.
if __name__ == "__main__":
    main()