Files
drc-ners-nlp/web/app.py
T
2025-08-16 20:34:45 +02:00

82 lines
2.9 KiB
Python

#!.venv/bin/python3
import argparse
import sys
from pathlib import Path
import streamlit as st
# Add parent directory to Python path to access core modules
parent_dir = Path(__file__).parent.parent
sys.path.insert(0, str(parent_dir))
from core.config import setup_config, PipelineConfig
from core.utils.data_loader import DataLoader
from processing.monitoring.pipeline_monitor import PipelineMonitor
from research.experiment.experiment_runner import ExperimentRunner
from research.experiment.experiment_tracker import ExperimentTracker
# Page configuration
st.set_page_config(
page_title="DRC NERS Platform",
page_icon="🇨🇩",
layout="wide",
initial_sidebar_state="expanded",
)
def initialize_session_state(config: PipelineConfig):
"""Initialize session state variables"""
if "config" not in st.session_state:
st.session_state.config = config
if "data_loader" not in st.session_state:
st.session_state.data_loader = DataLoader(config)
if "experiment_tracker" not in st.session_state:
st.session_state.experiment_tracker = ExperimentTracker(config)
if "experiment_runner" not in st.session_state:
st.session_state.experiment_runner = ExperimentRunner(config)
if "pipeline_monitor" not in st.session_state:
st.session_state.pipeline_monitor = PipelineMonitor()
if "current_experiment" not in st.session_state:
st.session_state.current_experiment = None
if "experiment_results" not in st.session_state:
st.session_state.experiment_results = {}
class StreamlitApp:
def __init__(self, config: PipelineConfig):
self.config = config
initialize_session_state(config)
@classmethod
def run(cls):
st.title("🇨🇩 DRC NERS Platform")
st.markdown("A Culturally-Aware NLP System for Congolese Name Analysis and Gender Inference")
st.markdown(
"""
## Overview
Despite the growing success of gender inference models in Natural Language Processing (NLP), these tools often
underperform when applied to culturally diverse African contexts due to the lack of culturally-representative training
data.
This project introduces a comprehensive pipeline for Congolese name analysis with a large-scale dataset of over 5
million names from the Democratic Republic of Congo (DRC) annotated with gender and demographic metadata.
"""
)
def main():
parser = argparse.ArgumentParser(
description="DRC NERS Platform",
formatter_class=argparse.RawDescriptionHelpFormatter,
)
parser.add_argument("--config", type=str, help="Path to configuration file")
parser.add_argument("--env", type=str, default="development", help="Environment name")
args = parser.parse_args()
config = setup_config(args.config, env=args.env)
app = StreamlitApp(config)
app.run()
if __name__ == "__main__":
main()