refactor: clean up imports and improve gender normalization method
This commit is contained in:
+23
-18
@@ -23,7 +23,7 @@ class RegionMapper:
|
||||
"bandundu",
|
||||
"katanga",
|
||||
"equateur",
|
||||
"province-orientale",
|
||||
"orientale",
|
||||
"maniema",
|
||||
"nord-kivu",
|
||||
"sud-kivu",
|
||||
@@ -70,6 +70,11 @@ REGION_MAPPING: Dict[str, Tuple[str, str]] = {
|
||||
"mai-ndombe-2": ("MAI-NDOMBE", "BANDUNDU"),
|
||||
"mai-ndombe-3": ("MAI-NDOMBE", "BANDUNDU"),
|
||||
# Katanga → HAUT-KATANGA, HAUT-LOMAMI, LUALABA, TANGANYIKA
|
||||
"katanga": ("KATANGA", "KATANGA"),
|
||||
"katanga-1": ("KATANGA", "KATANGA"),
|
||||
"katanga-2": ("KATANGA", "KATANGA"),
|
||||
"katanga-3": ("KATANGA", "KATANGA"),
|
||||
"katanga-4": ("KATANGA", "KATANGA"),
|
||||
"haut-katanga": ("HAUT-KATANGA", "KATANGA"),
|
||||
"haut-katanga-1": ("HAUT-KATANGA", "KATANGA"),
|
||||
"haut-katanga-2": ("HAUT-KATANGA", "KATANGA"),
|
||||
@@ -103,23 +108,23 @@ REGION_MAPPING: Dict[str, Tuple[str, str]] = {
|
||||
"tshuapa-1": ("TSHUAPA", "EQUATEUR"),
|
||||
"tshuapa-2": ("TSHUAPA", "EQUATEUR"),
|
||||
# Province-Orientale
|
||||
"province-orientale": ("PROVINCE-ORIENTALE", "PROVINCE-ORIENTALE"),
|
||||
"province-orientale-1": ("PROVINCE-ORIENTALE", "PROVINCE-ORIENTALE"),
|
||||
"province-orientale-2": ("PROVINCE-ORIENTALE", "PROVINCE-ORIENTALE"),
|
||||
"province-orientale-3": ("PROVINCE-ORIENTALE", "PROVINCE-ORIENTALE"),
|
||||
"province-orientale-4": ("PROVINCE-ORIENTALE", "PROVINCE-ORIENTALE"),
|
||||
"haut-uele": ("HAUT-UELE", "PROVINCE-ORIENTALE"),
|
||||
"haut-uele-1": ("HAUT-UELE", "PROVINCE-ORIENTALE"),
|
||||
"haut-uele-2": ("HAUT-UELE", "PROVINCE-ORIENTALE"),
|
||||
"bas-uele": ("BAS-UELE", "PROVINCE-ORIENTALE"),
|
||||
"bas-uele-1": ("BAS-UELE", "PROVINCE-ORIENTALE"),
|
||||
"bas-uele-2": ("BAS-UELE", "PROVINCE-ORIENTALE"),
|
||||
"ituri": ("ITURI", "PROVINCE-ORIENTALE"),
|
||||
"ituri-1": ("ITURI", "PROVINCE-ORIENTALE"),
|
||||
"ituri-2": ("ITURI", "PROVINCE-ORIENTALE"),
|
||||
"tshopo": ("TSHOPO", "PROVINCE-ORIENTALE"),
|
||||
"tshopo-1": ("TSHOPO", "PROVINCE-ORIENTALE"),
|
||||
"tshopo-2": ("TSHOPO", "PROVINCE-ORIENTALE"),
|
||||
"province-orientale": ("ORIENTALE", "ORIENTALE"),
|
||||
"province-orientale-1": ("ORIENTALE", "ORIENTALE"),
|
||||
"province-orientale-2": ("ORIENTALE", "ORIENTALE"),
|
||||
"province-orientale-3": ("ORIENTALE", "ORIENTALE"),
|
||||
"province-orientale-4": ("ORIENTALE", "ORIENTALE"),
|
||||
"haut-uele": ("HAUT-UELE", "ORIENTALE"),
|
||||
"haut-uele-1": ("HAUT-UELE", "ORIENTALE"),
|
||||
"haut-uele-2": ("HAUT-UELE", "ORIENTALE"),
|
||||
"bas-uele": ("BAS-UELE", "ORIENTALE"),
|
||||
"bas-uele-1": ("BAS-UELE", "ORIENTALE"),
|
||||
"bas-uele-2": ("BAS-UELE", "ORIENTALE"),
|
||||
"ituri": ("ITURI", "ORIENTALE"),
|
||||
"ituri-1": ("ITURI", "ORIENTALE"),
|
||||
"ituri-2": ("ITURI", "ORIENTALE"),
|
||||
"tshopo": ("TSHOPO", "ORIENTALE"),
|
||||
"tshopo-1": ("TSHOPO", "ORIENTALE"),
|
||||
"tshopo-2": ("TSHOPO", "ORIENTALE"),
|
||||
# Maniema
|
||||
"maniema": ("MANIEMA", "MANIEMA"),
|
||||
"maniema-1": ("MANIEMA", "MANIEMA"),
|
||||
|
||||
Vendored
+193
-60
File diff suppressed because one or more lines are too long
@@ -9,7 +9,7 @@ import pandas as pd
|
||||
from pydantic import BaseModel
|
||||
|
||||
from core.config.pipeline_config import PipelineConfig
|
||||
from core.utils.data_loader import OPTIMIZED_DTYPES, DataLoader
|
||||
from core.utils.data_loader import DataLoader
|
||||
from processing.batch.batch_config import BatchConfig
|
||||
|
||||
|
||||
|
||||
@@ -150,7 +150,8 @@ class FeatureExtractionStep(PipelineStep):
|
||||
except Exception as e:
|
||||
logging.warning(f"NER tagging failed for row {idx}: {e}")
|
||||
|
||||
def _normalize_gender(self, series: pd.Series) -> pd.Series:
|
||||
@classmethod
|
||||
def _normalize_gender(cls, series: pd.Series) -> pd.Series:
|
||||
gender_mapping = {
|
||||
"m": "m",
|
||||
"male": "m",
|
||||
|
||||
@@ -21,7 +21,7 @@ class Dashboard:
|
||||
|
||||
def index(self):
|
||||
st.title("Dashboard")
|
||||
col1, col2, col3, col4 = st.columns(4)
|
||||
col1, col2, col3, col4, col5 = st.columns(5)
|
||||
|
||||
# Load basic statistics
|
||||
try:
|
||||
@@ -44,7 +44,12 @@ class Dashboard:
|
||||
if "sex" in df.columns:
|
||||
gender_dist = df["sex"].value_counts()
|
||||
ratio = gender_dist.get("f", 0) / max(gender_dist.get("m", 1), 1)
|
||||
st.metric("F/M Ratio", f"{ratio:.2f}")
|
||||
st.metric("F/M Rate", f"{ratio:.2%}")
|
||||
with col5:
|
||||
if "annotated" in df.columns:
|
||||
annotated = (df.get("annotated", 0) == 1).sum()
|
||||
ratio = annotated / len(df) if len(df) > 0 else 0
|
||||
st.metric("Annotation Rate", f"{ratio:.2%}")
|
||||
else:
|
||||
st.warning("No processed data found. Please run data processing first.")
|
||||
|
||||
|
||||
Reference in New Issue
Block a user