refactor: clean up imports and improve gender normalization method
This commit is contained in:
+23
-18
@@ -23,7 +23,7 @@ class RegionMapper:
|
|||||||
"bandundu",
|
"bandundu",
|
||||||
"katanga",
|
"katanga",
|
||||||
"equateur",
|
"equateur",
|
||||||
"province-orientale",
|
"orientale",
|
||||||
"maniema",
|
"maniema",
|
||||||
"nord-kivu",
|
"nord-kivu",
|
||||||
"sud-kivu",
|
"sud-kivu",
|
||||||
@@ -70,6 +70,11 @@ REGION_MAPPING: Dict[str, Tuple[str, str]] = {
|
|||||||
"mai-ndombe-2": ("MAI-NDOMBE", "BANDUNDU"),
|
"mai-ndombe-2": ("MAI-NDOMBE", "BANDUNDU"),
|
||||||
"mai-ndombe-3": ("MAI-NDOMBE", "BANDUNDU"),
|
"mai-ndombe-3": ("MAI-NDOMBE", "BANDUNDU"),
|
||||||
# Katanga → HAUT-KATANGA, HAUT-LOMAMI, LUALABA, TANGANYIKA
|
# Katanga → HAUT-KATANGA, HAUT-LOMAMI, LUALABA, TANGANYIKA
|
||||||
|
"katanga": ("KATANGA", "KATANGA"),
|
||||||
|
"katanga-1": ("KATANGA", "KATANGA"),
|
||||||
|
"katanga-2": ("KATANGA", "KATANGA"),
|
||||||
|
"katanga-3": ("KATANGA", "KATANGA"),
|
||||||
|
"katanga-4": ("KATANGA", "KATANGA"),
|
||||||
"haut-katanga": ("HAUT-KATANGA", "KATANGA"),
|
"haut-katanga": ("HAUT-KATANGA", "KATANGA"),
|
||||||
"haut-katanga-1": ("HAUT-KATANGA", "KATANGA"),
|
"haut-katanga-1": ("HAUT-KATANGA", "KATANGA"),
|
||||||
"haut-katanga-2": ("HAUT-KATANGA", "KATANGA"),
|
"haut-katanga-2": ("HAUT-KATANGA", "KATANGA"),
|
||||||
@@ -103,23 +108,23 @@ REGION_MAPPING: Dict[str, Tuple[str, str]] = {
|
|||||||
"tshuapa-1": ("TSHUAPA", "EQUATEUR"),
|
"tshuapa-1": ("TSHUAPA", "EQUATEUR"),
|
||||||
"tshuapa-2": ("TSHUAPA", "EQUATEUR"),
|
"tshuapa-2": ("TSHUAPA", "EQUATEUR"),
|
||||||
# Province-Orientale
|
# Province-Orientale
|
||||||
"province-orientale": ("PROVINCE-ORIENTALE", "PROVINCE-ORIENTALE"),
|
"province-orientale": ("ORIENTALE", "ORIENTALE"),
|
||||||
"province-orientale-1": ("PROVINCE-ORIENTALE", "PROVINCE-ORIENTALE"),
|
"province-orientale-1": ("ORIENTALE", "ORIENTALE"),
|
||||||
"province-orientale-2": ("PROVINCE-ORIENTALE", "PROVINCE-ORIENTALE"),
|
"province-orientale-2": ("ORIENTALE", "ORIENTALE"),
|
||||||
"province-orientale-3": ("PROVINCE-ORIENTALE", "PROVINCE-ORIENTALE"),
|
"province-orientale-3": ("ORIENTALE", "ORIENTALE"),
|
||||||
"province-orientale-4": ("PROVINCE-ORIENTALE", "PROVINCE-ORIENTALE"),
|
"province-orientale-4": ("ORIENTALE", "ORIENTALE"),
|
||||||
"haut-uele": ("HAUT-UELE", "PROVINCE-ORIENTALE"),
|
"haut-uele": ("HAUT-UELE", "ORIENTALE"),
|
||||||
"haut-uele-1": ("HAUT-UELE", "PROVINCE-ORIENTALE"),
|
"haut-uele-1": ("HAUT-UELE", "ORIENTALE"),
|
||||||
"haut-uele-2": ("HAUT-UELE", "PROVINCE-ORIENTALE"),
|
"haut-uele-2": ("HAUT-UELE", "ORIENTALE"),
|
||||||
"bas-uele": ("BAS-UELE", "PROVINCE-ORIENTALE"),
|
"bas-uele": ("BAS-UELE", "ORIENTALE"),
|
||||||
"bas-uele-1": ("BAS-UELE", "PROVINCE-ORIENTALE"),
|
"bas-uele-1": ("BAS-UELE", "ORIENTALE"),
|
||||||
"bas-uele-2": ("BAS-UELE", "PROVINCE-ORIENTALE"),
|
"bas-uele-2": ("BAS-UELE", "ORIENTALE"),
|
||||||
"ituri": ("ITURI", "PROVINCE-ORIENTALE"),
|
"ituri": ("ITURI", "ORIENTALE"),
|
||||||
"ituri-1": ("ITURI", "PROVINCE-ORIENTALE"),
|
"ituri-1": ("ITURI", "ORIENTALE"),
|
||||||
"ituri-2": ("ITURI", "PROVINCE-ORIENTALE"),
|
"ituri-2": ("ITURI", "ORIENTALE"),
|
||||||
"tshopo": ("TSHOPO", "PROVINCE-ORIENTALE"),
|
"tshopo": ("TSHOPO", "ORIENTALE"),
|
||||||
"tshopo-1": ("TSHOPO", "PROVINCE-ORIENTALE"),
|
"tshopo-1": ("TSHOPO", "ORIENTALE"),
|
||||||
"tshopo-2": ("TSHOPO", "PROVINCE-ORIENTALE"),
|
"tshopo-2": ("TSHOPO", "ORIENTALE"),
|
||||||
# Maniema
|
# Maniema
|
||||||
"maniema": ("MANIEMA", "MANIEMA"),
|
"maniema": ("MANIEMA", "MANIEMA"),
|
||||||
"maniema-1": ("MANIEMA", "MANIEMA"),
|
"maniema-1": ("MANIEMA", "MANIEMA"),
|
||||||
|
|||||||
Vendored
+193
-60
File diff suppressed because one or more lines are too long
@@ -9,7 +9,7 @@ import pandas as pd
|
|||||||
from pydantic import BaseModel
|
from pydantic import BaseModel
|
||||||
|
|
||||||
from core.config.pipeline_config import PipelineConfig
|
from core.config.pipeline_config import PipelineConfig
|
||||||
from core.utils.data_loader import OPTIMIZED_DTYPES, DataLoader
|
from core.utils.data_loader import DataLoader
|
||||||
from processing.batch.batch_config import BatchConfig
|
from processing.batch.batch_config import BatchConfig
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -150,7 +150,8 @@ class FeatureExtractionStep(PipelineStep):
|
|||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.warning(f"NER tagging failed for row {idx}: {e}")
|
logging.warning(f"NER tagging failed for row {idx}: {e}")
|
||||||
|
|
||||||
def _normalize_gender(self, series: pd.Series) -> pd.Series:
|
@classmethod
|
||||||
|
def _normalize_gender(cls, series: pd.Series) -> pd.Series:
|
||||||
gender_mapping = {
|
gender_mapping = {
|
||||||
"m": "m",
|
"m": "m",
|
||||||
"male": "m",
|
"male": "m",
|
||||||
|
|||||||
@@ -21,7 +21,7 @@ class Dashboard:
|
|||||||
|
|
||||||
def index(self):
|
def index(self):
|
||||||
st.title("Dashboard")
|
st.title("Dashboard")
|
||||||
col1, col2, col3, col4 = st.columns(4)
|
col1, col2, col3, col4, col5 = st.columns(5)
|
||||||
|
|
||||||
# Load basic statistics
|
# Load basic statistics
|
||||||
try:
|
try:
|
||||||
@@ -44,7 +44,12 @@ class Dashboard:
|
|||||||
if "sex" in df.columns:
|
if "sex" in df.columns:
|
||||||
gender_dist = df["sex"].value_counts()
|
gender_dist = df["sex"].value_counts()
|
||||||
ratio = gender_dist.get("f", 0) / max(gender_dist.get("m", 1), 1)
|
ratio = gender_dist.get("f", 0) / max(gender_dist.get("m", 1), 1)
|
||||||
st.metric("F/M Ratio", f"{ratio:.2f}")
|
st.metric("F/M Rate", f"{ratio:.2%}")
|
||||||
|
with col5:
|
||||||
|
if "annotated" in df.columns:
|
||||||
|
annotated = (df.get("annotated", 0) == 1).sum()
|
||||||
|
ratio = annotated / len(df) if len(df) > 0 else 0
|
||||||
|
st.metric("Annotation Rate", f"{ratio:.2%}")
|
||||||
else:
|
else:
|
||||||
st.warning("No processed data found. Please run data processing first.")
|
st.warning("No processed data found. Please run data processing first.")
|
||||||
|
|
||||||
|
|||||||
Reference in New Issue
Block a user