feat: add osm data
This commit is contained in:
+3
-1
@@ -50,7 +50,9 @@ class StreamlitApp:
|
||||
@classmethod
|
||||
def run(cls):
|
||||
st.title("🇨🇩 DRC NERS Platform")
|
||||
st.markdown("A Culturally-Aware NLP System for Congolese Name Analysis and Gender Inference")
|
||||
st.markdown(
|
||||
"A Culturally-Aware NLP System for Congolese Name Analysis and Gender Inference"
|
||||
)
|
||||
st.markdown(
|
||||
"""
|
||||
## Overview
|
||||
|
||||
@@ -1,2 +1 @@
|
||||
from .ner_testing import NERTesting
|
||||
|
||||
|
||||
@@ -13,10 +13,10 @@ from research.model_registry import list_available_models
|
||||
|
||||
class Experiments:
|
||||
def __init__(
|
||||
self,
|
||||
config: PipelineConfig,
|
||||
experiment_tracker: ExperimentTracker,
|
||||
experiment_runner: ExperimentRunner
|
||||
self,
|
||||
config: PipelineConfig,
|
||||
experiment_tracker: ExperimentTracker,
|
||||
experiment_runner: ExperimentRunner,
|
||||
):
|
||||
self.config = config
|
||||
self.experiment_tracker = experiment_tracker
|
||||
@@ -26,8 +26,7 @@ class Experiments:
|
||||
def index(self):
|
||||
st.title("Experiments")
|
||||
|
||||
tab1, tab2, tab3 = st.tabs(
|
||||
["Templates", "Experiments", "Batch Experiments"])
|
||||
tab1, tab2, tab3 = st.tabs(["Templates", "Experiments", "Batch Experiments"])
|
||||
|
||||
with tab1:
|
||||
self.show_template_experiments()
|
||||
@@ -56,14 +55,18 @@ class Experiments:
|
||||
self._show_experiments_by_type(available_experiments["advanced"], "advanced")
|
||||
|
||||
with exp_tabs[2]:
|
||||
self._show_experiments_by_type(available_experiments["feature_study"], "feature_study")
|
||||
self._show_experiments_by_type(
|
||||
available_experiments["feature_study"], "feature_study"
|
||||
)
|
||||
|
||||
with exp_tabs[3]:
|
||||
self._show_experiments_by_type(available_experiments["tuning"], "tuning")
|
||||
|
||||
except Exception as e:
|
||||
st.error(f"Error loading experiment templates: {e}")
|
||||
st.info("Make sure the research templates file exists at `config/research_templates.yaml`")
|
||||
st.info(
|
||||
"Make sure the research templates file exists at `config/research_templates.yaml`"
|
||||
)
|
||||
|
||||
def _show_experiments_by_type(self, experiments: List[Dict], experiment_type: str):
|
||||
"""Show experiments for a specific type"""
|
||||
@@ -142,7 +145,7 @@ class Experiments:
|
||||
# Display experiments
|
||||
for i, exp in enumerate(experiments):
|
||||
with st.expander(
|
||||
f"{exp.config.name} - {exp.status.value} - {exp.start_time.strftime('%Y-%m-%d %H:%M')}"
|
||||
f"{exp.config.name} - {exp.status.value} - {exp.start_time.strftime('%Y-%m-%d %H:%M')}"
|
||||
):
|
||||
self._display_experiment_details(exp, i)
|
||||
|
||||
@@ -213,7 +216,7 @@ class Experiments:
|
||||
experiment_types = st.multiselect(
|
||||
"Select Experiment Types",
|
||||
["baseline", "advanced", "feature_study", "tuning"],
|
||||
default=["baseline"]
|
||||
default=["baseline"],
|
||||
)
|
||||
|
||||
if experiment_types:
|
||||
@@ -223,11 +226,11 @@ class Experiments:
|
||||
experiments = available_experiments.get(exp_type, [])
|
||||
if experiments:
|
||||
st.write(f"**{exp_type.title()} Experiments:**")
|
||||
exp_names = [exp.get("name", f"Exp {i}") for i, exp in enumerate(experiments)]
|
||||
exp_names = [
|
||||
exp.get("name", f"Exp {i}") for i, exp in enumerate(experiments)
|
||||
]
|
||||
selected_names = st.multiselect(
|
||||
f"Select {exp_type} experiments",
|
||||
exp_names,
|
||||
key=f"select_{exp_type}"
|
||||
f"Select {exp_type} experiments", exp_names, key=f"select_{exp_type}"
|
||||
)
|
||||
|
||||
for name in selected_names:
|
||||
@@ -308,13 +311,13 @@ class Experiments:
|
||||
)
|
||||
|
||||
def run_batch_experiments(
|
||||
self,
|
||||
base_name: str,
|
||||
model_types: List[str],
|
||||
ngram_ranges: str,
|
||||
feature_combinations: List[str],
|
||||
test_sizes: str,
|
||||
tags: str,
|
||||
self,
|
||||
base_name: str,
|
||||
model_types: List[str],
|
||||
ngram_ranges: str,
|
||||
feature_combinations: List[str],
|
||||
test_sizes: str,
|
||||
tags: str,
|
||||
):
|
||||
"""Run batch experiments with parameter combinations"""
|
||||
with st.spinner("Running batch experiments..."):
|
||||
|
||||
@@ -38,7 +38,7 @@ class LogReader:
|
||||
|
||||
# Parse log entries from the end
|
||||
entries = []
|
||||
for line in reversed(lines[-count * 2:]): # Read more lines in case some don't match
|
||||
for line in reversed(lines[-count * 2 :]): # Read more lines in case some don't match
|
||||
entry = self._parse_log_line(line.strip())
|
||||
if entry:
|
||||
entries.append(entry)
|
||||
|
||||
@@ -33,7 +33,9 @@ class NERTesting:
|
||||
|
||||
# Load model
|
||||
if not self.load_ner_model():
|
||||
st.warning("NER model could not be loaded. Please ensure the model is trained and available.")
|
||||
st.warning(
|
||||
"NER model could not be loaded. Please ensure the model is trained and available."
|
||||
)
|
||||
return
|
||||
|
||||
# Display model information
|
||||
@@ -53,9 +55,11 @@ class NERTesting:
|
||||
col1, col2, col3, col4 = st.columns(4)
|
||||
|
||||
with col1:
|
||||
st.metric("Training Examples", f"{self.training_stats.get('training_examples', 0):,}")
|
||||
st.metric(
|
||||
"Training Examples", f"{self.training_stats.get('training_examples', 0):,}"
|
||||
)
|
||||
with col2:
|
||||
st.metric("Epochs", self.training_stats.get('epochs', 0))
|
||||
st.metric("Epochs", self.training_stats.get("epochs", 0))
|
||||
with col3:
|
||||
st.metric("Final Loss", f"{self.training_stats.get('final_loss', 0):.2f}")
|
||||
with col4:
|
||||
@@ -64,7 +68,7 @@ class NERTesting:
|
||||
def show_model_evaluation_info(self):
|
||||
if self.evaluation_stats:
|
||||
col1, col2, col3 = st.columns(4)
|
||||
overall = self.evaluation_stats.get('overall', {})
|
||||
overall = self.evaluation_stats.get("overall", {})
|
||||
|
||||
with col1:
|
||||
st.metric("Overall Precision", f"{overall['precision']:.2f}")
|
||||
@@ -79,7 +83,7 @@ class NERTesting:
|
||||
name_input = st.text_input(
|
||||
"Name:",
|
||||
placeholder="e.g., Jean Baptiste Mukendi, Marie Kabamba Tshiala, Joseph Kasongo",
|
||||
help="Enter a full name or multiple names separated by spaces"
|
||||
help="Enter a full name or multiple names separated by spaces",
|
||||
)
|
||||
if name_input.strip():
|
||||
if st.button("Analyze Name", type="primary"):
|
||||
@@ -90,12 +94,12 @@ class NERTesting:
|
||||
"Names:",
|
||||
placeholder="Jean Baptiste Mukendi\nMarie Kabamba Tshiala\nJoseph Kasongo\nGrace Mbuyi Kalala",
|
||||
height=150,
|
||||
help="Enter each name on a new line"
|
||||
help="Enter each name on a new line",
|
||||
)
|
||||
|
||||
if names_input.strip():
|
||||
if st.button("Analyze All Names", type="primary"):
|
||||
names = [name.strip() for name in names_input.split('\n') if name.strip()]
|
||||
names = [name.strip() for name in names_input.split("\n") if name.strip()]
|
||||
for i, name in enumerate(names):
|
||||
st.markdown(f"**Name {i+1}: {name}**")
|
||||
self.analyze_and_display(name)
|
||||
@@ -106,12 +110,12 @@ class NERTesting:
|
||||
try:
|
||||
result = self.ner_model.predict(text)
|
||||
st.subheader("Analysis Results")
|
||||
entities = result.get('entities', [])
|
||||
entities = result.get("entities", [])
|
||||
|
||||
if entities:
|
||||
self.show_visual_entities(text, entities)
|
||||
native_count = sum(1 for e in entities if e['label'] == 'NATIVE')
|
||||
surname_count = sum(1 for e in entities if e['label'] == 'SURNAME')
|
||||
native_count = sum(1 for e in entities if e["label"] == "NATIVE")
|
||||
surname_count = sum(1 for e in entities if e["label"] == "SURNAME")
|
||||
|
||||
col1, col2, col3 = st.columns(3)
|
||||
with col1:
|
||||
@@ -134,29 +138,17 @@ class NERTesting:
|
||||
# Convert our entities format to spaCy format for displacy
|
||||
ents = []
|
||||
for entity in entities:
|
||||
ents.append({
|
||||
"start": entity['start'],
|
||||
"end": entity['end'],
|
||||
"label": entity['label']
|
||||
})
|
||||
ents.append(
|
||||
{"start": entity["start"], "end": entity["end"], "label": entity["label"]}
|
||||
)
|
||||
|
||||
# Create doc-like structure for displacy
|
||||
doc_data = {
|
||||
"text": text,
|
||||
"ents": ents,
|
||||
"title": None
|
||||
}
|
||||
doc_data = {"text": text, "ents": ents, "title": None}
|
||||
|
||||
# Custom colors for our labels
|
||||
colors = {
|
||||
"NATIVE": "#74C0FC", # Light blue
|
||||
"SURNAME": "#69DB7C" # Light green
|
||||
}
|
||||
colors = {"NATIVE": "#74C0FC", "SURNAME": "#69DB7C"} # NATIVE: light blue, SURNAME: light green
|
||||
|
||||
options = {
|
||||
"colors": colors,
|
||||
"distance": 90
|
||||
}
|
||||
options = {"colors": colors, "distance": 90}
|
||||
|
||||
# Generate HTML visualization
|
||||
html = displacy.render(doc_data, style="ent", manual=True, options=options)
|
||||
|
||||
@@ -13,7 +13,7 @@ from research.experiment.experiment_tracker import ExperimentTracker
|
||||
|
||||
class Predictions:
|
||||
def __init__(
|
||||
self, config, experiment_tracker: ExperimentTracker, experiment_runner: ExperimentRunner
|
||||
self, config, experiment_tracker: ExperimentTracker, experiment_runner: ExperimentRunner
|
||||
):
|
||||
self.config = config
|
||||
self.experiment_tracker = experiment_tracker
|
||||
@@ -111,7 +111,7 @@ class Predictions:
|
||||
return None
|
||||
|
||||
def _display_single_prediction_results(
|
||||
self, prediction: str, confidence: Optional[float], experiment, name_input: str
|
||||
self, prediction: str, confidence: Optional[float], experiment, name_input: str
|
||||
):
|
||||
"""Display single prediction results"""
|
||||
col1, col2 = st.columns(2)
|
||||
@@ -288,7 +288,7 @@ class Predictions:
|
||||
return pd.DataFrame()
|
||||
|
||||
def _run_dataset_prediction(
|
||||
self, df: pd.DataFrame, experiment, sample_size: int, compare_with_actual: bool
|
||||
self, df: pd.DataFrame, experiment, sample_size: int, compare_with_actual: bool
|
||||
):
|
||||
"""Run dataset prediction and display results"""
|
||||
with st.spinner("Running predictions..."):
|
||||
|
||||
@@ -12,7 +12,7 @@ from research.experiment.experiment_tracker import ExperimentTracker
|
||||
|
||||
class ResultsAnalysis:
|
||||
def __init__(
|
||||
self, config, experiment_tracker: ExperimentTracker, experiment_runner: ExperimentRunner
|
||||
self, config, experiment_tracker: ExperimentTracker, experiment_runner: ExperimentRunner
|
||||
):
|
||||
self.config = config
|
||||
self.experiment_tracker = experiment_tracker
|
||||
|
||||
Reference in New Issue
Block a user