hotfixes
This commit is contained in:
@@ -0,0 +1,8 @@
|
|||||||
|
notebooks/* linguist-vendored
|
||||||
|
|
||||||
|
linguist-detectable=false
|
||||||
|
|
||||||
|
*.ipynb linguist-detectable=false
|
||||||
|
|
||||||
|
# Enforce Unix newlines
|
||||||
|
*.py text eol=lf
|
||||||
@@ -24,29 +24,29 @@ def build(config: PipelineConfig):
|
|||||||
|
|
||||||
def train(config: PipelineConfig):
|
def train(config: PipelineConfig):
|
||||||
"""Train the NER model."""
|
"""Train the NER model."""
|
||||||
trainer = NameModel(config)
|
name_model = NameModel(config)
|
||||||
|
|
||||||
data_path = Path(config.paths.data_dir) / config.data.output_files["ner_data"]
|
data_path = Path(config.paths.data_dir) / config.data.output_files["ner_data"]
|
||||||
if not data_path.exists():
|
if not data_path.exists():
|
||||||
logging.info("NER data not found. Building dataset first...")
|
logging.info("NER data not found. Building dataset first...")
|
||||||
build(config)
|
build(config)
|
||||||
|
|
||||||
trainer.create_blank_model("fr")
|
name_model.create_blank_model("fr")
|
||||||
data = trainer.load_data(str(data_path))
|
data = name_model.load_data(str(data_path))
|
||||||
|
|
||||||
split_idx = int(len(data) * 0.9)
|
split_idx = int(len(data) * 0.9)
|
||||||
train_data, eval_data = data[:split_idx], data[split_idx:]
|
train_data, eval_data = data[:split_idx], data[split_idx:]
|
||||||
|
|
||||||
logging.info(f"Training with {len(train_data)} examples, evaluating on {len(eval_data)}")
|
logging.info(f"Training with {len(train_data)} examples, evaluating on {len(eval_data)}")
|
||||||
trainer.train(
|
name_model.train(
|
||||||
data=train_data,
|
data=train_data,
|
||||||
epochs=config.processing.epochs,
|
epochs=config.processing.epochs,
|
||||||
batch_size=config.processing.batch_size,
|
batch_size=config.processing.batch_size,
|
||||||
dropout_rate=0.3,
|
dropout_rate=0.3,
|
||||||
)
|
)
|
||||||
trainer.evaluate(eval_data)
|
name_model.evaluate(eval_data)
|
||||||
|
|
||||||
model_path = trainer.save()
|
model_path = name_model.save()
|
||||||
logging.info(f"Model saved to: {model_path}")
|
logging.info(f"Model saved to: {model_path}")
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -1,8 +1,8 @@
|
|||||||
import logging
|
import logging
|
||||||
|
import time
|
||||||
|
from typing import Dict, Any
|
||||||
|
|
||||||
import pandas as pd
|
import pandas as pd
|
||||||
from typing import Dict, Any
|
|
||||||
import time
|
|
||||||
|
|
||||||
from processing.batch.batch_config import BatchConfig
|
from processing.batch.batch_config import BatchConfig
|
||||||
from processing.batch.batch_processor import BatchProcessor
|
from processing.batch.batch_processor import BatchProcessor
|
||||||
@@ -49,9 +49,6 @@ class Pipeline:
|
|||||||
"processed_batches": step.state.processed_batches,
|
"processed_batches": step.state.processed_batches,
|
||||||
"total_batches": step.state.total_batches,
|
"total_batches": step.state.total_batches,
|
||||||
"failed_batches": len(step.state.failed_batches),
|
"failed_batches": len(step.state.failed_batches),
|
||||||
"completion_percentage": (
|
"completion_percentage": (step.state.processed_batches / max(1, step.state.total_batches)) * 100,
|
||||||
step.state.processed_batches / max(1, step.state.total_batches)
|
|
||||||
)
|
|
||||||
* 100,
|
|
||||||
}
|
}
|
||||||
return progress
|
return progress
|
||||||
|
|||||||
@@ -7,7 +7,7 @@ import pandas as pd
|
|||||||
|
|
||||||
from core.config.pipeline_config import PipelineConfig
|
from core.config.pipeline_config import PipelineConfig
|
||||||
from core.utils.region_mapper import RegionMapper
|
from core.utils.region_mapper import RegionMapper
|
||||||
from processing.ner.ner_name_tagger import NERNameTagger
|
from processing.ner.name_tagger import NameTagger
|
||||||
from processing.steps import PipelineStep
|
from processing.steps import PipelineStep
|
||||||
|
|
||||||
|
|
||||||
@@ -27,7 +27,7 @@ class FeatureExtractionStep(PipelineStep):
|
|||||||
def __init__(self, pipeline_config: PipelineConfig):
|
def __init__(self, pipeline_config: PipelineConfig):
|
||||||
super().__init__("feature_extraction", pipeline_config)
|
super().__init__("feature_extraction", pipeline_config)
|
||||||
self.region_mapper = RegionMapper()
|
self.region_mapper = RegionMapper()
|
||||||
self.name_tagger = NERNameTagger()
|
self.name_tagger = NameTagger()
|
||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def requires_batch_mutation(cls) -> bool:
|
def requires_batch_mutation(cls) -> bool:
|
||||||
|
|||||||
@@ -6,7 +6,7 @@ from typing import Dict
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
from core.config.pipeline_config import PipelineConfig
|
from core.config.pipeline_config import PipelineConfig
|
||||||
from processing.ner.ner_name_model import NERNameModel
|
from processing.ner.name_model import NameModel
|
||||||
from processing.steps import PipelineStep, NameAnnotation
|
from processing.steps import PipelineStep, NameAnnotation
|
||||||
|
|
||||||
|
|
||||||
@@ -19,7 +19,7 @@ class NERAnnotationStep(PipelineStep):
|
|||||||
|
|
||||||
self.model_name = "drc_ner_model"
|
self.model_name = "drc_ner_model"
|
||||||
self.model_path = pipeline_config.paths.models_dir / "drc_ner_model"
|
self.model_path = pipeline_config.paths.models_dir / "drc_ner_model"
|
||||||
self.ner_trainer = NERNameModel(pipeline_config)
|
self.name_model = NameModel(pipeline_config)
|
||||||
self.ner_config = pipeline_config.annotation.ner
|
self.ner_config = pipeline_config.annotation.ner
|
||||||
|
|
||||||
# Statistics
|
# Statistics
|
||||||
@@ -35,19 +35,19 @@ class NERAnnotationStep(PipelineStep):
|
|||||||
try:
|
try:
|
||||||
if self.model_path.exists():
|
if self.model_path.exists():
|
||||||
logging.info(f"Loading NER model from {self.model_path}")
|
logging.info(f"Loading NER model from {self.model_path}")
|
||||||
self.ner_trainer.load(str(self.model_path))
|
self.name_model.load(str(self.model_path))
|
||||||
logging.info("NER model loaded successfully")
|
logging.info("NER model loaded successfully")
|
||||||
else:
|
else:
|
||||||
logging.warning(f"NER model not found at {self.model_path}")
|
logging.warning(f"NER model not found at {self.model_path}")
|
||||||
logging.warning("NER annotation will be skipped. Train the model first.")
|
logging.warning("NER annotation will be skipped. Train the model first.")
|
||||||
self.ner_trainer.nlp = None
|
self.name_model.nlp = None
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
logging.error(f"Failed to load NER model: {e}")
|
logging.error(f"Failed to load NER model: {e}")
|
||||||
self.ner_trainer.nlp = None
|
self.name_model.nlp = None
|
||||||
|
|
||||||
def analyze_name(self, name: str) -> Dict:
|
def analyze_name(self, name: str) -> Dict:
|
||||||
"""Analyze a name with retry logic"""
|
"""Analyze a name with retry logic"""
|
||||||
if self.ner_trainer.nlp is None:
|
if self.name_model.nlp is None:
|
||||||
return {
|
return {
|
||||||
"identified_name": None,
|
"identified_name": None,
|
||||||
"identified_surname": None,
|
"identified_surname": None,
|
||||||
@@ -62,7 +62,7 @@ class NERAnnotationStep(PipelineStep):
|
|||||||
start_time = time.time()
|
start_time = time.time()
|
||||||
|
|
||||||
# Get NER predictions
|
# Get NER predictions
|
||||||
prediction = self.ner_trainer.predict(name.lower())
|
prediction = self.name_model.predict(name.lower())
|
||||||
entities = prediction.get("entities", [])
|
entities = prediction.get("entities", [])
|
||||||
|
|
||||||
elapsed_time = time.time() - start_time
|
elapsed_time = time.time() - start_time
|
||||||
|
|||||||
@@ -41,14 +41,14 @@ class BaseModel(ABC):
|
|||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def cross_validate(
|
def cross_validate(
|
||||||
self, X: pd.DataFrame, y: pd.Series, cv_folds: int = 5
|
self, X: pd.DataFrame, y: pd.Series, cv_folds: int = 5
|
||||||
) -> Dict[str, float] | dict[str, np.floating[Any]]:
|
) -> Dict[str, float] | dict[str, np.floating[Any]]:
|
||||||
"""Perform cross-validation and return average scores"""
|
"""Perform cross-validation and return average scores"""
|
||||||
pass
|
pass
|
||||||
|
|
||||||
@abstractmethod
|
@abstractmethod
|
||||||
def generate_learning_curve(
|
def generate_learning_curve(
|
||||||
self, X: pd.DataFrame, y: pd.Series, train_sizes: List[float] = None
|
self, X: pd.DataFrame, y: pd.Series, train_sizes: List[float] = None
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Generate learning curve data for the model"""
|
"""Generate learning curve data for the model"""
|
||||||
pass
|
pass
|
||||||
|
|||||||
@@ -158,12 +158,12 @@ class ExperimentRunner:
|
|||||||
|
|
||||||
@classmethod
|
@classmethod
|
||||||
def _create_prediction_examples(
|
def _create_prediction_examples(
|
||||||
cls,
|
cls,
|
||||||
X_test: pd.DataFrame,
|
X_test: pd.DataFrame,
|
||||||
y_test: pd.Series,
|
y_test: pd.Series,
|
||||||
predictions: np.ndarray,
|
predictions: np.ndarray,
|
||||||
model: BaseModel,
|
model: BaseModel,
|
||||||
n_examples: int = 10,
|
n_examples: int = 10,
|
||||||
) -> List[Dict]:
|
) -> List[Dict]:
|
||||||
"""Create prediction examples for analysis"""
|
"""Create prediction examples for analysis"""
|
||||||
examples = []
|
examples = []
|
||||||
@@ -237,7 +237,7 @@ class ExperimentRunner:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
def compare_experiments(
|
def compare_experiments(
|
||||||
self, experiment_ids: List[str], metric: str = "accuracy"
|
self, experiment_ids: List[str], metric: str = "accuracy"
|
||||||
) -> pd.DataFrame:
|
) -> pd.DataFrame:
|
||||||
"""Compare experiments and return analysis"""
|
"""Compare experiments and return analysis"""
|
||||||
comparison_df = self.tracker.compare_experiments(experiment_ids)
|
comparison_df = self.tracker.compare_experiments(experiment_ids)
|
||||||
|
|||||||
@@ -7,7 +7,6 @@ from typing import Optional, Dict, List
|
|||||||
import pandas as pd
|
import pandas as pd
|
||||||
|
|
||||||
from core.config import PipelineConfig, get_config
|
from core.config import PipelineConfig, get_config
|
||||||
|
|
||||||
from research.experiment import ExperimentConfig, ExperimentStatus
|
from research.experiment import ExperimentConfig, ExperimentStatus
|
||||||
from research.experiment.experiement_result import ExperimentResult
|
from research.experiment.experiement_result import ExperimentResult
|
||||||
|
|
||||||
@@ -78,10 +77,10 @@ class ExperimentTracker:
|
|||||||
return self._results.get(experiment_id)
|
return self._results.get(experiment_id)
|
||||||
|
|
||||||
def list_experiments(
|
def list_experiments(
|
||||||
self,
|
self,
|
||||||
status: Optional[ExperimentStatus] = None,
|
status: Optional[ExperimentStatus] = None,
|
||||||
tags: Optional[List[str]] = None,
|
tags: Optional[List[str]] = None,
|
||||||
model_type: Optional[str] = None,
|
model_type: Optional[str] = None,
|
||||||
) -> List[ExperimentResult]:
|
) -> List[ExperimentResult]:
|
||||||
"""List experiments with optional filtering"""
|
"""List experiments with optional filtering"""
|
||||||
results = list(self._results.values())
|
results = list(self._results.values())
|
||||||
@@ -98,7 +97,7 @@ class ExperimentTracker:
|
|||||||
return sorted(results, key=lambda x: x.start_time, reverse=True)
|
return sorted(results, key=lambda x: x.start_time, reverse=True)
|
||||||
|
|
||||||
def get_best_experiment(
|
def get_best_experiment(
|
||||||
self, metric: str = "accuracy", dataset: str = "test", filters: Optional[Dict] = None
|
self, metric: str = "accuracy", dataset: str = "test", filters: Optional[Dict] = None
|
||||||
) -> Optional[ExperimentResult]:
|
) -> Optional[ExperimentResult]:
|
||||||
"""Get the best experiment based on a metric"""
|
"""Get the best experiment based on a metric"""
|
||||||
experiments = self.list_experiments()
|
experiments = self.list_experiments()
|
||||||
@@ -160,8 +159,8 @@ class ExperimentTracker:
|
|||||||
"""Export all results to CSV"""
|
"""Export all results to CSV"""
|
||||||
if output_path is None:
|
if output_path is None:
|
||||||
output_path = (
|
output_path = (
|
||||||
self.experiments_dir
|
self.experiments_dir
|
||||||
/ f"experiments_export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
|
/ f"experiments_export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
|
||||||
)
|
)
|
||||||
|
|
||||||
rows = []
|
rows = []
|
||||||
|
|||||||
@@ -43,7 +43,7 @@ class FeatureExtractor:
|
|||||||
return features_df
|
return features_df
|
||||||
|
|
||||||
def _extract_single_feature(
|
def _extract_single_feature(
|
||||||
self, df: pd.DataFrame, feature_type: FeatureType
|
self, df: pd.DataFrame, feature_type: FeatureType
|
||||||
) -> Union[pd.Series, pd.DataFrame]:
|
) -> Union[pd.Series, pd.DataFrame]:
|
||||||
"""Extract a single type of feature"""
|
"""Extract a single type of feature"""
|
||||||
if feature_type == FeatureType.FULL_NAME:
|
if feature_type == FeatureType.FULL_NAME:
|
||||||
|
|||||||
@@ -27,13 +27,13 @@ class ModelTrainer:
|
|||||||
self.models_dir.mkdir(parents=True, exist_ok=True)
|
self.models_dir.mkdir(parents=True, exist_ok=True)
|
||||||
|
|
||||||
def train_single_model(
|
def train_single_model(
|
||||||
self,
|
self,
|
||||||
model_name: str,
|
model_name: str,
|
||||||
model_type: str = "logistic_regression",
|
model_type: str = "logistic_regression",
|
||||||
features: List[str] = None,
|
features: List[str] = None,
|
||||||
model_params: Dict[str, Any] = None,
|
model_params: Dict[str, Any] = None,
|
||||||
tags: List[str] = None,
|
tags: List[str] = None,
|
||||||
save_artifacts: bool = True,
|
save_artifacts: bool = True,
|
||||||
) -> str:
|
) -> str:
|
||||||
"""
|
"""
|
||||||
Train a single model and save its artifacts.
|
Train a single model and save its artifacts.
|
||||||
@@ -75,7 +75,7 @@ class ModelTrainer:
|
|||||||
return experiment_id
|
return experiment_id
|
||||||
|
|
||||||
def train_multiple_models(
|
def train_multiple_models(
|
||||||
self, base_name: str, model_configs: List[Dict[str, Any]], save_all: bool = True
|
self, base_name: str, model_configs: List[Dict[str, Any]], save_all: bool = True
|
||||||
) -> List[str]:
|
) -> List[str]:
|
||||||
"""
|
"""
|
||||||
Train multiple models with different configurations.
|
Train multiple models with different configurations.
|
||||||
|
|||||||
@@ -83,7 +83,7 @@ class NeuralNetworkModel(BaseModel):
|
|||||||
return self
|
return self
|
||||||
|
|
||||||
def cross_validate(
|
def cross_validate(
|
||||||
self, X: pd.DataFrame, y: pd.Series, cv_folds: int = 5
|
self, X: pd.DataFrame, y: pd.Series, cv_folds: int = 5
|
||||||
) -> dict[str, np.floating[Any]]:
|
) -> dict[str, np.floating[Any]]:
|
||||||
features_df = self.feature_extractor.extract_features(X)
|
features_df = self.feature_extractor.extract_features(X)
|
||||||
X_prepared = self.prepare_features(features_df)
|
X_prepared = self.prepare_features(features_df)
|
||||||
@@ -140,7 +140,7 @@ class NeuralNetworkModel(BaseModel):
|
|||||||
}
|
}
|
||||||
|
|
||||||
def generate_learning_curve(
|
def generate_learning_curve(
|
||||||
self, X: pd.DataFrame, y: pd.Series, train_sizes: List[float] = None
|
self, X: pd.DataFrame, y: pd.Series, train_sizes: List[float] = None
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Generate learning curve data for the model"""
|
"""Generate learning curve data for the model"""
|
||||||
logging.info(f"Generating learning curve for {self.__class__.__name__}")
|
logging.info(f"Generating learning curve for {self.__class__.__name__}")
|
||||||
|
|||||||
@@ -93,7 +93,7 @@ class TraditionalModel(BaseModel):
|
|||||||
return results
|
return results
|
||||||
|
|
||||||
def generate_learning_curve(
|
def generate_learning_curve(
|
||||||
self, X: pd.DataFrame, y: pd.Series, train_sizes: List[float] = None
|
self, X: pd.DataFrame, y: pd.Series, train_sizes: List[float] = None
|
||||||
) -> Dict[str, Any]:
|
) -> Dict[str, Any]:
|
||||||
"""Generate learning curve data for the model"""
|
"""Generate learning curve data for the model"""
|
||||||
logging.info(f"Generating learning curve for {self.__class__.__name__}")
|
logging.info(f"Generating learning curve for {self.__class__.__name__}")
|
||||||
|
|||||||
+6
-14
@@ -2,6 +2,7 @@
|
|||||||
import argparse
|
import argparse
|
||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import streamlit as st
|
import streamlit as st
|
||||||
|
|
||||||
# Add parent directory to Python path to access core modules
|
# Add parent directory to Python path to access core modules
|
||||||
@@ -13,13 +14,6 @@ from core.utils.data_loader import DataLoader
|
|||||||
from processing.monitoring.pipeline_monitor import PipelineMonitor
|
from processing.monitoring.pipeline_monitor import PipelineMonitor
|
||||||
from research.experiment.experiment_runner import ExperimentRunner
|
from research.experiment.experiment_runner import ExperimentRunner
|
||||||
from research.experiment.experiment_tracker import ExperimentTracker
|
from research.experiment.experiment_tracker import ExperimentTracker
|
||||||
from web.interfaces.configuration import Configuration
|
|
||||||
from web.interfaces.dashboard import Dashboard
|
|
||||||
from web.interfaces.data_overview import DataOverview
|
|
||||||
from web.interfaces.data_processing import DataProcessing
|
|
||||||
from web.interfaces.experiments import Experiments
|
|
||||||
from web.interfaces.predictions import Predictions
|
|
||||||
from web.interfaces.results_analysis import ResultsAnalysis
|
|
||||||
|
|
||||||
# Page configuration
|
# Page configuration
|
||||||
st.set_page_config(
|
st.set_page_config(
|
||||||
@@ -53,12 +47,10 @@ class StreamlitApp:
|
|||||||
self.config = config
|
self.config = config
|
||||||
initialize_session_state(config)
|
initialize_session_state(config)
|
||||||
|
|
||||||
def run(self):
|
@classmethod
|
||||||
st.title("🇨🇩 DRC NERS Pipeline")
|
def run(cls):
|
||||||
st.markdown(
|
st.title("🇨🇩 DRC NERS Platform")
|
||||||
"A Culturally-Aware NLP System for Congolese Name Analysis and Gender Inference"
|
st.markdown("A Culturally-Aware NLP System for Congolese Name Analysis and Gender Inference")
|
||||||
)
|
|
||||||
|
|
||||||
st.markdown(
|
st.markdown(
|
||||||
"""
|
"""
|
||||||
## Overview
|
## Overview
|
||||||
@@ -67,7 +59,7 @@ class StreamlitApp:
|
|||||||
data.
|
data.
|
||||||
This project introduces a comprehensive pipeline for Congolese name analysis with a large-scale dataset of over 5
|
This project introduces a comprehensive pipeline for Congolese name analysis with a large-scale dataset of over 5
|
||||||
million names from the Democratic Republic of Congo (DRC) annotated with gender and demographic metadata.
|
million names from the Democratic Republic of Congo (DRC) annotated with gender and demographic metadata.
|
||||||
"""
|
"""
|
||||||
)
|
)
|
||||||
|
|
||||||
|
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ from research.model_registry import list_available_models
|
|||||||
|
|
||||||
class Experiments:
|
class Experiments:
|
||||||
def __init__(
|
def __init__(
|
||||||
self, config, experiment_tracker: ExperimentTracker, experiment_runner: ExperimentRunner
|
self, config, experiment_tracker: ExperimentTracker, experiment_runner: ExperimentRunner
|
||||||
):
|
):
|
||||||
self.config = config
|
self.config = config
|
||||||
self.experiment_tracker = experiment_tracker
|
self.experiment_tracker = experiment_tracker
|
||||||
@@ -113,18 +113,18 @@ class Experiments:
|
|||||||
)
|
)
|
||||||
|
|
||||||
def _handle_experiment_submission(
|
def _handle_experiment_submission(
|
||||||
self,
|
self,
|
||||||
exp_name: str,
|
exp_name: str,
|
||||||
description: str,
|
description: str,
|
||||||
model_type: str,
|
model_type: str,
|
||||||
selected_features: List[str],
|
selected_features: List[str],
|
||||||
model_params: Dict[str, Any],
|
model_params: Dict[str, Any],
|
||||||
test_size: float,
|
test_size: float,
|
||||||
cv_folds: int,
|
cv_folds: int,
|
||||||
tags: str,
|
tags: str,
|
||||||
filter_province: str,
|
filter_province: str,
|
||||||
min_words: int,
|
min_words: int,
|
||||||
max_words: int,
|
max_words: int,
|
||||||
):
|
):
|
||||||
"""Handle experiment form submission"""
|
"""Handle experiment form submission"""
|
||||||
if not exp_name:
|
if not exp_name:
|
||||||
@@ -209,7 +209,7 @@ class Experiments:
|
|||||||
# Display experiments
|
# Display experiments
|
||||||
for i, exp in enumerate(experiments):
|
for i, exp in enumerate(experiments):
|
||||||
with st.expander(
|
with st.expander(
|
||||||
f"{exp.config.name} - {exp.status.value} - {exp.start_time.strftime('%Y-%m-%d %H:%M')}"
|
f"{exp.config.name} - {exp.status.value} - {exp.start_time.strftime('%Y-%m-%d %H:%M')}"
|
||||||
):
|
):
|
||||||
self._display_experiment_details(exp, i)
|
self._display_experiment_details(exp, i)
|
||||||
|
|
||||||
@@ -230,7 +230,8 @@ class Experiments:
|
|||||||
|
|
||||||
return experiments
|
return experiments
|
||||||
|
|
||||||
def _display_experiment_details(self, exp, index: int):
|
@classmethod
|
||||||
|
def _display_experiment_details(cls, exp, index: int):
|
||||||
"""Display details for a single experiment"""
|
"""Display details for a single experiment"""
|
||||||
col1, col2, col3 = st.columns(3)
|
col1, col2, col3 = st.columns(3)
|
||||||
|
|
||||||
@@ -295,13 +296,13 @@ class Experiments:
|
|||||||
)
|
)
|
||||||
|
|
||||||
def run_batch_experiments(
|
def run_batch_experiments(
|
||||||
self,
|
self,
|
||||||
base_name: str,
|
base_name: str,
|
||||||
model_types: List[str],
|
model_types: List[str],
|
||||||
ngram_ranges: str,
|
ngram_ranges: str,
|
||||||
feature_combinations: List[str],
|
feature_combinations: List[str],
|
||||||
test_sizes: str,
|
test_sizes: str,
|
||||||
tags: str,
|
tags: str,
|
||||||
):
|
):
|
||||||
"""Run batch experiments with parameter combinations"""
|
"""Run batch experiments with parameter combinations"""
|
||||||
with st.spinner("Running batch experiments..."):
|
with st.spinner("Running batch experiments..."):
|
||||||
@@ -368,64 +369,3 @@ class Experiments:
|
|||||||
|
|
||||||
except Exception as e:
|
except Exception as e:
|
||||||
st.error(f"Error running batch experiments: {e}")
|
st.error(f"Error running batch experiments: {e}")
|
||||||
|
|
||||||
def run_baseline_experiments(self):
|
|
||||||
"""Run baseline experiments"""
|
|
||||||
with st.spinner("Running baseline experiments..."):
|
|
||||||
try:
|
|
||||||
builder = ExperimentBuilder()
|
|
||||||
experiments = builder.create_baseline_experiments()
|
|
||||||
experiment_ids = self.experiment_runner.run_experiment_batch(experiments)
|
|
||||||
|
|
||||||
st.success(f"Completed {len(experiment_ids)} baseline experiments")
|
|
||||||
|
|
||||||
# Show quick comparison
|
|
||||||
if experiment_ids:
|
|
||||||
comparison = self.experiment_runner.compare_experiments(experiment_ids)
|
|
||||||
st.write("**Results Summary:**")
|
|
||||||
st.dataframe(
|
|
||||||
comparison[["name", "model_type", "test_accuracy"]],
|
|
||||||
use_container_width=True,
|
|
||||||
)
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
st.error(f"Error running baseline experiments: {e}")
|
|
||||||
|
|
||||||
def run_ablation_study(self):
|
|
||||||
"""Run feature ablation study"""
|
|
||||||
with st.spinner("Running ablation study..."):
|
|
||||||
try:
|
|
||||||
builder = ExperimentBuilder()
|
|
||||||
experiments = builder.create_feature_ablation_study()
|
|
||||||
experiment_ids = self.experiment_runner.run_experiment_batch(experiments)
|
|
||||||
|
|
||||||
st.success(f"Completed {len(experiment_ids)} ablation experiments")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
st.error(f"Error running ablation study: {e}")
|
|
||||||
|
|
||||||
def run_component_study(self):
|
|
||||||
"""Run name component study"""
|
|
||||||
with st.spinner("Running component study..."):
|
|
||||||
try:
|
|
||||||
builder = ExperimentBuilder()
|
|
||||||
experiments = builder.create_name_component_study()
|
|
||||||
experiment_ids = self.experiment_runner.run_experiment_batch(experiments)
|
|
||||||
|
|
||||||
st.success(f"Completed {len(experiment_ids)} component experiments")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
st.error(f"Error running component study: {e}")
|
|
||||||
|
|
||||||
def run_province_study(self):
|
|
||||||
"""Run province-specific study"""
|
|
||||||
with st.spinner("Running province study..."):
|
|
||||||
try:
|
|
||||||
builder = ExperimentBuilder()
|
|
||||||
experiments = builder.create_province_specific_study()
|
|
||||||
experiment_ids = self.experiment_runner.run_experiment_batch(experiments)
|
|
||||||
|
|
||||||
st.success(f"Completed {len(experiment_ids)} province experiments")
|
|
||||||
|
|
||||||
except Exception as e:
|
|
||||||
st.error(f"Error running province study: {e}")
|
|
||||||
|
|||||||
@@ -38,7 +38,7 @@ class LogReader:
|
|||||||
|
|
||||||
# Parse log entries from the end
|
# Parse log entries from the end
|
||||||
entries = []
|
entries = []
|
||||||
for line in reversed(lines[-count * 2 :]): # Read more lines in case some don't match
|
for line in reversed(lines[-count * 2:]): # Read more lines in case some don't match
|
||||||
entry = self._parse_log_line(line.strip())
|
entry = self._parse_log_line(line.strip())
|
||||||
if entry:
|
if entry:
|
||||||
entries.append(entry)
|
entries.append(entry)
|
||||||
|
|||||||
@@ -13,7 +13,7 @@ from research.experiment.experiment_tracker import ExperimentTracker
|
|||||||
|
|
||||||
class Predictions:
|
class Predictions:
|
||||||
def __init__(
|
def __init__(
|
||||||
self, config, experiment_tracker: ExperimentTracker, experiment_runner: ExperimentRunner
|
self, config, experiment_tracker: ExperimentTracker, experiment_runner: ExperimentRunner
|
||||||
):
|
):
|
||||||
self.config = config
|
self.config = config
|
||||||
self.experiment_tracker = experiment_tracker
|
self.experiment_tracker = experiment_tracker
|
||||||
@@ -114,7 +114,7 @@ class Predictions:
|
|||||||
return None
|
return None
|
||||||
|
|
||||||
def _display_single_prediction_results(
|
def _display_single_prediction_results(
|
||||||
self, prediction: str, confidence: Optional[float], experiment, name_input: str
|
self, prediction: str, confidence: Optional[float], experiment, name_input: str
|
||||||
):
|
):
|
||||||
"""Display single prediction results"""
|
"""Display single prediction results"""
|
||||||
col1, col2 = st.columns(2)
|
col1, col2 = st.columns(2)
|
||||||
@@ -300,7 +300,7 @@ class Predictions:
|
|||||||
return pd.DataFrame()
|
return pd.DataFrame()
|
||||||
|
|
||||||
def _run_dataset_prediction(
|
def _run_dataset_prediction(
|
||||||
self, df: pd.DataFrame, experiment, sample_size: int, compare_with_actual: bool
|
self, df: pd.DataFrame, experiment, sample_size: int, compare_with_actual: bool
|
||||||
):
|
):
|
||||||
"""Run dataset prediction and display results"""
|
"""Run dataset prediction and display results"""
|
||||||
with st.spinner("Running predictions..."):
|
with st.spinner("Running predictions..."):
|
||||||
|
|||||||
@@ -12,7 +12,7 @@ from research.experiment.experiment_tracker import ExperimentTracker
|
|||||||
|
|
||||||
class ResultsAnalysis:
|
class ResultsAnalysis:
|
||||||
def __init__(
|
def __init__(
|
||||||
self, config, experiment_tracker: ExperimentTracker, experiment_runner: ExperimentRunner
|
self, config, experiment_tracker: ExperimentTracker, experiment_runner: ExperimentRunner
|
||||||
):
|
):
|
||||||
self.config = config
|
self.config = config
|
||||||
self.experiment_tracker = experiment_tracker
|
self.experiment_tracker = experiment_tracker
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import streamlit as st
|
import streamlit as st
|
||||||
|
|
||||||
# Add parent directory to Python path to access core modules
|
# Add parent directory to Python path to access core modules
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import streamlit as st
|
import streamlit as st
|
||||||
|
|
||||||
# Add parent directory to Python path to access core modules
|
# Add parent directory to Python path to access core modules
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import streamlit as st
|
import streamlit as st
|
||||||
|
|
||||||
# Add parent directory to Python path to access core modules
|
# Add parent directory to Python path to access core modules
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import streamlit as st
|
import streamlit as st
|
||||||
|
|
||||||
# Add parent directory to Python path to access core modules
|
# Add parent directory to Python path to access core modules
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import streamlit as st
|
import streamlit as st
|
||||||
|
|
||||||
# Add parent directory to Python path to access core modules
|
# Add parent directory to Python path to access core modules
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import streamlit as st
|
import streamlit as st
|
||||||
|
|
||||||
# Add parent directory to Python path to access core modules
|
# Add parent directory to Python path to access core modules
|
||||||
|
|||||||
@@ -1,5 +1,6 @@
|
|||||||
import sys
|
import sys
|
||||||
from pathlib import Path
|
from pathlib import Path
|
||||||
|
|
||||||
import streamlit as st
|
import streamlit as st
|
||||||
|
|
||||||
# Add parent directory to Python path to access core modules
|
# Add parent directory to Python path to access core modules
|
||||||
|
|||||||
Reference in New Issue
Block a user