This commit is contained in:
2025-08-16 20:34:45 +02:00
parent 84f7d41a84
commit cf1cbac1a8
36 changed files with 95 additions and 152 deletions
+2 -2
View File
@@ -41,14 +41,14 @@ class BaseModel(ABC):
@abstractmethod
def cross_validate(
self, X: pd.DataFrame, y: pd.Series, cv_folds: int = 5
self, X: pd.DataFrame, y: pd.Series, cv_folds: int = 5
) -> Dict[str, float] | dict[str, np.floating[Any]]:
"""Perform cross-validation and return average scores"""
pass
@abstractmethod
def generate_learning_curve(
self, X: pd.DataFrame, y: pd.Series, train_sizes: List[float] = None
self, X: pd.DataFrame, y: pd.Series, train_sizes: List[float] = None
) -> Dict[str, Any]:
"""Generate learning curve data for the model"""
pass
+7 -7
View File
@@ -158,12 +158,12 @@ class ExperimentRunner:
@classmethod
def _create_prediction_examples(
cls,
X_test: pd.DataFrame,
y_test: pd.Series,
predictions: np.ndarray,
model: BaseModel,
n_examples: int = 10,
cls,
X_test: pd.DataFrame,
y_test: pd.Series,
predictions: np.ndarray,
model: BaseModel,
n_examples: int = 10,
) -> List[Dict]:
"""Create prediction examples for analysis"""
examples = []
@@ -237,7 +237,7 @@ class ExperimentRunner:
return None
def compare_experiments(
self, experiment_ids: List[str], metric: str = "accuracy"
self, experiment_ids: List[str], metric: str = "accuracy"
) -> pd.DataFrame:
"""Compare experiments and return analysis"""
comparison_df = self.tracker.compare_experiments(experiment_ids)
+7 -8
View File
@@ -7,7 +7,6 @@ from typing import Optional, Dict, List
import pandas as pd
from core.config import PipelineConfig, get_config
from research.experiment import ExperimentConfig, ExperimentStatus
from research.experiment.experiement_result import ExperimentResult
@@ -78,10 +77,10 @@ class ExperimentTracker:
return self._results.get(experiment_id)
def list_experiments(
self,
status: Optional[ExperimentStatus] = None,
tags: Optional[List[str]] = None,
model_type: Optional[str] = None,
self,
status: Optional[ExperimentStatus] = None,
tags: Optional[List[str]] = None,
model_type: Optional[str] = None,
) -> List[ExperimentResult]:
"""List experiments with optional filtering"""
results = list(self._results.values())
@@ -98,7 +97,7 @@ class ExperimentTracker:
return sorted(results, key=lambda x: x.start_time, reverse=True)
def get_best_experiment(
self, metric: str = "accuracy", dataset: str = "test", filters: Optional[Dict] = None
self, metric: str = "accuracy", dataset: str = "test", filters: Optional[Dict] = None
) -> Optional[ExperimentResult]:
"""Get the best experiment based on a metric"""
experiments = self.list_experiments()
@@ -160,8 +159,8 @@ class ExperimentTracker:
"""Export all results to CSV"""
if output_path is None:
output_path = (
self.experiments_dir
/ f"experiments_export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
self.experiments_dir
/ f"experiments_export_{datetime.now().strftime('%Y%m%d_%H%M%S')}.csv"
)
rows = []
+1 -1
View File
@@ -43,7 +43,7 @@ class FeatureExtractor:
return features_df
def _extract_single_feature(
self, df: pd.DataFrame, feature_type: FeatureType
self, df: pd.DataFrame, feature_type: FeatureType
) -> Union[pd.Series, pd.DataFrame]:
"""Extract a single type of feature"""
if feature_type == FeatureType.FULL_NAME:
+8 -8
View File
@@ -27,13 +27,13 @@ class ModelTrainer:
self.models_dir.mkdir(parents=True, exist_ok=True)
def train_single_model(
self,
model_name: str,
model_type: str = "logistic_regression",
features: List[str] = None,
model_params: Dict[str, Any] = None,
tags: List[str] = None,
save_artifacts: bool = True,
self,
model_name: str,
model_type: str = "logistic_regression",
features: List[str] = None,
model_params: Dict[str, Any] = None,
tags: List[str] = None,
save_artifacts: bool = True,
) -> str:
"""
Train a single model and save its artifacts.
@@ -75,7 +75,7 @@ class ModelTrainer:
return experiment_id
def train_multiple_models(
self, base_name: str, model_configs: List[Dict[str, Any]], save_all: bool = True
self, base_name: str, model_configs: List[Dict[str, Any]], save_all: bool = True
) -> List[str]:
"""
Train multiple models with different configurations.
+2 -2
View File
@@ -83,7 +83,7 @@ class NeuralNetworkModel(BaseModel):
return self
def cross_validate(
self, X: pd.DataFrame, y: pd.Series, cv_folds: int = 5
self, X: pd.DataFrame, y: pd.Series, cv_folds: int = 5
) -> dict[str, np.floating[Any]]:
features_df = self.feature_extractor.extract_features(X)
X_prepared = self.prepare_features(features_df)
@@ -140,7 +140,7 @@ class NeuralNetworkModel(BaseModel):
}
def generate_learning_curve(
self, X: pd.DataFrame, y: pd.Series, train_sizes: List[float] = None
self, X: pd.DataFrame, y: pd.Series, train_sizes: List[float] = None
) -> Dict[str, Any]:
"""Generate learning curve data for the model"""
logging.info(f"Generating learning curve for {self.__class__.__name__}")
+1 -1
View File
@@ -93,7 +93,7 @@ class TraditionalModel(BaseModel):
return results
def generate_learning_curve(
self, X: pd.DataFrame, y: pd.Series, train_sizes: List[float] = None
self, X: pd.DataFrame, y: pd.Series, train_sizes: List[float] = None
) -> Dict[str, Any]:
"""Generate learning curve data for the model"""
logging.info(f"Generating learning curve for {self.__class__.__name__}")