feat: Experiment Builder
This commit is contained in:
@@ -1,5 +1,9 @@
|
|||||||
from typing import List
|
import logging
|
||||||
|
from typing import List, Dict
|
||||||
|
|
||||||
|
import yaml
|
||||||
|
|
||||||
|
from core.config.pipeline_config import PipelineConfig
|
||||||
from research.experiment import ExperimentConfig
|
from research.experiment import ExperimentConfig
|
||||||
from research.experiment.feature_extractor import FeatureType
|
from research.experiment.feature_extractor import FeatureType
|
||||||
|
|
||||||
@@ -7,117 +11,98 @@ from research.experiment.feature_extractor import FeatureType
|
|||||||
class ExperimentBuilder:
|
class ExperimentBuilder:
|
||||||
"""Helper class to build experiment configurations"""
|
"""Helper class to build experiment configurations"""
|
||||||
|
|
||||||
@staticmethod
|
def __init__(self, config: PipelineConfig):
|
||||||
def create_baseline_experiments() -> List[ExperimentConfig]:
|
self.config = config
|
||||||
"""Create a set of baseline experiments for comparison"""
|
|
||||||
|
|
||||||
return [
|
def load_templates(self, templates: str = "research_templates.yaml") -> dict:
|
||||||
# Full name experiments
|
"""Load research templates from YAML file"""
|
||||||
ExperimentConfig(
|
try:
|
||||||
name="baseline_logistic_regression_fullname",
|
with open(self.config.paths.configs_dir / templates, "r") as file:
|
||||||
description="Logistic regression with full name",
|
return yaml.safe_load(file)
|
||||||
model_type="logistic_regression",
|
except FileNotFoundError:
|
||||||
features=[FeatureType.FULL_NAME],
|
logging.error(f"Templates file not found: {templates}")
|
||||||
tags=["baseline", "fullname"],
|
raise
|
||||||
),
|
except yaml.YAMLError as e:
|
||||||
# Native name only
|
logging.error(f"Error parsing templates file: {e}")
|
||||||
ExperimentConfig(
|
raise
|
||||||
name="baseline_logistic_regression_native",
|
|
||||||
description="Logistic regression with native name only",
|
|
||||||
model_type="logistic_regression",
|
|
||||||
features=[FeatureType.NATIVE_NAME],
|
|
||||||
tags=["baseline", "native"],
|
|
||||||
),
|
|
||||||
# Surname only
|
|
||||||
ExperimentConfig(
|
|
||||||
name="baseline_logistic_regression_surname",
|
|
||||||
description="Logistic regression with surname only",
|
|
||||||
model_type="logistic_regression",
|
|
||||||
features=[FeatureType.SURNAME],
|
|
||||||
tags=["baseline", "surname"],
|
|
||||||
),
|
|
||||||
# Random Forest with engineered features
|
|
||||||
ExperimentConfig(
|
|
||||||
name="baseline_rf_engineered",
|
|
||||||
description="Random Forest with engineered features",
|
|
||||||
model_type="random_forest",
|
|
||||||
features=[FeatureType.NAME_LENGTH, FeatureType.WORD_COUNT, FeatureType.PROVINCE],
|
|
||||||
tags=["baseline", "engineered"],
|
|
||||||
),
|
|
||||||
]
|
|
||||||
|
|
||||||
@staticmethod
|
@classmethod
|
||||||
def create_feature_ablation_study() -> List[ExperimentConfig]:
|
def find_template(cls, templates: dict, name: str, experiment_type: str = "baseline") -> dict:
|
||||||
"""Create experiments for feature ablation study"""
|
"""Find experiment configuration by name and type"""
|
||||||
base_features = [
|
|
||||||
FeatureType.FULL_NAME,
|
|
||||||
FeatureType.NAME_LENGTH,
|
|
||||||
FeatureType.WORD_COUNT,
|
|
||||||
FeatureType.PROVINCE,
|
|
||||||
]
|
|
||||||
|
|
||||||
experiments = []
|
# Map type to section in templates
|
||||||
|
type_mapping = {
|
||||||
|
"baseline": "baseline_experiments",
|
||||||
|
"advanced": "advanced_experiments",
|
||||||
|
"feature_study": "feature_studies",
|
||||||
|
"tuning": "hyperparameter_tuning",
|
||||||
|
}
|
||||||
|
|
||||||
# Test removing each feature one by one
|
section_name = type_mapping.get(experiment_type)
|
||||||
for i, feature_to_remove in enumerate(base_features):
|
if not section_name:
|
||||||
remaining_features = [f for f in base_features if f != feature_to_remove]
|
available_types = list(type_mapping.keys())
|
||||||
|
raise ValueError(
|
||||||
experiments.append(
|
f"Unknown experiment type '{experiment_type}'. Available types: {available_types}"
|
||||||
ExperimentConfig(
|
|
||||||
name=f"ablation_remove_{feature_to_remove.value}",
|
|
||||||
description=f"Ablation study: removed {feature_to_remove.value}",
|
|
||||||
model_type="logistic_regression",
|
|
||||||
features=remaining_features,
|
|
||||||
tags=["ablation", feature_to_remove.value],
|
|
||||||
)
|
|
||||||
)
|
)
|
||||||
|
|
||||||
return experiments
|
if section_name not in templates:
|
||||||
|
raise ValueError(f"Section '{section_name}' not found in templates")
|
||||||
|
|
||||||
@staticmethod
|
experiments = templates[section_name]
|
||||||
def create_name_component_study() -> List[ExperimentConfig]:
|
|
||||||
"""Create experiments to study different name components"""
|
|
||||||
experiments = []
|
|
||||||
|
|
||||||
name_components = [
|
# Search for experiment by model name
|
||||||
(FeatureType.FIRST_WORD, "first_word"),
|
for experiment in experiments:
|
||||||
(FeatureType.LAST_WORD, "last_word"),
|
# Check if this is the experiment we're looking for
|
||||||
(FeatureType.NATIVE_NAME, "native_name"),
|
# Look for experiments that match the model type or contain the name
|
||||||
(FeatureType.SURNAME, "surname"),
|
if (
|
||||||
(FeatureType.NAME_BEGINNINGS, "name_beginnings"),
|
experiment.get("model_type") == name
|
||||||
(FeatureType.NAME_ENDINGS, "name_endings"),
|
or name.lower() in experiment.get("name", "").lower()
|
||||||
|
or experiment.get("name") == name
|
||||||
|
or f"baseline_{name}" == experiment.get("name")
|
||||||
|
or f"advanced_{name}" == experiment.get("name")
|
||||||
|
):
|
||||||
|
return experiment
|
||||||
|
|
||||||
|
# If not found, list available experiments
|
||||||
|
available_experiments = [
|
||||||
|
exp.get("name", exp.get("model_type", "unknown")) for exp in experiments
|
||||||
]
|
]
|
||||||
|
raise ValueError(
|
||||||
|
f"Experiment '{name}' not found in '{experiment_type}' section. "
|
||||||
|
f"Available experiments: {available_experiments}"
|
||||||
|
)
|
||||||
|
|
||||||
for feature, name in name_components:
|
def get_templates(self, templates_path: str = "research_templates.yaml") -> Dict[str, List[Dict]]:
|
||||||
experiments.append(
|
"""Get all available experiments from templates organized by type"""
|
||||||
ExperimentConfig(
|
templates = self.load_templates(templates_path)
|
||||||
name=f"component_study_{name}",
|
|
||||||
description=f"Study of {name} for gender prediction",
|
|
||||||
model_type="logistic_regression",
|
|
||||||
features=[feature],
|
|
||||||
tags=["component_study", name],
|
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
return experiments
|
return {
|
||||||
|
"baseline": templates.get("baseline_experiments", []),
|
||||||
|
"advanced": templates.get("advanced_experiments", []),
|
||||||
|
"feature_study": templates.get("feature_studies", []),
|
||||||
|
"tuning": templates.get("hyperparameter_tuning", [])
|
||||||
|
}
|
||||||
|
|
||||||
@staticmethod
|
@classmethod
|
||||||
def create_province_specific_study() -> List[ExperimentConfig]:
|
def from_template(cls, template_config: dict) -> ExperimentConfig:
|
||||||
"""Create experiments for province-specific analysis"""
|
"""Create an ExperimentConfig from a template configuration"""
|
||||||
provinces = ["kinshasa", "bas-congo", "bandundu", "katanga"] # Add more as needed
|
# Convert feature strings to FeatureType objects
|
||||||
|
features = []
|
||||||
|
for feature_str in template_config.get("features", []):
|
||||||
|
try:
|
||||||
|
features.append(FeatureType(feature_str))
|
||||||
|
except ValueError:
|
||||||
|
logging.warning(f"Unknown feature type: {feature_str}")
|
||||||
|
continue
|
||||||
|
|
||||||
experiments = []
|
return ExperimentConfig(
|
||||||
|
name=template_config.get("name"),
|
||||||
for province in provinces:
|
description=template_config.get("description"),
|
||||||
experiments.append(
|
model_type=template_config.get("model_type"),
|
||||||
ExperimentConfig(
|
features=features,
|
||||||
name=f"province_study_{province}",
|
model_params=template_config.get("model_params", {}),
|
||||||
description=f"Gender prediction for {province} province only",
|
tags=template_config.get("tags", []),
|
||||||
model_type="logistic_regression",
|
test_size=template_config.get("test_size", 0.2),
|
||||||
features=[FeatureType.FULL_NAME],
|
cross_validation_folds=template_config.get("cross_validation_folds", 5),
|
||||||
train_data_filter={"province": province},
|
train_data_filter=template_config.get("train_data_filter")
|
||||||
tags=["province_study", province],
|
)
|
||||||
)
|
|
||||||
)
|
|
||||||
|
|
||||||
return experiments
|
|
||||||
|
|||||||
@@ -4,69 +4,11 @@ import logging
|
|||||||
import sys
|
import sys
|
||||||
import traceback
|
import traceback
|
||||||
|
|
||||||
import yaml
|
|
||||||
|
|
||||||
from core.config import setup_config
|
from core.config import setup_config
|
||||||
|
from research.experiment.experiment_builder import ExperimentBuilder
|
||||||
from research.model_trainer import ModelTrainer
|
from research.model_trainer import ModelTrainer
|
||||||
|
|
||||||
|
|
||||||
def load_research_templates(templates_path: str = "config/research_templates.yaml") -> dict:
    """Load research templates from a YAML file.

    Args:
        templates_path: Path of the YAML templates file to read.

    Returns:
        The parsed YAML document. An empty document, which
        ``yaml.safe_load`` reports as ``None``, is returned as an empty
        dict so callers can always run membership tests on the result.

    Raises:
        FileNotFoundError: If ``templates_path`` does not exist. The error
            is logged and re-raised.
        yaml.YAMLError: If the file is not valid YAML. The error is logged
            and re-raised.
    """
    try:
        # Be explicit about the encoding: the platform default used by
        # open() is not UTF-8 everywhere.
        with open(templates_path, "r", encoding="utf-8") as file:
            templates = yaml.safe_load(file)
    except FileNotFoundError:
        logging.error(f"Templates file not found: {templates_path}")
        raise
    except yaml.YAMLError as e:
        logging.error(f"Error parsing templates file: {e}")
        raise

    return {} if templates is None else templates
|
|
||||||
|
|
||||||
|
|
||||||
def find_experiment_config(templates: dict, name: str, experiment_type: str) -> dict:
    """Find an experiment configuration by name and type.

    Matching is done in two passes over the section selected by
    ``experiment_type``:

    1. exact match of the experiment's ``name`` against ``name``,
       ``baseline_<name>`` or ``advanced_<name>``;
    2. loose match: ``model_type`` equal to ``name``, or ``name`` contained
       (case-insensitively) in the experiment's ``name``.

    Running the exact pass first keeps an earlier partial hit (for example
    ``baseline_rf_tuned``) from shadowing the experiment that was actually
    asked for (``baseline_rf``).

    Args:
        templates: Parsed templates document, keyed by section name.
        name: Experiment name (or model type) to look for.
        experiment_type: One of ``baseline``, ``advanced``,
            ``feature_study`` or ``tuning``.

    Returns:
        The first matching experiment dict from the section.

    Raises:
        ValueError: If ``experiment_type`` is unknown, the section is
            missing from ``templates``, or no experiment matches.
    """
    # Map type to section in templates
    type_mapping = {
        "baseline": "baseline_experiments",
        "advanced": "advanced_experiments",
        "feature_study": "feature_studies",
        "tuning": "hyperparameter_tuning",
    }

    section_name = type_mapping.get(experiment_type)
    if not section_name:
        available_types = list(type_mapping.keys())
        raise ValueError(
            f"Unknown experiment type '{experiment_type}'. Available types: {available_types}"
        )

    if section_name not in templates:
        raise ValueError(f"Section '{section_name}' not found in templates")

    # A section declared without entries is parsed from YAML as None.
    experiments = templates[section_name] or []

    # Pass 1: exact name matches take priority.
    exact_names = (name, f"baseline_{name}", f"advanced_{name}")
    for experiment in experiments:
        if experiment.get("name") in exact_names:
            return experiment

    # Pass 2: loose match on model type or on a substring of the name.
    needle = name.lower()
    for experiment in experiments:
        # The "name" key may be present but null; treat that as empty.
        candidate = str(experiment.get("name") or "").lower()
        if experiment.get("model_type") == name or needle in candidate:
            return experiment

    # If not found, list available experiments
    available_experiments = [
        exp.get("name", exp.get("model_type", "unknown")) for exp in experiments
    ]
    raise ValueError(
        f"Experiment '{name}' not found in '{experiment_type}' section. "
        f"Available experiments: {available_experiments}"
    )
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
parser = argparse.ArgumentParser(description="Train DRC Names Models using Research Templates")
|
parser = argparse.ArgumentParser(description="Train DRC Names Models using Research Templates")
|
||||||
parser.add_argument("--name", type=str, required=True, help="Model name to train")
|
parser.add_argument("--name", type=str, required=True, help="Model name to train")
|
||||||
@@ -79,14 +21,15 @@ def main():
|
|||||||
try:
|
try:
|
||||||
# Setup pipeline configuration
|
# Setup pipeline configuration
|
||||||
config = setup_config(config_path=args.config, env=args.env)
|
config = setup_config(config_path=args.config, env=args.env)
|
||||||
|
experiment_builder = ExperimentBuilder(config)
|
||||||
|
|
||||||
# Load research templates
|
# Load research templates
|
||||||
logging.info(f"Loading research templates from: {args.templates}")
|
logging.info(f"Loading research templates from: {args.templates}")
|
||||||
templates = load_research_templates(args.templates)
|
templates = experiment_builder.load_templates(args.templates)
|
||||||
|
|
||||||
# Find the specific experiment configuration
|
# Find the specific experiment configuration
|
||||||
logging.info(f"Looking for experiment: name='{args.name}', type='{args.type}'")
|
logging.info(f"Looking for experiment: name='{args.name}', type='{args.type}'")
|
||||||
experiment_config = find_experiment_config(templates, args.name, args.type)
|
experiment_config = experiment_builder.find_template(templates, args.name, args.type)
|
||||||
|
|
||||||
logging.info(f"Found experiment: {experiment_config.get('name')}")
|
logging.info(f"Found experiment: {experiment_config.get('name')}")
|
||||||
logging.info(f"Description: {experiment_config.get('description')}")
|
logging.info(f"Description: {experiment_config.get('description')}")
|
||||||
|
|||||||
+152
-144
@@ -1,8 +1,8 @@
|
|||||||
from typing import List, Dict, Any
|
from typing import List, Dict
|
||||||
|
|
||||||
import streamlit as st
|
import streamlit as st
|
||||||
|
|
||||||
from core.utils.region_mapper import RegionMapper
|
from core.config.pipeline_config import PipelineConfig
|
||||||
from research.experiment import ExperimentConfig, ExperimentStatus
|
from research.experiment import ExperimentConfig, ExperimentStatus
|
||||||
from research.experiment.experiment_builder import ExperimentBuilder
|
from research.experiment.experiment_builder import ExperimentBuilder
|
||||||
from research.experiment.experiment_runner import ExperimentRunner
|
from research.experiment.experiment_runner import ExperimentRunner
|
||||||
@@ -13,18 +13,20 @@ from research.model_registry import list_available_models
|
|||||||
|
|
||||||
class Experiments:
|
class Experiments:
|
||||||
def __init__(
|
def __init__(
|
||||||
self, config, experiment_tracker: ExperimentTracker, experiment_runner: ExperimentRunner
|
self, config: PipelineConfig, experiment_tracker: ExperimentTracker, experiment_runner: ExperimentRunner
|
||||||
):
|
):
|
||||||
self.config = config
|
self.config = config
|
||||||
self.experiment_tracker = experiment_tracker
|
self.experiment_tracker = experiment_tracker
|
||||||
self.experiment_runner = experiment_runner
|
self.experiment_runner = experiment_runner
|
||||||
|
self.experiment_builder = ExperimentBuilder(config)
|
||||||
|
|
||||||
def index(self):
|
def index(self):
|
||||||
st.title("Experiments")
|
st.title("Experiments")
|
||||||
tab1, tab2, tab3 = st.tabs(["New Experiment", "Experiment List", "Batch Experiments"])
|
tab1, tab2, tab3 = st.tabs(
|
||||||
|
["Templates", "Experiments", "Batch Experiments"])
|
||||||
|
|
||||||
with tab1:
|
with tab1:
|
||||||
self.show_experiment_creation()
|
self.show_template_experiments()
|
||||||
|
|
||||||
with tab2:
|
with tab2:
|
||||||
self.show_experiment_list()
|
self.show_experiment_list()
|
||||||
@@ -32,151 +34,78 @@ class Experiments:
|
|||||||
with tab3:
|
with tab3:
|
||||||
self.show_batch_experiments()
|
self.show_batch_experiments()
|
||||||
|
|
||||||
def show_experiment_creation(self):
|
def show_template_experiments(self):
|
||||||
"""Show interface for creating new experiments"""
|
"""Show interface for running predefined template experiments"""
|
||||||
st.subheader("Create New Experiment")
|
st.subheader("Template Experiments")
|
||||||
|
st.write("Run predefined experiments based on research templates.")
|
||||||
with st.form("new_experiment"):
|
|
||||||
col1, col2 = st.columns(2)
|
|
||||||
|
|
||||||
with col1:
|
|
||||||
exp_name = st.text_input(
|
|
||||||
"Experiment Name", placeholder="e.g., native_name_gender_prediction"
|
|
||||||
)
|
|
||||||
description = st.text_area(
|
|
||||||
"Description", placeholder="Brief description of the experiment"
|
|
||||||
)
|
|
||||||
model_type = st.selectbox("Model Type", list_available_models())
|
|
||||||
|
|
||||||
# Feature selection
|
|
||||||
feature_options = [f.value for f in FeatureType]
|
|
||||||
selected_features = st.multiselect(
|
|
||||||
"Features to Use", feature_options, default=["full_name"]
|
|
||||||
)
|
|
||||||
|
|
||||||
with col2:
|
|
||||||
# Model parameters
|
|
||||||
st.write("**Model Parameters**")
|
|
||||||
model_params = {}
|
|
||||||
if model_type == "logistic_regression":
|
|
||||||
ngram_min = st.number_input("N-gram Min", 1, 5, 2)
|
|
||||||
ngram_max = st.number_input("N-gram Max", 2, 8, 5)
|
|
||||||
max_features = st.number_input("Max Features", 1000, 50000, 10000)
|
|
||||||
model_params = {
|
|
||||||
"ngram_range": [ngram_min, ngram_max],
|
|
||||||
"max_features": max_features,
|
|
||||||
}
|
|
||||||
elif model_type == "random_forest":
|
|
||||||
n_estimators = st.number_input("Number of Trees", 10, 500, 100)
|
|
||||||
max_depth = st.number_input("Max Depth", 1, 20, 10)
|
|
||||||
model_params = {
|
|
||||||
"n_estimators": n_estimators,
|
|
||||||
"max_depth": max_depth if max_depth > 0 else None,
|
|
||||||
}
|
|
||||||
|
|
||||||
# Training parameters
|
|
||||||
st.write("**Training Parameters**")
|
|
||||||
test_size = st.slider("Test Set Size", 0.1, 0.5, 0.2)
|
|
||||||
cv_folds = st.number_input("Cross-Validation Folds", 3, 10, 5)
|
|
||||||
|
|
||||||
tags = st.text_input(
|
|
||||||
"Tags (comma-separated)", placeholder="e.g., baseline, feature_study"
|
|
||||||
)
|
|
||||||
|
|
||||||
# Advanced options
|
|
||||||
with st.expander("Advanced Options"):
|
|
||||||
# Data filters
|
|
||||||
st.write("**Data Filters**")
|
|
||||||
filter_province = st.selectbox(
|
|
||||||
"Filter by Province (optional)",
|
|
||||||
["None"] + RegionMapper().get_provinces(),
|
|
||||||
)
|
|
||||||
|
|
||||||
min_words = st.number_input("Minimum Word Count", 0, 10, 0)
|
|
||||||
max_words = st.number_input("Maximum Word Count (0 = no limit)", 0, 20, 0)
|
|
||||||
|
|
||||||
submitted = st.form_submit_button("Create and Run Experiment", type="primary")
|
|
||||||
|
|
||||||
if submitted:
|
|
||||||
self._handle_experiment_submission(
|
|
||||||
exp_name,
|
|
||||||
description,
|
|
||||||
model_type,
|
|
||||||
selected_features,
|
|
||||||
model_params,
|
|
||||||
test_size,
|
|
||||||
cv_folds,
|
|
||||||
tags,
|
|
||||||
filter_province,
|
|
||||||
min_words,
|
|
||||||
max_words,
|
|
||||||
)
|
|
||||||
|
|
||||||
def _handle_experiment_submission(
|
|
||||||
self,
|
|
||||||
exp_name: str,
|
|
||||||
description: str,
|
|
||||||
model_type: str,
|
|
||||||
selected_features: List[str],
|
|
||||||
model_params: Dict[str, Any],
|
|
||||||
test_size: float,
|
|
||||||
cv_folds: int,
|
|
||||||
tags: str,
|
|
||||||
filter_province: str,
|
|
||||||
min_words: int,
|
|
||||||
max_words: int,
|
|
||||||
):
|
|
||||||
"""Handle experiment form submission"""
|
|
||||||
if not exp_name:
|
|
||||||
st.error("Please provide an experiment name")
|
|
||||||
return
|
|
||||||
|
|
||||||
if not selected_features:
|
|
||||||
st.error("Please select at least one feature")
|
|
||||||
return
|
|
||||||
|
|
||||||
try:
|
try:
|
||||||
# Prepare data filters
|
available_experiments = self.experiment_builder.get_templates()
|
||||||
train_filter = {}
|
|
||||||
if filter_province != "None":
|
|
||||||
train_filter["province"] = filter_province
|
|
||||||
if min_words > 0:
|
|
||||||
train_filter["words"] = {"min": min_words}
|
|
||||||
if max_words > 0:
|
|
||||||
if "words" in train_filter:
|
|
||||||
train_filter["words"]["max"] = max_words
|
|
||||||
else:
|
|
||||||
train_filter["words"] = {"max": max_words}
|
|
||||||
|
|
||||||
# Create experiment config
|
# Create tabs for different experiment types
|
||||||
features = [FeatureType(f) for f in selected_features]
|
exp_tabs = st.tabs(["Baseline", "Advanced", "Feature Studies", "Hyperparameter Tuning"])
|
||||||
tag_list = [tag.strip() for tag in tags.split(",") if tag.strip()]
|
|
||||||
|
|
||||||
config = ExperimentConfig(
|
with exp_tabs[0]:
|
||||||
name=exp_name,
|
self._show_experiments_by_type(available_experiments["baseline"], "baseline")
|
||||||
description=description,
|
|
||||||
tags=tag_list,
|
|
||||||
model_type=model_type,
|
|
||||||
model_params=model_params,
|
|
||||||
features=features,
|
|
||||||
train_data_filter=train_filter if train_filter else None,
|
|
||||||
test_size=test_size,
|
|
||||||
cross_validation_folds=cv_folds,
|
|
||||||
)
|
|
||||||
|
|
||||||
# Run experiment
|
with exp_tabs[1]:
|
||||||
with st.spinner("Running experiment..."):
|
self._show_experiments_by_type(available_experiments["advanced"], "advanced")
|
||||||
experiment_id = self.experiment_runner.run_experiment(config)
|
|
||||||
|
|
||||||
st.success(f"Experiment completed successfully!")
|
with exp_tabs[2]:
|
||||||
st.info(f"Experiment ID: `{experiment_id}`")
|
self._show_experiments_by_type(available_experiments["feature_study"], "feature_study")
|
||||||
|
|
||||||
# Show results
|
with exp_tabs[3]:
|
||||||
experiment = self.experiment_tracker.get_experiment(experiment_id)
|
self._show_experiments_by_type(available_experiments["tuning"], "tuning")
|
||||||
if experiment and experiment.test_metrics:
|
|
||||||
st.write("**Results:**")
|
except Exception as e:
|
||||||
for metric, value in experiment.test_metrics.items():
|
st.error(f"Error loading experiment templates: {e}")
|
||||||
st.metric(metric.title(), f"{value:.4f}")
|
st.info("Make sure the research templates file exists at `config/research_templates.yaml`")
|
||||||
|
|
||||||
|
def _show_experiments_by_type(self, experiments: List[Dict], experiment_type: str):
    """Render the templates of one experiment type.

    Each template gets its own expander showing the raw template as JSON
    next to a button that runs it via ``self._run_template_experiment``.
    When ``experiments`` is empty an informational message is shown instead.
    """
    if not experiments:
        st.info(f"No {experiment_type} experiments available in templates.")
        return

    st.write(f"**{experiment_type.title()} Experiments**")

    for index, template in enumerate(experiments):
        # Fall back to a positional title when the template has no name.
        title = template.get("name", f"Experiment {index + 1}")
        summary = template.get("description", "No description available")

        with st.expander(f"📊 {title} - {summary}"):
            details_col, action_col = st.columns([2, 1])

            with details_col:
                st.json(template)

            with action_col:
                # The key includes type and position so every button is unique.
                clicked = st.button("🚀 Run Experiment", key=f"run_{experiment_type}_{index}")
                if clicked:
                    self._run_template_experiment(template)
|
||||||
|
|
||||||
|
def _run_template_experiment(self, exp_template: Dict):
    """Build an experiment from one template, run it, and display its test metrics.

    Any exception raised along the way is reported with ``st.error``
    rather than propagated.
    """
    try:
        with st.spinner(f"Running {exp_template.get('name')}..."):
            # Turn the raw template dict into an experiment config, then run it.
            experiment_config = self.experiment_builder.from_template(exp_template)
            experiment_id = self.experiment_runner.run_experiment(experiment_config)

            st.success(f"Experiment '{experiment_config.name}' completed successfully!")
            st.info(f"Experiment ID: `{experiment_id}`")

            # Show results
            experiment = self.experiment_tracker.get_experiment(experiment_id)
            if experiment and experiment.test_metrics:
                st.write("**Results:**")
                metric_columns = st.columns(3)

                # Distribute the metrics round-robin over the three columns.
                for position, (metric, value) in enumerate(list(experiment.test_metrics.items())):
                    with metric_columns[position % 3]:
                        st.metric(metric.title(), f"{value:.4f}")

    except Exception as e:
        st.error(f"Error running experiment: {e}")
|
||||||
@@ -261,6 +190,85 @@ class Experiments:
|
|||||||
st.subheader("Batch Experiments")
|
st.subheader("Batch Experiments")
|
||||||
st.write("Run multiple experiments with different parameter combinations.")
|
st.write("Run multiple experiments with different parameter combinations.")
|
||||||
|
|
||||||
|
# Add option to run template batch experiments
|
||||||
|
batch_type = st.radio("Batch Type", ["Template Batch", "Custom Parameter Sweep"])
|
||||||
|
|
||||||
|
if batch_type == "Template Batch":
|
||||||
|
self._show_template_batch_experiments()
|
||||||
|
else:
|
||||||
|
self._show_custom_batch_experiments()
|
||||||
|
|
||||||
|
def _show_template_batch_experiments(self):
    """Show interface for running batch experiments from templates.

    Loads the templates through ``self.experiment_builder.get_templates()``,
    lets the user pick experiment types and then individual experiments of
    each type, and passes the picked templates to
    ``self._run_template_batch_experiments`` when the run button is pressed.
    Any exception raised while loading or rendering is reported with
    ``st.error`` instead of propagating.
    """
    st.write("**Run Multiple Template Experiments**")

    try:
        available_experiments = self.experiment_builder.get_templates()

        # Select experiment types to run
        experiment_types = st.multiselect(
            "Select Experiment Types",
            ["baseline", "advanced", "feature_study", "tuning"],
            default=["baseline"],
        )

        if experiment_types:
            selected_experiments = []

            for exp_type in experiment_types:
                experiments = available_experiments.get(exp_type, [])
                if not experiments:
                    continue

                st.write(f"**{exp_type.title()} Experiments:**")

                # Pair every template with the label shown in the widget.
                # Looking templates up by this label (not by their raw
                # "name") keeps unnamed templates, displayed as "Exp <i>",
                # selectable instead of silently dropping them.
                labelled = [
                    (exp.get("name", f"Exp {i}"), exp) for i, exp in enumerate(experiments)
                ]
                selected_names = st.multiselect(
                    f"Select {exp_type} experiments",
                    [label for label, _ in labelled],
                    key=f"select_{exp_type}",
                )

                for name in selected_names:
                    selected_experiments.extend(
                        exp for label, exp in labelled if label == name
                    )

            # One button for the whole selection, rendered after all types.
            if st.button("🚀 Run Selected Template Experiments"):
                self._run_template_batch_experiments(selected_experiments)

    except Exception as e:
        st.error(f"Error loading templates for batch experiments: {e}")
|
||||||
|
|
||||||
|
def _run_template_batch_experiments(self, selected_experiments: List[Dict]):
    """Run the selected templates as one batch and show a comparison table.

    Warns and returns when nothing is selected. Errors raised while
    building, running or comparing the experiments are reported with
    ``st.error``.
    """
    if not selected_experiments:
        st.warning("No experiments selected")
        return

    with st.spinner(f"Running {len(selected_experiments)} template experiments..."):
        try:
            # Convert every template dict into an experiment config first.
            batch_configs = [
                self.experiment_builder.from_template(template)
                for template in selected_experiments
            ]

            # Run batch experiments
            experiment_ids = self.experiment_runner.run_experiment_batch(batch_configs)

            st.success(f"Completed {len(experiment_ids)} template experiments!")

            # Show summary
            if experiment_ids:
                comparison = self.experiment_runner.compare_experiments(experiment_ids)
                summary_columns = ["name", "model_type", "test_accuracy"]
                st.write("**Template Batch Results:**")
                st.dataframe(comparison[summary_columns], use_container_width=True)

        except Exception as e:
            st.error(f"Error running template batch experiments: {e}")
|
||||||
|
|
||||||
|
def _show_custom_batch_experiments(self):
|
||||||
|
"""Show interface for custom parameter sweep experiments"""
|
||||||
# Parameter sweep configuration
|
# Parameter sweep configuration
|
||||||
with st.form("batch_experiments"):
|
with st.form("batch_experiments"):
|
||||||
st.write("**Parameter Sweep Configuration**")
|
st.write("**Parameter Sweep Configuration**")
|
||||||
@@ -290,7 +298,7 @@ class Experiments:
|
|||||||
|
|
||||||
tags = st.text_input("Common Tags", "parameter_sweep,batch")
|
tags = st.text_input("Common Tags", "parameter_sweep,batch")
|
||||||
|
|
||||||
if st.form_submit_button("🚀 Run Batch Experiments"):
|
if st.form_submit_button("🚀 Run Parameter Sweep"):
|
||||||
self.run_batch_experiments(
|
self.run_batch_experiments(
|
||||||
base_name, model_types, ngram_ranges, feature_combinations, test_sizes, tags
|
base_name, model_types, ngram_ranges, feature_combinations, test_sizes, tags
|
||||||
)
|
)
|
||||||
|
|||||||
Reference in New Issue
Block a user