# drc-ners-nlp/web/interfaces/experiments.py
from typing import List, Dict
import streamlit as st
from core.config.pipeline_config import PipelineConfig
from research.experiment import ExperimentConfig, ExperimentStatus
from research.experiment.experiment_builder import ExperimentBuilder
from research.experiment.experiment_runner import ExperimentRunner
from research.experiment.experiment_tracker import ExperimentTracker
from research.experiment.feature_extractor import FeatureType
from research.model_registry import list_available_models


class Experiments:
def __init__(
self, config: PipelineConfig, experiment_tracker: ExperimentTracker, experiment_runner: ExperimentRunner
):
self.config = config
self.experiment_tracker = experiment_tracker
self.experiment_runner = experiment_runner
self.experiment_builder = ExperimentBuilder(config)

    def index(self):
st.title("Experiments")
tab1, tab2, tab3 = st.tabs(
["Templates", "Experiments", "Batch Experiments"])
with tab1:
self.show_template_experiments()
with tab2:
self.show_experiment_list()
with tab3:
self.show_batch_experiments()

    def show_template_experiments(self):
"""Show interface for running predefined template experiments"""
st.subheader("Template Experiments")
st.write("Run predefined experiments based on research templates.")
try:
available_experiments = self.experiment_builder.get_templates()
# Create tabs for different experiment types
exp_tabs = st.tabs(["Baseline", "Advanced", "Feature Studies", "Hyperparameter Tuning"])
with exp_tabs[0]:
self._show_experiments_by_type(available_experiments["baseline"], "baseline")
with exp_tabs[1]:
self._show_experiments_by_type(available_experiments["advanced"], "advanced")
with exp_tabs[2]:
self._show_experiments_by_type(available_experiments["feature_study"], "feature_study")
with exp_tabs[3]:
self._show_experiments_by_type(available_experiments["tuning"], "tuning")
except Exception as e:
st.error(f"Error loading experiment templates: {e}")
st.info("Make sure the research templates file exists at `config/research_templates.yaml`")

    def _show_experiments_by_type(self, experiments: List[Dict], experiment_type: str):
"""Show experiments for a specific type"""
if not experiments:
st.info(f"No {experiment_type} experiments available in templates.")
return
st.write(f"**{experiment_type.title()} Experiments**")
# Show available experiments
for i, exp_template in enumerate(experiments):
exp_name = exp_template.get("name", f"Experiment {i + 1}")
exp_description = exp_template.get("description", "No description available")
with st.expander(f"📊 {exp_name} - {exp_description}"):
col1, col2 = st.columns([2, 1])
with col1:
st.json(exp_template)
with col2:
if st.button(f"🚀 Run Experiment", key=f"run_{experiment_type}_{i}"):
self._run_template_experiment(exp_template)

    def _run_template_experiment(self, exp_template: Dict):
"""Run a template experiment"""
try:
with st.spinner(f"Running {exp_template.get('name')}..."):
# Create experiment config from template
experiment_config = self.experiment_builder.from_template(exp_template)
# Run the experiment
experiment_id = self.experiment_runner.run_experiment(experiment_config)
st.success(f"Experiment '{experiment_config.name}' completed successfully!")
st.info(f"Experiment ID: `{experiment_id}`")
# Show results
experiment = self.experiment_tracker.get_experiment(experiment_id)
if experiment and experiment.test_metrics:
st.write("**Results:**")
col1, col2, col3 = st.columns(3)
metrics = list(experiment.test_metrics.items())
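                    # Cycle the metric cards across the three columns, left to right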
for i, (metric, value) in enumerate(metrics):
with [col1, col2, col3][i % 3]:
st.metric(metric.title(), f"{value:.4f}")
except Exception as e:
st.error(f"Error running experiment: {e}")

    def show_experiment_list(self):
"""Show list of all experiments with filtering"""
st.subheader("All Experiments")
# Filters
col1, col2, col3 = st.columns(3)
with col1:
            status_filter = st.selectbox(
                "Filter by Status", ["All"] + [s.value for s in ExperimentStatus]
            )
with col2:
model_filter = st.selectbox("Filter by Model", ["All"] + list_available_models())
with col3:
tag_filter = st.text_input("Filter by Tags (comma-separated)")
# Get and filter experiments
experiments = self._get_filtered_experiments(status_filter, model_filter, tag_filter)
if not experiments:
st.info("No experiments found matching the filters.")
return
# Display experiments
for i, exp in enumerate(experiments):
with st.expander(
f"{exp.config.name} - {exp.status.value} - {exp.start_time.strftime('%Y-%m-%d %H:%M')}"
):
self._display_experiment_details(exp, i)

    def _get_filtered_experiments(self, status_filter: str, model_filter: str, tag_filter: str):
"""Get experiments with applied filters"""
experiments = self.experiment_tracker.list_experiments()
# Apply filters
if status_filter != "All":
experiments = [e for e in experiments if e.status == ExperimentStatus(status_filter)]
if model_filter != "All":
experiments = [e for e in experiments if e.config.model_type == model_filter]
if tag_filter:
tags = [tag.strip() for tag in tag_filter.split(",")]
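            # OR semantics: keep experiments that carry at least one requested tag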
experiments = [e for e in experiments if any(tag in e.config.tags for tag in tags)]
return experiments

    @classmethod
def _display_experiment_details(cls, exp, index: int):
"""Display details for a single experiment"""
col1, col2, col3 = st.columns(3)
with col1:
st.write(f"**Model:** {exp.config.model_type}")
st.write(f"**Features:** {', '.join([f.value for f in exp.config.features])}")
st.write(f"**Tags:** {', '.join(exp.config.tags)}")
with col2:
if exp.test_metrics:
for metric, value in exp.test_metrics.items():
st.metric(metric.title(), f"{value:.4f}")
with col3:
st.write(f"**Train Size:** {exp.train_size:,}")
st.write(f"**Test Size:** {exp.test_size:,}")
if st.button(f"View Details", key=f"details_{index}"):
st.session_state.selected_experiment = exp.experiment_id
st.rerun()
if exp.config.description:
st.write(f"**Description:** {exp.config.description}")

    def show_batch_experiments(self):
"""Show interface for running batch experiments"""
st.subheader("Batch Experiments")
st.write("Run multiple experiments with different parameter combinations.")
# Add option to run template batch experiments
batch_type = st.radio("Batch Type", ["Template Batch", "Custom Parameter Sweep"])
if batch_type == "Template Batch":
self._show_template_batch_experiments()
else:
self._show_custom_batch_experiments()

    def _show_template_batch_experiments(self):
"""Show interface for running batch experiments from templates"""
st.write("**Run Multiple Template Experiments**")
try:
available_experiments = self.experiment_builder.get_templates()
# Select experiment types to run
experiment_types = st.multiselect(
"Select Experiment Types",
["baseline", "advanced", "feature_study", "tuning"],
default=["baseline"]
)
if experiment_types:
selected_experiments = []
for exp_type in experiment_types:
experiments = available_experiments.get(exp_type, [])
if experiments:
st.write(f"**{exp_type.title()} Experiments:**")
exp_names = [exp.get("name", f"Exp {i}") for i, exp in enumerate(experiments)]
selected_names = st.multiselect(
f"Select {exp_type} experiments",
exp_names,
key=f"select_{exp_type}"
)
for name in selected_names:
for exp in experiments:
if exp.get("name") == name:
selected_experiments.append(exp)
if st.button("🚀 Run Selected Template Experiments"):
self._run_template_batch_experiments(selected_experiments)
except Exception as e:
st.error(f"Error loading templates for batch experiments: {e}")

    def _run_template_batch_experiments(self, selected_experiments: List[Dict]):
"""Run batch experiments from templates"""
if not selected_experiments:
st.warning("No experiments selected")
return
with st.spinner(f"Running {len(selected_experiments)} template experiments..."):
try:
experiment_configs = []
for exp_template in selected_experiments:
config = self.experiment_builder.from_template(exp_template)
experiment_configs.append(config)
# Run batch experiments
experiment_ids = self.experiment_runner.run_experiment_batch(experiment_configs)
st.success(f"Completed {len(experiment_ids)} template experiments!")
# Show summary
if experiment_ids:
comparison = self.experiment_runner.compare_experiments(experiment_ids)
st.write("**Template Batch Results:**")
st.dataframe(
comparison[["name", "model_type", "test_accuracy"]],
use_container_width=True,
)
except Exception as e:
st.error(f"Error running template batch experiments: {e}")

    def _show_custom_batch_experiments(self):
"""Show interface for custom parameter sweep experiments"""
# Parameter sweep configuration
with st.form("batch_experiments"):
st.write("**Parameter Sweep Configuration**")
col1, col2 = st.columns(2)
with col1:
base_name = st.text_input("Base Experiment Name", "parameter_sweep")
model_types = st.multiselect(
"Model Types", list_available_models(), default=["logistic_regression"]
)
# N-gram ranges for logistic regression
st.write("**Logistic Regression Parameters**")
ngram_ranges = st.text_area(
"N-gram Ranges (one per line, format: min,max)", "2,4\n2,5\n3,6"
)
with col2:
feature_combinations = st.multiselect(
"Feature Combinations",
[f.value for f in FeatureType],
default=["full_name", "native_name", "surname"],
)
test_sizes = st.text_input("Test Sizes (comma-separated)", "0.15,0.2,0.25")
tags = st.text_input("Common Tags", "parameter_sweep,batch")
if st.form_submit_button("🚀 Run Parameter Sweep"):
self.run_batch_experiments(
base_name, model_types, ngram_ranges, feature_combinations, test_sizes, tags
)

    def run_batch_experiments(
self,
base_name: str,
model_types: List[str],
ngram_ranges: str,
feature_combinations: List[str],
test_sizes: str,
tags: str,
):
"""Run batch experiments with parameter combinations"""
with st.spinner("Running batch experiments..."):
try:
experiments = []
# Parse parameters
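                # e.g. "2,4\n2,5\n3,6" -> [[2, 4], [2, 5], [3, 6]];
                # lines without a comma are silently skipped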
ngram_list = []
for line in ngram_ranges.strip().split("\n"):
if "," in line:
min_val, max_val = map(int, line.split(","))
ngram_list.append([min_val, max_val])
test_size_list = [float(x.strip()) for x in test_sizes.split(",")]
tag_list = [tag.strip() for tag in tags.split(",") if tag.strip()]
                # Generate one experiment per (model, feature, test_size)
                # combination; logistic regression additionally sweeps each
                # configured n-gram range
for model_type in model_types:
for feature_combo in feature_combinations:
for test_size in test_size_list:
if model_type == "logistic_regression":
for ngram_range in ngram_list:
exp_name = f"{base_name}_{model_type}_{feature_combo}_{ngram_range[0]}_{ngram_range[1]}_{test_size}"
config = ExperimentConfig(
name=exp_name,
description=f"Batch experiment: {model_type} with {feature_combo}",
model_type=model_type,
features=[FeatureType(feature_combo)],
model_params={"ngram_range": ngram_range},
test_size=test_size,
tags=tag_list,
)
experiments.append(config)
else:
exp_name = f"{base_name}_{model_type}_{feature_combo}_{test_size}"
config = ExperimentConfig(
name=exp_name,
description=f"Batch experiment: {model_type} with {feature_combo}",
model_type=model_type,
features=[FeatureType(feature_combo)],
test_size=test_size,
tags=tag_list,
)
experiments.append(config)
# Run experiments
experiment_ids = self.experiment_runner.run_experiment_batch(experiments)
st.success(f"Completed {len(experiment_ids)} batch experiments")
# Show summary
if experiment_ids:
comparison = self.experiment_runner.compare_experiments(experiment_ids)
st.write("**Batch Results Summary:**")
st.dataframe(
comparison[["name", "model_type", "test_accuracy"]],
use_container_width=True,
)
except Exception as e:
st.error(f"Error running batch experiments: {e}")