refactoring: add initial pipeline configuration and model classes

This commit is contained in:
2025-08-04 16:12:25 +02:00
parent 19c66fd0ee
commit f4689faf80
82 changed files with 7176 additions and 1218 deletions
+121 -18
View File
@@ -2,24 +2,127 @@
# Running `make` with no goal (or `make default`) prints the task list.
.PHONY: default help
default: help

# Self-documenting help. awk scans every makefile that has been parsed and, for
# each line shaped like "<target>: ... ## <description>", prints the target name
# in cyan (padded to 30 columns) followed by the description.
# The rule is defined exactly once: an earlier `help:` rule with an
# `@echo Tasks:` recipe was being overridden by this one, which only produced
# an "overriding recipe" warning.
help: ## Show this help message
	@awk 'BEGIN {FS = ":.*?## "} /^[a-zA-Z_-]+:.*?## / {printf "\033[36m%-30s\033[0m %s\n", $$1, $$2}' $(MAKEFILE_LIST)
# Fetch the dataset into dataset/names.csv unless that file already exists.
#
# DATASET_URL is taken from the environment, then from .env.local (sourced with
# auto-export on, as before), then from .env as a fallback when still empty.
# Hardening over the previous recipe:
#   * env files are sourced as ./file, because a slash-less operand to `.` is
#     looked up via PATH under POSIX rules rather than in the working directory;
#   * a missing .env or an empty DATASET_URL is reported explicitly;
#   * curl runs with -f so an HTTP error does not get saved as the CSV, and the
#     payload lands in a temp file that is only renamed on success, so a failed
#     download never leaves a names.csv that later runs would "skip".
.PHONY: download
download: ## Download dataset/names.csv if it is not already present
	@if [ ! -f dataset/names.csv ]; then \
		set -a; \
		if [ -f .env.local ]; then . ./.env.local; fi; \
		set +a; \
		if [ -z "$$DATASET_URL" ] && [ -f .env ]; then . ./.env; fi; \
		if [ -z "$$DATASET_URL" ]; then \
			echo "DATASET_URL is not set (checked environment, .env.local and .env)." >&2; \
			exit 1; \
		fi; \
		mkdir -p dataset && \
		curl -fL "$${DATASET_URL}" -o dataset/names.csv.tmp && \
		mv -f dataset/names.csv.tmp dataset/names.csv || \
		{ rm -f dataset/names.csv.tmp; exit 1; }; \
	else \
		echo "dataset/names.csv already exists. Skipping download."; \
	fi
# =============================================================================
# ENVIRONMENT SETUP
# =============================================================================
# Delete the ./models and ./results trees plus the train/dev .spacy files
# under ./dataset/spacy. `rm -rf` is silent when a path is already absent.
.PHONY: clean
clean:
	rm -rf ./models ./results
	rm -rf ./dataset/spacy/train.spacy ./dataset/spacy/dev.spacy
# Create the .venv virtual environment, then use the pip inside it to upgrade
# itself and install requirements.txt. The chain stops at the first failure.
.PHONY: setup
setup: ## Setup virtual environment and install dependencies
	python -m venv .venv && \
	.venv/bin/pip install --upgrade pip && \
	.venv/bin/pip install -r requirements.txt
# Upgrade pip and install requirements.txt with whichever `pip` is first on
# PATH (i.e. the active environment, not necessarily .venv).
.PHONY: install
install: ## Install/update dependencies
	pip install --upgrade pip && \
	pip install -r requirements.txt
# Install requirements.txt, then the notebook, test, formatting, lint and
# type-check tooling, using the `pip` found on PATH.
.PHONY: install-dev
install-dev: ## Install development dependencies
	pip install -r requirements.txt && \
	pip install jupyter notebook ipykernel pytest black flake8 mypy
# A recipe runs in a child shell and cannot activate the caller's environment,
# so this target only prints the command to run by hand.
.PHONY: activate
activate: ## Show activation command
	@printf '%s\n' "Run: source .venv/bin/activate"
# =============================================================================
# MODEL TRAINING & ARTIFACTS
# =============================================================================
# Invoke research/train.py in "baseline" mode.
.PHONY: train-baseline
train-baseline: ## Train all baseline models and save artifacts
	python research/train.py --mode baseline
# Invoke research/train.py in "neural" mode.
.PHONY: train-neural
train-neural: ## Train neural network models (LSTM, CNN, Transformer)
	python research/train.py --mode neural
# Train one model type via research/train.py.
#
# Variables (pass on the command line):
#   MODEL  required; forwarded as --model-type. The recipe aborts with a usage
#          message when it is empty instead of emitting a dangling flag.
#   NAME   optional; forwarded as --name only when non-empty.
#          NOTE(review): assumes research/train.py accepts a run without
#          --name; confirm against its argument parser.
.PHONY: train-model
train-model: ## Train specific model (use: make train-model MODEL=logistic_regression NAME=my_model)
	$(if $(strip $(MODEL)),,$(error MODEL is required. Usage: make train-model MODEL=logistic_regression NAME=my_model))
	python research/train.py --model-type $(MODEL) $(if $(strip $(NAME)),--name $(NAME))
# Invoke research/train.py in "list" mode.
.PHONY: list-models
list-models: ## List all saved model artifacts
	python research/train.py --mode list
# =============================================================================
# RESEARCH & EXPERIMENTS
# =============================================================================
# Run the research CLI's `run` sub-command with a fixed sample configuration:
# experiment name "sample_experiment", the full_name feature, and a
# logistic_regression model.
.PHONY: experiment
experiment: ## Create sample experiment configuration
	python research/cli.py run \
		--name "sample_experiment" \
		--features full_name \
		--model-type logistic_regression
# Run the research CLI's `baseline` sub-command.
.PHONY: baseline
baseline: ## Run baseline experiments
	python research/cli.py baseline
# Run the research CLI's `ablation` sub-command.
.PHONY: ablation
ablation: ## Run feature ablation study
	python research/cli.py ablation
# Run the research CLI's `components` sub-command.
.PHONY: components
components: ## Run name component analysis
	python research/cli.py components
# Run the research CLI's `list` sub-command with no status filter.
.PHONY: list-experiments
list-experiments: ## List all experiments
	python research/cli.py list
# Run the research CLI's `list` sub-command filtered to status "completed".
.PHONY: list-completed
list-completed: ## List completed experiments only
	python research/cli.py list --status completed
# Run the research CLI's `export` sub-command, writing to
# results_<YYYYmmdd_HHMMSS>.csv in the working directory. The timestamp comes
# from `date`, evaluated by Make when the recipe is expanded.
.PHONY: export-results
export-results: ## Export all experiment results to CSV
	python research/cli.py export \
		--output results_$(shell date +%Y%m%d_%H%M%S).csv
# Print the first five lines of the completed-experiments listing.
# NOTE(review): this is only the "best" model if `cli.py list` prints results
# best-first; confirm the CLI's sort order.
#
# The listing is captured before being trimmed rather than piped straight into
# `head`: with a plain pipe the recipe's exit status is head's, so a failing
# cli.py went unnoticed. Now a non-zero exit from the CLI fails the target,
# and nothing is printed when the listing is empty.
.PHONY: best-model
best-model: ## Show best performing model
	listing=$$(python research/cli.py list --status completed) || exit $$?; \
	[ -z "$$listing" ] || printf '%s\n' "$$listing" | head -5
# =============================================================================
# WEB INTERFACE
# =============================================================================
# Serve web/app.py with Streamlit on port 8501, with run-on-save enabled.
.PHONY: web
web: ## Launch Streamlit web interface
	streamlit run web/app.py \
		--server.runOnSave true \
		--server.port 8501
# =============================================================================
# DEVELOPMENT & CODE QUALITY
# =============================================================================
# Reformat the whole tree in place with black, using a 100-column line limit.
.PHONY: format
format: ## Format code with black
	black . --line-length 100
# Lint the tree with flake8: 100-column limit (matching `format`), E203 and
# W503 ignored, and the .venv directory excluded.
.PHONY: lint
lint: ## Lint code with flake8
	flake8 . \
		--max-line-length=100 \
		--ignore=E203,W503 \
		--exclude=.venv
# Type-check the tree with mypy; imports whose stubs or sources cannot be
# found are ignored rather than reported.
.PHONY: type-check
type-check: ## Type check with mypy
	mypy . --ignore-missing-imports
# Launch the classic Jupyter Notebook server on the notebooks/ directory.
.PHONY: notebook
notebook: ## Start Jupyter notebook
	jupyter notebook notebooks/
# Launch JupyterLab on the notebooks/ directory.
.PHONY: lab
lab: ## Start Jupyter lab
	jupyter lab notebooks/
# =============================================================================
# DEPLOYMENT & PRODUCTION
# =============================================================================
# Copy data/ into a fresh backups/<YYYYmmdd_HHMMSS>/data/ directory.
#
# The timestamp is taken once, in the shell, and reused for mkdir, cp and the
# final message. Previously each recipe line called `date` separately, so the
# three paths could disagree if the clock ticked between the calls. The steps
# are chained with && so the success message is printed only when the copy
# actually succeeded.
# NOTE(review): the description mentions "datasets and results", but only
# data/ is copied; confirm whether other directories should be included.
.PHONY: backup
backup: ## Backup datasets and results
	@stamp=$$(date +%Y%m%d_%H%M%S) && \
	dest="backups/$$stamp" && \
	mkdir -p "$$dest" && \
	cp -r data/ "$$dest/data/" && \
	echo "Backup created in $$dest/"