# Baseline experiment definitions.
#
# Each list entry describes one experiment and carries the same keys:
#   name         - identifier of the experiment
#   description  - human-readable summary
#   model_type   - model family the entry refers to
#   features     - list of input feature names
#   model_params - hyperparameters for that model type
#   tags         - free-form labels attached to the experiment
baseline_experiments:
  - name: "bigru"
    description: "Baseline BiGRU with full name features"
    model_type: "bigru"
    features: ["full_name"]
    model_params:
      max_len: 20
      embedding_dim: 64
      gru_units: 32
      epochs: 2
      batch_size: 32
    tags: ["baseline", "neural", "bigru"]

  - name: "cnn"
    description: "Baseline CNN with character patterns"
    model_type: "cnn"
    features: ["full_name"]
    model_params:
      max_len: 20
      embedding_dim: 64
      filters: 64
      kernel_size: 3
      dropout: 0.5
      epochs: 2
      batch_size: 32
    tags: ["baseline", "neural", "cnn"]

  - name: "ensemble"
    description: "Baseline Ensemble with multiple models"
    model_type: "ensemble"
    features: ["full_name", "name_length", "word_count"]
    model_params:
      base_models: ["logistic_regression", "random_forest", "xgboost"]
      voting: "soft"
      cv_folds: 5
    tags: ["baseline", "ensemble"]

  - name: "lightgbm"
    description: "Baseline LightGBM with engineered features"
    model_type: "lightgbm"
    features: ["full_name", "name_length", "word_count"]
    model_params:
      n_estimators: 100
      max_depth: -1
      learning_rate: 0.1
      num_leaves: 31
      subsample: 0.8
      colsample_bytree: 0.8
    tags: ["baseline", "lightgbm"]

  # Three logistic-regression variants that differ only in the input feature
  # (full name / native name / surname) and in max_features.
  - name: "logistic_regression_fullname"
    description: "Baseline logistic regression with full name"
    model_type: "logistic_regression"
    features: ["full_name"]
    model_params:
      max_features: 10000
    tags: ["baseline", "logistic_regression", "fullname"]

  - name: "logistic_regression_native"
    description: "Logistic regression with native name only"
    model_type: "logistic_regression"
    features: ["native_name"]
    model_params:
      max_features: 5000
    tags: ["baseline", "logistic_regression", "native"]

  - name: "logistic_regression_surname"
    description: "Logistic regression with surname name only"
    model_type: "logistic_regression"
    features: ["surname"]
    model_params:
      max_features: 5000
    tags: ["baseline", "logistic_regression", "surname"]

  - name: "lstm"
    description: "Baseline LSTM with full name features"
    model_type: "lstm"
    features: ["full_name"]
    model_params:
      embedding_dim: 128
      lstm_units: 64
      epochs: 2
      batch_size: 64
    tags: ["baseline", "neural", "lstm"]

  - name: "naive_bayes"
    description: "Baseline Naive Bayes with full name features"
    model_type: "naive_bayes"
    features: ["full_name"]
    model_params:
      max_features: 5000
    tags: ["baseline", "naive_bayes"]

  # The only entry that does not list "full_name" among its features.
  - name: "random_forest"
    description: "Baseline Random Forest with engineered features"
    model_type: "random_forest"
    features: ["name_length", "word_count", "province"]
    model_params:
      n_estimators: 100
      max_depth: 10
      min_samples_split: 2
      min_samples_leaf: 1
    tags: ["baseline", "random_forest", "engineered"]

  - name: "svm"
    description: "Baseline SVM with full name features"
    model_type: "svm"
    features: ["full_name"]
    model_params:
      C: 1.0
      kernel: "rbf"
      ngram_range: [2, 4]
      max_features: 5000
    tags: ["baseline", "svm"]

  - name: "transformer"
    description: "Baseline Transformer with attention mechanism"
    model_type: "transformer"
    features: ["full_name"]
    model_params:
      embedding_dim: 128
      num_heads: 4
      num_layers: 2
      epochs: 2
      batch_size: 64
    tags: ["baseline", "neural", "transformer"]

  - name: "xgboost"
    description: "Baseline XGBoost with engineered features"
    model_type: "xgboost"
    features: ["full_name", "name_length", "word_count"]
    model_params:
      n_estimators: 100
      max_depth: 6
      learning_rate: 0.1
      subsample: 0.8
      colsample_bytree: 0.8
    tags: ["baseline", "xgboost"]

# NOTE(review): the three sections below are empty placeholders. They are
# written as explicit `null` (same parsed value as a bare `key:`). Their
# top-level placement is reconstructed from the key order -- confirm that
# feature_studies / hyperparameter_tuning are not meant to nest under
# advanced_experiments.

# Advanced Experiments Configuration
advanced_experiments: null

# Feature Study Configurations
feature_studies: null

# Hyperparameter Tuning Configurations
hyperparameter_tuning: null