fix: normalize hyper params

2025-09-21 13:10:07 +02:00
parent 83d21c640b
commit 63e23d6600
8 changed files with 26 additions and 19 deletions
+1 -1
```diff
@@ -29,7 +29,7 @@ llm:
 # Data handling configuration
 data:
   split_evaluation: false
-  max_dataset_size: null
+  max_dataset_size: 100_000
   balance_by_sex: true

 # Enhanced logging for development
```
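This hunk replaces the unlimited default (`null`) with an explicit cap of 100,000 rows. Two notes: `100_000` relies on YAML 1.1 underscore support, which PyYAML accepts but a strict YAML 1.2 parser may read as a string; and the loader that enforces the cap is not part of this diff, so the sketch below only illustrates the usual pattern — `apply_dataset_cap` is an assumed name, not repository code.

```python
# Hypothetical sketch (not from this repo): enforcing a max_dataset_size cap.
from typing import Optional

import pandas as pd


def apply_dataset_cap(df: pd.DataFrame, max_dataset_size: Optional[int]) -> pd.DataFrame:
    """Return at most max_dataset_size rows; None means no cap."""
    if max_dataset_size is not None and len(df) > max_dataset_size:
        # Sample instead of truncating so the cap does not bias toward file order.
        return df.sample(n=max_dataset_size, random_state=42)
    return df
```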
+1 -2
```diff
@@ -13,7 +13,7 @@ from research.neural_network_model import NeuralNetworkModel
 class BiGRUModel(NeuralNetworkModel):
     """Bidirectional GRU model for name classification"""

-    def build_model_with_vocab(self, vocab_size: int, max_len: int = 6, **kwargs) -> Any:
+    def build_model_with_vocab(self, vocab_size: int, **kwargs) -> Any:
         params = kwargs
         model = Sequential(
             [
@@ -22,7 +22,6 @@ class BiGRUModel(NeuralNetworkModel):
                 Embedding(
                     input_dim=vocab_size,
                     output_dim=params.get("embedding_dim", 64),
-                    input_length=max_len,
                     mask_zero=True,
                 ),
                 # First recurrent block returns full sequences to allow stacking.
```
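Dropping `input_length` from the `Embedding` layer loses nothing: the layer infers sequence length from its input, and `mask_zero=True` still produces the padding mask the GRU layers consume. A minimal sketch, assuming the TensorFlow Keras API and illustrative sizes:

```python
# Sketch: Embedding needs no input_length, and mask_zero still yields the mask.
import numpy as np
from tensorflow.keras.layers import Embedding

emb = Embedding(input_dim=40, output_dim=8, mask_zero=True)
batch = np.array([[5, 12, 3, 0, 0, 0]])  # zero-padded token ids, shape (1, 6)
out = emb(batch)                         # shape (1, 6, 8)
mask = emb.compute_mask(batch)           # [[True, True, True, False, False, False]]
```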
+1 -1
```diff
@@ -21,7 +21,7 @@ from research.neural_network_model import NeuralNetworkModel
 class CNNModel(NeuralNetworkModel):
     """1D Convolutional Neural Network for character patterns"""

-    def build_model_with_vocab(self, vocab_size: int, max_len: int = 20, **kwargs) -> Any:
+    def build_model_with_vocab(self, vocab_size: int, **kwargs) -> Any:
         """Build CNN model with known vocabulary size"""
         params = kwargs
```
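With `max_len` removed from every builder signature, sequence length becomes purely a preprocessing concern. A sketch of where it typically lives now, using Keras's `pad_sequences`; the value 20 mirrors the old CNN default and is not taken from this diff:

```python
# Sketch: fix the sequence length during preprocessing, not inside the model.
from tensorflow.keras.preprocessing.sequence import pad_sequences

encoded_names = [[5, 12, 3], [7, 1, 9, 4, 2]]  # variable-length token id lists
X = pad_sequences(encoded_names, maxlen=20, padding="post")  # shape (2, 20)
```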
+1 -2
```diff
@@ -13,7 +13,7 @@ from research.neural_network_model import NeuralNetworkModel
 class LSTMModel(NeuralNetworkModel):
     """LSTM model for sequence learning"""

-    def build_model_with_vocab(self, vocab_size: int, max_len: int = 6, **kwargs) -> Any:
+    def build_model_with_vocab(self, vocab_size: int, **kwargs) -> Any:
         params = kwargs
         model = Sequential(
             [
@@ -21,7 +21,6 @@ class LSTMModel(NeuralNetworkModel):
                 Embedding(
                     input_dim=vocab_size,
                     output_dim=params.get("embedding_dim", 64),
-                    input_length=max_len,
                     mask_zero=True,
                 ),
                 # Stacked bidirectional LSTMs: first returns sequences to feed the next.
```
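The same `input_length` removal as in the BiGRU model. It is more than a cleanup if the project ever moves to Keras 3, where the argument was removed entirely and passing it fails; the error text below is approximate and may vary by version:

```python
# On Keras 3 the old call is rejected outright (approximate error shown).
from keras.layers import Embedding

Embedding(input_dim=40, output_dim=64, input_length=6, mask_zero=True)
# ValueError: Unrecognized keyword arguments passed to Embedding: {'input_length': 6}
```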
+1 -1
```diff
@@ -17,7 +17,7 @@ class NaiveBayesModel(TraditionalModel):
         # includes unigrams for coverage and higher n for suffix/prefix cues.
         vectorizer = CountVectorizer(
             analyzer="char",
-            ngram_range=params.get("ngram_range", (1, 4)),
+            ngram_range=params.get("ngram_range", (2, 5)),
             max_features=params.get("max_features", 8000),
         )
```
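Note that the adjacent comment still promises unigram coverage, while the new default `(2, 5)` starts at bigrams. A quick check of what the vectorizer now extracts from a short name:

```python
# Character n-grams under the new default range (2, 5).
from sklearn.feature_extraction.text import CountVectorizer

vec = CountVectorizer(analyzer="char", ngram_range=(2, 5))
vec.fit(["anna"])
print(sorted(vec.get_feature_names_out()))
# ['an', 'ann', 'anna', 'na', 'nn', 'nna']  -- no single characters anymore
```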
+5 -5
```diff
@@ -22,21 +22,21 @@ from research.neural_network_model import NeuralNetworkModel
 class TransformerModel(NeuralNetworkModel):
     """Transformer-based model"""

-    def build_model_with_vocab(self, vocab_size: int, max_len: int = 6, **kwargs) -> Any:
+    def build_model_with_vocab(self, vocab_size: int, **kwargs) -> Any:
         params = kwargs

         # Build Transformer model
-        inputs = Input(shape=(max_len,))
+        inputs = Input(shape=(params.get("max_len", 8),))
         x = Embedding(
             input_dim=vocab_size,
             output_dim=params.get("embedding_dim", 64),
-            input_length=max_len,
+            input_length=params.get("max_len", 8),
             mask_zero=True,
         )(inputs)

         # Add positional encoding
-        positions = tf.range(start=0, limit=max_len, delta=1)
-        pos_embedding = Embedding(input_dim=max_len, output_dim=params.get("embedding_dim", 64))(
+        positions = tf.range(start=0, limit=params.get("max_len", 8), delta=1)
+        pos_embedding = Embedding(input_dim=params.get("max_len", 8), output_dim=params.get("embedding_dim", 64))(
             positions
         )
         x = x + pos_embedding
```
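Unlike the other models, the Transformer keeps `input_length` (which Keras 3 no longer accepts) and now calls `params.get("max_len", 8)` in four places. A possible tidy-up, sketched here rather than taken from the commit, is to hoist each lookup once:

```python
# Sketch: read each hyperparameter once so the usages cannot drift apart.
import tensorflow as tf
from tensorflow.keras.layers import Embedding, Input


def embed_with_positions(vocab_size: int, **params):
    max_len = params.get("max_len", 8)
    embedding_dim = params.get("embedding_dim", 64)

    inputs = Input(shape=(max_len,))
    x = Embedding(input_dim=vocab_size, output_dim=embedding_dim, mask_zero=True)(inputs)
    positions = tf.range(start=0, limit=max_len, delta=1)
    pos_embedding = Embedding(input_dim=max_len, output_dim=embedding_dim)(positions)
    return inputs, x + pos_embedding  # token embedding + positional encoding
```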
+5 -6
```diff
@@ -52,22 +52,21 @@ class NeuralNetworkModel(BaseModel):
         logging.info(f"Vocabulary size: {vocab_size}")

         # Get additional model parameters
-        max_len = self.config.model_params.get("max_len", 6)
-        self.model = self.build_model_with_vocab(
-            vocab_size=vocab_size, max_len=max_len, **self.config.model_params
-        )
+        self.model = self.build_model_with_vocab(vocab_size=vocab_size, **self.config.model_params)

         # Train the neural network
         logging.info(
             f"Fitting model with {X_prepared.shape[0]} samples and {X_prepared.shape[1]} features"
         )
+        logging.info(X_prepared[0])
+        logging.info(f"Model parameters: {self.config.model_params}")
         history = self.model.fit(
             X_prepared,
             y_encoded,
             epochs=self.config.model_params.get("epochs", 10),
             batch_size=self.config.model_params.get("batch_size", 64),
-            validation_split=0.1,
+            validation_split=self.config.model_params.get("validation_split", 0.1),
             verbose=2,
         )
```
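After this hunk, one `model_params` mapping drives both model construction and training: the builders receive it via `**kwargs` (extra keys such as `epochs` are harmless, since each builder only `.get()`s what it needs), and `epochs`, `batch_size`, and the previously hard-coded `validation_split` are read from the same place. An illustrative config; only the defaults visible above are taken from the code:

```python
# Illustrative model_params; keys match the .get() lookups in this diff.
model_params = {
    "max_len": 8,              # used by models that still need a fixed length
    "embedding_dim": 64,
    "epochs": 10,
    "batch_size": 64,
    "validation_split": 0.15,  # was hard-coded to 0.1 before this commit
}
```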
+11 -1
```diff
@@ -59,9 +59,19 @@ class TraditionalModel(BaseModel):
             f"Fitting model with {X_prepared.shape[0]} samples and {X_prepared.shape[1]} features"
         )
-        self.model.fit(X_prepared, y_encoded)
+        logging.info(X_prepared[0])
+        logging.info(f"Model parameters: {self.config.model_params}")
+        history = self.model.fit(X_prepared, y_encoded)
         self.is_fitted = True
+        self.training_history = {
+            "accuracy": history.history["accuracy"],
+            "loss": history.history["loss"],
+            "val_accuracy": history.history.get("val_accuracy", []),
+            "val_loss": history.history.get("val_loss", []),
+        }
         return self

     def cross_validate(self, X: pd.DataFrame, y: pd.Series, cv_folds: int = 5) -> Dict[str, float]:
```
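One caution on this last hunk: `TraditionalModel` wraps scikit-learn estimators, whose `fit()` returns the estimator itself rather than a Keras `History`, so `history.history["accuracy"]` will raise `AttributeError` on this path. A guarded version, sketched rather than taken from the repo:

```python
# Sketch: tolerate fit() results that carry no Keras-style History.
history = self.model.fit(X_prepared, y_encoded)
self.is_fitted = True
history_dict = getattr(history, "history", {})  # {} for sklearn estimators
self.training_history = {
    "accuracy": history_dict.get("accuracy", []),
    "loss": history_dict.get("loss", []),
    "val_accuracy": history_dict.get("val_accuracy", []),
    "val_loss": history_dict.get("val_loss", []),
}
```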