fix: normalize hyperparameters
@@ -29,7 +29,7 @@ llm:
 # Data handling configuration
 data:
   split_evaluation: false
-  max_dataset_size: null
+  max_dataset_size: 100_000
   balance_by_sex: true
 
 # Enhanced logging for development
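Note on the new default: `100_000` relies on underscore digit separators, which YAML 1.1 loaders such as PyYAML resolve to the integer 100000, while a strict YAML 1.2 loader reads the same token as the string "100_000". A quick check, assuming PyYAML is the loader in use here:

    # Hedged check: assumes PyYAML (YAML 1.1 resolver). A pure YAML 1.2 loader
    # would hand back the string "100_000" instead of an int.
    import yaml

    cfg = yaml.safe_load("max_dataset_size: 100_000")
    print(cfg, type(cfg["max_dataset_size"]))  # {'max_dataset_size': 100000} <class 'int'>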
@@ -13,7 +13,7 @@ from research.neural_network_model import NeuralNetworkModel
 class BiGRUModel(NeuralNetworkModel):
     """Bidirectional GRU model for name classification"""
 
-    def build_model_with_vocab(self, vocab_size: int, max_len: int = 6, **kwargs) -> Any:
+    def build_model_with_vocab(self, vocab_size: int, **kwargs) -> Any:
         params = kwargs
         model = Sequential(
             [
@@ -22,7 +22,6 @@ class BiGRUModel(NeuralNetworkModel):
                 Embedding(
                     input_dim=vocab_size,
                     output_dim=params.get("embedding_dim", 64),
-                    input_length=max_len,
                     mask_zero=True,
                 ),
                 # First recurrent block returns full sequences to allow stacking.
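Dropping `input_length` lines up with Keras 3, where `Embedding` no longer accepts that argument; with `mask_zero=True` the padding positions are masked and the sequence length is taken from the input tensor itself. A minimal sketch (assumes TensorFlow with Keras 3; toy ids, not the project's data):

    # Embedding without input_length infers sequence length from the batch;
    # mask_zero=True masks the zero-padded positions downstream.
    import numpy as np
    from tensorflow import keras

    emb = keras.layers.Embedding(input_dim=40, output_dim=64, mask_zero=True)
    batch = np.array([[3, 7, 2, 0, 0, 0]])  # zero-padded token ids
    print(emb(batch).shape)                 # (1, 6, 64)

The same removal applies to the LSTM hunk below.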
@@ -21,7 +21,7 @@ from research.neural_network_model import NeuralNetworkModel
 class CNNModel(NeuralNetworkModel):
     """1D Convolutional Neural Network for character patterns"""
 
-    def build_model_with_vocab(self, vocab_size: int, max_len: int = 20, **kwargs) -> Any:
+    def build_model_with_vocab(self, vocab_size: int, **kwargs) -> Any:
         """Build CNN model with known vocabulary size"""
 
         params = kwargs
@@ -13,7 +13,7 @@ from research.neural_network_model import NeuralNetworkModel
 class LSTMModel(NeuralNetworkModel):
     """LSTM model for sequence learning"""
 
-    def build_model_with_vocab(self, vocab_size: int, max_len: int = 6, **kwargs) -> Any:
+    def build_model_with_vocab(self, vocab_size: int, **kwargs) -> Any:
         params = kwargs
         model = Sequential(
             [
@@ -21,7 +21,6 @@ class LSTMModel(NeuralNetworkModel):
                 Embedding(
                     input_dim=vocab_size,
                     output_dim=params.get("embedding_dim", 64),
-                    input_length=max_len,
                     mask_zero=True,
                 ),
                 # Stacked bidirectional LSTMs: first returns sequences to feed the next.
@@ -17,7 +17,7 @@ class NaiveBayesModel(TraditionalModel):
         # includes unigrams for coverage and higher n for suffix/prefix cues.
         vectorizer = CountVectorizer(
             analyzer="char",
-            ngram_range=params.get("ngram_range", (1, 4)),
+            ngram_range=params.get("ngram_range", (2, 5)),
             max_features=params.get("max_features", 8000),
         )
 
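Worth noting: with the default moved to (2, 5) the vectorizer no longer emits unigrams at all, so the unchanged comment above about "unigrams for coverage" is now stale. A quick look at what the new range produces (toy inputs, assumed defaults):

    # Char n-grams of length 2 through 5: bigrams and longer, no single chars.
    from sklearn.feature_extraction.text import CountVectorizer

    vec = CountVectorizer(analyzer="char", ngram_range=(2, 5), max_features=8000)
    vec.fit(["maria", "mario"])
    print(sorted(vec.get_feature_names_out())[:6])  # ['ar', 'ari', 'aria', ...]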
@@ -22,21 +22,21 @@ from research.neural_network_model import NeuralNetworkModel
 class TransformerModel(NeuralNetworkModel):
     """Transformer-based model"""
 
-    def build_model_with_vocab(self, vocab_size: int, max_len: int = 6, **kwargs) -> Any:
+    def build_model_with_vocab(self, vocab_size: int, **kwargs) -> Any:
         params = kwargs
 
         # Build Transformer model
-        inputs = Input(shape=(max_len,))
+        inputs = Input(shape=(params.get("max_len", 8),))
         x = Embedding(
             input_dim=vocab_size,
             output_dim=params.get("embedding_dim", 64),
-            input_length=max_len,
+            input_length=params.get("max_len", 8),
             mask_zero=True,
         )(inputs)
 
         # Add positional encoding
-        positions = tf.range(start=0, limit=max_len, delta=1)
-        pos_embedding = Embedding(input_dim=max_len, output_dim=params.get("embedding_dim", 64))(
+        positions = tf.range(start=0, limit=params.get("max_len", 8), delta=1)
+        pos_embedding = Embedding(input_dim=params.get("max_len", 8), output_dim=params.get("embedding_dim", 64))(
             positions
         )
         x = x + pos_embedding
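Since `params.get("max_len", 8)` is now evaluated at several separate sites, the fallback values can silently drift apart; hoisting the lookup into one local keeps them in sync. A runnable sketch of the positional-encoding arithmetic this hunk builds (dimensions and ids assumed, not the project's data):

    # Token embeddings plus a learned position embedding for ids 0..max_len-1;
    # the (max_len, dim) position table broadcasts over the batch axis.
    import numpy as np
    import tensorflow as tf
    from tensorflow.keras.layers import Embedding

    max_len, vocab_size, embedding_dim = 8, 40, 64  # hoisted once, used everywhere
    tok = Embedding(vocab_size, embedding_dim, mask_zero=True)
    pos = Embedding(input_dim=max_len, output_dim=embedding_dim)

    batch = np.array([[3, 7, 2, 0, 0, 0, 0, 0], [5, 1, 0, 0, 0, 0, 0, 0]])
    positions = tf.range(start=0, limit=max_len, delta=1)
    x = tok(batch) + pos(positions)  # (2, 8, 64) + (8, 64) -> (2, 8, 64)
    print(x.shape)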
@@ -52,22 +52,21 @@ class NeuralNetworkModel(BaseModel):
         logging.info(f"Vocabulary size: {vocab_size}")
 
         # Get additional model parameters
-        max_len = self.config.model_params.get("max_len", 6)
+        self.model = self.build_model_with_vocab(vocab_size=vocab_size, **self.config.model_params)
 
-        self.model = self.build_model_with_vocab(
-            vocab_size=vocab_size, max_len=max_len, **self.config.model_params
-        )
 
         # Train the neural network
         logging.info(
             f"Fitting model with {X_prepared.shape[0]} samples and {X_prepared.shape[1]} features"
         )
+        logging.info(X_prepared[0])
+        logging.info(f"Model parameters: {self.config.model_params}")
 
         history = self.model.fit(
             X_prepared,
             y_encoded,
             epochs=self.config.model_params.get("epochs", 10),
             batch_size=self.config.model_params.get("batch_size", 64),
-            validation_split=0.1,
+            validation_split=self.config.model_params.get("validation_split", 0.1),
             verbose=2,
         )
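The refactor works because everything in `model_params` is forwarded as keyword arguments and each builder pulls what it needs out of `kwargs`; keys a builder ignores, such as `epochs` or `batch_size`, simply sit unused in `params`. A toy mirror of the call chain (names assumed from the diff):

    # Hypothetical sketch: max_len travels inside model_params instead of
    # being plumbed through as an explicit named argument.
    from typing import Any

    def build_model_with_vocab(vocab_size: int, **kwargs: Any):
        params = kwargs
        return {"vocab": vocab_size, "max_len": params.get("max_len", 8)}

    model_params = {"max_len": 12, "embedding_dim": 64, "epochs": 10}
    print(build_model_with_vocab(vocab_size=40, **model_params))
    # {'vocab': 40, 'max_len': 12}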
@@ -59,9 +59,19 @@ class TraditionalModel(BaseModel):
             f"Fitting model with {X_prepared.shape[0]} samples and {X_prepared.shape[1]} features"
         )
 
-        self.model.fit(X_prepared, y_encoded)
+        logging.info(X_prepared[0])
+        logging.info(f"Model parameters: {self.config.model_params}")
+
+        history = self.model.fit(X_prepared, y_encoded)
         self.is_fitted = True
 
+        self.training_history = {
+            "accuracy": history.history["accuracy"],
+            "loss": history.history["loss"],
+            "val_accuracy": history.history.get("val_accuracy", []),
+            "val_loss": history.history.get("val_loss", []),
+        }
+
         return self
 
     def cross_validate(self, X: pd.DataFrame, y: pd.Series, cv_folds: int = 5) -> Dict[str, float]:
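One caveat with the new bookkeeping: if `self.model` is a scikit-learn estimator (as the `CountVectorizer` usage above suggests), `fit()` returns the estimator itself, which has no `.history` attribute, so the `history.history[...]` lookups would raise at runtime. A defensive variant of the added lines, hedged on that assumption:

    # Guarded sketch: tolerate estimators whose fit() does not return a
    # Keras-style History object (sklearn's fit() returns the estimator).
    history = self.model.fit(X_prepared, y_encoded)
    hist = getattr(history, "history", None) or {}
    self.training_history = {
        "accuracy": hist.get("accuracy", []),
        "loss": hist.get("loss", []),
        "val_accuracy": hist.get("val_accuracy", []),
        "val_loss": hist.get("val_loss", []),
    }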