refactor: update configuration loading and ensure directory existence across modules

This commit is contained in:
2025-08-07 00:36:32 +02:00
parent 104d7e1146
commit 96291b4ad0
9 changed files with 179 additions and 45 deletions
+21 -9
View File
@@ -95,9 +95,15 @@ class NeuralNetworkModel(BaseModel):
recalls = []
f1_scores = []
# Get vocabulary size and model parameters
vocab_size = len(self.tokenizer.word_index) + 1 if self.tokenizer else 1000
max_len = self.config.model_params.get("max_len", 6)
for fold, (train_idx, val_idx) in enumerate(cv.split(X_prepared, y_encoded)):
# Create fresh model for each fold
fold_model = self.build_model()
# Create fresh model for each fold using build_model_with_vocab
fold_model = self.build_model_with_vocab(
vocab_size=vocab_size, max_len=max_len, **self.config.model_params
)
# Train on fold
if hasattr(fold_model, "fit"):
@@ -127,13 +133,9 @@ class NeuralNetworkModel(BaseModel):
return {
"accuracy": np.mean(accuracies),
"accuracy_std": np.std(accuracies),
"precision": np.mean(precisions),
"precision_std": np.std(precisions),
"recall": np.mean(recalls),
"recall_std": np.std(recalls),
"f1": np.mean(f1_scores),
"f1_std": np.std(f1_scores),
}
def generate_learning_curve(
@@ -150,9 +152,17 @@ class NeuralNetworkModel(BaseModel):
"val_scores_std": [],
}
# Prepare features and get vocabulary size
features_df = self.feature_extractor.extract_features(X)
X_prepared = self.prepare_features(features_df)
y_encoded = self.label_encoder.transform(y)
vocab_size = len(self.tokenizer.word_index) + 1 if self.tokenizer else 1000
max_len = self.config.model_params.get("max_len", 6)
# Split data once for validation
X_train_full, X_val, y_train_full, y_val = train_test_split(
X, y, test_size=0.2, random_state=self.config.random_seed, stratify=y
X_prepared, y_encoded, test_size=0.2, random_state=self.config.random_seed, stratify=y_encoded
)
for size in train_sizes:
@@ -170,8 +180,10 @@ class NeuralNetworkModel(BaseModel):
val_scores = []
for seed in range(3): # 3 runs for variance
# Build fresh model
model = self.build_model()
# Build fresh model using build_model_with_vocab
model = self.build_model_with_vocab(
vocab_size=vocab_size, max_len=max_len, **self.config.model_params
)
# Train model
if hasattr(model, "fit"):