feat: document models

This commit is contained in:
2025-09-20 23:35:54 +02:00
parent dd2a9f2711
commit e41b15a863
13 changed files with 256 additions and 47 deletions
+22
View File
@@ -82,6 +82,25 @@ class NeuralNetworkModel(BaseModel):
self.is_fitted = True
return self
def _collect_text_corpus(self, X: pd.DataFrame) -> List[str]:
    """Build one text string per row of ``X`` from the configured features.

    Only features in ``self.config.features`` whose ``value`` names a
    column of ``X`` are used, in configuration order. Missing cells are
    replaced with empty strings and every cell is cast to ``str``.

    Args:
        X: Frame holding (some of) the configured feature columns.

    Returns:
        One string per row. With a single matching column its cells are
        returned as-is (empty strings included); with several, the
        non-empty cells of each row are joined by a single space.

    Raises:
        ValueError: If no configured feature is a column of ``X``.
    """
    candidates = (feature.value for feature in self.config.features)
    selected = [name for name in candidates if name in X.columns]
    if not selected:
        raise ValueError("No configured text features found in the provided DataFrame.")

    texts = X[selected].fillna("").astype(str)

    # A lone column needs no joining; hand its values back directly.
    if len(selected) == 1:
        return texts.iloc[:, 0].tolist()

    # Drop empty cells first so the join never produces stray spaces.
    return [
        " ".join(cell for cell in record if cell)
        for record in texts.itertuples(index=False)
    ]
def cross_validate(
self, X: pd.DataFrame, y: pd.Series, cv_folds: int = 5
) -> dict[str, np.floating[Any]]:
@@ -145,6 +164,9 @@ class NeuralNetworkModel(BaseModel):
"""Generate learning curve data for the model"""
logging.info(f"Generating learning curve for {self.__class__.__name__}")
if train_sizes is None:
train_sizes = [0.1, 0.3, 0.5, 0.7, 1.0]
learning_curve_data = {
"train_sizes": [],
"train_scores": [],