feat: enhance logging and memory management across modules

This commit is contained in:
2025-08-13 23:09:05 +02:00
parent 47e52d130c
commit 9601c5e44d
48 changed files with 1004 additions and 773 deletions
+11 -7
View File
@@ -28,7 +28,7 @@ class XGBoostModel(TraditionalModel):
colsample_bytree=params.get("colsample_bytree", 0.8),
random_state=self.config.random_seed,
eval_metric="logloss",
verbosity=2
verbosity=2,
)
def prepare_features(self, X: pd.DataFrame) -> np.ndarray:
@@ -50,14 +50,18 @@ class XGBoostModel(TraditionalModel):
self.vectorizers[feature_key] = CountVectorizer(
analyzer="char", ngram_range=(2, 3), max_features=100
)
char_features = self.vectorizers[feature_key].fit_transform(
column.fillna("").astype(str)
).toarray()
char_features = (
self.vectorizers[feature_key]
.fit_transform(column.fillna("").astype(str))
.toarray()
)
else:
# Subsequent times - use existing vectorizer
char_features = self.vectorizers[feature_key].transform(
column.fillna("").astype(str)
).toarray()
char_features = (
self.vectorizers[feature_key]
.transform(column.fillna("").astype(str))
.toarray()
)
features.append(char_features)
else: